[bluesky] download images in original resolution (#4438)
(at least up to 2000 px)
This commit is contained in:
@@ -20,7 +20,7 @@ class BlueskyExtractor(Extractor):
|
||||
"""Base class for bluesky extractors"""
|
||||
category = "bluesky"
|
||||
directory_fmt = ("{category}", "{author[handle]}")
|
||||
filename_fmt = "{indexedAt[:19]}_{post_id}_{num}.{extension}"
|
||||
filename_fmt = "{createdAt[:19]}_{post_id}_{num}.{extension}"
|
||||
archive_fmt = "{filename}"
|
||||
root = "https://bsky.app"
|
||||
|
||||
@@ -34,20 +34,31 @@ class BlueskyExtractor(Extractor):
|
||||
def items(self):
|
||||
for post in self.posts():
|
||||
post = post["post"]
|
||||
post.update(post["record"])
|
||||
del post["record"]
|
||||
|
||||
try:
|
||||
images = post["embed"]["images"]
|
||||
except KeyError:
|
||||
images = ()
|
||||
images = ()
|
||||
if "embed" in post:
|
||||
media = post["embed"]
|
||||
if "media" in media:
|
||||
media = media["media"]
|
||||
if "images" in media:
|
||||
images = media["images"]
|
||||
|
||||
post["post_id"] = post["uri"].rpartition("/")[2]
|
||||
post["count"] = len(images)
|
||||
post["date"] = text.parse_datetime(
|
||||
post["indexedAt"][:19], "%Y-%m-%dT%H:%M:%S")
|
||||
post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
|
||||
|
||||
yield Message.Directory, post
|
||||
|
||||
if not images:
|
||||
continue
|
||||
|
||||
base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob"
|
||||
"?did={}&cid=".format(post["author"]["did"]))
|
||||
post["num"] = 0
|
||||
|
||||
for file in images:
|
||||
post["num"] += 1
|
||||
post["description"] = file["alt"]
|
||||
@@ -59,11 +70,11 @@ class BlueskyExtractor(Extractor):
|
||||
except KeyError:
|
||||
post["width"] = post["height"] = 0
|
||||
|
||||
url = file["fullsize"]
|
||||
name = url.rpartition("/")[2]
|
||||
post["filename"], _, post["extension"] = name.rpartition("@")
|
||||
image = file["image"]
|
||||
post["filename"] = link = image["ref"]["$link"]
|
||||
post["extension"] = image["mimeType"].rpartition("/")[2]
|
||||
|
||||
yield Message.Url, url, post
|
||||
yield Message.Url, base + link, post
|
||||
|
||||
def posts(self):
|
||||
return ()
|
||||
|
||||
@@ -12,8 +12,8 @@ __tests__ = (
|
||||
"#url" : "https://bsky.app/profile/bsky.app/post/3kh5rarr3gn2n",
|
||||
"#category": ("", "bluesky", "post"),
|
||||
"#class" : bluesky.BlueskyPostExtractor,
|
||||
"#urls" : "https://cdn.bsky.app/img/feed_fullsize/plain/did:plc:z72i7hdynmk6r22z27h6tvur/bafkreidypzoaybmfj5h7pnpiyct6ng5yae6ydp4czrm72ocg7ev6vbirri@jpeg",
|
||||
"#sha1_content": "c36a27d135277dc08b7bfd289e4078af7b32c720",
|
||||
"#urls" : "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:z72i7hdynmk6r22z27h6tvur&cid=bafkreidypzoaybmfj5h7pnpiyct6ng5yae6ydp4czrm72ocg7ev6vbirri",
|
||||
"#sha1_content": "ffcf25e7c511173a12de5276b85903309fcd8d14",
|
||||
|
||||
"author": {
|
||||
"avatar" : "https://cdn.bsky.app/img/avatar/plain/did:plc:z72i7hdynmk6r22z27h6tvur/bafkreihagr2cmvl2jt4mgx3sppwe2it3fwolkrbtjrhcnwjk4jdijhsoze@jpeg",
|
||||
@@ -24,6 +24,7 @@ __tests__ = (
|
||||
},
|
||||
"cid" : "bafyreihh7m6bfrwlcjfklwturmja7qfse5gte7lskpmgw76flivimbnoqm",
|
||||
"count" : 1,
|
||||
"createdAt" : "2023-12-22T18:58:32.715Z",
|
||||
"date" : "dt:2023-12-22 18:58:32",
|
||||
"description": "The bluesky logo with the blue butterfly",
|
||||
"extension" : "jpeg",
|
||||
|
||||
Reference in New Issue
Block a user