From 55bbd49a0eccbf207c6833983d3a2a0ff6f73287 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 9 Feb 2024 21:33:33 +0100 Subject: [PATCH] [bluesky] download images in original resolution (#4438) at least up to 2000 px --- gallery_dl/extractor/bluesky.py | 31 +++++++++++++++++++++---------- test/results/bluesky.py | 5 +++-- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py index 3e540936..bd183bd5 100644 --- a/gallery_dl/extractor/bluesky.py +++ b/gallery_dl/extractor/bluesky.py @@ -20,7 +20,7 @@ class BlueskyExtractor(Extractor): """Base class for bluesky extractors""" category = "bluesky" directory_fmt = ("{category}", "{author[handle]}") - filename_fmt = "{indexedAt[:19]}_{post_id}_{num}.{extension}" + filename_fmt = "{createdAt[:19]}_{post_id}_{num}.{extension}" archive_fmt = "{filename}" root = "https://bsky.app" @@ -34,20 +34,31 @@ class BlueskyExtractor(Extractor): def items(self): for post in self.posts(): post = post["post"] + post.update(post["record"]) + del post["record"] - try: - images = post["embed"]["images"] - except KeyError: - images = () + images = () + if "embed" in post: + media = post["embed"] + if "media" in media: + media = media["media"] + if "images" in media: + images = media["images"] post["post_id"] = post["uri"].rpartition("/")[2] post["count"] = len(images) post["date"] = text.parse_datetime( - post["indexedAt"][:19], "%Y-%m-%dT%H:%M:%S") + post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S") yield Message.Directory, post + if not images: + continue + + base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob" + "?did={}&cid=".format(post["author"]["did"])) post["num"] = 0 + for file in images: post["num"] += 1 post["description"] = file["alt"] @@ -59,11 +70,11 @@ class BlueskyExtractor(Extractor): except KeyError: post["width"] = post["height"] = 0 - url = file["fullsize"] - name = url.rpartition("/")[2] - post["filename"], _, post["extension"] = name.rpartition("@") + image = file["image"] + post["filename"] = link = image["ref"]["$link"] + post["extension"] = image["mimeType"].rpartition("/")[2] - yield Message.Url, url, post + yield Message.Url, base + link, post def posts(self): return () diff --git a/test/results/bluesky.py b/test/results/bluesky.py index fe531490..7b9278ab 100644 --- a/test/results/bluesky.py +++ b/test/results/bluesky.py @@ -12,8 +12,8 @@ __tests__ = ( "#url" : "https://bsky.app/profile/bsky.app/post/3kh5rarr3gn2n", "#category": ("", "bluesky", "post"), "#class" : bluesky.BlueskyPostExtractor, - "#urls" : "https://cdn.bsky.app/img/feed_fullsize/plain/did:plc:z72i7hdynmk6r22z27h6tvur/bafkreidypzoaybmfj5h7pnpiyct6ng5yae6ydp4czrm72ocg7ev6vbirri@jpeg", - "#sha1_content": "c36a27d135277dc08b7bfd289e4078af7b32c720", + "#urls" : "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:z72i7hdynmk6r22z27h6tvur&cid=bafkreidypzoaybmfj5h7pnpiyct6ng5yae6ydp4czrm72ocg7ev6vbirri", + "#sha1_content": "ffcf25e7c511173a12de5276b85903309fcd8d14", "author": { "avatar" : "https://cdn.bsky.app/img/avatar/plain/did:plc:z72i7hdynmk6r22z27h6tvur/bafkreihagr2cmvl2jt4mgx3sppwe2it3fwolkrbtjrhcnwjk4jdijhsoze@jpeg", @@ -24,6 +24,7 @@ __tests__ = ( }, "cid" : "bafyreihh7m6bfrwlcjfklwturmja7qfse5gte7lskpmgw76flivimbnoqm", "count" : 1, + "createdAt" : "2023-12-22T18:58:32.715Z", "date" : "dt:2023-12-22 18:58:32", "description": "The bluesky logo with the blue butterfly", "extension" : "jpeg",