diff --git a/docs/configuration.rst b/docs/configuration.rst
index 1835ac8e..e08ad53e 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -1489,6 +1489,16 @@ Description
     (See `depth` parameter of `app.bsky.feed.getPostThread `__)
 
 
+extractor.bluesky.quoted
+------------------------
+Type
+    ``bool``
+Default
+    ``false``
+Description
+    Fetch media from quoted posts.
+
+
 extractor.bluesky.reposts
 -------------------------
 Type
diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py
index 39c56359..a1a488ee 100644
--- a/gallery_dl/extractor/bluesky.py
+++ b/gallery_dl/extractor/bluesky.py
@@ -42,62 +42,83 @@ class BlueskyExtractor(Extractor):
         self._user = self._user_did = None
         self.instance = self.root.partition("://")[2]
         self.videos = self.config("videos", True)
+        self.quoted = self.config("quoted", False)
 
     def items(self):
         for post in self.posts():
             if "post" in post:
                 post = post["post"]
-
-            pid = post["uri"].rpartition("/")[2]
             if self._user_did and post["author"]["did"] != self._user_did:
-                self.log.debug("Skipping %s (repost)", pid)
+                self.log.debug("Skipping %s (repost)", self._pid(post))
                 continue
+            embed = post.get("embed")
+            post.update(post.pop("record"))
 
-            post.update(post["record"])
-            del post["record"]
+            # emit the post itself, then follow the chain of quoted posts
+            while True:
+                self._prepare(post)
+                files = self._extract_files(post)
 
-            if self._metadata_facets:
-                if "facets" in post:
-                    post["hashtags"] = tags = []
-                    post["mentions"] = dids = []
-                    post["uris"] = uris = []
-                    for facet in post["facets"]:
-                        features = facet["features"][0]
-                        if "tag" in features:
-                            tags.append(features["tag"])
-                        elif "did" in features:
-                            dids.append(features["did"])
-                        elif "uri" in features:
-                            uris.append(features["uri"])
-                else:
-                    post["hashtags"] = post["mentions"] = post["uris"] = ()
+                yield Message.Directory, post
+                if files:
+                    base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob"
+                            "?did={}&cid=".format(post["author"]["did"]))
+                    for post["num"], file in enumerate(files, 1):
+                        post.update(file)
+                        yield Message.Url, base + file["filename"], post
 
-            if self._metadata_user:
-                post["user"] = self._user or post["author"]
+                if not self.quoted or not embed or "record" not in embed:
+                    break
 
-            files = self._extract_files(post)
-            post["instance"] = self.instance
-            post["post_id"] = pid
-            post["count"] = len(files)
-            post["date"] = text.parse_datetime(
-                post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
-
-            yield Message.Directory, post
-
-            if not files:
-                continue
-
-            base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob"
-                    "?did={}&cid=".format(post["author"]["did"]))
-            for post["num"], file in enumerate(files, 1):
-                post.update(file)
-                yield Message.Url, base + file["filename"], post
+                quote = embed["record"]
+                if "record" in quote:
+                    quote = quote["record"]
+                value = quote.pop("value", None)
+                if value is None:
+                    # not-found/blocked quotes have no "value"
+                    break
+                quote["quote_id"] = self._pid(post)
+                quote["quote_by"] = post["author"]
+                embed = quote.get("embed")
+                quote.update(value)
+                post = quote
 
     def posts(self):
         return ()
 
+    def _pid(self, post):
+        """Return the last path segment of the post's 'uri' value"""
+        return post["uri"].rpartition("/")[2]
+
+    def _prepare(self, post):
+        """Add facet, user, instance, post_id, and date fields to 'post'"""
+        if self._metadata_facets:
+            if "facets" in post:
+                post["hashtags"] = tags = []
+                post["mentions"] = dids = []
+                post["uris"] = uris = []
+                for facet in post["facets"]:
+                    features = facet["features"][0]
+                    if "tag" in features:
+                        tags.append(features["tag"])
+                    elif "did" in features:
+                        dids.append(features["did"])
+                    elif "uri" in features:
+                        uris.append(features["uri"])
+            else:
+                post["hashtags"] = post["mentions"] = post["uris"] = ()
+
+        if self._metadata_user:
+            post["user"] = self._user or post["author"]
+
+        post["instance"] = self.instance
+        post["post_id"] = self._pid(post)
+        post["date"] = text.parse_datetime(
+            post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
+
     def _extract_files(self, post):
         if "embed" not in post:
+            post["count"] = 0
             return ()
 
         files = []
@@ -111,6 +132,7 @@ class BlueskyExtractor(Extractor):
         if "video" in media and self.videos:
             files.append(self._extract_media(media, "video"))
 
+        post["count"] = len(files)
         return files
 
     def _extract_media(self, media, key):
diff --git a/test/results/bluesky.py b/test/results/bluesky.py
index a30e4018..3831aca7 100644
--- a/test/results/bluesky.py
+++ b/test/results/bluesky.py
@@ -227,4 +227,46 @@ __tests__ = (
     "extension" : "mp4",
 },
 
+{
+    "#url"     : "https://bsky.app/profile/mikf.bsky.social/post/3kmfodjotln2f",
+    "#comment" : "quote (#6183)",
+    "#class"   : bluesky.BlueskyPostExtractor,
+    "#options" : {"quoted": True},
+    "#urls"    : "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:eyhmjdxsnthqhvvszdejaocz&cid=bafkreib6eb7tfozksquveaj3z5msyx3hkniubrulxdys3eftthvmuzrtme",
+
+    "author": {
+        "associated" : dict,
+        "avatar"     : "https://cdn.bsky.app/img/avatar/plain/did:plc:eyhmjdxsnthqhvvszdejaocz/bafkreigjrftlw7tabtpie32saydttpnoi7276v252vnycr6zt6euef7vdi@jpeg",
+        "createdAt"  : "2024-01-11T00:27:37.404Z",
+        "did"        : "did:plc:eyhmjdxsnthqhvvszdejaocz",
+        "displayName": "フナ",
+        "handle"     : "ykfuna.bsky.social",
+        "labels"     : list,
+    },
+    "quote_by": {
+        "avatar"     : "https://cdn.bsky.app/img/avatar/plain/did:plc:cslxjqkeexku6elp5xowxkq7/bafkreic5jqkn5ohqhgsm6zzi7vnapuz54trojv3io4tfkrcyaprl4b2ztm@jpeg",
+        "createdAt"  : "2024-02-05T00:03:54.087Z",
+        "did"        : "did:plc:cslxjqkeexku6elp5xowxkq7",
+        "displayName": "mikf",
+        "handle"     : "mikf.bsky.social",
+        "labels"     : list,
+    },
+    "quote_id": "3kmfodjotln2f",
+    "post_id" : "3km4qy5y3jc2z",
+},
+
+{
+    "#url"     : "https://bsky.app/profile/mikf.bsky.social/post/3kmfp2qktil25",
+    "#comment" : "quote with media (#6183)",
+    "#class"   : bluesky.BlueskyPostExtractor,
+    "#options" : {"quoted": True},
+    "#urls"    : (
+        "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:cslxjqkeexku6elp5xowxkq7&cid=bafkreiegcyremdrecmnpisci3a3nduc7lm3zdcl76z5o5rd4nstyolrxki",
+        "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:eyhmjdxsnthqhvvszdejaocz&cid=bafkreicojrnwiw5eqo3ko2q6duduyjaoyiqvdc25kuikcedlijtbgvlt5e",
+
+    ),
+
+    "text"     : {"quote with media", ""},
+},
+
 )