From 40c0553523bb28790de0e6a07a978a42e2be88c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 7 Mar 2024 00:52:50 +0100 Subject: [PATCH] [twitter] add 'quotes' extractor (#5262) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://github.com/mikf/gallery-dl/issues/5262#issuecomment-1981571924 It's implemented as a search for 'quoted_tweet_id:…' on Twitter. --- docs/supportedsites.md | 2 +- gallery_dl/extractor/twitter.py | 16 ++++++++++++++-- test/results/twitter.py | 10 +++++++++- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 1bbfa12e..b004d7dc 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -898,7 +898,7 @@ Consider all listed sites to potentially be NSFW. Twitter https://twitter.com/ - Avatars, Backgrounds, Bookmarks, Communities, Events, Followed Users, Hashtags, individual Images, Likes, Lists, List Members, Media Timelines, Search Results, Timelines, Tweets, User Profiles + Avatars, Backgrounds, Bookmarks, Communities, Events, Followed Users, Hashtags, individual Images, Likes, Lists, List Members, Media Timelines, Quotes, Search Results, Timelines, Tweets, User Profiles Supported diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index ad5bfc62..e6bf9b0f 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -731,9 +731,9 @@ class TwitterEventExtractor(TwitterExtractor): class TwitterTweetExtractor(TwitterExtractor): - """Extractor for images from individual tweets""" + """Extractor for individual tweets""" subcategory = "tweet" - pattern = BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)" + pattern = BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)/?$" example = "https://twitter.com/USER/status/12345" def __init__(self, match): @@ -810,6 +810,18 @@ class TwitterTweetExtractor(TwitterExtractor): return itertools.chain(buffer, tweets) +class TwitterQuotesExtractor(TwitterExtractor): + """Extractor for quotes of a Tweet""" + subcategory = "quotes" + pattern = BASE_PATTERN + r"/(?:[^/?#]+|i/web)/status/(\d+)/quotes" + example = "https://twitter.com/USER/status/12345/quotes" + + def items(self): + url = "{}/search?q=quoted_tweet_id:{}".format(self.root, self.user) + data = {"_extractor": TwitterSearchExtractor} + yield Message.Queue, url, data + + class TwitterAvatarExtractor(TwitterExtractor): subcategory = "avatar" filename_fmt = "avatar {date}.{extension}" diff --git a/test/results/twitter.py b/test/results/twitter.py index 5150a11a..f7fd8dba 100644 --- a/test/results/twitter.py +++ b/test/results/twitter.py @@ -218,7 +218,7 @@ __tests__ = ( "#category": ("", "twitter", "hashtag"), "#class" : twitter.TwitterHashtagExtractor, "#pattern" : twitter.TwitterSearchExtractor.pattern, - "#sha1_url": "3571c3a53b7647ea35517041fdc17f77ec5b2cb9", + "#urls" : "https://twitter.com/search?q=%23nature", }, { @@ -537,6 +537,14 @@ The Washington Post writes, "Three weeks after the toxic train derailment in Ohi "The analysis by Texas A&M University seems to contradict statements by state and federal regulators that air near the crash site is completely safe, despite residents complaining about rashes, breathing problems and other health effects." Your reaction.""", }, +{ + "#url" : "https://twitter.com/playpokemon/status/1263832915173048321/quotes", + "#category": ("", "twitter", "quotes"), + "#class" : twitter.TwitterQuotesExtractor, + "#pattern" : twitter.TwitterSearchExtractor.pattern, + "#urls" : "https://twitter.com/search?q=quoted_tweet_id:1263832915173048321", +}, + { "#url" : "https://twitter.com/supernaturepics/photo", "#category": ("", "twitter", "avatar"),