From e9dd2eff1d89f3227330d123a1613813e5e5f4cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sun, 19 Aug 2018 20:36:33 +0200 Subject: [PATCH] [twitter] add extractor for media-tweet timelines (#96) For example "https://twitter.com/PicturesEarth/media". They are different from normal timelines in that they do not contain any (re)tweets from other users and feature all media the user ever posted, including responses to other tweets. --- docs/supportedsites.rst | 2 +- gallery_dl/extractor/smugmug.py | 2 +- gallery_dl/extractor/twitter.py | 69 ++++++++++++++++++++------------- scripts/build_supportedsites.py | 3 ++ 4 files changed, 46 insertions(+), 30 deletions(-) diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 0bb69310..2761138c 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -76,7 +76,7 @@ SmugMug https://www.smugmug.com/ |Albums, individ-5| Subapics https://subapics.com/ Chapters, Manga The /b/ Archive https://thebarchive.com/ Threads Tumblr https://www.tumblr.com/ Images from Users, Likes, Posts, Tag-Searches Optional (OAuth) -Twitter https://twitter.com/ Timelines, Tweets +Twitter https://twitter.com/ Timelines, Tweets, Media Tweets Warosu https://warosu.org/ Threads World Three http://www.slide.world-three.org/ Chapters, Manga XVideos https://www.xvideos.com/ Images from Users, Galleries diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py index dc919f37..742f7d95 100644 --- a/gallery_dl/extractor/smugmug.py +++ b/gallery_dl/extractor/smugmug.py @@ -81,7 +81,7 @@ class SmugmugImageExtractor(SmugmugExtractor): pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/i-([^/?&#]+)"] test = [("https://acapella.smugmug.com/Micro-Macro/Drops/i-g2Dmf9z", { "url": "ab0d7aa001a53ff3fd228622070b39005b6fc179", - "keyword": "4fcc02599d180321b22a7f7238102c48d5410c05", + "keyword": "a116167929c22338e6067b81c5d3bee641df3af3", "content": "64a8f69a1d824921eebbdf2420087937adfa45cd", })] diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index bb256566..6343b8b6 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -20,10 +20,10 @@ class TwitterExtractor(Extractor): archive_fmt = "{tweet_id}_{retweet_id}_{num}" root = "https://twitter.com" - def __init__(self): + def __init__(self, match): Extractor.__init__(self) - self.user = None - self.retweets = True + self.user = match.group(1) + self.retweets = self.config("retweets", True) def items(self): yield Message.Version, 1 @@ -45,9 +45,11 @@ class TwitterExtractor(Extractor): def metadata(self): """Return general metadata""" + return {"user": self.user} def tweets(self): """Yield HTML content of all relevant tweets""" + return () @staticmethod def _data_from_tweet(tweet): @@ -64,29 +66,7 @@ class TwitterExtractor(Extractor): data["retweeter"] = data["retweeter"] or "" return data - -class TwitterTimelineExtractor(TwitterExtractor): - """Extractor for all tweeted images from a user's timeline""" - subcategory = "timeline" - pattern = [r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com" - r"/([^/?&#]+)/?$"] - test = [("https://twitter.com/PicturesEarth", { - "range": (1, 40), - "url": "2f4d51cbba81e56c1c755677b3ad58fc167c9771", - "keyword": "cbae53b6f4ba133078bb13c95dbd3cbb4fa40b9f", - })] - - def __init__(self, match): - TwitterExtractor.__init__(self) - self.user = match.group(1) - self.retweets = self.config("retweets", True) - - def metadata(self): - return {"user": self.user} - - def tweets(self): - url = "{}/i/profiles/show/{}/timeline/tweets".format( - self.root, self.user) + def _tweets_from_api(self, url): params = { "include_available_features": "1", "include_entities": "1", @@ -112,6 +92,39 @@ class TwitterTimelineExtractor(TwitterExtractor): tweet, 'data-tweet-id="', '"')[0] +class TwitterTimelineExtractor(TwitterExtractor): + """Extractor for all images from a user's timeline""" + subcategory = "timeline" + pattern = [r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com" + r"/([^/?&#]+)/?$"] + test = [("https://twitter.com/PicturesEarth", { + "range": (1, 40), + "url": "2f4d51cbba81e56c1c755677b3ad58fc167c9771", + "keyword": "cbae53b6f4ba133078bb13c95dbd3cbb4fa40b9f", + })] + + def tweets(self): + url = "{}/i/profiles/show/{}/timeline/tweets".format( + self.root, self.user) + return self._tweets_from_api(url) + + +class TwitterMediaExtractor(TwitterExtractor): + """Extractor for all images from a user's Media Tweets""" + subcategory = "media" + pattern = [r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com" + r"/([^/?&#]+)/media(?!\w)"] + test = [("https://twitter.com/PicturesEarth/media", { + "range": (1, 40), + "url": "2f4d51cbba81e56c1c755677b3ad58fc167c9771", + })] + + def tweets(self): + url = "{}/i/profiles/show/{}/media_timeline".format( + self.root, self.user) + return self._tweets_from_api(url) + + class TwitterTweetExtractor(TwitterExtractor): """Extractor for images from individual tweets""" subcategory = "tweet" @@ -130,8 +143,8 @@ class TwitterTweetExtractor(TwitterExtractor): ] def __init__(self, match): - TwitterExtractor.__init__(self) - self.user, self.tweet_id = match.groups() + TwitterExtractor.__init__(self, match) + self.tweet_id = match.group(2) def metadata(self): return {"user": self.user, "tweet_id": self.tweet_id} diff --git a/scripts/build_supportedsites.py b/scripts/build_supportedsites.py index 6b7f0f56..b2691564 100755 --- a/scripts/build_supportedsites.py +++ b/scripts/build_supportedsites.py @@ -68,6 +68,7 @@ SUBCATEGORY_MAP = { "issue" : "Comic-Issues", "manga" : "Manga", "me" : "pixiv.me Links", + "media" : "Media Tweets", "path" : "Images from Users and Folders", "pinit" : "pin.it Links", "popular": "Popular Images", @@ -226,6 +227,8 @@ def category_key(extrlist): def subcategory_key(cls): if cls.subcategory in ("user", "issue"): return "A" + if cls.subcategory in ("media",): + return "z" return cls.subcategory