diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a395c89b..9e37f407 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -994,7 +994,7 @@ Consider all listed sites to potentially be NSFW. Twitter https://x.com/ - Avatars, Backgrounds, Bookmarks, Communities, Events, Followers, Followed Users, Hashtags, Highlights, individual Images, User Profile Information, Likes, Lists, List Members, Media Timelines, Quotes, Search Results, Timelines, Tweets, User Profiles + Avatars, Backgrounds, Bookmarks, Communities, Events, Followers, Followed Users, Hashtags, Highlights, Home Feed, individual Images, User Profile Information, Likes, Lists, List Members, Media Timelines, Quotes, Search Results, Timelines, Tweets, User Profiles Supported diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index e3952f88..00bd0c33 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -640,9 +640,23 @@ class TwitterExtractor(Extractor): return self.cookies_update(_login_impl(self, username, password)) +class TwitterHomeExtractor(TwitterExtractor): + """Extractor for Twitter home timelines""" + subcategory = "home" + pattern = (BASE_PATTERN + + r"/(?:home(?:/fo(?:llowing|r[-_ ]?you()))?|i/timeline)/?$") + example = "https://x.com/home" + + def tweets(self): + if self.groups[0] is None: + return self.api.home_latest_timeline() + return self.api.home_timeline() + + class TwitterUserExtractor(Dispatch, TwitterExtractor): """Extractor for a Twitter user""" - pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])" + pattern = (BASE_PATTERN + r"/(?!search\b|home\b|i/timeline)(?:" + r"([^/?#]+)/?(?:$|\?|#)" r"|i(?:/user/|ntent/user\?user_id=)(\d+))") example = "https://x.com/USER" @@ -668,7 +682,8 @@ class TwitterUserExtractor(Dispatch, TwitterExtractor): class TwitterTimelineExtractor(TwitterExtractor): """Extractor for a Twitter user timeline""" subcategory = "timeline" - pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/timeline(?!\w)" + pattern = (BASE_PATTERN + + r"/(?!search\b|home\b|i\b)([^/?#]+)/timeline(?!\w)") example = "https://x.com/USER/timeline" def _init_cursor(self): @@ -1484,6 +1499,27 @@ class TwitterAPI(): endpoint, variables, ("viewer", "communities_timeline", "timeline")) + def home_timeline(self): + endpoint = "/graphql/DXmgQYmIft1oLP6vMkJixw/HomeTimeline" + variables = { + "count": 100, + "includePromotedContent": False, + "latestControlAvailable": True, + "withCommunity": True, + } + return self._pagination_tweets( + endpoint, variables, ("home", "home_timeline_urt")) + + def home_latest_timeline(self): + endpoint = "/graphql/SFxmNKWfN9ySJcXG_tjX8g/HomeLatestTimeline" + variables = { + "count": 100, + "includePromotedContent": False, + "latestControlAvailable": True, + } + return self._pagination_tweets( + endpoint, variables, ("home", "home_timeline_urt")) + def live_event_timeline(self, event_id): endpoint = f"/2/live_event/timeline/{event_id}.json" params = self.params.copy() diff --git a/test/results/twitter.py b/test/results/twitter.py index 38f7789f..3fca6515 100644 --- a/test/results/twitter.py +++ b/test/results/twitter.py @@ -777,4 +777,24 @@ The Washington Post writes, "Three weeks after the toxic train derailment in Ohi "#class" : twitter.TwitterHighlightsExtractor, }, +{ + "#url" : "https://x.com/home", + "#class" : twitter.TwitterHomeExtractor, +}, + +{ + "#url" : "https://x.com/home/for_you", + "#class" : twitter.TwitterHomeExtractor, +}, + +{ + "#url" : "https://x.com/home/following", + "#class" : twitter.TwitterHomeExtractor, +}, + +{ + "#url" : "https://x.com/i/timeline", + "#class" : twitter.TwitterHomeExtractor, +}, + )