[twitter] add 'highlights' extractor (#7826)

This commit is contained in:
Mike Fährmann
2025-08-19 09:08:44 +02:00
parent 8252980264
commit 47150f3e8a
4 changed files with 50 additions and 18 deletions

View File

@@ -649,18 +649,19 @@ class TwitterUserExtractor(Dispatch, TwitterExtractor):
def items(self):
    """Dispatch a user URL to the extractors for its individual sections

    Builds one '<root>/<user>/<section>' URL per section and passes the
    (extractor class, URL) pairs to self._dispatch_extractors().
    """
    user, user_id = self.groups
    if user_id is not None:
        # the URL specified a numeric user ID instead of a screen name
        user = f"id:{user_id}"

    base = f"{self.root}/{user}/"
    return self._dispatch_extractors((
        (TwitterInfoExtractor      , f"{base}info"),
        (TwitterAvatarExtractor    , f"{base}photo"),
        (TwitterBackgroundExtractor, f"{base}header_photo"),
        (TwitterTimelineExtractor  , f"{base}timeline"),
        (TwitterTweetsExtractor    , f"{base}tweets"),
        (TwitterMediaExtractor     , f"{base}media"),
        (TwitterRepliesExtractor   , f"{base}with_replies"),
        (TwitterHighlightsExtractor, f"{base}highlights"),
        (TwitterLikesExtractor     , f"{base}likes"),
    ), ("timeline",))  # presumably the default selection; confirm in Dispatch
@@ -781,6 +782,16 @@ class TwitterRepliesExtractor(TwitterExtractor):
return self.api.user_tweets_and_replies(self.user)
class TwitterHighlightsExtractor(TwitterExtractor):
    """Extractor for the Tweets on a user's Highlights timeline"""
    subcategory = "highlights"
    # '<BASE_PATTERN>/USER/highlights', where USER must not begin
    # with 'search' and 'highlights' must not be followed by a word char
    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/highlights(?!\w)"
    example = "https://x.com/USER/highlights"

    def tweets(self):
        """Return the API's highlights results for self.user"""
        highlights = self.api.user_highlights(self.user)
        return highlights
class TwitterMediaExtractor(TwitterExtractor):
"""Extractor for Tweets from a user's Media timeline"""
subcategory = "media"
@@ -1354,6 +1365,20 @@ class TwitterAPI():
return self._pagination_tweets(
endpoint, variables, field_toggles=field_toggles)
def user_highlights(self, screen_name):
    """Query the 'UserHighlightsTweets' GraphQL endpoint for a user

    Resolves 'screen_name' to a user ID and returns the result of
    self._pagination_tweets() for that endpoint and its parameters.
    """
    # resolve the ID first, as the original did while building 'variables'
    user_id = self._user_id_by_screen_name(screen_name)

    return self._pagination_tweets(
        "/graphql/gmHw9geMTncZ7jeLLUUNOw/UserHighlightsTweets",
        {
            "userId": user_id,
            "count": 100,
            "includePromotedContent": False,
            "withVoice": True,
        },
        field_toggles={"withArticlePlainText": False},
    )
def user_media(self, screen_name):
endpoint = "/graphql/jCRhbOzdgOHp6u9H4g2tEg/UserMedia"
variables = {