[twitter] add 'highlights' extractor (#7826)

This commit is contained in:
Mike Fährmann
2025-08-19 09:08:44 +02:00
parent 8252980264
commit 47150f3e8a
4 changed files with 50 additions and 18 deletions

View File

@@ -5422,14 +5422,16 @@ Description
when processing a user profile. when processing a user profile.
Possible values are Possible values are
``"info"``,
``"avatar"``, * ``"info"``
``"background"``, * ``"avatar"``
``"timeline"``, * ``"background"``
``"tweets"``, * ``"timeline"``
``"media"``, * ``"tweets"``
``"replies"``, * ``"media"``
``"likes"``. * ``"replies"``
* ``"highlights"``
* ``"likes"``
It is possible to use ``"all"`` instead of listing all values separately. It is possible to use ``"all"`` instead of listing all values separately.

View File

@@ -994,7 +994,7 @@ Consider all listed sites to potentially be NSFW.
<tr id="twitter" title="twitter"> <tr id="twitter" title="twitter">
<td>Twitter</td> <td>Twitter</td>
<td>https://x.com/</td> <td>https://x.com/</td>
<td>Avatars, Backgrounds, Bookmarks, Communities, Events, Followers, Followed Users, Hashtags, individual Images, User Profile Information, Likes, Lists, List Members, Media Timelines, Quotes, Search Results, Timelines, Tweets, User Profiles</td> <td>Avatars, Backgrounds, Bookmarks, Communities, Events, Followers, Followed Users, Hashtags, Highlights, individual Images, User Profile Information, Likes, Lists, List Members, Media Timelines, Quotes, Search Results, Timelines, Tweets, User Profiles</td>
<td>Supported</td> <td>Supported</td>
</tr> </tr>
<tr id="unsplash" title="unsplash"> <tr id="unsplash" title="unsplash">

View File

@@ -649,18 +649,19 @@ class TwitterUserExtractor(Dispatch, TwitterExtractor):
def items(self): def items(self):
user, user_id = self.groups user, user_id = self.groups
if user_id is not None: if user_id is not None:
user = "id:" + user_id user = f"id:{user_id}"
base = f"{self.root}/{user}/" base = f"{self.root}/{user}/"
return self._dispatch_extractors(( return self._dispatch_extractors((
(TwitterInfoExtractor , base + "info"), (TwitterInfoExtractor , f"{base}info"),
(TwitterAvatarExtractor , base + "photo"), (TwitterAvatarExtractor , f"{base}photo"),
(TwitterBackgroundExtractor, base + "header_photo"), (TwitterBackgroundExtractor, f"{base}header_photo"),
(TwitterTimelineExtractor , base + "timeline"), (TwitterTimelineExtractor , f"{base}timeline"),
(TwitterTweetsExtractor , base + "tweets"), (TwitterTweetsExtractor , f"{base}tweets"),
(TwitterMediaExtractor , base + "media"), (TwitterMediaExtractor , f"{base}media"),
(TwitterRepliesExtractor , base + "with_replies"), (TwitterRepliesExtractor , f"{base}with_replies"),
(TwitterLikesExtractor , base + "likes"), (TwitterHighlightsExtractor, f"{base}highlights"),
(TwitterLikesExtractor , f"{base}likes"),
), ("timeline",)) ), ("timeline",))
@@ -781,6 +782,16 @@ class TwitterRepliesExtractor(TwitterExtractor):
return self.api.user_tweets_and_replies(self.user) return self.api.user_tweets_and_replies(self.user)
class TwitterHighlightsExtractor(TwitterExtractor):
    """Extractor for the Tweets listed on a user's 'highlights' page"""
    subcategory = "highlights"
    # Matches '<base>/<user>/highlights':
    #  - the user segment must not begin with 'search'
    #  - 'highlights' must not be followed by another word character
    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/highlights(?!\w)"
    example = "https://x.com/USER/highlights"

    def tweets(self):
        """Return the API's highlights results for 'self.user'"""
        api = self.api
        return api.user_highlights(self.user)
class TwitterMediaExtractor(TwitterExtractor): class TwitterMediaExtractor(TwitterExtractor):
"""Extractor for Tweets from a user's Media timeline""" """Extractor for Tweets from a user's Media timeline"""
subcategory = "media" subcategory = "media"
@@ -1354,6 +1365,20 @@ class TwitterAPI():
return self._pagination_tweets( return self._pagination_tweets(
endpoint, variables, field_toggles=field_toggles) endpoint, variables, field_toggles=field_toggles)
def user_highlights(self, screen_name):
    """Query the 'UserHighlightsTweets' GraphQL endpoint for a user

    'screen_name' is converted to a user ID with
    _user_id_by_screen_name(); the request is then delegated to
    _pagination_tweets(), whose result is returned as-is.
    """
    # resolve the ID first, before any request parameters are assembled
    user_id = self._user_id_by_screen_name(screen_name)

    params = {
        "userId": user_id,
        "count": 100,
        "includePromotedContent": False,
        "withVoice": True,
    }
    toggles = {"withArticlePlainText": False}

    return self._pagination_tweets(
        "/graphql/gmHw9geMTncZ7jeLLUUNOw/UserHighlightsTweets",
        params,
        field_toggles=toggles,
    )
def user_media(self, screen_name): def user_media(self, screen_name):
endpoint = "/graphql/jCRhbOzdgOHp6u9H4g2tEg/UserMedia" endpoint = "/graphql/jCRhbOzdgOHp6u9H4g2tEg/UserMedia"
variables = { variables = {

View File

@@ -772,4 +772,9 @@ The Washington Post writes, "Three weeks after the toxic train derailment in Ohi
"#class" : twitter.TwitterImageExtractor, "#class" : twitter.TwitterImageExtractor,
}, },
{
"#url" : "https://x.com/tetsuoai/highlights",
"#class" : twitter.TwitterHighlightsExtractor,
},
) )