[twitter] add 'highlights' extractor (#7826)

This commit is contained in:
Mike Fährmann
2025-08-19 09:08:44 +02:00
parent 8252980264
commit 47150f3e8a
4 changed files with 50 additions and 18 deletions

View File

@@ -5422,14 +5422,16 @@ Description
when processing a user profile.
Possible values are
``"info"``,
``"avatar"``,
``"background"``,
``"timeline"``,
``"tweets"``,
``"media"``,
``"replies"``,
``"likes"``.
* ``"info"``
* ``"avatar"``
* ``"background"``
* ``"timeline"``
* ``"tweets"``
* ``"media"``
* ``"replies"``
* ``"highlights"``
* ``"likes"``
It is possible to use ``"all"`` instead of listing all values separately.

View File

@@ -994,7 +994,7 @@ Consider all listed sites to potentially be NSFW.
<tr id="twitter" title="twitter">
<td>Twitter</td>
<td>https://x.com/</td>
<td>Avatars, Backgrounds, Bookmarks, Communities, Events, Followers, Followed Users, Hashtags, individual Images, User Profile Information, Likes, Lists, List Members, Media Timelines, Quotes, Search Results, Timelines, Tweets, User Profiles</td>
<td>Avatars, Backgrounds, Bookmarks, Communities, Events, Followers, Followed Users, Hashtags, Highlights, individual Images, User Profile Information, Likes, Lists, List Members, Media Timelines, Quotes, Search Results, Timelines, Tweets, User Profiles</td>
<td>Supported</td>
</tr>
<tr id="unsplash" title="unsplash">

View File

@@ -649,18 +649,19 @@ class TwitterUserExtractor(Dispatch, TwitterExtractor):
# NOTE(review): this hunk appears to list both the pre-change and the
# post-change lines without +/- markers, so some statements show up twice.
# Confirm against the repository before treating it as one function body.
def items(self):
user, user_id = self.groups
if user_id is not None:
# The same assignment in two spellings (concatenation, then f-string);
# presumably the old and the new version of a single line.
user = "id:" + user_id
user = f"id:{user_id}"
base = f"{self.root}/{user}/"
return self._dispatch_extractors((
# First run of (extractor class, URL) pairs: URLs built with
# 'base + "..."'; it has no highlights entry.
(TwitterInfoExtractor , base + "info"),
(TwitterAvatarExtractor , base + "photo"),
(TwitterBackgroundExtractor, base + "header_photo"),
(TwitterTimelineExtractor , base + "timeline"),
(TwitterTweetsExtractor , base + "tweets"),
(TwitterMediaExtractor , base + "media"),
(TwitterRepliesExtractor , base + "with_replies"),
(TwitterLikesExtractor , base + "likes"),
# Second run: the same URLs written as f-strings, plus a
# TwitterHighlightsExtractor pair placed between replies and likes.
(TwitterInfoExtractor , f"{base}info"),
(TwitterAvatarExtractor , f"{base}photo"),
(TwitterBackgroundExtractor, f"{base}header_photo"),
(TwitterTimelineExtractor , f"{base}timeline"),
(TwitterTweetsExtractor , f"{base}tweets"),
(TwitterMediaExtractor , f"{base}media"),
(TwitterRepliesExtractor , f"{base}with_replies"),
(TwitterHighlightsExtractor, f"{base}highlights"),
(TwitterLikesExtractor , f"{base}likes"),
), ("timeline",))
@@ -781,6 +782,16 @@ class TwitterRepliesExtractor(TwitterExtractor):
return self.api.user_tweets_and_replies(self.user)
class TwitterHighlightsExtractor(TwitterExtractor):
    """Extractor for Tweets from a user's highlights timeline"""
    subcategory = "highlights"
    example = "https://x.com/USER/highlights"
    # '(?!search)' rejects a user path segment that starts with 'search';
    # '(?!\w)' requires that no word character directly follows 'highlights'.
    pattern = rf"{BASE_PATTERN}/(?!search)([^/?#]+)/highlights(?!\w)"

    def tweets(self):
        """Return the result of the API's user_highlights() for self.user"""
        highlights = self.api.user_highlights(self.user)
        return highlights
class TwitterMediaExtractor(TwitterExtractor):
"""Extractor for Tweets from a user's Media timeline"""
subcategory = "media"
@@ -1354,6 +1365,20 @@ class TwitterAPI():
return self._pagination_tweets(
endpoint, variables, field_toggles=field_toggles)
def user_highlights(self, screen_name):
    """Query the 'UserHighlightsTweets' GraphQL endpoint for a user

    'screen_name' is resolved through self._user_id_by_screen_name();
    the endpoint path, the query variables and the field toggles are
    then handed to self._pagination_tweets(), whose result is returned
    unchanged.
    """
    # Resolve the ID first so the lookup happens before pagination starts.
    user_id = self._user_id_by_screen_name(screen_name)
    return self._pagination_tweets(
        "/graphql/gmHw9geMTncZ7jeLLUUNOw/UserHighlightsTweets",
        {
            "userId": user_id,
            "count": 100,
            "includePromotedContent": False,
            "withVoice": True,
        },
        field_toggles={"withArticlePlainText": False},
    )
def user_media(self, screen_name):
endpoint = "/graphql/jCRhbOzdgOHp6u9H4g2tEg/UserMedia"
variables = {

View File

@@ -772,4 +772,9 @@ The Washington Post writes, "Three weeks after the toxic train derailment in Ohi
"#class" : twitter.TwitterImageExtractor,
},
{
"#url" : "https://x.com/tetsuoai/highlights",
"#class" : twitter.TwitterHighlightsExtractor,
},
)