[twitter] add 'highlights' extractor (#7826)
This commit is contained in:
@@ -5422,14 +5422,16 @@ Description
|
||||
when processing a user profile.
|
||||
|
||||
Possible values are
|
||||
``"info"``,
|
||||
``"avatar"``,
|
||||
``"background"``,
|
||||
``"timeline"``,
|
||||
``"tweets"``,
|
||||
``"media"``,
|
||||
``"replies"``,
|
||||
``"likes"``.
|
||||
|
||||
* ``"info"``
|
||||
* ``"avatar"``
|
||||
* ``"background"``
|
||||
* ``"timeline"``
|
||||
* ``"tweets"``
|
||||
* ``"media"``
|
||||
* ``"replies"``
|
||||
* ``"highlights"``
|
||||
* ``"likes"``
|
||||
|
||||
It is possible to use ``"all"`` instead of listing all values separately.
|
||||
|
||||
|
||||
@@ -994,7 +994,7 @@ Consider all listed sites to potentially be NSFW.
|
||||
<tr id="twitter" title="twitter">
|
||||
<td>Twitter</td>
|
||||
<td>https://x.com/</td>
|
||||
<td>Avatars, Backgrounds, Bookmarks, Communities, Events, Followers, Followed Users, Hashtags, individual Images, User Profile Information, Likes, Lists, List Members, Media Timelines, Quotes, Search Results, Timelines, Tweets, User Profiles</td>
|
||||
<td>Avatars, Backgrounds, Bookmarks, Communities, Events, Followers, Followed Users, Hashtags, Highlights, individual Images, User Profile Information, Likes, Lists, List Members, Media Timelines, Quotes, Search Results, Timelines, Tweets, User Profiles</td>
|
||||
<td>Supported</td>
|
||||
</tr>
|
||||
<tr id="unsplash" title="unsplash">
|
||||
|
||||
@@ -649,18 +649,19 @@ class TwitterUserExtractor(Dispatch, TwitterExtractor):
|
||||
def items(self):
|
||||
user, user_id = self.groups
|
||||
if user_id is not None:
|
||||
user = "id:" + user_id
|
||||
user = f"id:{user_id}"
|
||||
|
||||
base = f"{self.root}/{user}/"
|
||||
return self._dispatch_extractors((
|
||||
(TwitterInfoExtractor , base + "info"),
|
||||
(TwitterAvatarExtractor , base + "photo"),
|
||||
(TwitterBackgroundExtractor, base + "header_photo"),
|
||||
(TwitterTimelineExtractor , base + "timeline"),
|
||||
(TwitterTweetsExtractor , base + "tweets"),
|
||||
(TwitterMediaExtractor , base + "media"),
|
||||
(TwitterRepliesExtractor , base + "with_replies"),
|
||||
(TwitterLikesExtractor , base + "likes"),
|
||||
(TwitterInfoExtractor , f"{base}info"),
|
||||
(TwitterAvatarExtractor , f"{base}photo"),
|
||||
(TwitterBackgroundExtractor, f"{base}header_photo"),
|
||||
(TwitterTimelineExtractor , f"{base}timeline"),
|
||||
(TwitterTweetsExtractor , f"{base}tweets"),
|
||||
(TwitterMediaExtractor , f"{base}media"),
|
||||
(TwitterRepliesExtractor , f"{base}with_replies"),
|
||||
(TwitterHighlightsExtractor, f"{base}highlights"),
|
||||
(TwitterLikesExtractor , f"{base}likes"),
|
||||
), ("timeline",))
|
||||
|
||||
|
||||
@@ -781,6 +782,16 @@ class TwitterRepliesExtractor(TwitterExtractor):
|
||||
return self.api.user_tweets_and_replies(self.user)
|
||||
|
||||
|
||||
class TwitterHighlightsExtractor(TwitterExtractor):
    """Extractor for the Tweets shown on a user's 'Highlights' tab"""
    subcategory = "highlights"
    # '(?!search)' keeps '/search/...' URLs from being read as a user name;
    # '(?!\w)' stops the match from extending into a longer path word.
    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/highlights(?!\w)"
    example = "https://x.com/USER/highlights"

    def tweets(self):
        """Return the highlights timeline results for 'self.user'"""
        api = self.api
        return api.user_highlights(self.user)
|
||||
|
||||
|
||||
class TwitterMediaExtractor(TwitterExtractor):
|
||||
"""Extractor for Tweets from a user's Media timeline"""
|
||||
subcategory = "media"
|
||||
@@ -1354,6 +1365,20 @@ class TwitterAPI():
|
||||
return self._pagination_tweets(
|
||||
endpoint, variables, field_toggles=field_toggles)
|
||||
|
||||
def user_highlights(self, screen_name):
    """Paginate over the Tweets of a user's highlights timeline

    'screen_name' is resolved with _user_id_by_screen_name() and the
    result is sent as the 'userId' query variable. The return value is
    whatever _pagination_tweets() produces for this endpoint.
    """
    # Resolve the user first; the request variables depend on it.
    user_id = self._user_id_by_screen_name(screen_name)

    return self._pagination_tweets(
        "/graphql/gmHw9geMTncZ7jeLLUUNOw/UserHighlightsTweets",
        {
            "userId": user_id,
            "count": 100,
            "includePromotedContent": False,
            "withVoice": True,
        },
        field_toggles={"withArticlePlainText": False},
    )
|
||||
|
||||
def user_media(self, screen_name):
|
||||
endpoint = "/graphql/jCRhbOzdgOHp6u9H4g2tEg/UserMedia"
|
||||
variables = {
|
||||
|
||||
@@ -772,4 +772,9 @@ The Washington Post writes, "Three weeks after the toxic train derailment in Ohi
|
||||
"#class" : twitter.TwitterImageExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://x.com/tetsuoai/highlights",
|
||||
"#class" : twitter.TwitterHighlightsExtractor,
|
||||
},
|
||||
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user