[twitter] add 'highlights' extractor (#7826)
This commit is contained in:
@@ -5422,14 +5422,16 @@ Description
|
|||||||
when processing a user profile.
|
when processing a user profile.
|
||||||
|
|
||||||
Possible values are
|
Possible values are
|
||||||
``"info"``,
|
|
||||||
``"avatar"``,
|
* ``"info"``
|
||||||
``"background"``,
|
* ``"avatar"``
|
||||||
``"timeline"``,
|
* ``"background"``
|
||||||
``"tweets"``,
|
* ``"timeline"``
|
||||||
``"media"``,
|
* ``"tweets"``
|
||||||
``"replies"``,
|
* ``"media"``
|
||||||
``"likes"``.
|
* ``"replies"``
|
||||||
|
* ``"highlights"``
|
||||||
|
* ``"likes"``
|
||||||
|
|
||||||
It is possible to use ``"all"`` instead of listing all values separately.
|
It is possible to use ``"all"`` instead of listing all values separately.
|
||||||
|
|
||||||
|
|||||||
@@ -994,7 +994,7 @@ Consider all listed sites to potentially be NSFW.
|
|||||||
<tr id="twitter" title="twitter">
|
<tr id="twitter" title="twitter">
|
||||||
<td>Twitter</td>
|
<td>Twitter</td>
|
||||||
<td>https://x.com/</td>
|
<td>https://x.com/</td>
|
||||||
<td>Avatars, Backgrounds, Bookmarks, Communities, Events, Followers, Followed Users, Hashtags, individual Images, User Profile Information, Likes, Lists, List Members, Media Timelines, Quotes, Search Results, Timelines, Tweets, User Profiles</td>
|
<td>Avatars, Backgrounds, Bookmarks, Communities, Events, Followers, Followed Users, Hashtags, Highlights, individual Images, User Profile Information, Likes, Lists, List Members, Media Timelines, Quotes, Search Results, Timelines, Tweets, User Profiles</td>
|
||||||
<td>Supported</td>
|
<td>Supported</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr id="unsplash" title="unsplash">
|
<tr id="unsplash" title="unsplash">
|
||||||
|
|||||||
@@ -649,18 +649,19 @@ class TwitterUserExtractor(Dispatch, TwitterExtractor):
|
|||||||
def items(self):
|
def items(self):
|
||||||
user, user_id = self.groups
|
user, user_id = self.groups
|
||||||
if user_id is not None:
|
if user_id is not None:
|
||||||
user = "id:" + user_id
|
user = f"id:{user_id}"
|
||||||
|
|
||||||
base = f"{self.root}/{user}/"
|
base = f"{self.root}/{user}/"
|
||||||
return self._dispatch_extractors((
|
return self._dispatch_extractors((
|
||||||
(TwitterInfoExtractor , base + "info"),
|
(TwitterInfoExtractor , f"{base}info"),
|
||||||
(TwitterAvatarExtractor , base + "photo"),
|
(TwitterAvatarExtractor , f"{base}photo"),
|
||||||
(TwitterBackgroundExtractor, base + "header_photo"),
|
(TwitterBackgroundExtractor, f"{base}header_photo"),
|
||||||
(TwitterTimelineExtractor , base + "timeline"),
|
(TwitterTimelineExtractor , f"{base}timeline"),
|
||||||
(TwitterTweetsExtractor , base + "tweets"),
|
(TwitterTweetsExtractor , f"{base}tweets"),
|
||||||
(TwitterMediaExtractor , base + "media"),
|
(TwitterMediaExtractor , f"{base}media"),
|
||||||
(TwitterRepliesExtractor , base + "with_replies"),
|
(TwitterRepliesExtractor , f"{base}with_replies"),
|
||||||
(TwitterLikesExtractor , base + "likes"),
|
(TwitterHighlightsExtractor, f"{base}highlights"),
|
||||||
|
(TwitterLikesExtractor , f"{base}likes"),
|
||||||
), ("timeline",))
|
), ("timeline",))
|
||||||
|
|
||||||
|
|
||||||
@@ -781,6 +782,16 @@ class TwitterRepliesExtractor(TwitterExtractor):
|
|||||||
return self.api.user_tweets_and_replies(self.user)
|
return self.api.user_tweets_and_replies(self.user)
|
||||||
|
|
||||||
|
|
||||||
|
class TwitterHighlightsExtractor(TwitterExtractor):
    """Extractor for Tweets from a user's highlights timeline"""
    subcategory = "highlights"
    # '(?!search)' rejects paths whose first segment starts with "search";
    # '(?!\w)' keeps "/highlights" from matching a longer word such as
    # "/highlightsfoo". The single capture group is the first path segment
    # (presumably the user's screen name -- confirm against BASE_PATTERN).
    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/highlights(?!\w)"
    example = "https://x.com/USER/highlights"

    def tweets(self):
        """Return the result of the API's 'user_highlights' for 'self.user'.

        NOTE(review): 'self.api' and 'self.user' are not set in this class;
        presumably the TwitterExtractor base class provides them -- confirm.
        """
        return self.api.user_highlights(self.user)
|
||||||
|
|
||||||
|
|
||||||
class TwitterMediaExtractor(TwitterExtractor):
|
class TwitterMediaExtractor(TwitterExtractor):
|
||||||
"""Extractor for Tweets from a user's Media timeline"""
|
"""Extractor for Tweets from a user's Media timeline"""
|
||||||
subcategory = "media"
|
subcategory = "media"
|
||||||
@@ -1354,6 +1365,20 @@ class TwitterAPI():
|
|||||||
return self._pagination_tweets(
|
return self._pagination_tweets(
|
||||||
endpoint, variables, field_toggles=field_toggles)
|
endpoint, variables, field_toggles=field_toggles)
|
||||||
|
|
||||||
|
def user_highlights(self, screen_name):
    """Query the 'UserHighlightsTweets' GraphQL endpoint for a user.

    'screen_name' is resolved through 'self._user_id_by_screen_name()'
    and sent as the "userId" request variable. The endpoint, variables
    and field toggles are handed to 'self._pagination_tweets()', whose
    return value is passed back to the caller unchanged.
    """
    endpoint = "/graphql/gmHw9geMTncZ7jeLLUUNOw/UserHighlightsTweets"
    variables = {
        "userId": self._user_id_by_screen_name(screen_name),
        # requested page size
        # (NOTE(review): actual server-side limit not visible here)
        "count": 100,
        "includePromotedContent": False,
        "withVoice": True,
    }
    field_toggles = {
        "withArticlePlainText": False,
    }
    return self._pagination_tweets(
        endpoint, variables, field_toggles=field_toggles)
|
||||||
|
|
||||||
def user_media(self, screen_name):
|
def user_media(self, screen_name):
|
||||||
endpoint = "/graphql/jCRhbOzdgOHp6u9H4g2tEg/UserMedia"
|
endpoint = "/graphql/jCRhbOzdgOHp6u9H4g2tEg/UserMedia"
|
||||||
variables = {
|
variables = {
|
||||||
|
|||||||
@@ -772,4 +772,9 @@ The Washington Post writes, "Three weeks after the toxic train derailment in Ohi
|
|||||||
"#class" : twitter.TwitterImageExtractor,
|
"#class" : twitter.TwitterImageExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://x.com/tetsuoai/highlights",
|
||||||
|
"#class" : twitter.TwitterHighlightsExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user