[twitter] add 'bookmark' extractor (closes #625)
This commit is contained in:
@@ -117,7 +117,7 @@ SmugMug https://www.smugmug.com/ |smugmug-C|
|
|||||||
The /b/ Archive https://thebarchive.com/ Threads
|
The /b/ Archive https://thebarchive.com/ Threads
|
||||||
Tsumino https://www.tsumino.com/ Galleries, Search Results Optional
|
Tsumino https://www.tsumino.com/ Galleries, Search Results Optional
|
||||||
Tumblr https://www.tumblr.com/ Likes, Posts, Tag-Searches, User Profiles Optional (OAuth)
|
Tumblr https://www.tumblr.com/ Likes, Posts, Tag-Searches, User Profiles Optional (OAuth)
|
||||||
Twitter https://twitter.com/ Media Timelines, Search Results, Timelines, Tweets Optional
|
Twitter https://twitter.com/ |twitter-C| Optional
|
||||||
VSCO https://vsco.co/ Collections, individual Images, User Profiles
|
VSCO https://vsco.co/ Collections, individual Images, User Profiles
|
||||||
Wallhaven https://wallhaven.cc/ individual Images, Search Results |wallhaven-A|
|
Wallhaven https://wallhaven.cc/ individual Images, Search Results |wallhaven-A|
|
||||||
Warosu https://warosu.org/ Threads
|
Warosu https://warosu.org/ Threads
|
||||||
@@ -151,5 +151,6 @@ Turboimagehost https://www.turboimagehost.com/ individual Images
|
|||||||
.. |pixiv-C| replace:: Favorites, Follows, pixiv.me Links, Rankings, Search Results, User Profiles, individual Images
|
.. |pixiv-C| replace:: Favorites, Follows, pixiv.me Links, Rankings, Search Results, User Profiles, individual Images
|
||||||
.. |reddit-C| replace:: individual Images, Submissions, Subreddits, User Profiles
|
.. |reddit-C| replace:: individual Images, Submissions, Subreddits, User Profiles
|
||||||
.. |smugmug-C| replace:: Albums, individual Images, Images from Users and Folders
|
.. |smugmug-C| replace:: Albums, individual Images, Images from Users and Folders
|
||||||
|
.. |twitter-C| replace:: Bookmarks, Media Timelines, Search Results, Timelines, Tweets
|
||||||
.. |wallhaven-A| replace:: Optional (`API Key <configuration.rst#extractorwallhavenapi-key>`__)
|
.. |wallhaven-A| replace:: Optional (`API Key <configuration.rst#extractorwallhavenapi-key>`__)
|
||||||
.. |yuki-S| replace:: yuki.la 4chan archive
|
.. |yuki-S| replace:: yuki.la 4chan archive
|
||||||
|
|||||||
@@ -406,6 +406,81 @@ class TwitterTweetExtractor(TwitterExtractor):
|
|||||||
return (page[beg:end],)
|
return (page[beg:end],)
|
||||||
|
|
||||||
|
|
||||||
|
class TwitterBookmarkExtractor(TwitterExtractor):
|
||||||
|
"""Extractor for bookmarked tweets"""
|
||||||
|
subcategory = "bookmark"
|
||||||
|
pattern = r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com/i/bookmarks()"
|
||||||
|
test = ("https://twitter.com/i/bookmarks",)
|
||||||
|
|
||||||
|
def items(self):
|
||||||
|
self.login()
|
||||||
|
if not self.logged_in:
|
||||||
|
raise exception.AuthorizationError("Login required")
|
||||||
|
for cookie in self.session.cookies:
|
||||||
|
cookie.expires = None
|
||||||
|
|
||||||
|
url = "https://api.twitter.com/2/timeline/bookmark.json"
|
||||||
|
params = {
|
||||||
|
"include_profile_interstitial_type": "1",
|
||||||
|
"include_blocking": "1",
|
||||||
|
"include_blocked_by": "1",
|
||||||
|
"include_followed_by": "1",
|
||||||
|
"include_want_retweets": "1",
|
||||||
|
"include_mute_edge": "1",
|
||||||
|
"include_can_dm": "1",
|
||||||
|
"include_can_media_tag": "1",
|
||||||
|
"skip_status": "1",
|
||||||
|
"cards_platform": "Web-12",
|
||||||
|
"include_cards": "1",
|
||||||
|
"include_composer_source": "true",
|
||||||
|
"include_ext_alt_text": "true",
|
||||||
|
"include_reply_count": "1",
|
||||||
|
"tweet_mode": "extended",
|
||||||
|
"include_entities": "true",
|
||||||
|
"include_user_entities": "true",
|
||||||
|
"include_ext_media_color": "true",
|
||||||
|
"include_ext_media_availability": "true",
|
||||||
|
"send_error_codes": "true",
|
||||||
|
"simple_quoted_tweets": "true",
|
||||||
|
"count": "100",
|
||||||
|
"cursor": None,
|
||||||
|
"ext": "mediaStats%2CcameraMoment",
|
||||||
|
}
|
||||||
|
headers = {
|
||||||
|
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR"
|
||||||
|
"COuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu"
|
||||||
|
"4FA33AGWWjCpTnA",
|
||||||
|
"Origin": self.root,
|
||||||
|
"Referer": self.root + "/i/bookmarks",
|
||||||
|
"x-csrf-token": self.session.cookies.get("ct0"),
|
||||||
|
"x-twitter-active-user": "yes",
|
||||||
|
"x-twitter-auth-type": "Auth2Session",
|
||||||
|
"x-twitter-client-language": "en",
|
||||||
|
}
|
||||||
|
|
||||||
|
while True:
|
||||||
|
response = self.request(
|
||||||
|
url, params=params, headers=headers, fatal=False)
|
||||||
|
if response.status_code >= 400:
|
||||||
|
raise exception.StopExtraction(response.text)
|
||||||
|
data = response.json()
|
||||||
|
tweets = data["globalObjects"]["tweets"]
|
||||||
|
|
||||||
|
if not tweets:
|
||||||
|
return
|
||||||
|
for tweet_id, tweet_data in tweets.items():
|
||||||
|
tweet_url = "{}/i/web/status/{}".format(self.root, tweet_id)
|
||||||
|
tweet_data["_extractor"] = TwitterTweetExtractor
|
||||||
|
yield Message.Queue, tweet_url, tweet_data
|
||||||
|
|
||||||
|
inst = data["timeline"]["instructions"][0]
|
||||||
|
for entry in inst["addEntries"]["entries"]:
|
||||||
|
if entry["entryId"].startswith("cursor-bottom-"):
|
||||||
|
params["cursor"] = \
|
||||||
|
entry["content"]["operation"]["cursor"]["value"]
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
@memcache()
|
@memcache()
|
||||||
def _guest_token(extr, headers):
|
def _guest_token(extr, headers):
|
||||||
return extr.request(
|
return extr.request(
|
||||||
|
|||||||
Reference in New Issue
Block a user