From de0c57886d3dace5a97668c34a21728492e166e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 13 Nov 2020 06:47:45 +0100 Subject: [PATCH] [twitter] add 'list-members' extractor (closes #1096) --- docs/supportedsites.rst | 2 +- gallery_dl/extractor/twitter.py | 56 +++++++++++++++++++++++++++++++-- scripts/supportedsites.py | 1 + 3 files changed, 56 insertions(+), 3 deletions(-) diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 1f2c9097..8780e289 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -162,5 +162,5 @@ Turboimagehost https://www.turboimagehost.com/ individual Images .. |pixiv-C| replace:: Favorites, Follows, pixiv.me Links, Rankings, Search Results, User Profiles, individual Images .. |reddit-C| replace:: individual Images, Submissions, Subreddits, User Profiles .. |smugmug-C| replace:: Albums, individual Images, Images from Users and Folders -.. |twitter-C| replace:: Bookmarks, Likes, Media Timelines, Search Results, Timelines, Tweets +.. |twitter-C| replace:: Bookmarks, Likes, Lists, List Members, Media Timelines, Search Results, Timelines, Tweets .. |yuki-S| replace:: yuki.la 4chan archive diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 3fc15750..fe0b3c5f 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -11,7 +11,7 @@ from .common import Extractor, Message from .. import text, util, exception from ..cache import cache - +import json BASE_PATTERN = ( r"(?:https?://)?(?:www\.|mobile\.)?" @@ -324,7 +324,7 @@ class TwitterBookmarkExtractor(TwitterExtractor): class TwitterListExtractor(TwitterExtractor): """Extractor for Twitter lists""" subcategory = "list" - pattern = BASE_PATTERN + r"/i/lists/(\d+)" + pattern = BASE_PATTERN + r"/i/lists/(\d+)/?$" test = ("https://twitter.com/i/lists/784214683683127296", { "range": "1-40", "count": 40, @@ -335,6 +335,21 @@ class TwitterListExtractor(TwitterExtractor): return TwitterAPI(self).timeline_list(self.user) +class TwitterListMembersExtractor(TwitterExtractor): + """Extractor for members of a Twitter list""" + subcategory = "list-members" + pattern = BASE_PATTERN + r"/i/lists/(\d+)/members" + test = ("https://twitter.com/i/lists/784214683683127296/members",) + + def items(self): + self.login() + for user in TwitterAPI(self).list_members(self.user): + user["_extractor"] = TwitterTimelineExtractor + url = "{}/intent/user?user_id={}".format( + self.root, user["rest_id"]) + yield Message.Queue, url, user + + class TwitterSearchExtractor(TwitterExtractor): """Extractor for all images from a search timeline""" subcategory = "search" @@ -543,6 +558,16 @@ class TwitterAPI(): return self._pagination( endpoint, params, "sq-I-t-", "sq-cursor-bottom") + def list_members(self, list_id): + endpoint = "graphql/M74V2EwlxxVYGB4DbyAphQ/ListMembers" + variables = { + "listId": list_id, + "count" : 20, + "withTweetResult": False, + "withUserResult" : False, + } + return self._pagination_members(endpoint, variables) + def list_by_rest_id(self, list_id): endpoint = "graphql/LXXTUytSX1QY-2p8Xp9BFA/ListByRestId" params = {"variables": '{"listId":"' + list_id + '"' @@ -655,3 +680,30 @@ class TwitterAPI(): if not cursor or not tweet: return params["cursor"] = cursor + + def _pagination_members(self, endpoint, variables): + while True: + cursor = entry = stop = None + params = {"variables": json.dumps(variables)} + data = self._call(endpoint, params) + + try: + instructions = (data["data"]["list"]["members_timeline"] + ["timeline"]["instructions"]) + except KeyError: + raise exception.AuthorizationError() + + for instr in instructions: + if instr["type"] == "TimelineAddEntries": + for entry in instr["entries"]: + if entry["entryId"].startswith("user-"): + yield entry["content"]["itemContent"]["user"] + elif entry["entryId"].startswith("cursor-bottom-"): + cursor = entry["content"]["value"] + elif instr["type"] == "TimelineTerminateTimeline": + if instr["direction"] == "Bottom": + stop = True + + if stop or not cursor or not entry: + return + variables["cursor"] = cursor diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 27731dbb..e05778d8 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -142,6 +142,7 @@ SUBCATEGORY_MAP = { }, "twitter": { "media": "Media Timelines", + "list-members": "List Members", }, "wikiart": { "artists": "Artist Listings",