diff --git a/docs/configuration.rst b/docs/configuration.rst
index 92fd71fe..da89f20e 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -5631,6 +5631,22 @@ Description
     Logout and retry as guest when access to another user's Tweets is blocked.
 
 
+extractor.twitter.pagination-search
+-----------------------------------
+Type
+    ``string``
+Default
+    ``"cursor"``
+Description
+    Selects how to paginate over search results.
+
+    ``"cursor"``
+        Use ``cursor`` values provided by the API
+    ``"id"`` | ``"max_id"`` | ``"tweet_id"``
+        Update the ``max_id`` search query parameter
+        to the Tweet ID value of the last retrieved Tweet.
+
+
 extractor.twitter.pinned
 ------------------------
 Type
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index f1e3833c..af91986b 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -812,6 +812,7 @@
             "include"   : ["timeline"],
             "locked"    : "abort",
             "logout"    : true,
+            "pagination-search": "cursor",
             "pinned"    : false,
             "quoted"    : false,
             "ratelimit" : "wait",
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 7f58e6d1..64918cad 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -1459,10 +1459,17 @@ class TwitterAPI():
             "product": product,
             "withGrokTranslatedBio": False,
         }
+
+        if self.extractor.config("pagination-search") in (
+                "id", "tweet_id", "max_id"):
+            update_variables = self._update_variables_search
+        else:
+            update_variables = None
+
         return self._pagination_tweets(
             endpoint, variables,
             ("search_by_raw_query", "search_timeline", "timeline"),
-            stop_tweets=3)
+            stop_tweets=3, update_variables=update_variables)
 
     def community_query(self, community_id):
         endpoint = "/graphql/2W09l7nD7ZbxGQHXvfB22w/CommunityQuery"
@@ -1872,7 +1879,8 @@ class TwitterAPI():
             params["cursor"] = extr._update_cursor(cursor)
 
     def _pagination_tweets(self, endpoint, variables,
-                           path=None, stop_tweets=0,
+                           path=None, stop_tweets=0, update_variables=None,
                            features=None, field_toggles=None):
         extr = self.extractor
+        last_tweet = None
         original_retweets = (extr.retweets == "original")
@@ -2072,6 +2080,7 @@ class TwitterAPI():
 
             if tweet:
                 stop_tweets = stop_tweets_max
+                last_tweet = tweet
             else:
                 if stop_tweets <= 0:
                     return extr._update_cursor(None)
@@ -2079,9 +2088,14 @@ class TwitterAPI():
                     "No Tweet results (%s/%s)",
                     stop_tweets_max - stop_tweets + 1, stop_tweets_max)
                 stop_tweets -= 1
+
             if not cursor or cursor == variables.get("cursor"):
                 return extr._update_cursor(None)
-            variables["cursor"] = extr._update_cursor(cursor)
+
+            if update_variables is None:
+                variables["cursor"] = extr._update_cursor(cursor)
+            else:
+                variables = update_variables(variables, cursor, last_tweet)
 
     def _pagination_users(self, endpoint, variables, path=None):
         extr = self.extractor
@@ -2150,6 +2164,42 @@ class TwitterAPI():
 
         self.log.debug("Skipping %s ('%s')", tweet_id, text)
 
+    def _update_variables_search(self, variables, cursor, tweet):
+        """Continue a search by lowering its 'max_id' query filter
+
+        Rewrites variables["rawQuery"] so that it only matches Tweets
+        older than 'tweet' and resets variables["cursor"].
+        Falls back to regular 'cursor' pagination when 'tweet' is None
+        or the query cannot be updated.
+        """
+        if tweet is None:
+            # no Tweet retrieved so far -> nothing to derive 'max_id' from
+            variables["cursor"] = self.extractor._update_cursor(cursor)
+            return variables
+
+        try:
+            tweet_id = tweet.get("id_str") or tweet["legacy"]["id_str"]
+            max_id = f"max_id:{int(tweet_id)-1}"
+
+            query, n = text.re(r"\bmax_id:\d+").subn(
+                max_id, variables["rawQuery"])
+            if n:
+                variables["rawQuery"] = query
+            else:
+                variables["rawQuery"] = f"{query} {max_id}"
+
+            if prefix := self.extractor._cursor_prefix:
+                self.extractor._cursor_prefix = \
+                    f"{prefix.partition('_')[0]}_{tweet_id}/"
+            variables["cursor"] = None
+        except Exception as exc:
+            self.extractor.log.debug(
+                "Failed to update 'max_id' search query (%s: %s). Falling "
+                "back to 'cursor' pagination", exc.__class__.__name__, exc)
+            variables["cursor"] = self.extractor._update_cursor(cursor)
+
+        return variables
+
 
 @cache(maxage=365*86400, keyarg=1)
 def _login_impl(extr, username, password):