[twitter] implement '"search-pagination": "date"'

This commit is contained in:
Mike Fährmann
2026-02-02 16:19:48 +01:00
parent a95cf92d61
commit 153e589dd0
2 changed files with 40 additions and 3 deletions

View File

@@ -6663,6 +6663,9 @@ Description
``"max_id"`` | ``"maxid"`` | ``"id"``
Update the ``max_id`` search query parameter
to the Tweet ID value of the last retrieved Tweet.
``"until"`` | ``"date"`` | ``"datetime"`` | ``"dt"``
Update the ``until`` search query parameter
to the date value of the last retrieved Tweet.
extractor.twitter.search-results

View File

@@ -1500,8 +1500,12 @@ class TwitterAPI():
"withGrokTranslatedBio": False,
}
if cfg("search-pagination") in ("max_id", "maxid", "id"):
update_variables = self._update_variables_search
pgn = cfg("search-pagination")
if pgn in ("max_id", "maxid", "id"):
update_variables = self._update_variables_search_maxid
elif pgn in {"until", "date", "datetime", "dt"}:
update_variables = self._update_variables_search_date
self._var_date_prev = None
else:
update_variables = None
@@ -2280,7 +2284,7 @@ class TwitterAPI():
self.log.debug("Skipping %s ('%s')", tweet_id, text)
def _update_variables_search(self, variables, cursor, tweet):
def _update_variables_search_maxid(self, variables, cursor, tweet):
try:
tweet_id = tweet.get("id_str") or tweet["legacy"]["id_str"]
max_id = "max_id:" + str(int(tweet_id)-1)
@@ -2304,6 +2308,36 @@ class TwitterAPI():
return variables
def _update_variables_search_date(self, variables, cursor, tweet):
    """Continue a search by moving its 'until:' filter to 'tweet's date.

    The Tweet ID is converted with the extractor's _tweetid_to_datetime()
    and written into variables["rawQuery"] as an 'until:YYYY-MM-DD' term:
    existing terms of that form are replaced, otherwise the term is
    appended. variables["cursor"] is then reset to None and the converted
    value is remembered in self._var_date_prev.

    If the converted value equals the remembered one, or if any step
    raises, variables["cursor"] is instead set from 'cursor' through the
    extractor's _update_cursor().

    Returns 'variables', which is modified in place.
    """
    extr = self.extractor
    try:
        tweet_id = tweet.get("id_str")
        if not tweet_id:
            # no top-level ID; read it from the nested "legacy" object
            tweet_id = tweet["legacy"]["id_str"]
        date = extr._tweetid_to_datetime(int(tweet_id))

        if date == self._var_date_prev:
            # Same value as on the previous call: rewriting the query
            # would not change it, so advance with the cursor instead.
            variables["cursor"] = extr._update_cursor(cursor)
        else:
            # NOTE(review): the query term below only carries
            # year/month/day while the comparison above uses the whole
            # converted value - confirm this granularity is intended.
            dstr = f"until:{date.year:>04}-{date.month:>02}-{date.day:>02}"
            # text.re() presumably returns a compiled pattern object
            pattern = text.re(r"\buntil:\d{4}-\d{2}-\d{2}")
            query, n = pattern.subn(dstr, variables["rawQuery"])
            # n == 0 means the query had no 'until:' term to replace
            variables["rawQuery"] = query if n else f"{query} {dstr}"

            prefix = getattr(extr, "_cursor_prefix", None)
            if prefix:
                # keep the part before the first '_' and
                # attach the ID of the Tweet the new query starts from
                extr._cursor_prefix = \
                    f"{prefix.partition('_')[0]}_{tweet_id}/"

            # the rewritten query starts over without a cursor
            variables["cursor"] = None
            self._var_date_prev = date
    except Exception as exc:
        # best effort: any failure falls back to regular cursor pagination
        extr.log.debug(
            "Failed to update 'until' search query (%s: %s). Falling "
            "back to 'cursor' pagination", exc.__class__.__name__, exc)
        variables["cursor"] = extr._update_cursor(cursor)

    return variables
@cache(maxage=365*86400, keyarg=1)
def _login_impl(extr, username, password):