[twitter] implement '"search-pagination": "date"'
This commit is contained in:
@@ -6663,6 +6663,9 @@ Description
|
|||||||
``"max_id"`` | ``"maxid"`` | ``"id"``
|
``"max_id"`` | ``"maxid"`` | ``"id"``
|
||||||
Update the ``max_id`` search query parameter
|
Update the ``max_id`` search query parameter
|
||||||
to the Tweet ID value of the last retrieved Tweet.
|
to the Tweet ID value of the last retrieved Tweet.
|
||||||
|
``"until"`` | ``"date"`` | ``"datetime"`` | ``"dt"``
|
||||||
|
Update the ``until`` search query parameter
|
||||||
|
to the date value of the last retrieved Tweet.
|
||||||
|
|
||||||
|
|
||||||
extractor.twitter.search-results
|
extractor.twitter.search-results
|
||||||
|
|||||||
@@ -1500,8 +1500,12 @@ class TwitterAPI():
|
|||||||
"withGrokTranslatedBio": False,
|
"withGrokTranslatedBio": False,
|
||||||
}
|
}
|
||||||
|
|
||||||
if cfg("search-pagination") in ("max_id", "maxid", "id"):
|
pgn = cfg("search-pagination")
|
||||||
update_variables = self._update_variables_search
|
if pgn in ("max_id", "maxid", "id"):
|
||||||
|
update_variables = self._update_variables_search_maxid
|
||||||
|
elif pgn in {"until", "date", "datetime", "dt"}:
|
||||||
|
update_variables = self._update_variables_search_date
|
||||||
|
self._var_date_prev = None
|
||||||
else:
|
else:
|
||||||
update_variables = None
|
update_variables = None
|
||||||
|
|
||||||
@@ -2280,7 +2284,7 @@ class TwitterAPI():
|
|||||||
|
|
||||||
self.log.debug("Skipping %s ('%s')", tweet_id, text)
|
self.log.debug("Skipping %s ('%s')", tweet_id, text)
|
||||||
|
|
||||||
def _update_variables_search(self, variables, cursor, tweet):
|
def _update_variables_search_maxid(self, variables, cursor, tweet):
|
||||||
try:
|
try:
|
||||||
tweet_id = tweet.get("id_str") or tweet["legacy"]["id_str"]
|
tweet_id = tweet.get("id_str") or tweet["legacy"]["id_str"]
|
||||||
max_id = "max_id:" + str(int(tweet_id)-1)
|
max_id = "max_id:" + str(int(tweet_id)-1)
|
||||||
@@ -2304,6 +2308,36 @@ class TwitterAPI():
|
|||||||
|
|
||||||
return variables
|
return variables
|
||||||
|
|
||||||
|
def _update_variables_search_date(self, variables, cursor, tweet):
    """Continue a search by rewriting the query's 'until:' date term.

    The ID of 'tweet' is converted to a datetime by the extractor's
    _tweetid_to_datetime(). An 'until:YYYY-MM-DD' term built from that
    value replaces every existing 'until:' date term in
    variables["rawQuery"], or is appended when there is none.
    variables["cursor"] is then reset to None and the datetime is
    stored in self._var_date_prev.

    If the datetime equals the one stored by the previous call,
    "rawQuery" is left alone and variables["cursor"] is taken from the
    extractor's _update_cursor(cursor) instead. Any exception is logged
    at debug level and handled with that same cursor fallback.

    Returns the 'variables' mapping it was given, modified in place.
    """
    extr = self.extractor
    try:
        # The ID is either on the object itself or under "legacy".
        tweet_id = tweet.get("id_str") or tweet["legacy"]["id_str"]
        date = extr._tweetid_to_datetime(int(tweet_id))

        if date == self._var_date_prev:
            # Same value as last time: rewriting the query would not
            # change it, so continue with the cursor instead.
            variables["cursor"] = extr._update_cursor(cursor)
        else:
            dstr = f"until:{date.year:>04}-{date.month:>02}-{date.day:>02}"
            pattern = text.re(r"\buntil:\d{4}-\d{2}-\d{2}")
            query, count = pattern.subn(dstr, variables["rawQuery"])
            # No existing term was replaced -> append the new one.
            variables["rawQuery"] = query if count else f"{query} {dstr}"

            prefix = getattr(extr, "_cursor_prefix", None)
            if prefix:
                # Keep the part before the first '_' and attach the
                # ID of the Tweet the new query was derived from.
                head = prefix.partition('_')[0]
                extr._cursor_prefix = f"{head}_{tweet_id}/"

            variables["cursor"] = None
            self._var_date_prev = date
    except Exception as exc:
        extr.log.debug(
            "Failed to update 'until' search query (%s: %s). Falling "
            "back to 'cursor' pagination", exc.__class__.__name__, exc)
        variables["cursor"] = extr._update_cursor(cursor)

    return variables
|
||||||
|
|
||||||
|
|
||||||
@cache(maxage=365*86400, keyarg=1)
|
@cache(maxage=365*86400, keyarg=1)
|
||||||
def _login_impl(extr, username, password):
|
def _login_impl(extr, username, password):
|
||||||
|
|||||||
Reference in New Issue
Block a user