[weibo] rework pagination logic (#4168)

Don't automatically stop paginating when an empty status list is received.

This is not expected to improve 'tabtype=feed' results, but at least
'tabtype=album' results, and others that use cursors, will no longer end prematurely.
This commit is contained in:
Mike Fährmann
2024-03-13 22:36:38 +01:00
parent d53db6e11a
commit 5158cbb4c1

View File

@@ -186,23 +186,34 @@ class WeiboExtractor(Extractor):
data = data["data"] data = data["data"]
statuses = data["list"] statuses = data["list"]
if not statuses:
return
yield from statuses yield from statuses
if "next_cursor" in data: # videos, newvideo # videos, newvideo
if data["next_cursor"] == -1: cursor = data.get("next_cursor")
if cursor:
if cursor == -1:
return return
params["cursor"] = data["next_cursor"] params["cursor"] = cursor
elif "page" in params: # home, article continue
params["page"] += 1
elif data["since_id"]: # album # album
since_id = data.get("since_id")
if since_id:
params["sinceid"] = data["since_id"] params["sinceid"] = data["since_id"]
else: # feed, last album page continue
try:
params["since_id"] = statuses[-1]["id"] - 1 # home, article
except KeyError: if "page" in params:
if not statuses:
return return
params["page"] += 1
continue
# feed, last album page
try:
params["since_id"] = statuses[-1]["id"] - 1
except LookupError:
return
def _sina_visitor_system(self, response): def _sina_visitor_system(self, response):
self.log.info("Sina Visitor System") self.log.info("Sina Visitor System")