[weibo] rework pagination logic (#4168)
Don't automatically stop when receiving an empty status list. This shouldn't improve 'tabtype=feed' results, but at least 'tabtype=album' ones and others using cursors won't end prematurely.
This commit is contained in:
@@ -186,23 +186,34 @@ class WeiboExtractor(Extractor):
|
|||||||
|
|
||||||
data = data["data"]
|
data = data["data"]
|
||||||
statuses = data["list"]
|
statuses = data["list"]
|
||||||
if not statuses:
|
|
||||||
return
|
|
||||||
yield from statuses
|
yield from statuses
|
||||||
|
|
||||||
if "next_cursor" in data: # videos, newvideo
|
# videos, newvideo
|
||||||
if data["next_cursor"] == -1:
|
cursor = data.get("next_cursor")
|
||||||
|
if cursor:
|
||||||
|
if cursor == -1:
|
||||||
return
|
return
|
||||||
params["cursor"] = data["next_cursor"]
|
params["cursor"] = cursor
|
||||||
elif "page" in params: # home, article
|
continue
|
||||||
params["page"] += 1
|
|
||||||
elif data["since_id"]: # album
|
# album
|
||||||
|
since_id = data.get("since_id")
|
||||||
|
if since_id:
|
||||||
params["sinceid"] = data["since_id"]
|
params["sinceid"] = data["since_id"]
|
||||||
else: # feed, last album page
|
continue
|
||||||
try:
|
|
||||||
params["since_id"] = statuses[-1]["id"] - 1
|
# home, article
|
||||||
except KeyError:
|
if "page" in params:
|
||||||
|
if not statuses:
|
||||||
return
|
return
|
||||||
|
params["page"] += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# feed, last album page
|
||||||
|
try:
|
||||||
|
params["since_id"] = statuses[-1]["id"] - 1
|
||||||
|
except LookupError:
|
||||||
|
return
|
||||||
|
|
||||||
def _sina_visitor_system(self, response):
|
def _sina_visitor_system(self, response):
|
||||||
self.log.info("Sina Visitor System")
|
self.log.info("Sina Visitor System")
|
||||||
|
|||||||
Reference in New Issue
Block a user