[idolcomplex] fix/update pagination logic (#7549)
This commit is contained in:
@@ -187,32 +187,39 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
|
||||
return {"search_tags": " ".join(tags)}
|
||||
|
||||
def post_ids(self):
|
||||
params = {"tags": self.tags}
|
||||
url = self.root + "/en/posts"
|
||||
|
||||
params = {"auto_page": "t"}
|
||||
if self.next:
|
||||
params["next"] = self.next
|
||||
else:
|
||||
params["page"] = self.start_page
|
||||
params["tags"] = self.tags
|
||||
|
||||
while True:
|
||||
page = self.request(self.root, params=params, retries=10).text
|
||||
pos = ((page.find('id="more-popular-posts-link"') + 1) or
|
||||
(page.find('<span class="thumb') + 1))
|
||||
response = self.request(url, params=params, retries=10)
|
||||
if response.history and "/posts/premium" in response.url:
|
||||
self.log.warning("HTTP redirect to %s", response.url)
|
||||
page = response.text
|
||||
|
||||
yield from self.find_pids(page, pos)
|
||||
yield from text.extract_iter(page, '"id":"', '"')
|
||||
|
||||
next_url = text.extract(page, 'next-page-url="', '"', pos)[0]
|
||||
if not next_url:
|
||||
next_page_url = text.extr(page, 'next-page-url="', '"')
|
||||
if not next_page_url:
|
||||
return
|
||||
|
||||
next_params = text.parse_query(text.unquote(text.unescape(
|
||||
text.unescape(next_url).lstrip("?/"))))
|
||||
url, _, next_params = text.unquote(
|
||||
text.unescape(text.unescape(next_page_url))).partition("?")
|
||||
next_params = text.parse_query(next_params)
|
||||
|
||||
if "next" in next_params:
|
||||
# stop if the same "next" value occurs twice in a row (#265)
|
||||
if "next" in params and params["next"] == next_params["next"]:
|
||||
return
|
||||
next_params["page"] = "2"
|
||||
|
||||
if url[0] == "/":
|
||||
url = self.root + url
|
||||
params = next_params
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user