[idolcomplex] fix/update pagination logic (#7549)

This commit is contained in:
Mike Fährmann
2025-05-20 21:50:40 +02:00
parent c929a3d3cc
commit 96f7f626d5

View File

@@ -187,32 +187,39 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
return {"search_tags": " ".join(tags)}
# Generator: requests listing pages in a loop and yields values extracted
# from each page until a page carries no 'next-page-url' attribute or the
# same "next" value is seen twice in a row.
# NOTE(review): this span looks like a rendered diff whose +/- markers and
# indentation were stripped, so pre-change and post-change statements appear
# interleaved (two `params = ...` assignments, two `self.request(...)` calls,
# both `next_url` and `next_page_url`). Confirm against the real file before
# relying on statement order.
def post_ids(self):
params = {"tags": self.tags}
# URL used for the first request; reassigned below from 'next-page-url'.
url = self.root + "/en/posts"
params = {"auto_page": "t"}
# Send "next" when self.next is truthy, otherwise send "page" taken from
# self.start_page.
if self.next:
params["next"] = self.next
else:
params["page"] = self.start_page
params["tags"] = self.tags
while True:
page = self.request(self.root, params=params, retries=10).text
# Offset just past the first marker found; str.find() returns -1 on a miss,
# so `+ 1` gives 0 and the `or` falls through to the second marker.
pos = ((page.find('id="more-popular-posts-link"') + 1) or
(page.find('<span class="thumb') + 1))
response = self.request(url, params=params, retries=10)
# Warn when the response has a non-empty history and its URL contains
# "/posts/premium" (presumably a requests-style redirect history -- confirm).
if response.history and "/posts/premium" in response.url:
self.log.warning("HTTP redirect to %s", response.url)
page = response.text
yield from self.find_pids(page, pos)
# presumably yields every substring between '"id":"' and the next '"';
# verify against text.extract_iter
yield from text.extract_iter(page, '"id":"', '"')
next_url = text.extract(page, 'next-page-url="', '"', pos)[0]
if not next_url:
next_page_url = text.extr(page, 'next-page-url="', '"')
# No 'next-page-url' value on this page: stop iterating.
if not next_page_url:
return
next_params = text.parse_query(text.unquote(text.unescape(
text.unescape(next_url).lstrip("?/"))))
# The attribute value is unescaped twice and unquoted, then split at the
# first "?" into the next request URL and its raw query string.
url, _, next_params = text.unquote(
text.unescape(text.unescape(next_page_url))).partition("?")
next_params = text.parse_query(next_params)
if "next" in next_params:
# stop if the same "next" value occurs twice in a row (#265)
if "next" in params and params["next"] == next_params["next"]:
return
# NOTE(review): "page" is forced to "2" whenever a "next" value is present;
# the reason is not visible in this span.
next_params["page"] = "2"
# A URL beginning with "/" is prefixed with self.root.
if url[0] == "/":
url = self.root + url
# The parsed query of the next-page URL becomes the next request's params.
params = next_params