[idolcomplex] improve and fix pagination (#1601)
Always rely on the 'next-page-url' value and its query parameters to build the request for the next page.
This commit is contained in:
@@ -132,11 +132,16 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
|
|||||||
archive_fmt = "t_{search_tags}_{id}"
|
archive_fmt = "t_{search_tags}_{id}"
|
||||||
pattern = r"(?:https?://)?idol\.sankakucomplex\.com/\?([^#]*)"
|
pattern = r"(?:https?://)?idol\.sankakucomplex\.com/\?([^#]*)"
|
||||||
test = (
|
test = (
|
||||||
("https://idol.sankakucomplex.com/?tags=lyumos+wreath", {
|
("https://idol.sankakucomplex.com/?tags=lyumos", {
|
||||||
"count": ">= 6",
|
"count": 5,
|
||||||
|
"range": "18-22",
|
||||||
"pattern": r"https://is\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}"
|
"pattern": r"https://is\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}"
|
||||||
r"/[^/]{32}\.\w+\?e=\d+&m=[^&#]+",
|
r"/[^/]{32}\.\w+\?e=\d+&m=[^&#]+",
|
||||||
}),
|
}),
|
||||||
|
("https://idol.sankakucomplex.com/?tags=order:favcount", {
|
||||||
|
"count": 5,
|
||||||
|
"range": "18-22",
|
||||||
|
}),
|
||||||
("https://idol.sankakucomplex.com"
|
("https://idol.sankakucomplex.com"
|
||||||
"/?tags=lyumos+wreath&page=3&next=694215"),
|
"/?tags=lyumos+wreath&page=3&next=694215"),
|
||||||
)
|
)
|
||||||
@@ -184,21 +189,21 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
|
|||||||
while True:
|
while True:
|
||||||
page = self.request(self.root, params=params, retries=10).text
|
page = self.request(self.root, params=params, retries=10).text
|
||||||
pos = page.find("<div id=more-popular-posts-link>") + 1
|
pos = page.find("<div id=more-popular-posts-link>") + 1
|
||||||
|
yield from text.extract_iter(page, '" id=p', '>', pos)
|
||||||
|
|
||||||
ids = list(text.extract_iter(page, '" id=p', '>', pos))
|
next_url = text.extract(page, 'next-page-url="', '"', pos)[0]
|
||||||
if not ids:
|
if not next_url:
|
||||||
return
|
|
||||||
yield from ids
|
|
||||||
|
|
||||||
next_qs = text.extract(page, 'next-page-url="?', '"', pos)[0]
|
|
||||||
next_id = text.parse_query(next_qs).get("next")
|
|
||||||
|
|
||||||
# stop if the same "next" parameter occurs twice in a row (#265)
|
|
||||||
if "next" in params and params["next"] == next_id:
|
|
||||||
return
|
return
|
||||||
|
|
||||||
params["next"] = next_id or (text.parse_int(ids[-1]) - 1)
|
next_params = text.parse_query(text.unescape(
|
||||||
params["page"] = "2"
|
next_url).lstrip("?/"))
|
||||||
|
|
||||||
|
if "next" in next_params:
|
||||||
|
# stop if the same "next" value occurs twice in a row (#265)
|
||||||
|
if "next" in params and params["next"] == next_params["next"]:
|
||||||
|
return
|
||||||
|
next_params["page"] = "2"
|
||||||
|
params = next_params
|
||||||
|
|
||||||
|
|
||||||
class IdolcomplexPoolExtractor(IdolcomplexExtractor):
|
class IdolcomplexPoolExtractor(IdolcomplexExtractor):
|
||||||
|
|||||||
Reference in New Issue
Block a user