From 6a0c5e34f4e5b9e3113fc819e93e645e69cd19b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Wed, 9 Nov 2022 11:15:34 +0100
Subject: [PATCH] [exhentai] fix pagination (#3181)

---
 gallery_dl/extractor/exhentai.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 7d1c468f..10f685e6 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -490,15 +490,18 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
             self.params = {"f_search": tag, "page": 0}
         else:
             self.params = text.parse_query(query)
-            self.params["page"] = text.parse_int(self.params.get("page"))
+            if "next" not in self.params:
+                self.params["page"] = text.parse_int(self.params.get("page"))
 
     def items(self):
         self.login()
         data = {"_extractor": ExhentaiGalleryExtractor}
+        search_url = self.search_url
+        params = self.params
 
         while True:
             last = None
-            page = self.request(self.search_url, params=self.params).text
+            page = self.request(search_url, params=params).text
 
             for gallery in ExhentaiGalleryExtractor.pattern.finditer(page):
                 url = gallery.group(0)
@@ -507,9 +510,17 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
                 last = url
                 yield Message.Queue, url + "/", data
 
-            if 'class="ptdd">&gt;<' in page or ">No hits found</p>" in page:
+            next_url = text.extr(page, 'nexturl = "', '"', None)
+            if next_url is not None:
+                if not next_url:
+                    return
+                search_url = next_url
+                params = None
+
+            elif 'class="ptdd">&gt;<' in page or ">No hits found</p>" in page:
                 return
-            self.params["page"] += 1
+            else:
+                params["page"] += 1
 
 
 class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor):