From 49463f76bbfbb6f6bf7f646a0d0853e8a74e76a1 Mon Sep 17 00:00:00 2001 From: Jad Date: Mon, 26 Feb 2018 18:13:49 +0800 Subject: [PATCH] support multi-page URL (#79) * support multi-page URL * fix * all done. * fix, again --- gallery_dl/extractor/idolcomplex.py | 16 ++++++++++------ gallery_dl/extractor/sankaku.py | 13 ++++++++++--- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py index 210749ae..35b88616 100644 --- a/gallery_dl/extractor/idolcomplex.py +++ b/gallery_dl/extractor/idolcomplex.py @@ -22,12 +22,16 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor, sankaku.SankakuTagExtractor): """Extractor for images from idol.sankakucomplex.com by search-tags""" pattern = [r"(?:https?://)?idol\.sankakucomplex\.com" - r"/\?(?:[^&#]*&)*tags=([^&#]+)"] - test = [("https://idol.sankakucomplex.com/?tags=lyumos+wreath", { - "count": ">= 6", - "pattern": (r"https://is\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}" - r"/[^/]{32}\.\w+\?e=\d+&m=[^&#]+"), - })] + r"(?:/\?([^#]*))+"] + test = [ + ("https://idol.sankakucomplex.com/?tags=lyumos+wreath", { + "count": ">= 6", + "pattern": (r"https://is\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}" + r"/[^/]{32}\.\w+\?e=\d+&m=[^&#]+"), + }), + (("https://idol.sankakucomplex.com/" + "?tags=marie_rose&page=3&next=615855"), None), + ] class IdolcomplexPoolExtractor(IdolcomplexExtractor, diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index a7f58b11..359f4b05 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -133,7 +133,7 @@ class SankakuTagExtractor(SankakuExtractor): subcategory = "tag" directory_fmt = ["{category}", "{tags}"] pattern = [r"(?:https?://)?chan\.sankakucomplex\.com" - r"/\?(?:[^&#]*&)*tags=([^&#]+)"] + r"(?:/\?([^#]*))+"] test = [ ("https://chan.sankakucomplex.com/?tags=bonocho", { "count": 5, @@ -143,13 +143,18 @@ class SankakuTagExtractor(SankakuExtractor): ("https://chan.sankakucomplex.com/?tags=bonocho+a+b+c+d", { "options": (("username", None),), "exception": exception.StopExtraction, - }) + }), + (("https://chan.sankakucomplex.com/" + "?tags=marie_rose&page=98&next=3874906"), None), ] per_page = 20 def __init__(self, match): SankakuExtractor.__init__(self) - self.tags = text.unquote(match.group(1).replace("+", " ")) + query = text.parse_query(match.group(1)) + self.tags = query.get("tags", "").replace("+", " ") + self.start_page = util.safe_int(query.get("page"), 1) + self.next = util.safe_int(query.get("next"), 0) def skip(self, num): pages, posts = divmod(num, self.per_page) @@ -170,6 +175,8 @@ class SankakuTagExtractor(SankakuExtractor): def get_posts(self): params = {"tags": self.tags, "page": self.start_page} + if self.next > 0: + params["next"] = self.next while self.logged_in or params["page"] <= 25: page = self.request(self.root, params=params, retries=10).text