diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py index 8ba6194d..06ec5715 100644 --- a/gallery_dl/extractor/aryion.py +++ b/gallery_dl/extractor/aryion.py @@ -29,7 +29,6 @@ class AryionExtractor(Extractor): Extractor.__init__(self, match) self.user = match.group(1) self.recursive = True - self._needle = "class='gallery-item' id='" def login(self): if self._check_cookies(self.cookienames): @@ -68,7 +67,7 @@ class AryionExtractor(Extractor): elif post is False and self.recursive: base = self.root + "/g4/view/" data = {"_extractor": AryionPostExtractor} - for post_id in self._pagination(base + post_id): + for post_id in self._pagination_params(base + post_id): yield Message.Queue, base + post_id, data def posts(self): @@ -77,10 +76,29 @@ class AryionExtractor(Extractor): def metadata(self): """Return general metadata""" - def _pagination(self, url): + def _pagination_params(self, url, params=None): + if params is None: + params = {"p": 1} + else: + params["p"] = text.parse_int(params.get("p"), 1) + + while True: + page = self.request(url, params=params).text + + cnt = 0 + for post_id in text.extract_iter( + page, "class='gallery-item' id='", "'"): + cnt += 1 + yield post_id + + if cnt < 40: + return + params["p"] += 1 + + def _pagination_next(self, url): while True: page = self.request(url).text - yield from text.extract_iter(page, self._needle, "'") + yield from text.extract_iter(page, "thumb' href='/g4/view/", "'") pos = page.find("Next >>") if pos < 0: @@ -186,11 +204,10 @@ class AryionGalleryExtractor(AryionExtractor): def posts(self): if self.recursive: url = "{}/g4/gallery/{}".format(self.root, self.user) - return self._pagination(url) + return self._pagination_params(url) else: - self._needle = "thumb' href='/g4/view/" url = "{}/g4/latest.php?name={}".format(self.root, self.user) - return util.advance(self._pagination(url), self.offset) + return util.advance(self._pagination_next(url), self.offset) class AryionTagExtractor(AryionExtractor): @@ -199,17 +216,18 @@ class AryionTagExtractor(AryionExtractor): directory_fmt = ("{category}", "tags", "{search_tags}") archive_fmt = "t_{search_tags}_{id}" pattern = BASE_PATTERN + r"/tags\.php\?([^#]+)" - test = ("https://aryion.com/g4/tags.php?tag=star+wars&p=18", { + test = ("https://aryion.com/g4/tags.php?tag=star+wars&p=19", { "count": ">= 5", }) def metadata(self): - return {"search_tags": text.parse_query(self.user).get("tag")} + self.params = text.parse_query(self.user) + self.user = None + return {"search_tags": self.params.get("tag")} def posts(self): - url = "{}/g4/tags.php?{}".format(self.root, self.user) - self.user = None - return self._pagination(url) + url = self.root + "/g4/tags.php" + return self._pagination_params(url, self.params) class AryionPostExtractor(AryionExtractor):