[aryion] update/improve pagination (#1849)

Manually increment the 'p' query parameter,
instead of relying on a "Next" link which only works up to page 200.
This commit is contained in:
Mike Fährmann
2021-09-16 00:39:12 +02:00
parent 266ed9b62e
commit 4b3e309b90

View File

@@ -29,7 +29,6 @@ class AryionExtractor(Extractor):
Extractor.__init__(self, match)
self.user = match.group(1)
self.recursive = True
self._needle = "class='gallery-item' id='"
def login(self):
if self._check_cookies(self.cookienames):
@@ -68,7 +67,7 @@ class AryionExtractor(Extractor):
elif post is False and self.recursive:
base = self.root + "/g4/view/"
data = {"_extractor": AryionPostExtractor}
for post_id in self._pagination(base + post_id):
for post_id in self._pagination_params(base + post_id):
yield Message.Queue, base + post_id, data
def posts(self):
@@ -77,10 +76,29 @@ class AryionExtractor(Extractor):
def metadata(self):
    """Return general metadata

    The base implementation consists of this docstring only, so calling
    it yields None; a subclass such as AryionTagExtractor overrides it
    to return a dict of values.
    """
def _pagination(self, url):
def _pagination_params(self, url, params=None):
    """Yield post IDs from 'url' by stepping through the 'p' parameter

    Requests 'url' repeatedly, increasing the 'p' query parameter by one
    after each page, and yields every string found between
    "class='gallery-item' id='" and the following "'".

    'params' may carry additional query parameters. Its 'p' entry, if
    present, selects the first page to request; it is passed through
    text.parse_int() with 1 as the fallback argument. The dict handed in
    by the caller is not modified.

    Iteration stops after the first page with fewer than 40 matches
    (presumably the item count of a full page - confirm against the site).
    """
    if params is None:
        params = {"p": 1}
    else:
        # Work on a private copy: the page counter below is updated in
        # place and must not leak into the caller's dict.
        params = dict(params)
        params["p"] = text.parse_int(params.get("p"), 1)

    while True:
        page = self.request(url, params=params).text

        cnt = 0
        for post_id in text.extract_iter(
                page, "class='gallery-item' id='", "'"):
            cnt += 1
            yield post_id

        # a short page means there is nothing left to fetch
        if cnt < 40:
            return
        params["p"] += 1
def _pagination_next(self, url):
while True:
page = self.request(url).text
yield from text.extract_iter(page, self._needle, "'")
yield from text.extract_iter(page, "thumb' href='/g4/view/", "'")
pos = page.find("Next &gt;&gt;")
if pos < 0:
@@ -186,11 +204,10 @@ class AryionGalleryExtractor(AryionExtractor):
def posts(self):
if self.recursive:
url = "{}/g4/gallery/{}".format(self.root, self.user)
return self._pagination(url)
return self._pagination_params(url)
else:
self._needle = "thumb' href='/g4/view/"
url = "{}/g4/latest.php?name={}".format(self.root, self.user)
return util.advance(self._pagination(url), self.offset)
return util.advance(self._pagination_next(url), self.offset)
class AryionTagExtractor(AryionExtractor):
@@ -199,17 +216,18 @@ class AryionTagExtractor(AryionExtractor):
directory_fmt = ("{category}", "tags", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
pattern = BASE_PATTERN + r"/tags\.php\?([^#]+)"
test = ("https://aryion.com/g4/tags.php?tag=star+wars&p=18", {
test = ("https://aryion.com/g4/tags.php?tag=star+wars&p=19", {
"count": ">= 5",
})
def metadata(self):
return {"search_tags": text.parse_query(self.user).get("tag")}
self.params = text.parse_query(self.user)
self.user = None
return {"search_tags": self.params.get("tag")}
def posts(self):
url = "{}/g4/tags.php?{}".format(self.root, self.user)
self.user = None
return self._pagination(url)
url = self.root + "/g4/tags.php"
return self._pagination_params(url, self.params)
class AryionPostExtractor(AryionExtractor):