[aryion] update/improve pagination (#1849)
Manually increment the 'p' query parameter instead of relying on a "Next" link, which only works up to page 200.
This commit is contained in:
@@ -29,7 +29,6 @@ class AryionExtractor(Extractor):
|
||||
Extractor.__init__(self, match)
|
||||
self.user = match.group(1)
|
||||
self.recursive = True
|
||||
self._needle = "class='gallery-item' id='"
|
||||
|
||||
def login(self):
|
||||
if self._check_cookies(self.cookienames):
|
||||
@@ -68,7 +67,7 @@ class AryionExtractor(Extractor):
|
||||
elif post is False and self.recursive:
|
||||
base = self.root + "/g4/view/"
|
||||
data = {"_extractor": AryionPostExtractor}
|
||||
for post_id in self._pagination(base + post_id):
|
||||
for post_id in self._pagination_params(base + post_id):
|
||||
yield Message.Queue, base + post_id, data
|
||||
|
||||
def posts(self):
|
||||
@@ -77,10 +76,29 @@ class AryionExtractor(Extractor):
|
||||
def metadata(self):
|
||||
"""Return general metadata"""
|
||||
|
||||
def _pagination(self, url):
|
||||
def _pagination_params(self, url, params=None):
|
||||
if params is None:
|
||||
params = {"p": 1}
|
||||
else:
|
||||
params["p"] = text.parse_int(params.get("p"), 1)
|
||||
|
||||
while True:
|
||||
page = self.request(url, params=params).text
|
||||
|
||||
cnt = 0
|
||||
for post_id in text.extract_iter(
|
||||
page, "class='gallery-item' id='", "'"):
|
||||
cnt += 1
|
||||
yield post_id
|
||||
|
||||
if cnt < 40:
|
||||
return
|
||||
params["p"] += 1
|
||||
|
||||
def _pagination_next(self, url):
|
||||
while True:
|
||||
page = self.request(url).text
|
||||
yield from text.extract_iter(page, self._needle, "'")
|
||||
yield from text.extract_iter(page, "thumb' href='/g4/view/", "'")
|
||||
|
||||
pos = page.find("Next >>")
|
||||
if pos < 0:
|
||||
@@ -186,11 +204,10 @@ class AryionGalleryExtractor(AryionExtractor):
|
||||
def posts(self):
|
||||
if self.recursive:
|
||||
url = "{}/g4/gallery/{}".format(self.root, self.user)
|
||||
return self._pagination(url)
|
||||
return self._pagination_params(url)
|
||||
else:
|
||||
self._needle = "thumb' href='/g4/view/"
|
||||
url = "{}/g4/latest.php?name={}".format(self.root, self.user)
|
||||
return util.advance(self._pagination(url), self.offset)
|
||||
return util.advance(self._pagination_next(url), self.offset)
|
||||
|
||||
|
||||
class AryionTagExtractor(AryionExtractor):
|
||||
@@ -199,17 +216,18 @@ class AryionTagExtractor(AryionExtractor):
|
||||
directory_fmt = ("{category}", "tags", "{search_tags}")
|
||||
archive_fmt = "t_{search_tags}_{id}"
|
||||
pattern = BASE_PATTERN + r"/tags\.php\?([^#]+)"
|
||||
test = ("https://aryion.com/g4/tags.php?tag=star+wars&p=18", {
|
||||
test = ("https://aryion.com/g4/tags.php?tag=star+wars&p=19", {
|
||||
"count": ">= 5",
|
||||
})
|
||||
|
||||
def metadata(self):
|
||||
return {"search_tags": text.parse_query(self.user).get("tag")}
|
||||
self.params = text.parse_query(self.user)
|
||||
self.user = None
|
||||
return {"search_tags": self.params.get("tag")}
|
||||
|
||||
def posts(self):
|
||||
url = "{}/g4/tags.php?{}".format(self.root, self.user)
|
||||
self.user = None
|
||||
return self._pagination(url)
|
||||
url = self.root + "/g4/tags.php"
|
||||
return self._pagination_params(url, self.params)
|
||||
|
||||
|
||||
class AryionPostExtractor(AryionExtractor):
|
||||
|
||||
Reference in New Issue
Block a user