[imagefap] fix and improve gallery pagination (#3013)

This commit is contained in:
Mike Fährmann
2022-10-07 17:40:56 +02:00
parent 8b1fe0bcf1
commit 55fca5fe4b

View File

@@ -44,7 +44,9 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
("https://www.imagefap.com/gallery/5486966", { ("https://www.imagefap.com/gallery/5486966", {
"pattern": r"https://cdnh?\.imagefap\.com" "pattern": r"https://cdnh?\.imagefap\.com"
r"/images/full/\d+/\d+/\d+\.jpg", r"/images/full/\d+/\d+/\d+\.jpg",
"keyword": "3e24eace5b09639b881ebd393165862feb46adde", "keyword": "8d2e562df7a0bc9e8eecb9d1bb68d32b4086bf98",
"archive": False,
"count": 62,
}), }),
("https://www.imagefap.com/gallery.php?gid=7102714"), ("https://www.imagefap.com/gallery.php?gid=7102714"),
("https://beta.imagefap.com/gallery.php?gid=7102714"), ("https://beta.imagefap.com/gallery.php?gid=7102714"),
@@ -73,32 +75,42 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
title, _, descr = descr.partition(" porn picture gallery by ") title, _, descr = descr.partition(" porn picture gallery by ")
uploader, _, tags = descr.partition(" to see hottest ") uploader, _, tags = descr.partition(" to see hottest ")
self._count = text.parse_int(count)
return { return {
"gallery_id": text.parse_int(self.gid), "gallery_id": text.parse_int(self.gid),
"title": text.unescape(title), "title": text.unescape(title),
"uploader": uploader, "uploader": uploader,
"tags": tags[:-11].split(", "), "tags": tags[:-11].split(", "),
"count": text.parse_int(count), "count": self._count,
} }
def get_images(self): def get_images(self):
"""Collect image-urls and -metadata""" """Collect image-urls and -metadata"""
num = 0
url = "{}/photo/{}/".format(self.root, self.image_id) url = "{}/photo/{}/".format(self.root, self.image_id)
params = {"gid": self.gid, "idx": 0, "partial": "true"} params = {"gid": self.gid, "idx": 0, "partial": "true"}
headers = {
"Content-Type": "application/x-www-form-urlencoded",
"X-Requested-With": "XMLHttpRequest",
"Referer": "{}?pgid=&gid={}&page=0".format(url, self.image_id)
}
num = 0
total = self._count
while True: while True:
pos = 0 page = self.request(url, params=params, headers=headers).text
page = self.request(url, params=params).text
for _ in range(24): cnt = 0
imgurl, pos = text.extract(page, '<a href="', '"', pos) for image_url in text.extract_iter(page, '<a href="', '"'):
if not imgurl:
return
num += 1 num += 1
data = text.nameext_from_url(imgurl) cnt += 1
data = text.nameext_from_url(image_url)
data["num"] = num data["num"] = num
data["image_id"] = text.parse_int(data["filename"]) data["image_id"] = text.parse_int(data["filename"])
yield imgurl, data yield image_url, data
params["idx"] += 24
if cnt < 24 and num >= total:
return
params["idx"] += cnt
class ImagefapImageExtractor(ImagefapExtractor): class ImagefapImageExtractor(ImagefapExtractor):