[imagefap] fix and improve gallery pagination (#3013)
This commit is contained in:
@@ -44,7 +44,9 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
|
|||||||
("https://www.imagefap.com/gallery/5486966", {
|
("https://www.imagefap.com/gallery/5486966", {
|
||||||
"pattern": r"https://cdnh?\.imagefap\.com"
|
"pattern": r"https://cdnh?\.imagefap\.com"
|
||||||
r"/images/full/\d+/\d+/\d+\.jpg",
|
r"/images/full/\d+/\d+/\d+\.jpg",
|
||||||
"keyword": "3e24eace5b09639b881ebd393165862feb46adde",
|
"keyword": "8d2e562df7a0bc9e8eecb9d1bb68d32b4086bf98",
|
||||||
|
"archive": False,
|
||||||
|
"count": 62,
|
||||||
}),
|
}),
|
||||||
("https://www.imagefap.com/gallery.php?gid=7102714"),
|
("https://www.imagefap.com/gallery.php?gid=7102714"),
|
||||||
("https://beta.imagefap.com/gallery.php?gid=7102714"),
|
("https://beta.imagefap.com/gallery.php?gid=7102714"),
|
||||||
@@ -73,32 +75,42 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
|
|||||||
|
|
||||||
title, _, descr = descr.partition(" porn picture gallery by ")
|
title, _, descr = descr.partition(" porn picture gallery by ")
|
||||||
uploader, _, tags = descr.partition(" to see hottest ")
|
uploader, _, tags = descr.partition(" to see hottest ")
|
||||||
|
self._count = text.parse_int(count)
|
||||||
return {
|
return {
|
||||||
"gallery_id": text.parse_int(self.gid),
|
"gallery_id": text.parse_int(self.gid),
|
||||||
"title": text.unescape(title),
|
"title": text.unescape(title),
|
||||||
"uploader": uploader,
|
"uploader": uploader,
|
||||||
"tags": tags[:-11].split(", "),
|
"tags": tags[:-11].split(", "),
|
||||||
"count": text.parse_int(count),
|
"count": self._count,
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_images(self):
    """Collect image-urls and -metadata

    Generator that walks the gallery's partial photo listing and yields
    one ``(image_url, data)`` tuple per ``<a href="...">`` target found
    in each response.  ``data`` is the dict produced by
    ``text.nameext_from_url()`` extended with:

    - ``num``: 1-based running index of the image across all pages
    - ``image_id``: integer parsed from the URL's filename

    Pagination is driven by the ``idx`` query parameter, which is advanced
    by the number of links actually found in the previous response.
    Iteration stops when a response contains no links at all, or when a
    short page (fewer than 24 links) is seen after at least
    ``self._count`` images have been yielded.
    """
    url = "{}/photo/{}/".format(self.root, self.image_id)
    params = {"gid": self.gid, "idx": 0, "partial": "true"}
    # Sent with every page request; presumably mimics the site's own
    # XMLHttpRequest-based navigation -- confirm against the site.
    headers = {
        "Content-Type": "application/x-www-form-urlencoded",
        "X-Requested-With": "XMLHttpRequest",
        "Referer": "{}?pgid=&gid={}&page=0".format(url, self.image_id)
    }

    num = 0
    total = self._count
    while True:
        page = self.request(url, params=params, headers=headers).text

        # Number of links found in this response only.
        cnt = 0
        for image_url in text.extract_iter(page, '<a href="', '"'):
            num += 1
            cnt += 1
            data = text.nameext_from_url(image_url)
            data["num"] = num
            data["image_id"] = text.parse_int(data["filename"])
            yield image_url, data

        # An empty response must end the iteration unconditionally:
        # with cnt == 0 the offset below would not advance, and if fewer
        # than `total` images were ever served the same request would be
        # repeated forever.
        if not cnt or (cnt < 24 and num >= total):
            return
        params["idx"] += cnt
|
||||||
|
|
||||||
|
|
||||||
class ImagefapImageExtractor(ImagefapExtractor):
|
class ImagefapImageExtractor(ImagefapExtractor):
|
||||||
|
|||||||
Reference in New Issue
Block a user