[imagefap] fix and improve gallery pagination (#3013)

2022-10-07 17:40:56 +02:00
parent 8b1fe0bcf1
commit 55fca5fe4b
1 changed files with 24 additions and 12 deletions
--- a/gallery_dl/extractor/imagefap.py
+++ b/gallery_dl/extractor/imagefap.py
@@ -44,7 +44,9 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
        ("https://www.imagefap.com/gallery/5486966", {
            "pattern": r"https://cdnh?\.imagefap\.com"
                       r"/images/full/\d+/\d+/\d+\.jpg",
-            "keyword": "3e24eace5b09639b881ebd393165862feb46adde",
+            "keyword": "8d2e562df7a0bc9e8eecb9d1bb68d32b4086bf98",
            "archive": False,
            "count": 62,
        }),
        ("https://www.imagefap.com/gallery.php?gid=7102714"),
        ("https://beta.imagefap.com/gallery.php?gid=7102714"),
@@ -73,32 +75,42 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
        title, _, descr = descr.partition(" porn picture gallery by ")
        uploader, _, tags = descr.partition(" to see hottest ")
        self._count = text.parse_int(count)
        return {
            "gallery_id": text.parse_int(self.gid),
            "title": text.unescape(title),
            "uploader": uploader,
            "tags": tags[:-11].split(", "),
-            "count": text.parse_int(count),
+            "count": self._count,
        }
    def get_images(self):
        """Collect image-urls and -metadata

        Generator that requests the gallery's partial photo listing page
        by page and yields one ``(image_url, metadata)`` tuple for every
        ``<a href="...">`` target found in the responses.

        ``metadata`` is the dict returned by ``text.nameext_from_url()``,
        extended with:
            num       running 1-based position of the image
            image_id  integer parsed from the URL's filename
        """
        url = "{}/photo/{}/".format(self.root, self.image_id)
        params = {"gid": self.gid, "idx": 0, "partial": "true"}
        headers = {
            "Content-Type": "application/x-www-form-urlencoded",
            "X-Requested-With": "XMLHttpRequest",
            "Referer": "{}?pgid=&gid={}&page=0".format(url, self.image_id)
        }

        num = 0
        # expected number of images, as stored by the metadata step
        total = self._count
        while True:
            page = self.request(url, params=params, headers=headers).text

            # number of links found in this response
            cnt = 0
            for image_url in text.extract_iter(page, '<a href="', '"'):
                num += 1
                cnt += 1
                data = text.nameext_from_url(image_url)
                data["num"] = num
                data["image_id"] = text.parse_int(data["filename"])
                yield image_url, data

            # Stop on an empty response: 'idx' would not advance and the
            # same request would be repeated forever. Otherwise stop once
            # a short page (fewer than 24 links; presumably the site's
            # page size) arrives and the expected total has been reached.
            if not cnt or cnt < 24 and num >= total:
                return
            params["idx"] += cnt
 class ImagefapImageExtractor(ImagefapExtractor):