From bc9d291c13f7eb09cef35b3fd715a2da48bcf60f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 7 Oct 2022 22:07:34 +0200 Subject: [PATCH] [imagefap] fix and improve folder extraction (#3013) --- gallery_dl/extractor/imagefap.py | 71 ++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py index 3d6388d4..2c899eba 100644 --- a/gallery_dl/extractor/imagefap.py +++ b/gallery_dl/extractor/imagefap.py @@ -182,40 +182,49 @@ class ImagefapUserExtractor(ImagefapExtractor): self.user, self.user_id = match.groups() def items(self): - for gid, name in self.get_gallery_data(): - url = "{}/gallery/{}".format(self.root, gid) - data = { - "gallery_id": text.parse_int(gid), - "title": text.unescape(name), - "_extractor": ImagefapGalleryExtractor, - } - yield Message.Queue, url, data + for folder_id in self.folders(): + for gallery_id, name in self.galleries(folder_id): + url = "{}/gallery/{}".format(self.root, gallery_id) + data = { + "gallery_id": text.parse_int(gallery_id), + "title" : text.unescape(name), + "_extractor": ImagefapGalleryExtractor, + } + yield Message.Queue, url, data - def get_gallery_data(self): - """Yield all gallery_ids of a specific user""" - folders = self.get_gallery_folders() - url = "{}/ajax_usergallery_folder.php".format(self.root) - params = {"userid": self.user_id} - for folder_id in folders: - params["id"] = folder_id - page = self.request(url, params=params).text - - pos = 0 - while True: - gid, pos = text.extract(page, '", "<", pos) - yield gid, name - - def get_gallery_folders(self): - """Create a list of all folder_ids of a specific user""" + def folders(self): + """Return a list of folder_ids of a specific user""" if self.user: url = "{}/profile/{}/galleries".format(self.root, self.user) else: url = "{}/usergallery.php?userid={}".format( self.root, self.user_id) - page = self.request(url).text - self.user_id, pos = text.extract(page, '?userid=', '"') - folders, pos = text.extract(page, ' id="tgl_all" value="', '"', pos) - return folders.split("|")[:-1] + + response = self.request(url) + self.user = response.url.split("/")[-2] + folders = text.extract(response.text, ' id="tgl_all" value="', '"')[0] + return folders.rstrip("|").split("|") + + def galleries(self, folder_id): + """Yield gallery_ids of a folder""" + if folder_id == "-1": + url = "{}/profile/{}/galleries?folderid=-1".format( + self.root, self.user) + else: + url = "{}/organizer/{}/".format(self.root, folder_id) + params = {"page": 0} + + while True: + extr = text.extract_from(self.request(url, params=params).text) + cnt = 0 + + while True: + gid = extr('", "<") + cnt += 1 + + if cnt < 25: + break + params["page"] += 1