From 92178b369cec269376f7f2e8734bb8c82e702bea Mon Sep 17 00:00:00 2001 From: chio0hai <94094996+chio0hai@users.noreply.github.com> Date: Sat, 3 Jun 2023 00:23:34 -0400 Subject: [PATCH] [postimage] add gallery support, update image extractor to download original image instead of main image --- gallery_dl/extractor/imagehosts.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py index df4ff265..44571758 100644 --- a/gallery_dl/extractor/imagehosts.py +++ b/gallery_dl/extractor/imagehosts.py @@ -295,19 +295,38 @@ class PostimgImageExtractor(ImagehostImageExtractor): """Extractor for single images from postimages.org""" category = "postimg" pattern = (r"(?:https?://)?((?:www\.)?(?:postimg|pixxxels)\.(?:cc|org)" - r"/(?:image/)?([^/?#]+)/?)") + r"/(?!gallery/)(?:image/)?([^/?#]+)/?)") test = ("https://postimg.cc/Wtn2b3hC", { - "url": "0794cfda9b8951a8ac3aa692472484200254ab86", + "url": "72f3c8b1d6c6601a20ad58f35635494b4891a99e", "keyword": "2d05808d04e4e83e33200db83521af06e3147a84", "content": "cfaa8def53ed1a575e0c665c9d6d8cf2aac7a0ee", }) def get_info(self, page): - url , pos = text.extract(page, 'id="main-image" src="', '"') + pos = page.index(' id="download"') + url , pos = text.rextract(page, ' href="', '"', pos) filename, pos = text.extract(page, 'class="imagename">', '<', pos) return url, text.unescape(filename) +class PostimgGalleryExtractor(ImagehostImageExtractor): + """Extractor for images galleries from postimages.org""" + category = "postimg" + subcategory = "gallery" + pattern = (r"(?:https?://)?((?:www\.)?(?:postimg|pixxxels)\.(?:cc|org)" + r"/(?:gallery/)([^/?#]+)/?)") + test = ("https://postimg.cc/gallery/wxpDLgX", { + "pattern": PostimgImageExtractor.pattern, + "count": 22, + }) + + def items(self): + page = self.request(self.page_url).text + data = {"_extractor": PostimgImageExtractor} + for url in text.extract_iter(page, ' class="thumb">