# -*- coding: utf-8 -*-

# Copyright 2016-2026 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Collection of extractors for various imagehosts"""

from .common import Extractor, Message
from .. import text
from ..cache import memcache


class ImagehostImageExtractor(Extractor):
    """Shared base for extractors handling one image per imagehost page"""
    basecategory = "imagehost"
    subcategory = "image"
    archive_fmt = "{token}"
    parent = True
    # "simple" / "complex" pick a POST form preset in __init__();
    # a falsy value makes items() send a GET request instead
    _params = None
    # both are forwarded unchanged to self.request() in items()
    _cookies = None
    _encoding = None
    # stored under "_http_validate" in the metadata when not None
    _validate = None

    def __init__(self, match):
        """Set page URL and token from groups 1 and 2 of 'match'"""
        Extractor.__init__(self, match)
        prefix = self.root or "https://"
        self.page_url = prefix + match[1]
        self.token = match[2]

        # replace the preset name with the actual form fields to submit
        preset = self._params
        if preset == "simple":
            self._params = {"imgContinue": "Continue+to+image+...+"}
        elif preset == "complex":
            self._params = {
                "op"  : "view",
                "id"  : self.token,
                "pre" : "1",
                "adb" : "1",
                "next": "Continue+to+image+...+",
            }

    def items(self):
        """Request the image page and yield a Directory and a Url message

        Yields nothing when get_info() does not provide an image URL.
        """
        method = "POST" if self._params else "GET"
        response = self.request(
            self.page_url, method=method, data=self._params,
            cookies=self._cookies, encoding=self._encoding)
        page = response.text

        url, filename = self.get_info(page)
        if not url:
            return

        if not filename:
            data = text.nameext_from_url(url)
        else:
            data = text.nameext_from_name(filename)
            if not data["extension"]:
                # the given name had no extension; take it from the URL
                data["extension"] = text.ext_from_url(url)

        data["token"] = self.token
        data["post_url"] = self.page_url
        # merged last, so metadata() values override the keys set above
        data.update(self.metadata(page))

        # upgrade plain-HTTP image URLs to HTTPS
        if url.startswith("http:"):
            url = "https" + url[4:]

        validate = self._validate
        if validate is not None:
            data["_http_validate"] = validate

        yield Message.Directory, "", data
        yield Message.Url, url, data

    def get_info(self, page):
        """Return image URL and filename source found in 'page'

        items() unpacks the result into (url, filename).
        This base implementation has no body and returns None;
        the concrete extractors in this module define their own.
        """

    def metadata(self, page):
        """Return extra metadata for 'page'; empty by default"""
        return ()

    def not_found(self, resource=None):
        """Raise NotFoundError for 'resource' or, if falsy, the subcategory"""
        if not resource:
            resource = self.__class__.subcategory
        raise self.exc.NotFoundError(resource)


# NOTE(review): the source line break falls inside the docstring of the next
# class; its header fragment is kept verbatim here so that no text is lost:
# class ImxtoImageExtractor(ImagehostImageExtractor): """Extractor
for single images from imx.to""" category = "imxto" pattern = (r"(?:https?://)?(?:www\.)?((?:imx\.to|img\.yt)" r"/(?:i/|img-)(\w+)(\.html)?)") example = "https://imx.to/i/ID" _params = "simple" _encoding = "utf-8" def __init__(self, match): ImagehostImageExtractor.__init__(self, match) if "/img-" in self.page_url: self.page_url = self.page_url.replace("img.yt", "imx.to") def get_info(self, page): url, pos = text.extract( page, '
", "").replace(" ", "")[:-1] width, _, height = extr(">", " px").partition("x") return { "size" : text.parse_bytes(size), "width" : text.parse_int(width), "height": text.parse_int(height), "hash" : extr(">", ""), } class ImxtoGalleryExtractor(ImagehostImageExtractor): """Extractor for image galleries from imx.to""" category = "imxto" subcategory = "gallery" pattern = r"(?:https?://)?(?:www\.)?(imx\.to/g/([^/?#]+))" example = "https://imx.to/g/ID" def items(self): page = self.request(self.page_url).text title, pos = text.extract(page, '
")[2]).strip(), } params = {"page": 1} while True: for url in text.extract_iter(page, "Last' in page: return params["page"] += 1 page = self.request(self.page_url, params=params).text class AcidimgImageExtractor(ImagehostImageExtractor): """Extractor for single images from acidimg.cc""" category = "acidimg" pattern = r"(?:https?://)?((?:www\.)?acidimg\.cc/img-([a-z0-9]+)\.html)" example = "https://acidimg.cc/img-abc123.html" _params = "simple" _encoding = "utf-8" def get_info(self, page): url, pos = text.extract(page, "', '', "<")), "gallery_id" : self.groups[2] or extr("&fld_id=", "&"), } del extr while True: gallery = text.extr(page, 'class="gallerys', "") if pos < 0: break qs = text.unescape(text.rextr(page, "href='", "'", pos)) page = self.request(f"{root}/{qs}").text class ImgadultImageExtractor(ImagehostImageExtractor): """Extractor for single images from imgadult.com""" category = "imgadult" _cookies = {"img_i_d": "1"} pattern = r"(?:https?://)?((?:www\.)?imgadult\.com/img-([0-9a-f]+)\.html)" example = "https://imgadult.com/img-0123456789abc.html" def get_info(self, page): url , pos = text.extract(page, "' src='", "'") name, pos = text.extract(page, "alt='", "'", pos) if name: name, _, rhs = name.rpartition(" image hosted at ImgAdult.com") if not name: name = rhs name = text.unescape(name) return url, name class ImgspiceImageExtractor(ImagehostImageExtractor): """Extractor for single images from imgspice.com""" category = "imgspice" pattern = r"(?:https?://)?((?:www\.)?imgspice\.com/([^/?#]+))" example = "https://imgspice.com/ID/NAME.EXT.html" def get_info(self, page): pos = page.find('id="imgpreview"') if pos < 0: self.not_found() url , pos = text.extract(page, 'src="', '"', pos) name, pos = text.extract(page, 'alt="', '"', pos) return url, text.unescape(name) class PixhostImageExtractor(ImagehostImageExtractor): """Extractor for single images from pixhost.to""" category = "pixhost" root = "https://pixhost.to" pattern = 
(r"(?:https?://)?(?:www\.)?pixhost\.(?:to|org)" r"(/show/\d+/(\d+)_[^/?#]+)") example = "https://pixhost.to/show/123/12345_NAME.EXT" _cookies = {"pixhostads": "1", "pixhosttest": "1"} def get_info(self, page): self.kwdict["directory"] = self.page_url.rsplit("/")[-2] url , pos = text.extract(page, "class=\"image-img\" src=\"", "\"") name, pos = text.extract(page, "alt=\"", "\"", pos) return url, text.unescape(name) if name else None class PixhostGalleryExtractor(ImagehostImageExtractor): """Extractor for image galleries from pixhost.to""" category = "pixhost" subcategory = "gallery" root = "https://pixhost.to" pattern = (r"(?:https?://)?(?:www\.)?pixhost\.(?:to|org)" r"(/gallery/([^/?#]+))") example = "https://pixhost.to/gallery/ID" def items(self): page = text.extr(self.request( self.page_url).text, 'class="images"', "
") data = {"_extractor": PixhostImageExtractor} for url in text.extract_iter(page, '', '<', pos) return url, text.unescape(filename) if filename else None class PostimgGalleryExtractor(ImagehostImageExtractor): """Extractor for images galleries from postimages.org""" category = "postimg" subcategory = "gallery" root = "https://postimg.cc" pattern = (r"(?:https?://)?(?:www\.)?(?:postim(?:ages|g)|pixxxels)" r"\.(?:cc|org)(/gallery/([^/?#]+))") example = "https://postimg.cc/gallery/ID" def items(self): page = self.request(self.page_url).text title = text.extr( page, 'property="og:title" content="', ' — Postimages"') data = { "_extractor" : PostimgImageExtractor, "gallery_title": text.unescape(title), } for token in text.extract_iter(page, 'data-image="', '"'): url = f"{self.root}/{token}" yield Message.Queue, url, data class TurboimagehostImageExtractor(ImagehostImageExtractor): """Extractor for single images from www.turboimagehost.com""" category = "turboimagehost" pattern = (r"(?:https?://)?((?:www\.)?turboimagehost\.com" r"/p/(\d+)/[^/?#]+\.html)") example = "https://www.turboimagehost.com/p/12345/NAME.EXT.html" def get_info(self, page): url = text.extract(page, 'src="', '"', page.index("') date, pos = text.extract(page, '', 'by', pos) user, pos = text.extract(page, '>', '<', pos) date = date.split() return { "date": self.parse_datetime_iso(f"{date[0][:10]} {date[1]}"), "user": text.unescape(user), }