# -*- coding: utf-8 -*-

# Copyright 2016-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Collection of extractors for various imagehosts"""

from .common import Extractor, Message
from .. import text, exception
from ..cache import memcache


class ImagehostImageExtractor(Extractor):
    """Shared base for extractors that fetch one image from an imagehost

    Subclasses implement get_info() and may override metadata() as well
    as the underscore-prefixed class attributes below.
    """
    basecategory = "imagehost"
    subcategory = "image"
    archive_fmt = "{token}"
    parent = True
    # Request body selector: a falsy value makes items() issue a GET;
    # the strings "simple" and "complex" are expanded into form
    # dictionaries by __init__ and sent with a POST request.
    _params = None
    # Passed through unchanged as 'cookies' to self.request() in items()
    _cookies = None
    # Passed through unchanged as 'encoding' to self.request() in items()
    _encoding = None
    # When not None, stored as "_http_validate" in the yielded metadata
    _validate = None

    def __init__(self, match):
        """Derive page URL and token from 'match' and expand '_params'

        Group 1 of 'match' is appended to self.root (or "https://" when
        root is falsy) to form the page URL; group 2 becomes the token.
        """
        Extractor.__init__(self, match)

        prefix = self.root or "https://"
        self.page_url = prefix + match[1]
        self.token = match[2]

        # Replace the symbolic preset name with the actual form fields;
        # any other value of '_params' is left as it is.
        preset = self._params
        if preset == "simple":
            self._params = {
                "imgContinue": "Continue+to+image+...+",
            }
        elif preset == "complex":
            self._params = {
                "op"  : "view",
                "id"  : self.token,
                "pre" : "1",
                "adb" : "1",
                "next": "Continue+to+image+...+",
            }

    def items(self):
        """Request the image page and yield its Directory and Url messages

        Nothing is yielded when get_info() reports a falsy image URL.
        """
        http_method = "POST" if self._params else "GET"
        response = self.request(
            self.page_url,
            method=http_method,
            data=self._params,
            cookies=self._cookies,
            encoding=self._encoding,
        )
        page = response.text

        url, filename = self.get_info(page)
        if not url:
            return

        if not filename:
            # No separate filename string: take name and extension
            # from the image URL itself.
            info = text.nameext_from_url(url)
        else:
            info = text.nameext_from_name(filename)
            if not info["extension"]:
                # Filename had no usable extension; use the URL's one.
                info["extension"] = text.ext_from_url(url)

        info["token"] = self.token
        info["post_url"] = self.page_url
        info.update(self.metadata(page))

        # Upgrade plain-HTTP image URLs to HTTPS (done after the
        # filename/extension lookup above, which sees the original URL).
        if url.startswith("http:"):
            url = "https:" + url[5:]

        validate = self._validate
        if validate is not None:
            info["_http_validate"] = validate

        yield Message.Directory, "", info
        yield Message.Url, url, info

    def get_info(self, page):
        """Find image-url and string to get filename from

        Hook for subclasses; items() unpacks the result into
        (url, filename). This base implementation returns None.
        """

    def metadata(self, page):
        """Return additional metadata to merge into the result

        The base implementation contributes nothing (empty tuple).
        """
        return ()

    def not_found(self, resource=None):
        """Raise NotFoundError for 'resource'

        Falls back to the class-level subcategory when 'resource'
        is falsy.
        """
        if not resource:
            resource = self.__class__.subcategory
        raise exception.NotFoundError(resource)
"""Extractor for single images from imx.to""" category = "imxto" pattern = (r"(?:https?://)?(?:www\.)?((?:imx\.to|img\.yt)" r"/(?:i/|img-)(\w+)(\.html)?)") example = "https://imx.to/i/ID" _params = "simple" _encoding = "utf-8" def __init__(self, match): ImagehostImageExtractor.__init__(self, match) if "/img-" in self.page_url: self.page_url = self.page_url.replace("img.yt", "imx.to") def get_info(self, page): url, pos = text.extract( page, '