diff --git a/gallery_dl/extractor/hentaicosplays.py b/gallery_dl/extractor/hentaicosplays.py index f9100879..3974b5f9 100644 --- a/gallery_dl/extractor/hentaicosplays.py +++ b/gallery_dl/extractor/hentaicosplays.py @@ -4,29 +4,23 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -""" -Extractor for https://hentai-cosplays.com/ -(also works for hentai-img.com and porn-images-xxx.com) -""" +"""Extractors for https://hentai-cosplays.com/ +(also works for hentai-img.com and porn-images-xxx.com)""" -from .common import Extractor, Message +from .common import GalleryExtractor from .. import text -class HentaicosplaysGalleryExtractor(Extractor): - """ - Extractor for image galleries from hentai-cosplays.com, hentai-img.com, - and porn-images-xxx.com - """ +class HentaicosplaysGalleryExtractor(GalleryExtractor): + """Extractor for image galleries from + hentai-cosplays.com, hentai-img.com, and porn-images-xxx.com""" category = "hentaicosplays" - subcategory = "gallery" directory_fmt = ("{site}", "{title}") filename_fmt = "{filename}.{extension}" archive_fmt = "{title}_{filename}" - root = "https://hentai-cosplays.com" - pattern = r"(?:https?://)?(?:\w{2}.)?" \ - r"(hentai-cosplays|hentai-img|porn-images-xxx)\.com/" \ - r"(?:image|story)/([\w-]+)(/\w+/\d+)?" + pattern = r"((?:https?://)?(?:\w{2}\.)?" \ + r"(hentai-cosplays|hentai-img|porn-images-xxx)\.com)/" \ + r"(?:image|story)/([\w-]+)" test = ( ("https://hentai-cosplays.com/image/---devilism--tide-kurihara-/", { "pattern": r"https://static\d?.hentai-cosplays.com/upload/" @@ -58,27 +52,21 @@ class HentaicosplaysGalleryExtractor(Extractor): ) def __init__(self, match): - Extractor.__init__(self, match) - self.site = match.group(1) - self.title = match.group(2) - - def items(self): - url = "https://{}.com/story/{}/".format( - self.site, self.title) - page = self.request(url).text - data = self.metadata(page) - images = text.extract_iter(page, - '", "")[0] - title, _, _ = title.rpartition(" Story Viewer - ") return { - "title": title, - "site": self.site, + "title": text.unescape(title.rpartition(" Story Viewer - ")[0]), + "site" : self.site, } + + def images(self, page): + return [ + (url, None) + for url in text.extract_iter( + page, '