diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 026b5c31..170e8483 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -439,6 +439,12 @@ Consider all listed sites to potentially be NSFW. Albums, Favorites, Favorites Folders, Galleries, individual Images, Search Results, Subreddits, Tag Searches, User Profiles + + IMHentai + https://imhentai.xxx/ + Galleries, Search Results, Tag Searches + + Imxto https://imx.to/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index fc8d7b20..faefa951 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -80,6 +80,7 @@ modules = [ "imgbox", "imgth", "imgur", + "imhentai", "inkbunny", "instagram", "issuu", diff --git a/gallery_dl/extractor/imhentai.py b/gallery_dl/extractor/imhentai.py new file mode 100644 index 00000000..bba43da0 --- /dev/null +++ b/gallery_dl/extractor/imhentai.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- + +# Copyright 2025 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://imhentai.xxx/""" + +from .common import GalleryExtractor, Extractor, Message +from .. import text, util + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?imhentai\.xxx" + + +class ImhentaiExtractor(Extractor): + category = "imhentai" + root = "https://imhentai.xxx" + + def _pagination(self, url): + base = self.root + "/gallery/" + data = {"_extractor": ImhentaiGalleryExtractor} + + while True: + page = self.request(url).text + extr = text.extract_from(page) + + while True: + gallery_id = extr('", "<")), + "title_alt" : text.unescape(extr('class="subtitle">', "<")), + "parody" : self._split(extr(">Parodies:", "")), + "character" : self._split(extr(">Characters:", "")), + "tags" : self._split(extr(">Tags:", "")), + "artist" : self._split(extr(">Artists:", "")), + "group" : self._split(extr(">Groups:", "")), + "language" : self._split(extr(">Languages:", "")), + "type" : text.remove_html(extr(">Category:", "", ""): + tag = tag.partition(" ")[0] + if "<" in tag: + tag = text.remove_html(tag) + results.append(tag) + return results + + def images(self, _): + url = "{}/view/{}/1/".format(self.root, self.gallery_id) + page = self.request(url).text + data = util.json_loads(text.extr(page, "$.parseJSON('", "'")) + base = text.extr(page, 'data-src="', '"').rpartition("/")[0] + "/" + exts = {"j": "jpg", "p": "png", "g": "gif", "w": "webp", "a": "avif"} + + results = [] + for i in map(str, range(1, len(data)+1)): + ext, width, height = data[i].split(",") + url = base + i + "." + exts[ext] + results.append((url, { + "width" : text.parse_int(width), + "height": text.parse_int(height), + })) + return results + + +class ImhentaiTagExtractor(ImhentaiExtractor): + """Extractor for imhentai tag searches""" + subcategory = "tag" + pattern = (BASE_PATTERN + r"(/(?:" + r"artist|category|character|group|language|parody|tag" + r")/([^/?#]+))") + example = "https://imhentai.xxx/tag/TAG/" + + def items(self): + url = self.root + self.groups[0] + "/" + return self._pagination(url) + + +class ImhentaiSearchExtractor(ImhentaiExtractor): + """Extractor for imhentai search results""" + subcategory = "search" + pattern = BASE_PATTERN + r"/search/?\?([^#]+)" + example = "https://imhentai.xxx/search/?key=QUERY" + + def items(self): + url = self.root + "/search/?" + self.groups[0] + return self._pagination(url) diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index bc4dbe61..fdc84a43 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -79,6 +79,7 @@ CATEGORY_MAP = { "imgkiwi" : "IMG.Kiwi", "imgth" : "imgth", "imgur" : "imgur", + "imhentai" : "IMHentai", "joyreactor" : "JoyReactor", "itchio" : "itch.io", "jpgfish" : "JPG Fish", diff --git a/test/results/imhentai.py b/test/results/imhentai.py new file mode 100644 index 00000000..6eee93e5 --- /dev/null +++ b/test/results/imhentai.py @@ -0,0 +1,129 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from gallery_dl.extractor import imhentai + + +__tests__ = ( +{ + "#url" : "https://imhentai.xxx/gallery/12/", + "#class" : imhentai.ImhentaiGalleryExtractor, + "#pattern": r"https://m1\.imhentai\.xxx/001/3x907ntq18/\d+\.jpg", + "#count" : 94, + + "count" : 94, + "extension" : "jpg", + "filename" : str, + "gallery_id": 12, + "lang" : "en", + "num" : range(1, 94), + "title" : "(C67) [Studio Kimigabuchi (Kimimaru)] RE-TAKE 2 (Neon Genesis Evangelion) [English]", + "title_alt" : "(C67) [スタジオKIMIGABUCHI (きみまる)] RE-TAKE2 (新世紀エヴァンゲリオン) [英訳]", + "type" : "doujinshi", + "width" : {835, 838, 841, 1200}, + "height" : {862, 865, 1200}, + + "artist": [ + "kimimaru | entokkun", + ], + "character": [ + "asuka langley soryu", + "gendo ikari", + "makoto hyuga", + "maya ibuki", + "misato katsuragi", + "rei ayanami", + "shigeru aoba", + "shinji ikari", + ], + "group": [ + "studio kimigabuchi", + ], + "language": [ + "english", + "translated", + ], + "parody": [ + "neon genesis evangelion | shin seiki evangelion", + ], + "tags": [ + "multi-work series", + "schoolboy uniform", + "schoolgirl uniform", + "sole female", + "sole male", + "story arc", + "twintails", + ], +}, + +{ + "#url" : "https://imhentai.xxx/gallery/1396508/", + "#class" : imhentai.ImhentaiGalleryExtractor, + "#pattern": r"https://m9\.imhentai\.xxx/028/po9f4w3jzx/\d+\.webp", + "#count" : 34, + + "count" : 34, + "extension" : "webp", + "filename" : str, + "gallery_id": 1396508, + "lang" : "ko", + "num" : range(1, 34), + "title" : "[Beruennea (Skylader)] Tada no Kouhai ni Natta Kimi | 그냥 후배가 돼 버린 너 [Korean] [Digital]", + "title_alt" : "[ベルエンネーア (すかいれーだー)] ただの後輩になった君 [韓国翻訳] [DL版]", + "type" : "doujinshi", + "width" : 1280, + "height" : {1790, 1791}, + + "artist": [ + "skylader", + ], + "character": [], + "group": [ + "beruennea", + ], + "language": [ + "korean", + "translated", + ], + "parody": [ + "original", + ], + "tags": [ + "ahegao", + "big ass", + "big breasts", + "big nipples", + "big penis", + "bike shorts", + "blowjob", + "gokkun", + "hairy", + "huge breasts", + "mosaic censorship", + "muscle", + "nakadashi", + "netorare", + "schoolgirl uniform", + "tanlines", + ], +}, + +{ + "#url" : "https://imhentai.xxx/artist/asutora/", + "#class" : imhentai.ImhentaiTagExtractor, + "#pattern": imhentai.ImhentaiGalleryExtractor.pattern, + "#count" : range(30, 50), +}, + +{ + "#url" : "https://imhentai.xxx/search/?lt=1&pp=0&m=1&d=1&w=1&i=1&a=1&g=1&key=asutora&apply=Search&en=1&jp=1&es=1&fr=1&kr=1&de=1&ru=1&dl=0&tr=0", + "#class" : imhentai.ImhentaiSearchExtractor, + "#pattern": imhentai.ImhentaiGalleryExtractor.pattern, + "#count" : range(30, 50), +}, + +)