diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 157b3768..25a05b60 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -343,12 +343,6 @@ Consider all listed sites to potentially be NSFW. Chapters, Manga - - HentaiFox - https://hentaifox.com/ - Galleries, Search Results - - HentaiHand https://hentaihand.com/ @@ -1351,6 +1345,24 @@ Consider all listed sites to potentially be NSFW. Galleries, Search Results, Tag Searches + + HentaiFox + https://hentaifox.com/ + Galleries, Search Results, Tag Searches + + + + HentaiEnvy + https://hentaienvy.com/ + Galleries, Search Results, Tag Searches + + + + HentaiZap + https://hentaizap.com/ + Galleries, Search Results, Tag Searches + + jschan Imageboards diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 8198619e..87c37981 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -68,7 +68,6 @@ modules = [ "hentai2read", "hentaicosplays", "hentaifoundry", - "hentaifox", "hentaihand", "hentaihere", "hentainexus", diff --git a/gallery_dl/extractor/hentaifox.py b/gallery_dl/extractor/hentaifox.py deleted file mode 100644 index 31a302d1..00000000 --- a/gallery_dl/extractor/hentaifox.py +++ /dev/null @@ -1,119 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2019-2023 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extractors for https://hentaifox.com/""" - -from .common import GalleryExtractor, Extractor, Message -from .. import text, util - - -class HentaifoxBase(): - """Base class for hentaifox extractors""" - category = "hentaifox" - root = "https://hentaifox.com" - - -class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor): - """Extractor for image galleries on hentaifox.com""" - pattern = r"(?:https?://)?(?:www\.)?hentaifox\.com(/gallery/(\d+))" - example = "https://hentaifox.com/gallery/12345/" - - def __init__(self, match): - GalleryExtractor.__init__(self, match) - self.gallery_id = match.group(2) - - @staticmethod - def _split(txt): - return [ - text.remove_html(tag.partition(">")[2], "", "") - for tag in text.extract_iter( - txt, "class='tag_btn", "') - - yield { - "url" : text.urljoin(self.root, url), - "gallery_id": text.parse_int( - url.strip("/").rpartition("/")[2]), - "title" : text.unescape(title), - "_extractor": HentaifoxGalleryExtractor, - } - - pos = page.find(">Next<") - url = text.rextract(page, "href=", ">", pos)[0] - if pos == -1 or "/pag" not in url: - return - num += 1 diff --git a/gallery_dl/extractor/imhentai.py b/gallery_dl/extractor/imhentai.py index 0439f5be..1b0fba39 100644 --- a/gallery_dl/extractor/imhentai.py +++ b/gallery_dl/extractor/imhentai.py @@ -22,10 +22,15 @@ class ImhentaiExtractor(BaseExtractor): while True: page = self.request(url).text + + pos = page.find('class="ranking_list"') + if pos >= 0: + page = page[:pos] + extr = text.extract_from(page) while True: - gallery_id = extr('", "<") + title_alt = extr('class="subtitle">', "<") + end = "" if extr('" data = { "gallery_id": text.parse_int(self.gallery_id), - "title" : text.unescape(extr("

", "<")), - "title_alt" : text.unescape(extr('class="subtitle">', "<")), - "parody" : self._split(extr(">Parodies", "")), - "character" : self._split(extr(">Characters", "")), - "tags" : self._split(extr(">Tags", "")), - "artist" : self._split(extr(">Artists", "")), - "group" : self._split(extr(">Groups", "")), - "language" : self._split(extr(">Languages", "")), + "title" : text.unescape(title), + "title_alt" : text.unescape(title_alt), + "parody" : self._split(extr(">Parodies", end)), + "character" : self._split(extr(">Characters", end)), + "tags" : self._split(extr(">Tags", end)), + "artist" : self._split(extr(">Artists", end)), + "group" : self._split(extr(">Groups", end)), + "language" : self._split(extr(">Languages", end)), "type" : extr("href='/category/", "/"), } @@ -94,10 +114,12 @@ class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor): def _split(self, html): results = [] for tag in text.extract_iter(html, ">", ""): - tag = tag.partition(" ")[0] - if "<" in tag: - tag = text.remove_html(tag) + badge = ("badge'>" in tag or "class='badge" in tag) + tag = text.remove_html(tag) + if badge: + tag = tag.rpartition(" ")[0] results.append(tag) + results.sort() return results def images(self, page): @@ -132,9 +154,9 @@ class ImhentaiTagExtractor(ImhentaiExtractor): class ImhentaiSearchExtractor(ImhentaiExtractor): """Extractor for imhentai search results""" subcategory = "search" - pattern = BASE_PATTERN + r"/search/?\?([^#]+)" + pattern = BASE_PATTERN + r"/search(/?\?[^#]+|/[^/?#]+/?)" example = "https://imhentai.xxx/search/?key=QUERY" def items(self): - url = self.root + "/search/?" + self.groups[-1] + url = self.root + "/search" + self.groups[-1] return self._pagination(url) diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 07961a43..9f16b506 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -61,6 +61,7 @@ CATEGORY_MAP = { "hbrowse" : "HBrowse", "hentai2read" : "Hentai2Read", "hentaicosplay" : "Hentai Cosplay", + "hentaienvy" : "HentaiEnvy", "hentaiera" : "HentaiEra", "hentaifoundry" : "Hentai Foundry", "hentaifox" : "HentaiFox", @@ -69,6 +70,7 @@ CATEGORY_MAP = { "hentaiimg" : "Hentai Image", "hentainexus" : "HentaiNexus", "hentairox" : "HentaiRox", + "hentaizap" : "HentaiZap", "hiperdex" : "HiperDEX", "hitomi" : "Hitomi.la", "horne" : "horne", diff --git a/test/results/hentaienvy.py b/test/results/hentaienvy.py new file mode 100644 index 00000000..c24b1b1a --- /dev/null +++ b/test/results/hentaienvy.py @@ -0,0 +1,128 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from gallery_dl.extractor import imhentai + + +__tests__ = ( +{ + "#url" : "https://hentaienvy.com/gallery/12/", + "#category": ("IMHentai", "hentaienvy", "gallery"), + "#class" : imhentai.ImhentaiGalleryExtractor, + "#pattern" : r"https://m1\.hentaienvy\.com/001/3x907ntq18/\d+\.jpg", + "#count" : 94, + + "count" : 94, + "extension" : "jpg", + "filename" : str, + "gallery_id": 12, + "lang" : "en", + "num" : range(1, 94), + "title" : "(C67) [Studio Kimigabuchi (Kimimaru)] RE-TAKE 2 (Neon Genesis Evangelion) [English]", + "title_alt" : "", + "type" : "doujinshi", + "width" : {835, 838, 841, 1200}, + "height" : {862, 865, 1200}, + + "artist": [ + "kimimaru | entokkun", + ], + "character": [ + "asuka langley soryu", + "gendo ikari", + "makoto hyuga", + "maya ibuki", + "misato katsuragi", + "rei ayanami", + "shigeru aoba", + "shinji ikari", + ], + "group": [ + "studio kimigabuchi", + ], + "language": [ + "english", + "translated", + ], + "parody": [ + "neon genesis evangelion | shin seiki evangelion", + ], + "tags": [ + "multi-work series", + "schoolboy uniform", + "schoolgirl uniform", + "sole female", + "sole male", + "story arc", + "twintails", + ], +}, + +{ + "#url" : "https://hentaienvy.com/gallery/1293743/", + "#category": ("IMHentai", "hentaienvy", "gallery"), + "#class" : imhentai.ImhentaiGalleryExtractor, + "#pattern" : r"https://m9\.hentaienvy\.com/029/tk70aw8b4y/\d+\.webp", + "#count" : 25, + + "count" : 25, + "num" : range(1, 25), + "extension" : "webp", + "filename" : str, + "gallery_id": 1293743, + "lang" : "ru", + "title" : "(C102) [Koniro Kajitsu (KonKa)] Konbucha wa Ikaga desu ka | Хотите немного чая из водорослей? (Blue Archive) [Russian] [graun]", + "title_alt" : "", + "type" : "doujinshi", + "width" : 1280, + "height" : range(1804, 1832), + + "artist": [ + "konka", + ], + "character": [ + "nagisa kirifuji", + "sensei", + ], + "group": [ + "koniro kajitsu", + ], + "language": [ + "russian", + "translated", + ], + "parody": [ + "blue archive", + ], + "tags": [ + "angel", + "defloration", + "halo", + "kissing", + "pantyhose", + "sole female", + "sole male", + "wings", + ], +}, + +{ + "#url" : "https://hentaienvy.com/artist/asutora/", + "#category": ("IMHentai", "hentaienvy", "tag"), + "#class" : imhentai.ImhentaiTagExtractor, + "#pattern" : imhentai.ImhentaiGalleryExtractor.pattern, + "#count" : range(45, 50), +}, + +{ + "#url" : "https://hentaienvy.com/search/?s_key=asutora", + "#category": ("IMHentai", "hentaienvy", "search"), + "#class" : imhentai.ImhentaiSearchExtractor, + "#pattern" : imhentai.ImhentaiGalleryExtractor.pattern, + "#count" : range(45, 50), +}, + +) diff --git a/test/results/hentaifox.py b/test/results/hentaifox.py index 123bd277..2822e5df 100644 --- a/test/results/hentaifox.py +++ b/test/results/hentaifox.py @@ -4,24 +4,62 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -from gallery_dl.extractor import hentaifox +from gallery_dl.extractor import imhentai __tests__ = ( { "#url" : "https://hentaifox.com/gallery/56622/", - "#category": ("", "hentaifox", "gallery"), - "#class" : hentaifox.HentaifoxGalleryExtractor, - "#pattern" : r"https://i\d*\.hentaifox\.com/\d+/\d+/\d+\.jpg", - "#count" : 24, - "#sha1_metadata": "bcd6b67284f378e5cc30b89b761140e3e60fcd92", + "#category": ("IMHentai", "hentaifox", "gallery"), + "#class" : imhentai.ImhentaiGalleryExtractor, + "#pattern" : r"https://i\d*\.hentaifox\.com/\d+/\d+/\d+\.jpg", + "#count" : 24, + + "count" : 24, + "extension" : "jpg", + "filename" : str, + "gallery_id": 56622, + "width" : 1143, + "height" : 1600, + "lang" : "en", + "num" : range(1, 24), + "title" : "TSF no F no Hon Sono 3 no B - Ch.1", + "title_alt" : "", + "type" : "doujinshi", + + "artist" : [ + "taniyaraku", + ], + "character" : [], + "group" : [ + "tsf no f", + ], + "language" : [ + "english", + "translated", + ], + "parody" : [ + "original", + ], + "tags" : [ + "breast expansion", + "clothed male nude female", + "fingering", + "full censorship", + "gender bender", + "glasses", + "mind break", + "sole female", + "sole male", + "transformation", + ], }, { "#url" : "https://hentaifox.com/gallery/630/", "#comment" : "'split_tag' element (#1378)", - "#category": ("", "hentaifox", "gallery"), - "#class" : hentaifox.HentaifoxGalleryExtractor, + "#category": ("IMHentai", "hentaifox", "gallery"), + "#class" : imhentai.ImhentaiGalleryExtractor, "artist" : [ "beti", @@ -29,7 +67,7 @@ __tests__ = ( "magi", "mimikaki", ], - "characters": [ + "character": [ "aerith gainsborough", "tifa lockhart", "yuffie kisaragi", @@ -54,8 +92,8 @@ __tests__ = ( { "#url" : "https://hentaifox.com/gallery/35261/", "#comment" : "email-protected title (#4201)", - "#category": ("", "hentaifox", "gallery"), - "#class" : hentaifox.HentaifoxGalleryExtractor, + "#category": ("IMHentai", "hentaifox", "gallery"), + "#class" : imhentai.ImhentaiGalleryExtractor, "gallery_id": 35261, "title" : "ManageM@ster!", @@ -65,44 +103,48 @@ __tests__ = ( { "#url" : "https://hentaifox.com/parody/touhou-project/", - "#category": ("", "hentaifox", "search"), - "#class" : hentaifox.HentaifoxSearchExtractor, + "#category": ("IMHentai", "hentaifox", "tag"), + "#class" : imhentai.ImhentaiTagExtractor, }, { "#url" : "https://hentaifox.com/character/reimu-hakurei/", - "#category": ("", "hentaifox", "search"), - "#class" : hentaifox.HentaifoxSearchExtractor, + "#category": ("IMHentai", "hentaifox", "tag"), + "#class" : imhentai.ImhentaiTagExtractor, }, { "#url" : "https://hentaifox.com/artist/distance/", - "#category": ("", "hentaifox", "search"), - "#class" : hentaifox.HentaifoxSearchExtractor, -}, - -{ - "#url" : "https://hentaifox.com/search/touhou/", - "#category": ("", "hentaifox", "search"), - "#class" : hentaifox.HentaifoxSearchExtractor, + "#category": ("IMHentai", "hentaifox", "tag"), + "#class" : imhentai.ImhentaiTagExtractor, }, { "#url" : "https://hentaifox.com/group/v-slash/", - "#category": ("", "hentaifox", "search"), - "#class" : hentaifox.HentaifoxSearchExtractor, + "#category": ("IMHentai", "hentaifox", "tag"), + "#class" : imhentai.ImhentaiTagExtractor, }, { "#url" : "https://hentaifox.com/tag/heterochromia/", - "#category": ("", "hentaifox", "search"), - "#class" : hentaifox.HentaifoxSearchExtractor, - "#pattern" : hentaifox.HentaifoxGalleryExtractor.pattern, - "#count" : ">= 60", + "#category": ("IMHentai", "hentaifox", "tag"), + "#class" : imhentai.ImhentaiTagExtractor, + "#pattern" : imhentai.ImhentaiGalleryExtractor.pattern, + "#count" : range(180, 220), +}, - "url" : str, - "gallery_id": int, - "title" : str, +{ + "#url" : "https://hentaifox.com/search/?q=touhou+filming", + "#category": ("IMHentai", "hentaifox", "search"), + "#class" : imhentai.ImhentaiSearchExtractor, + "#pattern" : imhentai.ImhentaiGalleryExtractor.pattern, + "#count" : range(20, 30), +}, + +{ + "#url" : "https://hentaifox.com/search/touhou/", + "#category": ("IMHentai", "hentaifox", "search"), + "#class" : imhentai.ImhentaiSearchExtractor, }, ) diff --git a/test/results/hentaizap.py b/test/results/hentaizap.py new file mode 100644 index 00000000..b2a0cd51 --- /dev/null +++ b/test/results/hentaizap.py @@ -0,0 +1,128 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from gallery_dl.extractor import imhentai + + +__tests__ = ( +{ + "#url" : "https://hentaizap.com/gallery/12/", + "#category": ("IMHentai", "hentaizap", "gallery"), + "#class" : imhentai.ImhentaiGalleryExtractor, + "#pattern" : r"https://m1\.hentaizap\.com/001/3x907ntq18/\d+\.jpg", + "#count" : 94, + + "count" : 94, + "extension" : "jpg", + "filename" : str, + "gallery_id": 12, + "lang" : "en", + "num" : range(1, 94), + "title" : "(C67) [Studio Kimigabuchi (Kimimaru)] RE-TAKE 2 (Neon Genesis Evangelion) [English]", + "title_alt" : "", + "type" : "doujinshi", + "width" : {835, 838, 841, 1200}, + "height" : {862, 865, 1200}, + + "artist": [ + "kimimaru | entokkun", + ], + "character": [ + "asuka langley soryu", + "gendo ikari", + "makoto hyuga", + "maya ibuki", + "misato katsuragi", + "rei ayanami", + "shigeru aoba", + "shinji ikari", + ], + "group": [ + "studio kimigabuchi", + ], + "language": [ + "english", + "translated", + ], + "parody": [ + "neon genesis evangelion | shin seiki evangelion", + ], + "tags": [ + "multi-work series", + "schoolboy uniform", + "schoolgirl uniform", + "sole female", + "sole male", + "story arc", + "twintails", + ], +}, + +{ + "#url" : "https://hentaizap.com/gallery/1329498/", + "#category": ("IMHentai", "hentaizap", "gallery"), + "#class" : imhentai.ImhentaiGalleryExtractor, + "#pattern" : r"https://m9\.hentaizap\.com/029/tk70aw8b4y/\d+\.webp", + "#count" : 25, + + "count" : 25, + "num" : range(1, 25), + "extension" : "webp", + "filename" : str, + "gallery_id": 1329498, + "lang" : "ru", + "title" : "(C102) [Koniro Kajitsu (KonKa)] Konbucha wa Ikaga desu ka | Хотите немного чая из водорослей? (Blue Archive) [Russian] [graun]", + "title_alt" : "", + "type" : "doujinshi", + "width" : 1280, + "height" : range(1804, 1832), + + "artist": [ + "konka", + ], + "character": [ + "nagisa kirifuji", + "sensei", + ], + "group": [ + "koniro kajitsu", + ], + "language": [ + "russian", + "translated", + ], + "parody": [ + "blue archive", + ], + "tags": [ + "angel", + "defloration", + "halo", + "kissing", + "pantyhose", + "sole female", + "sole male", + "wings", + ], +}, + +{ + "#url" : "https://hentaizap.com/artist/asutora/", + "#category": ("IMHentai", "hentaizap", "tag"), + "#class" : imhentai.ImhentaiTagExtractor, + "#pattern" : imhentai.ImhentaiGalleryExtractor.pattern, + "#count" : range(45, 50), +}, + +{ + "#url" : "https://hentaizap.com/search/?key=asutora", + "#category": ("IMHentai", "hentaizap", "search"), + "#class" : imhentai.ImhentaiSearchExtractor, + "#pattern" : imhentai.ImhentaiGalleryExtractor.pattern, + "#count" : range(45, 50), +}, + +) diff --git a/test/results/imhentai.py b/test/results/imhentai.py index 75c58eb6..e6ab1e4f 100644 --- a/test/results/imhentai.py +++ b/test/results/imhentai.py @@ -119,7 +119,7 @@ __tests__ = ( "#category": ("IMHentai", "imhentai", "tag"), "#class" : imhentai.ImhentaiTagExtractor, "#pattern" : imhentai.ImhentaiGalleryExtractor.pattern, - "#count" : range(30, 50), + "#count" : range(45, 50), }, { @@ -127,7 +127,7 @@ __tests__ = ( "#category": ("IMHentai", "imhentai", "search"), "#class" : imhentai.ImhentaiSearchExtractor, "#pattern" : imhentai.ImhentaiGalleryExtractor.pattern, - "#count" : range(30, 50), + "#count" : range(45, 50), }, )