[imhentai] support 'hentaienvy.com' and 'hentaizap.com' (#7192 #7218)

and move 'hentaifox' support to this module as well
This commit is contained in:
Mike Fährmann
2025-03-24 15:12:58 +01:00
parent b3500b41dd
commit 7a6899c647
9 changed files with 388 additions and 174 deletions

View File

@@ -343,12 +343,6 @@ Consider all listed sites to potentially be NSFW.
<td>Chapters, Manga</td>
<td></td>
</tr>
<tr>
<td>HentaiFox</td>
<td>https://hentaifox.com/</td>
<td>Galleries, Search Results</td>
<td></td>
</tr>
<tr>
<td>HentaiHand</td>
<td>https://hentaihand.com/</td>
@@ -1351,6 +1345,24 @@ Consider all listed sites to potentially be NSFW.
<td>Galleries, Search Results, Tag Searches</td>
<td></td>
</tr>
<tr>
<td>HentaiFox</td>
<td>https://hentaifox.com/</td>
<td>Galleries, Search Results, Tag Searches</td>
<td></td>
</tr>
<tr>
<td>HentaiEnvy</td>
<td>https://hentaienvy.com/</td>
<td>Galleries, Search Results, Tag Searches</td>
<td></td>
</tr>
<tr>
<td>HentaiZap</td>
<td>https://hentaizap.com/</td>
<td>Galleries, Search Results, Tag Searches</td>
<td></td>
</tr>
<tr>
<td colspan="4"><strong>jschan Imageboards</strong></td>

View File

@@ -68,7 +68,6 @@ modules = [
"hentai2read",
"hentaicosplays",
"hentaifoundry",
"hentaifox",
"hentaihand",
"hentaihere",
"hentainexus",

View File

@@ -1,119 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://hentaifox.com/"""
from .common import GalleryExtractor, Extractor, Message
from .. import text, util
class HentaifoxBase:
    """Shared settings for every hentaifox extractor

    Provides the category name and the site root that the concrete
    extractor classes read as 'self.category' and 'self.root'.
    """
    root = "https://hentaifox.com"
    category = "hentaifox"
class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor):
    """Extractor for image galleries on hentaifox.com"""
    pattern = r"(?:https?://)?(?:www\.)?hentaifox\.com(/gallery/(\d+))"
    example = "https://hentaifox.com/gallery/12345/"

    def __init__(self, match):
        GalleryExtractor.__init__(self, match)
        # second capture group of 'pattern': the numeric gallery ID
        self.gallery_id = match.group(2)

    @staticmethod
    def _split(txt):
        """Return the text content of every tag button found in 'txt'"""
        names = []
        for button in text.extract_iter(
                txt, "class='tag_btn", "<span class='t_badge"):
            # discard the rest of the opening tag, keep what follows it
            content = button.partition(">")[2]
            names.append(text.remove_html(content, "", ""))
        return names

    def metadata(self, page):
        """Build the metadata dict for the gallery shown on 'page'"""
        extr = text.extract_from(page)
        split = self._split

        gallery_id = text.parse_int(self.gallery_id)

        # The 'extr' calls below are kept in this exact sequence.
        # NOTE(review): presumably 'extr' advances through the page with
        # each call, which would make the order significant -- confirm.
        parody = split(extr(">Parodies:", "</ul>"))
        characters = split(extr(">Characters:", "</ul>"))
        tags = split(extr(">Tags:", "</ul>"))
        artist = split(extr(">Artists:", "</ul>"))
        group = split(extr(">Groups:", "</ul>"))
        category = text.remove_html(extr(">Category:", "<span"))
        title = text.unescape(extr('id="gallery_title" value="', '"'))

        return {
            "gallery_id": gallery_id,
            "parody"    : parody,
            "characters": characters,
            "tags"      : tags,
            "artist"    : artist,
            "group"     : group,
            "type"      : category,
            "title"     : title,
            # language information is hard-coded, not read from the page
            "language"  : "English",
            "lang"      : "en",
        }

    def images(self, page):
        """Return a list of (url, metadata) tuples for all gallery images"""
        cover, pos = text.extract(page, '<img src="', '"')
        data, pos = text.extract(page, "$.parseJSON('", "');", pos)

        # directory part of the cover URL: everything after the host and
        # before the file name
        directory = "/".join(cover.split("/")[3:-1])
        template = "/" + directory + "/{}.{}"

        # one-letter type codes used in the JSON data -> file extensions
        extensions = {"j": "jpg", "p": "png", "g": "gif"}
        primary = "https://i.hentaifox.com"
        mirror = "https://i2.hentaifox.com"

        images = []
        for num, info in util.json_loads(data).items():
            # each value has the form "<type code>,<width>,<height>"
            code, width, height = info.split(",")
            path = template.format(num, extensions[code])
            images.append((primary + path, {
                "width"    : width,
                "height"   : height,
                # same path on the second image server
                "_fallback": (mirror + path,),
            }))
        return images
class HentaifoxSearchExtractor(HentaifoxBase, Extractor):
    """Extractor for search results and listings on hentaifox.com"""
    subcategory = "search"
    pattern = (r"(?:https?://)?(?:www\.)?hentaifox\.com"
               r"(/(?:parody|tag|artist|character|search|group)/[^/?%#]+)")
    example = "https://hentaifox.com/tag/TAG/"

    def __init__(self, match):
        Extractor.__init__(self, match)
        # listing path captured by 'pattern', e.g. "/tag/TAG"
        self.path = match.group(1)

    def items(self):
        """Yield one Queue message per gallery in the listing"""
        for gallery in self.galleries():
            yield Message.Queue, gallery["url"], gallery

    def galleries(self):
        """Yield a metadata dict for each gallery across all result pages"""
        pnum = 1
        while True:
            page_url = "{}{}/pag/{}/".format(self.root, self.path, pnum)
            page = self.request(page_url).text

            for entry in text.extract_iter(
                    page, 'class="g_title"><a href="', '</a>'):
                # 'entry' is '<href>">' followed by the link text
                href, _, title = entry.partition('">')
                yield {
                    "url"       : text.urljoin(self.root, href),
                    # last path component of the gallery link
                    "gallery_id": text.parse_int(
                        href.strip("/").rpartition("/")[2]),
                    "title"     : text.unescape(title),
                    "_extractor": HentaifoxGalleryExtractor,
                }

            # look at the link that precedes the ">Next<" label
            next_pos = page.find(">Next<")
            next_url = text.rextract(page, "href=", ">", next_pos)[0]
            # NOTE(review): if rextract() can yield None here, the
            # membership test below would raise TypeError -- confirm
            if not (next_pos != -1 and "/pag" in next_url):
                return
            pnum += 1

View File

@@ -22,10 +22,15 @@ class ImhentaiExtractor(BaseExtractor):
while True:
page = self.request(url).text
pos = page.find('class="ranking_list"')
if pos >= 0:
page = page[:pos]
extr = text.extract_from(page)
while True:
gallery_id = extr('<a href="/gallery/', '"')
gallery_id = extr('href="/gallery/', '"')
if gallery_id == prev:
continue
if not gallery_id:
@@ -57,6 +62,18 @@ BASE_PATTERN = ImhentaiExtractor.update({
"root": "https://hentairox.com",
"pattern": r"(?:www\.)?hentairox\.com",
},
"hentaifox": {
"root": "https://hentaifox.com",
"pattern": r"(?:www\.)?hentaifox\.com",
},
"hentaienvy": {
"root": "https://hentaienvy.com",
"pattern": r"(?:www\.)?hentaienvy\.com",
},
"hentaizap": {
"root": "https://hentaizap.com",
"pattern": r"(?:www\.)?hentaizap\.com",
},
})
@@ -72,17 +89,20 @@ class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
def metadata(self, page):
extr = text.extract_from(page)
title = extr("<h1>", "<")
title_alt = extr('class="subtitle">', "<")
end = "</li>" if extr('<ul class="galleries_info', ">") else "</ul>"
data = {
"gallery_id": text.parse_int(self.gallery_id),
"title" : text.unescape(extr("<h1>", "<")),
"title_alt" : text.unescape(extr('class="subtitle">', "<")),
"parody" : self._split(extr(">Parodies", "</li>")),
"character" : self._split(extr(">Characters", "</li>")),
"tags" : self._split(extr(">Tags", "</li>")),
"artist" : self._split(extr(">Artists", "</li>")),
"group" : self._split(extr(">Groups", "</li>")),
"language" : self._split(extr(">Languages", "</li>")),
"title" : text.unescape(title),
"title_alt" : text.unescape(title_alt),
"parody" : self._split(extr(">Parodies", end)),
"character" : self._split(extr(">Characters", end)),
"tags" : self._split(extr(">Tags", end)),
"artist" : self._split(extr(">Artists", end)),
"group" : self._split(extr(">Groups", end)),
"language" : self._split(extr(">Languages", end)),
"type" : extr("href='/category/", "/"),
}
@@ -94,10 +114,12 @@ class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
def _split(self, html):
results = []
for tag in text.extract_iter(html, ">", "</a>"):
tag = tag.partition(" <span class='badge'>")[0]
if "<" in tag:
tag = text.remove_html(tag)
badge = ("badge'>" in tag or "class='badge" in tag)
tag = text.remove_html(tag)
if badge:
tag = tag.rpartition(" ")[0]
results.append(tag)
results.sort()
return results
def images(self, page):
@@ -132,9 +154,9 @@ class ImhentaiTagExtractor(ImhentaiExtractor):
class ImhentaiSearchExtractor(ImhentaiExtractor):
"""Extractor for imhentai search results"""
subcategory = "search"
pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
pattern = BASE_PATTERN + r"/search(/?\?[^#]+|/[^/?#]+/?)"
example = "https://imhentai.xxx/search/?key=QUERY"
def items(self):
url = self.root + "/search/?" + self.groups[-1]
url = self.root + "/search" + self.groups[-1]
return self._pagination(url)

View File

@@ -61,6 +61,7 @@ CATEGORY_MAP = {
"hbrowse" : "HBrowse",
"hentai2read" : "Hentai2Read",
"hentaicosplay" : "Hentai Cosplay",
"hentaienvy" : "HentaiEnvy",
"hentaiera" : "HentaiEra",
"hentaifoundry" : "Hentai Foundry",
"hentaifox" : "HentaiFox",
@@ -69,6 +70,7 @@ CATEGORY_MAP = {
"hentaiimg" : "Hentai Image",
"hentainexus" : "HentaiNexus",
"hentairox" : "HentaiRox",
"hentaizap" : "HentaiZap",
"hiperdex" : "HiperDEX",
"hitomi" : "Hitomi.la",
"horne" : "horne",

128
test/results/hentaienvy.py Normal file
View File

@@ -0,0 +1,128 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import imhentai
# Expected results for hentaienvy.com URLs. Every entry names a class from
# the 'imhentai' module under "#class".
# NOTE(review): keys starting with '#' look like directives for the test
# runner, the remaining keys like expected metadata values -- confirm
# against the runner.
__tests__ = (
# Gallery 12: 94 JPEG files, with expectations for all metadata lists.
{
"#url" : "https://hentaienvy.com/gallery/12/",
"#category": ("IMHentai", "hentaienvy", "gallery"),
"#class" : imhentai.ImhentaiGalleryExtractor,
"#pattern" : r"https://m1\.hentaienvy\.com/001/3x907ntq18/\d+\.jpg",
"#count" : 94,
"count" : 94,
"extension" : "jpg",
"filename" : str,
"gallery_id": 12,
"lang" : "en",
"num" : range(1, 94),
"title" : "(C67) [Studio Kimigabuchi (Kimimaru)] RE-TAKE 2 (Neon Genesis Evangelion) [English]",
"title_alt" : "",
"type" : "doujinshi",
"width" : {835, 838, 841, 1200},
"height" : {862, 865, 1200},
"artist": [
"kimimaru | entokkun",
],
"character": [
"asuka langley soryu",
"gendo ikari",
"makoto hyuga",
"maya ibuki",
"misato katsuragi",
"rei ayanami",
"shigeru aoba",
"shinji ikari",
],
"group": [
"studio kimigabuchi",
],
"language": [
"english",
"translated",
],
"parody": [
"neon genesis evangelion | shin seiki evangelion",
],
"tags": [
"multi-work series",
"schoolboy uniform",
"schoolgirl uniform",
"sole female",
"sole male",
"story arc",
"twintails",
],
},
# Gallery 1293743: 25 WebP files; Russian gallery whose title contains
# Cyrillic text.
{
"#url" : "https://hentaienvy.com/gallery/1293743/",
"#category": ("IMHentai", "hentaienvy", "gallery"),
"#class" : imhentai.ImhentaiGalleryExtractor,
"#pattern" : r"https://m9\.hentaienvy\.com/029/tk70aw8b4y/\d+\.webp",
"#count" : 25,
"count" : 25,
"num" : range(1, 25),
"extension" : "webp",
"filename" : str,
"gallery_id": 1293743,
"lang" : "ru",
"title" : "(C102) [Koniro Kajitsu (KonKa)] Konbucha wa Ikaga desu ka | Хотите немного чая из водорослей? (Blue Archive) [Russian] [graun]",
"title_alt" : "",
"type" : "doujinshi",
"width" : 1280,
"height" : range(1804, 1832),
"artist": [
"konka",
],
"character": [
"nagisa kirifuji",
"sensei",
],
"group": [
"koniro kajitsu",
],
"language": [
"russian",
"translated",
],
"parody": [
"blue archive",
],
"tags": [
"angel",
"defloration",
"halo",
"kissing",
"pantyhose",
"sole female",
"sole male",
"wings",
],
},
# Artist listing; "#pattern" reuses the gallery extractor's URL pattern.
{
"#url" : "https://hentaienvy.com/artist/asutora/",
"#category": ("IMHentai", "hentaienvy", "tag"),
"#class" : imhentai.ImhentaiTagExtractor,
"#pattern" : imhentai.ImhentaiGalleryExtractor.pattern,
"#count" : range(45, 50),
},
# Search by query string; this URL uses the 's_key' parameter.
{
"#url" : "https://hentaienvy.com/search/?s_key=asutora",
"#category": ("IMHentai", "hentaienvy", "search"),
"#class" : imhentai.ImhentaiSearchExtractor,
"#pattern" : imhentai.ImhentaiGalleryExtractor.pattern,
"#count" : range(45, 50),
},
)

View File

@@ -4,24 +4,62 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import hentaifox
from gallery_dl.extractor import imhentai
__tests__ = (
{
"#url" : "https://hentaifox.com/gallery/56622/",
"#category": ("", "hentaifox", "gallery"),
"#class" : hentaifox.HentaifoxGalleryExtractor,
"#pattern" : r"https://i\d*\.hentaifox\.com/\d+/\d+/\d+\.jpg",
"#count" : 24,
"#sha1_metadata": "bcd6b67284f378e5cc30b89b761140e3e60fcd92",
"#category": ("IMHentai", "hentaifox", "gallery"),
"#class" : imhentai.ImhentaiGalleryExtractor,
"#pattern" : r"https://i\d*\.hentaifox\.com/\d+/\d+/\d+\.jpg",
"#count" : 24,
"count" : 24,
"extension" : "jpg",
"filename" : str,
"gallery_id": 56622,
"width" : 1143,
"height" : 1600,
"lang" : "en",
"num" : range(1, 24),
"title" : "TSF no F no Hon Sono 3 no B - Ch.1",
"title_alt" : "",
"type" : "doujinshi",
"artist" : [
"taniyaraku",
],
"character" : [],
"group" : [
"tsf no f",
],
"language" : [
"english",
"translated",
],
"parody" : [
"original",
],
"tags" : [
"breast expansion",
"clothed male nude female",
"fingering",
"full censorship",
"gender bender",
"glasses",
"mind break",
"sole female",
"sole male",
"transformation",
],
},
{
"#url" : "https://hentaifox.com/gallery/630/",
"#comment" : "'split_tag' element (#1378)",
"#category": ("", "hentaifox", "gallery"),
"#class" : hentaifox.HentaifoxGalleryExtractor,
"#category": ("IMHentai", "hentaifox", "gallery"),
"#class" : imhentai.ImhentaiGalleryExtractor,
"artist" : [
"beti",
@@ -29,7 +67,7 @@ __tests__ = (
"magi",
"mimikaki",
],
"characters": [
"character": [
"aerith gainsborough",
"tifa lockhart",
"yuffie kisaragi",
@@ -54,8 +92,8 @@ __tests__ = (
{
"#url" : "https://hentaifox.com/gallery/35261/",
"#comment" : "email-protected title (#4201)",
"#category": ("", "hentaifox", "gallery"),
"#class" : hentaifox.HentaifoxGalleryExtractor,
"#category": ("IMHentai", "hentaifox", "gallery"),
"#class" : imhentai.ImhentaiGalleryExtractor,
"gallery_id": 35261,
"title" : "ManageM@ster!",
@@ -65,44 +103,48 @@ __tests__ = (
{
"#url" : "https://hentaifox.com/parody/touhou-project/",
"#category": ("", "hentaifox", "search"),
"#class" : hentaifox.HentaifoxSearchExtractor,
"#category": ("IMHentai", "hentaifox", "tag"),
"#class" : imhentai.ImhentaiTagExtractor,
},
{
"#url" : "https://hentaifox.com/character/reimu-hakurei/",
"#category": ("", "hentaifox", "search"),
"#class" : hentaifox.HentaifoxSearchExtractor,
"#category": ("IMHentai", "hentaifox", "tag"),
"#class" : imhentai.ImhentaiTagExtractor,
},
{
"#url" : "https://hentaifox.com/artist/distance/",
"#category": ("", "hentaifox", "search"),
"#class" : hentaifox.HentaifoxSearchExtractor,
},
{
"#url" : "https://hentaifox.com/search/touhou/",
"#category": ("", "hentaifox", "search"),
"#class" : hentaifox.HentaifoxSearchExtractor,
"#category": ("IMHentai", "hentaifox", "tag"),
"#class" : imhentai.ImhentaiTagExtractor,
},
{
"#url" : "https://hentaifox.com/group/v-slash/",
"#category": ("", "hentaifox", "search"),
"#class" : hentaifox.HentaifoxSearchExtractor,
"#category": ("IMHentai", "hentaifox", "tag"),
"#class" : imhentai.ImhentaiTagExtractor,
},
{
"#url" : "https://hentaifox.com/tag/heterochromia/",
"#category": ("", "hentaifox", "search"),
"#class" : hentaifox.HentaifoxSearchExtractor,
"#pattern" : hentaifox.HentaifoxGalleryExtractor.pattern,
"#count" : ">= 60",
"#category": ("IMHentai", "hentaifox", "tag"),
"#class" : imhentai.ImhentaiTagExtractor,
"#pattern" : imhentai.ImhentaiGalleryExtractor.pattern,
"#count" : range(180, 220),
},
"url" : str,
"gallery_id": int,
"title" : str,
{
"#url" : "https://hentaifox.com/search/?q=touhou+filming",
"#category": ("IMHentai", "hentaifox", "search"),
"#class" : imhentai.ImhentaiSearchExtractor,
"#pattern" : imhentai.ImhentaiGalleryExtractor.pattern,
"#count" : range(20, 30),
},
{
"#url" : "https://hentaifox.com/search/touhou/",
"#category": ("IMHentai", "hentaifox", "search"),
"#class" : imhentai.ImhentaiSearchExtractor,
},
)

128
test/results/hentaizap.py Normal file
View File

@@ -0,0 +1,128 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import imhentai
# Expected results for hentaizap.com URLs. Every entry names a class from
# the 'imhentai' module under "#class".
# NOTE(review): keys starting with '#' look like directives for the test
# runner, the remaining keys like expected metadata values -- confirm
# against the runner.
__tests__ = (
# Gallery 12: 94 JPEG files, with expectations for all metadata lists.
{
"#url" : "https://hentaizap.com/gallery/12/",
"#category": ("IMHentai", "hentaizap", "gallery"),
"#class" : imhentai.ImhentaiGalleryExtractor,
"#pattern" : r"https://m1\.hentaizap\.com/001/3x907ntq18/\d+\.jpg",
"#count" : 94,
"count" : 94,
"extension" : "jpg",
"filename" : str,
"gallery_id": 12,
"lang" : "en",
"num" : range(1, 94),
"title" : "(C67) [Studio Kimigabuchi (Kimimaru)] RE-TAKE 2 (Neon Genesis Evangelion) [English]",
"title_alt" : "",
"type" : "doujinshi",
"width" : {835, 838, 841, 1200},
"height" : {862, 865, 1200},
"artist": [
"kimimaru | entokkun",
],
"character": [
"asuka langley soryu",
"gendo ikari",
"makoto hyuga",
"maya ibuki",
"misato katsuragi",
"rei ayanami",
"shigeru aoba",
"shinji ikari",
],
"group": [
"studio kimigabuchi",
],
"language": [
"english",
"translated",
],
"parody": [
"neon genesis evangelion | shin seiki evangelion",
],
"tags": [
"multi-work series",
"schoolboy uniform",
"schoolgirl uniform",
"sole female",
"sole male",
"story arc",
"twintails",
],
},
# Gallery 1329498: 25 WebP files; Russian gallery whose title contains
# Cyrillic text.
{
"#url" : "https://hentaizap.com/gallery/1329498/",
"#category": ("IMHentai", "hentaizap", "gallery"),
"#class" : imhentai.ImhentaiGalleryExtractor,
"#pattern" : r"https://m9\.hentaizap\.com/029/tk70aw8b4y/\d+\.webp",
"#count" : 25,
"count" : 25,
"num" : range(1, 25),
"extension" : "webp",
"filename" : str,
"gallery_id": 1329498,
"lang" : "ru",
"title" : "(C102) [Koniro Kajitsu (KonKa)] Konbucha wa Ikaga desu ka | Хотите немного чая из водорослей? (Blue Archive) [Russian] [graun]",
"title_alt" : "",
"type" : "doujinshi",
"width" : 1280,
"height" : range(1804, 1832),
"artist": [
"konka",
],
"character": [
"nagisa kirifuji",
"sensei",
],
"group": [
"koniro kajitsu",
],
"language": [
"russian",
"translated",
],
"parody": [
"blue archive",
],
"tags": [
"angel",
"defloration",
"halo",
"kissing",
"pantyhose",
"sole female",
"sole male",
"wings",
],
},
# Artist listing; "#pattern" reuses the gallery extractor's URL pattern.
{
"#url" : "https://hentaizap.com/artist/asutora/",
"#category": ("IMHentai", "hentaizap", "tag"),
"#class" : imhentai.ImhentaiTagExtractor,
"#pattern" : imhentai.ImhentaiGalleryExtractor.pattern,
"#count" : range(45, 50),
},
# Search by query string; this URL uses the 'key' parameter.
{
"#url" : "https://hentaizap.com/search/?key=asutora",
"#category": ("IMHentai", "hentaizap", "search"),
"#class" : imhentai.ImhentaiSearchExtractor,
"#pattern" : imhentai.ImhentaiGalleryExtractor.pattern,
"#count" : range(45, 50),
},
)

View File

@@ -119,7 +119,7 @@ __tests__ = (
"#category": ("IMHentai", "imhentai", "tag"),
"#class" : imhentai.ImhentaiTagExtractor,
"#pattern" : imhentai.ImhentaiGalleryExtractor.pattern,
"#count" : range(30, 50),
"#count" : range(45, 50),
},
{
@@ -127,7 +127,7 @@ __tests__ = (
"#category": ("IMHentai", "imhentai", "search"),
"#class" : imhentai.ImhentaiSearchExtractor,
"#pattern" : imhentai.ImhentaiGalleryExtractor.pattern,
"#count" : range(30, 50),
"#count" : range(45, 50),
},
)