[hentairox] add support (#7003)

This commit is contained in:
Mike Fährmann
2025-02-18 21:21:15 +01:00
parent 95c446fcd1
commit 82493a6672
6 changed files with 184 additions and 3 deletions

View File

@@ -355,6 +355,12 @@ Consider all listed sites to potentially be NSFW.
<td>Galleries, Search Results</td>
<td></td>
</tr>
<tr>
<td>HentaiRox</td>
<td>https://hentairox.com/</td>
<td>Galleries, Search Results, Tag Searches</td>
<td></td>
</tr>
<tr>
<td>HiperDEX</td>
<td>https://hiperdex.com/</td>

View File

@@ -70,6 +70,7 @@ modules = [
"hentaihand",
"hentaihere",
"hentainexus",
"hentairox",
"hiperdex",
"hitomi",
"hotleak",

View File

@@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://hentairox.com/"""
from . import imhentai
# Common URL prefix for every hentairox pattern in this module:
# the scheme and a leading "www." are both optional.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?hentairox\.com"
class HentairoxExtractor:
    """Mixin holding the site-specific settings shared by all hentairox
    extractors.

    It is combined with the imhentai extractor classes, which supply the
    actual extraction logic; only the category name and root URL differ.
    """
    # Category name under which this site's extractors are grouped.
    category = "hentairox"
    # Base URL, without trailing slash, used to build request URLs.
    root = "https://hentairox.com"
# HentairoxExtractor is listed first so that its 'category' and 'root'
# take precedence over any values inherited from the imhentai base class.
class HentairoxGalleryExtractor(
        HentairoxExtractor, imhentai.ImhentaiGalleryExtractor):
    """Extractor for hentairox galleries"""
    # Accepts both "/gallery/<id>" and "/view/<id>" URLs;
    # the numeric ID is the single capture group.
    pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)"
    example = "https://hentairox.com/gallery/12345/"
# HentairoxExtractor is listed first so that its 'category' and 'root'
# take precedence over any values inherited from the imhentai base class.
class HentairoxTagExtractor(
        HentairoxExtractor, imhentai.ImhentaiTagExtractor):
    """Extractor for hentairox tag searches"""
    subcategory = "tag"
    # Group 1 captures the whole "/<type>/<name>" path,
    # group 2 only the "<name>" part (everything up to '/', '?' or '#').
    pattern = (BASE_PATTERN + r"(/(?:"
               r"artist|category|character|group|language|parody|tag"
               r")/([^/?#]+))")
    example = "https://hentairox.com/tag/TAG/"
# HentairoxExtractor is listed first so that its 'category' and 'root'
# take precedence over any values inherited from the imhentai base class.
class HentairoxSearchExtractor(
        HentairoxExtractor, imhentai.ImhentaiSearchExtractor):
    """Extractor for hentairox search results"""
    subcategory = "search"
    # The slash before '?' is optional; group 1 captures the raw query
    # string up to (not including) any '#' fragment.
    pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
    example = "https://hentairox.com/search/?key=QUERY"
# Set after the class definitions because HentairoxGalleryExtractor must
# already exist. The shared imhentai pagination code reads
# 'self._gallery_extractor' to fill the "_extractor" entry of its results.
HentairoxExtractor._gallery_extractor = HentairoxGalleryExtractor

View File

@@ -20,7 +20,7 @@ class ImhentaiExtractor(Extractor):
def _pagination(self, url):
base = self.root + "/gallery/"
data = {"_extractor": ImhentaiGalleryExtractor}
data = {"_extractor": self._gallery_extractor}
while True:
page = self.request(url).text
@@ -36,7 +36,12 @@ class ImhentaiExtractor(Extractor):
href = text.rextract(page, "class='page-link' href='", "'")[0]
if not href or href == "#":
return
url = text.ensure_http_scheme(href)
if href[0] == "/":
if href[1] == "/":
href = "https:" + href
else:
href = self.root + href
url = href
class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
@@ -62,7 +67,7 @@ class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
"artist" : self._split(extr(">Artists:</span>", "</li>")),
"group" : self._split(extr(">Groups:</span>", "</li>")),
"language" : self._split(extr(">Languages:</span>", "</li>")),
"type" : text.remove_html(extr(">Category:</span>", "<span")),
"type" : extr("href='/category/", "/"),
}
if data["language"]:
@@ -117,3 +122,6 @@ class ImhentaiSearchExtractor(ImhentaiExtractor):
def items(self):
    """Return the paginated results for this search.

    The search URL is built from the site root and the first pattern
    group (presumably the raw query string; confirm against the class's
    'pattern'), then handed to self._pagination().
    """
    query = self.groups[0]
    search_url = self.root + "/search/?" + query
    return self._pagination(search_url)
# Gallery extractor class that _pagination() stores under "_extractor".
# Assigned here, after ImhentaiGalleryExtractor has been defined.
ImhentaiExtractor._gallery_extractor = ImhentaiGalleryExtractor

View File

@@ -66,6 +66,7 @@ CATEGORY_MAP = {
"hentaihere" : "HentaiHere",
"hentaiimg" : "Hentai Image",
"hentainexus" : "HentaiNexus",
"hentairox" : "HentaiRox",
"hiperdex" : "HiperDEX",
"hitomi" : "Hitomi.la",
"horne" : "horne",

119
test/results/hentairox.py Normal file
View File

@@ -0,0 +1,119 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import hentairox
# Expected results for the hentairox extractors; one dict per tested URL.
# NOTE(review): the "#"-prefixed keys look like test-runner directives
# (input URL, expected extractor class, URL pattern, result count) and the
# remaining keys like expected metadata values - confirm against the
# test runner before relying on this.
__tests__ = (
# Gallery 25: 25 files expected.
{
"#url" : "https://hentairox.com/gallery/25/",
"#class" : hentairox.HentairoxGalleryExtractor,
"#pattern": r"https://m1\.hentairox\.com/001/knrxtga49v/\d+\.jpg",
"#count" : 25,
"count" : 25,
"extension" : "jpg",
"filename" : r"re:\d+",
"gallery_id": 25,
"lang" : "ja",
"num" : range(1, 25),
"title" : "(Shikei wa Iyadakara na) [Kujira Logic, TOYBOX (Kujiran, Kurikara)] Gensou-kyou Chichi Zukan - Kurenai (Touhou Project)",
"title_alt" : "(死刑はいやだからな) [くぢらろじっく, といぼっくす (くぢらん, くりから)] 幻想郷乳図鑑 - 紅 (東方Project)",
"type" : "doujinshi",
"width" : {696, 701},
"height" : {999, 1000},
"artist": [
"kujiran",
"kurikara",
],
"character": [
"hong meiling",
"koakuma",
"patchouli knowledge",
"remilia scarlet",
"sakuya izayoi",
],
"group": [
"kujira logic",
"toybox",
],
"language": [
"japanese",
],
"parody": [
"touhou project",
],
"tags": [
"big breasts",
"footjob",
"futanari",
"lolicon",
"maid",
"paizuri",
],
},
# Gallery 8526: 8 files expected.
{
"#url" : "https://hentairox.com/gallery/8526/",
"#class" : hentairox.HentairoxGalleryExtractor,
"#pattern": r"https://m1\.hentairox\.com/001/gkchsf3x5m/\d+\.jpg",
"#count" : 8,
"count" : 8,
"extension" : "jpg",
"filename" : r"re:\d+",
"gallery_id": 8526,
"lang" : "ja",
"num" : range(1, 8),
"title" : "(C70) [UDON-YA (Kizuki Aruchu, ZAN)] Udonko CM70 Omake Hon (Various)",
"title_alt" : "(C70) [うどんや (鬼月あるちゅ、ZAN)] うどんこ CM70オマケ本 (よろず)",
"type" : "doujinshi",
"width" : 1076,
"height" : 1517,
"artist": [
"kizuki aruchu",
"zan",
],
"character": [
"mikuru asahina",
"reisen udongein inaba",
"tsuruya",
],
"group": [
"udon-ya",
],
"language": [
"japanese",
],
"parody": [
"fate stay night",
"super robot wars | super robot taisen",
"the melancholy of haruhi suzumiya | suzumiya haruhi no yuuutsu",
],
"tags": [
"big breasts",
"okaasan to issho",
"touhou kaeidzuka",
],
},
# Artist listing: results are checked against the gallery URL pattern.
{
"#url" : "https://hentairox.com/artist/kizuki-aruchu/",
"#class" : hentairox.HentairoxTagExtractor,
"#pattern": hentairox.HentairoxGalleryExtractor.pattern,
"#count" : range(140, 160),
},
# Search query: results are checked against the gallery URL pattern.
{
"#url" : "https://hentairox.com/search/?key=aruchu",
"#class" : hentairox.HentairoxSearchExtractor,
"#pattern": hentairox.HentairoxGalleryExtractor.pattern,
"#count" : range(140, 160),
},
)