[hentairox] add support (#7003)
This commit is contained in:
@@ -355,6 +355,12 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Galleries, Search Results</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>HentaiRox</td>
|
||||
<td>https://hentairox.com/</td>
|
||||
<td>Galleries, Search Results, Tag Searches</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>HiperDEX</td>
|
||||
<td>https://hiperdex.com/</td>
|
||||
|
||||
@@ -70,6 +70,7 @@ modules = [
|
||||
"hentaihand",
|
||||
"hentaihere",
|
||||
"hentainexus",
|
||||
"hentairox",
|
||||
"hiperdex",
|
||||
"hitomi",
|
||||
"hotleak",
|
||||
|
||||
46
gallery_dl/extractor/hentairox.py
Normal file
46
gallery_dl/extractor/hentairox.py
Normal file
@@ -0,0 +1,46 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2025 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://hentairox.com/"""
|
||||
|
||||
from . import imhentai
|
||||
|
||||
# Host prefix shared by every URL pattern in this module:
# optional "http(s)://" scheme, optional "www." subdomain, then the domain.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?hentairox\.com"
|
||||
|
||||
|
||||
class HentairoxExtractor:
    """Base class for hentairox extractors"""
    # Site-specific values. The concrete extractors list this class before
    # their imhentai base class, so attributes set here come first in the MRO.
    category = "hentairox"
    root = "https://hentairox.com"
|
||||
|
||||
|
||||
class HentairoxGalleryExtractor(
        HentairoxExtractor, imhentai.ImhentaiGalleryExtractor):
    """Extractor for hentairox galleries"""
    # Accepts both "/gallery/<id>" and "/view/<id>" paths;
    # group 1 captures the numeric ID.
    pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)"
    example = "https://hentairox.com/gallery/12345/"
|
||||
|
||||
|
||||
class HentairoxTagExtractor(
        HentairoxExtractor, imhentai.ImhentaiTagExtractor):
    """Extractor for hentairox tag searches"""
    subcategory = "tag"
    # Group 1 captures the whole "/<type>/<name>" path,
    # group 2 captures only "<name>".
    pattern = (BASE_PATTERN + r"(/(?:"
               r"artist|category|character|group|language|parody|tag"
               r")/([^/?#]+))")
    example = "https://hentairox.com/tag/TAG/"
|
||||
|
||||
|
||||
class HentairoxSearchExtractor(
        HentairoxExtractor, imhentai.ImhentaiSearchExtractor):
    """Extractor for hentairox search results"""
    subcategory = "search"
    # Group 1 captures the raw query string: everything after "?" up to,
    # but not including, a "#" fragment.
    pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
    example = "https://hentairox.com/search/?key=QUERY"
|
||||
|
||||
|
||||
# Assigned after the class bodies because HentairoxGalleryExtractor has to
# exist first. imhentai's _pagination() reads self._gallery_extractor to fill
# the "_extractor" entry of its result data.
HentairoxExtractor._gallery_extractor = HentairoxGalleryExtractor
|
||||
@@ -20,7 +20,7 @@ class ImhentaiExtractor(Extractor):
|
||||
|
||||
def _pagination(self, url):
|
||||
base = self.root + "/gallery/"
|
||||
data = {"_extractor": ImhentaiGalleryExtractor}
|
||||
data = {"_extractor": self._gallery_extractor}
|
||||
|
||||
while True:
|
||||
page = self.request(url).text
|
||||
@@ -36,7 +36,12 @@ class ImhentaiExtractor(Extractor):
|
||||
href = text.rextract(page, "class='page-link' href='", "'")[0]
|
||||
if not href or href == "#":
|
||||
return
|
||||
url = text.ensure_http_scheme(href)
|
||||
if href[0] == "/":
|
||||
if href[1] == "/":
|
||||
href = "https:" + href
|
||||
else:
|
||||
href = self.root + href
|
||||
url = href
|
||||
|
||||
|
||||
class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
|
||||
@@ -62,7 +67,7 @@ class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
|
||||
"artist" : self._split(extr(">Artists:</span>", "</li>")),
|
||||
"group" : self._split(extr(">Groups:</span>", "</li>")),
|
||||
"language" : self._split(extr(">Languages:</span>", "</li>")),
|
||||
"type" : text.remove_html(extr(">Category:</span>", "<span")),
|
||||
"type" : extr("href='/category/", "/"),
|
||||
}
|
||||
|
||||
if data["language"]:
|
||||
@@ -117,3 +122,6 @@ class ImhentaiSearchExtractor(ImhentaiExtractor):
|
||||
def items(self):
|
||||
url = self.root + "/search/?" + self.groups[0]
|
||||
return self._pagination(url)
|
||||
|
||||
|
||||
ImhentaiExtractor._gallery_extractor = ImhentaiGalleryExtractor
|
||||
|
||||
@@ -66,6 +66,7 @@ CATEGORY_MAP = {
|
||||
"hentaihere" : "HentaiHere",
|
||||
"hentaiimg" : "Hentai Image",
|
||||
"hentainexus" : "HentaiNexus",
|
||||
"hentairox" : "HentaiRox",
|
||||
"hiperdex" : "HiperDEX",
|
||||
"hitomi" : "Hitomi.la",
|
||||
"horne" : "horne",
|
||||
|
||||
119
test/results/hentairox.py
Normal file
119
test/results/hentairox.py
Normal file
@@ -0,0 +1,119 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import hentairox
|
||||
|
||||
|
||||
# Expected results for the hentairox extractors: two galleries, one artist
# listing, and one search query.
__tests__ = (
    {
        "#url"    : "https://hentairox.com/gallery/25/",
        "#class"  : hentairox.HentairoxGalleryExtractor,
        "#pattern": r"https://m1\.hentairox\.com/001/knrxtga49v/\d+\.jpg",
        "#count"  : 25,

        "count"     : 25,
        "extension" : "jpg",
        "filename"  : r"re:\d+",
        "gallery_id": 25,
        "lang"      : "ja",
        "num"       : range(1, 25),
        "title"     : "(Shikei wa Iyadakara na) [Kujira Logic, TOYBOX (Kujiran, Kurikara)] Gensou-kyou Chichi Zukan - Kurenai (Touhou Project)",
        "title_alt" : "(死刑はいやだからな) [くぢらろじっく, といぼっくす (くぢらん, くりから)] 幻想郷乳図鑑 - 紅 (東方Project)",
        "type"      : "doujinshi",
        "width"     : {696, 701},
        "height"    : {999, 1000},

        "artist": [
            "kujiran",
            "kurikara",
        ],
        "character": [
            "hong meiling",
            "koakuma",
            "patchouli knowledge",
            "remilia scarlet",
            "sakuya izayoi",
        ],
        "group": [
            "kujira logic",
            "toybox",
        ],
        "language": [
            "japanese",
        ],
        "parody": [
            "touhou project",
        ],
        "tags": [
            "big breasts",
            "footjob",
            "futanari",
            "lolicon",
            "maid",
            "paizuri",
        ],
    },

    {
        "#url"    : "https://hentairox.com/gallery/8526/",
        "#class"  : hentairox.HentairoxGalleryExtractor,
        "#pattern": r"https://m1\.hentairox\.com/001/gkchsf3x5m/\d+\.jpg",
        "#count"  : 8,

        "count"     : 8,
        "extension" : "jpg",
        "filename"  : r"re:\d+",
        "gallery_id": 8526,
        "lang"      : "ja",
        "num"       : range(1, 8),
        "title"     : "(C70) [UDON-YA (Kizuki Aruchu, ZAN)] Udonko CM70 Omake Hon (Various)",
        "title_alt" : "(C70) [うどんや (鬼月あるちゅ、ZAN)] うどんこ CM70オマケ本 (よろず)",
        "type"      : "doujinshi",
        "width"     : 1076,
        "height"    : 1517,

        "artist": [
            "kizuki aruchu",
            "zan",
        ],
        "character": [
            "mikuru asahina",
            "reisen udongein inaba",
            "tsuruya",
        ],
        "group": [
            "udon-ya",
        ],
        "language": [
            "japanese",
        ],
        "parody": [
            "fate stay night",
            "super robot wars | super robot taisen",
            "the melancholy of haruhi suzumiya | suzumiya haruhi no yuuutsu",
        ],
        "tags": [
            "big breasts",
            "okaasan to issho",
            "touhou kaeidzuka",
        ],
    },

    {
        "#url"    : "https://hentairox.com/artist/kizuki-aruchu/",
        "#class"  : hentairox.HentairoxTagExtractor,
        "#pattern": hentairox.HentairoxGalleryExtractor.pattern,
        "#count"  : range(140, 160),
    },

    {
        "#url"    : "https://hentairox.com/search/?key=aruchu",
        "#class"  : hentairox.HentairoxSearchExtractor,
        "#pattern": hentairox.HentairoxGalleryExtractor.pattern,
        "#count"  : range(140, 160),
    },

)
|
||||
Reference in New Issue
Block a user