[hentairox] add support (#7003)

2025-02-18 21:21:15 +01:00
parent 95c446fcd1
commit 82493a6672
6 changed files with 184 additions and 3 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -355,6 +355,12 @@ Consider all listed sites to potentially be NSFW.
    <td>Galleries, Search Results</td>
    <td></td>
 </tr>
+<tr>
+    <td>HentaiRox</td>
+    <td>https://hentairox.com/</td>
+    <td>Galleries, Search Results, Tag Searches</td>
+    <td></td>
+</tr>
 <tr>
    <td>HiperDEX</td>
    <td>https://hiperdex.com/</td>
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -70,6 +70,7 @@ modules = [
    "hentaihand",
    "hentaihere",
    "hentainexus",
+    "hentairox",
    "hiperdex",
    "hitomi",
    "hotleak",
--- a/gallery_dl/extractor/hentairox.py
+++ b/gallery_dl/extractor/hentairox.py
@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://hentairox.com/"""
+
+from . import imhentai
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?hentairox\.com"
+
+
+# Mixin holding the hentairox-specific class attributes. It is listed first
+# in the bases of the extractor classes in this module, so its values take
+# precedence over any same-named attributes of the imhentai extractors.
+class HentairoxExtractor():
+    category = "hentairox"
+    # Site root; the inherited imhentai code builds its URLs from self.root
+    root = "https://hentairox.com"
+
+
+class HentairoxGalleryExtractor(
+        HentairoxExtractor, imhentai.ImhentaiGalleryExtractor):
+    """Extractor for hentairox galleries"""
+    # Matches "/gallery/<id>" and "/view/<id>"; the numeric ID is the
+    # only capture group
+    pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)"
+    example = "https://hentairox.com/gallery/12345/"
+
+
+class HentairoxTagExtractor(
+        HentairoxExtractor, imhentai.ImhentaiTagExtractor):
+    """Extractor for hentairox tag searches"""
+    subcategory = "tag"
+    # Group 1 is the whole path ("/<type>/<name>"), group 2 the name alone;
+    # <type> is one of the listing kinds enumerated in the alternation
+    pattern = (BASE_PATTERN + r"(/(?:"
+               r"artist|category|character|group|language|parody|tag"
+               r")/([^/?#]+))")
+    example = "https://hentairox.com/tag/TAG/"
+
+
+class HentairoxSearchExtractor(
+        HentairoxExtractor, imhentai.ImhentaiSearchExtractor):
+    """Extractor for hentairox search results"""
+    subcategory = "search"
+    # Group 1 is the query string after "?", up to (not including) a "#"
+    pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
+    example = "https://hentairox.com/search/?key=QUERY"
+
+
+# Assigned here rather than in the class body because
+# HentairoxGalleryExtractor is defined after HentairoxExtractor.
+# imhentai's _pagination() puts self._gallery_extractor into the
+# "_extractor" field of the data it builds for listing results.
+HentairoxExtractor._gallery_extractor = HentairoxGalleryExtractor
--- a/gallery_dl/extractor/imhentai.py
+++ b/gallery_dl/extractor/imhentai.py
@@ -20,7 +20,7 @@ class ImhentaiExtractor(Extractor):

    def _pagination(self, url):
        base = self.root + "/gallery/"
-        data = {"_extractor": ImhentaiGalleryExtractor}
+        data = {"_extractor": self._gallery_extractor}

        while True:
            page = self.request(url).text
@@ -36,7 +36,12 @@ class ImhentaiExtractor(Extractor):
            href = text.rextract(page, "class='page-link' href='", "'")[0]
            if not href or href == "#":
                return
-            url = text.ensure_http_scheme(href)
+            if href[0] == "/":
+                if href[1] == "/":
+                    href = "https:" + href
+                else:
+                    href = self.root + href
+            url = href


 class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
@@ -62,7 +67,7 @@ class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
            "artist"    : self._split(extr(">Artists:</span>", "</li>")),
            "group"     : self._split(extr(">Groups:</span>", "</li>")),
            "language"  : self._split(extr(">Languages:</span>", "</li>")),
-            "type"      : text.remove_html(extr(">Category:</span>", "<span")),
+            "type"      : extr("href='/category/", "/"),
        }

        if data["language"]:
@@ -117,3 +122,6 @@ class ImhentaiSearchExtractor(ImhentaiExtractor):
    def items(self):
        url = self.root + "/search/?" + self.groups[0]
        return self._pagination(url)
+
+
+ImhentaiExtractor._gallery_extractor = ImhentaiGalleryExtractor
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -66,6 +66,7 @@ CATEGORY_MAP = {
    "hentaihere"     : "HentaiHere",
    "hentaiimg"      : "Hentai Image",
    "hentainexus"    : "HentaiNexus",
+    "hentairox"      : "HentaiRox",
    "hiperdex"       : "HiperDEX",
    "hitomi"         : "Hitomi.la",
    "horne"          : "horne",
--- a/test/results/hentairox.py
+++ b/test/results/hentairox.py
@@ -0,0 +1,119 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import hentairox
+
+
+# Expected results for the hentairox extractors. Keys starting with "#"
+# describe the test itself (URL, extractor class, URL pattern, result count);
+# the remaining keys are expected metadata values.
+__tests__ = (
+# Gallery 25: 25 JPEG files
+{
+    "#url"    : "https://hentairox.com/gallery/25/",
+    "#class"  : hentairox.HentairoxGalleryExtractor,
+    "#pattern": r"https://m1\.hentairox\.com/001/knrxtga49v/\d+\.jpg",
+    "#count"  : 25,
+
+    "count"     : 25,
+    "extension" : "jpg",
+    "filename"  :  r"re:\d+",
+    "gallery_id": 25,
+    "lang"      : "ja",
+    "num"       : range(1, 25),
+    "title"     : "(Shikei wa Iyadakara na) [Kujira Logic, TOYBOX (Kujiran, Kurikara)] Gensou-kyou Chichi Zukan - Kurenai (Touhou Project)",
+    "title_alt" : "(死刑はいやだからな) [くぢらろじっく, といぼっくす (くぢらん, くりから)] 幻想郷乳図鑑 - 紅 (東方Project)",
+    "type"      : "doujinshi",
+    "width"     : {696, 701},
+    "height"    : {999, 1000},
+
+    "artist": [
+        "kujiran",
+        "kurikara",
+    ],
+    "character": [
+        "hong meiling",
+        "koakuma",
+        "patchouli knowledge",
+        "remilia scarlet",
+        "sakuya izayoi",
+    ],
+    "group": [
+        "kujira logic",
+        "toybox",
+    ],
+    "language": [
+        "japanese",
+    ],
+    "parody": [
+        "touhou project",
+    ],
+    "tags": [
+        "big breasts",
+        "footjob",
+        "futanari",
+        "lolicon",
+        "maid",
+        "paizuri",
+    ],
+},
+
+# Gallery 8526: 8 JPEG files
+{
+    "#url"    : "https://hentairox.com/gallery/8526/",
+    "#class"  : hentairox.HentairoxGalleryExtractor,
+    "#pattern": r"https://m1\.hentairox\.com/001/gkchsf3x5m/\d+\.jpg",
+    "#count"  : 8,
+
+    "count"     : 8,
+    "extension" : "jpg",
+    "filename"  : r"re:\d+",
+    "gallery_id": 8526,
+    "lang"      : "ja",
+    "num"       : range(1, 8),
+    "title"     : "(C70) [UDON-YA (Kizuki Aruchu, ZAN)] Udonko CM70 Omake Hon (Various)",
+    "title_alt" : "(C70) [うどんや (鬼月あるちゅ、ZAN)] うどんこ CM70オマケ本 (よろず)",
+    "type"      : "doujinshi",
+    "width"     : 1076,
+    "height"    : 1517,
+
+    "artist": [
+        "kizuki aruchu",
+        "zan",
+    ],
+    "character": [
+        "mikuru asahina",
+        "reisen udongein inaba",
+        "tsuruya",
+    ],
+    "group": [
+        "udon-ya",
+    ],
+    "language": [
+        "japanese",
+    ],
+    "parody": [
+        "fate stay night",
+        "super robot wars | super robot taisen",
+        "the melancholy of haruhi suzumiya | suzumiya haruhi no yuuutsu",
+    ],
+    "tags": [
+        "big breasts",
+        "okaasan to issho",
+        "touhou kaeidzuka",
+    ],
+},
+
+# Artist listing: every result URL must match the gallery pattern
+{
+    "#url"    : "https://hentairox.com/artist/kizuki-aruchu/",
+    "#class"  : hentairox.HentairoxTagExtractor,
+    "#pattern": hentairox.HentairoxGalleryExtractor.pattern,
+    "#count"  : range(140, 160),
+},
+
+# Search listing: every result URL must match the gallery pattern
+{
+    "#url"    : "https://hentairox.com/search/?key=aruchu",
+    "#class"  : hentairox.HentairoxSearchExtractor,
+    "#pattern": hentairox.HentairoxGalleryExtractor.pattern,
+    "#count"  : range(140, 160),
+},
+
+)