[hentairox] add support (#7003)

2025-02-18 21:21:15 +01:00
parent 95c446fcd1
commit 82493a6672
6 changed files with 184 additions and 3 deletions
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -70,6 +70,7 @@ modules = [
    "hentaihand",
    "hentaihere",
    "hentainexus",
+    "hentairox",
    "hiperdex",
    "hitomi",
    "hotleak",
--- a/gallery_dl/extractor/hentairox.py
+++ b/gallery_dl/extractor/hentairox.py
@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://hentairox.com/"""
+
+from . import imhentai
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?hentairox\.com"
+
+
+class HentairoxExtractor():  # mixin placed ahead of the imhentai bases
+    category = "hentairox"
+    root = "https://hentairox.com"  # joined with paths via self.root
+
+
+class HentairoxGalleryExtractor(
+        HentairoxExtractor, imhentai.ImhentaiGalleryExtractor):
+    """Extractor for hentairox galleries"""
+    pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)"  # group 1: digits
+    example = "https://hentairox.com/gallery/12345/"
+
+
+class HentairoxTagExtractor(
+        HentairoxExtractor, imhentai.ImhentaiTagExtractor):
+    """Extractor for hentairox tag searches"""
+    subcategory = "tag"
+    pattern = (BASE_PATTERN + r"(/(?:"  # group 1: "/<kind>/<name>" path
+               r"artist|category|character|group|language|parody|tag"
+               r")/([^/?#]+))")  # group 2: the name segment alone
+    example = "https://hentairox.com/tag/TAG/"
+
+
+class HentairoxSearchExtractor(
+        HentairoxExtractor, imhentai.ImhentaiSearchExtractor):
+    """Extractor for hentairox search results"""
+    subcategory = "search"
+    pattern = BASE_PATTERN + r"/search/?\?([^#]+)"  # group 1: query string
+    example = "https://hentairox.com/search/?key=QUERY"
+
+
+HentairoxExtractor._gallery_extractor = HentairoxGalleryExtractor
--- a/gallery_dl/extractor/imhentai.py
+++ b/gallery_dl/extractor/imhentai.py
@@ -20,7 +20,7 @@ class ImhentaiExtractor(Extractor):

    def _pagination(self, url):
        base = self.root + "/gallery/"
-        data = {"_extractor": ImhentaiGalleryExtractor}
+        data = {"_extractor": self._gallery_extractor}

        while True:
            page = self.request(url).text
@@ -36,7 +36,12 @@ class ImhentaiExtractor(Extractor):
            href = text.rextract(page, "class='page-link' href='", "'")[0]
            if not href or href == "#":
                return
-            url = text.ensure_http_scheme(href)
+            if href[0] == "/":
+                if href[1] == "/":
+                    href = "https:" + href
+                else:
+                    href = self.root + href
+            url = href


 class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
@@ -62,7 +67,7 @@ class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
            "artist"    : self._split(extr(">Artists:</span>", "</li>")),
            "group"     : self._split(extr(">Groups:</span>", "</li>")),
            "language"  : self._split(extr(">Languages:</span>", "</li>")),
-            "type"      : text.remove_html(extr(">Category:</span>", "<span")),
+            "type"      : extr("href='/category/", "/"),
        }

        if data["language"]:
@@ -117,3 +122,6 @@ class ImhentaiSearchExtractor(ImhentaiExtractor):
    def items(self):
        url = self.root + "/search/?" + self.groups[0]
        return self._pagination(url)
+
+
+ImhentaiExtractor._gallery_extractor = ImhentaiGalleryExtractor