[imhentai] inherit from BaseExtractor

combine all imhentai-like sites into one module
2025-02-19 22:14:52 +01:00
parent 7a11d02e7a
commit 52d4e1a100
9 changed files with 108 additions and 175 deletions
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -66,13 +66,11 @@ modules = [
    "hatenablog",
    "hentai2read",
    "hentaicosplays",
-    "hentaiera",
    "hentaifoundry",
    "hentaifox",
    "hentaihand",
    "hentaihere",
    "hentainexus",
-    "hentairox",
    "hiperdex",
    "hitomi",
    "hotleak",
--- a/gallery_dl/extractor/hentaiera.py
+++ b/gallery_dl/extractor/hentaiera.py
@@ -1,46 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2025 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://hentaiera.com/"""
-
-from . import imhentai
-
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?hentaiera\.com"
-
-
-class HentaieraExtractor():
-    category = "hentaiera"
-    root = "https://hentaiera.com"
-
-
-class HentaieraGalleryExtractor(
-        HentaieraExtractor, imhentai.ImhentaiGalleryExtractor):
-    """Extractor for hentaiera galleries"""
-    pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)"
-    example = "https://hentaiera.com/gallery/12345/"
-
-
-class HentaieraTagExtractor(
-        HentaieraExtractor, imhentai.ImhentaiTagExtractor):
-    """Extractor for hentaiera tag searches"""
-    subcategory = "tag"
-    pattern = (BASE_PATTERN + r"(/(?:"
-               r"artist|category|character|group|language|parody|tag"
-               r")/([^/?#]+))")
-    example = "https://hentaiera.com/tag/TAG/"
-
-
-class HentaieraSearchExtractor(
-        HentaieraExtractor, imhentai.ImhentaiSearchExtractor):
-    """Extractor for hentaiera search results"""
-    subcategory = "search"
-    pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
-    example = "https://hentaiera.com/search/?key=QUERY"
-
-
-HentaieraExtractor._gallery_extractor = HentaieraGalleryExtractor
--- a/gallery_dl/extractor/hentairox.py
+++ b/gallery_dl/extractor/hentairox.py
@@ -1,46 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2025 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://hentairox.com/"""
-
-from . import imhentai
-
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?hentairox\.com"
-
-
-class HentairoxExtractor():
-    category = "hentairox"
-    root = "https://hentairox.com"
-
-
-class HentairoxGalleryExtractor(
-        HentairoxExtractor, imhentai.ImhentaiGalleryExtractor):
-    """Extractor for hentairox galleries"""
-    pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)"
-    example = "https://hentairox.com/gallery/12345/"
-
-
-class HentairoxTagExtractor(
-        HentairoxExtractor, imhentai.ImhentaiTagExtractor):
-    """Extractor for hentairox tag searches"""
-    subcategory = "tag"
-    pattern = (BASE_PATTERN + r"(/(?:"
-               r"artist|category|character|group|language|parody|tag"
-               r")/([^/?#]+))")
-    example = "https://hentairox.com/tag/TAG/"
-
-
-class HentairoxSearchExtractor(
-        HentairoxExtractor, imhentai.ImhentaiSearchExtractor):
-    """Extractor for hentairox search results"""
-    subcategory = "search"
-    pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
-    example = "https://hentairox.com/search/?key=QUERY"
-
-
-HentairoxExtractor._gallery_extractor = HentairoxGalleryExtractor
--- a/gallery_dl/extractor/imhentai.py
+++ b/gallery_dl/extractor/imhentai.py
@@ -6,21 +6,18 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.

-"""Extractors for https://imhentai.xxx/"""
+"""Extractors for https://imhentai.xxx/ and mirror sites"""

-from .common import GalleryExtractor, Extractor, Message
+from .common import GalleryExtractor, BaseExtractor, Message
 from .. import text, util

-BASE_PATTERN = r"(?:https?://)?(?:www\.)?imhentai\.xxx"

-
-class ImhentaiExtractor(Extractor):
-    category = "imhentai"
-    root = "https://imhentai.xxx"
+class ImhentaiExtractor(BaseExtractor):
+    basecategory = "IMHentai"

    def _pagination(self, url):
        base = self.root + "/gallery/"
-        data = {"_extractor": self._gallery_extractor}
+        data = {"_extractor": ImhentaiGalleryExtractor}

        while True:
            page = self.request(url).text
@@ -44,15 +41,31 @@ class ImhentaiExtractor(Extractor):
            url = href


+BASE_PATTERN = ImhentaiExtractor.update({
+    "imhentai": {
+        "root": "https://imhentai.xxx",
+        "pattern": r"(?:www\.)?imhentai\.xxx",
+    },
+    "hentaiera": {
+        "root": "https://hentaiera.com",
+        "pattern": r"(?:www\.)?hentaiera\.com",
+    },
+    "hentairox": {
+        "root": "https://hentairox.com",
+        "pattern": r"(?:www\.)?hentairox\.com",
+    },
+})
+
+
 class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
    """Extractor for imhentai galleries"""
    pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)"
    example = "https://imhentai.xxx/gallery/12345/"

    def __init__(self, match):
-        self.gallery_id = match.group(1)
-        url = "{}/gallery/{}/".format(self.root, self.gallery_id)
-        GalleryExtractor.__init__(self, match, url)
+        ImhentaiExtractor.__init__(self, match)
+        self.gallery_id = self.groups[-1]
+        self.gallery_url = "{}/gallery/{}/".format(self.root, self.gallery_id)

    def metadata(self, page):
        extr = text.extract_from(page)
@@ -109,7 +122,7 @@ class ImhentaiTagExtractor(ImhentaiExtractor):
    example = "https://imhentai.xxx/tag/TAG/"

    def items(self):
-        url = self.root + self.groups[0] + "/"
+        url = self.root + self.groups[-2] + "/"
        return self._pagination(url)


@@ -120,8 +133,5 @@ class ImhentaiSearchExtractor(ImhentaiExtractor):
    example = "https://imhentai.xxx/search/?key=QUERY"

    def items(self):
-        url = self.root + "/search/?" + self.groups[0]
+        url = self.root + "/search/?" + self.groups[-1]
        return self._pagination(url)
-
-
-ImhentaiExtractor._gallery_extractor = ImhentaiGalleryExtractor