[imhentai] inherit from BaseExtractor

combine all imhentai-like sites into one module
This commit is contained in:
Mike Fährmann
2025-02-19 22:14:52 +01:00
parent 7a11d02e7a
commit 52d4e1a100
9 changed files with 108 additions and 175 deletions

View File

@@ -66,13 +66,11 @@ modules = [
"hatenablog",
"hentai2read",
"hentaicosplays",
"hentaiera",
"hentaifoundry",
"hentaifox",
"hentaihand",
"hentaihere",
"hentainexus",
"hentairox",
"hiperdex",
"hitomi",
"hotleak",

View File

@@ -1,46 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://hentaiera.com/"""
from . import imhentai
BASE_PATTERN = r"(?:https?://)?(?:www\.)?hentaiera\.com"
class HentaieraExtractor:
    # Site mixin: supplies the category name and root URL for the
    # hentaiera extractor classes that list it as their first base.
    root = "https://hentaiera.com"
    category = "hentaiera"
class HentaieraGalleryExtractor(HentaieraExtractor,
                                imhentai.ImhentaiGalleryExtractor):
    """Gallery extractor for hentaiera.com"""
    # Matches both /gallery/<id> and /view/<id>; the numeric id is captured.
    example = "https://hentaiera.com/gallery/12345/"
    pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)"
class HentaieraTagExtractor(HentaieraExtractor,
                            imhentai.ImhentaiTagExtractor):
    """Tag-listing extractor for hentaiera.com"""
    subcategory = "tag"
    example = "https://hentaiera.com/tag/TAG/"
    # Two capture groups: the whole "/<kind>/<name>" path and the bare name.
    pattern = (
        BASE_PATTERN +
        r"(/(?:artist|category|character|group|language|parody|tag)"
        r"/([^/?#]+))"
    )
class HentaieraSearchExtractor(HentaieraExtractor,
                               imhentai.ImhentaiSearchExtractor):
    """Search-result extractor for hentaiera.com"""
    subcategory = "search"
    example = "https://hentaiera.com/search/?key=QUERY"
    # Captures the query string (everything after '?' up to a fragment).
    pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
# Attached after the class definitions because HentaieraGalleryExtractor
# subclasses HentaieraExtractor and therefore does not exist yet while the
# mixin body is being evaluated.
# NOTE(review): presumably read by the inherited imhentai pagination code
# as `self._gallery_extractor` -- confirm against imhentai.py.
HentaieraExtractor._gallery_extractor = HentaieraGalleryExtractor

View File

@@ -1,46 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://hentairox.com/"""
from . import imhentai
BASE_PATTERN = r"(?:https?://)?(?:www\.)?hentairox\.com"
class HentairoxExtractor:
    # Site mixin: supplies the category name and root URL for the
    # hentairox extractor classes that list it as their first base.
    root = "https://hentairox.com"
    category = "hentairox"
class HentairoxGalleryExtractor(HentairoxExtractor,
                                imhentai.ImhentaiGalleryExtractor):
    """Gallery extractor for hentairox.com"""
    # Matches both /gallery/<id> and /view/<id>; the numeric id is captured.
    example = "https://hentairox.com/gallery/12345/"
    pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)"
class HentairoxTagExtractor(HentairoxExtractor,
                            imhentai.ImhentaiTagExtractor):
    """Tag-listing extractor for hentairox.com"""
    subcategory = "tag"
    example = "https://hentairox.com/tag/TAG/"
    # Two capture groups: the whole "/<kind>/<name>" path and the bare name.
    pattern = (
        BASE_PATTERN +
        r"(/(?:artist|category|character|group|language|parody|tag)"
        r"/([^/?#]+))"
    )
class HentairoxSearchExtractor(HentairoxExtractor,
                               imhentai.ImhentaiSearchExtractor):
    """Search-result extractor for hentairox.com"""
    subcategory = "search"
    example = "https://hentairox.com/search/?key=QUERY"
    # Captures the query string (everything after '?' up to a fragment).
    pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
# Attached after the class definitions because HentairoxGalleryExtractor
# subclasses HentairoxExtractor and therefore does not exist yet while the
# mixin body is being evaluated.
# NOTE(review): presumably read by the inherited imhentai pagination code
# as `self._gallery_extractor` -- confirm against imhentai.py.
HentairoxExtractor._gallery_extractor = HentairoxGalleryExtractor

View File

@@ -6,21 +6,18 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://imhentai.xxx/"""
"""Extractors for https://imhentai.xxx/ and mirror sites"""
from .common import GalleryExtractor, Extractor, Message
from .common import GalleryExtractor, BaseExtractor, Message
from .. import text, util
BASE_PATTERN = r"(?:https?://)?(?:www\.)?imhentai\.xxx"
class ImhentaiExtractor(Extractor):
category = "imhentai"
root = "https://imhentai.xxx"
class ImhentaiExtractor(BaseExtractor):
basecategory = "IMHentai"
def _pagination(self, url):
base = self.root + "/gallery/"
data = {"_extractor": self._gallery_extractor}
data = {"_extractor": ImhentaiGalleryExtractor}
while True:
page = self.request(url).text
@@ -44,15 +41,31 @@ class ImhentaiExtractor(Extractor):
url = href
# Register the three supported sites, each with its root URL and a host
# regex. The value returned by update() is used as the common prefix of
# the extractor URL patterns defined after this statement.
# NOTE(review): update() is not defined in this module; presumably it is
# inherited from BaseExtractor and returns a combined pattern -- confirm.
BASE_PATTERN = ImhentaiExtractor.update({
"imhentai": {
"root": "https://imhentai.xxx",
"pattern": r"(?:www\.)?imhentai\.xxx",
},
"hentaiera": {
"root": "https://hentaiera.com",
"pattern": r"(?:www\.)?hentaiera\.com",
},
"hentairox": {
"root": "https://hentairox.com",
"pattern": r"(?:www\.)?hentairox\.com",
},
})
class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
"""Extractor for imhentai galleries"""
pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)"
example = "https://imhentai.xxx/gallery/12345/"
def __init__(self, match):
    """Initialize the extractor and derive the gallery id and URL.

    match -- regex match object for the gallery URL
    """
    # Initialize exactly once, via ImhentaiExtractor. The previous
    # sequence also called GalleryExtractor.__init__ and assigned
    # gallery_id twice from two different match groups.
    # NOTE(review): presumably the shared BASE_PATTERN adds capture
    # groups ahead of the id, which is why the id is taken from the end
    # of self.groups -- confirm against BaseExtractor.
    ImhentaiExtractor.__init__(self, match)
    self.gallery_id = self.groups[-1]
    self.gallery_url = "{}/gallery/{}/".format(self.root, self.gallery_id)
def metadata(self, page):
extr = text.extract_from(page)
@@ -109,7 +122,7 @@ class ImhentaiTagExtractor(ImhentaiExtractor):
example = "https://imhentai.xxx/tag/TAG/"
def items(self):
    """Build the tag listing URL and hand it to _pagination()."""
    # Use the second-to-last match group (the "/<kind>/<name>" path).
    # The earlier assignment from groups[0] was overwritten before use
    # and has been dropped.
    url = self.root + self.groups[-2] + "/"
    return self._pagination(url)
@@ -120,8 +133,5 @@ class ImhentaiSearchExtractor(ImhentaiExtractor):
example = "https://imhentai.xxx/search/?key=QUERY"
def items(self):
    """Build the search URL and hand it to _pagination()."""
    # The query string is the last match group. The earlier assignment
    # from groups[0] was overwritten before use and has been dropped.
    url = self.root + "/search/?" + self.groups[-1]
    return self._pagination(url)
ImhentaiExtractor._gallery_extractor = ImhentaiGalleryExtractor