[imhentai] inherit from BaseExtractor
combine all imhentai-like sites into one module
This commit is contained in:
@@ -66,13 +66,11 @@ modules = [
|
||||
"hatenablog",
|
||||
"hentai2read",
|
||||
"hentaicosplays",
|
||||
"hentaiera",
|
||||
"hentaifoundry",
|
||||
"hentaifox",
|
||||
"hentaihand",
|
||||
"hentaihere",
|
||||
"hentainexus",
|
||||
"hentairox",
|
||||
"hiperdex",
|
||||
"hitomi",
|
||||
"hotleak",
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2025 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://hentaiera.com/"""
|
||||
|
||||
from . import imhentai
|
||||
|
||||
# Common URL prefix for the hentaiera patterns:
# optional scheme, optional "www.", then the site's domain.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?hentaiera\.com"
|
||||
|
||||
|
||||
class HentaieraExtractor:
    """Mixin carrying the hentaiera category name and root URL."""
    # Listed first among the bases of the concrete hentaiera extractors,
    # so these two attributes take precedence over the imhentai ones.
    category = "hentaiera"
    root = "https://hentaiera.com"
|
||||
|
||||
|
||||
class HentaieraGalleryExtractor(HentaieraExtractor,
                                imhentai.ImhentaiGalleryExtractor):
    """Extractor for hentaiera galleries"""
    example = "https://hentaiera.com/gallery/12345/"
    # Accepts both /gallery/<id> and /view/<id>; the numeric id is captured.
    pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)"
|
||||
|
||||
|
||||
class HentaieraTagExtractor(HentaieraExtractor,
                            imhentai.ImhentaiTagExtractor):
    """Extractor for hentaiera tag searches"""
    subcategory = "tag"
    example = "https://hentaiera.com/tag/TAG/"
    # The outer capture group is the whole "/<kind>/<name>" path,
    # the inner one only the name after the listing kind.
    pattern = BASE_PATTERN + (
        r"(/(?:artist|category|character|group|language|parody|tag)"
        r"/([^/?#]+))"
    )
|
||||
|
||||
|
||||
class HentaieraSearchExtractor(HentaieraExtractor,
                               imhentai.ImhentaiSearchExtractor):
    """Extractor for hentaiera search results"""
    subcategory = "search"
    example = "https://hentaiera.com/search/?key=QUERY"
    # Captures the query string: everything after "?" up to a "#" fragment.
    pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
|
||||
|
||||
|
||||
# Bound here rather than in the class body because
# HentaieraGalleryExtractor is defined after HentaieraExtractor.
HentaieraExtractor._gallery_extractor = HentaieraGalleryExtractor
|
||||
@@ -1,46 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2025 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://hentairox.com/"""
|
||||
|
||||
from . import imhentai
|
||||
|
||||
# Common URL prefix for the hentairox patterns:
# optional scheme, optional "www.", then the site's domain.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?hentairox\.com"
|
||||
|
||||
|
||||
class HentairoxExtractor:
    """Mixin carrying the hentairox category name and root URL."""
    # Listed first among the bases of the concrete hentairox extractors,
    # so these two attributes take precedence over the imhentai ones.
    category = "hentairox"
    root = "https://hentairox.com"
|
||||
|
||||
|
||||
class HentairoxGalleryExtractor(HentairoxExtractor,
                                imhentai.ImhentaiGalleryExtractor):
    """Extractor for hentairox galleries"""
    example = "https://hentairox.com/gallery/12345/"
    # Accepts both /gallery/<id> and /view/<id>; the numeric id is captured.
    pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)"
|
||||
|
||||
|
||||
class HentairoxTagExtractor(HentairoxExtractor,
                            imhentai.ImhentaiTagExtractor):
    """Extractor for hentairox tag searches"""
    subcategory = "tag"
    example = "https://hentairox.com/tag/TAG/"
    # The outer capture group is the whole "/<kind>/<name>" path,
    # the inner one only the name after the listing kind.
    pattern = BASE_PATTERN + (
        r"(/(?:artist|category|character|group|language|parody|tag)"
        r"/([^/?#]+))"
    )
|
||||
|
||||
|
||||
class HentairoxSearchExtractor(HentairoxExtractor,
                               imhentai.ImhentaiSearchExtractor):
    """Extractor for hentairox search results"""
    subcategory = "search"
    example = "https://hentairox.com/search/?key=QUERY"
    # Captures the query string: everything after "?" up to a "#" fragment.
    pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
|
||||
|
||||
|
||||
# Bound here rather than in the class body because
# HentairoxGalleryExtractor is defined after HentairoxExtractor.
HentairoxExtractor._gallery_extractor = HentairoxGalleryExtractor
|
||||
@@ -6,21 +6,18 @@
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://imhentai.xxx/"""
|
||||
"""Extractors for https://imhentai.xxx/ and mirror sites"""
|
||||
|
||||
from .common import GalleryExtractor, Extractor, Message
|
||||
from .common import GalleryExtractor, BaseExtractor, Message
|
||||
from .. import text, util
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?imhentai\.xxx"
|
||||
|
||||
|
||||
class ImhentaiExtractor(Extractor):
|
||||
category = "imhentai"
|
||||
root = "https://imhentai.xxx"
|
||||
class ImhentaiExtractor(BaseExtractor):
|
||||
basecategory = "IMHentai"
|
||||
|
||||
def _pagination(self, url):
|
||||
base = self.root + "/gallery/"
|
||||
data = {"_extractor": self._gallery_extractor}
|
||||
data = {"_extractor": ImhentaiGalleryExtractor}
|
||||
|
||||
while True:
|
||||
page = self.request(url).text
|
||||
@@ -44,15 +41,31 @@ class ImhentaiExtractor(Extractor):
|
||||
url = href
|
||||
|
||||
|
||||
# Register each supported site under its category name with its root URL
# and host regex. The value returned by update() is used as the URL
# prefix of the extractor patterns that follow.
BASE_PATTERN = ImhentaiExtractor.update({
    "imhentai": {
        "root": "https://imhentai.xxx",
        "pattern": r"(?:www\.)?imhentai\.xxx",
    },
    "hentaiera": {
        "root": "https://hentaiera.com",
        "pattern": r"(?:www\.)?hentaiera\.com",
    },
    "hentairox": {
        "root": "https://hentairox.com",
        "pattern": r"(?:www\.)?hentairox\.com",
    },
})
|
||||
|
||||
|
||||
class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
|
||||
"""Extractor for imhentai galleries"""
|
||||
pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)"
|
||||
example = "https://imhentai.xxx/gallery/12345/"
|
||||
|
||||
def __init__(self, match):
|
||||
self.gallery_id = match.group(1)
|
||||
url = "{}/gallery/{}/".format(self.root, self.gallery_id)
|
||||
GalleryExtractor.__init__(self, match, url)
|
||||
ImhentaiExtractor.__init__(self, match)
|
||||
self.gallery_id = self.groups[-1]
|
||||
self.gallery_url = "{}/gallery/{}/".format(self.root, self.gallery_id)
|
||||
|
||||
def metadata(self, page):
|
||||
extr = text.extract_from(page)
|
||||
@@ -109,7 +122,7 @@ class ImhentaiTagExtractor(ImhentaiExtractor):
|
||||
example = "https://imhentai.xxx/tag/TAG/"
|
||||
|
||||
def items(self):
|
||||
url = self.root + self.groups[0] + "/"
|
||||
url = self.root + self.groups[-2] + "/"
|
||||
return self._pagination(url)
|
||||
|
||||
|
||||
@@ -120,8 +133,5 @@ class ImhentaiSearchExtractor(ImhentaiExtractor):
|
||||
example = "https://imhentai.xxx/search/?key=QUERY"
|
||||
|
||||
def items(self):
|
||||
url = self.root + "/search/?" + self.groups[0]
|
||||
url = self.root + "/search/?" + self.groups[-1]
|
||||
return self._pagination(url)
|
||||
|
||||
|
||||
ImhentaiExtractor._gallery_extractor = ImhentaiGalleryExtractor
|
||||
|
||||
Reference in New Issue
Block a user