[hentairox] add support (#7003)

This commit is contained in:
Mike Fährmann
2025-02-18 21:21:15 +01:00
parent 95c446fcd1
commit 82493a6672
6 changed files with 184 additions and 3 deletions

View File

@@ -70,6 +70,7 @@ modules = [
"hentaihand",
"hentaihere",
"hentainexus",
"hentairox",
"hiperdex",
"hitomi",
"hotleak",

View File

@@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://hentairox.com/"""
from . import imhentai
# Matches the site's host with an optional scheme and an optional "www."
# prefix; every extractor pattern in this module is built on top of it.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?hentairox\.com"
class HentairoxExtractor:
    """Mixin carrying the site-specific settings for hentairox.com.

    It is listed first among the bases of the concrete extractors in this
    module, so these attributes take precedence over the ones inherited
    from the imhentai extractor classes.
    """

    root = "https://hentairox.com"
    category = "hentairox"
class HentairoxGalleryExtractor(
        HentairoxExtractor,
        imhentai.ImhentaiGalleryExtractor):
    """Gallery extractor for hentairox.com.

    Accepts both "/gallery/<id>" and "/view/<id>" URLs and captures the
    numeric ID. The class body only supplies the URL pattern and an
    example URL; everything else is inherited from its bases.
    """

    example = "https://hentairox.com/gallery/12345/"
    pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)"
class HentairoxTagExtractor(
        HentairoxExtractor,
        imhentai.ImhentaiTagExtractor):
    """Tag-listing extractor for hentairox.com.

    Besides "/tag/<name>", the pattern also accepts the artist, category,
    character, group, language and parody listings. Group 1 captures the
    whole "/<kind>/<name>" path, group 2 just the name.
    """

    subcategory = "tag"
    example = "https://hentairox.com/tag/TAG/"
    pattern = BASE_PATTERN + (
        r"(/(?:"
        r"artist|category|character|group|language|parody|tag"
        r")/([^/?#]+))"
    )
class HentairoxSearchExtractor(
        HentairoxExtractor,
        imhentai.ImhentaiSearchExtractor):
    """Search-result extractor for hentairox.com.

    The pattern captures everything after "?" up to an optional fragment,
    i.e. the raw query string of a "/search/" URL.
    """

    subcategory = "search"
    example = "https://hentairox.com/search/?key=QUERY"
    pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
# Bound after the class definitions because HentairoxGalleryExtractor does not
# exist yet while HentairoxExtractor's body runs. The inherited imhentai
# pagination code reads `self._gallery_extractor` when it builds the
# "_extractor" entry of its result data.
HentairoxExtractor._gallery_extractor = HentairoxGalleryExtractor

View File

@@ -20,7 +20,7 @@ class ImhentaiExtractor(Extractor):
def _pagination(self, url):
base = self.root + "/gallery/"
data = {"_extractor": ImhentaiGalleryExtractor}
data = {"_extractor": self._gallery_extractor}
while True:
page = self.request(url).text
@@ -36,7 +36,12 @@ class ImhentaiExtractor(Extractor):
href = text.rextract(page, "class='page-link' href='", "'")[0]
if not href or href == "#":
return
url = text.ensure_http_scheme(href)
if href[0] == "/":
if href[1] == "/":
href = "https:" + href
else:
href = self.root + href
url = href
class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
@@ -62,7 +67,7 @@ class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
"artist" : self._split(extr(">Artists:</span>", "</li>")),
"group" : self._split(extr(">Groups:</span>", "</li>")),
"language" : self._split(extr(">Languages:</span>", "</li>")),
"type" : text.remove_html(extr(">Category:</span>", "<span")),
"type" : extr("href='/category/", "/"),
}
if data["language"]:
@@ -117,3 +122,6 @@ class ImhentaiSearchExtractor(ImhentaiExtractor):
def items(self):
    """Paginate over the search results for the matched URL.

    Appends `self.groups[0]` to the site's "/search/?" endpoint and
    returns whatever `_pagination` produces for that URL.
    """
    return self._pagination(self.root + "/search/?" + self.groups[0])
# Gallery extractor class that `_pagination` reads via
# `self._gallery_extractor`; assigned here because ImhentaiGalleryExtractor
# is defined after ImhentaiExtractor.
ImhentaiExtractor._gallery_extractor = ImhentaiGalleryExtractor