[imhentai] inherit from BaseExtractor

combine all imhentai-like sites into one module
This commit is contained in:
Mike Fährmann
2025-02-19 22:14:52 +01:00
parent 7a11d02e7a
commit 52d4e1a100
9 changed files with 108 additions and 175 deletions

View File

@@ -337,12 +337,6 @@ Consider all listed sites to potentially be NSFW.
<td>Chapters, Manga</td> <td>Chapters, Manga</td>
<td></td> <td></td>
</tr> </tr>
<tr>
<td>HentaiEra</td>
<td>https://hentaiera.com/</td>
<td>Galleries, Search Results, Tag Searches</td>
<td></td>
</tr>
<tr> <tr>
<td>HentaiFox</td> <td>HentaiFox</td>
<td>https://hentaifox.com/</td> <td>https://hentaifox.com/</td>
@@ -367,12 +361,6 @@ Consider all listed sites to potentially be NSFW.
<td>Galleries, Search Results</td> <td>Galleries, Search Results</td>
<td></td> <td></td>
</tr> </tr>
<tr>
<td>HentaiRox</td>
<td>https://hentairox.com/</td>
<td>Galleries, Search Results, Tag Searches</td>
<td></td>
</tr>
<tr> <tr>
<td>HiperDEX</td> <td>HiperDEX</td>
<td>https://hiperdex.com/</td> <td>https://hiperdex.com/</td>
@@ -463,12 +451,6 @@ Consider all listed sites to potentially be NSFW.
<td>Albums, Favorites, Favorites Folders, Galleries, individual Images, Personal Posts, Search Results, Subreddits, Tag Searches, User Profiles</td> <td>Albums, Favorites, Favorites Folders, Galleries, individual Images, Personal Posts, Search Results, Subreddits, Tag Searches, User Profiles</td>
<td></td> <td></td>
</tr> </tr>
<tr>
<td>IMHentai</td>
<td>https://imhentai.xxx/</td>
<td>Galleries, Search Results, Tag Searches</td>
<td></td>
</tr>
<tr> <tr>
<td>Imxto</td> <td>Imxto</td>
<td>https://imx.to/</td> <td>https://imx.to/</td>
@@ -1326,6 +1308,28 @@ Consider all listed sites to potentially be NSFW.
<td></td> <td></td>
</tr> </tr>
<tr>
<td colspan="4"><strong>IMHentai and Mirror Sites</strong></td>
</tr>
<tr>
<td>IMHentai</td>
<td>https://imhentai.xxx/</td>
<td>Galleries, Search Results, Tag Searches</td>
<td></td>
</tr>
<tr>
<td>HentaiEra</td>
<td>https://hentaiera.com/</td>
<td>Galleries, Search Results, Tag Searches</td>
<td></td>
</tr>
<tr>
<td>HentaiRox</td>
<td>https://hentairox.com/</td>
<td>Galleries, Search Results, Tag Searches</td>
<td></td>
</tr>
<tr> <tr>
<td colspan="4"><strong>jschan Imageboards</strong></td> <td colspan="4"><strong>jschan Imageboards</strong></td>
</tr> </tr>

View File

@@ -66,13 +66,11 @@ modules = [
"hatenablog", "hatenablog",
"hentai2read", "hentai2read",
"hentaicosplays", "hentaicosplays",
"hentaiera",
"hentaifoundry", "hentaifoundry",
"hentaifox", "hentaifox",
"hentaihand", "hentaihand",
"hentaihere", "hentaihere",
"hentainexus", "hentainexus",
"hentairox",
"hiperdex", "hiperdex",
"hitomi", "hitomi",
"hotleak", "hotleak",

View File

@@ -1,46 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://hentaiera.com/"""
from . import imhentai
# URL prefix shared by every extractor pattern in this module:
# optional "http://" or "https://", optional "www.", then the hentaiera.com host.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?hentaiera\.com"
class HentaieraExtractor:
    """Mixin carrying the site-specific settings for hentaiera.com.

    The concrete extractors in this module list it first among their
    bases, ahead of the matching imhentai extractor class, so these
    attributes take precedence over the inherited ones.
    """
    # Category name reported for this site.
    category = "hentaiera"
    # Base URL used to build request URLs.
    root = "https://hentaiera.com"
class HentaieraGalleryExtractor(
        HentaieraExtractor, imhentai.ImhentaiGalleryExtractor):
    """Extractor for hentaiera galleries"""
    # Matches /gallery/<id> and /view/<id>; the only capture group is the
    # numeric gallery id.
    pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)"
    example = "https://hentaiera.com/gallery/12345/"
class HentaieraTagExtractor(
        HentaieraExtractor, imhentai.ImhentaiTagExtractor):
    """Extractor for hentaiera tag searches"""
    subcategory = "tag"
    # Group 1 captures the whole "/<type>/<name>" path, group 2 only the
    # name; the alternation lists every listing type that is accepted.
    pattern = (BASE_PATTERN + r"(/(?:"
               r"artist|category|character|group|language|parody|tag"
               r")/([^/?#]+))")
    example = "https://hentaiera.com/tag/TAG/"
class HentaieraSearchExtractor(
        HentaieraExtractor, imhentai.ImhentaiSearchExtractor):
    """Extractor for hentaiera search results"""
    subcategory = "search"
    # Captures the query string after "/search?" or "/search/?", up to
    # but excluding any "#fragment".
    pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
    example = "https://hentaiera.com/search/?key=QUERY"
# Bound only after both classes exist: the gallery class derives from the
# mixin, so the mixin cannot name it inside its own class body.
# NOTE(review): presumably read by the inherited imhentai pagination code as
# the extractor class for queued gallery URLs - confirm in imhentai.py.
HentaieraExtractor._gallery_extractor = HentaieraGalleryExtractor

View File

@@ -1,46 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://hentairox.com/"""
from . import imhentai
# URL prefix shared by every extractor pattern in this module:
# optional "http://" or "https://", optional "www.", then the hentairox.com host.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?hentairox\.com"
class HentairoxExtractor:
    """Mixin carrying the site-specific settings for hentairox.com.

    The concrete extractors in this module list it first among their
    bases, ahead of the matching imhentai extractor class, so these
    attributes take precedence over the inherited ones.
    """
    # Category name reported for this site.
    category = "hentairox"
    # Base URL used to build request URLs.
    root = "https://hentairox.com"
class HentairoxGalleryExtractor(
        HentairoxExtractor, imhentai.ImhentaiGalleryExtractor):
    """Extractor for hentairox galleries"""
    # Matches /gallery/<id> and /view/<id>; the only capture group is the
    # numeric gallery id.
    pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)"
    example = "https://hentairox.com/gallery/12345/"
class HentairoxTagExtractor(
        HentairoxExtractor, imhentai.ImhentaiTagExtractor):
    """Extractor for hentairox tag searches"""
    subcategory = "tag"
    # Group 1 captures the whole "/<type>/<name>" path, group 2 only the
    # name; the alternation lists every listing type that is accepted.
    pattern = (BASE_PATTERN + r"(/(?:"
               r"artist|category|character|group|language|parody|tag"
               r")/([^/?#]+))")
    example = "https://hentairox.com/tag/TAG/"
class HentairoxSearchExtractor(
        HentairoxExtractor, imhentai.ImhentaiSearchExtractor):
    """Extractor for hentairox search results"""
    subcategory = "search"
    # Captures the query string after "/search?" or "/search/?", up to
    # but excluding any "#fragment".
    pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
    example = "https://hentairox.com/search/?key=QUERY"
# Bound only after both classes exist: the gallery class derives from the
# mixin, so the mixin cannot name it inside its own class body.
# NOTE(review): presumably read by the inherited imhentai pagination code as
# the extractor class for queued gallery URLs - confirm in imhentai.py.
HentairoxExtractor._gallery_extractor = HentairoxGalleryExtractor

View File

@@ -6,21 +6,18 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extractors for https://imhentai.xxx/""" """Extractors for https://imhentai.xxx/ and mirror sites"""
from .common import GalleryExtractor, Extractor, Message from .common import GalleryExtractor, BaseExtractor, Message
from .. import text, util from .. import text, util
BASE_PATTERN = r"(?:https?://)?(?:www\.)?imhentai\.xxx"
class ImhentaiExtractor(BaseExtractor):
class ImhentaiExtractor(Extractor): basecategory = "IMHentai"
category = "imhentai"
root = "https://imhentai.xxx"
def _pagination(self, url): def _pagination(self, url):
base = self.root + "/gallery/" base = self.root + "/gallery/"
data = {"_extractor": self._gallery_extractor} data = {"_extractor": ImhentaiGalleryExtractor}
while True: while True:
page = self.request(url).text page = self.request(url).text
@@ -44,15 +41,31 @@ class ImhentaiExtractor(Extractor):
url = href url = href
# One entry per supported site, keyed by category name; each entry gives
# the site's root URL and a regex for its host name.
# NOTE(review): the value returned by update() is used below as the URL
# prefix of every extractor pattern - confirm its exact shape (scheme
# handling, capture groups) against BaseExtractor.update in common.py.
BASE_PATTERN = ImhentaiExtractor.update({
    "imhentai": {
        "root": "https://imhentai.xxx",
        "pattern": r"(?:www\.)?imhentai\.xxx",
    },
    "hentaiera": {
        "root": "https://hentaiera.com",
        "pattern": r"(?:www\.)?hentaiera\.com",
    },
    "hentairox": {
        "root": "https://hentairox.com",
        "pattern": r"(?:www\.)?hentairox\.com",
    },
})
class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor): class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
"""Extractor for imhentai galleries""" """Extractor for imhentai galleries"""
pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)" pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)"
example = "https://imhentai.xxx/gallery/12345/" example = "https://imhentai.xxx/gallery/12345/"
def __init__(self, match): def __init__(self, match):
self.gallery_id = match.group(1) ImhentaiExtractor.__init__(self, match)
url = "{}/gallery/{}/".format(self.root, self.gallery_id) self.gallery_id = self.groups[-1]
GalleryExtractor.__init__(self, match, url) self.gallery_url = "{}/gallery/{}/".format(self.root, self.gallery_id)
def metadata(self, page): def metadata(self, page):
extr = text.extract_from(page) extr = text.extract_from(page)
@@ -109,7 +122,7 @@ class ImhentaiTagExtractor(ImhentaiExtractor):
example = "https://imhentai.xxx/tag/TAG/" example = "https://imhentai.xxx/tag/TAG/"
def items(self): def items(self):
url = self.root + self.groups[0] + "/" url = self.root + self.groups[-2] + "/"
return self._pagination(url) return self._pagination(url)
@@ -120,8 +133,5 @@ class ImhentaiSearchExtractor(ImhentaiExtractor):
example = "https://imhentai.xxx/search/?key=QUERY" example = "https://imhentai.xxx/search/?key=QUERY"
def items(self): def items(self):
url = self.root + "/search/?" + self.groups[0] url = self.root + "/search/?" + self.groups[-1]
return self._pagination(url) return self._pagination(url)
ImhentaiExtractor._gallery_extractor = ImhentaiGalleryExtractor

View File

@@ -383,6 +383,7 @@ BASE_MAP = {
"gelbooru_v01": "Gelbooru Beta 0.1.11", "gelbooru_v01": "Gelbooru Beta 0.1.11",
"gelbooru_v02": "Gelbooru Beta 0.2", "gelbooru_v02": "Gelbooru Beta 0.2",
"hentaicosplays": "Hentai Cosplay Instances", "hentaicosplays": "Hentai Cosplay Instances",
"IMHentai" : "IMHentai and Mirror Sites",
"jschan" : "jschan Imageboards", "jschan" : "jschan Imageboards",
"lolisafe" : "lolisafe and chibisafe", "lolisafe" : "lolisafe and chibisafe",
"lynxchan" : "LynxChan Imageboards", "lynxchan" : "LynxChan Imageboards",

View File

@@ -4,15 +4,16 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
from gallery_dl.extractor import hentaiera from gallery_dl.extractor import imhentai
__tests__ = ( __tests__ = (
{ {
"#url" : "https://hentaiera.com/gallery/28/", "#url" : "https://hentaiera.com/gallery/28/",
"#class" : hentaiera.HentaieraGalleryExtractor, "#category": ("IMHentai", "hentaiera", "gallery"),
"#pattern": r"https://m1\.hentaiera\.com/001/knrxtga49v/\d+\.jpg", "#class" : imhentai.ImhentaiGalleryExtractor,
"#count" : 25, "#pattern" : r"https://m1\.hentaiera\.com/001/knrxtga49v/\d+\.jpg",
"#count" : 25,
"count" : 25, "count" : 25,
"extension" : "jpg", "extension" : "jpg",
@@ -58,10 +59,11 @@ __tests__ = (
}, },
{ {
"#url" : "https://hentaiera.com/gallery/9319/", "#url" : "https://hentaiera.com/gallery/9319/",
"#class" : hentaiera.HentaieraGalleryExtractor, "#category": ("IMHentai", "hentaiera", "gallery"),
"#pattern": r"https://m1\.hentaiera\.com/001/gkchsf3x5m/\d+\.jpg", "#class" : imhentai.ImhentaiGalleryExtractor,
"#count" : 8, "#pattern" : r"https://m1\.hentaiera\.com/001/gkchsf3x5m/\d+\.jpg",
"#count" : 8,
"count" : 8, "count" : 8,
"extension" : "jpg", "extension" : "jpg",
@@ -103,17 +105,19 @@ __tests__ = (
}, },
{ {
"#url" : "https://hentaiera.com/artist/kujiran/", "#url" : "https://hentaiera.com/artist/kujiran/",
"#class" : hentaiera.HentaieraTagExtractor, "#category": ("IMHentai", "hentaiera", "tag"),
"#pattern": hentaiera.HentaieraGalleryExtractor.pattern, "#class" : imhentai.ImhentaiTagExtractor,
"#count" : range(120, 150), "#pattern" : imhentai.ImhentaiGalleryExtractor.pattern,
"#count" : range(120, 150),
}, },
{ {
"#url" : "https://hentaiera.com/search/?key=kujiran", "#url" : "https://hentaiera.com/search/?key=kujiran",
"#class" : hentaiera.HentaieraSearchExtractor, "#category": ("IMHentai", "hentaiera", "search"),
"#pattern": hentaiera.HentaieraGalleryExtractor.pattern, "#class" : imhentai.ImhentaiSearchExtractor,
"#count" : range(120, 150), "#pattern" : imhentai.ImhentaiGalleryExtractor.pattern,
"#count" : range(120, 150),
}, },
) )

View File

@@ -4,15 +4,16 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
from gallery_dl.extractor import hentairox from gallery_dl.extractor import imhentai
__tests__ = ( __tests__ = (
{ {
"#url" : "https://hentairox.com/gallery/25/", "#url" : "https://hentairox.com/gallery/25/",
"#class" : hentairox.HentairoxGalleryExtractor, "#category": ("IMHentai", "hentairox", "gallery"),
"#pattern": r"https://m1\.hentairox\.com/001/knrxtga49v/\d+\.jpg", "#class" : imhentai.ImhentaiGalleryExtractor,
"#count" : 25, "#pattern" : r"https://m1\.hentairox\.com/001/knrxtga49v/\d+\.jpg",
"#count" : 25,
"count" : 25, "count" : 25,
"extension" : "jpg", "extension" : "jpg",
@@ -59,7 +60,8 @@ __tests__ = (
{ {
"#url" : "https://hentairox.com/gallery/8526/", "#url" : "https://hentairox.com/gallery/8526/",
"#class" : hentairox.HentairoxGalleryExtractor, "#category": ("IMHentai", "hentairox", "gallery"),
"#class" : imhentai.ImhentaiGalleryExtractor,
"#pattern": r"https://m1\.hentairox\.com/001/gkchsf3x5m/\d+\.jpg", "#pattern": r"https://m1\.hentairox\.com/001/gkchsf3x5m/\d+\.jpg",
"#count" : 8, "#count" : 8,
@@ -103,17 +105,19 @@ __tests__ = (
}, },
{ {
"#url" : "https://hentairox.com/artist/kizuki-aruchu/", "#url" : "https://hentairox.com/artist/kizuki-aruchu/",
"#class" : hentairox.HentairoxTagExtractor, "#category": ("IMHentai", "hentairox", "tag"),
"#pattern": hentairox.HentairoxGalleryExtractor.pattern, "#class" : imhentai.ImhentaiTagExtractor,
"#count" : range(140, 160), "#pattern" : imhentai.ImhentaiGalleryExtractor.pattern,
"#count" : range(140, 160),
}, },
{ {
"#url" : "https://hentairox.com/search/?key=aruchu", "#url" : "https://hentairox.com/search/?key=aruchu",
"#class" : hentairox.HentairoxSearchExtractor, "#category": ("IMHentai", "hentairox", "search"),
"#pattern": hentairox.HentairoxGalleryExtractor.pattern, "#class" : imhentai.ImhentaiSearchExtractor,
"#count" : range(140, 160), "#pattern" : imhentai.ImhentaiGalleryExtractor.pattern,
"#count" : range(140, 160),
}, },
) )

View File

@@ -9,10 +9,11 @@ from gallery_dl.extractor import imhentai
__tests__ = ( __tests__ = (
{ {
"#url" : "https://imhentai.xxx/gallery/12/", "#url" : "https://imhentai.xxx/gallery/12/",
"#class" : imhentai.ImhentaiGalleryExtractor, "#category": ("IMHentai", "imhentai", "gallery"),
"#pattern": r"https://m1\.imhentai\.xxx/001/3x907ntq18/\d+\.jpg", "#class" : imhentai.ImhentaiGalleryExtractor,
"#count" : 94, "#pattern" : r"https://m1\.imhentai\.xxx/001/3x907ntq18/\d+\.jpg",
"#count" : 94,
"count" : 94, "count" : 94,
"extension" : "jpg", "extension" : "jpg",
@@ -61,10 +62,11 @@ __tests__ = (
}, },
{ {
"#url" : "https://imhentai.xxx/gallery/1396508/", "#url" : "https://imhentai.xxx/gallery/1396508/",
"#class" : imhentai.ImhentaiGalleryExtractor, "#category": ("IMHentai", "imhentai", "gallery"),
"#pattern": r"https://m9\.imhentai\.xxx/028/po9f4w3jzx/\d+\.webp", "#class" : imhentai.ImhentaiGalleryExtractor,
"#count" : 34, "#pattern" : r"https://m9\.imhentai\.xxx/028/po9f4w3jzx/\d+\.webp",
"#count" : 34,
"count" : 34, "count" : 34,
"extension" : "webp", "extension" : "webp",
@@ -113,17 +115,19 @@ __tests__ = (
}, },
{ {
"#url" : "https://imhentai.xxx/artist/asutora/", "#url" : "https://imhentai.xxx/artist/asutora/",
"#class" : imhentai.ImhentaiTagExtractor, "#category": ("IMHentai", "imhentai", "tag"),
"#pattern": imhentai.ImhentaiGalleryExtractor.pattern, "#class" : imhentai.ImhentaiTagExtractor,
"#count" : range(30, 50), "#pattern" : imhentai.ImhentaiGalleryExtractor.pattern,
"#count" : range(30, 50),
}, },
{ {
"#url" : "https://imhentai.xxx/search/?lt=1&pp=0&m=1&d=1&w=1&i=1&a=1&g=1&key=asutora&apply=Search&en=1&jp=1&es=1&fr=1&kr=1&de=1&ru=1&dl=0&tr=0", "#url" : "https://imhentai.xxx/search/?lt=1&pp=0&m=1&d=1&w=1&i=1&a=1&g=1&key=asutora&apply=Search&en=1&jp=1&es=1&fr=1&kr=1&de=1&ru=1&dl=0&tr=0",
"#class" : imhentai.ImhentaiSearchExtractor, "#category": ("IMHentai", "imhentai", "search"),
"#pattern": imhentai.ImhentaiGalleryExtractor.pattern, "#class" : imhentai.ImhentaiSearchExtractor,
"#count" : range(30, 50), "#pattern" : imhentai.ImhentaiGalleryExtractor.pattern,
"#count" : range(30, 50),
}, },
) )