From 52d4e1a100cb1883cf36fb99530f96b2c03f083a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 19 Feb 2025 22:14:52 +0100 Subject: [PATCH] [imhentai] inherit from BaseExtractor combine all imhentai-like sites into one module --- docs/supportedsites.md | 40 +++++++++++++++------------ gallery_dl/extractor/__init__.py | 2 -- gallery_dl/extractor/hentaiera.py | 46 ------------------------------- gallery_dl/extractor/hentairox.py | 46 ------------------------------- gallery_dl/extractor/imhentai.py | 42 +++++++++++++++++----------- scripts/supportedsites.py | 1 + test/results/hentaiera.py | 38 +++++++++++++------------ test/results/hentairox.py | 32 +++++++++++---------- test/results/imhentai.py | 36 +++++++++++++----------- 9 files changed, 108 insertions(+), 175 deletions(-) delete mode 100644 gallery_dl/extractor/hentaiera.py delete mode 100644 gallery_dl/extractor/hentairox.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index bcdd1809..dc716619 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -337,12 +337,6 @@ Consider all listed sites to potentially be NSFW. Chapters, Manga - - HentaiEra - https://hentaiera.com/ - Galleries, Search Results, Tag Searches - - HentaiFox https://hentaifox.com/ @@ -367,12 +361,6 @@ Consider all listed sites to potentially be NSFW. Galleries, Search Results - - HentaiRox - https://hentairox.com/ - Galleries, Search Results, Tag Searches - - HiperDEX https://hiperdex.com/ @@ -463,12 +451,6 @@ Consider all listed sites to potentially be NSFW. Albums, Favorites, Favorites Folders, Galleries, individual Images, Personal Posts, Search Results, Subreddits, Tag Searches, User Profiles - - IMHentai - https://imhentai.xxx/ - Galleries, Search Results, Tag Searches - - Imxto https://imx.to/ @@ -1326,6 +1308,28 @@ Consider all listed sites to potentially be NSFW. + + IMHentai and Mirror Sites + + + IMHentai + https://imhentai.xxx/ + Galleries, Search Results, Tag Searches + + + + HentaiEra + https://hentaiera.com/ + Galleries, Search Results, Tag Searches + + + + HentaiRox + https://hentairox.com/ + Galleries, Search Results, Tag Searches + + + jschan Imageboards diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 3fe44101..ae45d2d6 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -66,13 +66,11 @@ modules = [ "hatenablog", "hentai2read", "hentaicosplays", - "hentaiera", "hentaifoundry", "hentaifox", "hentaihand", "hentaihere", "hentainexus", - "hentairox", "hiperdex", "hitomi", "hotleak", diff --git a/gallery_dl/extractor/hentaiera.py b/gallery_dl/extractor/hentaiera.py deleted file mode 100644 index 432ee002..00000000 --- a/gallery_dl/extractor/hentaiera.py +++ /dev/null @@ -1,46 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2025 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extractors for https://hentaiera.com/""" - -from . import imhentai - -BASE_PATTERN = r"(?:https?://)?(?:www\.)?hentaiera\.com" - - -class HentaieraExtractor(): - category = "hentaiera" - root = "https://hentaiera.com" - - -class HentaieraGalleryExtractor( - HentaieraExtractor, imhentai.ImhentaiGalleryExtractor): - """Extractor for hentaiera galleries""" - pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)" - example = "https://hentaiera.com/gallery/12345/" - - -class HentaieraTagExtractor( - HentaieraExtractor, imhentai.ImhentaiTagExtractor): - """Extractor for hentaiera tag searches""" - subcategory = "tag" - pattern = (BASE_PATTERN + r"(/(?:" - r"artist|category|character|group|language|parody|tag" - r")/([^/?#]+))") - example = "https://hentaiera.com/tag/TAG/" - - -class HentaieraSearchExtractor( - HentaieraExtractor, imhentai.ImhentaiSearchExtractor): - """Extractor for hentaiera search results""" - subcategory = "search" - pattern = BASE_PATTERN + r"/search/?\?([^#]+)" - example = "https://hentaiera.com/search/?key=QUERY" - - -HentaieraExtractor._gallery_extractor = HentaieraGalleryExtractor diff --git a/gallery_dl/extractor/hentairox.py b/gallery_dl/extractor/hentairox.py deleted file mode 100644 index 6b12380b..00000000 --- a/gallery_dl/extractor/hentairox.py +++ /dev/null @@ -1,46 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2025 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extractors for https://hentairox.com/""" - -from . import imhentai - -BASE_PATTERN = r"(?:https?://)?(?:www\.)?hentairox\.com" - - -class HentairoxExtractor(): - category = "hentairox" - root = "https://hentairox.com" - - -class HentairoxGalleryExtractor( - HentairoxExtractor, imhentai.ImhentaiGalleryExtractor): - """Extractor for hentairox galleries""" - pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)" - example = "https://hentairox.com/gallery/12345/" - - -class HentairoxTagExtractor( - HentairoxExtractor, imhentai.ImhentaiTagExtractor): - """Extractor for hentairox tag searches""" - subcategory = "tag" - pattern = (BASE_PATTERN + r"(/(?:" - r"artist|category|character|group|language|parody|tag" - r")/([^/?#]+))") - example = "https://hentairox.com/tag/TAG/" - - -class HentairoxSearchExtractor( - HentairoxExtractor, imhentai.ImhentaiSearchExtractor): - """Extractor for hentairox search results""" - subcategory = "search" - pattern = BASE_PATTERN + r"/search/?\?([^#]+)" - example = "https://hentairox.com/search/?key=QUERY" - - -HentairoxExtractor._gallery_extractor = HentairoxGalleryExtractor diff --git a/gallery_dl/extractor/imhentai.py b/gallery_dl/extractor/imhentai.py index c3488acf..c2096b79 100644 --- a/gallery_dl/extractor/imhentai.py +++ b/gallery_dl/extractor/imhentai.py @@ -6,21 +6,18 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for https://imhentai.xxx/""" +"""Extractors for https://imhentai.xxx/ and mirror sites""" -from .common import GalleryExtractor, Extractor, Message +from .common import GalleryExtractor, BaseExtractor, Message from .. import text, util -BASE_PATTERN = r"(?:https?://)?(?:www\.)?imhentai\.xxx" - -class ImhentaiExtractor(Extractor): - category = "imhentai" - root = "https://imhentai.xxx" +class ImhentaiExtractor(BaseExtractor): + basecategory = "IMHentai" def _pagination(self, url): base = self.root + "/gallery/" - data = {"_extractor": self._gallery_extractor} + data = {"_extractor": ImhentaiGalleryExtractor} while True: page = self.request(url).text @@ -44,15 +41,31 @@ class ImhentaiExtractor(Extractor): url = href +BASE_PATTERN = ImhentaiExtractor.update({ + "imhentai": { + "root": "https://imhentai.xxx", + "pattern": r"(?:www\.)?imhentai\.xxx", + }, + "hentaiera": { + "root": "https://hentaiera.com", + "pattern": r"(?:www\.)?hentaiera\.com", + }, + "hentairox": { + "root": "https://hentairox.com", + "pattern": r"(?:www\.)?hentairox\.com", + }, +}) + + class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor): """Extractor for imhentai galleries""" pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)" example = "https://imhentai.xxx/gallery/12345/" def __init__(self, match): - self.gallery_id = match.group(1) - url = "{}/gallery/{}/".format(self.root, self.gallery_id) - GalleryExtractor.__init__(self, match, url) + ImhentaiExtractor.__init__(self, match) + self.gallery_id = self.groups[-1] + self.gallery_url = "{}/gallery/{}/".format(self.root, self.gallery_id) def metadata(self, page): extr = text.extract_from(page) @@ -109,7 +122,7 @@ class ImhentaiTagExtractor(ImhentaiExtractor): example = "https://imhentai.xxx/tag/TAG/" def items(self): - url = self.root + self.groups[0] + "/" + url = self.root + self.groups[-2] + "/" return self._pagination(url) @@ -120,8 +133,5 @@ class ImhentaiSearchExtractor(ImhentaiExtractor): example = "https://imhentai.xxx/search/?key=QUERY" def items(self): - url = self.root + "/search/?" + self.groups[0] + url = self.root + "/search/?" + self.groups[-1] return self._pagination(url) - - -ImhentaiExtractor._gallery_extractor = ImhentaiGalleryExtractor diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index e691c175..92fa4268 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -383,6 +383,7 @@ BASE_MAP = { "gelbooru_v01": "Gelbooru Beta 0.1.11", "gelbooru_v02": "Gelbooru Beta 0.2", "hentaicosplays": "Hentai Cosplay Instances", + "IMHentai" : "IMHentai and Mirror Sites", "jschan" : "jschan Imageboards", "lolisafe" : "lolisafe and chibisafe", "lynxchan" : "LynxChan Imageboards", diff --git a/test/results/hentaiera.py b/test/results/hentaiera.py index f94b5e6d..c2c7faed 100644 --- a/test/results/hentaiera.py +++ b/test/results/hentaiera.py @@ -4,15 +4,16 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -from gallery_dl.extractor import hentaiera +from gallery_dl.extractor import imhentai __tests__ = ( { - "#url" : "https://hentaiera.com/gallery/28/", - "#class" : hentaiera.HentaieraGalleryExtractor, - "#pattern": r"https://m1\.hentaiera\.com/001/knrxtga49v/\d+\.jpg", - "#count" : 25, + "#url" : "https://hentaiera.com/gallery/28/", + "#category": ("IMHentai", "hentaiera", "gallery"), + "#class" : imhentai.ImhentaiGalleryExtractor, + "#pattern" : r"https://m1\.hentaiera\.com/001/knrxtga49v/\d+\.jpg", + "#count" : 25, "count" : 25, "extension" : "jpg", @@ -58,10 +59,11 @@ __tests__ = ( }, { - "#url" : "https://hentaiera.com/gallery/9319/", - "#class" : hentaiera.HentaieraGalleryExtractor, - "#pattern": r"https://m1\.hentaiera\.com/001/gkchsf3x5m/\d+\.jpg", - "#count" : 8, + "#url" : "https://hentaiera.com/gallery/9319/", + "#category": ("IMHentai", "hentaiera", "gallery"), + "#class" : imhentai.ImhentaiGalleryExtractor, + "#pattern" : r"https://m1\.hentaiera\.com/001/gkchsf3x5m/\d+\.jpg", + "#count" : 8, "count" : 8, "extension" : "jpg", @@ -103,17 +105,19 @@ __tests__ = ( }, { - "#url" : "https://hentaiera.com/artist/kujiran/", - "#class" : hentaiera.HentaieraTagExtractor, - "#pattern": hentaiera.HentaieraGalleryExtractor.pattern, - "#count" : range(120, 150), + "#url" : "https://hentaiera.com/artist/kujiran/", + "#category": ("IMHentai", "hentaiera", "tag"), + "#class" : imhentai.ImhentaiTagExtractor, + "#pattern" : imhentai.ImhentaiGalleryExtractor.pattern, + "#count" : range(120, 150), }, { - "#url" : "https://hentaiera.com/search/?key=kujiran", - "#class" : hentaiera.HentaieraSearchExtractor, - "#pattern": hentaiera.HentaieraGalleryExtractor.pattern, - "#count" : range(120, 150), + "#url" : "https://hentaiera.com/search/?key=kujiran", + "#category": ("IMHentai", "hentaiera", "search"), + "#class" : imhentai.ImhentaiSearchExtractor, + "#pattern" : imhentai.ImhentaiGalleryExtractor.pattern, + "#count" : range(120, 150), }, ) diff --git a/test/results/hentairox.py b/test/results/hentairox.py index 071083bb..4cea5ea6 100644 --- a/test/results/hentairox.py +++ b/test/results/hentairox.py @@ -4,15 +4,16 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -from gallery_dl.extractor import hentairox +from gallery_dl.extractor import imhentai __tests__ = ( { - "#url" : "https://hentairox.com/gallery/25/", - "#class" : hentairox.HentairoxGalleryExtractor, - "#pattern": r"https://m1\.hentairox\.com/001/knrxtga49v/\d+\.jpg", - "#count" : 25, + "#url" : "https://hentairox.com/gallery/25/", + "#category": ("IMHentai", "hentairox", "gallery"), + "#class" : imhentai.ImhentaiGalleryExtractor, + "#pattern" : r"https://m1\.hentairox\.com/001/knrxtga49v/\d+\.jpg", + "#count" : 25, "count" : 25, "extension" : "jpg", @@ -59,7 +60,8 @@ __tests__ = ( { "#url" : "https://hentairox.com/gallery/8526/", - "#class" : hentairox.HentairoxGalleryExtractor, + "#category": ("IMHentai", "hentairox", "gallery"), + "#class" : imhentai.ImhentaiGalleryExtractor, "#pattern": r"https://m1\.hentairox\.com/001/gkchsf3x5m/\d+\.jpg", "#count" : 8, @@ -103,17 +105,19 @@ __tests__ = ( }, { - "#url" : "https://hentairox.com/artist/kizuki-aruchu/", - "#class" : hentairox.HentairoxTagExtractor, - "#pattern": hentairox.HentairoxGalleryExtractor.pattern, - "#count" : range(140, 160), + "#url" : "https://hentairox.com/artist/kizuki-aruchu/", + "#category": ("IMHentai", "hentairox", "tag"), + "#class" : imhentai.ImhentaiTagExtractor, + "#pattern" : imhentai.ImhentaiGalleryExtractor.pattern, + "#count" : range(140, 160), }, { - "#url" : "https://hentairox.com/search/?key=aruchu", - "#class" : hentairox.HentairoxSearchExtractor, - "#pattern": hentairox.HentairoxGalleryExtractor.pattern, - "#count" : range(140, 160), + "#url" : "https://hentairox.com/search/?key=aruchu", + "#category": ("IMHentai", "hentairox", "search"), + "#class" : imhentai.ImhentaiSearchExtractor, + "#pattern" : imhentai.ImhentaiGalleryExtractor.pattern, + "#count" : range(140, 160), }, ) diff --git a/test/results/imhentai.py b/test/results/imhentai.py index 6eee93e5..75c58eb6 100644 --- a/test/results/imhentai.py +++ b/test/results/imhentai.py @@ -9,10 +9,11 @@ from gallery_dl.extractor import imhentai __tests__ = ( { - "#url" : "https://imhentai.xxx/gallery/12/", - "#class" : imhentai.ImhentaiGalleryExtractor, - "#pattern": r"https://m1\.imhentai\.xxx/001/3x907ntq18/\d+\.jpg", - "#count" : 94, + "#url" : "https://imhentai.xxx/gallery/12/", + "#category": ("IMHentai", "imhentai", "gallery"), + "#class" : imhentai.ImhentaiGalleryExtractor, + "#pattern" : r"https://m1\.imhentai\.xxx/001/3x907ntq18/\d+\.jpg", + "#count" : 94, "count" : 94, "extension" : "jpg", @@ -61,10 +62,11 @@ __tests__ = ( }, { - "#url" : "https://imhentai.xxx/gallery/1396508/", - "#class" : imhentai.ImhentaiGalleryExtractor, - "#pattern": r"https://m9\.imhentai\.xxx/028/po9f4w3jzx/\d+\.webp", - "#count" : 34, + "#url" : "https://imhentai.xxx/gallery/1396508/", + "#category": ("IMHentai", "imhentai", "gallery"), + "#class" : imhentai.ImhentaiGalleryExtractor, + "#pattern" : r"https://m9\.imhentai\.xxx/028/po9f4w3jzx/\d+\.webp", + "#count" : 34, "count" : 34, "extension" : "webp", @@ -113,17 +115,19 @@ __tests__ = ( }, { - "#url" : "https://imhentai.xxx/artist/asutora/", - "#class" : imhentai.ImhentaiTagExtractor, - "#pattern": imhentai.ImhentaiGalleryExtractor.pattern, - "#count" : range(30, 50), + "#url" : "https://imhentai.xxx/artist/asutora/", + "#category": ("IMHentai", "imhentai", "tag"), + "#class" : imhentai.ImhentaiTagExtractor, + "#pattern" : imhentai.ImhentaiGalleryExtractor.pattern, + "#count" : range(30, 50), }, { - "#url" : "https://imhentai.xxx/search/?lt=1&pp=0&m=1&d=1&w=1&i=1&a=1&g=1&key=asutora&apply=Search&en=1&jp=1&es=1&fr=1&kr=1&de=1&ru=1&dl=0&tr=0", - "#class" : imhentai.ImhentaiSearchExtractor, - "#pattern": imhentai.ImhentaiGalleryExtractor.pattern, - "#count" : range(30, 50), + "#url" : "https://imhentai.xxx/search/?lt=1&pp=0&m=1&d=1&w=1&i=1&a=1&g=1&key=asutora&apply=Search&en=1&jp=1&es=1&fr=1&kr=1&de=1&ru=1&dl=0&tr=0", + "#category": ("IMHentai", "imhentai", "search"), + "#class" : imhentai.ImhentaiSearchExtractor, + "#pattern" : imhentai.ImhentaiGalleryExtractor.pattern, + "#count" : range(30, 50), }, )