diff --git a/docs/supportedsites.md b/docs/supportedsites.md index b7b8dec8..2ee97048 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -355,6 +355,12 @@ Consider all listed sites to potentially be NSFW. Galleries, Search Results + + HentaiRox + https://hentairox.com/ + Galleries, Search Results, Tag Searches + + HiperDEX https://hiperdex.com/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index c5778bfa..f2b141a6 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -70,6 +70,7 @@ modules = [ "hentaihand", "hentaihere", "hentainexus", + "hentairox", "hiperdex", "hitomi", "hotleak", diff --git a/gallery_dl/extractor/hentairox.py b/gallery_dl/extractor/hentairox.py new file mode 100644 index 00000000..6b12380b --- /dev/null +++ b/gallery_dl/extractor/hentairox.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- + +# Copyright 2025 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://hentairox.com/""" + +from . import imhentai + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?hentairox\.com" + + +class HentairoxExtractor(): + category = "hentairox" + root = "https://hentairox.com" + + +class HentairoxGalleryExtractor( + HentairoxExtractor, imhentai.ImhentaiGalleryExtractor): + """Extractor for hentairox galleries""" + pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)" + example = "https://hentairox.com/gallery/12345/" + + +class HentairoxTagExtractor( + HentairoxExtractor, imhentai.ImhentaiTagExtractor): + """Extractor for hentairox tag searches""" + subcategory = "tag" + pattern = (BASE_PATTERN + r"(/(?:" + r"artist|category|character|group|language|parody|tag" + r")/([^/?#]+))") + example = "https://hentairox.com/tag/TAG/" + + +class HentairoxSearchExtractor( + HentairoxExtractor, imhentai.ImhentaiSearchExtractor): + """Extractor for hentairox search results""" + subcategory = "search" + pattern = BASE_PATTERN + r"/search/?\?([^#]+)" + example = "https://hentairox.com/search/?key=QUERY" + + +HentairoxExtractor._gallery_extractor = HentairoxGalleryExtractor diff --git a/gallery_dl/extractor/imhentai.py b/gallery_dl/extractor/imhentai.py index 47027a8f..091caba9 100644 --- a/gallery_dl/extractor/imhentai.py +++ b/gallery_dl/extractor/imhentai.py @@ -20,7 +20,7 @@ class ImhentaiExtractor(Extractor): def _pagination(self, url): base = self.root + "/gallery/" - data = {"_extractor": ImhentaiGalleryExtractor} + data = {"_extractor": self._gallery_extractor} while True: page = self.request(url).text @@ -36,7 +36,12 @@ class ImhentaiExtractor(Extractor): href = text.rextract(page, "class='page-link' href='", "'")[0] if not href or href == "#": return - url = text.ensure_http_scheme(href) + if href[0] == "/": + if href[1] == "/": + href = "https:" + href + else: + href = self.root + href + url = href class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor): @@ -62,7 +67,7 @@ class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor): "artist" : self._split(extr(">Artists:", "")), "group" : self._split(extr(">Groups:", "")), "language" : self._split(extr(">Languages:", "")), - "type" : text.remove_html(extr(">Category:", "