@@ -439,6 +439,12 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Albums, Favorites, Favorites Folders, Galleries, individual Images, Search Results, Subreddits, Tag Searches, User Profiles</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>IMHentai</td>
|
||||
<td>https://imhentai.xxx/</td>
|
||||
<td>Galleries, Search Results, Tag Searches</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Imxto</td>
|
||||
<td>https://imx.to/</td>
|
||||
|
||||
@@ -80,6 +80,7 @@ modules = [
|
||||
"imgbox",
|
||||
"imgth",
|
||||
"imgur",
|
||||
"imhentai",
|
||||
"inkbunny",
|
||||
"instagram",
|
||||
"issuu",
|
||||
|
||||
121
gallery_dl/extractor/imhentai.py
Normal file
121
gallery_dl/extractor/imhentai.py
Normal file
@@ -0,0 +1,121 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2025 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://imhentai.xxx/"""
|
||||
|
||||
from .common import GalleryExtractor, Extractor, Message
|
||||
from .. import text, util
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?imhentai\.xxx"
|
||||
|
||||
|
||||
class ImhentaiExtractor(Extractor):
    """Base class for imhentai extractors"""
    category = "imhentai"
    root = "https://imhentai.xxx"

    def _pagination(self, url):
        """Yield a Queue message for every gallery linked from a listing

        Starting at 'url', each listing page is requested and scanned for
        '<a href="/gallery/...">' links. Every distinct consecutive link is
        emitted as (Message.Queue, gallery URL, data) with
        ImhentaiGalleryExtractor set as the target extractor.

        After a page has been scanned, the 'page-link' href returned by
        text.rextract() is followed. Iteration stops when no such href
        exists or when it is the "#" placeholder.
        """
        base = self.root + "/gallery/"
        data = {"_extractor": ImhentaiGalleryExtractor}

        while True:
            page = self.request(url).text
            extr = text.extract_from(page)

            # Listing entries link to the same gallery more than once in a
            # row. Compare against the ID emitted last instead of blindly
            # discarding every second match, so an entry that is linked
            # only once neither gets dropped nor shifts the pairing.
            previous = None
            while True:
                gallery_id = extr('<a href="/gallery/', '"')
                if not gallery_id:
                    break
                if gallery_id == previous:
                    continue
                previous = gallery_id
                yield Message.Queue, base + gallery_id, data

            href = text.rextract(page, "class='page-link' href='", "'")[0]
            if not href or href == "#":
                return

            if href[0] == "/" and href[1:2] != "/":
                # Root-relative link ("/tag/x/?page=2"): it carries no
                # host, so it has to be resolved against the site root.
                url = self.root + href
            else:
                # Absolute or protocol-relative ("//host/...") link.
                url = text.ensure_http_scheme(href)
|
||||
|
||||
|
||||
class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
    """Extractor for imhentai galleries"""
    pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)"
    example = "https://imhentai.xxx/gallery/12345/"

    def __init__(self, match):
        """Remember the gallery ID and set up the gallery page URL"""
        self.gallery_id = gid = match.group(1)
        GalleryExtractor.__init__(
            self, match, self.root + "/gallery/" + gid + "/")

    def metadata(self, page):
        """Collect gallery metadata from the HTML of the gallery page"""
        extr = text.extract_from(page)

        # The extr() calls below are issued in exactly this order;
        # presumably the extractor advances through 'page' as it goes,
        # so the sequence must not be rearranged.
        data = {"gallery_id": text.parse_int(self.gallery_id)}
        data["title"] = text.unescape(extr("<h1>", "<"))
        data["title_alt"] = text.unescape(extr('class="subtitle">', "<"))

        list_fields = (
            ("parody"   , ">Parodies:</span>"),
            ("character", ">Characters:</span>"),
            ("tags"     , ">Tags:</span>"),
            ("artist"   , ">Artists:</span>"),
            ("group"    , ">Groups:</span>"),
            ("language" , ">Languages:</span>"),
        )
        for key, label in list_fields:
            data[key] = self._split(extr(label, "</li>"))

        data["type"] = text.remove_html(extr(">Category:</span>", "<span"))

        # 'lang' is only set when at least one language is listed
        languages = data["language"]
        if languages:
            data["lang"] = util.language_to_code(languages[0])

        return data

    def _split(self, html):
        """Return the link texts found in 'html' as a list of names"""
        badge = " <span class='badge'>"
        names = []
        for entry in text.extract_iter(html, ">", "</a>"):
            # drop the trailing badge element that follows the name
            name = entry.partition(badge)[0]
            names.append(text.remove_html(name) if "<" in name else name)
        return names

    def images(self, _):
        """Return (URL, metadata) pairs for all images of the gallery"""
        viewer = self.request(
            "{}/view/{}/1/".format(self.root, self.gallery_id)).text

        # per-image "<type>,<width>,<height>" strings keyed by "1", "2", ...
        info = util.json_loads(text.extr(viewer, "$.parseJSON('", "'"))
        # directory part of the first 'data-src' URL on the viewer page
        base = text.extr(viewer, 'data-src="', '"').rpartition("/")[0] + "/"
        exts = {"j": "jpg", "p": "png", "g": "gif", "w": "webp", "a": "avif"}

        files = []
        for num in range(1, len(info) + 1):
            num = str(num)
            code, width, height = info[num].split(",")
            files.append((
                base + num + "." + exts[code],
                {
                    "width" : text.parse_int(width),
                    "height": text.parse_int(height),
                },
            ))
        return files
|
||||
|
||||
|
||||
class ImhentaiTagExtractor(ImhentaiExtractor):
    """Extractor for imhentai tag searches"""
    subcategory = "tag"
    # group 1: the whole "/<kind>/<name>" path, group 2: the name only
    pattern = (
        BASE_PATTERN +
        r"(/(?:artist|category|character|group|language|parody|tag)"
        r"/([^/?#]+))"
    )
    example = "https://imhentai.xxx/tag/TAG/"

    def items(self):
        """Paginate over the listing that belongs to the matched path"""
        path = self.groups[0]
        return self._pagination("{}{}/".format(self.root, path))
|
||||
|
||||
|
||||
class ImhentaiSearchExtractor(ImhentaiExtractor):
    """Extractor for imhentai search results"""
    subcategory = "search"
    pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
    example = "https://imhentai.xxx/search/?key=QUERY"

    def items(self):
        """Paginate over the results for the matched query string"""
        query = self.groups[0]
        return self._pagination(
            "{}/search/?{}".format(self.root, query))
|
||||
@@ -79,6 +79,7 @@ CATEGORY_MAP = {
|
||||
"imgkiwi" : "IMG.Kiwi",
|
||||
"imgth" : "imgth",
|
||||
"imgur" : "imgur",
|
||||
"imhentai" : "IMHentai",
|
||||
"joyreactor" : "JoyReactor",
|
||||
"itchio" : "itch.io",
|
||||
"jpgfish" : "JPG Fish",
|
||||
|
||||
129
test/results/imhentai.py
Normal file
129
test/results/imhentai.py
Normal file
@@ -0,0 +1,129 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import imhentai
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://imhentai.xxx/gallery/12/",
|
||||
"#class" : imhentai.ImhentaiGalleryExtractor,
|
||||
"#pattern": r"https://m1\.imhentai\.xxx/001/3x907ntq18/\d+\.jpg",
|
||||
"#count" : 94,
|
||||
|
||||
"count" : 94,
|
||||
"extension" : "jpg",
|
||||
"filename" : str,
|
||||
"gallery_id": 12,
|
||||
"lang" : "en",
|
||||
"num" : range(1, 94),
|
||||
"title" : "(C67) [Studio Kimigabuchi (Kimimaru)] RE-TAKE 2 (Neon Genesis Evangelion) [English]",
|
||||
"title_alt" : "(C67) [スタジオKIMIGABUCHI (きみまる)] RE-TAKE2 (新世紀エヴァンゲリオン) [英訳]",
|
||||
"type" : "doujinshi",
|
||||
"width" : {835, 838, 841, 1200},
|
||||
"height" : {862, 865, 1200},
|
||||
|
||||
"artist": [
|
||||
"kimimaru | entokkun",
|
||||
],
|
||||
"character": [
|
||||
"asuka langley soryu",
|
||||
"gendo ikari",
|
||||
"makoto hyuga",
|
||||
"maya ibuki",
|
||||
"misato katsuragi",
|
||||
"rei ayanami",
|
||||
"shigeru aoba",
|
||||
"shinji ikari",
|
||||
],
|
||||
"group": [
|
||||
"studio kimigabuchi",
|
||||
],
|
||||
"language": [
|
||||
"english",
|
||||
"translated",
|
||||
],
|
||||
"parody": [
|
||||
"neon genesis evangelion | shin seiki evangelion",
|
||||
],
|
||||
"tags": [
|
||||
"multi-work series",
|
||||
"schoolboy uniform",
|
||||
"schoolgirl uniform",
|
||||
"sole female",
|
||||
"sole male",
|
||||
"story arc",
|
||||
"twintails",
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://imhentai.xxx/gallery/1396508/",
|
||||
"#class" : imhentai.ImhentaiGalleryExtractor,
|
||||
"#pattern": r"https://m9\.imhentai\.xxx/028/po9f4w3jzx/\d+\.webp",
|
||||
"#count" : 34,
|
||||
|
||||
"count" : 34,
|
||||
"extension" : "webp",
|
||||
"filename" : str,
|
||||
"gallery_id": 1396508,
|
||||
"lang" : "ko",
|
||||
"num" : range(1, 34),
|
||||
"title" : "[Beruennea (Skylader)] Tada no Kouhai ni Natta Kimi | 그냥 후배가 돼 버린 너 [Korean] [Digital]",
|
||||
"title_alt" : "[ベルエンネーア (すかいれーだー)] ただの後輩になった君 [韓国翻訳] [DL版]",
|
||||
"type" : "doujinshi",
|
||||
"width" : 1280,
|
||||
"height" : {1790, 1791},
|
||||
|
||||
"artist": [
|
||||
"skylader",
|
||||
],
|
||||
"character": [],
|
||||
"group": [
|
||||
"beruennea",
|
||||
],
|
||||
"language": [
|
||||
"korean",
|
||||
"translated",
|
||||
],
|
||||
"parody": [
|
||||
"original",
|
||||
],
|
||||
"tags": [
|
||||
"ahegao",
|
||||
"big ass",
|
||||
"big breasts",
|
||||
"big nipples",
|
||||
"big penis",
|
||||
"bike shorts",
|
||||
"blowjob",
|
||||
"gokkun",
|
||||
"hairy",
|
||||
"huge breasts",
|
||||
"mosaic censorship",
|
||||
"muscle",
|
||||
"nakadashi",
|
||||
"netorare",
|
||||
"schoolgirl uniform",
|
||||
"tanlines",
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://imhentai.xxx/artist/asutora/",
|
||||
"#class" : imhentai.ImhentaiTagExtractor,
|
||||
"#pattern": imhentai.ImhentaiGalleryExtractor.pattern,
|
||||
"#count" : range(30, 50),
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://imhentai.xxx/search/?lt=1&pp=0&m=1&d=1&w=1&i=1&a=1&g=1&key=asutora&apply=Search&en=1&jp=1&es=1&fr=1&kr=1&de=1&ru=1&dl=0&tr=0",
|
||||
"#class" : imhentai.ImhentaiSearchExtractor,
|
||||
"#pattern": imhentai.ImhentaiGalleryExtractor.pattern,
|
||||
"#count" : range(30, 50),
|
||||
},
|
||||
|
||||
)
|
||||
Reference in New Issue
Block a user