@@ -427,6 +427,7 @@ Default
|
||||
``[Danbooru]``,
|
||||
``[E621]``,
|
||||
``[foolfuuka]:search``,
|
||||
``hdoujin``,
|
||||
``itaku``,
|
||||
``newgrounds``,
|
||||
``[philomena]``,
|
||||
@@ -438,6 +439,7 @@ Default
|
||||
``scrolller``,
|
||||
``sizebooru``,
|
||||
``soundgasm``,
|
||||
``thehentaiworld``,
|
||||
``urlgalleries``,
|
||||
``vk``,
|
||||
``webtoons``,
|
||||
|
||||
@@ -769,6 +769,10 @@
|
||||
{
|
||||
"format": ["gif", "mp4", "webm", "webp"]
|
||||
},
|
||||
"thehentaiworld":
|
||||
{
|
||||
"sleep-request": "0.5-1.5"
|
||||
},
|
||||
"tiktok":
|
||||
{
|
||||
"audio" : true,
|
||||
|
||||
@@ -997,6 +997,12 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>individual Images, Search Results, User Profiles</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr id="thehentaiworld" title="thehentaiworld">
|
||||
<td>The Hentai World</td>
|
||||
<td>https://thehentaiworld.com/</td>
|
||||
<td>Posts, Tag Searches</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr id="tiktok" title="tiktok">
|
||||
<td>TikTok</td>
|
||||
<td>https://www.tiktok.com/</td>
|
||||
|
||||
@@ -191,6 +191,7 @@ modules = [
|
||||
"tcbscans",
|
||||
"telegraph",
|
||||
"tenor",
|
||||
"thehentaiworld",
|
||||
"tiktok",
|
||||
"tmohentai",
|
||||
"toyhouse",
|
||||
|
||||
132
gallery_dl/extractor/thehentaiworld.py
Normal file
132
gallery_dl/extractor/thehentaiworld.py
Normal file
@@ -0,0 +1,132 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2025 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://thehentaiworld.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, util
|
||||
import collections
|
||||
|
||||
# Regex prefix shared by the extractor URL patterns in this module:
# optional "http://" / "https://" scheme and optional "www." in front of
# the site's host name.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?thehentaiworld\.com"
|
||||
|
||||
|
||||
class ThehentaiworldExtractor(Extractor):
    """Base class for thehentaiworld extractors"""
    category = "thehentaiworld"
    root = "https://thehentaiworld.com"
    filename_fmt = "{title} ({id}{num:?-//}).{extension}"
    archive_fmt = "{id}_{num}"
    request_interval = (0.5, 1.5)

    def items(self):
        """Yield one Directory message and the Url message(s) of every post

        The post page URLs are taken from self.posts(), which is not
        defined on this class and has to be supplied by subclasses.
        """
        for url in self.posts():
            post = self._extract_post(url)

            if "file_urls" in post:
                # multi-file post: 'num' counts from 1, 'count' is the
                # number of files
                urls = post["file_urls"]
                post["count"] = len(urls)
                yield Message.Directory, post
                for post["num"], url in enumerate(urls, 1):
                    text.nameext_from_url(url, post)
                    yield Message.Url, url, post
            else:
                # single-file post: keeps the defaults num=0 / count=1
                yield Message.Directory, post
                url = post["file_url"]
                text.nameext_from_url(url, post)
                yield Message.Url, url, post

    def _extract_post(self, url):
        """Request the post page at 'url' and return its metadata dict

        The result always has 'num', 'count', 'title', 'id', 'slug',
        'date', 'type', 'width', 'height', 'votes', 'score', 'file_url'
        and 'tags' (flat list of tag names), plus one 'tags_<class>' list
        per tag class found on the page. 'file_urls' is only present when
        the page contains a multi-image navigation section.
        """
        # NOTE(review): the extr() calls below are order-sensitive;
        # presumably each call resumes searching where the previous match
        # ended, so they must follow the order of the page markup - confirm
        # against text.extract_from before reordering anything
        extr = text.extract_from(self.request(url).text)

        post = {
            "num"   : 0,
            "count" : 1,
            "title" : text.unescape(extr("<title>", "<").strip()),
            "id"    : text.parse_int(extr(" postid-", " ")),
            "slug"  : extr(" post-", '"'),
            # raw HTML of the tag list; replaced by a list further down
            "tags"  : extr('id="tagsHead">', "</ul>"),
            "date"  : text.parse_datetime(extr(
                "<li>Posted: ", "<"), "%Y-%m-%d"),
        }

        if "/videos/" in url:
            post["type"] = "video"
            # no size is read from video pages
            post["width"] = post["height"] = 0
            post["votes"] = text.parse_int(extr("(<strong>", "</strong>"))
            post["score"] = text.parse_float(extr("<strong>", "<"))
            post["file_url"] = extr('<source src="', '"')
        else:
            post["type"] = "image"
            post["width"] = text.parse_int(extr("<li>Size: ", " "))
            post["height"] = text.parse_int(extr("x ", "<"))
            post["file_url"] = extr('a href="', '"')
            post["votes"] = text.parse_int(extr("(<strong>", "</strong>"))
            post["score"] = text.parse_float(extr("<strong>", "<"))

            if doujin := extr('<a id="prev-page"', "</div></div><"):
                # drop the "-220x<height>" size suffix from each thumbnail
                # 'src' (presumably this yields the full-size file URL)
                repl = text.re(r"-220x\d+\.").sub
                post["file_urls"] = [
                    repl(".", url)
                    for url in text.extract_iter(
                        doujin, 'class="border" src="', '"')
                ]

        # group tag names by the 'class' attribute of their <a> element
        tags = collections.defaultdict(list)
        pattern = text.re(r'<li><a class="([^"]*)" href="[^"]*">([^<]+)')
        for tag_type, tag_name in pattern.findall(post["tags"]):
            # the name is raw HTML text; decode entities like for 'title'
            tags[tag_type].append(text.unescape(tag_name))

        post["tags"] = tags_list = []
        for key, value in tags.items():
            tags_list.extend(value)
            # tags whose link has an empty class end up in 'tags_general'
            post[f"tags_{key}" if key else "tags_general"] = value

        return post

    def _pagination(self, endpoint):
        """Yield the 'href' values of all thumbnail links of a listing

        Starts at page number self.page_start, which is not defined on
        this class and has to be supplied by the subclass using this
        method. Stops after the first page without a 'class="next"' marker.
        """
        base = f"{self.root}{endpoint}"
        pnum = self.page_start

        while True:
            # the first page is the bare endpoint; later ones add page/<n>/
            url = base if pnum < 2 else f"{base}page/{pnum}/"
            page = self.request(url).text

            # only consider links inside the thumbnail container
            yield from text.extract_iter(text.extr(
                page, 'id="thumbContainer"', "<script"), ' href="', '"')

            if 'class="next"' not in page:
                return
            pnum += 1
|
||||
|
||||
|
||||
class ThehentaiworldPostExtractor(ThehentaiworldExtractor):
    """Extractor for a single image or video post page"""
    subcategory = "post"
    pattern = rf"{BASE_PATTERN}(/(?:hentai-image|video)s/([^/?#]+))"
    example = "https://thehentaiworld.com/hentai-images/SLUG/"

    def posts(self):
        """Return a 1-tuple with the URL of the matched post page"""
        # groups[0] is the whole "/hentai-images/SLUG" or "/videos/SLUG"
        # path captured by 'pattern'; a trailing slash gets appended
        post_url = f"{self.root}{self.groups[0]}/"
        return (post_url,)
|
||||
|
||||
|
||||
class ThehentaiworldTagExtractor(ThehentaiworldExtractor):
    """Extractor for the posts listed on a tag page"""
    subcategory = "tag"
    # number of posts that skip() treats as one listing page
    per_page = 24
    # first listing page to request and number of leading posts to drop;
    # both get adjusted by skip()
    page_start = 1
    post_start = 0
    directory_fmt = ("{category}", "{search_tags}")
    pattern = rf"{BASE_PATTERN}/tag/([^/?#]+)"
    example = "https://thehentaiworld.com/tag/TAG/"

    def posts(self):
        """Return the post URLs of this tag's listing pages"""
        tag = self.groups[0]
        self.kwdict["search_tags"] = tag

        listing = self._pagination(f"/tag/{tag}/")
        # presumably discards the first 'post_start' results - see
        # util.advance
        return util.advance(listing, self.post_start)

    def skip(self, num):
        """Register 'num' posts to be skipped and return 'num'

        Whole pages are added to the start page; the remainder is added
        to the number of posts dropped from the beginning of the results.
        """
        whole_pages = num // self.per_page
        remainder = num % self.per_page

        self.page_start += whole_pages
        self.post_start += remainder
        return num
|
||||
@@ -179,6 +179,7 @@ CATEGORY_MAP = {
|
||||
"thebarchive" : "The /b/ Archive",
|
||||
"thecollection" : "The /co/llection",
|
||||
"thecollectionS" : "The /co/llection",
|
||||
"thehentaiworld" : "The Hentai World",
|
||||
"tiktok" : "TikTok",
|
||||
"tmohentai" : "TMOHentai",
|
||||
"tumblrgallery" : "TumblrGallery",
|
||||
|
||||
202
test/results/thehentaiworld.py
Normal file
202
test/results/thehentaiworld.py
Normal file
@@ -0,0 +1,202 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import thehentaiworld
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://thehentaiworld.com/hentai-images/samus-aran-aurahack-metroid-2/",
|
||||
"#class" : thehentaiworld.ThehentaiworldPostExtractor,
|
||||
"#results" : "https://thehentaiworld.com/wp-content/uploads/2020/06/Samus-Aran-Aurahack-Metroid-Hentai.jpeg",
|
||||
|
||||
"count" : 1,
|
||||
"num" : 0,
|
||||
"date" : "dt:2020-06-05 00:00:00",
|
||||
"extension" : "jpeg",
|
||||
"file_url" : "https://thehentaiworld.com/wp-content/uploads/2020/06/Samus-Aran-Aurahack-Metroid-Hentai.jpeg",
|
||||
"filename" : "Samus-Aran-Aurahack-Metroid-Hentai",
|
||||
"height" : 2893,
|
||||
"id" : 147048,
|
||||
"score" : range(3, 5),
|
||||
"slug" : "samus-aran-aurahack-metroid-2",
|
||||
"title" : "Samus Aran – Aurahack – Metroid",
|
||||
"type" : "image",
|
||||
"votes" : range(5, 20),
|
||||
"width" : 2000,
|
||||
"tags" : [
|
||||
"Metroid",
|
||||
"Samus Aran",
|
||||
"Aurahack18",
|
||||
"Blonde",
|
||||
"blush",
|
||||
"sweat",
|
||||
],
|
||||
"tags_general" : [
|
||||
"Blonde",
|
||||
"blush",
|
||||
"sweat",
|
||||
],
|
||||
"tags_artist" : ["Aurahack18"],
|
||||
"tags_character": ["Samus Aran"],
|
||||
"tags_origin" : ["Metroid"],
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://thehentaiworld.com/hentai-images/ubel-nt00-sousou-no-frieren/",
|
||||
"#class" : thehentaiworld.ThehentaiworldPostExtractor,
|
||||
"#results" : (
|
||||
"https://thehentaiworld.com/wp-content/uploads/2024/04/Ubel-nt00-Sousou-no-Frieren-Hentai.jpg",
|
||||
"https://thehentaiworld.com/wp-content/uploads/2024/04/Ubel-–-nt00-–-Sousou-no-Frieren-Hentai.jpg",
|
||||
),
|
||||
|
||||
"count" : 2,
|
||||
"num" : range(1, 2),
|
||||
"date" : "dt:2024-04-16 00:00:00",
|
||||
"extension" : "jpg",
|
||||
"file_url" : "https://thehentaiworld.com/wp-content/uploads/2024/04/Ubel-nt00-Sousou-no-Frieren-Hentai.jpg",
|
||||
"filename" : {
|
||||
"Ubel-nt00-Sousou-no-Frieren-Hentai",
|
||||
"Ubel-–-nt00-–-Sousou-no-Frieren-Hentai",
|
||||
},
|
||||
"height" : 1422,
|
||||
"id" : 226208,
|
||||
"score" : range(3, 5),
|
||||
"slug" : "ubel-nt00-sousou-no-frieren",
|
||||
"title" : "Ubel – nt00 – Sousou no Frieren",
|
||||
"type" : "image",
|
||||
"votes" : range(10, 20),
|
||||
"width" : 800,
|
||||
"file_urls" : [
|
||||
"https://thehentaiworld.com/wp-content/uploads/2024/04/Ubel-nt00-Sousou-no-Frieren-Hentai.jpg",
|
||||
"https://thehentaiworld.com/wp-content/uploads/2024/04/Ubel-–-nt00-–-Sousou-no-Frieren-Hentai.jpg",
|
||||
],
|
||||
"tags" : [
|
||||
"Sousou no Frieren",
|
||||
"Ubel",
|
||||
"nt00",
|
||||
"blush",
|
||||
"Green Hair",
|
||||
"pubic hair",
|
||||
"smile",
|
||||
],
|
||||
"tags_general" : [
|
||||
"blush",
|
||||
"Green Hair",
|
||||
"pubic hair",
|
||||
"smile",
|
||||
],
|
||||
"tags_artist" : ["nt00"],
|
||||
"tags_character": ["Ubel"],
|
||||
"tags_origin" : ["Sousou no Frieren"],
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://thehentaiworld.com/videos/lucy-heartfilia-and-natsu-dragneel-shiina-ecchi-fairy-tail/#comment-396839",
|
||||
"#class" : thehentaiworld.ThehentaiworldPostExtractor,
|
||||
"#results" : "https://thehentaiworld.com/wp-content/uploads/2025/09/Lucy-Heartfilia-and-Natsu-Dragneel-Shiina-Ecchi-Fairy-Tail-Animated-Hentai-Video.mp4",
|
||||
|
||||
"count" : 1,
|
||||
"num" : 0,
|
||||
"date" : "dt:2025-09-19 00:00:00",
|
||||
"extension" : "mp4",
|
||||
"file_url" : "https://thehentaiworld.com/wp-content/uploads/2025/09/Lucy-Heartfilia-and-Natsu-Dragneel-Shiina-Ecchi-Fairy-Tail-Animated-Hentai-Video.mp4",
|
||||
"filename" : "Lucy-Heartfilia-and-Natsu-Dragneel-Shiina-Ecchi-Fairy-Tail-Animated-Hentai-Video",
|
||||
"height" : 0,
|
||||
"id" : 253263,
|
||||
"score" : 5.0,
|
||||
"slug" : "lucy-heartfilia-and-natsu-dragneel-shiina-ecchi-fairy-tail",
|
||||
"title" : "Lucy Heartfilia and Natsu Dragneel – Shiina Ecchi – Fairy Tail",
|
||||
"type" : "video",
|
||||
"votes" : range(25, 50),
|
||||
"width" : 0,
|
||||
"tags" : [
|
||||
"Fairy Tail",
|
||||
"Animated",
|
||||
"sound",
|
||||
"video",
|
||||
"lucy heartfilia",
|
||||
"Natsu Dragneel",
|
||||
"Shiina Ecchi",
|
||||
"arse",
|
||||
"blush",
|
||||
"Cowgirl Ride",
|
||||
"cum",
|
||||
"cum inside",
|
||||
"eye roll",
|
||||
"Fingering",
|
||||
"Jiggle",
|
||||
"legs spread",
|
||||
"masturbating",
|
||||
"moan",
|
||||
"panties",
|
||||
"pov",
|
||||
"ride",
|
||||
"smile",
|
||||
"squeeze",
|
||||
"vagina",
|
||||
"x-ray",
|
||||
],
|
||||
"tags_character": [
|
||||
"lucy heartfilia",
|
||||
"Natsu Dragneel",
|
||||
],
|
||||
"tags_general" : [
|
||||
"arse",
|
||||
"blush",
|
||||
"Cowgirl Ride",
|
||||
"cum",
|
||||
"cum inside",
|
||||
"eye roll",
|
||||
"Fingering",
|
||||
"Jiggle",
|
||||
"legs spread",
|
||||
"masturbating",
|
||||
"moan",
|
||||
"panties",
|
||||
"pov",
|
||||
"ride",
|
||||
"smile",
|
||||
"squeeze",
|
||||
"vagina",
|
||||
"x-ray",
|
||||
],
|
||||
"tags_media" : [
|
||||
"Animated",
|
||||
"sound",
|
||||
"video",
|
||||
],
|
||||
"tags_artist" : ["Shiina Ecchi"],
|
||||
"tags_origin" : ["Fairy Tail"],
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://thehentaiworld.com/tag/aurahack/",
|
||||
"#class" : thehentaiworld.ThehentaiworldTagExtractor,
|
||||
"#pattern" : r"https://thehentaiworld\.com/wp\-content/uploads/20\d\d/.+",
|
||||
"#range" : "20-",
|
||||
"#count" : 10,
|
||||
|
||||
"count" : {1, 2},
|
||||
"num" : {1, 2, 0},
|
||||
"date" : "type:datetime",
|
||||
"extension" : {"jpg", "png"},
|
||||
"file_url" : str,
|
||||
"filename" : str,
|
||||
"height" : int,
|
||||
"id" : int,
|
||||
"score" : float,
|
||||
"search_tags" : "aurahack",
|
||||
"slug" : str,
|
||||
"tags_artist" : ["Aurahack18"],
|
||||
"title" : str,
|
||||
"type" : "image",
|
||||
"votes" : int,
|
||||
"width" : int,
|
||||
"tags" : list,
|
||||
},
|
||||
|
||||
)
|
||||
Reference in New Issue
Block a user