@@ -427,6 +427,7 @@ Default
|
|||||||
``[Danbooru]``,
|
``[Danbooru]``,
|
||||||
``[E621]``,
|
``[E621]``,
|
||||||
``[foolfuuka]:search``,
|
``[foolfuuka]:search``,
|
||||||
|
``hdoujin``,
|
||||||
``itaku``,
|
``itaku``,
|
||||||
``newgrounds``,
|
``newgrounds``,
|
||||||
``[philomena]``,
|
``[philomena]``,
|
||||||
@@ -438,6 +439,7 @@ Default
|
|||||||
``scrolller``,
|
``scrolller``,
|
||||||
``sizebooru``,
|
``sizebooru``,
|
||||||
``soundgasm``,
|
``soundgasm``,
|
||||||
|
``thehentaiworld``,
|
||||||
``urlgalleries``,
|
``urlgalleries``,
|
||||||
``vk``,
|
``vk``,
|
||||||
``webtoons``,
|
``webtoons``,
|
||||||
|
|||||||
@@ -769,6 +769,10 @@
|
|||||||
{
|
{
|
||||||
"format": ["gif", "mp4", "webm", "webp"]
|
"format": ["gif", "mp4", "webm", "webp"]
|
||||||
},
|
},
|
||||||
|
"thehentaiworld":
|
||||||
|
{
|
||||||
|
"sleep-request": "0.5-1.5"
|
||||||
|
},
|
||||||
"tiktok":
|
"tiktok":
|
||||||
{
|
{
|
||||||
"audio" : true,
|
"audio" : true,
|
||||||
|
|||||||
@@ -997,6 +997,12 @@ Consider all listed sites to potentially be NSFW.
|
|||||||
<td>individual Images, Search Results, User Profiles</td>
|
<td>individual Images, Search Results, User Profiles</td>
|
||||||
<td></td>
|
<td></td>
|
||||||
</tr>
|
</tr>
|
||||||
|
<tr id="thehentaiworld" title="thehentaiworld">
|
||||||
|
<td>The Hentai World</td>
|
||||||
|
<td>https://thehentaiworld.com/</td>
|
||||||
|
<td>Posts, Tag Searches</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
<tr id="tiktok" title="tiktok">
|
<tr id="tiktok" title="tiktok">
|
||||||
<td>TikTok</td>
|
<td>TikTok</td>
|
||||||
<td>https://www.tiktok.com/</td>
|
<td>https://www.tiktok.com/</td>
|
||||||
|
|||||||
@@ -191,6 +191,7 @@ modules = [
|
|||||||
"tcbscans",
|
"tcbscans",
|
||||||
"telegraph",
|
"telegraph",
|
||||||
"tenor",
|
"tenor",
|
||||||
|
"thehentaiworld",
|
||||||
"tiktok",
|
"tiktok",
|
||||||
"tmohentai",
|
"tmohentai",
|
||||||
"toyhouse",
|
"toyhouse",
|
||||||
|
|||||||
132
gallery_dl/extractor/thehentaiworld.py
Normal file
132
gallery_dl/extractor/thehentaiworld.py
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright 2025 Mike Fährmann
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
"""Extractors for https://thehentaiworld.com/"""
|
||||||
|
|
||||||
|
from .common import Extractor, Message
|
||||||
|
from .. import text, util
|
||||||
|
import collections
|
||||||
|
|
||||||
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?thehentaiworld\.com"
|
||||||
|
|
||||||
|
|
||||||
|
class ThehentaiworldExtractor(Extractor):
    """Base class for thehentaiworld extractors

    Subclasses provide posts(), an iterable of post page URLs.
    items() fetches each of those pages and emits its file(s).
    """
    category = "thehentaiworld"
    root = "https://thehentaiworld.com"
    filename_fmt = "{title} ({id}{num:?-//}).{extension}"
    archive_fmt = "{id}_{num}"
    request_interval = (0.5, 1.5)
    # First listing page requested by _pagination().
    # Defined here so that every subclass calling _pagination() has a
    # value to start from; subclasses may override or modify it.
    page_start = 1

    def items(self):
        """Yield one Directory message and the Url message(s) per post"""
        for url in self.posts():
            post = self._extract_post(url)

            urls = post.get("file_urls")
            if urls:
                # multi-file post: 'num' counts files starting at 1
                post["count"] = len(urls)
                yield Message.Directory, post
                for post["num"], url in enumerate(urls, 1):
                    text.nameext_from_url(url, post)
                    yield Message.Url, url, post
            else:
                # single-file post: keeps the 'num' 0 / 'count' 1
                # defaults set by _extract_post()
                yield Message.Directory, post
                url = post["file_url"]
                text.nameext_from_url(url, post)
                yield Message.Url, url, post

    def _extract_post(self, url):
        """Request the post page at 'url' and return its metadata dict

        The result always contains 'file_url'. 'file_urls' is only
        present when the page's "prev-page" section lists at least
        one thumbnail.
        """
        # NOTE(review): the extr() calls below presumably consume the page
        # front to back, so their order has to follow the page layout;
        # confirm before reordering any of them
        extr = text.extract_from(self.request(url).text)

        post = {
            "num"  : 0,
            "count": 1,
            "title": text.unescape(extr("<title>", "<").strip()),
            "id"   : text.parse_int(extr(" postid-", " ")),
            "slug" : extr(" post-", '"'),
            "tags" : extr('id="tagsHead">', "</ul>"),
            "date" : text.parse_datetime(extr(
                "<li>Posted: ", "<"), "%Y-%m-%d"),
        }

        if "/videos/" in url:
            post["type"] = "video"
            # no dimensions are read for video posts
            post["width"] = post["height"] = 0
            post["votes"] = text.parse_int(extr("(<strong>", "</strong>"))
            post["score"] = text.parse_float(extr("<strong>", "<"))
            post["file_url"] = extr('<source src="', '"')
        else:
            post["type"] = "image"
            post["width"] = text.parse_int(extr("<li>Size: ", " "))
            post["height"] = text.parse_int(extr("x ", "<"))
            post["file_url"] = extr('a href="', '"')
            post["votes"] = text.parse_int(extr("(<strong>", "</strong>"))
            post["score"] = text.parse_float(extr("<strong>", "<"))

        if doujin := extr('<a id="prev-page"', "</div></div><"):
            # turn thumbnail URLs ("...-220x<N>.<ext>") into "....<ext>"
            repl = text.re(r"-220x\d+\.").sub
            file_urls = [
                repl(".", thumb)
                for thumb in text.extract_iter(
                    doujin, 'class="border" src="', '"')
            ]
            # Only report a multi-file post when thumbnails were actually
            # found. An empty list would make items() emit nothing and
            # lose the 'file_url' extracted above.
            if file_urls:
                post["file_urls"] = file_urls

        # group tag names by the CSS class of their link
        tags = collections.defaultdict(list)
        pattern = text.re(r'<li><a class="([^"]*)" href="[^"]*">([^<]+)')
        for tag_type, tag_name in pattern.findall(post["tags"]):
            tags[tag_type].append(tag_name)

        # 'tags' becomes the flat list of all names; each group is also
        # stored as 'tags_<class>' (empty class name -> 'tags_general')
        post["tags"] = tags_list = []
        for key, value in tags.items():
            tags_list.extend(value)
            post[f"tags_{key}" if key else "tags_general"] = value

        return post

    def _pagination(self, endpoint):
        """Yield the href values of all listing pages under 'endpoint'

        Starts at page 'self.page_start' and stops after the first page
        that does not contain 'class="next"'.
        """
        base = f"{self.root}{endpoint}"
        pnum = self.page_start

        while True:
            # the first page has no "page/N/" suffix
            url = base if pnum < 2 else f"{base}page/{pnum}/"
            page = self.request(url).text

            # only consider links inside the thumbnail container
            yield from text.extract_iter(text.extr(
                page, 'id="thumbContainer"', "<script"), ' href="', '"')

            if 'class="next"' not in page:
                return
            pnum += 1
|
||||||
|
|
||||||
|
|
||||||
|
class ThehentaiworldPostExtractor(ThehentaiworldExtractor):
    """Extractor for a single thehentaiworld image or video post"""
    subcategory = "post"
    pattern = rf"{BASE_PATTERN}(/(?:hentai-image|video)s/([^/?#]+))"
    example = "https://thehentaiworld.com/hentai-images/SLUG/"

    def posts(self):
        """Return a 1-tuple holding the URL of the matched post page"""
        # first pattern group: the post's path without trailing slash
        path = self.groups[0]
        post_url = self.root + path + "/"
        return (post_url,)
|
||||||
|
|
||||||
|
|
||||||
|
class ThehentaiworldTagExtractor(ThehentaiworldExtractor):
    """Extractor for the posts listed under a thehentaiworld tag"""
    subcategory = "tag"
    directory_fmt = ("{category}", "{search_tags}")
    pattern = rf"{BASE_PATTERN}/tag/([^/?#]+)"
    example = "https://thehentaiworld.com/tag/TAG/"
    # number of posts skip() counts as one full listing page
    per_page = 24
    # listing page to start at / posts to drop from the first results
    page_start = 1
    post_start = 0

    def posts(self):
        """Return the tag's post URLs, minus the first 'post_start' ones"""
        tag = self.groups[0]
        self.kwdict["search_tags"] = tag

        results = self._pagination(f"/tag/{tag}/")
        return util.advance(results, self.post_start)

    def skip(self, num):
        """Register 'num' posts to be skipped and return 'num'"""
        # whole pages are skipped by moving the start page forward;
        # the remainder is dropped from the results in posts()
        self.page_start += num // self.per_page
        self.post_start += num % self.per_page
        return num
|
||||||
@@ -179,6 +179,7 @@ CATEGORY_MAP = {
|
|||||||
"thebarchive" : "The /b/ Archive",
|
"thebarchive" : "The /b/ Archive",
|
||||||
"thecollection" : "The /co/llection",
|
"thecollection" : "The /co/llection",
|
||||||
"thecollectionS" : "The /co/llection",
|
"thecollectionS" : "The /co/llection",
|
||||||
|
"thehentaiworld" : "The Hentai World",
|
||||||
"tiktok" : "TikTok",
|
"tiktok" : "TikTok",
|
||||||
"tmohentai" : "TMOHentai",
|
"tmohentai" : "TMOHentai",
|
||||||
"tumblrgallery" : "TumblrGallery",
|
"tumblrgallery" : "TumblrGallery",
|
||||||
|
|||||||
202
test/results/thehentaiworld.py
Normal file
202
test/results/thehentaiworld.py
Normal file
@@ -0,0 +1,202 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
from gallery_dl.extractor import thehentaiworld
|
||||||
|
|
||||||
|
|
||||||
|
__tests__ = (
|
||||||
|
{
|
||||||
|
"#url" : "https://thehentaiworld.com/hentai-images/samus-aran-aurahack-metroid-2/",
|
||||||
|
"#class" : thehentaiworld.ThehentaiworldPostExtractor,
|
||||||
|
"#results" : "https://thehentaiworld.com/wp-content/uploads/2020/06/Samus-Aran-Aurahack-Metroid-Hentai.jpeg",
|
||||||
|
|
||||||
|
"count" : 1,
|
||||||
|
"num" : 0,
|
||||||
|
"date" : "dt:2020-06-05 00:00:00",
|
||||||
|
"extension" : "jpeg",
|
||||||
|
"file_url" : "https://thehentaiworld.com/wp-content/uploads/2020/06/Samus-Aran-Aurahack-Metroid-Hentai.jpeg",
|
||||||
|
"filename" : "Samus-Aran-Aurahack-Metroid-Hentai",
|
||||||
|
"height" : 2893,
|
||||||
|
"id" : 147048,
|
||||||
|
"score" : range(3, 5),
|
||||||
|
"slug" : "samus-aran-aurahack-metroid-2",
|
||||||
|
"title" : "Samus Aran – Aurahack – Metroid",
|
||||||
|
"type" : "image",
|
||||||
|
"votes" : range(5, 20),
|
||||||
|
"width" : 2000,
|
||||||
|
"tags" : [
|
||||||
|
"Metroid",
|
||||||
|
"Samus Aran",
|
||||||
|
"Aurahack18",
|
||||||
|
"Blonde",
|
||||||
|
"blush",
|
||||||
|
"sweat",
|
||||||
|
],
|
||||||
|
"tags_general" : [
|
||||||
|
"Blonde",
|
||||||
|
"blush",
|
||||||
|
"sweat",
|
||||||
|
],
|
||||||
|
"tags_artist" : ["Aurahack18"],
|
||||||
|
"tags_character": ["Samus Aran"],
|
||||||
|
"tags_origin" : ["Metroid"],
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://thehentaiworld.com/hentai-images/ubel-nt00-sousou-no-frieren/",
|
||||||
|
"#class" : thehentaiworld.ThehentaiworldPostExtractor,
|
||||||
|
"#results" : (
|
||||||
|
"https://thehentaiworld.com/wp-content/uploads/2024/04/Ubel-nt00-Sousou-no-Frieren-Hentai.jpg",
|
||||||
|
"https://thehentaiworld.com/wp-content/uploads/2024/04/Ubel-–-nt00-–-Sousou-no-Frieren-Hentai.jpg",
|
||||||
|
),
|
||||||
|
|
||||||
|
"count" : 2,
|
||||||
|
"num" : range(1, 2),
|
||||||
|
"date" : "dt:2024-04-16 00:00:00",
|
||||||
|
"extension" : "jpg",
|
||||||
|
"file_url" : "https://thehentaiworld.com/wp-content/uploads/2024/04/Ubel-nt00-Sousou-no-Frieren-Hentai.jpg",
|
||||||
|
"filename" : {
|
||||||
|
"Ubel-nt00-Sousou-no-Frieren-Hentai",
|
||||||
|
"Ubel-–-nt00-–-Sousou-no-Frieren-Hentai",
|
||||||
|
},
|
||||||
|
"height" : 1422,
|
||||||
|
"id" : 226208,
|
||||||
|
"score" : range(3, 5),
|
||||||
|
"slug" : "ubel-nt00-sousou-no-frieren",
|
||||||
|
"title" : "Ubel – nt00 – Sousou no Frieren",
|
||||||
|
"type" : "image",
|
||||||
|
"votes" : range(10, 20),
|
||||||
|
"width" : 800,
|
||||||
|
"file_urls" : [
|
||||||
|
"https://thehentaiworld.com/wp-content/uploads/2024/04/Ubel-nt00-Sousou-no-Frieren-Hentai.jpg",
|
||||||
|
"https://thehentaiworld.com/wp-content/uploads/2024/04/Ubel-–-nt00-–-Sousou-no-Frieren-Hentai.jpg",
|
||||||
|
],
|
||||||
|
"tags" : [
|
||||||
|
"Sousou no Frieren",
|
||||||
|
"Ubel",
|
||||||
|
"nt00",
|
||||||
|
"blush",
|
||||||
|
"Green Hair",
|
||||||
|
"pubic hair",
|
||||||
|
"smile",
|
||||||
|
],
|
||||||
|
"tags_general" : [
|
||||||
|
"blush",
|
||||||
|
"Green Hair",
|
||||||
|
"pubic hair",
|
||||||
|
"smile",
|
||||||
|
],
|
||||||
|
"tags_artist" : ["nt00"],
|
||||||
|
"tags_character": ["Ubel"],
|
||||||
|
"tags_origin" : ["Sousou no Frieren"],
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://thehentaiworld.com/videos/lucy-heartfilia-and-natsu-dragneel-shiina-ecchi-fairy-tail/#comment-396839",
|
||||||
|
"#class" : thehentaiworld.ThehentaiworldPostExtractor,
|
||||||
|
"#results" : "https://thehentaiworld.com/wp-content/uploads/2025/09/Lucy-Heartfilia-and-Natsu-Dragneel-Shiina-Ecchi-Fairy-Tail-Animated-Hentai-Video.mp4",
|
||||||
|
|
||||||
|
"count" : 1,
|
||||||
|
"num" : 0,
|
||||||
|
"date" : "dt:2025-09-19 00:00:00",
|
||||||
|
"extension" : "mp4",
|
||||||
|
"file_url" : "https://thehentaiworld.com/wp-content/uploads/2025/09/Lucy-Heartfilia-and-Natsu-Dragneel-Shiina-Ecchi-Fairy-Tail-Animated-Hentai-Video.mp4",
|
||||||
|
"filename" : "Lucy-Heartfilia-and-Natsu-Dragneel-Shiina-Ecchi-Fairy-Tail-Animated-Hentai-Video",
|
||||||
|
"height" : 0,
|
||||||
|
"id" : 253263,
|
||||||
|
"score" : 5.0,
|
||||||
|
"slug" : "lucy-heartfilia-and-natsu-dragneel-shiina-ecchi-fairy-tail",
|
||||||
|
"title" : "Lucy Heartfilia and Natsu Dragneel – Shiina Ecchi – Fairy Tail",
|
||||||
|
"type" : "video",
|
||||||
|
"votes" : range(25, 50),
|
||||||
|
"width" : 0,
|
||||||
|
"tags" : [
|
||||||
|
"Fairy Tail",
|
||||||
|
"Animated",
|
||||||
|
"sound",
|
||||||
|
"video",
|
||||||
|
"lucy heartfilia",
|
||||||
|
"Natsu Dragneel",
|
||||||
|
"Shiina Ecchi",
|
||||||
|
"arse",
|
||||||
|
"blush",
|
||||||
|
"Cowgirl Ride",
|
||||||
|
"cum",
|
||||||
|
"cum inside",
|
||||||
|
"eye roll",
|
||||||
|
"Fingering",
|
||||||
|
"Jiggle",
|
||||||
|
"legs spread",
|
||||||
|
"masturbating",
|
||||||
|
"moan",
|
||||||
|
"panties",
|
||||||
|
"pov",
|
||||||
|
"ride",
|
||||||
|
"smile",
|
||||||
|
"squeeze",
|
||||||
|
"vagina",
|
||||||
|
"x-ray",
|
||||||
|
],
|
||||||
|
"tags_character": [
|
||||||
|
"lucy heartfilia",
|
||||||
|
"Natsu Dragneel",
|
||||||
|
],
|
||||||
|
"tags_general" : [
|
||||||
|
"arse",
|
||||||
|
"blush",
|
||||||
|
"Cowgirl Ride",
|
||||||
|
"cum",
|
||||||
|
"cum inside",
|
||||||
|
"eye roll",
|
||||||
|
"Fingering",
|
||||||
|
"Jiggle",
|
||||||
|
"legs spread",
|
||||||
|
"masturbating",
|
||||||
|
"moan",
|
||||||
|
"panties",
|
||||||
|
"pov",
|
||||||
|
"ride",
|
||||||
|
"smile",
|
||||||
|
"squeeze",
|
||||||
|
"vagina",
|
||||||
|
"x-ray",
|
||||||
|
],
|
||||||
|
"tags_media" : [
|
||||||
|
"Animated",
|
||||||
|
"sound",
|
||||||
|
"video",
|
||||||
|
],
|
||||||
|
"tags_artist" : ["Shiina Ecchi"],
|
||||||
|
"tags_origin" : ["Fairy Tail"],
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://thehentaiworld.com/tag/aurahack/",
|
||||||
|
"#class" : thehentaiworld.ThehentaiworldTagExtractor,
|
||||||
|
"#pattern" : r"https://thehentaiworld\.com/wp\-content/uploads/20\d\d/.+",
|
||||||
|
"#range" : "20-",
|
||||||
|
"#count" : 10,
|
||||||
|
|
||||||
|
"count" : {1, 2},
|
||||||
|
"num" : {1, 2, 0},
|
||||||
|
"date" : "type:datetime",
|
||||||
|
"extension" : {"jpg", "png"},
|
||||||
|
"file_url" : str,
|
||||||
|
"filename" : str,
|
||||||
|
"height" : int,
|
||||||
|
"id" : int,
|
||||||
|
"score" : float,
|
||||||
|
"search_tags" : "aurahack",
|
||||||
|
"slug" : str,
|
||||||
|
"tags_artist" : ["Aurahack18"],
|
||||||
|
"title" : str,
|
||||||
|
"type" : "image",
|
||||||
|
"votes" : int,
|
||||||
|
"width" : int,
|
||||||
|
"tags" : list,
|
||||||
|
},
|
||||||
|
|
||||||
|
)
|
||||||
Reference in New Issue
Block a user