[thefap] add support (#8821 #8822)

* adding site support for thefap.com
* fixing typo in url tld
* improve & simplify 'model' extractor
* update 'post' extractor
* update docs/supportedsites
* add tests

---------

Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
This commit is contained in:
Stephon Parker
2026-01-14 15:11:56 -05:00
committed by GitHub
parent 71e7403dfc
commit 43387c535d
5 changed files with 221 additions and 0 deletions

View File

@@ -1087,6 +1087,12 @@ Consider all listed sites to potentially be NSFW.
<td>Posts, Tag Searches</td>
<td></td>
</tr>
<tr id="thefap" title="thefap">
<td>TheFap</td>
<td>https://thefap.net/</td>
<td>Models, Posts</td>
<td></td>
</tr>
<tr id="tiktok" title="tiktok">
<td>TikTok</td>
<td>https://www.tiktok.com/</td>

View File

@@ -206,6 +206,7 @@ modules = [
"tcbscans",
"telegraph",
"tenor",
"thefap",
"thehentaiworld",
"tiktok",
"tmohentai",

View File

@@ -0,0 +1,127 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://thefap.net/"""
from .common import Extractor, Message
from .. import text, exception
# URL prefix shared by the extractor patterns below: an optional scheme
# and an optional "www." followed by the thefap.net domain.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?thefap\.net"
class ThefapExtractor(Extractor):
    """Shared settings and URL helpers for the thefap extractors"""
    category = "thefap"
    root = "https://thefap.net"
    directory_fmt = ("{category}", "{model_name} ({model_id})")
    filename_fmt = "{model}_{num:>03}.{extension}"
    archive_fmt = "{model_id}_{filename}"

    def _normalize_url(self, url):
        """Clean up an image URL taken from a page and make it absolute

        Returns an empty string for falsy input, otherwise the stripped
        URL with its size hints rewritten and a scheme/host prepended
        where the URL was protocol-relative or root-relative.
        """
        if not url:
            return ""

        cleaned = url.strip()

        if "?w=" in cleaned:
            # cut everything from the last "?" onwards
            # (presumably a width / resize parameter -- confirm)
            cleaned = cleaned.rpartition("?")[0]
        elif cleaned.endswith(":small"):
            # swap the trailing ":small" suffix for ":orig"
            cleaned = cleaned[:-len(":small")] + ":orig"

        if cleaned.startswith("//"):
            # protocol-relative URL
            return "https:" + cleaned
        if cleaned.startswith("/"):
            # path relative to the site root
            return self.root + cleaned
        return cleaned
class ThefapPostExtractor(ThefapExtractor):
    """Extractor for individual thefap.net posts"""
    subcategory = "post"
    pattern = (BASE_PATTERN +
               r"(/([^/?#]+)-(\d+)/([^/?#]+)/i(\d+))")
    example = "https://thefap.net/MODEL-12345/KIND/i12345"

    def items(self):
        """Yield one Directory message and a Url message per post image

        Raises exception.NotFoundError when the fetched page contains
        the text "Not Found".
        """
        path, model, model_id, kind, post_id = self.groups

        html = self.request(self.root + path).text
        if "Not Found" in html:
            raise exception.NotFoundError("post")

        # prefer the name from the page title; otherwise derive it from
        # the URL slug with dots replaced by spaces
        title = text.extr(html, "<title>", " / ")
        if title:
            model_name = text.unescape(title)
        else:
            model_name = text.unquote(model).replace(".", " ")

        data = {
            "model"     : model,
            "model_id"  : text.parse_int(model_id),
            "model_name": model_name,
            "kind"      : kind,
            "post_id"   : text.parse_int(post_id),
            "_http_headers": {"Referer": None},
        }
        yield Message.Directory, "", data

        # 'num' is the running 1-based index of the yielded files
        data["num"] = 0

        # only look at the markup located after the page header,
        # between "\n</div>" and "\n<!---->"
        content, _ = text.extract(
            html, "\n</div>", "\n<!---->", html.index("</header>"))

        for src in text.extract_iter(content, '<img src="', '"'):
            url = self._normalize_url(src)
            if not url:
                continue
            data["num"] += 1
            yield Message.Url, url, text.nameext_from_url(url, data)
class ThefapModelExtractor(ThefapExtractor):
    """Extractor for thefap.net model pages"""
    subcategory = "model"
    pattern = BASE_PATTERN + r"/([^/?#]+)-(\d+)"
    example = "https://thefap.net/MODEL-12345/"

    def items(self):
        """Yield one Directory message and a Url message per model image

        Images come from the model page itself and then from the
        numbered ajax pages, which are requested until one of them
        returns an empty body.

        Raises exception.NotFoundError when the model page has no
        'id="content"' element.
        """
        model, model_id = self.groups

        html = self.request(f"{self.root}/{model}-{model_id}/").text
        if 'id="content"' not in html:
            raise exception.NotFoundError("model")

        # prefer the name from the first <h2> element; otherwise derive
        # it from the URL slug with dots replaced by spaces
        heading = text.extr(html, "<h2", "</h2>")
        if heading:
            # skip the rest of the opening tag up to and including ">"
            model_name = text.unescape(heading[heading.find(">")+1:])
        else:
            model_name = text.unquote(model).replace(".", " ")

        data = {
            "model"     : model,
            "model_id"  : text.parse_int(model_id),
            "model_name": model_name,
            "_http_headers": {"Referer": None},
        }
        yield Message.Directory, "", data

        ajax_root = f"{self.root}/ajax/model/{model_id}/page-"
        ajax_headers = {
            "X-Requested-With": "XMLHttpRequest",
            "Sec-Fetch-Dest"  : "empty",
            "Sec-Fetch-Mode"  : "cors",
            "Sec-Fetch-Site"  : "same-origin",
        }

        # the initial page references its images via 'data-src',
        # the ajax pages via plain '<img src'
        fragment = text.extr(
            html, '<div id="content"', '<div id="showmore"')
        sources = text.extract_iter(fragment, 'data-src="', '"')

        pnum = 1
        # 'num' is the running 1-based index of the yielded files
        data["num"] = 0
        while True:
            for src in sources:
                url = self._normalize_url(src)
                if not url:
                    continue
                data["num"] += 1
                yield Message.Url, url, text.nameext_from_url(url, data)

            pnum += 1
            fragment = self.request(
                f"{ajax_root}{pnum}", headers=ajax_headers).text
            if not fragment:
                # an empty response ends the pagination
                break
            sources = text.extract_iter(fragment, '<img src="', '"')

View File

@@ -193,6 +193,7 @@ CATEGORY_MAP = {
"thebarchive" : "The /b/ Archive",
"thecollection" : "The /co/llection",
"thecollectionS" : "The /co/llection",
"thefap" : "TheFap",
"thehentaiworld" : "The Hentai World",
"tiktok" : "TikTok",
"tmohentai" : "TMOHentai",

86
test/results/thefap.py Normal file
View File

@@ -0,0 +1,86 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import thefap
# Expected results for the thefap extractors: three single-post URLs
# followed by two model-page URLs.
__tests__ = (
    # post with a single image hosted on cdn31.xpics.me
    {
        "#url" : "https://thefap.net/zoey.curly-374261/xpics/i8",
        "#class" : thefap.ThefapPostExtractor,
        "#results" : "https://cdn31.xpics.me/photo/2024/10/01/09/CR98EY1fSquX.jpg",
        "extension" : "jpg",
        "filename" : "CR98EY1fSquX",
        "kind" : "xpics",
        "model" : "zoey.curly",
        "model_id" : 374261,
        "model_name": "Zoey Curly",
        "num" : 1,
        "post_id" : 8,
    },
    # post whose image is served through i0.wp.com
    {
        "#url" : "https://thefap.net/analovesbananaas-979268/fap-onlyfans-0-1qcckka/i2",
        "#class" : thefap.ThefapPostExtractor,
        "#results" : "https://i0.wp.com/i.redd.it/b4o1olbgi8dg1.jpg",
        "extension" : "jpg",
        "kind" : "fap-onlyfans-0-1qcckka",
        "model" : "analovesbananaas",
        "model_id" : 979268,
        "model_name": "analovesbananaas",
        "num" : 1,
        "post_id" : 2,
    },
    # post with a pbs.twimg.com image; the result URL carries an ':orig'
    # suffix, which also shows up in the expected 'extension' value
    {
        "#url" : "https://thefap.net/tatted-mamma-979518/twpornstars/i1",
        "#class" : thefap.ThefapPostExtractor,
        "#results" : "https://pbs.twimg.com/media/GFmqJn2a8AAAtKu.jpg:orig",
        "extension" : "jpg:orig",
        "filename" : "GFmqJn2a8AAAtKu",
        "kind" : "twpornstars",
        "model" : "tatted-mamma",
        "model_id" : 979518,
        "model_name": "tatted_mamma",
        "num" : 1,
        "post_id" : 1,
    },
    # model page, limited to the first 100 files via '#range'
    {
        "#url" : "https://thefap.net/zoey.curly-374261/",
        "#class" : thefap.ThefapModelExtractor,
        "#range" : "1-100",
        "#count" : 100,
        "extension" : "jpg",
        "filename" : str,
        "model" : "zoey.curly",
        "model_id" : 374261,
        "model_name": "Zoey Curly",
        "num" : range(1, 100),
    },
    # model page expected to yield exactly the three listed files
    {
        "#url" : "https://thefap.net/analovesbananaas-979268/",
        "#class" : thefap.ThefapModelExtractor,
        "#results" : (
            "https://i0.wp.com/i.redd.it/icndsjbgi8dg1.jpg",
            "https://i0.wp.com/i.redd.it/b4o1olbgi8dg1.jpg",
            "https://i0.wp.com/i.redd.it/aqilnkbgi8dg1.jpg",
        ),
        "extension" : "jpg",
        "filename" : str,
        "model" : "analovesbananaas",
        "model_id" : 979268,
        "model_name": "analovesbananaas",
        "num" : range(1, 3),
    },
)