* adding site support for thefap.com * fixing typo in url tld * improve & simplify 'model' extractor * update 'post' extractor * update docs/supportedsites * add tests --------- Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
This commit is contained in:
@@ -1087,6 +1087,12 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Posts, Tag Searches</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr id="thefap" title="thefap">
|
||||
<td>TheFap</td>
|
||||
<td>https://thefap.net/</td>
|
||||
<td>Models, Posts</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr id="tiktok" title="tiktok">
|
||||
<td>TikTok</td>
|
||||
<td>https://www.tiktok.com/</td>
|
||||
|
||||
@@ -206,6 +206,7 @@ modules = [
|
||||
"tcbscans",
|
||||
"telegraph",
|
||||
"tenor",
|
||||
"thefap",
|
||||
"thehentaiworld",
|
||||
"tiktok",
|
||||
"tmohentai",
|
||||
|
||||
127
gallery_dl/extractor/thefap.py
Normal file
127
gallery_dl/extractor/thefap.py
Normal file
@@ -0,0 +1,127 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://thefap.net/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, exception
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?thefap\.net"
|
||||
|
||||
|
||||
class ThefapExtractor(Extractor):
    """Base class for thefap extractors"""
    category = "thefap"
    root = "https://thefap.net"
    directory_fmt = ("{category}", "{model_name} ({model_id})")
    filename_fmt = "{model}_{num:>03}.{extension}"
    # Key archive entries on the remote filename. The previous value
    # embedded the literal text "(unknown)", so all files of one model
    # shared a single archive ID and everything after the first download
    # was skipped by '--download-archive'.
    archive_fmt = "{model_id}_{filename}"

    def _normalize_url(self, url):
        """Clean up a scraped image URL and make it absolute.

        - strips resize query parameters ("?w=...") to get the full image
        - upgrades Twitter ":small" thumbnails to ":orig" originals
        - resolves protocol-relative ("//...") and site-relative ("/...")
          URLs against https: / self.root

        Returns "" for empty input, so callers can filter results with a
        simple truth test.
        """
        if not url:
            return ""
        url = url.strip()
        if "?w=" in url:
            # drop the resize query string
            url = url[:url.rfind("?")]
        elif url.endswith(":small"):
            # Twitter media: request the original resolution instead
            url = url[:-6] + ":orig"
        if url.startswith("//"):
            url = "https:" + url
        elif url.startswith("/"):
            url = self.root + url
        return url
|
||||
|
||||
|
||||
class ThefapPostExtractor(ThefapExtractor):
    """Extractor for individual thefap.net posts"""
    subcategory = "post"
    pattern = (BASE_PATTERN +
               r"(/([^/?#]+)-(\d+)/([^/?#]+)/i(\d+))")
    example = "https://thefap.net/MODEL-12345/KIND/i12345"

    def items(self):
        path, model, model_id, kind, post_id = self.groups

        page = self.request(self.root + path).text
        if "Not Found" in page:
            raise exception.NotFoundError("post")

        # prefer the display name from the page title;
        # otherwise derive one from the URL slug
        model_name = text.extr(page, "<title>", " / ")
        if model_name:
            model_name = text.unescape(model_name)
        else:
            model_name = text.unquote(model).replace(".", " ")

        data = {
            "model"     : model,
            "model_id"  : text.parse_int(model_id),
            "model_name": model_name,
            "kind"      : kind,
            "post_id"   : text.parse_int(post_id),
            "_http_headers": {"Referer": None},
        }
        yield Message.Directory, "", data

        data["num"] = 0
        # restrict extraction to the post body following the page header
        body = text.extract(
            page, "\n</div>", "\n<!---->", page.index("</header>"))[0]
        for src in text.extract_iter(body, '<img src="', '"'):
            url = self._normalize_url(src)
            if url:
                data["num"] += 1
                yield Message.Url, url, text.nameext_from_url(url, data)
|
||||
|
||||
|
||||
class ThefapModelExtractor(ThefapExtractor):
    """Extractor for thefap.net model pages"""
    subcategory = "model"
    pattern = BASE_PATTERN + r"/([^/?#]+)-(\d+)"
    example = "https://thefap.net/MODEL-12345/"

    def items(self):
        model, model_id = self.groups

        page = self.request(f"{self.root}/{model}-{model_id}/").text
        if 'id="content"' not in page:
            raise exception.NotFoundError("model")

        # display name from the first <h2>; otherwise use the URL slug
        heading = text.extr(page, "<h2", "</h2>")
        if heading:
            model_name = text.unescape(heading[heading.find(">")+1:])
        else:
            model_name = text.unquote(model).replace(".", " ")

        data = {
            "model"     : model,
            "model_id"  : text.parse_int(model_id),
            "model_name": model_name,
            "_http_headers": {"Referer": None},
        }
        yield Message.Directory, "", data

        # further pages are served by the site's AJAX endpoint
        base = f"{self.root}/ajax/model/{model_id}/page-"
        headers = {
            "X-Requested-With": "XMLHttpRequest",
            "Sec-Fetch-Dest"  : "empty",
            "Sec-Fetch-Mode"  : "cors",
            "Sec-Fetch-Site"  : "same-origin",
        }

        # page 1 lazy-loads its images through 'data-src' attributes
        content = text.extr(page, '<div id="content"', '<div id="showmore"')
        images = text.extract_iter(content, 'data-src="', '"')
        page_num = 1
        data["num"] = 0

        while True:
            for src in images:
                url = self._normalize_url(src)
                if url:
                    data["num"] += 1
                    yield Message.Url, url, text.nameext_from_url(url, data)

            # an empty AJAX response marks the end of pagination
            page_num += 1
            content = self.request(base + str(page_num), headers=headers).text
            if not content:
                break
            images = text.extract_iter(content, '<img src="', '"')
|
||||
@@ -193,6 +193,7 @@ CATEGORY_MAP = {
|
||||
"thebarchive" : "The /b/ Archive",
|
||||
"thecollection" : "The /co/llection",
|
||||
"thecollectionS" : "The /co/llection",
|
||||
"thefap" : "TheFap",
|
||||
"thehentaiworld" : "The Hentai World",
|
||||
"tiktok" : "TikTok",
|
||||
"tmohentai" : "TMOHentai",
|
||||
|
||||
86
test/results/thefap.py
Normal file
86
test/results/thefap.py
Normal file
@@ -0,0 +1,86 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import thefap
|
||||
|
||||
|
||||
__tests__ = (
# post with an xpics.me CDN image
{
    "#url"    : "https://thefap.net/zoey.curly-374261/xpics/i8",
    "#class"  : thefap.ThefapPostExtractor,
    "#results": "https://cdn31.xpics.me/photo/2024/10/01/09/CR98EY1fSquX.jpg",

    "extension" : "jpg",
    "filename"  : "CR98EY1fSquX",
    "kind"      : "xpics",
    "model"     : "zoey.curly",
    "model_id"  : 374261,
    "model_name": "Zoey Curly",
    "num"       : 1,
    "post_id"   : 8,
},

# post with a wp.com-proxied reddit image
{
    "#url"    : "https://thefap.net/analovesbananaas-979268/fap-onlyfans-0-1qcckka/i2",
    "#class"  : thefap.ThefapPostExtractor,
    "#results": "https://i0.wp.com/i.redd.it/b4o1olbgi8dg1.jpg",

    "extension" : "jpg",
    "kind"      : "fap-onlyfans-0-1qcckka",
    "model"     : "analovesbananaas",
    "model_id"  : 979268,
    "model_name": "analovesbananaas",
    "num"       : 1,
    "post_id"   : 2,
},

# post with a Twitter image (':small' upgraded to ':orig')
{
    "#url"    : "https://thefap.net/tatted-mamma-979518/twpornstars/i1",
    "#class"  : thefap.ThefapPostExtractor,
    "#results": "https://pbs.twimg.com/media/GFmqJn2a8AAAtKu.jpg:orig",

    "extension" : "jpg:orig",
    "filename"  : "GFmqJn2a8AAAtKu",
    "kind"      : "twpornstars",
    "model"     : "tatted-mamma",
    "model_id"  : 979518,
    "model_name": "tatted_mamma",
    "num"       : 1,
    "post_id"   : 1,
},

# model page with multiple AJAX result pages
{
    "#url"  : "https://thefap.net/zoey.curly-374261/",
    "#class": thefap.ThefapModelExtractor,
    "#range": "1-100",
    "#count": 100,

    "extension" : "jpg",
    "filename"  : str,
    "model"     : "zoey.curly",
    "model_id"  : 374261,
    "model_name": "Zoey Curly",
    "num"       : range(1, 100),
},

# model page with a single page of results
{
    "#url"    : "https://thefap.net/analovesbananaas-979268/",
    "#class"  : thefap.ThefapModelExtractor,
    "#results": (
        "https://i0.wp.com/i.redd.it/icndsjbgi8dg1.jpg",
        "https://i0.wp.com/i.redd.it/b4o1olbgi8dg1.jpg",
        "https://i0.wp.com/i.redd.it/aqilnkbgi8dg1.jpg",
    ),

    "extension" : "jpg",
    "filename"  : str,
    "model"     : "analovesbananaas",
    "model_id"  : 979268,
    "model_name": "analovesbananaas",
    "num"       : range(1, 3),
},

)
|
||||
Reference in New Issue
Block a user