From 43387c535deaaa7928b8c4f42bbb5bbfab3cb94f Mon Sep 17 00:00:00 2001 From: Stephon Parker Date: Wed, 14 Jan 2026 15:11:56 -0500 Subject: [PATCH] [thefap] add support (#8821 #8822) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * adding site support for thefap.com * fixing typo in url tld * improve & simplify 'model' extractor * update 'post' extractor * update docs/supportedsites * add tests --------- Co-authored-by: Mike Fährmann --- docs/supportedsites.md | 6 ++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/thefap.py | 127 +++++++++++++++++++++++++++++++ scripts/supportedsites.py | 1 + test/results/thefap.py | 86 +++++++++++++++++++++ 5 files changed, 221 insertions(+) create mode 100644 gallery_dl/extractor/thefap.py create mode 100644 test/results/thefap.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index c7a40659..01326dd0 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -1087,6 +1087,12 @@ Consider all listed sites to potentially be NSFW. Posts, Tag Searches + + TheFap + https://thefap.net/ + Models, Posts + + TikTok https://www.tiktok.com/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 9f182041..e747c5d9 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -206,6 +206,7 @@ modules = [ "tcbscans", "telegraph", "tenor", + "thefap", "thehentaiworld", "tiktok", "tmohentai", diff --git a/gallery_dl/extractor/thefap.py b/gallery_dl/extractor/thefap.py new file mode 100644 index 00000000..71b68873 --- /dev/null +++ b/gallery_dl/extractor/thefap.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://thefap.net/""" + +from .common import Extractor, Message +from .. import text, exception + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?thefap\.net" + + +class ThefapExtractor(Extractor): + """Base class for thefap extractors""" + category = "thefap" + root = "https://thefap.net" + directory_fmt = ("{category}", "{model_name} ({model_id})") + filename_fmt = "{model}_{num:>03}.{extension}" + archive_fmt = "{model_id}_{filename}" + + def _normalize_url(self, url): + if not url: + return "" + url = url.strip() + if "?w=" in url: + url = url[:url.rfind("?")] + elif url.endswith(":small"): + url = url[:-6] + ":orig" + if url.startswith("//"): + url = "https:" + url + elif url.startswith("/"): + url = self.root + url + return url + + +class ThefapPostExtractor(ThefapExtractor): + """Extractor for individual thefap.net posts""" + subcategory = "post" + pattern = (BASE_PATTERN + + r"(/([^/?#]+)-(\d+)/([^/?#]+)/i(\d+))") + example = "https://thefap.net/MODEL-12345/KIND/i12345" + + def items(self): + path, model, model_id, kind, post_id = self.groups + + page = self.request(self.root + path).text + if "Not Found" in page: + raise exception.NotFoundError("post") + + if model_name := text.extr(page, "", " / "): + model_name = text.unescape(model_name) + else: + model_name = text.unquote(model).replace(".", " ") + + data = { + "model" : model, + "model_id" : text.parse_int(model_id), + "model_name": model_name, + "kind" : kind, + "post_id" : text.parse_int(post_id), + "_http_headers": {"Referer": None}, + } + yield Message.Directory, "", data + + data["num"] = 0 + page = text.extract( + page, "\n</div>", "\n<!---->", page.index("</header>"))[0] + for url in text.extract_iter(page, '<img src="', '"'): + if url := self._normalize_url(url): + data["num"] += 1 + yield Message.Url, url, text.nameext_from_url(url, data) + + +class ThefapModelExtractor(ThefapExtractor): + """Extractor for thefap.net model pages""" + subcategory = "model" + pattern = BASE_PATTERN + r"/([^/?#]+)-(\d+)" + example = "https://thefap.net/MODEL-12345/" + + def items(self): + model, model_id = self.groups + + url = f"{self.root}/{model}-{model_id}/" + page = self.request(url).text + + if 'id="content"' not in page: + raise exception.NotFoundError("model") + + if model_name := text.extr(page, "<h2", "</h2>"): + model_name = text.unescape(model_name[model_name.find(">")+1:]) + else: + model_name = text.unquote(model).replace(".", " ") + + data = { + "model" : model, + "model_id" : text.parse_int(model_id), + "model_name": model_name, + "_http_headers": {"Referer": None}, + } + yield Message.Directory, "", data + + base = f"{self.root}/ajax/model/{model_id}/page-" + headers = { + "X-Requested-With": "XMLHttpRequest", + "Sec-Fetch-Dest" : "empty", + "Sec-Fetch-Mode" : "cors", + "Sec-Fetch-Site" : "same-origin", + } + + page = text.extr(page, '<div id="content"', '<div id="showmore"') + imgs = text.extract_iter(page, 'data-src="', '"') + pnum = 1 + data["num"] = 0 + + while True: + for url in imgs: + if url := self._normalize_url(url): + data["num"] += 1 + yield Message.Url, url, text.nameext_from_url(url, data) + + pnum += 1 + page = self.request(base + str(pnum), headers=headers).text + if not page: + break + imgs = text.extract_iter(page, '<img src="', '"') diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index cdebb3f6..8d130e54 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -193,6 +193,7 @@ CATEGORY_MAP = { "thebarchive" : "The /b/ Archive", "thecollection" : "The /co/llection", "thecollectionS" : "The /co/llection", + "thefap" : "TheFap", "thehentaiworld" : "The Hentai World", "tiktok" : "TikTok", "tmohentai" : "TMOHentai", diff --git a/test/results/thefap.py b/test/results/thefap.py new file mode 100644 index 00000000..f6890324 --- /dev/null +++ b/test/results/thefap.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from gallery_dl.extractor import thefap + + +__tests__ = ( +{ + "#url" : "https://thefap.net/zoey.curly-374261/xpics/i8", + "#class" : thefap.ThefapPostExtractor, + "#results" : "https://cdn31.xpics.me/photo/2024/10/01/09/CR98EY1fSquX.jpg", + + "extension" : "jpg", + "filename" : "CR98EY1fSquX", + "kind" : "xpics", + "model" : "zoey.curly", + "model_id" : 374261, + "model_name": "Zoey Curly", + "num" : 1, + "post_id" : 8, +}, + +{ + "#url" : "https://thefap.net/analovesbananaas-979268/fap-onlyfans-0-1qcckka/i2", + "#class" : thefap.ThefapPostExtractor, + "#results" : "https://i0.wp.com/i.redd.it/b4o1olbgi8dg1.jpg", + + "extension" : "jpg", + "kind" : "fap-onlyfans-0-1qcckka", + "model" : "analovesbananaas", + "model_id" : 979268, + "model_name": "analovesbananaas", + "num" : 1, + "post_id" : 2, +}, + +{ + "#url" : "https://thefap.net/tatted-mamma-979518/twpornstars/i1", + "#class" : thefap.ThefapPostExtractor, + "#results" : "https://pbs.twimg.com/media/GFmqJn2a8AAAtKu.jpg:orig", + + "extension" : "jpg:orig", + "filename" : "GFmqJn2a8AAAtKu", + "kind" : "twpornstars", + "model" : "tatted-mamma", + "model_id" : 979518, + "model_name": "tatted_mamma", + "num" : 1, + "post_id" : 1, +}, + +{ + "#url" : "https://thefap.net/zoey.curly-374261/", + "#class" : thefap.ThefapModelExtractor, + "#range" : "1-100", + "#count" : 100, + + "extension" : "jpg", + "filename" : str, + "model" : "zoey.curly", + "model_id" : 374261, + "model_name": "Zoey Curly", + "num" : range(1, 100), +}, + +{ + "#url" : "https://thefap.net/analovesbananaas-979268/", + "#class" : thefap.ThefapModelExtractor, + "#results" : ( + "https://i0.wp.com/i.redd.it/icndsjbgi8dg1.jpg", + "https://i0.wp.com/i.redd.it/b4o1olbgi8dg1.jpg", + "https://i0.wp.com/i.redd.it/aqilnkbgi8dg1.jpg", + ), + + "extension" : "jpg", + "filename" : str, + "model" : "analovesbananaas", + "model_id" : 979268, + "model_name": "analovesbananaas", + "num" : range(1, 3), +}, + +)