# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://thefap.net/"""

from .common import Extractor, Message
from .. import text

BASE_PATTERN = r"(?:https?://)?(?:www\.)?thefap\.net"


class ThefapExtractor(Extractor):
    """Base class for thefap extractors"""
    category = "thefap"
    root = "https://thefap.net"
    directory_fmt = ("{category}", "{model_name} ({model_id})")
    filename_fmt = "{model}_{num:>03}.{extension}"
    archive_fmt = "{model_id}_{filename}"

    def _normalize_url(self, url):
        """Return 'url' as a cleaned-up absolute URL.

        The following steps are applied in order:

        * A falsy 'url' (None, "") yields "".
        * Surrounding whitespace is stripped.
        * If the URL contains "?w=", everything from the last "?"
          onwards is removed; otherwise a trailing ":small" suffix
          is replaced with ":orig".
        * A protocol-relative URL ("//host/...") gets "https:" prepended;
          a root-relative path ("/...") gets 'self.root' prepended.

        Any other URL is returned unchanged apart from the stripping.
        """
        if not url:
            return ""

        url = url.strip()

        # presumably "?w=" is a width/resize parameter and ":small"/":orig"
        # are size-variant suffixes -- confirm against the site's markup
        if "?w=" in url:
            url = url[:url.rfind("?")]
        elif url.endswith(":small"):
            # 6 == len(":small")
            url = url[:-6] + ":orig"

        # "//" must be tested before "/", since the former also matches
        # the latter
        if url.startswith("//"):
            url = "https:" + url
        elif url.startswith("/"):
            url = self.root + url

        return url


class ThefapPostExtractor(ThefapExtractor):
    """Extractor for individual thefap.net posts"""
    subcategory = "post"
    pattern = (BASE_PATTERN +
               r"(/([^/?#]+)-(\d+)/([^/?#]+)/i(\d+))")
    example = "https://thefap.net/MODEL-12345/KIND/i12345"

    def items(self):
        path, model, model_id, kind, post_id = self.groups

        page = self.request(self.root + path).text
        if "Not Found" in page:
            raise self.exc.NotFoundError("post")

        # NOTE(review): the visible source ends here, in the middle of the
        # next statement. The fragment is preserved below as a comment so
        # nothing is lost; restore the remainder of items() from the
        # complete file before relying on this method.
        #   if model_name := text.extr(page, "