From e5076ba056f204b8d22906ee0da75ef1ebc956a7 Mon Sep 17 00:00:00 2001 From: enduser420 Date: Tue, 13 Dec 2022 21:33:09 +0530 Subject: [PATCH] [fapello] add 'post', 'model' and 'path' extractors --- docs/supportedsites.md | 6 ++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/fapello.py | 151 +++++++++++++++++++++++++++++++ scripts/supportedsites.py | 3 + 4 files changed, 161 insertions(+) create mode 100644 gallery_dl/extractor/fapello.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 55252789..e6a1c845 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -199,6 +199,12 @@ Consider all sites to be NSFW unless otherwise known. Posts, User Profiles + + Fapello + https://fapello.com/ + Models, Videos, Trending Posts, Popular Videos, Top Models, Posts + + Fappic https://fappic.com/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 383c76b3..444075c1 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -39,6 +39,7 @@ modules = [ "fallenangels", "fanbox", "fantia", "fapachi", + "fapello", "flickr", "furaffinity", diff --git a/gallery_dl/extractor/fapello.py b/gallery_dl/extractor/fapello.py new file mode 100644 index 00000000..0d425ca5 --- /dev/null +++ b/gallery_dl/extractor/fapello.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://fapello.com/""" + +from .common import Extractor, Message +from .. 
import text, exception + + +class FapelloPostExtractor(Extractor): + """Extractor for individual posts on fapello.com""" + category = "fapello" + subcategory = "post" + directory_fmt = ("{category}", "{model}") + archive_fmt = "{type}_{model}_{id}" + pattern = (r"(?:https?://)?(?:www\.)?fapello\.com" + r"/(?!search/|popular_videos/)([^/?#]+)/(\d+)") + test = ( + ("https://fapello.com/carrykey/530/", { + "pattern": (r"https://fapello\.com/content/c/a" + r"/carrykey/1000/carrykey_0530\.jpg"), + "keyword": { + "model": "carrykey", + "id" : 530, + "type" : "photo", + "thumbnail": "", + }, + }), + ("https://fapello.com/vladislava-661/693/", { + "pattern": (r"https://cdn\.fapello\.com/content/v/l" + r"/vladislava-661/1000/vladislava-661_0693\.mp4"), + "keyword": { + "model": "vladislava-661", + "id" : 693, + "type" : "video", + "thumbnail": ("https://fapello.com/content/v/l" + "/vladislava-661/1000/vladislava-661_0693.jpg"), + }, + }), + ("https://fapello.com/carrykey/000/", { + "exception": exception.NotFoundError, + }), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.model, self.id = match.groups() + + def items(self): + url = "https://fapello.com/{}/{}/".format(self.model, self.id) + page = text.extr( + self.request(url, allow_redirects=False).text, + 'class="uk-align-center"', "", None) + if page is None: + raise exception.NotFoundError("post") + + data = { + "model": self.model, + "id" : text.parse_int(self.id), + "type" : "video" if 'type="video' in page else "photo", + "thumbnail": text.extr(page, 'poster="', '"'), + } + url = text.extr(page, 'src="', '"') + yield Message.Directory, data + yield Message.Url, url, text.nameext_from_url(url, data) + + +class FapelloModelExtractor(Extractor): + """Extractor for all posts from a fapello model""" + category = "fapello" + subcategory = "model" + pattern = (r"(?:https?://)?(?:www\.)?fapello\.com" + r"/(?!top-(?:likes|followers)|popular_videos" + r"|videos|trending|search/?$)" + 
r"([^/?#]+)/?$") + test = ( + ("https://fapello.com/hyoon/", { + "pattern": FapelloPostExtractor.pattern, + "range" : "1-50", + "count" : 50, + }), + ("https://fapello.com/kobaebeefboo/"), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.model = match.group(1) + + def items(self): + num = 1 + data = {"_extractor": FapelloPostExtractor} + while True: + url = "https://fapello.com/ajax/model/{}/page-{}/".format( + self.model, num) + page = self.request(url).text + if not page: + return + + for url in text.extract_iter(page, '', ""): + yield Message.Queue, text.extr(item, '