diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 8dd583da..afefe36f 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -193,6 +193,12 @@ Consider all sites to be NSFW unless otherwise known. Creators, Posts Cookies + + Fapachi + https://fapachi.com/ + Posts, User Profiles + + Fappic https://fappic.com/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index a563bfd6..d2bbcbb7 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -39,6 +39,7 @@ modules = [ "fallenangels", "fanbox", "fantia", + "fapachi", "flickr", "furaffinity", "fuskator", diff --git a/gallery_dl/extractor/fapachi.py b/gallery_dl/extractor/fapachi.py new file mode 100644 index 00000000..ee6d15a1 --- /dev/null +++ b/gallery_dl/extractor/fapachi.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://fapachi.com/""" + +from .common import Extractor, Message +from .. 
import text
# NOTE(review): the line above is the tail of the "from .. import text"
# statement; its "from .." half sits at the end of the preceding line.


class FapachiPostExtractor(Extractor):
    """Extractor for individual posts on fapachi.com"""
    category = "fapachi"
    subcategory = "post"
    directory_fmt = ("{category}", "{user}")
    filename_fmt = "{user}_{id}.{extension}"
    archive_fmt = "{user}_{id}"
    # group 1: user name, group 2: numeric media id
    # the negative lookahead keeps "/search/..." URLs from matching
    pattern = (r"(?:https?://)?(?:www\.)?fapachi\.com"
               r"/(?!search/)([^/?#]+)/media/(\d+)")
    root = "https://fapachi.com"
    test = (
        # NSFW
        ("https://fapachi.com/sonson/media/0082", {
            "pattern": (r"https://fapachi\.com/models/s/o/"
                        r"sonson/1/full/sonson_0082\.jpeg"),
            "keyword": {
                "user": "sonson",
                "id"  : "0082",
            },
        }),
        # NSFW
        ("https://fapachi.com/ferxiita/media/0159"),
    )

    def __init__(self, match):
        """Store the user name and media id captured by 'pattern'."""
        Extractor.__init__(self, match)
        self.user, self.id = match.groups()

    def items(self):
        """Yield one Directory message and one Url message for the post."""
        data = {
            "user": self.user,
            "id"  : self.id,
        }
        page = self.request("{}/{}/media/{}".format(
            self.root, self.user, self.id)).text
        # the image path is read from the 'src' attribute that directly
        # follows the 'd-block"' marker and is appended to 'root'
        url = self.root + text.extr(page, 'd-block" src="', '"')
        yield Message.Directory, data
        yield Message.Url, url, text.nameext_from_url(url, data)


class FapachiUserExtractor(Extractor):
    """Extractor for all posts from a fapachi user"""
    category = "fapachi"
    subcategory = "user"
    # group 1: user name, group 2: optional page number to start from
    pattern = (r"(?:https?://)?(?:www\.)?fapachi\.com"
               r"/(?!search(?:/|$))([^/?#]+)(?:/page/(\d+))?$")
    root = "https://fapachi.com"
    test = (
        ("https://fapachi.com/sonson", {
            "pattern": FapachiPostExtractor.pattern,
            "range"  : "1-50",
            "count"  : 50,
        }),
        ("https://fapachi.com/ferxiita/page/3"),
    )

    def __init__(self, match):
        """Store the user name and the page number to start from."""
        Extractor.__init__(self, match)
        self.user = match.group(1)
        # presumably falls back to 1 when the URL has no "/page/N" part;
        # verify against text.parse_int
        self.num = text.parse_int(match.group(2), 1)

    def items(self):
        """Yield a Queue message for every post link, page after page.

        Requests listing pages starting at 'self.num' and stops after the
        first page that does not contain a "Next page" link.
        """
        data = {"_extractor": FapachiPostExtractor}
        while True:
            page = self.request("{}/{}/page/{}".format(
                self.root, self.user, self.num)).text
            for post in text.extract_iter(page, 'model-media-prew">', ">"):
                # NOTE(review): the string literals in this loop and in
                # the check below were lost from the patch text and have
                # been reconstructed -- confirm against the site's markup
                url = self.root + text.extr(post, '<a href="', '"')
                yield Message.Queue, url, data

            if '">Next page</a>' not in page:
                return
            self.num += 1