diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 947442f9..eed7aa10 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -631,6 +631,12 @@ Consider all sites to be NSFW unless otherwise known. Articles, Tag Searches + + Seiso + https://seiso.party/ + Posts, User Profiles + + Sen Manga https://raw.senmanga.com/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index d927d708..d07c0636 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -105,6 +105,7 @@ modules = [ "sankaku", "sankakucomplex", "seiga", + "seisoparty", "senmanga", "sexcom", "simplyhentai", diff --git a/gallery_dl/extractor/seisoparty.py b/gallery_dl/extractor/seisoparty.py new file mode 100644 index 00000000..3dc89dee --- /dev/null +++ b/gallery_dl/extractor/seisoparty.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://seiso.party/""" + +from .common import Extractor, Message +from .. import text +import re + + +class SeisopartyExtractor(Extractor): + """Base class for seisoparty extractors""" + category = "seisoparty" + root = "https://seiso.party" + directory_fmt = ("{category}", "{service}", "{user}") + filename_fmt = "{id}_{title}_{num:>02}_{filename}.{extension}" + archive_fmt = "{service}_{user}_{id}_{num}" + + def __init__(self, match): + Extractor.__init__(self, match) + self.user_name = None + self._find_files = re.compile( + r'href="(https://cdn\.seiso\.party/files/[^"]+)').findall + + def items(self): + for post in self.posts(): + files = post.pop("files") + yield Message.Directory, post + for post["num"], url in enumerate(files, 1): + yield Message.Url, url, text.nameext_from_url(url, post) + + def _parse_post(self, page, post_id): + extr = text.extract_from(page) + return { + "service" : self.service, + "user" : self.user_id, + "username": self.user_name, + "id" : post_id, + "date" : text.parse_datetime(extr( + '
', '<'), + "%Y-%m-%d %H:%M:%S %Z"), + "title" : text.unescape(extr('class="post-title">', '<')), + "content" : text.unescape(extr("\n

\n", "\n

\n").strip()), + "files" : self._find_files(page), + } + + +class SeisopartyUserExtractor(SeisopartyExtractor): + """Extractor for all posts from a seiso.party user listing""" + subcategory = "user" + pattern = r"(?:https?://)?seiso\.party/artists/([^/?#]+)/([^/?#]+)" + test = ( + ("https://seiso.party/artists/fanbox/21", { + "pattern": r"https://cdn\.seiso\.party/files/fanbox/\d+/", + "count": ">=15", + "keyword": { + "content": str, + "date": "type:datetime", + "id": r"re:\d+", + "num": int, + "service": "fanbox", + "title": str, + "user": "21", + "username": "雨", + }, + }), + ) + + def __init__(self, match): + SeisopartyExtractor.__init__(self, match) + self.service, self.user_id = match.groups() + + def posts(self): + url = "{}/artists/{}/{}".format(self.root, self.service, self.user_id) + page = self.request(url).text + self.user_name, pos = text.extract(page, '', '<') + + url = self.root + text.extract( + page, 'href="', '"', page.index('id="content"', pos))[0] + response = self.request(url) + headers = {"Referer": url} + + while True: + yield self._parse_post(response.text, url.rpartition("/")[2]) + response = self.request(url + "/next", headers=headers) + if url == response.url: + return + url = headers["Referer"] = response.url + + +class SeisopartyPostExtractor(SeisopartyExtractor): + """Extractor for a single seiso.party post""" + subcategory = "post" + pattern = r"(?:https?://)?seiso\.party/post/([^/?#]+)/([^/?#]+)/([^/?#]+)" + test = ( + ("https://seiso.party/post/fanbox/21/371", { + "url": "75f13b92de0ce399b6163c3de18f1f36011c2366", + "count": 2, + "keyword": { + "content": "この前描いためぐるちゃんのPSDファイルです。\n" + "どうぞよろしくお願いします。", + "date": "dt:2021-05-06 12:38:31", + "extension": "re:psd|jpg", + "filename": "re:backcourt|ffb2ccb7a3586d05f9a4620329dd131e", + "id": "371", + "num": int, + "service": "fanbox", + "title": "MEGURU.PSD", + "user": "21", + "username": "雨", + }, + }), + ) + + def __init__(self, match): + SeisopartyExtractor.__init__(self, match) + self.service, self.user_id, self.post_id = match.groups() + + def posts(self): + url = "{}/artists/{}/{}".format(self.root, self.service, self.user_id) + page = self.request(url).text + self.user_name, pos = text.extract(page, '', '<') + + url = "{}/post/{}/{}/{}".format( + self.root, self.service, self.user_id, self.post_id) + return (self._parse_post(self.request(url).text, self.post_id),) diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 0e81f2dd..1bb5d654 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -90,6 +90,7 @@ CATEGORY_MAP = { "sankakucomplex" : "Sankaku Complex", "seaotterscans" : "Sea Otter Scans", "seiga" : "Niconico Seiga", + "seisoparty" : "Seiso", "senmanga" : "Sen Manga", "sensescans" : "Sense-Scans", "sexcom" : "Sex.com", diff --git a/test/test_results.py b/test/test_results.py index 9c6845d2..43b2f739 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -317,6 +317,8 @@ def setup_test_config(): config.set(("extractor", "kemonoparty"), "cookies", { "__ddg1": "0gBDGpJ3KZQmA4B9QH25", "__ddg2": "lmj5s1jnJOvhPXCX"}) + config.set(("extractor", "seisoparty"), "cookies", { + "__ddg1": "Y8rBxSDHO5UCEtQvzyI9", "__ddg2": "lmj5s1jnJOvhPXCX"}) config.set(("extractor", "mastodon.social"), "access-token", "Blf9gVqG7GytDTfVMiyYQjwVMQaNACgf3Ds3IxxVDUQ")