From 87202b8d7473a4d732d820a87416e734d063d9dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 22 Jul 2020 22:19:53 +0200 Subject: [PATCH] [inkbunny] add 'user' and 'post' extractors (#283) --- CHANGELOG.md | 2 + docs/supportedsites.rst | 1 + gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/inkbunny.py | 209 +++++++++++++++++++++++++++++++ gallery_dl/version.py | 2 +- test/test_results.py | 2 +- 6 files changed, 215 insertions(+), 2 deletions(-) create mode 100644 gallery_dl/extractor/inkbunny.py diff --git a/CHANGELOG.md b/CHANGELOG.md index ac09ee79..f26ff84a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ # Changelog +## Unreleased + ## 1.14.3 - 2020-07-18 ### Additions - [8muses] support `comics.8muses.com` URLs diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 775f23ec..7909460d 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -59,6 +59,7 @@ ImgBB https://imgbb.com/ Albums, individual Imag imgbox https://imgbox.com/ Galleries, individual Images imgth https://imgth.com/ Galleries imgur https://imgur.com/ |imgur-C| +Inkbunny https://inkbunny.net/ Posts, User Profiles Instagram https://www.instagram.com/ |instagram-C| Optional Issuu https://issuu.com/ Publications, User Profiles Jaimini's Box https://jaiminisbox.com/reader/ Chapters, Manga diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 31846633..0f50705f 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -54,6 +54,7 @@ modules = [ "imgbox", "imgth", "imgur", + "inkbunny", "instagram", "issuu", "kabeuchi", diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py new file mode 100644 index 00000000..5836e4f9 --- /dev/null +++ b/gallery_dl/extractor/inkbunny.py @@ -0,0 +1,209 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the 
terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://inkbunny.net/""" + +from .common import Extractor, Message +from .. import text, exception +from ..cache import cache + + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?inkbunny\.net" + + +class InkbunnyExtractor(Extractor): + """Base class for inkbunny extractors""" + category = "inkbunny" + directory_fmt = ("{category}", "{post[username]!l}") + filename_fmt = "{post[submission_id]} {file_id} {post[title]}.{extension}" + archive_fmt = "{file_id}" + root = "https://inkbunny.net" + + def __init__(self, match): + Extractor.__init__(self, match) + self.item = match.group(1) + + def items(self): + to_bool = ("deleted", "digitalsales", "favorite", "forsale", + "friends_only", "guest_block", "hidden", "printsales", + "public", "scraps") + + for post in self.posts(): + post["date"] = text.parse_datetime( + post["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z") + post["tags"] = [kw["keyword_name"] for kw in post["keywords"]] + files = post["files"] + + for key in to_bool: + post[key] = (post[key] == "t") + + del post["keywords"] + del post["files"] + + yield Message.Directory, {"post": post} + for file in files: + file["post"] = post + file["deleted"] = (file["deleted"] == "t") + file["date"] = text.parse_datetime( + file["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z") + text.nameext_from_url(file["file_name"], file) + yield Message.Url, file["file_url_full"], file + + +class InkbunnyUserExtractor(InkbunnyExtractor): + """Extractor for inkbunny user profile""" + subcategory = "user" + pattern = BASE_PATTERN + r"/(?!s/)([^/?&#]+)" + test = ("https://inkbunny.net/soina", { + # "pattern": "", + "range": "20-50", + "keyword": { + "date": "type:datetime", + "deleted": bool, + "file_id": "re:[0-9]+", + "filename": r"re:[0-9]+_soina_\w+", + "full_file_md5": "re:[0-9a-f]{32}", + "mimetype": str, + "submission_file_order": "re:[0-9]+", + 
"submission_id": "re:[0-9]+", + "user_id": "20969", + "post": { + "comments_count": "re:[0-9]+", + "date": "type:datetime", + "deleted": bool, + "digitalsales": bool, + "favorite": bool, + "favorites_count": "re:[0-9]+", + "forsale": bool, + "friends_only": bool, + "guest_block": bool, + "hidden": bool, + "pagecount": "re:[0-9]+", + "pools": list, + "pools_count": int, + "printsales": bool, + "public": bool, + "rating_id": "re:[0-9]+", + "rating_name": str, + "ratings": list, + "scraps": bool, + "submission_id": "re:[0-9]+", + "tags": list, + "title": str, + "type_name": str, + "user_id": "20969", + "username": "soina", + "views": str, + }, + }, + }) + + def posts(self): + api = InkbunnyAPI(self) + return api.search(username=self.item) + + +class InkbunnyPostExtractor(InkbunnyExtractor): + """Extractor for individual Inkbunny posts""" + subcategory = "post" + pattern = BASE_PATTERN + r"/s/(\d+)" + test = ( + ("https://inkbunny.net/s/1829715", { + "url": "5967eadf1fcfa7214744f5f814717ab73d14dfd3", + "content": "cf69d8dddf0822a12b4eef1f4b2258bd600b36c8", + }), + ("https://inkbunny.net/s/2044094", { + "count": 4, + }), + ) + + def posts(self): + api = InkbunnyAPI(self) + return api.detail(self.item) + + +class InkbunnyAPI(): + """Interface for the Inkbunny API + + Ref: https://wiki.inkbunny.net/wiki/API + """ + + def __init__(self, extractor): + self.extractor = extractor + self.session_id = None + + def detail(self, submission_ids): + """Get full details about submissions with the given IDs""" + params = {"submission_ids": submission_ids} + return self._call("submissions", params)["submissions"] + + def search(self, username): + """Perform a search""" + params = {"username": username} + return self._pagination_search(params) + + def set_allowed_ratings(self, nudity=True, sexual=True, + violence=True, strong_violence=True): + """Change allowed submission ratings""" + params = { + "tag[2]": "yes" if nudity else "no", + "tag[3]": "yes" if violence else "no", + "tag[4]":
"yes" if sexual else "no", + "tag[5]": "yes" if strong_violence else "no", + } + self._call("userrating", params) + + def authenticate(self): + username, password = self.extractor._get_auth_info() + if username: + self.session_id = self._authenticate_impl(username, password) + else: + self.session_id = self._authenticate_impl("guest", "") + self.set_allowed_ratings() + + @cache(maxage=360*24*3600, keyarg=1) + def _authenticate_impl(self, username, password): + self.extractor.log.info("Logging in as %s", username) + + url = "https://inkbunny.net/api_login.php" + data = {"username": username, "password": password} + response = self.extractor.request(url, method="POST", data=data) + + data = response.json() + if "sid" not in data: + raise exception.AuthenticationError(data.get("error_message")) + return data["sid"] + + def _call(self, endpoint, params): + if not self.session_id: + self.authenticate() + + url = "https://inkbunny.net/api_" + endpoint + ".php" + params["sid"] = self.session_id + response = self.extractor.request(url, params=params) + + if response.status_code < 400: + return response.json() + + def _pagination_search(self, params): + params["get_rid"] = "yes" + params["submission_ids_only"] = "yes" + + while True: + data = self._call("search", params) + yield from self.detail( + ",".join(s["submission_id"] for s in data["submissions"])) + + if data["page"] >= data["pages_count"]: + return + if "get_rid" in params: + del params["get_rid"] + params["rid"] = data["rid"] + params["page"] = 2 + else: + params["page"] += 1 diff --git a/gallery_dl/version.py b/gallery_dl/version.py index fd520775..8914de4d 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. 
-__version__ = "1.14.3" +__version__ = "1.14.4-dev" diff --git a/test/test_results.py b/test/test_results.py index dd1ed1d5..bd480cb1 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -317,7 +317,7 @@ def setup_test_config(): config.set(("extractor", "mangoxo") , "password", "5zbQF10_5u25259Ma") for category in ("danbooru", "instagram", "twitter", "subscribestar", - "e621"): + "e621", "inkbunny"): config.set(("extractor", category), "username", None) config.set(("extractor", "mastodon.social"), "access-token",