diff --git a/docs/configuration.rst b/docs/configuration.rst index 2f7ef7ca..ae650009 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -696,6 +696,34 @@ Description * ``false``: Converted video files +extractor.derpibooru.api-key +---------------------------- +Type + ``string`` +Default + ``null`` +Description + Your `Derpibooru API Key `__, + to use your account's browsing settings and filters. + + +extractor.derpibooru.filter +--------------------------- +Type + ``integer`` +Default + ``null`` +Example + ``56027`` (`Everything `_ filter) +Description + The content filter ID to use. + + Setting an explicit filter ID overrides any default filters and can be used + to access 18+ content without `API Key `_. + + See `Filters `_ for details. + + extractor.deviantart.extra -------------------------- Type diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 19015feb..320fb1b9 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -22,6 +22,7 @@ baraag https://baraag.net/ Images from Statuses, U Behance https://www.behance.net/ Collections, Galleries, User Profiles Blogger https://www.blogger.com/ Blogs, Posts, Search Results Danbooru https://danbooru.donmai.us/ Pools, Popular Images, Posts, Tag Searches Supported +Derpibooru https://derpibooru.org/ Galleries, Posts, Search Results `API Key `__ Desuarchive https://desuarchive.org/ Boards, Search Results, Threads DeviantArt https://www.deviantart.com/ |deviantart-C| `OAuth `__ Doki Reader https://kobato.hologfx.com/reader/ Chapters, Manga diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index b38cddcf..36107d9f 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -24,6 +24,7 @@ modules = [ "behance", "blogger", "danbooru", + "derpibooru", "deviantart", "dynastyscans", "e621", diff --git a/gallery_dl/extractor/derpibooru.py b/gallery_dl/extractor/derpibooru.py new file mode 100644 index 00000000..2aa15031 
--- /dev/null
+++ b/gallery_dl/extractor/derpibooru.py
@@ -0,0 +1,233 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://derpibooru.org/"""
+
+from .booru import BooruExtractor
+from .. import text, exception
+import operator
+import re
+
+BASE_PATTERN = r"(?:https?://)?derpibooru\.org"
+
+# Escape sequences used by tag slugs in '/tags/<slug>' URLs.
+# NOTE(review): table follows Philomena's slug scheme -- confirm upstream.
+_SLUG_ESCAPES = {
+    "-colon-": ":",
+    "-dash-": "-",
+    "-dot-": ".",
+    "-plus-": "+",
+    "-fwslash-": "/",
+    "-bwslash-": "\\",
+}
+_SLUG_PATTERN = re.compile("|".join(map(re.escape, _SLUG_ESCAPES)))
+
+
+def _unslug(slug):
+    """Convert the tag slug of a '/tags/<slug>' URL into a search query"""
+    # A literal '+' is escaped as '-plus-', so any '+' left is a space.
+    # Escapes are resolved in one left-to-right pass; replacing them one
+    # kind at a time could match across two adjacent escape sequences.
+    query = _SLUG_PATTERN.sub(
+        lambda match: _SLUG_ESCAPES[match.group(0)], slug.replace("+", " "))
+    return text.unquote(query)
+
+
+class DerpibooruExtractor(BooruExtractor):
+    """Base class for derpibooru extractors"""
+    category = "derpibooru"
+    filename_fmt = "{filename}.{extension}"
+    archive_fmt = "{id}"
+    root = "https://derpibooru.org"
+    request_interval = 1.0
+    per_page = 50
+
+    # Getter for a post's "view_url" value; presumably what BooruExtractor
+    # uses as the download URL -- TODO confirm in booru.py.
+    _file_url = operator.itemgetter("view_url")
+
+    @staticmethod
+    def _prepare(post):
+        """Add a 'date' entry parsed from the post's 'created_at' value"""
+        post["date"] = text.parse_datetime(
+            post["created_at"], "%Y-%m-%dT%H:%M:%S")
+
+    @staticmethod
+    def _extended_tags(post):
+        """Intentionally do nothing for derpibooru posts"""
+        pass
+
+    def _pagination(self, url, params):
+        """Yield the image objects of all result pages for 'url'
+
+        'params' is copied before paging, API key, and filter values are
+        added, so the dict passed in by the caller is never modified.
+        """
+        params = dict(params)
+        params["page"] = 1
+        params["per_page"] = self.per_page
+
+        api_key = self.config("api-key")
+        if api_key:
+            params["key"] = api_key
+
+        filter_id = self.config("filter")
+        if filter_id:
+            params["filter_id"] = filter_id
+
+        while True:
+            images = self.request(url, params=params).json()["images"]
+            yield from images
+
+            # a page with fewer than 'per_page' results is the last one
+            if len(images) < self.per_page:
+                return
+            params["page"] += 1
+
+
+class DerpibooruPostExtractor(DerpibooruExtractor):
+    """Extractor for single posts from derpibooru.org"""
+    subcategory = "post"
+    pattern = BASE_PATTERN + r"/images/(\d+)"
+    test = ("https://derpibooru.org/images/1", {
+        "content": "88449eeb0c4fa5d3583d0b794f6bc1d70bf7f889",
+        "count": 1,
+        "keyword": {
+            "animated": False,
+            "aspect_ratio": 1.0,
+            "comment_count": int,
+            "created_at": "2012-01-02T03:12:33",
+            "date": "dt:2012-01-02 03:12:33",
+            "deletion_reason": None,
+            "description": "",
+            "downvotes": int,
+            "duplicate_of": None,
+            "duration": 0.04,
+            "extension": "png",
+            "faves": int,
+            "first_seen_at": "2012-01-02T03:12:33",
+            "format": "png",
+            "height": 900,
+            "hidden_from_users": False,
+            "id": 1,
+            "mime_type": "image/png",
+            "name": "1__safe_fluttershy_solo_cloud_happy_flying_upvotes+galore"
+                    "_artist-colon-speccysy_get_sunshine",
+            "orig_sha512_hash": None,
+            "processed": True,
+            "representations": dict,
+            "score": int,
+            "sha512_hash": "f16c98e2848c2f1bfff3985e8f1a54375cc49f78125391aeb8"
+                           "0534ce011ead14e3e452a5c4bc98a66f56bdfcd07ef7800663"
+                           "b994f3f343c572da5ecc22a9660f",
+            "size": 860914,
+            "source_url": "https://www.deviantart.com/speccysy/art"
+                          "/Afternoon-Flight-215193985",
+            "spoilered": False,
+            "tag_count": 36,
+            "tag_ids": list,
+            "tags": list,
+            "thumbnails_generated": True,
+            "updated_at": "2020-05-28T13:14:07",
+            "uploader": "Clover the Clever",
+            "uploader_id": 211188,
+            "upvotes": int,
+            "view_url": str,
+            "width": 900,
+            "wilson_score": float,
+        },
+    })
+
+    def __init__(self, match):
+        DerpibooruExtractor.__init__(self, match)
+        self.image_id = match.group(1)
+
+    def posts(self):
+        """Return a 1-tuple with the image object of the requested post"""
+        url = self.root + "/api/v1/json/images/" + self.image_id
+        return (self.request(url).json()["image"],)
+
+
+class DerpibooruSearchExtractor(DerpibooruExtractor):
+    """Extractor for search results on derpibooru.org"""
+    subcategory = "search"
+    directory_fmt = ("{category}", "search", "{search_tags}")
+    pattern = BASE_PATTERN + r"/(?:search/?\?([^#]+)|tags/([^/?#]+))"
+    test = (
+        ("https://derpibooru.org/search?q=cute", {
+            "range": "40-60",
+            "count": 21,
+        }),
+        ("https://derpibooru.org/tags/cute", {
+            "range": "40-60",
+            "count": 21,
+        }),
+    )
+
+    def __init__(self, match):
+        DerpibooruExtractor.__init__(self, match)
+        query, tags = match.groups()
+        if query:
+            self.params = text.parse_query(query)
+        else:
+            # '/tags/<slug>' URLs carry an escaped tag name, not a query
+            self.params = {"q": _unslug(tags)}
+
+    def metadata(self):
+        """Return the search query as 'search_tags'"""
+        return {"search_tags": self.params.get("q", "")}
+
+    def posts(self):
+        """Return a generator over all images matching the search"""
+        url = self.root + "/api/v1/json/search/images"
+        return self._pagination(url, self.params)
+
+
+class DerpibooruGalleryExtractor(DerpibooruExtractor):
+    """Extractor for galleries on derpibooru.org"""
+    subcategory = "gallery"
+    directory_fmt = ("{category}", "galleries",
+                     "{gallery[id]} {gallery[title]}")
+    pattern = BASE_PATTERN + r"/galleries/(\d+)"
+    test = ("https://derpibooru.org/galleries/1", {
+        "pattern": r"https://derpicdn\.net/img/view/\d+/\d+/\d+/\d+[^/]+$",
+        "keyword": {
+            "gallery": {
+                "description": "Indexes start at 1 :P",
+                "id": 1,
+                "spoiler_warning": "",
+                "thumbnail_id": 1,
+                "title": "The Very First Gallery",
+                "user": "DeliciousBlackInk",
+                "user_id": 365446,
+            },
+        },
+    })
+
+    def __init__(self, match):
+        DerpibooruExtractor.__init__(self, match)
+        self.gallery_id = match.group(1)
+
+    def metadata(self):
+        """Return the gallery's API object as 'gallery'
+
+        Raises NotFoundError if the gallery search returns no results.
+        """
+        url = self.root + "/api/v1/json/search/galleries"
+        params = {"q": "id:" + self.gallery_id}
+        galleries = self.request(url, params=params).json()["galleries"]
+        if not galleries:
+            raise exception.NotFoundError("gallery")
+        return {"gallery": galleries[0]}
+
+    def posts(self):
+        """Return a generator over the images found for this gallery"""
+        gallery_id = "gallery_id:" + self.gallery_id
+        url = self.root + "/api/v1/json/search/images"
+        params = {"sd": "desc", "sf": gallery_id, "q": gallery_id}
+        return self._pagination(url, params)
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index 2f71e85f..49f87768 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -160,6 +160,7 @@ SUBCATEGORY_MAP = {
 
 _OAUTH = "`OAuth `__"
 _COOKIES = "`Cookies `__"
+_APIKEY_DB = "`API Key `__"
 _APIKEY_WH = "`API Key `__"
 _APIKEY_WY = "`API Key `__"
 
@@ -167,6 +168,7 @@ AUTH_MAP = {
     "aryion" : "Supported",
     "baraag" : _OAUTH,
     "danbooru" : "Supported",
+    "derpibooru" : _APIKEY_DB,
     "deviantart" : _OAUTH,
     "e621" : "Supported",
     "e-hentai" : "Supported",