diff --git a/docs/configuration.rst b/docs/configuration.rst index 8b410788..2ea0bc55 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -436,6 +436,7 @@ Default ``pornpics``, ``schalenetwork``, ``scrolller``, + ``sizebooru``, ``soundgasm``, ``urlgalleries``, ``vk``, @@ -4777,6 +4778,28 @@ Description Download animated images as ``.gif`` instead of ``.webp`` +extractor.sizebooru.metadata +---------------------------- +Type + ``bool`` +Default + ``false`` +Description + Extract additional metadata: + + * ``approver`` + * ``artist`` + * ``date`` + * ``date_approved`` + * ``favorite`` + * ``source`` + * ``tags`` + * ``uploader`` + * ``views`` +Note + This requires 1 additional HTTP request per post. + + extractor.skeb.article ---------------------- Type @@ -8074,20 +8097,21 @@ Default .. code:: json { - "coomerparty" : "coomer", - "kemonoparty" : "kemono", - "koharu" : "schalenetwork", - "chzzk" : "naver-chzzk", - "naver" : "naver-blog", - "naverwebtoon": "naver-webtoon", - "pixiv" : "pixiv-novel" + "coomerparty" : "coomer", + "kemonoparty" : "kemono", + "giantessbooru": "sizebooru", + "koharu" : "schalenetwork", + "chzzk" : "naver-chzzk", + "naver" : "naver-blog", + "naverwebtoon" : "naver-webtoon", + "pixiv" : "pixiv-novel" } Description Duplicate the configuration settings of extractor `categories` to other names. For example, a ``"naver": "naver-blog"`` key-value pair will make all - ``naver`` config settings available for ´´naver-blog´´ extractors as well. + ``naver`` config settings available for ``naver-blog`` extractors as well. 
jinja.environment diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 4ecd300c..1890b72d 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -91,13 +91,14 @@ "category-map": {}, "config-map": { - "coomerparty" : "coomer", - "kemonoparty" : "kemono", - "koharu" : "schalenetwork", - "chzzk" : "naver-chzzk", - "naver" : "naver-blog", - "naverwebtoon": "naver-webtoon", - "pixiv" : "pixiv-novel" + "coomerparty" : "coomer", + "kemonoparty" : "kemono", + "giantessbooru": "sizebooru", + "koharu" : "schalenetwork", + "chzzk" : "naver-chzzk", + "naver" : "naver-blog", + "naverwebtoon" : "naver-webtoon", + "pixiv" : "pixiv-novel" }, @@ -679,6 +680,12 @@ { "gifs": true }, + "sizebooru": + { + "sleep-request": "0.5-1.5", + + "metadata": false + }, "skeb": { "article" : false, diff --git a/docs/supportedsites.md b/docs/supportedsites.md index cbcf4320..6b368034 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -877,6 +877,12 @@ Consider all listed sites to potentially be NSFW. 
Galleries, individual Images, Videos + + Size Booru + https://sizebooru.com/ + Favorites, Galleries, Posts, Tag Searches, User Uploads + + Skeb https://skeb.jp/ diff --git a/gallery_dl/config.py b/gallery_dl/config.py index 3571c65c..33a3b958 100644 --- a/gallery_dl/config.py +++ b/gallery_dl/config.py @@ -169,6 +169,7 @@ def remap_categories(): cmap = ( ("coomerparty" , "coomer"), ("kemonoparty" , "kemono"), + ("giantessbooru", "sizebooru"), ("koharu" , "schalenetwork"), ("naver" , "naver-blog"), ("chzzk" , "naver-chzzk"), diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 3b10c50e..aabaa933 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -170,6 +170,7 @@ modules = [ "sexcom", "shimmie2", "simplyhentai", + "sizebooru", "skeb", "slickpic", "slideshare", diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py index 3b97e9ab..ae455bf3 100644 --- a/gallery_dl/extractor/booru.py +++ b/gallery_dl/extractor/booru.py @@ -52,7 +52,8 @@ class BooruExtractor(BaseExtractor): if notes: self._notes(post, html) - text.nameext_from_url(url, post) + if "extension" not in post: + text.nameext_from_url(url, post) post.update(data) self._prepare(post) diff --git a/gallery_dl/extractor/sizebooru.py b/gallery_dl/extractor/sizebooru.py new file mode 100644 index 00000000..cad4b23a --- /dev/null +++ b/gallery_dl/extractor/sizebooru.py @@ -0,0 +1,162 @@ +# -*- coding: utf-8 -*- + +# Copyright 2025 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://sizebooru.com/""" + +from .booru import BooruExtractor +from .. 
import text + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?sizebooru\.com" + + +class SizebooruExtractor(BooruExtractor): + """Base class for sizebooru extractors""" + category = "sizebooru" + root = "https://sizebooru.com" + filename_fmt = "{id}.{extension}" + archive_fmt = "{id}" + page_start = 1 + request_interval = (0.5, 1.5) + + def _init(self): + if self.config("metadata", False): + self._prepare = self._prepare_metadata + + def _file_url(self, post): + post["file_url"] = url = f"{self.root}/Picture/{post['id']}" + return url + + def _prepare(self, post): + post_id = post["id"] + post["id"] = text.parse_int(post_id) + post["filename"] = post_id + if not post["extension"]: + post["extension"] = "jpg" + + def _prepare_metadata(self, post): + post_id = post["id"] + url = f"{self.root}/Details/{post_id}" + extr = text.extract_from(self.request(url).text) + + post.update({ + "id" : text.parse_int(post_id), + "date" : text.parse_datetime( + extr("Posted Date: ", "<"), "%m/%d/%Y"), + "date_approved": text.parse_datetime( + extr("Approved Date: ", "<"), "%m/%d/%Y"), + "approver" : text.remove_html(extr("Approved By:", "Posted By:", "Artist: ", "Views:", "<")), + "source" : text.extr(extr( + "Source Link:", "Related Tags", "")), + "favorite" : text.split_html(extr( + "
Favorited By
", "")), + }) + + post["filename"], _, ext = extr('" alt="', '"').rpartition(".") + if not post["extension"]: + post["extension"] = ext.lower() + + return post + + def _pagination(self, url, callback=None): + params = { + "pageNo" : self.page_start, + "pageSize": self.per_page, + } + + page = self.request(url, params=params).text + if callback is not None: + callback(page) + + while True: + thumb = None + for thumb in text.extract_iter( + page, '") or \ + thumb is None: + return + params["pageNo"] += 1 + page = self.request(url, params=params).text + + +class SizebooruPostExtractor(SizebooruExtractor): + """Extractor for sizebooru posts""" + subcategory = "post" + pattern = rf"{BASE_PATTERN}/Details/(\d+)" + example = "https://sizebooru.com/Details/12345" + + def posts(self): + return ({"id": self.groups[0], "extension": None},) + + +class SizebooruTagExtractor(SizebooruExtractor): + """Extractor for sizebooru tag searches""" + subcategory = "tag" + directory_fmt = ("{category}", "{search_tags}") + pattern = rf"{BASE_PATTERN}/Search/([^/?#]+)" + example = "https://sizebooru.com/Search/TAG" + + def posts(self): + tag = self.groups[0] + self.kwdict["search_tags"] = text.unquote(tag) + return self._pagination(f"{self.root}/Search/{tag}") + + +class SizebooruGalleryExtractor(SizebooruExtractor): + """Extractor for sizebooru galleries""" + subcategory = "gallery" + directory_fmt = ("{category}", "{gallery_name} ({gallery_id})") + pattern = rf"{BASE_PATTERN}/Galleries/List/(\d+)" + example = "https://sizebooru.com/Galleries/List/123" + + def posts(self): + gid = self.groups[0] + self.kwdict["gallery_id"] = text.parse_int(gid) + return self._pagination( + f"{self.root}/Galleries/List/{gid}", self._extract_name) + + def _extract_name(self, page): + self.kwdict["gallery_name"] = text.unescape(text.extr( + page, "Gallery: ", " - Size Booru<")) + + +class SizebooruUserExtractor(SizebooruExtractor): + """Extractor for a sizebooru user's uploads""" + subcategory = "user" + 
directory_fmt = ("{category}", "Uploads {user}") + pattern = rf"{BASE_PATTERN}/Profile/Uploads/([^/?#]+)" + example = "https://sizebooru.com/Profile/Uploads/USER" + + def posts(self): + user = self.groups[0] + self.kwdict["user"] = text.unquote(user) + return self._pagination(f"{self.root}/Profile/Uploads/{user}",) + + +class SizebooruFavoriteExtractor(SizebooruExtractor): + """Extractor for a sizebooru user's favorites""" + subcategory = "favorite" + directory_fmt = ("{category}", "Favorites {user}") + pattern = rf"{BASE_PATTERN}/Profile/Favorites/([^/?#]+)" + example = "https://sizebooru.com/Profile/Favorites/USER" + + def posts(self): + user = self.groups[0] + self.kwdict["user"] = text.unquote(user) + return self._pagination(f"{self.root}/Profile/Favorites/{user}",) diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 7557b8e2..fb09d58e 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -159,6 +159,7 @@ CATEGORY_MAP = { "sexcom" : "Sex.com", "silverpic" : "SilverPic.com", "simplyhentai" : "Simply Hentai", + "sizebooru" : "Size Booru", "slickpic" : "SlickPic", "slideshare" : "SlideShare", "smugmug" : "SmugMug", @@ -391,6 +392,9 @@ SUBCATEGORY_MAP = { "sexcom": { "pins": "User Pins", }, + "sizebooru": { + "user": "User Uploads", + }, "skeb": { "following" : "Followed Creators", "following-users": "Followed Users", diff --git a/test/results/sizebooru.py b/test/results/sizebooru.py new file mode 100644 index 00000000..51aea9d0 --- /dev/null +++ b/test/results/sizebooru.py @@ -0,0 +1,174 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. 
+ +from gallery_dl.extractor import sizebooru + + +__tests__ = ( +{ + "#url" : "https://sizebooru.com/Details/283342", + "#class" : sizebooru.SizebooruPostExtractor, + "#results" : "https://sizebooru.com/Picture/283342", + "#sha1_content": "ae8bcbe95d58ba8ed4f33fe017088c9ec0f09515", + + "id" : 283342, + "filename" : "283342", + "extension" : "jpg", + "file_url" : "https://sizebooru.com/Picture/283342", +}, + +{ + "#url" : "https://sizebooru.com/Details/283342", + "#class" : sizebooru.SizebooruPostExtractor, + "#options" : {"metadata": True}, + "#results" : "https://sizebooru.com/Picture/283342", + "#sha1_content": "ae8bcbe95d58ba8ed4f33fe017088c9ec0f09515", + + "approver" : "Mr_Red", + "artist" : None, + "date" : "dt:2025-07-30 00:00:00", + "date_approved": "dt:2025-08-01 00:00:00", + "extension" : "jpg", + "file_url" : "https://sizebooru.com/Picture/283342", + "filename" : "Gnlib9eaMAAXtfQ", + "id" : 283342, + "source" : "https://x.com/kashmimo/status/1907664168381255942", + "uploader" : "Shadow_Blaze_23", + "views" : range(200, 900), + "favorite" : [ + "GTSfan295", + "Zephyr", + "HeroDjango", + ], + "tags" : [ + "drawing", + "giantess", + "pokemon", + "blushing", + "black_hair", + "color", + "long_hair", + "sweat", + "parody", + "shrunken_man", + "hat", + "orange_hair", + "looking_at_tiny", + "leaf_(pokemon)", + "kashmimo", + ], +}, + +{ + "#url" : "https://sizebooru.com/Details/2", + "#class" : sizebooru.SizebooruPostExtractor, + "#options" : {"metadata": True}, + "#results" : "https://sizebooru.com/Picture/2", + + "approver" : "Giantessbooru", + "artist" : None, + "date" : "dt:2010-11-26 00:00:00", + "date_approved": "dt:2010-11-26 00:00:00", + "extension" : "jpg", + "file_url" : "https://sizebooru.com/Picture/2", + "filename" : "10000 - tagme", + "id" : 2, + "source" : None, + "uploader" : "Giantess-7of9", + "views" : range(40, 200), + "favorite" : list, + "tags" : [ + "breasts", + "gentle", + "nude", + "black_hair", + "long_hair", + "brunette", + "hand", + 
"shrunken_man", + "indoors", + "digital_render", + ], +}, + +{ + "#url" : "https://sizebooru.com/Details/283318", + "#class" : sizebooru.SizebooruPostExtractor, + "#options" : {"metadata": True}, + "#results" : "https://sizebooru.com/Picture/283318", + + "approver" : "Mr_Red", + "artist" : "megamaliit", + "date" : "dt:2025-07-26 00:00:00", + "date_approved": "dt:2025-07-26 00:00:00", + "extension" : "png", + "file_url" : "https://sizebooru.com/Picture/283318", + "filename" : "big babes of bed rock", + "id" : 283318, + "source" : "https://www.deviantart.com/megamaliit/art/Big-Babes-of-Bed-Rock-AT-845335093", + "uploader" : "Mr_Red", + "views" : int, + "favorite" : list, + "tags" : list, +}, + +{ + "#url" : "https://sizebooru.com/Search/parody", + "#category": ("booru", "sizebooru", "tag"), + "#class" : sizebooru.SizebooruTagExtractor, + "#pattern" : r"https://sizebooru\.com/Picture/\d+", + "#count" : range(200, 300), + + "id" : int, + "filename" : r"re:\d+", + "extension" : {"jpg", "png"}, + "file_url" : r"re:https://sizebooru\.com/Picture/\d+", + "search_tags": "parody", +}, + +{ + "#url" : "https://sizebooru.com/Galleries/List/7", + "#category": ("booru", "sizebooru", "gallery"), + "#class" : sizebooru.SizebooruGalleryExtractor, + "#pattern" : r"https://sizebooru\.com/Picture/\d+", + "#count" : 103, + + "gallery_id" : 7, + "gallery_name": "lilipucien's work", +}, + +{ + "#url" : "https://sizebooru.com/Profile/Uploads/hueyriley", + "#category": ("booru", "sizebooru", "user"), + "#class" : sizebooru.SizebooruUserExtractor, + "#count" : 0, +}, + +{ + "#url" : "https://sizebooru.com/Profile/Uploads/GtsXxx", + "#category": ("booru", "sizebooru", "user"), + "#class" : sizebooru.SizebooruUserExtractor, + "#pattern" : r"https://sizebooru\.com/Picture/\d+", + "#count" : 256, + + "user" : "GtsXxx", +}, + +{ + "#url" : "https://sizebooru.com/Profile/Favorites/GtsXxx", + "#category": ("booru", "sizebooru", "favorite"), + "#class" : sizebooru.SizebooruFavoriteExtractor, + 
"#results" : ( + "https://sizebooru.com/Picture/266778", + "https://sizebooru.com/Picture/266385", + "https://sizebooru.com/Picture/266243", + "https://sizebooru.com/Picture/265039", + ), + + "user" : "GtsXxx", +}, + +)