diff --git a/docs/configuration.rst b/docs/configuration.rst
index 8b410788..2ea0bc55 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -436,6 +436,7 @@ Default
``pornpics``,
``schalenetwork``,
``scrolller``,
+ ``sizebooru``,
``soundgasm``,
``urlgalleries``,
``vk``,
@@ -4777,6 +4778,28 @@ Description
Download animated images as ``.gif`` instead of ``.webp``
+extractor.sizebooru.metadata
+----------------------------
+Type
+ ``bool``
+Default
+ ``false``
+Description
+ Extract additional metadata:
+
+ * ``approver``
+ * ``artist``
+ * ``date``
+ * ``date_approved``
+ * ``favorite``
+ * ``source``
+ * ``tags``
+ * ``uploader``
+ * ``views``
+Note
+ This requires 1 additional HTTP request per post.
+
+
extractor.skeb.article
----------------------
Type
@@ -8074,20 +8097,21 @@ Default
.. code:: json
{
- "coomerparty" : "coomer",
- "kemonoparty" : "kemono",
- "koharu" : "schalenetwork",
- "chzzk" : "naver-chzzk",
- "naver" : "naver-blog",
- "naverwebtoon": "naver-webtoon",
- "pixiv" : "pixiv-novel"
+ "coomerparty" : "coomer",
+ "kemonoparty" : "kemono",
+ "giantessbooru": "sizebooru",
+ "koharu" : "schalenetwork",
+ "chzzk" : "naver-chzzk",
+ "naver" : "naver-blog",
+ "naverwebtoon" : "naver-webtoon",
+ "pixiv" : "pixiv-novel"
}
Description
Duplicate the configuration settings of extractor `categories`
to other names.
For example, a ``"naver": "naver-blog"`` key-value pair will make all
- ``naver`` config settings available for ´´naver-blog´´ extractors as well.
+ ``naver`` config settings available for ``naver-blog`` extractors as well.
jinja.environment
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 4ecd300c..1890b72d 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -91,13 +91,14 @@
"category-map": {},
"config-map": {
- "coomerparty" : "coomer",
- "kemonoparty" : "kemono",
- "koharu" : "schalenetwork",
- "chzzk" : "naver-chzzk",
- "naver" : "naver-blog",
- "naverwebtoon": "naver-webtoon",
- "pixiv" : "pixiv-novel"
+ "coomerparty" : "coomer",
+ "kemonoparty" : "kemono",
+ "giantessbooru": "sizebooru",
+ "koharu" : "schalenetwork",
+ "chzzk" : "naver-chzzk",
+ "naver" : "naver-blog",
+ "naverwebtoon" : "naver-webtoon",
+ "pixiv" : "pixiv-novel"
},
@@ -679,6 +680,12 @@
{
"gifs": true
},
+ "sizebooru":
+ {
+ "sleep-request": "0.5-1.5",
+
+ "metadata": false
+ },
"skeb":
{
"article" : false,
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index cbcf4320..6b368034 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -877,6 +877,12 @@ Consider all listed sites to potentially be NSFW.
Galleries, individual Images, Videos |
|
+
+ | Size Booru |
+ https://sizebooru.com/ |
+ Favorites, Galleries, Posts, Tag Searches, User Uploads |
+ |
+
| Skeb |
https://skeb.jp/ |
diff --git a/gallery_dl/config.py b/gallery_dl/config.py
index 3571c65c..33a3b958 100644
--- a/gallery_dl/config.py
+++ b/gallery_dl/config.py
@@ -169,6 +169,7 @@ def remap_categories():
cmap = (
("coomerparty" , "coomer"),
("kemonoparty" , "kemono"),
+ ("giantessbooru", "sizebooru"),
("koharu" , "schalenetwork"),
("naver" , "naver-blog"),
("chzzk" , "naver-chzzk"),
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 3b10c50e..aabaa933 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -170,6 +170,7 @@ modules = [
"sexcom",
"shimmie2",
"simplyhentai",
+ "sizebooru",
"skeb",
"slickpic",
"slideshare",
diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py
index 3b97e9ab..ae455bf3 100644
--- a/gallery_dl/extractor/booru.py
+++ b/gallery_dl/extractor/booru.py
@@ -52,7 +52,8 @@ class BooruExtractor(BaseExtractor):
if notes:
self._notes(post, html)
- text.nameext_from_url(url, post)
+ if "extension" not in post:
+ text.nameext_from_url(url, post)
post.update(data)
self._prepare(post)
diff --git a/gallery_dl/extractor/sizebooru.py b/gallery_dl/extractor/sizebooru.py
new file mode 100644
index 00000000..cad4b23a
--- /dev/null
+++ b/gallery_dl/extractor/sizebooru.py
@@ -0,0 +1,162 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://sizebooru.com/"""
+
+from .booru import BooruExtractor
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?sizebooru\.com"
+
+
+class SizebooruExtractor(BooruExtractor):
+ """Base class for sizebooru extractors"""
+ category = "sizebooru"
+ root = "https://sizebooru.com"
+ filename_fmt = "{id}.{extension}"
+ archive_fmt = "{id}"
+ page_start = 1
+ request_interval = (0.5, 1.5)
+
+ def _init(self):
+ if self.config("metadata", False):
+ self._prepare = self._prepare_metadata
+
+ def _file_url(self, post):
+ post["file_url"] = url = f"{self.root}/Picture/{post['id']}"
+ return url
+
+ def _prepare(self, post):
+ post_id = post["id"]
+ post["id"] = text.parse_int(post_id)
+ post["filename"] = post_id
+ if not post["extension"]:
+ post["extension"] = "jpg"
+
+ def _prepare_metadata(self, post):
+ post_id = post["id"]
+ url = f"{self.root}/Details/{post_id}"
+ extr = text.extract_from(self.request(url).text)
+
+ post.update({
+ "id" : text.parse_int(post_id),
+ "date" : text.parse_datetime(
+ extr("Posted Date: ", "<"), "%m/%d/%Y"),
+ "date_approved": text.parse_datetime(
+ extr("Approved Date: ", "<"), "%m/%d/%Y"),
+ "approver" : text.remove_html(extr("Approved By:", "")),
+ "uploader" : text.remove_html(extr("Posted By:", "")),
+ "artist" : None
+ if (artist := extr("Artist: ", "")) == "N/A" else # noqa: E131 E501
+ text.remove_html(artist), # noqa: E131
+ "views" : text.parse_int(extr("Views:", "<")),
+ "source" : text.extr(extr(
+ "Source Link:", ""), ' href="', '"') or None,
+ "tags" : text.split_html(extr(
+ "Related Tags
", "")),
+ "favorite" : text.split_html(extr(
+ "Favorited By
", "")),
+ })
+
+ post["filename"], _, ext = extr('" alt="', '"').rpartition(".")
+ if not post["extension"]:
+ post["extension"] = ext.lower()
+
+ return post
+
+ def _pagination(self, url, callback=None):
+ params = {
+ "pageNo" : self.page_start,
+ "pageSize": self.per_page,
+ }
+
+ page = self.request(url, params=params).text
+ if callback is not None:
+ callback(page)
+
+ while True:
+ thumb = None
+ for thumb in text.extract_iter(
+ page, '") or \
+ thumb is None:
+ return
+ params["pageNo"] += 1
+ page = self.request(url, params=params).text
+
+
+class SizebooruPostExtractor(SizebooruExtractor):
+ """Extractor for sizebooru posts"""
+ subcategory = "post"
+ pattern = rf"{BASE_PATTERN}/Details/(\d+)"
+ example = "https://sizebooru.com/Details/12345"
+
+ def posts(self):
+ return ({"id": self.groups[0], "extension": None},)
+
+
+class SizebooruTagExtractor(SizebooruExtractor):
+ """Extractor for sizebooru tag searches"""
+ subcategory = "tag"
+ directory_fmt = ("{category}", "{search_tags}")
+ pattern = rf"{BASE_PATTERN}/Search/([^/?#]+)"
+ example = "https://sizebooru.com/Search/TAG"
+
+ def posts(self):
+ tag = self.groups[0]
+ self.kwdict["search_tags"] = text.unquote(tag)
+ return self._pagination(f"{self.root}/Search/{tag}")
+
+
+class SizebooruGalleryExtractor(SizebooruExtractor):
+ """Extractor for sizebooru galleries"""
+ subcategory = "gallery"
+ directory_fmt = ("{category}", "{gallery_name} ({gallery_id})")
+ pattern = rf"{BASE_PATTERN}/Galleries/List/(\d+)"
+ example = "https://sizebooru.com/Galleries/List/123"
+
+ def posts(self):
+ gid = self.groups[0]
+ self.kwdict["gallery_id"] = text.parse_int(gid)
+ return self._pagination(
+ f"{self.root}/Galleries/List/{gid}", self._extract_name)
+
+ def _extract_name(self, page):
+ self.kwdict["gallery_name"] = text.unescape(text.extr(
+ page, "Gallery: ", " - Size Booru<"))
+
+
+class SizebooruUserExtractor(SizebooruExtractor):
+ """Extractor for a sizebooru user's uploads"""
+ subcategory = "user"
+ directory_fmt = ("{category}", "Uploads {user}")
+ pattern = rf"{BASE_PATTERN}/Profile/Uploads/([^/?#]+)"
+ example = "https://sizebooru.com/Profile/Uploads/USER"
+
+ def posts(self):
+ user = self.groups[0]
+ self.kwdict["user"] = text.unquote(user)
+ return self._pagination(f"{self.root}/Profile/Uploads/{user}",)
+
+
+class SizebooruFavoriteExtractor(SizebooruExtractor):
+ """Extractor for a sizebooru user's favorites"""
+ subcategory = "favorite"
+ directory_fmt = ("{category}", "Favorites {user}")
+ pattern = rf"{BASE_PATTERN}/Profile/Favorites/([^/?#]+)"
+ example = "https://sizebooru.com/Profile/Favorites/USER"
+
+ def posts(self):
+ user = self.groups[0]
+ self.kwdict["user"] = text.unquote(user)
+ return self._pagination(f"{self.root}/Profile/Favorites/{user}",)
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index 7557b8e2..fb09d58e 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -159,6 +159,7 @@ CATEGORY_MAP = {
"sexcom" : "Sex.com",
"silverpic" : "SilverPic.com",
"simplyhentai" : "Simply Hentai",
+ "sizebooru" : "Size Booru",
"slickpic" : "SlickPic",
"slideshare" : "SlideShare",
"smugmug" : "SmugMug",
@@ -391,6 +392,9 @@ SUBCATEGORY_MAP = {
"sexcom": {
"pins": "User Pins",
},
+ "sizebooru": {
+ "user": "User Uploads",
+ },
"skeb": {
"following" : "Followed Creators",
"following-users": "Followed Users",
diff --git a/test/results/sizebooru.py b/test/results/sizebooru.py
new file mode 100644
index 00000000..51aea9d0
--- /dev/null
+++ b/test/results/sizebooru.py
@@ -0,0 +1,174 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import sizebooru
+
+
+__tests__ = (
+{
+ "#url" : "https://sizebooru.com/Details/283342",
+ "#class" : sizebooru.SizebooruPostExtractor,
+ "#results" : "https://sizebooru.com/Picture/283342",
+ "#sha1_content": "ae8bcbe95d58ba8ed4f33fe017088c9ec0f09515",
+
+ "id" : 283342,
+ "filename" : "283342",
+ "extension" : "jpg",
+ "file_url" : "https://sizebooru.com/Picture/283342",
+},
+
+{
+ "#url" : "https://sizebooru.com/Details/283342",
+ "#class" : sizebooru.SizebooruPostExtractor,
+ "#options" : {"metadata": True},
+ "#results" : "https://sizebooru.com/Picture/283342",
+ "#sha1_content": "ae8bcbe95d58ba8ed4f33fe017088c9ec0f09515",
+
+ "approver" : "Mr_Red",
+ "artist" : None,
+ "date" : "dt:2025-07-30 00:00:00",
+ "date_approved": "dt:2025-08-01 00:00:00",
+ "extension" : "jpg",
+ "file_url" : "https://sizebooru.com/Picture/283342",
+ "filename" : "Gnlib9eaMAAXtfQ",
+ "id" : 283342,
+ "source" : "https://x.com/kashmimo/status/1907664168381255942",
+ "uploader" : "Shadow_Blaze_23",
+ "views" : range(200, 900),
+ "favorite" : [
+ "GTSfan295",
+ "Zephyr",
+ "HeroDjango",
+ ],
+ "tags" : [
+ "drawing",
+ "giantess",
+ "pokemon",
+ "blushing",
+ "black_hair",
+ "color",
+ "long_hair",
+ "sweat",
+ "parody",
+ "shrunken_man",
+ "hat",
+ "orange_hair",
+ "looking_at_tiny",
+ "leaf_(pokemon)",
+ "kashmimo",
+ ],
+},
+
+{
+ "#url" : "https://sizebooru.com/Details/2",
+ "#class" : sizebooru.SizebooruPostExtractor,
+ "#options" : {"metadata": True},
+ "#results" : "https://sizebooru.com/Picture/2",
+
+ "approver" : "Giantessbooru",
+ "artist" : None,
+ "date" : "dt:2010-11-26 00:00:00",
+ "date_approved": "dt:2010-11-26 00:00:00",
+ "extension" : "jpg",
+ "file_url" : "https://sizebooru.com/Picture/2",
+ "filename" : "10000 - tagme",
+ "id" : 2,
+ "source" : None,
+ "uploader" : "Giantess-7of9",
+ "views" : range(40, 200),
+ "favorite" : list,
+ "tags" : [
+ "breasts",
+ "gentle",
+ "nude",
+ "black_hair",
+ "long_hair",
+ "brunette",
+ "hand",
+ "shrunken_man",
+ "indoors",
+ "digital_render",
+ ],
+},
+
+{
+ "#url" : "https://sizebooru.com/Details/283318",
+ "#class" : sizebooru.SizebooruPostExtractor,
+ "#options" : {"metadata": True},
+ "#results" : "https://sizebooru.com/Picture/283318",
+
+ "approver" : "Mr_Red",
+ "artist" : "megamaliit",
+ "date" : "dt:2025-07-26 00:00:00",
+ "date_approved": "dt:2025-07-26 00:00:00",
+ "extension" : "png",
+ "file_url" : "https://sizebooru.com/Picture/283318",
+ "filename" : "big babes of bed rock",
+ "id" : 283318,
+ "source" : "https://www.deviantart.com/megamaliit/art/Big-Babes-of-Bed-Rock-AT-845335093",
+ "uploader" : "Mr_Red",
+ "views" : int,
+ "favorite" : list,
+ "tags" : list,
+},
+
+{
+ "#url" : "https://sizebooru.com/Search/parody",
+ "#category": ("booru", "sizebooru", "tag"),
+ "#class" : sizebooru.SizebooruTagExtractor,
+ "#pattern" : r"https://sizebooru\.com/Picture/\d+",
+ "#count" : range(200, 300),
+
+ "id" : int,
+ "filename" : r"re:\d+",
+ "extension" : {"jpg", "png"},
+ "file_url" : r"re:https://sizebooru\.com/Picture/\d+",
+ "search_tags": "parody",
+},
+
+{
+ "#url" : "https://sizebooru.com/Galleries/List/7",
+ "#category": ("booru", "sizebooru", "gallery"),
+ "#class" : sizebooru.SizebooruGalleryExtractor,
+ "#pattern" : r"https://sizebooru\.com/Picture/\d+",
+ "#count" : 103,
+
+ "gallery_id" : 7,
+ "gallery_name": "lilipucien's work",
+},
+
+{
+ "#url" : "https://sizebooru.com/Profile/Uploads/hueyriley",
+ "#category": ("booru", "sizebooru", "user"),
+ "#class" : sizebooru.SizebooruUserExtractor,
+ "#count" : 0,
+},
+
+{
+ "#url" : "https://sizebooru.com/Profile/Uploads/GtsXxx",
+ "#category": ("booru", "sizebooru", "user"),
+ "#class" : sizebooru.SizebooruUserExtractor,
+ "#pattern" : r"https://sizebooru\.com/Picture/\d+",
+ "#count" : 256,
+
+ "user" : "GtsXxx",
+},
+
+{
+ "#url" : "https://sizebooru.com/Profile/Favorites/GtsXxx",
+ "#category": ("booru", "sizebooru", "favorite"),
+ "#class" : sizebooru.SizebooruFavoriteExtractor,
+ "#results" : (
+ "https://sizebooru.com/Picture/266778",
+ "https://sizebooru.com/Picture/266385",
+ "https://sizebooru.com/Picture/266243",
+ "https://sizebooru.com/Picture/265039",
+ ),
+
+ "user" : "GtsXxx",
+},
+
+)