[sizebooru] add support (#7667)

This commit is contained in:
Mike Fährmann
2025-08-24 10:27:19 +02:00
parent aafb53db9a
commit ed3ac5a402
9 changed files with 396 additions and 16 deletions

View File

@@ -436,6 +436,7 @@ Default
``pornpics``,
``schalenetwork``,
``scrolller``,
``sizebooru``,
``soundgasm``,
``urlgalleries``,
``vk``,
@@ -4777,6 +4778,28 @@ Description
Download animated images as ``.gif`` instead of ``.webp``
extractor.sizebooru.metadata
----------------------------
Type
``bool``
Default
``false``
Description
Extract additional metadata:
* ``approver``
* ``artist``
* ``date``
* ``date_approved``
* ``favorite``
* ``source``
* ``tags``
* ``uploader``
* ``views``
Note
This requires 1 additional HTTP request per post.
extractor.skeb.article
----------------------
Type
@@ -8074,20 +8097,21 @@ Default
.. code:: json
{
"coomerparty" : "coomer",
"kemonoparty" : "kemono",
"koharu" : "schalenetwork",
"chzzk" : "naver-chzzk",
"naver" : "naver-blog",
"naverwebtoon": "naver-webtoon",
"pixiv" : "pixiv-novel"
"coomerparty" : "coomer",
"kemonoparty" : "kemono",
"giantessbooru": "sizebooru",
"koharu" : "schalenetwork",
"chzzk" : "naver-chzzk",
"naver" : "naver-blog",
"naverwebtoon" : "naver-webtoon",
"pixiv" : "pixiv-novel"
}
Description
Duplicate the configuration settings of extractor `categories`
to other names.
For example, a ``"naver": "naver-blog"`` key-value pair will make all
``naver`` config settings available for ´´naver-blog´´ extractors as well.
``naver`` config settings available for ``naver-blog`` extractors as well.
jinja.environment

View File

@@ -91,13 +91,14 @@
"category-map": {},
"config-map": {
"coomerparty" : "coomer",
"kemonoparty" : "kemono",
"koharu" : "schalenetwork",
"chzzk" : "naver-chzzk",
"naver" : "naver-blog",
"naverwebtoon": "naver-webtoon",
"pixiv" : "pixiv-novel"
"coomerparty" : "coomer",
"kemonoparty" : "kemono",
"giantessbooru": "sizebooru",
"koharu" : "schalenetwork",
"chzzk" : "naver-chzzk",
"naver" : "naver-blog",
"naverwebtoon" : "naver-webtoon",
"pixiv" : "pixiv-novel"
},
@@ -679,6 +680,12 @@
{
"gifs": true
},
"sizebooru":
{
"sleep-request": "0.5-1.5",
"metadata": false
},
"skeb":
{
"article" : false,

View File

@@ -877,6 +877,12 @@ Consider all listed sites to potentially be NSFW.
<td>Galleries, individual Images, Videos</td>
<td></td>
</tr>
<tr id="sizebooru" title="sizebooru">
<td>Size Booru</td>
<td>https://sizebooru.com/</td>
<td>Favorites, Galleries, Posts, Tag Searches, User Uploads</td>
<td></td>
</tr>
<tr id="skeb" title="skeb">
<td>Skeb</td>
<td>https://skeb.jp/</td>

View File

@@ -169,6 +169,7 @@ def remap_categories():
cmap = (
("coomerparty" , "coomer"),
("kemonoparty" , "kemono"),
("giantessbooru", "sizebooru"),
("koharu" , "schalenetwork"),
("naver" , "naver-blog"),
("chzzk" , "naver-chzzk"),

View File

@@ -170,6 +170,7 @@ modules = [
"sexcom",
"shimmie2",
"simplyhentai",
"sizebooru",
"skeb",
"slickpic",
"slideshare",

View File

@@ -52,7 +52,8 @@ class BooruExtractor(BaseExtractor):
if notes:
self._notes(post, html)
text.nameext_from_url(url, post)
if "extension" not in post:
text.nameext_from_url(url, post)
post.update(data)
self._prepare(post)

View File

@@ -0,0 +1,162 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://sizebooru.com/"""
from .booru import BooruExtractor
from .. import text
# URL prefix used by every extractor 'pattern' in this module:
# an optional http(s) scheme and an optional "www." in front of the
# sizebooru.com domain.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?sizebooru\.com"
class SizebooruExtractor(BooruExtractor):
    """Base class for sizebooru extractors"""
    category = "sizebooru"
    root = "https://sizebooru.com"
    filename_fmt = "{id}.{extension}"
    archive_fmt = "{id}"
    page_start = 1
    request_interval = (0.5, 1.5)

    def _init(self):
        """Switch to the metadata-aware '_prepare' when 'metadata' is set"""
        if self.config("metadata", False):
            # instance attribute shadows the class-level '_prepare' method
            self._prepare = self._prepare_metadata

    def _file_url(self, post):
        """Store and return the download URL for 'post'"""
        post["file_url"] = url = f"{self.root}/Picture/{post['id']}"
        return url

    def _prepare(self, post):
        """Finalize 'post' without any additional HTTP request

        Converts 'id' to an integer, uses the original ID string as
        file name and falls back to "jpg" when no extension is known.
        """
        post_id = post["id"]
        post["id"] = text.parse_int(post_id)
        post["filename"] = post_id
        if not post["extension"]:
            post["extension"] = "jpg"

    def _prepare_metadata(self, post):
        """Finalize 'post' with metadata from its '/Details/<id>' page

        Performs one additional HTTP request per post. Returns 'post'.
        """
        post_id = post["id"]
        url = f"{self.root}/Details/{post_id}"
        extr = text.extract_from(self.request(url).text)

        # NOTE(review): presumably every extr() call continues searching
        # after the previous match, so the calls below must stay in the
        # order in which the fields appear on the page - confirm against
        # text.extract_from() before reordering anything.
        posted = extr("<b>Posted Date:</b> ", "<")
        approved = extr("<b>Approved Date:</b> ", "<")
        approver = extr("<b>Approved By:</b>", "</")
        uploader = extr("<b>Posted By:</b>", "</")
        artist = extr("<b>Artist:</b> ", "</")
        views = extr("<b>Views:</b>", "<")
        source = extr("<b>Source Link:</b>", "</")
        tags = extr("<h6>Related Tags</h6>", "</ul>")
        favorite = extr("<h6>Favorited By</h6>", "</ul>")
        alt = extr('" alt="', '"')

        post.update({
            "id"           : text.parse_int(post_id),
            "date"         : text.parse_datetime(posted, "%m/%d/%Y"),
            "date_approved": text.parse_datetime(approved, "%m/%d/%Y"),
            "approver"     : text.remove_html(approver),
            "uploader"     : text.remove_html(uploader),
            # the page shows "N/A" when no artist is set
            "artist"       : None if artist == "N/A" else
                             text.remove_html(artist),
            "views"        : text.parse_int(views),
            "source"       : text.extr(source, ' href="', '"') or None,
            "tags"         : text.split_html(tags),
            "favorite"     : text.split_html(favorite),
        })

        # Split the 'alt' value into file name and extension.
        # Without a "." rpartition() returns ("", "", alt), which would
        # leave the file name empty and misuse the whole name as extension.
        name, dot, ext = alt.rpartition(".")
        if not dot:
            name, ext = alt, ""
        # same fallbacks as _prepare(): post ID as name, "jpg" as extension
        post["filename"] = name or post_id
        if not post["extension"]:
            post["extension"] = ext.lower() or "jpg"

        return post

    def _pagination(self, url, callback=None):
        """Yield post stubs ('id' and 'extension') from all pages of 'url'

        'callback', if given, is called once with the HTML of the first
        page before any post is yielded.
        """
        params = {
            "pageNo"  : self.page_start,
            "pageSize": self.per_page,
        }

        page = self.request(url, params=params).text
        if callback is not None:
            callback(page)

        while True:
            thumb = None
            for thumb in text.extract_iter(
                    page, '<a href="/Details/', ';base64'):
                yield {
                    # everything up to the closing quote of the href
                    "id"       : thumb[:thumb.find('"')],
                    # text after the last "/" before ';base64'
                    # (presumably the subtype of an inline data: URI)
                    "extension": thumb[thumb.rfind("/")+1:],
                }

            # Stop when the "Next" control is disabled or the page did not
            # contain a single thumbnail.
            # NOTE(review): 'area-label' (not 'aria-label') is kept as
            # written - presumably it mirrors the site's markup; confirm.
            if "disabled" in text.extr(page, 'area-label="Next"', ">") or \
                    thumb is None:
                return
            params["pageNo"] += 1
            page = self.request(url, params=params).text
class SizebooruPostExtractor(SizebooruExtractor):
    """Extractor for a single sizebooru post"""
    subcategory = "post"
    pattern = BASE_PATTERN + r"/Details/(\d+)"
    example = "https://sizebooru.com/Details/12345"

    def posts(self):
        """Return a 1-tuple holding the post stub for the ID in the URL"""
        # 'extension' is not known from the URL alone; None lets the
        # _prepare*() methods of the base class fill in a value
        stub = {"id": self.groups[0], "extension": None}
        return (stub,)
class SizebooruTagExtractor(SizebooruExtractor):
    """Extractor for sizebooru tag searches"""
    subcategory = "tag"
    directory_fmt = ("{category}", "{search_tags}")
    pattern = BASE_PATTERN + r"/Search/([^/?#]+)"
    example = "https://sizebooru.com/Search/TAG"

    def posts(self):
        """Yield all posts of the tag search given in the URL"""
        quoted = self.groups[0]
        # expose the unquoted search term for 'directory_fmt'
        self.kwdict["search_tags"] = text.unquote(quoted)
        search_url = f"{self.root}/Search/{quoted}"
        return self._pagination(search_url)
class SizebooruGalleryExtractor(SizebooruExtractor):
    """Extractor for sizebooru galleries"""
    subcategory = "gallery"
    directory_fmt = ("{category}", "{gallery_name} ({gallery_id})")
    pattern = BASE_PATTERN + r"/Galleries/List/(\d+)"
    example = "https://sizebooru.com/Galleries/List/123"

    def posts(self):
        """Yield all posts of the gallery given in the URL"""
        gallery_id = self.groups[0]
        self.kwdict["gallery_id"] = text.parse_int(gallery_id)
        gallery_url = f"{self.root}/Galleries/List/{gallery_id}"
        # the gallery name is taken from the first listing page
        return self._pagination(gallery_url, callback=self._extract_name)

    def _extract_name(self, page):
        """Store the gallery name found in the <title> of 'page'"""
        title = text.extr(page, "<title>Gallery: ", " - Size Booru<")
        self.kwdict["gallery_name"] = text.unescape(title)
class SizebooruUserExtractor(SizebooruExtractor):
    """Extractor for a sizebooru user's uploads"""
    subcategory = "user"
    directory_fmt = ("{category}", "Uploads {user}")
    pattern = BASE_PATTERN + r"/Profile/Uploads/([^/?#]+)"
    example = "https://sizebooru.com/Profile/Uploads/USER"

    def posts(self):
        """Yield all posts uploaded by the user given in the URL"""
        name = self.groups[0]
        # expose the unquoted user name for 'directory_fmt'
        self.kwdict["user"] = text.unquote(name)
        uploads_url = f"{self.root}/Profile/Uploads/{name}"
        return self._pagination(uploads_url)
class SizebooruFavoriteExtractor(SizebooruExtractor):
    """Extractor for a sizebooru user's favorites"""
    subcategory = "favorite"
    directory_fmt = ("{category}", "Favorites {user}")
    pattern = BASE_PATTERN + r"/Profile/Favorites/([^/?#]+)"
    example = "https://sizebooru.com/Profile/Favorites/USER"

    def posts(self):
        """Yield all posts favorited by the user given in the URL"""
        name = self.groups[0]
        # expose the unquoted user name for 'directory_fmt'
        self.kwdict["user"] = text.unquote(name)
        favorites_url = f"{self.root}/Profile/Favorites/{name}"
        return self._pagination(favorites_url)

View File

@@ -159,6 +159,7 @@ CATEGORY_MAP = {
"sexcom" : "Sex.com",
"silverpic" : "SilverPic.com",
"simplyhentai" : "Simply Hentai",
"sizebooru" : "Size Booru",
"slickpic" : "SlickPic",
"slideshare" : "SlideShare",
"smugmug" : "SmugMug",
@@ -391,6 +392,9 @@ SUBCATEGORY_MAP = {
"sexcom": {
"pins": "User Pins",
},
"sizebooru": {
"user": "User Uploads",
},
"skeb": {
"following" : "Followed Creators",
"following-users": "Followed Users",

174
test/results/sizebooru.py Normal file
View File

@@ -0,0 +1,174 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import sizebooru
# Expected results for the sizebooru extractors.
# Keys starting with "#" configure the test run itself; all other keys are
# expected metadata values of the extracted posts.
__tests__ = (
    # single post with default options: no metadata request,
    # file name is the post ID
    {
        "#url" : "https://sizebooru.com/Details/283342",
        "#class" : sizebooru.SizebooruPostExtractor,
        "#results" : "https://sizebooru.com/Picture/283342",
        "#sha1_content": "ae8bcbe95d58ba8ed4f33fe017088c9ec0f09515",
        "id" : 283342,
        "filename" : "283342",
        "extension" : "jpg",
        "file_url" : "https://sizebooru.com/Picture/283342",
    },
    # same post with the 'metadata' option enabled
    {
        "#url" : "https://sizebooru.com/Details/283342",
        "#class" : sizebooru.SizebooruPostExtractor,
        "#options" : {"metadata": True},
        "#results" : "https://sizebooru.com/Picture/283342",
        "#sha1_content": "ae8bcbe95d58ba8ed4f33fe017088c9ec0f09515",
        "approver" : "Mr_Red",
        "artist" : None,
        "date" : "dt:2025-07-30 00:00:00",
        "date_approved": "dt:2025-08-01 00:00:00",
        "extension" : "jpg",
        "file_url" : "https://sizebooru.com/Picture/283342",
        "filename" : "Gnlib9eaMAAXtfQ",
        "id" : 283342,
        "source" : "https://x.com/kashmimo/status/1907664168381255942",
        "uploader" : "Shadow_Blaze_23",
        "views" : range(200, 900),
        "favorite" : [
            "GTSfan295",
            "Zephyr",
            "HeroDjango",
        ],
        "tags" : [
            "drawing",
            "giantess",
            "pokemon",
            "blushing",
            "black_hair",
            "color",
            "long_hair",
            "sweat",
            "parody",
            "shrunken_man",
            "hat",
            "orange_hair",
            "looking_at_tiny",
            "leaf_(pokemon)",
            "kashmimo",
        ],
    },
    # post without artist and without source link
    {
        "#url" : "https://sizebooru.com/Details/2",
        "#class" : sizebooru.SizebooruPostExtractor,
        "#options" : {"metadata": True},
        "#results" : "https://sizebooru.com/Picture/2",
        "approver" : "Giantessbooru",
        "artist" : None,
        "date" : "dt:2010-11-26 00:00:00",
        "date_approved": "dt:2010-11-26 00:00:00",
        "extension" : "jpg",
        "file_url" : "https://sizebooru.com/Picture/2",
        "filename" : "10000 - tagme",
        "id" : 2,
        "source" : None,
        "uploader" : "Giantess-7of9",
        "views" : range(40, 200),
        "favorite" : list,
        "tags" : [
            "breasts",
            "gentle",
            "nude",
            "black_hair",
            "long_hair",
            "brunette",
            "hand",
            "shrunken_man",
            "indoors",
            "digital_render",
        ],
    },
    # post with a named artist and a "png" file
    {
        "#url" : "https://sizebooru.com/Details/283318",
        "#class" : sizebooru.SizebooruPostExtractor,
        "#options" : {"metadata": True},
        "#results" : "https://sizebooru.com/Picture/283318",
        "approver" : "Mr_Red",
        "artist" : "megamaliit",
        "date" : "dt:2025-07-26 00:00:00",
        "date_approved": "dt:2025-07-26 00:00:00",
        "extension" : "png",
        "file_url" : "https://sizebooru.com/Picture/283318",
        "filename" : "big babes of bed rock",
        "id" : 283318,
        "source" : "https://www.deviantart.com/megamaliit/art/Big-Babes-of-Bed-Rock-AT-845335093",
        "uploader" : "Mr_Red",
        "views" : int,
        "favorite" : list,
        "tags" : list,
    },
    # tag search
    {
        "#url" : "https://sizebooru.com/Search/parody",
        "#category": ("booru", "sizebooru", "tag"),
        "#class" : sizebooru.SizebooruTagExtractor,
        "#pattern" : r"https://sizebooru\.com/Picture/\d+",
        "#count" : range(200, 300),
        "id" : int,
        "filename" : r"re:\d+",
        "extension" : {"jpg", "png"},
        # must match the URLs built by SizebooruExtractor._file_url()
        "file_url" : r"re:https://sizebooru\.com/Picture/\d+",
        "search_tags": "parody",
    },
    # gallery
    {
        "#url" : "https://sizebooru.com/Galleries/List/7",
        "#category": ("booru", "sizebooru", "gallery"),
        "#class" : sizebooru.SizebooruGalleryExtractor,
        "#pattern" : r"https://sizebooru\.com/Picture/\d+",
        "#count" : 103,
        "gallery_id" : 7,
        "gallery_name": "lilipucien's work",
    },
    # user uploads: no results expected
    {
        "#url" : "https://sizebooru.com/Profile/Uploads/hueyriley",
        "#category": ("booru", "sizebooru", "user"),
        "#class" : sizebooru.SizebooruUserExtractor,
        "#count" : 0,
    },
    # user uploads
    {
        "#url" : "https://sizebooru.com/Profile/Uploads/GtsXxx",
        "#category": ("booru", "sizebooru", "user"),
        "#class" : sizebooru.SizebooruUserExtractor,
        "#pattern" : r"https://sizebooru\.com/Picture/\d+",
        "#count" : 256,
        "user" : "GtsXxx",
    },
    # user favorites
    {
        "#url" : "https://sizebooru.com/Profile/Favorites/GtsXxx",
        "#category": ("booru", "sizebooru", "favorite"),
        "#class" : sizebooru.SizebooruFavoriteExtractor,
        "#results" : (
            "https://sizebooru.com/Picture/266778",
            "https://sizebooru.com/Picture/266385",
            "https://sizebooru.com/Picture/266243",
            "https://sizebooru.com/Picture/265039",
        ),
        "user" : "GtsXxx",
    },
)