[sizebooru] add support (#7667)
This commit is contained in:
@@ -436,6 +436,7 @@ Default
|
||||
``pornpics``,
|
||||
``schalenetwork``,
|
||||
``scrolller``,
|
||||
``sizebooru``,
|
||||
``soundgasm``,
|
||||
``urlgalleries``,
|
||||
``vk``,
|
||||
@@ -4777,6 +4778,28 @@ Description
|
||||
Download animated images as ``.gif`` instead of ``.webp``
|
||||
|
||||
|
||||
extractor.sizebooru.metadata
|
||||
----------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``false``
|
||||
Description
|
||||
Extract additional metadata:
|
||||
|
||||
* ``approver``
|
||||
* ``artist``
|
||||
* ``date``
|
||||
* ``date_approved``
|
||||
* ``favorite``
|
||||
* ``source``
|
||||
* ``tags``
|
||||
* ``uploader``
|
||||
* ``views``
|
||||
Note
|
||||
This requires 1 additional HTTP request per post.
|
||||
|
||||
|
||||
extractor.skeb.article
|
||||
----------------------
|
||||
Type
|
||||
@@ -8074,20 +8097,21 @@ Default
|
||||
.. code:: json
|
||||
|
||||
{
|
||||
"coomerparty" : "coomer",
|
||||
"kemonoparty" : "kemono",
|
||||
"koharu" : "schalenetwork",
|
||||
"chzzk" : "naver-chzzk",
|
||||
"naver" : "naver-blog",
|
||||
"naverwebtoon": "naver-webtoon",
|
||||
"pixiv" : "pixiv-novel"
|
||||
"coomerparty" : "coomer",
|
||||
"kemonoparty" : "kemono",
|
||||
"giantessbooru": "sizebooru",
|
||||
"koharu" : "schalenetwork",
|
||||
"chzzk" : "naver-chzzk",
|
||||
"naver" : "naver-blog",
|
||||
"naverwebtoon" : "naver-webtoon",
|
||||
"pixiv" : "pixiv-novel"
|
||||
}
|
||||
Description
|
||||
Duplicate the configuration settings of extractor `categories`
|
||||
to other names.
|
||||
|
||||
For example, a ``"naver": "naver-blog"`` key-value pair will make all
|
||||
``naver`` config settings available for ´´naver-blog´´ extractors as well.
|
||||
``naver`` config settings available for ``naver-blog`` extractors as well.
|
||||
|
||||
|
||||
jinja.environment
|
||||
|
||||
@@ -91,13 +91,14 @@
|
||||
|
||||
"category-map": {},
|
||||
"config-map": {
|
||||
"coomerparty" : "coomer",
|
||||
"kemonoparty" : "kemono",
|
||||
"koharu" : "schalenetwork",
|
||||
"chzzk" : "naver-chzzk",
|
||||
"naver" : "naver-blog",
|
||||
"naverwebtoon": "naver-webtoon",
|
||||
"pixiv" : "pixiv-novel"
|
||||
"coomerparty" : "coomer",
|
||||
"kemonoparty" : "kemono",
|
||||
"giantessbooru": "sizebooru",
|
||||
"koharu" : "schalenetwork",
|
||||
"chzzk" : "naver-chzzk",
|
||||
"naver" : "naver-blog",
|
||||
"naverwebtoon" : "naver-webtoon",
|
||||
"pixiv" : "pixiv-novel"
|
||||
},
|
||||
|
||||
|
||||
@@ -679,6 +680,12 @@
|
||||
{
|
||||
"gifs": true
|
||||
},
|
||||
"sizebooru":
|
||||
{
|
||||
"sleep-request": "0.5-1.5",
|
||||
|
||||
"metadata": false
|
||||
},
|
||||
"skeb":
|
||||
{
|
||||
"article" : false,
|
||||
|
||||
@@ -877,6 +877,12 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Galleries, individual Images, Videos</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr id="sizebooru" title="sizebooru">
|
||||
<td>Size Booru</td>
|
||||
<td>https://sizebooru.com/</td>
|
||||
<td>Favorites, Galleries, Posts, Tag Searches, User Uploads</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr id="skeb" title="skeb">
|
||||
<td>Skeb</td>
|
||||
<td>https://skeb.jp/</td>
|
||||
|
||||
@@ -169,6 +169,7 @@ def remap_categories():
|
||||
cmap = (
|
||||
("coomerparty" , "coomer"),
|
||||
("kemonoparty" , "kemono"),
|
||||
("giantessbooru", "sizebooru"),
|
||||
("koharu" , "schalenetwork"),
|
||||
("naver" , "naver-blog"),
|
||||
("chzzk" , "naver-chzzk"),
|
||||
|
||||
@@ -170,6 +170,7 @@ modules = [
|
||||
"sexcom",
|
||||
"shimmie2",
|
||||
"simplyhentai",
|
||||
"sizebooru",
|
||||
"skeb",
|
||||
"slickpic",
|
||||
"slideshare",
|
||||
|
||||
@@ -52,7 +52,8 @@ class BooruExtractor(BaseExtractor):
|
||||
if notes:
|
||||
self._notes(post, html)
|
||||
|
||||
text.nameext_from_url(url, post)
|
||||
if "extension" not in post:
|
||||
text.nameext_from_url(url, post)
|
||||
post.update(data)
|
||||
self._prepare(post)
|
||||
|
||||
|
||||
162
gallery_dl/extractor/sizebooru.py
Normal file
162
gallery_dl/extractor/sizebooru.py
Normal file
@@ -0,0 +1,162 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2025 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://sizebooru.com/"""
|
||||
|
||||
from .booru import BooruExtractor
|
||||
from .. import text
|
||||
|
||||
# Common prefix of all URL patterns in this module:
# optional 'http(s)://' scheme and optional 'www.' before the host name
BASE_PATTERN = r"(?:https?://)?(?:www\.)?sizebooru\.com"
|
||||
|
||||
|
||||
class SizebooruExtractor(BooruExtractor):
    """Base class for sizebooru extractors"""
    category = "sizebooru"
    root = "https://sizebooru.com"
    filename_fmt = "{id}.{extension}"
    archive_fmt = "{id}"
    page_start = 1
    request_interval = (0.5, 1.5)

    def _init(self):
        """Swap in '_prepare_metadata' when the 'metadata' option is set"""
        if self.config("metadata", False):
            self._prepare = self._prepare_metadata

    def _file_url(self, post):
        """Store the download URL of 'post' as 'file_url' and return it"""
        post["file_url"] = url = f"{self.root}/Picture/{post['id']}"
        return url

    def _prepare(self, post):
        """Finalize 'post' using only the data already present in it"""
        post_id = post["id"]
        post["id"] = text.parse_int(post_id)
        # the unconverted ID doubles as filename
        post["filename"] = post_id
        if not post["extension"]:
            post["extension"] = "jpg"

    def _prepare_metadata(self, post):
        """Finalize 'post' with data from its /Details/ page

        Requests the details page of the post and adds the fields
        parsed from it to 'post'. Returns 'post'.
        """
        post_id = post["id"]
        url = f"{self.root}/Details/{post_id}"
        # NOTE(review): text.extract_from() presumably continues searching
        # from the end of the previous match, which would make the order
        # of the extr() calls below significant - confirm before reordering
        extr = text.extract_from(self.request(url).text)

        post.update({
            "id"       : text.parse_int(post_id),
            "date"     : text.parse_datetime(
                extr("<b>Posted Date:</b> ", "<"), "%m/%d/%Y"),
            "date_approved": text.parse_datetime(
                extr("<b>Approved Date:</b> ", "<"), "%m/%d/%Y"),
            "approver" : text.remove_html(extr("<b>Approved By:</b>", "</")),
            "uploader" : text.remove_html(extr("<b>Posted By:</b>", "</")),
            # an artist of "N/A" is reported as None
            "artist"   : None
                if (artist := extr("<b>Artist:</b> ", "</")) == "N/A" else  # noqa: E131 E501
                text.remove_html(artist),  # noqa: E131
            "views"    : text.parse_int(extr("<b>Views:</b>", "<")),
            # None when the source section contains no link
            "source"   : text.extr(extr(
                "<b>Source Link:</b>", "</"), ' href="', '"') or None,
            "tags"     : text.split_html(extr(
                "<h6>Related Tags</h6>", "</ul>")),
            "favorite" : text.split_html(extr(
                "<h6>Favorited By</h6>", "</ul>")),
        })

        alt = extr('" alt="', '"')
        name, dot, ext = alt.rpartition(".")
        if not dot:
            # rpartition() puts the whole string into its last element
            # when there is no "."; treat it as a name without extension
            name, ext = alt, ""
        # same fallbacks as _prepare(): ID as filename, "jpg" as extension
        post["filename"] = name or post_id
        if not post["extension"]:
            post["extension"] = ext.lower() or "jpg"

        return post

    def _pagination(self, url, callback=None):
        """Yield an {'id', 'extension'} dict for each post listed at 'url'

        Requests successive pages starting at 'page_start'. If given,
        'callback' is called once with the text of the first page.
        """
        params = {
            "pageNo"  : self.page_start,
            "pageSize": self.per_page,
        }

        page = self.request(url, params=params).text
        if callback is not None:
            callback(page)

        while True:
            thumb = None
            for thumb in text.extract_iter(
                    page, '<a href="/Details/', ';base64'):
                yield {
                    # everything up to the first double quote
                    "id"       : thumb[:thumb.find('"')],
                    # text after the last "/" before ';base64'
                    "extension": thumb[thumb.rfind("/")+1:],
                }

            # stop when the 'Next' control is disabled or when the page
            # had no thumbnails ('thumb' is still None after the loop)
            # NOTE(review): 'area-label' is kept exactly as found; it
            # presumably mirrors the site's own markup - confirm
            if "disabled" in text.extr(page, 'area-label="Next"', ">") or \
                    thumb is None:
                return
            params["pageNo"] += 1
            page = self.request(url, params=params).text
|
||||
|
||||
|
||||
class SizebooruPostExtractor(SizebooruExtractor):
    """Extractor for sizebooru posts"""
    subcategory = "post"
    pattern = rf"{BASE_PATTERN}/Details/(\d+)"
    example = "https://sizebooru.com/Details/12345"

    def posts(self):
        """Return a 1-tuple with the post whose ID was matched in the URL"""
        # the extension is not known from the URL alone
        post = {"id": self.groups[0], "extension": None}
        return (post,)
|
||||
|
||||
|
||||
class SizebooruTagExtractor(SizebooruExtractor):
    """Extractor for sizebooru tag searches"""
    subcategory = "tag"
    directory_fmt = ("{category}", "{search_tags}")
    pattern = rf"{BASE_PATTERN}/Search/([^/?#]+)"
    example = "https://sizebooru.com/Search/TAG"

    def posts(self):
        """Return the paginated results of the tag search from the URL"""
        raw_tag = self.groups[0]
        search_url = f"{self.root}/Search/{raw_tag}"
        # metadata gets the unquoted tag; the URL keeps it as matched
        self.kwdict["search_tags"] = text.unquote(raw_tag)
        return self._pagination(search_url)
|
||||
|
||||
|
||||
class SizebooruGalleryExtractor(SizebooruExtractor):
    """Extractor for sizebooru galleries"""
    subcategory = "gallery"
    directory_fmt = ("{category}", "{gallery_name} ({gallery_id})")
    pattern = rf"{BASE_PATTERN}/Galleries/List/(\d+)"
    example = "https://sizebooru.com/Galleries/List/123"

    def posts(self):
        """Return the paginated posts of the gallery from the URL"""
        gallery_id = self.groups[0]
        self.kwdict["gallery_id"] = text.parse_int(gallery_id)
        gallery_url = f"{self.root}/Galleries/List/{gallery_id}"
        # '_extract_name' is handed over as the pagination callback
        return self._pagination(gallery_url, self._extract_name)

    def _extract_name(self, page):
        """Store the gallery name found in the <title> of 'page'"""
        title = text.extr(page, "<title>Gallery: ", " - Size Booru<")
        self.kwdict["gallery_name"] = text.unescape(title)
|
||||
|
||||
|
||||
class SizebooruUserExtractor(SizebooruExtractor):
    """Extractor for a sizebooru user's uploads"""
    subcategory = "user"
    directory_fmt = ("{category}", "Uploads {user}")
    pattern = rf"{BASE_PATTERN}/Profile/Uploads/([^/?#]+)"
    example = "https://sizebooru.com/Profile/Uploads/USER"

    def posts(self):
        """Return the paginated uploads of the user from the URL"""
        username = self.groups[0]
        uploads_url = f"{self.root}/Profile/Uploads/{username}"
        # metadata gets the unquoted name; the URL keeps it as matched
        self.kwdict["user"] = text.unquote(username)
        return self._pagination(uploads_url)
|
||||
|
||||
|
||||
class SizebooruFavoriteExtractor(SizebooruExtractor):
    """Extractor for a sizebooru user's favorites"""
    subcategory = "favorite"
    directory_fmt = ("{category}", "Favorites {user}")
    pattern = rf"{BASE_PATTERN}/Profile/Favorites/([^/?#]+)"
    example = "https://sizebooru.com/Profile/Favorites/USER"

    def posts(self):
        """Return the paginated favorites of the user from the URL"""
        username = self.groups[0]
        favorites_url = f"{self.root}/Profile/Favorites/{username}"
        # metadata gets the unquoted name; the URL keeps it as matched
        self.kwdict["user"] = text.unquote(username)
        return self._pagination(favorites_url)
|
||||
@@ -159,6 +159,7 @@ CATEGORY_MAP = {
|
||||
"sexcom" : "Sex.com",
|
||||
"silverpic" : "SilverPic.com",
|
||||
"simplyhentai" : "Simply Hentai",
|
||||
"sizebooru" : "Size Booru",
|
||||
"slickpic" : "SlickPic",
|
||||
"slideshare" : "SlideShare",
|
||||
"smugmug" : "SmugMug",
|
||||
@@ -391,6 +392,9 @@ SUBCATEGORY_MAP = {
|
||||
"sexcom": {
|
||||
"pins": "User Pins",
|
||||
},
|
||||
"sizebooru": {
|
||||
"user": "User Uploads",
|
||||
},
|
||||
"skeb": {
|
||||
"following" : "Followed Creators",
|
||||
"following-users": "Followed Users",
|
||||
|
||||
174
test/results/sizebooru.py
Normal file
174
test/results/sizebooru.py
Normal file
@@ -0,0 +1,174 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import sizebooru
|
||||
|
||||
|
||||
# Test cases for the sizebooru extractors, one dict per tested URL
__tests__ = (
{
    "#url"         : "https://sizebooru.com/Details/283342",
    "#class"       : sizebooru.SizebooruPostExtractor,
    "#results"     : "https://sizebooru.com/Picture/283342",
    "#sha1_content": "ae8bcbe95d58ba8ed4f33fe017088c9ec0f09515",

    "id"       : 283342,
    "filename" : "283342",
    "extension": "jpg",
    "file_url" : "https://sizebooru.com/Picture/283342",
},

{
    "#url"         : "https://sizebooru.com/Details/283342",
    "#class"       : sizebooru.SizebooruPostExtractor,
    "#options"     : {"metadata": True},
    "#results"     : "https://sizebooru.com/Picture/283342",
    "#sha1_content": "ae8bcbe95d58ba8ed4f33fe017088c9ec0f09515",

    "approver"     : "Mr_Red",
    "artist"       : None,
    "date"         : "dt:2025-07-30 00:00:00",
    "date_approved": "dt:2025-08-01 00:00:00",
    "extension"    : "jpg",
    "file_url"     : "https://sizebooru.com/Picture/283342",
    "filename"     : "Gnlib9eaMAAXtfQ",
    "id"           : 283342,
    "source"       : "https://x.com/kashmimo/status/1907664168381255942",
    "uploader"     : "Shadow_Blaze_23",
    "views"        : range(200, 900),
    "favorite"     : [
        "GTSfan295",
        "Zephyr",
        "HeroDjango",
    ],
    "tags"         : [
        "drawing",
        "giantess",
        "pokemon",
        "blushing",
        "black_hair",
        "color",
        "long_hair",
        "sweat",
        "parody",
        "shrunken_man",
        "hat",
        "orange_hair",
        "looking_at_tiny",
        "leaf_(pokemon)",
        "kashmimo",
    ],
},

{
    "#url"         : "https://sizebooru.com/Details/2",
    "#class"       : sizebooru.SizebooruPostExtractor,
    "#options"     : {"metadata": True},
    "#results"     : "https://sizebooru.com/Picture/2",

    "approver"     : "Giantessbooru",
    "artist"       : None,
    "date"         : "dt:2010-11-26 00:00:00",
    "date_approved": "dt:2010-11-26 00:00:00",
    "extension"    : "jpg",
    "file_url"     : "https://sizebooru.com/Picture/2",
    "filename"     : "10000 - tagme",
    "id"           : 2,
    "source"       : None,
    "uploader"     : "Giantess-7of9",
    "views"        : range(40, 200),
    "favorite"     : list,
    "tags"         : [
        "breasts",
        "gentle",
        "nude",
        "black_hair",
        "long_hair",
        "brunette",
        "hand",
        "shrunken_man",
        "indoors",
        "digital_render",
    ],
},

{
    "#url"         : "https://sizebooru.com/Details/283318",
    "#class"       : sizebooru.SizebooruPostExtractor,
    "#options"     : {"metadata": True},
    "#results"     : "https://sizebooru.com/Picture/283318",

    "approver"     : "Mr_Red",
    "artist"       : "megamaliit",
    "date"         : "dt:2025-07-26 00:00:00",
    "date_approved": "dt:2025-07-26 00:00:00",
    "extension"    : "png",
    "file_url"     : "https://sizebooru.com/Picture/283318",
    "filename"     : "big babes of bed rock",
    "id"           : 283318,
    "source"       : "https://www.deviantart.com/megamaliit/art/Big-Babes-of-Bed-Rock-AT-845335093",
    "uploader"     : "Mr_Red",
    "views"        : int,
    "favorite"     : list,
    "tags"         : list,
},

{
    "#url"     : "https://sizebooru.com/Search/parody",
    "#category": ("booru", "sizebooru", "tag"),
    "#class"   : sizebooru.SizebooruTagExtractor,
    "#pattern" : r"https://sizebooru\.com/Picture/\d+",
    "#count"   : range(200, 300),

    "id"         : int,
    "filename"   : r"re:\d+",
    "extension"  : {"jpg", "png"},
    # host must be spelled like the extractor's root ('sizebooru')
    "file_url"   : r"re:https://sizebooru\.com/Picture/\d+",
    "search_tags": "parody",
},

{
    "#url"     : "https://sizebooru.com/Galleries/List/7",
    "#category": ("booru", "sizebooru", "gallery"),
    "#class"   : sizebooru.SizebooruGalleryExtractor,
    "#pattern" : r"https://sizebooru\.com/Picture/\d+",
    "#count"   : 103,

    "gallery_id"  : 7,
    "gallery_name": "lilipucien's work",
},

{
    "#url"     : "https://sizebooru.com/Profile/Uploads/hueyriley",
    "#category": ("booru", "sizebooru", "user"),
    "#class"   : sizebooru.SizebooruUserExtractor,
    "#count"   : 0,
},

{
    "#url"     : "https://sizebooru.com/Profile/Uploads/GtsXxx",
    "#category": ("booru", "sizebooru", "user"),
    "#class"   : sizebooru.SizebooruUserExtractor,
    "#pattern" : r"https://sizebooru\.com/Picture/\d+",
    "#count"   : 256,

    "user"     : "GtsXxx",
},

{
    "#url"     : "https://sizebooru.com/Profile/Favorites/GtsXxx",
    "#category": ("booru", "sizebooru", "favorite"),
    "#class"   : sizebooru.SizebooruFavoriteExtractor,
    "#results" : (
        "https://sizebooru.com/Picture/266778",
        "https://sizebooru.com/Picture/266385",
        "https://sizebooru.com/Picture/266243",
        "https://sizebooru.com/Picture/265039",
    ),

    "user"     : "GtsXxx",
},

)
|
||||
Reference in New Issue
Block a user