[booth] add 'item' extractor (#7920)

This commit is contained in:
Mike Fährmann
2025-07-28 21:43:45 +02:00
parent a26d71c228
commit 8aa2fe33d5
5 changed files with 261 additions and 0 deletions

View File

@@ -157,6 +157,12 @@ Consider all listed sites to potentially be NSFW.
<td>DMs, Subscriptions Feed, Followed Users, Media Files, Posts, User Profiles</td>
<td><a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a></td>
</tr>
<tr>
<td>BOOTH</td>
<td>https://booth.pm/</td>
<td>Items</td>
<td></td>
</tr>
<tr>
<td>Bunkr</td>
<td>https://bunkr.si/</td>

View File

@@ -35,6 +35,7 @@ modules = [
"blogger",
"bluesky",
"boosty",
"booth",
"bunkr",
"catbox",
"chevereto",

View File

@@ -0,0 +1,70 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://booth.pm/"""
from .common import Extractor, Message
from .. import text
BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?booth\.pm(?:/\w\w)?"
class BoothExtractor(Extractor):
"""Base class for booth extractors"""
category = "booth"
root = "https://booth.pm"
directory_fmt = ("{category}", "{shop[name]}", "{id} {name}")
filename_fmt = "{num:>02} {filename}.{extension}"
archive_fmt = "{id}_{filename}"
def _init(self):
self.cookies.set("adult", "1", domain=".booth.pm")
class BoothItemExtractor(BoothExtractor):
subcategory = "item"
pattern = BASE_PATTERN + r"/items/(\d+)"
example = "https://booth.pm/items/12345"
def items(self):
url = f"{self.root}/ja/items/{self.groups[0]}.json"
item = self.request_json(url)
item["booth_category"] = item.pop("category", None)
item["date"] = text.parse_datetime(
item["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
item["tags"] = [t["name"] for t in item["tags"]]
files = self._extract_files(item)
item["count"] = len(files)
yield Message.Directory, item
for num, file in enumerate(files, 1):
url = file["url"]
file["num"] = num
text.nameext_from_url(url, file)
yield Message.Url, url, {**item, **file}
def _extract_files(self, item):
files = []
for image in item.pop("images"):
url = image["original"].replace("_base_resized", "")
files.append({
"url" : url,
"_fallback": _fallback(url),
})
return files
def _fallback(url):
base = url[:-3]
yield base + "jpeg"
yield base + "png"
yield base + "webp"

View File

@@ -37,6 +37,7 @@ CATEGORY_MAP = {
"baraag" : "baraag",
"batoto" : "BATO.TO",
"bbc" : "BBC",
"booth" : "BOOTH",
"cien" : "Ci-en",
"cohost" : "cohost!",
"comicvine" : "Comic Vine",

183
test/results/booth.py Normal file
View File

@@ -0,0 +1,183 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import booth
__tests__ = (
{
"#url" : "https://booth.pm/ja/items/4693741",
"#class" : booth.BoothItemExtractor,
"#pattern" : r"https://booth.pximg.net/792d497b-6e82-4df3-86de-31577e10f476/i/4693741/[\w-]{36}.jpg",
"#count" : 10,
"booth_category" : {
"id" : 56,
"name" : "漫画・マンガ",
"url" : "https://booth.pm/ja/browse/%E6%BC%AB%E7%94%BB%E3%83%BB%E3%83%9E%E3%83%B3%E3%82%AC",
"parent": {
"name": "漫画",
"url" : "https://booth.pm/ja/browse/%E6%BC%AB%E7%94%BB",
},
},
"buyee_variations": [],
"count" : 10,
"num" : range(1, 10),
"date" : "dt:2023-04-16 14:25:29",
"description" : """※※英語版※※
【踏切の音はもう聞こえない。の英訳ver.のダウンロード版になります。】
【This is the downloadable version of the English translation ver.】
Goto Hitori is 25 years old.
She loses all hope and tries to jump into a railroad crossing.
But at that moment, She is transported back in time to that era...?
The story is spun by the tag-team of manga and music,
The story of Bocchi-chan's inspiration and courage.
The set includes the manga book and a download card for the music (doujin music)!
Please enjoy the world of Bocchi ·the·rock! brought to you by "Futari Bocchi no Solitude".
wano-Twitter
https://twitter.com/wano49
Japanese version
https://www.melonbooks.co.jp/detail/detail.php?product_id=1872452""",
"embeds" : [],
"extension" : "jpg",
"factory_description": None,
"filename" : str,
"gift" : None,
"id" : 4693741,
"is_adult" : False,
"is_buyee_possible": False,
"is_end_of_sale" : False,
"is_placeholder" : False,
"is_sold_out" : False,
"name" : "※英語版※ I can no longer hear the railway crossing.【Bocchi the rock!】",
"order" : None,
"price" : "¥ 700",
"published_at" : "2023-04-16T23:25:29.000+09:00",
"purchase_limit" : None,
"report_url" : "https://wanoazayaka.booth.pm/items/4693741/report",
"share" : {
"hashtags": ["booth_pm"],
"text" : "※英語版※ I can no longer hear the railway crossing.【Bocchi the rock!】 | ふたりぼっちのSolitude",
},
"shipping_info" : "支払いから発送までの日数4日以内",
"shop" : {
"name" : "ふたりぼっちのSolitude",
"subdomain" : "wanoazayaka",
"thumbnail_url": "https://booth.pximg.net/c/48x48/users/5742915/icon_image/1448e5d8-f93f-445e-8e1e-acb29aa45aa4_base_resized.jpg",
"url" : "https://wanoazayaka.booth.pm/",
"verified" : False,
},
"small_stock" : None,
"sound" : None,
"tracks" : None,
"url" : str,
"wish_list_url" : "https://booth.pm/items/4693741/wish_list",
"wish_lists_count": range(80, 120),
"wished" : False,
"tag_banners" : "len:list:5",
"tag_combination" : {
"category": "漫画・マンガ",
"tag" : "ぼっち・ざ・ろっく!",
"url" : "https://booth.pm/ja/browse/%E6%BC%AB%E7%94%BB%E3%83%BB%E3%83%9E%E3%83%B3%E3%82%AC?tags%5B%5D=%E3%81%BC%E3%81%A3%E3%81%A1%E3%83%BB%E3%81%96%E3%83%BB%E3%82%8D%E3%81%A3%E3%81%8F%21",
},
"tags" : [
"ぼっち・ざ・ろっく!",
"ぼっちざろっく",
"ぼっち・ざ・ろっく",
"Bocchi the Rock!",
"BocchiTheRock",
],
"variations" : [{
"buyee_html" : None,
"downloadable" : None,
"factory_image_url": None,
"has_download_code": False,
"id" : 7869860,
"is_anshin_booth_pack": False,
"is_empty_allocatable_stock_with_preorder": False,
"is_empty_stock" : False,
"is_factory_item": False,
"is_mailbin" : False,
"is_waiting_on_arrival": False,
"name" : None,
"order_url" : None,
"price" : 700,
"small_stock" : None,
"status" : "addable_to_cart",
"type" : "digital",
}],
},
{
"#url" : "https://caramel-crunch.booth.pm/items/7236173?utm_source=pixiv&utm_medium=popboard&utm_campaign=popboard",
"#class" : booth.BoothItemExtractor,
"#results" : (
"https://booth.pximg.net/74488d0d-e533-443c-82ce-fa961d5cbaf0/i/7236173/131bf61c-0534-4af3-9408-f19f08cb3622.jpg",
"https://booth.pximg.net/74488d0d-e533-443c-82ce-fa961d5cbaf0/i/7236173/fb65233a-7a93-4219-ba9f-b63e11329fda.jpg",
"https://booth.pximg.net/74488d0d-e533-443c-82ce-fa961d5cbaf0/i/7236173/e18c16a0-b285-4cd8-aacc-6b3c4f4c6ce3.jpg",
),
"count" : 3,
"date" : "dt:2025-07-28 07:00:43",
"description" : """C106新作おっぱいマウスパッドです
コミケ開始時間に合わせてカート開放します
■お届け9月中旬頃予定
印刷:熱転写
素材:表面/SuperSmooth Fabric 裏面/PUゲル
乳首パーツ付き
ブリスターパック封入
納品済みの為数量限定です。
数がなくなり次第終了となります。
当日コミケにも持ち込みます。
2日目,東7ホール 26ab CARAMEL CRUNCH!""",
"id" : 7236173,
"is_adult" : True,
"is_buyee_possible": False,
"is_end_of_sale" : False,
"is_placeholder" : False,
"is_sold_out" : False,
"name" : "こ〇ちゃんおっぱいマウスパッド(乳首パーツ付き)",
"price" : "¥ 6,500",
"published_at" : "2025-07-28T16:00:43.000+09:00",
"purchase_limit" : 1,
"shipping_info" : "支払いから発送までの日数7日以内",
"booth_category" : {
"id" : 171,
"name" : "マウスパッド",
"url" : "https://booth.pm/ja/browse/%E3%83%9E%E3%82%A6%E3%82%B9%E3%83%91%E3%83%83%E3%83%89",
"parent": {
"name": "グッズ",
"url" : "https://booth.pm/ja/browse/%E3%82%B0%E3%83%83%E3%82%BA",
},
},
"shop" : {
"name" : " ",
"subdomain" : "caramel-crunch",
"thumbnail_url": "https://booth.pximg.net/c/48x48/users/49832/icon_image/a240e313-6a0f-4155-8310-a0d6abb299e6_base_resized.jpg",
"url" : "https://caramel-crunch.booth.pm/",
"verified" : False,
},
"tag_combination" : {
"category": "マウスパッド",
"tag" : "おっぱいマウスパッド",
"url" : "https://booth.pm/ja/browse/%E3%83%9E%E3%82%A6%E3%82%B9%E3%83%91%E3%83%83%E3%83%89?tags%5B%5D=%E3%81%8A%E3%81%A3%E3%81%B1%E3%81%84%E3%83%9E%E3%82%A6%E3%82%B9%E3%83%91%E3%83%83%E3%83%89",
},
"tags" : [
"おっぱいマウスパッド",
"C106",
"c106新作",
],
},
)