[booth] add 'item' extractor (#7920)
This commit is contained in:
@@ -157,6 +157,12 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>DMs, Subscriptions Feed, Followed Users, Media Files, Posts, User Profiles</td>
|
||||
<td><a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>BOOTH</td>
|
||||
<td>https://booth.pm/</td>
|
||||
<td>Items</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Bunkr</td>
|
||||
<td>https://bunkr.si/</td>
|
||||
|
||||
@@ -35,6 +35,7 @@ modules = [
|
||||
"blogger",
|
||||
"bluesky",
|
||||
"boosty",
|
||||
"booth",
|
||||
"bunkr",
|
||||
"catbox",
|
||||
"chevereto",
|
||||
|
||||
70
gallery_dl/extractor/booth.py
Normal file
70
gallery_dl/extractor/booth.py
Normal file
@@ -0,0 +1,70 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2025 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://booth.pm/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?booth\.pm(?:/\w\w)?"
|
||||
|
||||
|
||||
class BoothExtractor(Extractor):
|
||||
"""Base class for booth extractors"""
|
||||
category = "booth"
|
||||
root = "https://booth.pm"
|
||||
directory_fmt = ("{category}", "{shop[name]}", "{id} {name}")
|
||||
filename_fmt = "{num:>02} {filename}.{extension}"
|
||||
archive_fmt = "{id}_{filename}"
|
||||
|
||||
def _init(self):
|
||||
self.cookies.set("adult", "1", domain=".booth.pm")
|
||||
|
||||
|
||||
class BoothItemExtractor(BoothExtractor):
|
||||
subcategory = "item"
|
||||
pattern = BASE_PATTERN + r"/items/(\d+)"
|
||||
example = "https://booth.pm/items/12345"
|
||||
|
||||
def items(self):
|
||||
url = f"{self.root}/ja/items/{self.groups[0]}.json"
|
||||
item = self.request_json(url)
|
||||
|
||||
item["booth_category"] = item.pop("category", None)
|
||||
item["date"] = text.parse_datetime(
|
||||
item["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
item["tags"] = [t["name"] for t in item["tags"]]
|
||||
|
||||
files = self._extract_files(item)
|
||||
item["count"] = len(files)
|
||||
|
||||
yield Message.Directory, item
|
||||
for num, file in enumerate(files, 1):
|
||||
url = file["url"]
|
||||
file["num"] = num
|
||||
text.nameext_from_url(url, file)
|
||||
yield Message.Url, url, {**item, **file}
|
||||
|
||||
def _extract_files(self, item):
|
||||
files = []
|
||||
|
||||
for image in item.pop("images"):
|
||||
url = image["original"].replace("_base_resized", "")
|
||||
files.append({
|
||||
"url" : url,
|
||||
"_fallback": _fallback(url),
|
||||
})
|
||||
|
||||
return files
|
||||
|
||||
|
||||
def _fallback(url):
|
||||
base = url[:-3]
|
||||
yield base + "jpeg"
|
||||
yield base + "png"
|
||||
yield base + "webp"
|
||||
@@ -37,6 +37,7 @@ CATEGORY_MAP = {
|
||||
"baraag" : "baraag",
|
||||
"batoto" : "BATO.TO",
|
||||
"bbc" : "BBC",
|
||||
"booth" : "BOOTH",
|
||||
"cien" : "Ci-en",
|
||||
"cohost" : "cohost!",
|
||||
"comicvine" : "Comic Vine",
|
||||
|
||||
183
test/results/booth.py
Normal file
183
test/results/booth.py
Normal file
@@ -0,0 +1,183 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import booth
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://booth.pm/ja/items/4693741",
|
||||
"#class" : booth.BoothItemExtractor,
|
||||
"#pattern" : r"https://booth.pximg.net/792d497b-6e82-4df3-86de-31577e10f476/i/4693741/[\w-]{36}.jpg",
|
||||
"#count" : 10,
|
||||
|
||||
"booth_category" : {
|
||||
"id" : 56,
|
||||
"name" : "漫画・マンガ",
|
||||
"url" : "https://booth.pm/ja/browse/%E6%BC%AB%E7%94%BB%E3%83%BB%E3%83%9E%E3%83%B3%E3%82%AC",
|
||||
"parent": {
|
||||
"name": "漫画",
|
||||
"url" : "https://booth.pm/ja/browse/%E6%BC%AB%E7%94%BB",
|
||||
},
|
||||
},
|
||||
"buyee_variations": [],
|
||||
"count" : 10,
|
||||
"num" : range(1, 10),
|
||||
"date" : "dt:2023-04-16 14:25:29",
|
||||
"description" : """※※英語版※※
|
||||
【踏切の音はもう聞こえない。の英訳ver.のダウンロード版になります。】
|
||||
【This is the downloadable version of the English translation ver.】
|
||||
|
||||
Goto Hitori is 25 years old.
|
||||
She loses all hope and tries to jump into a railroad crossing.
|
||||
But at that moment, She is transported back in time to that era...?
|
||||
|
||||
The story is spun by the tag-team of manga and music,
|
||||
The story of Bocchi-chan's inspiration and courage.
|
||||
|
||||
The set includes the manga book and a download card for the music (doujin music)!
|
||||
Please enjoy the world of Bocchi ·the·rock! brought to you by "Futari Bocchi no Solitude".
|
||||
|
||||
wano-Twitter
|
||||
https://twitter.com/wano49
|
||||
|
||||
Japanese version
|
||||
https://www.melonbooks.co.jp/detail/detail.php?product_id=1872452""",
|
||||
"embeds" : [],
|
||||
"extension" : "jpg",
|
||||
"factory_description": None,
|
||||
"filename" : str,
|
||||
"gift" : None,
|
||||
"id" : 4693741,
|
||||
"is_adult" : False,
|
||||
"is_buyee_possible": False,
|
||||
"is_end_of_sale" : False,
|
||||
"is_placeholder" : False,
|
||||
"is_sold_out" : False,
|
||||
"name" : "※英語版※ I can no longer hear the railway crossing.【Bocchi the rock!】",
|
||||
"order" : None,
|
||||
"price" : "¥ 700",
|
||||
"published_at" : "2023-04-16T23:25:29.000+09:00",
|
||||
"purchase_limit" : None,
|
||||
"report_url" : "https://wanoazayaka.booth.pm/items/4693741/report",
|
||||
"share" : {
|
||||
"hashtags": ["booth_pm"],
|
||||
"text" : "※英語版※ I can no longer hear the railway crossing.【Bocchi the rock!】 | ふたりぼっちのSolitude",
|
||||
},
|
||||
"shipping_info" : "支払いから発送までの日数:4日以内",
|
||||
"shop" : {
|
||||
"name" : "ふたりぼっちのSolitude",
|
||||
"subdomain" : "wanoazayaka",
|
||||
"thumbnail_url": "https://booth.pximg.net/c/48x48/users/5742915/icon_image/1448e5d8-f93f-445e-8e1e-acb29aa45aa4_base_resized.jpg",
|
||||
"url" : "https://wanoazayaka.booth.pm/",
|
||||
"verified" : False,
|
||||
},
|
||||
"small_stock" : None,
|
||||
"sound" : None,
|
||||
"tracks" : None,
|
||||
"url" : str,
|
||||
"wish_list_url" : "https://booth.pm/items/4693741/wish_list",
|
||||
"wish_lists_count": range(80, 120),
|
||||
"wished" : False,
|
||||
"tag_banners" : "len:list:5",
|
||||
"tag_combination" : {
|
||||
"category": "漫画・マンガ",
|
||||
"tag" : "ぼっち・ざ・ろっく!",
|
||||
"url" : "https://booth.pm/ja/browse/%E6%BC%AB%E7%94%BB%E3%83%BB%E3%83%9E%E3%83%B3%E3%82%AC?tags%5B%5D=%E3%81%BC%E3%81%A3%E3%81%A1%E3%83%BB%E3%81%96%E3%83%BB%E3%82%8D%E3%81%A3%E3%81%8F%21",
|
||||
},
|
||||
"tags" : [
|
||||
"ぼっち・ざ・ろっく!",
|
||||
"ぼっちざろっく",
|
||||
"ぼっち・ざ・ろっく",
|
||||
"Bocchi the Rock!",
|
||||
"BocchiTheRock",
|
||||
],
|
||||
"variations" : [{
|
||||
"buyee_html" : None,
|
||||
"downloadable" : None,
|
||||
"factory_image_url": None,
|
||||
"has_download_code": False,
|
||||
"id" : 7869860,
|
||||
"is_anshin_booth_pack": False,
|
||||
"is_empty_allocatable_stock_with_preorder": False,
|
||||
"is_empty_stock" : False,
|
||||
"is_factory_item": False,
|
||||
"is_mailbin" : False,
|
||||
"is_waiting_on_arrival": False,
|
||||
"name" : None,
|
||||
"order_url" : None,
|
||||
"price" : 700,
|
||||
"small_stock" : None,
|
||||
"status" : "addable_to_cart",
|
||||
"type" : "digital",
|
||||
}],
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://caramel-crunch.booth.pm/items/7236173?utm_source=pixiv&utm_medium=popboard&utm_campaign=popboard",
|
||||
"#class" : booth.BoothItemExtractor,
|
||||
"#results" : (
|
||||
"https://booth.pximg.net/74488d0d-e533-443c-82ce-fa961d5cbaf0/i/7236173/131bf61c-0534-4af3-9408-f19f08cb3622.jpg",
|
||||
"https://booth.pximg.net/74488d0d-e533-443c-82ce-fa961d5cbaf0/i/7236173/fb65233a-7a93-4219-ba9f-b63e11329fda.jpg",
|
||||
"https://booth.pximg.net/74488d0d-e533-443c-82ce-fa961d5cbaf0/i/7236173/e18c16a0-b285-4cd8-aacc-6b3c4f4c6ce3.jpg",
|
||||
),
|
||||
|
||||
"count" : 3,
|
||||
"date" : "dt:2025-07-28 07:00:43",
|
||||
"description" : """C106新作おっぱいマウスパッドです
|
||||
コミケ開始時間に合わせてカート開放します
|
||||
■お届け9月中旬頃~予定
|
||||
印刷:熱転写
|
||||
素材:表面/SuperSmooth Fabric 裏面/PUゲル
|
||||
|
||||
乳首パーツ付き
|
||||
ブリスターパック封入
|
||||
|
||||
納品済みの為数量限定です。
|
||||
数がなくなり次第終了となります。
|
||||
|
||||
当日コミケにも持ち込みます。
|
||||
2日目,東7ホール A26ab CARAMEL CRUNCH!""",
|
||||
"id" : 7236173,
|
||||
"is_adult" : True,
|
||||
"is_buyee_possible": False,
|
||||
"is_end_of_sale" : False,
|
||||
"is_placeholder" : False,
|
||||
"is_sold_out" : False,
|
||||
"name" : "こ〇ちゃんおっぱいマウスパッド(乳首パーツ付き)",
|
||||
"price" : "¥ 6,500",
|
||||
"published_at" : "2025-07-28T16:00:43.000+09:00",
|
||||
"purchase_limit" : 1,
|
||||
"shipping_info" : "支払いから発送までの日数:7日以内",
|
||||
"booth_category" : {
|
||||
"id" : 171,
|
||||
"name" : "マウスパッド",
|
||||
"url" : "https://booth.pm/ja/browse/%E3%83%9E%E3%82%A6%E3%82%B9%E3%83%91%E3%83%83%E3%83%89",
|
||||
"parent": {
|
||||
"name": "グッズ",
|
||||
"url" : "https://booth.pm/ja/browse/%E3%82%B0%E3%83%83%E3%82%BA",
|
||||
},
|
||||
},
|
||||
"shop" : {
|
||||
"name" : "CARAMEL CRUNCH!",
|
||||
"subdomain" : "caramel-crunch",
|
||||
"thumbnail_url": "https://booth.pximg.net/c/48x48/users/49832/icon_image/a240e313-6a0f-4155-8310-a0d6abb299e6_base_resized.jpg",
|
||||
"url" : "https://caramel-crunch.booth.pm/",
|
||||
"verified" : False,
|
||||
},
|
||||
"tag_combination" : {
|
||||
"category": "マウスパッド",
|
||||
"tag" : "おっぱいマウスパッド",
|
||||
"url" : "https://booth.pm/ja/browse/%E3%83%9E%E3%82%A6%E3%82%B9%E3%83%91%E3%83%83%E3%83%89?tags%5B%5D=%E3%81%8A%E3%81%A3%E3%81%B1%E3%81%84%E3%83%9E%E3%82%A6%E3%82%B9%E3%83%91%E3%83%83%E3%83%89",
|
||||
},
|
||||
"tags" : [
|
||||
"おっぱいマウスパッド",
|
||||
"C106",
|
||||
"c106新作",
|
||||
],
|
||||
},
|
||||
|
||||
)
|
||||
Reference in New Issue
Block a user