[booth] add 'shop' extractor (#7920)
This commit is contained in:
@@ -386,6 +386,7 @@ Default
|
||||
* ``"0.5-1.5"``
|
||||
``ao3``,
|
||||
``arcalive``,
|
||||
``booth``,
|
||||
``civitai``,
|
||||
``[Danbooru]``,
|
||||
``[E621]``,
|
||||
|
||||
@@ -182,6 +182,10 @@
|
||||
"metadata": false,
|
||||
"videos" : true
|
||||
},
|
||||
"booth":
|
||||
{
|
||||
"sleep-request": "0.5-1.5"
|
||||
},
|
||||
"bunkr":
|
||||
{
|
||||
"endpoint": "/api/_001_v2",
|
||||
|
||||
@@ -160,7 +160,7 @@ Consider all listed sites to potentially be NSFW.
|
||||
<tr>
|
||||
<td>BOOTH</td>
|
||||
<td>https://booth.pm/</td>
|
||||
<td>Items</td>
|
||||
<td>Items, Shops</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
@@ -9,9 +9,7 @@
|
||||
"""Extractors for https://booth.pm/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?booth\.pm(?:/\w\w)?"
|
||||
from .. import text, util
|
||||
|
||||
|
||||
class BoothExtractor(Extractor):
|
||||
@@ -21,14 +19,32 @@ class BoothExtractor(Extractor):
|
||||
directory_fmt = ("{category}", "{shop[name]}", "{id} {name}")
|
||||
filename_fmt = "{num:>02} {filename}.{extension}"
|
||||
archive_fmt = "{id}_{filename}"
|
||||
request_interval = (0.5, 1.5)
|
||||
|
||||
def _init(self):
    """Set the 'adult' cookie to "1" on the '.booth.pm' domain."""
    # NOTE(review): presumably this opts the session in to age-restricted
    # content -- confirm against the site's behavior.
    jar = self.cookies
    jar.set("adult", "1", domain=".booth.pm")
|
||||
|
||||
def items(self):
    """Queue every entry returned by shop_items() for BoothItemExtractor.

    Each entry is tagged with the extractor class that should handle it
    and is emitted as a Message.Queue result pointing at the entry's
    'shop_item_url'.
    """
    for entry in self.shop_items():
        # tag the entry first, then hand it over as the queue metadata
        entry["_extractor"] = BoothItemExtractor
        target = entry["shop_item_url"]
        yield Message.Queue, target, entry
|
||||
|
||||
def _pagination(self, url):
    """Yield the parsed 'data-item' objects of a listing, page by page.

    Requests 'url', passes every ' data-item="..."' attribute value found
    on the page through text.unescape() and util.json_loads(), and then
    continues with the page's rel="next" link (joined onto self.root)
    until a page no longer has one.
    """
    next_marker = 'rel="next" class="nav-item" href="'

    while True:
        html = self.request(url).text

        for raw in text.extract_iter(html, ' data-item="', '"'):
            yield util.json_loads(text.unescape(raw))

        # the link target is site-relative, so it is appended to the root
        next_path = text.extr(html, next_marker, '"')
        if next_path:
            url = self.root + next_path
        else:
            return
|
||||
|
||||
|
||||
class BoothItemExtractor(BoothExtractor):
|
||||
subcategory = "item"
|
||||
pattern = BASE_PATTERN + r"/items/(\d+)"
|
||||
pattern = r"(?:https?://)?(?:[\w-]+\.)?booth\.pm/(?:\w\w/)?items/(\d+)"
|
||||
example = "https://booth.pm/items/12345"
|
||||
|
||||
def items(self):
|
||||
@@ -63,6 +79,19 @@ class BoothItemExtractor(BoothExtractor):
|
||||
return files
|
||||
|
||||
|
||||
class BoothShopExtractor(BoothExtractor):
    """Extractor for the item listing of a shop on a booth.pm subdomain"""
    subcategory = "shop"
    pattern = r"(?:https?://)?([\w-]+\.)booth\.pm/(?:\w\w/)?(?:items)?"
    example = "https://SHOP.booth.pm/"

    def __init__(self, match):
        # The root is taken from the full matched URL and assigned before
        # the base-class initializer runs.
        matched_url = match[0]
        self.root = text.root_from_url(matched_url)
        BoothExtractor.__init__(self, match)

    def shop_items(self):
        """Return the paginated results for this shop's '/items' listing."""
        listing_url = "{}/items".format(self.root)
        return self._pagination(listing_url)
|
||||
|
||||
|
||||
def _fallback(url):
|
||||
base = url[:-3]
|
||||
yield base + "jpeg"
|
||||
|
||||
@@ -180,4 +180,59 @@ https://www.melonbooks.co.jp/detail/detail.php?product_id=1872452""",
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://wanoazayaka.booth.pm/",
|
||||
"#class" : booth.BoothShopExtractor,
|
||||
"#results" : (
|
||||
"https://wanoazayaka.booth.pm/items/4972816",
|
||||
"https://wanoazayaka.booth.pm/items/4855567",
|
||||
"https://wanoazayaka.booth.pm/items/4693741",
|
||||
),
|
||||
|
||||
"event" : None,
|
||||
"id" : int,
|
||||
"is_adult" : False,
|
||||
"is_end_of_sale": False,
|
||||
"is_placeholder": False,
|
||||
"is_sold_out" : False,
|
||||
"is_vrchat" : False,
|
||||
"minimum_stock" : None,
|
||||
"music" : None,
|
||||
"name" : str,
|
||||
"price" : "700 JPY",
|
||||
"url" : r"re:https://booth.pm/en/items/\d+",
|
||||
"shop_item_url" : r"re:https://wanoazayaka.booth.pm/items/\d+",
|
||||
"wish_list_url" : r"re:https://wanoazayaka.booth.pm/items/\d+/wish_list",
|
||||
"thumbnail_image_urls": list,
|
||||
"shop" : {
|
||||
"name" : "ふたりぼっちのSolitude",
|
||||
"thumbnail_url": "https://booth.pximg.net/c/48x48/users/5742915/icon_image/1448e5d8-f93f-445e-8e1e-acb29aa45aa4_base_resized.jpg",
|
||||
"url" : "https://wanoazayaka.booth.pm/",
|
||||
"verified" : False,
|
||||
},
|
||||
"tracking_data" : {
|
||||
"product_brand" : "wanoazayaka",
|
||||
"product_category": 56,
|
||||
"product_event" : None,
|
||||
"product_id" : int,
|
||||
"product_name" : str,
|
||||
"product_price" : 700,
|
||||
"tracking" : "impression_item",
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://caramel-crunch.booth.pm/items",
|
||||
"#class" : booth.BoothShopExtractor,
|
||||
"#pattern" : booth.BoothItemExtractor.pattern,
|
||||
"#count" : range(90, 120),
|
||||
|
||||
"shop": {
|
||||
"name" : "CARAMEL CRUNCH!",
|
||||
"thumbnail_url": "https://booth.pximg.net/c/48x48/users/49832/icon_image/a240e313-6a0f-4155-8310-a0d6abb299e6_base_resized.jpg",
|
||||
"url" : "https://caramel-crunch.booth.pm/",
|
||||
"verified" : False,
|
||||
},
|
||||
},
|
||||
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user