[booth] add 'category' extractor (#8867)

This commit is contained in:
Mike Fährmann
2026-01-11 22:15:14 +01:00
parent 25397903ce
commit d7c1c30c62
4 changed files with 44 additions and 12 deletions

View File

@@ -172,7 +172,7 @@ Consider all listed sites to potentially be NSFW.
<tr id="booth" title="booth">
<td>BOOTH</td>
<td>https://booth.pm/</td>
<td>Items, Shops</td>
<td>Item Categories, Items, Shops</td>
<td></td>
</tr>
<tr id="bunkr" title="bunkr">

View File

@@ -24,22 +24,22 @@ class BoothExtractor(Extractor):
def _init(self):
    """Set the 'adult' cookie to "t" for the '.booth.pm' domain."""
    jar = self.cookies
    jar.set("adult", "t", domain=".booth.pm")
def items(self):
    """Queue every entry returned by shop_items() for BoothItemExtractor."""
    for entry in self.shop_items():
        # tag the entry so the queued URL is handled by the item extractor
        entry["_extractor"] = BoothItemExtractor
        target = entry["shop_item_url"]
        yield Message.Queue, target, entry
def _pagination(self, url, json=False):
    """Yield the entries of every page of a paginated listing.

    Requests 'url', yields that page's entries, then continues with the
    page's rel="next" link until no such link is found.

    With json=True, every ' data-item="..."' attribute value is passed
    through text.unescape() and util.json_loads() and the result is
    yielded. Otherwise, the href="..." value extracted from each
    "item-card__title" ... "</div>" section is yielded after
    text.unescape().
    """
    while True:
        page = self.request(url).text

        if json:
            for item in text.extract_iter(page, ' data-item="', '"'):
                yield util.json_loads(text.unescape(item))
        else:
            for item in text.extract_iter(
                    page, "item-card__title", "</div>"):
                yield text.unescape(text.extr(item, 'href="', '"'))

        # named 'next_path' so the builtin next() is not shadowed
        next_path = text.extr(
            page, 'rel="next" class="nav-item" href="', '"')
        if not next_path:
            break
        # the attribute value is unescaped before it is appended to root
        url = self.root + text.unescape(next_path)
class BoothItemExtractor(BoothExtractor):
@@ -115,8 +115,21 @@ class BoothShopExtractor(BoothExtractor):
self.root = text.root_from_url(match[0])
BoothExtractor.__init__(self, match)
def shop_items(self):
    """Return the paginated results of this shop's '/items' listing."""
    listing_url = self.root + "/items"
    return self._pagination(listing_url)
def items(self):
    """Queue every entry of this shop's '/items' listing."""
    listing = self._pagination(self.root + "/items", json=True)
    for entry in listing:
        # tag the entry so the queued URL is handled by the item extractor
        entry["_extractor"] = BoothItemExtractor
        target = entry["shop_item_url"]
        yield Message.Queue, target, entry
class BoothCategoryExtractor(BoothExtractor):
    """Extractor for BOOTH category ('browse') listings"""
    subcategory = "category"
    pattern = r"(?:https?://)?booth\.pm(/[a-z]{2}(?:-[^/?#]+)?/browse/.+)"
    example = "https://booth.pm/ja/browse/CATEGORY"

    def items(self):
        """Queue every URL found in the listing for BoothItemExtractor."""
        # one shared metadata dict is sent along with every queued URL
        meta = {"_extractor": BoothItemExtractor}
        # groups[0] is the path captured by 'pattern'
        listing = self._pagination(self.root + self.groups[0])
        for item_url in listing:
            yield Message.Queue, item_url, meta
def _fallback(url):

View File

@@ -267,6 +267,9 @@ SUBCATEGORY_MAP = {
"boosty": {
"feed": "Subscriptions Feed",
},
"booth": {
"category": "Item Categories",
},
"cfake": {
"created": "Created",
},

View File

@@ -254,4 +254,20 @@ https://www.melonbooks.co.jp/detail/detail.php?product_id=1872452""",
},
},
{
"#url" : "https://booth.pm/en/browse/Audio%20Goods?adult=only&max_price=3000",
"#class" : booth.BoothCategoryExtractor,
"#pattern" : booth.BoothItemExtractor.pattern,
"#range" : "1-100",
"#count" : 100,
},
{
"#url" : "https://booth.pm/zh-cn/browse/Books%20(Other)",
"#class" : booth.BoothCategoryExtractor,
"#pattern" : booth.BoothItemExtractor.pattern,
"#range" : "1-100",
"#count" : 100,
},
)