[zerochan] parse JSON-LD data (#7178)
This commit is contained in:
@@ -64,16 +64,22 @@ class ZerochanExtractor(BooruExtractor):
|
||||
|
||||
def _parse_entry_html(self, entry_id):
|
||||
url = "{}/{}".format(self.root, entry_id)
|
||||
extr = text.extract_from(self.request(url).text)
|
||||
page = self.request(url).text
|
||||
|
||||
try:
|
||||
jsonld = self._extract_jsonld(page)
|
||||
except Exception:
|
||||
return {"id": entry_id}
|
||||
|
||||
extr = text.extract_from(page)
|
||||
data = {
|
||||
"id" : text.parse_int(entry_id),
|
||||
"author" : text.parse_unicode_escapes(extr(' "name": "', '"')),
|
||||
"file_url": extr('"contentUrl": "', '"'),
|
||||
"date" : text.parse_datetime(extr('"datePublished": "', '"')),
|
||||
"width" : text.parse_int(extr('"width": "', ' ')),
|
||||
"height" : text.parse_int(extr('"height": "', ' ')),
|
||||
"size" : text.parse_bytes(extr('"contentSize": "', 'B')),
|
||||
"author" : jsonld["author"]["name"],
|
||||
"file_url": jsonld["contentUrl"],
|
||||
"date" : text.parse_datetime(jsonld["datePublished"]),
|
||||
"width" : text.parse_int(jsonld["width"][:-3]),
|
||||
"height" : text.parse_int(jsonld["height"][:-3]),
|
||||
"size" : text.parse_bytes(jsonld["contentSize"][:-1]),
|
||||
"path" : text.split_html(extr(
|
||||
'class="breadcrumbs', '</nav>'))[2:],
|
||||
"uploader": extr('href="/user/', '"'),
|
||||
@@ -86,7 +92,7 @@ class ZerochanExtractor(BooruExtractor):
|
||||
tags = data["tags"] = []
|
||||
for tag in html.split("<li class=")[1:]:
|
||||
category = text.extr(tag, '"', '"')
|
||||
name = text.extr(tag, 'data-tag="', '"')
|
||||
name = text.unescape(text.extr(tag, 'data-tag="', '"'))
|
||||
tags.append(category.partition(" ")[0].capitalize() + ":" + name)
|
||||
|
||||
return data
|
||||
|
||||
@@ -250,4 +250,58 @@ __tests__ = (
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.zerochan.net/4354955",
|
||||
"#comment" : "quotes in HTML tags",
|
||||
"#category": ("booru", "zerochan", "image"),
|
||||
"#class" : zerochan.ZerochanImageExtractor,
|
||||
"#auth" : False,
|
||||
"#options" : {"metadata": False},
|
||||
|
||||
"tags": [
|
||||
"Mangaka:Tory Patterson",
|
||||
"Studio:SEGA",
|
||||
"Game:Sonic Origins",
|
||||
"Series:Sonic the Hedgehog",
|
||||
"Character:Miles \"Tails\" Prower",
|
||||
"Theme:Airplane",
|
||||
"Theme:Flying",
|
||||
"Theme:Fox",
|
||||
"Source:Character Sheet",
|
||||
"Source:Official Art",
|
||||
"Source:Official Art from X",
|
||||
"Source:X (Twitter)",
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.zerochan.net/2275437",
|
||||
"#comment" : "unicode escapes with surrogate pair (#7178)",
|
||||
"#category": ("booru", "zerochan", "image"),
|
||||
"#class" : zerochan.ZerochanImageExtractor,
|
||||
"#auth" : False,
|
||||
"#options" : {"metadata": False},
|
||||
|
||||
"author" : "MAYO🍚",
|
||||
"date" : "dt:2018-02-25 16:03:48",
|
||||
"extension": "png",
|
||||
"file_url" : "https://static.zerochan.net/Kongou.full.2275437.png",
|
||||
"filename" : "Kongou.full.2275437",
|
||||
"width" : 1047,
|
||||
"height" : 1365,
|
||||
"id" : 2275437,
|
||||
"size" : 502784,
|
||||
"source" : "",
|
||||
"uploader" : "SubaruSumeragi",
|
||||
"path" : [
|
||||
"Kantai Collection",
|
||||
"Kongou",
|
||||
],
|
||||
"tags" : [
|
||||
"Mangaka:MAYO🍚",
|
||||
"Game:Kantai Collection",
|
||||
"Character:Kongou"
|
||||
],
|
||||
},
|
||||
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user