[zerochan] parse API response manually when json.loads() fails (#6632)

This commit is contained in:
Mike Fährmann
2024-12-12 19:57:37 +01:00
parent d2c66ac34d
commit a33065be86
2 changed files with 68 additions and 7 deletions

View File

@@ -93,14 +93,12 @@ class ZerochanExtractor(BooruExtractor):
def _parse_entry_api(self, entry_id):
url = "{}/{}?json".format(self.root, entry_id)
text = self.request(url).text
txt = self.request(url).text
try:
item = util.json_loads(text)
except ValueError as exc:
if " control character " not in str(exc):
raise
text = re.sub(r"[\x00-\x1f\x7f]", "", text)
item = util.json_loads(text)
item = util.json_loads(txt)
except ValueError:
item = self._parse_json(txt)
item["id"] = text.parse_int(entry_id)
data = {
"id" : item["id"],
@@ -118,6 +116,27 @@ class ZerochanExtractor(BooruExtractor):
return data
def _parse_json(self, txt):
txt = re.sub(r"[\x00-\x1f\x7f]", "", txt)
main, _, tags = txt.partition('tags": [')
item = {}
for line in main.split(', "')[1:]:
key, _, value = line.partition('": ')
if value:
if value[0] == '"':
value = value[1:-1]
else:
value = text.parse_int(value)
if key:
item[key] = value
item["tags"] = tags = tags[5:].split('", "')
if tags:
tags[-1] = tags[-1][:-5]
return item
def _tags(self, post, page):
tags = collections.defaultdict(list)
for tag in post["tags"]:

View File

@@ -208,4 +208,46 @@ __tests__ = (
"source": "http://www.youtube.com/watch?v=0vodqkGPxt8",
},
{
"#url" : "https://www.zerochan.net/4354955",
"#comment" : "unescaped quotes in 'JSON' data (#6632)",
"#category": ("booru", "zerochan", "image"),
"#class" : zerochan.ZerochanImageExtractor,
"#auth" : False,
"#options" : {"metadata": True},
"author" : "SEGA",
"date" : "dt:2024-12-05 06:06:14",
"file_url": "https://static.zerochan.net/Miles.%22Tails%22.Prower.full.4354955.jpg",
"filename": "Miles.\"Tails\".Prower.full.4354955",
"height" : 705,
"id" : 4354955,
"name" : "Miles \"Tails\" Prower",
"size" : 252928,
"source" : "https://x.com/kellanstover/status/1580237736874606597",
"uploader": "Anima-Chao",
"width" : 4096,
"path" : [
"Sonic the Hedgehog",
"Miles \"Tails\" Prower",
],
"tags" : [
"Male",
"Animal",
"Fox",
"Sonic the Hedgehog",
"Flying",
"Character Sheet",
"Airplane",
"SEGA",
"Miles \"Tails\" Prower",
"Official Art",
"Midair",
"X (Twitter)",
"Sonic Origins",
"Official Art from X",
"Tory Patterson",
],
},
)