[zerochan] parse API response manually when json.loads() fails (#6632)

This commit is contained in:
Mike Fährmann
2024-12-12 19:57:37 +01:00
parent d2c66ac34d
commit a33065be86
2 changed files with 68 additions and 7 deletions

View File

@@ -93,14 +93,12 @@ class ZerochanExtractor(BooruExtractor):
def _parse_entry_api(self, entry_id):
url = "{}/{}?json".format(self.root, entry_id)
text = self.request(url).text
txt = self.request(url).text
try:
item = util.json_loads(text)
except ValueError as exc:
if " control character " not in str(exc):
raise
text = re.sub(r"[\x00-\x1f\x7f]", "", text)
item = util.json_loads(text)
item = util.json_loads('"' + txt)
except ValueError:
item = self._parse_json(txt)
item["id"] = text.parse_int(entry_id)
data = {
"id" : item["id"],
@@ -118,6 +116,27 @@ class ZerochanExtractor(BooruExtractor):
return data
def _parse_json(self, txt):
txt = re.sub(r"[\x00-\x1f\x7f]", "", txt)
main, _, tags = txt.partition('tags": [')
item = {}
for line in main.split(', "')[1:]:
key, _, value = line.partition('": ')
if value:
if value[0] == '"':
value = value[1:-1]
else:
value = text.parse_int(value)
if key:
item[key] = value
item["tags"] = tags = tags[5:].split('", "')
if tags:
tags[-1] = tags[-1][:-5]
return item
def _tags(self, post, page):
tags = collections.defaultdict(list)
for tag in post["tags"]: