[zerochan] parse API response manually when json.loads() fails (#6632)
This commit is contained in:
@@ -93,14 +93,12 @@ class ZerochanExtractor(BooruExtractor):
|
||||
|
||||
def _parse_entry_api(self, entry_id):
|
||||
url = "{}/{}?json".format(self.root, entry_id)
|
||||
text = self.request(url).text
|
||||
txt = self.request(url).text
|
||||
try:
|
||||
item = util.json_loads(text)
|
||||
except ValueError as exc:
|
||||
if " control character " not in str(exc):
|
||||
raise
|
||||
text = re.sub(r"[\x00-\x1f\x7f]", "", text)
|
||||
item = util.json_loads(text)
|
||||
item = util.json_loads('"' + txt)
|
||||
except ValueError:
|
||||
item = self._parse_json(txt)
|
||||
item["id"] = text.parse_int(entry_id)
|
||||
|
||||
data = {
|
||||
"id" : item["id"],
|
||||
@@ -118,6 +116,27 @@ class ZerochanExtractor(BooruExtractor):
|
||||
|
||||
return data
|
||||
|
||||
def _parse_json(self, txt):
|
||||
txt = re.sub(r"[\x00-\x1f\x7f]", "", txt)
|
||||
main, _, tags = txt.partition('tags": [')
|
||||
|
||||
item = {}
|
||||
for line in main.split(', "')[1:]:
|
||||
key, _, value = line.partition('": ')
|
||||
if value:
|
||||
if value[0] == '"':
|
||||
value = value[1:-1]
|
||||
else:
|
||||
value = text.parse_int(value)
|
||||
if key:
|
||||
item[key] = value
|
||||
|
||||
item["tags"] = tags = tags[5:].split('", "')
|
||||
if tags:
|
||||
tags[-1] = tags[-1][:-5]
|
||||
|
||||
return item
|
||||
|
||||
def _tags(self, post, page):
|
||||
tags = collections.defaultdict(list)
|
||||
for tag in post["tags"]:
|
||||
|
||||
Reference in New Issue
Block a user