[joyreactor] improve error handling for faulty JSON (#148)
- remove all ASCII control characters (code points 0-31), not just \n and \r
- ignore faulty posts instead of letting the exception propagate
This commit is contained in:
@@ -62,8 +62,7 @@ class JoyreactorExtractor(Extractor):
|
|||||||
return
|
return
|
||||||
url = self.root + path
|
url = self.root + path
|
||||||
|
|
||||||
@staticmethod
|
def _parse_post(self, post):
|
||||||
def _parse_post(post):
|
|
||||||
post, _, script = post.partition('<script type="application/ld+json">')
|
post, _, script = post.partition('<script type="application/ld+json">')
|
||||||
images = text.extract_iter(post, '<div class="image">', '</div>')
|
images = text.extract_iter(post, '<div class="image">', '</div>')
|
||||||
script = script[:script.index("</")].strip()
|
script = script[:script.index("</")].strip()
|
||||||
@@ -71,10 +70,13 @@ class JoyreactorExtractor(Extractor):
|
|||||||
try:
|
try:
|
||||||
data = json.loads(script)
|
data = json.loads(script)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
data = json.loads(script
|
try:
|
||||||
.replace("\\", "\\\\")
|
mapping = dict.fromkeys(range(32))
|
||||||
.replace("\n", "")
|
script = script.translate(mapping).replace("\\", "\\\\")
|
||||||
.replace("\r", ""))
|
data = json.loads(script)
|
||||||
|
except ValueError as exc:
|
||||||
|
self.log.warning("Unable to parse post: %s", exc)
|
||||||
|
return
|
||||||
|
|
||||||
num = 0
|
num = 0
|
||||||
date = data["datePublished"]
|
date = data["datePublished"]
|
||||||
|
|||||||
Reference in New Issue
Block a user