[zerochan] fix 'Invalid control character' errors (#5892)

This commit is contained in:
Mike Fährmann
2024-07-29 11:19:10 +02:00
parent aa6d00613f
commit 8a6e208605
2 changed files with 20 additions and 1 deletions

View File

@@ -12,6 +12,7 @@ from .booru import BooruExtractor
from ..cache import cache
from .. import text, util, exception
import collections
import re
BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net"
@@ -92,7 +93,14 @@ class ZerochanExtractor(BooruExtractor):
def _parse_entry_api(self, entry_id):
url = "{}/{}?json".format(self.root, entry_id)
item = self.request(url).json()
text = self.request(url).text
try:
item = util.json_loads(text)
except ValueError as exc:
if " control character " not in str(exc):
raise
text = re.sub(r"[\x00-\x1f\x7f]", "", text)
item = util.json_loads(text)
data = {
"id" : item["id"],

View File

@@ -177,4 +177,15 @@ __tests__ = (
"width" : 750,
},
{
"#url" : "https://www.zerochan.net/1395035",
"#comment" : "Invalid control character '\r' in 'source' field (#5892)",
"#category": ("booru", "zerochan", "image"),
"#class" : zerochan.ZerochanImageExtractor,
"#auth" : True,
"#options" : {"metadata": True},
"source": "http://www.youtube.com/watch?v=0vodqkGPxt8",
},
)