[zerochan] normalize "No results" output message (#8313)
catch and handle '404 Not Found' responses
This commit is contained in:
@@ -193,7 +193,13 @@ class ZerochanTagExtractor(ZerochanExtractor):
|
|||||||
metadata = self.config("metadata")
|
metadata = self.config("metadata")
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
page = self.request(url, params=params, expected=(500,)).text
|
try:
|
||||||
|
page = self.request(
|
||||||
|
url, params=params, expected=(500,)).text
|
||||||
|
except exception.HttpError as exc:
|
||||||
|
if exc.status == 404:
|
||||||
|
return
|
||||||
|
raise
|
||||||
thumbs = text.extr(page, '<ul id="thumbs', '</ul>')
|
thumbs = text.extr(page, '<ul id="thumbs', '</ul>')
|
||||||
extr = text.extract_from(thumbs)
|
extr = text.extract_from(thumbs)
|
||||||
|
|
||||||
@@ -232,7 +238,13 @@ class ZerochanTagExtractor(ZerochanExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
response = self.request(url, params=params, allow_redirects=False)
|
try:
|
||||||
|
response = self.request(
|
||||||
|
url, params=params, allow_redirects=False)
|
||||||
|
except exception.HttpError as exc:
|
||||||
|
if exc.status == 404:
|
||||||
|
return
|
||||||
|
raise
|
||||||
|
|
||||||
if response.status_code >= 300:
|
if response.status_code >= 300:
|
||||||
url = text.urljoin(self.root, response.headers["location"])
|
url = text.urljoin(self.root, response.headers["location"])
|
||||||
@@ -276,12 +288,18 @@ class ZerochanImageExtractor(ZerochanExtractor):
|
|||||||
pattern = BASE_PATTERN + r"/(\d+)"
|
pattern = BASE_PATTERN + r"/(\d+)"
|
||||||
example = "https://www.zerochan.net/12345"
|
example = "https://www.zerochan.net/12345"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
ZerochanExtractor.__init__(self, match)
|
|
||||||
self.image_id = match[1]
|
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
post = self._parse_entry_html(self.image_id)
|
image_id = self.groups[0]
|
||||||
|
|
||||||
|
try:
|
||||||
|
post = self._parse_entry_html(image_id)
|
||||||
|
except exception.HttpError as exc:
|
||||||
|
if exc.status in (404, 410):
|
||||||
|
if msg := text.extr(exc.response.text, "<h2>", "<"):
|
||||||
|
self.log.warning(f"'{msg}'")
|
||||||
|
return ()
|
||||||
|
raise
|
||||||
|
|
||||||
if self.config("metadata"):
|
if self.config("metadata"):
|
||||||
post.update(self._parse_entry_api(self.image_id))
|
post.update(self._parse_entry_api(image_id))
|
||||||
return (post,)
|
return (post,)
|
||||||
|
|||||||
@@ -45,6 +45,13 @@ __tests__ = (
|
|||||||
"width" : r"re:^\d+$",
|
"width" : r"re:^\d+$",
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://www.zerochan.net/non_existant_tag",
|
||||||
|
"#comment" : "handle HttpError exception (#8313)",
|
||||||
|
"#class" : zerochan.ZerochanTagExtractor,
|
||||||
|
"#count" : 0,
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://www.zerochan.net/2920445",
|
"#url" : "https://www.zerochan.net/2920445",
|
||||||
"#category": ("booru", "zerochan", "image"),
|
"#category": ("booru", "zerochan", "image"),
|
||||||
@@ -341,4 +348,20 @@ __tests__ = (
|
|||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://www.zerochan.net/1",
|
||||||
|
"#category": ("booru", "zerochan", "image"),
|
||||||
|
"#class" : zerochan.ZerochanImageExtractor,
|
||||||
|
"#count" : 0,
|
||||||
|
"#log" : "'deleted'",
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://www.zerochan.net/9876540",
|
||||||
|
"#category": ("booru", "zerochan", "image"),
|
||||||
|
"#class" : zerochan.ZerochanImageExtractor,
|
||||||
|
"#count" : 0,
|
||||||
|
"#log" : "'Not found'",
|
||||||
|
},
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user