[newgrounds] make post extraction nonfatal
This commit is contained in:
@@ -36,16 +36,17 @@ class NewgroundsExtractor(Extractor):
|
||||
|
||||
for post_url in self.posts():
|
||||
try:
|
||||
file = self.extract_post(post_url)
|
||||
url = file["url"]
|
||||
# except Exception:
|
||||
post = self.extract_post(post_url)
|
||||
url = post.get("url")
|
||||
except OSError:
|
||||
url = None
|
||||
if not url:
|
||||
self.log.warning("Unable to get download URL for %s", post_url)
|
||||
continue
|
||||
yield Message.Directory, file
|
||||
yield Message.Url, url, text.nameext_from_url(url, file)
|
||||
|
||||
if url:
|
||||
yield Message.Directory, post
|
||||
yield Message.Url, url, text.nameext_from_url(url, post)
|
||||
else:
|
||||
self.log.warning(
|
||||
"Unable to get download URL for '%s'", post_url)
|
||||
|
||||
def posts(self):
|
||||
"""Return urls of all relevant image pages"""
|
||||
@@ -83,7 +84,10 @@ class NewgroundsExtractor(Extractor):
|
||||
}
|
||||
|
||||
def extract_post(self, post_url):
|
||||
page = self.request(post_url).text
|
||||
response = self.request(post_url, fatal=False)
|
||||
if response.status_code >= 400:
|
||||
return {}
|
||||
page = response.text
|
||||
extr = text.extract_from(page)
|
||||
|
||||
if "/art/view/" in post_url:
|
||||
@@ -98,8 +102,7 @@ class NewgroundsExtractor(Extractor):
|
||||
data["favorites"] = text.parse_int(extr(
|
||||
'id="faves_load">', '<').replace(",", ""))
|
||||
data["score"] = text.parse_float(extr('id="score_number">', '<'))
|
||||
data["tags"] = text.split_html(extr(
|
||||
'<dd class="tags">', '</dd>'))
|
||||
data["tags"] = text.split_html(extr('<dd class="tags">', '</dd>'))
|
||||
data["artist"] = [
|
||||
text.extract(user, '//', '.')[0]
|
||||
for user in text.extract_iter(page, '<div class="item-user">', '>')
|
||||
|
||||
Reference in New Issue
Block a user