[common] detect empty HTTP response bodies

This commit is contained in:
Mike Fährmann
2017-09-25 13:01:10 +02:00
parent c51616f8d8
commit 9c138dfc1f
4 changed files with 10 additions and 7 deletions

View File

@@ -48,7 +48,8 @@ class Extractor():
             ("extractor", self.category, self.subcategory, key), default)

     def request(self, url, method="GET", encoding=None, fatal=True, retries=3,
-                *args, **kwargs):
+                allow_empty=False, *args, **kwargs):
+        max_retries = retries
         while True:
             try:
                 response = self.session.request(method, url, *args, **kwargs)
@@ -56,13 +57,15 @@ class Extractor():
                     response.raise_for_status()
                 if encoding:
                     response.encoding = encoding
-                return response
+                if response.content or allow_empty:
+                    return response
+                msg = "empty response body"
             except requests.exceptions.RequestException as exc:
                 msg = exc
-            retries -= 1
             if not retries:
                 raise exception.HttpError(msg)
-            time.sleep(1)
+            time.sleep(1 + max_retries - retries)
+            retries -= 1
def _get_auth_info(self): def _get_auth_info(self):
"""Return authentication information as (username, password) tuple""" """Return authentication information as (username, password) tuple"""

View File

@@ -111,7 +111,7 @@ class HentaifoundryUserExtractor(Extractor):
             "filter_type": 0,
         }
         self.request("https://www.hentai-foundry.com/site/filters",
-                     method="post", data=formdata)
+                     method="post", data=formdata, allow_empty=True)


 class HentaifoundryImageExtractor(Extractor):

View File

@@ -134,7 +134,7 @@ class NijieImageExtractor(NijieExtractor):

     def get_job_metadata(self):
         response = self.request(self.popup_url + self.image_id,
-                                allow_redirects=False)
+                                allow_redirects=False, allow_empty=True)
         if 300 <= response.status_code < 400:
             raise exception.NotFoundError("image")
         self.page = response.text

View File

@@ -62,7 +62,7 @@ class Job():
             res = str(exc) or "resource (gallery/image/user)"
             log.error("The %s at '%s' does not exist", res, self.url)
         except exception.HttpError as exc:
-            log.error("HTTP request failed:\n%s", exc)
+            log.error("HTTP request failed: %s", exc)
         except exception.FormatError as exc:
             err, obj = exc.args
             log.error("Applying %s format string failed: %s: %s",