improve extractor.request()
- better retry behavior
- exponential back-off
- removed 'allow_empty' argument
This commit is contained in:
@@ -73,7 +73,7 @@ class ArtstationExtractor(Extractor):
|
||||
def get_user_info(self, username):
|
||||
"""Return metadata for a specific user"""
|
||||
url = "{}/users/{}/quick.json".format(self.root, username.lower())
|
||||
response = self.request(url, fatal=False, allow_empty=True)
|
||||
response = self.request(url, fatal=False)
|
||||
if response.status_code == 404:
|
||||
raise exception.NotFoundError("user")
|
||||
return response.json()
|
||||
|
||||
@@ -52,34 +52,34 @@ class Extractor():
|
||||
("extractor", self.category, self.subcategory, key), default)
|
||||
|
||||
def request(self, url, method="GET", encoding=None, fatal=True, retries=3,
|
||||
allow_empty=False, *args, **kwargs):
|
||||
max_retries = retries
|
||||
*args, **kwargs):
|
||||
max_tries = retries
|
||||
while True:
|
||||
try:
|
||||
response = None
|
||||
response = self.session.request(method, url, *args, **kwargs)
|
||||
if fatal:
|
||||
response.raise_for_status()
|
||||
if encoding:
|
||||
response.encoding = encoding
|
||||
if response.content or allow_empty:
|
||||
return response
|
||||
msg = "empty response body"
|
||||
except requests.exceptions.HTTPError as exc:
|
||||
except (requests.ConnectionError, requests.Timeout) as exc:
|
||||
msg = exc
|
||||
code = response.status_code
|
||||
if 400 <= code < 500 and code != 429: # Client Error
|
||||
retries = 0
|
||||
except requests.exceptions.RequestException as exc:
|
||||
msg = exc
|
||||
if not retries:
|
||||
raise exception.HttpError(msg)
|
||||
if response and response.status_code == 429: # Too Many Requests
|
||||
waittime = float(response.headers.get("Retry-After", 10.0))
|
||||
raise exception.HttpError(exc)
|
||||
else:
|
||||
waittime = 1
|
||||
if 200 <= response.status_code < 400 or not fatal:
|
||||
if encoding:
|
||||
response.encoding = encoding
|
||||
return response
|
||||
|
||||
msg = "{} HTTP Error: {} for url: {}".format(
|
||||
response.status_code, response.reason, url)
|
||||
if response.status_code < 500 and response.status_code != 429:
|
||||
break
|
||||
|
||||
if not retries:
|
||||
break
|
||||
tries = max_tries - retries
|
||||
retries -= 1
|
||||
time.sleep(waittime * (max_retries - retries))
|
||||
self.log.debug("%s (%d/%d)", msg, tries, max_tries)
|
||||
time.sleep(2 ** tries)
|
||||
|
||||
raise exception.HttpError(msg)
|
||||
|
||||
def _get_auth_info(self):
|
||||
"""Return authentication information as (username, password) tuple"""
|
||||
|
||||
@@ -142,7 +142,7 @@ class HentaifoundryUserExtractor(HentaifoundryExtractor):
|
||||
"filter_type": 0,
|
||||
}
|
||||
self.request("https://www.hentai-foundry.com/site/filters",
|
||||
method="post", data=formdata, allow_empty=True)
|
||||
method="post", data=formdata)
|
||||
|
||||
|
||||
class HentaifoundryImageExtractor(HentaifoundryExtractor):
|
||||
|
||||
@@ -193,8 +193,8 @@ class NijieImageExtractor(NijieExtractor):
|
||||
self.page = ""
|
||||
|
||||
def get_job_metadata(self):
|
||||
response = self.request(self.popup_url + self.image_id,
|
||||
allow_redirects=False, allow_empty=True)
|
||||
response = self.request(
|
||||
self.popup_url + self.image_id, allow_redirects=False)
|
||||
if 300 <= response.status_code < 400:
|
||||
raise exception.NotFoundError("image")
|
||||
self.page = response.text
|
||||
|
||||
Reference in New Issue
Block a user