improve extractor.request()
- better retry behavior
- exponential back-off between retries
- removed the 'allow_empty' argument
This commit is contained in:
@@ -73,7 +73,7 @@ class ArtstationExtractor(Extractor):
|
|||||||
def get_user_info(self, username):
|
def get_user_info(self, username):
|
||||||
"""Return metadata for a specific user"""
|
"""Return metadata for a specific user"""
|
||||||
url = "{}/users/{}/quick.json".format(self.root, username.lower())
|
url = "{}/users/{}/quick.json".format(self.root, username.lower())
|
||||||
response = self.request(url, fatal=False, allow_empty=True)
|
response = self.request(url, fatal=False)
|
||||||
if response.status_code == 404:
|
if response.status_code == 404:
|
||||||
raise exception.NotFoundError("user")
|
raise exception.NotFoundError("user")
|
||||||
return response.json()
|
return response.json()
|
||||||
|
|||||||
@@ -52,34 +52,34 @@ class Extractor():
|
|||||||
("extractor", self.category, self.subcategory, key), default)
|
("extractor", self.category, self.subcategory, key), default)
|
||||||
|
|
||||||
def request(self, url, method="GET", encoding=None, fatal=True, retries=3,
|
def request(self, url, method="GET", encoding=None, fatal=True, retries=3,
|
||||||
allow_empty=False, *args, **kwargs):
|
*args, **kwargs):
|
||||||
max_retries = retries
|
max_tries = retries
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
response = None
|
|
||||||
response = self.session.request(method, url, *args, **kwargs)
|
response = self.session.request(method, url, *args, **kwargs)
|
||||||
if fatal:
|
except (requests.ConnectionError, requests.Timeout) as exc:
|
||||||
response.raise_for_status()
|
|
||||||
if encoding:
|
|
||||||
response.encoding = encoding
|
|
||||||
if response.content or allow_empty:
|
|
||||||
return response
|
|
||||||
msg = "empty response body"
|
|
||||||
except requests.exceptions.HTTPError as exc:
|
|
||||||
msg = exc
|
msg = exc
|
||||||
code = response.status_code
|
|
||||||
if 400 <= code < 500 and code != 429: # Client Error
|
|
||||||
retries = 0
|
|
||||||
except requests.exceptions.RequestException as exc:
|
except requests.exceptions.RequestException as exc:
|
||||||
msg = exc
|
raise exception.HttpError(exc)
|
||||||
if not retries:
|
|
||||||
raise exception.HttpError(msg)
|
|
||||||
if response and response.status_code == 429: # Too Many Requests
|
|
||||||
waittime = float(response.headers.get("Retry-After", 10.0))
|
|
||||||
else:
|
else:
|
||||||
waittime = 1
|
if 200 <= response.status_code < 400 or not fatal:
|
||||||
|
if encoding:
|
||||||
|
response.encoding = encoding
|
||||||
|
return response
|
||||||
|
|
||||||
|
msg = "{} HTTP Error: {} for url: {}".format(
|
||||||
|
response.status_code, response.reason, url)
|
||||||
|
if response.status_code < 500 and response.status_code != 429:
|
||||||
|
break
|
||||||
|
|
||||||
|
if not retries:
|
||||||
|
break
|
||||||
|
tries = max_tries - retries
|
||||||
retries -= 1
|
retries -= 1
|
||||||
time.sleep(waittime * (max_retries - retries))
|
self.log.debug("%s (%d/%d)", msg, tries, max_tries)
|
||||||
|
time.sleep(2 ** tries)
|
||||||
|
|
||||||
|
raise exception.HttpError(msg)
|
||||||
|
|
||||||
def _get_auth_info(self):
|
def _get_auth_info(self):
|
||||||
"""Return authentication information as (username, password) tuple"""
|
"""Return authentication information as (username, password) tuple"""
|
||||||
|
|||||||
@@ -142,7 +142,7 @@ class HentaifoundryUserExtractor(HentaifoundryExtractor):
|
|||||||
"filter_type": 0,
|
"filter_type": 0,
|
||||||
}
|
}
|
||||||
self.request("https://www.hentai-foundry.com/site/filters",
|
self.request("https://www.hentai-foundry.com/site/filters",
|
||||||
method="post", data=formdata, allow_empty=True)
|
method="post", data=formdata)
|
||||||
|
|
||||||
|
|
||||||
class HentaifoundryImageExtractor(HentaifoundryExtractor):
|
class HentaifoundryImageExtractor(HentaifoundryExtractor):
|
||||||
|
|||||||
@@ -193,8 +193,8 @@ class NijieImageExtractor(NijieExtractor):
|
|||||||
self.page = ""
|
self.page = ""
|
||||||
|
|
||||||
def get_job_metadata(self):
|
def get_job_metadata(self):
|
||||||
response = self.request(self.popup_url + self.image_id,
|
response = self.request(
|
||||||
allow_redirects=False, allow_empty=True)
|
self.popup_url + self.image_id, allow_redirects=False)
|
||||||
if 300 <= response.status_code < 400:
|
if 300 <= response.status_code < 400:
|
||||||
raise exception.NotFoundError("image")
|
raise exception.NotFoundError("image")
|
||||||
self.page = response.text
|
self.page = response.text
|
||||||
|
|||||||
Reference in New Issue
Block a user