implement 'sleep-request' option

This commit is contained in:
Mike Fährmann
2020-09-19 22:07:41 +02:00
parent 65744a7a31
commit 1e313d5b84
2 changed files with 23 additions and 0 deletions

View File

@@ -218,6 +218,16 @@ Description Number of seconds to sleep before handling an input URL,
=========== ===== =========== =====
extractor.*.sleep-request
-------------------------
=========== =====
Type ``float``
Default ``0``
Description Minimum time interval in seconds between consecutive HTTP requests
during data extraction.
=========== =====
extractor.*.username & .password extractor.*.username & .password
-------------------------------- --------------------------------
=========== ===== =========== =====

View File

@@ -31,6 +31,8 @@ class Extractor():
cookiedomain = "" cookiedomain = ""
root = "" root = ""
test = None test = None
_request_last = 0
_request_interval = 0
def __init__(self, match): def __init__(self, match):
self.session = requests.Session() self.session = requests.Session()
@@ -46,6 +48,8 @@ class Extractor():
self._retries = self.config("retries", 4) self._retries = self.config("retries", 4)
self._timeout = self.config("timeout", 30) self._timeout = self.config("timeout", 30)
self._verify = self.config("verify", True) self._verify = self.config("verify", True)
self._request_interval = self.config(
"sleep-request", self._request_interval)
if self._retries < 0: if self._retries < 0:
self._retries = float("inf") self._retries = float("inf")
@@ -85,6 +89,13 @@ class Extractor():
kwargs.setdefault("verify", self._verify) kwargs.setdefault("verify", self._verify)
response = None response = None
if self._request_interval:
seconds = (self._request_interval -
(time.time() - Extractor._request_last))
if seconds > 0:
self.log.debug("Sleeping for %.5s seconds", seconds)
time.sleep(seconds)
while True: while True:
try: try:
response = session.request(method, url, **kwargs) response = session.request(method, url, **kwargs)
@@ -123,6 +134,8 @@ class Extractor():
msg = "'{} {}' for '{}'".format(code, reason, url) msg = "'{} {}' for '{}'".format(code, reason, url)
if code < 500 and code != 429 and code != 430: if code < 500 and code != 429 and code != 430:
break break
finally:
Extractor._request_last = time.time()
self.log.debug("%s (%s/%s)", msg, tries, retries+1) self.log.debug("%s (%s/%s)", msg, tries, retries+1)
if tries > retries: if tries > retries: