[options] add 'sleep-retries' option
This commit is contained in:
@@ -537,6 +537,30 @@ Description
|
||||
i.e. before starting a new extractor.
|
||||
|
||||
|
||||
extractor.*.sleep-retries
|
||||
-------------------------
|
||||
Type
|
||||
|Duration+|_
|
||||
Default
|
||||
``"lin=1"``
|
||||
Example
|
||||
* ``"30-50"``
|
||||
* ``"exp=40"``
|
||||
* ``"lin:20=30-60"``
|
||||
Description
|
||||
Number of seconds to sleep before
|
||||
`retrying <extractor.*.retries_>`__
|
||||
an HTTP request.
|
||||
|
||||
If this is a ``string``, its |Duration|_ value can be prefixed with
|
||||
``lin[:START[:MAX]]`` for `linear` or
|
||||
``exp[:BASE[:START[:MAX]]]`` for `exponential` growth.
|
||||
Note
|
||||
| ``lin`` and ``exp`` can be abbreviated to any leading characters of
|
||||
``linear`` and ``exponential``.
|
||||
| For example ``l``, ``li``, ``lin``, ``line``, ``linea``, or ``linear``.
|
||||
|
||||
|
||||
extractor.*.sleep-429
|
||||
---------------------
|
||||
Type
|
||||
@@ -545,16 +569,16 @@ Default
|
||||
``60``
|
||||
Example
|
||||
* ``"30-50"``
|
||||
* ``"exp=40"``
|
||||
* ``"lin:20=30-60"``
|
||||
* ``"e=40"``
|
||||
* ``"linear:20=30-60"``
|
||||
Description
|
||||
Number of seconds to sleep when receiving a
|
||||
`429 Too Many Requests <https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Status/429>`__
|
||||
response before `retrying <extractor.*.retries_>`__ the request.
|
||||
|
||||
If this is a ``string``, its |Duration|_ value can be prefixed with
|
||||
``lin[:START[:MAX]]`` or ``exp[:BASE[:START[:MAX]]]``
|
||||
for `linear` or `exponential` backoff respectively.
|
||||
``lin[:START[:MAX]]`` for `linear` or
|
||||
``exp[:BASE[:START[:MAX]]]`` for `exponential` backoff.
|
||||
|
||||
|
||||
extractor.*.sleep-request
|
||||
|
||||
@@ -84,6 +84,7 @@
|
||||
"sleep-skip" : 0,
|
||||
"sleep-request" : 0,
|
||||
"sleep-extractor": 0,
|
||||
"sleep-retries" : "lin=1",
|
||||
"sleep-429" : 60.0,
|
||||
|
||||
"actions": [],
|
||||
|
||||
@@ -126,12 +126,15 @@
|
||||
extraction for an input URL
|
||||
--sleep-request SECONDS Number of seconds to wait between HTTP requests
|
||||
during data extraction
|
||||
--sleep-429 [TYPE=]SECONDS Number of seconds to wait when receiving a '429
|
||||
Too Many Requests' response. Can be prefixed
|
||||
with 'lin[:START[:MAX]]' or
|
||||
--sleep-retries [TYPE=]SECONDS
|
||||
Number of seconds to wait before retrying an
|
||||
HTTP request. Can be prefixed with
|
||||
'lin[:START[:MAX]]' or
|
||||
'exp[:BASE[:START[:MAX]]]' for linear or
|
||||
exponential growth (e.g. '30', 'exp=40',
|
||||
'lin:20=30-60'
|
||||
exponential growth between consecutive retries
|
||||
(e.g. '30', 'exp=40', 'lin:20=30-60')
|
||||
--sleep-429 [TYPE=]SECONDS Number of seconds to wait when receiving a '429
|
||||
Too Many Requests' response
|
||||
|
||||
## Configuration Options:
|
||||
-o, --option KEY=VALUE Additional options. Example: -o browser=firefox
|
||||
|
||||
@@ -184,8 +184,8 @@ class Extractor():
|
||||
response = challenge = None
|
||||
tries = 1
|
||||
|
||||
if self._interval and interval:
|
||||
seconds = (self._interval() -
|
||||
if self._interval_request is not None and interval:
|
||||
seconds = (self._interval_request() -
|
||||
(time.time() - Extractor.request_timestamp))
|
||||
if seconds > 0.0:
|
||||
self.sleep(seconds, "request")
|
||||
@@ -251,9 +251,9 @@ class Extractor():
|
||||
if tries > retries:
|
||||
break
|
||||
|
||||
seconds = tries
|
||||
if self._interval:
|
||||
s = self._interval()
|
||||
seconds = self._interval_retry(tries)
|
||||
if self._interval_request is not None:
|
||||
s = self._interval_request()
|
||||
if seconds < s:
|
||||
seconds = s
|
||||
if code == 429 and self._interval_429 is not None:
|
||||
@@ -414,10 +414,27 @@ class Extractor():
|
||||
self._timeout = self.config("timeout", 30)
|
||||
self._verify = self.config("verify", True)
|
||||
self._proxies = util.build_proxy_map(self.config("proxy"), self.log)
|
||||
self._interval = util.build_duration_func(
|
||||
|
||||
if self._retries < 0:
|
||||
self._retries = float("inf")
|
||||
if not self._retry_codes:
|
||||
self._retry_codes = ()
|
||||
|
||||
self._interval_request = util.build_duration_func(
|
||||
self.config("sleep-request", self.request_interval),
|
||||
self.request_interval_min,
|
||||
)
|
||||
self.request_interval_min)
|
||||
|
||||
_interval_retry = self.config("sleep-retries")
|
||||
if _interval_retry is None:
|
||||
self._interval_retry = util.identity
|
||||
else:
|
||||
try:
|
||||
self._interval_retry = util.build_duration_func_ex(
|
||||
_interval_retry)
|
||||
except Exception as exc:
|
||||
self.log.error("Invalid 'sleep-retry' value '%s' (%s: %s)",
|
||||
_interval_retry, exc.__class__.__name__, exc)
|
||||
self._interval_retry = util.identity
|
||||
|
||||
_interval_429 = self.config("sleep-429")
|
||||
if _interval_429 is None:
|
||||
@@ -430,11 +447,6 @@ class Extractor():
|
||||
self._interval_429 = util.build_duration_func_ex(
|
||||
self.request_interval_429)
|
||||
|
||||
if self._retries < 0:
|
||||
self._retries = float("inf")
|
||||
if not self._retry_codes:
|
||||
self._retry_codes = ()
|
||||
|
||||
def _init_session(self):
|
||||
self.session = session = requests.Session()
|
||||
headers = session.headers
|
||||
|
||||
@@ -590,14 +590,20 @@ def build_parser():
|
||||
help=("Number of seconds to wait between HTTP requests "
|
||||
"during data extraction"),
|
||||
)
|
||||
sleep.add_argument(
|
||||
"--sleep-retries",
|
||||
dest="sleep-retries", metavar="[TYPE=]SECONDS", action=ConfigAction,
|
||||
help=("Number of seconds to wait before retrying an HTTP request. "
|
||||
"Can be prefixed with "
|
||||
"'lin[:START[:MAX]]' or 'exp[:BASE[:START[:MAX]]]' "
|
||||
"for linear or exponential growth between consecutive retries "
|
||||
"(e.g. '30', 'exp=40', 'lin:20=30-60'"),
|
||||
)
|
||||
sleep.add_argument(
|
||||
"--sleep-429",
|
||||
dest="sleep-429", metavar="[TYPE=]SECONDS", action=ConfigAction,
|
||||
help=("Number of seconds to wait when receiving a "
|
||||
"'429 Too Many Requests' response. Can be prefixed with "
|
||||
"'lin[:START[:MAX]]' or 'exp[:BASE[:START[:MAX]]]' "
|
||||
"for linear or exponential growth "
|
||||
"(e.g. '30', 'exp=40', 'lin:20=30-60'"),
|
||||
"'429 Too Many Requests' response"),
|
||||
)
|
||||
|
||||
configuration = parser.add_argument_group("Configuration Options")
|
||||
|
||||
Reference in New Issue
Block a user