[options] add 'sleep-retries' option

This commit is contained in:
Mike Fährmann
2026-02-13 18:04:05 +01:00
parent e6f2e31d45
commit d2477a94af
5 changed files with 72 additions and 26 deletions

View File

@@ -537,6 +537,30 @@ Description
i.e. before starting a new extractor.
extractor.*.sleep-retries
-------------------------
Type
|Duration+|_
Default
``"lin=1"``
Example
* ``"30-50"``
* ``"exp=40"``
* ``"lin:20=30-60"``
Description
Number of seconds to sleep before
`retrying <extractor.*.retries_>`__
an HTTP request.
If this is a ``string``, its |Duration|_ value can be prefixed with
``lin[:START[:MAX]]`` for `linear` or
``exp[:BASE[:START[:MAX]]]`` for `exponential` growth.
Note
| Instead of ``lin`` and ``exp``, any leading part of
``linear`` and ``exponential`` is accepted.
| For example ``l``, ``li``, ``lin``, ``line``, ``linea``, or ``linear``.
extractor.*.sleep-429
---------------------
Type
@@ -545,16 +569,16 @@ Default
``60``
Example
* ``"30-50"``
* ``"exp=40"``
* ``"lin:20=30-60"``
* ``"e=40"``
* ``"linear:20=30-60"``
Description
Number of seconds to sleep when receiving a
`429 Too Many Requests <https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Status/429>`__
response before `retrying <extractor.*.retries_>`__ the request.
If this is a ``string``, its |Duration|_ value can be prefixed with
``lin[:START[:MAX]]`` or ``exp[:BASE[:START[:MAX]]]``
for `linear` or `exponential` backoff respectively.
``lin[:START[:MAX]]`` for `linear` or
``exp[:BASE[:START[:MAX]]]`` for `exponential` backoff.
extractor.*.sleep-request

View File

@@ -84,6 +84,7 @@
"sleep-skip" : 0,
"sleep-request" : 0,
"sleep-extractor": 0,
"sleep-retries" : "lin=1",
"sleep-429" : 60.0,
"actions": [],

View File

@@ -126,12 +126,15 @@
extraction for an input URL
--sleep-request SECONDS Number of seconds to wait between HTTP requests
during data extraction
--sleep-429 [TYPE=]SECONDS Number of seconds to wait when receiving a '429
Too Many Requests' response. Can be prefixed
with 'lin[:START[:MAX]]' or
--sleep-retries [TYPE=]SECONDS
Number of seconds to wait before retrying an
HTTP request. Can be prefixed with
'lin[:START[:MAX]]' or
'exp[:BASE[:START[:MAX]]]' for linear or
exponential growth (e.g. '30', 'exp=40',
'lin:20=30-60'
exponential growth between consecutive retries
(e.g. '30', 'exp=40', 'lin:20=30-60')
--sleep-429 [TYPE=]SECONDS Number of seconds to wait when receiving a '429
Too Many Requests' response
## Configuration Options:
-o, --option KEY=VALUE Additional options. Example: -o browser=firefox

View File

@@ -184,8 +184,8 @@ class Extractor():
response = challenge = None
tries = 1
if self._interval and interval:
seconds = (self._interval() -
if self._interval_request is not None and interval:
seconds = (self._interval_request() -
(time.time() - Extractor.request_timestamp))
if seconds > 0.0:
self.sleep(seconds, "request")
@@ -251,9 +251,9 @@ class Extractor():
if tries > retries:
break
seconds = tries
if self._interval:
s = self._interval()
seconds = self._interval_retry(tries)
if self._interval_request is not None:
s = self._interval_request()
if seconds < s:
seconds = s
if code == 429 and self._interval_429 is not None:
@@ -414,10 +414,27 @@ class Extractor():
self._timeout = self.config("timeout", 30)
self._verify = self.config("verify", True)
self._proxies = util.build_proxy_map(self.config("proxy"), self.log)
self._interval = util.build_duration_func(
if self._retries < 0:
self._retries = float("inf")
if not self._retry_codes:
self._retry_codes = ()
self._interval_request = util.build_duration_func(
self.config("sleep-request", self.request_interval),
self.request_interval_min,
)
self.request_interval_min)
_interval_retry = self.config("sleep-retries")
if _interval_retry is None:
self._interval_retry = util.identity
else:
try:
self._interval_retry = util.build_duration_func_ex(
_interval_retry)
except Exception as exc:
self.log.error("Invalid 'sleep-retry' value '%s' (%s: %s)",
_interval_retry, exc.__class__.__name__, exc)
self._interval_retry = util.identity
_interval_429 = self.config("sleep-429")
if _interval_429 is None:
@@ -430,11 +447,6 @@ class Extractor():
self._interval_429 = util.build_duration_func_ex(
self.request_interval_429)
if self._retries < 0:
self._retries = float("inf")
if not self._retry_codes:
self._retry_codes = ()
def _init_session(self):
self.session = session = requests.Session()
headers = session.headers

View File

@@ -590,14 +590,20 @@ def build_parser():
help=("Number of seconds to wait between HTTP requests "
"during data extraction"),
)
sleep.add_argument(
"--sleep-retries",
dest="sleep-retries", metavar="[TYPE=]SECONDS", action=ConfigAction,
help=("Number of seconds to wait before retrying an HTTP request. "
"Can be prefixed with "
"'lin[:START[:MAX]]' or 'exp[:BASE[:START[:MAX]]]' "
"for linear or exponential growth between consecutive retries "
"(e.g. '30', 'exp=40', 'lin:20=30-60'"),
)
sleep.add_argument(
"--sleep-429",
dest="sleep-429", metavar="[TYPE=]SECONDS", action=ConfigAction,
help=("Number of seconds to wait when receiving a "
"'429 Too Many Requests' response. Can be prefixed with "
"'lin[:START[:MAX]]' or 'exp[:BASE[:START[:MAX]]]' "
"for linear or exponential growth "
"(e.g. '30', 'exp=40', 'lin:20=30-60'"),
"'429 Too Many Requests' response"),
)
configuration = parser.add_argument_group("Configuration Options")