implement linear/exponential backoff for 'sleep-429'

This commit is contained in:
Mike Fährmann
2026-02-12 19:23:29 +01:00
parent eb4e44401b
commit 0f41f343f4
5 changed files with 124 additions and 71 deletions

View File

@@ -540,13 +540,22 @@ Description
extractor.*.sleep-429
---------------------
Type
|Duration|_
|Duration+|_
Default
``60``
Example
* ``"30-50"``
* ``"exp=40"``
* ``"lin:20=30-60"``
Description
Number of seconds to sleep when receiving a `429 Too Many Requests`
Number of seconds to sleep when receiving a
`429 Too Many Requests <https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Status/429>`__
response before `retrying <extractor.*.retries_>`__ the request.
If this is a ``string``, its |Duration|_ value can be prefixed with
``lin[:START[:MAX]]=`` or ``exp[:BASE[:START[:MAX]]]=``
for `linear` or `exponential` backoff respectively.
extractor.*.sleep-request
-------------------------
@@ -9965,6 +9974,24 @@ Description
value (``"2.85"``) or a range (``"1.5-3.0"``).
Duration+
---------
Type
* |Duration|_
* ``string``
Example
* ``"1.5-3.0"``
* ``"lin=5"``
* ``"lin:20=30-60"``
* ``"exp:1.8=40"``
Description
A |Duration|_ value.
When given as ``string``, it can optionally be prefixed with
``lin[:START[:MAX]]=`` for `linear` or
``exp[:BASE[:START[:MAX]]]=`` for `exponential` growth.
Module
------
Type
@@ -10316,6 +10343,7 @@ Reference
.. |datetime.max| replace:: ``datetime.max``
.. |Date| replace:: ``Date``
.. |Duration| replace:: ``Duration``
.. |Duration+| replace:: ``Duration+``
.. |Module| replace:: ``Module``
.. |Path| replace:: ``Path``
.. |Last-Modified| replace:: ``Last-Modified``

View File

@@ -11,6 +11,7 @@
* [Output Options](#output-options)
* [Networking Options](#networking-options)
* [Downloader Options](#downloader-options)
* [Sleep Options](#sleep-options)
* [Configuration Options](#configuration-options)
* [Authentication Options](#authentication-options)
* [Cookie Options](#cookie-options)
@@ -109,34 +110,40 @@
-r, --limit-rate RATE Maximum download rate (e.g. 500k, 2.5M, or
800k-2M)
--chunk-size SIZE Size of in-memory data chunks (default: 32k)
--sleep SECONDS Number of seconds to wait before each download.
This can be either a constant value or a range
(e.g. 2.7 or 2.0-3.5)
--sleep-skip SECONDS Number of seconds to wait after skipping a file
download
--sleep-request SECONDS Number of seconds to wait between HTTP requests
during data extraction
--sleep-429 SECONDS Number of seconds to wait when receiving a '429
Too Many Requests' response
--sleep-extractor SECONDS Number of seconds to wait before starting data
extraction for an input URL
--no-part Do not use .part files
--no-skip Do not skip downloads; overwrite existing files
--no-mtime Do not set file modification times according to
Last-Modified HTTP response headers
--no-download Do not download any files
## Sleep Options:
--sleep SECONDS Number of seconds to wait before each download.
This can be either a constant value or a range
(e.g. 2.7 or 2.0-3.5)
--sleep-skip SECONDS Number of seconds to wait after skipping a file
download
--sleep-extractor SECONDS Number of seconds to wait before starting data
extraction for an input URL
--sleep-request SECONDS Number of seconds to wait between HTTP requests
during data extraction
--sleep-429 [TYPE=]SECONDS Number of seconds to wait when receiving a '429
Too Many Requests' response. Can be prefixed
with 'lin[:START[:MAX]]' or
'exp[:BASE[:START[:MAX]]]' for linear or
exponential growth (e.g. '30', 'exp=40',
'lin:20=30-60')
## Configuration Options:
-o, --option KEY=VALUE Additional options. Example: -o browser=firefox
-c, --config FILE Additional configuration files
-c, --config FILE Additional configuration files in JSON format
--config-yaml FILE Additional configuration files in YAML format
--config-toml FILE Additional configuration files in TOML format
--config-type TYPE Set filetype of default configuration files
(json, yaml, toml)
--config-ignore Do not load default configuration files
--config-create Create a basic configuration file
--config-status Show configuration file status
--config-open Open configuration file in external application
--config-type TYPE Set filetype of default configuration files
(json, yaml, toml)
--config-ignore Do not read default configuration files
## Authentication Options:
-u, --username USER Username to login with

View File

@@ -87,7 +87,12 @@ class HttpDownloader(DownloaderBase):
if interval_429 is None:
self.interval_429 = extractor._interval_429
else:
self.interval_429 = util.build_duration_func(interval_429)
try:
self.interval_429 = util.build_duration_func_ex(interval_429)
except Exception as exc:
self.log.error("Invalid 'sleep-429' value '%s' (%s: %s)",
interval_429, exc.__class__.__name__, exc)
self.interval_429 = extractor._interval_429
def download(self, url, pathfmt):
try:
@@ -128,7 +133,7 @@ class HttpDownloader(DownloaderBase):
return False
if code == 429 and self.interval_429:
s = self.interval_429()
s = self.interval_429(tries)
time.sleep(s if s > tries else tries)
else:
time.sleep(tries)

View File

@@ -256,8 +256,8 @@ class Extractor():
s = self._interval()
if seconds < s:
seconds = s
if code == 429 and self._interval_429:
s = self._interval_429()
if code == 429 and self._interval_429 is not None:
s = self._interval_429(tries)
if seconds < s:
seconds = s
self.wait(seconds=seconds, reason="429 Too Many Requests")
@@ -418,9 +418,17 @@ class Extractor():
self.config("sleep-request", self.request_interval),
self.request_interval_min,
)
self._interval_429 = util.build_duration_func(
self.config("sleep-429", self.request_interval_429),
)
_interval_429 = self.config("sleep-429")
if _interval_429 is None:
_interval_429 = self.request_interval_429
try:
self._interval_429 = util.build_duration_func_ex(_interval_429)
except Exception as exc:
self.log.error("Invalid 'sleep-429' value '%s' (%s: %s)",
_interval_429, exc.__class__.__name__, exc)
self._interval_429 = util.build_duration_func_ex(
self.request_interval_429)
if self._retries < 0:
self._retries = float("inf")

View File

@@ -543,36 +543,6 @@ def build_parser():
dest="chunk-size", metavar="SIZE", action=ConfigAction,
help="Size of in-memory data chunks (default: 32k)",
)
downloader.add_argument(
"--sleep",
dest="sleep", metavar="SECONDS", action=ConfigAction,
help=("Number of seconds to wait before each download. "
"This can be either a constant value or a range "
"(e.g. 2.7 or 2.0-3.5)"),
)
downloader.add_argument(
"--sleep-skip",
dest="sleep-skip", metavar="SECONDS", action=ConfigAction,
help=("Number of seconds to wait after skipping a file download"),
)
downloader.add_argument(
"--sleep-request",
dest="sleep-request", metavar="SECONDS", action=ConfigAction,
help=("Number of seconds to wait between HTTP requests "
"during data extraction"),
)
downloader.add_argument(
"--sleep-429",
dest="sleep-429", metavar="SECONDS", action=ConfigAction,
help=("Number of seconds to wait when receiving a "
"'429 Too Many Requests' response"),
)
downloader.add_argument(
"--sleep-extractor",
dest="sleep-extractor", metavar="SECONDS", action=ConfigAction,
help=("Number of seconds to wait before starting data extraction "
"for an input URL"),
)
downloader.add_argument(
"--no-part",
dest="part", nargs=0, action=ConfigConstAction, const=False,
@@ -595,6 +565,41 @@ def build_parser():
help=("Do not download any files")
)
sleep = parser.add_argument_group("Sleep Options")
sleep.add_argument(
"--sleep",
dest="sleep", metavar="SECONDS", action=ConfigAction,
help=("Number of seconds to wait before each download. "
"This can be either a constant value or a range "
"(e.g. 2.7 or 2.0-3.5)"),
)
sleep.add_argument(
"--sleep-skip",
dest="sleep-skip", metavar="SECONDS", action=ConfigAction,
help=("Number of seconds to wait after skipping a file download"),
)
sleep.add_argument(
"--sleep-extractor",
dest="sleep-extractor", metavar="SECONDS", action=ConfigAction,
help=("Number of seconds to wait before starting data extraction "
"for an input URL"),
)
sleep.add_argument(
"--sleep-request",
dest="sleep-request", metavar="SECONDS", action=ConfigAction,
help=("Number of seconds to wait between HTTP requests "
"during data extraction"),
)
sleep.add_argument(
"--sleep-429",
dest="sleep-429", metavar="[TYPE=]SECONDS", action=ConfigAction,
help=("Number of seconds to wait when receiving a "
"'429 Too Many Requests' response. Can be prefixed with "
"'lin[:START[:MAX]]' or 'exp[:BASE[:START[:MAX]]]' "
"for linear or exponential growth "
"(e.g. '30', 'exp=40', 'lin:20=30-60'"),
)
configuration = parser.add_argument_group("Configuration Options")
configuration.add_argument(
"-o", "--option",
@@ -606,7 +611,7 @@ def build_parser():
configuration.add_argument(
"-c", "--config",
dest="configs_json", metavar="FILE", action="append",
help="Additional configuration files",
help="Additional configuration files in JSON format",
)
configuration.add_argument(
"--config-yaml",
@@ -618,6 +623,22 @@ def build_parser():
dest="configs_toml", metavar="FILE", action="append",
help="Additional configuration files in TOML format",
)
configuration.add_argument(
"--config-type",
dest="config_type", metavar="TYPE",
help=("Set filetype of default configuration files "
"(json, yaml, toml)"),
)
configuration.add_argument(
"--config-ignore",
dest="config_load", action="store_false",
help="Do not load default configuration files",
)
configuration.add_argument(
"--ignore-config",
dest="config_load", action="store_false",
help=SUPPRESS,
)
configuration.add_argument(
"--config-create",
dest="config", action="store_const", const="init",
@@ -633,22 +654,6 @@ def build_parser():
dest="config", action="store_const", const="open",
help="Open configuration file in external application",
)
configuration.add_argument(
"--config-type",
dest="config_type", metavar="TYPE",
help=("Set filetype of default configuration files "
"(json, yaml, toml)"),
)
configuration.add_argument(
"--config-ignore",
dest="config_load", action="store_false",
help="Do not read default configuration files",
)
configuration.add_argument(
"--ignore-config",
dest="config_load", action="store_false",
help=SUPPRESS,
)
authentication = parser.add_argument_group("Authentication Options")
authentication.add_argument(