implement linear/exponential backoff for 'sleep-429'

This commit is contained in:
Mike Fährmann
2026-02-12 19:23:29 +01:00
parent eb4e44401b
commit 0f41f343f4
5 changed files with 124 additions and 71 deletions

View File

@@ -87,7 +87,12 @@ class HttpDownloader(DownloaderBase):
if interval_429 is None:
self.interval_429 = extractor._interval_429
else:
self.interval_429 = util.build_duration_func(interval_429)
try:
self.interval_429 = util.build_duration_func_ex(interval_429)
except Exception as exc:
self.log.error("Invalid 'sleep-429' value '%s' (%s: %s)",
interval_429, exc.__class__.__name__, exc)
self.interval_429 = extractor._interval_429
def download(self, url, pathfmt):
try:
@@ -128,7 +133,7 @@ class HttpDownloader(DownloaderBase):
return False
if code == 429 and self.interval_429:
s = self.interval_429()
s = self.interval_429(tries)
time.sleep(s if s > tries else tries)
else:
time.sleep(tries)

View File

@@ -256,8 +256,8 @@ class Extractor():
s = self._interval()
if seconds < s:
seconds = s
if code == 429 and self._interval_429:
s = self._interval_429()
if code == 429 and self._interval_429 is not None:
s = self._interval_429(tries)
if seconds < s:
seconds = s
self.wait(seconds=seconds, reason="429 Too Many Requests")
@@ -418,9 +418,17 @@ class Extractor():
self.config("sleep-request", self.request_interval),
self.request_interval_min,
)
self._interval_429 = util.build_duration_func(
self.config("sleep-429", self.request_interval_429),
)
_interval_429 = self.config("sleep-429")
if _interval_429 is None:
_interval_429 = self.request_interval_429
try:
self._interval_429 = util.build_duration_func_ex(_interval_429)
except Exception as exc:
self.log.error("Invalid 'sleep-429' value '%s' (%s: %s)",
_interval_429, exc.__class__.__name__, exc)
self._interval_429 = util.build_duration_func_ex(
self.request_interval_429)
if self._retries < 0:
self._retries = float("inf")

View File

@@ -543,36 +543,6 @@ def build_parser():
dest="chunk-size", metavar="SIZE", action=ConfigAction,
help="Size of in-memory data chunks (default: 32k)",
)
downloader.add_argument(
"--sleep",
dest="sleep", metavar="SECONDS", action=ConfigAction,
help=("Number of seconds to wait before each download. "
"This can be either a constant value or a range "
"(e.g. 2.7 or 2.0-3.5)"),
)
downloader.add_argument(
"--sleep-skip",
dest="sleep-skip", metavar="SECONDS", action=ConfigAction,
help=("Number of seconds to wait after skipping a file download"),
)
downloader.add_argument(
"--sleep-request",
dest="sleep-request", metavar="SECONDS", action=ConfigAction,
help=("Number of seconds to wait between HTTP requests "
"during data extraction"),
)
downloader.add_argument(
"--sleep-429",
dest="sleep-429", metavar="SECONDS", action=ConfigAction,
help=("Number of seconds to wait when receiving a "
"'429 Too Many Requests' response"),
)
downloader.add_argument(
"--sleep-extractor",
dest="sleep-extractor", metavar="SECONDS", action=ConfigAction,
help=("Number of seconds to wait before starting data extraction "
"for an input URL"),
)
downloader.add_argument(
"--no-part",
dest="part", nargs=0, action=ConfigConstAction, const=False,
@@ -595,6 +565,41 @@ def build_parser():
help=("Do not download any files")
)
sleep = parser.add_argument_group("Sleep Options")
sleep.add_argument(
"--sleep",
dest="sleep", metavar="SECONDS", action=ConfigAction,
help=("Number of seconds to wait before each download. "
"This can be either a constant value or a range "
"(e.g. 2.7 or 2.0-3.5)"),
)
sleep.add_argument(
"--sleep-skip",
dest="sleep-skip", metavar="SECONDS", action=ConfigAction,
help=("Number of seconds to wait after skipping a file download"),
)
sleep.add_argument(
"--sleep-extractor",
dest="sleep-extractor", metavar="SECONDS", action=ConfigAction,
help=("Number of seconds to wait before starting data extraction "
"for an input URL"),
)
sleep.add_argument(
"--sleep-request",
dest="sleep-request", metavar="SECONDS", action=ConfigAction,
help=("Number of seconds to wait between HTTP requests "
"during data extraction"),
)
sleep.add_argument(
"--sleep-429",
dest="sleep-429", metavar="[TYPE=]SECONDS", action=ConfigAction,
help=("Number of seconds to wait when receiving a "
"'429 Too Many Requests' response. Can be prefixed with "
"'lin[:START[:MAX]]' or 'exp[:BASE[:START[:MAX]]]' "
"for linear or exponential growth "
"(e.g. '30', 'exp=40', 'lin:20=30-60')"),
)
configuration = parser.add_argument_group("Configuration Options")
configuration.add_argument(
"-o", "--option",
@@ -606,7 +611,7 @@ def build_parser():
configuration.add_argument(
"-c", "--config",
dest="configs_json", metavar="FILE", action="append",
help="Additional configuration files",
help="Additional configuration files in JSON format",
)
configuration.add_argument(
"--config-yaml",
@@ -618,6 +623,22 @@ def build_parser():
dest="configs_toml", metavar="FILE", action="append",
help="Additional configuration files in TOML format",
)
configuration.add_argument(
"--config-type",
dest="config_type", metavar="TYPE",
help=("Set filetype of default configuration files "
"(json, yaml, toml)"),
)
configuration.add_argument(
"--config-ignore",
dest="config_load", action="store_false",
help="Do not load default configuration files",
)
configuration.add_argument(
"--ignore-config",
dest="config_load", action="store_false",
help=SUPPRESS,
)
configuration.add_argument(
"--config-create",
dest="config", action="store_const", const="init",
@@ -633,22 +654,6 @@ def build_parser():
dest="config", action="store_const", const="open",
help="Open configuration file in external application",
)
configuration.add_argument(
"--config-type",
dest="config_type", metavar="TYPE",
help=("Set filetype of default configuration files "
"(json, yaml, toml)"),
)
configuration.add_argument(
"--config-ignore",
dest="config_load", action="store_false",
help="Do not read default configuration files",
)
configuration.add_argument(
"--ignore-config",
dest="config_load", action="store_false",
help=SUPPRESS,
)
authentication = parser.add_argument_group("Authentication Options")
authentication.add_argument(