implement linear/exponential backoff for 'sleep-429'
This commit is contained in:
@@ -540,13 +540,22 @@ Description
|
|||||||
extractor.*.sleep-429
|
extractor.*.sleep-429
|
||||||
---------------------
|
---------------------
|
||||||
Type
|
Type
|
||||||
|Duration|_
|
|Duration+|_
|
||||||
Default
|
Default
|
||||||
``60``
|
``60``
|
||||||
|
Example
|
||||||
|
* ``"30-50"``
|
||||||
|
* ``"exp=40"``
|
||||||
|
* ``"lin:20=30-60"``
|
||||||
Description
|
Description
|
||||||
Number of seconds to sleep when receiving a `429 Too Many Requests`
|
Number of seconds to sleep when receiving a
|
||||||
|
`429 Too Many Requests <https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Status/429>`__
|
||||||
response before `retrying <extractor.*.retries_>`__ the request.
|
response before `retrying <extractor.*.retries_>`__ the request.
|
||||||
|
|
||||||
|
If this is a ``string``, its |Duration|_ value can be prefixed with
|
||||||
|
``lin[:START[:MAX]]=`` or ``exp[:BASE[:START[:MAX]]]=``
|
||||||
|
for `linear` or `exponential` backoff respectively.
|
||||||
|
|
||||||
|
|
||||||
extractor.*.sleep-request
|
extractor.*.sleep-request
|
||||||
-------------------------
|
-------------------------
|
||||||
@@ -9965,6 +9974,24 @@ Description
|
|||||||
value (``"2.85"``) or a range (``"1.5-3.0"``).
|
value (``"2.85"``) or a range (``"1.5-3.0"``).
|
||||||
|
|
||||||
|
|
||||||
|
Duration+
|
||||||
|
---------
|
||||||
|
Type
|
||||||
|
* |Duration|_
|
||||||
|
* ``string``
|
||||||
|
Example
|
||||||
|
* ``"1.5-3.0"``
|
||||||
|
* ``"lin=5"``
|
||||||
|
* ``"lin:20=30-60"``
|
||||||
|
* ``"exp:1.8=40"``
|
||||||
|
Description
|
||||||
|
A |Duration|_ value.
|
||||||
|
|
||||||
|
When given as ``string``, it can optionally be prefixed with
|
||||||
|
``lin[:START[:MAX]]=`` for `linear` or
|
||||||
|
``exp[:BASE[:START[:MAX]]]=`` for `exponential` growth.
|
||||||
|
|
||||||
|
|
||||||
Module
|
Module
|
||||||
------
|
------
|
||||||
Type
|
Type
|
||||||
@@ -10316,6 +10343,7 @@ Reference
|
|||||||
.. |datetime.max| replace:: ``datetime.max``
|
.. |datetime.max| replace:: ``datetime.max``
|
||||||
.. |Date| replace:: ``Date``
|
.. |Date| replace:: ``Date``
|
||||||
.. |Duration| replace:: ``Duration``
|
.. |Duration| replace:: ``Duration``
|
||||||
|
.. |Duration+| replace:: ``Duration+``
|
||||||
.. |Module| replace:: ``Module``
|
.. |Module| replace:: ``Module``
|
||||||
.. |Path| replace:: ``Path``
|
.. |Path| replace:: ``Path``
|
||||||
.. |Last-Modified| replace:: ``Last-Modified``
|
.. |Last-Modified| replace:: ``Last-Modified``
|
||||||
|
|||||||
@@ -11,6 +11,7 @@
|
|||||||
* [Output Options](#output-options)
|
* [Output Options](#output-options)
|
||||||
* [Networking Options](#networking-options)
|
* [Networking Options](#networking-options)
|
||||||
* [Downloader Options](#downloader-options)
|
* [Downloader Options](#downloader-options)
|
||||||
|
* [Sleep Options](#sleep-options)
|
||||||
* [Configuration Options](#configuration-options)
|
* [Configuration Options](#configuration-options)
|
||||||
* [Authentication Options](#authentication-options)
|
* [Authentication Options](#authentication-options)
|
||||||
* [Cookie Options](#cookie-options)
|
* [Cookie Options](#cookie-options)
|
||||||
@@ -109,34 +110,40 @@
|
|||||||
-r, --limit-rate RATE Maximum download rate (e.g. 500k, 2.5M, or
|
-r, --limit-rate RATE Maximum download rate (e.g. 500k, 2.5M, or
|
||||||
800k-2M)
|
800k-2M)
|
||||||
--chunk-size SIZE Size of in-memory data chunks (default: 32k)
|
--chunk-size SIZE Size of in-memory data chunks (default: 32k)
|
||||||
--sleep SECONDS Number of seconds to wait before each download.
|
|
||||||
This can be either a constant value or a range
|
|
||||||
(e.g. 2.7 or 2.0-3.5)
|
|
||||||
--sleep-skip SECONDS Number of seconds to wait after skipping a file
|
|
||||||
download
|
|
||||||
--sleep-request SECONDS Number of seconds to wait between HTTP requests
|
|
||||||
during data extraction
|
|
||||||
--sleep-429 SECONDS Number of seconds to wait when receiving a '429
|
|
||||||
Too Many Requests' response
|
|
||||||
--sleep-extractor SECONDS Number of seconds to wait before starting data
|
|
||||||
extraction for an input URL
|
|
||||||
--no-part Do not use .part files
|
--no-part Do not use .part files
|
||||||
--no-skip Do not skip downloads; overwrite existing files
|
--no-skip Do not skip downloads; overwrite existing files
|
||||||
--no-mtime Do not set file modification times according to
|
--no-mtime Do not set file modification times according to
|
||||||
Last-Modified HTTP response headers
|
Last-Modified HTTP response headers
|
||||||
--no-download Do not download any files
|
--no-download Do not download any files
|
||||||
|
|
||||||
|
## Sleep Options:
|
||||||
|
--sleep SECONDS Number of seconds to wait before each download.
|
||||||
|
This can be either a constant value or a range
|
||||||
|
(e.g. 2.7 or 2.0-3.5)
|
||||||
|
--sleep-skip SECONDS Number of seconds to wait after skipping a file
|
||||||
|
download
|
||||||
|
--sleep-extractor SECONDS Number of seconds to wait before starting data
|
||||||
|
extraction for an input URL
|
||||||
|
--sleep-request SECONDS Number of seconds to wait between HTTP requests
|
||||||
|
during data extraction
|
||||||
|
--sleep-429 [TYPE=]SECONDS Number of seconds to wait when receiving a '429
|
||||||
|
Too Many Requests' response. Can be prefixed
|
||||||
|
with 'lin[:START[:MAX]]' or
|
||||||
|
'exp[:BASE[:START[:MAX]]]' for linear or
|
||||||
|
exponential growth (e.g. '30', 'exp=40',
|
||||||
|
'lin:20=30-60')
|
||||||
|
|
||||||
## Configuration Options:
|
## Configuration Options:
|
||||||
-o, --option KEY=VALUE Additional options. Example: -o browser=firefox
|
-o, --option KEY=VALUE Additional options. Example: -o browser=firefox
|
||||||
-c, --config FILE Additional configuration files
|
-c, --config FILE Additional configuration files in JSON format
|
||||||
--config-yaml FILE Additional configuration files in YAML format
|
--config-yaml FILE Additional configuration files in YAML format
|
||||||
--config-toml FILE Additional configuration files in TOML format
|
--config-toml FILE Additional configuration files in TOML format
|
||||||
|
--config-type TYPE Set filetype of default configuration files
|
||||||
|
(json, yaml, toml)
|
||||||
|
--config-ignore Do not load default configuration files
|
||||||
--config-create Create a basic configuration file
|
--config-create Create a basic configuration file
|
||||||
--config-status Show configuration file status
|
--config-status Show configuration file status
|
||||||
--config-open Open configuration file in external application
|
--config-open Open configuration file in external application
|
||||||
--config-type TYPE Set filetype of default configuration files
|
|
||||||
(json, yaml, toml)
|
|
||||||
--config-ignore Do not read default configuration files
|
|
||||||
|
|
||||||
## Authentication Options:
|
## Authentication Options:
|
||||||
-u, --username USER Username to login with
|
-u, --username USER Username to login with
|
||||||
|
|||||||
@@ -87,7 +87,12 @@ class HttpDownloader(DownloaderBase):
|
|||||||
if interval_429 is None:
|
if interval_429 is None:
|
||||||
self.interval_429 = extractor._interval_429
|
self.interval_429 = extractor._interval_429
|
||||||
else:
|
else:
|
||||||
self.interval_429 = util.build_duration_func(interval_429)
|
try:
|
||||||
|
self.interval_429 = util.build_duration_func_ex(interval_429)
|
||||||
|
except Exception as exc:
|
||||||
|
self.log.error("Invalid 'sleep-429' value '%s' (%s: %s)",
|
||||||
|
interval_429, exc.__class__.__name__, exc)
|
||||||
|
self.interval_429 = extractor._interval_429
|
||||||
|
|
||||||
def download(self, url, pathfmt):
|
def download(self, url, pathfmt):
|
||||||
try:
|
try:
|
||||||
@@ -128,7 +133,7 @@ class HttpDownloader(DownloaderBase):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
if code == 429 and self.interval_429:
|
if code == 429 and self.interval_429:
|
||||||
s = self.interval_429()
|
s = self.interval_429(tries)
|
||||||
time.sleep(s if s > tries else tries)
|
time.sleep(s if s > tries else tries)
|
||||||
else:
|
else:
|
||||||
time.sleep(tries)
|
time.sleep(tries)
|
||||||
|
|||||||
@@ -256,8 +256,8 @@ class Extractor():
|
|||||||
s = self._interval()
|
s = self._interval()
|
||||||
if seconds < s:
|
if seconds < s:
|
||||||
seconds = s
|
seconds = s
|
||||||
if code == 429 and self._interval_429:
|
if code == 429 and self._interval_429 is not None:
|
||||||
s = self._interval_429()
|
s = self._interval_429(tries)
|
||||||
if seconds < s:
|
if seconds < s:
|
||||||
seconds = s
|
seconds = s
|
||||||
self.wait(seconds=seconds, reason="429 Too Many Requests")
|
self.wait(seconds=seconds, reason="429 Too Many Requests")
|
||||||
@@ -418,9 +418,17 @@ class Extractor():
|
|||||||
self.config("sleep-request", self.request_interval),
|
self.config("sleep-request", self.request_interval),
|
||||||
self.request_interval_min,
|
self.request_interval_min,
|
||||||
)
|
)
|
||||||
self._interval_429 = util.build_duration_func(
|
|
||||||
self.config("sleep-429", self.request_interval_429),
|
_interval_429 = self.config("sleep-429")
|
||||||
)
|
if _interval_429 is None:
|
||||||
|
_interval_429 = self.request_interval_429
|
||||||
|
try:
|
||||||
|
self._interval_429 = util.build_duration_func_ex(_interval_429)
|
||||||
|
except Exception as exc:
|
||||||
|
self.log.error("Invalid 'sleep-429' value '%s' (%s: %s)",
|
||||||
|
_interval_429, exc.__class__.__name__, exc)
|
||||||
|
self._interval_429 = util.build_duration_func_ex(
|
||||||
|
self.request_interval_429)
|
||||||
|
|
||||||
if self._retries < 0:
|
if self._retries < 0:
|
||||||
self._retries = float("inf")
|
self._retries = float("inf")
|
||||||
|
|||||||
@@ -543,36 +543,6 @@ def build_parser():
|
|||||||
dest="chunk-size", metavar="SIZE", action=ConfigAction,
|
dest="chunk-size", metavar="SIZE", action=ConfigAction,
|
||||||
help="Size of in-memory data chunks (default: 32k)",
|
help="Size of in-memory data chunks (default: 32k)",
|
||||||
)
|
)
|
||||||
downloader.add_argument(
|
|
||||||
"--sleep",
|
|
||||||
dest="sleep", metavar="SECONDS", action=ConfigAction,
|
|
||||||
help=("Number of seconds to wait before each download. "
|
|
||||||
"This can be either a constant value or a range "
|
|
||||||
"(e.g. 2.7 or 2.0-3.5)"),
|
|
||||||
)
|
|
||||||
downloader.add_argument(
|
|
||||||
"--sleep-skip",
|
|
||||||
dest="sleep-skip", metavar="SECONDS", action=ConfigAction,
|
|
||||||
help=("Number of seconds to wait after skipping a file download"),
|
|
||||||
)
|
|
||||||
downloader.add_argument(
|
|
||||||
"--sleep-request",
|
|
||||||
dest="sleep-request", metavar="SECONDS", action=ConfigAction,
|
|
||||||
help=("Number of seconds to wait between HTTP requests "
|
|
||||||
"during data extraction"),
|
|
||||||
)
|
|
||||||
downloader.add_argument(
|
|
||||||
"--sleep-429",
|
|
||||||
dest="sleep-429", metavar="SECONDS", action=ConfigAction,
|
|
||||||
help=("Number of seconds to wait when receiving a "
|
|
||||||
"'429 Too Many Requests' response"),
|
|
||||||
)
|
|
||||||
downloader.add_argument(
|
|
||||||
"--sleep-extractor",
|
|
||||||
dest="sleep-extractor", metavar="SECONDS", action=ConfigAction,
|
|
||||||
help=("Number of seconds to wait before starting data extraction "
|
|
||||||
"for an input URL"),
|
|
||||||
)
|
|
||||||
downloader.add_argument(
|
downloader.add_argument(
|
||||||
"--no-part",
|
"--no-part",
|
||||||
dest="part", nargs=0, action=ConfigConstAction, const=False,
|
dest="part", nargs=0, action=ConfigConstAction, const=False,
|
||||||
@@ -595,6 +565,41 @@ def build_parser():
|
|||||||
help=("Do not download any files")
|
help=("Do not download any files")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
sleep = parser.add_argument_group("Sleep Options")
|
||||||
|
sleep.add_argument(
|
||||||
|
"--sleep",
|
||||||
|
dest="sleep", metavar="SECONDS", action=ConfigAction,
|
||||||
|
help=("Number of seconds to wait before each download. "
|
||||||
|
"This can be either a constant value or a range "
|
||||||
|
"(e.g. 2.7 or 2.0-3.5)"),
|
||||||
|
)
|
||||||
|
sleep.add_argument(
|
||||||
|
"--sleep-skip",
|
||||||
|
dest="sleep-skip", metavar="SECONDS", action=ConfigAction,
|
||||||
|
help=("Number of seconds to wait after skipping a file download"),
|
||||||
|
)
|
||||||
|
sleep.add_argument(
|
||||||
|
"--sleep-extractor",
|
||||||
|
dest="sleep-extractor", metavar="SECONDS", action=ConfigAction,
|
||||||
|
help=("Number of seconds to wait before starting data extraction "
|
||||||
|
"for an input URL"),
|
||||||
|
)
|
||||||
|
sleep.add_argument(
|
||||||
|
"--sleep-request",
|
||||||
|
dest="sleep-request", metavar="SECONDS", action=ConfigAction,
|
||||||
|
help=("Number of seconds to wait between HTTP requests "
|
||||||
|
"during data extraction"),
|
||||||
|
)
|
||||||
|
sleep.add_argument(
|
||||||
|
"--sleep-429",
|
||||||
|
dest="sleep-429", metavar="[TYPE=]SECONDS", action=ConfigAction,
|
||||||
|
help=("Number of seconds to wait when receiving a "
|
||||||
|
"'429 Too Many Requests' response. Can be prefixed with "
|
||||||
|
"'lin[:START[:MAX]]' or 'exp[:BASE[:START[:MAX]]]' "
|
||||||
|
"for linear or exponential growth "
|
||||||
|
"(e.g. '30', 'exp=40', 'lin:20=30-60'"),
|
||||||
|
)
|
||||||
|
|
||||||
configuration = parser.add_argument_group("Configuration Options")
|
configuration = parser.add_argument_group("Configuration Options")
|
||||||
configuration.add_argument(
|
configuration.add_argument(
|
||||||
"-o", "--option",
|
"-o", "--option",
|
||||||
@@ -606,7 +611,7 @@ def build_parser():
|
|||||||
configuration.add_argument(
|
configuration.add_argument(
|
||||||
"-c", "--config",
|
"-c", "--config",
|
||||||
dest="configs_json", metavar="FILE", action="append",
|
dest="configs_json", metavar="FILE", action="append",
|
||||||
help="Additional configuration files",
|
help="Additional configuration files in JSON format",
|
||||||
)
|
)
|
||||||
configuration.add_argument(
|
configuration.add_argument(
|
||||||
"--config-yaml",
|
"--config-yaml",
|
||||||
@@ -618,6 +623,22 @@ def build_parser():
|
|||||||
dest="configs_toml", metavar="FILE", action="append",
|
dest="configs_toml", metavar="FILE", action="append",
|
||||||
help="Additional configuration files in TOML format",
|
help="Additional configuration files in TOML format",
|
||||||
)
|
)
|
||||||
|
configuration.add_argument(
|
||||||
|
"--config-type",
|
||||||
|
dest="config_type", metavar="TYPE",
|
||||||
|
help=("Set filetype of default configuration files "
|
||||||
|
"(json, yaml, toml)"),
|
||||||
|
)
|
||||||
|
configuration.add_argument(
|
||||||
|
"--config-ignore",
|
||||||
|
dest="config_load", action="store_false",
|
||||||
|
help="Do not load default configuration files",
|
||||||
|
)
|
||||||
|
configuration.add_argument(
|
||||||
|
"--ignore-config",
|
||||||
|
dest="config_load", action="store_false",
|
||||||
|
help=SUPPRESS,
|
||||||
|
)
|
||||||
configuration.add_argument(
|
configuration.add_argument(
|
||||||
"--config-create",
|
"--config-create",
|
||||||
dest="config", action="store_const", const="init",
|
dest="config", action="store_const", const="init",
|
||||||
@@ -633,22 +654,6 @@ def build_parser():
|
|||||||
dest="config", action="store_const", const="open",
|
dest="config", action="store_const", const="open",
|
||||||
help="Open configuration file in external application",
|
help="Open configuration file in external application",
|
||||||
)
|
)
|
||||||
configuration.add_argument(
|
|
||||||
"--config-type",
|
|
||||||
dest="config_type", metavar="TYPE",
|
|
||||||
help=("Set filetype of default configuration files "
|
|
||||||
"(json, yaml, toml)"),
|
|
||||||
)
|
|
||||||
configuration.add_argument(
|
|
||||||
"--config-ignore",
|
|
||||||
dest="config_load", action="store_false",
|
|
||||||
help="Do not read default configuration files",
|
|
||||||
)
|
|
||||||
configuration.add_argument(
|
|
||||||
"--ignore-config",
|
|
||||||
dest="config_load", action="store_false",
|
|
||||||
help=SUPPRESS,
|
|
||||||
)
|
|
||||||
|
|
||||||
authentication = parser.add_argument_group("Authentication Options")
|
authentication = parser.add_argument_group("Authentication Options")
|
||||||
authentication.add_argument(
|
authentication.add_argument(
|
||||||
|
|||||||
Reference in New Issue
Block a user