[options] add 'sleep-retries' option
This commit is contained in:
@@ -537,6 +537,30 @@ Description
|
|||||||
i.e. before starting a new extractor.
|
i.e. before starting a new extractor.
|
||||||
|
|
||||||
|
|
||||||
|
extractor.*.sleep-retries
|
||||||
|
-------------------------
|
||||||
|
Type
|
||||||
|
|Duration+|_
|
||||||
|
Default
|
||||||
|
``"lin=1"``
|
||||||
|
Example
|
||||||
|
* ``"30-50"``
|
||||||
|
* ``"exp=40"``
|
||||||
|
* ``"lin:20=30-60"``
|
||||||
|
Description
|
||||||
|
Number of seconds to sleep before
|
||||||
|
`retrying <extractor.*.retries_>`__
|
||||||
|
an HTTP request.
|
||||||
|
|
||||||
|
If this is a ``string``, its |Duration|_ value can be prefixed with
|
||||||
|
``lin[:START[:MAX]]`` for `linear` or
|
||||||
|
``exp[:BASE[:START[:MAX]]]`` for `exponential` growth.
|
||||||
|
Note
|
||||||
|
| ``lin`` and ``exp`` can be shortened or extended to any prefix of
|
||||||
|
``linear`` and ``exponential``.
|
||||||
|
| For example ``l``, ``li``, ``lin``, ``line``, ``linea``, or ``linear``.
|
||||||
|
|
||||||
|
|
||||||
extractor.*.sleep-429
|
extractor.*.sleep-429
|
||||||
---------------------
|
---------------------
|
||||||
Type
|
Type
|
||||||
@@ -545,16 +569,16 @@ Default
|
|||||||
``60``
|
``60``
|
||||||
Example
|
Example
|
||||||
* ``"30-50"``
|
* ``"30-50"``
|
||||||
* ``"exp=40"``
|
* ``"e=40"``
|
||||||
* ``"lin:20=30-60"``
|
* ``"linear:20=30-60"``
|
||||||
Description
|
Description
|
||||||
Number of seconds to sleep when receiving a
|
Number of seconds to sleep when receiving a
|
||||||
`429 Too Many Requests <https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Status/429>`__
|
`429 Too Many Requests <https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Status/429>`__
|
||||||
response before `retrying <extractor.*.retries_>`__ the request.
|
response before `retrying <extractor.*.retries_>`__ the request.
|
||||||
|
|
||||||
If this is a ``string``, its |Duration|_ value can be prefixed with
|
If this is a ``string``, its |Duration|_ value can be prefixed with
|
||||||
``lin[:START[:MAX]]`` or ``exp[:BASE[:START[:MAX]]]``
|
``lin[:START[:MAX]]`` for `linear` or
|
||||||
for `linear` or `exponential` backoff respectively.
|
``exp[:BASE[:START[:MAX]]]`` for `exponential` backoff.
|
||||||
|
|
||||||
|
|
||||||
extractor.*.sleep-request
|
extractor.*.sleep-request
|
||||||
|
|||||||
@@ -84,6 +84,7 @@
|
|||||||
"sleep-skip" : 0,
|
"sleep-skip" : 0,
|
||||||
"sleep-request" : 0,
|
"sleep-request" : 0,
|
||||||
"sleep-extractor": 0,
|
"sleep-extractor": 0,
|
||||||
|
"sleep-retries" : "lin=1",
|
||||||
"sleep-429" : 60.0,
|
"sleep-429" : 60.0,
|
||||||
|
|
||||||
"actions": [],
|
"actions": [],
|
||||||
|
|||||||
@@ -126,12 +126,15 @@
|
|||||||
extraction for an input URL
|
extraction for an input URL
|
||||||
--sleep-request SECONDS Number of seconds to wait between HTTP requests
|
--sleep-request SECONDS Number of seconds to wait between HTTP requests
|
||||||
during data extraction
|
during data extraction
|
||||||
--sleep-429 [TYPE=]SECONDS Number of seconds to wait when receiving a '429
|
--sleep-retries [TYPE=]SECONDS
|
||||||
Too Many Requests' response. Can be prefixed
|
Number of seconds to wait before retrying an
|
||||||
with 'lin[:START[:MAX]]' or
|
HTTP request. Can be prefixed with
|
||||||
|
'lin[:START[:MAX]]' or
|
||||||
'exp[:BASE[:START[:MAX]]]' for linear or
|
'exp[:BASE[:START[:MAX]]]' for linear or
|
||||||
exponential growth (e.g. '30', 'exp=40',
|
exponential growth between consecutive retries
|
||||||
'lin:20=30-60'
|
(e.g. '30', 'exp=40', 'lin:20=30-60')
|
||||||
|
--sleep-429 [TYPE=]SECONDS Number of seconds to wait when receiving a '429
|
||||||
|
Too Many Requests' response
|
||||||
|
|
||||||
## Configuration Options:
|
## Configuration Options:
|
||||||
-o, --option KEY=VALUE Additional options. Example: -o browser=firefox
|
-o, --option KEY=VALUE Additional options. Example: -o browser=firefox
|
||||||
|
|||||||
@@ -184,8 +184,8 @@ class Extractor():
|
|||||||
response = challenge = None
|
response = challenge = None
|
||||||
tries = 1
|
tries = 1
|
||||||
|
|
||||||
if self._interval and interval:
|
if self._interval_request is not None and interval:
|
||||||
seconds = (self._interval() -
|
seconds = (self._interval_request() -
|
||||||
(time.time() - Extractor.request_timestamp))
|
(time.time() - Extractor.request_timestamp))
|
||||||
if seconds > 0.0:
|
if seconds > 0.0:
|
||||||
self.sleep(seconds, "request")
|
self.sleep(seconds, "request")
|
||||||
@@ -251,9 +251,9 @@ class Extractor():
|
|||||||
if tries > retries:
|
if tries > retries:
|
||||||
break
|
break
|
||||||
|
|
||||||
seconds = tries
|
seconds = self._interval_retry(tries)
|
||||||
if self._interval:
|
if self._interval_request is not None:
|
||||||
s = self._interval()
|
s = self._interval_request()
|
||||||
if seconds < s:
|
if seconds < s:
|
||||||
seconds = s
|
seconds = s
|
||||||
if code == 429 and self._interval_429 is not None:
|
if code == 429 and self._interval_429 is not None:
|
||||||
@@ -414,10 +414,27 @@ class Extractor():
|
|||||||
self._timeout = self.config("timeout", 30)
|
self._timeout = self.config("timeout", 30)
|
||||||
self._verify = self.config("verify", True)
|
self._verify = self.config("verify", True)
|
||||||
self._proxies = util.build_proxy_map(self.config("proxy"), self.log)
|
self._proxies = util.build_proxy_map(self.config("proxy"), self.log)
|
||||||
self._interval = util.build_duration_func(
|
|
||||||
|
if self._retries < 0:
|
||||||
|
self._retries = float("inf")
|
||||||
|
if not self._retry_codes:
|
||||||
|
self._retry_codes = ()
|
||||||
|
|
||||||
|
self._interval_request = util.build_duration_func(
|
||||||
self.config("sleep-request", self.request_interval),
|
self.config("sleep-request", self.request_interval),
|
||||||
self.request_interval_min,
|
self.request_interval_min)
|
||||||
)
|
|
||||||
|
_interval_retry = self.config("sleep-retries")
|
||||||
|
if _interval_retry is None:
|
||||||
|
self._interval_retry = util.identity
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
self._interval_retry = util.build_duration_func_ex(
|
||||||
|
_interval_retry)
|
||||||
|
except Exception as exc:
|
||||||
|
self.log.error("Invalid 'sleep-retries' value '%s' (%s: %s)",
|
||||||
|
_interval_retry, exc.__class__.__name__, exc)
|
||||||
|
self._interval_retry = util.identity
|
||||||
|
|
||||||
_interval_429 = self.config("sleep-429")
|
_interval_429 = self.config("sleep-429")
|
||||||
if _interval_429 is None:
|
if _interval_429 is None:
|
||||||
@@ -430,11 +447,6 @@ class Extractor():
|
|||||||
self._interval_429 = util.build_duration_func_ex(
|
self._interval_429 = util.build_duration_func_ex(
|
||||||
self.request_interval_429)
|
self.request_interval_429)
|
||||||
|
|
||||||
if self._retries < 0:
|
|
||||||
self._retries = float("inf")
|
|
||||||
if not self._retry_codes:
|
|
||||||
self._retry_codes = ()
|
|
||||||
|
|
||||||
def _init_session(self):
|
def _init_session(self):
|
||||||
self.session = session = requests.Session()
|
self.session = session = requests.Session()
|
||||||
headers = session.headers
|
headers = session.headers
|
||||||
|
|||||||
@@ -590,14 +590,20 @@ def build_parser():
|
|||||||
help=("Number of seconds to wait between HTTP requests "
|
help=("Number of seconds to wait between HTTP requests "
|
||||||
"during data extraction"),
|
"during data extraction"),
|
||||||
)
|
)
|
||||||
|
sleep.add_argument(
|
||||||
|
"--sleep-retries",
|
||||||
|
dest="sleep-retries", metavar="[TYPE=]SECONDS", action=ConfigAction,
|
||||||
|
help=("Number of seconds to wait before retrying an HTTP request. "
|
||||||
|
"Can be prefixed with "
|
||||||
|
"'lin[:START[:MAX]]' or 'exp[:BASE[:START[:MAX]]]' "
|
||||||
|
"for linear or exponential growth between consecutive retries "
|
||||||
|
"(e.g. '30', 'exp=40', 'lin:20=30-60'"),
|
||||||
|
)
|
||||||
sleep.add_argument(
|
sleep.add_argument(
|
||||||
"--sleep-429",
|
"--sleep-429",
|
||||||
dest="sleep-429", metavar="[TYPE=]SECONDS", action=ConfigAction,
|
dest="sleep-429", metavar="[TYPE=]SECONDS", action=ConfigAction,
|
||||||
help=("Number of seconds to wait when receiving a "
|
help=("Number of seconds to wait when receiving a "
|
||||||
"'429 Too Many Requests' response. Can be prefixed with "
|
"'429 Too Many Requests' response"),
|
||||||
"'lin[:START[:MAX]]' or 'exp[:BASE[:START[:MAX]]]' "
|
|
||||||
"for linear or exponential growth "
|
|
||||||
"(e.g. '30', 'exp=40', 'lin:20=30-60'"),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
configuration = parser.add_argument_group("Configuration Options")
|
configuration = parser.add_argument_group("Configuration Options")
|
||||||
|
|||||||
Reference in New Issue
Block a user