add 'extractor.retry-codes' option (#3313)
do not retry 429 and 430 by default
This commit is contained in:
@@ -766,6 +766,25 @@ Description
|
||||
giving up, or ``-1`` for infinite retries.
|
||||
|
||||
|
||||
extractor.*.retry-codes
|
||||
-----------------------
|
||||
Type
|
||||
``list`` of ``integers``
|
||||
Example
|
||||
``[404, 429, 430]``
|
||||
Description
|
||||
Additional `HTTP response status codes <https://developer.mozilla.org/en-US/docs/Web/HTTP/Status>`__
|
||||
to retry an HTTP request on.
|
||||
|
||||
``2xx`` codes (success responses) and
|
||||
``3xx`` codes (redirection messages)
|
||||
will never be retried and always count as success,
|
||||
regardless of this option.
|
||||
|
||||
``5xx`` codes (server error responses) will always be retried,
|
||||
regardless of this option.
|
||||
|
||||
|
||||
extractor.*.timeout
|
||||
-------------------
|
||||
Type
|
||||
@@ -3474,7 +3493,7 @@ downloader.http.retry-codes
|
||||
Type
|
||||
``list`` of ``integers``
|
||||
Default
|
||||
``[429]``
|
||||
`extractor.*.retry-codes`_
|
||||
Description
|
||||
Additional `HTTP response status codes <https://developer.mozilla.org/en-US/docs/Web/HTTP/Status>`__
|
||||
to retry a download on.
|
||||
@@ -3483,7 +3502,7 @@ Description
|
||||
download) will never be retried and always count as success,
|
||||
regardless of this option.
|
||||
|
||||
Codes ``500`` - ``599`` (server error responses) will always be retried,
|
||||
``5xx`` codes (server error responses) will always be retried,
|
||||
regardless of this option.
|
||||
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ class HttpDownloader(DownloaderBase):
|
||||
self.minsize = self.config("filesize-min")
|
||||
self.maxsize = self.config("filesize-max")
|
||||
self.retries = self.config("retries", extractor._retries)
|
||||
self.retry_codes = self.config("retry-codes")
|
||||
self.retry_codes = self.config("retry-codes", extractor._retry_codes)
|
||||
self.timeout = self.config("timeout", extractor._timeout)
|
||||
self.verify = self.config("verify", extractor._verify)
|
||||
self.mtime = self.config("mtime", True)
|
||||
@@ -46,8 +46,6 @@ class HttpDownloader(DownloaderBase):
|
||||
|
||||
if self.retries < 0:
|
||||
self.retries = float("inf")
|
||||
if self.retry_codes is None:
|
||||
self.retry_codes = [429]
|
||||
if self.minsize:
|
||||
minsize = text.parse_bytes(self.minsize)
|
||||
if not minsize:
|
||||
@@ -104,7 +102,7 @@ class HttpDownloader(DownloaderBase):
|
||||
|
||||
codes = kwdict.get("_http_retry_codes")
|
||||
if codes:
|
||||
retry_codes = self.retry_codes.copy()
|
||||
retry_codes = list(self.retry_codes)
|
||||
retry_codes += codes
|
||||
else:
|
||||
retry_codes = self.retry_codes
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2014-2022 Mike Fährmann
|
||||
# Copyright 2014-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -53,6 +53,7 @@ class Extractor():
|
||||
self._parentdir = ""
|
||||
|
||||
self._write_pages = self.config("write-pages", False)
|
||||
self._retry_codes = self.config("retry-codes")
|
||||
self._retries = self.config("retries", 4)
|
||||
self._timeout = self.config("timeout", 30)
|
||||
self._verify = self.config("verify", True)
|
||||
@@ -64,6 +65,8 @@ class Extractor():
|
||||
|
||||
if self._retries < 0:
|
||||
self._retries = float("inf")
|
||||
if not self._retry_codes:
|
||||
self._retry_codes = ()
|
||||
|
||||
self._init_session()
|
||||
self._init_cookies()
|
||||
@@ -103,12 +106,15 @@ class Extractor():
|
||||
values[:0] = config.accumulate((self.subcategory,), key, conf=conf)
|
||||
return values
|
||||
|
||||
def request(self, url, *, method="GET", session=None, retries=None,
|
||||
encoding=None, fatal=True, notfound=None, **kwargs):
|
||||
def request(self, url, *, method="GET", session=None,
|
||||
retries=None, retry_codes=None, encoding=None,
|
||||
fatal=True, notfound=None, **kwargs):
|
||||
if session is None:
|
||||
session = self.session
|
||||
if retries is None:
|
||||
retries = self._retries
|
||||
if retry_codes is None:
|
||||
retry_codes = self._retry_codes
|
||||
if "proxies" not in kwargs:
|
||||
kwargs["proxies"] = self._proxies
|
||||
if "timeout" not in kwargs:
|
||||
@@ -153,12 +159,12 @@ class Extractor():
|
||||
code in (403, 503):
|
||||
content = response.content
|
||||
if b"_cf_chl_opt" in content or b"jschl-answer" in content:
|
||||
self.log.warning("Cloudflare IUAM challenge")
|
||||
self.log.warning("Cloudflare challenge")
|
||||
break
|
||||
if b'name="captcha-bypass"' in content:
|
||||
self.log.warning("Cloudflare CAPTCHA")
|
||||
break
|
||||
if code < 500 and code != 429 and code != 430:
|
||||
if code not in retry_codes and code < 500:
|
||||
break
|
||||
|
||||
finally:
|
||||
|
||||
Reference in New Issue
Block a user