From c881548a27f94cbe76d4e6d050ad2b78a786a5ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 14 Jan 2023 17:16:18 +0100 Subject: [PATCH] add 'extractor.retry-codes' option (#3313) do not retry 429 and 430 by default --- docs/configuration.rst | 23 +++++++++++++++++++++-- gallery_dl/downloader/http.py | 6 ++---- gallery_dl/extractor/common.py | 16 +++++++++++----- 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index 422cdee3..c71e1e04 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -766,6 +766,25 @@ Description giving up, or ``-1`` for infinite retries. +extractor.*.retry-codes +----------------------- +Type + ``list`` of ``integers`` +Example + ``[404, 429, 430]`` +Description + Additional `HTTP response status codes <https://developer.mozilla.org/en-US/docs/Web/HTTP/Status>`__ + to retry an HTTP request on. + + ``2xx`` codes (success responses) and + ``3xx`` codes (redirection messages) + will never be retried and always count as success, + regardless of this option. + + ``5xx`` codes (server error responses) will always be retried, + regardless of this option. + + extractor.*.timeout ------------------- Type @@ -3474,7 +3493,7 @@ downloader.http.retry-codes Type ``list`` of ``integers`` Default - ``[429]`` + `extractor.*.retry-codes`_ Description Additional `HTTP response status codes <https://developer.mozilla.org/en-US/docs/Web/HTTP/Status>`__ to retry a download on. @@ -3483,7 +3502,7 @@ Description download) will never be retried and always count as success, regardless of this option. - Codes ``500`` - ``599`` (server error responses) will always be retried, + ``5xx`` codes (server error responses) will always be retried, regardless of this option. 
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index 0bf19c2e..d2525e48 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -38,7 +38,7 @@ class HttpDownloader(DownloaderBase): self.minsize = self.config("filesize-min") self.maxsize = self.config("filesize-max") self.retries = self.config("retries", extractor._retries) - self.retry_codes = self.config("retry-codes") + self.retry_codes = self.config("retry-codes", extractor._retry_codes) self.timeout = self.config("timeout", extractor._timeout) self.verify = self.config("verify", extractor._verify) self.mtime = self.config("mtime", True) @@ -46,8 +46,6 @@ class HttpDownloader(DownloaderBase): if self.retries < 0: self.retries = float("inf") - if self.retry_codes is None: - self.retry_codes = [429] if self.minsize: minsize = text.parse_bytes(self.minsize) if not minsize: @@ -104,7 +102,7 @@ class HttpDownloader(DownloaderBase): codes = kwdict.get("_http_retry_codes") if codes: - retry_codes = self.retry_codes.copy() + retry_codes = list(self.retry_codes) retry_codes += codes else: retry_codes = self.retry_codes diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index ad766dad..660f7dbf 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2022 Mike Fährmann +# Copyright 2014-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -53,6 +53,7 @@ class Extractor(): self._parentdir = "" self._write_pages = self.config("write-pages", False) + self._retry_codes = self.config("retry-codes") self._retries = self.config("retries", 4) self._timeout = self.config("timeout", 30) self._verify = self.config("verify", True) @@ -64,6 +65,8 @@ class Extractor(): if self._retries < 0: self._retries = float("inf") + if not self._retry_codes: + self._retry_codes = 
() self._init_session() self._init_cookies() @@ -103,12 +106,15 @@ class Extractor(): values[:0] = config.accumulate((self.subcategory,), key, conf=conf) return values - def request(self, url, *, method="GET", session=None, retries=None, - encoding=None, fatal=True, notfound=None, **kwargs): + def request(self, url, *, method="GET", session=None, + retries=None, retry_codes=None, encoding=None, + fatal=True, notfound=None, **kwargs): if session is None: session = self.session if retries is None: retries = self._retries + if retry_codes is None: + retry_codes = self._retry_codes if "proxies" not in kwargs: kwargs["proxies"] = self._proxies if "timeout" not in kwargs: @@ -153,12 +159,12 @@ class Extractor(): code in (403, 503): content = response.content if b"_cf_chl_opt" in content or b"jschl-answer" in content: - self.log.warning("Cloudflare IUAM challenge") + self.log.warning("Cloudflare challenge") break if b'name="captcha-bypass"' in content: self.log.warning("Cloudflare CAPTCHA") break - if code < 500 and code != 429 and code != 430: + if code not in retry_codes and code < 500: break finally: