From df77271438238afac534fe48e23938a7f45289b6 Mon Sep 17 00:00:00 2001 From: ClosedPort22 <44864697+ClosedPort22@users.noreply.github.com> Date: Thu, 9 Mar 2023 20:55:28 +0800 Subject: [PATCH 1/5] [downloader:http] add 'consume-content' option * fix connection not being released when the response is neither successful nor retried * add the ability to consume the HTTP response body instead of closing the connection reference: https://docs.python-requests.org/en/latest/user/advanced/#body-content-workflow --- docs/configuration.rst | 19 +++++++++++++++++++ gallery_dl/downloader/http.py | 17 ++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index fbb0416b..c88f8eb1 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -3616,6 +3616,25 @@ Description contains JPEG/JFIF data. +downloader.http.consume-content +--------------------------------- +Type + ``bool`` +Default + ``false`` +Description + Controls the behavior when an HTTP response is considered + unsuccessful. + + If the value is ``true``, consume the response body. This + avoids closing the connection and therefore improves connection + reuse. + + If the value is ``false``, immediately close the connection + without reading the response. This can be useful if the server + is known to send large bodies for error responses. 
+ + downloader.http.chunk-size -------------------------- Type diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index f14af249..30b59714 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -44,6 +44,12 @@ class HttpDownloader(DownloaderBase): self.mtime = self.config("mtime", True) self.rate = self.config("rate") + if not self.config("consume-content", False): + # this resets the underlying TCP connection, and therefore + # if the program makes another request to the same domain, + # a new connection (either TLS or plain TCP) must be made + self.release_conn = lambda resp: resp.close() + if self.retries < 0: self.retries = float("inf") if self.minsize: @@ -113,7 +119,7 @@ class HttpDownloader(DownloaderBase): while True: if tries: if response: - response.close() + self.release_conn(response) response = None self.log.warning("%s (%s/%s)", msg, tries, self.retries+1) if tries > self.retries: @@ -170,6 +176,7 @@ class HttpDownloader(DownloaderBase): if code in retry_codes or 500 <= code < 600: continue self.log.warning(msg) + self.release_conn(response) return False # check for invalid responses @@ -182,6 +189,7 @@ class HttpDownloader(DownloaderBase): continue if not result: self.log.warning("Invalid response") + self.release_conn(response) return False # check file size @@ -191,11 +199,13 @@ class HttpDownloader(DownloaderBase): self.log.warning( "File size smaller than allowed minimum (%s < %s)", size, self.minsize) + self.release_conn(response) return False if self.maxsize and size > self.maxsize: self.log.warning( "File size larger than allowed maximum (%s > %s)", size, self.maxsize) + self.release_conn(response) return False build_path = False @@ -284,6 +294,11 @@ class HttpDownloader(DownloaderBase): return True + def release_conn(self, response): + """Release connection back to pool by consuming response body""" + for _ in response.iter_content(self.chunk_size): + pass + @staticmethod def receive(fp, 
content, bytes_total, bytes_start): write = fp.write From fcaeaf539cb913fa7c0e076e34e48f1e37ccf545 Mon Sep 17 00:00:00 2001 From: ClosedPort22 <44864697+ClosedPort22@users.noreply.github.com> Date: Sat, 11 Mar 2023 21:36:37 +0800 Subject: [PATCH 2/5] [downloader:http] handle exceptions while consuming content --- docs/configuration.rst | 2 +- gallery_dl/downloader/http.py | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index c88f8eb1..a64322d8 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -3617,7 +3617,7 @@ Description downloader.http.consume-content ---------------------------------- +------------------------------- Type ``bool`` Default diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index 30b59714..59cd0ac0 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -296,8 +296,15 @@ class HttpDownloader(DownloaderBase): def release_conn(self, response): """Release connection back to pool by consuming response body""" - for _ in response.iter_content(self.chunk_size): - pass + try: + for _ in response.iter_content(self.chunk_size): + pass + except (RequestException, SSLError, OpenSSLError) as exc: + print() + self.log.debug( + "Unable to consume response body (%s); " + "closing the connection anyway", exc) + response.close() @staticmethod def receive(fp, content, bytes_total, bytes_start): From 1a977f0f62373cc53ef248f9ba901bbe43a01eb6 Mon Sep 17 00:00:00 2001 From: ClosedPort22 <44864697+ClosedPort22@users.noreply.github.com> Date: Thu, 23 Mar 2023 19:57:13 +0800 Subject: [PATCH 3/5] [downloader:http] handle exceptions in 'validate' This isn't strictly necessary for 'exhentai.py', but it improves efficiency when the adapter is reused --- gallery_dl/downloader/http.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index 
59cd0ac0..b3f381ec 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -182,7 +182,11 @@ class HttpDownloader(DownloaderBase): # check for invalid responses validate = kwdict.get("_http_validate") if validate and self.validate: - result = validate(response) + try: + result = validate(response) + except Exception: + self.release_conn(response) + raise if isinstance(result, str): url = result tries -= 1 From 775d2ac9995d3efcffff6f696789677ee0f70e4e Mon Sep 17 00:00:00 2001 From: ClosedPort22 <44864697+ClosedPort22@users.noreply.github.com> Date: Fri, 31 Mar 2023 20:08:38 +0800 Subject: [PATCH 4/5] [downloader:http] improve error logging when releasing connection --- gallery_dl/downloader/http.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index b3f381ec..d8708fba 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -306,8 +306,8 @@ class HttpDownloader(DownloaderBase): except (RequestException, SSLError, OpenSSLError) as exc: print() self.log.debug( - "Unable to consume response body (%s); " - "closing the connection anyway", exc) + "Unable to consume response body (%s: %s); " + "closing the connection anyway", exc.__class__.__name__, exc) response.close() @staticmethod From 6f4a843fbae3026f293d804f730dc1e778d66a8c Mon Sep 17 00:00:00 2001 From: ClosedPort22 <44864697+ClosedPort22@users.noreply.github.com> Date: Mon, 24 Apr 2023 23:59:36 +0800 Subject: [PATCH 5/5] [downloader:http] release connection before logging messages This allows connections to be properly released when using 'actions' feature. 
--- gallery_dl/downloader/http.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index d8708fba..434689f1 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -175,8 +175,8 @@ class HttpDownloader(DownloaderBase): msg = "'{} {}' for '{}'".format(code, response.reason, url) if code in retry_codes or 500 <= code < 600: continue - self.log.warning(msg) self.release_conn(response) + self.log.warning(msg) return False # check for invalid responses @@ -192,24 +192,24 @@ class HttpDownloader(DownloaderBase): tries -= 1 continue if not result: - self.log.warning("Invalid response") self.release_conn(response) + self.log.warning("Invalid response") return False # check file size size = text.parse_int(size, None) if size is not None: if self.minsize and size < self.minsize: + self.release_conn(response) self.log.warning( "File size smaller than allowed minimum (%s < %s)", size, self.minsize) - self.release_conn(response) return False if self.maxsize and size > self.maxsize: + self.release_conn(response) self.log.warning( "File size larger than allowed maximum (%s > %s)", size, self.maxsize) - self.release_conn(response) return False build_path = False