From 1e65a048b0fb14e0f41e63c524bcb57a5c12cee6 Mon Sep 17 00:00:00 2001 From: Marc Wrobel Date: Sun, 26 Nov 2023 19:00:32 +0100 Subject: [PATCH] Improve retry mechanism (#194) When a ChunkedEncodingError occurs, request and response are not set and there is no way to get the URL that caused the error. With this change, all URLs are retried. The max_retries parameter is decremented on each retry so that we do not get stuck in an infinite loop. I also considered waiting before retrying, but for now I don't see any benefit to it. Relates to #188. --- src/common/endoflife.py | 39 ++++++++++++++++----------------------- src/unity.py | 2 ++ 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/src/common/endoflife.py b/src/common/endoflife.py index 19a10c98..ff410a98 100644 --- a/src/common/endoflife.py +++ b/src/common/endoflife.py @@ -45,30 +45,23 @@ def list_products(method, products_filter=None, pathname="website/products") -> # Keep the default timeout high enough to avoid errors with web.archive.org. def fetch_urls(urls, data=None, headers=None, max_retries=10, backoff_factor=0.5, timeout=30) -> list[Response]: - adapter = HTTPAdapter(max_retries=Retry(total=max_retries, backoff_factor=backoff_factor)) - session = FuturesSession() - session.mount('http://', adapter) - session.mount('https://', adapter) - - headers = {'User-Agent': USER_AGENT} | ({} if headers is None else headers) - futures = [session.get(url, headers=headers, data=data, timeout=timeout) for url in urls] - - return [result_or_retry(future) for future in as_completed(futures)] - - -def result_or_retry(future) -> Response: - """Return the future's result or retry the request if there is an error. - This may lead to an infinite loop, but let's try it for now. - """ try: - return future.result() - except ChunkedEncodingError as e: - # Intermittent ChunkedEncodingErrors occurs while fetching URLs. This change try to fix it by retrying. 
- # According to https://stackoverflow.com/a/44511691/374236, most servers transmit all data, but that's not - # what was observed. - url = e.response.url - print(f"Got ChunkedEncodingError while fetching {url}, retrying...") - return fetch_urls([url], e.request.body, e.request.headers)[0] + with FuturesSession() as session: + adapter = HTTPAdapter(max_retries=Retry(total=max_retries, backoff_factor=backoff_factor)) + session.mount('http://', adapter) + session.mount('https://', adapter) + + headers = {'User-Agent': USER_AGENT} | ({} if headers is None else headers) + futures = [session.get(url, headers=headers, data=data, timeout=timeout, stream=None) for url in urls] + return [future.result() for future in as_completed(futures)] + except ChunkedEncodingError as e: # See https://github.com/psf/requests/issues/4771#issue-354077499 + next_max_retries = max_retries - 1 + if next_max_retries == 0: + raise e # So that the function does not get stuck in an infinite loop. + else: + # We could wait a bit before retrying, but it's not clear if it would help. + print(f"Got ChunkedEncodingError while fetching {urls} ({e}), retrying (remaining retries = {next_max_retries}).") + return fetch_urls(urls, data, headers, next_max_retries, backoff_factor, timeout) def fetch_url(url, data=None, headers=None, max_retries=5, backoff_factor=0.5, timeout=30) -> str: diff --git a/src/unity.py b/src/unity.py index ed422c90..fc648d74 100644 --- a/src/unity.py +++ b/src/unity.py @@ -33,6 +33,8 @@ print(f"::group::{PRODUCT}") all_versions = {} next_page_url = URL +# Do not try to fetch multiple pages in parallel: it is raising a lot of ChunkedEncodingErrors and +# make the overall process slower. while next_page_url: next_page_url = fetch_releases(all_versions, next_page_url)