From 3beb0f6e341091b8a32cccabc2500c84209cc3f7 Mon Sep 17 00:00:00 2001 From: Marc Wrobel Date: Sat, 28 Jun 2025 12:57:41 +0200 Subject: [PATCH] Respect HTTP Cache-Control header if available (#450) Allow stale responses in case of error in order to reduce the number of temporary errors. Note that the cache will be reset on the first build of every week in any case, see .github/workflows/update.yml. --- .github/workflows/update.yml | 2 +- src/apple.py | 2 +- src/common/http.py | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/update.yml b/.github/workflows/update.yml index 7021efb1..3a1249ba 100644 --- a/.github/workflows/update.yml +++ b/.github/workflows/update.yml @@ -24,7 +24,7 @@ jobs: with: format: YYYY-ww # 2022-01 to 2022-52 for eg - - name: Cache fetched repositories + - name: Cache fetched repositories and HTTP requests uses: actions/cache@v4 with: path: ~/.cache diff --git a/src/apple.py b/src/apple.py index 93d6c818..71fb4218 100644 --- a/src/apple.py +++ b/src/apple.py @@ -25,7 +25,7 @@ DATE_PATTERN = re.compile(r"\b\d+\s[A-Za-z]+\s\d+\b") for config in endoflife.list_configs_from_argv(): with releasedata.ProductData(config.product) as product_data: # URLs are cached to avoid rate limiting by support.apple.com. 
- soups = [BeautifulSoup(response.text, features="html5lib") for response in http.fetch_urls(URLS, cache=True)] + soups = [BeautifulSoup(response.text, features="html5lib") for response in http.fetch_urls(URLS)] for soup in soups: versions_table = soup.find(id="tableWraper") diff --git a/src/common/http.py b/src/common/http.py index 166c68b9..08084e33 100644 --- a/src/common/http.py +++ b/src/common/http.py @@ -20,13 +20,13 @@ USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/1 def fetch_urls(urls: list[str], data: any = None, headers: dict[str, str] = None, - max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30, - cache: bool = False) -> list[Response]: + max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> list[Response]: logging.info(f"Fetching {urls}") try: - underlying_session = CachedSession('/tmp/http_cache', backend='filesystem') if cache else None - with FuturesSession(session=underlying_session) as session: + # Respect Cache-Control header if available, and allow stale responses in case of errors. + cache_session = CachedSession('~/.cache/http', backend='filesystem', cache_control=True, stale_if_error=True) + with FuturesSession(session=cache_session) as session: adapter = HTTPAdapter(max_retries=Retry(total=max_retries, backoff_factor=backoff_factor)) session.mount('http://', adapter) session.mount('https://', adapter)