diff --git a/.github/workflows/update.yml b/.github/workflows/update.yml index 7021efb1..3a1249ba 100644 --- a/.github/workflows/update.yml +++ b/.github/workflows/update.yml @@ -24,7 +24,7 @@ jobs: with: format: YYYY-ww # 2022-01 to 2022-52 for eg - - name: Cache fetched repositories + - name: Cache fetched repositories and HTTP requests uses: actions/cache@v4 with: path: ~/.cache diff --git a/src/apple.py b/src/apple.py index 93d6c818..71fb4218 100644 --- a/src/apple.py +++ b/src/apple.py @@ -25,7 +25,7 @@ DATE_PATTERN = re.compile(r"\b\d+\s[A-Za-z]+\s\d+\b") for config in endoflife.list_configs_from_argv(): with releasedata.ProductData(config.product) as product_data: # URLs are cached to avoid rate limiting by support.apple.com. - soups = [BeautifulSoup(response.text, features="html5lib") for response in http.fetch_urls(URLS, cache=True)] + soups = [BeautifulSoup(response.text, features="html5lib") for response in http.fetch_urls(URLS)] for soup in soups: versions_table = soup.find(id="tableWraper") diff --git a/src/common/http.py b/src/common/http.py index 166c68b9..08084e33 100644 --- a/src/common/http.py +++ b/src/common/http.py @@ -20,13 +20,13 @@ USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/1 def fetch_urls(urls: list[str], data: any = None, headers: dict[str, str] = None, - max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30, - cache: bool = False) -> list[Response]: + max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> list[Response]: logging.info(f"Fetching {urls}") try: - underlying_session = CachedSession('/tmp/http_cache', backend='filesystem') if cache else None - with FuturesSession(session=underlying_session) as session: + # Respect Cache-Control header if available, and allow stale responses in case of errors. + cache_session = CachedSession('~/.cache/http', backend='filesystem', cache_control=True, stale_if_error=True) + with FuturesSession(session=cache_session) as session: adapter = HTTPAdapter(max_retries=Retry(total=max_retries, backoff_factor=backoff_factor)) session.mount('http://', adapter) session.mount('https://', adapter)