Respect HTTP Cache-Control header if available (#450)

Allow stale responses in case of error in order to reduce the number of temporary errors.

Note that the cache will be reset on the first build of every week in any case; see .github/workflows/update.yml.
This commit is contained in:
Marc Wrobel
2025-06-28 12:57:41 +02:00
committed by GitHub
parent e288cb3e8f
commit 3beb0f6e34
3 changed files with 6 additions and 6 deletions

View File

@@ -24,7 +24,7 @@ jobs:
with:
format: YYYY-ww # 2022-01 to 2022-52 for eg
- name: Cache fetched repositories
- name: Cache fetched repositories and HTTP requests
uses: actions/cache@v4
with:
path: ~/.cache

View File

@@ -25,7 +25,7 @@ DATE_PATTERN = re.compile(r"\b\d+\s[A-Za-z]+\s\d+\b")
for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
# URLs are cached to avoid rate limiting by support.apple.com.
soups = [BeautifulSoup(response.text, features="html5lib") for response in http.fetch_urls(URLS, cache=True)]
soups = [BeautifulSoup(response.text, features="html5lib") for response in http.fetch_urls(URLS)]
for soup in soups:
versions_table = soup.find(id="tableWraper")

View File

@@ -20,13 +20,13 @@ USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/1
def fetch_urls(urls: list[str], data: any = None, headers: dict[str, str] = None,
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30,
cache: bool = False) -> list[Response]:
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> list[Response]:
logging.info(f"Fetching {urls}")
try:
underlying_session = CachedSession('/tmp/http_cache', backend='filesystem') if cache else None
with FuturesSession(session=underlying_session) as session:
# Respect Cache-Control header if available, and allow stale responses in case of errors.
cache_session = CachedSession('~/.cache/http', backend='filesystem', cache_control=True, stale_if_error=True)
with FuturesSession(session=cache_session) as session:
adapter = HTTPAdapter(max_retries=Retry(total=max_retries, backoff_factor=backoff_factor))
session.mount('http://', adapter)
session.mount('https://', adapter)