Respect HTTP Cache-Control header if available (#450)

Allow stale cached responses to be served when a request fails, in order to reduce the number of temporary errors.

Note that the cache will be reset on the first build of every week in any case; see .github/workflows/update.yml.
This commit is contained in:
Marc Wrobel
2025-06-28 12:57:41 +02:00
committed by GitHub
parent e288cb3e8f
commit 3beb0f6e34
3 changed files with 6 additions and 6 deletions

View File

@@ -20,13 +20,13 @@ USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/1
def fetch_urls(urls: list[str], data: any = None, headers: dict[str, str] = None,
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30,
cache: bool = False) -> list[Response]:
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> list[Response]:
logging.info(f"Fetching {urls}")
try:
underlying_session = CachedSession('/tmp/http_cache', backend='filesystem') if cache else None
with FuturesSession(session=underlying_session) as session:
# Respect Cache-Control header if available, and allow stale responses in case of errors.
cache_session = CachedSession('~/.cache/http', backend='filesystem', cache_control=True, stale_if_error=True)
with FuturesSession(session=cache_session) as session:
adapter = HTTPAdapter(max_retries=Retry(total=max_retries, backoff_factor=backoff_factor))
session.mount('http://', adapter)
session.mount('https://', adapter)