Respect HTTP Cache-Control header if available (#450)
Allow stale responses in case of error in order to reduce the number of temporary errors. Note that the cache will be reset on the first build of every week in any case; see .github/workflows/update.yml.
This commit is contained in:
2
.github/workflows/update.yml
vendored
2
.github/workflows/update.yml
vendored
@@ -24,7 +24,7 @@ jobs:
|
||||
with:
|
||||
format: YYYY-ww # 2022-01 to 2022-52 for eg
|
||||
|
||||
- name: Cache fetched repositories
|
||||
- name: Cache fetched repositories and HTTP requests
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache
|
||||
|
||||
@@ -25,7 +25,7 @@ DATE_PATTERN = re.compile(r"\b\d+\s[A-Za-z]+\s\d+\b")
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
# URLs are cached to avoid rate limiting by support.apple.com.
|
||||
soups = [BeautifulSoup(response.text, features="html5lib") for response in http.fetch_urls(URLS, cache=True)]
|
||||
soups = [BeautifulSoup(response.text, features="html5lib") for response in http.fetch_urls(URLS)]
|
||||
|
||||
for soup in soups:
|
||||
versions_table = soup.find(id="tableWraper")
|
||||
|
||||
@@ -20,13 +20,13 @@ USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/1
|
||||
|
||||
|
||||
def fetch_urls(urls: list[str], data: any = None, headers: dict[str, str] = None,
|
||||
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30,
|
||||
cache: bool = False) -> list[Response]:
|
||||
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> list[Response]:
|
||||
logging.info(f"Fetching {urls}")
|
||||
|
||||
try:
|
||||
underlying_session = CachedSession('/tmp/http_cache', backend='filesystem') if cache else None
|
||||
with FuturesSession(session=underlying_session) as session:
|
||||
# Respect Cache-Control header if available, and allow stale responses in case of errors.
|
||||
cache_session = CachedSession('~/.cache/http', backend='filesystem', cache_control=True, stale_if_error=True)
|
||||
with FuturesSession(session=cache_session) as session:
|
||||
adapter = HTTPAdapter(max_retries=Retry(total=max_retries, backoff_factor=backoff_factor))
|
||||
session.mount('http://', adapter)
|
||||
session.mount('https://', adapter)
|
||||
|
||||
Reference in New Issue
Block a user