[apple] Adapt script for single-product usage (#317)

Make the Apple script compatible with the way update.py now works, which is 'product' oriented, meaning the script will be called once for each product.

To minimize the impact of these repeated runs, the HTTP responses are now cached to avoid rate limiting by support.apple.com.

Version patterns have also been moved to each product's auto configuration to make future changes simpler.
This commit is contained in:
Marc Wrobel
2024-02-21 00:01:25 +01:00
parent b11f01bc62
commit 2d5145444b
4 changed files with 20 additions and 39 deletions

View File

@@ -1,12 +1,11 @@
import logging
import re
import sys
from bs4 import BeautifulSoup
from common import dates, http, releasedata
from common import dates, endoflife, http, releasedata
"""Fetches and parses version and release date information from Apple's support website for macOS,
iOS, iPadOS, and watchOS. While all URLs are fetched once for performance reasons, the actual
parsing for each product is done in a separate loop for having easier-to-read logs."""
"""Fetches and parses version and release date information from Apple's support website."""
URLS = [
"https://support.apple.com/en-us/HT201222", # latest
@@ -22,38 +21,16 @@ URLS = [
"http://web.archive.org/web/20230204234533_/https://support.apple.com/en-us/HT1263", # 2005-2007
]
# If you are changing these, please use
# https://gist.githubusercontent.com/captn3m0/e7cb1f4fc3c07a5da0296ebda2b33e15/raw/5747e42ad611ec9ffdb7a2d1c0e3946bb87ab6d7/apple.txt
# as your corpus to validate your changes
VERSION_PATTERNS = {
"macos": [
# This covers Sierra and beyond
re.compile(r"^macOS[\D]+(?P<version>\d+(?:\.\d+)*)", re.MULTILINE),
# This covers Mavericks - El Capitan
re.compile(r"OS\s+X\s[\w\s]+\sv?(?P<version>\d+(?:\.\d+)+)", re.MULTILINE),
# This covers even older versions (OS X)
re.compile(r"^Mac\s+OS\s+X\s[\w\s]+\sv?(?P<version>\d{2}(?:\.\d+)+)", re.MULTILINE),
],
"ios": [
re.compile(r"iOS\s+(?P<version>\d+)", re.MULTILINE),
re.compile(r"iOS\s+(?P<version>\d+(?:\.\d+)+)", re.MULTILINE),
re.compile(r"iPhone\s+v?(?P<version>\d+(?:\.\d+)+)", re.MULTILINE),
],
"ipados": [
re.compile(r"iPadOS\s+(?P<version>\d+)", re.MULTILINE),
re.compile(r"iPadOS\s+(?P<version>\d+(?:\.\d+)+)", re.MULTILINE),
],
"watchos": [
re.compile(r"watchOS\s+(?P<version>\d+)", re.MULTILINE),
re.compile(r"watchOS\s+(?P<version>\d+(?:\.\d+)+)", re.MULTILINE),
],
}
DATE_PATTERN = re.compile(r"\b\d+\s[A-Za-z]+\s\d+\b")
METHOD = 'apple'
p_filter = sys.argv[1] if len(sys.argv) > 1 else None
m_filter = sys.argv[2] if len(sys.argv) > 2 else None
for config in endoflife.list_configs(p_filter, METHOD, m_filter):
with releasedata.ProductData(config.product) as product_data:
# URLs are cached to avoid rate limiting by support.apple.com.
soups = [BeautifulSoup(response.text, features="html5lib") for response in http.fetch_urls(URLS, cache=True)]
soups = [BeautifulSoup(response.text, features="html5lib") for response in http.fetch_urls(URLS)]
for product_name in VERSION_PATTERNS:
with releasedata.ProductData(product_name) as product_data:
for soup in soups:
versions_table = soup.find(id="tableWraper")
versions_table = versions_table if versions_table else soup.find('table', class_="gb-table")
@@ -70,7 +47,7 @@ for product_name in VERSION_PATTERNS:
date_str = date_match.group(0).replace("Sept ", "Sep ")
date = dates.parse_date(date_str)
for version_pattern in VERSION_PATTERNS[product_data.name]:
for version_pattern in config.include_version_patterns:
for version_str in version_pattern.findall(version_text):
version = product_data.get_version(version_str)
if not version or version.date() > date:

View File

@@ -29,11 +29,11 @@ class AutoConfig:
regexes_include = data.get("regex", DEFAULT_VERSION_REGEX)
regexes_include = regexes_include if isinstance(regexes_include, list) else [regexes_include]
self.include_version_patterns = [re.compile(r) for r in regexes_include]
self.include_version_patterns = [re.compile(r, re.MULTILINE) for r in regexes_include]
regexes_exclude = data.get("regex_exclude", [])
regexes_exclude = regexes_exclude if isinstance(regexes_exclude, list) else [regexes_exclude]
self.exclude_version_patterns = [re.compile(r) for r in regexes_exclude]
self.exclude_version_patterns = [re.compile(r, re.MULTILINE) for r in regexes_exclude]
def first_match(self, version: str) -> re.Match | None:
for exclude_pattern in self.exclude_version_patterns:

View File

@@ -5,6 +5,7 @@ from playwright.sync_api import sync_playwright
from requests import Response
from requests.adapters import HTTPAdapter
from requests.exceptions import ChunkedEncodingError
from requests_cache import CachedSession
from requests_futures.sessions import FuturesSession
from urllib3.util import Retry
@@ -13,11 +14,13 @@ USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/1
def fetch_urls(urls: list[str], data: any = None, headers: dict[str, str] = None,
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> list[Response]:
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30,
cache: bool = False) -> list[Response]:
logging.info(f"Fetching {urls}")
try:
with FuturesSession() as session:
underlying_session = CachedSession('/tmp/http_cache', backend='filesystem') if cache else None
with FuturesSession(session=underlying_session) as session:
adapter = HTTPAdapter(max_retries=Retry(total=max_retries, backoff_factor=backoff_factor))
session.mount('http://', adapter)
session.mount('https://', adapter)