[apple] Refactor script (#213)

Make the script more readable, mostly by:

- using the endoflife.Product class,
- removing unnecessary helper functions,
- renaming a few identifiers.
This commit is contained in:
Marc Wrobel
2023-12-10 13:44:59 +01:00
committed by GitHub
parent 54191857d9
commit b122ed40fe
3 changed files with 57 additions and 51 deletions

View File

@@ -1,9 +1,14 @@
import logging
import re
from bs4 import BeautifulSoup
from common import http
from common import dates
from common import endoflife
"""Fetches and parses version and release date information from Apple's support website for macOS,
iOS, iPadOS, and watchOS. All URLs are fetched only once for performance reasons, but the pages
are parsed in a separate loop for each product so that the logs are easier to read."""
URLS = [
"https://support.apple.com/en-us/HT201222", # latest
"https://support.apple.com/kb/HT213078", # 2018-2019
@@ -21,70 +26,63 @@ URLS = [
# If you are changing these, please use
# https://gist.githubusercontent.com/captn3m0/e7cb1f4fc3c07a5da0296ebda2b33e15/raw/5747e42ad611ec9ffdb7a2d1c0e3946bb87ab6d7/apple.txt
# as your corpus to validate your changes
# Maps a product name to the list of patterns used to find its versions in a table cell.
# Every pattern exposes the matched version through the named group "version".
# NOTE(review): the lines below interleave two spellings of the same mapping: plain pattern
# strings (opened by `CONFIG = {`) and the same patterns pre-compiled with re.MULTILINE (opened
# by `VERSION_PATTERNS = {`). Within each product, the raw strings come first and their compiled
# counterparts follow.
CONFIG = {
VERSION_PATTERNS = {
"macos": [
# This covers Sierra and beyond (names starting with "macOS")
r"^macOS[\D]+(?P<version>\d+(?:\.\d+)*)",
re.compile(r"^macOS[\D]+(?P<version>\d+(?:\.\d+)*)", re.MULTILINE),
# This covers Mavericks - El Capitan (names containing "OS X")
r"OS\s+X\s[\w\s]+\sv?(?P<version>\d+(?:\.\d+)+)",
re.compile(r"OS\s+X\s[\w\s]+\sv?(?P<version>\d+(?:\.\d+)+)", re.MULTILINE),
# This covers even older versions (names starting with "Mac OS X", two-digit major version)
r"^Mac\s+OS\s+X\s[\w\s]+\sv?(?P<version>\d{2}(?:\.\d+)+)",
re.compile(r"^Mac\s+OS\s+X\s[\w\s]+\sv?(?P<version>\d{2}(?:\.\d+)+)", re.MULTILINE),
],
"ios": [
# In order: major-only versions, dotted versions, and entries written as "iPhone" followed
# by a (possibly "v"-prefixed) dotted version.
r"iOS\s+(?P<version>\d+)",
r"iOS\s+(?P<version>\d+(?:)(?:\.\d+)+)",
r"iPhone\s+v?(?P<version>\d+(?:)(?:\.\d+)+)",
re.compile(r"iOS\s+(?P<version>\d+)", re.MULTILINE),
re.compile(r"iOS\s+(?P<version>\d+(?:)(?:\.\d+)+)", re.MULTILINE),
re.compile(r"iPhone\s+v?(?P<version>\d+(?:)(?:\.\d+)+)", re.MULTILINE),
],
"ipados": [
# In order: major-only versions, then dotted versions.
r"iPadOS\s+(?P<version>\d+)",
r"iPadOS\s+(?P<version>\d+(?:)(?:\.\d+)+)"
re.compile(r"iPadOS\s+(?P<version>\d+)", re.MULTILINE),
re.compile(r"iPadOS\s+(?P<version>\d+(?:)(?:\.\d+)+)", re.MULTILINE),
],
"watchos": [
# In order: major-only versions, then dotted versions.
r"watchOS\s+(?P<version>\d+)",
r"watchOS\s+(?P<version>\d+(?:)(?:\.\d+)+)"
re.compile(r"watchOS\s+(?P<version>\d+)", re.MULTILINE),
re.compile(r"watchOS\s+(?P<version>\d+(?:)(?:\.\d+)+)", re.MULTILINE),
],
}
def parse_date(date_str):
    """Parse a date string after rewriting every "Sept" occurrence to "Sep".

    The rewritten text is passed to ``dates.parse_date`` and that call's result is returned
    unchanged.
    """
    normalized = date_str.replace("Sept", "Sep")
    return dates.parse_date(normalized)
DATE_PATTERN = re.compile(r"\b\d+\s[A-Za-z]+\s\d+\b")
print("::group::apple")
# One {version: date} mapping per product declared in CONFIG.
versions_by_product = {name: {} for name in CONFIG}

for response in http.fetch_urls(URLS):
    soup = BeautifulSoup(response.text, features="html5lib")

    # Look the table up by the "tableWraper" id first, then by the "gb-table" class.
    versions_table = soup.find(id="tableWraper") or soup.find('table', class_="gb-table")

    # The first row of the table is skipped.
    for row in versions_table.findAll("tr")[1:]:
        cells = row.findAll("td")
        version_text = cells[0].get_text().strip()  # first cell: text searched for versions
        date_text = cells[2].get_text().strip()  # third cell: text searched for a date

        date_match = re.search(r"\b\d+\s[A-Za-z]+\s\d+\b", date_text)
        if not date_match:
            print(f"{version_text}: {date_text} [IGNORED]")
            continue
        date = parse_date(date_match.group(0))

        # The same cell is tested against the patterns of every product.
        for product, version_regexes in CONFIG.items():
            versions = versions_by_product[product]
            for version_regex in version_regexes:
                for version in re.findall(version_regex, version_text, re.MULTILINE):
                    is_new = version not in versions
                    if is_new or versions[version] > date:
                        # Keep the earliest date seen for a given version.
                        versions[version] = date
                        if is_new:
                            print(f"{product}-{version}: {date}")
                        else:
                            print(f"{product}-{version}: {date} [UPDATED]")
                    else:
                        print(f"{product}-{version}: {date} [IGNORED]")

# Dates are written out formatted as YYYY-MM-DD strings.
for product, dates_by_version in versions_by_product.items():
    versions = {v: d.strftime("%Y-%m-%d") for v, d in dates_by_version.items()}
    endoflife.write_releases(product, versions)
# Download and parse every page exactly once; the resulting soups are reused for each product.
soups = [
    BeautifulSoup(response.text, features="html5lib")
    for response in http.fetch_urls(URLS)
]
print("::endgroup::")

# One pass over all the pages per product, so that each product's output sits in its own group.
for product_name, version_patterns in VERSION_PATTERNS.items():
    product = endoflife.Product(product_name)
    print(f"::group::{product.name}")

    for soup in soups:
        # Look the table up by the "tableWraper" id first, then by the "gb-table" class.
        # NOTE(review): if neither lookup matches, versions_table is None and the loop below
        # raises AttributeError - presumably intended so that layout changes fail loudly; confirm.
        versions_table = soup.find(id="tableWraper") or soup.find('table', class_="gb-table")

        # find_all() is the current Beautiful Soup spelling of the deprecated findAll() alias.
        # The first row of the table is skipped.
        for row in versions_table.find_all("tr")[1:]:
            cells = row.find_all("td")
            version_text = cells[0].get_text().strip()  # first cell: text searched for versions
            date_text = cells[2].get_text().strip()  # third cell: text searched for a date

            date_match = DATE_PATTERN.search(date_text)
            if not date_match:
                logging.info(f"ignoring version {version_text} ({date_text}), date pattern don't match")
                continue

            date = dates.parse_date(date_match.group(0))
            for version_pattern in version_patterns:
                for version in version_pattern.findall(version_text):
                    if not product.has_version(version):
                        product.declare_version(version, date)
                    elif product.get_version_date(version) > date:
                        # Keep the earliest date seen for a given version.
                        product.replace_version(version, date)
                    else:
                        logging.info(f"ignoring version {version} ({date}) for {product.name}")

    product.write()
    print("::endgroup::")

View File

@@ -42,6 +42,7 @@ def parse_datetime(text, formats=frozenset([
"""
# so that we don't have to deal with some special characters in formats
text = text.strip().replace(", ", " ").replace(". ", " ").replace("(", "").replace(")", "")
text = text.replace("Sept ", "Sep ") # common typo, for ex. on Apple and Artifactory products
for fmt in formats:
try:
date = datetime.strptime(text, fmt)

View File

@@ -43,6 +43,13 @@ class Product:
for (version, date) in dates_by_version.items():
self.declare_version(version, date)
def replace_version(self, version: str, date: datetime) -> None:
    """Overwrite the date stored for a version that is already present in ``self.versions``.

    The previous and new dates are logged at INFO level before the new date is stored.

    :param version: key of the entry to overwrite in ``self.versions``.
    :param date: new value stored for that key.
    :raises ValueError: if ``version`` is not a key of ``self.versions``.
    """
    if version in self.versions:
        logging.info(f"replacing version {version} ({self.versions[version]} -> {date}) to {self.name}")
        self.versions[version] = date
        return

    raise ValueError(f"version {version} cannot be replaced as it does not exist for {self.name}")
def remove_version(self, version: str) -> None:
if not self.has_version(version):
logging.warning(f"version {version} cannot be removed as it does not exist for {self.name}")