diff --git a/src/apple.py b/src/apple.py
index bf397d33..687eef1e 100644
--- a/src/apple.py
+++ b/src/apple.py
@@ -1,9 +1,14 @@
+import logging
 import re
 from bs4 import BeautifulSoup
 from common import http
 from common import dates
 from common import endoflife
 
+"""Fetches and parses version and release date information from Apple's support website for macOS,
+iOS, iPadOS, and watchOS. While all URLs are fetched once for performance reasons, the actual
+parsing for each product is done in a separate loop for having easier-to-read logs."""
+
 URLS = [
     "https://support.apple.com/en-us/HT201222",  # latest
     "https://support.apple.com/kb/HT213078",  # 2018-2019
@@ -21,70 +26,63 @@ URLS = [
 # If you are changing these, please use
 # https://gist.githubusercontent.com/captn3m0/e7cb1f4fc3c07a5da0296ebda2b33e15/raw/5747e42ad611ec9ffdb7a2d1c0e3946bb87ab6d7/apple.txt
 # as your corpus to validate your changes
-CONFIG = {
+VERSION_PATTERNS = {
     "macos": [
         # This covers Sierra and beyond
-        r"^macOS[\D]+(?P<version>\d+(?:\.\d+)*)",
+        re.compile(r"^macOS[\D]+(?P<version>\d+(?:\.\d+)*)", re.MULTILINE),
         # This covers Mavericks - El Capitan
-        r"OS\s+X\s[\w\s]+\sv?(?P<version>\d+(?:\.\d+)+)",
+        re.compile(r"OS\s+X\s[\w\s]+\sv?(?P<version>\d+(?:\.\d+)+)", re.MULTILINE),
         # This covers even older versions (OS X)
-        r"^Mac\s+OS\s+X\s[\w\s]+\sv?(?P<version>\d{2}(?:\.\d+)+)",
+        re.compile(r"^Mac\s+OS\s+X\s[\w\s]+\sv?(?P<version>\d{2}(?:\.\d+)+)", re.MULTILINE),
     ],
     "ios": [
-        r"iOS\s+(?P<version>\d+)",
-        r"iOS\s+(?P<version>\d+(?:)(?:\.\d+)+)",
-        r"iPhone\s+v?(?P<version>\d+(?:)(?:\.\d+)+)",
+        re.compile(r"iOS\s+(?P<version>\d+)", re.MULTILINE),
+        re.compile(r"iOS\s+(?P<version>\d+(?:)(?:\.\d+)+)", re.MULTILINE),
+        re.compile(r"iPhone\s+v?(?P<version>\d+(?:)(?:\.\d+)+)", re.MULTILINE),
     ],
     "ipados": [
-        r"iPadOS\s+(?P<version>\d+)",
-        r"iPadOS\s+(?P<version>\d+(?:)(?:\.\d+)+)"
+        re.compile(r"iPadOS\s+(?P<version>\d+)", re.MULTILINE),
+        re.compile(r"iPadOS\s+(?P<version>\d+(?:)(?:\.\d+)+)", re.MULTILINE),
     ],
     "watchos": [
-        r"watchOS\s+(?P<version>\d+)",
-        r"watchOS\s+(?P<version>\d+(?:)(?:\.\d+)+)"
+        re.compile(r"watchOS\s+(?P<version>\d+)", re.MULTILINE),
+        re.compile(r"watchOS\s+(?P<version>\d+(?:)(?:\.\d+)+)", re.MULTILINE),
     ],
 }
 
-
-def parse_date(date_str):
-    date_str = date_str.replace("Sept", "Sep")
-    return dates.parse_date(date_str)
-
+DATE_PATTERN = re.compile(r"\b\d+\s[A-Za-z]+\s\d+\b")
 
 print("::group::apple")
-versions_by_product = {k: {} for k in CONFIG.keys()}
-
-for response in http.fetch_urls(URLS):
-    soup = BeautifulSoup(response.text, features="html5lib")
-    versions_table = soup.find(id="tableWraper")
-    versions_table = versions_table if versions_table else soup.find('table', class_="gb-table")
-
-    for row in versions_table.findAll("tr")[1:]:
-        cells = row.findAll("td")
-        version_text = cells[0].get_text().strip()
-        date_text = cells[2].get_text().strip()
-
-        date_match = re.search(r"\b\d+\s[A-Za-z]+\s\d+\b", date_text)
-        if not date_match:
-            print(f"{version_text}: {date_text} [IGNORED]")
-            continue
-
-        date = parse_date(date_match.group(0))
-        for product in CONFIG.keys():
-            versions = versions_by_product[product]
-
-            for version_regex in CONFIG[product]:
-                for version in re.findall(version_regex, version_text, re.MULTILINE):
-                    if version not in versions:
-                        versions[version] = date
-                        print(f"{product}-{version}: {date}")
-                    elif versions[version] > date:
-                        versions[version] = date
-                        print(f"{product}-{version}: {date} [UPDATED]")
-                    else:
-                        print(f"{product}-{version}: {date} [IGNORED]")
-
-for k in CONFIG.keys():
-    versions = {v: d.strftime("%Y-%m-%d") for v, d in versions_by_product[k].items()}
-    endoflife.write_releases(k, versions)
+soups = [BeautifulSoup(response.text, features="html5lib") for response in http.fetch_urls(URLS)]
 print("::endgroup::")
+
+for product_name in VERSION_PATTERNS.keys():
+    product = endoflife.Product(product_name)
+    print(f"::group::{product.name}")
+
+    for soup in soups:
+        versions_table = soup.find(id="tableWraper")
+        versions_table = versions_table if versions_table else soup.find('table', class_="gb-table")
+
+        for row in versions_table.findAll("tr")[1:]:
+            cells = row.findAll("td")
+            version_text = cells[0].get_text().strip()
+            date_text = cells[2].get_text().strip()
+
+            date_match = DATE_PATTERN.search(date_text)
+            if not date_match:
+                logging.info(f"ignoring version {version_text} ({date_text}), date pattern don't match")
+                continue
+
+            date = dates.parse_date(date_match.group(0))
+            for version_pattern in VERSION_PATTERNS[product.name]:
+                for version in version_pattern.findall(version_text):
+                    if not product.has_version(version):
+                        product.declare_version(version, date)
+                    elif product.get_version_date(version) > date:
+                        product.replace_version(version, date)
+                    else:
+                        logging.info(f"ignoring version {version} ({date}) for {product.name}")
+
+    product.write()
+    print("::endgroup::")
diff --git a/src/common/dates.py b/src/common/dates.py
index 35f13f4c..f60e05b0 100644
--- a/src/common/dates.py
+++ b/src/common/dates.py
@@ -42,6 +42,7 @@ def parse_datetime(text, formats=frozenset([
     """
     # so that we don't have to deal with some special characters in formats
     text = text.strip().replace(", ", " ").replace(". ", " ").replace("(", "").replace(")", "")
+    text = text.replace("Sept ", "Sep ")  # common typo, for ex. on Apple and Artifactory products
     for fmt in formats:
         try:
             date = datetime.strptime(text, fmt)
diff --git a/src/common/endoflife.py b/src/common/endoflife.py
index 302af198..d7d44a6c 100644
--- a/src/common/endoflife.py
+++ b/src/common/endoflife.py
@@ -43,6 +43,13 @@ class Product:
         for (version, date) in dates_by_version.items():
             self.declare_version(version, date)
 
+    def replace_version(self, version: str, date: datetime) -> None:
+        if version not in self.versions:
+            raise ValueError(f"version {version} cannot be replaced as it does not exist for {self.name}")
+
+        logging.info(f"replacing version {version} ({self.versions[version]} -> {date}) to {self.name}")
+        self.versions[version] = date
+
     def remove_version(self, version: str) -> None:
         if not self.has_version(version):
             logging.warning(f"version {version} cannot be removed as it does not exist for {self.name}")