[apple] Refactor script (#213)
Make the script more readable, mostly by using the endoflife.Product class, removing the unnecessary use of functions, and doing a little bit of renaming.
This commit is contained in:
100
src/apple.py
100
src/apple.py
@@ -1,9 +1,14 @@
|
||||
import logging
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
from common import http
|
||||
from common import dates
|
||||
from common import endoflife
|
||||
|
||||
"""Fetches and parses version and release date information from Apple's support website for macOS,
|
||||
iOS, iPadOS, and watchOS. While all URLs are fetched once for performance reasons, the actual
|
||||
parsing for each product is done in a separate loop so that the logs are easier to read."""
|
||||
|
||||
URLS = [
|
||||
"https://support.apple.com/en-us/HT201222", # latest
|
||||
"https://support.apple.com/kb/HT213078", # 2018-2019
|
||||
@@ -21,70 +26,63 @@ URLS = [
|
||||
# If you are changing these, please use
|
||||
# https://gist.githubusercontent.com/captn3m0/e7cb1f4fc3c07a5da0296ebda2b33e15/raw/5747e42ad611ec9ffdb7a2d1c0e3946bb87ab6d7/apple.txt
|
||||
# as your corpus to validate your changes
|
||||
CONFIG = {
|
||||
VERSION_PATTERNS = {
|
||||
"macos": [
|
||||
# This covers Sierra and beyond
|
||||
r"^macOS[\D]+(?P<version>\d+(?:\.\d+)*)",
|
||||
re.compile(r"^macOS[\D]+(?P<version>\d+(?:\.\d+)*)", re.MULTILINE),
|
||||
# This covers Mavericks - El Capitan
|
||||
r"OS\s+X\s[\w\s]+\sv?(?P<version>\d+(?:\.\d+)+)",
|
||||
re.compile(r"OS\s+X\s[\w\s]+\sv?(?P<version>\d+(?:\.\d+)+)", re.MULTILINE),
|
||||
# This covers even older versions (OS X)
|
||||
r"^Mac\s+OS\s+X\s[\w\s]+\sv?(?P<version>\d{2}(?:\.\d+)+)",
|
||||
re.compile(r"^Mac\s+OS\s+X\s[\w\s]+\sv?(?P<version>\d{2}(?:\.\d+)+)", re.MULTILINE),
|
||||
],
|
||||
"ios": [
|
||||
r"iOS\s+(?P<version>\d+)",
|
||||
r"iOS\s+(?P<version>\d+(?:)(?:\.\d+)+)",
|
||||
r"iPhone\s+v?(?P<version>\d+(?:)(?:\.\d+)+)",
|
||||
re.compile(r"iOS\s+(?P<version>\d+)", re.MULTILINE),
|
||||
re.compile(r"iOS\s+(?P<version>\d+(?:)(?:\.\d+)+)", re.MULTILINE),
|
||||
re.compile(r"iPhone\s+v?(?P<version>\d+(?:)(?:\.\d+)+)", re.MULTILINE),
|
||||
],
|
||||
"ipados": [
|
||||
r"iPadOS\s+(?P<version>\d+)",
|
||||
r"iPadOS\s+(?P<version>\d+(?:)(?:\.\d+)+)"
|
||||
re.compile(r"iPadOS\s+(?P<version>\d+)", re.MULTILINE),
|
||||
re.compile(r"iPadOS\s+(?P<version>\d+(?:)(?:\.\d+)+)", re.MULTILINE),
|
||||
],
|
||||
"watchos": [
|
||||
r"watchOS\s+(?P<version>\d+)",
|
||||
r"watchOS\s+(?P<version>\d+(?:)(?:\.\d+)+)"
|
||||
re.compile(r"watchOS\s+(?P<version>\d+)", re.MULTILINE),
|
||||
re.compile(r"watchOS\s+(?P<version>\d+(?:)(?:\.\d+)+)", re.MULTILINE),
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def parse_date(date_str):
    """Parse a date string after rewriting the month abbreviation "Sept" to "Sep".

    The rewritten text is passed to ``dates.parse_date`` and that call's result
    is returned as-is.
    """
    return dates.parse_date(date_str.replace("Sept", "Sep"))
|
||||
|
||||
DATE_PATTERN = re.compile(r"\b\d+\s[A-Za-z]+\s\d+\b")
|
||||
|
||||
print("::group::apple")
|
||||
versions_by_product = {k: {} for k in CONFIG.keys()}
|
||||
|
||||
for response in http.fetch_urls(URLS):
|
||||
soup = BeautifulSoup(response.text, features="html5lib")
|
||||
versions_table = soup.find(id="tableWraper")
|
||||
versions_table = versions_table if versions_table else soup.find('table', class_="gb-table")
|
||||
|
||||
for row in versions_table.findAll("tr")[1:]:
|
||||
cells = row.findAll("td")
|
||||
version_text = cells[0].get_text().strip()
|
||||
date_text = cells[2].get_text().strip()
|
||||
|
||||
date_match = re.search(r"\b\d+\s[A-Za-z]+\s\d+\b", date_text)
|
||||
if not date_match:
|
||||
print(f"{version_text}: {date_text} [IGNORED]")
|
||||
continue
|
||||
|
||||
date = parse_date(date_match.group(0))
|
||||
for product in CONFIG.keys():
|
||||
versions = versions_by_product[product]
|
||||
|
||||
for version_regex in CONFIG[product]:
|
||||
for version in re.findall(version_regex, version_text, re.MULTILINE):
|
||||
if version not in versions:
|
||||
versions[version] = date
|
||||
print(f"{product}-{version}: {date}")
|
||||
elif versions[version] > date:
|
||||
versions[version] = date
|
||||
print(f"{product}-{version}: {date} [UPDATED]")
|
||||
else:
|
||||
print(f"{product}-{version}: {date} [IGNORED]")
|
||||
|
||||
for k in CONFIG.keys():
|
||||
versions = {v: d.strftime("%Y-%m-%d") for v, d in versions_by_product[k].items()}
|
||||
endoflife.write_releases(k, versions)
|
||||
soups = [BeautifulSoup(response.text, features="html5lib") for response in http.fetch_urls(URLS)]
|
||||
print("::endgroup::")
|
||||
|
||||
# One pass per product over the already-parsed pages, so that each product's
# output is wrapped in its own "::group::" / "::endgroup::" section.
for product_name in VERSION_PATTERNS:
    product = endoflife.Product(product_name)
    print(f"::group::{product.name}")

    for soup in soups:
        # Look for the element with id "tableWraper" first (spelled exactly as
        # queried here), then fall back to a <table class="gb-table">.
        versions_table = soup.find(id="tableWraper") or soup.find('table', class_="gb-table")

        # The first row is skipped — presumably the table header; confirm
        # against the pages if the layout changes.
        for row in versions_table.find_all("tr")[1:]:
            cells = row.find_all("td")
            version_text = cells[0].get_text().strip()
            date_text = cells[2].get_text().strip()

            date_match = DATE_PATTERN.search(date_text)
            if not date_match:
                logging.info(f"ignoring version {version_text} ({date_text}), date pattern don't match")
                continue

            date = dates.parse_date(date_match.group(0))
            for version_pattern in VERSION_PATTERNS[product.name]:
                for version in version_pattern.findall(version_text):
                    if not product.has_version(version):
                        product.declare_version(version, date)
                    elif product.get_version_date(version) > date:
                        # Keep the earliest date seen for a given version.
                        product.replace_version(version, date)
                    else:
                        logging.info(f"ignoring version {version} ({date}) for {product.name}")

    product.write()
    print("::endgroup::")
|
||||
|
||||
@@ -42,6 +42,7 @@ def parse_datetime(text, formats=frozenset([
|
||||
"""
|
||||
# so that we don't have to deal with some special characters in formats
|
||||
text = text.strip().replace(", ", " ").replace(". ", " ").replace("(", "").replace(")", "")
|
||||
text = text.replace("Sept ", "Sep ") # common typo, for ex. on Apple and Artifactory products
|
||||
for fmt in formats:
|
||||
try:
|
||||
date = datetime.strptime(text, fmt)
|
||||
|
||||
@@ -43,6 +43,13 @@ class Product:
|
||||
for (version, date) in dates_by_version.items():
|
||||
self.declare_version(version, date)
|
||||
|
||||
def replace_version(self, version: str, date: datetime) -> None:
    """Overwrite the date stored for a version that is already declared.

    :param version: key to look up in ``self.versions``
    :param date: new value stored under that key
    :raises ValueError: if ``version`` is not a key of ``self.versions``
    """
    already_declared = version in self.versions
    if not already_declared:
        raise ValueError(f"version {version} cannot be replaced as it does not exist for {self.name}")

    # Log before assigning so the message still shows the previous date.
    logging.info(f"replacing version {version} ({self.versions[version]} -> {date}) to {self.name}")
    self.versions[version] = date
|
||||
|
||||
def remove_version(self, version: str) -> None:
|
||||
if not self.has_version(version):
|
||||
logging.warning(f"version {version} cannot be removed as it does not exist for {self.name}")
|
||||
|
||||
Reference in New Issue
Block a user