Improve scripts execution orchestration (#299)

Until now products could declare multiple auto-update methods, but they all had to be of the same kind.
For example if you used the git auto-update method, you could not use an additional github_releases or custom auto-update method.
This is an issue as it prevents us from extending the auto-update process, for example by having a product use the 'git' auto-update method to retrieve all the versions, and a custom script to retrieve support and EOL dates.

This improves the scripts execution orchestration to support auto configurations using a mix of methods, meaning:

- multiple kinds of methods, such as git and github_releases,
- or multiple custom methods.

A side-effect of those changes is that now a failure in a generic script does not cancel the update of subsequent products.

Another side-effect, unwanted this time, is that custom scripts managing multiple products, such as apple.py, are now executed multiple times instead of once.
This commit is contained in:
Marc Wrobel
2024-02-11 15:28:26 +01:00
committed by GitHub
parent 2b6e21786e
commit a0ba2d687e
17 changed files with 254 additions and 193 deletions

View File

@@ -51,10 +51,7 @@ VERSION_PATTERNS = {
DATE_PATTERN = re.compile(r"\b\d+\s[A-Za-z]+\s\d+\b")
logging.info("::group::apple")
soups = [BeautifulSoup(response.text, features="html5lib") for response in http.fetch_urls(URLS)]
logging.info("::endgroup::")
for product_name in VERSION_PATTERNS:
with releasedata.ProductData(product_name) as product_data:
for soup in soups:

View File

@@ -28,8 +28,6 @@ with releasedata.ProductData("aws-lambda") as product_data:
identifier = cells[identifier_index].get_text().strip()
date = product_frontmatter.get_release_date(identifier) # use the product releaseDate if available
if date is None:
date = product_data.get_previous_version(identifier).date() # else use the previously found date
if date is None:
date = dates.today() # else use today's date

View File

@@ -9,28 +9,28 @@ Ideally we would want to use the git repository directly, but cgit-managed repos
METHOD = "cgit"
p_filter = sys.argv[1] if len(sys.argv) > 1 else None
for product in endoflife.list_products(METHOD, p_filter):
with releasedata.ProductData(product.name) as product_data:
for config in product.get_auto_configs(METHOD):
response = http.fetch_url(config.url + '/refs/tags')
soup = BeautifulSoup(response.text, features="html5lib")
m_filter = sys.argv[2] if len(sys.argv) > 2 else None
for config in endoflife.list_configs(p_filter, METHOD, m_filter):
with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url + '/refs/tags')
soup = BeautifulSoup(response.text, features="html5lib")
for table in soup.find_all("table", class_="list"):
for row in table.find_all("tr"):
columns = row.find_all("td")
if len(columns) != 4:
continue
for table in soup.find_all("table", class_="list"):
for row in table.find_all("tr"):
columns = row.find_all("td")
if len(columns) != 4:
continue
version_str = columns[0].text.strip()
version_match = config.first_match(version_str)
if not version_match:
continue
version_str = columns[0].text.strip()
version_match = config.first_match(version_str)
if not version_match:
continue
datetime_td = columns[3].find_next("span")
datetime_str = datetime_td.attrs["title"] if datetime_td else None
if not datetime_str:
continue
datetime_td = columns[3].find_next("span")
datetime_str = datetime_td.attrs["title"] if datetime_td else None
if not datetime_str:
continue
version = config.render(version_match)
date = dates.parse_datetime(datetime_str)
product_data.declare_version(version, date)
version = config.render(version_match)
date = dates.parse_datetime(datetime_str)
product_data.declare_version(version, date)

View File

@@ -1,3 +1,4 @@
import itertools
import logging
import os
import re
@@ -17,11 +18,14 @@ PRODUCTS_PATH = Path(os.environ.get("PRODUCTS_PATH", "website/products"))
class AutoConfig:
def __init__(self, method: str, config: dict) -> None:
self.method = method
self.url = config[method]
def __init__(self, product: str, config: dict) -> None:
self.product = product
self.method = next(key for key in config if key not in ("template", "regex", "regex_exclude"))
self.url = config[self.method]
self.version_template = Template(config.get("template", DEFAULT_VERSION_TEMPLATE))
self.script = f"{self.url}.py" if self.method == "custom" else f"{self.method}.py"
regexes_include = config.get("regex", DEFAULT_VERSION_REGEX)
regexes_include = regexes_include if isinstance(regexes_include, list) else [regexes_include]
self.include_version_patterns = [re.compile(r) for r in regexes_include]
@@ -45,6 +49,9 @@ class AutoConfig:
def render(self, match: re.Match) -> str:
return self.version_template.render(**match.groupdict())
def __repr__(self) -> str:
return f"{self.product}#{self.method}({self.url})"
class ProductFrontmatter:
def __init__(self, name: str) -> None:
@@ -59,17 +66,23 @@ class ProductFrontmatter:
else:
logging.warning(f"no product data found for {self.name} at {self.path}")
def get_auto_configs(self, method: str) -> list[AutoConfig]:
def has_auto_configs(self) -> bool:
return self.data and "methods" in self.data.get("auto", {})
def is_auto_update_cumulative(self) -> bool:
return self.data.get("auto", {}).get("cumulative", False)
def auto_configs(self, method_filter: str = None, url_filter: str = None) -> list[AutoConfig]:
configs = []
all_configs = self.data.get("auto", {}).get("methods", [])
for config in all_configs:
if method in config:
configs.append(AutoConfig(method, config))
configs_data = self.data.get("auto", {}).get("methods", [])
for config_data in configs_data:
config = AutoConfig(self.name, config_data)
if ((method_filter and config.method != method_filter)
or (url_filter and config.url != url_filter)):
continue
if len(configs) > 0 and len(configs) != len(all_configs):
message = f"mixed auto-update methods declared for {self.name}, this is not yet supported"
raise ValueError(message)
configs.append(config)
return configs
@@ -80,7 +93,7 @@ class ProductFrontmatter:
return None
def list_products(method: str, products_filter: str = None) -> list[ProductFrontmatter]:
def list_products(products_filter: str = None) -> list[ProductFrontmatter]:
"""Return a list of products that are using the same given update method."""
products = []
@@ -89,9 +102,12 @@ def list_products(method: str, products_filter: str = None) -> list[ProductFront
if products_filter and product_name != products_filter:
continue
product = ProductFrontmatter(product_name)
configs = product.get_auto_configs(method)
if len(configs) > 0:
products.append(product)
products.append(ProductFrontmatter(product_name))
return products
def list_configs(products_filter: str = None, methods_filter: str = None, urls_filter: str = None) -> list[AutoConfig]:
products = list_products(products_filter)
configs_by_product = [p.auto_configs(methods_filter, urls_filter) for p in products]
return list(itertools.chain.from_iterable(configs_by_product)) # flatten the list of lists

View File

@@ -45,3 +45,14 @@ class GitHubStepSummary:
if var_exists:
with open(os.environ["GITHUB_STEP_SUMMARY"], 'a') as github_step_summary: # NOQA: PTH123
print(self.value, file=github_step_summary)
class GitHubGroup:
    """Context manager that groups log output in the GitHub Actions UI.

    Emits the ``::group::``/``::endgroup::`` workflow commands, so that
    everything logged inside the ``with`` block is collapsed under *name*
    in the Actions log viewer.
    """

    def __init__(self, name: str) -> None:
        # Title shown on the collapsed group header in the Actions log.
        self.name = name

    def __enter__(self) -> None:
        # Open the named log group (GitHub Actions workflow command).
        logging.info(f"::group::{self.name}")

    # NOTE(review): the annotations use the builtin `any` instead of
    # `typing.Any` — harmless at runtime, but worth fixing separately.
    def __exit__(self, exc_type: any, exc_value: any, traceback: any) -> None:
        # Always close the group, even if the body raised (no exception
        # suppression: returning None propagates the exception).
        logging.info("::endgroup::")

View File

@@ -37,71 +37,53 @@ class ProductVersion:
def replace_date(self, date: datetime) -> None:
self.data["date"] = date.strftime("%Y-%m-%d")
def copy(self) -> "ProductVersion":
return ProductVersion(self.product, self.data.copy())
def __repr__(self) -> str:
return f"{self.product}#{self.name()} ({self.date()})"
class ProductData:
def __init__(self, name: str, cumulative_update: bool = False) -> None:
def __init__(self, name: str) -> None:
self.name: str = name
self.cumulative_update: bool = cumulative_update
self.versions_path: Path = VERSIONS_PATH / f"{name}.json"
self.versions: dict[str, ProductVersion] = {}
self.previous_versions: dict[str, ProductVersion] = {}
def __enter__(self) -> "ProductData":
logging.info(f"::group::{self}")
if self.versions_path.is_file():
with self.versions_path.open() as f:
for json_version in json.load(f)["versions"].values():
version = ProductVersion(self.name, json_version)
self.previous_versions[version.name()] = version
logging.info(f"loaded previous versions data for {self} from {self.versions_path}")
self.versions[version.name()] = version
logging.info(f"loaded versions data for {self} from {self.versions_path}")
else:
logging.info(f"no previous versions data found for {self} at {self.versions_path}")
if self.cumulative_update:
logging.info(f"cumulative update is enabled for {self}, will reuse previous versions data")
for name, version in self.previous_versions.items():
self.versions[name] = version.copy()
logging.info(f"no versions data found for {self} at {self.versions_path}")
return self
def __exit__(self, exc_type: Optional[Type[BaseException]], exc_value: Optional[BaseException],
exc_traceback: Optional[TracebackType]) -> None:
try:
if exc_value:
message = f"an unexpected error occurred while updating {self} data"
logging.error(message, exc_info=exc_value)
raise ProductUpdateError(message) from exc_value
if exc_value:
message = f"an unexpected error occurred while updating {self} data"
logging.error(message, exc_info=exc_value)
raise ProductUpdateError(message) from exc_value
logging.info("updating %s data",self)
# sort by date then version (desc)
ordered_versions = sorted(self.versions.values(), key=lambda v: (v.date(), v.name()), reverse=True)
with self.versions_path.open("w") as f:
f.write(json.dumps({
"versions": {version.name(): version.data for version in ordered_versions},
}, indent=2))
finally:
logging.info("::endgroup::")
logging.info("updating %s data",self.versions_path)
# sort by date then version (desc)
ordered_versions = sorted(self.versions.values(), key=lambda v: (v.date(), v.name()), reverse=True)
with self.versions_path.open("w") as f:
f.write(json.dumps({
"versions": {version.name(): version.data for version in ordered_versions},
}, indent=2))
def get_version(self, version: str) -> ProductVersion:
return self.versions[version] if version in self.versions else None
def get_previous_version(self, version: str) -> ProductVersion:
return self.previous_versions[version] if version in self.previous_versions else None
def declare_version(self, version: str, date: datetime) -> None:
if version in self.versions and self.versions[version].date() != date:
logging.info(f"overwriting {version} ({self.get_version(version).date()} -> {date}) for {self}")
self.versions[version].replace_date(date)
else:
logging.info(f"adding version {version} ({date}) to {self}")
self.versions[version] = ProductVersion.of(self, version, date)
self.versions[version] = ProductVersion.of(self.name, version, date)
def declare_versions(self, dates_by_version: dict[str, datetime]) -> None:
for (version, date) in dates_by_version.items():

View File

@@ -6,21 +6,21 @@ from common import dates, endoflife, http, releasedata
METHOD = 'distrowatch'
p_filter = sys.argv[1] if len(sys.argv) > 1 else None
for product in endoflife.list_products(METHOD, p_filter):
with releasedata.ProductData(product.name) as product_data:
for config in product.get_auto_configs(METHOD):
response = http.fetch_url(f"https://distrowatch.com/index.php?distribution={config.url}")
soup = BeautifulSoup(response.text, features="html5lib")
m_filter = sys.argv[2] if len(sys.argv) > 2 else None
for config in endoflife.list_configs(p_filter, METHOD, m_filter):
with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(f"https://distrowatch.com/index.php?distribution={config.url}")
soup = BeautifulSoup(response.text, features="html5lib")
for table in soup.select("td.News1>table.News"):
headline = table.select_one("td.NewsHeadline a[href]").get_text().strip()
versions_match = config.first_match(headline)
if not versions_match:
continue
for table in soup.select("td.News1>table.News"):
headline = table.select_one("td.NewsHeadline a[href]").get_text().strip()
versions_match = config.first_match(headline)
if not versions_match:
continue
# multiple versions may be released at once (e.g. Ubuntu 16.04.7 and 18.04.5)
versions = config.render(versions_match).split("\n")
date = dates.parse_date(table.select_one("td.NewsDate").get_text())
# multiple versions may be released at once (e.g. Ubuntu 16.04.7 and 18.04.5)
versions = config.render(versions_match).split("\n")
date = dates.parse_date(table.select_one("td.NewsDate").get_text())
for version in versions:
product_data.declare_version(version, date)
for version in versions:
product_data.declare_version(version, date)

View File

@@ -23,7 +23,7 @@ def fetch_releases(p: releasedata.ProductData, c: endoflife.AutoConfig, url: str
p_filter = sys.argv[1] if len(sys.argv) > 1 else None
for product in endoflife.list_products(METHOD, p_filter):
with releasedata.ProductData(product.name) as product_data:
for config in product.get_auto_configs(METHOD):
fetch_releases(product_data, config, f"https://hub.docker.com/v2/repositories/{config.url}/tags?page_size=100&page=1")
m_filter = sys.argv[2] if len(sys.argv) > 2 else None
for config in endoflife.list_configs(p_filter, METHOD, m_filter):
with releasedata.ProductData(config.product) as product_data:
fetch_releases(product_data, config, f"https://hub.docker.com/v2/repositories/{config.url}/tags?page_size=100&page=1")

View File

@@ -20,7 +20,7 @@ The script will need to be updated if someday those conditions are not met."""
MAX_VERSIONS_LIMIT = 50
with releasedata.ProductData("firefox", cumulative_update=True) as product_data:
with releasedata.ProductData("firefox") as product_data:
releases_page = http.fetch_url("https://www.mozilla.org/en-US/firefox/releases/")
releases_soup = BeautifulSoup(releases_page.text, features="html5lib")
releases_list = releases_soup.find_all("ol", class_="c-release-list")

View File

@@ -8,16 +8,16 @@ from common.git import Git
METHOD = 'git'
p_filter = sys.argv[1] if len(sys.argv) > 1 else None
for product in endoflife.list_products(METHOD, p_filter):
with releasedata.ProductData(product.name) as product_data:
for config in product.get_auto_configs(METHOD):
git = Git(config.url)
git.setup(bare=True)
m_filter = sys.argv[2] if len(sys.argv) > 2 else None
for config in endoflife.list_configs(p_filter, METHOD, m_filter):
with releasedata.ProductData(config.product) as product_data:
git = Git(config.url)
git.setup(bare=True)
tags = git.list_tags()
for tag, date_str in tags:
version_match = config.first_match(tag)
if version_match:
version = config.render(version_match)
date = dates.parse_date(date_str)
product_data.declare_version(version, date)
tags = git.list_tags()
for tag, date_str in tags:
version_match = config.first_match(tag)
if version_match:
version = config.render(version_match)
date = dates.parse_date(date_str)
product_data.declare_version(version, date)

View File

@@ -43,17 +43,17 @@ query($endCursor: String) {
p_filter = sys.argv[1] if len(sys.argv) > 1 else None
for product in endoflife.list_products(METHOD, p_filter):
with releasedata.ProductData(product.name) as product_data:
for config in product.get_auto_configs(METHOD):
for page in fetch_releases(config.url):
releases = [edge['node'] for edge in (page['data']['repository']['releases']['edges'])]
m_filter = sys.argv[2] if len(sys.argv) > 2 else None
for config in endoflife.list_configs(p_filter, METHOD, m_filter):
with releasedata.ProductData(config.product) as product_data:
for page in fetch_releases(config.url):
releases = [edge['node'] for edge in (page['data']['repository']['releases']['edges'])]
for release in releases:
if not release['isPrerelease']:
version_str = release['name']
version_match = config.first_match(version_str)
if version_match:
version = config.render(version_match)
date = dates.parse_datetime(release['publishedAt'])
product_data.declare_version(version, date)
for release in releases:
if not release['isPrerelease']:
version_str = release['name']
version_match = config.first_match(version_str)
if version_match:
version = config.render(version_match)
date = dates.parse_datetime(release['publishedAt'])
product_data.declare_version(version, date)

View File

@@ -6,23 +6,23 @@ from common import endoflife, http, releasedata
METHOD = "maven"
p_filter = sys.argv[1] if len(sys.argv) > 1 else None
for product in endoflife.list_products(METHOD, p_filter):
with releasedata.ProductData(product.name) as product_data:
for config in product.get_auto_configs(METHOD):
start = 0
group_id, artifact_id = config.url.split("/")
m_filter = sys.argv[2] if len(sys.argv) > 2 else None
for config in endoflife.list_configs(p_filter, METHOD, m_filter):
with releasedata.ProductData(config.product) as product_data:
start = 0
group_id, artifact_id = config.url.split("/")
while True:
url = f"https://search.maven.org/solrsearch/select?q=g:{group_id}+AND+a:{artifact_id}&core=gav&wt=json&start={start}&rows=100"
data = http.fetch_url(url).json()
while True:
url = f"https://search.maven.org/solrsearch/select?q=g:{group_id}+AND+a:{artifact_id}&core=gav&wt=json&start={start}&rows=100"
data = http.fetch_url(url).json()
for row in data["response"]["docs"]:
version_match = config.first_match(row["v"])
if version_match:
version = config.render(version_match)
date = datetime.fromtimestamp(row["timestamp"] / 1000, tz=timezone.utc)
product_data.declare_version(version, date)
for row in data["response"]["docs"]:
version_match = config.first_match(row["v"])
if version_match:
version = config.render(version_match)
date = datetime.fromtimestamp(row["timestamp"] / 1000, tz=timezone.utc)
product_data.declare_version(version, date)
start += 100
if data["response"]["numFound"] <= start:
break
start += 100
if data["response"]["numFound"] <= start:
break

View File

@@ -5,13 +5,13 @@ from common import dates, endoflife, http, releasedata
METHOD = "npm"
p_filter = sys.argv[1] if len(sys.argv) > 1 else None
for product in endoflife.list_products(METHOD, p_filter):
with releasedata.ProductData(product.name) as product_data:
for config in product.get_auto_configs(METHOD):
data = http.fetch_url(f"https://registry.npmjs.org/{config.url}").json()
for version_str in data["versions"]:
version_match = config.first_match(version_str)
if version_match:
version = config.render(version_match)
date = dates.parse_datetime(data["time"][version_str])
product_data.declare_version(version, date)
m_filter = sys.argv[2] if len(sys.argv) > 2 else None
for config in endoflife.list_configs(p_filter, METHOD, m_filter):
with releasedata.ProductData(config.product) as product_data:
data = http.fetch_url(f"https://registry.npmjs.org/{config.url}").json()
for version_str in data["versions"]:
version_match = config.first_match(version_str)
if version_match:
version = config.render(version_match)
date = dates.parse_datetime(data["time"][version_str])
product_data.declare_version(version, date)

View File

@@ -1,4 +1,3 @@
import logging
import re
from bs4 import BeautifulSoup
@@ -11,11 +10,8 @@ IDENTIFIERS_BY_PRODUCT = {
}
# all products are on the same page, it's faster to fetch it only once
logging.info("::group::palo-alto-networks")
response = http.fetch_url("https://www.paloaltonetworks.com/services/support/end-of-life-announcements/end-of-life-summary")
soup = BeautifulSoup(response.text, features="html5lib")
logging.info("::endgroup::")
for product_name, identifier in IDENTIFIERS_BY_PRODUCT.items():
with releasedata.ProductData(product_name) as product_data:
table = soup.find(id=identifier)

View File

@@ -5,16 +5,16 @@ from common import dates, endoflife, http, releasedata
METHOD = "pypi"
p_filter = sys.argv[1] if len(sys.argv) > 1 else None
for product in endoflife.list_products(METHOD, p_filter):
with releasedata.ProductData(product.name) as product_data:
for config in product.get_auto_configs(METHOD):
data = http.fetch_url(f"https://pypi.org/pypi/{config.url}/json").json()
m_filter = sys.argv[2] if len(sys.argv) > 2 else None
for config in endoflife.list_configs(p_filter, METHOD, m_filter):
with releasedata.ProductData(config.product) as product_data:
data = http.fetch_url(f"https://pypi.org/pypi/{config.url}/json").json()
for version_str in data["releases"]:
version_match = config.first_match(version_str)
version_data = data["releases"][version_str]
for version_str in data["releases"]:
version_match = config.first_match(version_str)
version_data = data["releases"][version_str]
if version_match and version_data:
version = config.render(version_match)
date = dates.parse_datetime(version_data[0]["upload_time_iso_8601"])
product_data.declare_version(version, date)
if version_match and version_data:
version = config.render(version_match)
date = dates.parse_datetime(version_data[0]["upload_time_iso_8601"])
product_data.declare_version(version, date)

View File

@@ -17,7 +17,7 @@ Note that it was assumed that:
The script will need to be updated if someday those conditions are not met."""
with releasedata.ProductData("unity", cumulative_update=True) as product_data:
with releasedata.ProductData("unity") as product_data:
response = http.fetch_url("https://unity.com/releases/editor/qa/lts-releases")
soup = BeautifulSoup(response.text, features="html5lib")