diff --git a/src/aws-lambda.py b/src/aws-lambda.py index 0aad7b03..4c75d617 100644 --- a/src/aws-lambda.py +++ b/src/aws-lambda.py @@ -13,7 +13,6 @@ them though. Note that this would also be unnecessary if it was possible to disa release dates updates in the latest.py script.""" with releasedata.ProductData("aws-lambda") as product_data: - old_product_data = releasedata.ProductData.from_file(product_data.name) product_frontmatter = endoflife.ProductFrontmatter(product_data.name) response = http.fetch_url("https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtimes.html") soup = BeautifulSoup(response.text, features="html5lib") @@ -30,7 +29,7 @@ with releasedata.ProductData("aws-lambda") as product_data: date = product_frontmatter.get_release_date(identifier) # use the product releaseDate if available if date is None: - date = old_product_data.get_version(identifier).date() # else use the previously found date + date = product_data.get_previous_version(identifier).date() # else use the previously found date if date is None: date = dates.today() # else use today's date diff --git a/src/common/releasedata.py b/src/common/releasedata.py index 83018fa1..993aecbf 100644 --- a/src/common/releasedata.py +++ b/src/common/releasedata.py @@ -15,13 +15,14 @@ VERSIONS_PATH = Path(os.environ.get("VERSIONS_PATH", "releases")) class ProductUpdateError(Exception): """Custom exceptions raised when unexpected errors occur during product updates.""" + class ProductVersion: - def __init__(self, product: "ProductData", data: dict) -> None: - self.product = str(product) + def __init__(self, product: str, data: dict) -> None: + self.product = product self.data = data @staticmethod - def of(product: "ProductData", name: str, date: datetime) -> "ProductVersion": + def of(product: str, name: str, date: datetime) -> "ProductVersion": return ProductVersion(product, { "name": name, "date": date.strftime("%Y-%m-%d"), @@ -36,18 +37,38 @@ class ProductVersion: def replace_date(self, date: 
datetime) -> None: self.data["date"] = date.strftime("%Y-%m-%d") + def copy(self) -> "ProductVersion": + return ProductVersion(self.product, self.data.copy()) + def __repr__(self) -> str: return f"{self.product}#{self.name()} ({self.date()})" class ProductData: - def __init__(self, name: str) -> None: + def __init__(self, name: str, cumulative_update: bool = False) -> None: self.name: str = name + self.cumulative_update: bool = cumulative_update self.versions_path: Path = VERSIONS_PATH / f"{name}.json" self.versions: dict[str, ProductVersion] = {} + self.previous_versions: dict[str, ProductVersion] = {} def __enter__(self) -> "ProductData": logging.info(f"::group::{self}") + + if self.versions_path.is_file(): + with self.versions_path.open() as f: + for json_version in json.load(f)["versions"].values(): + version = ProductVersion(self.name, json_version) + self.previous_versions[version.name()] = version + logging.info(f"loaded previous versions data for {self} from {self.versions_path}") + else: + logging.info(f"no previous versions data found for {self} at {self.versions_path}") + + if self.cumulative_update: + logging.info(f"cumulative update is enabled for {self}, will reuse previous versions data") + for name, version in self.previous_versions.items(): + self.versions[name] = version.copy() + return self def __exit__(self, exc_type: Optional[Type[BaseException]], exc_value: Optional[BaseException], @@ -68,24 +89,12 @@ class ProductData: finally: logging.info("::endgroup::") - @staticmethod - def from_file(name: str) -> "ProductData": - product = ProductData(name) - - if product.versions_path.is_file(): - with product.versions_path.open() as f: - for json_version in json.load(f)["versions"].values(): - version = ProductVersion(product, json_version) - product.versions[version.name()] = version - logging.info(f"loaded versions data for {product} from {product.versions_path}") - else: - logging.warning(f"no versions data found for {product} at 
{product.versions_path}") - - return product - def get_version(self, version: str) -> ProductVersion: return self.versions[version] if version in self.versions else None + def get_previous_version(self, version: str) -> ProductVersion: + return self.previous_versions[version] if version in self.previous_versions else None + def declare_version(self, version: str, date: datetime) -> None: if version in self.versions and self.versions[version].date() != date: logging.info(f"overwriting {version} ({self.get_version(version).date()} -> {date}) for {self}") diff --git a/src/firefox.py b/src/firefox.py index 76618be0..5a338f6c 100644 --- a/src/firefox.py +++ b/src/firefox.py @@ -1,45 +1,33 @@ -import re import urllib.parse -from itertools import islice from bs4 import BeautifulSoup from common import dates, http, releasedata """Fetch Firefox versions with their dates from https://www.mozilla.org/. -Versions lower than 10.0 are ignored because too difficult to parse.""" +This script is cumulative: previously found versions are kept, and eventually updated if needed. It only considers the +first MAX_VERSIONS_COUNT versions on Firefox release page because: +- it is too long to fetch them all (at least a minute usually), +- this generates too many requests to the mozilla.org servers, +- and anyway oldest versions are never updated. +Note that it was assumed that: +- the script is ran regularly enough to keep the versions up to date (once a day or week looks enough), +- the versions are listed in descending order on the page, +- new versions are always added inside in the last MAX_VERSIONS_COUNT versions. -# Will be replaced by itertools.batched in Python 3.12+. -# See https://docs.python.org/3/library/itertools.html#itertools.batched. 
-def batched(iterable: iter, n: int) -> iter: - if n < 1: - msg = 'n must be at least one' - raise ValueError(msg) - it = iter(iterable) - while batch := tuple(islice(it, n)): - yield batch +The script will need to be updated if someday those conditions are not met.""" +MAX_VERSIONS_LIMIT = 50 -with releasedata.ProductData("firefox") as product_data: +with releasedata.ProductData("firefox", cumulative_update=True) as product_data: releases_page = http.fetch_url("https://www.mozilla.org/en-US/firefox/releases/") releases_soup = BeautifulSoup(releases_page.text, features="html5lib") releases_list = releases_soup.find_all("ol", class_="c-release-list") + release_notes_urls = [urllib.parse.urljoin(releases_page.url, p.get("href")) for p in releases_list[0].find_all("a")] - - for batch_release_notes_urls in batched(release_notes_urls, 20): - for release_notes in http.fetch_urls(batch_release_notes_urls): - version = release_notes.url.split("/")[-3] - - release_notes_soup = BeautifulSoup(release_notes.text, features="html5lib") - date_elt = release_notes_soup.find(class_="c-release-date") - if date_elt: - date = dates.parse_date(date_elt.get_text()) - product_data.declare_version(version, date) - continue - - date_elt = release_notes_soup.find("small", string=re.compile("^.?First offered")) - if date_elt: - date = dates.parse_date(' '.join(date_elt.get_text().split(" ")[-3:])) # get last 3 words - product_data.declare_version(version, date) - # versions < 10.0 are ignored + for release_notes in http.fetch_urls(release_notes_urls[:MAX_VERSIONS_LIMIT]): + version = release_notes.url.split("/")[-3] + release_notes_soup = BeautifulSoup(release_notes.text, features="html5lib") + date_str = release_notes_soup.find(class_="c-release-date").get_text() # note: only works for versions > 25 + product_data.declare_version(version, dates.parse_date(date_str)) diff --git a/src/unity.py b/src/unity.py index 704344b9..2aaf7e38 100644 --- a/src/unity.py +++ b/src/unity.py @@ -1,25 +1,27 
@@ from bs4 import BeautifulSoup from common import dates, http, releasedata -# Fetches the Unity LTS releases from the Unity website. Non-LTS releases are not listed there, -# so this automation is only partial. -# -# This script iterates over all pages of the Unity LTS releases page, which is paginated. -# It keeps fetching the next page until there is no next page link. +"""Fetches the Unity LTS releases from the Unity website. Non-LTS releases are not listed there, so this automation +is only partial. -BASE_URL = "https://unity.com/releases/editor/qa/lts-releases" +This script is cumulative, only the first page is fetched (i.e. the first ten versions). This is because: +- it is too long to fetch them all (at least 30s, usually more than a minute), +- this generates too many requests to the unity.com servers, +- fetching multiple pages in parallel is raising a lot of errors and makes the overall process slower (this was tested + during https://github.com/endoflife-date/release-data/pull/194), +- and anyway oldest versions are never updated. -next_page_url = BASE_URL -with releasedata.ProductData("unity") as product_data: - # Do not try to fetch multiple pages in parallel: it is raising a lot of errors and make the overall process slower. - while next_page_url: - response = http.fetch_url(next_page_url) - soup = BeautifulSoup(response.text, features="html5lib") +Note that it was assumed that: +- the script is run regularly enough to keep the versions up to date (once a day or week looks enough), +- there are never more than 10 new LTS versions at a time. 
- for release in soup.find_all('div', class_='component-releases-item__show__inner-header'): - version = release.find('h4').find('span').text - date = dates.parse_datetime(release.find('time').attrs['datetime']) - product_data.declare_version(version, date) +The script will need to be updated if someday those conditions are not met.""" - next_link = soup.find('a', {"rel": "next"}) - next_page_url = BASE_URL + next_link.attrs['href'] if next_link else None +with releasedata.ProductData("unity", cumulative_update=True) as product_data: + response = http.fetch_url("https://unity.com/releases/editor/qa/lts-releases") + soup = BeautifulSoup(response.text, features="html5lib") + + for release in soup.find_all('div', class_='component-releases-item__show__inner-header'): + version = release.find('h4').find('span').text + date = dates.parse_datetime(release.find('time').attrs['datetime']) + product_data.declare_version(version, date)