[firefox][unity] Add support for cumulative updates and use it in a few scripts (#295)

Generic support for cumulative updates has been added to speed up the execution time of some scripts that were very long (in comparison with the vast majority of products), usually because they involved a lot of HTTP requests.

This feature was developed particularly for the firefox.py and unity.py scripts, which were often very long to execute (a minute or more according to GHA summaries). Those scripts have been updated to make use of this new feature.
This commit is contained in:
Marc Wrobel
2024-02-04 18:05:18 +01:00
committed by GitHub
parent 0e0e227875
commit dc3f4e0653
4 changed files with 67 additions and 69 deletions

View File

@@ -13,7 +13,6 @@ them though. Note that this would also be unnecessary if it was possible to disa
release dates updates in the latest.py script."""
with releasedata.ProductData("aws-lambda") as product_data:
old_product_data = releasedata.ProductData.from_file(product_data.name)
product_frontmatter = endoflife.ProductFrontmatter(product_data.name)
response = http.fetch_url("https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtimes.html")
soup = BeautifulSoup(response.text, features="html5lib")
@@ -30,7 +29,7 @@ with releasedata.ProductData("aws-lambda") as product_data:
date = product_frontmatter.get_release_date(identifier) # use the product releaseDate if available
if date is None:
date = old_product_data.get_version(identifier).date() # else use the previously found date
date = product_data.get_previous_version(identifier).date() # else use the previously found date
if date is None:
date = dates.today() # else use today's date

View File

@@ -15,13 +15,14 @@ VERSIONS_PATH = Path(os.environ.get("VERSIONS_PATH", "releases"))
class ProductUpdateError(Exception):
"""Custom exceptions raised when unexpected errors occur during product updates."""
class ProductVersion:
def __init__(self, product: "ProductData", data: dict) -> None:
self.product = str(product)
def __init__(self, product: str, data: dict) -> None:
self.product = product
self.data = data
@staticmethod
def of(product: "ProductData", name: str, date: datetime) -> "ProductVersion":
def of(product: str, name: str, date: datetime) -> "ProductVersion":
return ProductVersion(product, {
"name": name,
"date": date.strftime("%Y-%m-%d"),
@@ -36,18 +37,38 @@ class ProductVersion:
def replace_date(self, date: datetime) -> None:
self.data["date"] = date.strftime("%Y-%m-%d")
def copy(self) -> "ProductVersion":
return ProductVersion(self.product, self.data.copy())
def __repr__(self) -> str:
return f"{self.product}#{self.name()} ({self.date()})"
class ProductData:
def __init__(self, name: str) -> None:
def __init__(self, name: str, cumulative_update: bool = False) -> None:
self.name: str = name
self.cumulative_update: bool = cumulative_update
self.versions_path: Path = VERSIONS_PATH / f"{name}.json"
self.versions: dict[str, ProductVersion] = {}
self.previous_versions: dict[str, ProductVersion] = {}
def __enter__(self) -> "ProductData":
logging.info(f"::group::{self}")
if self.versions_path.is_file():
with self.versions_path.open() as f:
for json_version in json.load(f)["versions"].values():
version = ProductVersion(self.name, json_version)
self.previous_versions[version.name()] = version
logging.info(f"loaded previous versions data for {self} from {self.versions_path}")
else:
logging.info(f"no previous versions data found for {self} at {self.versions_path}")
if self.cumulative_update:
logging.info(f"cumulative update is enabled for {self}, will reuse previous versions data")
for name, version in self.previous_versions.items():
self.versions[name] = version.copy()
return self
def __exit__(self, exc_type: Optional[Type[BaseException]], exc_value: Optional[BaseException],
@@ -68,24 +89,12 @@ class ProductData:
finally:
logging.info("::endgroup::")
@staticmethod
def from_file(name: str) -> "ProductData":
product = ProductData(name)
if product.versions_path.is_file():
with product.versions_path.open() as f:
for json_version in json.load(f)["versions"].values():
version = ProductVersion(product, json_version)
product.versions[version.name()] = version
logging.info(f"loaded versions data for {product} from {product.versions_path}")
else:
logging.warning(f"no versions data found for {product} at {product.versions_path}")
return product
def get_version(self, version: str) -> ProductVersion:
return self.versions[version] if version in self.versions else None
def get_previous_version(self, version: str) -> ProductVersion:
return self.previous_versions[version] if version in self.previous_versions else None
def declare_version(self, version: str, date: datetime) -> None:
if version in self.versions and self.versions[version].date() != date:
logging.info(f"overwriting {version} ({self.get_version(version).date()} -> {date}) for {self}")

View File

@@ -1,45 +1,33 @@
import re
import urllib.parse
from itertools import islice
from bs4 import BeautifulSoup
from common import dates, http, releasedata
"""Fetch Firefox versions with their dates from https://www.mozilla.org/.
Versions lower than 10.0 are ignored because too difficult to parse."""
This script is cumulative: previously found versions are kept, and updated if needed. It only considers the
first MAX_VERSIONS_LIMIT versions on the Firefox releases page because:
- it takes too long to fetch them all (at least a minute, usually),
- this generates too many requests to the mozilla.org servers,
- and anyway oldest versions are never updated.
Note that it was assumed that:
- the script is run regularly enough to keep the versions up to date (once a day or week looks sufficient),
- the versions are listed in descending order on the page,
- new versions are always added within the last MAX_VERSIONS_LIMIT versions.
# Will be replaced by itertools.batched in Python 3.12+.
# See https://docs.python.org/3/library/itertools.html#itertools.batched.
def batched(iterable: iter, n: int) -> iter:
if n < 1:
msg = 'n must be at least one'
raise ValueError(msg)
it = iter(iterable)
while batch := tuple(islice(it, n)):
yield batch
The script will need to be updated if someday those conditions are not met."""
MAX_VERSIONS_LIMIT = 50
with releasedata.ProductData("firefox") as product_data:
with releasedata.ProductData("firefox", cumulative_update=True) as product_data:
releases_page = http.fetch_url("https://www.mozilla.org/en-US/firefox/releases/")
releases_soup = BeautifulSoup(releases_page.text, features="html5lib")
releases_list = releases_soup.find_all("ol", class_="c-release-list")
release_notes_urls = [urllib.parse.urljoin(releases_page.url, p.get("href")) for p in releases_list[0].find_all("a")]
for batch_release_notes_urls in batched(release_notes_urls, 20):
for release_notes in http.fetch_urls(batch_release_notes_urls):
version = release_notes.url.split("/")[-3]
release_notes_soup = BeautifulSoup(release_notes.text, features="html5lib")
date_elt = release_notes_soup.find(class_="c-release-date")
if date_elt:
date = dates.parse_date(date_elt.get_text())
product_data.declare_version(version, date)
continue
date_elt = release_notes_soup.find("small", string=re.compile("^.?First offered"))
if date_elt:
date = dates.parse_date(' '.join(date_elt.get_text().split(" ")[-3:])) # get last 3 words
product_data.declare_version(version, date)
# versions < 10.0 are ignored
for release_notes in http.fetch_urls(release_notes_urls[:MAX_VERSIONS_LIMIT]):
version = release_notes.url.split("/")[-3]
release_notes_soup = BeautifulSoup(release_notes.text, features="html5lib")
date_str = release_notes_soup.find(class_="c-release-date").get_text() # note: only works for versions > 25
product_data.declare_version(version, dates.parse_date(date_str))

View File

@@ -1,25 +1,27 @@
from bs4 import BeautifulSoup
from common import dates, http, releasedata
# Fetches the Unity LTS releases from the Unity website. Non-LTS releases are not listed there,
# so this automation is only partial.
#
# This script iterates over all pages of the Unity LTS releases page, which is paginated.
# It keeps fetching the next page until there is no next page link.
"""Fetches the Unity LTS releases from the Unity website. Non-LTS releases are not listed there, so this automation
is only partial.
BASE_URL = "https://unity.com/releases/editor/qa/lts-releases"
This script is cumulative, only the first page is fetched (e.g. the first ten versions). This is because:
- it takes too long to fetch them all (at least 30s, usually more than a minute),
- this generates too many requests to the unity.com servers,
- fetching multiple pages in parallel raises a lot of errors and makes the overall process slower (this was tested
during https://github.com/endoflife-date/release-data/pull/194),
- and anyway oldest versions are never updated.
next_page_url = BASE_URL
with releasedata.ProductData("unity") as product_data:
# Do not try to fetch multiple pages in parallel: it is raising a lot of errors and make the overall process slower.
while next_page_url:
response = http.fetch_url(next_page_url)
soup = BeautifulSoup(response.text, features="html5lib")
Note that it was assumed that:
- the script is run regularly enough to keep the versions up to date (once a day or week looks sufficient),
- there are never more than 10 new LTS versions at a time.
for release in soup.find_all('div', class_='component-releases-item__show__inner-header'):
version = release.find('h4').find('span').text
date = dates.parse_datetime(release.find('time').attrs['datetime'])
product_data.declare_version(version, date)
The script will need to be updated if someday those conditions are not met."""
next_link = soup.find('a', {"rel": "next"})
next_page_url = BASE_URL + next_link.attrs['href'] if next_link else None
with releasedata.ProductData("unity", cumulative_update=True) as product_data:
response = http.fetch_url("https://unity.com/releases/editor/qa/lts-releases")
soup = BeautifulSoup(response.text, features="html5lib")
for release in soup.find_all('div', class_='component-releases-item__show__inner-header'):
version = release.find('h4').find('span').text
date = dates.parse_datetime(release.find('time').attrs['datetime'])
product_data.declare_version(version, date)