[firefox][unity] Add support for cumulative updates and use it in a few scripts (#295)

Generic support for cumulative updates has been added to reduce the execution time of some scripts that took very long to run (compared with the vast majority of products), usually because they involved a lot of HTTP requests.

This feature was developed particularly for the firefox.py and unity.py scripts, which often took very long to execute (a minute or more according to GHA summaries). Those scripts have been updated to make use of this new feature.
This commit is contained in:
Marc Wrobel
2024-02-04 18:05:18 +01:00
committed by GitHub
parent 0e0e227875
commit dc3f4e0653
4 changed files with 67 additions and 69 deletions

View File

@@ -1,45 +1,33 @@
import re
import urllib.parse
from collections.abc import Iterable, Iterator
from itertools import islice

from bs4 import BeautifulSoup

from common import dates, http, releasedata
"""Fetch Firefox versions with their dates from https://www.mozilla.org/.
Versions lower than 10.0 are ignored because too difficult to parse."""
This script is cumulative: previously found versions are kept, and updated if needed. It only considers the
first MAX_VERSIONS_LIMIT versions on the Firefox releases page because:
- it is too long to fetch them all (at least a minute usually),
- this generates too many requests to the mozilla.org servers,
- and anyway oldest versions are never updated.
Note that it was assumed that:
- the script is run regularly enough to keep the versions up to date (once a day or once a week seems sufficient),
- the versions are listed in descending order on the page,
- new versions are always added within the MAX_VERSIONS_LIMIT most recent versions.
# Will be replaced by itertools.batched in Python 3.12+.
# See https://docs.python.org/3/library/itertools.html#itertools.batched.
def batched(iterable: Iterable, n: int) -> Iterator:
    """Yield the items of *iterable* grouped into tuples of at most *n* items.

    Every tuple holds exactly *n* items except possibly the last one, which
    holds whatever is left. An empty iterable yields nothing.

    Because this is a generator function, the check on *n* only runs when the
    first batch is requested, not when ``batched`` is called.

    Raises:
        ValueError: on first iteration, if *n* is lower than 1.
    """
    if n < 1:
        msg = 'n must be at least one'
        raise ValueError(msg)

    # A single shared iterator lets each islice() call resume where the
    # previous batch stopped.
    it = iter(iterable)
    # An empty tuple is falsy, so the loop stops once the iterator is exhausted.
    while batch := tuple(islice(it, n)):
        yield batch
The script will need to be updated if someday those conditions are not met."""
MAX_VERSIONS_LIMIT = 50
with releasedata.ProductData("firefox") as product_data:
with releasedata.ProductData("firefox", cumulative_update=True) as product_data:
releases_page = http.fetch_url("https://www.mozilla.org/en-US/firefox/releases/")
releases_soup = BeautifulSoup(releases_page.text, features="html5lib")
releases_list = releases_soup.find_all("ol", class_="c-release-list")
release_notes_urls = [urllib.parse.urljoin(releases_page.url, p.get("href")) for p in releases_list[0].find_all("a")]
for batch_release_notes_urls in batched(release_notes_urls, 20):
for release_notes in http.fetch_urls(batch_release_notes_urls):
version = release_notes.url.split("/")[-3]
release_notes_soup = BeautifulSoup(release_notes.text, features="html5lib")
date_elt = release_notes_soup.find(class_="c-release-date")
if date_elt:
date = dates.parse_date(date_elt.get_text())
product_data.declare_version(version, date)
continue
date_elt = release_notes_soup.find("small", string=re.compile("^.?First offered"))
if date_elt:
date = dates.parse_date(' '.join(date_elt.get_text().split(" ")[-3:])) # get last 3 words
product_data.declare_version(version, date)
# versions < 10.0 are ignored
for release_notes in http.fetch_urls(release_notes_urls[:MAX_VERSIONS_LIMIT]):
version = release_notes.url.split("/")[-3]
release_notes_soup = BeautifulSoup(release_notes.text, features="html5lib")
date_str = release_notes_soup.find(class_="c-release-date").get_text() # note: only works for versions > 25
product_data.declare_version(version, dates.parse_date(date_str))