[firefox] Add automation (#67)

Signed-off-by: Benji Visser <benji@093b.org>
This commit is contained in:
Benji Visser
2023-01-08 06:43:43 -05:00
committed by GitHub
parent b63aa7c317
commit f9597f55d7
2 changed files with 656 additions and 0 deletions

153
src/firefox.py Normal file
View File

@@ -0,0 +1,153 @@
import json
from typing import Tuple
from datetime import datetime
import re
import requests
import urllib.request
from bs4 import BeautifulSoup
import concurrent.futures
"""Fetch Firefox versions with their dates from https://www.mozilla.org/en-US/firefox/releases/"""
# Index page listing the Firefox releases; release-note links found on it are
# resolved against this URL.
URL = "https://www.mozilla.org/en-US/firefox/releases/"
# Product identifier, used for the log group name and the output file name.
PRODUCT = "firefox"
# Matches a written date such as "July 11, 2002" or "Sept 3rd, 2010": a full or
# abbreviated English month name, a 1-2 digit day with an optional ordinal
# suffix, a comma, and a 4-digit year.
DATE_REGEX = r"(January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|July|Jul|August|Aug|September|Sept|October|Oct|November|Nov|December|Dec)\s+\d{1,2}(st|nd|rd|th)?,\s+\d{4}"
# Matches a dotted numeric version such as "3", "10.0" or "3.6.28".
VERSION_REGEX = r"\d+(\.\d+)*"
class UnsupportedReleasePageError(Exception):
    """Raised when a firefox release page is not supported."""
class InvalidPageVariantError(Exception):
    """Raised when an invalid variant is passed to get_version_and_date."""
def format_date(unformatted_date: str) -> str:
    """Convert a written date such as ``July 11, 2002`` to ``2002-07-11``.

    Accepts full or abbreviated English month names, an optional ordinal
    suffix on the day (``1st``, ``22nd``) and the four-letter ``Sept``
    abbreviation, which ``DATE_REGEX`` matches but ``strptime`` does not
    recognise. Leading and trailing whitespace is ignored.

    Args:
        unformatted_date: Date text in ``Month D, YYYY`` form.

    Returns:
        The date formatted as ``YYYY-MM-DD``, or an empty string when the
        input matches none of the supported formats.
    """
    # Drop ordinal suffixes ("3rd" -> "3") and surrounding whitespace; strptime
    # rejects both.
    date = re.sub(r"(\d)(st|nd|rd|th)", r"\1", unformatted_date).strip()
    # "%b" only understands the three-letter abbreviation "Sep".
    date = re.sub(r"\bSept\b", "Sep", date)
    for date_format in ("%b %d, %Y", "%B %d, %Y"):
        try:
            return datetime.strptime(date, date_format).strftime("%Y-%m-%d")
        except ValueError:
            continue
    return ""
def get_version_and_date_varant_1(soup: BeautifulSoup) -> Tuple[str, str]:
    """Extract version and date from a release page (variant 1).

    Version matching for firefox versions >= 28.0 (usually): the version is
    the text of ``div.c-release-version`` and the date is the text of
    ``p.c-release-date``.

    Args:
        soup: Parsed release page.

    Returns:
        ``(version, date)`` where the date has been passed through
        ``format_date``.

    Raises:
        AttributeError: If either element is missing (``find`` returned
            ``None``).
        InvalidPageVariantError: If the version text is blank or the date
            text cannot be parsed.
    """
    # Strip surrounding whitespace so it does not leak into the version key.
    version = soup.find("div", class_="c-release-version").get_text().strip()
    if not version:
        raise InvalidPageVariantError("Unable to find version")

    unformatted_date = soup.find("p", class_="c-release-date").get_text().strip()
    date = format_date(unformatted_date)
    # format_date signals failure with an empty string; reject it here rather
    # than returning a release with a blank date.
    if not date:
        raise InvalidPageVariantError("Unable to find date")
    return (version, date)
def get_version_and_date_variant_2(soup: BeautifulSoup) -> Tuple[str, str]:
    """Extract version and date from a release page (variant 2).

    Version matching for firefox versions >= 10.0 (usually): the version is
    read from the ``href`` of the first link under ``div#nav-access`` and the
    date from the ``<small>`` element inside the first ``<h2>``.

    Args:
        soup: Parsed release page.

    Returns:
        ``(version, date)`` where the date has been passed through
        ``format_date``.

    Raises:
        AttributeError: If the ``<h2>`` or its ``<small>`` child is missing.
        IndexError: If ``div#nav-access`` contains no link.
        InvalidPageVariantError: If no version or no parseable date is found.
    """
    release_info = soup.find("h2").find("small").text

    href = soup.select("div#nav-access a")[0].get("href")
    # An anchor without an href yields None; re.search would then raise a
    # TypeError, which get_version_and_date does not catch.
    version_match = re.search(VERSION_REGEX, href) if href else None
    if version_match is None:
        raise InvalidPageVariantError("Unable to find version")
    version = version_match.group()

    date_match = re.search(DATE_REGEX, release_info)
    if date_match is None:
        raise InvalidPageVariantError("Unable to find date")
    date = format_date(date_match.group())
    # format_date signals failure with an empty string; reject it here rather
    # than returning a release with a blank date.
    if not date:
        raise InvalidPageVariantError("Unable to find date")
    return (version, date)
def get_version_and_date_variant_3(soup: BeautifulSoup) -> Tuple[str, str]:
    """Extract version and date from a release page (variant 3).

    Version matching for firefox versions >= 3.0 (usually): both the version
    and the date are searched for in the text of the first ``<em>`` found
    under ``div#main-feature p``.

    Args:
        soup: Parsed release page.

    Returns:
        ``(version, date)`` where the date has been passed through
        ``format_date``.

    Raises:
        IndexError: If the selector matches no element.
        InvalidPageVariantError: If no version or no parseable date is found.
    """
    release_info = soup.select("div#main-feature p em")[0].get_text()

    version_match = re.search(VERSION_REGEX, release_info)
    if version_match is None:
        raise InvalidPageVariantError("Unable to find version")
    version = version_match.group()

    date_match = re.search(DATE_REGEX, release_info)
    if date_match is None:
        raise InvalidPageVariantError("Unable to find date")
    date = format_date(date_match.group())
    # format_date signals failure with an empty string; reject it here rather
    # than returning a release with a blank date.
    if not date:
        raise InvalidPageVariantError("Unable to find date")
    return (version, date)
def get_version_and_date(release_page: str, release_version: str) -> Tuple[str, str]:
    """Get version and date from the given release page.

    Args:
        release_page: URL of the release page to fetch and parse.
        release_version: Version label for that page (e.g. ``"108.0"``). Only
            its leading major component is inspected, to skip pages that are
            known not to carry a date.

    Returns:
        ``(version, date)`` as produced by the first page variant parser that
        succeeds.

    Raises:
        UnsupportedReleasePageError: If the version label has no numeric major
            component, the major version is below 3, or no variant parser can
            extract a version and date from the page.
    """
    try:
        major = int(release_version.split(".")[0])
    except ValueError as err:
        # A non-numeric label cannot be classified; report it like any other
        # unsupported page instead of letting ValueError escape to the caller.
        raise UnsupportedReleasePageError(f"Unsupported release page: {release_page}") from err

    # firefox release pages for versions <3.0 don't include release dates so we
    # can't match these versions for now.
    # example: https://www.mozilla.org/en-US/firefox/2.0/releasenotes/
    if major < 3:
        raise UnsupportedReleasePageError(f"Unsupported release page: {release_page}")

    # Firefox release pages come in 3 different variants. Unfortunately, there is no
    # consistent way to determine which variant a page is (say, by version number), so
    # we have to try each variant until we find one that works.
    # NOTE: "varant_1" is the name this module actually defines for the first parser.
    variants = (
        get_version_and_date_varant_1,
        get_version_and_date_variant_2,
        get_version_and_date_variant_3,
    )
    soup = make_bs_request(release_page)
    for variant in variants:
        try:
            return variant(soup)
        except (InvalidPageVariantError, AttributeError, IndexError, TypeError):
            # Missing elements surface as AttributeError/IndexError, and a
            # missing attribute fed to re.search as TypeError; all mean
            # "wrong variant", so move on to the next parser.
            continue
    raise UnsupportedReleasePageError(f"Unable to find version and date for {release_page}")
def make_bs_request(url: str) -> BeautifulSoup:
    """Make a request to the given url and return a BeautifulSoup object.

    The request is sent with a ``user-agent: mozilla`` header and a 5 second
    timeout, and the response body is parsed with the ``html5lib`` parser.

    Args:
        url: Address of the page to download.

    Returns:
        The parsed document.

    Raises:
        Whatever ``urllib.request.urlopen`` raises on network failure or
        timeout; nothing is caught here.
    """
    headers = {"user-agent": "mozilla"}
    req = urllib.request.Request(url, headers=headers)
    # Use the response as a context manager so the connection is closed as soon
    # as the body has been read, even if read() fails.
    with urllib.request.urlopen(req, timeout=5) as res:
        body = res.read()
    return BeautifulSoup(body, features="html5lib")
def fetch_releases():
    """Collect version -> date for every release linked from ``URL``.

    Downloads the index page, takes every link inside the first
    ``ol.c-release-list`` element and resolves each release page concurrently
    through ``get_version_and_date``. Each result is printed as it completes;
    pages that raise ``UnsupportedReleasePageError`` are reported and skipped.

    Returns:
        A dict mapping each version string to its formatted date string.
    """
    index_soup = make_bs_request(URL)
    release_lists = index_soup.find_all("ol", class_="c-release-list")
    releases = {}

    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Schedule one page fetch per link, remembering the raw href of each
        # so failures can be reported.
        future_to_url = {}
        for link in release_lists[0].find_all("a"):
            future = executor.submit(
                get_version_and_date,
                requests.compat.urljoin(URL, link.get("href")),
                link.get_text(),
            )
            future_to_url[future] = link.get("href")

        # Consume results in completion order.
        for future in concurrent.futures.as_completed(future_to_url):
            try:
                version, date = future.result()
            except UnsupportedReleasePageError:
                print("Unsupported release page: %s" % future_to_url[future])
                continue
            print("%s: %s" % (version, date))
            releases[version] = date

    return releases
def main():
    """Fetch the releases and write them to ``releases/{PRODUCT}.json``.

    The output is a JSON object (2-space indent) mapping version to date,
    ordered by date and then version, both descending. Progress output is
    wrapped in ``::group::`` / ``::endgroup::`` marker lines (presumably CI
    log-group markers; confirm against the workflow that runs this script).
    """
    print(f"::group::{PRODUCT}")
    releases = fetch_releases()
    with open(f"releases/{PRODUCT}.json", "w") as f:
        # sort by date then version (desc); both are compared as strings
        ordered = dict(
            sorted(releases.items(), key=lambda item: (item[1], item[0]), reverse=True)
        )
        json.dump(ordered, f, indent=2)
    print("::endgroup::")
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()