diff --git a/releases/firefox.json b/releases/firefox.json index 3b24373d..c5f68bcf 100644 --- a/releases/firefox.json +++ b/releases/firefox.json @@ -463,89 +463,10 @@ "10.0.5": "2012-06-05", "12.0": "2012-04-24", "10.0.4": "2012-04-24", - "3.6.28": "2012-03-13", "11.0": "2012-03-13", "10.0.3": "2012-03-13", - "3.6.27": "2012-02-17", "10.0.2": "2012-02-16", "10.0.1": "2012-02-10", - "3.6.26": "2012-01-31", - "10.0": "2012-01-31", - "9.0.1": "2011-12-21", - "9.0": "2011-12-20", - "3.6.25": "2011-12-20", - "8.0.1": "2011-11-21", - "8.0": "2011-11-08", - "3.6.24": "2011-11-08", - "7.0.1": "2011-09-29", - "7.0": "2011-09-27", - "3.6.23": "2011-09-27", - "6.0.2": "2011-09-06", - "3.6.22": "2011-09-06", - "6.0.1": "2011-08-30", - "3.6.21": "2011-08-30", - "6.0": "2011-08-16", - "3.6.20": "2011-08-16", - "5.0.1": "2011-07-11", - "3.6.19": "2011-07-11", - "5.0": "2011-06-21", - "3.6.18": "2011-06-21", - "4.0.1": "2011-04-28", - "3.6.17": "2011-04-28", - "3.5.19": "2011-04-28", - "4.0": "2011-03-22", - "3.6.16": "2011-03-22", - "3.5.18": "2011-03-22", - "3.6.15": "2011-03-04", - "3.6.14": "2011-03-01", - "3.5.17": "2011-03-01", - "3.6.13": "2010-12-09", - "3.5.16": "2010-12-09", - "3.6.12": "2010-10-27", - "3.5.15": "2010-10-27", - "3.6.11": "2010-10-19", - "3.5.14": "2010-10-19", - "3.6.10": "2010-09-15", - "3.5.13": "2010-09-15", - "3.6.9": "2010-09-07", - "3.5.12": "2010-09-07", - "3.6.8": "2010-07-23", - "3.6.7": "2010-07-20", - "3.5.11": "2010-07-20", - "3.6.6": "2010-06-26", - "3.6.4": "2010-06-22", - "3.5.10": "2010-06-22", - "3.6.3": "2010-04-01", - "3.5.9": "2010-03-30", - "3.0.19": "2010-03-30", - "3.6.2": "2010-03-22", - "3.5.8": "2010-02-17", - "3.0.18": "2010-02-17", - "3.6": "2010-01-21", - "3.5.7": "2010-01-05", - "3.0.17": "2010-01-05", - "3.5.6": "2009-12-15", - "3.0.16": "2009-12-15", - "3.5.5": "2009-11-05", - "3.5.4": "2009-10-27", - "3.0.15": "2009-10-27", - "3.5.3": "2009-09-09", - "3.0.14": "2009-09-09", - "3.5.2": "2009-08-03", - "3.0.13": 
"2009-08-03", - "3.0.12": "2009-07-21", - "3.5.1": "2009-07-16", - "3.5": "2009-06-30", - "3.0.11": "2009-06-11", - "3.0.10": "2009-04-27", - "3.0.9": "2009-04-21", - "3.0.8": "2009-03-27", - "3.0.7": "2009-03-04", - "3.0.6": "2009-02-03", - "3.0.5": "2008-12-16", - "3.0.4": "2008-11-12", - "3.0.3": "2008-09-26", - "3.0.2": "2008-09-23", - "3.0.1": "2008-07-16", - "3.0": "2008-06-17" + "9.0": "2012-01-31", + "10.0": "2012-01-31" } \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index dd458700..fede8539 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,4 +15,5 @@ typing_extensions==4.8.0 webencodings==0.5.1 requests==2.31.0 requests-html==0.10.0 +requests-futures==1.0.1 regex==2023.10.3 diff --git a/src/firefox.py b/src/firefox.py index 2de842c9..e1e78a41 100644 --- a/src/firefox.py +++ b/src/firefox.py @@ -1,11 +1,13 @@ -import concurrent.futures import re -import requests -from urllib.error import HTTPError +import urllib.parse from bs4 import BeautifulSoup from common import endoflife from datetime import datetime -from typing import Tuple +from urllib3.util import Retry +from requests.adapters import HTTPAdapter +from requests_futures.sessions import FuturesSession +from requests.exceptions import ChunkedEncodingError +from concurrent.futures import as_completed """Fetch Firefox versions with their dates from https://www.mozilla.org/""" @@ -16,148 +18,47 @@ DATE_REGEX = r"(January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|July|J VERSION_REGEX = r"\d+(\.\d+)*" -class UnsupportedPageError(Exception): - """Raised when a firefox release page is not supported""" - pass - - -class InvalidPageVariantError(Exception): - """Raised when an invalid variant is passed to get_version_and_date""" - pass - -class UnpublishedReleaseError(Exception): - """Raised when a page is not yet published, but linked""" - pass - -def format_date(unformatted_date: str) -> str: - """ Format date from July 11, 2002 to 2002-07-11 """ - date = 
def format_date(text: str) -> str:
    """Convert a human-readable release date to ISO 8601 (``YYYY-MM-DD``).

    Accepted inputs are an abbreviated or full English month name followed by
    the day and year, e.g. ``"Jan 31, 2012"`` or ``"July 11, 2002"``. Before
    parsing, the text is normalised:

    * closing parentheses are removed (presumably the date can be the tail of
      a parenthesised note on the release page -- confirm against the pages),
    * leading/trailing whitespace is stripped, since text extracted from HTML
      frequently carries newlines or padding that ``strptime`` rejects,
    * ordinal suffixes are dropped (``"21st"`` -> ``"21"``).

    :param text: the raw date text.
    :return: the date formatted as ``YYYY-MM-DD``, or an empty string when
        the text matches none of the supported formats.
    """
    cleaned = text.replace(')', '').strip()
    # Only strip a suffix that directly follows a digit, so month names such
    # as "August" are left untouched.
    cleaned = re.sub(r'(\d)(st|nd|rd|th)\b', r'\1', cleaned)

    for fmt in ("%b %d, %Y", "%B %d, %Y"):
        try:
            return datetime.strptime(cleaned, fmt).strftime("%Y-%m-%d")
        except ValueError:
            continue  # try the next format
    return ""
- unformatted_date = unformatted_date_match.group() - date = format_date(unformatted_date) - - return version, date - - -def get_version_and_date(release_page: str, release_version: str) -> Tuple[str, str]: - """ Get version and date from the given release page """ - major = int(release_version.split(".")[0]) - - # firefox release pages for versions <3.0 don't include release dates, so we - # can't match these versions for now. - # example: https://www.mozilla.org/en-US/firefox/2.0/releasenotes/ - if major < 3: - raise UnsupportedPageError(f"Unsupported release page: {release_page}") - - # Firefox release pages come in 3 different variants. Unfortunately, there - # is no consistent way to determine which variant a page is (say, by version - # number), so we have to try each variant until we find one that works. - functions = [ - get_version_and_date_variant_1, - get_version_and_date_variant_2, - get_version_and_date_variant_3 - ] - try: - soup = make_bs_request(release_page) - except(HTTPError) as e: - if(e.code == 404): - raise UnpublishedReleaseError(f"The release page is not yet published, got a 404: {release_page}") - else: - raise e - - for function in functions: - try: - return function(soup) - except (InvalidPageVariantError, AttributeError, IndexError): - pass - - raise UnsupportedPageError(f"Unable to find version and date from {release_page}") - - -def make_bs_request(url: str) -> BeautifulSoup: - response = endoflife.fetch_url(url) - return BeautifulSoup(response, features="html5lib") - - -def fetch_releases(): - releases = {} - soup = make_bs_request(URL) - - ff_releases = soup.find_all("ol", class_="c-release-list") - with concurrent.futures.ThreadPoolExecutor() as executor: - future_to_url = { - executor.submit( - get_version_and_date, - requests.compat.urljoin(URL, p.get("href")), - p.get_text()): p.get("href") for p in ff_releases[0].find_all("a") - } - - for future in concurrent.futures.as_completed(future_to_url): - try: - (version, date) = 
print(f"::group::{PRODUCT}")
versions = {}

# The index page lists every release; each link points to that release's notes page.
index_page = endoflife.fetch_url(URL)
ff_releases = BeautifulSoup(index_page, features="html5lib").find_all("ol", class_="c-release-list")
ff_urls = [urllib.parse.urljoin(URL, p.get("href")) for p in ff_releases[0].find_all("a")]

# Older release pages give the date in a <small> element whose text starts with
# "First offered" (optionally preceded by one character). Compiled once, used per page.
first_offered_pattern = re.compile("^.?First offered")

session = FuturesSession()
session.mount('https://', HTTPAdapter(max_retries=Retry(total=5, backoff_factor=0.2)))

# Keep the URL next to its future: when a request fails there is no response
# object to read the URL from.
future_to_url = {session.get(url, timeout=30): url for url in ff_urls}
for future in as_completed(future_to_url):
    try:
        response = future.result()
    except ChunkedEncodingError:
        # This may happen sometimes and will be ignored to not make the script fail,
        # see https://stackoverflow.com/a/71899731/374236.
        print(f"Error fetching {future_to_url[future]}: ChunkedEncodingError")
        continue

    soup = BeautifulSoup(response.text, features="html5lib")
    # The version is taken from the URL path, e.g. ".../firefox/<version>/releasenotes/".
    version = response.request.url.split("/")[-3]

    if soup.find("div", class_="c-release-version"):
        date_element = soup.find("p", class_="c-release-date")
        raw_date = date_element.get_text() if date_element else ""
    else:
        date_element = soup.find("small", string=first_offered_pattern)
        if date_element is None:
            # we don't get version <= 10.0, not a big deal
            continue
        raw_date = ' '.join(date_element.get_text().split(" ")[-3:])  # get last 3 words

    date = format_date(raw_date)
    if not date:
        # Never record an empty release date; report the page instead.
        print(f"Unable to find a release date for {version} in {response.request.url}")
        continue

    versions[version] = date
    print(f"{version}: {date}")

endoflife.write_releases(PRODUCT, versions)
print("::endgroup::")