[firefox] Simplify script
- Use requests_futures instead of managing concurrency manually.
- Drop retrieval of versions < 10.0 to keep the parsing simple.
This commit is contained in:
@@ -463,89 +463,10 @@
|
|||||||
"10.0.5": "2012-06-05",
|
"10.0.5": "2012-06-05",
|
||||||
"12.0": "2012-04-24",
|
"12.0": "2012-04-24",
|
||||||
"10.0.4": "2012-04-24",
|
"10.0.4": "2012-04-24",
|
||||||
"3.6.28": "2012-03-13",
|
|
||||||
"11.0": "2012-03-13",
|
"11.0": "2012-03-13",
|
||||||
"10.0.3": "2012-03-13",
|
"10.0.3": "2012-03-13",
|
||||||
"3.6.27": "2012-02-17",
|
|
||||||
"10.0.2": "2012-02-16",
|
"10.0.2": "2012-02-16",
|
||||||
"10.0.1": "2012-02-10",
|
"10.0.1": "2012-02-10",
|
||||||
"3.6.26": "2012-01-31",
|
"9.0": "2012-01-31",
|
||||||
"10.0": "2012-01-31",
|
"10.0": "2012-01-31"
|
||||||
"9.0.1": "2011-12-21",
|
|
||||||
"9.0": "2011-12-20",
|
|
||||||
"3.6.25": "2011-12-20",
|
|
||||||
"8.0.1": "2011-11-21",
|
|
||||||
"8.0": "2011-11-08",
|
|
||||||
"3.6.24": "2011-11-08",
|
|
||||||
"7.0.1": "2011-09-29",
|
|
||||||
"7.0": "2011-09-27",
|
|
||||||
"3.6.23": "2011-09-27",
|
|
||||||
"6.0.2": "2011-09-06",
|
|
||||||
"3.6.22": "2011-09-06",
|
|
||||||
"6.0.1": "2011-08-30",
|
|
||||||
"3.6.21": "2011-08-30",
|
|
||||||
"6.0": "2011-08-16",
|
|
||||||
"3.6.20": "2011-08-16",
|
|
||||||
"5.0.1": "2011-07-11",
|
|
||||||
"3.6.19": "2011-07-11",
|
|
||||||
"5.0": "2011-06-21",
|
|
||||||
"3.6.18": "2011-06-21",
|
|
||||||
"4.0.1": "2011-04-28",
|
|
||||||
"3.6.17": "2011-04-28",
|
|
||||||
"3.5.19": "2011-04-28",
|
|
||||||
"4.0": "2011-03-22",
|
|
||||||
"3.6.16": "2011-03-22",
|
|
||||||
"3.5.18": "2011-03-22",
|
|
||||||
"3.6.15": "2011-03-04",
|
|
||||||
"3.6.14": "2011-03-01",
|
|
||||||
"3.5.17": "2011-03-01",
|
|
||||||
"3.6.13": "2010-12-09",
|
|
||||||
"3.5.16": "2010-12-09",
|
|
||||||
"3.6.12": "2010-10-27",
|
|
||||||
"3.5.15": "2010-10-27",
|
|
||||||
"3.6.11": "2010-10-19",
|
|
||||||
"3.5.14": "2010-10-19",
|
|
||||||
"3.6.10": "2010-09-15",
|
|
||||||
"3.5.13": "2010-09-15",
|
|
||||||
"3.6.9": "2010-09-07",
|
|
||||||
"3.5.12": "2010-09-07",
|
|
||||||
"3.6.8": "2010-07-23",
|
|
||||||
"3.6.7": "2010-07-20",
|
|
||||||
"3.5.11": "2010-07-20",
|
|
||||||
"3.6.6": "2010-06-26",
|
|
||||||
"3.6.4": "2010-06-22",
|
|
||||||
"3.5.10": "2010-06-22",
|
|
||||||
"3.6.3": "2010-04-01",
|
|
||||||
"3.5.9": "2010-03-30",
|
|
||||||
"3.0.19": "2010-03-30",
|
|
||||||
"3.6.2": "2010-03-22",
|
|
||||||
"3.5.8": "2010-02-17",
|
|
||||||
"3.0.18": "2010-02-17",
|
|
||||||
"3.6": "2010-01-21",
|
|
||||||
"3.5.7": "2010-01-05",
|
|
||||||
"3.0.17": "2010-01-05",
|
|
||||||
"3.5.6": "2009-12-15",
|
|
||||||
"3.0.16": "2009-12-15",
|
|
||||||
"3.5.5": "2009-11-05",
|
|
||||||
"3.5.4": "2009-10-27",
|
|
||||||
"3.0.15": "2009-10-27",
|
|
||||||
"3.5.3": "2009-09-09",
|
|
||||||
"3.0.14": "2009-09-09",
|
|
||||||
"3.5.2": "2009-08-03",
|
|
||||||
"3.0.13": "2009-08-03",
|
|
||||||
"3.0.12": "2009-07-21",
|
|
||||||
"3.5.1": "2009-07-16",
|
|
||||||
"3.5": "2009-06-30",
|
|
||||||
"3.0.11": "2009-06-11",
|
|
||||||
"3.0.10": "2009-04-27",
|
|
||||||
"3.0.9": "2009-04-21",
|
|
||||||
"3.0.8": "2009-03-27",
|
|
||||||
"3.0.7": "2009-03-04",
|
|
||||||
"3.0.6": "2009-02-03",
|
|
||||||
"3.0.5": "2008-12-16",
|
|
||||||
"3.0.4": "2008-11-12",
|
|
||||||
"3.0.3": "2008-09-26",
|
|
||||||
"3.0.2": "2008-09-23",
|
|
||||||
"3.0.1": "2008-07-16",
|
|
||||||
"3.0": "2008-06-17"
|
|
||||||
}
|
}
|
||||||
@@ -15,4 +15,5 @@ typing_extensions==4.8.0
|
|||||||
webencodings==0.5.1
|
webencodings==0.5.1
|
||||||
requests==2.31.0
|
requests==2.31.0
|
||||||
requests-html==0.10.0
|
requests-html==0.10.0
|
||||||
|
requests-futures==1.0.1
|
||||||
regex==2023.10.3
|
regex==2023.10.3
|
||||||
|
|||||||
177
src/firefox.py
177
src/firefox.py
@@ -1,11 +1,13 @@
|
|||||||
import concurrent.futures
|
|
||||||
import re
|
import re
|
||||||
import requests
|
import urllib.parse
|
||||||
from urllib.error import HTTPError
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from common import endoflife
|
from common import endoflife
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Tuple
|
from urllib3.util import Retry
|
||||||
|
from requests.adapters import HTTPAdapter
|
||||||
|
from requests_futures.sessions import FuturesSession
|
||||||
|
from requests.exceptions import ChunkedEncodingError
|
||||||
|
from concurrent.futures import as_completed
|
||||||
|
|
||||||
"""Fetch Firefox versions with their dates from https://www.mozilla.org/"""
|
"""Fetch Firefox versions with their dates from https://www.mozilla.org/"""
|
||||||
|
|
||||||
@@ -16,148 +18,47 @@ DATE_REGEX = r"(January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|July|J
|
|||||||
VERSION_REGEX = r"\d+(\.\d+)*"
|
VERSION_REGEX = r"\d+(\.\d+)*"
|
||||||
|
|
||||||
|
|
||||||
class UnsupportedPageError(Exception):
|
def format_date(text: str) -> str:
|
||||||
"""Raised when a firefox release page is not supported"""
|
text = text.replace(')', '')
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class InvalidPageVariantError(Exception):
|
|
||||||
"""Raised when an invalid variant is passed to get_version_and_date"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
class UnpublishedReleaseError(Exception):
|
|
||||||
"""Raised when a page is not yet published, but linked"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
def format_date(unformatted_date: str) -> str:
|
|
||||||
""" Format date from July 11, 2002 to 2002-07-11 """
|
|
||||||
date = re.sub(r'(\d)(st|nd|rd|th)', r'\1', unformatted_date)
|
|
||||||
formats = ["%b %d, %Y", "%B %d, %Y"]
|
formats = ["%b %d, %Y", "%B %d, %Y"]
|
||||||
for f in formats:
|
for f in formats:
|
||||||
try:
|
try:
|
||||||
return datetime.strptime(date, f).strftime("%Y-%m-%d")
|
return datetime.strptime(text, f).strftime("%Y-%m-%d")
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|
||||||
def get_version_and_date_variant_1(soup: BeautifulSoup) -> Tuple[str, str]:
|
|
||||||
""" Version matching for firefox versions >= 28.0 (usually) """
|
|
||||||
# get version
|
|
||||||
version = soup.find("div", class_="c-release-version").get_text()
|
|
||||||
|
|
||||||
# get date
|
|
||||||
unformatted_date = soup.find("p", class_="c-release-date").get_text()
|
|
||||||
date = format_date(unformatted_date)
|
|
||||||
|
|
||||||
return version, date
|
|
||||||
|
|
||||||
|
|
||||||
def get_version_and_date_variant_2(soup: BeautifulSoup) -> Tuple[str, str]:
|
|
||||||
""" Version matching for firefox versions >= 10.0 (usually) """
|
|
||||||
release_info = soup.find("h2").find("small").text
|
|
||||||
|
|
||||||
# get version
|
|
||||||
version_match = re.search(VERSION_REGEX, soup.select('div#nav-access a')[0].get("href"))
|
|
||||||
if version_match is None:
|
|
||||||
raise InvalidPageVariantError("Unable to find version")
|
|
||||||
version = version_match.group()
|
|
||||||
|
|
||||||
# get date
|
|
||||||
unformatted_date_match = re.search(DATE_REGEX, release_info)
|
|
||||||
if unformatted_date_match is None:
|
|
||||||
raise InvalidPageVariantError("Unable to find date")
|
|
||||||
unformatted_date = unformatted_date_match.group()
|
|
||||||
date = format_date(unformatted_date)
|
|
||||||
|
|
||||||
return version, date
|
|
||||||
|
|
||||||
|
|
||||||
def get_version_and_date_variant_3(soup: BeautifulSoup) -> Tuple[str, str]:
|
|
||||||
""" Version matching for firefox versions >= 3.0 (usually) """
|
|
||||||
release_info = soup.select('div#main-feature p em')[0].get_text()
|
|
||||||
|
|
||||||
# get version
|
|
||||||
version_match = re.search(VERSION_REGEX, release_info)
|
|
||||||
if version_match is None:
|
|
||||||
raise InvalidPageVariantError("Unable to find version")
|
|
||||||
version = version_match.group()
|
|
||||||
|
|
||||||
# get date
|
|
||||||
unformatted_date_match = re.search(DATE_REGEX, release_info)
|
|
||||||
if unformatted_date_match is None:
|
|
||||||
raise InvalidPageVariantError("Unable to find date")
|
|
||||||
unformatted_date = unformatted_date_match.group()
|
|
||||||
date = format_date(unformatted_date)
|
|
||||||
|
|
||||||
return version, date
|
|
||||||
|
|
||||||
|
|
||||||
def get_version_and_date(release_page: str, release_version: str) -> Tuple[str, str]:
|
|
||||||
""" Get version and date from the given release page """
|
|
||||||
major = int(release_version.split(".")[0])
|
|
||||||
|
|
||||||
# firefox release pages for versions <3.0 don't include release dates, so we
|
|
||||||
# can't match these versions for now.
|
|
||||||
# example: https://www.mozilla.org/en-US/firefox/2.0/releasenotes/
|
|
||||||
if major < 3:
|
|
||||||
raise UnsupportedPageError(f"Unsupported release page: {release_page}")
|
|
||||||
|
|
||||||
# Firefox release pages come in 3 different variants. Unfortunately, there
|
|
||||||
# is no consistent way to determine which variant a page is (say, by version
|
|
||||||
# number), so we have to try each variant until we find one that works.
|
|
||||||
functions = [
|
|
||||||
get_version_and_date_variant_1,
|
|
||||||
get_version_and_date_variant_2,
|
|
||||||
get_version_and_date_variant_3
|
|
||||||
]
|
|
||||||
try:
|
|
||||||
soup = make_bs_request(release_page)
|
|
||||||
except(HTTPError) as e:
|
|
||||||
if(e.code == 404):
|
|
||||||
raise UnpublishedReleaseError(f"The release page is not yet published, got a 404: {release_page}")
|
|
||||||
else:
|
|
||||||
raise e
|
|
||||||
|
|
||||||
for function in functions:
|
|
||||||
try:
|
|
||||||
return function(soup)
|
|
||||||
except (InvalidPageVariantError, AttributeError, IndexError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
raise UnsupportedPageError(f"Unable to find version and date from {release_page}")
|
|
||||||
|
|
||||||
|
|
||||||
def make_bs_request(url: str) -> BeautifulSoup:
|
|
||||||
response = endoflife.fetch_url(url)
|
|
||||||
return BeautifulSoup(response, features="html5lib")
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_releases():
|
|
||||||
releases = {}
|
|
||||||
soup = make_bs_request(URL)
|
|
||||||
|
|
||||||
ff_releases = soup.find_all("ol", class_="c-release-list")
|
|
||||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
|
||||||
future_to_url = {
|
|
||||||
executor.submit(
|
|
||||||
get_version_and_date,
|
|
||||||
requests.compat.urljoin(URL, p.get("href")),
|
|
||||||
p.get_text()): p.get("href") for p in ff_releases[0].find_all("a")
|
|
||||||
}
|
|
||||||
|
|
||||||
for future in concurrent.futures.as_completed(future_to_url):
|
|
||||||
try:
|
|
||||||
(version, date) = future.result()
|
|
||||||
print(f"{version}: {date}")
|
|
||||||
releases[version] = date
|
|
||||||
except(UnsupportedPageError, UnpublishedReleaseError):
|
|
||||||
print(f"Unsupported release page: {future_to_url[future]}")
|
|
||||||
|
|
||||||
return releases
|
|
||||||
|
|
||||||
|
|
||||||
print(f"::group::{PRODUCT}")
|
print(f"::group::{PRODUCT}")
|
||||||
versions = fetch_releases()
|
versions = {}
|
||||||
|
|
||||||
|
response = endoflife.fetch_url(URL)
|
||||||
|
ff_releases = BeautifulSoup(response, features="html5lib").find_all("ol", class_="c-release-list")
|
||||||
|
ff_urls = [urllib.parse.urljoin(URL, p.get("href")) for p in ff_releases[0].find_all("a")]
|
||||||
|
|
||||||
|
session = FuturesSession()
|
||||||
|
session.mount('https://', HTTPAdapter(max_retries=Retry(total=5, backoff_factor=0.2)))
|
||||||
|
futures = [session.get(url, timeout=30) for url in ff_urls]
|
||||||
|
for future in as_completed(futures):
|
||||||
|
try:
|
||||||
|
response = future.result()
|
||||||
|
soup = BeautifulSoup(response.text, features="html5lib")
|
||||||
|
|
||||||
|
version = response.request.url.split("/")[-3]
|
||||||
|
if soup.find("div", class_="c-release-version"):
|
||||||
|
date = format_date(soup.find("p", class_="c-release-date").get_text())
|
||||||
|
versions[version] = date
|
||||||
|
print(f"{version}: {date}")
|
||||||
|
elif soup.find("small", string=re.compile("^.?First offered")):
|
||||||
|
element = soup.find("small", string=re.compile("^.?First offered"))
|
||||||
|
date = format_date(' '.join(element.get_text().split(" ")[-3:])) # get last 3 words
|
||||||
|
versions[version] = date
|
||||||
|
print(f"{version}: {date}")
|
||||||
|
# Versions < 10.0 use older page layouts that match neither branch above, so they are skipped; not a big deal.
|
||||||
|
except ChunkedEncodingError:
|
||||||
|
# This may happen sometimes and will be ignored to not make the script fail,
|
||||||
|
# see https://stackoverflow.com/a/71899731/374236.
|
||||||
|
print(f"Error fetching {response.request.url}: ChunkedEncodingError")
|
||||||
|
|
||||||
endoflife.write_releases(PRODUCT, versions)
|
endoflife.write_releases(PRODUCT, versions)
|
||||||
print("::endgroup::")
|
print("::endgroup::")
|
||||||
|
|||||||
Reference in New Issue
Block a user