From a6eebd1dc17503ee39f6bf5941a63da03323b1a5 Mon Sep 17 00:00:00 2001 From: Marc Wrobel Date: Mon, 18 Dec 2023 00:02:38 +0100 Subject: [PATCH] [aws-lambda] Improve script (#259) - make the script more resilient to changes in the page by using column names, - use the product release releaseDate as the date, else the date the version was first found, else the current date (previously the date the version was first found was not used), - move some code to the Product class. --- releases/aws-lambda.json | 20 ++++++++++++++++ src/aws-lambda.py | 50 ++++++++++++++++------------------------ src/common/endoflife.py | 25 +++++++++++++++++--- 3 files changed, 62 insertions(+), 33 deletions(-) diff --git a/releases/aws-lambda.json b/releases/aws-lambda.json index af48382e..8d6c0f7b 100644 --- a/releases/aws-lambda.json +++ b/releases/aws-lambda.json @@ -1,14 +1,33 @@ { + "python3.12": "2023-12-17", + "java21": "2023-11-17", + "nodejs20.x": "2023-11-15", + "provided.al2023": "2023-11-10", + "python3.11": "2023-07-27", + "ruby3.2": "2023-06-07", + "java17": "2023-04-27", + "python3.10": "2023-04-18", + "nodejs18.x": "2022-11-18", + "dotnet7": "2022-11-15", + "nodejs16.x": "2022-05-12", + "dotnet6": "2022-02-24", + "python3.9": "2021-08-16", "nodejs14.x": "2021-02-03", "nodejs12.x": "2021-02-03", "dotnet5.0": "2020-12-02", + "provided.al2": "2020-08-12", + "java8.al2": "2020-08-12", "dotnetcore3.1": "2020-03-31", "ruby2.7": "2020-02-19", + "python3.8": "2019-11-18", + "java11": "2019-11-18", "nodejs10.x": "2019-05-15", "ruby2.5": "2018-11-29", + "provided": "2018-11-29", "python3.7": "2018-11-19", "dotnetcore2.1": "2018-07-09", "nodejs8.10": "2018-04-02", + "go1.x": "2018-01-15", "dotnetcore2.0": "2018-01-15", "nodejs4.3-edge": "2017-07-17", "python3.6": "2017-04-18", @@ -16,5 +35,6 @@ "dotnetcore1.0": "2016-12-01", "nodejs4.3": "2016-04-07", "python2.7": "2015-10-08", + "java8": "2015-06-15", "nodejs": "2014-11-13" } \ No newline at end of file diff --git a/src/aws-lambda.py b/src/aws-lambda.py index 90d30210..77e367c7 100644 --- a/src/aws-lambda.py +++ b/src/aws-lambda.py @@ -1,50 +1,40 @@ +import datetime from bs4 import BeautifulSoup from common import http from common import endoflife -from datetime import datetime """Fetches AWS lambda runtimes from https://docs.aws.amazon.com. This script does not retrieve release dates, as they are only available in release announcements. -Instead, it uses the release dates from the endoflife.date product file. This has the advantage of -being warned about new releases, without having releaseDate information (wrongly) updated. +Instead, it uses the release dates from the endoflife.date product file, or alternatively the +date the release was first detected (or the current date if none is found). If one day release dates are available in the AWS documentation, it would be better to make use them though. Note that this would also be unnecessary if it was possible to disable release/latest release dates updates in the latest.py script.""" - -def get_release_data(product): - try: - return endoflife.load_product(product.name) - except FileNotFoundError: - print(f"{product.name} file not found, real release dates will not be used.") - return {} - - -def release_date(releaseCycle, releases_data): - if 'releases' in releases_data.keys(): - for release in releases_data['releases']: - if releaseCycle == release['releaseCycle']: - return release['releaseDate'] - - return datetime.now() - - -product = endoflife.Product("aws-lambda") -print(f"::group::{product.name}") -releases_data = get_release_data(product) +print("::group::aws-lambda") +product = endoflife.Product("aws-lambda", load_product_data=True, load_versions_data=True) response = http.fetch_url("https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtimes.html") soup = BeautifulSoup(response.text, features="html5lib") -for row in soup.find_all("tr"): - cells = row.find_all("td") - if len(cells) != 6 and len(cells) != 5: # 6 = Supported Runtimes, 5 = Unsupported Runtimes +for table in soup.find_all("table"): + headers = [th.get_text().strip().lower() for th in table.find("thead").find_all("tr")[1].find_all("th")] + if "identifier" not in headers: continue - identifier = cells[1].get_text().strip() - date = release_date(identifier, releases_data) - product.declare_version(identifier, date) + identifier_index = headers.index("identifier") + for row in table.find("tbody").find_all("tr"): + cells = row.find_all("td") + identifier = cells[identifier_index].get_text().strip() + + date = product.get_release_date(identifier) # use the product releaseDate if available + if date is None: + date = product.get_old_version_date(identifier) # else use the previously found date + if date is None: + date = datetime.date.today() # else use today's date + + product.declare_version(identifier, date) product.write() print("::endgroup::") diff --git a/src/common/endoflife.py b/src/common/endoflife.py index 7cd70965..bb35500e 100644 --- a/src/common/endoflife.py +++ b/src/common/endoflife.py @@ -40,10 +40,9 @@ class AutoConfig: class Product: - """Model an endoflife.date product. - """ + """Model an endoflife.date product.""" - def __init__(self, name: str, load_product_data: bool = False): + def __init__(self, name: str, load_product_data: bool = False, load_versions_data: bool = False): self.name: str = name self.versions = {} self.versions_path: str = f"{VERSIONS_PATH}/{name}.json" @@ -58,6 +57,15 @@ class Product: logging.warning(f"no product data found for {self.name} at {self.product_path}") self.product_data = None + if load_versions_data: + if os.path.isfile(self.versions_path): + with open(self.versions_path) as f: + logging.info(f"loaded versions data for {self.name} from {self.versions_path}") + self.old_versions = json.load(f) + else: + logging.warning(f"no versions data found for {self.name} at {self.versions_path}") + self.old_versions = None + def get_auto_configs(self, method: str) -> list[AutoConfig]: configs = [] @@ -76,6 +84,17 @@ class Product: def get_version_date(self, version: str) -> datetime: return self.versions[version] if version in self.versions else None + def get_old_version_date(self, version: str) -> datetime: + return datetime.strptime(self.old_versions[version], "%Y-%m-%d") if ( + self.old_versions + and version in self.old_versions + ) else None + + def get_release_date(self, release_cycle: str) -> datetime: + for release in self.product_data["releases"]: + if release["releaseCycle"] == release_cycle: + return release["releaseDate"] + def declare_version(self, version: str, date: datetime) -> None: if version in self.versions: if self.versions[version] != date: