[aws-lambda] Improve script (#259)

- make the script more resilient to changes in the page by using column names,
- use the releaseDate from the product file as the date, else the date the version was first found, else the current date (previously, the date the version was first found was not used),
- move some code to the Product class.
This commit is contained in:
Marc Wrobel
2023-12-18 00:02:38 +01:00
committed by GitHub
parent 39061c089a
commit a6eebd1dc1
3 changed files with 62 additions and 33 deletions

View File

@@ -1,50 +1,40 @@
import datetime
from bs4 import BeautifulSoup
from common import http
from common import endoflife
from datetime import datetime
"""Fetches AWS lambda runtimes from https://docs.aws.amazon.com.
This script does not retrieve release dates, as they are only available in release announcements.
Instead, it uses the release dates from the endoflife.date product file. This has the advantage of
being warned about new releases, without having releaseDate information (wrongly) updated.
Instead, it uses the release dates from the endoflife.date product file, or alternatively the
date the release was first detected (or the current date if none is found).
If one day release dates are available in the AWS documentation, it would be better to make use of
them though. Note that this would also be unnecessary if it was possible to disable release/latest
release dates updates in the latest.py script."""
def get_release_data(product):
    """Load the stored product data for *product*, tolerating a missing file.

    Args:
        product: object exposing a ``name`` attribute, which is passed to
            ``endoflife.load_product``.

    Returns:
        Whatever ``endoflife.load_product`` returns for that name, or an
        empty dict when it raises ``FileNotFoundError``.
    """
    try:
        data = endoflife.load_product(product.name)
    except FileNotFoundError:
        # Best effort: without the product file, callers get no stored dates.
        print(f"{product.name} file not found, real release dates will not be used.")
        data = {}
    return data
def release_date(releaseCycle, releases_data):
    """Return the stored release date of a release cycle, or the current time.

    Args:
        releaseCycle: identifier compared against the ``releaseCycle`` entry
            of each stored release.
        releases_data: mapping that may contain a ``releases`` list whose
            items carry ``releaseCycle`` and ``releaseDate`` keys. The key
            may be absent, or present with a ``None`` value.

    Returns:
        The ``releaseDate`` value of the first matching release, returned
        unchanged, or ``datetime.now()`` when no release matches.
    """
    # A missing key and an empty/None value both mean "no known releases".
    for release in releases_data.get('releases') or []:
        if releaseCycle == release['releaseCycle']:
            return release['releaseDate']
    return datetime.now()
# NOTE(review): this span appears to interleave the removed and the added lines of a diff
# (no +/- markers, indentation lost), so it is not runnable as shown - confirm against the repository.
# Two variants are visible: a row-based scan using release_date(), and a table-based scan
# using the Product date lookups.
product = endoflife.Product("aws-lambda")
print(f"::group::{product.name}")
releases_data = get_release_data(product)
print("::group::aws-lambda")
product = endoflife.Product("aws-lambda", load_product_data=True, load_versions_data=True)
# Download the Lambda runtimes documentation page and parse it with the html5lib parser.
response = http.fetch_url("https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtimes.html")
soup = BeautifulSoup(response.text, features="html5lib")
# Row-based scan: visit every <tr> on the page and collect its <td> cells.
for row in soup.find_all("tr"):
cells = row.find_all("td")
if len(cells) != 6 and len(cells) != 5: # 6 = Supported Runtimes, 5 = Unsupported Runtimes
# Table-based scan: read the lower-cased header texts from the second <tr> of each table's
# <thead>, and skip tables that have no "identifier" column.
for table in soup.find_all("table"):
headers = [th.get_text().strip().lower() for th in table.find("thead").find_all("tr")[1].find_all("th")]
if "identifier" not in headers:
continue
# Row-based scan: the identifier is the stripped text of the second cell, and its date
# comes from release_date().
identifier = cells[1].get_text().strip()
date = release_date(identifier, releases_data)
product.declare_version(identifier, date)
# Table-based scan: locate the identifier column by header name instead of a fixed position.
identifier_index = headers.index("identifier")
for row in table.find("tbody").find_all("tr"):
cells = row.find_all("td")
identifier = cells[identifier_index].get_text().strip()
# Date sources are tried in order; each later one is used only when the previous gave None.
date = product.get_release_date(identifier) # use the product releaseDate if available
if date is None:
date = product.get_old_version_date(identifier) # else use the previously found date
if date is None:
date = datetime.date.today() # else use today's date
product.declare_version(identifier, date)
# NOTE(review): write() presumably persists the declared versions - confirm in endoflife.Product.
product.write()
# Emit the "::endgroup::" marker that pairs with the "::group::" line printed earlier.
print("::endgroup::")