diff --git a/src/amazon-neptune.py b/src/amazon-neptune.py index 0403590b..56593624 100644 --- a/src/amazon-neptune.py +++ b/src/amazon-neptune.py @@ -8,16 +8,14 @@ from common import dates, http, releasedata RSS_URL = "https://docs.aws.amazon.com/neptune/latest/userguide/rssupdates.rss" VERSION_PATTERN = re.compile(r"^Engine version (?P[0-9R.]+)$") -product = releasedata.Product("amazon-neptune") -rss_response = http.fetch_url(RSS_URL) -rss = xml.dom.minidom.parseString(rss_response.text) +with releasedata.ProductData("amazon-neptune") as product_data: + rss_response = http.fetch_url(RSS_URL) + rss = xml.dom.minidom.parseString(rss_response.text) -for entry in rss.getElementsByTagName("item"): - version_str = entry.getElementsByTagName("title")[0].firstChild.nodeValue - date_str = entry.getElementsByTagName("pubDate")[0].firstChild.nodeValue + for entry in rss.getElementsByTagName("item"): + version_str = entry.getElementsByTagName("title")[0].firstChild.nodeValue + date_str = entry.getElementsByTagName("pubDate")[0].firstChild.nodeValue - version_match = VERSION_PATTERN.match(version_str) - if version_match: - product.declare_version(version_match['version'], dates.parse_datetime(date_str)) - -product.write() + version_match = VERSION_PATTERN.match(version_str) + if version_match: + product_data.declare_version(version_match['version'], dates.parse_datetime(date_str)) diff --git a/src/apache-http-server.py b/src/apache-http-server.py index 42711a74..99e44c7c 100644 --- a/src/apache-http-server.py +++ b/src/apache-http-server.py @@ -15,22 +15,20 @@ VERSION_AND_DATE_PATTERNS = [ re.compile(r"\s+(?P\d+\.\d+\.\d+)\s*:.*Tagged and [rR]olled\s(?:on\s)?(?P\w+\.?\s\d\d?,\s\d{4})"), ] -product = releasedata.Product("apache-http-server") -git = Git("https://github.com/apache/httpd.git") -git.setup() +with releasedata.ProductData("apache-http-server") as product_data: + git = Git("https://github.com/apache/httpd.git") + git.setup() -for branch in git.list_branches("refs/heads/?.?.x"): - git.checkout(branch, file_list=["STATUS"]) + for branch in git.list_branches("refs/heads/?.?.x"): + git.checkout(branch, file_list=["STATUS"]) - release_notes_file = git.repo_dir / "STATUS" - if not release_notes_file.exists(): - continue + release_notes_file = git.repo_dir / "STATUS" + if not release_notes_file.exists(): + continue - with release_notes_file.open("rb") as f: - release_notes = f.read().decode("utf-8", errors="ignore") + with release_notes_file.open("rb") as f: + release_notes = f.read().decode("utf-8", errors="ignore") - for pattern in VERSION_AND_DATE_PATTERNS: - for (version, date_str) in pattern.findall(release_notes): - product.declare_version(version, dates.parse_date(date_str)) - -product.write() + for pattern in VERSION_AND_DATE_PATTERNS: + for (version, date_str) in pattern.findall(release_notes): + product_data.declare_version(version, dates.parse_date(date_str)) diff --git a/src/apple.py b/src/apple.py index fd1c822b..bb406d37 100644 --- a/src/apple.py +++ b/src/apple.py @@ -56,29 +56,27 @@ soups = [BeautifulSoup(response.text, features="html5lib") for response in http. 
logging.info("::endgroup::") for product_name in VERSION_PATTERNS: - product = releasedata.Product(product_name) - for soup in soups: - versions_table = soup.find(id="tableWraper") - versions_table = versions_table if versions_table else soup.find('table', class_="gb-table") + with releasedata.ProductData(product_name) as product_data: + for soup in soups: + versions_table = soup.find(id="tableWraper") + versions_table = versions_table if versions_table else soup.find('table', class_="gb-table") - for row in versions_table.findAll("tr")[1:]: - cells = row.findAll("td") - version_text = cells[0].get_text().strip() - date_text = cells[2].get_text().strip() + for row in versions_table.findAll("tr")[1:]: + cells = row.findAll("td") + version_text = cells[0].get_text().strip() + date_text = cells[2].get_text().strip() - date_match = DATE_PATTERN.search(date_text) - if not date_match: - logging.info(f"ignoring version {version_text} ({date_text}), date pattern don't match") - continue + date_match = DATE_PATTERN.search(date_text) + if not date_match: + logging.info(f"ignoring version {version_text} ({date_text}), date pattern don't match") + continue - date_str = date_match.group(0).replace("Sept ", "Sep ") - date = dates.parse_date(date_str) - for version_pattern in VERSION_PATTERNS[product.name]: - for version_str in version_pattern.findall(version_text): - version = product.get_version(version_str) - if not version or version.date() > date: - product.declare_version(version_str, date) - else: - logging.info(f"ignoring version {version_str} ({date}) for {product.name}") - - product.write() + date_str = date_match.group(0).replace("Sept ", "Sep ") + date = dates.parse_date(date_str) + for version_pattern in VERSION_PATTERNS[product_data.name]: + for version_str in version_pattern.findall(version_text): + version = product_data.get_version(version_str) + if not version or version.date() > date: + product_data.declare_version(version_str, date) + else: + logging.info(f"ignoring version {version_str} ({date}) for {product_data.name}") diff --git a/src/aws-lambda.py b/src/aws-lambda.py index 818f7b84..0aad7b03 100644 --- a/src/aws-lambda.py +++ b/src/aws-lambda.py @@ -12,28 +12,26 @@ If one day release dates are available in the AWS documentation, it would be bet them though. 
Note that this would also be unnecessary if it was possible to disable release/latest release dates updates in the latest.py script.""" -product = releasedata.Product("aws-lambda") -old_product = releasedata.Product.from_file(product.name) -product_frontmatter = endoflife.ProductFrontmatter(product.name) -response = http.fetch_url("https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtimes.html") -soup = BeautifulSoup(response.text, features="html5lib") +with releasedata.ProductData("aws-lambda") as product_data: + old_product_data = releasedata.ProductData.from_file(product_data.name) + product_frontmatter = endoflife.ProductFrontmatter(product_data.name) + response = http.fetch_url("https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtimes.html") + soup = BeautifulSoup(response.text, features="html5lib") -for table in soup.find_all("table"): - headers = [th.get_text().strip().lower() for th in table.find("thead").find_all("tr")[1].find_all("th")] - if "identifier" not in headers: - continue + for table in soup.find_all("table"): + headers = [th.get_text().strip().lower() for th in table.find("thead").find_all("tr")[1].find_all("th")] + if "identifier" not in headers: + continue - identifier_index = headers.index("identifier") - for row in table.find("tbody").find_all("tr"): - cells = row.find_all("td") - identifier = cells[identifier_index].get_text().strip() + identifier_index = headers.index("identifier") + for row in table.find("tbody").find_all("tr"): + cells = row.find_all("td") + identifier = cells[identifier_index].get_text().strip() - date = product_frontmatter.get_release_date(identifier) # use the product releaseDate if available - if date is None: - date = old_product.get_version(identifier).date() # else use the previously found date - if date is None: - date = dates.today() # else use today's date + date = product_frontmatter.get_release_date(identifier) # use the product releaseDate if available + if date is None: + date = old_product_data.get_version(identifier).date() # else use the previously found date + if date is None: + date = dates.today() # else use today's date - product.declare_version(identifier, date) - -product.write() + product_data.declare_version(identifier, date) diff --git a/src/cgit.py b/src/cgit.py index c62f5b21..c5948c4f 100644 --- a/src/cgit.py +++ b/src/cgit.py @@ -10,29 +10,27 @@ METHOD = "cgit" p_filter = sys.argv[1] if len(sys.argv) > 1 else None for product in endoflife.list_products(METHOD, p_filter): - product_data = releasedata.Product(product.name) - for config in product.get_auto_configs(METHOD): - response = http.fetch_url(config.url + '/refs/tags') - soup = BeautifulSoup(response.text, features="html5lib") + with releasedata.ProductData(product.name) as product_data: + for config in product.get_auto_configs(METHOD): + response = http.fetch_url(config.url + '/refs/tags') + soup = BeautifulSoup(response.text, features="html5lib") - for table in soup.find_all("table", class_="list"): - for row in table.find_all("tr"): - columns = row.find_all("td") - if len(columns) != 4: - continue + for table in soup.find_all("table", class_="list"): + for row in table.find_all("tr"): + columns = row.find_all("td") + if len(columns) != 4: + continue - version_str = columns[0].text.strip() - version_match = config.first_match(version_str) - if not version_match: - continue + version_str = columns[0].text.strip() + version_match = config.first_match(version_str) + if not version_match: + continue - datetime_td = columns[3].find_next("span") - 
datetime_str = datetime_td.attrs["title"] if datetime_td else None - if not datetime_str: - continue + datetime_td = columns[3].find_next("span") + datetime_str = datetime_td.attrs["title"] if datetime_td else None + if not datetime_str: + continue - version = config.render(version_match) - date = dates.parse_datetime(datetime_str) - product_data.declare_version(version, date) - - product_data.write() + version = config.render(version_match) + date = dates.parse_datetime(datetime_str) + product_data.declare_version(version, date) diff --git a/src/coldfusion.py b/src/coldfusion.py index 5213908d..57fa08c1 100644 --- a/src/coldfusion.py +++ b/src/coldfusion.py @@ -31,16 +31,15 @@ FIXED_VERSIONS = { "2023.0.0": dates.date(2022, 5, 16), # https://coldfusion.adobe.com/2023/05/coldfusion2023-release/ } -product = releasedata.Product("coldfusion") -for changelog in http.fetch_urls(URLS): - changelog_soup = BeautifulSoup(changelog.text, features="html5lib") +with releasedata.ProductData("coldfusion") as product_data: + for changelog in http.fetch_urls(URLS): + changelog_soup = BeautifulSoup(changelog.text, features="html5lib") - for p in changelog_soup.findAll("div", class_="text"): - version_and_date_str = p.get_text().strip().replace('\xa0', ' ') - for (date_str, version_str) in VERSION_AND_DATE_PATTERN.findall(version_and_date_str): - date = dates.parse_date(date_str) - version = version_str.strip().replace(",", ".") # 11,0,0,289974 -> 11.0.0.289974 - product.declare_version(version, date) + for p in changelog_soup.findAll("div", class_="text"): + version_and_date_str = p.get_text().strip().replace('\xa0', ' ') + for (date_str, version_str) in VERSION_AND_DATE_PATTERN.findall(version_and_date_str): + date = dates.parse_date(date_str) + version = version_str.strip().replace(",", ".") # 11,0,0,289974 -> 11.0.0.289974 + product_data.declare_version(version, date) -product.declare_versions(FIXED_VERSIONS) -product.write() + product_data.declare_versions(FIXED_VERSIONS) diff --git a/src/common/releasedata.py b/src/common/releasedata.py index 19d24ff2..83018fa1 100644 --- a/src/common/releasedata.py +++ b/src/common/releasedata.py @@ -3,6 +3,8 @@ import logging import os from datetime import datetime, timezone from pathlib import Path +from types import TracebackType +from typing import Optional, Type # Do not update the format: it's also used to declare groups in the GitHub Actions logs. 
logging.basicConfig(format="%(message)s", level=logging.INFO) @@ -10,13 +12,16 @@ logging.basicConfig(format="%(message)s", level=logging.INFO) VERSIONS_PATH = Path(os.environ.get("VERSIONS_PATH", "releases")) +class ProductUpdateError(Exception): + """Custom exceptions raised when unexpected errors occur during product updates.""" + class ProductVersion: - def __init__(self, product: "Product", data: dict) -> None: + def __init__(self, product: "ProductData", data: dict) -> None: self.product = str(product) self.data = data @staticmethod - def of(product: "Product", name: str, date: datetime) -> "ProductVersion": + def of(product: "ProductData", name: str, date: datetime) -> "ProductVersion": return ProductVersion(product, { "name": name, "date": date.strftime("%Y-%m-%d"), @@ -35,16 +40,37 @@ class ProductVersion: return f"{self.product}#{self.name()} ({self.date()})" -class Product: +class ProductData: def __init__(self, name: str) -> None: self.name: str = name self.versions_path: Path = VERSIONS_PATH / f"{name}.json" self.versions: dict[str, ProductVersion] = {} + + def __enter__(self) -> "ProductData": logging.info(f"::group::{self}") + return self + + def __exit__(self, exc_type: Optional[Type[BaseException]], exc_value: Optional[BaseException], + exc_traceback: Optional[TracebackType]) -> None: + try: + if exc_value: + message = f"an unexpected error occurred while updating {self} data" + logging.error(message, exc_info=exc_value) + raise ProductUpdateError(message) from exc_value + + logging.info("updating %s data",self) + # sort by date then version (desc) + ordered_versions = sorted(self.versions.values(), key=lambda v: (v.date(), v.name()), reverse=True) + with self.versions_path.open("w") as f: + f.write(json.dumps({ + "versions": {version.name(): version.data for version in ordered_versions}, + }, indent=2)) + finally: + logging.info("::endgroup::") @staticmethod - def from_file(name: str) -> "Product": - product = Product(name) + def from_file(name: str) -> "ProductData": + product = ProductData(name) if product.versions_path.is_file(): with product.versions_path.open() as f: @@ -79,14 +105,5 @@ class Product: logging.info(f"removing version {version} ({self.versions.pop(version)}) from {self}") - def write(self) -> None: - # sort by date then version (desc) - ordered_versions = sorted(self.versions.values(), key=lambda v: (v.date(), v.name()), reverse=True) - with self.versions_path.open("w") as f: - f.write(json.dumps({ - "versions": {version.name(): version.data for version in ordered_versions}, - }, indent=2)) - logging.info("::endgroup::") - def __repr__(self) -> str: return self.name diff --git a/src/confluence.py b/src/confluence.py index 34e413a3..4da9f6da 100644 --- a/src/confluence.py +++ b/src/confluence.py @@ -5,13 +5,11 @@ from requests_html import HTMLSession Note that requests_html is used because JavaScript is needed to render the page.""" -product = releasedata.Product("confluence") -r = HTMLSession().get("https://www.atlassian.com/software/confluence/download-archives") -r.html.render(sleep=1, scrolldown=3) +with releasedata.ProductData("confluence") as product_data: + r = HTMLSession().get("https://www.atlassian.com/software/confluence/download-archives") + r.html.render(sleep=1, scrolldown=3) -for version_block in r.html.find('.versions-list'): - version = version_block.find('a.product-versions', first=True).attrs['data-version'] - date = dates.parse_date(version_block.find('.release-date', first=True).text) - product.declare_version(version, date) - 
-product.write() + for version_block in r.html.find('.versions-list'): + version = version_block.find('a.product-versions', first=True).attrs['data-version'] + date = dates.parse_date(version_block.find('.release-date', first=True).text) + product_data.declare_version(version, date) diff --git a/src/cos.py b/src/cos.py index 47c2fcf8..0c986e52 100644 --- a/src/cos.py +++ b/src/cos.py @@ -14,32 +14,30 @@ def parse_date(date_text: str) -> datetime: return dates.parse_date(date_text) -product = releasedata.Product("cos") -main = http.fetch_url("https://cloud.google.com/container-optimized-os/docs/release-notes/") -main_soup = BeautifulSoup(main.text, features="html5lib") -milestones = [cell.text.split(' ')[1] for cell in main_soup.find_all('td', string=MILESTONE_PATTERN)] +with releasedata.ProductData("cos") as product_data: + main = http.fetch_url("https://cloud.google.com/container-optimized-os/docs/release-notes/") + main_soup = BeautifulSoup(main.text, features="html5lib") + milestones = [cell.text.split(' ')[1] for cell in main_soup.find_all('td', string=MILESTONE_PATTERN)] -milestones_urls = [f"{main.url}m{milestone}" for milestone in milestones] -for milestone in http.fetch_urls(milestones_urls): - milestone_soup = BeautifulSoup(milestone.text, features="html5lib") - for article in milestone_soup.find_all('article', class_='devsite-article'): - for heading in article.find_all(['h2', 'h3']): # headings contains the date, which we parse - version_str = heading.get('data-text') - version_match = VERSION_PATTERN.match(version_str) - if not version_match: - continue + milestones_urls = [f"{main.url}m{milestone}" for milestone in milestones] + for milestone in http.fetch_urls(milestones_urls): + milestone_soup = BeautifulSoup(milestone.text, features="html5lib") + for article in milestone_soup.find_all('article', class_='devsite-article'): + for heading in article.find_all(['h2', 'h3']): # headings contains the date, which we parse + version_str = heading.get('data-text') + version_match = VERSION_PATTERN.match(version_str) + if not version_match: + continue - try: # 1st row is the header, so pick the first td in the 2nd row - date_str = heading.find_next('tr').find_next('tr').find_next('td').text - except AttributeError: # In some older releases, it is mentioned as Date: [Date] - date_str = heading.find_next('i').text + try: # 1st row is the header, so pick the first td in the 2nd row + date_str = heading.find_next('tr').find_next('tr').find_next('td').text + except AttributeError: # In some older releases, it is mentioned as Date: [Date] + date_str = heading.find_next('i').text - try: - date = parse_date(date_str) - except ValueError: # for some h3, the date is in the previous h2 - date_str = heading.find_previous('h2').get('data-text') - date = parse_date(date_str) + try: + date = parse_date(date_str) + except ValueError: # for some h3, the date is in the previous h2 + date_str = heading.find_previous('h2').get('data-text') + date = parse_date(date_str) - product.declare_version(version_match.group(1), date) - -product.write() + product_data.declare_version(version_match.group(1), date) diff --git a/src/couchbase-server.py b/src/couchbase-server.py index 36b0db12..6d6108cf 100644 --- a/src/couchbase-server.py +++ b/src/couchbase-server.py @@ -18,23 +18,22 @@ MANUAL_VERSIONS = { "7.2.0": dates.date(2023, 6, 1), # https://www.couchbase.com/blog/couchbase-capella-spring-release-72/ } -product = releasedata.Product("couchbase-server") -main = 
http.fetch_url(f"{URLS}/current/install/install-intro.html") -main_soup = BeautifulSoup(main.text, features="html5lib") +with releasedata.ProductData("couchbase-server") as product_data: + main = http.fetch_url(f"{URLS}/current/install/install-intro.html") + main_soup = BeautifulSoup(main.text, features="html5lib") -minor_versions = [options.attrs["value"] for options in main_soup.find(class_="version_list").find_all("option")] -minor_version_urls = [f"{URLS}/{minor}/release-notes/relnotes.html" for minor in minor_versions] + minor_versions = [options.attrs["value"] for options in main_soup.find(class_="version_list").find_all("option")] + minor_version_urls = [f"{URLS}/{minor}/release-notes/relnotes.html" for minor in minor_versions] -for minor_version in http.fetch_urls(minor_version_urls): - minor_version_soup = BeautifulSoup(minor_version.text, features="html5lib") + for minor_version in http.fetch_urls(minor_version_urls): + minor_version_soup = BeautifulSoup(minor_version.text, features="html5lib") - for title in minor_version_soup.find_all("h2"): - match = VERSION_AND_DATE_PATTERN.match(title.get_text().strip()) - if match: - version = match["version"] - version = f"{version}.0" if len(version.split(".")) == 2 else version - date = dates.parse_month_year_date(match['date']) - product.declare_version(version, date) + for title in minor_version_soup.find_all("h2"): + match = VERSION_AND_DATE_PATTERN.match(title.get_text().strip()) + if match: + version = match["version"] + version = f"{version}.0" if len(version.split(".")) == 2 else version + date = dates.parse_month_year_date(match['date']) + product_data.declare_version(version, date) -product.declare_versions(MANUAL_VERSIONS) -product.write() + product_data.declare_versions(MANUAL_VERSIONS) diff --git a/src/debian.py b/src/debian.py index e2fffcb3..e469c969 100644 --- a/src/debian.py +++ b/src/debian.py @@ -7,7 +7,7 @@ from common.git import Git """Fetch Debian versions by parsing news in www.debian.org source repository.""" -def extract_major_versions(p: releasedata.Product, repo_dir: Path) -> None: +def extract_major_versions(p: releasedata.ProductData, repo_dir: Path) -> None: child = run( f"grep -RhE -A 1 'Debian [0-9]+.+ released' {repo_dir}/english/News " f"| cut -d '<' -f 2 " @@ -26,7 +26,7 @@ def extract_major_versions(p: releasedata.Product, repo_dir: Path) -> None: is_release_line = True -def extract_point_versions(p: releasedata.Product, repo_dir: Path) -> None: +def extract_point_versions(p: releasedata.ProductData, repo_dir: Path) -> None: child = run( f"grep -Rh -B 10 '' {repo_dir}/english/News " "| grep -Eo '(release_date>(.*)<|revision>(.*)<)' " @@ -41,12 +41,10 @@ def extract_point_versions(p: releasedata.Product, repo_dir: Path) -> None: p.declare_version(version, dates.parse_date(date)) -product = releasedata.Product("debian") -git = Git("https://salsa.debian.org/webmaster-team/webwml.git") -git.setup() -git.checkout("master", file_list=["english/News"]) +with releasedata.ProductData("debian") as product_data: + git = Git("https://salsa.debian.org/webmaster-team/webwml.git") + git.setup() + git.checkout("master", file_list=["english/News"]) -extract_major_versions(product, git.repo_dir) -extract_point_versions(product, git.repo_dir) - -product.write() + extract_major_versions(product_data, git.repo_dir) + extract_point_versions(product_data, git.repo_dir) diff --git a/src/distrowatch.py b/src/distrowatch.py index a498cfd5..50f64095 100644 --- a/src/distrowatch.py +++ b/src/distrowatch.py @@ -7,22 +7,20 @@ METHOD 
= 'distrowatch' p_filter = sys.argv[1] if len(sys.argv) > 1 else None for product in endoflife.list_products(METHOD, p_filter): - product_data = releasedata.Product(product.name) - for config in product.get_auto_configs(METHOD): - response = http.fetch_url(f"https://distrowatch.com/index.php?distribution={config.url}") - soup = BeautifulSoup(response.text, features="html5lib") + with releasedata.ProductData(product.name) as product_data: + for config in product.get_auto_configs(METHOD): + response = http.fetch_url(f"https://distrowatch.com/index.php?distribution={config.url}") + soup = BeautifulSoup(response.text, features="html5lib") - for table in soup.select("td.News1>table.News"): - headline = table.select_one("td.NewsHeadline a[href]").get_text().strip() - versions_match = config.first_match(headline) - if not versions_match: - continue + for table in soup.select("td.News1>table.News"): + headline = table.select_one("td.NewsHeadline a[href]").get_text().strip() + versions_match = config.first_match(headline) + if not versions_match: + continue - # multiple versions may be released at once (e.g. Ubuntu 16.04.7 and 18.04.5) - versions = config.render(versions_match).split("\n") - date = dates.parse_date(table.select_one("td.NewsDate").get_text()) + # multiple versions may be released at once (e.g. Ubuntu 16.04.7 and 18.04.5) + versions = config.render(versions_match).split("\n") + date = dates.parse_date(table.select_one("td.NewsDate").get_text()) - for version in versions: - product_data.declare_version(version, date) - - product_data.write() + for version in versions: + product_data.declare_version(version, date) diff --git a/src/docker_hub.py b/src/docker_hub.py index f9504923..3ca20554 100644 --- a/src/docker_hub.py +++ b/src/docker_hub.py @@ -9,7 +9,7 @@ Unfortunately images creation date cannot be retrieved, so we had to use the tag METHOD = "docker_hub" -def fetch_releases(p: releasedata.Product, c: endoflife.AutoConfig, url: str) -> None: +def fetch_releases(p: releasedata.ProductData, c: endoflife.AutoConfig, url: str) -> None: data = http.fetch_url(url).json() for result in data["results"]: @@ -24,7 +24,6 @@ def fetch_releases(p: releasedata.Product, c: endoflife.AutoConfig, url: str) -> p_filter = sys.argv[1] if len(sys.argv) > 1 else None for product in endoflife.list_products(METHOD, p_filter): - product_data = releasedata.Product(product.name) - for config in product.get_auto_configs(METHOD): - fetch_releases(product_data, config, f"https://hub.docker.com/v2/repositories/{config.url}/tags?page_size=100&page=1") - product_data.write() + with releasedata.ProductData(product.name) as product_data: + for config in product.get_auto_configs(METHOD): + fetch_releases(product_data, config, f"https://hub.docker.com/v2/repositories/{config.url}/tags?page_size=100&page=1") diff --git a/src/eks.py b/src/eks.py index eef20b8b..0d2ff3db 100644 --- a/src/eks.py +++ b/src/eks.py @@ -13,23 +13,21 @@ URLS = [ "https://docs.aws.amazon.com/eks/latest/userguide/platform-versions.html", ] -product = releasedata.Product("eks") -for version_list in http.fetch_urls(URLS): - version_list_soup = BeautifulSoup(version_list.text, features="html5lib") - for tr in version_list_soup.select("#main-col-body")[0].findAll("tr"): - cells = tr.findAll("td") - if not cells: - continue +with releasedata.ProductData("eks") as product_data: + for version_list in http.fetch_urls(URLS): + version_list_soup = BeautifulSoup(version_list.text, features="html5lib") + for tr in 
version_list_soup.select("#main-col-body")[0].findAll("tr"): + cells = tr.findAll("td") + if not cells: + continue - k8s_version = cells[0].text.strip() - eks_version = cells[1].text.strip() - date_str = cells[-1].text.strip() + k8s_version = cells[0].text.strip() + eks_version = cells[1].text.strip() + date_str = cells[-1].text.strip() - k8s_version_match = endoflife.DEFAULT_VERSION_PATTERN.match(k8s_version) - if k8s_version_match: - date = dates.parse_date(date_str) - # K8S patch version is not kept to match versions on https://github.com/aws/eks-distro/tags. - version = f"{k8s_version_match.group('major')}.{k8s_version_match.group('minor')}-{eks_version.replace('.', '-')}" - product.declare_version(version, date) - -product.write() + k8s_version_match = endoflife.DEFAULT_VERSION_PATTERN.match(k8s_version) + if k8s_version_match: + date = dates.parse_date(date_str) + # K8S patch version is not kept to match versions on https://github.com/aws/eks-distro/tags. + version = f"{k8s_version_match.group('major')}.{k8s_version_match.group('minor')}-{eks_version.replace('.', '-')}" + product_data.declare_version(version, date) diff --git a/src/firefox.py b/src/firefox.py index 0479f4f6..76618be0 100644 --- a/src/firefox.py +++ b/src/firefox.py @@ -21,27 +21,25 @@ def batched(iterable: iter, n: int) -> iter: yield batch -product = releasedata.Product("firefox") -releases_page = http.fetch_url("https://www.mozilla.org/en-US/firefox/releases/") -releases_soup = BeautifulSoup(releases_page.text, features="html5lib") -releases_list = releases_soup.find_all("ol", class_="c-release-list") -release_notes_urls = [urllib.parse.urljoin(releases_page.url, p.get("href")) for p in releases_list[0].find_all("a")] +with releasedata.ProductData("firefox") as product_data: + releases_page = http.fetch_url("https://www.mozilla.org/en-US/firefox/releases/") + releases_soup = BeautifulSoup(releases_page.text, features="html5lib") + releases_list = releases_soup.find_all("ol", class_="c-release-list") + release_notes_urls = [urllib.parse.urljoin(releases_page.url, p.get("href")) for p in releases_list[0].find_all("a")] -for batch_release_notes_urls in batched(release_notes_urls, 20): - for release_notes in http.fetch_urls(batch_release_notes_urls): - version = release_notes.url.split("/")[-3] + for batch_release_notes_urls in batched(release_notes_urls, 20): + for release_notes in http.fetch_urls(batch_release_notes_urls): + version = release_notes.url.split("/")[-3] - release_notes_soup = BeautifulSoup(release_notes.text, features="html5lib") - date_elt = release_notes_soup.find(class_="c-release-date") - if date_elt: - date = dates.parse_date(date_elt.get_text()) - product.declare_version(version, date) - continue + release_notes_soup = BeautifulSoup(release_notes.text, features="html5lib") + date_elt = release_notes_soup.find(class_="c-release-date") + if date_elt: + date = dates.parse_date(date_elt.get_text()) + product_data.declare_version(version, date) + continue - date_elt = release_notes_soup.find("small", string=re.compile("^.?First offered")) - if date_elt: - date = dates.parse_date(' '.join(date_elt.get_text().split(" ")[-3:])) # get last 3 words - product.declare_version(version, date) - # versions < 10.0 are ignored - -product.write() + date_elt = release_notes_soup.find("small", string=re.compile("^.?First offered")) + if date_elt: + date = dates.parse_date(' '.join(date_elt.get_text().split(" ")[-3:])) # get last 3 words + product_data.declare_version(version, date) + # versions < 10.0 are ignored 
diff --git a/src/git.py b/src/git.py index ff531ab5..915b804c 100644 --- a/src/git.py +++ b/src/git.py @@ -9,17 +9,15 @@ METHOD = 'git' p_filter = sys.argv[1] if len(sys.argv) > 1 else None for product in endoflife.list_products(METHOD, p_filter): - product_data = releasedata.Product(product.name) - for config in product.get_auto_configs(METHOD): - git = Git(config.url) - git.setup(bare=True) + with releasedata.ProductData(product.name) as product_data: + for config in product.get_auto_configs(METHOD): + git = Git(config.url) + git.setup(bare=True) - tags = git.list_tags() - for tag, date_str in tags: - version_match = config.first_match(tag) - if version_match: - version = config.render(version_match) - date = dates.parse_date(date_str) - product_data.declare_version(version, date) - - product_data.write() + tags = git.list_tags() + for tag, date_str in tags: + version_match = config.first_match(tag) + if version_match: + version = config.render(version_match) + date = dates.parse_date(date_str) + product_data.declare_version(version, date) diff --git a/src/github-releases.py b/src/github-releases.py index 999bbd6e..ea4e8fc9 100644 --- a/src/github-releases.py +++ b/src/github-releases.py @@ -44,18 +44,16 @@ query($endCursor: String) { p_filter = sys.argv[1] if len(sys.argv) > 1 else None for product in endoflife.list_products(METHOD, p_filter): - product_data = releasedata.Product(product.name) - for config in product.get_auto_configs(METHOD): - for page in fetch_releases(config.url): - releases = [edge['node'] for edge in (page['data']['repository']['releases']['edges'])] + with releasedata.ProductData(product.name) as product_data: + for config in product.get_auto_configs(METHOD): + for page in fetch_releases(config.url): + releases = [edge['node'] for edge in (page['data']['repository']['releases']['edges'])] - for release in releases: - if not release['isPrerelease']: - version_str = release['name'] - version_match = config.first_match(version_str) - if version_match: - version = config.render(version_match) - date = dates.parse_datetime(release['publishedAt']) - product_data.declare_version(version, date) - - product_data.write() + for release in releases: + if not release['isPrerelease']: + version_str = release['name'] + version_match = config.first_match(version_str) + if version_match: + version = config.render(version_match) + date = dates.parse_datetime(release['publishedAt']) + product_data.declare_version(version, date) diff --git a/src/gke.py b/src/gke.py index f008a697..8810e9dd 100644 --- a/src/gke.py +++ b/src/gke.py @@ -13,18 +13,16 @@ URL_BY_PRODUCT = { } for product_name, url in URL_BY_PRODUCT.items(): - product = releasedata.Product(product_name) - relnotes = http.fetch_url(url) - relnotes_soup = BeautifulSoup(relnotes.text, features="html5lib") + with releasedata.ProductData(product_name) as product_data: + relnotes = http.fetch_url(url) + relnotes_soup = BeautifulSoup(relnotes.text, features="html5lib") - for section in relnotes_soup.find_all('section', class_='releases'): - for h2 in section.find_all('h2'): # h2 contains the date - date = dates.parse_date(h2.get('data-text')) + for section in relnotes_soup.find_all('section', class_='releases'): + for h2 in section.find_all('h2'): # h2 contains the date + date = dates.parse_date(h2.get('data-text')) - next_div = h2.find_next('div') # The div next to the h2 contains the notes about changes made on that date - for li in next_div.find_all('li'): - if "versions are now available" in li.text: - for version in 
VERSION_PATTERN.findall(li.find('ul').text): - product.declare_version(version, date) - - product.write() + next_div = h2.find_next('div') # The div next to the h2 contains the notes about changes made on that date + for li in next_div.find_all('li'): + if "versions are now available" in li.text: + for version in VERSION_PATTERN.findall(li.find('ul').text): + product_data.declare_version(version, date) diff --git a/src/graalvm.py b/src/graalvm.py index 1870abbc..a6949391 100644 --- a/src/graalvm.py +++ b/src/graalvm.py @@ -1,18 +1,16 @@ from bs4 import BeautifulSoup from common import dates, http, releasedata -product = releasedata.Product("graalvm") -release_calendar = http.fetch_url("https://www.graalvm.org/release-calendar/") -release_calendar_soup = BeautifulSoup(release_calendar.text, features="html5lib") +with releasedata.ProductData("graalvm") as product_data: + release_calendar = http.fetch_url("https://www.graalvm.org/release-calendar/") + release_calendar_soup = BeautifulSoup(release_calendar.text, features="html5lib") -for tr in release_calendar_soup.find("h2", id="previous-releases").find_next("table").find("tbody").findAll("tr"): - cells = tr.findAll("td") - date = dates.parse_date(cells[0].get_text()) + for tr in release_calendar_soup.find("h2", id="previous-releases").find_next("table").find("tbody").findAll("tr"): + cells = tr.findAll("td") + date = dates.parse_date(cells[0].get_text()) - # 'GraalVM for JDK' versions has to be prefixed as their release cycle collide with older - # GraalVM release cycles. Example: GraalVM for JDK 20 and 20.0. - versions_str = cells[2].get_text().replace("GraalVM for JDK ", "jdk-") - for version in versions_str.split(", "): - product.declare_version(version, date) - -product.write() + # 'GraalVM for JDK' versions has to be prefixed as their release cycle collide with older + # GraalVM release cycles. Example: GraalVM for JDK 20 and 20.0. 
+ versions_str = cells[2].get_text().replace("GraalVM for JDK ", "jdk-") + for version in versions_str.split(", "): + product_data.declare_version(version, date) diff --git a/src/haproxy.py b/src/haproxy.py index f584c250..f550ec7c 100644 --- a/src/haproxy.py +++ b/src/haproxy.py @@ -6,27 +6,25 @@ from common import dates, http, releasedata CYCLE_PATTERN = re.compile(r"^(\d+\.\d+)/$") DATE_AND_VERSION_PATTERN = re.compile(r"^(\d{4})/(\d{2})/(\d{2})\s+:\s+(\d+\.\d+\.\d.?)$") # https://regex101.com/r/1JCnFC/1 -product = releasedata.Product("haproxy") -# First, get all minor releases from the download page -download = http.fetch_url('https://www.haproxy.org/download/') -download_soup = BeautifulSoup(download.text, features="html5lib") -minor_versions = [] -for link in download_soup.select("a"): - minor_version_match = CYCLE_PATTERN.match(link.attrs["href"]) - if not minor_version_match: - continue +with releasedata.ProductData("haproxy") as product_data: + # First, get all minor releases from the download page + download = http.fetch_url('https://www.haproxy.org/download/') + download_soup = BeautifulSoup(download.text, features="html5lib") + minor_versions = [] + for link in download_soup.select("a"): + minor_version_match = CYCLE_PATTERN.match(link.attrs["href"]) + if not minor_version_match: + continue - minor_version = minor_version_match.groups()[0] - if minor_version != "1.0": # No changelog in https://www.haproxy.org/download/1.0/src - minor_versions.append(minor_version) + minor_version = minor_version_match.groups()[0] + if minor_version != "1.0": # No changelog in https://www.haproxy.org/download/1.0/src + minor_versions.append(minor_version) -# Then, fetches all versions from each changelog -changelog_urls = [f"https://www.haproxy.org/download/{minor_version}/src/CHANGELOG" for minor_version in minor_versions] -for changelog in http.fetch_urls(changelog_urls): - for line in changelog.text.split('\n'): - date_and_version_match = DATE_AND_VERSION_PATTERN.match(line) - if date_and_version_match: - year, month, day, version = date_and_version_match.groups() - product.declare_version(version, dates.date(int(year), int(month), int(day))) - -product.write() + # Then, fetches all versions from each changelog + changelog_urls = [f"https://www.haproxy.org/download/{minor_version}/src/CHANGELOG" for minor_version in minor_versions] + for changelog in http.fetch_urls(changelog_urls): + for line in changelog.text.split('\n'): + date_and_version_match = DATE_AND_VERSION_PATTERN.match(line) + if date_and_version_match: + year, month, day, version = date_and_version_match.groups() + product_data.declare_version(version, dates.date(int(year), int(month), int(day))) diff --git a/src/ibm-aix.py b/src/ibm-aix.py index 1719e5f5..71fbd3c3 100644 --- a/src/ibm-aix.py +++ b/src/ibm-aix.py @@ -7,15 +7,13 @@ URLS = [ "https://www.ibm.com/support/pages/aix-support-lifecycle-information", ] -product = releasedata.Product("ibm-aix") -for page in http.fetch_urls(URLS): - page_soup = BeautifulSoup(page.text, features="html5lib") +with releasedata.ProductData("ibm-aix") as product_data: + for page in http.fetch_urls(URLS): + page_soup = BeautifulSoup(page.text, features="html5lib") - for release_table in page_soup.find("div", class_="ibm-container-body").find_all("table", class_="ibm-data-table ibm-grid"): - for row in release_table.find_all("tr")[1:]: # for all rows except the header - cells = row.find_all("td") - version = cells[0].text.strip("AIX ").replace(' TL', '.') - date = 
dates.parse_month_year_date(cells[1].text) - product.declare_version(version, date) - -product.write() + for release_table in page_soup.find("div", class_="ibm-container-body").find_all("table", class_="ibm-data-table ibm-grid"): + for row in release_table.find_all("tr")[1:]: # for all rows except the header + cells = row.find_all("td") + version = cells[0].text.strip("AIX ").replace(' TL', '.') + date = dates.parse_month_year_date(cells[1].text) + product_data.declare_version(version, date) diff --git a/src/jira.py b/src/jira.py index 33476b07..60b626ea 100644 --- a/src/jira.py +++ b/src/jira.py @@ -5,13 +5,11 @@ from requests_html import HTMLSession Note that requests_html is used because JavaScript is needed to render the page.""" -product = releasedata.Product("jira") -r = HTMLSession().get("https://www.atlassian.com/software/jira/update") -r.html.render(sleep=1, scrolldown=3) +with releasedata.ProductData("jira") as product_data: + r = HTMLSession().get("https://www.atlassian.com/software/jira/update") + r.html.render(sleep=1, scrolldown=3) -for version_block in r.html.find('.versions-list'): - version = version_block.find('a.product-versions', first=True).attrs['data-version'] - date = dates.parse_date(version_block.find('.release-date', first=True).text) - product.declare_version(version, date) - -product.write() + for version_block in r.html.find('.versions-list'): + version = version_block.find('a.product-versions', first=True).attrs['data-version'] + date = dates.parse_date(version_block.find('.release-date', first=True).text) + product_data.declare_version(version, date) diff --git a/src/looker.py b/src/looker.py index b42ed499..b23212ec 100644 --- a/src/looker.py +++ b/src/looker.py @@ -10,25 +10,23 @@ from common import dates, http, releasedata ANNOUNCEMENT_PATTERN = re.compile(r"includes\s+the\s+following\s+changes", re.IGNORECASE) VERSION_PATTERN = re.compile(r"Looker\s+(?P\d+\.\d+)", re.IGNORECASE) -product = releasedata.Product("looker") -response = http.fetch_url("https://cloud.google.com/feeds/looker-release-notes.xml") -rss = xml.dom.minidom.parseString(response.text) +with releasedata.ProductData("looker") as product_data: + response = http.fetch_url("https://cloud.google.com/feeds/looker-release-notes.xml") + rss = xml.dom.minidom.parseString(response.text) -for item in rss.getElementsByTagName("entry"): - content = item.getElementsByTagName("content")[0].firstChild.nodeValue - content_soup = BeautifulSoup(content, features="html5lib") + for item in rss.getElementsByTagName("entry"): + content = item.getElementsByTagName("content")[0].firstChild.nodeValue + content_soup = BeautifulSoup(content, features="html5lib") - announcement_match = content_soup.find(string=ANNOUNCEMENT_PATTERN) - if not announcement_match: - continue + announcement_match = content_soup.find(string=ANNOUNCEMENT_PATTERN) + if not announcement_match: + continue - version_match = VERSION_PATTERN.search(announcement_match.parent.get_text()) - if not version_match: - continue + version_match = VERSION_PATTERN.search(announcement_match.parent.get_text()) + if not version_match: + continue - version = version_match.group("version") - date_str = item.getElementsByTagName("updated")[0].firstChild.nodeValue - date = dates.parse_datetime(date_str) - product.declare_version(version, date) - -product.write() + version = version_match.group("version") + date_str = item.getElementsByTagName("updated")[0].firstChild.nodeValue + date = dates.parse_datetime(date_str) + product_data.declare_version(version, date) 
diff --git a/src/maven.py b/src/maven.py index 0427459e..28d7b2e4 100644 --- a/src/maven.py +++ b/src/maven.py @@ -7,24 +7,22 @@ METHOD = "maven" p_filter = sys.argv[1] if len(sys.argv) > 1 else None for product in endoflife.list_products(METHOD, p_filter): - product_data = releasedata.Product(product.name) - for config in product.get_auto_configs(METHOD): - start = 0 - group_id, artifact_id = config.url.split("/") + with releasedata.ProductData(product.name) as product_data: + for config in product.get_auto_configs(METHOD): + start = 0 + group_id, artifact_id = config.url.split("/") - while True: - url = f"https://search.maven.org/solrsearch/select?q=g:{group_id}+AND+a:{artifact_id}&core=gav&wt=json&start={start}&rows=100" - data = http.fetch_url(url).json() + while True: + url = f"https://search.maven.org/solrsearch/select?q=g:{group_id}+AND+a:{artifact_id}&core=gav&wt=json&start={start}&rows=100" + data = http.fetch_url(url).json() - for row in data["response"]["docs"]: - version_match = config.first_match(row["v"]) - if version_match: - version = config.render(version_match) - date = datetime.fromtimestamp(row["timestamp"] / 1000, tz=timezone.utc) - product_data.declare_version(version, date) + for row in data["response"]["docs"]: + version_match = config.first_match(row["v"]) + if version_match: + version = config.render(version_match) + date = datetime.fromtimestamp(row["timestamp"] / 1000, tz=timezone.utc) + product_data.declare_version(version, date) - start += 100 - if data["response"]["numFound"] <= start: - break - - product_data.write() + start += 100 + if data["response"]["numFound"] <= start: + break diff --git a/src/npm.py b/src/npm.py index 023f1ecc..f1cce2a7 100644 --- a/src/npm.py +++ b/src/npm.py @@ -6,14 +6,12 @@ METHOD = "npm" p_filter = sys.argv[1] if len(sys.argv) > 1 else None for product in endoflife.list_products(METHOD, p_filter): - product_data = releasedata.Product(product.name) - for config in product.get_auto_configs(METHOD): - data = http.fetch_url(f"https://registry.npmjs.org/{config.url}").json() - for version_str in data["versions"]: - version_match = config.first_match(version_str) - if version_match: - version = config.render(version_match) - date = dates.parse_datetime(data["time"][version_str]) - product_data.declare_version(version, date) - - product_data.write() + with releasedata.ProductData(product.name) as product_data: + for config in product.get_auto_configs(METHOD): + data = http.fetch_url(f"https://registry.npmjs.org/{config.url}").json() + for version_str in data["versions"]: + version_match = config.first_match(version_str) + if version_match: + version = config.render(version_match) + date = dates.parse_datetime(data["time"][version_str]) + product_data.declare_version(version, date) diff --git a/src/nutanix.py b/src/nutanix.py index 219d97db..324647f1 100644 --- a/src/nutanix.py +++ b/src/nutanix.py @@ -9,13 +9,10 @@ PRODUCTS = { } for product_name, url in PRODUCTS.items(): - product = releasedata.Product(product_name) - - data = http.fetch_url(url).json() - for version_data in data["contents"]: - if 'GENERAL_AVAILABILITY' in version_data: - version = version_data["version"] - date = dates.parse_datetime(version_data["GENERAL_AVAILABILITY"]) - product.declare_version(version, date) - - product.write() + with releasedata.ProductData(product_name) as product_data: + data = http.fetch_url(url).json() + for version_data in data["contents"]: + if 'GENERAL_AVAILABILITY' in version_data: + version = version_data["version"] + date = 
dates.parse_datetime(version_data["GENERAL_AVAILABILITY"]) + product_data.declare_version(version, date) diff --git a/src/oracle-jdk.py b/src/oracle-jdk.py index 4f7619ba..c4f176c7 100644 --- a/src/oracle-jdk.py +++ b/src/oracle-jdk.py @@ -5,19 +5,18 @@ from requests_html import HTMLSession This script is using requests-html because the page needs JavaScript to render correctly.""" -product = releasedata.Product("oracle-jdk") -r = HTMLSession().get('https://www.java.com/releases/') -r.html.render(sleep=1, scrolldown=3) +with releasedata.ProductData("oracle-jdk") as product_data: + r = HTMLSession().get('https://www.java.com/releases/') + r.html.render(sleep=1, scrolldown=3) -previous_date = None -for row in r.html.find('#released tr'): - version_cell = row.find('td.anchor', first=True) - if version_cell: - version = version_cell.attrs['id'] - date_str = row.find('td')[1].text - date = dates.parse_date(date_str) if date_str else previous_date - product.declare_version(version, date) - previous_date = date + previous_date = None + for row in r.html.find('#released tr'): + version_cell = row.find('td.anchor', first=True) + if version_cell: + version = version_cell.attrs['id'] + date_str = row.find('td')[1].text + date = dates.parse_date(date_str) if date_str else previous_date + product_data.declare_version(version, date) + previous_date = date -product.remove_version('1.0_alpha') # the only version we don't want, a regex is not needed -product.write() + product_data.remove_version('1.0_alpha') # the only version we don't want, a regex is not needed diff --git a/src/palo-alto-networks.py b/src/palo-alto-networks.py index 935138aa..f7929785 100644 --- a/src/palo-alto-networks.py +++ b/src/palo-alto-networks.py @@ -17,22 +17,20 @@ soup = BeautifulSoup(response.text, features="html5lib") logging.info("::endgroup::") for product_name, identifier in IDENTIFIERS_BY_PRODUCT.items(): - product = releasedata.Product(product_name) - table = soup.find(id=identifier) - for tr in table.findAll("tr")[3:]: - td_list = tr.findAll("td") - if len(td_list) <= 1: - continue + with releasedata.ProductData(product_name) as product_data: + table = soup.find(id=identifier) + for tr in table.findAll("tr")[3:]: + td_list = tr.findAll("td") + if len(td_list) <= 1: + continue - version = td_list[0].get_text().strip().lower().replace(" ", "-").replace("*", "") - version = version.removesuffix("-(cortex-xdr-agent)") - version = version.removesuffix("-(vm-series-only)") - version = version.removesuffix("-(panorama-only)") + version = td_list[0].get_text().strip().lower().replace(" ", "-").replace("*", "") + version = version.removesuffix("-(cortex-xdr-agent)") + version = version.removesuffix("-(vm-series-only)") + version = version.removesuffix("-(panorama-only)") - # A few dates have 1st, 2nd, 4th... Remove it. - date_str = re.sub(r'(\w+) (\d{1,2})\w{2}, (\d{4})', r'\1 \2, \3', td_list[1].get_text()) - date = dates.parse_date(date_str) + # A few dates have 1st, 2nd, 4th... Remove it. 
+ date_str = re.sub(r'(\w+) (\d{1,2})\w{2}, (\d{4})', r'\1 \2, \3', td_list[1].get_text()) + date = dates.parse_date(date_str) - product.declare_version(version, date) - - product.write() + product_data.declare_version(version, date) diff --git a/src/php.py b/src/php.py index f3629c42..34d78b4a 100644 --- a/src/php.py +++ b/src/php.py @@ -2,18 +2,15 @@ from common import dates, endoflife, http, releasedata MAIN_URL = "https://www.php.net/releases/index.php?json&max=-1" -product = releasedata.Product("php") +with releasedata.ProductData("php") as product_data: + # Fetch major versions + latest_by_major = http.fetch_url(MAIN_URL).json() + major_version_urls = [f"{MAIN_URL}&version={major_version}" for major_version in latest_by_major] -# Fetch major versions -latest_by_major = http.fetch_url(MAIN_URL).json() -major_version_urls = [f"{MAIN_URL}&version={major_version}" for major_version in latest_by_major] - -# Fetch all versions for major versions -for major_versions_response in http.fetch_urls(major_version_urls): - major_versions_data = major_versions_response.json() - for version in major_versions_data: - if endoflife.DEFAULT_VERSION_PATTERN.match(version): # exclude versions such as "3.0.x (latest)" - date = dates.parse_date(major_versions_data[version]["date"]) - product.declare_version(version, date) - -product.write() + # Fetch all versions for major versions + for major_versions_response in http.fetch_urls(major_version_urls): + major_versions_data = major_versions_response.json() + for version in major_versions_data: + if endoflife.DEFAULT_VERSION_PATTERN.match(version): # exclude versions such as "3.0.x (latest)" + date = dates.parse_date(major_versions_data[version]["date"]) + product_data.declare_version(version, date) diff --git a/src/plesk.py b/src/plesk.py index 4814bdd2..c0d5d6a9 100644 --- a/src/plesk.py +++ b/src/plesk.py @@ -6,20 +6,18 @@ from common import dates, http, releasedata Only 18.0.20.3 and later will be picked up, as the format of the change log for 18.0.20 and 18.0.19 are different and there is no entry for GA of version 18.0.18 and older.""" -product = releasedata.Product("plesk") -response = http.fetch_url("https://docs.plesk.com/release-notes/obsidian/change-log") -soup = BeautifulSoup(response.text, features="html5lib") +with releasedata.ProductData("plesk") as product_data: + response = http.fetch_url("https://docs.plesk.com/release-notes/obsidian/change-log") + soup = BeautifulSoup(response.text, features="html5lib") -for release in soup.find_all("div", class_="changelog-entry--obsidian"): - version = release.h2.text.strip() - if not version.startswith('Plesk Obsidian 18'): - continue + for release in soup.find_all("div", class_="changelog-entry--obsidian"): + version = release.h2.text.strip() + if not version.startswith('Plesk Obsidian 18'): + continue - version = version.replace(' Update ', '.').replace('Plesk Obsidian ', '') - if ' ' in version: - continue + version = version.replace(' Update ', '.').replace('Plesk Obsidian ', '') + if ' ' in version: + continue - date = dates.parse_date(release.p.text) - product.declare_version(version, date) - -product.write() + date = dates.parse_date(release.p.text) + product_data.declare_version(version, date) diff --git a/src/pypi.py b/src/pypi.py index 81dd2a47..d78c229f 100644 --- a/src/pypi.py +++ b/src/pypi.py @@ -6,17 +6,15 @@ METHOD = "pypi" p_filter = sys.argv[1] if len(sys.argv) > 1 else None for product in endoflife.list_products(METHOD, p_filter): - product_data = releasedata.Product(product.name) - for 
config in product.get_auto_configs(METHOD):
- data = http.fetch_url(f"https://pypi.org/pypi/{config.url}/json").json()
+ with releasedata.ProductData(product.name) as product_data:
+ for config in product.get_auto_configs(METHOD):
+ data = http.fetch_url(f"https://pypi.org/pypi/{config.url}/json").json()
- for version_str in data["releases"]:
- version_match = config.first_match(version_str)
- version_data = data["releases"][version_str]
+ for version_str in data["releases"]:
+ version_match = config.first_match(version_str)
+ version_data = data["releases"][version_str]
- if version_match and version_data:
- version = config.render(version_match)
- date = dates.parse_datetime(version_data[0]["upload_time_iso_8601"])
- product_data.declare_version(version, date)
-
- product_data.write()
+ if version_match and version_data:
+ version = config.render(version_match)
+ date = dates.parse_datetime(version_data[0]["upload_time_iso_8601"])
+ product_data.declare_version(version, date)
diff --git a/src/rds.py b/src/rds.py
index 386355e9..b1c77ba2 100644
--- a/src/rds.py
+++ b/src/rds.py
@@ -16,20 +16,18 @@ PRODUCTS = {
VERSION_REGEX = re.compile(r"(?P<version>\d+(?:\.\d+)*)", flags=re.IGNORECASE) # https://regex101.com/r/BY1vwV/1
for product_name, url in PRODUCTS.items():
- product = releasedata.Product(product_name)
- response = http.fetch_url(url)
- soup = BeautifulSoup(response.text, features="html5lib")
+ with releasedata.ProductData(product_name) as product_data:
+ response = http.fetch_url(url)
+ soup = BeautifulSoup(response.text, features="html5lib")
- for table in soup.find_all("table"):
- for row in table.find_all("tr"):
- columns = row.find_all("td")
- if len(columns) <= 3:
- continue
+ for table in soup.find_all("table"):
+ for row in table.find_all("tr"):
+ columns = row.find_all("td")
+ if len(columns) <= 3:
+ continue
- version_match = VERSION_REGEX.search(columns[0].text.strip())
- if version_match:
- version = version_match.group("version")
- date = dates.parse_date(columns[2].text)
- product.declare_version(version, date)
-
- product.write()
+ version_match = VERSION_REGEX.search(columns[0].text.strip())
+ if version_match:
+ version = version_match.group("version")
+ date = dates.parse_date(columns[2].text)
+ product_data.declare_version(version, date)
diff --git a/src/red-hat-openshift.py b/src/red-hat-openshift.py
index 57905ba0..2bc1d5a9 100644
--- a/src/red-hat-openshift.py
+++ b/src/red-hat-openshift.py
@@ -7,23 +7,21 @@ from common.git import Git
VERSION_AND_DATE_PATTERN = re.compile(r"{product-title}\s(?P<version>\d+\.\d+\.\d+).*\n+Issued:\s(?P<date>\d{4}-\d\d-\d\d)$", re.MULTILINE)
-product = releasedata.Product("red-hat-openshift")
-git = Git("https://github.com/openshift/openshift-docs.git")
-git.setup()
+with releasedata.ProductData("red-hat-openshift") as product_data:
+ git = Git("https://github.com/openshift/openshift-docs.git")
+ git.setup()
-# only fetch v4+ branches, because the format was different in openshift v3
-for branch in git.list_branches("refs/heads/enterprise-[4-9]*"):
- version = branch.split("-")[1].replace(".", "-")
- release_notes_filename = f"release_notes/ocp-{version}-release-notes.adoc"
- git.checkout(branch, file_list=[release_notes_filename])
+ # only fetch v4+ branches, because the format was different in openshift v3
+ for branch in git.list_branches("refs/heads/enterprise-[4-9]*"):
+ version = branch.split("-")[1].replace(".", "-")
+ release_notes_filename = f"release_notes/ocp-{version}-release-notes.adoc"
+ git.checkout(branch, file_list=[release_notes_filename])
- release_notes_file = git.repo_dir / release_notes_filename
- if not release_notes_file.exists():
- continue
+ release_notes_file = git.repo_dir / release_notes_filename
+ if not release_notes_file.exists():
+ continue
- with release_notes_file.open("rb") as f:
- content = f.read().decode("utf-8")
- for (version, date_str) in VERSION_AND_DATE_PATTERN.findall(content):
- product.declare_version(version, dates.parse_date(date_str))
-
-product.write()
+ with release_notes_file.open("rb") as f:
+ content = f.read().decode("utf-8")
+ for (version, date_str) in VERSION_AND_DATE_PATTERN.findall(content):
+ product_data.declare_version(version, dates.parse_date(date_str))
diff --git a/src/redhat-satellite.py b/src/redhat-satellite.py
index f86a242a..59fb326d 100644
--- a/src/redhat-satellite.py
+++ b/src/redhat-satellite.py
@@ -10,19 +10,17 @@ A few of the older versions, such as 'Satellite 6.1 GA Release (Build 6.1.1)', w
# https://regex101.com/r/m8aWXG/1
VERSION_PATTERN = re.compile(r"^Satellite (?P<version>\d+\.\d+\.\d+([.-]\d+)?) ([Uu]pdate|[Rr]elease)$")
-product = releasedata.Product("redhat-satellite")
-response = http.fetch_url("https://access.redhat.com/articles/1365633")
-soup = BeautifulSoup(response.text, features="html5lib")
+with releasedata.ProductData("redhat-satellite") as product_data:
+ response = http.fetch_url("https://access.redhat.com/articles/1365633")
+ soup = BeautifulSoup(response.text, features="html5lib")
-for table in soup.findAll("tbody"):
- for tr in table.findAll("tr"):
- td_list = tr.findAll("td")
+ for table in soup.findAll("tbody"):
+ for tr in table.findAll("tr"):
+ td_list = tr.findAll("td")
- version_str = td_list[0].get_text().replace(' GA', '.0').strip() # x.y GA => x.y.0
- version_match = VERSION_PATTERN.match(version_str)
- if version_match:
- version = version_match["version"].replace('-', '.') # a.b.c-d => a.b.c.d
- date = dates.parse_date(td_list[1].get_text().strip())
- product.declare_version(version, date)
-
-product.write()
+ version_str = td_list[0].get_text().replace(' GA', '.0').strip() # x.y GA => x.y.0
+ version_match = VERSION_PATTERN.match(version_str)
+ if version_match:
+ version = version_match["version"].replace('-', '.') # a.b.c-d => a.b.c.d
+ date = dates.parse_date(td_list[1].get_text().strip())
+ product_data.declare_version(version, date)
diff --git a/src/rhel.py b/src/rhel.py
index 99db4374..0379dd26 100644
--- a/src/rhel.py
+++ b/src/rhel.py
@@ -6,22 +6,19 @@ from common import dates, http, releasedata
# https://regex101.com/r/877ibq/1
VERSION_PATTERN = re.compile(r"RHEL (?P<major>\d)(\. ?(?P<minor>\d+))?(( Update (?P<minor2>\d))| GA)?")
-product = releasedata.Product("redhat")
-response = http.fetch_url("https://access.redhat.com/articles/3078")
-soup = BeautifulSoup(response.text, features="html5lib")
+with releasedata.ProductData("redhat") as product_data:
+ response = http.fetch_url("https://access.redhat.com/articles/3078")
+ soup = BeautifulSoup(response.text, features="html5lib")
-for tr in soup.findAll("tr"):
- td_list = tr.findAll("td")
- if len(td_list) == 0:
- continue
+ for tr in soup.findAll("tr"):
+ td_list = tr.findAll("td")
+ if len(td_list) == 0:
+ continue
- version_str = td_list[0].get_text().strip()
- version_match = VERSION_PATTERN.match(version_str).groupdict()
- version = version_match["major"]
- version += ("." + version_match["minor"]) if version_match["minor"] else ""
- version += ("." + version_match["minor2"]) if version_match["minor2"] else ""
- date = dates.parse_date(td_list[1].get_text())
-
- product.declare_version(version, date)
-
-product.write()
+ version_str = td_list[0].get_text().strip()
+ version_match = VERSION_PATTERN.match(version_str).groupdict()
+ version = version_match["major"]
+ version += ("." + version_match["minor"]) if version_match["minor"] else ""
+ version += ("." + version_match["minor2"]) if version_match["minor2"] else ""
+ date = dates.parse_date(td_list[1].get_text())
+ product_data.declare_version(version, date)
diff --git a/src/rockylinux.py b/src/rockylinux.py
index 5c310e57..4a2d54c8 100644
--- a/src/rockylinux.py
+++ b/src/rockylinux.py
@@ -1,13 +1,10 @@ from common import dates, endoflife, http, releasedata
-product = releasedata.Product("rockylinux")
-response = http.fetch_url("https://raw.githubusercontent.com/rocky-linux/wiki.rockylinux.org/development/docs/include/releng/version_table.md")
-
-for line in response.text.strip().split('\n'):
- items = line.split('|')
- if len(items) >= 5 and endoflife.DEFAULT_VERSION_PATTERN.match(items[1].strip()):
- version = items[1].strip()
- date = dates.parse_date(items[3])
- product.declare_version(version, date)
-
-product.write()
+with releasedata.ProductData("rockylinux") as product_data:
+ response = http.fetch_url("https://raw.githubusercontent.com/rocky-linux/wiki.rockylinux.org/development/docs/include/releng/version_table.md")
+ for line in response.text.strip().split('\n'):
+ items = line.split('|')
+ if len(items) >= 5 and endoflife.DEFAULT_VERSION_PATTERN.match(items[1].strip()):
+ version = items[1].strip()
+ date = dates.parse_date(items[3])
+ product_data.declare_version(version, date)
diff --git a/src/ros.py b/src/ros.py
index e25d8dfc..277e03a6 100644
--- a/src/ros.py
+++ b/src/ros.py
@@ -6,25 +6,23 @@ from common import dates, http, releasedata
# https://regex101.com/r/c1ribd/1
VERSION_PATTERN = re.compile(r"^ROS (?P<version>(\w| )+)")
-product = releasedata.Product("ros")
-response = http.fetch_url("https://wiki.ros.org/Distributions")
-soup = BeautifulSoup(response.text, features="html5lib")
+with releasedata.ProductData("ros") as product_data:
+ response = http.fetch_url("https://wiki.ros.org/Distributions")
+ soup = BeautifulSoup(response.text, features="html5lib")
-for tr in soup.findAll("tr"):
- td_list = tr.findAll("td")
- if len(td_list) == 0:
- continue
+ for tr in soup.findAll("tr"):
+ td_list = tr.findAll("td")
+ if len(td_list) == 0:
+ continue
- version_str = td_list[0].get_text().strip()
- if VERSION_PATTERN.match(version_str):
- # Get the "code" (such as noetic) instead of the display name (such as Noetic Ninjemys)
- version = td_list[0].findAll("a")[0]["href"][1:]
- try:
- date = dates.parse_date(td_list[1].get_text())
- except ValueError: # The day has a suffix (such as May 23rd, 2020)
- x = td_list[1].get_text().split(",")
- date = dates.parse_date(x[0][:-2] + x[1])
+ version_str = td_list[0].get_text().strip()
+ if VERSION_PATTERN.match(version_str):
+ # Get the "code" (such as noetic) instead of the display name (such as Noetic Ninjemys)
+ version = td_list[0].findAll("a")[0]["href"][1:]
+ try:
+ date = dates.parse_date(td_list[1].get_text())
+ except ValueError: # The day has a suffix (such as May 23rd, 2020)
+ x = td_list[1].get_text().split(",")
+ date = dates.parse_date(x[0][:-2] + x[1])
- product.declare_version(version, date)
-
-product.write()
+ product_data.declare_version(version, date)
diff --git a/src/sles.py b/src/sles.py
index 3260bcf1..169c66f5 100644
--- a/src/sles.py
+++ b/src/sles.py
@@ -3,30 +3,28 @@ import logging
from bs4 import BeautifulSoup
from common import dates, http, releasedata
-product = releasedata.Product("sles")
-response = http.fetch_url("https://www.suse.com/lifecycle")
-soup = BeautifulSoup(response.text, features="html5lib")
+with releasedata.ProductData("sles") as product_data:
+ response = http.fetch_url("https://www.suse.com/lifecycle")
+ soup = BeautifulSoup(response.text, features="html5lib")
-products_table = soup.find("tbody", id="productSupportLifecycle")
-sles_header_rows = products_table.find_all("tr", class_="row", attrs={"data-productfilter": "SUSE Linux Enterprise Server"})
+ products_table = soup.find("tbody", id="productSupportLifecycle")
+ sles_header_rows = products_table.find_all("tr", class_="row", attrs={"data-productfilter": "SUSE Linux Enterprise Server"})
-# Extract rows' IDs to find related sub-rows with details (normally hidden until a user expands a section)
-for detail_id in [f"detail{row['id']}" for row in sles_header_rows]:
- detail_row = products_table.find("tr", id=detail_id)
- # There is a table with info about minor releases and after it, optionally, a table with info about modules
- minor_versions_table = detail_row.find_all("tbody")[0]
+ # Extract rows' IDs to find related sub-rows with details (normally hidden until a user expands a section)
+ for detail_id in [f"detail{row['id']}" for row in sles_header_rows]:
+ detail_row = products_table.find("tr", id=detail_id)
+ # There is a table with info about minor releases and after it, optionally, a table with info about modules
+ minor_versions_table = detail_row.find_all("tbody")[0]
- # The first sub-row is a header, the rest contains info about the first release and later minor releases
- for row in minor_versions_table.find_all("tr")[1:]:
- # For each minor release there is an FCS date, general support end date and LTSS end date
- cells = row.find_all("td")
- version = cells[0].text.replace("SUSE Linux Enterprise Server ", '').replace(' SP', '.')
- date_str = cells[1].text
+ # The first sub-row is a header, the rest contains info about the first release and later minor releases
+ for row in minor_versions_table.find_all("tr")[1:]:
+ # For each minor release there is an FCS date, general support end date and LTSS end date
+ cells = row.find_all("td")
+ version = cells[0].text.replace("SUSE Linux Enterprise Server ", '').replace(' SP', '.')
+ date_str = cells[1].text
- try:
- date = dates.parse_date(date_str)
- product.declare_version(version, date)
- except ValueError:
- logging.info(f"Ignoring {version}: date '{date_str}' could not be parsed")
-
-product.write()
+ try:
+ date = dates.parse_date(date_str)
+ product_data.declare_version(version, date)
+ except ValueError:
+ logging.info(f"Ignoring {version}: date '{date_str}' could not be parsed")
diff --git a/src/splunk.py b/src/splunk.py
index f4bf6a1d..f33e0f98 100644
--- a/src/splunk.py
+++ b/src/splunk.py
@@ -30,20 +30,18 @@ def get_latest_minor_versions(versions: list[str]) -> list[str]:
return latest_versions
-product = releasedata.Product("splunk")
-main = http.fetch_url("https://docs.splunk.com/Documentation/Splunk")
-soup = BeautifulSoup(main.text, features="html5lib")
+with releasedata.ProductData("splunk") as product_data:
+ main = http.fetch_url("https://docs.splunk.com/Documentation/Splunk")
+ soup = BeautifulSoup(main.text, features="html5lib")
-all_versions = [option.attrs['value'] for option in soup.select("select#version-select > option")]
+ all_versions = [option.attrs['value'] for option in soup.select("select#version-select > option")]
-# Latest minor release notes contains release notes for all previous minor versions.
-# For example, 9.0.5 release notes also contains release notes for 9.0.0 to 9.0.4.
-latest_minor_versions = get_latest_minor_versions(all_versions)
-latest_minor_versions_urls = [f"https://docs.splunk.com/Documentation/Splunk/{v}/ReleaseNotes/MeetSplunk" for v in latest_minor_versions]
-for response in http.fetch_urls(latest_minor_versions_urls):
- for (version_str, date_str) in VERSION_DATE_PATTERN.findall(response.text):
- version_str = f"{version_str}.0" if len(version_str.split(".")) == 2 else version_str # convert x.y to x.y.0
- date = dates.parse_date(date_str)
- product.declare_version(version_str, date)
-
-product.write()
+ # Latest minor release notes contain release notes for all previous minor versions.
+ # For example, 9.0.5 release notes also contain release notes for 9.0.0 to 9.0.4.
+ latest_minor_versions = get_latest_minor_versions(all_versions)
+ latest_minor_versions_urls = [f"https://docs.splunk.com/Documentation/Splunk/{v}/ReleaseNotes/MeetSplunk" for v in latest_minor_versions]
+ for response in http.fetch_urls(latest_minor_versions_urls):
+ for (version_str, date_str) in VERSION_DATE_PATTERN.findall(response.text):
+ version_str = f"{version_str}.0" if len(version_str.split(".")) == 2 else version_str # convert x.y to x.y.0
+ date = dates.parse_date(date_str)
+ product_data.declare_version(version_str, date)
diff --git a/src/typo3.py b/src/typo3.py
index 2f3a1c79..38a235b7 100644
--- a/src/typo3.py
+++ b/src/typo3.py
@@ -1,13 +1,11 @@ from common import dates, http, releasedata
-product = releasedata.Product("typo3")
-data = http.fetch_url("https://get.typo3.org/api/v1/release/").json()
-for v in data:
- if v['type'] == 'development':
- continue
+with releasedata.ProductData("typo3") as product_data:
+ data = http.fetch_url("https://get.typo3.org/api/v1/release/").json()
+ for v in data:
+ if v['type'] == 'development':
+ continue
- version = v["version"]
- date = dates.parse_datetime(v["date"], to_utc=False) # utc kept for now for backwards compatibility
- product.declare_version(version, date)
-
-product.write()
+ version = v["version"]
+ date = dates.parse_datetime(v["date"], to_utc=False) # utc kept for now for backwards compatibility
+ product_data.declare_version(version, date)
diff --git a/src/unity.py b/src/unity.py
index 8566e0ea..704344b9 100644
--- a/src/unity.py
+++ b/src/unity.py
@@ -9,20 +9,17 @@ from common import dates, http, releasedata
BASE_URL = "https://unity.com/releases/editor/qa/lts-releases"
-product = releasedata.Product("unity")
next_page_url = BASE_URL
+with releasedata.ProductData("unity") as product_data:
+ # Do not try to fetch multiple pages in parallel: it raises a lot of errors and makes the overall process slower.
+ while next_page_url:
+ response = http.fetch_url(next_page_url)
+ soup = BeautifulSoup(response.text, features="html5lib")
-# Do not try to fetch multiple pages in parallel: it is raising a lot of errors and make the overall process slower.
-while next_page_url:
- response = http.fetch_url(next_page_url)
- soup = BeautifulSoup(response.text, features="html5lib")
+ for release in soup.find_all('div', class_='component-releases-item__show__inner-header'):
+ version = release.find('h4').find('span').text
+ date = dates.parse_datetime(release.find('time').attrs['datetime'])
+ product_data.declare_version(version, date)
- for release in soup.find_all('div', class_='component-releases-item__show__inner-header'):
- version = release.find('h4').find('span').text
- date = dates.parse_datetime(release.find('time').attrs['datetime'])
- product.declare_version(version, date)
-
- next_link = soup.find('a', {"rel": "next"})
- next_page_url = BASE_URL + next_link.attrs['href'] if next_link else None
-
-product.write()
+ next_link = soup.find('a', {"rel": "next"})
+ next_page_url = BASE_URL + next_link.attrs['href'] if next_link else None
diff --git a/src/unrealircd.py b/src/unrealircd.py
index ef58f394..8b0ecc3b 100644
--- a/src/unrealircd.py
+++ b/src/unrealircd.py
@@ -5,19 +5,17 @@ from common import dates, endoflife, http, releasedata
DATE_PATTERN = re.compile(r"\d{4}-\d{2}-\d{2}")
-product = releasedata.Product("unrealircd")
-response = http.fetch_url("https://www.unrealircd.org/docwiki/index.php?title=History_of_UnrealIRCd_releases&action=raw")
-wikicode = mwparserfromhell.parse(response.text)
+with releasedata.ProductData("unrealircd") as product_data:
+ response = http.fetch_url("https://www.unrealircd.org/docwiki/index.php?title=History_of_UnrealIRCd_releases&action=raw")
+ wikicode = mwparserfromhell.parse(response.text)
-for tr in wikicode.ifilter_tags(matches=lambda node: node.tag == "tr"):
- items = tr.contents.filter_tags(matches=lambda node: node.tag == "td")
- if len(items) < 2:
- continue
+ for tr in wikicode.ifilter_tags(matches=lambda node: node.tag == "tr"):
+ items = tr.contents.filter_tags(matches=lambda node: node.tag == "td")
+ if len(items) < 2:
+ continue
- version = items[0].__strip__()
- date_str = items[1].__strip__()
- if endoflife.DEFAULT_VERSION_PATTERN.match(version) and DATE_PATTERN.match(date_str):
- date = dates.parse_date(date_str)
- product.declare_version(version, date)
-
-product.write()
+ version = items[0].__strip__()
+ date_str = items[1].__strip__()
+ if endoflife.DEFAULT_VERSION_PATTERN.match(version) and DATE_PATTERN.match(date_str):
+ date = dates.parse_date(date_str)
+ product_data.declare_version(version, date)
diff --git a/src/veeam-backup-and-replication.py b/src/veeam-backup-and-replication.py
index c1f8f9a9..8dfd7edc 100644
--- a/src/veeam-backup-and-replication.py
+++ b/src/veeam-backup-and-replication.py
@@ -5,27 +5,25 @@ from common import dates, http, releasedata
"""Fetches Veeam versions from https://www.veeam.com."""
-product = releasedata.Product("veeam-backup-and-replication")
-response = http.fetch_url("https://www.veeam.com/kb2680")
-soup = BeautifulSoup(response.text, features="html5lib")
+with releasedata.ProductData("veeam-backup-and-replication") as product_data:
+ response = http.fetch_url("https://www.veeam.com/kb2680")
+ soup = BeautifulSoup(response.text, features="html5lib")
-for table in soup.find_all("table"):
- headers = [header.get_text().strip().lower() for header in table.find("tr").find_all("td")]
- if "build number" not in headers or "release date" not in headers:
- continue
-
- version_index = headers.index("build number")
- date_index = headers.index("release date")
- for row in table.find_all("tr")[1:]:
- cells = row.find_all("td")
- if len(cells) <= max(version_index, date_index):
+ for table in soup.find_all("table"):
+ headers = [header.get_text().strip().lower() for header in table.find("tr").find_all("td")]
+ if "build number" not in headers or "release date" not in headers:
continue
- date_str = cells[date_index].get_text().strip()
- if date_str and date_str != "-":
- # whitespaces in version numbers are replaced with dashes
- version = re.sub(r'\s+', "-", cells[version_index].get_text().strip())
- date = dates.parse_date(date_str)
- product.declare_version(version, date)
+ version_index = headers.index("build number")
+ date_index = headers.index("release date")
+ for row in table.find_all("tr")[1:]:
+ cells = row.find_all("td")
+ if len(cells) <= max(version_index, date_index):
+ continue
-product.write()
+ date_str = cells[date_index].get_text().strip()
+ if date_str and date_str != "-":
+ # whitespace in version numbers is replaced with dashes
+ version = re.sub(r'\s+', "-", cells[version_index].get_text().strip())
+ date = dates.parse_date(date_str)
+ product_data.declare_version(version, date)
diff --git a/src/visualstudio.py b/src/visualstudio.py
index e936e5ea..b92854ac 100644
--- a/src/visualstudio.py
+++ b/src/visualstudio.py
@@ -9,27 +9,25 @@ URLS = [
"https://learn.microsoft.com/en-us/visualstudio/releases/2022/release-history",
]
-product = releasedata.Product("visualstudio")
-for response in http.fetch_urls(URLS):
- soup = BeautifulSoup(response.text, features="html5lib")
+with releasedata.ProductData("visualstudio") as product_data:
+ for response in http.fetch_urls(URLS):
+ soup = BeautifulSoup(response.text, features="html5lib")
- for table in soup.find_all("table"):
- headers = [th.get_text().strip().lower() for th in table.find_all("th")]
- if "version" not in headers or "release date" not in headers:
- continue
-
- version_index = headers.index("version")
- date_index = headers.index("release date")
- for row in table.findAll("tr"):
- cells = row.findAll("td")
- if len(cells) < (max(version_index, date_index) + 1):
+ for table in soup.find_all("table"):
+ headers = [th.get_text().strip().lower() for th in table.find_all("th")]
+ if "version" not in headers or "release date" not in headers:
continue
- version = cells[version_index].get_text().strip()
- date = cells[date_index].get_text().strip()
- date = dates.parse_date(date)
+ version_index = headers.index("version")
+ date_index = headers.index("release date")
+ for row in table.findAll("tr"):
+ cells = row.findAll("td")
+ if len(cells) < (max(version_index, date_index) + 1):
+ continue
- if date and version and endoflife.DEFAULT_VERSION_PATTERN.match(version):
- product.declare_version(version, date)
+ version = cells[version_index].get_text().strip()
+ date = cells[date_index].get_text().strip()
+ date = dates.parse_date(date)
-product.write()
+ if date and version and endoflife.DEFAULT_VERSION_PATTERN.match(version):
+ product_data.declare_version(version, date)
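
Note: every hunk above assumes that releasedata.ProductData is a context manager that collects declared versions and persists them when the with block exits, which is why the explicit product.write() calls disappear. The real implementation lives in src/common/releasedata.py and may differ; the snippet below is only a minimal sketch of that contract, with a hypothetical output path (releases/<name>.json) chosen for illustration.

import datetime
import json
from pathlib import Path

class ProductData:
    """Sketch only: collect declared versions and write them out on clean exit."""

    def __init__(self, name: str) -> None:
        self.name = name
        self.versions: dict[str, str] = {}

    def declare_version(self, version: str, date: datetime.date) -> None:
        # Record (or overwrite) the release date for a version as an ISO string.
        self.versions[version] = date.isoformat()

    def __enter__(self) -> "ProductData":
        return self

    def __exit__(self, exc_type, exc, tb) -> bool:
        # Only persist the data if the block completed without raising,
        # so a failed scrape does not clobber the previous results.
        if exc_type is None:
            Path(f"releases/{self.name}.json").write_text(json.dumps(self.versions, indent=2))
        return False  # never suppress exceptions

Under this assumption, the scripts behave the same as before on success, and additionally skip the write step whenever scraping or parsing raises, instead of writing partial data.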