From 391d65ad8ae1f04bf08e9e0ffc3c55f9c2027fe4 Mon Sep 17 00:00:00 2001 From: Marc Wrobel Date: Sun, 6 Jul 2025 22:42:01 +0200 Subject: [PATCH] Simplify argument parsing (#459) With the current state of automation scripts, it is no longer possible to launch a script with multiple auto configs. --- src/amazon-eks.py | 41 +++++------ src/amazon-neptune.py | 29 ++++---- src/apache-http-server.py | 31 +++++---- src/apache-subversion.py | 29 ++++---- src/apple.py | 51 +++++++------- src/artifactory.py | 31 +++++---- src/atlassian_eol.py | 31 +++++---- src/atlassian_versions.py | 19 ++--- src/aws-lambda.py | 71 +++++++++---------- src/cgit.py | 41 +++++------ src/chef-infra.py | 25 +++---- src/chef-inspec.py | 21 +++--- src/coldfusion.py | 23 +++--- src/common/endoflife.py | 9 +++ src/common/releasedata.py | 6 +- src/cos.py | 51 +++++++------- src/couchbase-server.py | 37 +++++----- src/debian.py | 21 +++--- src/distrowatch.py | 29 ++++---- src/docker_hub.py | 11 +-- src/firefox.py | 25 +++---- src/ghc-wiki.py | 69 +++++++++--------- src/git.py | 25 +++---- src/github_releases.py | 25 +++---- src/github_tags.py | 21 +++--- src/google-kubernetes-engine.py | 27 ++++---- src/graalvm.py | 57 +++++++-------- src/haproxy.py | 43 ++++++------ src/ibm-aix.py | 21 +++--- src/kuma.py | 37 +++++----- src/libreoffice.py | 37 +++++----- src/looker.py | 35 +++++----- src/lua.py | 27 ++++---- src/maven.py | 35 +++++----- src/netbsd.py | 45 ++++++------ src/npm.py | 21 +++--- src/nutanix.py | 21 +++--- src/oracle-jdk.py | 31 +++++---- src/pan-os.py | 17 ++--- src/php.py | 27 ++++---- src/plesk.py | 27 ++++---- src/pypi.py | 23 +++--- src/rds.py | 35 +++++----- src/red-hat-jboss-eap-7.py | 59 ++++++++-------- src/red-hat-jboss-eap-8.py | 21 +++--- src/red-hat-openshift.py | 43 ++++++------ src/red-hat-satellite.py | 35 +++++----- src/redhat_lifecycles.py | 41 +++++------ src/release_table.py | 119 ++++++++++++++++---------------- src/rhel.py | 31 +++++---- src/rocky-linux.py | 21 +++--- 
src/ros.py | 43 ++++++------ src/samsung-security.py | 100 +++++++++++++-------------- src/sles.py | 45 ++++++------ src/splunk.py | 31 +++++---- src/typo3.py | 21 +++--- src/unity.py | 17 ++--- src/unrealircd.py | 27 ++++---- src/veeam.py | 51 +++++++------- src/virtualbox.py | 41 +++++------ src/visual-studio.py | 39 ++++++----- 61 files changed, 1091 insertions(+), 1032 deletions(-) diff --git a/src/amazon-eks.py b/src/amazon-eks.py index 28052296..f8de0d36 100644 --- a/src/amazon-eks.py +++ b/src/amazon-eks.py @@ -1,31 +1,32 @@ import logging -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetches EKS versions from AWS docs. Now that AWS no longer publishes docs on GitHub, we use the Web Archive to get the older versions.""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url) - for tr in html.select("#main-col-body")[0].findAll("tr"): - cells = tr.findAll("td") - if not cells: - continue + for tr in html.select("#main-col-body")[0].findAll("tr"): + cells = tr.findAll("td") + if not cells: + continue - k8s_version_text = cells[0].text.strip() - k8s_version_match = config.first_match(k8s_version_text) - if not k8s_version_match: - logging.warning(f"Skipping {k8s_version_text}: does not match version regex(es)") - continue + k8s_version_text = cells[0].text.strip() + k8s_version_match = config.first_match(k8s_version_text) + if not k8s_version_match: + logging.warning(f"Skipping {k8s_version_text}: does not match version regex(es)") + continue - eks_version = cells[1].text.strip() - # K8S patch version is not kept to match versions on https://github.com/aws/eks-distro/tags - version = 
f"{k8s_version_match.group('major')}.{k8s_version_match.group('minor')}-{eks_version.replace('.', '-')}" + eks_version = cells[1].text.strip() + # K8S patch version is not kept to match versions on https://github.com/aws/eks-distro/tags + version = f"{k8s_version_match.group('major')}.{k8s_version_match.group('minor')}-{eks_version.replace('.', '-')}" - date_str = cells[-1].text.strip() - date_str = date_str.replace("April 18.2025", "April 18 2025") # temporary fix for a typo in the source - date = dates.parse_date_or_month_year_date(date_str) + date_str = cells[-1].text.strip() + date_str = date_str.replace("April 18.2025", "April 18 2025") # temporary fix for a typo in the source + date = dates.parse_date_or_month_year_date(date_str) - product_data.declare_version(version, date) + product_data.declare_version(version, date) diff --git a/src/amazon-neptune.py b/src/amazon-neptune.py index 18c27633..b915c5a6 100644 --- a/src/amazon-neptune.py +++ b/src/amazon-neptune.py @@ -1,22 +1,23 @@ import logging -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetches Amazon Neptune versions from its RSS feed on docs.aws.amazon.com.""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - rss = http.fetch_xml(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + rss = http.fetch_xml(config.url) - for entry in rss.getElementsByTagName("item"): - version_str = entry.getElementsByTagName("title")[0].firstChild.nodeValue - date_str = entry.getElementsByTagName("pubDate")[0].firstChild.nodeValue + for entry in rss.getElementsByTagName("item"): + version_str = entry.getElementsByTagName("title")[0].firstChild.nodeValue + date_str = entry.getElementsByTagName("pubDate")[0].firstChild.nodeValue - version_match = config.first_match(version_str) - if not version_match: - 
logging.warning(f"Skipping entry with malformed version: {entry}") - continue + version_match = config.first_match(version_str) + if not version_match: + logging.warning(f"Skipping entry with malformed version: {entry}") + continue - version = config.render(version_match) - date = dates.parse_datetime(date_str) - product_data.declare_version(version, date) + version = config.render(version_match) + date = dates.parse_datetime(date_str) + product_data.declare_version(version, date) diff --git a/src/apache-http-server.py b/src/apache-http-server.py index 416df001..8d68e016 100644 --- a/src/apache-http-server.py +++ b/src/apache-http-server.py @@ -1,24 +1,25 @@ -from common import dates, releasedata +from common import dates from common.git import Git +from common.releasedata import ProductData, config_from_argv """Fetches Apache HTTP Server versions and release date from its git repository by looking at the STATUS file of each ..x branch.""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - git = Git(config.url) - git.setup() +config = config_from_argv() +with ProductData(config.product) as product_data: + git = Git(config.url) + git.setup() - for branch in git.list_branches("refs/heads/?.?.x"): - git.checkout(branch, file_list=["STATUS"]) + for branch in git.list_branches("refs/heads/?.?.x"): + git.checkout(branch, file_list=["STATUS"]) - release_notes_file = git.repo_dir / "STATUS" - if not release_notes_file.exists(): - continue + release_notes_file = git.repo_dir / "STATUS" + if not release_notes_file.exists(): + continue - with release_notes_file.open("rb") as f: - release_notes = f.read().decode("utf-8", errors="ignore") + with release_notes_file.open("rb") as f: + release_notes = f.read().decode("utf-8", errors="ignore") - for pattern in config.include_version_patterns: - for (version, date_str) in pattern.findall(release_notes): - product_data.declare_version(version, 
dates.parse_date(date_str)) + for pattern in config.include_version_patterns: + for (version, date_str) in pattern.findall(release_notes): + product_data.declare_version(version, dates.parse_date(date_str)) diff --git a/src/apache-subversion.py b/src/apache-subversion.py index cdc1647b..5d5f94e1 100644 --- a/src/apache-subversion.py +++ b/src/apache-subversion.py @@ -1,19 +1,20 @@ import logging -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url) - ul = html.find("h2").find_next("ul") - for li in ul.find_all("li"): - text = li.get_text(strip=True) - match = config.first_match(text) - if not match: - logging.info(f"Skipping {text}, does not match any regex") - continue + ul = html.find("h2").find_next("ul") + for li in ul.find_all("li"): + text = li.get_text(strip=True) + match = config.first_match(text) + if not match: + logging.info(f"Skipping {text}, does not match any regex") + continue - version = match.group("version") - date = dates.parse_date(match.group("date")) - product_data.declare_version(version, date) + version = match.group("version") + date = dates.parse_date(match.group("date")) + product_data.declare_version(version, date) diff --git a/src/apple.py b/src/apple.py index 3c8a9546..76c97aa7 100644 --- a/src/apple.py +++ b/src/apple.py @@ -2,7 +2,8 @@ import logging import re from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetches and parses version and release date information from Apple's support website.""" @@ -22,31 +23,31 @@ URLS = [ DATE_PATTERN = 
re.compile(r"\b\d+\s[A-Za-z]+\s\d+\b") -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - # URLs are cached to avoid rate limiting by support.apple.com. - soups = [BeautifulSoup(response.text, features="html5lib") for response in http.fetch_urls(URLS)] +config = config_from_argv() +with ProductData(config.product) as product_data: + # URLs are cached to avoid rate limiting by support.apple.com. + soups = [BeautifulSoup(response.text, features="html5lib") for response in http.fetch_urls(URLS)] - for soup in soups: - versions_table = soup.find(id="tableWraper") - versions_table = versions_table if versions_table else soup.find('table', class_="gb-table") + for soup in soups: + versions_table = soup.find(id="tableWraper") + versions_table = versions_table if versions_table else soup.find('table', class_="gb-table") - for row in versions_table.findAll("tr")[1:]: - cells = row.findAll("td") - version_text = cells[0].get_text().strip() - date_text = cells[2].get_text().strip() + for row in versions_table.findAll("tr")[1:]: + cells = row.findAll("td") + version_text = cells[0].get_text().strip() + date_text = cells[2].get_text().strip() - date_match = DATE_PATTERN.search(date_text) - if not date_match: - logging.info(f"ignoring version {version_text} ({date_text}), date pattern don't match") - continue + date_match = DATE_PATTERN.search(date_text) + if not date_match: + logging.info(f"ignoring version {version_text} ({date_text}), date pattern don't match") + continue - date_str = date_match.group(0).replace("Sept ", "Sep ") - date = dates.parse_date(date_str) - for version_pattern in config.include_version_patterns: - for version_str in version_pattern.findall(version_text): - version = product_data.get_version(version_str) - if not version or version.date() > date: - product_data.declare_version(version_str, date) - else: - logging.info(f"ignoring version {version_str} ({date}) for {product_data.name}") 
+ date_str = date_match.group(0).replace("Sept ", "Sep ") + date = dates.parse_date(date_str) + for version_pattern in config.include_version_patterns: + for version_str in version_pattern.findall(version_text): + version = product_data.get_version(version_str) + if not version or version.date() > date: + product_data.declare_version(version_str, date) + else: + logging.info(f"ignoring version {version_str} ({date}) for {product_data.name}") diff --git a/src/artifactory.py b/src/artifactory.py index 2c4dfe35..58cb106b 100644 --- a/src/artifactory.py +++ b/src/artifactory.py @@ -1,22 +1,23 @@ from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetches Artifactory versions from https://jfrog.com, using requests_html because JavaScript is needed to render the page.""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - content = http.fetch_javascript_url(config.url, wait_until = 'networkidle') - soup = BeautifulSoup(content, 'html.parser') +config = config_from_argv() +with ProductData(config.product) as product_data: + content = http.fetch_javascript_url(config.url, wait_until = 'networkidle') + soup = BeautifulSoup(content, 'html.parser') - for row in soup.select('.informaltable tbody tr'): - cells = row.select("td") - if len(cells) >= 2: - version = cells[0].text.strip() - if version: - date_str = cells[1].text.strip().replace("_", "-").replace("Sept-", "Sep-") - product_data.declare_version(version, dates.parse_date(date_str)) + for row in soup.select('.informaltable tbody tr'): + cells = row.select("td") + if len(cells) >= 2: + version = cells[0].text.strip() + if version: + date_str = cells[1].text.strip().replace("_", "-").replace("Sept-", "Sep-") + product_data.declare_version(version, dates.parse_date(date_str)) - # 7.29.9 release date is wrong on 
https://jfrog.com/help/r/jfrog-release-information/artifactory-end-of-life. - # Sent a mail to jfrog-help-center-feedback@jfrog.com to fix it, but in the meantime... - product_data.declare_version('7.29.9', dates.date(2022, 1, 11)) + # 7.29.9 release date is wrong on https://jfrog.com/help/r/jfrog-release-information/artifactory-end-of-life. + # Sent a mail to jfrog-help-center-feedback@jfrog.com to fix it, but in the meantime... + product_data.declare_version('7.29.9', dates.date(2022, 1, 11)) diff --git a/src/atlassian_eol.py b/src/atlassian_eol.py index 877078ae..9ddcb80b 100644 --- a/src/atlassian_eol.py +++ b/src/atlassian_eol.py @@ -1,7 +1,8 @@ import logging from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetches EOL dates from Atlassian EOL page. @@ -9,19 +10,19 @@ This script takes a selector argument which is the product title identifier on t `AtlassianSupportEndofLifePolicy-JiraSoftware`. 
""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - content = http.fetch_javascript_url(config.url) - soup = BeautifulSoup(content, features="html5lib") +config = config_from_argv() +with ProductData(config.product) as product_data: + content = http.fetch_javascript_url(config.url) + soup = BeautifulSoup(content, features="html5lib") - # Find the section with the EOL dates - for li in soup.select(f"#{config.data.get('selector')}+ul li"): - match = config.first_match(li.get_text(strip=True)) - if not match: - logging.warning(f"Skipping '{li.get_text(strip=True)}', no match found") - continue + # Find the section with the EOL dates + for li in soup.select(f"#{config.data.get('selector')}+ul li"): + match = config.first_match(li.get_text(strip=True)) + if not match: + logging.warning(f"Skipping '{li.get_text(strip=True)}', no match found") + continue - release_name = match.group("release") - date = dates.parse_date(match.group("date")) - release = product_data.get_release(release_name) - release.set_eol(date) + release_name = match.group("release") + date = dates.parse_date(match.group("date")) + release = product_data.get_release(release_name) + release.set_eol(date) diff --git a/src/atlassian_versions.py b/src/atlassian_versions.py index 3511e4b9..b886878c 100644 --- a/src/atlassian_versions.py +++ b/src/atlassian_versions.py @@ -1,5 +1,6 @@ from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetches versions from Atlassian download-archives pages. @@ -7,12 +8,12 @@ This script takes a single argument which is the url of the product's download-a `https://www.atlassian.com/software/confluence/download-archives`. 
""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - content = http.fetch_javascript_url(config.url, wait_until='networkidle') - soup = BeautifulSoup(content, 'html5lib') +config = config_from_argv() +with ProductData(config.product) as product_data: + content = http.fetch_javascript_url(config.url, wait_until='networkidle') + soup = BeautifulSoup(content, 'html5lib') - for version_block in soup.select('.versions-list'): - version = version_block.select_one('a.product-versions').attrs['data-version'] - date = dates.parse_date(version_block.select_one('.release-date').text) - product_data.declare_version(version, date) + for version_block in soup.select('.versions-list'): + version = version_block.select_one('a.product-versions').attrs['data-version'] + date = dates.parse_date(version_block.select_one('.release-date').text) + product_data.declare_version(version, date) diff --git a/src/aws-lambda.py b/src/aws-lambda.py index afbe3d0a..400f9585 100644 --- a/src/aws-lambda.py +++ b/src/aws-lambda.py @@ -1,46 +1,47 @@ import logging -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetches AWS lambda runtimes with their support / EOL dates from https://docs.aws.amazon.com.""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url) - for i, table in enumerate(html.find_all("table")): - headers = [th.get_text().strip().lower() for th in table.find("thead").find_all("tr")[0].find_all("th")] - if "identifier" not in headers or "deprecation date" not in headers or "block function update" not in headers: - logging.info(f"table with header '{headers}' does not contain all the expected headers") - continue 
+ for i, table in enumerate(html.find_all("table")): + headers = [th.get_text().strip().lower() for th in table.find("thead").find_all("tr")[0].find_all("th")] + if "identifier" not in headers or "deprecation date" not in headers or "block function update" not in headers: + logging.info(f"table with header '{headers}' does not contain all the expected headers") + continue - is_supported_table = i == 0 # first table is for supported runtimes, second for deprecated ones - identifier_index = headers.index("identifier") - deprecation_date_index = headers.index("deprecation date") - block_function_update_index = headers.index("block function update") + is_supported_table = i == 0 # first table is for supported runtimes, second for deprecated ones + identifier_index = headers.index("identifier") + deprecation_date_index = headers.index("deprecation date") + block_function_update_index = headers.index("block function update") - for row in table.find("tbody").find_all("tr"): - cells = row.find_all("td") - identifier = cells[identifier_index].get_text().strip() + for row in table.find("tbody").find_all("tr"): + cells = row.find_all("td") + identifier = cells[identifier_index].get_text().strip() - deprecation_date_str = cells[deprecation_date_index].get_text().strip() - try: - deprecation_date = dates.parse_date(deprecation_date_str) - except ValueError: - deprecation_date = None + deprecation_date_str = cells[deprecation_date_index].get_text().strip() + try: + deprecation_date = dates.parse_date(deprecation_date_str) + except ValueError: + deprecation_date = None - if identifier == "nodejs4.3-edge": - # there is a mistake in the data: block function update date cannot be before the deprecation date - block_function_update_str = "2020-04-30" - else: - block_function_update_str = cells[block_function_update_index].get_text().strip() - try: - block_function_update = dates.parse_date(block_function_update_str) - except ValueError: - block_function_update = None + if identifier 
== "nodejs4.3-edge": + # there is a mistake in the data: block function update date cannot be before the deprecation date + block_function_update_str = "2020-04-30" + else: + block_function_update_str = cells[block_function_update_index].get_text().strip() + try: + block_function_update = dates.parse_date(block_function_update_str) + except ValueError: + block_function_update = None - release = product_data.get_release(identifier) - # if no date is available, use False for supported runtimes and True for deprecated ones - release.set_eoas(deprecation_date if deprecation_date else not is_supported_table) - # if no date is available, use False for supported runtimes and True for deprecated ones - release.set_eol(block_function_update if block_function_update else not is_supported_table) + release = product_data.get_release(identifier) + # if no date is available, use False for supported runtimes and True for deprecated ones + release.set_eoas(deprecation_date if deprecation_date else not is_supported_table) + # if no date is available, use False for supported runtimes and True for deprecated ones + release.set_eol(block_function_update if block_function_update else not is_supported_table) diff --git a/src/cgit.py b/src/cgit.py index 43ef1805..1e221e92 100644 --- a/src/cgit.py +++ b/src/cgit.py @@ -1,28 +1,29 @@ -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetches versions from repositories managed with cgit, such as the Linux kernel repository. 
Ideally we would want to use the git repository directly, but cgit-managed repositories don't support partial clone.""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url + '/refs/tags') +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url + '/refs/tags') - for table in html.find_all("table", class_="list"): - for row in table.find_all("tr"): - columns = row.find_all("td") - if len(columns) != 4: - continue + for table in html.find_all("table", class_="list"): + for row in table.find_all("tr"): + columns = row.find_all("td") + if len(columns) != 4: + continue - version_str = columns[0].text.strip() - version_match = config.first_match(version_str) - if not version_match: - continue + version_str = columns[0].text.strip() + version_match = config.first_match(version_str) + if not version_match: + continue - datetime_td = columns[3].find_next("span") - datetime_str = datetime_td.attrs["title"] if datetime_td else None - if not datetime_str: - continue + datetime_td = columns[3].find_next("span") + datetime_str = datetime_td.attrs["title"] if datetime_td else None + if not datetime_str: + continue - version = config.render(version_match) - date = dates.parse_datetime(datetime_str) - product_data.declare_version(version, date) + version = config.render(version_match) + date = dates.parse_datetime(datetime_str) + product_data.declare_version(version, date) diff --git a/src/chef-infra.py b/src/chef-infra.py index 5243fd2e..a8451cef 100644 --- a/src/chef-infra.py +++ b/src/chef-infra.py @@ -1,5 +1,6 @@ -from common import dates, http, releasedata +from common import dates, http from common.git import Git +from common.releasedata import ProductData, config_from_argv """Fetch released versions from docs.chef.io and retrieve their date from GitHub. 
docs.chef.io needs to be scraped because not all tagged versions are actually released. @@ -7,16 +8,16 @@ docs.chef.io needs to be scraped because not all tagged versions are actually re More context on https://github.com/endoflife-date/endoflife.date/pull/4425#discussion_r1447932411. """ -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url) - released_versions = [h2.get('id') for h2 in html.find_all('h2', id=True) if h2.get('id')] +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url) + released_versions = [h2.get('id') for h2 in html.find_all('h2', id=True) if h2.get('id')] - git = Git(config.data.get('repository')) - git.setup(bare=True) + git = Git(config.data.get('repository')) + git.setup(bare=True) - versions = git.list_tags() - for version, date_str in versions: - if version in released_versions: - date = dates.parse_date(date_str) - product_data.declare_version(version, date) + versions = git.list_tags() + for version, date_str in versions: + if version in released_versions: + date = dates.parse_date(date_str) + product_data.declare_version(version, date) diff --git a/src/chef-inspec.py b/src/chef-inspec.py index db6b50c9..455fe979 100644 --- a/src/chef-inspec.py +++ b/src/chef-inspec.py @@ -1,4 +1,5 @@ -from common import dates, github, http, releasedata +from common import dates, github, http +from common.releasedata import ProductData, config_from_argv """Fetch released versions from docs.chef.io and retrieve their date from GitHub. docs.chef.io needs to be scraped because not all tagged versions are actually released. @@ -6,13 +7,13 @@ docs.chef.io needs to be scraped because not all tagged versions are actually re More context on https://github.com/endoflife-date/endoflife.date/pull/4425#discussion_r1447932411. 
""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url) - released_versions = [h2.get('id') for h2 in html.find_all('h2', id=True) if h2.get('id')] +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url) + released_versions = [h2.get('id') for h2 in html.find_all('h2', id=True) if h2.get('id')] - for release in github.fetch_releases("inspec/inspec"): - sanitized_version = release.tag_name.replace("v", "") - if sanitized_version in released_versions: - date = dates.parse_datetime(release.published_at) - product_data.declare_version(sanitized_version, date) + for release in github.fetch_releases("inspec/inspec"): + sanitized_version = release.tag_name.replace("v", "") + if sanitized_version in released_versions: + date = dates.parse_datetime(release.published_at) + product_data.declare_version(sanitized_version, date) diff --git a/src/coldfusion.py b/src/coldfusion.py index c84d43fb..2ea21bc8 100644 --- a/src/coldfusion.py +++ b/src/coldfusion.py @@ -1,6 +1,7 @@ import re -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetches versions from Adobe ColdFusion release notes on helpx.adobe.com. 
@@ -21,15 +22,15 @@ FIXED_VERSIONS = { "2023.0.0": dates.date(2022, 5, 16), # https://coldfusion.adobe.com/2023/05/coldfusion2023-release/ } -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url) - for p in html.findAll("div", class_="text"): - version_and_date_str = p.get_text().strip().replace('\xa0', ' ') - for (date_str, version_str) in VERSION_AND_DATE_PATTERN.findall(version_and_date_str): - date = dates.parse_date(date_str) - version = version_str.strip().replace(",", ".") # 11,0,0,289974 -> 11.0.0.289974 - product_data.declare_version(version, date) + for p in html.findAll("div", class_="text"): + version_and_date_str = p.get_text().strip().replace('\xa0', ' ') + for (date_str, version_str) in VERSION_AND_DATE_PATTERN.findall(version_and_date_str): + date = dates.parse_date(date_str) + version = version_str.strip().replace(",", ".") # 11,0,0,289974 -> 11.0.0.289974 + product_data.declare_version(version, date) - product_data.declare_versions(FIXED_VERSIONS) + product_data.declare_versions(FIXED_VERSIONS) diff --git a/src/common/endoflife.py b/src/common/endoflife.py index 5c5a9669..e143b4ae 100644 --- a/src/common/endoflife.py +++ b/src/common/endoflife.py @@ -85,6 +85,15 @@ class ProductFrontmatter: return configs + def auto_config(self, method_filter: str, url_filter: str) -> AutoConfig: + configs = self.auto_configs(method_filter, url_filter) + + if len(configs) != 1: + message = f"Expected a single auto config for {self.name} with method={method_filter} and url={url_filter}; got {len(configs)}" + raise ValueError(message) + + return configs[0] + def get_title(self) -> str: return self.data["title"] diff --git a/src/common/releasedata.py b/src/common/releasedata.py index 3171227d..9c1c18bc 100644 --- a/src/common/releasedata.py +++ 
b/src/common/releasedata.py @@ -193,10 +193,10 @@ class ProductData: return self.name -def list_configs_from_argv() -> list[endoflife.AutoConfig]: +def config_from_argv() -> endoflife.AutoConfig: return parse_argv()[1] -def parse_argv() -> tuple[endoflife.ProductFrontmatter, list[endoflife.AutoConfig]]: +def parse_argv() -> tuple[endoflife.ProductFrontmatter, endoflife.AutoConfig]: parser = argparse.ArgumentParser(description=sys.argv[0]) parser.add_argument('-p', '--product', required=True, help='path to the product') parser.add_argument('-m', '--method', required=True, help='method to filter by') @@ -208,4 +208,4 @@ def parse_argv() -> tuple[endoflife.ProductFrontmatter, list[endoflife.AutoConfi logging.basicConfig(format="%(message)s", level=(logging.DEBUG if args.verbose else logging.INFO)) product = endoflife.ProductFrontmatter(Path(args.product)) - return product, product.auto_configs(args.method, args.url) + return product, product.auto_config(args.method, args.url) diff --git a/src/cos.py b/src/cos.py index ee8536b0..280c3fb2 100644 --- a/src/cos.py +++ b/src/cos.py @@ -2,7 +2,8 @@ import datetime import re from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv MILESTONE_PATTERN = re.compile(r'COS \d+ LTS') VERSION_PATTERN = re.compile(r"^(cos-\d+-\d+-\d+-\d+)") @@ -14,31 +15,31 @@ def parse_date(date_text: str) -> datetime: return dates.parse_date(date_text) -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - main = http.fetch_url(config.url) - main_soup = BeautifulSoup(main.text, features="html5lib") - milestones = [cell.text.split(' ')[1] for cell in main_soup.find_all('td', string=MILESTONE_PATTERN)] +config = config_from_argv() +with ProductData(config.product) as product_data: + main = http.fetch_url(config.url) + main_soup = BeautifulSoup(main.text, features="html5lib") + 
milestones = [cell.text.split(' ')[1] for cell in main_soup.find_all('td', string=MILESTONE_PATTERN)] - milestones_urls = [f"{main.url}m{milestone}" for milestone in milestones] - for milestone in http.fetch_urls(milestones_urls): - milestone_soup = BeautifulSoup(milestone.text, features="html5lib") - for article in milestone_soup.find_all('article', class_='devsite-article'): - for heading in article.find_all(['h2', 'h3']): # headings contains the date, which we parse - version_str = heading.get('data-text') - version_match = VERSION_PATTERN.match(version_str) - if not version_match: - continue + milestones_urls = [f"{main.url}m{milestone}" for milestone in milestones] + for milestone in http.fetch_urls(milestones_urls): + milestone_soup = BeautifulSoup(milestone.text, features="html5lib") + for article in milestone_soup.find_all('article', class_='devsite-article'): + for heading in article.find_all(['h2', 'h3']): # headings contains the date, which we parse + version_str = heading.get('data-text') + version_match = VERSION_PATTERN.match(version_str) + if not version_match: + continue - try: # 1st row is the header, so pick the first td in the 2nd row - date_str = heading.find_next('tr').find_next('tr').find_next('td').text - except AttributeError: # In some older releases, it is mentioned as Date: [Date] - date_str = heading.find_next('i').text + try: # 1st row is the header, so pick the first td in the 2nd row + date_str = heading.find_next('tr').find_next('tr').find_next('td').text + except AttributeError: # In some older releases, it is mentioned as Date: [Date] + date_str = heading.find_next('i').text - try: - date = parse_date(date_str) - except ValueError: # for some h3, the date is in the previous h2 - date_str = heading.find_previous('h2').get('data-text') - date = parse_date(date_str) + try: + date = parse_date(date_str) + except ValueError: # for some h3, the date is in the previous h2 + date_str = heading.find_previous('h2').get('data-text') + date = 
parse_date(date_str) - product_data.declare_version(version_match.group(1), date) + product_data.declare_version(version_match.group(1), date) diff --git a/src/couchbase-server.py b/src/couchbase-server.py index a8d130b5..9826f31d 100644 --- a/src/couchbase-server.py +++ b/src/couchbase-server.py @@ -1,7 +1,8 @@ import logging from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetches versions from release notes of each minor version on docs.couchbase.com. @@ -16,25 +17,25 @@ MANUAL_VERSIONS = { "7.2.0": dates.date(2023, 6, 1), # https://www.couchbase.com/blog/couchbase-capella-spring-release-72/ } -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(f"{config.url}/current/install/install-intro.html") +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(f"{config.url}/current/install/install-intro.html") - minor_versions = [options.attrs["value"] for options in html.find(class_="version_list").find_all("option")] - minor_version_urls = [f"{config.url}/{minor}/release-notes/relnotes.html" for minor in minor_versions] + minor_versions = [options.attrs["value"] for options in html.find(class_="version_list").find_all("option")] + minor_version_urls = [f"{config.url}/{minor}/release-notes/relnotes.html" for minor in minor_versions] - for minor_version in http.fetch_urls(minor_version_urls): - minor_version_soup = BeautifulSoup(minor_version.text, features="html5lib") + for minor_version in http.fetch_urls(minor_version_urls): + minor_version_soup = BeautifulSoup(minor_version.text, features="html5lib") - for title in minor_version_soup.find_all("h2"): - match = config.first_match(title.get_text().strip()) - if not match: - logging.info(f"Skipping {title}, does not match any regex") - continue + for title in 
minor_version_soup.find_all("h2"): + match = config.first_match(title.get_text().strip()) + if not match: + logging.info(f"Skipping {title}, does not match any regex") + continue - version = match["version"] - version = f"{version}.0" if len(version.split(".")) == 2 else version - date = dates.parse_month_year_date(match['date']) - product_data.declare_version(version, date) + version = match["version"] + version = f"{version}.0" if len(version.split(".")) == 2 else version + date = dates.parse_month_year_date(match['date']) + product_data.declare_version(version, date) - product_data.declare_versions(MANUAL_VERSIONS) + product_data.declare_versions(MANUAL_VERSIONS) diff --git a/src/debian.py b/src/debian.py index 802854cf..31107fb1 100644 --- a/src/debian.py +++ b/src/debian.py @@ -1,13 +1,14 @@ from pathlib import Path from subprocess import run -from common import dates, releasedata +from common import dates from common.git import Git +from common.releasedata import ProductData, config_from_argv """Fetch Debian versions by parsing news in www.debian.org source repository.""" -def extract_major_versions(p: releasedata.ProductData, repo_dir: Path) -> None: +def extract_major_versions(p: ProductData, repo_dir: Path) -> None: child = run( f"grep -RhE -A 1 'Debian [0-9]+.+ released' {repo_dir}/english/News " f"| cut -d '<' -f 2 " @@ -26,7 +27,7 @@ def extract_major_versions(p: releasedata.ProductData, repo_dir: Path) -> None: is_release_line = True -def extract_point_versions(p: releasedata.ProductData, repo_dir: Path) -> None: +def extract_point_versions(p: ProductData, repo_dir: Path) -> None: child = run( f"grep -Rh -B 10 '' {repo_dir}/english/News " "| grep -Eo '(release_date>(.*)<|revision>(.*)<)' " @@ -40,11 +41,11 @@ def extract_point_versions(p: releasedata.ProductData, repo_dir: Path) -> None: (date, version) = line.split(' ') p.declare_version(version, dates.parse_date(date)) -for config in releasedata.list_configs_from_argv(): - with 
releasedata.ProductData(config.product) as product_data: - git = Git(config.url) - git.setup() - git.checkout("master", file_list=["english/News"]) +config = config_from_argv() +with ProductData(config.product) as product_data: + git = Git(config.url) + git.setup() + git.checkout("master", file_list=["english/News"]) - extract_major_versions(product_data, git.repo_dir) - extract_point_versions(product_data, git.repo_dir) + extract_major_versions(product_data, git.repo_dir) + extract_point_versions(product_data, git.repo_dir) diff --git a/src/distrowatch.py b/src/distrowatch.py index 4f585bf5..46943237 100644 --- a/src/distrowatch.py +++ b/src/distrowatch.py @@ -1,18 +1,19 @@ -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(f"https://distrowatch.com/index.php?distribution={config.url}") +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(f"https://distrowatch.com/index.php?distribution={config.url}") - for table in html.select("td.News1>table.News"): - headline = table.select_one("td.NewsHeadline a[href]").get_text().strip() - versions_match = config.first_match(headline) - if not versions_match: - continue + for table in html.select("td.News1>table.News"): + headline = table.select_one("td.NewsHeadline a[href]").get_text().strip() + versions_match = config.first_match(headline) + if not versions_match: + continue - # multiple versions may be released at once (e.g. Ubuntu 16.04.7 and 18.04.5) - versions = config.render(versions_match).split("\n") - date = dates.parse_date(table.select_one("td.NewsDate").get_text()) + # multiple versions may be released at once (e.g. 
Ubuntu 16.04.7 and 18.04.5) + versions = config.render(versions_match).split("\n") + date = dates.parse_date(table.select_one("td.NewsDate").get_text()) - for version in versions: - product_data.declare_version(version, date) + for version in versions: + product_data.declare_version(version, date) diff --git a/src/docker_hub.py b/src/docker_hub.py index 38965173..9eabd07d 100644 --- a/src/docker_hub.py +++ b/src/docker_hub.py @@ -1,10 +1,11 @@ -from common import dates, endoflife, http, releasedata +from common import dates, endoflife, http +from common.releasedata import ProductData, config_from_argv """Fetches releases from the Docker Hub API. Unfortunately images creation date cannot be retrieved, so we had to use the tag_last_pushed field instead.""" -def fetch_releases(p: releasedata.ProductData, c: endoflife.AutoConfig, url: str) -> None: +def fetch_releases(p: ProductData, c: endoflife.AutoConfig, url: str) -> None: data = http.fetch_json(url) for result in data["results"]: @@ -17,6 +18,6 @@ def fetch_releases(p: releasedata.ProductData, c: endoflife.AutoConfig, url: str fetch_releases(p, c, data["next"]) -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - fetch_releases(product_data, config, f"https://hub.docker.com/v2/repositories/{config.url}/tags?page_size=100&page=1") +config = config_from_argv() +with ProductData(config.product) as product_data: + fetch_releases(product_data, config, f"https://hub.docker.com/v2/repositories/{config.url}/tags?page_size=100&page=1") diff --git a/src/firefox.py b/src/firefox.py index 88194b1b..913c8a8a 100644 --- a/src/firefox.py +++ b/src/firefox.py @@ -1,7 +1,8 @@ import urllib.parse from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetch Firefox versions with their dates from https://www.mozilla.org/. 
@@ -20,15 +21,15 @@ The script will need to be updated if someday those conditions are not met.""" MAX_VERSIONS_LIMIT = 100 -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - releases_page = http.fetch_url(config.url) - releases_soup = BeautifulSoup(releases_page.text, features="html5lib") - releases_list = releases_soup.find_all("ol", class_="c-release-list") +config = config_from_argv() +with ProductData(config.product) as product_data: + releases_page = http.fetch_url(config.url) + releases_soup = BeautifulSoup(releases_page.text, features="html5lib") + releases_list = releases_soup.find_all("ol", class_="c-release-list") - release_notes_urls = [urllib.parse.urljoin(releases_page.url, p.get("href")) for p in releases_list[0].find_all("a")] - for release_notes in http.fetch_urls(release_notes_urls[:MAX_VERSIONS_LIMIT]): - version = release_notes.url.split("/")[-3] - release_notes_soup = BeautifulSoup(release_notes.text, features="html5lib") - date_str = release_notes_soup.find(class_="c-release-date").get_text() # note: only works for versions > 25 - product_data.declare_version(version, dates.parse_date(date_str)) + release_notes_urls = [urllib.parse.urljoin(releases_page.url, p.get("href")) for p in releases_list[0].find_all("a")] + for release_notes in http.fetch_urls(release_notes_urls[:MAX_VERSIONS_LIMIT]): + version = release_notes.url.split("/")[-3] + release_notes_soup = BeautifulSoup(release_notes.text, features="html5lib") + date_str = release_notes_soup.find(class_="c-release-date").get_text() # note: only works for versions > 25 + product_data.declare_version(version, dates.parse_date(date_str)) diff --git a/src/ghc-wiki.py b/src/ghc-wiki.py index 47f2a3ed..abdadf82 100644 --- a/src/ghc-wiki.py +++ b/src/ghc-wiki.py @@ -14,7 +14,8 @@ References: import re from typing import Any, Generator, Iterator -from common import dates, http, releasedata +from common import dates, http +from 
common.releasedata import ProductData, config_from_argv def parse_markdown_tables(lineiter: Iterator[str]) -> Generator[list[list[Any]], Any, None]: @@ -50,41 +51,41 @@ def maybe_markdown_table_row(line: str) -> list[str] | None: return None return [x.strip() for x in line.strip('|').split('|')] -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product: - resp = http.fetch_url(config.url) - resp.raise_for_status() - data = resp.json() - assert data['title'] == "GHC Status" - assert data['format'] == "markdown" - md = data['content'].splitlines() +config = config_from_argv() +with ProductData(config.product) as product_data: + resp = http.fetch_url(config.url) + resp.raise_for_status() + data = resp.json() + assert data['title'] == "GHC Status" + assert data['format'] == "markdown" + md = data['content'].splitlines() - #-- Parse tables out of the wiki text. At time of writing, the script expects exactly two: - #-- 1. "Most recent major" with 5 columns - #-- 2. "All released versions" with 5 columns - [series_table, patch_level] = parse_markdown_tables(iter(md)) + #-- Parse tables out of the wiki text. At time of writing, the script expects exactly two: + #-- 1. "Most recent major" with 5 columns + #-- 2. 
"All released versions" with 5 columns + [series_table, patch_level] = parse_markdown_tables(iter(md)) - for row in series_table[1:]: - [series, _download_link, _most_recent, next_planned, status] = row - if status == "Next major release": - continue + for row in series_table[1:]: + [series, _download_link, _most_recent, next_planned, status] = row + if status == "Next major release": + continue - series = series.split(' ')[0] - series = series.replace('\\.', '.') - if series == "Nightlies": - continue - status = status.lower() + series = series.split(' ')[0] + series = series.replace('\\.', '.') + if series == "Nightlies": + continue + status = status.lower() - #-- See discussion in https://github.com/endoflife-date/endoflife.date/pull/6287 - r = product.get_release(series) - #-- The clearest semblance of an EOL signal we get - r.set_eol("not recommended for use" in status or ":red_circle:" in status) - #-- eoasColumn label is "Further releases planned" - r.set_eoas(any(keyword in next_planned for keyword in ("None", "N/A"))) + #-- See discussion in https://github.com/endoflife-date/endoflife.date/pull/6287 + r = product_data.get_release(series) + #-- The clearest semblance of an EOL signal we get + r.set_eol("not recommended for use" in status or ":red_circle:" in status) + #-- eoasColumn label is "Further releases planned" + r.set_eoas(any(keyword in next_planned for keyword in ("None", "N/A"))) - for row in patch_level[1:]: - [milestone, _download_link, date, _ticket, _manager] = row - version = milestone.lstrip('%') - version = version.split(' ') [0] - date = dates.parse_date(date) - product.declare_version(version, date) + for row in patch_level[1:]: + [milestone, _download_link, date, _ticket, _manager] = row + version = milestone.lstrip('%') + version = version.split(' ') [0] + date = dates.parse_date(date) + product_data.declare_version(version, date) diff --git a/src/git.py b/src/git.py index 99febdf6..198c8e03 100644 --- a/src/git.py +++ b/src/git.py @@ 
-1,17 +1,18 @@ -from common import dates, releasedata +from common import dates from common.git import Git +from common.releasedata import ProductData, config_from_argv """Fetches versions from tags in a git repository. This replace the old update.rb script.""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - git = Git(config.url) - git.setup(bare=True) +config = config_from_argv() +with ProductData(config.product) as product_data: + git = Git(config.url) + git.setup(bare=True) - tags = git.list_tags() - for tag, date_str in tags: - version_match = config.first_match(tag) - if version_match: - version = config.render(version_match) - date = dates.parse_date(date_str) - product_data.declare_version(version, date) + tags = git.list_tags() + for tag, date_str in tags: + version_match = config.first_match(tag) + if version_match: + version = config.render(version_match) + date = dates.parse_date(date_str) + product_data.declare_version(version, date) diff --git a/src/github_releases.py b/src/github_releases.py index 075f8eee..2ec7800e 100644 --- a/src/github_releases.py +++ b/src/github_releases.py @@ -1,19 +1,20 @@ -from common import dates, github, releasedata +from common import dates, github +from common.releasedata import ProductData, config_from_argv """Fetches versions from GitHub releases using the GraphQL API and the GitHub CLI. Note: GraphQL API and GitHub CLI are used because it's simpler: no need to manage pagination and authentication. 
""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - for release in github.fetch_releases(config.url): - if release.is_prerelease: - continue +config = config_from_argv() +with ProductData(config.product) as product_data: + for release in github.fetch_releases(config.url): + if release.is_prerelease: + continue - version_str = release.tag_name - version_match = config.first_match(version_str) - if version_match: - version = config.render(version_match) - date = dates.parse_datetime(release.published_at) - product_data.declare_version(version, date) + version_str = release.tag_name + version_match = config.first_match(version_str) + if version_match: + version = config.render(version_match) + date = dates.parse_datetime(release.published_at) + product_data.declare_version(version, date) diff --git a/src/github_tags.py b/src/github_tags.py index 06d451da..b0c6f0f7 100644 --- a/src/github_tags.py +++ b/src/github_tags.py @@ -1,16 +1,17 @@ -from common import dates, github, releasedata +from common import dates, github +from common.releasedata import ProductData, config_from_argv """Fetches versions from GitHub tags using the GraphQL API and the GitHub CLI. Note: GraphQL API and GitHub CLI are used because it's simpler: no need to manage pagination and authentication. 
""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - for tag in github.fetch_tags(config.url): - version_str = tag.name - version_match = config.first_match(version_str) - if version_match: - version = config.render(version_match) - date = dates.parse_datetime(tag.commit_date) - product_data.declare_version(version, date) +config = config_from_argv() +with ProductData(config.product) as product_data: + for tag in github.fetch_tags(config.url): + version_str = tag.name + version_match = config.first_match(version_str) + if version_match: + version = config.render(version_match) + date = dates.parse_datetime(tag.commit_date) + product_data.declare_version(version, date) diff --git a/src/google-kubernetes-engine.py b/src/google-kubernetes-engine.py index d1284df8..fba8bcd9 100644 --- a/src/google-kubernetes-engine.py +++ b/src/google-kubernetes-engine.py @@ -1,6 +1,7 @@ import re -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv # https://regex101.com/r/zPxBqT/1 VERSION_PATTERN = re.compile(r"\d.\d+\.\d+-gke\.\d+") @@ -11,17 +12,17 @@ URL_BY_PRODUCT = { "google-kubernetes-engine-rapid": "https://cloud.google.com/kubernetes-engine/docs/release-notes-rapid", } -for config in releasedata.list_configs_from_argv(): # noqa: B007 multiple JSON produced for historical reasons - for product_name, url in URL_BY_PRODUCT.items(): - with releasedata.ProductData(product_name) as product_data: - html = http.fetch_html(url) +config = config_from_argv() # multiple JSON produced for historical reasons +for product_name, url in URL_BY_PRODUCT.items(): + with ProductData(product_name) as product_data: + html = http.fetch_html(url) - for section in html.find_all('section', class_='releases'): - for h2 in section.find_all('h2'): # h2 contains the date - date = dates.parse_date(h2.get('data-text')) + for section in 
html.find_all('section', class_='releases'): + for h2 in section.find_all('h2'): # h2 contains the date + date = dates.parse_date(h2.get('data-text')) - next_div = h2.find_next('div') # The div next to the h2 contains the notes about changes made on that date - for li in next_div.find_all('li'): - if "versions are now available" in li.text: - for version in VERSION_PATTERN.findall(li.find('ul').text): - product_data.declare_version(version, date) + next_div = h2.find_next('div') # The div next to the h2 contains the notes about changes made on that date + for li in next_div.find_all('li'): + if "versions are now available" in li.text: + for version in VERSION_PATTERN.findall(li.find('ul').text): + product_data.declare_version(version, date) diff --git a/src/graalvm.py b/src/graalvm.py index 0e94fdb4..23ec572b 100644 --- a/src/graalvm.py +++ b/src/graalvm.py @@ -1,40 +1,33 @@ import logging -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url) - table_selector = config.data.get("table_selector", "#previous-releases + table").strip() - date_column = config.data.get("date_column", "Date").strip().lower() - versions_column = config.data.get("versions_column").strip().lower() +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url) + table_selector = config.data.get("table_selector", "#previous-releases + table").strip() + date_column = config.data.get("date_column", "Date").strip().lower() + versions_column = config.data.get("versions_column").strip().lower() - table = html.select_one(table_selector) - if not table: - logging.warning(f"Skipping config {config} as no table found with selector {table_selector}") + table = html.select_one(table_selector) + headers = 
[th.get_text().strip().lower() for th in table.select("thead th")] + date_index = headers.index(date_column) + versions_index = headers.index(versions_column) + + for row in table.select("tbody tr"): + cells = row.select("td") + if len(cells) <= max(date_index, versions_index): + logging.warning(f"Skipping row {cells}: not enough cells") continue - headers = [th.get_text().strip().lower() for th in table.select("thead th")] - if date_column not in headers or versions_column not in headers: - logging.info(f"Skipping table with headers {headers} as it does not contain the required columns: {date_column}, {versions_column}") + date_text = cells[date_index].get_text().strip() + date = dates.parse_date(date_text) + if date > dates.today(): + logging.info(f"Skipping future version {cells}") continue - date_index = headers.index(date_column) - versions_index = headers.index(versions_column) - - for row in table.select("tbody tr"): - cells = row.select("td") - if len(cells) <= max(date_index, versions_index): - logging.warning(f"Skipping row {cells}: not enough cells") - continue - - date_text = cells[date_index].get_text().strip() - date = dates.parse_date(date_text) - if date > dates.today(): - logging.info(f"Skipping future version {cells}") - continue - - versions = cells[versions_index].get_text().strip() - for version in versions.split(", "): - if config.first_match(version): - product_data.declare_version(version.strip(), date) + versions = cells[versions_index].get_text().strip() + for version in versions.split(", "): + if config.first_match(version): + product_data.declare_version(version.strip(), date) diff --git a/src/haproxy.py b/src/haproxy.py index 3c3db896..30d470c4 100644 --- a/src/haproxy.py +++ b/src/haproxy.py @@ -1,29 +1,30 @@ import re -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv CYCLE_PATTERN = re.compile(r"^(\d+\.\d+)/$") DATE_AND_VERSION_PATTERN = 
re.compile(r"^(\d{4})/(\d{2})/(\d{2})\s+:\s+(\d+\.\d+\.\d.?)$") # https://regex101.com/r/1JCnFC/1 -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - # First, get all minor releases from the download page - download_html = http.fetch_html(config.url) - minor_versions = [] - for link in download_html.select("a"): - minor_version_match = CYCLE_PATTERN.match(link.attrs["href"]) - if not minor_version_match: - continue +config = config_from_argv() +with ProductData(config.product) as product_data: + # First, get all minor releases from the download page + download_html = http.fetch_html(config.url) + minor_versions = [] + for link in download_html.select("a"): + minor_version_match = CYCLE_PATTERN.match(link.attrs["href"]) + if not minor_version_match: + continue - minor_version = minor_version_match.groups()[0] - if minor_version != "1.0": # No changelog in https://www.haproxy.org/download/1.0/src - minor_versions.append(minor_version) + minor_version = minor_version_match.groups()[0] + if minor_version != "1.0": # No changelog in https://www.haproxy.org/download/1.0/src + minor_versions.append(minor_version) - # Then, fetches all versions from each changelog - changelog_urls = [f"{config.url}{minor_version}/src/CHANGELOG" for minor_version in minor_versions] - for changelog in http.fetch_urls(changelog_urls): - for line in changelog.text.split('\n'): - date_and_version_match = DATE_AND_VERSION_PATTERN.match(line) - if date_and_version_match: - year, month, day, version = date_and_version_match.groups() - product_data.declare_version(version, dates.date(int(year), int(month), int(day))) + # Then, fetches all versions from each changelog + changelog_urls = [f"{config.url}{minor_version}/src/CHANGELOG" for minor_version in minor_versions] + for changelog in http.fetch_urls(changelog_urls): + for line in changelog.text.split('\n'): + date_and_version_match = DATE_AND_VERSION_PATTERN.match(line) + if 
date_and_version_match: + year, month, day, version = date_and_version_match.groups() + product_data.declare_version(version, dates.date(int(year), int(month), int(day))) diff --git a/src/ibm-aix.py b/src/ibm-aix.py index 3d69b33b..9db5a292 100644 --- a/src/ibm-aix.py +++ b/src/ibm-aix.py @@ -1,12 +1,13 @@ -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url) - for release_table in html.find("div", class_="ibm-container-body").find_all("table", class_="ibm-data-table ibm-grid"): - for row in release_table.find_all("tr")[1:]: # for all rows except the header - cells = row.find_all("td") - version = cells[0].text.strip("AIX ").replace(' TL', '.') - date = dates.parse_month_year_date(cells[1].text) - product_data.declare_version(version, date) + for release_table in html.find("div", class_="ibm-container-body").find_all("table", class_="ibm-data-table ibm-grid"): + for row in release_table.find_all("tr")[1:]: # for all rows except the header + cells = row.find_all("td") + version = cells[0].text.strip("AIX ").replace(' TL', '.') + date = dates.parse_month_year_date(cells[1].text) + product_data.declare_version(version, date) diff --git a/src/kuma.py b/src/kuma.py index 67857405..4b34bca2 100644 --- a/src/kuma.py +++ b/src/kuma.py @@ -1,6 +1,7 @@ import logging -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetch version data for Kuma from https://raw.githubusercontent.com/kumahq/kuma/master/versions.yml. 
""" @@ -9,25 +10,25 @@ RELEASE_FIELD = 'release' RELEASE_DATE_FIELD = 'releaseDate' EOL_FIELD = 'endOfLifeDate' -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - versions_data = http.fetch_yaml(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + versions_data = http.fetch_yaml(config.url) - # Iterate through the versions and their associated dates - for version_info in versions_data: - release_name = version_info[RELEASE_FIELD] - if not release_name.endswith('.x'): - logging.info(f"skipping release with name {release_name}: does not end with '.x'") - continue + # Iterate through the versions and their associated dates + for version_info in versions_data: + release_name = version_info[RELEASE_FIELD] + if not release_name.endswith('.x'): + logging.info(f"skipping release with name {release_name}: does not end with '.x'") + continue - if RELEASE_DATE_FIELD not in version_info or EOL_FIELD not in version_info: - logging.info(f"skipping release with name {release_name}: does not contain {RELEASE_DATE_FIELD} or {EOL_FIELD} fields") - continue + if RELEASE_DATE_FIELD not in version_info or EOL_FIELD not in version_info: + logging.info(f"skipping release with name {release_name}: does not contain {RELEASE_DATE_FIELD} or {EOL_FIELD} fields") + continue - release = product_data.get_release(release_name.replace('.x', '')) + release = product_data.get_release(release_name.replace('.x', '')) - release_date = dates.parse_date(version_info[RELEASE_DATE_FIELD]) - release.set_field('releaseDate', release_date) + release_date = dates.parse_date(version_info[RELEASE_DATE_FIELD]) + release.set_field('releaseDate', release_date) - eol = dates.parse_date(version_info[EOL_FIELD]) - release.set_field('eol', eol) + eol = dates.parse_date(version_info[EOL_FIELD]) + release.set_field('eol', eol) diff --git a/src/libreoffice.py b/src/libreoffice.py index 99c08a03..f70c0975 100644 --- 
a/src/libreoffice.py +++ b/src/libreoffice.py @@ -1,27 +1,28 @@ import logging -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetches LibreOffice versions from https://downloadarchive.documentfoundation.org/libreoffice/old/""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url) - for table in html.find_all("table"): - for row in table.find_all("tr")[1:]: - cells = row.find_all("td") - if len(cells) < 4: - continue + for table in html.find_all("table"): + for row in table.find_all("tr")[1:]: + cells = row.find_all("td") + if len(cells) < 4: + continue - version_str = cells[1].get_text().strip() - version_match = config.first_match(version_str) - if not version_match: - logging.warning(f"Skipping version {version_str} as it does not match any known version pattern") - continue - version = config.render(version_match) + version_str = cells[1].get_text().strip() + version_match = config.first_match(version_str) + if not version_match: + logging.warning(f"Skipping version {version_str} as it does not match any known version pattern") + continue + version = config.render(version_match) - date_str = cells[2].get_text().strip() - date = dates.parse_datetime(date_str) + date_str = cells[2].get_text().strip() + date = dates.parse_datetime(date_str) - product_data.declare_version(version, date) + product_data.declare_version(version, date) diff --git a/src/looker.py b/src/looker.py index daa678c0..55156b8a 100644 --- a/src/looker.py +++ b/src/looker.py @@ -1,31 +1,32 @@ import re from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetch Looker 
versions from the Google Cloud release notes RSS feed. """ ANNOUNCEMENT_PATTERN = re.compile(r"includes\s+the\s+following\s+changes", re.IGNORECASE) -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - rss = http.fetch_xml(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + rss = http.fetch_xml(config.url) - for item in rss.getElementsByTagName("entry"): - content = item.getElementsByTagName("content")[0].firstChild.nodeValue - content_soup = BeautifulSoup(content, features="html5lib") + for item in rss.getElementsByTagName("entry"): + content = item.getElementsByTagName("content")[0].firstChild.nodeValue + content_soup = BeautifulSoup(content, features="html5lib") - announcement_match = content_soup.find(string=ANNOUNCEMENT_PATTERN) - if not announcement_match: - continue + announcement_match = content_soup.find(string=ANNOUNCEMENT_PATTERN) + if not announcement_match: + continue - version_match = config.first_match(announcement_match.parent.get_text()) - if not version_match: - continue - version = config.render(version_match) + version_match = config.first_match(announcement_match.parent.get_text()) + if not version_match: + continue + version = config.render(version_match) - date_str = item.getElementsByTagName("updated")[0].firstChild.nodeValue - date = dates.parse_datetime(date_str) + date_str = item.getElementsByTagName("updated")[0].firstChild.nodeValue + date = dates.parse_datetime(date_str) - product_data.declare_version(version, date) + product_data.declare_version(version, date) diff --git a/src/lua.py b/src/lua.py index 6af31b8a..c4a74b1f 100644 --- a/src/lua.py +++ b/src/lua.py @@ -1,23 +1,24 @@ import re -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetches Lua releases from lua.org.""" RELEASED_AT_PATTERN = 
re.compile(r"Lua\s*(?P<release>\d+\.\d+)\s*was\s*released\s*on\s*(?P<release_date>\d+\s*\w+\s*\d{4})") VERSION_PATTERN = re.compile(r"(?P<version>\d+\.\d+\.\d+),\s*released\s*on\s*(?P<version_date>\d+\s*\w+\s*\d{4})") -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url, features = 'html.parser') - page_text = html.text # HTML is broken, no way to parse it with beautifulsoup +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url, features = 'html.parser') + page_text = html.text # HTML is broken, no way to parse it with beautifulsoup - for release_match in RELEASED_AT_PATTERN.finditer(page_text): - release = release_match.group('release') - release_date = dates.parse_date(release_match.group('release_date')) - product_data.get_release(release).set_release_date(release_date) + for release_match in RELEASED_AT_PATTERN.finditer(page_text): + release = release_match.group('release') + release_date = dates.parse_date(release_match.group('release_date')) + product_data.get_release(release).set_release_date(release_date) - for version_match in VERSION_PATTERN.finditer(page_text): - version = version_match.group('version') - version_date = dates.parse_date(version_match.group('version_date')) - product_data.declare_version(version, version_date) + for version_match in VERSION_PATTERN.finditer(page_text): + version = version_match.group('version') + version_date = dates.parse_date(version_match.group('version_date')) + product_data.declare_version(version, version_date) diff --git a/src/maven.py b/src/maven.py index 18149bd0..c69e2039 100644 --- a/src/maven.py +++ b/src/maven.py @@ -1,23 +1,24 @@ from datetime import datetime, timezone -from common import http, releasedata +from common import http +from common.releasedata import ProductData, config_from_argv -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as
product_data: - start = 0 - group_id, artifact_id = config.url.split("/") +config = config_from_argv() +with ProductData(config.product) as product_data: + start = 0 + group_id, artifact_id = config.url.split("/") - while True: - url = f"https://search.maven.org/solrsearch/select?q=g:{group_id}+AND+a:{artifact_id}&core=gav&wt=json&start={start}&rows=100" - data = http.fetch_json(url) + while True: + url = f"https://search.maven.org/solrsearch/select?q=g:{group_id}+AND+a:{artifact_id}&core=gav&wt=json&start={start}&rows=100" + data = http.fetch_json(url) - for row in data["response"]["docs"]: - version_match = config.first_match(row["v"]) - if version_match: - version = config.render(version_match) - date = datetime.fromtimestamp(row["timestamp"] / 1000, tz=timezone.utc) - product_data.declare_version(version, date) + for row in data["response"]["docs"]: + version_match = config.first_match(row["v"]) + if version_match: + version = config.render(version_match) + date = datetime.fromtimestamp(row["timestamp"] / 1000, tz=timezone.utc) + product_data.declare_version(version, date) - start += 100 - if data["response"]["numFound"] <= start: - break + start += 100 + if data["response"]["numFound"] <= start: + break diff --git a/src/netbsd.py b/src/netbsd.py index 054e8be3..f7f5edd2 100644 --- a/src/netbsd.py +++ b/src/netbsd.py @@ -1,32 +1,33 @@ import logging -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetches NetBSD versions and EOL information from https://www.netbsd.org/.""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url) - for row in html.select('table tbody tr'): - cells = [cell.get_text(strip=True) for cell in row.select('td')] + for row in 
html.select('table tbody tr'): + cells = [cell.get_text(strip=True) for cell in row.select('td')] - version = cells[0] - if not version.startswith('NetBSD'): - logging.info(f"Skipping row {cells}, version does not start with 'NetBSD'") - continue - version = version.split(' ')[1] + version = cells[0] + if not version.startswith('NetBSD'): + logging.info(f"Skipping row {cells}, version does not start with 'NetBSD'") + continue + version = version.split(' ')[1] - try: - release_date = dates.parse_date(cells[1]) - product_data.declare_version(version, release_date) - except ValueError: - logging.warning(f"Skipping row {cells}, could not parse release date") + try: + release_date = dates.parse_date(cells[1]) + product_data.declare_version(version, release_date) + except ValueError: + logging.warning(f"Skipping row {cells}, could not parse release date") - eol_str = cells[2] - if not eol_str: - continue + eol_str = cells[2] + if not eol_str: + continue - eol = dates.parse_date(eol_str) - major_version = version.split('.')[0] - product_data.get_release(major_version).set_eol(eol) + eol = dates.parse_date(eol_str) + major_version = version.split('.')[0] + product_data.get_release(major_version).set_eol(eol) diff --git a/src/npm.py b/src/npm.py index adbe21c2..82197fdb 100644 --- a/src/npm.py +++ b/src/npm.py @@ -1,11 +1,12 @@ -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - data = http.fetch_json(f"https://registry.npmjs.org/{config.url}") - for version_str in data["versions"]: - version_match = config.first_match(version_str) - if version_match: - version = config.render(version_match) - date = dates.parse_datetime(data["time"][version_str]) - product_data.declare_version(version, date) +config = config_from_argv() +with ProductData(config.product) as product_data: 
+ data = http.fetch_json(f"https://registry.npmjs.org/{config.url}") + for version_str in data["versions"]: + version_match = config.first_match(version_str) + if version_match: + version = config.render(version_match) + date = dates.parse_datetime(data["time"][version_str]) + product_data.declare_version(version, date) diff --git a/src/nutanix.py b/src/nutanix.py index ea6cee3d..a33013a9 100644 --- a/src/nutanix.py +++ b/src/nutanix.py @@ -1,15 +1,16 @@ -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetch Nutanix products versions from https://portal.nutanix.com/api/v1.""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - data = http.fetch_json(f"https://portal.nutanix.com/api/v1/eol/find?type={config.url}") +config = config_from_argv() +with ProductData(config.product) as product_data: + data = http.fetch_json(f"https://portal.nutanix.com/api/v1/eol/find?type={config.url}") - for version_data in data["contents"]: - release_name = '.'.join(version_data["version"].split(".")[:2]) + for version_data in data["contents"]: + release_name = '.'.join(version_data["version"].split(".")[:2]) - if 'GENERAL_AVAILABILITY' in version_data: - version = version_data["version"] - date = dates.parse_datetime(version_data["GENERAL_AVAILABILITY"]).replace(second=0) - product_data.declare_version(version, date) + if 'GENERAL_AVAILABILITY' in version_data: + version = version_data["version"] + date = dates.parse_datetime(version_data["GENERAL_AVAILABILITY"]).replace(second=0) + product_data.declare_version(version, date) diff --git a/src/oracle-jdk.py b/src/oracle-jdk.py index d3e2644c..30f6dba4 100644 --- a/src/oracle-jdk.py +++ b/src/oracle-jdk.py @@ -1,23 +1,24 @@ from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, 
config_from_argv """Fetch Java versions from https://www.java.com/releases/. This script is using requests-html because the page needs JavaScript to render correctly.""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_javascript_url(config.url) - soup = BeautifulSoup(html, 'html5lib') +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_javascript_url(config.url) + soup = BeautifulSoup(html, 'html5lib') - previous_date = None - for row in soup.select('#released tr'): - version_cell = row.select_one('td.anchor') - if version_cell: - version = version_cell.attrs['id'] - date_str = row.select('td')[1].text - date = dates.parse_date(date_str) if date_str else previous_date - product_data.declare_version(version, date) - previous_date = date + previous_date = None + for row in soup.select('#released tr'): + version_cell = row.select_one('td.anchor') + if version_cell: + version = version_cell.attrs['id'] + date_str = row.select('td')[1].text + date = dates.parse_date(date_str) if date_str else previous_date + product_data.declare_version(version, date) + previous_date = date - product_data.remove_version('1.0_alpha') # the only version we don't want, a regex is not needed + product_data.remove_version('1.0_alpha') # the only version we don't want, a regex is not needed diff --git a/src/pan-os.py b/src/pan-os.py index b834657f..49f8588d 100644 --- a/src/pan-os.py +++ b/src/pan-os.py @@ -1,12 +1,13 @@ -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetches pan-os versions from https://github.com/mrjcap/panos-versions/.""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - versions = http.fetch_json(config.url) +config = config_from_argv() +with ProductData(config.product) as 
product_data: + versions = http.fetch_json(config.url) - for version in versions: - name = version['version'] - date = dates.parse_datetime(version['released-on']) - product_data.declare_version(name, date) + for version in versions: + name = version['version'] + date = dates.parse_datetime(version['released-on']) + product_data.declare_version(name, date) diff --git a/src/php.py b/src/php.py index 507c3d94..741feaf3 100644 --- a/src/php.py +++ b/src/php.py @@ -1,15 +1,16 @@ -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - # Fetch major versions - latest_by_major = http.fetch_url(config.url).json() - major_version_urls = [f"{config.url}&version={major_version}" for major_version in latest_by_major] +config = config_from_argv() +with ProductData(config.product) as product_data: + # Fetch major versions + latest_by_major = http.fetch_url(config.url).json() + major_version_urls = [f"{config.url}&version={major_version}" for major_version in latest_by_major] - # Fetch all versions for major versions - for major_versions_response in http.fetch_urls(major_version_urls): - major_versions_data = major_versions_response.json() - for version in major_versions_data: - if config.first_match(version): # exclude versions such as "3.0.x (latest)" - date = dates.parse_date(major_versions_data[version]["date"]) - product_data.declare_version(version, date) + # Fetch all versions for major versions + for major_versions_response in http.fetch_urls(major_version_urls): + major_versions_data = major_versions_response.json() + for version in major_versions_data: + if config.first_match(version): # exclude versions such as "3.0.x (latest)" + date = dates.parse_date(major_versions_data[version]["date"]) + product_data.declare_version(version, date) diff --git a/src/plesk.py 
b/src/plesk.py index 5b2a1434..ee8464e1 100644 --- a/src/plesk.py +++ b/src/plesk.py @@ -1,22 +1,23 @@ -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetches versions from Plesk's change log. Only 18.0.20.3 and later will be picked up, as the format of the change log for 18.0.20 and 18.0.19 are different and there is no entry for GA of version 18.0.18 and older.""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url) - for release in html.find_all("div", class_="changelog-entry--obsidian"): - version = release.h2.text.strip() - if not version.startswith('Plesk Obsidian 18'): - continue + for release in html.find_all("div", class_="changelog-entry--obsidian"): + version = release.h2.text.strip() + if not version.startswith('Plesk Obsidian 18'): + continue - version = version.replace(' Update ', '.').replace('Plesk Obsidian ', '') - if ' ' in version: - continue + version = version.replace(' Update ', '.').replace('Plesk Obsidian ', '') + if ' ' in version: + continue - date = dates.parse_date(release.p.text) - product_data.declare_version(version, date) + date = dates.parse_date(release.p.text) + product_data.declare_version(version, date) diff --git a/src/pypi.py b/src/pypi.py index e11b2ecb..5d68bcfc 100644 --- a/src/pypi.py +++ b/src/pypi.py @@ -1,14 +1,15 @@ -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - data = http.fetch_json(f"https://pypi.org/pypi/{config.url}/json") +config = config_from_argv() +with ProductData(config.product) as product_data: 
+ data = http.fetch_json(f"https://pypi.org/pypi/{config.url}/json") - for version_str in data["releases"]: - version_match = config.first_match(version_str) - version_data = data["releases"][version_str] + for version_str in data["releases"]: + version_match = config.first_match(version_str) + version_data = data["releases"][version_str] - if version_match and version_data: - version = config.render(version_match) - date = dates.parse_datetime(version_data[0]["upload_time_iso_8601"]) - product_data.declare_version(version, date) + if version_match and version_data: + version = config.render(version_match) + date = dates.parse_datetime(version_data[0]["upload_time_iso_8601"]) + product_data.declare_version(version, date) diff --git a/src/rds.py b/src/rds.py index fe20d4ac..a987cc46 100644 --- a/src/rds.py +++ b/src/rds.py @@ -1,6 +1,7 @@ import logging -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetches Amazon RDS versions from the version management pages on AWS docs. @@ -8,22 +9,22 @@ Pages parsed by this script are expected to have version tables with a version i in the third column (usually named 'RDS release date'). 
""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url) - for table in html.find_all("table"): - for row in table.find_all("tr"): - columns = row.find_all("td") - if len(columns) <= 3: - continue + for table in html.find_all("table"): + for row in table.find_all("tr"): + columns = row.find_all("td") + if len(columns) <= 3: + continue - version_text = columns[0].text.strip() - version_match = config.first_match(version_text) - if not version_match: - logging.warning(f"Skipping {version_text}: does not match any version pattern") - continue + version_text = columns[0].text.strip() + version_match = config.first_match(version_text) + if not version_match: + logging.warning(f"Skipping {version_text}: does not match any version pattern") + continue - version = config.render(version_match) - date = dates.parse_date(columns[2].text) - product_data.declare_version(version, date) + version = config.render(version_match) + date = dates.parse_date(columns[2].text) + product_data.declare_version(version, date) diff --git a/src/red-hat-jboss-eap-7.py b/src/red-hat-jboss-eap-7.py index 3cb7425a..f6a272a3 100644 --- a/src/red-hat-jboss-eap-7.py +++ b/src/red-hat-jboss-eap-7.py @@ -1,40 +1,41 @@ import logging -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetches RedHat JBoss EAP version data for JBoss 7""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url) - for h4 in html.find_all("h4"): - title = h4.get_text(strip=True) - if not title.startswith("7."): 
+ for h4 in html.find_all("h4"): + title = h4.get_text(strip=True) + if not title.startswith("7."): + continue + + release = title[:3] + version_table = h4.find_next("table") + if not version_table: + logging.warning(f"Version table not found for {title}") + continue + + for (i, row) in enumerate(version_table.find_all("tr")): + if i == 0: # Skip the first row (header) continue - release = title[:3] - version_table = h4.find_next("table") - if not version_table: - logging.warning(f"Version table not found for {title}") + columns = row.find_all("td") + # Get the version name without the content of the tag, if present + name_str = ''.join([content for content in columns[0].contents if isinstance(content, str)]).strip() + date_str = columns[1].text.strip() + + if date_str == "TBD" or date_str == "TDB": # Placeholder for a future release continue - for (i, row) in enumerate(version_table.find_all("tr")): - if i == 0: # Skip the first row (header) - continue + if date_str == "[July 21, 2021][d7400]": + # Temporary fix for a typo in the source page + date_str = "July 21 2021" - columns = row.find_all("td") - # Get the version name without the content of the tag, if present - name_str = ''.join([content for content in columns[0].contents if isinstance(content, str)]).strip() - date_str = columns[1].text.strip() - - if date_str == "TBD" or date_str == "TDB": # Placeholder for a future release - continue - - if date_str == "[July 21, 2021][d7400]": - # Temporary fix for a typo in the source page - date_str = "July 21 2021" - - name = name_str.replace("GA", "Update 0").replace("Update ", release + ".") - date = dates.parse_date(date_str) - product_data.declare_version(name, date) + name = name_str.replace("GA", "Update 0").replace("Update ", release + ".") + date = dates.parse_date(date_str) + product_data.declare_version(name, date) diff --git a/src/red-hat-jboss-eap-8.py b/src/red-hat-jboss-eap-8.py index 766920d8..84326f6b 100644 --- a/src/red-hat-jboss-eap-8.py +++ 
b/src/red-hat-jboss-eap-8.py @@ -1,19 +1,20 @@ import re -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetches the latest RedHat JBoss EAP version data for JBoss 8.0""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - xml = http.fetch_xml(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + xml = http.fetch_xml(config.url) - versioning = xml.getElementsByTagName("metadata")[0].getElementsByTagName("versioning")[0] + versioning = xml.getElementsByTagName("metadata")[0].getElementsByTagName("versioning")[0] - latest_str = versioning.getElementsByTagName("latest")[0].firstChild.nodeValue - latest_name = "8.0." + re.match(r"^..(.*)\.GA", latest_str).group(1) + latest_str = versioning.getElementsByTagName("latest")[0].firstChild.nodeValue + latest_name = "8.0." + re.match(r"^..(.*)\.GA", latest_str).group(1) - latest_date_str = versioning.getElementsByTagName("lastUpdated")[0].firstChild.nodeValue - latest_date = dates.parse_datetime(latest_date_str) + latest_date_str = versioning.getElementsByTagName("lastUpdated")[0].firstChild.nodeValue + latest_date = dates.parse_datetime(latest_date_str) - product_data.declare_version(latest_name, latest_date) + product_data.declare_version(latest_name, latest_date) diff --git a/src/red-hat-openshift.py b/src/red-hat-openshift.py index 5e96f43f..94f0885e 100644 --- a/src/red-hat-openshift.py +++ b/src/red-hat-openshift.py @@ -1,7 +1,8 @@ import re -from common import dates, releasedata +from common import dates from common.git import Git +from common.releasedata import ProductData, config_from_argv """Fetches Red Hat OpenShift versions from the documentation's git repository""" @@ -10,26 +11,26 @@ VERSION_AND_DATE_PATTERN = re.compile( re.MULTILINE, ) -for config in releasedata.list_configs_from_argv(): - with 
releasedata.ProductData(config.product) as product_data: - git = Git(config.url) - git.setup() +config = config_from_argv() +with ProductData(config.product) as product_data: + git = Git(config.url) + git.setup() - # only fetch v4+ branches, because the format was different in openshift v3 - for branch in git.list_branches("refs/heads/enterprise-[4-9]*"): - branch_version = branch.split("-")[1] - file_version = branch_version.replace(".", "-") - release_notes_filename = f"release_notes/ocp-{file_version}-release-notes.adoc" - git.checkout(branch, file_list=[release_notes_filename]) + # only fetch v4+ branches, because the format was different in openshift v3 + for branch in git.list_branches("refs/heads/enterprise-[4-9]*"): + branch_version = branch.split("-")[1] + file_version = branch_version.replace(".", "-") + release_notes_filename = f"release_notes/ocp-{file_version}-release-notes.adoc" + git.checkout(branch, file_list=[release_notes_filename]) - release_notes_file = git.repo_dir / release_notes_filename - if not release_notes_file.exists(): - continue + release_notes_file = git.repo_dir / release_notes_filename + if not release_notes_file.exists(): + continue - with release_notes_file.open("rb") as f: - content = f.read().decode("utf-8") - for version, date_str in VERSION_AND_DATE_PATTERN.findall(content): - product_data.declare_version( - version.replace("{product-version}", branch_version), - dates.parse_date(date_str), - ) + with release_notes_file.open("rb") as f: + content = f.read().decode("utf-8") + for version, date_str in VERSION_AND_DATE_PATTERN.findall(content): + product_data.declare_version( + version.replace("{product-version}", branch_version), + dates.parse_date(date_str), + ) diff --git a/src/red-hat-satellite.py b/src/red-hat-satellite.py index 0effc876..8865a7ae 100644 --- a/src/red-hat-satellite.py +++ b/src/red-hat-satellite.py @@ -1,28 +1,29 @@ import logging -from common import dates, http, releasedata +from common import dates, http 
+from common.releasedata import ProductData, config_from_argv """Fetches Satellite versions from access.redhat.com. A few of the older versions, such as 'Satellite 6.1 GA Release (Build 6.1.1)', were ignored because too hard to parse.""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url) - for table in html.findAll("tbody"): - for tr in table.findAll("tr"): - td_list = tr.findAll("td") + for table in html.findAll("tbody"): + for tr in table.findAll("tr"): + td_list = tr.findAll("td") - version_str = td_list[0].get_text().replace(' GA', '.0').strip() # x.y GA => x.y.0 - version_match = config.first_match(version_str) - if not version_match: - logging.warning(f"Skipping version '{version_str}': does not match any version pattern.") - continue - version = version_match["version"].replace('-', '.') # a.b.c-d => a.b.c.d + version_str = td_list[0].get_text().replace(' GA', '.0').strip() # x.y GA => x.y.0 + version_match = config.first_match(version_str) + if not version_match: + logging.warning(f"Skipping version '{version_str}': does not match any version pattern.") + continue + version = version_match["version"].replace('-', '.') # a.b.c-d => a.b.c.d - date_str = td_list[1].get_text().strip() - date_str = '2024-12-04' if date_str == '2024-12-041' else date_str # there is a typo for 6.15.5 - date = dates.parse_date(date_str) + date_str = td_list[1].get_text().strip() + date_str = '2024-12-04' if date_str == '2024-12-041' else date_str # there is a typo for 6.15.5 + date = dates.parse_date(date_str) - product_data.declare_version(version, date) + product_data.declare_version(version, date) diff --git a/src/redhat_lifecycles.py b/src/redhat_lifecycles.py index c3adbb8f..8dec3ec2 100644 --- a/src/redhat_lifecycles.py +++ b/src/redhat_lifecycles.py @@ -1,7 
+1,8 @@ import logging import urllib.parse -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetches EOL dates from the Red Hat Product Life Cycle Data API. @@ -17,26 +18,26 @@ class Mapping: def get_field_for(self, phase_name: str) -> str | None: return self.fields_by_phase.get(phase_name.lower(), None) -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - name = urllib.parse.quote(config.url) - mapping = Mapping(config.data["fields"]) +config = config_from_argv() +with ProductData(config.product) as product_data: + name = urllib.parse.quote(config.url) + mapping = Mapping(config.data["fields"]) - data = http.fetch_json('https://access.redhat.com/product-life-cycles/api/v1/products?name=' + name) + data = http.fetch_json('https://access.redhat.com/product-life-cycles/api/v1/products?name=' + name) - for version in data["data"][0]["versions"]: - version_name = version["name"] - version_match = config.first_match(version_name) - if not version_match: - logging.warning(f"Ignoring version '{version_name}', config is {config}") + for version in data["data"][0]["versions"]: + version_name = version["name"] + version_match = config.first_match(version_name) + if not version_match: + logging.warning(f"Ignoring version '{version_name}', config is {config}") + continue + + release = product_data.get_release(config.render(version_match)) + for phase in version["phases"]: + field = mapping.get_field_for(phase["name"]) + if not field: + logging.debug(f"Ignoring phase '{phase['name']}': not mapped") continue - release = product_data.get_release(config.render(version_match)) - for phase in version["phases"]: - field = mapping.get_field_for(phase["name"]) - if not field: - logging.debug(f"Ignoring phase '{phase['name']}': not mapped") - continue - - date = dates.parse_datetime(phase["date"]) - release.set_field(field, date) + 
date = dates.parse_datetime(phase["date"]) + release.set_field(field, date) diff --git a/src/release_table.py b/src/release_table.py index c216bfd2..01aac434 100644 --- a/src/release_table.py +++ b/src/release_table.py @@ -4,7 +4,8 @@ from datetime import datetime from re import Match from bs4 import BeautifulSoup -from common import dates, endoflife, http, releasedata +from common import dates, endoflife, http +from common.releasedata import ProductData, config_from_argv from liquid import Template """Fetch release-level data from an HTML table in a web page. @@ -150,69 +151,69 @@ class Field: return f"{self.name}({self.column})" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - render_javascript = config.data.get("render_javascript", False) - render_javascript_click_selector = config.data.get("render_javascript_click_selector", None) - render_javascript_wait_until = config.data.get("render_javascript_wait_until", None) - ignore_empty_releases = config.data.get("ignore_empty_releases", False) - header_row_selector = config.data.get("header_selector", "thead tr") - rows_selector = config.data.get("rows_selector", "tbody tr") - cells_selector = "td, th" - release_cycle_field = Field("releaseCycle", config.data["fields"].pop("releaseCycle")) - fields = [Field(name, definition) for name, definition in config.data["fields"].items()] +config = config_from_argv() +with ProductData(config.product) as product_data: + render_javascript = config.data.get("render_javascript", False) + render_javascript_click_selector = config.data.get("render_javascript_click_selector", None) + render_javascript_wait_until = config.data.get("render_javascript_wait_until", None) + ignore_empty_releases = config.data.get("ignore_empty_releases", False) + header_row_selector = config.data.get("header_selector", "thead tr") + rows_selector = config.data.get("rows_selector", "tbody tr") + cells_selector = "td, th" + 
release_cycle_field = Field("releaseCycle", config.data["fields"].pop("releaseCycle")) + fields = [Field(name, definition) for name, definition in config.data["fields"].items()] - if render_javascript: - response_text = http.fetch_javascript_url(config.url, click_selector=render_javascript_click_selector, - wait_until=render_javascript_wait_until) - else: - response_text = http.fetch_url(config.url).text - soup = BeautifulSoup(response_text, features="html5lib") + if render_javascript: + response_text = http.fetch_javascript_url(config.url, click_selector=render_javascript_click_selector, + wait_until=render_javascript_wait_until) + else: + response_text = http.fetch_url(config.url).text + soup = BeautifulSoup(response_text, features="html5lib") - for table in soup.select(config.data["selector"]): - header_row = table.select_one(header_row_selector) - if not header_row: - logging.info(f"skipping table with attributes {table.attrs}: no header row found") - continue + for table in soup.select(config.data["selector"]): + header_row = table.select_one(header_row_selector) + if not header_row: + logging.info(f"skipping table with attributes {table.attrs}: no header row found") + continue - headers = [th.get_text().strip().lower() for th in header_row.select(cells_selector)] - logging.info(f"processing table with headers {headers}") + headers = [th.get_text().strip().lower() for th in header_row.select(cells_selector)] + logging.info(f"processing table with headers {headers}") - try: - fields_index = {"releaseCycle": headers.index(release_cycle_field.column)} + try: + fields_index = {"releaseCycle": headers.index(release_cycle_field.column)} + for field in fields: + fields_index[field.name] = field.column if field.is_index else headers.index(field.column) + min_column_count = max(fields_index.values()) + 1 + + for row in table.select(rows_selector): + cells = [cell.get_text().strip() for cell in row.select(cells_selector)] + if len(cells) < min_column_count: + 
logging.info(f"skipping row {cells}: not enough columns") + continue + + raw_release_name = cells[fields_index[release_cycle_field.name]] + release_name = release_cycle_field.extract_from(raw_release_name) + if not release_name: + logging.info(f"skipping row {cells}: invalid release cycle '{raw_release_name}', " + f"should match one of {release_cycle_field.include_version_patterns} " + f"and not match all of {release_cycle_field.exclude_version_patterns}") + continue + + release = product_data.get_release(release_name) for field in fields: - fields_index[field.name] = field.column if field.is_index else headers.index(field.column) - min_column_count = max(fields_index.values()) + 1 + raw_field = cells[fields_index[field.name]] + try: + release.set_field(field.name, field.extract_from(raw_field)) + except ValueError as e: + logging.info(f"skipping cell {raw_field} for {release}: {e}") - for row in table.select(rows_selector): - cells = [cell.get_text().strip() for cell in row.select(cells_selector)] - if len(cells) < min_column_count: - logging.info(f"skipping row {cells}: not enough columns") - continue + if ignore_empty_releases and release.is_empty(): + logging.info(f"removing empty release '{release}'") + product_data.remove_release(release_name) - raw_release_name = cells[fields_index[release_cycle_field.name]] - release_name = release_cycle_field.extract_from(raw_release_name) - if not release_name: - logging.info(f"skipping row {cells}: invalid release cycle '{raw_release_name}', " - f"should match one of {release_cycle_field.include_version_patterns} " - f"and not match all of {release_cycle_field.exclude_version_patterns}") - continue + if release.is_released_after(TODAY): + logging.info(f"removing future release '{release}'") + product_data.remove_release(release_name) - release = product_data.get_release(release_name) - for field in fields: - raw_field = cells[fields_index[field.name]] - try: - release.set_field(field.name, field.extract_from(raw_field)) 
- except ValueError as e: - logging.info(f"skipping cell {raw_field} for {release}: {e}") - - if ignore_empty_releases and release.is_empty(): - logging.info(f"removing empty release '{release}'") - product_data.remove_release(release_name) - - if release.is_released_after(TODAY): - logging.info(f"removing future release '{release}'") - product_data.remove_release(release_name) - - except ValueError as e: - logging.info(f"skipping table with headers {headers}: {e}") + except ValueError as e: + logging.info(f"skipping table with headers {headers}: {e}") diff --git a/src/rhel.py b/src/rhel.py index 46a1bb88..06c1d1e0 100644 --- a/src/rhel.py +++ b/src/rhel.py @@ -1,23 +1,24 @@ import re -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv # https://regex101.com/r/877ibq/1 VERSION_PATTERN = re.compile(r"RHEL (?P\d)(\. ?(?P\d+))?(( Update (?P\d))| GA)?") -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url) - for tr in html.findAll("tr"): - td_list = tr.findAll("td") - if len(td_list) == 0: - continue + for tr in html.findAll("tr"): + td_list = tr.findAll("td") + if len(td_list) == 0: + continue - version_str = td_list[0].get_text().strip() - version_match = VERSION_PATTERN.match(version_str).groupdict() - version = version_match["major"] - version += ("." + version_match["minor"]) if version_match["minor"] else "" - version += ("." + version_match["minor2"]) if version_match["minor2"] else "" - date = dates.parse_date(td_list[1].get_text()) - product_data.declare_version(version, date) + version_str = td_list[0].get_text().strip() + version_match = VERSION_PATTERN.match(version_str).groupdict() + version = version_match["major"] + version += ("." 
+ version_match["minor"]) if version_match["minor"] else "" + version += ("." + version_match["minor2"]) if version_match["minor2"] else "" + date = dates.parse_date(td_list[1].get_text()) + product_data.declare_version(version, date) diff --git a/src/rocky-linux.py b/src/rocky-linux.py index 562a396b..d646ab8c 100644 --- a/src/rocky-linux.py +++ b/src/rocky-linux.py @@ -1,11 +1,12 @@ -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - response = http.fetch_url(config.url) - for line in response.text.strip().split('\n'): - items = line.split('|') - if len(items) >= 5 and config.first_match(items[1].strip()): - version = items[1].strip() - date = dates.parse_date(items[3]) - product_data.declare_version(version, date) +config = config_from_argv() +with ProductData(config.product) as product_data: + response = http.fetch_url(config.url) + for line in response.text.strip().split('\n'): + items = line.split('|') + if len(items) >= 5 and config.first_match(items[1].strip()): + version = items[1].strip() + date = dates.parse_date(items[3]) + product_data.declare_version(version, date) diff --git a/src/ros.py b/src/ros.py index 3ebf118b..29cbf6f2 100644 --- a/src/ros.py +++ b/src/ros.py @@ -1,28 +1,29 @@ import logging -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url) - for tr in html.findAll("tr"): - td_list = tr.findAll("td") - if len(td_list) == 0: - continue + for tr in html.findAll("tr"): + td_list = 
tr.findAll("td") + if len(td_list) == 0: + continue - version_str = td_list[0].get_text().strip() - version_match = config.first_match(version_str) - if not version_match: - logging.warning(f"Skipping version '{version_str}': does not match the expected pattern") - continue + version_str = td_list[0].get_text().strip() + version_match = config.first_match(version_str) + if not version_match: + logging.warning(f"Skipping version '{version_str}': does not match the expected pattern") + continue - # Get the "code" (such as noetic) instead of the display name (such as Noetic Ninjemys) - version = td_list[0].findAll("a")[0]["href"][1:] - try: - date = dates.parse_date(td_list[1].get_text()) - except ValueError: # The day has a suffix (such as May 23rd, 2020) - x = td_list[1].get_text().split(",") - date = dates.parse_date(x[0][:-2] + x[1]) + # Get the "code" (such as noetic) instead of the display name (such as Noetic Ninjemys) + version = td_list[0].findAll("a")[0]["href"][1:] + try: + date = dates.parse_date(td_list[1].get_text()) + except ValueError: # The day has a suffix (such as May 23rd, 2020) + x = td_list[1].get_text().split(",") + date = dates.parse_date(x[0][:-2] + x[1]) - product_data.declare_version(version, date) + product_data.declare_version(version, date) diff --git a/src/samsung-security.py b/src/samsung-security.py index 4e9000ff..4348c0cc 100644 --- a/src/samsung-security.py +++ b/src/samsung-security.py @@ -2,7 +2,8 @@ import logging import re from datetime import date, datetime, time, timezone -from common import dates, endoflife, http, releasedata +from common import dates, endoflife, http +from common.releasedata import ProductData, parse_argv """Detect new models and aggregate EOL data for Samsung Mobile devices. @@ -12,64 +13,63 @@ it retains the date and use it as the model's EOL date. 
TODAY = dates.today() -frontmatter, configs = releasedata.parse_argv() -for config in configs: - with releasedata.ProductData(config.product) as product_data: - frontmatter_release_names = frontmatter.get_release_names() +frontmatter, config = parse_argv() +with ProductData(config.product) as product_data: + frontmatter_release_names = frontmatter.get_release_names() - # Copy EOL dates from frontmatter to product data - for frontmatter_release in frontmatter.get_releases(): - eol = frontmatter_release.get("eol") - eol = datetime.combine(eol, time.min, tzinfo=timezone.utc) if isinstance(eol, date) else eol + # Copy EOL dates from frontmatter to product data + for frontmatter_release in frontmatter.get_releases(): + eol = frontmatter_release.get("eol") + eol = datetime.combine(eol, time.min, tzinfo=timezone.utc) if isinstance(eol, date) else eol - release = product_data.get_release(frontmatter_release.get("releaseCycle")) - release.set_eol(eol) + release = product_data.get_release(frontmatter_release.get("releaseCycle")) + release.set_eol(eol) - html = http.fetch_html(config.url) + html = http.fetch_html(config.url) - sections = config.data.get("sections", {}) - for update_cadence, title in sections.items(): - models_list = html.find(string=lambda text, search=title: search in text if text else False).find_next("ul") + sections = config.data.get("sections", {}) + for update_cadence, title in sections.items(): + models_list = html.find(string=lambda text, search=title: search in text if text else False).find_next("ul") - for item in models_list.find_all("li"): - models = item.text.replace("Enterprise Models:", "") - logging.info(f"Found {models} for {update_cadence} security updates") + for item in models_list.find_all("li"): + models = item.text.replace("Enterprise Models:", "") + logging.info(f"Found {models} for {update_cadence} security updates") - for model in re.split(r',\s*', models): - name = endoflife.to_identifier(model) - if config.is_excluded(name): - 
logging.debug(f"Ignoring model '{name}', excluded by configuration") - continue + for model in re.split(r',\s*', models): + name = endoflife.to_identifier(model) + if config.is_excluded(name): + logging.debug(f"Ignoring model '{name}', excluded by configuration") + continue - release = product_data.get_release(name) - release.set_label(model.strip()) + release = product_data.get_release(name) + release.set_label(model.strip()) - if name in frontmatter_release_names: - frontmatter_release_names.remove(name) - current_eol = release.get_eol() - if current_eol is True or (isinstance(current_eol, datetime) and current_eol <= TODAY): - logging.info(f"Known model {name} is incorrectly marked as EOL, updating eol") - release.set_eol(False) - else: - logging.debug(f"Known model {name} is not EOL, keeping eol as {current_eol}") - - else: - logging.debug(f"Found new model {name}") + if name in frontmatter_release_names: + frontmatter_release_names.remove(name) + current_eol = release.get_eol() + if current_eol is True or (isinstance(current_eol, datetime) and current_eol <= TODAY): + logging.info(f"Known model {name} is incorrectly marked as EOL, updating eol") release.set_eol(False) + else: + logging.debug(f"Known model {name} is not EOL, keeping eol as {current_eol}") - # the remaining models in frontmatter_release_names are not listed anymore on the Samsung page => they are EOL - for eol_model_name in frontmatter_release_names: - release = product_data.get_release(eol_model_name) - current_eol = release.get_eol() - if config.is_excluded(eol_model_name): - logging.debug(f"Skipping model {eol_model_name}, excluded by configuration") - elif current_eol is False: - logging.info(f"Model {eol_model_name} is not EOL, setting eol") - release.set_eol(TODAY) - elif isinstance(current_eol, datetime): - if current_eol > TODAY: - logging.info(f"Model {eol_model_name} is not marked as EOL, setting eol as {TODAY}") - release.set_eol(TODAY) else: - logging.debug(f"Model {eol_model_name} 
is already EOL, keeping eol as {current_eol}") + logging.debug(f"Found new model {name}") + release.set_eol(False) + + # the remaining models in frontmatter_release_names are not listed anymore on the Samsung page => they are EOL + for eol_model_name in frontmatter_release_names: + release = product_data.get_release(eol_model_name) + current_eol = release.get_eol() + if config.is_excluded(eol_model_name): + logging.debug(f"Skipping model {eol_model_name}, excluded by configuration") + elif current_eol is False: + logging.info(f"Model {eol_model_name} is not EOL, setting eol") + release.set_eol(TODAY) + elif isinstance(current_eol, datetime): + if current_eol > TODAY: + logging.info(f"Model {eol_model_name} is not marked as EOL, setting eol as {TODAY}") + release.set_eol(TODAY) + else: + logging.debug(f"Model {eol_model_name} is already EOL, keeping eol as {current_eol}") diff --git a/src/sles.py b/src/sles.py index cc474bfc..93d4cf21 100644 --- a/src/sles.py +++ b/src/sles.py @@ -1,29 +1,30 @@ import logging -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url) - products_table = html.find("tbody", id="productSupportLifecycle") - sles_header_rows = products_table.find_all("tr", class_="row", attrs={"data-productfilter": "SUSE Linux Enterprise Server"}) + products_table = html.find("tbody", id="productSupportLifecycle") + sles_header_rows = products_table.find_all("tr", class_="row", attrs={"data-productfilter": "SUSE Linux Enterprise Server"}) - # Extract rows' IDs to find related sub-rows with details (normally hidden until a user expands a section) - for detail_id in [f"detail{row['id']}" for row in 
sles_header_rows]: - detail_row = products_table.find("tr", id=detail_id) - # There is a table with info about minor releases and after it, optionally, a table with info about modules - minor_versions_table = detail_row.find_all("tbody")[0] + # Extract rows' IDs to find related sub-rows with details (normally hidden until a user expands a section) + for detail_id in [f"detail{row['id']}" for row in sles_header_rows]: + detail_row = products_table.find("tr", id=detail_id) + # There is a table with info about minor releases and after it, optionally, a table with info about modules + minor_versions_table = detail_row.find_all("tbody")[0] - # The first sub-row is a header, the rest contains info about the first release and later minor releases - for row in minor_versions_table.find_all("tr")[1:]: - # For each minor release there is an FCS date, general support end date and LTSS end date - cells = row.find_all("td") - version = cells[0].text.replace("SUSE Linux Enterprise Server ", '').replace(' SP', '.') - date_str = cells[1].text + # The first sub-row is a header, the rest contains info about the first release and later minor releases + for row in minor_versions_table.find_all("tr")[1:]: + # For each minor release there is an FCS date, general support end date and LTSS end date + cells = row.find_all("td") + version = cells[0].text.replace("SUSE Linux Enterprise Server ", '').replace(' SP', '.') + date_str = cells[1].text - try: - date = dates.parse_date(date_str) - product_data.declare_version(version, date) - except ValueError: - logging.info(f"Ignoring {version}: date '{date_str}' could not be parsed") + try: + date = dates.parse_date(date_str) + product_data.declare_version(version, date) + except ValueError: + logging.info(f"Ignoring {version}: date '{date_str}' could not be parsed") diff --git a/src/splunk.py b/src/splunk.py index 20f0cf0f..5210f80d 100644 --- a/src/splunk.py +++ b/src/splunk.py @@ -1,6 +1,7 @@ import re -from common import dates, http, 
releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv VERSION_DATE_PATTERN = re.compile(r"Splunk Enterprise (?P\d+\.\d+(?:\.\d+)*) was (?:first )?released on (?P\w+\s\d\d?,\s\d{4})\.", re.MULTILINE) @@ -29,19 +30,19 @@ def get_latest_minor_versions(versions: list[str]) -> list[str]: return latest_versions -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url) - all_versions = [option.attrs['value'] for option in html.select("select#version-select > option")] - all_versions = [v for v in all_versions if v != "DataMonitoringAppPreview"] + all_versions = [option.attrs['value'] for option in html.select("select#version-select > option")] + all_versions = [v for v in all_versions if v != "DataMonitoringAppPreview"] - # Latest minor release notes contains release notes for all previous minor versions. - # For example, 9.0.5 release notes also contains release notes for 9.0.0 to 9.0.4. - latest_minor_versions = get_latest_minor_versions(all_versions) - latest_minor_versions_urls = [f"{config.url}/{v}/ReleaseNotes/MeetSplunk" for v in latest_minor_versions] - for response in http.fetch_urls(latest_minor_versions_urls): - for (version_str, date_str) in VERSION_DATE_PATTERN.findall(response.text): - version_str = f"{version_str}.0" if len(version_str.split(".")) == 2 else version_str # convert x.y to x.y.0 - date = dates.parse_date(date_str) - product_data.declare_version(version_str, date) + # Latest minor release notes contains release notes for all previous minor versions. + # For example, 9.0.5 release notes also contains release notes for 9.0.0 to 9.0.4. 
+ latest_minor_versions = get_latest_minor_versions(all_versions) + latest_minor_versions_urls = [f"{config.url}/{v}/ReleaseNotes/MeetSplunk" for v in latest_minor_versions] + for response in http.fetch_urls(latest_minor_versions_urls): + for (version_str, date_str) in VERSION_DATE_PATTERN.findall(response.text): + version_str = f"{version_str}.0" if len(version_str.split(".")) == 2 else version_str # convert x.y to x.y.0 + date = dates.parse_date(date_str) + product_data.declare_version(version_str, date) diff --git a/src/typo3.py b/src/typo3.py index f2e1da7e..92976390 100644 --- a/src/typo3.py +++ b/src/typo3.py @@ -1,12 +1,13 @@ -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - data = http.fetch_json(config.url) - for v in data: - if v['type'] == 'development': - continue +config = config_from_argv() +with ProductData(config.product) as product_data: + data = http.fetch_json(config.url) + for v in data: + if v['type'] == 'development': + continue - version = v["version"] - date = dates.parse_datetime(v["date"], to_utc=False) # utc kept for now for backwards compatibility - product_data.declare_version(version, date) + version = v["version"] + date = dates.parse_datetime(v["date"], to_utc=False) # utc kept for now for backwards compatibility + product_data.declare_version(version, date) diff --git a/src/unity.py b/src/unity.py index 4df337f2..e24ebfab 100644 --- a/src/unity.py +++ b/src/unity.py @@ -1,4 +1,5 @@ -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetches the Unity LTS releases from the Unity website. Non-LTS releases are not listed there, so this automation is only partial. 
@@ -16,11 +17,11 @@ Note that it was assumed that: The script will need to be updated if someday those conditions are not met.""" -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url) - for release in html.find_all('div', class_='component-releases-item__show__inner-header'): - version = release.find('h4').find('span').text - date = dates.parse_datetime(release.find('time').attrs['datetime']) - product_data.declare_version(version, date) + for release in html.find_all('div', class_='component-releases-item__show__inner-header'): + version = release.find('h4').find('span').text + date = dates.parse_datetime(release.find('time').attrs['datetime']) + product_data.declare_version(version, date) diff --git a/src/unrealircd.py b/src/unrealircd.py index a63d8b55..5b92674f 100644 --- a/src/unrealircd.py +++ b/src/unrealircd.py @@ -1,20 +1,21 @@ import re -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv DATE_PATTERN = re.compile(r"\d{4}-\d{2}-\d{2}") -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - wikicode = http.fetch_markdown(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + wikicode = http.fetch_markdown(config.url) - for tr in wikicode.ifilter_tags(matches=lambda node: node.tag == "tr"): - items = tr.contents.filter_tags(matches=lambda node: node.tag == "td") - if len(items) < 2: - continue + for tr in wikicode.ifilter_tags(matches=lambda node: node.tag == "tr"): + items = tr.contents.filter_tags(matches=lambda node: node.tag == "td") + if len(items) < 2: + continue - version = items[0].__strip__() - date_str = items[1].__strip__() - if 
config.first_match(version) and DATE_PATTERN.match(date_str): - date = dates.parse_date(date_str) - product_data.declare_version(version, date) + version = items[0].__strip__() + date_str = items[1].__strip__() + if config.first_match(version) and DATE_PATTERN.match(date_str): + date = dates.parse_date(date_str) + product_data.declare_version(version, date) diff --git a/src/veeam.py b/src/veeam.py index fcd9bdd8..c4cef180 100644 --- a/src/veeam.py +++ b/src/veeam.py @@ -1,7 +1,8 @@ import logging import re -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetches Veeam products versions from https://www.veeam.com. @@ -9,31 +10,31 @@ This script takes a single argument which is the url of the versions page on htt such as `https://www.veeam.com/kb2680`. """ -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url) - version_column = config.data.get("version_column", "Build Number").lower() - date_column = config.data.get("date_column", "Release Date").lower() - for table in html.find_all("table"): - headers = [header.get_text().strip().lower() for header in table.find("tr").find_all("td")] - if version_column not in headers or date_column not in headers: - logging.warning("Skipping table with headers %s as it does not contains '%s' or '%s'", - headers, version_column, date_column) + version_column = config.data.get("version_column", "Build Number").lower() + date_column = config.data.get("date_column", "Release Date").lower() + for table in html.find_all("table"): + headers = [header.get_text().strip().lower() for header in table.find("tr").find_all("td")] + if version_column not in headers or date_column not in headers: + logging.warning("Skipping table with headers 
%s as it does not contains '%s' or '%s'", + headers, version_column, date_column) + continue + + version_index = headers.index(version_column) + date_index = headers.index(date_column) + for row in table.find_all("tr")[1:]: + cells = row.find_all("td") + if len(cells) <= max(version_index, date_index): continue - version_index = headers.index(version_column) - date_index = headers.index(date_column) - for row in table.find_all("tr")[1:]: - cells = row.find_all("td") - if len(cells) <= max(version_index, date_index): - continue + date_str = cells[date_index].get_text().strip() + if not date_str or date_str == "-": + continue - date_str = cells[date_index].get_text().strip() - if not date_str or date_str == "-": - continue - - # whitespaces in version numbers are replaced with dashes - version = re.sub(r'\s+', "-", cells[version_index].get_text().strip()) - date = dates.parse_date(date_str) - product_data.declare_version(version, date) + # whitespaces in version numbers are replaced with dashes + version = re.sub(r'\s+', "-", cells[version_index].get_text().strip()) + date = dates.parse_date(date_str) + product_data.declare_version(version, date) diff --git a/src/virtualbox.py b/src/virtualbox.py index b2fed7a0..ca65339b 100644 --- a/src/virtualbox.py +++ b/src/virtualbox.py @@ -1,34 +1,35 @@ import logging import re -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv """Fetches releases from VirtualBox download page.""" EOL_REGEX = re.compile(r"^\(no longer supported, support ended (?P\d{4}/\d{2})\)$") -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url) - for li in html.select_one("#DownloadVirtualBoxOldBuilds + ul").find_all("li"): - li_text = 
li.find("a").text.strip() + for li in html.select_one("#DownloadVirtualBoxOldBuilds + ul").find_all("li"): + li_text = li.find("a").text.strip() - release_match = config.first_match(li_text) - if not release_match: - logging.info(f"Skipping '{li_text}': does not match expected pattern") - continue + release_match = config.first_match(li_text) + if not release_match: + logging.info(f"Skipping '{li_text}': does not match expected pattern") + continue - release_name = release_match.group("value") - release = product_data.get_release(release_name) + release_name = release_match.group("value") + release = product_data.get_release(release_name) - eol_text = li.find("em").text.lower().strip() - eol_match = EOL_REGEX.match(eol_text) - if not eol_match: - logging.info(f"Ignoring '{eol_text}': does not match {EOL_REGEX}") - continue + eol_text = li.find("em").text.lower().strip() + eol_match = EOL_REGEX.match(eol_text) + if not eol_match: + logging.info(f"Ignoring '{eol_text}': does not match {EOL_REGEX}") + continue - eol_date_str = eol_match.group("value") - eol_date = dates.parse_month_year_date(eol_date_str) - release.set_eol(eol_date) + eol_date_str = eol_match.group("value") + eol_date = dates.parse_month_year_date(eol_date_str) + release.set_eol(eol_date) diff --git a/src/visual-studio.py b/src/visual-studio.py index 3c5a92e4..f28e0112 100644 --- a/src/visual-studio.py +++ b/src/visual-studio.py @@ -1,24 +1,25 @@ -from common import dates, http, releasedata +from common import dates, http +from common.releasedata import ProductData, config_from_argv -for config in releasedata.list_configs_from_argv(): - with releasedata.ProductData(config.product) as product_data: - html = http.fetch_html(config.url) +config = config_from_argv() +with ProductData(config.product) as product_data: + html = http.fetch_html(config.url) - for table in html.find_all("table"): - headers = [th.get_text().strip().lower() for th in table.find_all("th")] - if "version" not in headers or "release 
date" not in headers: + for table in html.find_all("table"): + headers = [th.get_text().strip().lower() for th in table.find_all("th")] + if "version" not in headers or "release date" not in headers: + continue + + version_index = headers.index("version") + date_index = headers.index("release date") + for row in table.findAll("tr"): + cells = row.findAll("td") + if len(cells) < (max(version_index, date_index) + 1): continue - version_index = headers.index("version") - date_index = headers.index("release date") - for row in table.findAll("tr"): - cells = row.findAll("td") - if len(cells) < (max(version_index, date_index) + 1): - continue + version = cells[version_index].get_text().strip() + date = cells[date_index].get_text().strip() + date = dates.parse_date(date) - version = cells[version_index].get_text().strip() - date = cells[date_index].get_text().strip() - date = dates.parse_date(date) - - if date and version and config.first_match(version): - product_data.declare_version(version, date) + if date and version and config.first_match(version): + product_data.declare_version(version, date)