diff --git a/src/amazon-eks.py b/src/amazon-eks.py index edf5a3d2..b88a980b 100644 --- a/src/amazon-eks.py +++ b/src/amazon-eks.py @@ -1,34 +1,32 @@ +import logging + from bs4 import BeautifulSoup from common import dates, endoflife, http, releasedata """Fetches EKS versions from AWS docs. -Now that AWS no longer publishes docs on GitHub, we use the Web Archive to still get the older versions.""" +Now that AWS no longer publishes docs on GitHub, we use the Web Archive to get the older versions.""" -URLS = [ - # 1.19.eks.1 - "https://web.archive.org/web/20221007150452/https://docs.aws.amazon.com/eks/latest/userguide/platform-versions.html", - # + 1.20 - "https://web.archive.org/web/20230521061347/https://docs.aws.amazon.com/eks/latest/userguide/platform-versions.html", - # + latest - "https://docs.aws.amazon.com/eks/latest/userguide/platform-versions.html", -] - -with releasedata.ProductData("amazon-eks") as product_data: - for version_list in http.fetch_urls(URLS): - version_list_soup = BeautifulSoup(version_list.text, features="html5lib") - for tr in version_list_soup.select("#main-col-body")[0].findAll("tr"): +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + response = http.fetch_url(config.url) + html = BeautifulSoup(response.text, features="html5lib") + for tr in html.select("#main-col-body")[0].findAll("tr"): cells = tr.findAll("td") if not cells: continue - k8s_version = cells[0].text.strip() - eks_version = cells[1].text.strip() + k8s_version_text = cells[0].text.strip() + k8s_version_match = config.first_match(k8s_version_text) + if not k8s_version_match: + logging.warning(f"Skipping {k8s_version_text}: does not match version regex(es)") + continue - k8s_version_match = endoflife.DEFAULT_VERSION_PATTERN.match(k8s_version) - if k8s_version_match: - date_str = cells[-1].text.strip() - date_str = date_str.replace("April 18.2025", "April 18 2025") # temporary fix for a typo in the source - date 
= dates.parse_date_or_month_year_date(date_str) - # K8S patch version is not kept to match versions on https://github.com/aws/eks-distro/tags. - version = f"{k8s_version_match.group('major')}.{k8s_version_match.group('minor')}-{eks_version.replace('.', '-')}" - product_data.declare_version(version, date) + eks_version = cells[1].text.strip() + # K8S patch version is not kept to match versions on https://github.com/aws/eks-distro/tags + version = f"{k8s_version_match.group('major')}.{k8s_version_match.group('minor')}-{eks_version.replace('.', '-')}" + + date_str = cells[-1].text.strip() + date_str = date_str.replace("April 18.2025", "April 18 2025") # temporary fix for a typo in the source + date = dates.parse_date_or_month_year_date(date_str) + + product_data.declare_version(version, date) diff --git a/src/amazon-neptune.py b/src/amazon-neptune.py index 56593624..7cc5f950 100644 --- a/src/amazon-neptune.py +++ b/src/amazon-neptune.py @@ -1,21 +1,24 @@ -import re +import logging import xml.dom.minidom -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata """Fetches Amazon Neptune versions from its RSS feed on docs.aws.amazon.com.""" -RSS_URL = "https://docs.aws.amazon.com/neptune/latest/userguide/rssupdates.rss" -VERSION_PATTERN = re.compile(r"^Engine version (?P[0-9R.]+)$") +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + rss_response = http.fetch_url(config.url) + rss = xml.dom.minidom.parseString(rss_response.text) -with releasedata.ProductData("amazon-neptune") as product_data: - rss_response = http.fetch_url(RSS_URL) - rss = xml.dom.minidom.parseString(rss_response.text) + for entry in rss.getElementsByTagName("item"): + version_str = entry.getElementsByTagName("title")[0].firstChild.nodeValue + date_str = entry.getElementsByTagName("pubDate")[0].firstChild.nodeValue - for entry in rss.getElementsByTagName("item"): - version_str = 
entry.getElementsByTagName("title")[0].firstChild.nodeValue - date_str = entry.getElementsByTagName("pubDate")[0].firstChild.nodeValue + version_match = config.first_match(version_str) + if not version_match: + logging.warning(f"Skipping entry with malformed version: {entry}") + continue - version_match = VERSION_PATTERN.match(version_str) - if version_match: - product_data.declare_version(version_match['version'], dates.parse_datetime(date_str)) + version = config.render(version_match) + date = dates.parse_datetime(date_str) + product_data.declare_version(version, date) diff --git a/src/apache-http-server.py b/src/apache-http-server.py index 99e44c7c..f64a44d6 100644 --- a/src/apache-http-server.py +++ b/src/apache-http-server.py @@ -1,34 +1,24 @@ -import re - -from common import dates, releasedata +from common import dates, endoflife, releasedata from common.git import Git """Fetches Apache HTTP Server versions and release date from its git repository by looking at the STATUS file of each ..x branch.""" -VERSION_AND_DATE_PATTERNS = [ - # for most versions - re.compile(r"\s+(?P\d+\.\d+\.\d+)\s*:.*(?:Released|Announced|Released and Retired)\s(?:on\s)?(?P\w+\s\d\d?,\s\d{4})"), - # for older 2.0.x versions (only GA versions are considered) - re.compile(r"\s+(?P\d+\.\d+\.\d+)\s*:.*released\s(?P\w+\s\d\d?,\s\d{4}) as GA"), - # for older 1.3.x versions, we take the date of the tag and not the date of the release (too difficult to parse) - re.compile(r"\s+(?P\d+\.\d+\.\d+)\s*:.*Tagged and [rR]olled\s(?:on\s)?(?P\w+\.?\s\d\d?,\s\d{4})"), -] +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + git = Git(config.url) + git.setup() -with releasedata.ProductData("apache-http-server") as product_data: - git = Git("https://github.com/apache/httpd.git") - git.setup() + for branch in git.list_branches("refs/heads/?.?.x"): + git.checkout(branch, file_list=["STATUS"]) - for branch in 
git.list_branches("refs/heads/?.?.x"): - git.checkout(branch, file_list=["STATUS"]) + release_notes_file = git.repo_dir / "STATUS" + if not release_notes_file.exists(): + continue - release_notes_file = git.repo_dir / "STATUS" - if not release_notes_file.exists(): - continue + with release_notes_file.open("rb") as f: + release_notes = f.read().decode("utf-8", errors="ignore") - with release_notes_file.open("rb") as f: - release_notes = f.read().decode("utf-8", errors="ignore") - - for pattern in VERSION_AND_DATE_PATTERNS: - for (version, date_str) in pattern.findall(release_notes): - product_data.declare_version(version, dates.parse_date(date_str)) + for pattern in config.include_version_patterns: + for (version, date_str) in pattern.findall(release_notes): + product_data.declare_version(version, dates.parse_date(date_str)) diff --git a/src/apache-subversion.py b/src/apache-subversion.py index 6ca157ad..cbaf1970 100644 --- a/src/apache-subversion.py +++ b/src/apache-subversion.py @@ -1,33 +1,21 @@ import logging -import re from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata -# https://regex101.com/r/k4i7EO/1 only non beta versions -VERSION_PATTERN = re.compile(r"^Subversion\s(?P[1-9]\d*.\d+\.\d+)$") -# https://regex101.com/r/GsimYd/2 -DATE_PATTERN = re.compile(r"^\((?P\w+,\s\d{1,2}\s\w+\s\d{4})") +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + response = http.fetch_url(config.url) + soup = BeautifulSoup(response.text, features="html5lib") -with releasedata.ProductData("apache-subversion") as product_data: - relnotes = http.fetch_url("https://subversion.apache.org/docs/release-notes/release-history.html") - relnotes_soup = BeautifulSoup(relnotes.text, features="html5lib") + ul = soup.find("h2").find_next("ul") + for li in ul.find_all("li"): + text = li.get_text(strip=True) + match = config.first_match(text) + if not 
match: + logging.info(f"Skipping {text}, does not match any regex") + continue - ul = relnotes_soup.find("h2").find_next("ul") - for li in ul.find_all("li"): - b = li.find_next("b") # b contains the version - version_text = b.get_text(strip=True) - version_match = VERSION_PATTERN.match(version_text) - if not version_match: - logging.info(f"Skipping {version_text}, does not match version regex") - continue - - remaining_part_str = b.next_sibling.get_text(strip=True) - date_match = DATE_PATTERN.match(remaining_part_str) - if not date_match: - logging.info(f"Skipping {version_text}, no matching date in '{remaining_part_str}'") - continue - - version = version_match.group("version") - date = dates.parse_date(date_match.group("date")) - product_data.declare_version(version, date) + version = match.group("version") + date = dates.parse_date(match.group("date")) + product_data.declare_version(version, date) diff --git a/src/apple.py b/src/apple.py index 8faf91f0..93d6c818 100644 --- a/src/apple.py +++ b/src/apple.py @@ -1,6 +1,5 @@ import logging import re -import sys from bs4 import BeautifulSoup from common import dates, endoflife, http, releasedata @@ -22,11 +21,8 @@ URLS = [ ] DATE_PATTERN = re.compile(r"\b\d+\s[A-Za-z]+\s\d+\b") -METHOD = 'apple' -p_filter = sys.argv[1] if len(sys.argv) > 1 else None -m_filter = sys.argv[2] if len(sys.argv) > 2 else None -for config in endoflife.list_configs(p_filter, METHOD, m_filter): +for config in endoflife.list_configs_from_argv(): with releasedata.ProductData(config.product) as product_data: # URLs are cached to avoid rate limiting by support.apple.com. 
soups = [BeautifulSoup(response.text, features="html5lib") for response in http.fetch_urls(URLS, cache=True)] diff --git a/src/artifactory.py b/src/artifactory.py index 390f5253..4fe0820e 100644 --- a/src/artifactory.py +++ b/src/artifactory.py @@ -1,21 +1,22 @@ from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata """Fetches Artifactory versions from https://jfrog.com, using requests_html because JavaScript is needed to render the page.""" -with releasedata.ProductData("artifactory") as product_data: - content = http.fetch_javascript_url('https://jfrog.com/help/r/jfrog-release-information/artifactory-end-of-life', wait_until = 'networkidle') - soup = BeautifulSoup(content, 'html.parser') +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + content = http.fetch_javascript_url(config.url, wait_until = 'networkidle') + soup = BeautifulSoup(content, 'html.parser') - for row in soup.select('.informaltable tbody tr'): - cells = row.select("td") - if len(cells) >= 2: - version = cells[0].text.strip() - if version: - date_str = cells[1].text.strip().replace("_", "-").replace("Sept-", "Sep-") - product_data.declare_version(version, dates.parse_date(date_str)) + for row in soup.select('.informaltable tbody tr'): + cells = row.select("td") + if len(cells) >= 2: + version = cells[0].text.strip() + if version: + date_str = cells[1].text.strip().replace("_", "-").replace("Sept-", "Sep-") + product_data.declare_version(version, dates.parse_date(date_str)) - # 7.29.9 release date is wrong on https://jfrog.com/help/r/jfrog-release-information/artifactory-end-of-life. - # Sent a mail to jfrog-help-center-feedback@jfrog.com to fix it, but in the meantime... - product_data.declare_version('7.29.9', dates.date(2022, 1, 11)) + # 7.29.9 release date is wrong on https://jfrog.com/help/r/jfrog-release-information/artifactory-end-of-life. 
+ # Sent a mail to jfrog-help-center-feedback@jfrog.com to fix it, but in the meantime... + product_data.declare_version('7.29.9', dates.date(2022, 1, 11)) diff --git a/src/atlassian_eol.py b/src/atlassian_eol.py index b0b50659..0626d22d 100644 --- a/src/atlassian_eol.py +++ b/src/atlassian_eol.py @@ -1,31 +1,24 @@ import logging -import re -import sys from bs4 import BeautifulSoup from common import dates, endoflife, http, releasedata """Fetches EOL dates from Atlassian EOL page. -This script takes a single argument which is the product title identifier on the Atlassian EOL page, such as +This script takes a selector argument which is the product title identifier on the Atlassian EOL page, such as `AtlassianSupportEndofLifePolicy-JiraSoftware`. """ -METHOD = "atlassian_eol" -REGEX = r"(?P\d+(\.\d+)+) \(EO[SL] date: (?P.+)\).*$" -PATTERN = re.compile(REGEX, re.MULTILINE) - -p_filter = sys.argv[1] if len(sys.argv) > 1 else None -m_filter = sys.argv[2] if len(sys.argv) > 2 else None -for config in endoflife.list_configs(p_filter, METHOD, m_filter): +for config in endoflife.list_configs_from_argv(): with releasedata.ProductData(config.product) as product_data: - content = http.fetch_javascript_url('https://confluence.atlassian.com/support/atlassian-support-end-of-life-policy-201851003.html') + content = http.fetch_javascript_url(config.url) soup = BeautifulSoup(content, features="html5lib") - for li in soup.select(f"#{config.url}+ul li"): - match = PATTERN.match(li.get_text(strip=True)) + # Find the section with the EOL dates + for li in soup.select(f"#{config.data.get('selector')}+ul li"): + match = config.first_match(li.get_text(strip=True)) if not match: - logging.warning(f"Failed to parse EOL date from '{li.get_text(strip=True)}'") + logging.warning(f"Skipping '{li.get_text(strip=True)}', no match found") continue release_name = match.group("release") diff --git a/src/atlassian_versions.py b/src/atlassian_versions.py index 475576cc..aa27b828 100644 --- 
a/src/atlassian_versions.py +++ b/src/atlassian_versions.py @@ -1,5 +1,3 @@ -import sys - from bs4 import BeautifulSoup from common import dates, endoflife, http, releasedata @@ -9,11 +7,7 @@ This script takes a single argument which is the url of the product's download-a `https://www.atlassian.com/software/confluence/download-archives`. """ -METHOD = "atlassian_versions" - -p_filter = sys.argv[1] if len(sys.argv) > 1 else None -m_filter = sys.argv[2] if len(sys.argv) > 2 else None -for config in endoflife.list_configs(p_filter, METHOD, m_filter): +for config in endoflife.list_configs_from_argv(): with releasedata.ProductData(config.product) as product_data: content = http.fetch_javascript_url(config.url, wait_until='networkidle') soup = BeautifulSoup(content, 'html5lib') diff --git a/src/aws-lambda.py b/src/aws-lambda.py index dd9475a2..6f382a3c 100644 --- a/src/aws-lambda.py +++ b/src/aws-lambda.py @@ -1,47 +1,48 @@ import logging from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata """Fetches AWS lambda runtimes with their support / EOL dates from https://docs.aws.amazon.com.""" -with releasedata.ProductData("aws-lambda") as product_data: - response = http.fetch_url("https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtimes.html") - soup = BeautifulSoup(response.text, features="html5lib") +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + response = http.fetch_url(config.url) + soup = BeautifulSoup(response.text, features="html5lib") - for i, table in enumerate(soup.find_all("table")): - headers = [th.get_text().strip().lower() for th in table.find("thead").find_all("tr")[0].find_all("th")] - if "identifier" not in headers or "deprecation date" not in headers or "block function update" not in headers: - logging.info(f"table with header '{headers}' does not contain all the expected headers") - continue + for i, table 
in enumerate(soup.find_all("table")): + headers = [th.get_text().strip().lower() for th in table.find("thead").find_all("tr")[0].find_all("th")] + if "identifier" not in headers or "deprecation date" not in headers or "block function update" not in headers: + logging.info(f"table with header '{headers}' does not contain all the expected headers") + continue - is_supported_table = i == 0 # first table is for supported runtimes, second for deprecated ones - identifier_index = headers.index("identifier") - deprecation_date_index = headers.index("deprecation date") - block_function_update_index = headers.index("block function update") + is_supported_table = i == 0 # first table is for supported runtimes, second for deprecated ones + identifier_index = headers.index("identifier") + deprecation_date_index = headers.index("deprecation date") + block_function_update_index = headers.index("block function update") - for row in table.find("tbody").find_all("tr"): - cells = row.find_all("td") - identifier = cells[identifier_index].get_text().strip() + for row in table.find("tbody").find_all("tr"): + cells = row.find_all("td") + identifier = cells[identifier_index].get_text().strip() - deprecation_date_str = cells[deprecation_date_index].get_text().strip() - try: - deprecation_date = dates.parse_date(deprecation_date_str) - except ValueError: - deprecation_date = None + deprecation_date_str = cells[deprecation_date_index].get_text().strip() + try: + deprecation_date = dates.parse_date(deprecation_date_str) + except ValueError: + deprecation_date = None - if identifier == "nodejs4.3-edge": - # there is a mistake in the data: block function update date cannot be before the deprecation date - block_function_update_str = "2020-04-30" - else: - block_function_update_str = cells[block_function_update_index].get_text().strip() - try: - block_function_update = dates.parse_date(block_function_update_str) - except ValueError: - block_function_update = None + if identifier == 
"nodejs4.3-edge": + # there is a mistake in the data: block function update date cannot be before the deprecation date + block_function_update_str = "2020-04-30" + else: + block_function_update_str = cells[block_function_update_index].get_text().strip() + try: + block_function_update = dates.parse_date(block_function_update_str) + except ValueError: + block_function_update = None - release = product_data.get_release(identifier) - # if no date is available, use False for supported runtimes and True for deprecated ones - release.set_eoas(deprecation_date if deprecation_date else not is_supported_table) - # if no date is available, use False for supported runtimes and True for deprecated ones - release.set_eol(block_function_update if block_function_update else not is_supported_table) + release = product_data.get_release(identifier) + # if no date is available, use False for supported runtimes and True for deprecated ones + release.set_eoas(deprecation_date if deprecation_date else not is_supported_table) + # if no date is available, use False for supported runtimes and True for deprecated ones + release.set_eol(block_function_update if block_function_update else not is_supported_table) diff --git a/src/cgit.py b/src/cgit.py index 2845d50a..853a0af9 100644 --- a/src/cgit.py +++ b/src/cgit.py @@ -1,16 +1,10 @@ -import sys - from bs4 import BeautifulSoup from common import dates, endoflife, http, releasedata """Fetches versions from repositories managed with cgit, such as the Linux kernel repository. 
Ideally we would want to use the git repository directly, but cgit-managed repositories don't support partial clone.""" -METHOD = "cgit" - -p_filter = sys.argv[1] if len(sys.argv) > 1 else None -m_filter = sys.argv[2] if len(sys.argv) > 2 else None -for config in endoflife.list_configs(p_filter, METHOD, m_filter): +for config in endoflife.list_configs_from_argv(): with releasedata.ProductData(config.product) as product_data: response = http.fetch_url(config.url + '/refs/tags') soup = BeautifulSoup(response.text, features="html5lib") diff --git a/src/chef-infra-client.py b/src/chef-infra-client.py deleted file mode 100644 index be943a89..00000000 --- a/src/chef-infra-client.py +++ /dev/null @@ -1,23 +0,0 @@ -from bs4 import BeautifulSoup -from common import dates, http, releasedata -from common.git import Git - -"""Fetch released versions from docs.chef.io and retrieve their date from GitHub. -docs.chef.io needs to be scraped because not all tagged versions are actually released. - -More context on https://github.com/endoflife-date/endoflife.date/pull/4425#discussion_r1447932411. 
-""" - -with releasedata.ProductData("chef-infra-client") as product_data: - rn_response = http.fetch_url("https://docs.chef.io/release_notes_client/") - rn_soup = BeautifulSoup(rn_response.text, features="html5lib") - released_versions = [h2.get('id') for h2 in rn_soup.find_all('h2', id=True) if h2.get('id')] - - git = Git("https://github.com/chef/chef.git") - git.setup(bare=True) - - versions = git.list_tags() - for version, date_str in versions: - if version in released_versions: - date = dates.parse_date(date_str) - product_data.declare_version(version, date) diff --git a/src/chef-infra-server.py b/src/chef-infra-server.py deleted file mode 100644 index 2e00b992..00000000 --- a/src/chef-infra-server.py +++ /dev/null @@ -1,23 +0,0 @@ -from bs4 import BeautifulSoup -from common import dates, http, releasedata -from common.git import Git - -"""Fetch released versions from docs.chef.io and retrieve their date from GitHub. -docs.chef.io needs to be scraped because not all tagged versions are actually released. - -More context on https://github.com/endoflife-date/endoflife.date/pull/4425#discussion_r1447932411. 
-""" - -with releasedata.ProductData("chef-infra-server") as product_data: - rn_response = http.fetch_url("https://docs.chef.io/release_notes_server/") - rn_soup = BeautifulSoup(rn_response.text, features="html5lib") - released_versions = [h2.get('id') for h2 in rn_soup.find_all('h2', id=True) if h2.get('id')] - - git = Git("https://github.com/chef/chef-server.git") - git.setup(bare=True) - - versions = git.list_tags() - for version, date_str in versions: - if version in released_versions: - date = dates.parse_date(date_str) - product_data.declare_version(version, date) diff --git a/src/chef-infra.py b/src/chef-infra.py new file mode 100644 index 00000000..c40f3ee2 --- /dev/null +++ b/src/chef-infra.py @@ -0,0 +1,24 @@ +from bs4 import BeautifulSoup +from common import dates, endoflife, http, releasedata +from common.git import Git + +"""Fetch released versions from docs.chef.io and retrieve their date from GitHub. +docs.chef.io needs to be scraped because not all tagged versions are actually released. + +More context on https://github.com/endoflife-date/endoflife.date/pull/4425#discussion_r1447932411. 
+""" + +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + rn_response = http.fetch_url(config.url) + rn_soup = BeautifulSoup(rn_response.text, features="html5lib") + released_versions = [h2.get('id') for h2 in rn_soup.find_all('h2', id=True) if h2.get('id')] + + git = Git(config.data.get('repository')) + git.setup(bare=True) + + versions = git.list_tags() + for version, date_str in versions: + if version in released_versions: + date = dates.parse_date(date_str) + product_data.declare_version(version, date) diff --git a/src/chef-inspec.py b/src/chef-inspec.py index 0fac9b30..ee641bfd 100644 --- a/src/chef-inspec.py +++ b/src/chef-inspec.py @@ -1,5 +1,5 @@ from bs4 import BeautifulSoup -from common import dates, github, http, releasedata +from common import dates, endoflife, github, http, releasedata """Fetch released versions from docs.chef.io and retrieve their date from GitHub. docs.chef.io needs to be scraped because not all tagged versions are actually released. @@ -7,13 +7,14 @@ docs.chef.io needs to be scraped because not all tagged versions are actually re More context on https://github.com/endoflife-date/endoflife.date/pull/4425#discussion_r1447932411. 
""" -with releasedata.ProductData("chef-inspec") as product_data: - rn_response = http.fetch_url("https://docs.chef.io/release_notes_inspec/") - rn_soup = BeautifulSoup(rn_response.text, features="html5lib") - released_versions = [h2.get('id') for h2 in rn_soup.find_all('h2', id=True) if h2.get('id')] +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + rn_response = http.fetch_url(config.url) + rn_soup = BeautifulSoup(rn_response.text, features="html5lib") + released_versions = [h2.get('id') for h2 in rn_soup.find_all('h2', id=True) if h2.get('id')] - for release in github.fetch_releases("inspec/inspec"): - sanitized_version = release.tag_name.replace("v", "") - if sanitized_version in released_versions: - date = dates.parse_datetime(release.published_at) - product_data.declare_version(sanitized_version, date) + for release in github.fetch_releases("inspec/inspec"): + sanitized_version = release.tag_name.replace("v", "") + if sanitized_version in released_versions: + date = dates.parse_datetime(release.published_at) + product_data.declare_version(sanitized_version, date) diff --git a/src/coldfusion.py b/src/coldfusion.py index 57fa08c1..0d378ca5 100644 --- a/src/coldfusion.py +++ b/src/coldfusion.py @@ -1,7 +1,7 @@ import re from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata """Fetches versions from Adobe ColdFusion release notes on helpx.adobe.com. @@ -9,15 +9,6 @@ x.y.0 release dates are unfortunately not available in the release notes and hav new minor version is released. 
""" -URLS = [ - "https://helpx.adobe.com/coldfusion/kb/coldfusion-10-updates.html", - "https://helpx.adobe.com/coldfusion/kb/coldfusion-11-updates.html", - "https://helpx.adobe.com/coldfusion/kb/coldfusion-2016-updates.html", - "https://helpx.adobe.com/coldfusion/kb/coldfusion-2018-updates.html", - "https://helpx.adobe.com/coldfusion/kb/coldfusion-2021-updates.html", - "https://helpx.adobe.com/coldfusion/kb/coldfusion-2023-updates.html", -] - VERSION_AND_DATE_PATTERN = re.compile(r"Release Date[,|:]? (.*?)\).*?Build Number: (.*?)$", re.DOTALL | re.MULTILINE | re.IGNORECASE) @@ -31,8 +22,9 @@ FIXED_VERSIONS = { "2023.0.0": dates.date(2022, 5, 16), # https://coldfusion.adobe.com/2023/05/coldfusion2023-release/ } -with releasedata.ProductData("coldfusion") as product_data: - for changelog in http.fetch_urls(URLS): +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + changelog = http.fetch_url(config.url) changelog_soup = BeautifulSoup(changelog.text, features="html5lib") for p in changelog_soup.findAll("div", class_="text"): @@ -42,4 +34,4 @@ with releasedata.ProductData("coldfusion") as product_data: version = version_str.strip().replace(",", ".") # 11,0,0,289974 -> 11.0.0.289974 product_data.declare_version(version, date) - product_data.declare_versions(FIXED_VERSIONS) + product_data.declare_versions(FIXED_VERSIONS) diff --git a/src/common/endoflife.py b/src/common/endoflife.py index 8c368363..0f2bff37 100644 --- a/src/common/endoflife.py +++ b/src/common/endoflife.py @@ -2,6 +2,7 @@ import itertools import logging import os import re +import sys from datetime import datetime from pathlib import Path @@ -21,12 +22,11 @@ class AutoConfig: def __init__(self, product: str, data: dict) -> None: self.product = product self.data = data - self.method = next(key for key in data if key not in ("template", "regex", "regex_exclude")) + self.method = next(key for key in data) # assuming the method is always the 
first key in the dictionary + self.script = f"{self.method}.py" + self.url = data[self.method] self.version_template = Template(data.get("template", DEFAULT_VERSION_TEMPLATE)) - self.script = f"{self.url}.py" if self.method == "custom" else f"{self.method}.py" - regexes_include = data.get("regex", DEFAULT_VERSION_REGEX) regexes_include = regexes_include if isinstance(regexes_include, list) else [regexes_include] self.include_version_patterns = [re.compile(r, re.MULTILINE) for r in regexes_include] @@ -127,16 +127,21 @@ def list_products(products_filter: str = None) -> list[ProductFrontmatter]: def list_configs(products_filter: str = None, methods_filter: str = None, urls_filter: str = None) -> list[AutoConfig]: + """Return a list of auto configs, filtering by product name, method, and URL.""" products = list_products(products_filter) configs_by_product = [p.auto_configs(methods_filter, urls_filter) for p in products] return list(itertools.chain.from_iterable(configs_by_product)) # flatten the list of lists -"""Convert a string to a valid endoflife.date identifier.""" + +def list_configs_from_argv() -> list[AutoConfig]: + products_filter = sys.argv[1] if len(sys.argv) > 1 else None + methods_filter = sys.argv[2] if len(sys.argv) > 2 else None + urls_filter = sys.argv[3] if len(sys.argv) > 3 else None + return list_configs(products_filter, methods_filter, urls_filter) + + def to_identifier(s: str) -> str: + """Convert a string to a valid endoflife.date identifier.""" identifier = s.strip().lower() identifier = identifier.replace(" ", "-") return re.sub(r"[^a-z0-9.\-+_]", "", identifier) - - - - return s.lower().replace(" ", "_").replace(".", "_").replace("/", "_") diff --git a/src/cos.py b/src/cos.py index 0c986e52..ab276f65 100644 --- a/src/cos.py +++ b/src/cos.py @@ -2,7 +2,7 @@ import datetime import re from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata MILESTONE_PATTERN =
re.compile(r'COS \d+ LTS') VERSION_PATTERN = re.compile(r"^(cos-\d+-\d+-\d+-\d+)") @@ -14,30 +14,31 @@ def parse_date(date_text: str) -> datetime: return dates.parse_date(date_text) -with releasedata.ProductData("cos") as product_data: - main = http.fetch_url("https://cloud.google.com/container-optimized-os/docs/release-notes/") - main_soup = BeautifulSoup(main.text, features="html5lib") - milestones = [cell.text.split(' ')[1] for cell in main_soup.find_all('td', string=MILESTONE_PATTERN)] +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + main = http.fetch_url(config.url) + main_soup = BeautifulSoup(main.text, features="html5lib") + milestones = [cell.text.split(' ')[1] for cell in main_soup.find_all('td', string=MILESTONE_PATTERN)] - milestones_urls = [f"{main.url}m{milestone}" for milestone in milestones] - for milestone in http.fetch_urls(milestones_urls): - milestone_soup = BeautifulSoup(milestone.text, features="html5lib") - for article in milestone_soup.find_all('article', class_='devsite-article'): - for heading in article.find_all(['h2', 'h3']): # headings contains the date, which we parse - version_str = heading.get('data-text') - version_match = VERSION_PATTERN.match(version_str) - if not version_match: - continue + milestones_urls = [f"{main.url}m{milestone}" for milestone in milestones] + for milestone in http.fetch_urls(milestones_urls): + milestone_soup = BeautifulSoup(milestone.text, features="html5lib") + for article in milestone_soup.find_all('article', class_='devsite-article'): + for heading in article.find_all(['h2', 'h3']): # headings contains the date, which we parse + version_str = heading.get('data-text') + version_match = VERSION_PATTERN.match(version_str) + if not version_match: + continue - try: # 1st row is the header, so pick the first td in the 2nd row - date_str = heading.find_next('tr').find_next('tr').find_next('td').text - except AttributeError: # In some older 
releases, it is mentioned as Date: [Date] - date_str = heading.find_next('i').text + try: # 1st row is the header, so pick the first td in the 2nd row + date_str = heading.find_next('tr').find_next('tr').find_next('td').text + except AttributeError: # In some older releases, it is mentioned as Date: [Date] + date_str = heading.find_next('i').text - try: - date = parse_date(date_str) - except ValueError: # for some h3, the date is in the previous h2 - date_str = heading.find_previous('h2').get('data-text') - date = parse_date(date_str) + try: + date = parse_date(date_str) + except ValueError: # for some h3, the date is in the previous h2 + date_str = heading.find_previous('h2').get('data-text') + date = parse_date(date_str) - product_data.declare_version(version_match.group(1), date) + product_data.declare_version(version_match.group(1), date) diff --git a/src/couchbase-server.py b/src/couchbase-server.py index 6d6108cf..b2eee8bb 100644 --- a/src/couchbase-server.py +++ b/src/couchbase-server.py @@ -1,7 +1,7 @@ -import re +import logging from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata """Fetches versions from release notes of each minor version on docs.couchbase.com. @@ -9,8 +9,6 @@ Dates are not available for all versions, so they must be set manually in some c Moreover dates are not accurate (only month and year are provided), so they are set to the last day of the month. """ -URLS = "https://docs.couchbase.com/server" -VERSION_AND_DATE_PATTERN = re.compile(r"^Release (?P\d+\.\d+(\.\d+)?) 
\((?P.+)\)$") MANUAL_VERSIONS = { "6.0.0": dates.date(2018, 10, 31), # https://www.couchbase.com/blog/announcing-couchbase-6-0/ "6.0.1": dates.date(2019, 2, 15), # https://web.archive.org/web/20190307191211/https://docs.couchbase.com/server/6.0/release-notes/relnotes.html @@ -18,22 +16,26 @@ MANUAL_VERSIONS = { "7.2.0": dates.date(2023, 6, 1), # https://www.couchbase.com/blog/couchbase-capella-spring-release-72/ } -with releasedata.ProductData("couchbase-server") as product_data: - main = http.fetch_url(f"{URLS}/current/install/install-intro.html") - main_soup = BeautifulSoup(main.text, features="html5lib") +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + main = http.fetch_url(f"{config.url}/current/install/install-intro.html") + main_soup = BeautifulSoup(main.text, features="html5lib") - minor_versions = [options.attrs["value"] for options in main_soup.find(class_="version_list").find_all("option")] - minor_version_urls = [f"{URLS}/{minor}/release-notes/relnotes.html" for minor in minor_versions] + minor_versions = [options.attrs["value"] for options in main_soup.find(class_="version_list").find_all("option")] + minor_version_urls = [f"{config.url}/{minor}/release-notes/relnotes.html" for minor in minor_versions] - for minor_version in http.fetch_urls(minor_version_urls): - minor_version_soup = BeautifulSoup(minor_version.text, features="html5lib") + for minor_version in http.fetch_urls(minor_version_urls): + minor_version_soup = BeautifulSoup(minor_version.text, features="html5lib") + + for title in minor_version_soup.find_all("h2"): + match = config.first_match(title.get_text().strip()) + if not match: + logging.info(f"Skipping {title}, does not match any regex") + continue - for title in minor_version_soup.find_all("h2"): - match = VERSION_AND_DATE_PATTERN.match(title.get_text().strip()) - if match: version = match["version"] version = f"{version}.0" if len(version.split(".")) == 2 else version 
date = dates.parse_month_year_date(match['date']) product_data.declare_version(version, date) - product_data.declare_versions(MANUAL_VERSIONS) + product_data.declare_versions(MANUAL_VERSIONS) diff --git a/src/debian.py b/src/debian.py index e469c969..b54bdefc 100644 --- a/src/debian.py +++ b/src/debian.py @@ -1,7 +1,7 @@ from pathlib import Path from subprocess import run -from common import dates, releasedata +from common import dates, endoflife, releasedata from common.git import Git """Fetch Debian versions by parsing news in www.debian.org source repository.""" @@ -40,11 +40,11 @@ def extract_point_versions(p: releasedata.ProductData, repo_dir: Path) -> None: (date, version) = line.split(' ') p.declare_version(version, dates.parse_date(date)) +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + git = Git(config.url) + git.setup() + git.checkout("master", file_list=["english/News"]) -with releasedata.ProductData("debian") as product_data: - git = Git("https://salsa.debian.org/webmaster-team/webwml.git") - git.setup() - git.checkout("master", file_list=["english/News"]) - - extract_major_versions(product_data, git.repo_dir) - extract_point_versions(product_data, git.repo_dir) + extract_major_versions(product_data, git.repo_dir) + extract_point_versions(product_data, git.repo_dir) diff --git a/src/distrowatch.py b/src/distrowatch.py index 300bf371..a6206721 100644 --- a/src/distrowatch.py +++ b/src/distrowatch.py @@ -1,13 +1,7 @@ -import sys - from bs4 import BeautifulSoup from common import dates, endoflife, http, releasedata -METHOD = 'distrowatch' - -p_filter = sys.argv[1] if len(sys.argv) > 1 else None -m_filter = sys.argv[2] if len(sys.argv) > 2 else None -for config in endoflife.list_configs(p_filter, METHOD, m_filter): +for config in endoflife.list_configs_from_argv(): with releasedata.ProductData(config.product) as product_data: response = 
http.fetch_url(f"https://distrowatch.com/index.php?distribution={config.url}") soup = BeautifulSoup(response.text, features="html5lib") diff --git a/src/docker_hub.py b/src/docker_hub.py index 86db332c..0369e98b 100644 --- a/src/docker_hub.py +++ b/src/docker_hub.py @@ -1,14 +1,9 @@ -import sys - from common import dates, endoflife, http, releasedata """Fetches releases from the Docker Hub API. Unfortunately images creation date cannot be retrieved, so we had to use the tag_last_pushed field instead.""" -METHOD = "docker_hub" - - def fetch_releases(p: releasedata.ProductData, c: endoflife.AutoConfig, url: str) -> None: data = http.fetch_url(url).json() @@ -22,8 +17,6 @@ def fetch_releases(p: releasedata.ProductData, c: endoflife.AutoConfig, url: str fetch_releases(p, c, data["next"]) -p_filter = sys.argv[1] if len(sys.argv) > 1 else None -m_filter = sys.argv[2] if len(sys.argv) > 2 else None -for config in endoflife.list_configs(p_filter, METHOD, m_filter): +for config in endoflife.list_configs_from_argv(): with releasedata.ProductData(config.product) as product_data: fetch_releases(product_data, config, f"https://hub.docker.com/v2/repositories/{config.url}/tags?page_size=100&page=1") diff --git a/src/firefox.py b/src/firefox.py index 6faae772..da570406 100644 --- a/src/firefox.py +++ b/src/firefox.py @@ -1,7 +1,7 @@ import urllib.parse from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata """Fetch Firefox versions with their dates from https://www.mozilla.org/. 
@@ -20,14 +20,15 @@ The script will need to be updated if someday those conditions are not met.""" MAX_VERSIONS_LIMIT = 100 -with releasedata.ProductData("firefox") as product_data: - releases_page = http.fetch_url("https://www.mozilla.org/en-US/firefox/releases/") - releases_soup = BeautifulSoup(releases_page.text, features="html5lib") - releases_list = releases_soup.find_all("ol", class_="c-release-list") +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + releases_page = http.fetch_url(config.url) + releases_soup = BeautifulSoup(releases_page.text, features="html5lib") + releases_list = releases_soup.find_all("ol", class_="c-release-list") - release_notes_urls = [urllib.parse.urljoin(releases_page.url, p.get("href")) for p in releases_list[0].find_all("a")] - for release_notes in http.fetch_urls(release_notes_urls[:MAX_VERSIONS_LIMIT]): - version = release_notes.url.split("/")[-3] - release_notes_soup = BeautifulSoup(release_notes.text, features="html5lib") - date_str = release_notes_soup.find(class_="c-release-date").get_text() # note: only works for versions > 25 - product_data.declare_version(version, dates.parse_date(date_str)) + release_notes_urls = [urllib.parse.urljoin(releases_page.url, p.get("href")) for p in releases_list[0].find_all("a")] + for release_notes in http.fetch_urls(release_notes_urls[:MAX_VERSIONS_LIMIT]): + version = release_notes.url.split("/")[-3] + release_notes_soup = BeautifulSoup(release_notes.text, features="html5lib") + date_str = release_notes_soup.find(class_="c-release-date").get_text() # note: only works for versions > 25 + product_data.declare_version(version, dates.parse_date(date_str)) diff --git a/src/ghc-wiki.py b/src/ghc-wiki.py index 161baee3..fc8e4be0 100644 --- a/src/ghc-wiki.py +++ b/src/ghc-wiki.py @@ -14,7 +14,7 @@ References: import re from typing import Any, Generator, Iterator -from common import dates, http, releasedata +from common import dates, 
endoflife, http, releasedata def parse_markdown_tables(lineiter: Iterator[str]) -> Generator[list[list[Any]], Any, None]: @@ -50,40 +50,41 @@ def maybe_markdown_table_row(line: str) -> list[str] | None: return None return [x.strip() for x in line.strip('|').split('|')] -with releasedata.ProductData("ghc") as product: - resp = http.fetch_url("https://gitlab.haskell.org/api/v4/projects/1/wikis/GHC-Status") - resp.raise_for_status() - data = resp.json() - assert data['title'] == "GHC Status" - assert data['format'] == "markdown" - md = data['content'].splitlines() +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product: + resp = http.fetch_url(config.url) + resp.raise_for_status() + data = resp.json() + assert data['title'] == "GHC Status" + assert data['format'] == "markdown" + md = data['content'].splitlines() - #-- Parse tables out of the wiki text. At time of writing, the script expects exactly two: - #-- 1. "Most recent major" with 5 columns - #-- 2. "All released versions" with 5 columns - [series_table, patch_level] = parse_markdown_tables(iter(md)) + #-- Parse tables out of the wiki text. At time of writing, the script expects exactly two: + #-- 1. "Most recent major" with 5 columns + #-- 2. 
"All released versions" with 5 columns + [series_table, patch_level] = parse_markdown_tables(iter(md)) - for row in series_table[1:]: - [series, _download_link, _most_recent, next_planned, status] = row - if status == "Next major release": - continue + for row in series_table[1:]: + [series, _download_link, _most_recent, next_planned, status] = row + if status == "Next major release": + continue - series = series.split(' ') [0] - series = series.replace('\\.', '.') - if series == "Nightlies": - continue - status = status.lower() + series = series.split(' ')[0] + series = series.replace('\\.', '.') + if series == "Nightlies": + continue + status = status.lower() - #-- See discussion in https://github.com/endoflife-date/endoflife.date/pull/6287 - r = product.get_release(series) - #-- The clearest semblance of an EOL signal we get - r.set_eol("not recommended for use" in status or ":red_circle:" in status) - #-- eoasColumn label is "Further releases planned" - r.set_eoas(any(keyword in next_planned for keyword in ("None", "N/A"))) + #-- See discussion in https://github.com/endoflife-date/endoflife.date/pull/6287 + r = product.get_release(series) + #-- The clearest semblance of an EOL signal we get + r.set_eol("not recommended for use" in status or ":red_circle:" in status) + #-- eoasColumn label is "Further releases planned" + r.set_eoas(any(keyword in next_planned for keyword in ("None", "N/A"))) - for row in patch_level[1:]: - [milestone, _download_link, date, _ticket, _manager] = row - version = milestone.lstrip('%') - version = version.split(' ') [0] - date = dates.parse_date(date) - product.declare_version(version, date) + for row in patch_level[1:]: + [milestone, _download_link, date, _ticket, _manager] = row + version = milestone.lstrip('%') + version = version.split(' ') [0] + date = dates.parse_date(date) + product.declare_version(version, date) diff --git a/src/git.py b/src/git.py index c3253d9e..35c5c024 100644 --- a/src/git.py +++ b/src/git.py @@ -1,15 
+1,9 @@ -import sys - from common import dates, endoflife, releasedata from common.git import Git """Fetches versions from tags in a git repository. This replace the old update.rb script.""" -METHOD = 'git' - -p_filter = sys.argv[1] if len(sys.argv) > 1 else None -m_filter = sys.argv[2] if len(sys.argv) > 2 else None -for config in endoflife.list_configs(p_filter, METHOD, m_filter): +for config in endoflife.list_configs_from_argv(): with releasedata.ProductData(config.product) as product_data: git = Git(config.url) git.setup(bare=True) diff --git a/src/github_releases.py b/src/github_releases.py index 623dc458..730e9116 100644 --- a/src/github_releases.py +++ b/src/github_releases.py @@ -1,5 +1,3 @@ -import sys - from common import dates, endoflife, github, releasedata """Fetches versions from GitHub releases using the GraphQL API and the GitHub CLI. @@ -7,12 +5,7 @@ from common import dates, endoflife, github, releasedata Note: GraphQL API and GitHub CLI are used because it's simpler: no need to manage pagination and authentication. """ -METHOD = "github_releases" - - -p_filter = sys.argv[1] if len(sys.argv) > 1 else None -m_filter = sys.argv[2] if len(sys.argv) > 2 else None -for config in endoflife.list_configs(p_filter, METHOD, m_filter): +for config in endoflife.list_configs_from_argv(): with releasedata.ProductData(config.product) as product_data: for release in github.fetch_releases(config.url): if release.is_prerelease: diff --git a/src/github_tags.py b/src/github_tags.py index fce22a68..872857b0 100644 --- a/src/github_tags.py +++ b/src/github_tags.py @@ -1,5 +1,3 @@ -import sys - from common import dates, endoflife, github, releasedata """Fetches versions from GitHub tags using the GraphQL API and the GitHub CLI. @@ -7,12 +5,7 @@ from common import dates, endoflife, github, releasedata Note: GraphQL API and GitHub CLI are used because it's simpler: no need to manage pagination and authentication. 
""" -METHOD = "github_tags" - - -p_filter = sys.argv[1] if len(sys.argv) > 1 else None -m_filter = sys.argv[2] if len(sys.argv) > 2 else None -for config in endoflife.list_configs(p_filter, METHOD, m_filter): +for config in endoflife.list_configs_from_argv(): with releasedata.ProductData(config.product) as product_data: for tag in github.fetch_tags(config.url): version_str = tag.name diff --git a/src/google-kubernetes-engine.py b/src/google-kubernetes-engine.py index c751cb93..5d2d510d 100644 --- a/src/google-kubernetes-engine.py +++ b/src/google-kubernetes-engine.py @@ -1,7 +1,7 @@ import re from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata # https://regex101.com/r/zPxBqT/1 VERSION_PATTERN = re.compile(r"\d.\d+\.\d+-gke\.\d+") @@ -12,17 +12,18 @@ URL_BY_PRODUCT = { "google-kubernetes-engine-rapid": "https://cloud.google.com/kubernetes-engine/docs/release-notes-rapid", } -for product_name, url in URL_BY_PRODUCT.items(): - with releasedata.ProductData(product_name) as product_data: - relnotes = http.fetch_url(url) - relnotes_soup = BeautifulSoup(relnotes.text, features="html5lib") +for config in endoflife.list_configs_from_argv(): # noqa: B007 multiple JSON produced for historical reasons + for product_name, url in URL_BY_PRODUCT.items(): + with releasedata.ProductData(product_name) as product_data: + relnotes = http.fetch_url(url) + relnotes_soup = BeautifulSoup(relnotes.text, features="html5lib") - for section in relnotes_soup.find_all('section', class_='releases'): - for h2 in section.find_all('h2'): # h2 contains the date - date = dates.parse_date(h2.get('data-text')) + for section in relnotes_soup.find_all('section', class_='releases'): + for h2 in section.find_all('h2'): # h2 contains the date + date = dates.parse_date(h2.get('data-text')) - next_div = h2.find_next('div') # The div next to the h2 contains the notes about changes made on that date - for li in 
next_div.find_all('li'): - if "versions are now available" in li.text: - for version in VERSION_PATTERN.findall(li.find('ul').text): - product_data.declare_version(version, date) + next_div = h2.find_next('div') # The div next to the h2 contains the notes about changes made on that date + for li in next_div.find_all('li'): + if "versions are now available" in li.text: + for version in VERSION_PATTERN.findall(li.find('ul').text): + product_data.declare_version(version, date) diff --git a/src/graalvm.py b/src/graalvm.py index d2ab3d30..ec1b7ab4 100644 --- a/src/graalvm.py +++ b/src/graalvm.py @@ -1,12 +1,9 @@ import logging -import sys from bs4 import BeautifulSoup from common import dates, endoflife, http, releasedata -p_filter = sys.argv[1] if len(sys.argv) > 1 else None -m_filter = sys.argv[2] if len(sys.argv) > 2 else None -for config in endoflife.list_configs(p_filter, "graalvm", m_filter): +for config in endoflife.list_configs_from_argv(): with releasedata.ProductData(config.product) as product_data: response = http.fetch_url(config.url) html = BeautifulSoup(response.text, features="html5lib") diff --git a/src/haproxy.py b/src/haproxy.py index f550ec7c..54e7c279 100644 --- a/src/haproxy.py +++ b/src/haproxy.py @@ -1,30 +1,31 @@ import re from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata CYCLE_PATTERN = re.compile(r"^(\d+\.\d+)/$") DATE_AND_VERSION_PATTERN = re.compile(r"^(\d{4})/(\d{2})/(\d{2})\s+:\s+(\d+\.\d+\.\d.?)$") # https://regex101.com/r/1JCnFC/1 -with releasedata.ProductData("haproxy") as product_data: - # First, get all minor releases from the download page - download = http.fetch_url('https://www.haproxy.org/download/') - download_soup = BeautifulSoup(download.text, features="html5lib") - minor_versions = [] - for link in download_soup.select("a"): - minor_version_match = CYCLE_PATTERN.match(link.attrs["href"]) - if not minor_version_match: - continue +for config in 
endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + # First, get all minor releases from the download page + download = http.fetch_url(config.url) + download_soup = BeautifulSoup(download.text, features="html5lib") + minor_versions = [] + for link in download_soup.select("a"): + minor_version_match = CYCLE_PATTERN.match(link.attrs["href"]) + if not minor_version_match: + continue - minor_version = minor_version_match.groups()[0] - if minor_version != "1.0": # No changelog in https://www.haproxy.org/download/1.0/src - minor_versions.append(minor_version) + minor_version = minor_version_match.groups()[0] + if minor_version != "1.0": # No changelog in https://www.haproxy.org/download/1.0/src + minor_versions.append(minor_version) - # Then, fetches all versions from each changelog - changelog_urls = [f"https://www.haproxy.org/download/{minor_version}/src/CHANGELOG" for minor_version in minor_versions] - for changelog in http.fetch_urls(changelog_urls): - for line in changelog.text.split('\n'): - date_and_version_match = DATE_AND_VERSION_PATTERN.match(line) - if date_and_version_match: - year, month, day, version = date_and_version_match.groups() - product_data.declare_version(version, dates.date(int(year), int(month), int(day))) + # Then, fetches all versions from each changelog + changelog_urls = [f"{config.url}{minor_version}/src/CHANGELOG" for minor_version in minor_versions] + for changelog in http.fetch_urls(changelog_urls): + for line in changelog.text.split('\n'): + date_and_version_match = DATE_AND_VERSION_PATTERN.match(line) + if date_and_version_match: + year, month, day, version = date_and_version_match.groups() + product_data.declare_version(version, dates.date(int(year), int(month), int(day))) diff --git a/src/ibm-aix.py b/src/ibm-aix.py index 71fbd3c3..41b9d0e8 100644 --- a/src/ibm-aix.py +++ b/src/ibm-aix.py @@ -1,14 +1,9 @@ from bs4 import BeautifulSoup -from common import dates, http, releasedata +from 
common import dates, endoflife, http, releasedata -URLS = [ - # Disable, it causes too many timeouts / errors - # "https://web.archive.org/web/20210123024247/https://www.ibm.com/support/pages/aix-support-lifecycle-information", - "https://www.ibm.com/support/pages/aix-support-lifecycle-information", -] - -with releasedata.ProductData("ibm-aix") as product_data: - for page in http.fetch_urls(URLS): +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + page = http.fetch_url(config.url) page_soup = BeautifulSoup(page.text, features="html5lib") for release_table in page_soup.find("div", class_="ibm-container-body").find_all("table", class_="ibm-data-table ibm-grid"): diff --git a/src/kuma.py b/src/kuma.py index 00540c77..88c649b3 100644 --- a/src/kuma.py +++ b/src/kuma.py @@ -1,7 +1,7 @@ import logging import yaml -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata """Fetch version data for Kuma from https://raw.githubusercontent.com/kumahq/kuma/master/versions.yml. 
""" @@ -10,25 +10,26 @@ RELEASE_FIELD = 'release' RELEASE_DATE_FIELD = 'releaseDate' EOL_FIELD = 'endOfLifeDate' -with releasedata.ProductData("kuma") as product_data: - yml_response = http.fetch_url("https://raw.githubusercontent.com/kumahq/kuma/master/versions.yml") - versions_data = yaml.safe_load(yml_response.text) +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + yml_response = http.fetch_url(config.url) + versions_data = yaml.safe_load(yml_response.text) - # Iterate through the versions and their associated dates - for version_info in versions_data: - release_name = version_info[RELEASE_FIELD] - if not release_name.endswith('.x'): - logging.info(f"skipping release with name {release_name}: does not end with '.x'") - continue + # Iterate through the versions and their associated dates + for version_info in versions_data: + release_name = version_info[RELEASE_FIELD] + if not release_name.endswith('.x'): + logging.info(f"skipping release with name {release_name}: does not end with '.x'") + continue - if RELEASE_DATE_FIELD not in version_info or EOL_FIELD not in version_info: - logging.info(f"skipping release with name {release_name}: does not contain {RELEASE_DATE_FIELD} or {EOL_FIELD} fields") - continue + if RELEASE_DATE_FIELD not in version_info or EOL_FIELD not in version_info: + logging.info(f"skipping release with name {release_name}: does not contain {RELEASE_DATE_FIELD} or {EOL_FIELD} fields") + continue - release = product_data.get_release(release_name.replace('.x', '')) + release = product_data.get_release(release_name.replace('.x', '')) - release_date = dates.parse_date(version_info[RELEASE_DATE_FIELD]) - release.set_field('releaseDate', release_date) + release_date = dates.parse_date(version_info[RELEASE_DATE_FIELD]) + release.set_field('releaseDate', release_date) - eol = dates.parse_date(version_info[EOL_FIELD]) - release.set_field('eol', eol) + eol = 
dates.parse_date(version_info[EOL_FIELD]) + release.set_field('eol', eol) diff --git a/src/libreoffice.py b/src/libreoffice.py index f048f5bc..e5af6c03 100644 --- a/src/libreoffice.py +++ b/src/libreoffice.py @@ -1,27 +1,29 @@ -import re +import logging from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata """Fetches LibreOffice versions from https://downloadarchive.documentfoundation.org/libreoffice/old/""" -VERSION_PATTERN = re.compile(r"^(?P<version>\d+(\.\d+)*)\/$") +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + response = http.fetch_url(config.url) + soup = BeautifulSoup(response.text, features="html5lib") -with releasedata.ProductData("libreoffice") as product_data: - response = http.fetch_url("https://downloadarchive.documentfoundation.org/libreoffice/old/") - soup = BeautifulSoup(response.text, features="html5lib") + for table in soup.find_all("table"): + for row in table.find_all("tr")[1:]: + cells = row.find_all("td") + if len(cells) < 4: + continue - for table in soup.find_all("table"): - for row in table.find_all("tr")[1:]: - cells = row.find_all("td") - if len(cells) < 4: - continue + version_str = cells[1].get_text().strip() + version_match = config.first_match(version_str) + if not version_match: + logging.warning(f"Skipping version {version_str} as it does not match any known version pattern") + continue + version = config.render(version_match) - version_str = cells[1].get_text().strip() - date_str = cells[2].get_text().strip() - version_match = VERSION_PATTERN.match(version_str) - - if version_match: - version = version_match["version"] + date_str = cells[2].get_text().strip() date = dates.parse_datetime(date_str) + product_data.declare_version(version, date) diff --git a/src/looker.py b/src/looker.py index b23212ec..91787bc2 100644 --- a/src/looker.py +++ b/src/looker.py @@ -2,31 +2,32 @@ import re import 
xml.dom.minidom from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata """Fetch Looker versions from the Google Cloud release notes RSS feed. """ ANNOUNCEMENT_PATTERN = re.compile(r"includes\s+the\s+following\s+changes", re.IGNORECASE) -VERSION_PATTERN = re.compile(r"Looker\s+(?P<version>\d+\.\d+)", re.IGNORECASE) -with releasedata.ProductData("looker") as product_data: - response = http.fetch_url("https://cloud.google.com/feeds/looker-release-notes.xml") - rss = xml.dom.minidom.parseString(response.text) +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + response = http.fetch_url(config.url) + rss = xml.dom.minidom.parseString(response.text) - for item in rss.getElementsByTagName("entry"): - content = item.getElementsByTagName("content")[0].firstChild.nodeValue - content_soup = BeautifulSoup(content, features="html5lib") + for item in rss.getElementsByTagName("entry"): + content = item.getElementsByTagName("content")[0].firstChild.nodeValue + content_soup = BeautifulSoup(content, features="html5lib") - announcement_match = content_soup.find(string=ANNOUNCEMENT_PATTERN) - if not announcement_match: - continue + announcement_match = content_soup.find(string=ANNOUNCEMENT_PATTERN) + if not announcement_match: + continue - version_match = VERSION_PATTERN.search(announcement_match.parent.get_text()) - if not version_match: - continue + version_match = config.first_match(announcement_match.parent.get_text()) + if not version_match: + continue + version = config.render(version_match) - version = version_match.group("version") - date_str = item.getElementsByTagName("updated")[0].firstChild.nodeValue - date = dates.parse_datetime(date_str) - product_data.declare_version(version, date) + date_str = item.getElementsByTagName("updated")[0].firstChild.nodeValue + date = dates.parse_datetime(date_str) + + product_data.declare_version(version, date) 
diff --git a/src/lua.py b/src/lua.py index 3c87efde..9019b34d 100644 --- a/src/lua.py +++ b/src/lua.py @@ -1,24 +1,25 @@ import re from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata """Fetches Lua releases from lua.org.""" RELEASED_AT_PATTERN = re.compile(r"Lua\s*(?P<release>\d+\.\d+)\s*was\s*released\s*on\s*(?P<release_date>\d+\s*\w+\s*\d{4})") VERSION_PATTERN = re.compile(r"(?P<version>\d+\.\d+\.\d+),\s*released\s*on\s*(?P<version_date>\d+\s*\w+\s*\d{4})") -with releasedata.ProductData("lua") as product_data: - page = http.fetch_url("https://www.lua.org/versions.html") - soup = BeautifulSoup(page.text, 'html.parser') - page_text = soup.text # HTML is broken, no way to parse it with beautifulsoup +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + page = http.fetch_url(config.url) + soup = BeautifulSoup(page.text, 'html.parser') + page_text = soup.text # HTML is broken, no way to parse it with beautifulsoup - for release_match in RELEASED_AT_PATTERN.finditer(page_text): - release = release_match.group('release') - release_date = dates.parse_date(release_match.group('release_date')) - product_data.get_release(release).set_release_date(release_date) + for release_match in RELEASED_AT_PATTERN.finditer(page_text): + release = release_match.group('release') + release_date = dates.parse_date(release_match.group('release_date')) + product_data.get_release(release).set_release_date(release_date) - for version_match in VERSION_PATTERN.finditer(page_text): - version = version_match.group('version') - version_date = dates.parse_date(version_match.group('version_date')) - product_data.declare_version(version, version_date) + for version_match in VERSION_PATTERN.finditer(page_text): + version = version_match.group('version') + version_date = dates.parse_date(version_match.group('version_date')) + product_data.declare_version(version, version_date) diff --git a/src/maven.py 
b/src/maven.py index ef5ce614..79cf7add 100644 --- a/src/maven.py +++ b/src/maven.py @@ -1,13 +1,8 @@ -import sys from datetime import datetime, timezone from common import endoflife, http, releasedata -METHOD = "maven" - -p_filter = sys.argv[1] if len(sys.argv) > 1 else None -m_filter = sys.argv[2] if len(sys.argv) > 2 else None -for config in endoflife.list_configs(p_filter, METHOD, m_filter): +for config in endoflife.list_configs_from_argv(): with releasedata.ProductData(config.product) as product_data: start = 0 group_id, artifact_id = config.url.split("/") diff --git a/src/netbsd.py b/src/netbsd.py index 88e699b7..c07e691c 100644 --- a/src/netbsd.py +++ b/src/netbsd.py @@ -1,33 +1,34 @@ import logging from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata """Fetches NetBSD versions and EOL information from https://www.netbsd.org/.""" -with releasedata.ProductData('netbsd') as product_data: - response = http.fetch_url('https://www.netbsd.org/releases/formal.html') - soup = BeautifulSoup(response.text, features="html5lib") +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + response = http.fetch_url(config.url) + soup = BeautifulSoup(response.text, features="html5lib") - for row in soup.select('table tbody tr'): - cells = [cell.get_text(strip=True) for cell in row.select('td')] + for row in soup.select('table tbody tr'): + cells = [cell.get_text(strip=True) for cell in row.select('td')] - version = cells[0] - if not version.startswith('NetBSD'): - logging.info(f"Skipping row {cells}, version does not start with 'NetBSD'") - continue - version = version.split(' ')[1] + version = cells[0] + if not version.startswith('NetBSD'): + logging.info(f"Skipping row {cells}, version does not start with 'NetBSD'") + continue + version = version.split(' ')[1] - try: - release_date = dates.parse_date(cells[1]) - 
product_data.declare_version(version, release_date) - except ValueError: - logging.warning(f"Skipping row {cells}, could not parse release date") + try: + release_date = dates.parse_date(cells[1]) + product_data.declare_version(version, release_date) + except ValueError: + logging.warning(f"Skipping row {cells}, could not parse release date") - eol_str = cells[2] - if not eol_str: - continue + eol_str = cells[2] + if not eol_str: + continue - eol = dates.parse_date(eol_str) - major_version = version.split('.')[0] - product_data.get_release(major_version).set_eol(eol) + eol = dates.parse_date(eol_str) + major_version = version.split('.')[0] + product_data.get_release(major_version).set_eol(eol) diff --git a/src/npm.py b/src/npm.py index 377051da..76cfcdac 100644 --- a/src/npm.py +++ b/src/npm.py @@ -1,12 +1,6 @@ -import sys - from common import dates, endoflife, http, releasedata -METHOD = "npm" - -p_filter = sys.argv[1] if len(sys.argv) > 1 else None -m_filter = sys.argv[2] if len(sys.argv) > 2 else None -for config in endoflife.list_configs(p_filter, METHOD, m_filter): +for config in endoflife.list_configs_from_argv(): with releasedata.ProductData(config.product) as product_data: data = http.fetch_url(f"https://registry.npmjs.org/{config.url}").json() for version_str in data["versions"]: diff --git a/src/nutanix.py b/src/nutanix.py index d4266db4..aa5e24a4 100644 --- a/src/nutanix.py +++ b/src/nutanix.py @@ -1,14 +1,8 @@ -import sys - from common import dates, endoflife, http, releasedata """Fetch Nutanix products versions from https://portal.nutanix.com/api/v1.""" -METHOD = 'nutanix' - -p_filter = sys.argv[1] if len(sys.argv) > 1 else None -m_filter = sys.argv[2] if len(sys.argv) > 2 else None -for config in endoflife.list_configs(p_filter, METHOD, m_filter): +for config in endoflife.list_configs_from_argv(): with releasedata.ProductData(config.product) as product_data: url = f"https://portal.nutanix.com/api/v1/eol/find?type={config.url}" data = 
http.fetch_url(url).json() diff --git a/src/oracle-jdk.py b/src/oracle-jdk.py index 21feabb8..b7da1346 100644 --- a/src/oracle-jdk.py +++ b/src/oracle-jdk.py @@ -1,22 +1,23 @@ from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata """Fetch Java versions from https://www.java.com/releases/. This script is using requests-html because the page needs JavaScript to render correctly.""" -with releasedata.ProductData("oracle-jdk") as product_data: - content = http.fetch_javascript_url('https://www.java.com/releases/', wait_until='networkidle') - soup = BeautifulSoup(content, 'html.parser') +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + content = http.fetch_javascript_url(config.url, wait_until='networkidle') + soup = BeautifulSoup(content, 'html.parser') - previous_date = None - for row in soup.select('#released tr'): - version_cell = row.select_one('td.anchor') - if version_cell: - version = version_cell.attrs['id'] - date_str = row.select('td')[1].text - date = dates.parse_date(date_str) if date_str else previous_date - product_data.declare_version(version, date) - previous_date = date + previous_date = None + for row in soup.select('#released tr'): + version_cell = row.select_one('td.anchor') + if version_cell: + version = version_cell.attrs['id'] + date_str = row.select('td')[1].text + date = dates.parse_date(date_str) if date_str else previous_date + product_data.declare_version(version, date) + previous_date = date - product_data.remove_version('1.0_alpha') # the only version we don't want, a regex is not needed + product_data.remove_version('1.0_alpha') # the only version we don't want, a regex is not needed diff --git a/src/pan-os.py b/src/pan-os.py index be08ec59..8e8a37d3 100644 --- a/src/pan-os.py +++ b/src/pan-os.py @@ -1,11 +1,12 @@ -from common import dates, http, releasedata +from common import dates, endoflife, 
http, releasedata """Fetches pan-os versions from https://github.com/mrjcap/panos-versions/.""" -with releasedata.ProductData("pan-os") as product_data: - versions = http.fetch_url("https://raw.githubusercontent.com/mrjcap/panos-versions/master/PaloAltoVersions.json").json() +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + versions = http.fetch_url(config.url).json() - for version in versions: - name = version['version'] - date = dates.parse_datetime(version['released-on']) - product_data.declare_version(name, date) + for version in versions: + name = version['version'] + date = dates.parse_datetime(version['released-on']) + product_data.declare_version(name, date) diff --git a/src/php.py b/src/php.py index 34d78b4a..ad325757 100644 --- a/src/php.py +++ b/src/php.py @@ -1,16 +1,15 @@ from common import dates, endoflife, http, releasedata -MAIN_URL = "https://www.php.net/releases/index.php?json&max=-1" +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + # Fetch major versions + latest_by_major = http.fetch_url(config.url).json() + major_version_urls = [f"{config.url}&version={major_version}" for major_version in latest_by_major] -with releasedata.ProductData("php") as product_data: - # Fetch major versions - latest_by_major = http.fetch_url(MAIN_URL).json() - major_version_urls = [f"{MAIN_URL}&version={major_version}" for major_version in latest_by_major] - - # Fetch all versions for major versions - for major_versions_response in http.fetch_urls(major_version_urls): - major_versions_data = major_versions_response.json() - for version in major_versions_data: - if endoflife.DEFAULT_VERSION_PATTERN.match(version): # exclude versions such as "3.0.x (latest)" - date = dates.parse_date(major_versions_data[version]["date"]) - product_data.declare_version(version, date) + # Fetch all versions for major versions + for major_versions_response 
in http.fetch_urls(major_version_urls): + major_versions_data = major_versions_response.json() + for version in major_versions_data: + if config.first_match(version): # exclude versions such as "3.0.x (latest)" + date = dates.parse_date(major_versions_data[version]["date"]) + product_data.declare_version(version, date) diff --git a/src/plesk.py b/src/plesk.py index c0d5d6a9..34712b8b 100644 --- a/src/plesk.py +++ b/src/plesk.py @@ -1,23 +1,24 @@ from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata """Fetches versions from Plesk's change log. Only 18.0.20.3 and later will be picked up, as the format of the change log for 18.0.20 and 18.0.19 are different and there is no entry for GA of version 18.0.18 and older.""" -with releasedata.ProductData("plesk") as product_data: - response = http.fetch_url("https://docs.plesk.com/release-notes/obsidian/change-log") - soup = BeautifulSoup(response.text, features="html5lib") +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + response = http.fetch_url(config.url) + soup = BeautifulSoup(response.text, features="html5lib") - for release in soup.find_all("div", class_="changelog-entry--obsidian"): - version = release.h2.text.strip() - if not version.startswith('Plesk Obsidian 18'): - continue + for release in soup.find_all("div", class_="changelog-entry--obsidian"): + version = release.h2.text.strip() + if not version.startswith('Plesk Obsidian 18'): + continue - version = version.replace(' Update ', '.').replace('Plesk Obsidian ', '') - if ' ' in version: - continue + version = version.replace(' Update ', '.').replace('Plesk Obsidian ', '') + if ' ' in version: + continue - date = dates.parse_date(release.p.text) - product_data.declare_version(version, date) + date = dates.parse_date(release.p.text) + product_data.declare_version(version, date) diff --git a/src/pypi.py b/src/pypi.py index 
6ce57111..80f5dedb 100644 --- a/src/pypi.py +++ b/src/pypi.py @@ -1,12 +1,6 @@ -import sys - from common import dates, endoflife, http, releasedata -METHOD = "pypi" - -p_filter = sys.argv[1] if len(sys.argv) > 1 else None -m_filter = sys.argv[2] if len(sys.argv) > 2 else None -for config in endoflife.list_configs(p_filter, METHOD, m_filter): +for config in endoflife.list_configs_from_argv(): with releasedata.ProductData(config.product) as product_data: data = http.fetch_url(f"https://pypi.org/pypi/{config.url}/json").json() diff --git a/src/rds.py b/src/rds.py index c0ed20e1..ffc1c153 100644 --- a/src/rds.py +++ b/src/rds.py @@ -1,7 +1,7 @@ -import re +import logging from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata """Fetches Amazon RDS versions from the version management pages on AWS docs. @@ -9,16 +9,9 @@ Pages parsed by this script are expected to have version tables with a version i in the third column (usually named 'RDS release date'). 
""" -PRODUCTS = { - "amazon-rds-mysql": "https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/MySQL.Concepts.VersionMgmt.html", - "amazon-rds-postgresql": "https://docs.aws.amazon.com/AmazonRDS/latest/PostgreSQLReleaseNotes/postgresql-release-calendar.html", - "amazon-rds-mariadb": "https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/MariaDB.Concepts.VersionMgmt.html", -} -VERSION_REGEX = re.compile(r"(?P\d+(?:\.\d+)*)", flags=re.IGNORECASE) # https://regex101.com/r/BY1vwV/1 - -for product_name, url in PRODUCTS.items(): - with releasedata.ProductData(product_name) as product_data: - response = http.fetch_url(url) +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + response = http.fetch_url(config.url) soup = BeautifulSoup(response.text, features="html5lib") for table in soup.find_all("table"): @@ -27,8 +20,12 @@ for product_name, url in PRODUCTS.items(): if len(columns) <= 3: continue - version_match = VERSION_REGEX.search(columns[0].text.strip()) - if version_match: - version = version_match.group("version") - date = dates.parse_date(columns[2].text) - product_data.declare_version(version, date) + version_text = columns[0].text.strip() + version_match = config.first_match(version_text) + if not version_match: + logging.warning(f"Skipping {version_text}: does not match any version pattern") + continue + + version = config.render(version_match) + date = dates.parse_date(columns[2].text) + product_data.declare_version(version, date) diff --git a/src/red-hat-jboss-eap-7.py b/src/red-hat-jboss-eap-7.py index 965ad5f9..3f422f92 100644 --- a/src/red-hat-jboss-eap-7.py +++ b/src/red-hat-jboss-eap-7.py @@ -1,41 +1,42 @@ import logging from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata """Fetches RedHat JBoss EAP version data for JBoss 7""" -with releasedata.ProductData("red-hat-jboss-eap") as product_data: - response = 
http.fetch_url("https://access.redhat.com/articles/2332721") - soup = BeautifulSoup(response.text, features="html5lib") +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + response = http.fetch_url(config.url) + soup = BeautifulSoup(response.text, features="html5lib") - for h4 in soup.find_all("h4"): - title = h4.get_text(strip=True) - if not title.startswith("7."): - continue - - release = title[:3] - version_table = h4.find_next("table") - if not version_table: - logging.warning(f"Version table not found for {title}") - continue - - for (i, row) in enumerate(version_table.find_all("tr")): - if i == 0: # Skip the first row (header) + for h4 in soup.find_all("h4"): + title = h4.get_text(strip=True) + if not title.startswith("7."): continue - columns = row.find_all("td") - # Get the version name without the content of the tag, if present - name_str = ''.join([content for content in columns[0].contents if isinstance(content, str)]).strip() - date_str = columns[1].text.strip() - - if date_str == "TBD" or date_str == "TDB": # Placeholder for a future release + release = title[:3] + version_table = h4.find_next("table") + if not version_table: + logging.warning(f"Version table not found for {title}") continue - if date_str == "[July 21, 2021][d7400]": - # Temporary fix for a typo in the source page - date_str = "July 21 2021" + for (i, row) in enumerate(version_table.find_all("tr")): + if i == 0: # Skip the first row (header) + continue - name = name_str.replace("GA", "Update 0").replace("Update ", release + ".") - date = dates.parse_date(date_str) - product_data.declare_version(name, date) + columns = row.find_all("td") + # Get the version name without the content of the tag, if present + name_str = ''.join([content for content in columns[0].contents if isinstance(content, str)]).strip() + date_str = columns[1].text.strip() + + if date_str == "TBD" or date_str == "TDB": # Placeholder for a future release 
+ continue + + if date_str == "[July 21, 2021][d7400]": + # Temporary fix for a typo in the source page + date_str = "July 21 2021" + + name = name_str.replace("GA", "Update 0").replace("Update ", release + ".") + date = dates.parse_date(date_str) + product_data.declare_version(name, date) diff --git a/src/red-hat-jboss-eap-8.py b/src/red-hat-jboss-eap-8.py index 545582e2..515f81ea 100644 --- a/src/red-hat-jboss-eap-8.py +++ b/src/red-hat-jboss-eap-8.py @@ -1,20 +1,21 @@ import re from xml.dom.minidom import parseString -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata """Fetches the latest RedHat JBoss EAP version data for JBoss 8.0""" -with releasedata.ProductData("red-hat-jboss-eap") as product_data: - response = http.fetch_url("https://maven.repository.redhat.com/ga/org/jboss/eap/channels/eap-8.0/maven-metadata.xml") +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + response = http.fetch_url(config.url) - xml = parseString(response.text) - versioning = xml.getElementsByTagName("metadata")[0].getElementsByTagName("versioning")[0] + xml = parseString(response.text) + versioning = xml.getElementsByTagName("metadata")[0].getElementsByTagName("versioning")[0] - latest_str = versioning.getElementsByTagName("latest")[0].firstChild.nodeValue - latest_name = "8.0." + re.match(r"^..(.*)\.GA", latest_str).group(1) + latest_str = versioning.getElementsByTagName("latest")[0].firstChild.nodeValue + latest_name = "8.0." 
+ re.match(r"^..(.*)\.GA", latest_str).group(1) - latest_date_str = versioning.getElementsByTagName("lastUpdated")[0].firstChild.nodeValue - latest_date = dates.parse_datetime(latest_date_str) + latest_date_str = versioning.getElementsByTagName("lastUpdated")[0].firstChild.nodeValue + latest_date = dates.parse_datetime(latest_date_str) - product_data.declare_version(latest_name, latest_date) + product_data.declare_version(latest_name, latest_date) diff --git a/src/red-hat-openshift.py b/src/red-hat-openshift.py index e6c6629b..44cb2520 100644 --- a/src/red-hat-openshift.py +++ b/src/red-hat-openshift.py @@ -1,6 +1,6 @@ import re -from common import dates, releasedata +from common import dates, endoflife, releasedata from common.git import Git """Fetches Red Hat OpenShift versions from the documentation's git repository""" @@ -10,25 +10,26 @@ VERSION_AND_DATE_PATTERN = re.compile( re.MULTILINE, ) -with releasedata.ProductData("red-hat-openshift") as product_data: - git = Git("https://github.com/openshift/openshift-docs.git") - git.setup() +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + git = Git(config.url) + git.setup() - # only fetch v4+ branches, because the format was different in openshift v3 - for branch in git.list_branches("refs/heads/enterprise-[4-9]*"): - branch_version = branch.split("-")[1] - file_version = branch_version.replace(".", "-") - release_notes_filename = f"release_notes/ocp-{file_version}-release-notes.adoc" - git.checkout(branch, file_list=[release_notes_filename]) + # only fetch v4+ branches, because the format was different in openshift v3 + for branch in git.list_branches("refs/heads/enterprise-[4-9]*"): + branch_version = branch.split("-")[1] + file_version = branch_version.replace(".", "-") + release_notes_filename = f"release_notes/ocp-{file_version}-release-notes.adoc" + git.checkout(branch, file_list=[release_notes_filename]) - release_notes_file = git.repo_dir / 
release_notes_filename - if not release_notes_file.exists(): - continue + release_notes_file = git.repo_dir / release_notes_filename + if not release_notes_file.exists(): + continue - with release_notes_file.open("rb") as f: - content = f.read().decode("utf-8") - for version, date_str in VERSION_AND_DATE_PATTERN.findall(content): - product_data.declare_version( - version.replace("{product-version}", branch_version), - dates.parse_date(date_str), - ) + with release_notes_file.open("rb") as f: + content = f.read().decode("utf-8") + for version, date_str in VERSION_AND_DATE_PATTERN.findall(content): + product_data.declare_version( + version.replace("{product-version}", branch_version), + dates.parse_date(date_str), + ) diff --git a/src/red-hat-satellite.py b/src/red-hat-satellite.py index 118254d8..60f11950 100644 --- a/src/red-hat-satellite.py +++ b/src/red-hat-satellite.py @@ -1,28 +1,30 @@ -import re +import logging from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata """Fetches Satellite versions from access.redhat.com. A few of the older versions, such as 'Satellite 6.1 GA Release (Build 6.1.1)', were ignored because too hard to parse.""" -# https://regex101.com/r/m8aWXG/1 -VERSION_PATTERN = re.compile(r"^Satellite (?P\d+\.\d+\.\d+([.-]\d+)?) 
([Uu]pdate|[Rr]elease)$") +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + response = http.fetch_url(config.url) + soup = BeautifulSoup(response.text, features="html5lib") -with releasedata.ProductData("red-hat-satellite") as product_data: - response = http.fetch_url("https://access.redhat.com/articles/1365633") - soup = BeautifulSoup(response.text, features="html5lib") + for table in soup.findAll("tbody"): + for tr in table.findAll("tr"): + td_list = tr.findAll("td") - for table in soup.findAll("tbody"): - for tr in table.findAll("tr"): - td_list = tr.findAll("td") - - version_str = td_list[0].get_text().replace(' GA', '.0').strip() # x.y GA => x.y.0 - version_match = VERSION_PATTERN.match(version_str) - if version_match: + version_str = td_list[0].get_text().replace(' GA', '.0').strip() # x.y GA => x.y.0 + version_match = config.first_match(version_str) + if not version_match: + logging.warning(f"Skipping version '{version_str}': does not match any version pattern.") + continue version = version_match["version"].replace('-', '.') # a.b.c-d => a.b.c.d + date_str = td_list[1].get_text().strip() date_str = '2024-12-04' if date_str == '2024-12-041' else date_str # there is a typo for 6.15.5 date = dates.parse_date(date_str) + product_data.declare_version(version, date) diff --git a/src/redhat_lifecycles.py b/src/redhat_lifecycles.py index a13ae952..02e8b115 100644 --- a/src/redhat_lifecycles.py +++ b/src/redhat_lifecycles.py @@ -1,5 +1,4 @@ import logging -import sys import urllib.parse from common import dates, endoflife, http, releasedata @@ -11,9 +10,6 @@ This script works based on a definition provided in the product's frontmatter to More information on https://docs.redhat.com/documentation/red_hat_product_life_cycle_data_api/. 
""" -METHOD = "redhat_lifecycles" - - class Mapping: def __init__(self, phases_by_field: dict[str, str]) -> None: self.fields_by_phase = {v.lower(): k for k, v in phases_by_field.items()} @@ -21,9 +17,7 @@ class Mapping: def get_field_for(self, phase_name: str) -> str | None: return self.fields_by_phase.get(phase_name.lower(), None) -p_filter = sys.argv[1] if len(sys.argv) > 1 else None -m_filter = sys.argv[2] if len(sys.argv) > 2 else None -for config in endoflife.list_configs(p_filter, METHOD, m_filter): +for config in endoflife.list_configs_from_argv(): with releasedata.ProductData(config.product) as product_data: name = urllib.parse.quote(config.url) mapping = Mapping(config.data["fields"]) diff --git a/src/release_table.py b/src/release_table.py index db7960e5..377ab24c 100644 --- a/src/release_table.py +++ b/src/release_table.py @@ -1,6 +1,5 @@ import logging import re -import sys from datetime import datetime from re import Match @@ -151,9 +150,7 @@ class Field: return f"{self.name}({self.column})" -p_filter = sys.argv[1] if len(sys.argv) > 1 else None -m_filter = sys.argv[2] if len(sys.argv) > 2 else None -for config in endoflife.list_configs(p_filter, METHOD, m_filter): +for config in endoflife.list_configs_from_argv(): with releasedata.ProductData(config.product) as product_data: render_javascript = config.data.get("render_javascript", False) render_javascript_click_selector = config.data.get("render_javascript_click_selector", None) diff --git a/src/rhel.py b/src/rhel.py index 0379dd26..26c23bd4 100644 --- a/src/rhel.py +++ b/src/rhel.py @@ -1,24 +1,25 @@ import re from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata # https://regex101.com/r/877ibq/1 VERSION_PATTERN = re.compile(r"RHEL (?P\d)(\. 
?(?P\d+))?(( Update (?P\d))| GA)?") -with releasedata.ProductData("redhat") as product_data: - response = http.fetch_url("https://access.redhat.com/articles/3078") - soup = BeautifulSoup(response.text, features="html5lib") +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + response = http.fetch_url(config.url) + soup = BeautifulSoup(response.text, features="html5lib") - for tr in soup.findAll("tr"): - td_list = tr.findAll("td") - if len(td_list) == 0: - continue + for tr in soup.findAll("tr"): + td_list = tr.findAll("td") + if len(td_list) == 0: + continue - version_str = td_list[0].get_text().strip() - version_match = VERSION_PATTERN.match(version_str).groupdict() - version = version_match["major"] - version += ("." + version_match["minor"]) if version_match["minor"] else "" - version += ("." + version_match["minor2"]) if version_match["minor2"] else "" - date = dates.parse_date(td_list[1].get_text()) - product_data.declare_version(version, date) + version_str = td_list[0].get_text().strip() + version_match = VERSION_PATTERN.match(version_str).groupdict() + version = version_match["major"] + version += ("." + version_match["minor"]) if version_match["minor"] else "" + version += ("." 
+ version_match["minor2"]) if version_match["minor2"] else "" + date = dates.parse_date(td_list[1].get_text()) + product_data.declare_version(version, date) diff --git a/src/rocky-linux.py b/src/rocky-linux.py index 6bbca85f..20e5bfa1 100644 --- a/src/rocky-linux.py +++ b/src/rocky-linux.py @@ -1,10 +1,11 @@ from common import dates, endoflife, http, releasedata -with releasedata.ProductData("rocky-linux") as product_data: - response = http.fetch_url("https://raw.githubusercontent.com/rocky-linux/wiki.rockylinux.org/main/docs/include/releng/version_table.md") - for line in response.text.strip().split('\n'): - items = line.split('|') - if len(items) >= 5 and endoflife.DEFAULT_VERSION_PATTERN.match(items[1].strip()): - version = items[1].strip() - date = dates.parse_date(items[3]) - product_data.declare_version(version, date) +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + response = http.fetch_url(config.url) + for line in response.text.strip().split('\n'): + items = line.split('|') + if len(items) >= 5 and config.first_match(items[1].strip()): + version = items[1].strip() + date = dates.parse_date(items[3]) + product_data.declare_version(version, date) diff --git a/src/ros.py b/src/ros.py index 277e03a6..f9dd01c5 100644 --- a/src/ros.py +++ b/src/ros.py @@ -1,22 +1,24 @@ -import re +import logging from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata -# https://regex101.com/r/c1ribd/1 -VERSION_PATTERN = re.compile(r"^ROS (?P(\w| )+)") +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + response = http.fetch_url(config.url) + soup = BeautifulSoup(response.text, features="html5lib") -with releasedata.ProductData("ros") as product_data: - response = http.fetch_url("https://wiki.ros.org/Distributions") - soup = BeautifulSoup(response.text, features="html5lib") + 
for tr in soup.findAll("tr"): + td_list = tr.findAll("td") + if len(td_list) == 0: + continue - for tr in soup.findAll("tr"): - td_list = tr.findAll("td") - if len(td_list) == 0: - continue + version_str = td_list[0].get_text().strip() + version_match = config.first_match(version_str) + if not version_match: + logging.warning(f"Skipping version '{version_str}': does not match the expected pattern") + continue - version_str = td_list[0].get_text().strip() - if VERSION_PATTERN.match(version_str): # Get the "code" (such as noetic) instead of the display name (such as Noetic Ninjemys) version = td_list[0].findAll("a")[0]["href"][1:] try: diff --git a/src/samsung-security.py b/src/samsung-security.py index fedf50f1..58f8b6cf 100644 --- a/src/samsung-security.py +++ b/src/samsung-security.py @@ -1,6 +1,5 @@ import logging import re -import sys from datetime import date, datetime, time, timezone from bs4 import BeautifulSoup @@ -14,9 +13,7 @@ it retains the date and use it as the model's EOL date. 
TODAY = dates.today() -p_filter = sys.argv[1] if len(sys.argv) > 1 else None -m_filter = sys.argv[2] if len(sys.argv) > 2 else None -for config in endoflife.list_configs(p_filter, 'samsung-security', m_filter): +for config in endoflife.list_configs_from_argv(): with releasedata.ProductData(config.product) as product_data: frontmatter = endoflife.ProductFrontmatter(product_data.name) frontmatter_release_names = frontmatter.get_release_names() diff --git a/src/sles.py b/src/sles.py index 169c66f5..94aa38d5 100644 --- a/src/sles.py +++ b/src/sles.py @@ -1,30 +1,31 @@ import logging from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata -with releasedata.ProductData("sles") as product_data: - response = http.fetch_url("https://www.suse.com/lifecycle") - soup = BeautifulSoup(response.text, features="html5lib") +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + response = http.fetch_url(config.url) + soup = BeautifulSoup(response.text, features="html5lib") - products_table = soup.find("tbody", id="productSupportLifecycle") - sles_header_rows = products_table.find_all("tr", class_="row", attrs={"data-productfilter": "SUSE Linux Enterprise Server"}) + products_table = soup.find("tbody", id="productSupportLifecycle") + sles_header_rows = products_table.find_all("tr", class_="row", attrs={"data-productfilter": "SUSE Linux Enterprise Server"}) - # Extract rows' IDs to find related sub-rows with details (normally hidden until a user expands a section) - for detail_id in [f"detail{row['id']}" for row in sles_header_rows]: - detail_row = products_table.find("tr", id=detail_id) - # There is a table with info about minor releases and after it, optionally, a table with info about modules - minor_versions_table = detail_row.find_all("tbody")[0] + # Extract rows' IDs to find related sub-rows with details (normally hidden until a user expands a 
section) + for detail_id in [f"detail{row['id']}" for row in sles_header_rows]: + detail_row = products_table.find("tr", id=detail_id) + # There is a table with info about minor releases and after it, optionally, a table with info about modules + minor_versions_table = detail_row.find_all("tbody")[0] - # The first sub-row is a header, the rest contains info about the first release and later minor releases - for row in minor_versions_table.find_all("tr")[1:]: - # For each minor release there is an FCS date, general support end date and LTSS end date - cells = row.find_all("td") - version = cells[0].text.replace("SUSE Linux Enterprise Server ", '').replace(' SP', '.') - date_str = cells[1].text + # The first sub-row is a header, the rest contains info about the first release and later minor releases + for row in minor_versions_table.find_all("tr")[1:]: + # For each minor release there is an FCS date, general support end date and LTSS end date + cells = row.find_all("td") + version = cells[0].text.replace("SUSE Linux Enterprise Server ", '').replace(' SP', '.') + date_str = cells[1].text - try: - date = dates.parse_date(date_str) - product_data.declare_version(version, date) - except ValueError: - logging.info(f"Ignoring {version}: date '{date_str}' could not be parsed") + try: + date = dates.parse_date(date_str) + product_data.declare_version(version, date) + except ValueError: + logging.info(f"Ignoring {version}: date '{date_str}' could not be parsed") diff --git a/src/splunk.py b/src/splunk.py index 3be97bb9..43380b71 100644 --- a/src/splunk.py +++ b/src/splunk.py @@ -1,7 +1,7 @@ import re from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata VERSION_DATE_PATTERN = re.compile(r"Splunk Enterprise (?P<version>\d+\.\d+(?:\.\d+)*) was (?:first )?released on (?P<date>\w+\s\d\d?,\s\d{4})\.", re.MULTILINE) @@ -30,19 +30,20 @@ def get_latest_minor_versions(versions: list[str]) -> list[str]: return
latest_versions -with releasedata.ProductData("splunk") as product_data: - main = http.fetch_url("https://docs.splunk.com/Documentation/Splunk") - soup = BeautifulSoup(main.text, features="html5lib") +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + main = http.fetch_url(config.url) + soup = BeautifulSoup(main.text, features="html5lib") - all_versions = [option.attrs['value'] for option in soup.select("select#version-select > option")] - all_versions = [v for v in all_versions if v != "DataMonitoringAppPreview"] + all_versions = [option.attrs['value'] for option in soup.select("select#version-select > option")] + all_versions = [v for v in all_versions if v != "DataMonitoringAppPreview"] - # Latest minor release notes contains release notes for all previous minor versions. - # For example, 9.0.5 release notes also contains release notes for 9.0.0 to 9.0.4. - latest_minor_versions = get_latest_minor_versions(all_versions) - latest_minor_versions_urls = [f"https://docs.splunk.com/Documentation/Splunk/{v}/ReleaseNotes/MeetSplunk" for v in latest_minor_versions] - for response in http.fetch_urls(latest_minor_versions_urls): - for (version_str, date_str) in VERSION_DATE_PATTERN.findall(response.text): - version_str = f"{version_str}.0" if len(version_str.split(".")) == 2 else version_str # convert x.y to x.y.0 - date = dates.parse_date(date_str) - product_data.declare_version(version_str, date) + # Latest minor release notes contains release notes for all previous minor versions. + # For example, 9.0.5 release notes also contains release notes for 9.0.0 to 9.0.4. 
+ latest_minor_versions = get_latest_minor_versions(all_versions) + latest_minor_versions_urls = [f"{config.url}/{v}/ReleaseNotes/MeetSplunk" for v in latest_minor_versions] + for response in http.fetch_urls(latest_minor_versions_urls): + for (version_str, date_str) in VERSION_DATE_PATTERN.findall(response.text): + version_str = f"{version_str}.0" if len(version_str.split(".")) == 2 else version_str # convert x.y to x.y.0 + date = dates.parse_date(date_str) + product_data.declare_version(version_str, date) diff --git a/src/typo3.py b/src/typo3.py index 38a235b7..7abc8cd5 100644 --- a/src/typo3.py +++ b/src/typo3.py @@ -1,11 +1,12 @@ -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata -with releasedata.ProductData("typo3") as product_data: - data = http.fetch_url("https://get.typo3.org/api/v1/release/").json() - for v in data: - if v['type'] == 'development': - continue +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + data = http.fetch_url(config.url).json() + for v in data: + if v['type'] == 'development': + continue - version = v["version"] - date = dates.parse_datetime(v["date"], to_utc=False) # utc kept for now for backwards compatibility - product_data.declare_version(version, date) + version = v["version"] + date = dates.parse_datetime(v["date"], to_utc=False) # utc kept for now for backwards compatibility + product_data.declare_version(version, date) diff --git a/src/unity.py b/src/unity.py index 2980c186..adc99d2b 100644 --- a/src/unity.py +++ b/src/unity.py @@ -1,5 +1,5 @@ from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata """Fetches the Unity LTS releases from the Unity website. Non-LTS releases are not listed there, so this automation is only partial. 
@@ -17,11 +17,12 @@ Note that it was assumed that: The script will need to be updated if someday those conditions are not met.""" -with releasedata.ProductData("unity") as product_data: - response = http.fetch_url("https://unity.com/releases/editor/qa/lts-releases") - soup = BeautifulSoup(response.text, features="html5lib") +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + response = http.fetch_url(config.url) + soup = BeautifulSoup(response.text, features="html5lib") - for release in soup.find_all('div', class_='component-releases-item__show__inner-header'): - version = release.find('h4').find('span').text - date = dates.parse_datetime(release.find('time').attrs['datetime']) - product_data.declare_version(version, date) + for release in soup.find_all('div', class_='component-releases-item__show__inner-header'): + version = release.find('h4').find('span').text + date = dates.parse_datetime(release.find('time').attrs['datetime']) + product_data.declare_version(version, date) diff --git a/src/unrealircd.py b/src/unrealircd.py index 8b0ecc3b..46c85405 100644 --- a/src/unrealircd.py +++ b/src/unrealircd.py @@ -5,17 +5,18 @@ from common import dates, endoflife, http, releasedata DATE_PATTERN = re.compile(r"\d{4}-\d{2}-\d{2}") -with releasedata.ProductData("unrealircd") as product_data: - response = http.fetch_url("https://www.unrealircd.org/docwiki/index.php?title=History_of_UnrealIRCd_releases&action=raw") - wikicode = mwparserfromhell.parse(response.text) +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + response = http.fetch_url(config.url) + wikicode = mwparserfromhell.parse(response.text) - for tr in wikicode.ifilter_tags(matches=lambda node: node.tag == "tr"): - items = tr.contents.filter_tags(matches=lambda node: node.tag == "td") - if len(items) < 2: - continue + for tr in wikicode.ifilter_tags(matches=lambda node: node.tag == 
"tr"): + items = tr.contents.filter_tags(matches=lambda node: node.tag == "td") + if len(items) < 2: + continue - version = items[0].__strip__() - date_str = items[1].__strip__() - if endoflife.DEFAULT_VERSION_PATTERN.match(version) and DATE_PATTERN.match(date_str): - date = dates.parse_date(date_str) - product_data.declare_version(version, date) + version = items[0].__strip__() + date_str = items[1].__strip__() + if config.first_match(version) and DATE_PATTERN.match(date_str): + date = dates.parse_date(date_str) + product_data.declare_version(version, date) diff --git a/src/veeam.py b/src/veeam.py index b778433d..d88bc98a 100644 --- a/src/veeam.py +++ b/src/veeam.py @@ -1,6 +1,5 @@ import logging import re -import sys from bs4 import BeautifulSoup from common import dates, endoflife, http, releasedata @@ -11,9 +10,7 @@ This script takes a single argument which is the url of the versions page on htt such as `https://www.veeam.com/kb2680`. """ -p_filter = sys.argv[1] if len(sys.argv) > 1 else None -m_filter = sys.argv[2] if len(sys.argv) > 2 else None -for config in endoflife.list_configs(p_filter, "veeam", m_filter): +for config in endoflife.list_configs_from_argv(): with releasedata.ProductData(config.product) as product_data: response = http.fetch_url(config.url) soup = BeautifulSoup(response.text, features="html5lib") diff --git a/src/virtualbox.py b/src/virtualbox.py index 0e8a4ab6..a04985ae 100644 --- a/src/virtualbox.py +++ b/src/virtualbox.py @@ -2,34 +2,34 @@ import logging import re from bs4 import BeautifulSoup -from common import dates, http, releasedata +from common import dates, endoflife, http, releasedata """Fetches releases from VirtualBox download page.""" -RELEASE_REGEX = re.compile(r"^VirtualBox (?P\d+\.\d+)$") EOL_REGEX = re.compile(r"^\(no longer supported, support ended (?P\d{4}/\d{2})\)$") -with releasedata.ProductData("virtualbox") as product_data: - response = http.fetch_url("https://www.virtualbox.org/wiki/Download_Old_Builds") - soup = 
BeautifulSoup(response.text, features="html5lib") +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + response = http.fetch_url(config.url) + soup = BeautifulSoup(response.text, features="html5lib") - for li in soup.select_one("#DownloadVirtualBoxOldBuilds + ul").find_all("li"): - li_text = li.find("a").text.strip() + for li in soup.select_one("#DownloadVirtualBoxOldBuilds + ul").find_all("li"): + li_text = li.find("a").text.strip() - release_match = RELEASE_REGEX.match(li_text) - if not release_match: - logging.info(f"Skipping '{li_text}': does not match {RELEASE_REGEX}") - continue + release_match = config.first_match(li_text) + if not release_match: + logging.info(f"Skipping '{li_text}': does not match expected pattern") + continue - release_name = release_match.group("value") - release = product_data.get_release(release_name) + release_name = release_match.group("value") + release = product_data.get_release(release_name) - eol_text = li.find("em").text.lower().strip() - eol_match = EOL_REGEX.match(eol_text) - if not eol_match: - logging.info(f"Ignoring '{eol_text}': does not match {EOL_REGEX}") - continue + eol_text = li.find("em").text.lower().strip() + eol_match = EOL_REGEX.match(eol_text) + if not eol_match: + logging.info(f"Ignoring '{eol_text}': does not match {EOL_REGEX}") + continue - eol_date_str = eol_match.group("value") - eol_date = dates.parse_month_year_date(eol_date_str) - release.set_eol(eol_date) + eol_date_str = eol_match.group("value") + eol_date = dates.parse_month_year_date(eol_date_str) + release.set_eol(eol_date) diff --git a/src/visual-studio.py b/src/visual-studio.py index a0a356e1..6802bc0b 100644 --- a/src/visual-studio.py +++ b/src/visual-studio.py @@ -1,16 +1,9 @@ from bs4 import BeautifulSoup from common import dates, endoflife, http, releasedata -# There is no build history for versions 2015 and below. 
-# This is not a big deal because there was no version for those releases in a very long time. -URLS = [ - "https://learn.microsoft.com/en-us/visualstudio/releasenotes/vs2017-relnotes-history", - "https://learn.microsoft.com/en-us/visualstudio/releases/2019/history", - "https://learn.microsoft.com/en-us/visualstudio/releases/2022/release-history", -] - -with releasedata.ProductData("visual-studio") as product_data: - for response in http.fetch_urls(URLS): +for config in endoflife.list_configs_from_argv(): + with releasedata.ProductData(config.product) as product_data: + response = http.fetch_url(config.url) soup = BeautifulSoup(response.text, features="html5lib") for table in soup.find_all("table"): @@ -29,5 +22,5 @@ with releasedata.ProductData("visual-studio") as product_data: date = cells[date_index].get_text().strip() date = dates.parse_date(date) - if date and version and endoflife.DEFAULT_VERSION_PATTERN.match(version): + if date and version and config.first_match(version): product_data.declare_version(version, date) diff --git a/update.py b/update.py index f28a9a03..5e0d9940 100644 --- a/update.py +++ b/update.py @@ -87,7 +87,7 @@ def __run_script(product: ProductFrontmatter, config: AutoConfig, summary: Scrip logging.info(f"start running {script} for {config}") start = time.perf_counter() # timeout is handled in child scripts - child = subprocess.run([sys.executable, script, config.product, str(config.url)]) + child = subprocess.run([sys.executable, script, config.product, str(config.method), str(config.url)]) success = child.returncode == 0 elapsed_seconds = time.perf_counter() - start