From 750faaa64fc33a0e719ca1420e7130b7dfc8dd37 Mon Sep 17 00:00:00 2001 From: Marc Wrobel Date: Fri, 1 Dec 2023 21:08:11 +0100 Subject: [PATCH] Use a default regex when possible (#205) --- src/cgit.py | 12 ++---------- src/common/endoflife.py | 5 +++++ src/docker_hub.py | 3 +-- src/eks.py | 7 +++---- src/git.py | 10 ++++------ src/maven.py | 3 +-- src/npm.py | 3 +-- src/pypi.py | 6 +----- src/rockylinux.py | 3 +-- src/unrealircd.py | 3 +-- 10 files changed, 20 insertions(+), 35 deletions(-) diff --git a/src/cgit.py b/src/cgit.py index e5e823ea..4fa0d00a 100644 --- a/src/cgit.py +++ b/src/cgit.py @@ -13,14 +13,6 @@ do not support partial clone so we cannot. """ METHOD = 'cgit' -# Same as used in Ruby (update.rb) -DEFAULT_TAG_TEMPLATE = ( - "{{major}}{% if minor %}.{{minor}}{% if patch %}.{{patch}}{%if tiny %}.{{tiny}}{%endif%}{%endif%}{%endif%}" -) -DEFAULT_VERSION_REGEX = ( - r"^v?(?P<major>\d+)\.(?P<minor>\d+)\.?(?P<patch>\d+)?\.?(?P<tiny>\d+)?$" -) - def make_bs_request(url): response = endoflife.fetch_url(url + '/refs/tags') @@ -56,8 +48,8 @@ def update_product(product_name, configs): versions = {} for config in configs: - t = config.get("template", DEFAULT_TAG_TEMPLATE) - regex = config.get("regex", DEFAULT_VERSION_REGEX) + t = config.get("template", endoflife.DEFAULT_TAG_TEMPLATE) + regex = config.get("regex", endoflife.DEFAULT_VERSION_REGEX) versions = versions | fetch_releases(config[METHOD], regex, t) endoflife.write_releases(product_name, versions) diff --git a/src/common/endoflife.py b/src/common/endoflife.py index 536a9822..1cdb915f 100644 --- a/src/common/endoflife.py +++ b/src/common/endoflife.py @@ -12,6 +12,11 @@ from urllib3.util import Retry # See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent. USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0' +# Handle versions having at least 2 digits (ex. 1.2) and at most 4 digits (ex. 1.2.3.4), with an optional leading "v". +# Major version must be >= 1. 
+DEFAULT_VERSION_REGEX = r"^v?(?P<major>[1-9]\d*)\.(?P<minor>\d+)(\.(?P<patch>\d+)(\.(?P<tiny>\d+))?)?$" +DEFAULT_TAG_TEMPLATE = "{{major}}.{{minor}}{% if patch %}.{{patch}}{% if tiny %}.{{tiny}}{%endif%}{%endif%}" + def load_product(product_name, pathname="website/products") -> frontmatter.Post: """Load the product's file frontmatter. diff --git a/src/docker_hub.py b/src/docker_hub.py index 8252da14..837301c3 100644 --- a/src/docker_hub.py +++ b/src/docker_hub.py @@ -4,7 +4,6 @@ import sys from common import endoflife METHOD = "docker_hub" -REGEX = r"^(?:(\d+\.(?:\d+\.)*\d+))$" def fetch_releases(url, regex, releases): @@ -35,7 +34,7 @@ def update_product(product_name, configs): for config in configs: url = f"https://hub.docker.com/v2/repositories/{config[METHOD]}/tags?page_size=100&page=1" - config = {"regex": REGEX} | config + config = {"regex": endoflife.DEFAULT_VERSION_REGEX} | config fetch_releases(url, config["regex"], versions) endoflife.write_releases(product_name, versions) diff --git a/src/eks.py b/src/eks.py index 175b9605..16fe0f10 100644 --- a/src/eks.py +++ b/src/eks.py @@ -1,6 +1,6 @@ -import datetime as dt import re from bs4 import BeautifulSoup +from common import dates from common import endoflife # Now that AWS no longer publishes docs on GitHub, @@ -16,7 +16,6 @@ URLS = [ # + latest "https://docs.aws.amazon.com/eks/latest/userguide/platform-versions.html", ] -REGEX = r"^(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)$" def parse_platforms_pages(): @@ -27,11 +26,11 @@ def parse_platforms_pages(): soup = BeautifulSoup(response, features="html5lib") for tr in soup.select("#main-col-body")[0].findAll("tr"): td = tr.find("td") - if td and re.match(REGEX, td.text.strip()): + if td and re.match(endoflife.DEFAULT_VERSION_REGEX, td.text.strip()): data = tr.findAll("td") date = data[-1].text.strip() if len(date) > 0: - d = dt.datetime.strptime(date, "%B %d, %Y").strftime("%Y-%m-%d") + d = dates.parse_date(date).strftime("%Y-%m-%d") k8s_version = ".".join(data[0].text.strip().split(".")[:-1]) 
eks_version = data[1].text.strip().replace(".", "-") version = f"{k8s_version}-{eks_version}" diff --git a/src/git.py b/src/git.py index 78261691..c72e9fa5 100644 --- a/src/git.py +++ b/src/git.py @@ -16,11 +16,9 @@ identically named groups (as used in the mariadb product). # craft-cms, exim, gerrit, jquery, kdeplasma, kirby, logstash, nexus, silverstripe # and tarantool versions. METHOD = 'git' -DEFAULT_VERSION_REGEX = r"^v?(?P<major>[1-9]\d*)\.(?P<minor>\d+)(\.(?P<patch>\d+)(\.(?P<tiny>\d+))?)?$" -DEFAULT_TAG_TEMPLATE = "{{major}}.{{minor}}{% if patch %}.{{patch}}{% if tiny %}.{{tiny}}{%endif%}{%endif%}" -def fetch_releases(product_name, url, regex, template): +def fetch_releases(url, regex, template): releases = {} git = Git(url) @@ -40,10 +38,10 @@ def update_product(product_name, configs): versions = {} for config in configs: - t = config.get("template", DEFAULT_TAG_TEMPLATE) - regex = config.get("regex", DEFAULT_VERSION_REGEX) + t = config.get("template", endoflife.DEFAULT_TAG_TEMPLATE) + regex = config.get("regex", endoflife.DEFAULT_VERSION_REGEX) regex = regex.replace("(?<", "(?P<") # convert ruby regex to python regex - versions = versions | fetch_releases(product_name, config[METHOD], regex, t) + versions = versions | fetch_releases(config[METHOD], regex, t) endoflife.write_releases(product_name, versions) diff --git a/src/maven.py b/src/maven.py index 0c02007e..046b0af5 100644 --- a/src/maven.py +++ b/src/maven.py @@ -5,13 +5,12 @@ import sys from common import endoflife METHOD = "maven" -VERSION_REGEX = r'^\d+\.\d+(\.\d+)?$' # TODO: Add support for custom regexes # Hasn't been needed yet, so only write if we need it def valid_version(version): - if re.match(VERSION_REGEX, version): + if re.match(endoflife.DEFAULT_VERSION_REGEX, version): return True return False diff --git a/src/npm.py b/src/npm.py index e9834a07..f5199b66 100644 --- a/src/npm.py +++ b/src/npm.py @@ -4,7 +4,6 @@ import sys from common import endoflife METHOD = "npm" -REGEX = r"^(?:(\d+\.(?:\d+\.)*\d+))$" def 
fetch_releases(npm_id, regex): @@ -34,7 +33,7 @@ def update_product(product_name, configs): versions = {} for config in configs: - config = {"regex": REGEX} | config + config = {"regex": endoflife.DEFAULT_VERSION_REGEX} | config versions = versions | fetch_releases(config[METHOD], config["regex"]) endoflife.write_releases(product_name, versions) diff --git a/src/pypi.py b/src/pypi.py index f2332ee2..436c23c7 100644 --- a/src/pypi.py +++ b/src/pypi.py @@ -5,10 +5,6 @@ from common import dates from common import endoflife METHOD = "pypi" -DEFAULT_TAG_TEMPLATE = ( # Same as used in Ruby (update.rb) - "{{major}}{% if minor %}.{{minor}}{% if patch %}.{{patch}}{%endif%}{%endif%}" -) -REGEX = r"^(?:(\d+\.(?:\d+\.)*\d+))$" def fetch_releases(pypi_id, regex): @@ -38,7 +34,7 @@ def update_product(product_name, configs): versions = {} for config in configs: - config = {"regex": REGEX} | config + config = {"regex": endoflife.DEFAULT_VERSION_REGEX} | config versions = versions | fetch_releases(config[METHOD], config["regex"]) endoflife.write_releases(product_name, versions) diff --git a/src/rockylinux.py b/src/rockylinux.py index 135c375c..3317bef8 100644 --- a/src/rockylinux.py +++ b/src/rockylinux.py @@ -3,7 +3,6 @@ from common import dates from common import endoflife URL = "https://raw.githubusercontent.com/rocky-linux/wiki.rockylinux.org/development/docs/include/releng/version_table.md" -REGEX = r"^(\d+\.\d+)$" def parse_date(date_str): @@ -17,7 +16,7 @@ def parse_markdown_table(table_text): for line in lines: items = line.split('|') - if len(items) >=5 and re.match(REGEX, items[1].strip()): + if len(items) >=5 and re.match(endoflife.DEFAULT_VERSION_REGEX, items[1].strip()): version = items[1].strip() date = parse_date(items[3]) print(f"{version}: {date}") diff --git a/src/unrealircd.py b/src/unrealircd.py index cd8bc13b..120086e0 100644 --- a/src/unrealircd.py +++ b/src/unrealircd.py @@ -3,7 +3,6 @@ import re from common import endoflife URL = 
"https://www.unrealircd.org/docwiki/index.php?title=History_of_UnrealIRCd_releases&action=raw" -REGEX = r"^(?:(\d+\.(?:\d+\.)*\d+))$" print("::group::unrealircd") response = endoflife.fetch_url(URL) @@ -14,7 +13,7 @@ for tr in wikicode.ifilter_tags(matches=lambda node: node.tag == "tr"): items = tr.contents.filter_tags(matches=lambda node: node.tag == "td") if len(items) >= 2: maybe_version = items[0].__strip__() - if re.match(REGEX, maybe_version): + if re.match(endoflife.DEFAULT_VERSION_REGEX, maybe_version): maybe_date = items[1].__strip__() if re.match(r"\d{4}-\d{2}-\d{2}", maybe_date): versions[maybe_version] = maybe_date