Use a default regex when possible (#205)

This commit is contained in:
Marc Wrobel
2023-12-01 21:08:11 +01:00
parent e97e261946
commit 750faaa64f
10 changed files with 20 additions and 35 deletions

View File

@@ -13,14 +13,6 @@ do not support partial clone so we cannot.
""" """
METHOD = 'cgit' METHOD = 'cgit'
# Same as used in Ruby (update.rb)
DEFAULT_TAG_TEMPLATE = (
"{{major}}{% if minor %}.{{minor}}{% if patch %}.{{patch}}{%if tiny %}.{{tiny}}{%endif%}{%endif%}{%endif%}"
)
DEFAULT_VERSION_REGEX = (
r"^v?(?P<major>\d+)\.(?P<minor>\d+)\.?(?P<patch>\d+)?\.?(?P<tiny>\d+)?$"
)
def make_bs_request(url): def make_bs_request(url):
response = endoflife.fetch_url(url + '/refs/tags') response = endoflife.fetch_url(url + '/refs/tags')
@@ -56,8 +48,8 @@ def update_product(product_name, configs):
versions = {} versions = {}
for config in configs: for config in configs:
t = config.get("template", DEFAULT_TAG_TEMPLATE) t = config.get("template", endoflife.DEFAULT_TAG_TEMPLATE)
regex = config.get("regex", DEFAULT_VERSION_REGEX) regex = config.get("regex", endoflife.DEFAULT_VERSION_REGEX)
versions = versions | fetch_releases(config[METHOD], regex, t) versions = versions | fetch_releases(config[METHOD], regex, t)
endoflife.write_releases(product_name, versions) endoflife.write_releases(product_name, versions)

View File

@@ -12,6 +12,11 @@ from urllib3.util import Retry
# See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent. # See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent.
USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0' USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0'
# Handle versions having at least 2 numeric components (ex. 1.2) and at most 4 (ex. 1.2.3.4), with an optional leading "v".
# Major version must be >= 1.
DEFAULT_VERSION_REGEX = r"^v?(?P<major>[1-9]\d*)\.(?P<minor>\d+)(\.(?P<patch>\d+)(\.(?P<tiny>\d+))?)?$"
DEFAULT_TAG_TEMPLATE = "{{major}}.{{minor}}{% if patch %}.{{patch}}{% if tiny %}.{{tiny}}{%endif%}{%endif%}"
def load_product(product_name, pathname="website/products") -> frontmatter.Post: def load_product(product_name, pathname="website/products") -> frontmatter.Post:
"""Load the product's file frontmatter. """Load the product's file frontmatter.

View File

@@ -4,7 +4,6 @@ import sys
from common import endoflife from common import endoflife
METHOD = "docker_hub" METHOD = "docker_hub"
REGEX = r"^(?:(\d+\.(?:\d+\.)*\d+))$"
def fetch_releases(url, regex, releases): def fetch_releases(url, regex, releases):
@@ -35,7 +34,7 @@ def update_product(product_name, configs):
for config in configs: for config in configs:
url = f"https://hub.docker.com/v2/repositories/{config[METHOD]}/tags?page_size=100&page=1" url = f"https://hub.docker.com/v2/repositories/{config[METHOD]}/tags?page_size=100&page=1"
config = {"regex": REGEX} | config config = {"regex": endoflife.DEFAULT_VERSION_REGEX} | config
fetch_releases(url, config["regex"], versions) fetch_releases(url, config["regex"], versions)
endoflife.write_releases(product_name, versions) endoflife.write_releases(product_name, versions)

View File

@@ -1,6 +1,6 @@
import datetime as dt
import re import re
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from common import dates
from common import endoflife from common import endoflife
# Now that AWS no longer publishes docs on GitHub, # Now that AWS no longer publishes docs on GitHub,
@@ -16,7 +16,6 @@ URLS = [
# + latest # + latest
"https://docs.aws.amazon.com/eks/latest/userguide/platform-versions.html", "https://docs.aws.amazon.com/eks/latest/userguide/platform-versions.html",
] ]
REGEX = r"^(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)$"
def parse_platforms_pages(): def parse_platforms_pages():
@@ -27,11 +26,11 @@ def parse_platforms_pages():
soup = BeautifulSoup(response, features="html5lib") soup = BeautifulSoup(response, features="html5lib")
for tr in soup.select("#main-col-body")[0].findAll("tr"): for tr in soup.select("#main-col-body")[0].findAll("tr"):
td = tr.find("td") td = tr.find("td")
if td and re.match(REGEX, td.text.strip()): if td and re.match(endoflife.DEFAULT_VERSION_REGEX, td.text.strip()):
data = tr.findAll("td") data = tr.findAll("td")
date = data[-1].text.strip() date = data[-1].text.strip()
if len(date) > 0: if len(date) > 0:
d = dt.datetime.strptime(date, "%B %d, %Y").strftime("%Y-%m-%d") d = dates.parse_date(date).strftime("%Y-%m-%d")
k8s_version = ".".join(data[0].text.strip().split(".")[:-1]) k8s_version = ".".join(data[0].text.strip().split(".")[:-1])
eks_version = data[1].text.strip().replace(".", "-") eks_version = data[1].text.strip().replace(".", "-")
version = f"{k8s_version}-{eks_version}" version = f"{k8s_version}-{eks_version}"

View File

@@ -16,11 +16,9 @@ identically named groups (as used in the mariadb product).
# craft-cms, exim, gerrit, jquery, kdeplasma, kirby, logstash, nexus, silverstripe # craft-cms, exim, gerrit, jquery, kdeplasma, kirby, logstash, nexus, silverstripe
# and tarantool versions. # and tarantool versions.
METHOD = 'git' METHOD = 'git'
DEFAULT_VERSION_REGEX = r"^v?(?P<major>[1-9]\d*)\.(?P<minor>\d+)(\.(?P<patch>\d+)(\.(?P<tiny>\d+))?)?$"
DEFAULT_TAG_TEMPLATE = "{{major}}.{{minor}}{% if patch %}.{{patch}}{% if tiny %}.{{tiny}}{%endif%}{%endif%}"
def fetch_releases(product_name, url, regex, template): def fetch_releases(url, regex, template):
releases = {} releases = {}
git = Git(url) git = Git(url)
@@ -40,10 +38,10 @@ def update_product(product_name, configs):
versions = {} versions = {}
for config in configs: for config in configs:
t = config.get("template", DEFAULT_TAG_TEMPLATE) t = config.get("template", endoflife.DEFAULT_TAG_TEMPLATE)
regex = config.get("regex", DEFAULT_VERSION_REGEX) regex = config.get("regex", endoflife.DEFAULT_VERSION_REGEX)
regex = regex.replace("(?<", "(?P<") # convert ruby regex to python regex regex = regex.replace("(?<", "(?P<") # convert ruby regex to python regex
versions = versions | fetch_releases(product_name, config[METHOD], regex, t) versions = versions | fetch_releases(config[METHOD], regex, t)
endoflife.write_releases(product_name, versions) endoflife.write_releases(product_name, versions)

View File

@@ -5,13 +5,12 @@ import sys
from common import endoflife from common import endoflife
METHOD = "maven" METHOD = "maven"
VERSION_REGEX = r'^\d+\.\d+(\.\d+)?$'
# TODO: Add support for custom regexes # TODO: Add support for custom regexes
# Hasn't been needed yet, so only write if we need it # Hasn't been needed yet, so only write if we need it
def valid_version(version): def valid_version(version):
if re.match(VERSION_REGEX, version): if re.match(endoflife.DEFAULT_VERSION_REGEX, version):
return True return True
return False return False

View File

@@ -4,7 +4,6 @@ import sys
from common import endoflife from common import endoflife
METHOD = "npm" METHOD = "npm"
REGEX = r"^(?:(\d+\.(?:\d+\.)*\d+))$"
def fetch_releases(npm_id, regex): def fetch_releases(npm_id, regex):
@@ -34,7 +33,7 @@ def update_product(product_name, configs):
versions = {} versions = {}
for config in configs: for config in configs:
config = {"regex": REGEX} | config config = {"regex": endoflife.DEFAULT_VERSION_REGEX} | config
versions = versions | fetch_releases(config[METHOD], config["regex"]) versions = versions | fetch_releases(config[METHOD], config["regex"])
endoflife.write_releases(product_name, versions) endoflife.write_releases(product_name, versions)

View File

@@ -5,10 +5,6 @@ from common import dates
from common import endoflife from common import endoflife
METHOD = "pypi" METHOD = "pypi"
DEFAULT_TAG_TEMPLATE = ( # Same as used in Ruby (update.rb)
"{{major}}{% if minor %}.{{minor}}{% if patch %}.{{patch}}{%endif%}{%endif%}"
)
REGEX = r"^(?:(\d+\.(?:\d+\.)*\d+))$"
def fetch_releases(pypi_id, regex): def fetch_releases(pypi_id, regex):
@@ -38,7 +34,7 @@ def update_product(product_name, configs):
versions = {} versions = {}
for config in configs: for config in configs:
config = {"regex": REGEX} | config config = {"regex": endoflife.DEFAULT_VERSION_REGEX} | config
versions = versions | fetch_releases(config[METHOD], config["regex"]) versions = versions | fetch_releases(config[METHOD], config["regex"])
endoflife.write_releases(product_name, versions) endoflife.write_releases(product_name, versions)

View File

@@ -3,7 +3,6 @@ from common import dates
from common import endoflife from common import endoflife
URL = "https://raw.githubusercontent.com/rocky-linux/wiki.rockylinux.org/development/docs/include/releng/version_table.md" URL = "https://raw.githubusercontent.com/rocky-linux/wiki.rockylinux.org/development/docs/include/releng/version_table.md"
REGEX = r"^(\d+\.\d+)$"
def parse_date(date_str): def parse_date(date_str):
@@ -17,7 +16,7 @@ def parse_markdown_table(table_text):
for line in lines: for line in lines:
items = line.split('|') items = line.split('|')
if len(items) >=5 and re.match(REGEX, items[1].strip()): if len(items) >=5 and re.match(endoflife.DEFAULT_VERSION_REGEX, items[1].strip()):
version = items[1].strip() version = items[1].strip()
date = parse_date(items[3]) date = parse_date(items[3])
print(f"{version}: {date}") print(f"{version}: {date}")

View File

@@ -3,7 +3,6 @@ import re
from common import endoflife from common import endoflife
URL = "https://www.unrealircd.org/docwiki/index.php?title=History_of_UnrealIRCd_releases&action=raw" URL = "https://www.unrealircd.org/docwiki/index.php?title=History_of_UnrealIRCd_releases&action=raw"
REGEX = r"^(?:(\d+\.(?:\d+\.)*\d+))$"
print("::group::unrealircd") print("::group::unrealircd")
response = endoflife.fetch_url(URL) response = endoflife.fetch_url(URL)
@@ -14,7 +13,7 @@ for tr in wikicode.ifilter_tags(matches=lambda node: node.tag == "tr"):
items = tr.contents.filter_tags(matches=lambda node: node.tag == "td") items = tr.contents.filter_tags(matches=lambda node: node.tag == "td")
if len(items) >= 2: if len(items) >= 2:
maybe_version = items[0].__strip__() maybe_version = items[0].__strip__()
if re.match(REGEX, maybe_version): if re.match(endoflife.DEFAULT_VERSION_REGEX, maybe_version):
maybe_date = items[1].__strip__() maybe_date = items[1].__strip__()
if re.match(r"\d{4}-\d{2}-\d{2}", maybe_date): if re.match(r"\d{4}-\d{2}-\d{2}", maybe_date):
versions[maybe_version] = maybe_date versions[maybe_version] = maybe_date