[cgit] Refactor script (#216)

Make the script more readable, mostly by:

- using the endoflife.Product class,
- introducing the endoflife.AutoConfig class to make it easier to manage such configuration,
- removing the unnecessary use of functions,
- a little bit of renaming.
This commit is contained in:
Marc Wrobel
2023-12-10 16:26:47 +01:00
committed by GitHub
parent aa7975d7f1
commit 0836c270ea
2 changed files with 83 additions and 55 deletions

View File

@@ -1,60 +1,42 @@
import re
import sys
from bs4 import BeautifulSoup
from common import http
from common import dates
from common import endoflife
from liquid import Template
"""Fetch versions with their dates from a cgit repository, such as
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git.
Ideally we would want to use the git repository directly, but cgit repositories
do not support partial clone so we cannot.
"""
METHOD = 'cgit'
def fetch_releases(url, regex, template):
    """Scrape the tag list of a cgit repository and return its releases.

    The ``<url>/refs/tags`` page is downloaded and every row made of exactly
    four cells, in every ``table`` carrying the ``list`` class, is inspected.
    The first cell provides the tag name; the date is read from the ``title``
    attribute of the ``<span>`` located with ``find_next`` from the fourth
    cell. Rows without a date and tags that do not match ``regex`` are
    ignored.

    :param url: base URL of the cgit repository.
    :param regex: pattern matched (after stripping) against each tag name.
    :param template: Liquid template rendered with the named groups of the match.
    :return: dict mapping each rendered version to its date as ``YYYY-MM-DD``.
    """
    page = http.fetch_url(url + '/refs/tags')
    document = BeautifulSoup(page.text, features="html5lib")
    version_template = Template(template)

    found = {}
    for tags_table in document.find_all("table", class_="list"):
        for tag_row in tags_table.find_all("tr"):
            cells = tag_row.find_all("td")
            if len(cells) != 4:
                continue

            tag_name = cells[0].text.strip()
            date_span = cells[3].find_next("span")
            if not date_span:
                continue
            raw_date = date_span.attrs["title"]
            if not raw_date:
                continue

            tag_match = re.match(regex.strip(), tag_name)
            if not tag_match:
                continue

            rendered_version = version_template.render(**tag_match.groupdict())
            release_day = dates.parse_datetime(raw_date).strftime("%Y-%m-%d")
            print(f"{rendered_version} : {release_day}")
            found[rendered_version] = release_day

    return found
def update_product(product_name, configs):
    """Gather the releases of every cgit configuration of a product and write them.

    Each configuration may override the tag template and the version regex;
    otherwise the defaults exposed by ``endoflife`` are used. When several
    configurations yield the same version, the one processed last wins.
    """
    collected = {}
    for cgit_config in configs:
        tag_template = cgit_config.get("template", endoflife.DEFAULT_TAG_TEMPLATE)
        version_regex = cgit_config.get("regex", endoflife.DEFAULT_VERSION_REGEX)
        repository_url = cgit_config[METHOD]
        collected.update(fetch_releases(repository_url, version_regex, tag_template))

    endoflife.write_releases(product_name, collected)
"""Fetches versions from repositories managed with cgit, such as the Linux kernel repository.
Ideally we would want to use the git repository directly, but cgit-managed repositories don't support partial clone."""
METHOD = "cgit"
# An optional first command-line argument limits the run to a single product.
p_filter = sys.argv[1] if len(sys.argv) > 1 else None

for product, configs in endoflife.list_products(METHOD, p_filter).items():
    # Start of the log group for this product.
    print(f"::group::{product}")
    update_product(product, configs)
# Update every product declaring the cgit method (optionally restricted by p_filter).
for product_name, configs in endoflife.list_products(METHOD, p_filter).items():
    print(f"::group::{product_name}")
    product = endoflife.Product(product_name, load_product_data=True)

    for auto_config in product.get_auto_configs(METHOD):
        tags_page = http.fetch_url(auto_config.url + '/refs/tags')
        document = BeautifulSoup(tags_page.text, features="html5lib")

        # Flatten "every row of every tag table" into a single lazy stream so
        # that the filtering below needs only one level of nesting.
        tag_rows = (
            tag_row
            for tags_table in document.find_all("table", class_="list")
            for tag_row in tags_table.find_all("tr")
        )

        for tag_row in tag_rows:
            cells = tag_row.find_all("td")
            if len(cells) != 4:
                continue  # only four-cell rows are considered

            tag_name = cells[0].text.strip()
            tag_match = auto_config.first_match(tag_name)
            if not tag_match:
                continue  # tag name matches none of the configured patterns

            date_span = cells[3].find_next("span")
            raw_date = date_span.attrs["title"] if date_span else None
            if not raw_date:
                continue  # no usable date for this tag

            product.declare_version(auto_config.render(tag_match), dates.parse_datetime(raw_date))

    product.write()
    print("::endgroup::")

View File

@@ -2,8 +2,10 @@ import frontmatter
import json
import logging
import os
import re
from datetime import datetime
from glob import glob
from liquid import Template
logging.basicConfig(format=logging.BASIC_FORMAT, level=logging.INFO)
# Handle versions having at least 2 digits (ex. 1.2) and at most 4 digits (ex. 1.2.3.4), with an optional leading "v".
@@ -11,17 +13,61 @@ logging.basicConfig(format=logging.BASIC_FORMAT, level=logging.INFO)
DEFAULT_VERSION_REGEX = r"^v?(?P<major>[1-9]\d*)\.(?P<minor>\d+)(\.(?P<patch>\d+)(\.(?P<tiny>\d+))?)?$"
DEFAULT_TAG_TEMPLATE = "{{major}}.{{minor}}{% if patch %}.{{patch}}{% if tiny %}.{{tiny}}{%endif%}{%endif%}"
PRODUCTS_PATH = os.environ.get("PRODUCTS_PATH", "website/products")
VERSIONS_PATH = os.environ.get("VERSIONS_PATH", "releases")
class AutoConfig:
    """Auto-update configuration of a product for a single update method.

    Built from one entry of a product's ``auto`` configuration. It exposes the
    URL declared for the method, the Liquid template used to render versions
    and the compiled patterns used to recognise version strings.
    """

    # Matches the opening of a Ruby-style named group "(?<name>" while leaving
    # the lookbehind assertions "(?<=" and "(?<!" untouched: both are valid
    # Python syntax and would become invalid if rewritten to "(?P<=" / "(?P<!".
    _RUBY_NAMED_GROUP = re.compile(r"\(\?<(?![=!])")

    def __init__(self, method: str, config: dict):
        """Read the URL, the template and the regex(es) from ``config``.

        :param method: name of the update method; ``config[method]`` is the URL.
        :param config: configuration entry; ``template`` and ``regex`` are
            optional and default to ``DEFAULT_TAG_TEMPLATE`` and
            ``DEFAULT_VERSION_REGEX``. ``regex`` may be a string or a list of strings.
        :raises KeyError: if ``config`` has no ``method`` key.
        """
        self.method = method
        self.url = config[method]
        self.version_template = Template(config.get("template", DEFAULT_TAG_TEMPLATE))

        regexes = config.get("regex", DEFAULT_VERSION_REGEX)
        regexes = regexes if isinstance(regexes, list) else [regexes]
        self.version_patterns = [re.compile(self._to_python_regex(regex)) for regex in regexes]

    @classmethod
    def _to_python_regex(cls, regex: str) -> str:
        """Convert Ruby named groups ``(?<name>...)`` to Python's ``(?P<name>...)``."""
        return cls._RUBY_NAMED_GROUP.sub("(?P<", regex)

    def first_match(self, version: str) -> "re.Match | None":
        """Return the match of the first pattern matching ``version``, or None if none does."""
        for pattern in self.version_patterns:
            match = pattern.match(version)
            if match:
                return match
        return None

    def render(self, match: re.Match) -> str:
        """Render the version template with the named groups of ``match``."""
        return self.version_template.render(**match.groupdict())
class Product:
"""Model an endoflife.date product.
"""
def __init__(self, name: str, load_product_data: bool = False):
    """Create a product with no declared versions.

    :param name: product name, used to build the versions file path
        (``VERSIONS_PATH/<name>.json``) and the product file path
        (``PRODUCTS_PATH/<name>.md``).
    :param load_product_data: when true, the frontmatter of the product file
        is loaded into ``product_data``. If the file does not exist a warning
        is logged and ``product_data`` stays ``None``.
    """
    self.name: str = name
    self.versions = {}
    self.versions_path: str = f"{VERSIONS_PATH}/{name}.json"
    self.product_path: str = f"{PRODUCTS_PATH}/{name}.md"
    # Always define the attribute, so that code reading it does not fail with
    # an AttributeError when loading was not requested.
    self.product_data = None

    if load_product_data:
        if os.path.isfile(self.product_path):
            with open(self.product_path) as f:
                self.product_data = frontmatter.load(f)
            logging.info(f"loaded product data for {self.name} from {self.product_path}")
        else:
            logging.warning(f"no product data found for {self.name} at {self.product_path}")
def get_auto_configs(self, method: str) -> list[AutoConfig]:
    """Return the auto-update configurations of this product for ``method``.

    Every entry of the ``auto`` list of the product data that has a ``method``
    key is wrapped in an ``AutoConfig``. Entries declaring another method are
    reported with an error log and skipped. An empty list is returned when
    the product data is unavailable or has no ``auto`` key.
    """
    # The product data may be None (product file missing) or not set at all.
    product_data = getattr(self, "product_data", None)
    if product_data is None or "auto" not in product_data:
        return []

    configs = []
    for config in product_data["auto"]:
        if method in config:
            configs.append(AutoConfig(method, config))
        else:
            logging.error(f"mixed auto-update methods declared for {self.name}, this is not yet supported")

    return configs
def has_version(self, version: str) -> bool:
    """Tell whether ``version`` is one of the versions held by this product."""
    known_versions = self.versions
    return version in known_versions
@@ -69,19 +115,19 @@ class Product:
return f"<{self.name}>"
def load_product(product_name, pathname=None) -> frontmatter.Post:
    """Load the product's file frontmatter.

    :param product_name: name of the product; ``<product_name>.md`` is read.
    :param pathname: directory containing the product files. Optional, kept
        so that callers passing an explicit directory keep working; defaults
        to ``PRODUCTS_PATH``.
    :return: the frontmatter loaded from the product file.
    """
    products_dir = PRODUCTS_PATH if pathname is None else pathname
    with open(f"{products_dir}/{product_name}.md") as f:
        return frontmatter.load(f)
def list_products(method, products_filter=None, pathname="website/products") -> dict[str, list[dict]]:
def list_products(method, products_filter=None) -> dict[str, list[dict]]:
"""Return a list of products that are using the same given update method.
"""
products_with_method = {}
for product_file in glob(f"{pathname}/*.md"):
for product_file in glob(f"{PRODUCTS_PATH}/*.md"):
product_name = os.path.splitext(os.path.basename(product_file))[0]
if products_filter and product_name != products_filter:
continue