From 99d9449d3a5973186456313cea08c005a3b66a3d Mon Sep 17 00:00:00 2001 From: Nemo Date: Thu, 15 Jun 2023 01:52:32 +0530 Subject: [PATCH] Generalize linuxkernel to cgit (#97, #105) Co-authored-by: Marc Wrobel --- src/cgit.py | 82 ++++++++++++++++++++++++++++++++++++++++++++++ src/linuxkernel.py | 59 --------------------------------- 2 files changed, 82 insertions(+), 59 deletions(-) create mode 100644 src/cgit.py delete mode 100644 src/linuxkernel.py diff --git a/src/cgit.py b/src/cgit.py new file mode 100644 index 00000000..422ef74f --- /dev/null +++ b/src/cgit.py @@ -0,0 +1,82 @@ +import re +import sys +from bs4 import BeautifulSoup +from common import endoflife +from datetime import datetime, timezone +from liquid import Template + +"""Fetch versions with their dates from a cgit repository, such as +https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git. + +Ideally we would want to use the git repository directly, but cgit repositories +do not support partial clone so we cannot. 
METHOD = 'cgit'

# Same as used in Ruby (update.rb)
DEFAULT_TAG_TEMPLATE = (
    "{{major}}{% if minor %}.{{minor}}{% if patch %}.{{patch}}{%if tiny %}.{{tiny}}{%endif%}{%endif%}{%endif%}"
)
# The named groups are handed to the tag template via groupdict(), so their
# names must match the template variables: major, minor, patch, tiny.
DEFAULT_VERSION_REGEX = (
    r"^v?(?P<major>\d+)\.(?P<minor>\d+)\.?(?P<patch>\d+)?\.?(?P<tiny>\d+)?$"
)


def parse_date(d: str) -> str:
    """Convert a timestamp such as ``2023-05-01 08:32:34 +0900`` to a UTC date.

    Args:
        d: Timestamp in ``%Y-%m-%d %H:%M:%S %z`` format.

    Returns:
        The calendar date of that instant in UTC, formatted ``YYYY-MM-DD``.

    Raises:
        ValueError: If ``d`` does not match the expected format.
    """
    return (
        datetime.strptime(d, "%Y-%m-%d %H:%M:%S %z")
        .astimezone(timezone.utc)
        .strftime("%Y-%m-%d")
    )


def make_bs_request(url):
    """Fetch the ``refs/tags`` page of the repository at *url* and parse it.

    Args:
        url: Base URL of the cgit repository; a trailing ``/`` is tolerated.

    Returns:
        A ``BeautifulSoup`` document built with the ``html5lib`` parser.
    """
    # Strip trailing slashes so the request never targets ".../repo.git//refs/tags".
    # NOTE(review): endoflife.fetch_url is defined outside this file; it is
    # presumed to return the page body.
    response = endoflife.fetch_url(url.rstrip('/') + '/refs/tags')
    return BeautifulSoup(response, features="html5lib")


def fetch_releases(url, regex, template):
    """Collect ``{version: date}`` for every matching tag listed by cgit.

    Every row with exactly four cells inside a ``<table class="list">`` is
    inspected: the first cell holds the tag name, and the ``title`` attribute
    of the first ``<span>`` found from the fourth cell holds the timestamp.

    Args:
        url: Base URL of the cgit repository.
        regex: Pattern with named groups, matched at the start of the tag name.
        template: Liquid template rendered with the pattern's named groups to
            build the version string.

    Returns:
        Dict mapping rendered version strings to ``YYYY-MM-DD`` dates (UTC).
    """
    releases = {}

    # Both are loop-invariant: build them once instead of once per row.
    pattern = re.compile(regex.strip())
    l_template = Template(template)

    soup = make_bs_request(url)
    for table in soup.find_all("table", class_="list"):
        for row in table.find_all("tr"):
            columns = row.find_all("td")
            if len(columns) != 4:
                continue  # header rows and separators

            matches = pattern.match(columns[0].text.strip())
            if not matches:
                continue  # tag name is not a version we track

            # NOTE(review): find_next() searches forward in the document from
            # the cell, not only inside it; kept as in the original.
            datetime_td = columns[3].find_next("span")
            # .get() so that a span without a title is skipped, not a KeyError.
            datetime_text = datetime_td.attrs.get("title") if datetime_td else None
            if not datetime_text:
                continue

            version_string = l_template.render(**matches.groupdict())
            date = parse_date(datetime_text)
            print(f"{version_string} : {date}")
            releases[version_string] = date

    return releases


def update_product(product_name, configs):
    """Fetch the releases of every cgit config of a product and write them out.

    Args:
        product_name: Name passed through to ``endoflife.write_releases``.
        configs: Iterable of dicts; each must contain the ``cgit`` key (the
            repository URL) and may override ``template`` and ``regex``.
    """
    releases = {}

    for config in configs:
        t = config.get("template", DEFAULT_TAG_TEMPLATE)
        regex = config.get("regex", DEFAULT_VERSION_REGEX)
        # Later configs win when two of them yield the same version.
        releases |= fetch_releases(config[METHOD], regex, t)

    # Sort by version (plain string order). Versions are unique dict keys, so
    # this is the same ordering as sorting on (version, date).
    endoflife.write_releases(product_name, dict(sorted(releases.items())))


def main():
    """Update every product using the cgit method, optionally filtered by argv[1]."""
    p_filter = sys.argv[1] if len(sys.argv) > 1 else None
    for product, configs in endoflife.list_products(METHOD, p_filter).items():
        # Wrap each product's output in ::group:: / ::endgroup:: markers.
        print(f"::group::{product}")
        update_product(product, configs)
        print("::endgroup::")


if __name__ == "__main__":
    main()
a/src/linuxkernel.py b/src/linuxkernel.py deleted file mode 100644 index 6d927dd2..00000000 --- a/src/linuxkernel.py +++ /dev/null @@ -1,59 +0,0 @@ -import re -from bs4 import BeautifulSoup -from common import endoflife -from datetime import datetime, timezone - -"""Fetch Linux Kernel versions with their dates from -https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/refs/tags. - -Ideally we would want to use the kernel.org git repository directly -(https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/), but it does -not support partial clone so we cannot. -""" - -PRODUCT = "linuxkernel" -URL = "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/refs/tags" - - -# Parse date with format 2023-05-01 08:32:34 +0900 and convert to UTC -def parse_date(d): - return datetime.strptime(d, "%Y-%m-%d %H:%M:%S %z")\ - .astimezone(timezone.utc)\ - .strftime("%Y-%m-%d") - - -def make_bs_request(url): - response = endoflife.fetch_url(url) - return BeautifulSoup(response, features="html5lib") - - -def fetch_releases(): - releases = {} - - soup = make_bs_request(URL) - for table in soup.find_all("table", class_='list'): - for row in table.find_all("tr"): - columns = row.find_all("td") - if len(columns) == 4: - version_text = columns[0].text.strip() - datetime_td = columns[3].find_next('span') - datetime_text = datetime_td.attrs['title'] if datetime_td else None - if version_text.startswith('v') and datetime_text: - r = r"v(?P<v>\d+(?:\.\d+)*)$" - m = re.search(r, version_text, flags=re.IGNORECASE) - if m: - version = m.group("v") - date = parse_date(datetime_text) - print(f"{version} : {date}") - releases[version] = date - - return releases - - -print(f"::group::{PRODUCT}") -releases = fetch_releases() -endoflife.write_releases(PRODUCT, dict( - # sort by version then date (asc) - sorted(releases.items(), key=lambda x: (x[0], x[1])) -)) -print("::endgroup::")