From 7c9f3f51464f577d193047c73df625b5fb5ad5b4 Mon Sep 17 00:00:00 2001 From: Marc Wrobel Date: Tue, 2 May 2023 23:23:59 +0200 Subject: [PATCH] [linuxkernel] Use git.kernel.org tags page to retrieve versions (closes #94) Automation for the Linux Kernel was based on the mirror https://github.com/gregkh/linux, which is updated infrequently. The reason for that is documented on https://github.com/endoflife-date/talk/wiki/linux/2ecd6ee9df7647a8083cd4fc3bfd966278fb1122 (e.g. we need the git repository to support partial clone). Using https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/refs/tags allows us to retrieve new versions much quicker. --- src/linuxkernel.py | 69 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 src/linuxkernel.py diff --git a/src/linuxkernel.py b/src/linuxkernel.py new file mode 100644 index 00000000..a82de136 --- /dev/null +++ b/src/linuxkernel.py @@ -0,0 +1,69 @@ +import re +import json +import urllib.request + +from datetime import datetime, timezone +from bs4 import BeautifulSoup + +"""Fetch Linux Kernel versions with their dates from +https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/refs/tags. + +Ideally we would want to use the kernel.org git repository directly +(https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/), but it does +not support partial clone so we cannot. 
PRODUCT = "linuxkernel"
URL = "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/refs/tags"

# Release tags look like "v6.3" or "v6.3.1". The named group "v" captures the
# dotted version without its leading "v"; the trailing "$" rejects suffixed
# tags such as "v6.3-rc1". Compiled once here instead of once per table row.
_VERSION_TAG_RE = re.compile(r"v(?P<v>\d+(?:\.\d+)*)$", flags=re.IGNORECASE)


def parse_date(d):
    """Convert a timestamp such as ``2023-05-01 08:32:34 +0900`` to a UTC date.

    Args:
        d: Timestamp formatted as ``%Y-%m-%d %H:%M:%S %z``.

    Returns:
        The calendar date of that instant in UTC, formatted as ``YYYY-MM-DD``.

    Raises:
        ValueError: If ``d`` does not match the expected format.
    """
    parsed = datetime.strptime(d, "%Y-%m-%d %H:%M:%S %z")
    return parsed.astimezone(timezone.utc).strftime("%Y-%m-%d")


def parse_version(tag):
    """Extract the dotted version number from a release tag name.

    Args:
        tag: Tag name, e.g. ``"v6.3.1"``.

    Returns:
        The version without the leading ``v`` (``"6.3.1"``), or ``None`` when
        ``tag`` does not start with ``v`` or does not end in a purely numeric
        dotted version (``"v6.3-rc1"``, ``"latest"``).
    """
    if not tag.startswith("v"):
        return None
    match = _VERSION_TAG_RE.search(tag)
    return match.group("v") if match else None


def make_bs_request(url):
    """Download ``url`` and return its body parsed by BeautifulSoup.

    The request uses a 5 second timeout and the ``html5lib`` parser.
    """
    req = urllib.request.Request(url)
    with urllib.request.urlopen(req, timeout=5) as response:
        return BeautifulSoup(response.read(), features="html5lib")


def fetch_releases():
    """Scrape the tags page at ``URL`` and map each version to its date.

    Only rows with exactly four cells inside ``<table class="list">`` are
    considered. The first cell supplies the tag name and the ``title``
    attribute of the ``<span>`` in the fourth cell supplies the timestamp.
    Rows whose tag is not a plain release tag, or that carry no timestamp,
    are skipped. Each retained entry is printed as ``<version> : <date>``.

    Returns:
        dict mapping version strings (``"6.3.1"``) to UTC dates
        (``"YYYY-MM-DD"``).
    """
    releases = {}

    soup = make_bs_request(URL)
    for table in soup.find_all("table", class_="list"):
        for row in table.find_all("tr"):
            columns = row.find_all("td")
            if len(columns) != 4:
                continue

            version = parse_version(columns[0].text.strip())
            if version is None:
                continue

            # Search inside this cell only: find_next() would keep walking the
            # document and could return the <span> of a later row, pairing
            # this version with another tag's date.
            datetime_span = columns[3].find("span")
            datetime_text = datetime_span.get("title") if datetime_span else None
            if not datetime_text:
                continue

            date = parse_date(datetime_text)
            print(f"{version} : {date}")
            releases[version] = date

    return releases


def main():
    """Fetch the releases and write them to ``releases/<PRODUCT>.json``.

    The log output of the fetch is wrapped in ``::group::`` / ``::endgroup::``
    marker lines.
    """
    print(f"::group::{PRODUCT}")
    releases = fetch_releases()
    print("::endgroup::")

    # Sort by version then date (asc). Versions are compared as plain strings,
    # so the order is lexicographic ("5.10" sorts before "5.9").
    ordered = dict(sorted(releases.items()))
    with open(f"releases/{PRODUCT}.json", "w") as f:
        f.write(json.dumps(ordered, indent=2))


if __name__ == '__main__':
    main()