From 7c9f3f51464f577d193047c73df625b5fb5ad5b4 Mon Sep 17 00:00:00 2001
From: Marc Wrobel <marc.wrobel@gmail.com>
Date: Tue, 2 May 2023 23:23:59 +0200
Subject: [PATCH] [linuxkernel] Use git.kernel.org tags page to retrieve
 versions (closes #94)

Automation for the Linux Kernel was based on the mirror https://github.com/gregkh/linux, which is updated infrequently. The reason for using that mirror is documented on https://github.com/endoflife-date/talk/wiki/linux/2ecd6ee9df7647a8083cd4fc3bfd966278fb1122 (i.e. we need the git repository to support partial clone).

Using https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/refs/tags allows us to retrieve new versions much quicker.
---
 src/linuxkernel.py | 69 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 src/linuxkernel.py
diff --git a/src/linuxkernel.py b/src/linuxkernel.py
new file mode 100644
index 00000000..a82de136
--- /dev/null
+++ b/src/linuxkernel.py
@@ -0,0 +1,69 @@
+import re
+import json
+import urllib.request
+
+from datetime import datetime, timezone
+from bs4 import BeautifulSoup
+
+"""Fetch Linux Kernel versions with their dates from
+https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/refs/tags.
+
+Ideally we would want to use the kernel.org git repository directly
+(https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/), but it does
+not support partial clone so we cannot.
+"""
+
+PRODUCT = "linuxkernel"  # names the log group and the releases/<PRODUCT>.json output file
+URL = "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/refs/tags"
+
+
+# Timestamps look like "2023-05-01 08:32:34 +0900"; the result is expressed in UTC.
+def parse_date(d):
+    """Return the UTC calendar day (YYYY-MM-DD) of the offset-aware timestamp string d."""
+    parsed = datetime.strptime(d, "%Y-%m-%d %H:%M:%S %z")
+    return parsed.astimezone(timezone.utc).strftime("%Y-%m-%d")
+
+
+def make_bs_request(url):
+    """Download url (5-second timeout) and return its body parsed by BeautifulSoup/html5lib."""
+    with urllib.request.urlopen(urllib.request.Request(url), timeout=5) as page:
+        return BeautifulSoup(page.read(), features="html5lib")
+
+
+def fetch_releases():
+    """Scrape URL and return {version: UTC "YYYY-MM-DD"} for tags like "v6.3" or "v6.3.1"."""
+    releases = {}
+    tag_pattern = re.compile(r"v(?P<v>\d+(?:\.\d+)*)$", flags=re.IGNORECASE)
+
+    soup = make_bs_request(URL)
+    for table in soup.find_all("table", class_='list'):
+        for row in table.find_all("tr"):
+            columns = row.find_all("td")
+            if len(columns) == 4:
+                version_text = columns[0].text.strip()
+                # find() stays inside this cell; find_next() would also search later rows.
+                datetime_span = columns[3].find('span')
+                datetime_text = datetime_span.get('title') if datetime_span else None
+                match = tag_pattern.search(version_text) if version_text.startswith('v') else None
+                if match and datetime_text:
+                    version, date = match.group("v"), parse_date(datetime_text)
+                    print(f"{version} : {date}")
+                    releases[version] = date
+
+    return releases
+
+
+def main():
+    """Fetch the releases and write them, sorted, to releases/<PRODUCT>.json."""
+    print(f"::group::{PRODUCT}")
+    releases = fetch_releases()
+    print("::endgroup::")
+
+    with open(f"releases/{PRODUCT}.json", "w") as f:
+        # Plain tuple ordering: by version string, then date (so "4.10" sorts before "4.9").
+        ordered = dict(sorted(releases.items()))
+        f.write(json.dumps(ordered, indent=2))
+
+
+if __name__ == '__main__':
+    main()  # runs only when the file is executed as a script, not when it is imported