[linuxkernel] Use git.kernel.org tags page to retrieve versions (closes #94)

Automation for the Linux Kernel was based on the mirror https://github.com/gregkh/linux, which is updated infrequently. The reason for using that mirror is documented in 2ecd6ee9df (i.e. we need a git repository that supports partial clone).

Using https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/refs/tags instead allows us to retrieve new versions much more quickly.
This commit is contained in:
Marc Wrobel
2023-05-02 23:23:59 +02:00
committed by Nemo
parent 07a6765996
commit 7c9f3f5146

69
src/linuxkernel.py Normal file
View File

@@ -0,0 +1,69 @@
import re
import json
import urllib.request
from datetime import datetime, timezone
from bs4 import BeautifulSoup
"""Fetch Linux Kernel versions with their dates from
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/refs/tags.
Ideally we would want to use the kernel.org git repository directly
(https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/), but it does
not support partial clone so we cannot.
"""
# Product identifier; used for the log group label and the output file name.
PRODUCT = "linuxkernel"
# Tags page that fetch_releases() downloads and scrapes for versions and dates.
URL = "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/refs/tags"
def parse_date(d):
    """Convert a timestamp such as ``2023-05-01 08:32:34 +0900`` to a UTC date.

    Args:
        d: timestamp string in ``%Y-%m-%d %H:%M:%S %z`` format (the numeric
            UTC offset is required).

    Returns:
        The calendar date of that instant in UTC, formatted ``YYYY-MM-DD``.

    Raises:
        ValueError: if ``d`` does not match the expected format.
    """
    local_moment = datetime.strptime(d, "%Y-%m-%d %H:%M:%S %z")
    utc_moment = local_moment.astimezone(timezone.utc)
    return utc_moment.strftime("%Y-%m-%d")
def make_bs_request(url):
    """Download ``url`` and return its content parsed as a BeautifulSoup tree.

    The request uses a 5-second timeout and the response body is parsed
    with the ``html5lib`` parser.
    """
    request = urllib.request.Request(url)
    with urllib.request.urlopen(request, timeout=5) as response:
        body = response.read()
    return BeautifulSoup(body, features="html5lib")
def fetch_releases():
    """Scrape the tags page at ``URL`` and map each release version to its date.

    Only tags named exactly ``v<digits>[.<digits>...]`` are kept, so names
    with any suffix (for example ``v6.3-rc1``) are skipped. Each kept version
    is printed as ``<version> : <date>``.

    Returns:
        dict mapping the version (without the leading ``v``) to the date
        string produced by ``parse_date``.
    """
    # Compiled once, outside the row loop. The whole tag name must match so
    # that a trailing "v<digits>" fragment of some other tag name is never
    # mistaken for a version.
    version_pattern = re.compile(r"v(?P<v>\d+(?:\.\d+)*)")

    releases = {}
    soup = make_bs_request(URL)
    for table in soup.find_all("table", class_='list'):
        for row in table.find_all("tr"):
            columns = row.find_all("td")
            if len(columns) != 4:
                continue  # only rows with exactly four cells are parsed

            m = version_pattern.fullmatch(columns[0].text.strip())
            if not m:
                continue

            # Search only *inside* the fourth cell: find_next() keeps walking
            # through the rest of the document, so a cell without a <span>
            # would silently pick up the date of a later row.
            date_span = columns[3].find('span')
            # .get() tolerates a span that has no title attribute.
            datetime_text = date_span.get('title') if date_span else None
            if not datetime_text:
                continue

            version = m.group("v")
            date = parse_date(datetime_text)
            print(f"{version} : {date}")
            releases[version] = date
    return releases
def main():
    """Fetch the releases and write them to ``releases/<PRODUCT>.json``.

    The output printed while fetching is wrapped between ``::group::`` and
    ``::endgroup::`` marker lines. The JSON object is written with a
    2-space indent and its entries sorted in ascending order.
    """
    print(f"::group::{PRODUCT}")
    releases = fetch_releases()
    print("::endgroup::")

    # Versions are unique dict keys, so sorting the (version, date) pairs
    # directly orders them by version string first, then by date.
    ordered = dict(sorted(releases.items()))
    with open(f"releases/{PRODUCT}.json", "w") as f:
        f.write(json.dumps(ordered, indent=2))


if __name__ == '__main__':
    main()