[linuxkernel] Use git.kernel.org tags page to retrieve versions (closes #94)

Automation for the Linux Kernel was based on the mirror https://github.com/gregkh/linux, which is updated infrequently. The reason for that is documented in commit 2ecd6ee9df (i.e. we need the git repository to support partial clone). Using https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/refs/tags allows us to retrieve new versions much sooner.
2023-05-02 23:23:59 +02:00
parent 07a6765996
commit 7c9f3f5146
1 changed files with 69 additions and 0 deletions
--- a/src/linuxkernel.py
+++ b/src/linuxkernel.py
@@ -0,0 +1,69 @@
 import re
 import json
 import urllib.request
 from datetime import datetime, timezone
 from bs4 import BeautifulSoup
 # NOTE(review): this string comes after the imports, so Python does not treat
 # it as the module docstring; it only serves as in-file documentation.
 """Fetch Linux Kernel versions with their dates from
 https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/refs/tags.
 Ideally we would want to use the kernel.org git repository directly
 (https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/), but it does
 not support partial clone so we cannot.
 """
 # Product identifier: used as the log group label and as the base name of the
 # JSON file written under releases/.
 PRODUCT = "linuxkernel"
 # HTML page listing the tags of the stable kernel tree; this is the page that
 # gets downloaded and scraped for versions and dates.
 URL = "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/refs/tags"
 # Turn a timestamp such as "2023-05-01 08:32:34 +0900" into its UTC day.
 def parse_date(d):
     """Return the UTC calendar date (``YYYY-MM-DD``) of the timestamp *d*.

     *d* must be formatted like ``2023-05-01 08:32:34 +0900``. The UTC offset
     is honoured, so the returned day can differ from the day written in *d*
     (the example above yields ``2023-04-30``). ``datetime.strptime`` raises
     ``ValueError`` when *d* does not match that format.
     """
     local_moment = datetime.strptime(d, "%Y-%m-%d %H:%M:%S %z")
     utc_moment = local_moment.astimezone(timezone.utc)
     return utc_moment.strftime("%Y-%m-%d")
 def make_bs_request(url):
     """Download *url* and return its body parsed as a BeautifulSoup tree.

     The HTTP request is given a 5 second timeout and the markup is parsed
     with the ``html5lib`` backend. Errors raised by ``urllib`` are not
     caught here.
     """
     request = urllib.request.Request(url)
     with urllib.request.urlopen(request, timeout=5) as http_response:
         markup = http_response.read()
     return BeautifulSoup(markup, features="html5lib")
 def fetch_releases():
     """Scrape the tags page at ``URL`` and map each version to its date.

     Every ``<tr>`` of every ``<table class="list">`` is inspected. A row is
     kept only when it has exactly four cells, its first cell is a plain
     release tag (``v`` followed by dot-separated numbers, e.g. ``v6.3.1``),
     and its fourth cell contains a ``<span>`` with a non-empty ``title``
     attribute holding the tag timestamp.

     Returns:
         dict mapping the version without the leading ``v`` (``"6.3.1"``) to
         its UTC date as ``YYYY-MM-DD``. Each accepted pair is also printed.

     Raises:
         ValueError: propagated from ``parse_date`` when a timestamp is not in
             the expected format.
     """
     # Compiled once instead of on every row. ``fullmatch`` rejects anything
     # that is not exactly a release tag, e.g. "v6.3-rc1" or "vfoo-v1.2".
     tag_pattern = re.compile(r"v(?P<v>\d+(?:\.\d+)*)")

     releases = {}
     soup = make_bs_request(URL)
     for table in soup.find_all("table", class_='list'):
         for row in table.find_all("tr"):
             columns = row.find_all("td")
             if len(columns) != 4:
                 continue

             tag_match = tag_pattern.fullmatch(columns[0].text.strip())
             if tag_match is None:
                 continue

             # Look for the span *inside* the date cell only. find_next()
             # would keep walking the rest of the document and could pick up
             # the date of a later row when this cell has no span.
             date_span = columns[3].find("span")
             # .get() tolerates a span without a title attribute.
             datetime_text = date_span.get("title") if date_span else None
             if not datetime_text:
                 continue

             version = tag_match.group("v")
             date = parse_date(datetime_text)
             print(f"{version} : {date}")
             releases[version] = date
     return releases
 def main():
     """Fetch the releases and write them to ``releases/<PRODUCT>.json``.

     The scraping output is wrapped between ``::group::`` and ``::endgroup::``
     marker lines. The resulting JSON object is indented by two spaces and its
     entries are ordered by version, then date, ascending; versions are
     compared as plain strings.
     """
     print(f"::group::{PRODUCT}")
     fetched = fetch_releases()
     print("::endgroup::")

     # (version, date) tuples compare element by element, which gives the
     # "version first, then date" ascending order without an explicit key.
     ordered = dict(sorted(fetched.items()))
     payload = json.dumps(ordered, indent=2)

     with open(f"releases/{PRODUCT}.json", "w") as out:
         out.write(payload)
 # Run the scraper only when this file is executed as a script, not on import.
 if __name__ == '__main__':
    main()