Align custom scripts with generic scripts (#445)

Align custom scripts with generic scripts, making them configurable. This has a few advantages: - script code is more unified, - no more hard-coded method names in scripts, which is less error prone and make it easier to rename scripts, - no more hard coded product names in scripts, which is less error prone and make it easier to rename products, - less hard-coded URLs and regexes in scripts, which makes auto-configuration more expressive / updatable, Also added method `endoflife.list_configs_from_argv()` so that it is easier to manipulate scripts arguments.
2025-06-07 12:41:59 +02:00
parent 60a62e4696
commit f404274310
63 changed files with 704 additions and 830 deletions
--- a/src/cos.py
+++ b/src/cos.py
@@ -2,7 +2,7 @@ import datetime
 import re

 from bs4 import BeautifulSoup
-from common import dates, http, releasedata
+from common import dates, endoflife, http, releasedata

 MILESTONE_PATTERN = re.compile(r'COS \d+ LTS')
 VERSION_PATTERN = re.compile(r"^(cos-\d+-\d+-\d+-\d+)")
@@ -14,30 +14,31 @@ def parse_date(date_text: str) -> datetime:
    return dates.parse_date(date_text)


-with releasedata.ProductData("cos") as product_data:
-    main = http.fetch_url("https://cloud.google.com/container-optimized-os/docs/release-notes/")
-    main_soup = BeautifulSoup(main.text, features="html5lib")
-    milestones = [cell.text.split(' ')[1] for cell in main_soup.find_all('td', string=MILESTONE_PATTERN)]
+for config in endoflife.list_configs_from_argv():
+    with releasedata.ProductData(config.product) as product_data:
+        main = http.fetch_url(config.url)
+        main_soup = BeautifulSoup(main.text, features="html5lib")
+        milestones = [cell.text.split(' ')[1] for cell in main_soup.find_all('td', string=MILESTONE_PATTERN)]

-    milestones_urls = [f"{main.url}m{milestone}" for milestone in milestones]
-    for milestone in http.fetch_urls(milestones_urls):
-        milestone_soup = BeautifulSoup(milestone.text, features="html5lib")
-        for article in milestone_soup.find_all('article', class_='devsite-article'):
-            for heading in article.find_all(['h2', 'h3']):  # headings contains the date, which we parse
-                version_str = heading.get('data-text')
-                version_match = VERSION_PATTERN.match(version_str)
-                if not version_match:
-                    continue
+        milestones_urls = [f"{main.url}m{milestone}" for milestone in milestones]
+        for milestone in http.fetch_urls(milestones_urls):
+            milestone_soup = BeautifulSoup(milestone.text, features="html5lib")
+            for article in milestone_soup.find_all('article', class_='devsite-article'):
+                for heading in article.find_all(['h2', 'h3']):  # headings contains the date, which we parse
+                    version_str = heading.get('data-text')
+                    version_match = VERSION_PATTERN.match(version_str)
+                    if not version_match:
+                        continue

-                try:  # 1st row is the header, so pick the first td in the 2nd row
-                    date_str = heading.find_next('tr').find_next('tr').find_next('td').text
-                except AttributeError:  # In some older releases, it is mentioned as Date: [Date]
-                    date_str = heading.find_next('i').text
+                    try:  # 1st row is the header, so pick the first td in the 2nd row
+                        date_str = heading.find_next('tr').find_next('tr').find_next('td').text
+                    except AttributeError:  # In some older releases, it is mentioned as Date: [Date]
+                        date_str = heading.find_next('i').text

-                try:
-                    date = parse_date(date_str)
-                except ValueError:  # for some h3, the date is in the previous h2
-                    date_str = heading.find_previous('h2').get('data-text')
-                    date = parse_date(date_str)
+                    try:
+                        date = parse_date(date_str)
+                    except ValueError:  # for some h3, the date is in the previous h2
+                        date_str = heading.find_previous('h2').get('data-text')
+                        date = parse_date(date_str)

-                product_data.declare_version(version_match.group(1), date)
+                    product_data.declare_version(version_match.group(1), date)