Align custom scripts with generic scripts (#445)

Align custom scripts with generic scripts, making them configurable. This has a few advantages: - script code is more unified, - no more hard-coded method names in scripts, which is less error-prone and makes it easier to rename scripts, - no more hard-coded product names in scripts, which is less error-prone and makes it easier to rename products, - fewer hard-coded URLs and regexes in scripts, which makes auto-configuration more expressive / updatable. Also added method `endoflife.list_configs_from_argv()` so that it is easier to manipulate script arguments.
2025-06-07 12:41:59 +02:00
parent 60a62e4696
commit f404274310
63 changed files with 704 additions and 830 deletions
--- a/src/apache-subversion.py
+++ b/src/apache-subversion.py
@@ -1,33 +1,21 @@
 import logging
-import re

 from bs4 import BeautifulSoup
-from common import dates, http, releasedata
+from common import dates, endoflife, http, releasedata

-# https://regex101.com/r/k4i7EO/1 only non beta versions
-VERSION_PATTERN = re.compile(r"^Subversion\s(?P<version>[1-9]\d*.\d+\.\d+)$")
-# https://regex101.com/r/GsimYd/2
-DATE_PATTERN = re.compile(r"^\((?P<date>\w+,\s\d{1,2}\s\w+\s\d{4})")
+for config in endoflife.list_configs_from_argv():
+    with releasedata.ProductData(config.product) as product_data:
+        response = http.fetch_url(config.url)
+        soup = BeautifulSoup(response.text, features="html5lib")

-with releasedata.ProductData("apache-subversion") as product_data:
-    relnotes = http.fetch_url("https://subversion.apache.org/docs/release-notes/release-history.html")
-    relnotes_soup = BeautifulSoup(relnotes.text, features="html5lib")
+        ul = soup.find("h2").find_next("ul")
+        for li in ul.find_all("li"):
+            text = li.get_text(strip=True)
+            match = config.first_match(text)
+            if not match:
+                logging.info(f"Skipping {text}, does not match any regex")
+                continue

-    ul = relnotes_soup.find("h2").find_next("ul")
-    for li in ul.find_all("li"):
-        b = li.find_next("b") # b contains the version
-        version_text = b.get_text(strip=True)
-        version_match = VERSION_PATTERN.match(version_text)
-        if not version_match:
-            logging.info(f"Skipping {version_text}, does not match version regex")
-            continue
-
-        remaining_part_str = b.next_sibling.get_text(strip=True)
-        date_match = DATE_PATTERN.match(remaining_part_str)
-        if not date_match:
-            logging.info(f"Skipping {version_text}, no matching date in '{remaining_part_str}'")
-            continue
-
-        version = version_match.group("version")
-        date = dates.parse_date(date_match.group("date"))
-        product_data.declare_version(version, date)
+            version = match.group("version")
+            date = dates.parse_date(match.group("date"))
+            product_data.declare_version(version, date)