Refactor HTTP URL fetching scripts

This creates a common function to fetch HTTP URLs, with enhanced capabilities (retries and a known User-Agent). Scripts that need those capabilities become simpler, and the other scripts benefit from them as well. This commit also fixes some scripts that did not log properly (cos.py, eks.py, haproxy.py, palo-alto-networks.py, rhel.py, ros.py, unrealircd.py).
2023-05-14 09:35:28 +02:00
parent 5176abd4d4
commit a16d9090d3
19 changed files with 295 additions and 311 deletions
--- a/src/rds.py
+++ b/src/rds.py
@@ -1,8 +1,8 @@
-import re
-import urllib.request
-from bs4 import BeautifulSoup
-from datetime import datetime
 import json
+import re
+from bs4 import BeautifulSoup
+from common import endoflife
+from datetime import datetime

 dbs = {
    "mysql": "https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/MySQL.Concepts.VersionMgmt.html",
@@ -18,24 +18,23 @@ for db, url in dbs.items():
    print(f"::group::{db}")
    releases = {}

-    with urllib.request.urlopen(url, data=None, timeout=5) as contents:
-        html = contents.read().decode("utf-8")
-        soup = BeautifulSoup(html, features="html5lib")
+    response = endoflife.fetch_url(url)
+    soup = BeautifulSoup(response, features="html5lib")

-        for table in soup.find_all("table"):
-            for row in table.find_all("tr"):
-                columns = row.find_all("td")
+    for table in soup.find_all("table"):
+        for row in table.find_all("tr"):
+            columns = row.find_all("td")

-                # Must match both the 'Supported XXX minor versions' and
-                # 'Supported XXX major versions' to have correct release dates
-                if len(columns) > 3:
-                    r = r"(?P<v>\d+(?:\.\d+)*)" # https://regex101.com/r/BY1vwV/1
-                    m = re.search(r, columns[0].text.strip(), flags=re.IGNORECASE)
-                    if m:
-                        version = m.group("v")
-                        date = parse_date(columns[2].text.strip())
-                        print(f"{version} : {date}")
-                        releases[version] = date
+            # Must match both the 'Supported XXX minor versions' and
+            # 'Supported XXX major versions' to have correct release dates
+            if len(columns) > 3:
+                r = r"(?P<v>\d+(?:\.\d+)*)" # https://regex101.com/r/BY1vwV/1
+                m = re.search(r, columns[0].text.strip(), flags=re.IGNORECASE)
+                if m:
+                    version = m.group("v")
+                    date = parse_date(columns[2].text.strip())
+                    print(f"{version} : {date}")
+                    releases[version] = date

    print("::endgroup::")
    with open(f"releases/amazon-rds-{db.lower()}.json", "w") as f: