Refactor HTTP URL fetching scripts

This creates a common function to fetch HTTP URLs, with enhanced capabilities (retries, use of a known User-Agent). Scripts that already needed those capabilities become simpler, and the other scripts now benefit from them as well. This commit also fixes some scripts that did not log properly (cos.py, eks.py, haproxy.py, palo-alto-networks.py, rhel.py, ros.py, unrealircd.py).
2023-05-14 09:35:28 +02:00
parent 5176abd4d4
commit a16d9090d3
19 changed files with 295 additions and 311 deletions
--- a/src/palo-alto-networks.py
+++ b/src/palo-alto-networks.py
@@ -1,8 +1,8 @@
-import json
-import urllib.request
 import datetime
+import json
 import re
 from bs4 import BeautifulSoup
+from common import endoflife

 URL = "https://www.paloaltonetworks.com/services/support/end-of-life-announcements/end-of-life-summary"

@@ -15,33 +15,37 @@ ID_MAPPING = {

 def update_releases(html_identifier, file):
    versions = {}
-    with urllib.request.urlopen(URL, data=None, timeout=5) as response:
-        soup = BeautifulSoup(response, features="html5lib")
-        table = soup.find(id=html_identifier)
-        for tr in table.findAll("tr")[3:]:
-            td_list = tr.findAll("td")
-            version = (
-                td_list[0].get_text().strip().lower().replace(" ", "-").replace("*", "")
-            )
-            if file == "pan-xdr":
-                if "xdr" not in version:
-                    continue
-            version = version.removesuffix("-(cortex-xdr-agent)")
-            version = version.removesuffix("-(vm-series-only)")
-            version = version.removesuffix("-(panorama-only)")
-            if len(td_list) > 1 and version != "":
-                # Date formats differ between different products
-                try:
-                    month, date, year = td_list[1].get_text().split("/")
-                    abs_date = f"{year}-{month:0>2}-{date:0>2}"
-                except Exception:
-                    # A few dates have 1st, 2nd, 4th etc. Fix that:
-                    d = td_list[1].get_text()
-                    d = re.sub(r'(\w+) (\d{1,2})(?:\w{2}), (\d{4})', r'\1 \2, \3', d)
-                    date = datetime.datetime.strptime(d, "%B %d, %Y")
-                    abs_date = date.strftime("%Y-%m-%d")

-                versions[version] = abs_date
+    print(f"::group::{html_identifier}")
+    response = endoflife.fetch_url(URL)
+    soup = BeautifulSoup(response, features="html5lib")
+    table = soup.find(id=html_identifier)
+    for tr in table.findAll("tr")[3:]:
+        td_list = tr.findAll("td")
+        version = (
+            td_list[0].get_text().strip().lower().replace(" ", "-").replace("*", "")
+        )
+        if file == "pan-xdr":
+            if "xdr" not in version:
+                continue
+        version = version.removesuffix("-(cortex-xdr-agent)")
+        version = version.removesuffix("-(vm-series-only)")
+        version = version.removesuffix("-(panorama-only)")
+        if len(td_list) > 1 and version != "":
+            # Date formats differ between different products
+            try:
+                month, date, year = td_list[1].get_text().split("/")
+                abs_date = f"{year}-{month:0>2}-{date:0>2}"
+            except Exception:
+                # A few dates have 1st, 2nd, 4th etc. Fix that:
+                d = td_list[1].get_text()
+                d = re.sub(r'(\w+) (\d{1,2})(?:\w{2}), (\d{4})', r'\1 \2, \3', d)
+                date = datetime.datetime.strptime(d, "%B %d, %Y")
+                abs_date = date.strftime("%Y-%m-%d")
+
+            versions[version] = abs_date
+            print("%s: %s" % (version, abs_date))
+    print("::endgroup::")

    with open("releases/%s.json" % file, "w") as f:
        f.write(json.dumps(versions, indent=2))