Refactor HTTP URL fetching scripts

This creates a common function to fetch HTTP URLs, with enhanced capabilities (retry, use of a known User-Agent).
It makes scripts that need those capabilities simpler, while improving other scripts.

This commit also fixes some scripts that did not log properly (cos.py, eks.py, haproxy.py, palo-alto-networks.py, rhel.py, ros.py, unrealircd.py).
This commit is contained in:
Marc Wrobel
2023-05-14 09:35:28 +02:00
parent 5176abd4d4
commit a16d9090d3
19 changed files with 295 additions and 311 deletions

View File

@@ -1,10 +1,14 @@
import frontmatter
import urllib.request
from glob import glob
from os import path
# See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent.
USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0'
def list_products(method, products_filter=None, pathname = "website/products"):
def list_products(method, products_filter=None, pathname="website/products"):
"""Return a list of products that are using the same given update method.
"""
products_with_method = {}
@@ -25,3 +29,21 @@ def list_products(method, products_filter=None, pathname = "website/products"):
products_with_method[product_name] = configs
return products_with_method
def fetch_url(url, retry_count=2, timeout=5, data=None, headers=None, encoding='utf-8'):
    """Fetch *url* and return its body decoded as text, retrying on failure.

    :param url: the URL to fetch.
    :param retry_count: total number of attempts before giving up (must be >= 1).
    :param timeout: per-attempt timeout in seconds passed to urlopen.
    :param data: optional request body (bytes), passed through to urlopen.
    :param headers: optional dict of extra HTTP headers; merged on top of the
        default User-Agent header, so callers may override it.
    :param encoding: charset used to decode the response body.
    :raises ValueError: if retry_count is less than 1 (no attempt possible).
    :raises Exception: the last exception raised by urlopen if all attempts fail.
    """
    if retry_count < 1:
        # Guard: the original code would `raise None` (a TypeError) here.
        raise ValueError(f"retry_count must be >= 1, got {retry_count}")
    # Merge the default User-Agent with caller-supplied headers; caller values
    # win on collision. NOTE: the original expression
    # `{'User-Agent': USER_AGENT} | {} if headers is None else headers`
    # parsed as a ternary, so the default User-Agent was silently dropped
    # whenever custom headers were passed — defeating the point of USER_AGENT.
    merged_headers = {'User-Agent': USER_AGENT} | (headers or {})
    request = urllib.request.Request(url, headers=merged_headers)
    last_exception = None
    for attempt in range(retry_count):
        try:
            response = urllib.request.urlopen(request, data=data, timeout=timeout)
            return response.read().decode(encoding)
        except Exception as e:
            # Keep the exception so the final failure surfaces to the caller.
            last_exception = e
            print(f"Fetch of {url} failed (retry={attempt}), got: {e}")
    raise last_exception