Refactor HTTP URL fetching scripts

This creates a common function, `endoflife.fetch_url`, to fetch HTTP URLs with enhanced capabilities (retries, use of a known User-Agent). It simplifies the scripts that already needed those capabilities and brings them to the other scripts as well. This commit also fixes some scripts that did not log properly (cos.py, eks.py, haproxy.py, palo-alto-networks.py, rhel.py, ros.py, unrealircd.py).
2023-05-14 09:35:28 +02:00
parent 5176abd4d4
commit a16d9090d3
19 changed files with 295 additions and 311 deletions
--- a/src/gke.py
+++ b/src/gke.py
@@ -1,7 +1,7 @@
-import urllib.request
-from bs4 import BeautifulSoup
-import re
 import json
+import re
+from bs4 import BeautifulSoup
+from common import endoflife
 from datetime import datetime

 # https://regex101.com/r/zPxBqT/1
@@ -9,8 +9,8 @@ REGEX = r"\d.\d+\.\d+-gke\.\d+"

 def fetch_channel(channel):
    url = "https://cloud.google.com/kubernetes-engine/docs/release-notes-{}".format(channel)
-    with urllib.request.urlopen(url, data=None, timeout=5) as response:
-        return BeautifulSoup(response, features="html5lib")
+    response = endoflife.fetch_url(url)
+    return BeautifulSoup(response, features="html5lib")

 """
 Takes soup, and returns a dictionary of versions and their release dates