Refactor HTTP URL fetching scripts

This creates a common function (`endoflife.fetch_url`) to fetch HTTP URLs, with enhanced capabilities (retries, use of a known User-Agent). Scripts that already needed those capabilities become simpler, and the other scripts now benefit from them as well. This commit also fixes some scripts that did not log properly (cos.py, eks.py, haproxy.py, palo-alto-networks.py, rhel.py, ros.py, unrealircd.py).
2023-05-14 09:35:28 +02:00
parent 5176abd4d4
commit a16d9090d3
19 changed files with 295 additions and 311 deletions
--- a/src/distrowatch.py
+++ b/src/distrowatch.py
@@ -1,10 +1,9 @@
+import json
 import re
 import sys
-import json
-import urllib.request
 from bs4 import BeautifulSoup
-from liquid import Template
 from common import endoflife
+from liquid import Template

 METHOD = 'distrowatch'
 DEFAULT_TAG_TEMPLATE = (  # Same as used in Ruby (update.rb)
@@ -28,15 +27,15 @@ def get_versions_from_headline(regex, headline, template):
 def fetch_releases(distrowatch_id, regex, template):
    releases = {}
    l_template = Template(template)
-    url = "https://distrowatch.com/index.php?distribution=%s" % distrowatch_id
-    with urllib.request.urlopen(url, data=None, timeout=5) as response:
-        soup = BeautifulSoup(response, features="html5lib")
-        for table in soup.select("td.News1>table.News"):
-            headline = table.select_one("td.NewsHeadline a[href]").get_text().strip()
-            date = table.select_one("td.NewsDate").get_text()
-            for v in get_versions_from_headline(regex, headline, l_template):
-                print("%s: %s" % (v, date))
-                releases[v] = date
+    url = f"https://distrowatch.com/index.php?distribution={distrowatch_id}"
+    response = endoflife.fetch_url(url)
+    soup = BeautifulSoup(response, features="html5lib")
+    for table in soup.select("td.News1>table.News"):
+        headline = table.select_one("td.NewsHeadline a[href]").get_text().strip()
+        date = table.select_one("td.NewsDate").get_text()
+        for v in get_versions_from_headline(regex, headline, l_template):
+            print("%s: %s" % (v, date))
+            releases[v] = date
    return releases