Refactor HTTP URL fetching scripts

This creates a common function to fetch HTTP URLs, with enhanced capabilities (retry, use of a known User-Agent).
It simplifies the scripts that need those capabilities, and improves the remaining scripts along the way.

This commit also fixes some scripts that did not log properly (cos.py, eks.py, haproxy.py, palo-alto-networks.py, rhel.py, ros.py, unrealircd.py).
This commit is contained in:
Marc Wrobel
2023-05-14 09:35:28 +02:00
parent 5176abd4d4
commit a16d9090d3
19 changed files with 295 additions and 311 deletions

View File

@@ -1,29 +1,31 @@
"""Fetch RHEL release dates and write them to releases/redhat.json.

Scrapes the release table from the Red Hat article at URL, extracts each
RHEL version number with a regex, and records its General Availability
date. Output is a JSON object mapping "major[.minor[.minor2]]" -> date
string, as rendered in the source table.

NOTE(review): the source chunk was a markerless diff that interleaved the
old urllib.request-based fetch with the new endoflife.fetch_url-based one
(duplicate BeautifulSoup import, dead headers/req/urlopen block). This is
the reconstructed post-commit version of the script.
"""

import json
import re

from bs4 import BeautifulSoup

from common import endoflife

URL = "https://access.redhat.com/articles/3078"

# Matches e.g. "RHEL 7", "RHEL 7.9", "RHEL 7 Update 9", "RHEL 7 GA".
# https://regex101.com/r/877ibq/1
regex = r"RHEL (?P<major>\d)(\. ?(?P<minor>\d+))?(( Update (?P<minor2>\d))| GA)?"

versions = {}

# ::group::/::endgroup:: are GitHub Actions log-folding directives.
print("::group::rhel")
# fetch_url handles retries and a browser-like User-Agent (see common/endoflife).
response = endoflife.fetch_url(URL)
soup = BeautifulSoup(response, features="html5lib")
for tr in soup.findAll("tr"):
    td_list = tr.findAll("td")
    if len(td_list) > 0:
        version = td_list[0].get_text()
        # Assumes every first-column cell matches the regex; a non-matching
        # row would raise AttributeError on .groupdict() — TODO confirm.
        m = re.match(regex, version.strip()).groupdict()
        version = m["major"]
        if m["minor"]:
            version += ".%s" % m["minor"]
        if m["minor2"]:
            version += ".%s" % m["minor2"]
        date = td_list[1].get_text()
        versions[version] = date
        print("%s: %s" % (version, date))
print("::endgroup::")

with open("releases/redhat.json", "w") as f:
    f.write(json.dumps(versions, indent=2))