Refactor HTTP URL fetching scripts

This introduces a common function for fetching HTTP URLs, with enhanced capabilities (retries and use of a known User-Agent).
Scripts that already needed those capabilities become simpler, and the other scripts benefit from them as well.

This commit also fixes some scripts that did not log properly (cos.py, eks.py, haproxy.py, palo-alto-networks.py, rhel.py, ros.py, unrealircd.py).
This commit is contained in:
Marc Wrobel
2023-05-14 09:35:28 +02:00
parent 5176abd4d4
commit a16d9090d3
19 changed files with 295 additions and 311 deletions

View File

@@ -1,8 +1,8 @@
import re
import urllib.request
from bs4 import BeautifulSoup
from datetime import datetime
import json
import re
from bs4 import BeautifulSoup
from common import endoflife
from datetime import datetime
dbs = {
"mysql": "https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/MySQL.Concepts.VersionMgmt.html",
@@ -18,24 +18,23 @@ for db, url in dbs.items():
print(f"::group::{db}")
releases = {}
with urllib.request.urlopen(url, data=None, timeout=5) as contents:
html = contents.read().decode("utf-8")
soup = BeautifulSoup(html, features="html5lib")
response = endoflife.fetch_url(url)
soup = BeautifulSoup(response, features="html5lib")
for table in soup.find_all("table"):
for row in table.find_all("tr"):
columns = row.find_all("td")
for table in soup.find_all("table"):
for row in table.find_all("tr"):
columns = row.find_all("td")
# Must match both the 'Supported XXX minor versions' and
# 'Supported XXX major versions' to have correct release dates
if len(columns) > 3:
r = r"(?P<v>\d+(?:\.\d+)*)" # https://regex101.com/r/BY1vwV/1
m = re.search(r, columns[0].text.strip(), flags=re.IGNORECASE)
if m:
version = m.group("v")
date = parse_date(columns[2].text.strip())
print(f"{version} : {date}")
releases[version] = date
# Must match both the 'Supported XXX minor versions' and
# 'Supported XXX major versions' to have correct release dates
if len(columns) > 3:
r = r"(?P<v>\d+(?:\.\d+)*)" # https://regex101.com/r/BY1vwV/1
m = re.search(r, columns[0].text.strip(), flags=re.IGNORECASE)
if m:
version = m.group("v")
date = parse_date(columns[2].text.strip())
print(f"{version} : {date}")
releases[version] = date
print("::endgroup::")
with open(f"releases/amazon-rds-{db.lower()}.json", "w") as f: