Refactor HTTP URL fetching scripts
This commit introduces a common function for fetching HTTP URLs with enhanced capabilities (retries and use of a known User-Agent). Scripts that already needed those capabilities become simpler, and the other scripts are improved by gaining them as well. This commit also fixes some scripts that did not log properly (cos.py, eks.py, haproxy.py, palo-alto-networks.py, rhel.py, ros.py, unrealircd.py).
This commit is contained in:
39
src/rds.py
39
src/rds.py
@@ -1,8 +1,8 @@
|
||||
import re
|
||||
import urllib.request
|
||||
from bs4 import BeautifulSoup
|
||||
from datetime import datetime
|
||||
import json
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
from common import endoflife
|
||||
from datetime import datetime
|
||||
|
||||
dbs = {
|
||||
"mysql": "https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/MySQL.Concepts.VersionMgmt.html",
|
||||
@@ -18,24 +18,23 @@ for db, url in dbs.items():
|
||||
print(f"::group::{db}")
|
||||
releases = {}
|
||||
|
||||
with urllib.request.urlopen(url, data=None, timeout=5) as contents:
|
||||
html = contents.read().decode("utf-8")
|
||||
soup = BeautifulSoup(html, features="html5lib")
|
||||
response = endoflife.fetch_url(url)
|
||||
soup = BeautifulSoup(response, features="html5lib")
|
||||
|
||||
for table in soup.find_all("table"):
|
||||
for row in table.find_all("tr"):
|
||||
columns = row.find_all("td")
|
||||
for table in soup.find_all("table"):
|
||||
for row in table.find_all("tr"):
|
||||
columns = row.find_all("td")
|
||||
|
||||
# Must match both the 'Supported XXX minor versions' and
|
||||
# 'Supported XXX major versions' to have correct release dates
|
||||
if len(columns) > 3:
|
||||
r = r"(?P<v>\d+(?:\.\d+)*)" # https://regex101.com/r/BY1vwV/1
|
||||
m = re.search(r, columns[0].text.strip(), flags=re.IGNORECASE)
|
||||
if m:
|
||||
version = m.group("v")
|
||||
date = parse_date(columns[2].text.strip())
|
||||
print(f"{version} : {date}")
|
||||
releases[version] = date
|
||||
# Must match both the 'Supported XXX minor versions' and
|
||||
# 'Supported XXX major versions' to have correct release dates
|
||||
if len(columns) > 3:
|
||||
r = r"(?P<v>\d+(?:\.\d+)*)" # https://regex101.com/r/BY1vwV/1
|
||||
m = re.search(r, columns[0].text.strip(), flags=re.IGNORECASE)
|
||||
if m:
|
||||
version = m.group("v")
|
||||
date = parse_date(columns[2].text.strip())
|
||||
print(f"{version} : {date}")
|
||||
releases[version] = date
|
||||
|
||||
print("::endgroup::")
|
||||
with open(f"releases/amazon-rds-{db.lower()}.json", "w") as f:
|
||||
|
||||
Reference in New Issue
Block a user