Refactor HTTP URL fetching scripts

This introduces a common function, `endoflife.fetch_url`, to fetch HTTP URLs with enhanced capabilities (automatic retries and use of a known User-Agent).
Scripts that already needed those capabilities become simpler, and the other scripts now benefit from them as well.

This commit also fixes some scripts that did not log properly (cos.py, eks.py, haproxy.py, palo-alto-networks.py, rhel.py, ros.py, unrealircd.py).
This commit is contained in:
Marc Wrobel
2023-05-14 09:35:28 +02:00
parent 5176abd4d4
commit a16d9090d3
19 changed files with 295 additions and 311 deletions

View File

@@ -1,8 +1,7 @@
import json
import re
import urllib.request
from bs4 import BeautifulSoup
from common import endoflife
"""Fetch HAProxy versions with their dates from https://www.haproxy.org/download/.
"""
@@ -16,16 +15,13 @@ VERSION_REGEX = r"^(\d{4})\/(\d{2})\/(\d{2})\s+:\s+(\d+\.\d+\.\d.?)$"
def fetch_cycles():
cycles = []
print("Fetching cycles")
with urllib.request.urlopen(
"https://www.haproxy.org/download/") as response:
soup = BeautifulSoup(response, features="html5lib")
for link in soup.select("a"):
m = re.match(CYCLE_REGEX, link.attrs["href"])
if m:
cycle = m.groups()[0]
cycles.append(cycle)
print(f"Found {cycle}")
response = endoflife.fetch_url('https://www.haproxy.org/download/')
soup = BeautifulSoup(response, features="html5lib")
for link in soup.select("a"):
m = re.match(CYCLE_REGEX, link.attrs["href"])
if m:
cycle = m.groups()[0]
cycles.append(cycle)
# No changelog in https://www.haproxy.org/download/1.0/src
cycles.remove("1.0")
@@ -38,14 +34,13 @@ def fetch_releases(cycles):
for cycle in cycles:
url = f"https://www.haproxy.org/download/{cycle}/src/CHANGELOG"
print(f"Fetching version from {url}")
with urllib.request.urlopen(url) as response:
for line in response:
m = re.match(VERSION_REGEX, line.decode("utf-8"))
if m:
year, month, day, version = m.groups()
date = f"{year}-{month}-{day}"
releases[version] = date
response = endoflife.fetch_url(url)
for line in response.split('\n'):
m = re.match(VERSION_REGEX, line)
if m:
year, month, day, version = m.groups()
date = f"{year}-{month}-{day}"
releases[version] = date
return releases