Refactor HTTP URL fetching scripts
This commit introduces a common function for fetching HTTP URLs, with enhanced capabilities (retries and the use of a known User-Agent). Scripts that already needed those capabilities become simpler, while the other scripts are improved by gaining them. This commit also fixes some scripts that did not log properly (cos.py, eks.py, haproxy.py, palo-alto-networks.py, rhel.py, ros.py, unrealircd.py).
This commit is contained in:
@@ -1,8 +1,7 @@
|
||||
import json
|
||||
import re
|
||||
import urllib.request
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from common import endoflife
|
||||
|
||||
"""Fetch HAProxy versions with their dates from https://www.haproxy.org/download/.
|
||||
"""
|
||||
@@ -16,16 +15,13 @@ VERSION_REGEX = r"^(\d{4})\/(\d{2})\/(\d{2})\s+:\s+(\d+\.\d+\.\d.?)$"
|
||||
def fetch_cycles():
|
||||
cycles = []
|
||||
|
||||
print("Fetching cycles")
|
||||
with urllib.request.urlopen(
|
||||
"https://www.haproxy.org/download/") as response:
|
||||
soup = BeautifulSoup(response, features="html5lib")
|
||||
for link in soup.select("a"):
|
||||
m = re.match(CYCLE_REGEX, link.attrs["href"])
|
||||
if m:
|
||||
cycle = m.groups()[0]
|
||||
cycles.append(cycle)
|
||||
print(f"Found {cycle}")
|
||||
response = endoflife.fetch_url('https://www.haproxy.org/download/')
|
||||
soup = BeautifulSoup(response, features="html5lib")
|
||||
for link in soup.select("a"):
|
||||
m = re.match(CYCLE_REGEX, link.attrs["href"])
|
||||
if m:
|
||||
cycle = m.groups()[0]
|
||||
cycles.append(cycle)
|
||||
|
||||
# No changelog in https://www.haproxy.org/download/1.0/src
|
||||
cycles.remove("1.0")
|
||||
@@ -38,14 +34,13 @@ def fetch_releases(cycles):
|
||||
|
||||
for cycle in cycles:
|
||||
url = f"https://www.haproxy.org/download/{cycle}/src/CHANGELOG"
|
||||
print(f"Fetching version from {url}")
|
||||
with urllib.request.urlopen(url) as response:
|
||||
for line in response:
|
||||
m = re.match(VERSION_REGEX, line.decode("utf-8"))
|
||||
if m:
|
||||
year, month, day, version = m.groups()
|
||||
date = f"{year}-{month}-{day}"
|
||||
releases[version] = date
|
||||
response = endoflife.fetch_url(url)
|
||||
for line in response.split('\n'):
|
||||
m = re.match(VERSION_REGEX, line)
|
||||
if m:
|
||||
year, month, day, version = m.groups()
|
||||
date = f"{year}-{month}-{day}"
|
||||
releases[version] = date
|
||||
|
||||
return releases
|
||||
|
||||
|
||||
Reference in New Issue
Block a user