Refactor HTTP URL fetching scripts

This creates a common function to fetch HTTP URLs, with enhanced capabilities (retries, use of a known User-Agent).
Scripts that already needed those capabilities become simpler, and the other scripts now benefit from them as well.

This commit also fixes some scripts that did not log properly (cos.py, eks.py, haproxy.py, palo-alto-networks.py, rhel.py, ros.py, unrealircd.py).
This commit is contained in:
Marc Wrobel
2023-05-14 09:35:28 +02:00
parent 5176abd4d4
commit a16d9090d3
19 changed files with 295 additions and 311 deletions

View File

@@ -1,7 +1,7 @@
import urllib.request
from bs4 import BeautifulSoup
import re
import json
import re
from bs4 import BeautifulSoup
from common import endoflife
from datetime import datetime
# https://regex101.com/r/zPxBqT/1
@@ -9,8 +9,8 @@ REGEX = r"\d.\d+\.\d+-gke\.\d+"
def fetch_channel(channel):
url = "https://cloud.google.com/kubernetes-engine/docs/release-notes-{}".format(channel)
with urllib.request.urlopen(url, data=None, timeout=5) as response:
return BeautifulSoup(response, features="html5lib")
response = endoflife.fetch_url(url)
return BeautifulSoup(response, features="html5lib")
"""
Takes soup, and returns a dictionary of versions and their release dates