Refactor HTTP URL fetching scripts

This creates a common function to fetch HTTP URLs, with enhanced capabilities (retry, use of a known User-Agent).
It makes scripts that need those capabilities simpler, while improving other scripts.

This commit also fixes some scripts that did not log properly (cos.py, eks.py, haproxy.py, palo-alto-networks.py, rhel.py, ros.py, unrealircd.py).
This commit is contained in:
Marc Wrobel
2023-05-14 09:35:28 +02:00
parent 5176abd4d4
commit a16d9090d3
19 changed files with 295 additions and 311 deletions

View File

@@ -1,11 +1,11 @@
import concurrent.futures
import json
from typing import Tuple
from datetime import datetime
import re
import requests
import urllib.request
from bs4 import BeautifulSoup
import concurrent.futures
from common import endoflife
from datetime import datetime
from typing import Tuple
"""Fetch Firefox versions with their dates from https://www.mozilla.org/en-US/firefox/releases/"""
# Firefox releases index page that this script reads versions and dates from.
URL = "https://www.mozilla.org/en-US/firefox/releases/"
@@ -107,22 +107,9 @@ def get_version_and_date(release_page: str, release_version: str) -> Tuple[str,
raise UnsupportedReleasePageError("Unable to find version and date for %s" % release_page)
def make_bs_request(url: str) -> BeautifulSoup:
    """Fetch ``url`` and return its content parsed as a BeautifulSoup document.

    The download is delegated to ``endoflife.fetch_url`` with a 10-second
    timeout and ``retry_count=5``, because requests to www.mozilla.org often
    time out. The returned content is parsed with the ``html5lib`` parser.

    :param url: address of the page to download.
    :return: the parsed page.
    """
    # Retrying is the shared helper's job, so no hand-rolled urllib retry loop
    # (and no leftover ``last_exception`` / ``headers`` locals) lives here.
    # NOTE(review): whatever is raised once retries are exhausted now comes
    # from endoflife.fetch_url — confirm callers do not rely on TimeoutError.
    response = endoflife.fetch_url(url, timeout=10, retry_count=5)
    return BeautifulSoup(response, features="html5lib")
def fetch_releases():
releases = {}