Improve HTTP handling in scripts (#449)

Provide some helper methods to hide the complexity of parsing HTML, JSON, YAML, XML or MediaWiki wikitext (the `fetch_markdown` helper actually parses wikitext via mwparserfromhell).
This commit is contained in:
Marc Wrobel
2025-06-28 11:46:04 +02:00
parent fda4967c38
commit 312ce078bb
43 changed files with 103 additions and 137 deletions

View File

@@ -1,6 +1,5 @@
import logging import logging
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches EKS versions from AWS docs. """Fetches EKS versions from AWS docs.
@@ -8,8 +7,8 @@ Now that AWS no longer publishes docs on GitHub, we use the Web Archive to get t
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
html = BeautifulSoup(response.text, features="html5lib")
for tr in html.select("#main-col-body")[0].findAll("tr"): for tr in html.select("#main-col-body")[0].findAll("tr"):
cells = tr.findAll("td") cells = tr.findAll("td")
if not cells: if not cells:

View File

@@ -1,5 +1,4 @@
import logging import logging
import xml.dom.minidom
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
@@ -7,8 +6,7 @@ from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
rss_response = http.fetch_url(config.url) rss = http.fetch_xml(config.url)
rss = xml.dom.minidom.parseString(rss_response.text)
for entry in rss.getElementsByTagName("item"): for entry in rss.getElementsByTagName("item"):
version_str = entry.getElementsByTagName("title")[0].firstChild.nodeValue version_str = entry.getElementsByTagName("title")[0].firstChild.nodeValue

View File

@@ -1,14 +1,12 @@
import logging import logging
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
ul = soup.find("h2").find_next("ul") ul = html.find("h2").find_next("ul")
for li in ul.find_all("li"): for li in ul.find_all("li"):
text = li.get_text(strip=True) text = li.get_text(strip=True)
match = config.first_match(text) match = config.first_match(text)

View File

@@ -1,16 +1,14 @@
import logging import logging
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches AWS lambda runtimes with their support / EOL dates from https://docs.aws.amazon.com.""" """Fetches AWS lambda runtimes with their support / EOL dates from https://docs.aws.amazon.com."""
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for i, table in enumerate(soup.find_all("table")): for i, table in enumerate(html.find_all("table")):
headers = [th.get_text().strip().lower() for th in table.find("thead").find_all("tr")[0].find_all("th")] headers = [th.get_text().strip().lower() for th in table.find("thead").find_all("tr")[0].find_all("th")]
if "identifier" not in headers or "deprecation date" not in headers or "block function update" not in headers: if "identifier" not in headers or "deprecation date" not in headers or "block function update" not in headers:
logging.info(f"table with header '{headers}' does not contain all the expected headers") logging.info(f"table with header '{headers}' does not contain all the expected headers")

View File

@@ -1,4 +1,3 @@
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches versions from repositories managed with cgit, such as the Linux kernel repository. """Fetches versions from repositories managed with cgit, such as the Linux kernel repository.
@@ -6,10 +5,9 @@ Ideally we would want to use the git repository directly, but cgit-managed repos
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url + '/refs/tags') html = http.fetch_html(config.url + '/refs/tags')
soup = BeautifulSoup(response.text, features="html5lib")
for table in soup.find_all("table", class_="list"): for table in html.find_all("table", class_="list"):
for row in table.find_all("tr"): for row in table.find_all("tr"):
columns = row.find_all("td") columns = row.find_all("td")
if len(columns) != 4: if len(columns) != 4:

View File

@@ -1,4 +1,3 @@
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
from common.git import Git from common.git import Git
@@ -10,9 +9,8 @@ More context on https://github.com/endoflife-date/endoflife.date/pull/4425#discu
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
rn_response = http.fetch_url(config.url) html = http.fetch_html(config.url)
rn_soup = BeautifulSoup(rn_response.text, features="html5lib") released_versions = [h2.get('id') for h2 in html.find_all('h2', id=True) if h2.get('id')]
released_versions = [h2.get('id') for h2 in rn_soup.find_all('h2', id=True) if h2.get('id')]
git = Git(config.data.get('repository')) git = Git(config.data.get('repository'))
git.setup(bare=True) git.setup(bare=True)

View File

@@ -1,4 +1,3 @@
from bs4 import BeautifulSoup
from common import dates, endoflife, github, http, releasedata from common import dates, endoflife, github, http, releasedata
"""Fetch released versions from docs.chef.io and retrieve their date from GitHub. """Fetch released versions from docs.chef.io and retrieve their date from GitHub.
@@ -9,9 +8,8 @@ More context on https://github.com/endoflife-date/endoflife.date/pull/4425#discu
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
rn_response = http.fetch_url(config.url) html = http.fetch_html(config.url)
rn_soup = BeautifulSoup(rn_response.text, features="html5lib") released_versions = [h2.get('id') for h2 in html.find_all('h2', id=True) if h2.get('id')]
released_versions = [h2.get('id') for h2 in rn_soup.find_all('h2', id=True) if h2.get('id')]
for release in github.fetch_releases("inspec/inspec"): for release in github.fetch_releases("inspec/inspec"):
sanitized_version = release.tag_name.replace("v", "") sanitized_version = release.tag_name.replace("v", "")

View File

@@ -1,6 +1,5 @@
import re import re
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches versions from Adobe ColdFusion release notes on helpx.adobe.com. """Fetches versions from Adobe ColdFusion release notes on helpx.adobe.com.
@@ -24,10 +23,9 @@ FIXED_VERSIONS = {
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
changelog = http.fetch_url(config.url) html = http.fetch_html(config.url)
changelog_soup = BeautifulSoup(changelog.text, features="html5lib")
for p in changelog_soup.findAll("div", class_="text"): for p in html.findAll("div", class_="text"):
version_and_date_str = p.get_text().strip().replace('\xa0', ' ') version_and_date_str = p.get_text().strip().replace('\xa0', ' ')
for (date_str, version_str) in VERSION_AND_DATE_PATTERN.findall(version_and_date_str): for (date_str, version_str) in VERSION_AND_DATE_PATTERN.findall(version_and_date_str):
date = dates.parse_date(date_str) date = dates.parse_date(date_str)

View File

@@ -1,6 +1,12 @@
import logging import logging
import xml.dom.minidom
from concurrent.futures import as_completed from concurrent.futures import as_completed
from xml.dom.minidom import Document
import mwparserfromhell
import yaml
from bs4 import BeautifulSoup
from mwparserfromhell.wikicode import Wikicode
from playwright.sync_api import sync_playwright from playwright.sync_api import sync_playwright
from requests import Response from requests import Response
from requests.adapters import HTTPAdapter from requests.adapters import HTTPAdapter
@@ -47,6 +53,31 @@ def fetch_url(url: str, data: any = None, headers: dict[str, str] = None,
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> Response: max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> Response:
return fetch_urls([url], data, headers, max_retries, backoff_factor, timeout)[0] return fetch_urls([url], data, headers, max_retries, backoff_factor, timeout)[0]
def fetch_html(url: str, data: any = None, headers: dict[str, str] = None,
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30,
features: str = "html5lib") -> BeautifulSoup:
response = fetch_url(url, data, headers, max_retries, backoff_factor, timeout)
return BeautifulSoup(response.text, features=features)
def fetch_json(url: str, data: any = None, headers: dict[str, str] = None,
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> Document:
response = fetch_url(url, data, headers, max_retries, backoff_factor, timeout)
return response.json()
def fetch_yaml(url: str, data: any = None, headers: dict[str, str] = None,
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> any:
response = fetch_url(url, data, headers, max_retries, backoff_factor, timeout)
return yaml.safe_load(response.text)
def fetch_xml(url: str, data: any = None, headers: dict[str, str] = None,
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> Document:
response = fetch_url(url, data, headers, max_retries, backoff_factor, timeout)
return xml.dom.minidom.parseString(response.text)
def fetch_markdown(url: str, data: any = None, headers: dict[str, str] = None,
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> Wikicode:
response = fetch_url(url, data, headers, max_retries, backoff_factor, timeout)
return mwparserfromhell.parse(response.text)
# This requires some setup, see https://playwright.dev/python/docs/intro#installing-playwright. # This requires some setup, see https://playwright.dev/python/docs/intro#installing-playwright.
def fetch_javascript_url(url: str, click_selector: str = None, wait_until: str = None) -> str: def fetch_javascript_url(url: str, click_selector: str = None, wait_until: str = None) -> str:

View File

@@ -18,10 +18,9 @@ MANUAL_VERSIONS = {
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
main = http.fetch_url(f"{config.url}/current/install/install-intro.html") html = http.fetch_html(f"{config.url}/current/install/install-intro.html")
main_soup = BeautifulSoup(main.text, features="html5lib")
minor_versions = [options.attrs["value"] for options in main_soup.find(class_="version_list").find_all("option")] minor_versions = [options.attrs["value"] for options in html.find(class_="version_list").find_all("option")]
minor_version_urls = [f"{config.url}/{minor}/release-notes/relnotes.html" for minor in minor_versions] minor_version_urls = [f"{config.url}/{minor}/release-notes/relnotes.html" for minor in minor_versions]
for minor_version in http.fetch_urls(minor_version_urls): for minor_version in http.fetch_urls(minor_version_urls):

View File

@@ -1,12 +1,10 @@
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(f"https://distrowatch.com/index.php?distribution={config.url}") html = http.fetch_html(f"https://distrowatch.com/index.php?distribution={config.url}")
soup = BeautifulSoup(response.text, features="html5lib")
for table in soup.select("td.News1>table.News"): for table in html.select("td.News1>table.News"):
headline = table.select_one("td.NewsHeadline a[href]").get_text().strip() headline = table.select_one("td.NewsHeadline a[href]").get_text().strip()
versions_match = config.first_match(headline) versions_match = config.first_match(headline)
if not versions_match: if not versions_match:

View File

@@ -5,7 +5,7 @@ from common import dates, endoflife, http, releasedata
Unfortunately images creation date cannot be retrieved, so we had to use the tag_last_pushed field instead.""" Unfortunately images creation date cannot be retrieved, so we had to use the tag_last_pushed field instead."""
def fetch_releases(p: releasedata.ProductData, c: endoflife.AutoConfig, url: str) -> None: def fetch_releases(p: releasedata.ProductData, c: endoflife.AutoConfig, url: str) -> None:
data = http.fetch_url(url).json() data = http.fetch_json(url)
for result in data["results"]: for result in data["results"]:
version_str = result["name"] version_str = result["name"]

View File

@@ -1,6 +1,5 @@
import re import re
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
# https://regex101.com/r/zPxBqT/1 # https://regex101.com/r/zPxBqT/1
@@ -15,10 +14,9 @@ URL_BY_PRODUCT = {
for config in endoflife.list_configs_from_argv(): # noqa: B007 multiple JSON produced for historical reasons for config in endoflife.list_configs_from_argv(): # noqa: B007 multiple JSON produced for historical reasons
for product_name, url in URL_BY_PRODUCT.items(): for product_name, url in URL_BY_PRODUCT.items():
with releasedata.ProductData(product_name) as product_data: with releasedata.ProductData(product_name) as product_data:
relnotes = http.fetch_url(url) html = http.fetch_html(url)
relnotes_soup = BeautifulSoup(relnotes.text, features="html5lib")
for section in relnotes_soup.find_all('section', class_='releases'): for section in html.find_all('section', class_='releases'):
for h2 in section.find_all('h2'): # h2 contains the date for h2 in section.find_all('h2'): # h2 contains the date
date = dates.parse_date(h2.get('data-text')) date = dates.parse_date(h2.get('data-text'))

View File

@@ -1,12 +1,10 @@
import logging import logging
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
html = BeautifulSoup(response.text, features="html5lib")
table_selector = config.data.get("table_selector", "#previous-releases + table").strip() table_selector = config.data.get("table_selector", "#previous-releases + table").strip()
date_column = config.data.get("date_column", "Date").strip().lower() date_column = config.data.get("date_column", "Date").strip().lower()
versions_column = config.data.get("versions_column").strip().lower() versions_column = config.data.get("versions_column").strip().lower()

View File

@@ -1,6 +1,5 @@
import re import re
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
CYCLE_PATTERN = re.compile(r"^(\d+\.\d+)/$") CYCLE_PATTERN = re.compile(r"^(\d+\.\d+)/$")
@@ -9,10 +8,9 @@ DATE_AND_VERSION_PATTERN = re.compile(r"^(\d{4})/(\d{2})/(\d{2})\s+:\s+(\d+\.\d+
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
# First, get all minor releases from the download page # First, get all minor releases from the download page
download = http.fetch_url(config.url) download_html = http.fetch_html(config.url)
download_soup = BeautifulSoup(download.text, features="html5lib")
minor_versions = [] minor_versions = []
for link in download_soup.select("a"): for link in download_html.select("a"):
minor_version_match = CYCLE_PATTERN.match(link.attrs["href"]) minor_version_match = CYCLE_PATTERN.match(link.attrs["href"])
if not minor_version_match: if not minor_version_match:
continue continue

View File

@@ -1,12 +1,10 @@
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
page = http.fetch_url(config.url) html = http.fetch_html(config.url)
page_soup = BeautifulSoup(page.text, features="html5lib")
for release_table in page_soup.find("div", class_="ibm-container-body").find_all("table", class_="ibm-data-table ibm-grid"): for release_table in html.find("div", class_="ibm-container-body").find_all("table", class_="ibm-data-table ibm-grid"):
for row in release_table.find_all("tr")[1:]: # for all rows except the header for row in release_table.find_all("tr")[1:]: # for all rows except the header
cells = row.find_all("td") cells = row.find_all("td")
version = cells[0].text.strip("AIX ").replace(' TL', '.') version = cells[0].text.strip("AIX ").replace(' TL', '.')

View File

@@ -1,6 +1,5 @@
import logging import logging
import yaml
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetch version data for Kuma from https://raw.githubusercontent.com/kumahq/kuma/master/versions.yml. """Fetch version data for Kuma from https://raw.githubusercontent.com/kumahq/kuma/master/versions.yml.
@@ -12,8 +11,7 @@ EOL_FIELD = 'endOfLifeDate'
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
yml_response = http.fetch_url(config.url) versions_data = http.fetch_yaml(config.url)
versions_data = yaml.safe_load(yml_response.text)
# Iterate through the versions and their associated dates # Iterate through the versions and their associated dates
for version_info in versions_data: for version_info in versions_data:

View File

@@ -1,16 +1,14 @@
import logging import logging
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches LibreOffice versions from https://downloadarchive.documentfoundation.org/libreoffice/old/""" """Fetches LibreOffice versions from https://downloadarchive.documentfoundation.org/libreoffice/old/"""
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for table in soup.find_all("table"): for table in html.find_all("table"):
for row in table.find_all("tr")[1:]: for row in table.find_all("tr")[1:]:
cells = row.find_all("td") cells = row.find_all("td")
if len(cells) < 4: if len(cells) < 4:

View File

@@ -1,5 +1,4 @@
import re import re
import xml.dom.minidom
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
@@ -11,8 +10,7 @@ ANNOUNCEMENT_PATTERN = re.compile(r"includes\s+the\s+following\s+changes", re.IG
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) rss = http.fetch_xml(config.url)
rss = xml.dom.minidom.parseString(response.text)
for item in rss.getElementsByTagName("entry"): for item in rss.getElementsByTagName("entry"):
content = item.getElementsByTagName("content")[0].firstChild.nodeValue content = item.getElementsByTagName("content")[0].firstChild.nodeValue

View File

@@ -1,6 +1,5 @@
import re import re
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches Lua releases from lua.org.""" """Fetches Lua releases from lua.org."""
@@ -10,9 +9,8 @@ VERSION_PATTERN = re.compile(r"(?P<version>\d+\.\d+\.\d+),\s*released\s*on\s*(?P
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
page = http.fetch_url(config.url) html = http.fetch_html(config.url, features = 'html.parser')
soup = BeautifulSoup(page.text, 'html.parser') page_text = html.text # HTML is broken, no way to parse it with beautifulsoup
page_text = soup.text # HTML is broken, no way to parse it with beautifulsoup
for release_match in RELEASED_AT_PATTERN.finditer(page_text): for release_match in RELEASED_AT_PATTERN.finditer(page_text):
release = release_match.group('release') release = release_match.group('release')

View File

@@ -9,7 +9,7 @@ for config in endoflife.list_configs_from_argv():
while True: while True:
url = f"https://search.maven.org/solrsearch/select?q=g:{group_id}+AND+a:{artifact_id}&core=gav&wt=json&start={start}&rows=100" url = f"https://search.maven.org/solrsearch/select?q=g:{group_id}+AND+a:{artifact_id}&core=gav&wt=json&start={start}&rows=100"
data = http.fetch_url(url).json() data = http.fetch_json(url)
for row in data["response"]["docs"]: for row in data["response"]["docs"]:
version_match = config.first_match(row["v"]) version_match = config.first_match(row["v"])

View File

@@ -1,16 +1,14 @@
import logging import logging
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches NetBSD versions and EOL information from https://www.netbsd.org/.""" """Fetches NetBSD versions and EOL information from https://www.netbsd.org/."""
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for row in soup.select('table tbody tr'): for row in html.select('table tbody tr'):
cells = [cell.get_text(strip=True) for cell in row.select('td')] cells = [cell.get_text(strip=True) for cell in row.select('td')]
version = cells[0] version = cells[0]

View File

@@ -2,7 +2,7 @@ from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
data = http.fetch_url(f"https://registry.npmjs.org/{config.url}").json() data = http.fetch_json(f"https://registry.npmjs.org/{config.url}")
for version_str in data["versions"]: for version_str in data["versions"]:
version_match = config.first_match(version_str) version_match = config.first_match(version_str)
if version_match: if version_match:

View File

@@ -4,8 +4,8 @@ from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
url = f"https://portal.nutanix.com/api/v1/eol/find?type={config.url}" data = http.fetch_json(f"https://portal.nutanix.com/api/v1/eol/find?type={config.url}")
data = http.fetch_url(url).json()
for version_data in data["contents"]: for version_data in data["contents"]:
release_name = '.'.join(version_data["version"].split(".")[:2]) release_name = '.'.join(version_data["version"].split(".")[:2])

View File

@@ -4,7 +4,7 @@ from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
versions = http.fetch_url(config.url).json() versions = http.fetch_json(config.url)
for version in versions: for version in versions:
name = version['version'] name = version['version']

View File

@@ -1,4 +1,3 @@
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches versions from Plesk's change log. """Fetches versions from Plesk's change log.
@@ -8,10 +7,9 @@ there is no entry for GA of version 18.0.18 and older."""
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for release in soup.find_all("div", class_="changelog-entry--obsidian"): for release in html.find_all("div", class_="changelog-entry--obsidian"):
version = release.h2.text.strip() version = release.h2.text.strip()
if not version.startswith('Plesk Obsidian 18'): if not version.startswith('Plesk Obsidian 18'):
continue continue

View File

@@ -2,7 +2,7 @@ from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
data = http.fetch_url(f"https://pypi.org/pypi/{config.url}/json").json() data = http.fetch_json(f"https://pypi.org/pypi/{config.url}/json")
for version_str in data["releases"]: for version_str in data["releases"]:
version_match = config.first_match(version_str) version_match = config.first_match(version_str)

View File

@@ -1,6 +1,5 @@
import logging import logging
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches Amazon RDS versions from the version management pages on AWS docs. """Fetches Amazon RDS versions from the version management pages on AWS docs.
@@ -11,10 +10,9 @@ in the third column (usually named 'RDS release date').
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for table in soup.find_all("table"): for table in html.find_all("table"):
for row in table.find_all("tr"): for row in table.find_all("tr"):
columns = row.find_all("td") columns = row.find_all("td")
if len(columns) <= 3: if len(columns) <= 3:

View File

@@ -1,16 +1,14 @@
import logging import logging
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches RedHat JBoss EAP version data for JBoss 7""" """Fetches RedHat JBoss EAP version data for JBoss 7"""
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for h4 in soup.find_all("h4"): for h4 in html.find_all("h4"):
title = h4.get_text(strip=True) title = h4.get_text(strip=True)
if not title.startswith("7."): if not title.startswith("7."):
continue continue

View File

@@ -1,5 +1,4 @@
import re import re
from xml.dom.minidom import parseString
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
@@ -7,9 +6,8 @@ from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) xml = http.fetch_xml(config.url)
xml = parseString(response.text)
versioning = xml.getElementsByTagName("metadata")[0].getElementsByTagName("versioning")[0] versioning = xml.getElementsByTagName("metadata")[0].getElementsByTagName("versioning")[0]
latest_str = versioning.getElementsByTagName("latest")[0].firstChild.nodeValue latest_str = versioning.getElementsByTagName("latest")[0].firstChild.nodeValue

View File

@@ -1,6 +1,5 @@
import logging import logging
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches Satellite versions from access.redhat.com. """Fetches Satellite versions from access.redhat.com.
@@ -9,10 +8,9 @@ A few of the older versions, such as 'Satellite 6.1 GA Release (Build 6.1.1)', w
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for table in soup.findAll("tbody"): for table in html.findAll("tbody"):
for tr in table.findAll("tr"): for tr in table.findAll("tr"):
td_list = tr.findAll("td") td_list = tr.findAll("td")

View File

@@ -22,7 +22,7 @@ for config in endoflife.list_configs_from_argv():
name = urllib.parse.quote(config.url) name = urllib.parse.quote(config.url)
mapping = Mapping(config.data["fields"]) mapping = Mapping(config.data["fields"])
data = http.fetch_url('https://access.redhat.com/product-life-cycles/api/v1/products?name=' + name).json() data = http.fetch_json('https://access.redhat.com/product-life-cycles/api/v1/products?name=' + name)
for version in data["data"][0]["versions"]: for version in data["data"][0]["versions"]:
version_name = version["name"] version_name = version["name"]

View File

@@ -1,6 +1,5 @@
import re import re
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
# https://regex101.com/r/877ibq/1 # https://regex101.com/r/877ibq/1
@@ -8,10 +7,9 @@ VERSION_PATTERN = re.compile(r"RHEL (?P<major>\d)(\. ?(?P<minor>\d+))?(( Update
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for tr in soup.findAll("tr"): for tr in html.findAll("tr"):
td_list = tr.findAll("td") td_list = tr.findAll("td")
if len(td_list) == 0: if len(td_list) == 0:
continue continue

View File

@@ -1,14 +1,12 @@
import logging import logging
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for tr in soup.findAll("tr"): for tr in html.findAll("tr"):
td_list = tr.findAll("td") td_list = tr.findAll("td")
if len(td_list) == 0: if len(td_list) == 0:
continue continue

View File

@@ -2,7 +2,6 @@ import logging
import re import re
from datetime import date, datetime, time, timezone from datetime import date, datetime, time, timezone
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Detect new models and aggregate EOL data for Samsung Mobile devices. """Detect new models and aggregate EOL data for Samsung Mobile devices.
@@ -27,12 +26,11 @@ for config in endoflife.list_configs_from_argv():
release.set_eol(eol) release.set_eol(eol)
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
sections = config.data.get("sections", {}) sections = config.data.get("sections", {})
for update_cadence, title in sections.items(): for update_cadence, title in sections.items():
models_list = soup.find(string=lambda text, search=title: search in text if text else False).find_next("ul") models_list = html.find(string=lambda text, search=title: search in text if text else False).find_next("ul")
for item in models_list.find_all("li"): for item in models_list.find_all("li"):
models = item.text.replace("Enterprise Models:", "") models = item.text.replace("Enterprise Models:", "")

View File

@@ -1,14 +1,12 @@
import logging import logging
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
products_table = soup.find("tbody", id="productSupportLifecycle") products_table = html.find("tbody", id="productSupportLifecycle")
sles_header_rows = products_table.find_all("tr", class_="row", attrs={"data-productfilter": "SUSE Linux Enterprise Server"}) sles_header_rows = products_table.find_all("tr", class_="row", attrs={"data-productfilter": "SUSE Linux Enterprise Server"})
# Extract rows' IDs to find related sub-rows with details (normally hidden until a user expands a section) # Extract rows' IDs to find related sub-rows with details (normally hidden until a user expands a section)

View File

@@ -1,6 +1,5 @@
import re import re
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
VERSION_DATE_PATTERN = re.compile(r"Splunk Enterprise (?P<version>\d+\.\d+(?:\.\d+)*) was (?:first )?released on (?P<date>\w+\s\d\d?,\s\d{4})\.", re.MULTILINE) VERSION_DATE_PATTERN = re.compile(r"Splunk Enterprise (?P<version>\d+\.\d+(?:\.\d+)*) was (?:first )?released on (?P<date>\w+\s\d\d?,\s\d{4})\.", re.MULTILINE)
@@ -32,10 +31,9 @@ def get_latest_minor_versions(versions: list[str]) -> list[str]:
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
main = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(main.text, features="html5lib")
all_versions = [option.attrs['value'] for option in soup.select("select#version-select > option")] all_versions = [option.attrs['value'] for option in html.select("select#version-select > option")]
all_versions = [v for v in all_versions if v != "DataMonitoringAppPreview"] all_versions = [v for v in all_versions if v != "DataMonitoringAppPreview"]
# Latest minor release notes contains release notes for all previous minor versions. # Latest minor release notes contains release notes for all previous minor versions.

View File

@@ -2,7 +2,7 @@ from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
data = http.fetch_url(config.url).json() data = http.fetch_json(config.url)
for v in data: for v in data:
if v['type'] == 'development': if v['type'] == 'development':
continue continue

View File

@@ -1,4 +1,3 @@
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches the Unity LTS releases from the Unity website. Non-LTS releases are not listed there, so this automation """Fetches the Unity LTS releases from the Unity website. Non-LTS releases are not listed there, so this automation
@@ -19,10 +18,9 @@ The script will need to be updated if someday those conditions are not met."""
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for release in soup.find_all('div', class_='component-releases-item__show__inner-header'): for release in html.find_all('div', class_='component-releases-item__show__inner-header'):
version = release.find('h4').find('span').text version = release.find('h4').find('span').text
date = dates.parse_datetime(release.find('time').attrs['datetime']) date = dates.parse_datetime(release.find('time').attrs['datetime'])
product_data.declare_version(version, date) product_data.declare_version(version, date)

View File

@@ -1,14 +1,12 @@
import re import re
import mwparserfromhell
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
DATE_PATTERN = re.compile(r"\d{4}-\d{2}-\d{2}") DATE_PATTERN = re.compile(r"\d{4}-\d{2}-\d{2}")
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) wikicode = http.fetch_markdown(config.url)
wikicode = mwparserfromhell.parse(response.text)
for tr in wikicode.ifilter_tags(matches=lambda node: node.tag == "tr"): for tr in wikicode.ifilter_tags(matches=lambda node: node.tag == "tr"):
items = tr.contents.filter_tags(matches=lambda node: node.tag == "td") items = tr.contents.filter_tags(matches=lambda node: node.tag == "td")

View File

@@ -1,7 +1,6 @@
import logging import logging
import re import re
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches Veeam products versions from https://www.veeam.com. """Fetches Veeam products versions from https://www.veeam.com.
@@ -12,12 +11,11 @@ such as `https://www.veeam.com/kb2680`.
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
version_column = config.data.get("version_column", "Build Number").lower() version_column = config.data.get("version_column", "Build Number").lower()
date_column = config.data.get("date_column", "Release Date").lower() date_column = config.data.get("date_column", "Release Date").lower()
for table in soup.find_all("table"): for table in html.find_all("table"):
headers = [header.get_text().strip().lower() for header in table.find("tr").find_all("td")] headers = [header.get_text().strip().lower() for header in table.find("tr").find_all("td")]
if version_column not in headers or date_column not in headers: if version_column not in headers or date_column not in headers:
logging.warning("Skipping table with headers %s as it does not contains '%s' or '%s'", logging.warning("Skipping table with headers %s as it does not contains '%s' or '%s'",

View File

@@ -1,7 +1,6 @@
import logging import logging
import re import re
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches releases from VirtualBox download page.""" """Fetches releases from VirtualBox download page."""
@@ -10,10 +9,10 @@ EOL_REGEX = re.compile(r"^\(no longer supported, support ended (?P<value>\d{4}/\
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for li in soup.select_one("#DownloadVirtualBoxOldBuilds + ul").find_all("li"):
for li in html.select_one("#DownloadVirtualBoxOldBuilds + ul").find_all("li"):
li_text = li.find("a").text.strip() li_text = li.find("a").text.strip()
release_match = config.first_match(li_text) release_match = config.first_match(li_text)

View File

@@ -1,12 +1,10 @@
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for table in soup.find_all("table"): for table in html.find_all("table"):
headers = [th.get_text().strip().lower() for th in table.find_all("th")] headers = [th.get_text().strip().lower() for th in table.find_all("th")]
if "version" not in headers or "release date" not in headers: if "version" not in headers or "release date" not in headers:
continue continue