Improve HTTP handling in scripts (#449)

Provide some helper methods to hide the complexity of parsing HTML, JSON, YAML, XML or MediaWiki wikitext (the `fetch_markdown` helper actually parses wikitext via mwparserfromhell).
This commit is contained in:
Marc Wrobel
2025-06-28 11:46:04 +02:00
parent fda4967c38
commit 312ce078bb
43 changed files with 103 additions and 137 deletions

View File

@@ -1,6 +1,5 @@
import logging import logging
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches EKS versions from AWS docs. """Fetches EKS versions from AWS docs.
@@ -8,8 +7,8 @@ Now that AWS no longer publishes docs on GitHub, we use the Web Archive to get t
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
html = BeautifulSoup(response.text, features="html5lib")
for tr in html.select("#main-col-body")[0].findAll("tr"): for tr in html.select("#main-col-body")[0].findAll("tr"):
cells = tr.findAll("td") cells = tr.findAll("td")
if not cells: if not cells:

View File

@@ -1,5 +1,4 @@
import logging import logging
import xml.dom.minidom
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
@@ -7,8 +6,7 @@ from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
rss_response = http.fetch_url(config.url) rss = http.fetch_xml(config.url)
rss = xml.dom.minidom.parseString(rss_response.text)
for entry in rss.getElementsByTagName("item"): for entry in rss.getElementsByTagName("item"):
version_str = entry.getElementsByTagName("title")[0].firstChild.nodeValue version_str = entry.getElementsByTagName("title")[0].firstChild.nodeValue

View File

@@ -1,14 +1,12 @@
import logging import logging
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
ul = soup.find("h2").find_next("ul") ul = html.find("h2").find_next("ul")
for li in ul.find_all("li"): for li in ul.find_all("li"):
text = li.get_text(strip=True) text = li.get_text(strip=True)
match = config.first_match(text) match = config.first_match(text)

View File

@@ -1,16 +1,14 @@
import logging import logging
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches AWS lambda runtimes with their support / EOL dates from https://docs.aws.amazon.com.""" """Fetches AWS lambda runtimes with their support / EOL dates from https://docs.aws.amazon.com."""
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for i, table in enumerate(soup.find_all("table")): for i, table in enumerate(html.find_all("table")):
headers = [th.get_text().strip().lower() for th in table.find("thead").find_all("tr")[0].find_all("th")] headers = [th.get_text().strip().lower() for th in table.find("thead").find_all("tr")[0].find_all("th")]
if "identifier" not in headers or "deprecation date" not in headers or "block function update" not in headers: if "identifier" not in headers or "deprecation date" not in headers or "block function update" not in headers:
logging.info(f"table with header '{headers}' does not contain all the expected headers") logging.info(f"table with header '{headers}' does not contain all the expected headers")

View File

@@ -1,4 +1,3 @@
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches versions from repositories managed with cgit, such as the Linux kernel repository. """Fetches versions from repositories managed with cgit, such as the Linux kernel repository.
@@ -6,10 +5,9 @@ Ideally we would want to use the git repository directly, but cgit-managed repos
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url + '/refs/tags') html = http.fetch_html(config.url + '/refs/tags')
soup = BeautifulSoup(response.text, features="html5lib")
for table in soup.find_all("table", class_="list"): for table in html.find_all("table", class_="list"):
for row in table.find_all("tr"): for row in table.find_all("tr"):
columns = row.find_all("td") columns = row.find_all("td")
if len(columns) != 4: if len(columns) != 4:

View File

@@ -1,4 +1,3 @@
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
from common.git import Git from common.git import Git
@@ -10,9 +9,8 @@ More context on https://github.com/endoflife-date/endoflife.date/pull/4425#discu
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
rn_response = http.fetch_url(config.url) html = http.fetch_html(config.url)
rn_soup = BeautifulSoup(rn_response.text, features="html5lib") released_versions = [h2.get('id') for h2 in html.find_all('h2', id=True) if h2.get('id')]
released_versions = [h2.get('id') for h2 in rn_soup.find_all('h2', id=True) if h2.get('id')]
git = Git(config.data.get('repository')) git = Git(config.data.get('repository'))
git.setup(bare=True) git.setup(bare=True)

View File

@@ -1,4 +1,3 @@
from bs4 import BeautifulSoup
from common import dates, endoflife, github, http, releasedata from common import dates, endoflife, github, http, releasedata
"""Fetch released versions from docs.chef.io and retrieve their date from GitHub. """Fetch released versions from docs.chef.io and retrieve their date from GitHub.
@@ -9,9 +8,8 @@ More context on https://github.com/endoflife-date/endoflife.date/pull/4425#discu
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
rn_response = http.fetch_url(config.url) html = http.fetch_html(config.url)
rn_soup = BeautifulSoup(rn_response.text, features="html5lib") released_versions = [h2.get('id') for h2 in html.find_all('h2', id=True) if h2.get('id')]
released_versions = [h2.get('id') for h2 in rn_soup.find_all('h2', id=True) if h2.get('id')]
for release in github.fetch_releases("inspec/inspec"): for release in github.fetch_releases("inspec/inspec"):
sanitized_version = release.tag_name.replace("v", "") sanitized_version = release.tag_name.replace("v", "")

View File

@@ -1,6 +1,5 @@
import re import re
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches versions from Adobe ColdFusion release notes on helpx.adobe.com. """Fetches versions from Adobe ColdFusion release notes on helpx.adobe.com.
@@ -24,10 +23,9 @@ FIXED_VERSIONS = {
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
changelog = http.fetch_url(config.url) html = http.fetch_html(config.url)
changelog_soup = BeautifulSoup(changelog.text, features="html5lib")
for p in changelog_soup.findAll("div", class_="text"): for p in html.findAll("div", class_="text"):
version_and_date_str = p.get_text().strip().replace('\xa0', ' ') version_and_date_str = p.get_text().strip().replace('\xa0', ' ')
for (date_str, version_str) in VERSION_AND_DATE_PATTERN.findall(version_and_date_str): for (date_str, version_str) in VERSION_AND_DATE_PATTERN.findall(version_and_date_str):
date = dates.parse_date(date_str) date = dates.parse_date(date_str)

View File

@@ -1,6 +1,12 @@
import logging import logging
import xml.dom.minidom
from concurrent.futures import as_completed from concurrent.futures import as_completed
from xml.dom.minidom import Document
import mwparserfromhell
import yaml
from bs4 import BeautifulSoup
from mwparserfromhell.wikicode import Wikicode
from playwright.sync_api import sync_playwright from playwright.sync_api import sync_playwright
from requests import Response from requests import Response
from requests.adapters import HTTPAdapter from requests.adapters import HTTPAdapter
@@ -47,6 +53,31 @@ def fetch_url(url: str, data: any = None, headers: dict[str, str] = None,
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> Response: max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> Response:
return fetch_urls([url], data, headers, max_retries, backoff_factor, timeout)[0] return fetch_urls([url], data, headers, max_retries, backoff_factor, timeout)[0]
def fetch_html(url: str, data: any = None, headers: dict[str, str] = None,
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30,
features: str = "html5lib") -> BeautifulSoup:
response = fetch_url(url, data, headers, max_retries, backoff_factor, timeout)
return BeautifulSoup(response.text, features=features)
def fetch_json(url: str, data: any = None, headers: dict[str, str] = None,
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> Document:
response = fetch_url(url, data, headers, max_retries, backoff_factor, timeout)
return response.json()
def fetch_yaml(url: str, data: any = None, headers: dict[str, str] = None,
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> any:
response = fetch_url(url, data, headers, max_retries, backoff_factor, timeout)
return yaml.safe_load(response.text)
def fetch_xml(url: str, data: any = None, headers: dict[str, str] = None,
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> Document:
response = fetch_url(url, data, headers, max_retries, backoff_factor, timeout)
return xml.dom.minidom.parseString(response.text)
def fetch_markdown(url: str, data: any = None, headers: dict[str, str] = None,
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> Wikicode:
response = fetch_url(url, data, headers, max_retries, backoff_factor, timeout)
return mwparserfromhell.parse(response.text)
# This requires some setup, see https://playwright.dev/python/docs/intro#installing-playwright. # This requires some setup, see https://playwright.dev/python/docs/intro#installing-playwright.
def fetch_javascript_url(url: str, click_selector: str = None, wait_until: str = None) -> str: def fetch_javascript_url(url: str, click_selector: str = None, wait_until: str = None) -> str:

View File

@@ -18,10 +18,9 @@ MANUAL_VERSIONS = {
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
main = http.fetch_url(f"{config.url}/current/install/install-intro.html") html = http.fetch_html(f"{config.url}/current/install/install-intro.html")
main_soup = BeautifulSoup(main.text, features="html5lib")
minor_versions = [options.attrs["value"] for options in main_soup.find(class_="version_list").find_all("option")] minor_versions = [options.attrs["value"] for options in html.find(class_="version_list").find_all("option")]
minor_version_urls = [f"{config.url}/{minor}/release-notes/relnotes.html" for minor in minor_versions] minor_version_urls = [f"{config.url}/{minor}/release-notes/relnotes.html" for minor in minor_versions]
for minor_version in http.fetch_urls(minor_version_urls): for minor_version in http.fetch_urls(minor_version_urls):

View File

@@ -1,12 +1,10 @@
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(f"https://distrowatch.com/index.php?distribution={config.url}") html = http.fetch_html(f"https://distrowatch.com/index.php?distribution={config.url}")
soup = BeautifulSoup(response.text, features="html5lib")
for table in soup.select("td.News1>table.News"): for table in html.select("td.News1>table.News"):
headline = table.select_one("td.NewsHeadline a[href]").get_text().strip() headline = table.select_one("td.NewsHeadline a[href]").get_text().strip()
versions_match = config.first_match(headline) versions_match = config.first_match(headline)
if not versions_match: if not versions_match:

View File

@@ -5,7 +5,7 @@ from common import dates, endoflife, http, releasedata
Unfortunately images creation date cannot be retrieved, so we had to use the tag_last_pushed field instead.""" Unfortunately images creation date cannot be retrieved, so we had to use the tag_last_pushed field instead."""
def fetch_releases(p: releasedata.ProductData, c: endoflife.AutoConfig, url: str) -> None: def fetch_releases(p: releasedata.ProductData, c: endoflife.AutoConfig, url: str) -> None:
data = http.fetch_url(url).json() data = http.fetch_json(url)
for result in data["results"]: for result in data["results"]:
version_str = result["name"] version_str = result["name"]

View File

@@ -1,6 +1,5 @@
import re import re
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
# https://regex101.com/r/zPxBqT/1 # https://regex101.com/r/zPxBqT/1
@@ -15,10 +14,9 @@ URL_BY_PRODUCT = {
for config in endoflife.list_configs_from_argv(): # noqa: B007 multiple JSON produced for historical reasons for config in endoflife.list_configs_from_argv(): # noqa: B007 multiple JSON produced for historical reasons
for product_name, url in URL_BY_PRODUCT.items(): for product_name, url in URL_BY_PRODUCT.items():
with releasedata.ProductData(product_name) as product_data: with releasedata.ProductData(product_name) as product_data:
relnotes = http.fetch_url(url) html = http.fetch_html(url)
relnotes_soup = BeautifulSoup(relnotes.text, features="html5lib")
for section in relnotes_soup.find_all('section', class_='releases'): for section in html.find_all('section', class_='releases'):
for h2 in section.find_all('h2'): # h2 contains the date for h2 in section.find_all('h2'): # h2 contains the date
date = dates.parse_date(h2.get('data-text')) date = dates.parse_date(h2.get('data-text'))

View File

@@ -1,12 +1,10 @@
import logging import logging
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
html = BeautifulSoup(response.text, features="html5lib")
table_selector = config.data.get("table_selector", "#previous-releases + table").strip() table_selector = config.data.get("table_selector", "#previous-releases + table").strip()
date_column = config.data.get("date_column", "Date").strip().lower() date_column = config.data.get("date_column", "Date").strip().lower()
versions_column = config.data.get("versions_column").strip().lower() versions_column = config.data.get("versions_column").strip().lower()

View File

@@ -1,6 +1,5 @@
import re import re
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
CYCLE_PATTERN = re.compile(r"^(\d+\.\d+)/$") CYCLE_PATTERN = re.compile(r"^(\d+\.\d+)/$")
@@ -9,10 +8,9 @@ DATE_AND_VERSION_PATTERN = re.compile(r"^(\d{4})/(\d{2})/(\d{2})\s+:\s+(\d+\.\d+
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
# First, get all minor releases from the download page # First, get all minor releases from the download page
download = http.fetch_url(config.url) download_html = http.fetch_html(config.url)
download_soup = BeautifulSoup(download.text, features="html5lib")
minor_versions = [] minor_versions = []
for link in download_soup.select("a"): for link in download_html.select("a"):
minor_version_match = CYCLE_PATTERN.match(link.attrs["href"]) minor_version_match = CYCLE_PATTERN.match(link.attrs["href"])
if not minor_version_match: if not minor_version_match:
continue continue

View File

@@ -1,12 +1,10 @@
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
page = http.fetch_url(config.url) html = http.fetch_html(config.url)
page_soup = BeautifulSoup(page.text, features="html5lib")
for release_table in page_soup.find("div", class_="ibm-container-body").find_all("table", class_="ibm-data-table ibm-grid"): for release_table in html.find("div", class_="ibm-container-body").find_all("table", class_="ibm-data-table ibm-grid"):
for row in release_table.find_all("tr")[1:]: # for all rows except the header for row in release_table.find_all("tr")[1:]: # for all rows except the header
cells = row.find_all("td") cells = row.find_all("td")
version = cells[0].text.strip("AIX ").replace(' TL', '.') version = cells[0].text.strip("AIX ").replace(' TL', '.')

View File

@@ -1,6 +1,5 @@
import logging import logging
import yaml
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetch version data for Kuma from https://raw.githubusercontent.com/kumahq/kuma/master/versions.yml. """Fetch version data for Kuma from https://raw.githubusercontent.com/kumahq/kuma/master/versions.yml.
@@ -12,8 +11,7 @@ EOL_FIELD = 'endOfLifeDate'
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
yml_response = http.fetch_url(config.url) versions_data = http.fetch_yaml(config.url)
versions_data = yaml.safe_load(yml_response.text)
# Iterate through the versions and their associated dates # Iterate through the versions and their associated dates
for version_info in versions_data: for version_info in versions_data:

View File

@@ -1,16 +1,14 @@
import logging import logging
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches LibreOffice versions from https://downloadarchive.documentfoundation.org/libreoffice/old/""" """Fetches LibreOffice versions from https://downloadarchive.documentfoundation.org/libreoffice/old/"""
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for table in soup.find_all("table"): for table in html.find_all("table"):
for row in table.find_all("tr")[1:]: for row in table.find_all("tr")[1:]:
cells = row.find_all("td") cells = row.find_all("td")
if len(cells) < 4: if len(cells) < 4:

View File

@@ -1,5 +1,4 @@
import re import re
import xml.dom.minidom
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
@@ -11,8 +10,7 @@ ANNOUNCEMENT_PATTERN = re.compile(r"includes\s+the\s+following\s+changes", re.IG
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) rss = http.fetch_xml(config.url)
rss = xml.dom.minidom.parseString(response.text)
for item in rss.getElementsByTagName("entry"): for item in rss.getElementsByTagName("entry"):
content = item.getElementsByTagName("content")[0].firstChild.nodeValue content = item.getElementsByTagName("content")[0].firstChild.nodeValue

View File

@@ -1,6 +1,5 @@
import re import re
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches Lua releases from lua.org.""" """Fetches Lua releases from lua.org."""
@@ -10,9 +9,8 @@ VERSION_PATTERN = re.compile(r"(?P<version>\d+\.\d+\.\d+),\s*released\s*on\s*(?P
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
page = http.fetch_url(config.url) html = http.fetch_html(config.url, features = 'html.parser')
soup = BeautifulSoup(page.text, 'html.parser') page_text = html.text # HTML is broken, no way to parse it with beautifulsoup
page_text = soup.text # HTML is broken, no way to parse it with beautifulsoup
for release_match in RELEASED_AT_PATTERN.finditer(page_text): for release_match in RELEASED_AT_PATTERN.finditer(page_text):
release = release_match.group('release') release = release_match.group('release')

View File

@@ -9,7 +9,7 @@ for config in endoflife.list_configs_from_argv():
while True: while True:
url = f"https://search.maven.org/solrsearch/select?q=g:{group_id}+AND+a:{artifact_id}&core=gav&wt=json&start={start}&rows=100" url = f"https://search.maven.org/solrsearch/select?q=g:{group_id}+AND+a:{artifact_id}&core=gav&wt=json&start={start}&rows=100"
data = http.fetch_url(url).json() data = http.fetch_json(url)
for row in data["response"]["docs"]: for row in data["response"]["docs"]:
version_match = config.first_match(row["v"]) version_match = config.first_match(row["v"])

View File

@@ -1,16 +1,14 @@
import logging import logging
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches NetBSD versions and EOL information from https://www.netbsd.org/.""" """Fetches NetBSD versions and EOL information from https://www.netbsd.org/."""
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for row in soup.select('table tbody tr'): for row in html.select('table tbody tr'):
cells = [cell.get_text(strip=True) for cell in row.select('td')] cells = [cell.get_text(strip=True) for cell in row.select('td')]
version = cells[0] version = cells[0]

View File

@@ -2,7 +2,7 @@ from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
data = http.fetch_url(f"https://registry.npmjs.org/{config.url}").json() data = http.fetch_json(f"https://registry.npmjs.org/{config.url}")
for version_str in data["versions"]: for version_str in data["versions"]:
version_match = config.first_match(version_str) version_match = config.first_match(version_str)
if version_match: if version_match:

View File

@@ -4,8 +4,8 @@ from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
url = f"https://portal.nutanix.com/api/v1/eol/find?type={config.url}" data = http.fetch_json(f"https://portal.nutanix.com/api/v1/eol/find?type={config.url}")
data = http.fetch_url(url).json()
for version_data in data["contents"]: for version_data in data["contents"]:
release_name = '.'.join(version_data["version"].split(".")[:2]) release_name = '.'.join(version_data["version"].split(".")[:2])

View File

@@ -4,7 +4,7 @@ from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
versions = http.fetch_url(config.url).json() versions = http.fetch_json(config.url)
for version in versions: for version in versions:
name = version['version'] name = version['version']

View File

@@ -1,4 +1,3 @@
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches versions from Plesk's change log. """Fetches versions from Plesk's change log.
@@ -8,10 +7,9 @@ there is no entry for GA of version 18.0.18 and older."""
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for release in soup.find_all("div", class_="changelog-entry--obsidian"): for release in html.find_all("div", class_="changelog-entry--obsidian"):
version = release.h2.text.strip() version = release.h2.text.strip()
if not version.startswith('Plesk Obsidian 18'): if not version.startswith('Plesk Obsidian 18'):
continue continue

View File

@@ -2,7 +2,7 @@ from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
data = http.fetch_url(f"https://pypi.org/pypi/{config.url}/json").json() data = http.fetch_json(f"https://pypi.org/pypi/{config.url}/json")
for version_str in data["releases"]: for version_str in data["releases"]:
version_match = config.first_match(version_str) version_match = config.first_match(version_str)

View File

@@ -1,6 +1,5 @@
import logging import logging
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches Amazon RDS versions from the version management pages on AWS docs. """Fetches Amazon RDS versions from the version management pages on AWS docs.
@@ -11,10 +10,9 @@ in the third column (usually named 'RDS release date').
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for table in soup.find_all("table"): for table in html.find_all("table"):
for row in table.find_all("tr"): for row in table.find_all("tr"):
columns = row.find_all("td") columns = row.find_all("td")
if len(columns) <= 3: if len(columns) <= 3:

View File

@@ -1,16 +1,14 @@
import logging import logging
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches RedHat JBoss EAP version data for JBoss 7""" """Fetches RedHat JBoss EAP version data for JBoss 7"""
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for h4 in soup.find_all("h4"): for h4 in html.find_all("h4"):
title = h4.get_text(strip=True) title = h4.get_text(strip=True)
if not title.startswith("7."): if not title.startswith("7."):
continue continue

View File

@@ -1,5 +1,4 @@
import re import re
from xml.dom.minidom import parseString
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
@@ -7,9 +6,8 @@ from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) xml = http.fetch_xml(config.url)
xml = parseString(response.text)
versioning = xml.getElementsByTagName("metadata")[0].getElementsByTagName("versioning")[0] versioning = xml.getElementsByTagName("metadata")[0].getElementsByTagName("versioning")[0]
latest_str = versioning.getElementsByTagName("latest")[0].firstChild.nodeValue latest_str = versioning.getElementsByTagName("latest")[0].firstChild.nodeValue

View File

@@ -1,6 +1,5 @@
import logging import logging
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches Satellite versions from access.redhat.com. """Fetches Satellite versions from access.redhat.com.
@@ -9,10 +8,9 @@ A few of the older versions, such as 'Satellite 6.1 GA Release (Build 6.1.1)', w
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for table in soup.findAll("tbody"): for table in html.findAll("tbody"):
for tr in table.findAll("tr"): for tr in table.findAll("tr"):
td_list = tr.findAll("td") td_list = tr.findAll("td")

View File

@@ -22,7 +22,7 @@ for config in endoflife.list_configs_from_argv():
name = urllib.parse.quote(config.url) name = urllib.parse.quote(config.url)
mapping = Mapping(config.data["fields"]) mapping = Mapping(config.data["fields"])
data = http.fetch_url('https://access.redhat.com/product-life-cycles/api/v1/products?name=' + name).json() data = http.fetch_json('https://access.redhat.com/product-life-cycles/api/v1/products?name=' + name)
for version in data["data"][0]["versions"]: for version in data["data"][0]["versions"]:
version_name = version["name"] version_name = version["name"]

View File

@@ -1,6 +1,5 @@
import re import re
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
# https://regex101.com/r/877ibq/1 # https://regex101.com/r/877ibq/1
@@ -8,10 +7,9 @@ VERSION_PATTERN = re.compile(r"RHEL (?P<major>\d)(\. ?(?P<minor>\d+))?(( Update
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for tr in soup.findAll("tr"): for tr in html.findAll("tr"):
td_list = tr.findAll("td") td_list = tr.findAll("td")
if len(td_list) == 0: if len(td_list) == 0:
continue continue

View File

@@ -1,14 +1,12 @@
import logging import logging
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for tr in soup.findAll("tr"): for tr in html.findAll("tr"):
td_list = tr.findAll("td") td_list = tr.findAll("td")
if len(td_list) == 0: if len(td_list) == 0:
continue continue

View File

@@ -2,7 +2,6 @@ import logging
import re import re
from datetime import date, datetime, time, timezone from datetime import date, datetime, time, timezone
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Detect new models and aggregate EOL data for Samsung Mobile devices. """Detect new models and aggregate EOL data for Samsung Mobile devices.
@@ -27,12 +26,11 @@ for config in endoflife.list_configs_from_argv():
release.set_eol(eol) release.set_eol(eol)
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
sections = config.data.get("sections", {}) sections = config.data.get("sections", {})
for update_cadence, title in sections.items(): for update_cadence, title in sections.items():
models_list = soup.find(string=lambda text, search=title: search in text if text else False).find_next("ul") models_list = html.find(string=lambda text, search=title: search in text if text else False).find_next("ul")
for item in models_list.find_all("li"): for item in models_list.find_all("li"):
models = item.text.replace("Enterprise Models:", "") models = item.text.replace("Enterprise Models:", "")

View File

@@ -1,14 +1,12 @@
import logging import logging
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
products_table = soup.find("tbody", id="productSupportLifecycle") products_table = html.find("tbody", id="productSupportLifecycle")
sles_header_rows = products_table.find_all("tr", class_="row", attrs={"data-productfilter": "SUSE Linux Enterprise Server"}) sles_header_rows = products_table.find_all("tr", class_="row", attrs={"data-productfilter": "SUSE Linux Enterprise Server"})
# Extract rows' IDs to find related sub-rows with details (normally hidden until a user expands a section) # Extract rows' IDs to find related sub-rows with details (normally hidden until a user expands a section)

View File

@@ -1,6 +1,5 @@
import re import re
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
VERSION_DATE_PATTERN = re.compile(r"Splunk Enterprise (?P<version>\d+\.\d+(?:\.\d+)*) was (?:first )?released on (?P<date>\w+\s\d\d?,\s\d{4})\.", re.MULTILINE) VERSION_DATE_PATTERN = re.compile(r"Splunk Enterprise (?P<version>\d+\.\d+(?:\.\d+)*) was (?:first )?released on (?P<date>\w+\s\d\d?,\s\d{4})\.", re.MULTILINE)
@@ -32,10 +31,9 @@ def get_latest_minor_versions(versions: list[str]) -> list[str]:
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
main = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(main.text, features="html5lib")
all_versions = [option.attrs['value'] for option in soup.select("select#version-select > option")] all_versions = [option.attrs['value'] for option in html.select("select#version-select > option")]
all_versions = [v for v in all_versions if v != "DataMonitoringAppPreview"] all_versions = [v for v in all_versions if v != "DataMonitoringAppPreview"]
# Latest minor release notes contains release notes for all previous minor versions. # Latest minor release notes contains release notes for all previous minor versions.

View File

@@ -2,7 +2,7 @@ from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
data = http.fetch_url(config.url).json() data = http.fetch_json(config.url)
for v in data: for v in data:
if v['type'] == 'development': if v['type'] == 'development':
continue continue

View File

@@ -1,4 +1,3 @@
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches the Unity LTS releases from the Unity website. Non-LTS releases are not listed there, so this automation """Fetches the Unity LTS releases from the Unity website. Non-LTS releases are not listed there, so this automation
@@ -19,10 +18,9 @@ The script will need to be updated if someday those conditions are not met."""
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for release in soup.find_all('div', class_='component-releases-item__show__inner-header'): for release in html.find_all('div', class_='component-releases-item__show__inner-header'):
version = release.find('h4').find('span').text version = release.find('h4').find('span').text
date = dates.parse_datetime(release.find('time').attrs['datetime']) date = dates.parse_datetime(release.find('time').attrs['datetime'])
product_data.declare_version(version, date) product_data.declare_version(version, date)

View File

@@ -1,14 +1,12 @@
import re import re
import mwparserfromhell
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
DATE_PATTERN = re.compile(r"\d{4}-\d{2}-\d{2}") DATE_PATTERN = re.compile(r"\d{4}-\d{2}-\d{2}")
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) wikicode = http.fetch_markdown(config.url)
wikicode = mwparserfromhell.parse(response.text)
for tr in wikicode.ifilter_tags(matches=lambda node: node.tag == "tr"): for tr in wikicode.ifilter_tags(matches=lambda node: node.tag == "tr"):
items = tr.contents.filter_tags(matches=lambda node: node.tag == "td") items = tr.contents.filter_tags(matches=lambda node: node.tag == "td")

View File

@@ -1,7 +1,6 @@
import logging import logging
import re import re
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches Veeam products versions from https://www.veeam.com. """Fetches Veeam products versions from https://www.veeam.com.
@@ -12,12 +11,11 @@ such as `https://www.veeam.com/kb2680`.
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
version_column = config.data.get("version_column", "Build Number").lower() version_column = config.data.get("version_column", "Build Number").lower()
date_column = config.data.get("date_column", "Release Date").lower() date_column = config.data.get("date_column", "Release Date").lower()
for table in soup.find_all("table"): for table in html.find_all("table"):
headers = [header.get_text().strip().lower() for header in table.find("tr").find_all("td")] headers = [header.get_text().strip().lower() for header in table.find("tr").find_all("td")]
if version_column not in headers or date_column not in headers: if version_column not in headers or date_column not in headers:
logging.warning("Skipping table with headers %s as it does not contains '%s' or '%s'", logging.warning("Skipping table with headers %s as it does not contains '%s' or '%s'",

View File

@@ -1,7 +1,6 @@
import logging import logging
import re import re
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
"""Fetches releases from VirtualBox download page.""" """Fetches releases from VirtualBox download page."""
@@ -10,10 +9,10 @@ EOL_REGEX = re.compile(r"^\(no longer supported, support ended (?P<value>\d{4}/\
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for li in soup.select_one("#DownloadVirtualBoxOldBuilds + ul").find_all("li"):
for li in html.select_one("#DownloadVirtualBoxOldBuilds + ul").find_all("li"):
li_text = li.find("a").text.strip() li_text = li.find("a").text.strip()
release_match = config.first_match(li_text) release_match = config.first_match(li_text)

View File

@@ -1,12 +1,10 @@
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata from common import dates, endoflife, http, releasedata
for config in endoflife.list_configs_from_argv(): for config in endoflife.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
response = http.fetch_url(config.url) html = http.fetch_html(config.url)
soup = BeautifulSoup(response.text, features="html5lib")
for table in soup.find_all("table"): for table in html.find_all("table"):
headers = [th.get_text().strip().lower() for th in table.find_all("th")] headers = [th.get_text().strip().lower() for th in table.find_all("th")]
if "version" not in headers or "release date" not in headers: if "version" not in headers or "release date" not in headers:
continue continue