diff --git a/src/amazon-neptune.py b/src/amazon-neptune.py
index 7644576c..5c2276e0 100644
--- a/src/amazon-neptune.py
+++ b/src/amazon-neptune.py
@@ -1,7 +1,8 @@
 import re
-from xml.dom.minidom import parseString
+from common import http
 from common import dates
 from common import endoflife
+from xml.dom.minidom import parseString
 
 """Fetch versions with their dates from the RSS feed of
 https://docs.aws.amazon.com/neptune/latest/userguide/engine-releases.html.
@@ -14,8 +15,8 @@ URL = "https://docs.aws.amazon.com/neptune/latest/userguide/rssupdates.rss"
 
 print(f"::group::{PRODUCT}")
 versions = {}
-response = endoflife.fetch_url(URL)
-rss = parseString(response)
+response = http.fetch_url(URL)
+rss = parseString(response.text)
 for item in rss.getElementsByTagName("item"):
     title = item.getElementsByTagName("title")[0].firstChild.nodeValue
     pubDate = item.getElementsByTagName("pubDate")[0].firstChild.nodeValue
diff --git a/src/apple.py b/src/apple.py
index 5dde4c79..bf397d33 100644
--- a/src/apple.py
+++ b/src/apple.py
@@ -1,5 +1,6 @@
 import re
 from bs4 import BeautifulSoup
+from common import http
 from common import dates
 from common import endoflife
 
@@ -12,7 +13,6 @@ URLS = [
     "https://support.apple.com/kb/HT205759", # 2013
     "https://support.apple.com/kb/HT204611", # 2011 to 2012
     # Apple still links to the following articles, but they are 404:
-    # Disabled, too much timed out.
     "http://web.archive.org/web/20230404214605_/https://support.apple.com/en-us/HT5165", # 2010
     "http://web.archive.org/web/20230327200842_/https://support.apple.com/en-us/HT4218", # 2008-2009
     "http://web.archive.org/web/20230204234533_/https://support.apple.com/en-us/HT1263", # 2005-2007
@@ -54,7 +54,7 @@ def parse_date(date_str):
 print("::group::apple")
 versions_by_product = {k: {} for k in CONFIG.keys()}
 
-for response in endoflife.fetch_urls(URLS):
+for response in http.fetch_urls(URLS):
     soup = BeautifulSoup(response.text, features="html5lib")
     versions_table = soup.find(id="tableWraper")
     versions_table = versions_table if versions_table else soup.find('table', class_="gb-table")
diff --git a/src/aws-lambda.py b/src/aws-lambda.py
index c5cc4bf6..c54a1b73 100644
--- a/src/aws-lambda.py
+++ b/src/aws-lambda.py
@@ -1,4 +1,5 @@
 from bs4 import BeautifulSoup
+from common import http
 from common import endoflife
 from datetime import datetime
 
@@ -37,10 +38,9 @@ except FileNotFoundError:
     releases_data = {}
     print(f"{PRODUCT} file not found, real release dates will not be used.")
 
-response = endoflife.fetch_url(URL)
-soup = BeautifulSoup(response, features="html5lib")
-
 versions = {}
+response = http.fetch_url(URL)
+soup = BeautifulSoup(response.text, features="html5lib")
 for row in soup.find_all("tr"):
     cells = row.find_all("td")
     if len(cells) == 6: # Supported Runtimes
diff --git a/src/cgit.py b/src/cgit.py
index 4fa0d00a..3111ec44 100644
--- a/src/cgit.py
+++ b/src/cgit.py
@@ -1,6 +1,7 @@
 import re
 import sys
 from bs4 import BeautifulSoup
+from common import http
 from common import dates
 from common import endoflife
 from liquid import Template
@@ -14,15 +15,12 @@ do not support partial clone so we cannot.
 METHOD = 'cgit'
 
 
-def make_bs_request(url):
-    response = endoflife.fetch_url(url + '/refs/tags')
-    return BeautifulSoup(response, features="html5lib")
-
-
 def fetch_releases(url, regex, template):
     releases = {}
 
-    soup = make_bs_request(url)
+    response = http.fetch_url(url + '/refs/tags')
+    soup = BeautifulSoup(response.text, features="html5lib")
+
     l_template = Template(template)
 
     for table in soup.find_all("table", class_="list"):
diff --git a/src/coldfusion.py b/src/coldfusion.py
index 8610fefa..541cfae0 100644
--- a/src/coldfusion.py
+++ b/src/coldfusion.py
@@ -1,5 +1,6 @@
 import re
 from bs4 import BeautifulSoup
+from common import http
 from common import dates
 from common import endoflife
 
@@ -28,7 +29,7 @@ REGEX = r"[r|R]elease [d|D]ate[,|:]? (.*?)\).*?Build Number: (.*?)$"
 print(f"::group::{PRODUCT}")
 versions = RELEASE_DATES | {}
 
-for response in endoflife.fetch_urls(URLS):
+for response in http.fetch_urls(URLS):
     soup = BeautifulSoup(response.text, features="html5lib")
     for p in soup.findAll("div", class_="text"):
         text = p.get_text().strip().replace('\xa0', ' ')
diff --git a/src/common/endoflife.py b/src/common/endoflife.py
index 1cdb915f..efffc270 100644
--- a/src/common/endoflife.py
+++ b/src/common/endoflife.py
@@ -1,16 +1,7 @@
 import json
 import frontmatter
-from concurrent.futures import as_completed
 from glob import glob
 from os import path
-from requests import Response
-from requests.adapters import HTTPAdapter
-from requests.exceptions import ChunkedEncodingError
-from requests_futures.sessions import FuturesSession
-from urllib3.util import Retry
-
-# See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent.
-USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0'
 
 # Handle versions having at least 2 digits (ex. 1.2) and at most 4 digits (ex. 1.2.3.4), with an optional leading "v".
 # Major version must be >= 1.
@@ -49,28 +40,6 @@ def list_products(method, products_filter=None, pathname="website/products") ->
 
 
 # Keep the default timeout high enough to avoid errors with web.archive.org.
-def fetch_urls(urls, data=None, headers=None, max_retries=10, backoff_factor=0.5, timeout=30) -> list[Response]:
-    try:
-        with FuturesSession() as session:
-            adapter = HTTPAdapter(max_retries=Retry(total=max_retries, backoff_factor=backoff_factor))
-            session.mount('http://', adapter)
-            session.mount('https://', adapter)
-
-            headers = {'User-Agent': USER_AGENT} | ({} if headers is None else headers)
-            futures = [session.get(url, headers=headers, data=data, timeout=timeout, stream=None) for url in urls]
-            return [future.result() for future in as_completed(futures)]
-    except ChunkedEncodingError as e: # See https://github.com/psf/requests/issues/4771#issue-354077499
-        next_max_retries = max_retries - 1
-        if next_max_retries == 0:
-            raise e # So that the function does not get stuck in an infinite loop.
-        else:
-            # We could wait a bit before retrying, but it's not clear if it would help.
- print(f"Got ChunkedEncodingError while fetching {urls} ({e}), retrying (remaining retries = {next_max_retries}).") - return fetch_urls(urls, data, headers, next_max_retries, backoff_factor, timeout) - - -def fetch_url(url, data=None, headers=None, max_retries=5, backoff_factor=0.5, timeout=30) -> str: - return fetch_urls([url], data, headers, max_retries, backoff_factor, timeout)[0].text def write_releases(product, releases, pathname="releases") -> None: diff --git a/src/common/http.py b/src/common/http.py new file mode 100644 index 00000000..49f6a271 --- /dev/null +++ b/src/common/http.py @@ -0,0 +1,33 @@ +from concurrent.futures import as_completed +from requests import Response +from requests.adapters import HTTPAdapter +from requests.exceptions import ChunkedEncodingError +from requests_futures.sessions import FuturesSession +from urllib3.util import Retry + +# See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent. +USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0' + + +def fetch_urls(urls, data=None, headers=None, max_retries=10, backoff_factor=0.5, timeout=30) -> list[Response]: + try: + with FuturesSession() as session: + adapter = HTTPAdapter(max_retries=Retry(total=max_retries, backoff_factor=backoff_factor)) + session.mount('http://', adapter) + session.mount('https://', adapter) + + headers = {'User-Agent': USER_AGENT} | ({} if headers is None else headers) + futures = [session.get(url, headers=headers, data=data, timeout=timeout, stream=None) for url in urls] + return [future.result() for future in as_completed(futures)] + except ChunkedEncodingError as e: # See https://github.com/psf/requests/issues/4771#issue-354077499 + next_max_retries = max_retries - 1 + if next_max_retries == 0: + raise e # So that the function does not get stuck in an infinite loop. + else: + # We could wait a bit before retrying, but it's not clear if it would help. 
+ print(f"Got ChunkedEncodingError while fetching {urls} ({e}), retrying (remaining retries = {next_max_retries}).") + return fetch_urls(urls, data, headers, next_max_retries, backoff_factor, timeout) + + +def fetch_url(url, data=None, headers=None, max_retries=5, backoff_factor=0.5, timeout=30) -> Response: + return fetch_urls([url], data, headers, max_retries, backoff_factor, timeout)[0] diff --git a/src/cos.py b/src/cos.py index d71937e9..27eea891 100644 --- a/src/cos.py +++ b/src/cos.py @@ -1,5 +1,6 @@ import re from bs4 import BeautifulSoup +from common import http from common import dates from common import endoflife @@ -8,15 +9,15 @@ REGEX = r"^(cos-\d+-\d+-\d+-\d+)" def list_milestones(): - response = endoflife.fetch_url(URL) - soup = BeautifulSoup(response, features="html5lib") + response = http.fetch_url(URL) + soup = BeautifulSoup(response.text, features="html5lib") milestones = soup.find_all('td', string=re.compile(r'COS \d+ LTS')) return [m.text.split(' ')[1] for m in milestones] def fetch_milestones(milestones): urls = [f"{URL}m{channel}" for channel in milestones] - return endoflife.fetch_urls(urls) + return http.fetch_urls(urls) def parse_date(date_str): diff --git a/src/couchbase-server.py b/src/couchbase-server.py index 49613b23..40c98052 100644 --- a/src/couchbase-server.py +++ b/src/couchbase-server.py @@ -1,5 +1,6 @@ import re from bs4 import BeautifulSoup +from common import http from common import dates from common import endoflife @@ -29,8 +30,8 @@ FIXED_VERSIONS = { print(f"::group::{PRODUCT}") versions = {} -response = endoflife.fetch_url(f"{URLS}/current/install/install-intro.html") -soup = BeautifulSoup(response, features="html5lib") +response = http.fetch_url(f"{URLS}/current/install/install-intro.html") +soup = BeautifulSoup(response.text, features="html5lib") minor_versions = [options.attrs["value"] for options in soup.find(class_="version_list").find_all("option")] @@ -39,7 +40,7 @@ for minor in minor_versions: versions[minor + '.0'] = 'N/A' minor_version_urls = [f"{URLS}/{minor}/release-notes/relnotes.html" for minor in minor_versions] -for response in endoflife.fetch_urls(minor_version_urls): +for response in http.fetch_urls(minor_version_urls): soup = BeautifulSoup(response.text, features="html5lib") for title in soup.find_all("h2"): versionAndDate = title.get_text().strip() diff --git a/src/distrowatch.py b/src/distrowatch.py index 10f2837b..53012d8d 100644 --- a/src/distrowatch.py +++ b/src/distrowatch.py @@ -1,6 +1,7 @@ import re import sys from bs4 import BeautifulSoup +from common import http from common import endoflife from liquid import Template @@ -27,8 +28,8 @@ def fetch_releases(distrowatch_id, regex, template): releases = {} l_template = Template(template) url = f"https://distrowatch.com/index.php?distribution={distrowatch_id}" - response = endoflife.fetch_url(url) - soup = BeautifulSoup(response, features="html5lib") + response = http.fetch_url(url) + soup = BeautifulSoup(response.text, features="html5lib") for table in soup.select("td.News1>table.News"): headline = table.select_one("td.NewsHeadline a[href]").get_text().strip() date = table.select_one("td.NewsDate").get_text() diff --git a/src/docker_hub.py b/src/docker_hub.py index 837301c3..255c6163 100644 --- a/src/docker_hub.py +++ b/src/docker_hub.py @@ -1,6 +1,6 @@ -import json import re import sys +from common import http from common import endoflife METHOD = "docker_hub" @@ -10,8 +10,8 @@ def fetch_releases(url, regex, releases): if not isinstance(regex, list): regex = [regex] - 
-    response = endoflife.fetch_url(url)
-    data = json.loads(response)
+    response = http.fetch_url(url)
+    data = response.json()
     for result in data["results"]:
         version = result["name"]
 
diff --git a/src/eks.py b/src/eks.py
index 16fe0f10..abdd1cda 100644
--- a/src/eks.py
+++ b/src/eks.py
@@ -1,5 +1,6 @@
 import re
 from bs4 import BeautifulSoup
+from common import http
 from common import dates
 from common import endoflife
 
@@ -22,8 +23,8 @@ def parse_platforms_pages():
     all_versions = {}
     print("::group::eks")
     for url in URLS:
-        response = endoflife.fetch_url(url)
-        soup = BeautifulSoup(response, features="html5lib")
+        response = http.fetch_url(url)
+        soup = BeautifulSoup(response.text, features="html5lib")
         for tr in soup.select("#main-col-body")[0].findAll("tr"):
             td = tr.find("td")
             if td and re.match(endoflife.DEFAULT_VERSION_REGEX, td.text.strip()):
diff --git a/src/firefox.py b/src/firefox.py
index 03e4b070..840fd9e0 100644
--- a/src/firefox.py
+++ b/src/firefox.py
@@ -1,6 +1,7 @@
 import re
 import urllib.parse
 from bs4 import BeautifulSoup
+from common import http
 from common import dates
 from common import endoflife
 
@@ -18,11 +19,11 @@ def format_date(text: str) -> str:
 print(f"::group::{PRODUCT}")
 versions = {}
 
-response = endoflife.fetch_url(URL)
-ff_releases = BeautifulSoup(response, features="html5lib").find_all("ol", class_="c-release-list")
+response = http.fetch_url(URL)
+ff_releases = BeautifulSoup(response.text, features="html5lib").find_all("ol", class_="c-release-list")
 
 urls = [urllib.parse.urljoin(URL, p.get("href")) for p in ff_releases[0].find_all("a")]
-for response in endoflife.fetch_urls(urls):
+for response in http.fetch_urls(urls):
     soup = BeautifulSoup(response.text, features="html5lib")
 
     version = response.request.url.split("/")[-3]
diff --git a/src/gke.py b/src/gke.py
index 3f8ffc45..94951fff 100644
--- a/src/gke.py
+++ b/src/gke.py
@@ -1,5 +1,6 @@
 import re
 from bs4 import BeautifulSoup
+from common import http
 from common import dates
 from common import endoflife
 
@@ -10,8 +11,8 @@ CHANNELS = ['nochannel', 'stable', 'regular', 'rapid']
 
 def fetch_channel(channel):
     url = f"https://cloud.google.com/kubernetes-engine/docs/release-notes-{channel}"
-    response = endoflife.fetch_url(url)
-    return BeautifulSoup(response, features="html5lib")
+    response = http.fetch_url(url)
+    return BeautifulSoup(response.text, features="html5lib")
 
 
 def parse_soup_for_versions(soup):
diff --git a/src/graalvm.py b/src/graalvm.py
index d5b522e2..ec7a4068 100644
--- a/src/graalvm.py
+++ b/src/graalvm.py
@@ -1,4 +1,5 @@
 from bs4 import BeautifulSoup
+from common import http
 from common import dates
 from common import endoflife
 
@@ -12,8 +13,8 @@ def split_versions(text):
     return text.replace("GraalVM for JDK ", "jdk-").split(", ")
 
 print("::group::graalvm")
-response = endoflife.fetch_url(URL)
-soup = BeautifulSoup(response, features="html5lib")
+response = http.fetch_url(URL)
+soup = BeautifulSoup(response.text, features="html5lib")
 
 versions = {}
 for tr in soup.findAll("table")[1].find("tbody").findAll("tr"):
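
The JSON-backed scrapers in this change (docker_hub, maven, npm, nutanix, php, pypi) all follow the same substitution: the import json plus json.loads(response) pair collapses into requests' built-in decoder on the returned Response. Sketched here with a hypothetical registry URL:

    from common import http

    # Before: data = json.loads(endoflife.fetch_url(url))
    # After: the Response decodes its own JSON body.
    data = http.fetch_url("https://registry.example.org/pkg/json").json()
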
diff --git a/src/haproxy.py b/src/haproxy.py
index b08a73e1..73b71b45 100644
--- a/src/haproxy.py
+++ b/src/haproxy.py
@@ -1,5 +1,6 @@
 import re
 from bs4 import BeautifulSoup
+from common import http
 from common import endoflife
 
 """Fetch HAProxy versions with their dates from https://www.haproxy.org/.
@@ -14,8 +15,8 @@ VERSION_REGEX = r"^(\d{4})\/(\d{2})\/(\d{2})\s+:\s+(\d+\.\d+\.\d.?)$"
 
 def fetch_cycles():
     cycles = []
-    response = endoflife.fetch_url('https://www.haproxy.org/download/')
-    soup = BeautifulSoup(response, features="html5lib")
+    response = http.fetch_url('https://www.haproxy.org/download/')
+    soup = BeautifulSoup(response.text, features="html5lib")
     for link in soup.select("a"):
         m = re.match(CYCLE_REGEX, link.attrs["href"])
         if m:
@@ -32,7 +33,7 @@ def fetch_releases(cycles):
     releases = {}
 
     urls = [f"https://www.haproxy.org/download/{cycle}/src/CHANGELOG" for cycle in cycles]
-    for response in endoflife.fetch_urls(urls):
+    for response in http.fetch_urls(urls):
         for line in response.text.split('\n'):
             m = re.match(VERSION_REGEX, line)
             if m:
diff --git a/src/ibm-aix.py b/src/ibm-aix.py
index 88163855..8b4e5364 100644
--- a/src/ibm-aix.py
+++ b/src/ibm-aix.py
@@ -1,4 +1,5 @@
 from bs4 import BeautifulSoup
+from common import http
 from common import dates
 from common import endoflife
 
@@ -6,8 +7,8 @@ PRODUCT = "ibm-aix"
 URL = "https://www.ibm.com/support/pages/aix-support-lifecycle-information"
 
 def fetch_releases():
-    response = endoflife.fetch_url(URL)
-    soup = BeautifulSoup(response, features="html5lib")
+    response = http.fetch_url(URL)
+    soup = BeautifulSoup(response.text, features="html5lib")
     releases = {}
 
     # for all release tables
diff --git a/src/looker.py b/src/looker.py
index 345b0ddf..d3a35789 100644
--- a/src/looker.py
+++ b/src/looker.py
@@ -1,5 +1,6 @@
 import re
 from bs4 import BeautifulSoup
+from common import http
 from common import dates
 from common import endoflife
 from xml.dom.minidom import parseString
@@ -16,8 +17,8 @@ VERSION_PATTERN = re.compile(r"Looker\s+(?P<version>\d+\.\d+)", re.IGNORECASE)
 print(f"::group::{PRODUCT}")
 versions = {}
 
-response = endoflife.fetch_url(URL)
-rss = parseString(response)
+response = http.fetch_url(URL)
+rss = parseString(response.text)
 for item in rss.getElementsByTagName("entry"):
     date = dates.parse_datetime(item.getElementsByTagName("updated")[0].firstChild.nodeValue).strftime("%Y-%m-%d")
     content = item.getElementsByTagName("content")[0].firstChild.nodeValue
diff --git a/src/maven.py b/src/maven.py
index 046b0af5..1af7f715 100644
--- a/src/maven.py
+++ b/src/maven.py
@@ -1,7 +1,7 @@
 import datetime
-import json
 import re
 import sys
+from common import http
 from common import endoflife
 
 METHOD = "maven"
@@ -17,8 +17,8 @@ def valid_version(version):
 
 def fetch_json(group_id, artifact_id, start):
     url = f"https://search.maven.org/solrsearch/select?q=g:{group_id}+AND+a:{artifact_id}&core=gav&rows=100&wt=json&start={start}"
-    response = endoflife.fetch_url(url)
-    return json.loads(response)
+    response = http.fetch_url(url)
+    return response.json()
 
 
 def fetch_releases(package_identifier):
diff --git a/src/npm.py b/src/npm.py
index f5199b66..498d0f82 100644
--- a/src/npm.py
+++ b/src/npm.py
@@ -1,6 +1,6 @@
-import json
 import re
 import sys
+from common import http
 from common import endoflife
 
 METHOD = "npm"
@@ -13,8 +13,8 @@ def fetch_releases(npm_id, regex):
         regex = [regex]
 
     url = f"https://registry.npmjs.org/{npm_id}"
-    response = endoflife.fetch_url(url)
-    data = json.loads(response)
+    response = http.fetch_url(url)
+    data = response.json()
     for version in data["time"]:
         matches = False
         for r in regex:
diff --git a/src/nutanix.py b/src/nutanix.py
index 1115fc95..b150f2e9 100644
--- a/src/nutanix.py
+++ b/src/nutanix.py
@@ -1,4 +1,4 @@
-import json
+from common import http
 from common import endoflife
 
 """Fetch Nutanix products versions with their dates from https://portal.nutanix.com/api/v1.
@@ -17,8 +17,8 @@ def fetch_releases(product_code):
     versions = {}
     url = BASE_URL + product_code
     print(url)
-    response = endoflife.fetch_url(url)
-    data = json.loads(response)
+    response = http.fetch_url(url)
+    data = response.json()
 
     for version_data in data["contents"]:
         if 'GENERAL_AVAILABILITY' in version_data:
diff --git a/src/palo-alto-networks.py b/src/palo-alto-networks.py
index ae6e3d4c..80212102 100644
--- a/src/palo-alto-networks.py
+++ b/src/palo-alto-networks.py
@@ -1,6 +1,7 @@
 import datetime
 import re
 from bs4 import BeautifulSoup
+from common import http
 from common import endoflife
 
 URL = "https://www.paloaltonetworks.com/services/support/end-of-life-announcements/end-of-life-summary"
@@ -15,8 +16,8 @@ def update_releases(html_identifier, file):
     versions = {}
     print(f"::group::{html_identifier}")
 
-    response = endoflife.fetch_url(URL)
-    soup = BeautifulSoup(response, features="html5lib")
+    response = http.fetch_url(URL)
+    soup = BeautifulSoup(response.text, features="html5lib")
     table = soup.find(id=html_identifier)
 
     for tr in table.findAll("tr")[3:]:
diff --git a/src/php.py b/src/php.py
index de4010cc..c7d76da3 100644
--- a/src/php.py
+++ b/src/php.py
@@ -1,4 +1,4 @@
-import json
+from common import http
 from common import dates
 from common import endoflife
 
@@ -7,8 +7,8 @@ PHP_MAJOR_VERSIONS = [4, 5, 7, 8]
 
 def fetch_versions(major_version):
     url = f"https://www.php.net/releases/index.php?json&max=-1&version={major_version}"
-    response = endoflife.fetch_url(url)
-    data = json.loads(response)
+    response = http.fetch_url(url)
+    data = response.json()
     for v in data:
         data[v] = dates.parse_date(data[v]["date"]).strftime("%Y-%m-%d")
         print(f"{v}: {data[v]}")
diff --git a/src/plesk.py b/src/plesk.py
index 5f833898..0914fc0d 100644
--- a/src/plesk.py
+++ b/src/plesk.py
@@ -1,4 +1,5 @@
 from bs4 import BeautifulSoup
+from common import http
 from common import dates
 from common import endoflife
 
@@ -7,8 +8,8 @@ PRODUCT = "plesk"
 
 
 def make_bs_request(url):
-    response = endoflife.fetch_url(url)
-    return BeautifulSoup(response, features="html5lib")
+    response = http.fetch_url(url)
+    return BeautifulSoup(response.text, features="html5lib")
 
 
 # Only 18.0.20.3 and later will be picked up :
diff --git a/src/pypi.py b/src/pypi.py
index 436c23c7..3e670950 100644
--- a/src/pypi.py
+++ b/src/pypi.py
@@ -1,6 +1,6 @@
-import json
 import re
 import sys
+from common import http
 from common import dates
 from common import endoflife
 
@@ -14,8 +14,8 @@ def fetch_releases(pypi_id, regex):
         regex = [regex]
 
     url = f"https://pypi.org/pypi/{pypi_id}/json"
-    response = endoflife.fetch_url(url)
-    data = json.loads(response)
+    response = http.fetch_url(url)
+    data = response.json()
     for version in data["releases"]:
         R = data["releases"][version]
         matches = False
diff --git a/src/rds.py b/src/rds.py
index 460141b1..917c0221 100644
--- a/src/rds.py
+++ b/src/rds.py
@@ -1,5 +1,6 @@
 import re
 from bs4 import BeautifulSoup
+from common import http
 from common import dates
 from common import endoflife
 
@@ -13,8 +14,8 @@ for db, url in DBS.items():
     print(f"::group::{db}")
     versions = {}
 
-    response = endoflife.fetch_url(url)
-    soup = BeautifulSoup(response, features="html5lib")
+    response = http.fetch_url(url)
+    soup = BeautifulSoup(response.text, features="html5lib")
 
     for table in soup.find_all("table"):
         for row in table.find_all("tr"):
diff --git a/src/redhat-satellite.py b/src/redhat-satellite.py
index ee4a4162..335bd0fa 100644
--- a/src/redhat-satellite.py
+++ b/src/redhat-satellite.py
@@ -1,5 +1,6 @@
 import re
 from bs4 import BeautifulSoup
+from common import http
 from common import endoflife
 
 """Fetch versions with their dates from access.redhat.com.
@@ -13,8 +14,8 @@ URL = "https://access.redhat.com/articles/1365633"
 regex = r"^Satellite (?P<version>\d+\.\d+\.\d+([.-]\d+)?) ([Uu]pdate|[Rr]elease)$"
 
 print("::group::redhat-satellite")
-response = endoflife.fetch_url(URL)
-soup = BeautifulSoup(response, features="html5lib")
+response = http.fetch_url(URL)
+soup = BeautifulSoup(response.text, features="html5lib")
 
 versions = {}
 for table in soup.findAll("tbody"):
diff --git a/src/rhel.py b/src/rhel.py
index 1c68bc80..07f0950e 100644
--- a/src/rhel.py
+++ b/src/rhel.py
@@ -1,5 +1,6 @@
 import re
 from bs4 import BeautifulSoup
+from common import http
 from common import endoflife
 
 URL = "https://access.redhat.com/articles/3078"
@@ -7,8 +8,8 @@ URL = "https://access.redhat.com/articles/3078"
 regex = r"RHEL (?P<major>\d)(\. ?(?P<minor>\d+))?(( Update (?P<update>\d))| GA)?"
 
 print("::group::rhel")
-response = endoflife.fetch_url(URL)
-soup = BeautifulSoup(response, features="html5lib")
+response = http.fetch_url(URL)
+soup = BeautifulSoup(response.text, features="html5lib")
 
 versions = {}
 for tr in soup.findAll("tr"):
diff --git a/src/rockylinux.py b/src/rockylinux.py
index 3317bef8..4b1450ed 100644
--- a/src/rockylinux.py
+++ b/src/rockylinux.py
@@ -1,4 +1,5 @@
 import re
+from common import http
 from common import dates
 from common import endoflife
 
@@ -26,7 +27,7 @@ def parse_markdown_table(table_text):
 
 
 print("::group::rockylinux")
-response = endoflife.fetch_url(URL)
-versions = parse_markdown_table(response)
+response = http.fetch_url(URL)
+versions = parse_markdown_table(response.text)
 endoflife.write_releases('rockylinux', versions)
 print("::endgroup::")
diff --git a/src/ros.py b/src/ros.py
index 1bd15367..a4bb87bb 100644
--- a/src/ros.py
+++ b/src/ros.py
@@ -1,6 +1,7 @@
 import datetime
 import re
 from bs4 import BeautifulSoup
+from common import http
 from common import endoflife
 
 URL = "https://wiki.ros.org/Distributions"
@@ -8,8 +9,8 @@ URL = "https://wiki.ros.org/Distributions"
 regex = r"^ROS (?P<version>(\w| )+)"
 
 print("::group::ros")
-response = endoflife.fetch_url(URL)
-soup = BeautifulSoup(response, features="html5lib")
+response = http.fetch_url(URL)
+soup = BeautifulSoup(response.text, features="html5lib")
 
 versions = {}
 for tr in soup.findAll("tr"):
diff --git a/src/sles.py b/src/sles.py
index 0086b032..207fd411 100644
--- a/src/sles.py
+++ b/src/sles.py
@@ -1,4 +1,5 @@
 from bs4 import BeautifulSoup
+from common import http
 from common import dates
 from common import endoflife
 
@@ -11,8 +12,8 @@ def strip_version(version_str):
 
 
 def fetch_releases():
-    response = endoflife.fetch_url(URL)
-    soup = BeautifulSoup(response, features="html5lib")
+    response = http.fetch_url(URL)
+    soup = BeautifulSoup(response.text, features="html5lib")
     products_table = soup.find("tbody", id="productSupportLifecycle")
     # Get rows with SLES products
     sles_header_rows = products_table.find_all("tr", class_="row", attrs={"data-productfilter": "SUSE Linux Enterprise Server"})
diff --git a/src/splunk.py b/src/splunk.py
index 670ea70b..fb19d669 100644
--- a/src/splunk.py
+++ b/src/splunk.py
@@ -1,5 +1,6 @@
 import re
 from bs4 import BeautifulSoup
+from common import http
 from common import dates
 from common import endoflife
 
@@ -35,8 +36,8 @@ def get_latest_minor_versions(versions):
 
 print(f"::group::{PRODUCT}")
 versions = dict()
-main = endoflife.fetch_url(URL)
-soup = BeautifulSoup(main, features="html5lib")
+main = http.fetch_url(URL)
+soup = BeautifulSoup(main.text, features="html5lib")
 
 all_versions = list(map(
     lambda option: option.attrs['value'],
@@ -48,7 +49,7 @@ all_versions = list(map(
 latest_minor_versions = get_latest_minor_versions(all_versions)
 latest_minor_versions_urls = [RELNOTES_URL_TEMPLATE.format(version=v) for v in latest_minor_versions]
 
-for response in endoflife.fetch_urls(latest_minor_versions_urls):
+for response in http.fetch_urls(latest_minor_versions_urls):
     for (version, date_str) in re.findall(PATTERN, response.text, re.MULTILINE):
         version = f"{version}.0" if len(version.split(".")) == 2 else version # convert x.y to x.y.0
         date = dates.parse_date(date_str).strftime("%Y-%m-%d")
diff --git a/src/typo3.py b/src/typo3.py
index 8904ae9e..ee410ca9 100644
--- a/src/typo3.py
+++ b/src/typo3.py
@@ -1,4 +1,5 @@
 import json
+from common import http
 from common import endoflife
 
 PRODUCT = "typo3"
@@ -7,8 +8,8 @@ URL = "https://get.typo3.org/api/v1/release/"
 
 print(f"::group::{PRODUCT}")
 versions = {}
-response = endoflife.fetch_url(URL)
-data = json.loads(response)
+response = http.fetch_url(URL)
+data = json.loads(response.text)
 for v in data:
     if v['type'] != 'development':
         date = v["date"][0:10]
diff --git a/src/unity.py b/src/unity.py
index fc648d74..e550c521 100644
--- a/src/unity.py
+++ b/src/unity.py
@@ -1,4 +1,5 @@
 from bs4 import BeautifulSoup
+from common import http
 from common import endoflife
 
 # Fetches the Unity LTS releases from the Unity website. Non-LTS releases are not listed there,
@@ -13,8 +14,8 @@ URL = 'https://unity.com/releases/editor/qa/lts-releases'
 
 def fetch_releases(releases, url) -> str:
     print(url)
-    response = endoflife.fetch_url(url)
-    soup = BeautifulSoup(response, features="html5lib")
+    response = http.fetch_url(url)
+    soup = BeautifulSoup(response.text, features="html5lib")
 
     for release in soup.find_all('div', class_='component-releases-item__show__inner-header'):
         version = release.find('h4').find('span').text
diff --git a/src/unrealircd.py b/src/unrealircd.py
index 120086e0..66cf42bd 100644
--- a/src/unrealircd.py
+++ b/src/unrealircd.py
@@ -1,12 +1,13 @@
 import mwparserfromhell
 import re
+from common import http
 from common import endoflife
 
 URL = "https://www.unrealircd.org/docwiki/index.php?title=History_of_UnrealIRCd_releases&action=raw"
 
 print("::group::unrealircd")
-response = endoflife.fetch_url(URL)
-wikicode = mwparserfromhell.parse(response)
+response = http.fetch_url(URL)
+wikicode = mwparserfromhell.parse(response.text)
 
 versions = {}
 for tr in wikicode.ifilter_tags(matches=lambda node: node.tag == "tr"):
diff --git a/src/visualstudio.py b/src/visualstudio.py
index d5365693..c2eb84db 100644
--- a/src/visualstudio.py
+++ b/src/visualstudio.py
@@ -1,5 +1,6 @@
 import re
 from bs4 import BeautifulSoup
+from common import http
 from common import dates
 from common import endoflife
 
@@ -16,7 +17,7 @@ URLS = [
 
 print(f"::group::{PRODUCT}")
 versions = {}
-for response in endoflife.fetch_urls(URLS):
+for response in http.fetch_urls(URLS):
     soup = BeautifulSoup(response.text, features="html5lib")
     for table in soup.find_all("table"):
         headers = [th.get_text().strip().lower() for th in table.find_all("th")]
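
Because fetch_urls gathers its futures with as_completed, responses come back in completion order rather than request order; scrapers that need the originating URL (firefox.py above, for example) read it back from response.request.url. A short sketch of that pattern, with placeholder URLs:

    from common import http

    urls = [f"https://example.org/relnotes/{v}.html" for v in ("9.0", "9.1")]
    for response in http.fetch_urls(urls):
        # Completion order is arbitrary, so recover the URL per response.
        print(response.request.url, response.status_code)
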