Improve HTTP handling in scripts (#449)
Provide some helper methods to hide the complexity of parsing HTML, JSON, YAML, XML or Markdown.
This commit is contained in:
@@ -1,6 +1,5 @@
|
||||
import logging
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
"""Fetches EKS versions from AWS docs.
|
||||
@@ -8,8 +7,8 @@ Now that AWS no longer publishes docs on GitHub, we use the Web Archive to get t
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
response = http.fetch_url(config.url)
|
||||
html = BeautifulSoup(response.text, features="html5lib")
|
||||
html = http.fetch_html(config.url)
|
||||
|
||||
for tr in html.select("#main-col-body")[0].findAll("tr"):
|
||||
cells = tr.findAll("td")
|
||||
if not cells:
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import logging
|
||||
import xml.dom.minidom
|
||||
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
@@ -7,8 +6,7 @@ from common import dates, endoflife, http, releasedata
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
rss_response = http.fetch_url(config.url)
|
||||
rss = xml.dom.minidom.parseString(rss_response.text)
|
||||
rss = http.fetch_xml(config.url)
|
||||
|
||||
for entry in rss.getElementsByTagName("item"):
|
||||
version_str = entry.getElementsByTagName("title")[0].firstChild.nodeValue
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
import logging
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
response = http.fetch_url(config.url)
|
||||
soup = BeautifulSoup(response.text, features="html5lib")
|
||||
html = http.fetch_html(config.url)
|
||||
|
||||
ul = soup.find("h2").find_next("ul")
|
||||
ul = html.find("h2").find_next("ul")
|
||||
for li in ul.find_all("li"):
|
||||
text = li.get_text(strip=True)
|
||||
match = config.first_match(text)
|
||||
|
||||
@@ -1,16 +1,14 @@
|
||||
import logging
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
"""Fetches AWS lambda runtimes with their support / EOL dates from https://docs.aws.amazon.com."""
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
response = http.fetch_url(config.url)
|
||||
soup = BeautifulSoup(response.text, features="html5lib")
|
||||
html = http.fetch_html(config.url)
|
||||
|
||||
for i, table in enumerate(soup.find_all("table")):
|
||||
for i, table in enumerate(html.find_all("table")):
|
||||
headers = [th.get_text().strip().lower() for th in table.find("thead").find_all("tr")[0].find_all("th")]
|
||||
if "identifier" not in headers or "deprecation date" not in headers or "block function update" not in headers:
|
||||
logging.info(f"table with header '{headers}' does not contain all the expected headers")
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
"""Fetches versions from repositories managed with cgit, such as the Linux kernel repository.
|
||||
@@ -6,10 +5,9 @@ Ideally we would want to use the git repository directly, but cgit-managed repos
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
response = http.fetch_url(config.url + '/refs/tags')
|
||||
soup = BeautifulSoup(response.text, features="html5lib")
|
||||
html = http.fetch_html(config.url + '/refs/tags')
|
||||
|
||||
for table in soup.find_all("table", class_="list"):
|
||||
for table in html.find_all("table", class_="list"):
|
||||
for row in table.find_all("tr"):
|
||||
columns = row.find_all("td")
|
||||
if len(columns) != 4:
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
from common.git import Git
|
||||
|
||||
@@ -10,9 +9,8 @@ More context on https://github.com/endoflife-date/endoflife.date/pull/4425#discu
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
rn_response = http.fetch_url(config.url)
|
||||
rn_soup = BeautifulSoup(rn_response.text, features="html5lib")
|
||||
released_versions = [h2.get('id') for h2 in rn_soup.find_all('h2', id=True) if h2.get('id')]
|
||||
html = http.fetch_html(config.url)
|
||||
released_versions = [h2.get('id') for h2 in html.find_all('h2', id=True) if h2.get('id')]
|
||||
|
||||
git = Git(config.data.get('repository'))
|
||||
git.setup(bare=True)
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, github, http, releasedata
|
||||
|
||||
"""Fetch released versions from docs.chef.io and retrieve their date from GitHub.
|
||||
@@ -9,9 +8,8 @@ More context on https://github.com/endoflife-date/endoflife.date/pull/4425#discu
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
rn_response = http.fetch_url(config.url)
|
||||
rn_soup = BeautifulSoup(rn_response.text, features="html5lib")
|
||||
released_versions = [h2.get('id') for h2 in rn_soup.find_all('h2', id=True) if h2.get('id')]
|
||||
html = http.fetch_html(config.url)
|
||||
released_versions = [h2.get('id') for h2 in html.find_all('h2', id=True) if h2.get('id')]
|
||||
|
||||
for release in github.fetch_releases("inspec/inspec"):
|
||||
sanitized_version = release.tag_name.replace("v", "")
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import re
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
"""Fetches versions from Adobe ColdFusion release notes on helpx.adobe.com.
|
||||
@@ -24,10 +23,9 @@ FIXED_VERSIONS = {
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
changelog = http.fetch_url(config.url)
|
||||
changelog_soup = BeautifulSoup(changelog.text, features="html5lib")
|
||||
html = http.fetch_html(config.url)
|
||||
|
||||
for p in changelog_soup.findAll("div", class_="text"):
|
||||
for p in html.findAll("div", class_="text"):
|
||||
version_and_date_str = p.get_text().strip().replace('\xa0', ' ')
|
||||
for (date_str, version_str) in VERSION_AND_DATE_PATTERN.findall(version_and_date_str):
|
||||
date = dates.parse_date(date_str)
|
||||
|
||||
@@ -1,6 +1,12 @@
|
||||
import logging
|
||||
import xml.dom.minidom
|
||||
from concurrent.futures import as_completed
|
||||
from xml.dom.minidom import Document
|
||||
|
||||
import mwparserfromhell
|
||||
import yaml
|
||||
from bs4 import BeautifulSoup
|
||||
from mwparserfromhell.wikicode import Wikicode
|
||||
from playwright.sync_api import sync_playwright
|
||||
from requests import Response
|
||||
from requests.adapters import HTTPAdapter
|
||||
@@ -47,6 +53,31 @@ def fetch_url(url: str, data: any = None, headers: dict[str, str] = None,
|
||||
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> Response:
|
||||
return fetch_urls([url], data, headers, max_retries, backoff_factor, timeout)[0]
|
||||
|
||||
def fetch_html(url: str, data: any = None, headers: dict[str, str] = None,
               max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30,
               features: str = "html5lib") -> BeautifulSoup:
    """Fetch `url` and return its body parsed as an HTML document.

    `data`, `headers`, `max_retries`, `backoff_factor` and `timeout` are
    forwarded unchanged to `fetch_url`.

    :param features: BeautifulSoup parser to use; defaults to "html5lib".
    :return: the BeautifulSoup tree built from the response text.
    """
    page = fetch_url(url, data=data, headers=headers, max_retries=max_retries,
                     backoff_factor=backoff_factor, timeout=timeout)
    return BeautifulSoup(page.text, features=features)
|
||||
|
||||
def fetch_json(url: str, data: any = None, headers: dict[str, str] = None,
               max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> any:
    """Fetch `url` and return its body decoded as JSON.

    `data`, `headers`, `max_retries`, `backoff_factor` and `timeout` are
    forwarded unchanged to `fetch_url`.

    :return: whatever `Response.json()` produces for the payload (the decoded
        JSON value, not an XML `Document`).
    :raises: any exception raised by `Response.json()` when the body is not
        valid JSON is propagated to the caller.
    """
    response = fetch_url(url, data, headers, max_retries, backoff_factor, timeout)
    return response.json()
|
||||
|
||||
def fetch_yaml(url: str, data: any = None, headers: dict[str, str] = None,
               max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> any:
    """Fetch `url` and return its body loaded as YAML.

    `data`, `headers`, `max_retries`, `backoff_factor` and `timeout` are
    forwarded unchanged to `fetch_url`. The response text is loaded with
    `yaml.safe_load`.

    :return: the value produced by `yaml.safe_load` for the response text.
    """
    yaml_text = fetch_url(url, data=data, headers=headers, max_retries=max_retries,
                          backoff_factor=backoff_factor, timeout=timeout).text
    return yaml.safe_load(yaml_text)
|
||||
|
||||
def fetch_xml(url: str, data: any = None, headers: dict[str, str] = None,
              max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> Document:
    """Fetch `url` and return its body parsed as an XML DOM document.

    `data`, `headers`, `max_retries`, `backoff_factor` and `timeout` are
    forwarded unchanged to `fetch_url`.

    :return: the `xml.dom.minidom` Document built from the response text.
    """
    # NOTE(review): the Python docs warn that xml.dom.minidom is not hardened
    # against maliciously constructed data - confirm all fetched feeds are trusted.
    xml_text = fetch_url(url, data=data, headers=headers, max_retries=max_retries,
                         backoff_factor=backoff_factor, timeout=timeout).text
    return xml.dom.minidom.parseString(xml_text)
|
||||
|
||||
def fetch_markdown(url: str, data: any = None, headers: dict[str, str] = None,
                   max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> Wikicode:
    """Fetch `url` and return its body parsed by mwparserfromhell.

    `data`, `headers`, `max_retries`, `backoff_factor` and `timeout` are
    forwarded unchanged to `fetch_url`.

    Despite the function name, the text is handed to `mwparserfromhell.parse`
    and the result is a `Wikicode` object, not a Markdown-specific structure.

    :return: the Wikicode tree built from the response text.
    """
    markup = fetch_url(url, data=data, headers=headers, max_retries=max_retries,
                       backoff_factor=backoff_factor, timeout=timeout).text
    return mwparserfromhell.parse(markup)
|
||||
|
||||
# This requires some setup, see https://playwright.dev/python/docs/intro#installing-playwright.
|
||||
def fetch_javascript_url(url: str, click_selector: str = None, wait_until: str = None) -> str:
|
||||
|
||||
@@ -18,10 +18,9 @@ MANUAL_VERSIONS = {
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
main = http.fetch_url(f"{config.url}/current/install/install-intro.html")
|
||||
main_soup = BeautifulSoup(main.text, features="html5lib")
|
||||
html = http.fetch_html(f"{config.url}/current/install/install-intro.html")
|
||||
|
||||
minor_versions = [options.attrs["value"] for options in main_soup.find(class_="version_list").find_all("option")]
|
||||
minor_versions = [options.attrs["value"] for options in html.find(class_="version_list").find_all("option")]
|
||||
minor_version_urls = [f"{config.url}/{minor}/release-notes/relnotes.html" for minor in minor_versions]
|
||||
|
||||
for minor_version in http.fetch_urls(minor_version_urls):
|
||||
|
||||
@@ -1,12 +1,10 @@
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
response = http.fetch_url(f"https://distrowatch.com/index.php?distribution={config.url}")
|
||||
soup = BeautifulSoup(response.text, features="html5lib")
|
||||
html = http.fetch_html(f"https://distrowatch.com/index.php?distribution={config.url}")
|
||||
|
||||
for table in soup.select("td.News1>table.News"):
|
||||
for table in html.select("td.News1>table.News"):
|
||||
headline = table.select_one("td.NewsHeadline a[href]").get_text().strip()
|
||||
versions_match = config.first_match(headline)
|
||||
if not versions_match:
|
||||
|
||||
@@ -5,7 +5,7 @@ from common import dates, endoflife, http, releasedata
|
||||
Unfortunately images creation date cannot be retrieved, so we had to use the tag_last_pushed field instead."""
|
||||
|
||||
def fetch_releases(p: releasedata.ProductData, c: endoflife.AutoConfig, url: str) -> None:
|
||||
data = http.fetch_url(url).json()
|
||||
data = http.fetch_json(url)
|
||||
|
||||
for result in data["results"]:
|
||||
version_str = result["name"]
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import re
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
# https://regex101.com/r/zPxBqT/1
|
||||
@@ -15,10 +14,9 @@ URL_BY_PRODUCT = {
|
||||
for config in endoflife.list_configs_from_argv(): # noqa: B007 multiple JSON produced for historical reasons
|
||||
for product_name, url in URL_BY_PRODUCT.items():
|
||||
with releasedata.ProductData(product_name) as product_data:
|
||||
relnotes = http.fetch_url(url)
|
||||
relnotes_soup = BeautifulSoup(relnotes.text, features="html5lib")
|
||||
html = http.fetch_html(url)
|
||||
|
||||
for section in relnotes_soup.find_all('section', class_='releases'):
|
||||
for section in html.find_all('section', class_='releases'):
|
||||
for h2 in section.find_all('h2'): # h2 contains the date
|
||||
date = dates.parse_date(h2.get('data-text'))
|
||||
|
||||
|
||||
@@ -1,12 +1,10 @@
|
||||
import logging
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
response = http.fetch_url(config.url)
|
||||
html = BeautifulSoup(response.text, features="html5lib")
|
||||
html = http.fetch_html(config.url)
|
||||
table_selector = config.data.get("table_selector", "#previous-releases + table").strip()
|
||||
date_column = config.data.get("date_column", "Date").strip().lower()
|
||||
versions_column = config.data.get("versions_column").strip().lower()
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import re
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
CYCLE_PATTERN = re.compile(r"^(\d+\.\d+)/$")
|
||||
@@ -9,10 +8,9 @@ DATE_AND_VERSION_PATTERN = re.compile(r"^(\d{4})/(\d{2})/(\d{2})\s+:\s+(\d+\.\d+
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
# First, get all minor releases from the download page
|
||||
download = http.fetch_url(config.url)
|
||||
download_soup = BeautifulSoup(download.text, features="html5lib")
|
||||
download_html = http.fetch_html(config.url)
|
||||
minor_versions = []
|
||||
for link in download_soup.select("a"):
|
||||
for link in download_html.select("a"):
|
||||
minor_version_match = CYCLE_PATTERN.match(link.attrs["href"])
|
||||
if not minor_version_match:
|
||||
continue
|
||||
|
||||
@@ -1,12 +1,10 @@
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
page = http.fetch_url(config.url)
|
||||
page_soup = BeautifulSoup(page.text, features="html5lib")
|
||||
html = http.fetch_html(config.url)
|
||||
|
||||
for release_table in page_soup.find("div", class_="ibm-container-body").find_all("table", class_="ibm-data-table ibm-grid"):
|
||||
for release_table in html.find("div", class_="ibm-container-body").find_all("table", class_="ibm-data-table ibm-grid"):
|
||||
for row in release_table.find_all("tr")[1:]: # for all rows except the header
|
||||
cells = row.find_all("td")
|
||||
version = cells[0].text.strip("AIX ").replace(' TL', '.')
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import logging
|
||||
|
||||
import yaml
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
"""Fetch version data for Kuma from https://raw.githubusercontent.com/kumahq/kuma/master/versions.yml.
|
||||
@@ -12,8 +11,7 @@ EOL_FIELD = 'endOfLifeDate'
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
yml_response = http.fetch_url(config.url)
|
||||
versions_data = yaml.safe_load(yml_response.text)
|
||||
versions_data = http.fetch_yaml(config.url)
|
||||
|
||||
# Iterate through the versions and their associated dates
|
||||
for version_info in versions_data:
|
||||
|
||||
@@ -1,16 +1,14 @@
|
||||
import logging
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
"""Fetches LibreOffice versions from https://downloadarchive.documentfoundation.org/libreoffice/old/"""
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
response = http.fetch_url(config.url)
|
||||
soup = BeautifulSoup(response.text, features="html5lib")
|
||||
html = http.fetch_html(config.url)
|
||||
|
||||
for table in soup.find_all("table"):
|
||||
for table in html.find_all("table"):
|
||||
for row in table.find_all("tr")[1:]:
|
||||
cells = row.find_all("td")
|
||||
if len(cells) < 4:
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import re
|
||||
import xml.dom.minidom
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
@@ -11,8 +10,7 @@ ANNOUNCEMENT_PATTERN = re.compile(r"includes\s+the\s+following\s+changes", re.IG
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
response = http.fetch_url(config.url)
|
||||
rss = xml.dom.minidom.parseString(response.text)
|
||||
rss = http.fetch_xml(config.url)
|
||||
|
||||
for item in rss.getElementsByTagName("entry"):
|
||||
content = item.getElementsByTagName("content")[0].firstChild.nodeValue
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import re
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
"""Fetches Lua releases from lua.org."""
|
||||
@@ -10,9 +9,8 @@ VERSION_PATTERN = re.compile(r"(?P<version>\d+\.\d+\.\d+),\s*released\s*on\s*(?P
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
page = http.fetch_url(config.url)
|
||||
soup = BeautifulSoup(page.text, 'html.parser')
|
||||
page_text = soup.text # HTML is broken, no way to parse it with beautifulsoup
|
||||
html = http.fetch_html(config.url, features = 'html.parser')
|
||||
page_text = html.text # HTML is broken, no way to parse it with beautifulsoup
|
||||
|
||||
for release_match in RELEASED_AT_PATTERN.finditer(page_text):
|
||||
release = release_match.group('release')
|
||||
|
||||
@@ -9,7 +9,7 @@ for config in endoflife.list_configs_from_argv():
|
||||
|
||||
while True:
|
||||
url = f"https://search.maven.org/solrsearch/select?q=g:{group_id}+AND+a:{artifact_id}&core=gav&wt=json&start={start}&rows=100"
|
||||
data = http.fetch_url(url).json()
|
||||
data = http.fetch_json(url)
|
||||
|
||||
for row in data["response"]["docs"]:
|
||||
version_match = config.first_match(row["v"])
|
||||
|
||||
@@ -1,16 +1,14 @@
|
||||
import logging
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
"""Fetches NetBSD versions and EOL information from https://www.netbsd.org/."""
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
response = http.fetch_url(config.url)
|
||||
soup = BeautifulSoup(response.text, features="html5lib")
|
||||
html = http.fetch_html(config.url)
|
||||
|
||||
for row in soup.select('table tbody tr'):
|
||||
for row in html.select('table tbody tr'):
|
||||
cells = [cell.get_text(strip=True) for cell in row.select('td')]
|
||||
|
||||
version = cells[0]
|
||||
|
||||
@@ -2,7 +2,7 @@ from common import dates, endoflife, http, releasedata
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
data = http.fetch_url(f"https://registry.npmjs.org/{config.url}").json()
|
||||
data = http.fetch_json(f"https://registry.npmjs.org/{config.url}")
|
||||
for version_str in data["versions"]:
|
||||
version_match = config.first_match(version_str)
|
||||
if version_match:
|
||||
|
||||
@@ -4,8 +4,8 @@ from common import dates, endoflife, http, releasedata
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
url = f"https://portal.nutanix.com/api/v1/eol/find?type={config.url}"
|
||||
data = http.fetch_url(url).json()
|
||||
data = http.fetch_json(f"https://portal.nutanix.com/api/v1/eol/find?type={config.url}")
|
||||
|
||||
for version_data in data["contents"]:
|
||||
release_name = '.'.join(version_data["version"].split(".")[:2])
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ from common import dates, endoflife, http, releasedata
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
versions = http.fetch_url(config.url).json()
|
||||
versions = http.fetch_json(config.url)
|
||||
|
||||
for version in versions:
|
||||
name = version['version']
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
"""Fetches versions from Plesk's change log.
|
||||
@@ -8,10 +7,9 @@ there is no entry for GA of version 18.0.18 and older."""
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
response = http.fetch_url(config.url)
|
||||
soup = BeautifulSoup(response.text, features="html5lib")
|
||||
html = http.fetch_html(config.url)
|
||||
|
||||
for release in soup.find_all("div", class_="changelog-entry--obsidian"):
|
||||
for release in html.find_all("div", class_="changelog-entry--obsidian"):
|
||||
version = release.h2.text.strip()
|
||||
if not version.startswith('Plesk Obsidian 18'):
|
||||
continue
|
||||
|
||||
@@ -2,7 +2,7 @@ from common import dates, endoflife, http, releasedata
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
data = http.fetch_url(f"https://pypi.org/pypi/{config.url}/json").json()
|
||||
data = http.fetch_json(f"https://pypi.org/pypi/{config.url}/json")
|
||||
|
||||
for version_str in data["releases"]:
|
||||
version_match = config.first_match(version_str)
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import logging
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
"""Fetches Amazon RDS versions from the version management pages on AWS docs.
|
||||
@@ -11,10 +10,9 @@ in the third column (usually named 'RDS release date').
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
response = http.fetch_url(config.url)
|
||||
soup = BeautifulSoup(response.text, features="html5lib")
|
||||
html = http.fetch_html(config.url)
|
||||
|
||||
for table in soup.find_all("table"):
|
||||
for table in html.find_all("table"):
|
||||
for row in table.find_all("tr"):
|
||||
columns = row.find_all("td")
|
||||
if len(columns) <= 3:
|
||||
|
||||
@@ -1,16 +1,14 @@
|
||||
import logging
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
"""Fetches RedHat JBoss EAP version data for JBoss 7"""
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
response = http.fetch_url(config.url)
|
||||
soup = BeautifulSoup(response.text, features="html5lib")
|
||||
html = http.fetch_html(config.url)
|
||||
|
||||
for h4 in soup.find_all("h4"):
|
||||
for h4 in html.find_all("h4"):
|
||||
title = h4.get_text(strip=True)
|
||||
if not title.startswith("7."):
|
||||
continue
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import re
|
||||
from xml.dom.minidom import parseString
|
||||
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
@@ -7,9 +6,8 @@ from common import dates, endoflife, http, releasedata
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
response = http.fetch_url(config.url)
|
||||
xml = http.fetch_xml(config.url)
|
||||
|
||||
xml = parseString(response.text)
|
||||
versioning = xml.getElementsByTagName("metadata")[0].getElementsByTagName("versioning")[0]
|
||||
|
||||
latest_str = versioning.getElementsByTagName("latest")[0].firstChild.nodeValue
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import logging
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
"""Fetches Satellite versions from access.redhat.com.
|
||||
@@ -9,10 +8,9 @@ A few of the older versions, such as 'Satellite 6.1 GA Release (Build 6.1.1)', w
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
response = http.fetch_url(config.url)
|
||||
soup = BeautifulSoup(response.text, features="html5lib")
|
||||
html = http.fetch_html(config.url)
|
||||
|
||||
for table in soup.findAll("tbody"):
|
||||
for table in html.findAll("tbody"):
|
||||
for tr in table.findAll("tr"):
|
||||
td_list = tr.findAll("td")
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ for config in endoflife.list_configs_from_argv():
|
||||
name = urllib.parse.quote(config.url)
|
||||
mapping = Mapping(config.data["fields"])
|
||||
|
||||
data = http.fetch_url('https://access.redhat.com/product-life-cycles/api/v1/products?name=' + name).json()
|
||||
data = http.fetch_json('https://access.redhat.com/product-life-cycles/api/v1/products?name=' + name)
|
||||
|
||||
for version in data["data"][0]["versions"]:
|
||||
version_name = version["name"]
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import re
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
# https://regex101.com/r/877ibq/1
|
||||
@@ -8,10 +7,9 @@ VERSION_PATTERN = re.compile(r"RHEL (?P<major>\d)(\. ?(?P<minor>\d+))?(( Update
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
response = http.fetch_url(config.url)
|
||||
soup = BeautifulSoup(response.text, features="html5lib")
|
||||
html = http.fetch_html(config.url)
|
||||
|
||||
for tr in soup.findAll("tr"):
|
||||
for tr in html.findAll("tr"):
|
||||
td_list = tr.findAll("td")
|
||||
if len(td_list) == 0:
|
||||
continue
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
import logging
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
response = http.fetch_url(config.url)
|
||||
soup = BeautifulSoup(response.text, features="html5lib")
|
||||
html = http.fetch_html(config.url)
|
||||
|
||||
for tr in soup.findAll("tr"):
|
||||
for tr in html.findAll("tr"):
|
||||
td_list = tr.findAll("td")
|
||||
if len(td_list) == 0:
|
||||
continue
|
||||
|
||||
@@ -2,7 +2,6 @@ import logging
|
||||
import re
|
||||
from datetime import date, datetime, time, timezone
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
"""Detect new models and aggregate EOL data for Samsung Mobile devices.
|
||||
@@ -27,12 +26,11 @@ for config in endoflife.list_configs_from_argv():
|
||||
release.set_eol(eol)
|
||||
|
||||
|
||||
response = http.fetch_url(config.url)
|
||||
soup = BeautifulSoup(response.text, features="html5lib")
|
||||
html = http.fetch_html(config.url)
|
||||
|
||||
sections = config.data.get("sections", {})
|
||||
for update_cadence, title in sections.items():
|
||||
models_list = soup.find(string=lambda text, search=title: search in text if text else False).find_next("ul")
|
||||
models_list = html.find(string=lambda text, search=title: search in text if text else False).find_next("ul")
|
||||
|
||||
for item in models_list.find_all("li"):
|
||||
models = item.text.replace("Enterprise Models:", "")
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
import logging
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
response = http.fetch_url(config.url)
|
||||
soup = BeautifulSoup(response.text, features="html5lib")
|
||||
html = http.fetch_html(config.url)
|
||||
|
||||
products_table = soup.find("tbody", id="productSupportLifecycle")
|
||||
products_table = html.find("tbody", id="productSupportLifecycle")
|
||||
sles_header_rows = products_table.find_all("tr", class_="row", attrs={"data-productfilter": "SUSE Linux Enterprise Server"})
|
||||
|
||||
# Extract rows' IDs to find related sub-rows with details (normally hidden until a user expands a section)
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import re
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
VERSION_DATE_PATTERN = re.compile(r"Splunk Enterprise (?P<version>\d+\.\d+(?:\.\d+)*) was (?:first )?released on (?P<date>\w+\s\d\d?,\s\d{4})\.", re.MULTILINE)
|
||||
@@ -32,10 +31,9 @@ def get_latest_minor_versions(versions: list[str]) -> list[str]:
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
main = http.fetch_url(config.url)
|
||||
soup = BeautifulSoup(main.text, features="html5lib")
|
||||
html = http.fetch_html(config.url)
|
||||
|
||||
all_versions = [option.attrs['value'] for option in soup.select("select#version-select > option")]
|
||||
all_versions = [option.attrs['value'] for option in html.select("select#version-select > option")]
|
||||
all_versions = [v for v in all_versions if v != "DataMonitoringAppPreview"]
|
||||
|
||||
# Latest minor release notes contains release notes for all previous minor versions.
|
||||
|
||||
@@ -2,7 +2,7 @@ from common import dates, endoflife, http, releasedata
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
data = http.fetch_url(config.url).json()
|
||||
data = http.fetch_json(config.url)
|
||||
for v in data:
|
||||
if v['type'] == 'development':
|
||||
continue
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
"""Fetches the Unity LTS releases from the Unity website. Non-LTS releases are not listed there, so this automation
|
||||
@@ -19,10 +18,9 @@ The script will need to be updated if someday those conditions are not met."""
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
response = http.fetch_url(config.url)
|
||||
soup = BeautifulSoup(response.text, features="html5lib")
|
||||
html = http.fetch_html(config.url)
|
||||
|
||||
for release in soup.find_all('div', class_='component-releases-item__show__inner-header'):
|
||||
for release in html.find_all('div', class_='component-releases-item__show__inner-header'):
|
||||
version = release.find('h4').find('span').text
|
||||
date = dates.parse_datetime(release.find('time').attrs['datetime'])
|
||||
product_data.declare_version(version, date)
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
import re
|
||||
|
||||
import mwparserfromhell
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
DATE_PATTERN = re.compile(r"\d{4}-\d{2}-\d{2}")
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
response = http.fetch_url(config.url)
|
||||
wikicode = mwparserfromhell.parse(response.text)
|
||||
wikicode = http.fetch_markdown(config.url)
|
||||
|
||||
for tr in wikicode.ifilter_tags(matches=lambda node: node.tag == "tr"):
|
||||
items = tr.contents.filter_tags(matches=lambda node: node.tag == "td")
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import logging
|
||||
import re
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
"""Fetches Veeam products versions from https://www.veeam.com.
|
||||
@@ -12,12 +11,11 @@ such as `https://www.veeam.com/kb2680`.
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
response = http.fetch_url(config.url)
|
||||
soup = BeautifulSoup(response.text, features="html5lib")
|
||||
html = http.fetch_html(config.url)
|
||||
|
||||
version_column = config.data.get("version_column", "Build Number").lower()
|
||||
date_column = config.data.get("date_column", "Release Date").lower()
|
||||
for table in soup.find_all("table"):
|
||||
for table in html.find_all("table"):
|
||||
headers = [header.get_text().strip().lower() for header in table.find("tr").find_all("td")]
|
||||
if version_column not in headers or date_column not in headers:
|
||||
logging.warning("Skipping table with headers %s as it does not contains '%s' or '%s'",
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import logging
|
||||
import re
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
"""Fetches releases from VirtualBox download page."""
|
||||
@@ -10,10 +9,10 @@ EOL_REGEX = re.compile(r"^\(no longer supported, support ended (?P<value>\d{4}/\
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
response = http.fetch_url(config.url)
|
||||
soup = BeautifulSoup(response.text, features="html5lib")
|
||||
html = http.fetch_html(config.url)
|
||||
|
||||
for li in soup.select_one("#DownloadVirtualBoxOldBuilds + ul").find_all("li"):
|
||||
|
||||
for li in html.select_one("#DownloadVirtualBoxOldBuilds + ul").find_all("li"):
|
||||
li_text = li.find("a").text.strip()
|
||||
|
||||
release_match = config.first_match(li_text)
|
||||
|
||||
@@ -1,12 +1,10 @@
|
||||
from bs4 import BeautifulSoup
|
||||
from common import dates, endoflife, http, releasedata
|
||||
|
||||
for config in endoflife.list_configs_from_argv():
|
||||
with releasedata.ProductData(config.product) as product_data:
|
||||
response = http.fetch_url(config.url)
|
||||
soup = BeautifulSoup(response.text, features="html5lib")
|
||||
html = http.fetch_html(config.url)
|
||||
|
||||
for table in soup.find_all("table"):
|
||||
for table in html.find_all("table"):
|
||||
headers = [th.get_text().strip().lower() for th in table.find_all("th")]
|
||||
if "version" not in headers or "release date" not in headers:
|
||||
continue
|
||||
|
||||
Reference in New Issue
Block a user