Simplify argument parsing (#459)

With the current state of automation scripts, this is not possible anymore to launch script with multiple auto configs.
This commit is contained in:
Marc Wrobel
2025-07-06 22:42:01 +02:00
committed by GitHub
parent b105939f93
commit 391d65ad8a
61 changed files with 1091 additions and 1032 deletions

View File

@@ -1,31 +1,32 @@
import logging
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
"""Fetches EKS versions from AWS docs.
Now that AWS no longer publishes docs on GitHub, we use the Web Archive to get the older versions."""
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
config = config_from_argv()
with ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
for tr in html.select("#main-col-body")[0].findAll("tr"):
cells = tr.findAll("td")
if not cells:
continue
for tr in html.select("#main-col-body")[0].findAll("tr"):
cells = tr.findAll("td")
if not cells:
continue
k8s_version_text = cells[0].text.strip()
k8s_version_match = config.first_match(k8s_version_text)
if not k8s_version_match:
logging.warning(f"Skipping {k8s_version_text}: does not match version regex(es)")
continue
k8s_version_text = cells[0].text.strip()
k8s_version_match = config.first_match(k8s_version_text)
if not k8s_version_match:
logging.warning(f"Skipping {k8s_version_text}: does not match version regex(es)")
continue
eks_version = cells[1].text.strip()
# K8S patch version is not kept to match versions on https://github.com/aws/eks-distro/tags
version = f"{k8s_version_match.group('major')}.{k8s_version_match.group('minor')}-{eks_version.replace('.', '-')}"
eks_version = cells[1].text.strip()
# K8S patch version is not kept to match versions on https://github.com/aws/eks-distro/tags
version = f"{k8s_version_match.group('major')}.{k8s_version_match.group('minor')}-{eks_version.replace('.', '-')}"
date_str = cells[-1].text.strip()
date_str = date_str.replace("April 18.2025", "April 18 2025") # temporary fix for a typo in the source
date = dates.parse_date_or_month_year_date(date_str)
date_str = cells[-1].text.strip()
date_str = date_str.replace("April 18.2025", "April 18 2025") # temporary fix for a typo in the source
date = dates.parse_date_or_month_year_date(date_str)
product_data.declare_version(version, date)
product_data.declare_version(version, date)

View File

@@ -1,22 +1,23 @@
import logging
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
"""Fetches Amazon Neptune versions from its RSS feed on docs.aws.amazon.com."""
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
rss = http.fetch_xml(config.url)
config = config_from_argv()
with ProductData(config.product) as product_data:
rss = http.fetch_xml(config.url)
for entry in rss.getElementsByTagName("item"):
version_str = entry.getElementsByTagName("title")[0].firstChild.nodeValue
date_str = entry.getElementsByTagName("pubDate")[0].firstChild.nodeValue
for entry in rss.getElementsByTagName("item"):
version_str = entry.getElementsByTagName("title")[0].firstChild.nodeValue
date_str = entry.getElementsByTagName("pubDate")[0].firstChild.nodeValue
version_match = config.first_match(version_str)
if not version_match:
logging.warning(f"Skipping entry with malformed version: {entry}")
continue
version_match = config.first_match(version_str)
if not version_match:
logging.warning(f"Skipping entry with malformed version: {entry}")
continue
version = config.render(version_match)
date = dates.parse_datetime(date_str)
product_data.declare_version(version, date)
version = config.render(version_match)
date = dates.parse_datetime(date_str)
product_data.declare_version(version, date)

View File

@@ -1,24 +1,25 @@
from common import dates, releasedata
from common import dates
from common.git import Git
from common.releasedata import ProductData, config_from_argv
"""Fetches Apache HTTP Server versions and release date from its git repository
by looking at the STATUS file of each <major>.<minor>.x branch."""
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
git = Git(config.url)
git.setup()
config = config_from_argv()
with ProductData(config.product) as product_data:
git = Git(config.url)
git.setup()
for branch in git.list_branches("refs/heads/?.?.x"):
git.checkout(branch, file_list=["STATUS"])
for branch in git.list_branches("refs/heads/?.?.x"):
git.checkout(branch, file_list=["STATUS"])
release_notes_file = git.repo_dir / "STATUS"
if not release_notes_file.exists():
continue
release_notes_file = git.repo_dir / "STATUS"
if not release_notes_file.exists():
continue
with release_notes_file.open("rb") as f:
release_notes = f.read().decode("utf-8", errors="ignore")
with release_notes_file.open("rb") as f:
release_notes = f.read().decode("utf-8", errors="ignore")
for pattern in config.include_version_patterns:
for (version, date_str) in pattern.findall(release_notes):
product_data.declare_version(version, dates.parse_date(date_str))
for pattern in config.include_version_patterns:
for (version, date_str) in pattern.findall(release_notes):
product_data.declare_version(version, dates.parse_date(date_str))

View File

@@ -1,19 +1,20 @@
import logging
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
config = config_from_argv()
with ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
ul = html.find("h2").find_next("ul")
for li in ul.find_all("li"):
text = li.get_text(strip=True)
match = config.first_match(text)
if not match:
logging.info(f"Skipping {text}, does not match any regex")
continue
ul = html.find("h2").find_next("ul")
for li in ul.find_all("li"):
text = li.get_text(strip=True)
match = config.first_match(text)
if not match:
logging.info(f"Skipping {text}, does not match any regex")
continue
version = match.group("version")
date = dates.parse_date(match.group("date"))
product_data.declare_version(version, date)
version = match.group("version")
date = dates.parse_date(match.group("date"))
product_data.declare_version(version, date)

View File

@@ -2,7 +2,8 @@ import logging
import re
from bs4 import BeautifulSoup
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
"""Fetches and parses version and release date information from Apple's support website."""
@@ -22,31 +23,31 @@ URLS = [
DATE_PATTERN = re.compile(r"\b\d+\s[A-Za-z]+\s\d+\b")
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
# URLs are cached to avoid rate limiting by support.apple.com.
soups = [BeautifulSoup(response.text, features="html5lib") for response in http.fetch_urls(URLS)]
config = config_from_argv()
with ProductData(config.product) as product_data:
# URLs are cached to avoid rate limiting by support.apple.com.
soups = [BeautifulSoup(response.text, features="html5lib") for response in http.fetch_urls(URLS)]
for soup in soups:
versions_table = soup.find(id="tableWraper")
versions_table = versions_table if versions_table else soup.find('table', class_="gb-table")
for soup in soups:
versions_table = soup.find(id="tableWraper")
versions_table = versions_table if versions_table else soup.find('table', class_="gb-table")
for row in versions_table.findAll("tr")[1:]:
cells = row.findAll("td")
version_text = cells[0].get_text().strip()
date_text = cells[2].get_text().strip()
for row in versions_table.findAll("tr")[1:]:
cells = row.findAll("td")
version_text = cells[0].get_text().strip()
date_text = cells[2].get_text().strip()
date_match = DATE_PATTERN.search(date_text)
if not date_match:
logging.info(f"ignoring version {version_text} ({date_text}), date pattern don't match")
continue
date_match = DATE_PATTERN.search(date_text)
if not date_match:
logging.info(f"ignoring version {version_text} ({date_text}), date pattern don't match")
continue
date_str = date_match.group(0).replace("Sept ", "Sep ")
date = dates.parse_date(date_str)
for version_pattern in config.include_version_patterns:
for version_str in version_pattern.findall(version_text):
version = product_data.get_version(version_str)
if not version or version.date() > date:
product_data.declare_version(version_str, date)
else:
logging.info(f"ignoring version {version_str} ({date}) for {product_data.name}")
date_str = date_match.group(0).replace("Sept ", "Sep ")
date = dates.parse_date(date_str)
for version_pattern in config.include_version_patterns:
for version_str in version_pattern.findall(version_text):
version = product_data.get_version(version_str)
if not version or version.date() > date:
product_data.declare_version(version_str, date)
else:
logging.info(f"ignoring version {version_str} ({date}) for {product_data.name}")

View File

@@ -1,22 +1,23 @@
from bs4 import BeautifulSoup
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
"""Fetches Artifactory versions from https://jfrog.com, using requests_html because JavaScript is
needed to render the page."""
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
content = http.fetch_javascript_url(config.url, wait_until = 'networkidle')
soup = BeautifulSoup(content, 'html.parser')
config = config_from_argv()
with ProductData(config.product) as product_data:
content = http.fetch_javascript_url(config.url, wait_until = 'networkidle')
soup = BeautifulSoup(content, 'html.parser')
for row in soup.select('.informaltable tbody tr'):
cells = row.select("td")
if len(cells) >= 2:
version = cells[0].text.strip()
if version:
date_str = cells[1].text.strip().replace("_", "-").replace("Sept-", "Sep-")
product_data.declare_version(version, dates.parse_date(date_str))
for row in soup.select('.informaltable tbody tr'):
cells = row.select("td")
if len(cells) >= 2:
version = cells[0].text.strip()
if version:
date_str = cells[1].text.strip().replace("_", "-").replace("Sept-", "Sep-")
product_data.declare_version(version, dates.parse_date(date_str))
# 7.29.9 release date is wrong on https://jfrog.com/help/r/jfrog-release-information/artifactory-end-of-life.
# Sent a mail to jfrog-help-center-feedback@jfrog.com to fix it, but in the meantime...
product_data.declare_version('7.29.9', dates.date(2022, 1, 11))
# 7.29.9 release date is wrong on https://jfrog.com/help/r/jfrog-release-information/artifactory-end-of-life.
# Sent a mail to jfrog-help-center-feedback@jfrog.com to fix it, but in the meantime...
product_data.declare_version('7.29.9', dates.date(2022, 1, 11))

View File

@@ -1,7 +1,8 @@
import logging
from bs4 import BeautifulSoup
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
"""Fetches EOL dates from Atlassian EOL page.
@@ -9,19 +10,19 @@ This script takes a selector argument which is the product title identifier on t
`AtlassianSupportEndofLifePolicy-JiraSoftware`.
"""
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
content = http.fetch_javascript_url(config.url)
soup = BeautifulSoup(content, features="html5lib")
config = config_from_argv()
with ProductData(config.product) as product_data:
content = http.fetch_javascript_url(config.url)
soup = BeautifulSoup(content, features="html5lib")
# Find the section with the EOL dates
for li in soup.select(f"#{config.data.get('selector')}+ul li"):
match = config.first_match(li.get_text(strip=True))
if not match:
logging.warning(f"Skipping '{li.get_text(strip=True)}', no match found")
continue
# Find the section with the EOL dates
for li in soup.select(f"#{config.data.get('selector')}+ul li"):
match = config.first_match(li.get_text(strip=True))
if not match:
logging.warning(f"Skipping '{li.get_text(strip=True)}', no match found")
continue
release_name = match.group("release")
date = dates.parse_date(match.group("date"))
release = product_data.get_release(release_name)
release.set_eol(date)
release_name = match.group("release")
date = dates.parse_date(match.group("date"))
release = product_data.get_release(release_name)
release.set_eol(date)

View File

@@ -1,5 +1,6 @@
from bs4 import BeautifulSoup
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
"""Fetches versions from Atlassian download-archives pages.
@@ -7,12 +8,12 @@ This script takes a single argument which is the url of the product's download-a
`https://www.atlassian.com/software/confluence/download-archives`.
"""
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
content = http.fetch_javascript_url(config.url, wait_until='networkidle')
soup = BeautifulSoup(content, 'html5lib')
config = config_from_argv()
with ProductData(config.product) as product_data:
content = http.fetch_javascript_url(config.url, wait_until='networkidle')
soup = BeautifulSoup(content, 'html5lib')
for version_block in soup.select('.versions-list'):
version = version_block.select_one('a.product-versions').attrs['data-version']
date = dates.parse_date(version_block.select_one('.release-date').text)
product_data.declare_version(version, date)
for version_block in soup.select('.versions-list'):
version = version_block.select_one('a.product-versions').attrs['data-version']
date = dates.parse_date(version_block.select_one('.release-date').text)
product_data.declare_version(version, date)

View File

@@ -1,46 +1,47 @@
import logging
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
"""Fetches AWS lambda runtimes with their support / EOL dates from https://docs.aws.amazon.com."""
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
config = config_from_argv()
with ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
for i, table in enumerate(html.find_all("table")):
headers = [th.get_text().strip().lower() for th in table.find("thead").find_all("tr")[0].find_all("th")]
if "identifier" not in headers or "deprecation date" not in headers or "block function update" not in headers:
logging.info(f"table with header '{headers}' does not contain all the expected headers")
continue
for i, table in enumerate(html.find_all("table")):
headers = [th.get_text().strip().lower() for th in table.find("thead").find_all("tr")[0].find_all("th")]
if "identifier" not in headers or "deprecation date" not in headers or "block function update" not in headers:
logging.info(f"table with header '{headers}' does not contain all the expected headers")
continue
is_supported_table = i == 0 # first table is for supported runtimes, second for deprecated ones
identifier_index = headers.index("identifier")
deprecation_date_index = headers.index("deprecation date")
block_function_update_index = headers.index("block function update")
is_supported_table = i == 0 # first table is for supported runtimes, second for deprecated ones
identifier_index = headers.index("identifier")
deprecation_date_index = headers.index("deprecation date")
block_function_update_index = headers.index("block function update")
for row in table.find("tbody").find_all("tr"):
cells = row.find_all("td")
identifier = cells[identifier_index].get_text().strip()
for row in table.find("tbody").find_all("tr"):
cells = row.find_all("td")
identifier = cells[identifier_index].get_text().strip()
deprecation_date_str = cells[deprecation_date_index].get_text().strip()
try:
deprecation_date = dates.parse_date(deprecation_date_str)
except ValueError:
deprecation_date = None
deprecation_date_str = cells[deprecation_date_index].get_text().strip()
try:
deprecation_date = dates.parse_date(deprecation_date_str)
except ValueError:
deprecation_date = None
if identifier == "nodejs4.3-edge":
# there is a mistake in the data: block function update date cannot be before the deprecation date
block_function_update_str = "2020-04-30"
else:
block_function_update_str = cells[block_function_update_index].get_text().strip()
try:
block_function_update = dates.parse_date(block_function_update_str)
except ValueError:
block_function_update = None
if identifier == "nodejs4.3-edge":
# there is a mistake in the data: block function update date cannot be before the deprecation date
block_function_update_str = "2020-04-30"
else:
block_function_update_str = cells[block_function_update_index].get_text().strip()
try:
block_function_update = dates.parse_date(block_function_update_str)
except ValueError:
block_function_update = None
release = product_data.get_release(identifier)
# if no date is available, use False for supported runtimes and True for deprecated ones
release.set_eoas(deprecation_date if deprecation_date else not is_supported_table)
# if no date is available, use False for supported runtimes and True for deprecated ones
release.set_eol(block_function_update if block_function_update else not is_supported_table)
release = product_data.get_release(identifier)
# if no date is available, use False for supported runtimes and True for deprecated ones
release.set_eoas(deprecation_date if deprecation_date else not is_supported_table)
# if no date is available, use False for supported runtimes and True for deprecated ones
release.set_eol(block_function_update if block_function_update else not is_supported_table)

View File

@@ -1,28 +1,29 @@
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
"""Fetches versions from repositories managed with cgit, such as the Linux kernel repository.
Ideally we would want to use the git repository directly, but cgit-managed repositories don't support partial clone."""
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
html = http.fetch_html(config.url + '/refs/tags')
config = config_from_argv()
with ProductData(config.product) as product_data:
html = http.fetch_html(config.url + '/refs/tags')
for table in html.find_all("table", class_="list"):
for row in table.find_all("tr"):
columns = row.find_all("td")
if len(columns) != 4:
continue
for table in html.find_all("table", class_="list"):
for row in table.find_all("tr"):
columns = row.find_all("td")
if len(columns) != 4:
continue
version_str = columns[0].text.strip()
version_match = config.first_match(version_str)
if not version_match:
continue
version_str = columns[0].text.strip()
version_match = config.first_match(version_str)
if not version_match:
continue
datetime_td = columns[3].find_next("span")
datetime_str = datetime_td.attrs["title"] if datetime_td else None
if not datetime_str:
continue
datetime_td = columns[3].find_next("span")
datetime_str = datetime_td.attrs["title"] if datetime_td else None
if not datetime_str:
continue
version = config.render(version_match)
date = dates.parse_datetime(datetime_str)
product_data.declare_version(version, date)
version = config.render(version_match)
date = dates.parse_datetime(datetime_str)
product_data.declare_version(version, date)

View File

@@ -1,5 +1,6 @@
from common import dates, http, releasedata
from common import dates, http
from common.git import Git
from common.releasedata import ProductData, config_from_argv
"""Fetch released versions from docs.chef.io and retrieve their date from GitHub.
docs.chef.io needs to be scraped because not all tagged versions are actually released.
@@ -7,16 +8,16 @@ docs.chef.io needs to be scraped because not all tagged versions are actually re
More context on https://github.com/endoflife-date/endoflife.date/pull/4425#discussion_r1447932411.
"""
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
released_versions = [h2.get('id') for h2 in html.find_all('h2', id=True) if h2.get('id')]
config = config_from_argv()
with ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
released_versions = [h2.get('id') for h2 in html.find_all('h2', id=True) if h2.get('id')]
git = Git(config.data.get('repository'))
git.setup(bare=True)
git = Git(config.data.get('repository'))
git.setup(bare=True)
versions = git.list_tags()
for version, date_str in versions:
if version in released_versions:
date = dates.parse_date(date_str)
product_data.declare_version(version, date)
versions = git.list_tags()
for version, date_str in versions:
if version in released_versions:
date = dates.parse_date(date_str)
product_data.declare_version(version, date)

View File

@@ -1,4 +1,5 @@
from common import dates, github, http, releasedata
from common import dates, github, http
from common.releasedata import ProductData, config_from_argv
"""Fetch released versions from docs.chef.io and retrieve their date from GitHub.
docs.chef.io needs to be scraped because not all tagged versions are actually released.
@@ -6,13 +7,13 @@ docs.chef.io needs to be scraped because not all tagged versions are actually re
More context on https://github.com/endoflife-date/endoflife.date/pull/4425#discussion_r1447932411.
"""
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
released_versions = [h2.get('id') for h2 in html.find_all('h2', id=True) if h2.get('id')]
config = config_from_argv()
with ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
released_versions = [h2.get('id') for h2 in html.find_all('h2', id=True) if h2.get('id')]
for release in github.fetch_releases("inspec/inspec"):
sanitized_version = release.tag_name.replace("v", "")
if sanitized_version in released_versions:
date = dates.parse_datetime(release.published_at)
product_data.declare_version(sanitized_version, date)
for release in github.fetch_releases("inspec/inspec"):
sanitized_version = release.tag_name.replace("v", "")
if sanitized_version in released_versions:
date = dates.parse_datetime(release.published_at)
product_data.declare_version(sanitized_version, date)

View File

@@ -1,6 +1,7 @@
import re
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
"""Fetches versions from Adobe ColdFusion release notes on helpx.adobe.com.
@@ -21,15 +22,15 @@ FIXED_VERSIONS = {
"2023.0.0": dates.date(2022, 5, 16), # https://coldfusion.adobe.com/2023/05/coldfusion2023-release/
}
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
config = config_from_argv()
with ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
for p in html.findAll("div", class_="text"):
version_and_date_str = p.get_text().strip().replace('\xa0', ' ')
for (date_str, version_str) in VERSION_AND_DATE_PATTERN.findall(version_and_date_str):
date = dates.parse_date(date_str)
version = version_str.strip().replace(",", ".") # 11,0,0,289974 -> 11.0.0.289974
product_data.declare_version(version, date)
for p in html.findAll("div", class_="text"):
version_and_date_str = p.get_text().strip().replace('\xa0', ' ')
for (date_str, version_str) in VERSION_AND_DATE_PATTERN.findall(version_and_date_str):
date = dates.parse_date(date_str)
version = version_str.strip().replace(",", ".") # 11,0,0,289974 -> 11.0.0.289974
product_data.declare_version(version, date)
product_data.declare_versions(FIXED_VERSIONS)
product_data.declare_versions(FIXED_VERSIONS)

View File

@@ -85,6 +85,15 @@ class ProductFrontmatter:
return configs
def auto_config(self, method_filter: str, url_filter: str) -> AutoConfig:
configs = self.auto_configs(method_filter, url_filter)
if len(configs) != 1:
message = f"Expected a single auto config for {self.name} with method={method_filter} and url={url_filter}; got {len(configs)}"
raise ValueError(message)
return configs[0]
def get_title(self) -> str:
return self.data["title"]

View File

@@ -193,10 +193,10 @@ class ProductData:
return self.name
def list_configs_from_argv() -> list[endoflife.AutoConfig]:
def config_from_argv() -> endoflife.AutoConfig:
return parse_argv()[1]
def parse_argv() -> tuple[endoflife.ProductFrontmatter, list[endoflife.AutoConfig]]:
def parse_argv() -> tuple[endoflife.ProductFrontmatter, endoflife.AutoConfig]:
parser = argparse.ArgumentParser(description=sys.argv[0])
parser.add_argument('-p', '--product', required=True, help='path to the product')
parser.add_argument('-m', '--method', required=True, help='method to filter by')
@@ -208,4 +208,4 @@ def parse_argv() -> tuple[endoflife.ProductFrontmatter, list[endoflife.AutoConfi
logging.basicConfig(format="%(message)s", level=(logging.DEBUG if args.verbose else logging.INFO))
product = endoflife.ProductFrontmatter(Path(args.product))
return product, product.auto_configs(args.method, args.url)
return product, product.auto_config(args.method, args.url)

View File

@@ -2,7 +2,8 @@ import datetime
import re
from bs4 import BeautifulSoup
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
MILESTONE_PATTERN = re.compile(r'COS \d+ LTS')
VERSION_PATTERN = re.compile(r"^(cos-\d+-\d+-\d+-\d+)")
@@ -14,31 +15,31 @@ def parse_date(date_text: str) -> datetime:
return dates.parse_date(date_text)
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
main = http.fetch_url(config.url)
main_soup = BeautifulSoup(main.text, features="html5lib")
milestones = [cell.text.split(' ')[1] for cell in main_soup.find_all('td', string=MILESTONE_PATTERN)]
config = config_from_argv()
with ProductData(config.product) as product_data:
main = http.fetch_url(config.url)
main_soup = BeautifulSoup(main.text, features="html5lib")
milestones = [cell.text.split(' ')[1] for cell in main_soup.find_all('td', string=MILESTONE_PATTERN)]
milestones_urls = [f"{main.url}m{milestone}" for milestone in milestones]
for milestone in http.fetch_urls(milestones_urls):
milestone_soup = BeautifulSoup(milestone.text, features="html5lib")
for article in milestone_soup.find_all('article', class_='devsite-article'):
for heading in article.find_all(['h2', 'h3']): # headings contains the date, which we parse
version_str = heading.get('data-text')
version_match = VERSION_PATTERN.match(version_str)
if not version_match:
continue
milestones_urls = [f"{main.url}m{milestone}" for milestone in milestones]
for milestone in http.fetch_urls(milestones_urls):
milestone_soup = BeautifulSoup(milestone.text, features="html5lib")
for article in milestone_soup.find_all('article', class_='devsite-article'):
for heading in article.find_all(['h2', 'h3']): # headings contains the date, which we parse
version_str = heading.get('data-text')
version_match = VERSION_PATTERN.match(version_str)
if not version_match:
continue
try: # 1st row is the header, so pick the first td in the 2nd row
date_str = heading.find_next('tr').find_next('tr').find_next('td').text
except AttributeError: # In some older releases, it is mentioned as Date: [Date]
date_str = heading.find_next('i').text
try: # 1st row is the header, so pick the first td in the 2nd row
date_str = heading.find_next('tr').find_next('tr').find_next('td').text
except AttributeError: # In some older releases, it is mentioned as Date: [Date]
date_str = heading.find_next('i').text
try:
date = parse_date(date_str)
except ValueError: # for some h3, the date is in the previous h2
date_str = heading.find_previous('h2').get('data-text')
date = parse_date(date_str)
try:
date = parse_date(date_str)
except ValueError: # for some h3, the date is in the previous h2
date_str = heading.find_previous('h2').get('data-text')
date = parse_date(date_str)
product_data.declare_version(version_match.group(1), date)
product_data.declare_version(version_match.group(1), date)

View File

@@ -1,7 +1,8 @@
import logging
from bs4 import BeautifulSoup
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
"""Fetches versions from release notes of each minor version on docs.couchbase.com.
@@ -16,25 +17,25 @@ MANUAL_VERSIONS = {
"7.2.0": dates.date(2023, 6, 1), # https://www.couchbase.com/blog/couchbase-capella-spring-release-72/
}
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
html = http.fetch_html(f"{config.url}/current/install/install-intro.html")
config = config_from_argv()
with ProductData(config.product) as product_data:
html = http.fetch_html(f"{config.url}/current/install/install-intro.html")
minor_versions = [options.attrs["value"] for options in html.find(class_="version_list").find_all("option")]
minor_version_urls = [f"{config.url}/{minor}/release-notes/relnotes.html" for minor in minor_versions]
minor_versions = [options.attrs["value"] for options in html.find(class_="version_list").find_all("option")]
minor_version_urls = [f"{config.url}/{minor}/release-notes/relnotes.html" for minor in minor_versions]
for minor_version in http.fetch_urls(minor_version_urls):
minor_version_soup = BeautifulSoup(minor_version.text, features="html5lib")
for minor_version in http.fetch_urls(minor_version_urls):
minor_version_soup = BeautifulSoup(minor_version.text, features="html5lib")
for title in minor_version_soup.find_all("h2"):
match = config.first_match(title.get_text().strip())
if not match:
logging.info(f"Skipping {title}, does not match any regex")
continue
for title in minor_version_soup.find_all("h2"):
match = config.first_match(title.get_text().strip())
if not match:
logging.info(f"Skipping {title}, does not match any regex")
continue
version = match["version"]
version = f"{version}.0" if len(version.split(".")) == 2 else version
date = dates.parse_month_year_date(match['date'])
product_data.declare_version(version, date)
version = match["version"]
version = f"{version}.0" if len(version.split(".")) == 2 else version
date = dates.parse_month_year_date(match['date'])
product_data.declare_version(version, date)
product_data.declare_versions(MANUAL_VERSIONS)
product_data.declare_versions(MANUAL_VERSIONS)

View File

@@ -1,13 +1,14 @@
from pathlib import Path
from subprocess import run
from common import dates, releasedata
from common import dates
from common.git import Git
from common.releasedata import ProductData, config_from_argv
"""Fetch Debian versions by parsing news in www.debian.org source repository."""
def extract_major_versions(p: releasedata.ProductData, repo_dir: Path) -> None:
def extract_major_versions(p: ProductData, repo_dir: Path) -> None:
child = run(
f"grep -RhE -A 1 '<define-tag pagetitle>Debian [0-9]+.+</q> released' {repo_dir}/english/News "
f"| cut -d '<' -f 2 "
@@ -26,7 +27,7 @@ def extract_major_versions(p: releasedata.ProductData, repo_dir: Path) -> None:
is_release_line = True
def extract_point_versions(p: releasedata.ProductData, repo_dir: Path) -> None:
def extract_point_versions(p: ProductData, repo_dir: Path) -> None:
child = run(
f"grep -Rh -B 10 '<define-tag revision>' {repo_dir}/english/News "
"| grep -Eo '(release_date>(.*)<|revision>(.*)<)' "
@@ -40,11 +41,11 @@ def extract_point_versions(p: releasedata.ProductData, repo_dir: Path) -> None:
(date, version) = line.split(' ')
p.declare_version(version, dates.parse_date(date))
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
git = Git(config.url)
git.setup()
git.checkout("master", file_list=["english/News"])
config = config_from_argv()
with ProductData(config.product) as product_data:
git = Git(config.url)
git.setup()
git.checkout("master", file_list=["english/News"])
extract_major_versions(product_data, git.repo_dir)
extract_point_versions(product_data, git.repo_dir)
extract_major_versions(product_data, git.repo_dir)
extract_point_versions(product_data, git.repo_dir)

View File

@@ -1,18 +1,19 @@
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
html = http.fetch_html(f"https://distrowatch.com/index.php?distribution={config.url}")
config = config_from_argv()
with ProductData(config.product) as product_data:
html = http.fetch_html(f"https://distrowatch.com/index.php?distribution={config.url}")
for table in html.select("td.News1>table.News"):
headline = table.select_one("td.NewsHeadline a[href]").get_text().strip()
versions_match = config.first_match(headline)
if not versions_match:
continue
for table in html.select("td.News1>table.News"):
headline = table.select_one("td.NewsHeadline a[href]").get_text().strip()
versions_match = config.first_match(headline)
if not versions_match:
continue
# multiple versions may be released at once (e.g. Ubuntu 16.04.7 and 18.04.5)
versions = config.render(versions_match).split("\n")
date = dates.parse_date(table.select_one("td.NewsDate").get_text())
# multiple versions may be released at once (e.g. Ubuntu 16.04.7 and 18.04.5)
versions = config.render(versions_match).split("\n")
date = dates.parse_date(table.select_one("td.NewsDate").get_text())
for version in versions:
product_data.declare_version(version, date)
for version in versions:
product_data.declare_version(version, date)

View File

@@ -1,10 +1,11 @@
from common import dates, endoflife, http, releasedata
from common import dates, endoflife, http
from common.releasedata import ProductData, config_from_argv
"""Fetches releases from the Docker Hub API.
Unfortunately images creation date cannot be retrieved, so we had to use the tag_last_pushed field instead."""
def fetch_releases(p: releasedata.ProductData, c: endoflife.AutoConfig, url: str) -> None:
def fetch_releases(p: ProductData, c: endoflife.AutoConfig, url: str) -> None:
data = http.fetch_json(url)
for result in data["results"]:
@@ -17,6 +18,6 @@ def fetch_releases(p: releasedata.ProductData, c: endoflife.AutoConfig, url: str
fetch_releases(p, c, data["next"])
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
fetch_releases(product_data, config, f"https://hub.docker.com/v2/repositories/{config.url}/tags?page_size=100&page=1")
config = config_from_argv()
with ProductData(config.product) as product_data:
fetch_releases(product_data, config, f"https://hub.docker.com/v2/repositories/{config.url}/tags?page_size=100&page=1")

View File

@@ -1,7 +1,8 @@
import urllib.parse
from bs4 import BeautifulSoup
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
"""Fetch Firefox versions with their dates from https://www.mozilla.org/.
@@ -20,15 +21,15 @@ The script will need to be updated if someday those conditions are not met."""
MAX_VERSIONS_LIMIT = 100
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
releases_page = http.fetch_url(config.url)
releases_soup = BeautifulSoup(releases_page.text, features="html5lib")
releases_list = releases_soup.find_all("ol", class_="c-release-list")
config = config_from_argv()
with ProductData(config.product) as product_data:
releases_page = http.fetch_url(config.url)
releases_soup = BeautifulSoup(releases_page.text, features="html5lib")
releases_list = releases_soup.find_all("ol", class_="c-release-list")
release_notes_urls = [urllib.parse.urljoin(releases_page.url, p.get("href")) for p in releases_list[0].find_all("a")]
for release_notes in http.fetch_urls(release_notes_urls[:MAX_VERSIONS_LIMIT]):
version = release_notes.url.split("/")[-3]
release_notes_soup = BeautifulSoup(release_notes.text, features="html5lib")
date_str = release_notes_soup.find(class_="c-release-date").get_text() # note: only works for versions > 25
product_data.declare_version(version, dates.parse_date(date_str))
release_notes_urls = [urllib.parse.urljoin(releases_page.url, p.get("href")) for p in releases_list[0].find_all("a")]
for release_notes in http.fetch_urls(release_notes_urls[:MAX_VERSIONS_LIMIT]):
version = release_notes.url.split("/")[-3]
release_notes_soup = BeautifulSoup(release_notes.text, features="html5lib")
date_str = release_notes_soup.find(class_="c-release-date").get_text() # note: only works for versions > 25
product_data.declare_version(version, dates.parse_date(date_str))

View File

@@ -14,7 +14,8 @@ References:
import re
from typing import Any, Generator, Iterator
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
def parse_markdown_tables(lineiter: Iterator[str]) -> Generator[list[list[Any]], Any, None]:
@@ -50,41 +51,41 @@ def maybe_markdown_table_row(line: str) -> list[str] | None:
return None
return [x.strip() for x in line.strip('|').split('|')]
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product:
resp = http.fetch_url(config.url)
resp.raise_for_status()
data = resp.json()
assert data['title'] == "GHC Status"
assert data['format'] == "markdown"
md = data['content'].splitlines()
config = config_from_argv()
with ProductData(config.product) as product_data:
resp = http.fetch_url(config.url)
resp.raise_for_status()
data = resp.json()
assert data['title'] == "GHC Status"
assert data['format'] == "markdown"
md = data['content'].splitlines()
#-- Parse tables out of the wiki text. At time of writing, the script expects exactly two:
#-- 1. "Most recent major" with 5 columns
#-- 2. "All released versions" with 5 columns
[series_table, patch_level] = parse_markdown_tables(iter(md))
#-- Parse tables out of the wiki text. At time of writing, the script expects exactly two:
#-- 1. "Most recent major" with 5 columns
#-- 2. "All released versions" with 5 columns
[series_table, patch_level] = parse_markdown_tables(iter(md))
for row in series_table[1:]:
[series, _download_link, _most_recent, next_planned, status] = row
if status == "Next major release":
continue
for row in series_table[1:]:
[series, _download_link, _most_recent, next_planned, status] = row
if status == "Next major release":
continue
series = series.split(' ')[0]
series = series.replace('\\.', '.')
if series == "Nightlies":
continue
status = status.lower()
series = series.split(' ')[0]
series = series.replace('\\.', '.')
if series == "Nightlies":
continue
status = status.lower()
#-- See discussion in https://github.com/endoflife-date/endoflife.date/pull/6287
r = product.get_release(series)
#-- The clearest semblance of an EOL signal we get
r.set_eol("not recommended for use" in status or ":red_circle:" in status)
#-- eoasColumn label is "Further releases planned"
r.set_eoas(any(keyword in next_planned for keyword in ("None", "N/A")))
#-- See discussion in https://github.com/endoflife-date/endoflife.date/pull/6287
r = product_data.get_release(series)
#-- The clearest semblance of an EOL signal we get
r.set_eol("not recommended for use" in status or ":red_circle:" in status)
#-- eoasColumn label is "Further releases planned"
r.set_eoas(any(keyword in next_planned for keyword in ("None", "N/A")))
for row in patch_level[1:]:
[milestone, _download_link, date, _ticket, _manager] = row
version = milestone.lstrip('%')
version = version.split(' ') [0]
date = dates.parse_date(date)
product.declare_version(version, date)
for row in patch_level[1:]:
[milestone, _download_link, date, _ticket, _manager] = row
version = milestone.lstrip('%')
version = version.split(' ') [0]
date = dates.parse_date(date)
product_data.declare_version(version, date)

View File

@@ -1,17 +1,18 @@
from common import dates, releasedata
from common import dates
from common.git import Git
from common.releasedata import ProductData, config_from_argv
"""Fetches versions from tags in a git repository. This replace the old update.rb script."""
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
git = Git(config.url)
git.setup(bare=True)
config = config_from_argv()
with ProductData(config.product) as product_data:
git = Git(config.url)
git.setup(bare=True)
tags = git.list_tags()
for tag, date_str in tags:
version_match = config.first_match(tag)
if version_match:
version = config.render(version_match)
date = dates.parse_date(date_str)
product_data.declare_version(version, date)
tags = git.list_tags()
for tag, date_str in tags:
version_match = config.first_match(tag)
if version_match:
version = config.render(version_match)
date = dates.parse_date(date_str)
product_data.declare_version(version, date)

View File

@@ -1,19 +1,20 @@
from common import dates, github, releasedata
from common import dates, github
from common.releasedata import ProductData, config_from_argv
"""Fetches versions from GitHub releases using the GraphQL API and the GitHub CLI.
Note: GraphQL API and GitHub CLI are used because it's simpler: no need to manage pagination and authentication.
"""
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
for release in github.fetch_releases(config.url):
if release.is_prerelease:
continue
config = config_from_argv()
with ProductData(config.product) as product_data:
for release in github.fetch_releases(config.url):
if release.is_prerelease:
continue
version_str = release.tag_name
version_match = config.first_match(version_str)
if version_match:
version = config.render(version_match)
date = dates.parse_datetime(release.published_at)
product_data.declare_version(version, date)
version_str = release.tag_name
version_match = config.first_match(version_str)
if version_match:
version = config.render(version_match)
date = dates.parse_datetime(release.published_at)
product_data.declare_version(version, date)

View File

@@ -1,16 +1,17 @@
from common import dates, github, releasedata
from common import dates, github
from common.releasedata import ProductData, config_from_argv
"""Fetches versions from GitHub tags using the GraphQL API and the GitHub CLI.
Note: GraphQL API and GitHub CLI are used because it's simpler: no need to manage pagination and authentication.
"""
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
for tag in github.fetch_tags(config.url):
version_str = tag.name
version_match = config.first_match(version_str)
if version_match:
version = config.render(version_match)
date = dates.parse_datetime(tag.commit_date)
product_data.declare_version(version, date)
config = config_from_argv()
with ProductData(config.product) as product_data:
for tag in github.fetch_tags(config.url):
version_str = tag.name
version_match = config.first_match(version_str)
if version_match:
version = config.render(version_match)
date = dates.parse_datetime(tag.commit_date)
product_data.declare_version(version, date)

View File

@@ -1,6 +1,7 @@
import re
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
# https://regex101.com/r/zPxBqT/1
VERSION_PATTERN = re.compile(r"\d.\d+\.\d+-gke\.\d+")
@@ -11,17 +12,17 @@ URL_BY_PRODUCT = {
"google-kubernetes-engine-rapid": "https://cloud.google.com/kubernetes-engine/docs/release-notes-rapid",
}
for config in releasedata.list_configs_from_argv(): # noqa: B007 multiple JSON produced for historical reasons
for product_name, url in URL_BY_PRODUCT.items():
with releasedata.ProductData(product_name) as product_data:
html = http.fetch_html(url)
config = config_from_argv() # multiple JSON produced for historical reasons
for product_name, url in URL_BY_PRODUCT.items():
with ProductData(product_name) as product_data:
html = http.fetch_html(url)
for section in html.find_all('section', class_='releases'):
for h2 in section.find_all('h2'): # h2 contains the date
date = dates.parse_date(h2.get('data-text'))
for section in html.find_all('section', class_='releases'):
for h2 in section.find_all('h2'): # h2 contains the date
date = dates.parse_date(h2.get('data-text'))
next_div = h2.find_next('div') # The div next to the h2 contains the notes about changes made on that date
for li in next_div.find_all('li'):
if "versions are now available" in li.text:
for version in VERSION_PATTERN.findall(li.find('ul').text):
product_data.declare_version(version, date)
next_div = h2.find_next('div') # The div next to the h2 contains the notes about changes made on that date
for li in next_div.find_all('li'):
if "versions are now available" in li.text:
for version in VERSION_PATTERN.findall(li.find('ul').text):
product_data.declare_version(version, date)

View File

@@ -1,40 +1,33 @@
import logging
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
table_selector = config.data.get("table_selector", "#previous-releases + table").strip()
date_column = config.data.get("date_column", "Date").strip().lower()
versions_column = config.data.get("versions_column").strip().lower()
config = config_from_argv()
with ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
table_selector = config.data.get("table_selector", "#previous-releases + table").strip()
date_column = config.data.get("date_column", "Date").strip().lower()
versions_column = config.data.get("versions_column").strip().lower()
table = html.select_one(table_selector)
if not table:
logging.warning(f"Skipping config {config} as no table found with selector {table_selector}")
table = html.select_one(table_selector)
headers = [th.get_text().strip().lower() for th in table.select("thead th")]
date_index = headers.index(date_column)
versions_index = headers.index(versions_column)
for row in table.select("tbody tr"):
cells = row.select("td")
if len(cells) <= max(date_index, versions_index):
logging.warning(f"Skipping row {cells}: not enough cells")
continue
headers = [th.get_text().strip().lower() for th in table.select("thead th")]
if date_column not in headers or versions_column not in headers:
logging.info(f"Skipping table with headers {headers} as it does not contain the required columns: {date_column}, {versions_column}")
date_text = cells[date_index].get_text().strip()
date = dates.parse_date(date_text)
if date > dates.today():
logging.info(f"Skipping future version {cells}")
continue
date_index = headers.index(date_column)
versions_index = headers.index(versions_column)
for row in table.select("tbody tr"):
cells = row.select("td")
if len(cells) <= max(date_index, versions_index):
logging.warning(f"Skipping row {cells}: not enough cells")
continue
date_text = cells[date_index].get_text().strip()
date = dates.parse_date(date_text)
if date > dates.today():
logging.info(f"Skipping future version {cells}")
continue
versions = cells[versions_index].get_text().strip()
for version in versions.split(", "):
if config.first_match(version):
product_data.declare_version(version.strip(), date)
versions = cells[versions_index].get_text().strip()
for version in versions.split(", "):
if config.first_match(version):
product_data.declare_version(version.strip(), date)

View File

@@ -1,29 +1,30 @@
import re
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
CYCLE_PATTERN = re.compile(r"^(\d+\.\d+)/$")
DATE_AND_VERSION_PATTERN = re.compile(r"^(\d{4})/(\d{2})/(\d{2})\s+:\s+(\d+\.\d+\.\d.?)$") # https://regex101.com/r/1JCnFC/1
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
# First, get all minor releases from the download page
download_html = http.fetch_html(config.url)
minor_versions = []
for link in download_html.select("a"):
minor_version_match = CYCLE_PATTERN.match(link.attrs["href"])
if not minor_version_match:
continue
config = config_from_argv()
with ProductData(config.product) as product_data:
# First, get all minor releases from the download page
download_html = http.fetch_html(config.url)
minor_versions = []
for link in download_html.select("a"):
minor_version_match = CYCLE_PATTERN.match(link.attrs["href"])
if not minor_version_match:
continue
minor_version = minor_version_match.groups()[0]
if minor_version != "1.0": # No changelog in https://www.haproxy.org/download/1.0/src
minor_versions.append(minor_version)
minor_version = minor_version_match.groups()[0]
if minor_version != "1.0": # No changelog in https://www.haproxy.org/download/1.0/src
minor_versions.append(minor_version)
# Then, fetches all versions from each changelog
changelog_urls = [f"{config.url}{minor_version}/src/CHANGELOG" for minor_version in minor_versions]
for changelog in http.fetch_urls(changelog_urls):
for line in changelog.text.split('\n'):
date_and_version_match = DATE_AND_VERSION_PATTERN.match(line)
if date_and_version_match:
year, month, day, version = date_and_version_match.groups()
product_data.declare_version(version, dates.date(int(year), int(month), int(day)))
# Then, fetches all versions from each changelog
changelog_urls = [f"{config.url}{minor_version}/src/CHANGELOG" for minor_version in minor_versions]
for changelog in http.fetch_urls(changelog_urls):
for line in changelog.text.split('\n'):
date_and_version_match = DATE_AND_VERSION_PATTERN.match(line)
if date_and_version_match:
year, month, day, version = date_and_version_match.groups()
product_data.declare_version(version, dates.date(int(year), int(month), int(day)))

View File

@@ -1,12 +1,13 @@
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
config = config_from_argv()
with ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
for release_table in html.find("div", class_="ibm-container-body").find_all("table", class_="ibm-data-table ibm-grid"):
for row in release_table.find_all("tr")[1:]: # for all rows except the header
cells = row.find_all("td")
version = cells[0].text.strip("AIX ").replace(' TL', '.')
date = dates.parse_month_year_date(cells[1].text)
product_data.declare_version(version, date)
for release_table in html.find("div", class_="ibm-container-body").find_all("table", class_="ibm-data-table ibm-grid"):
for row in release_table.find_all("tr")[1:]: # for all rows except the header
cells = row.find_all("td")
version = cells[0].text.strip("AIX ").replace(' TL', '.')
date = dates.parse_month_year_date(cells[1].text)
product_data.declare_version(version, date)

View File

@@ -1,6 +1,7 @@
import logging
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
"""Fetch version data for Kuma from https://raw.githubusercontent.com/kumahq/kuma/master/versions.yml.
"""
@@ -9,25 +10,25 @@ RELEASE_FIELD = 'release'
RELEASE_DATE_FIELD = 'releaseDate'
EOL_FIELD = 'endOfLifeDate'
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
versions_data = http.fetch_yaml(config.url)
config = config_from_argv()
with ProductData(config.product) as product_data:
versions_data = http.fetch_yaml(config.url)
# Iterate through the versions and their associated dates
for version_info in versions_data:
release_name = version_info[RELEASE_FIELD]
if not release_name.endswith('.x'):
logging.info(f"skipping release with name {release_name}: does not end with '.x'")
continue
# Iterate through the versions and their associated dates
for version_info in versions_data:
release_name = version_info[RELEASE_FIELD]
if not release_name.endswith('.x'):
logging.info(f"skipping release with name {release_name}: does not end with '.x'")
continue
if RELEASE_DATE_FIELD not in version_info or EOL_FIELD not in version_info:
logging.info(f"skipping release with name {release_name}: does not contain {RELEASE_DATE_FIELD} or {EOL_FIELD} fields")
continue
if RELEASE_DATE_FIELD not in version_info or EOL_FIELD not in version_info:
logging.info(f"skipping release with name {release_name}: does not contain {RELEASE_DATE_FIELD} or {EOL_FIELD} fields")
continue
release = product_data.get_release(release_name.replace('.x', ''))
release = product_data.get_release(release_name.replace('.x', ''))
release_date = dates.parse_date(version_info[RELEASE_DATE_FIELD])
release.set_field('releaseDate', release_date)
release_date = dates.parse_date(version_info[RELEASE_DATE_FIELD])
release.set_field('releaseDate', release_date)
eol = dates.parse_date(version_info[EOL_FIELD])
release.set_field('eol', eol)
eol = dates.parse_date(version_info[EOL_FIELD])
release.set_field('eol', eol)

View File

@@ -1,27 +1,28 @@
import logging
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
"""Fetches LibreOffice versions from https://downloadarchive.documentfoundation.org/libreoffice/old/"""
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
config = config_from_argv()
with ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
for table in html.find_all("table"):
for row in table.find_all("tr")[1:]:
cells = row.find_all("td")
if len(cells) < 4:
continue
for table in html.find_all("table"):
for row in table.find_all("tr")[1:]:
cells = row.find_all("td")
if len(cells) < 4:
continue
version_str = cells[1].get_text().strip()
version_match = config.first_match(version_str)
if not version_match:
logging.warning(f"Skipping version {version_str} as it does not match any known version pattern")
continue
version = config.render(version_match)
version_str = cells[1].get_text().strip()
version_match = config.first_match(version_str)
if not version_match:
logging.warning(f"Skipping version {version_str} as it does not match any known version pattern")
continue
version = config.render(version_match)
date_str = cells[2].get_text().strip()
date = dates.parse_datetime(date_str)
date_str = cells[2].get_text().strip()
date = dates.parse_datetime(date_str)
product_data.declare_version(version, date)
product_data.declare_version(version, date)

View File

@@ -1,31 +1,32 @@
import re
from bs4 import BeautifulSoup
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
"""Fetch Looker versions from the Google Cloud release notes RSS feed.
"""
ANNOUNCEMENT_PATTERN = re.compile(r"includes\s+the\s+following\s+changes", re.IGNORECASE)
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
rss = http.fetch_xml(config.url)
config = config_from_argv()
with ProductData(config.product) as product_data:
rss = http.fetch_xml(config.url)
for item in rss.getElementsByTagName("entry"):
content = item.getElementsByTagName("content")[0].firstChild.nodeValue
content_soup = BeautifulSoup(content, features="html5lib")
for item in rss.getElementsByTagName("entry"):
content = item.getElementsByTagName("content")[0].firstChild.nodeValue
content_soup = BeautifulSoup(content, features="html5lib")
announcement_match = content_soup.find(string=ANNOUNCEMENT_PATTERN)
if not announcement_match:
continue
announcement_match = content_soup.find(string=ANNOUNCEMENT_PATTERN)
if not announcement_match:
continue
version_match = config.first_match(announcement_match.parent.get_text())
if not version_match:
continue
version = config.render(version_match)
version_match = config.first_match(announcement_match.parent.get_text())
if not version_match:
continue
version = config.render(version_match)
date_str = item.getElementsByTagName("updated")[0].firstChild.nodeValue
date = dates.parse_datetime(date_str)
date_str = item.getElementsByTagName("updated")[0].firstChild.nodeValue
date = dates.parse_datetime(date_str)
product_data.declare_version(version, date)
product_data.declare_version(version, date)

View File

@@ -1,23 +1,24 @@
import re
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
"""Fetches Lua releases from lua.org."""
RELEASED_AT_PATTERN = re.compile(r"Lua\s*(?P<release>\d+\.\d+)\s*was\s*released\s*on\s*(?P<release_date>\d+\s*\w+\s*\d{4})")
VERSION_PATTERN = re.compile(r"(?P<version>\d+\.\d+\.\d+),\s*released\s*on\s*(?P<version_date>\d+\s*\w+\s*\d{4})")
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
html = http.fetch_html(config.url, features = 'html.parser')
page_text = html.text # HTML is broken, no way to parse it with beautifulsoup
config = config_from_argv()
with ProductData(config.product) as product_data:
html = http.fetch_html(config.url, features = 'html.parser')
page_text = html.text # HTML is broken, no way to parse it with beautifulsoup
for release_match in RELEASED_AT_PATTERN.finditer(page_text):
release = release_match.group('release')
release_date = dates.parse_date(release_match.group('release_date'))
product_data.get_release(release).set_release_date(release_date)
for release_match in RELEASED_AT_PATTERN.finditer(page_text):
release = release_match.group('release')
release_date = dates.parse_date(release_match.group('release_date'))
product_data.get_release(release).set_release_date(release_date)
for version_match in VERSION_PATTERN.finditer(page_text):
version = version_match.group('version')
version_date = dates.parse_date(version_match.group('version_date'))
product_data.declare_version(version, version_date)
for version_match in VERSION_PATTERN.finditer(page_text):
version = version_match.group('version')
version_date = dates.parse_date(version_match.group('version_date'))
product_data.declare_version(version, version_date)

View File

@@ -1,23 +1,24 @@
from datetime import datetime, timezone
from common import http, releasedata
from common import http
from common.releasedata import ProductData, config_from_argv
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
start = 0
group_id, artifact_id = config.url.split("/")
config = config_from_argv()
with ProductData(config.product) as product_data:
start = 0
group_id, artifact_id = config.url.split("/")
while True:
url = f"https://search.maven.org/solrsearch/select?q=g:{group_id}+AND+a:{artifact_id}&core=gav&wt=json&start={start}&rows=100"
data = http.fetch_json(url)
while True:
url = f"https://search.maven.org/solrsearch/select?q=g:{group_id}+AND+a:{artifact_id}&core=gav&wt=json&start={start}&rows=100"
data = http.fetch_json(url)
for row in data["response"]["docs"]:
version_match = config.first_match(row["v"])
if version_match:
version = config.render(version_match)
date = datetime.fromtimestamp(row["timestamp"] / 1000, tz=timezone.utc)
product_data.declare_version(version, date)
for row in data["response"]["docs"]:
version_match = config.first_match(row["v"])
if version_match:
version = config.render(version_match)
date = datetime.fromtimestamp(row["timestamp"] / 1000, tz=timezone.utc)
product_data.declare_version(version, date)
start += 100
if data["response"]["numFound"] <= start:
break
start += 100
if data["response"]["numFound"] <= start:
break

View File

@@ -1,32 +1,33 @@
import logging
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
"""Fetches NetBSD versions and EOL information from https://www.netbsd.org/."""
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
config = config_from_argv()
with ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
for row in html.select('table tbody tr'):
cells = [cell.get_text(strip=True) for cell in row.select('td')]
for row in html.select('table tbody tr'):
cells = [cell.get_text(strip=True) for cell in row.select('td')]
version = cells[0]
if not version.startswith('NetBSD'):
logging.info(f"Skipping row {cells}, version does not start with 'NetBSD'")
continue
version = version.split(' ')[1]
version = cells[0]
if not version.startswith('NetBSD'):
logging.info(f"Skipping row {cells}, version does not start with 'NetBSD'")
continue
version = version.split(' ')[1]
try:
release_date = dates.parse_date(cells[1])
product_data.declare_version(version, release_date)
except ValueError:
logging.warning(f"Skipping row {cells}, could not parse release date")
try:
release_date = dates.parse_date(cells[1])
product_data.declare_version(version, release_date)
except ValueError:
logging.warning(f"Skipping row {cells}, could not parse release date")
eol_str = cells[2]
if not eol_str:
continue
eol_str = cells[2]
if not eol_str:
continue
eol = dates.parse_date(eol_str)
major_version = version.split('.')[0]
product_data.get_release(major_version).set_eol(eol)
eol = dates.parse_date(eol_str)
major_version = version.split('.')[0]
product_data.get_release(major_version).set_eol(eol)

View File

@@ -1,11 +1,12 @@
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
data = http.fetch_json(f"https://registry.npmjs.org/{config.url}")
for version_str in data["versions"]:
version_match = config.first_match(version_str)
if version_match:
version = config.render(version_match)
date = dates.parse_datetime(data["time"][version_str])
product_data.declare_version(version, date)
config = config_from_argv()
with ProductData(config.product) as product_data:
data = http.fetch_json(f"https://registry.npmjs.org/{config.url}")
for version_str in data["versions"]:
version_match = config.first_match(version_str)
if version_match:
version = config.render(version_match)
date = dates.parse_datetime(data["time"][version_str])
product_data.declare_version(version, date)

View File

from common import dates, http
from common.releasedata import ProductData, config_from_argv

"""Fetch Nutanix products versions from https://portal.nutanix.com/api/v1."""

config = config_from_argv()
with ProductData(config.product) as product_data:
    # config.url holds the Nutanix EOL API product type identifier.
    data = http.fetch_json(f"https://portal.nutanix.com/api/v1/eol/find?type={config.url}")

    for version_data in data["contents"]:
        # Major.minor release name (NOTE(review): appears unused in this script — confirm before removing).
        release_name = '.'.join(version_data["version"].split(".")[:2])

        if 'GENERAL_AVAILABILITY' in version_data:
            version = version_data["version"]
            # Seconds are zeroed to keep the declared datetime stable across API responses.
            date = dates.parse_datetime(version_data["GENERAL_AVAILABILITY"]).replace(second=0)
            product_data.declare_version(version, date)

View File

from bs4 import BeautifulSoup

from common import dates, http
from common.releasedata import ProductData, config_from_argv

"""Fetch Java versions from https://www.java.com/releases/.

This script is using requests-html because the page needs JavaScript to render correctly."""

config = config_from_argv()
with ProductData(config.product) as product_data:
    html = http.fetch_javascript_url(config.url)
    soup = BeautifulSoup(html, 'html5lib')

    # Some rows omit the date column; they inherit the date of the previous row.
    previous_date = None
    for row in soup.select('#released tr'):
        version_cell = row.select_one('td.anchor')
        if version_cell:
            version = version_cell.attrs['id']
            date_str = row.select('td')[1].text
            date = dates.parse_date(date_str) if date_str else previous_date
            product_data.declare_version(version, date)
            previous_date = date

    product_data.remove_version('1.0_alpha')  # the only version we don't want, a regex is not needed

View File

from common import dates, http
from common.releasedata import ProductData, config_from_argv

"""Fetches pan-os versions from https://github.com/mrjcap/panos-versions/."""

config = config_from_argv()
with ProductData(config.product) as product_data:
    # config.url points at a JSON document listing {'version': ..., 'released-on': ...} entries.
    versions = http.fetch_json(config.url)

    for version in versions:
        name = version['version']
        date = dates.parse_datetime(version['released-on'])
        product_data.declare_version(name, date)

View File

from common import dates, http
from common.releasedata import ProductData, config_from_argv

"""Fetch all versions from an API that lists the latest release per major version,
then exposes per-major-version listings via a '&version=' query parameter."""

config = config_from_argv()
with ProductData(config.product) as product_data:
    # Fetch major versions
    latest_by_major = http.fetch_url(config.url).json()
    major_version_urls = [f"{config.url}&version={major_version}" for major_version in latest_by_major]

    # Fetch all versions for major versions
    for major_versions_response in http.fetch_urls(major_version_urls):
        major_versions_data = major_versions_response.json()
        for version in major_versions_data:
            if config.first_match(version):  # exclude versions such as "3.0.x (latest)"
                date = dates.parse_date(major_versions_data[version]["date"])
                product_data.declare_version(version, date)

View File

from common import dates, http
from common.releasedata import ProductData, config_from_argv

"""Fetches versions from Plesk's change log.

Only 18.0.20.3 and later will be picked up, as the format of the change log for 18.0.20 and 18.0.19 are different and
there is no entry for GA of version 18.0.18 and older."""

config = config_from_argv()
with ProductData(config.product) as product_data:
    html = http.fetch_html(config.url)

    for release in html.find_all("div", class_="changelog-entry--obsidian"):
        version = release.h2.text.strip()
        if not version.startswith('Plesk Obsidian 18'):
            continue

        # 'Plesk Obsidian 18.0.20 Update 3' => '18.0.20.3'
        version = version.replace(' Update ', '.').replace('Plesk Obsidian ', '')
        if ' ' in version:
            # A remaining space means the title had extra text (pre-release, notes...) — skip it.
            continue

        date = dates.parse_date(release.p.text)
        product_data.declare_version(version, date)

View File

from common import dates, http
from common.releasedata import ProductData, config_from_argv

"""Fetches versions of a PyPI package from the PyPI JSON API."""

config = config_from_argv()
with ProductData(config.product) as product_data:
    # config.url holds the PyPI project name.
    data = http.fetch_json(f"https://pypi.org/pypi/{config.url}/json")

    for version_str in data["releases"]:
        version_match = config.first_match(version_str)
        version_data = data["releases"][version_str]

        # version_data may be an empty list for yanked/placeholder releases — skip those.
        if version_match and version_data:
            version = config.render(version_match)
            date = dates.parse_datetime(version_data[0]["upload_time_iso_8601"])
            product_data.declare_version(version, date)

View File

import logging

from common import dates, http
from common.releasedata import ProductData, config_from_argv

"""Fetches Amazon RDS versions from the version management pages on AWS docs.

Pages parsed by this script are expected to have version tables with a version in the first column and a release date
in the third column (usually named 'RDS release date').
"""

config = config_from_argv()
with ProductData(config.product) as product_data:
    html = http.fetch_html(config.url)

    for table in html.find_all("table"):
        for row in table.find_all("tr"):
            columns = row.find_all("td")
            # Rows with 3 columns or fewer are headers or unrelated tables — skip them.
            if len(columns) <= 3:
                continue

            version_text = columns[0].text.strip()
            version_match = config.first_match(version_text)
            if not version_match:
                logging.warning(f"Skipping {version_text}: does not match any version pattern")
                continue

            version = config.render(version_match)
            date = dates.parse_date(columns[2].text)
            product_data.declare_version(version, date)

View File

import logging

from common import dates, http
from common.releasedata import ProductData, config_from_argv

"""Fetches RedHat JBoss EAP version data for JBoss 7"""

config = config_from_argv()
with ProductData(config.product) as product_data:
    html = http.fetch_html(config.url)

    # Each 7.x minor release has its own <h4> heading followed by a version table.
    for h4 in html.find_all("h4"):
        title = h4.get_text(strip=True)
        if not title.startswith("7."):
            continue

        release = title[:3]  # e.g. '7.4'
        version_table = h4.find_next("table")
        if not version_table:
            logging.warning(f"Version table not found for {title}")
            continue

        for (i, row) in enumerate(version_table.find_all("tr")):
            if i == 0:  # Skip the first row (header)
                continue

            columns = row.find_all("td")
            # Get the version name without the content of the <sup> tag, if present
            name_str = ''.join([content for content in columns[0].contents if isinstance(content, str)]).strip()
            date_str = columns[1].text.strip()
            if date_str == "TBD" or date_str == "TDB":  # Placeholder for a future release
                continue

            if date_str == "[July 21, 2021][d7400]":
                # Temporary fix for a typo in the source page
                date_str = "July 21 2021"

            # 'GA' => '7.4.0', 'Update 3' => '7.4.3'
            name = name_str.replace("GA", "Update 0").replace("Update ", release + ".")
            date = dates.parse_date(date_str)
            product_data.declare_version(name, date)

View File

import re

from common import dates, http
from common.releasedata import ProductData, config_from_argv

"""Fetches the latest RedHat JBoss EAP version data for JBoss 8.0"""

config = config_from_argv()
with ProductData(config.product) as product_data:
    # config.url points at a Maven repository metadata XML document.
    xml = http.fetch_xml(config.url)

    versioning = xml.getElementsByTagName("metadata")[0].getElementsByTagName("versioning")[0]

    # e.g. a '<latest>' value of the form '..N.GA' yields '8.0.N'.
    latest_str = versioning.getElementsByTagName("latest")[0].firstChild.nodeValue
    latest_name = "8.0." + re.match(r"^..(.*)\.GA", latest_str).group(1)

    latest_date_str = versioning.getElementsByTagName("lastUpdated")[0].firstChild.nodeValue
    latest_date = dates.parse_datetime(latest_date_str)

    product_data.declare_version(latest_name, latest_date)

View File

@@ -1,7 +1,8 @@
import re
from common import dates, releasedata
from common import dates
from common.git import Git
from common.releasedata import ProductData, config_from_argv
"""Fetches Red Hat OpenShift versions from the documentation's git repository"""
@@ -10,26 +11,26 @@ VERSION_AND_DATE_PATTERN = re.compile(
re.MULTILINE,
)
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
git = Git(config.url)
git.setup()
config = config_from_argv()
with ProductData(config.product) as product_data:
git = Git(config.url)
git.setup()
# only fetch v4+ branches, because the format was different in openshift v3
for branch in git.list_branches("refs/heads/enterprise-[4-9]*"):
branch_version = branch.split("-")[1]
file_version = branch_version.replace(".", "-")
release_notes_filename = f"release_notes/ocp-{file_version}-release-notes.adoc"
git.checkout(branch, file_list=[release_notes_filename])
# only fetch v4+ branches, because the format was different in openshift v3
for branch in git.list_branches("refs/heads/enterprise-[4-9]*"):
branch_version = branch.split("-")[1]
file_version = branch_version.replace(".", "-")
release_notes_filename = f"release_notes/ocp-{file_version}-release-notes.adoc"
git.checkout(branch, file_list=[release_notes_filename])
release_notes_file = git.repo_dir / release_notes_filename
if not release_notes_file.exists():
continue
release_notes_file = git.repo_dir / release_notes_filename
if not release_notes_file.exists():
continue
with release_notes_file.open("rb") as f:
content = f.read().decode("utf-8")
for version, date_str in VERSION_AND_DATE_PATTERN.findall(content):
product_data.declare_version(
version.replace("{product-version}", branch_version),
dates.parse_date(date_str),
)
with release_notes_file.open("rb") as f:
content = f.read().decode("utf-8")
for version, date_str in VERSION_AND_DATE_PATTERN.findall(content):
product_data.declare_version(
version.replace("{product-version}", branch_version),
dates.parse_date(date_str),
)

View File

import logging

from common import dates, http
from common.releasedata import ProductData, config_from_argv

"""Fetches Satellite versions from access.redhat.com.

A few of the older versions, such as 'Satellite 6.1 GA Release (Build 6.1.1)', were ignored because too hard to parse."""

config = config_from_argv()
with ProductData(config.product) as product_data:
    html = http.fetch_html(config.url)

    for table in html.findAll("tbody"):
        for tr in table.findAll("tr"):
            td_list = tr.findAll("td")

            version_str = td_list[0].get_text().replace(' GA', '.0').strip()  # x.y GA => x.y.0
            version_match = config.first_match(version_str)
            if not version_match:
                logging.warning(f"Skipping version '{version_str}': does not match any version pattern.")
                continue
            version = version_match["version"].replace('-', '.')  # a.b.c-d => a.b.c.d

            date_str = td_list[1].get_text().strip()
            date_str = '2024-12-04' if date_str == '2024-12-041' else date_str  # there is a typo for 6.15.5
            date = dates.parse_date(date_str)

            product_data.declare_version(version, date)

View File

@@ -1,7 +1,8 @@
import logging
import urllib.parse
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
"""Fetches EOL dates from the Red Hat Product Life Cycle Data API.
@@ -17,26 +18,26 @@ class Mapping:
def get_field_for(self, phase_name: str) -> str | None:
return self.fields_by_phase.get(phase_name.lower(), None)
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
name = urllib.parse.quote(config.url)
mapping = Mapping(config.data["fields"])
config = config_from_argv()
with ProductData(config.product) as product_data:
name = urllib.parse.quote(config.url)
mapping = Mapping(config.data["fields"])
data = http.fetch_json('https://access.redhat.com/product-life-cycles/api/v1/products?name=' + name)
data = http.fetch_json('https://access.redhat.com/product-life-cycles/api/v1/products?name=' + name)
for version in data["data"][0]["versions"]:
version_name = version["name"]
version_match = config.first_match(version_name)
if not version_match:
logging.warning(f"Ignoring version '{version_name}', config is {config}")
for version in data["data"][0]["versions"]:
version_name = version["name"]
version_match = config.first_match(version_name)
if not version_match:
logging.warning(f"Ignoring version '{version_name}', config is {config}")
continue
release = product_data.get_release(config.render(version_match))
for phase in version["phases"]:
field = mapping.get_field_for(phase["name"])
if not field:
logging.debug(f"Ignoring phase '{phase['name']}': not mapped")
continue
release = product_data.get_release(config.render(version_match))
for phase in version["phases"]:
field = mapping.get_field_for(phase["name"])
if not field:
logging.debug(f"Ignoring phase '{phase['name']}': not mapped")
continue
date = dates.parse_datetime(phase["date"])
release.set_field(field, date)
date = dates.parse_datetime(phase["date"])
release.set_field(field, date)

View File

@@ -4,7 +4,8 @@ from datetime import datetime
from re import Match
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata
from common import dates, endoflife, http
from common.releasedata import ProductData, config_from_argv
from liquid import Template
"""Fetch release-level data from an HTML table in a web page.
@@ -150,69 +151,69 @@ class Field:
return f"{self.name}({self.column})"
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
render_javascript = config.data.get("render_javascript", False)
render_javascript_click_selector = config.data.get("render_javascript_click_selector", None)
render_javascript_wait_until = config.data.get("render_javascript_wait_until", None)
ignore_empty_releases = config.data.get("ignore_empty_releases", False)
header_row_selector = config.data.get("header_selector", "thead tr")
rows_selector = config.data.get("rows_selector", "tbody tr")
cells_selector = "td, th"
release_cycle_field = Field("releaseCycle", config.data["fields"].pop("releaseCycle"))
fields = [Field(name, definition) for name, definition in config.data["fields"].items()]
config = config_from_argv()
with ProductData(config.product) as product_data:
render_javascript = config.data.get("render_javascript", False)
render_javascript_click_selector = config.data.get("render_javascript_click_selector", None)
render_javascript_wait_until = config.data.get("render_javascript_wait_until", None)
ignore_empty_releases = config.data.get("ignore_empty_releases", False)
header_row_selector = config.data.get("header_selector", "thead tr")
rows_selector = config.data.get("rows_selector", "tbody tr")
cells_selector = "td, th"
release_cycle_field = Field("releaseCycle", config.data["fields"].pop("releaseCycle"))
fields = [Field(name, definition) for name, definition in config.data["fields"].items()]
if render_javascript:
response_text = http.fetch_javascript_url(config.url, click_selector=render_javascript_click_selector,
wait_until=render_javascript_wait_until)
else:
response_text = http.fetch_url(config.url).text
soup = BeautifulSoup(response_text, features="html5lib")
if render_javascript:
response_text = http.fetch_javascript_url(config.url, click_selector=render_javascript_click_selector,
wait_until=render_javascript_wait_until)
else:
response_text = http.fetch_url(config.url).text
soup = BeautifulSoup(response_text, features="html5lib")
for table in soup.select(config.data["selector"]):
header_row = table.select_one(header_row_selector)
if not header_row:
logging.info(f"skipping table with attributes {table.attrs}: no header row found")
continue
for table in soup.select(config.data["selector"]):
header_row = table.select_one(header_row_selector)
if not header_row:
logging.info(f"skipping table with attributes {table.attrs}: no header row found")
continue
headers = [th.get_text().strip().lower() for th in header_row.select(cells_selector)]
logging.info(f"processing table with headers {headers}")
headers = [th.get_text().strip().lower() for th in header_row.select(cells_selector)]
logging.info(f"processing table with headers {headers}")
try:
fields_index = {"releaseCycle": headers.index(release_cycle_field.column)}
try:
fields_index = {"releaseCycle": headers.index(release_cycle_field.column)}
for field in fields:
fields_index[field.name] = field.column if field.is_index else headers.index(field.column)
min_column_count = max(fields_index.values()) + 1
for row in table.select(rows_selector):
cells = [cell.get_text().strip() for cell in row.select(cells_selector)]
if len(cells) < min_column_count:
logging.info(f"skipping row {cells}: not enough columns")
continue
raw_release_name = cells[fields_index[release_cycle_field.name]]
release_name = release_cycle_field.extract_from(raw_release_name)
if not release_name:
logging.info(f"skipping row {cells}: invalid release cycle '{raw_release_name}', "
f"should match one of {release_cycle_field.include_version_patterns} "
f"and not match all of {release_cycle_field.exclude_version_patterns}")
continue
release = product_data.get_release(release_name)
for field in fields:
fields_index[field.name] = field.column if field.is_index else headers.index(field.column)
min_column_count = max(fields_index.values()) + 1
raw_field = cells[fields_index[field.name]]
try:
release.set_field(field.name, field.extract_from(raw_field))
except ValueError as e:
logging.info(f"skipping cell {raw_field} for {release}: {e}")
for row in table.select(rows_selector):
cells = [cell.get_text().strip() for cell in row.select(cells_selector)]
if len(cells) < min_column_count:
logging.info(f"skipping row {cells}: not enough columns")
continue
if ignore_empty_releases and release.is_empty():
logging.info(f"removing empty release '{release}'")
product_data.remove_release(release_name)
raw_release_name = cells[fields_index[release_cycle_field.name]]
release_name = release_cycle_field.extract_from(raw_release_name)
if not release_name:
logging.info(f"skipping row {cells}: invalid release cycle '{raw_release_name}', "
f"should match one of {release_cycle_field.include_version_patterns} "
f"and not match all of {release_cycle_field.exclude_version_patterns}")
continue
if release.is_released_after(TODAY):
logging.info(f"removing future release '{release}'")
product_data.remove_release(release_name)
release = product_data.get_release(release_name)
for field in fields:
raw_field = cells[fields_index[field.name]]
try:
release.set_field(field.name, field.extract_from(raw_field))
except ValueError as e:
logging.info(f"skipping cell {raw_field} for {release}: {e}")
if ignore_empty_releases and release.is_empty():
logging.info(f"removing empty release '{release}'")
product_data.remove_release(release_name)
if release.is_released_after(TODAY):
logging.info(f"removing future release '{release}'")
product_data.remove_release(release_name)
except ValueError as e:
logging.info(f"skipping table with headers {headers}: {e}")
except ValueError as e:
logging.info(f"skipping table with headers {headers}: {e}")

View File

import re

from common import dates, http
from common.releasedata import ProductData, config_from_argv

"""Fetches RHEL versions from a table of release announcements."""

# https://regex101.com/r/877ibq/1
VERSION_PATTERN = re.compile(r"RHEL (?P<major>\d)(\. ?(?P<minor>\d+))?(( Update (?P<minor2>\d))| GA)?")

config = config_from_argv()
with ProductData(config.product) as product_data:
    html = http.fetch_html(config.url)

    for tr in html.findAll("tr"):
        td_list = tr.findAll("td")
        if len(td_list) == 0:
            continue

        # Normalize 'RHEL X', 'RHEL X.Y', and 'RHEL X Update Y' to 'X', 'X.Y'.
        version_str = td_list[0].get_text().strip()
        version_match = VERSION_PATTERN.match(version_str).groupdict()
        version = version_match["major"]
        version += ("." + version_match["minor"]) if version_match["minor"] else ""
        version += ("." + version_match["minor2"]) if version_match["minor2"] else ""
        date = dates.parse_date(td_list[1].get_text())
        product_data.declare_version(version, date)

View File

from common import dates, http
from common.releasedata import ProductData, config_from_argv

"""Fetches versions from a plain-text, pipe-delimited release list
(version in the second field, release date in the fourth)."""

config = config_from_argv()
with ProductData(config.product) as product_data:
    response = http.fetch_url(config.url)
    for line in response.text.strip().split('\n'):
        items = line.split('|')
        if len(items) >= 5 and config.first_match(items[1].strip()):
            version = items[1].strip()
            date = dates.parse_date(items[3])
            product_data.declare_version(version, date)

View File

import logging

from common import dates, http
from common.releasedata import ProductData, config_from_argv

"""Fetches ROS distributions from a table listing distro names and release dates."""

config = config_from_argv()
with ProductData(config.product) as product_data:
    html = http.fetch_html(config.url)

    for tr in html.findAll("tr"):
        td_list = tr.findAll("td")
        if len(td_list) == 0:
            continue

        version_str = td_list[0].get_text().strip()
        version_match = config.first_match(version_str)
        if not version_match:
            logging.warning(f"Skipping version '{version_str}': does not match the expected pattern")
            continue

        # Get the "code" (such as noetic) instead of the display name (such as Noetic Ninjemys)
        version = td_list[0].findAll("a")[0]["href"][1:]
        try:
            date = dates.parse_date(td_list[1].get_text())
        except ValueError:  # The day has a suffix (such as May 23rd, 2020)
            x = td_list[1].get_text().split(",")
            date = dates.parse_date(x[0][:-2] + x[1])

        product_data.declare_version(version, date)

View File

@@ -2,7 +2,8 @@ import logging
import re
from datetime import date, datetime, time, timezone
from common import dates, endoflife, http, releasedata
from common import dates, endoflife, http
from common.releasedata import ProductData, parse_argv
"""Detect new models and aggregate EOL data for Samsung Mobile devices.
@@ -12,64 +13,63 @@ it retains the date and use it as the model's EOL date.
TODAY = dates.today()
frontmatter, configs = releasedata.parse_argv()
for config in configs:
with releasedata.ProductData(config.product) as product_data:
frontmatter_release_names = frontmatter.get_release_names()
frontmatter, config = parse_argv()
with ProductData(config.product) as product_data:
frontmatter_release_names = frontmatter.get_release_names()
# Copy EOL dates from frontmatter to product data
for frontmatter_release in frontmatter.get_releases():
eol = frontmatter_release.get("eol")
eol = datetime.combine(eol, time.min, tzinfo=timezone.utc) if isinstance(eol, date) else eol
# Copy EOL dates from frontmatter to product data
for frontmatter_release in frontmatter.get_releases():
eol = frontmatter_release.get("eol")
eol = datetime.combine(eol, time.min, tzinfo=timezone.utc) if isinstance(eol, date) else eol
release = product_data.get_release(frontmatter_release.get("releaseCycle"))
release.set_eol(eol)
release = product_data.get_release(frontmatter_release.get("releaseCycle"))
release.set_eol(eol)
html = http.fetch_html(config.url)
html = http.fetch_html(config.url)
sections = config.data.get("sections", {})
for update_cadence, title in sections.items():
models_list = html.find(string=lambda text, search=title: search in text if text else False).find_next("ul")
sections = config.data.get("sections", {})
for update_cadence, title in sections.items():
models_list = html.find(string=lambda text, search=title: search in text if text else False).find_next("ul")
for item in models_list.find_all("li"):
models = item.text.replace("Enterprise Models:", "")
logging.info(f"Found {models} for {update_cadence} security updates")
for item in models_list.find_all("li"):
models = item.text.replace("Enterprise Models:", "")
logging.info(f"Found {models} for {update_cadence} security updates")
for model in re.split(r',\s*', models):
name = endoflife.to_identifier(model)
if config.is_excluded(name):
logging.debug(f"Ignoring model '{name}', excluded by configuration")
continue
for model in re.split(r',\s*', models):
name = endoflife.to_identifier(model)
if config.is_excluded(name):
logging.debug(f"Ignoring model '{name}', excluded by configuration")
continue
release = product_data.get_release(name)
release.set_label(model.strip())
release = product_data.get_release(name)
release.set_label(model.strip())
if name in frontmatter_release_names:
frontmatter_release_names.remove(name)
current_eol = release.get_eol()
if current_eol is True or (isinstance(current_eol, datetime) and current_eol <= TODAY):
logging.info(f"Known model {name} is incorrectly marked as EOL, updating eol")
release.set_eol(False)
else:
logging.debug(f"Known model {name} is not EOL, keeping eol as {current_eol}")
else:
logging.debug(f"Found new model {name}")
if name in frontmatter_release_names:
frontmatter_release_names.remove(name)
current_eol = release.get_eol()
if current_eol is True or (isinstance(current_eol, datetime) and current_eol <= TODAY):
logging.info(f"Known model {name} is incorrectly marked as EOL, updating eol")
release.set_eol(False)
else:
logging.debug(f"Known model {name} is not EOL, keeping eol as {current_eol}")
# the remaining models in frontmatter_release_names are not listed anymore on the Samsung page => they are EOL
for eol_model_name in frontmatter_release_names:
release = product_data.get_release(eol_model_name)
current_eol = release.get_eol()
if config.is_excluded(eol_model_name):
logging.debug(f"Skipping model {eol_model_name}, excluded by configuration")
elif current_eol is False:
logging.info(f"Model {eol_model_name} is not EOL, setting eol")
release.set_eol(TODAY)
elif isinstance(current_eol, datetime):
if current_eol > TODAY:
logging.info(f"Model {eol_model_name} is not marked as EOL, setting eol as {TODAY}")
release.set_eol(TODAY)
else:
logging.debug(f"Model {eol_model_name} is already EOL, keeping eol as {current_eol}")
logging.debug(f"Found new model {name}")
release.set_eol(False)
# the remaining models in frontmatter_release_names are not listed anymore on the Samsung page => they are EOL
for eol_model_name in frontmatter_release_names:
release = product_data.get_release(eol_model_name)
current_eol = release.get_eol()
if config.is_excluded(eol_model_name):
logging.debug(f"Skipping model {eol_model_name}, excluded by configuration")
elif current_eol is False:
logging.info(f"Model {eol_model_name} is not EOL, setting eol")
release.set_eol(TODAY)
elif isinstance(current_eol, datetime):
if current_eol > TODAY:
logging.info(f"Model {eol_model_name} is not marked as EOL, setting eol as {TODAY}")
release.set_eol(TODAY)
else:
logging.debug(f"Model {eol_model_name} is already EOL, keeping eol as {current_eol}")

View File

@@ -1,29 +1,30 @@
import logging
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
config = config_from_argv()
with ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
products_table = html.find("tbody", id="productSupportLifecycle")
sles_header_rows = products_table.find_all("tr", class_="row", attrs={"data-productfilter": "SUSE Linux Enterprise Server"})
products_table = html.find("tbody", id="productSupportLifecycle")
sles_header_rows = products_table.find_all("tr", class_="row", attrs={"data-productfilter": "SUSE Linux Enterprise Server"})
# Extract rows' IDs to find related sub-rows with details (normally hidden until a user expands a section)
for detail_id in [f"detail{row['id']}" for row in sles_header_rows]:
detail_row = products_table.find("tr", id=detail_id)
# There is a table with info about minor releases and after it, optionally, a table with info about modules
minor_versions_table = detail_row.find_all("tbody")[0]
# Extract rows' IDs to find related sub-rows with details (normally hidden until a user expands a section)
for detail_id in [f"detail{row['id']}" for row in sles_header_rows]:
detail_row = products_table.find("tr", id=detail_id)
# There is a table with info about minor releases and after it, optionally, a table with info about modules
minor_versions_table = detail_row.find_all("tbody")[0]
# The first sub-row is a header, the rest contains info about the first release and later minor releases
for row in minor_versions_table.find_all("tr")[1:]:
# For each minor release there is an FCS date, general support end date and LTSS end date
cells = row.find_all("td")
version = cells[0].text.replace("SUSE Linux Enterprise Server ", '').replace(' SP', '.')
date_str = cells[1].text
# The first sub-row is a header, the rest contains info about the first release and later minor releases
for row in minor_versions_table.find_all("tr")[1:]:
# For each minor release there is an FCS date, general support end date and LTSS end date
cells = row.find_all("td")
version = cells[0].text.replace("SUSE Linux Enterprise Server ", '').replace(' SP', '.')
date_str = cells[1].text
try:
date = dates.parse_date(date_str)
product_data.declare_version(version, date)
except ValueError:
logging.info(f"Ignoring {version}: date '{date_str}' could not be parsed")
try:
date = dates.parse_date(date_str)
product_data.declare_version(version, date)
except ValueError:
logging.info(f"Ignoring {version}: date '{date_str}' could not be parsed")

View File

@@ -1,6 +1,7 @@
import re
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
VERSION_DATE_PATTERN = re.compile(r"Splunk Enterprise (?P<version>\d+\.\d+(?:\.\d+)*) was (?:first )?released on (?P<date>\w+\s\d\d?,\s\d{4})\.", re.MULTILINE)
@@ -29,19 +30,19 @@ def get_latest_minor_versions(versions: list[str]) -> list[str]:
return latest_versions
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
config = config_from_argv()
with ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
all_versions = [option.attrs['value'] for option in html.select("select#version-select > option")]
all_versions = [v for v in all_versions if v != "DataMonitoringAppPreview"]
all_versions = [option.attrs['value'] for option in html.select("select#version-select > option")]
all_versions = [v for v in all_versions if v != "DataMonitoringAppPreview"]
# Latest minor release notes contains release notes for all previous minor versions.
# For example, 9.0.5 release notes also contains release notes for 9.0.0 to 9.0.4.
latest_minor_versions = get_latest_minor_versions(all_versions)
latest_minor_versions_urls = [f"{config.url}/{v}/ReleaseNotes/MeetSplunk" for v in latest_minor_versions]
for response in http.fetch_urls(latest_minor_versions_urls):
for (version_str, date_str) in VERSION_DATE_PATTERN.findall(response.text):
version_str = f"{version_str}.0" if len(version_str.split(".")) == 2 else version_str # convert x.y to x.y.0
date = dates.parse_date(date_str)
product_data.declare_version(version_str, date)
# Latest minor release notes contains release notes for all previous minor versions.
# For example, 9.0.5 release notes also contains release notes for 9.0.0 to 9.0.4.
latest_minor_versions = get_latest_minor_versions(all_versions)
latest_minor_versions_urls = [f"{config.url}/{v}/ReleaseNotes/MeetSplunk" for v in latest_minor_versions]
for response in http.fetch_urls(latest_minor_versions_urls):
for (version_str, date_str) in VERSION_DATE_PATTERN.findall(response.text):
version_str = f"{version_str}.0" if len(version_str.split(".")) == 2 else version_str # convert x.y to x.y.0
date = dates.parse_date(date_str)
product_data.declare_version(version_str, date)

View File

@@ -1,12 +1,13 @@
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
data = http.fetch_json(config.url)
for v in data:
if v['type'] == 'development':
continue
config = config_from_argv()
with ProductData(config.product) as product_data:
data = http.fetch_json(config.url)
for v in data:
if v['type'] == 'development':
continue
version = v["version"]
date = dates.parse_datetime(v["date"], to_utc=False) # utc kept for now for backwards compatibility
product_data.declare_version(version, date)
version = v["version"]
date = dates.parse_datetime(v["date"], to_utc=False) # utc kept for now for backwards compatibility
product_data.declare_version(version, date)

View File

@@ -1,4 +1,5 @@
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
"""Fetches the Unity LTS releases from the Unity website. Non-LTS releases are not listed there, so this automation
is only partial.
@@ -16,11 +17,11 @@ Note that it was assumed that:
The script will need to be updated if someday those conditions are not met."""
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
config = config_from_argv()
with ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
for release in html.find_all('div', class_='component-releases-item__show__inner-header'):
version = release.find('h4').find('span').text
date = dates.parse_datetime(release.find('time').attrs['datetime'])
product_data.declare_version(version, date)
for release in html.find_all('div', class_='component-releases-item__show__inner-header'):
version = release.find('h4').find('span').text
date = dates.parse_datetime(release.find('time').attrs['datetime'])
product_data.declare_version(version, date)

View File

@@ -1,20 +1,21 @@
import re
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
DATE_PATTERN = re.compile(r"\d{4}-\d{2}-\d{2}")
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
wikicode = http.fetch_markdown(config.url)
config = config_from_argv()
with ProductData(config.product) as product_data:
wikicode = http.fetch_markdown(config.url)
for tr in wikicode.ifilter_tags(matches=lambda node: node.tag == "tr"):
items = tr.contents.filter_tags(matches=lambda node: node.tag == "td")
if len(items) < 2:
continue
for tr in wikicode.ifilter_tags(matches=lambda node: node.tag == "tr"):
items = tr.contents.filter_tags(matches=lambda node: node.tag == "td")
if len(items) < 2:
continue
version = items[0].__strip__()
date_str = items[1].__strip__()
if config.first_match(version) and DATE_PATTERN.match(date_str):
date = dates.parse_date(date_str)
product_data.declare_version(version, date)
version = items[0].__strip__()
date_str = items[1].__strip__()
if config.first_match(version) and DATE_PATTERN.match(date_str):
date = dates.parse_date(date_str)
product_data.declare_version(version, date)

View File

@@ -1,7 +1,8 @@
import logging
import re
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
"""Fetches Veeam products versions from https://www.veeam.com.
@@ -9,31 +10,31 @@ This script takes a single argument which is the url of the versions page on htt
such as `https://www.veeam.com/kb2680`.
"""
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
config = config_from_argv()
with ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
version_column = config.data.get("version_column", "Build Number").lower()
date_column = config.data.get("date_column", "Release Date").lower()
for table in html.find_all("table"):
headers = [header.get_text().strip().lower() for header in table.find("tr").find_all("td")]
if version_column not in headers or date_column not in headers:
logging.warning("Skipping table with headers %s as it does not contains '%s' or '%s'",
headers, version_column, date_column)
version_column = config.data.get("version_column", "Build Number").lower()
date_column = config.data.get("date_column", "Release Date").lower()
for table in html.find_all("table"):
headers = [header.get_text().strip().lower() for header in table.find("tr").find_all("td")]
if version_column not in headers or date_column not in headers:
logging.warning("Skipping table with headers %s as it does not contains '%s' or '%s'",
headers, version_column, date_column)
continue
version_index = headers.index(version_column)
date_index = headers.index(date_column)
for row in table.find_all("tr")[1:]:
cells = row.find_all("td")
if len(cells) <= max(version_index, date_index):
continue
version_index = headers.index(version_column)
date_index = headers.index(date_column)
for row in table.find_all("tr")[1:]:
cells = row.find_all("td")
if len(cells) <= max(version_index, date_index):
continue
date_str = cells[date_index].get_text().strip()
if not date_str or date_str == "-":
continue
date_str = cells[date_index].get_text().strip()
if not date_str or date_str == "-":
continue
# whitespaces in version numbers are replaced with dashes
version = re.sub(r'\s+', "-", cells[version_index].get_text().strip())
date = dates.parse_date(date_str)
product_data.declare_version(version, date)
# whitespaces in version numbers are replaced with dashes
version = re.sub(r'\s+', "-", cells[version_index].get_text().strip())
date = dates.parse_date(date_str)
product_data.declare_version(version, date)

View File

@@ -1,34 +1,35 @@
import logging
import re
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
"""Fetches releases from VirtualBox download page."""
EOL_REGEX = re.compile(r"^\(no longer supported, support ended (?P<value>\d{4}/\d{2})\)$")
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
config = config_from_argv()
with ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
for li in html.select_one("#DownloadVirtualBoxOldBuilds + ul").find_all("li"):
li_text = li.find("a").text.strip()
for li in html.select_one("#DownloadVirtualBoxOldBuilds + ul").find_all("li"):
li_text = li.find("a").text.strip()
release_match = config.first_match(li_text)
if not release_match:
logging.info(f"Skipping '{li_text}': does not match expected pattern")
continue
release_match = config.first_match(li_text)
if not release_match:
logging.info(f"Skipping '{li_text}': does not match expected pattern")
continue
release_name = release_match.group("value")
release = product_data.get_release(release_name)
release_name = release_match.group("value")
release = product_data.get_release(release_name)
eol_text = li.find("em").text.lower().strip()
eol_match = EOL_REGEX.match(eol_text)
if not eol_match:
logging.info(f"Ignoring '{eol_text}': does not match {EOL_REGEX}")
continue
eol_text = li.find("em").text.lower().strip()
eol_match = EOL_REGEX.match(eol_text)
if not eol_match:
logging.info(f"Ignoring '{eol_text}': does not match {EOL_REGEX}")
continue
eol_date_str = eol_match.group("value")
eol_date = dates.parse_month_year_date(eol_date_str)
release.set_eol(eol_date)
eol_date_str = eol_match.group("value")
eol_date = dates.parse_month_year_date(eol_date_str)
release.set_eol(eol_date)

View File

@@ -1,24 +1,25 @@
from common import dates, http, releasedata
from common import dates, http
from common.releasedata import ProductData, config_from_argv
for config in releasedata.list_configs_from_argv():
with releasedata.ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
config = config_from_argv()
with ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
for table in html.find_all("table"):
headers = [th.get_text().strip().lower() for th in table.find_all("th")]
if "version" not in headers or "release date" not in headers:
for table in html.find_all("table"):
headers = [th.get_text().strip().lower() for th in table.find_all("th")]
if "version" not in headers or "release date" not in headers:
continue
version_index = headers.index("version")
date_index = headers.index("release date")
for row in table.findAll("tr"):
cells = row.findAll("td")
if len(cells) < (max(version_index, date_index) + 1):
continue
version_index = headers.index("version")
date_index = headers.index("release date")
for row in table.findAll("tr"):
cells = row.findAll("td")
if len(cells) < (max(version_index, date_index) + 1):
continue
version = cells[version_index].get_text().strip()
date = cells[date_index].get_text().strip()
date = dates.parse_date(date)
version = cells[version_index].get_text().strip()
date = cells[date_index].get_text().strip()
date = dates.parse_date(date)
if date and version and config.first_match(version):
product_data.declare_version(version, date)
if date and version and config.first_match(version):
product_data.declare_version(version, date)