Add the new release_table method (#300)
Add a new release_table method to automate the retrieval of release-level information such as the releaseDate, support or eol fields.
This commit is contained in:
@@ -18,19 +18,20 @@ PRODUCTS_PATH = Path(os.environ.get("PRODUCTS_PATH", "website/products"))
|
|||||||
|
|
||||||
|
|
||||||
class AutoConfig:
|
class AutoConfig:
|
||||||
def __init__(self, product: str, config: dict) -> None:
|
def __init__(self, product: str, data: dict) -> None:
|
||||||
self.product = product
|
self.product = product
|
||||||
self.method = next(key for key in config if key not in ("template", "regex", "regex_exclude"))
|
self.data = data
|
||||||
self.url = config[self.method]
|
self.method = next(key for key in data if key not in ("template", "regex", "regex_exclude"))
|
||||||
self.version_template = Template(config.get("template", DEFAULT_VERSION_TEMPLATE))
|
self.url = data[self.method]
|
||||||
|
self.version_template = Template(data.get("template", DEFAULT_VERSION_TEMPLATE))
|
||||||
|
|
||||||
self.script = f"{self.url}.py" if self.method == "custom" else f"{self.method}.py"
|
self.script = f"{self.url}.py" if self.method == "custom" else f"{self.method}.py"
|
||||||
|
|
||||||
regexes_include = config.get("regex", DEFAULT_VERSION_REGEX)
|
regexes_include = data.get("regex", DEFAULT_VERSION_REGEX)
|
||||||
regexes_include = regexes_include if isinstance(regexes_include, list) else [regexes_include]
|
regexes_include = regexes_include if isinstance(regexes_include, list) else [regexes_include]
|
||||||
self.include_version_patterns = [re.compile(r) for r in regexes_include]
|
self.include_version_patterns = [re.compile(r) for r in regexes_include]
|
||||||
|
|
||||||
regexes_exclude = config.get("regex_exclude", [])
|
regexes_exclude = data.get("regex_exclude", [])
|
||||||
regexes_exclude = regexes_exclude if isinstance(regexes_exclude, list) else [regexes_exclude]
|
regexes_exclude = regexes_exclude if isinstance(regexes_exclude, list) else [regexes_exclude]
|
||||||
self.exclude_version_patterns = [re.compile(r) for r in regexes_exclude]
|
self.exclude_version_patterns = [re.compile(r) for r in regexes_exclude]
|
||||||
|
|
||||||
|
|||||||
@@ -28,12 +28,18 @@ class ProductRelease:
|
|||||||
def name(self) -> str:
|
def name(self) -> str:
|
||||||
return self.data["name"]
|
return self.data["name"]
|
||||||
|
|
||||||
|
def set_release_date(self, new_value: datetime) -> None:
|
||||||
|
self.set_field("releaseDate", new_value)
|
||||||
|
|
||||||
def set_support(self, new_value: datetime | bool) -> None:
|
def set_support(self, new_value: datetime | bool) -> None:
|
||||||
self.set_field("support", new_value)
|
self.set_field("support", new_value)
|
||||||
|
|
||||||
def set_eol(self, new_value: datetime | bool) -> None:
|
def set_eol(self, new_value: datetime | bool) -> None:
|
||||||
self.set_field("eol", new_value)
|
self.set_field("eol", new_value)
|
||||||
|
|
||||||
|
def set_extended_support(self, new_value: datetime | bool) -> None:
|
||||||
|
self.set_field("extendedSupport", new_value)
|
||||||
|
|
||||||
def set_field(self, field: str, new_value: any) -> None:
|
def set_field(self, field: str, new_value: any) -> None:
|
||||||
new_value = new_value.strftime("%Y-%m-%d") if isinstance(new_value, datetime) else new_value
|
new_value = new_value.strftime("%Y-%m-%d") if isinstance(new_value, datetime) else new_value
|
||||||
old_value = self.data.get(field, None)
|
old_value = self.data.get(field, None)
|
||||||
|
|||||||
54
src/release_table.py
Normal file
54
src/release_table.py
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
"""Fetch release-level data from an HTML table in a web page.

This script works based on a definition provided in the product's frontmatter to locate the table and extract the
necessary information. Available configuration options are:

- selector: A CSS selector used to locate one or more tables in the page.
- headers_selector: A CSS selector used to locate the table's headers (column names).
- rows_selector: A CSS selector used to locate the table's rows.
- mapping: A dictionary that maps release fields to the table's column names. All identifiers are case-insensitive.

Supported CSS selectors are defined by BeautifulSoup and documented on its website. For more information, see
https://beautiful-soup-4.readthedocs.io/en/latest/index.html?highlight=selector#css-selectors.

Column data types are auto-detected. The currently supported types are 'date' (parsed using the dates module) and
string.
"""

import sys

from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata

METHOD = "release_table"

# Optional positional arguments, forwarded as-is to endoflife.list_configs as filters.
p_filter = sys.argv[1] if len(sys.argv) > 1 else None
m_filter = sys.argv[2] if len(sys.argv) > 2 else None
for config in endoflife.list_configs(p_filter, METHOD, m_filter):
    with releasedata.ProductData(config.product) as product_data:
        response = http.fetch_url(config.url)
        soup = BeautifulSoup(response.text, features="html5lib")

        for table in soup.select(config.data["selector"]):
            # Headers are normalized (stripped, lower-cased) so the mapping lookup is case-insensitive.
            headers = [th.get_text().strip().lower() for th in table.select(config.data["headers_selector"])]

            # Resolve each release field to the position of its column in this table.
            index_by_target = {}
            for target, column in config.data["mapping"].items():
                column_name = str(column).lower()
                try:
                    index_by_target[target] = headers.index(column_name)
                except ValueError:
                    # list.index only reports "x is not in list"; say which product/field/table is affected.
                    message = (
                        f"{config.product}: column '{column_name}' (mapped to '{target}') "
                        f"not found in table headers {headers}"
                    )
                    raise ValueError(message) from None

            # Rows with fewer cells than the right-most mapped column cannot be read and are skipped.
            min_column_count = max(index_by_target.values()) + 1
            for row in table.select(config.data["rows_selector"]):
                cells = row.find_all("td")
                if len(cells) < min_column_count:
                    continue

                # The mapping must provide a 'releaseCycle' entry: it identifies the release to update.
                release_cycle = cells[index_by_target["releaseCycle"]].get_text().strip()
                release = product_data.get_release(release_cycle)
                for target, index in index_by_target.items():
                    value_str = cells[index].get_text().strip()

                    # Type auto-detection: anything dates.parse_date accepts is stored as a date,
                    # everything else is kept as the raw cell text.
                    try:
                        value = dates.parse_date(value_str)
                    except ValueError:
                        value = value_str

                    release.set_field(target, value)
|
||||||
Reference in New Issue
Block a user