Align custom scripts with generic scripts, making them configurable. This has a few advantages: - script code is more unified, - no more hard-coded method names in scripts, which is less error prone and make it easier to rename scripts, - no more hard coded product names in scripts, which is less error prone and make it easier to rename products, - less hard-coded URLs and regexes in scripts, which makes auto-configuration more expressive / updatable, Also added method `endoflife.list_configs_from_argv()` so that it is easier to manipulate scripts arguments.
43 lines
1.9 KiB
Python
43 lines
1.9 KiB
Python
import logging
|
|
|
|
from bs4 import BeautifulSoup
|
|
from common import dates, endoflife, http, releasedata
|
|
|
|
for config in endoflife.list_configs_from_argv():
|
|
with releasedata.ProductData(config.product) as product_data:
|
|
response = http.fetch_url(config.url)
|
|
html = BeautifulSoup(response.text, features="html5lib")
|
|
table_selector = config.data.get("table_selector", "#previous-releases + table").strip()
|
|
date_column = config.data.get("date_column", "Date").strip().lower()
|
|
versions_column = config.data.get("versions_column").strip().lower()
|
|
|
|
table = html.select_one(table_selector)
|
|
if not table:
|
|
logging.warning(f"Skipping config {config} as no table found with selector {table_selector}")
|
|
continue
|
|
|
|
headers = [th.get_text().strip().lower() for th in table.select("thead th")]
|
|
if date_column not in headers or versions_column not in headers:
|
|
logging.info(f"Skipping table with headers {headers} as it does not contain the required columns: {date_column}, {versions_column}")
|
|
continue
|
|
|
|
date_index = headers.index(date_column)
|
|
versions_index = headers.index(versions_column)
|
|
|
|
for row in table.select("tbody tr"):
|
|
cells = row.select("td")
|
|
if len(cells) <= max(date_index, versions_index):
|
|
logging.warning(f"Skipping row {cells}: not enough cells")
|
|
continue
|
|
|
|
date_text = cells[date_index].get_text().strip()
|
|
date = dates.parse_date(date_text)
|
|
if date > dates.today():
|
|
logging.info(f"Skipping future version {cells}")
|
|
continue
|
|
|
|
versions = cells[versions_index].get_text().strip()
|
|
for version in versions.split(", "):
|
|
if config.first_match(version):
|
|
product_data.declare_version(version.strip(), date)
|