Remove invalid releases automatically (#466)

Automatically remove empty releases or releases which are released in the future, regardless of the product.

Also refactored releasedata.py a bit to use better names and clarify types.
This commit is contained in:
Marc Wrobel
2025-07-13 10:03:13 +02:00
parent ab64d95b0b
commit 081d884abe
286 changed files with 300 additions and 10735 deletions

View File

@@ -0,0 +1,25 @@
"""Remove empty releases or releases which are released in the future."""

import logging

from common import dates, releasedata

TODAY = dates.today()

frontmatter, _ = releasedata.parse_argv(ignore_auto_config=True)
with releasedata.ProductData(frontmatter.name) as product_data:
    # Mark the product data as updated even when there are no changes.
    product_data.updated = True

    # Iterate over a copy: remove_release() modifies product_data.releases,
    # and a dict must not change size while it is being iterated.
    for release in list(product_data.releases.values()):
        if release.is_empty():
            reason = "empty"
        elif release.was_released_after(TODAY):
            reason = "future"
        else:
            logging.debug(f"Keeping release {release} in {product_data.name}")
            continue

        product_data.remove_release(release.name())
        logging.info(f"Removed {reason} release {release} from {product_data.name}")

View File

@@ -74,7 +74,7 @@ class ProductRelease:
def is_empty(self) -> bool:
    """Tell whether this release holds nothing besides its name.

    Returns:
        True when ``self.data`` contains exactly one entry, False otherwise.
    """
    # A single entry means only the name is set.
    field_count = len(self.data)
    return field_count == 1
def is_released_after(self, date: datetime) -> bool:
def was_released_after(self, date: datetime) -> bool:
    """Tell whether this release's date is strictly later than ``date``.

    Args:
        date: The reference date the release date is compared against.

    Returns:
        True if ``get_release_date()`` yields a date later than ``date``;
        False otherwise, including when it yields ``None``.
    """
    release_date = self.get_release_date()
    # Test against None explicitly: `release_date and ...` would hand back None
    # instead of a bool when there is no release date.
    return release_date is not None and release_date > date
@@ -110,14 +110,14 @@ class ProductVersion:
class ProductData:
def __init__(self, name: str) -> None:
self.name: str = name
self.versions_path: Path = DATA_DIR / f"{name}.json"
self.releases = {}
self.path: Path = DATA_DIR / f"{name}.json"
self.releases: dict[str, ProductRelease] = {}
self.versions: dict[str, ProductVersion] = {}
self.updated = False
self.updated: bool = False
def __enter__(self) -> "ProductData":
if self.versions_path.is_file():
with self.versions_path.open() as f:
if self.path.is_file():
with self.path.open() as f:
json_data = json.load(f)
for json_version in json_data.get("versions", {}).values():
version = ProductVersion(self.name, json_version)
@@ -125,9 +125,9 @@ class ProductData:
for json_release in json_data.get("releases", {}).values():
release = ProductRelease(self.name, json_release)
self.releases[release.name()] = release
logging.info(f"loaded data for {self} from {self.versions_path}")
logging.info(f"loaded data for {self} from {self.path}")
else:
logging.info(f"no data found for {self} at {self.versions_path}")
logging.info(f"no data found for {self} at {self.path}")
return self
@@ -143,48 +143,49 @@ class ProductData:
logging.error(message)
raise ProductUpdateError(message)
logging.info("updating %s data",self.versions_path)
logging.info("updating %s data", self.path)
ordered_releases = sorted(self.releases.values(), key=lambda v: v.name(), reverse=True)
ordered_versions = sorted(self.versions.values(), key=lambda v: (v.date(), v.name()), reverse=True)
with self.versions_path.open("w") as f:
with self.path.open("w") as f:
f.write(json.dumps({
"releases": {release.name(): release.data for release in ordered_releases},
"versions": {version.name(): version.data for version in ordered_versions},
}, indent=2))
def get_release(self, release: str) -> ProductRelease:
if release not in self.releases:
logging.info(f"adding release {release} to {self}")
self.releases[release] = ProductRelease.of(self.name, release)
def get_release(self, release_name: str) -> ProductRelease:
if release_name not in self.releases:
logging.info(f"adding release {release_name} to {self}")
self.releases[release_name] = ProductRelease.of(self.name, release_name)
self.updated = True
return self.releases[release]
return self.releases[release_name]
def remove_release(self, release: str) -> None:
if release not in self.releases:
logging.warning(f"release {release} cannot be removed as it does not exist for {self}")
def remove_release(self, release_name: str) -> None:
if release_name not in self.releases:
logging.warning(f"release {release_name} cannot be removed as it does not exist for {self}")
return
logging.info(f"removing release {release} ({self.releases.pop(release)}) from {self}")
def get_version(self, version: str) -> ProductVersion:
return self.versions[version] if version in self.versions else None
def declare_version(self, version: str, date: datetime) -> None:
logging.info(f"removing release {release_name} ({self.releases.pop(release_name)}) from {self}")
self.updated = True
if version in self.versions and self.versions[version].date() != date:
logging.info(f"overwriting {version} ({self.get_version(version).date()} -> {date}) for {self}")
self.versions[version].replace_date(date)
def get_version(self, version_name: str) -> "ProductVersion | None":
    """Look up a version of this product by name.

    Args:
        version_name: The name of the version to look up.

    Returns:
        The matching entry of ``self.versions``, or ``None`` when no version
        with that name is known.
    """
    # dict.get does a single lookup and already yields None for a missing key.
    return self.versions.get(version_name)
def declare_version(self, version_name: str, versions_date: datetime) -> None:
self.updated = True
if version_name in self.versions and self.versions[version_name].date() != versions_date:
logging.info(f"overwriting {version_name} ({self.get_version(version_name).date()} -> {versions_date}) for {self}")
self.versions[version_name].replace_date(versions_date)
else:
logging.info(f"adding version {version} ({date}) to {self}")
self.versions[version] = ProductVersion.of(self.name, version, date)
logging.info(f"adding version {version_name} ({versions_date}) to {self}")
self.versions[version_name] = ProductVersion.of(self.name, version_name, versions_date)
def remove_version(self, version: str) -> None:
if version not in self.versions:
logging.warning(f"version {version} cannot be removed as it does not exist for {self}")
def remove_version(self, version_name: str) -> None:
if version_name not in self.versions:
logging.warning(f"version {version_name} cannot be removed as it does not exist for {self}")
return
logging.info(f"removing version {version} ({self.versions.pop(version)}) from {self}")
logging.info(f"removing version {version_name} ({self.versions.pop(version_name)}) from {self}")
def __repr__(self) -> str:
return self.name

View File

@@ -21,8 +21,6 @@ necessary information. Available configuration options are:
render_javascript is true.
- render_javascript_wait_until (optional, default = None): Argument to pass to Playwright, one of "commit",
"domcontentloaded", "load", or "networkidle". Only use when render_javascript is true and if the script fails without it.
- ignore_empty_releases (optional, default = false): A boolean value indicating whether to ignore releases with no
fields except the name.
- fields: A dictionary that maps release fields to the table's columns. Field definitions include:
- column (mandatory): The name or index (starts at 1) of the column in the table.
- type (mandatory, default = string): The type of the field. Supported types are:
@@ -69,7 +67,6 @@ DEFAULT_REGEX = r"^(?P<value>.+)$"
DEFAULT_TEMPLATE = "{{value}}"
DEFAULT_RELEASE_REGEX = r"^v?(?P<value>\d+(\.\d+)*)$"
RANGE_LIST_SEPARATOR_PATTERN = re.compile(r"\s*,\s*")
TODAY = dates.today()
class Field:
@@ -156,7 +153,6 @@ with ProductData(config.product) as product_data:
render_javascript = config.data.get("render_javascript", False)
render_javascript_wait_until = config.data.get("render_javascript_wait_until", None)
render_javascript_wait_for = config.data.get("render_javascript_wait_for", None)
ignore_empty_releases = config.data.get("ignore_empty_releases", False)
header_row_selector = config.data.get("header_selector", "thead tr")
rows_selector = config.data.get("rows_selector", "tbody tr")
cells_selector = "td, th"
@@ -207,13 +203,5 @@ with ProductData(config.product) as product_data:
except ValueError as e:
logging.info(f"skipping cell {raw_field} for {release}: {e}")
if ignore_empty_releases and release.is_empty():
logging.info(f"removing empty release '{release}'")
product_data.remove_release(release_name)
if release.is_released_after(TODAY):
logging.info(f"removing future release '{release}'")
product_data.remove_release(release_name)
except ValueError as e:
logging.info(f"skipping table with headers {headers}: {e}")