[libreoffice] Ignore prereleases (#517)

LibreOffice data may contain prerelease versions. Those should not be included in release data: endoflife.date only lists GA releases.

Fixes #511.
This commit is contained in:
Marc Wrobel
2025-09-14 10:58:43 +02:00
committed by GitHub
parent aa643c11f5
commit 0a8610fd69

View File

@@ -5,10 +5,36 @@ from common.releasedata import ProductData, config_from_argv
"""Fetches LibreOffice versions from https://downloadarchive.documentfoundation.org/libreoffice/old/"""
def fetch_prereleases(url: str, text_to_match: str) -> list[str]:
    """Get all prereleases from the LibreOffice download page.

    Note that prereleases are version numbers without the patch number, e.g. "25.8.0" and not "25.8.0.1".
    See https://github.com/endoflife-date/release-data/issues/511.

    The page is searched for the first ``<p>`` whose text contains ``text_to_match``; the items of the
    first ``<ul>`` found after that paragraph are returned.

    :param url: URL of the page listing the prereleases.
    :param text_to_match: text identifying the paragraph that introduces the prerelease list.
    :return: the stripped, non-empty text of each list item, in document order.
    :raises ValueError: if the paragraph, or a list following it, cannot be found.
    """
    prereleases_html = http.fetch_html(url)
    prereleases_paragraph = next(
        (p for p in prereleases_html.find_all("p") if text_to_match in p.get_text()),
        None,
    )
    if prereleases_paragraph is None:
        message = "Could not find the prerelease paragraph on the LibreOffice download page"
        raise ValueError(message)

    # find_next() returns None when no <ul> follows the paragraph; fail with a clear error
    # instead of an AttributeError on the subsequent find_all() call.
    prereleases_list = prereleases_paragraph.find_next("ul")
    if prereleases_list is None:
        message = "Could not find the prerelease list on the LibreOffice download page"
        raise ValueError(message)

    # Blank items are dropped: an empty string is a substring of every version, so it would
    # cause any substring-based prerelease filter to discard all releases.
    item_texts = (item.get_text().strip() for item in prereleases_list.find_all("li"))
    return [text for text in item_texts if text]
config = config_from_argv()
with ProductData(config.product) as product_data:
html = http.fetch_html(config.url)
prereleases_url = config.data.get("prereleases_url", "https://www.libreoffice.org/download/download-libreoffice/")
prereleases_text = config.data.get("prereleases_text", "LibreOffice is available in the following prerelease versions:")
prerelease_prefixes = fetch_prereleases(prereleases_url, prereleases_text)
html = http.fetch_html(config.url)
for table in html.find_all("table"):
for row in table.find_all("tr")[1:]:
cells = row.find_all("td")
@@ -22,6 +48,10 @@ with ProductData(config.product) as product_data:
continue
version = config.render(version_match)
if any(prerelease_prefix in version for prerelease_prefix in prerelease_prefixes):
logging.info(f"Skipping prerelease version {version}")
continue
date_str = cells[2].get_text().strip()
date = dates.parse_datetime(date_str)