[release_table] Add support for 'range' type (#324)

This new type converts a comma-separated list of values into a range, only keeping the first and last value.
For example, '1.0, 1.1, 1.2' becomes '1.0 - 1.2'.
This commit is contained in:
Marc Wrobel
2024-03-03 09:55:27 +01:00
committed by GitHub
parent 8dd4d570e4
commit e1e1274b13

View File

@@ -2,6 +2,7 @@ import logging
import re
import sys
from datetime import datetime
from re import Match
from bs4 import BeautifulSoup
from common import dates, endoflife, http, releasedata
@@ -17,8 +18,12 @@ necessary information. Available configuration options are:
- rows_selector (mandatory, default = tbody tr): A CSS selector used to locate the table's rows.
- fields: A dictionary that maps release fields to the table's columns. Field definition include:
- column (mandatory): The name of the column in the table. This is case-insensitive.
- type (mandatory, default = string): The type of the field. Supported types are listed in SUPPORTED_TYPES. If the
field is one of the known date fields (DATE_FIELDS), the type is automatically set to 'date' if not provided.
- type (mandatory, default = string): The type of the field. Supported types are:
- string: The raw string value.
- date : A full or year-month date (supported patterns available in common.dates).
- range : Convert a comma-separated list of values into a range, only keeping the first and last value.
For example, "1.0, 1.1, 1.2" becomes "1.0 - 1.2".
If the field is one of the known date fields, the type is automatically set to 'date' if not provided.
- regex (mandatory, default = [DEFAULT_REGEX]): A regular expression, or a list of regular expressions, used to
validate allowed values. Note that default value for the releaseCycle field is not DEFAULT_REGEX, but
DEFAULT_RELEASE_REGEX.
@@ -31,13 +36,13 @@ Supported CSS selectors are defined by BeautifulSoup and documented on its websi
https://beautiful-soup-4.readthedocs.io/en/latest/index.html?highlight=selector#css-selectors."""
METHOD = "release_table"
SUPPORTED_TYPES = ["date", "string"]
SUPPORTED_TYPES = ["date", "string", "range"]
DATE_TYPES = ["date"]
DATE_FIELDS = ["releaseDate", "lts", "support", "eol", "extendedSupport"]
DEFAULT_REGEX = r"^(?P<value>.+)$"
DEFAULT_TEMPLATE = "{{value}}"
DEFAULT_RELEASE_REGEX = r"^v?(?P<value>\d+(\.\d+)*)$"
RANGE_LIST_SEPARATOR_PATTERN = re.compile(r"\s*,\s*")
class Field:
def __init__(self, name: str, definition: str | dict) -> None:
@@ -79,13 +84,7 @@ class Field:
if not match:
continue
str_value = self.template.render(**match.groupdict()) if self.template else raw_value
if self.type == "date":
try:
return dates.parse_date(str_value)
except ValueError:
return dates.parse_month_year_date(str_value)
return str_value
return self.__process_value(match, raw_value)
if self.name == "releaseCycle":
return None # skipping entire rows is allowed
@@ -93,6 +92,21 @@ class Field:
msg = f"field {self}'s value '{raw_value}' does not match any regex in {self.include_version_patterns}"
raise ValueError(msg)
def __process_value(self, match: Match[str], raw_value: str) -> str | datetime:
str_value = self.template.render(**match.groupdict()) if self.template else raw_value
if self.type == "date":
try:
return dates.parse_date(str_value)
except ValueError:
return dates.parse_month_year_date(str_value)
elif self.type == "range":
items = RANGE_LIST_SEPARATOR_PATTERN.split(str_value)
return f"{items[0]} - {items[-1]}" if len(items) > 1 else str_value
return str_value
def __repr__(self) -> str:
return f"{self.name}({self.column})"