diff --git a/src/apache-subversion.py b/src/apache-subversion.py
new file mode 100644
index 00000000..6ca157ad
--- /dev/null
+++ b/src/apache-subversion.py
@@ -0,0 +1,46 @@
+"""Fetch Apache Subversion release versions and dates from the release history page."""
+
+import logging
+import re
+
+from bs4 import BeautifulSoup
+from common import dates, http, releasedata
+
+# https://regex101.com/r/k4i7EO/1 only non beta versions
+# Every dot is escaped so only literal "major.minor.patch" strings match.
+VERSION_PATTERN = re.compile(r"^Subversion\s(?P<version>[1-9]\d*\.\d+\.\d+)$")
+# https://regex101.com/r/GsimYd/2
+# Captures e.g. "Wednesday, 1 January 2020" right after the opening parenthesis.
+DATE_PATTERN = re.compile(r"^\((?P<date>\w+,\s\d{1,2}\s\w+\s\d{4})")
+
+with releasedata.ProductData("apache-subversion") as product_data:
+    relnotes = http.fetch_url("https://subversion.apache.org/docs/release-notes/release-history.html")
+    relnotes_soup = BeautifulSoup(relnotes.text, features="html5lib")
+
+    # The release list is the first <ul> following the first <h2> of the page.
+    ul = relnotes_soup.find("h2").find_next("ul")
+    for li in ul.find_all("li"):
+        # Look only inside this <li>: find_next() would fall through to the <b> of a
+        # later item when the current one has none.
+        b = li.find("b")  # b contains the version
+        if b is None:
+            logging.info("Skipping list item without a <b> version element")
+            continue
+
+        version_text = b.get_text(strip=True)
+        version_match = VERSION_PATTERN.match(version_text)
+        if not version_match:
+            logging.info(f"Skipping {version_text}, does not match version regex")
+            continue
+
+        # The date is expected in the node right after <b>; that node may be missing.
+        after_b = b.next_sibling
+        remaining_part_str = after_b.get_text(strip=True) if after_b is not None else ""
+        date_match = DATE_PATTERN.match(remaining_part_str)
+        if not date_match:
+            logging.info(f"Skipping {version_text}, no matching date in '{remaining_part_str}'")
+            continue
+
+        version = version_match.group("version")
+        date = dates.parse_date(date_match.group("date"))
+        product_data.declare_version(version, date)
diff --git a/src/common/dates.py b/src/common/dates.py
index f44daa61..714fafe0 100644
--- a/src/common/dates.py
+++ b/src/common/dates.py
@@ -13,6 +13,8 @@ def parse_date(text: str, formats: list[str] = frozenset([
     "%Y-%m-%d",  # 2020-01-01
     "%m/%d/%Y",  # 01/25/2020
     "%Y/%m/%d",  # 2020/01/25
+    "%A %d %B %Y",  # Wednesday 1 January 2020
+    "%A %d %b %Y",  # Wednesday 1 Jan 2020
 ])) -> datetime:
     """Parse a given text representing a date using a list of formats.
     """