# Patch summary (text before the first "diff --git" header is commentary and
# is not part of either file).
#
# 1. src/common/dates.py: adds the "%d-%b-%Y %H:%M" format (for example
#    "01-May-2023 08:32") to the default formats of parse_datetime().
#
# 2. src/libreoffice.py (new): fetches the LibreOffice download archive
#    listing, walks every <table>, skips the first <tr> of each table and any
#    row with fewer than four <td> cells, then reads the second cell as the
#    entry name and the third cell as its date. Entries whose name matches
#    VERSION_PATTERN (dotted digits followed by "/") are declared as versions
#    with the date parsed by dates.parse_datetime().
#
# NOTE(review): VERSION_PATTERN must define the named group "version", because
# the script reads version_match["version"]. A nameless "(?P\d+...)" is not a
# valid pattern and fails in re.compile() at import time.
# NOTE(review): leading whitespace of the dates.py context lines is assumed to
# be four spaces with two spaces before each inline comment; confirm against
# the target file before applying.
diff --git a/src/common/dates.py b/src/common/dates.py
index 3e96b156..f44daa61 100644
--- a/src/common/dates.py
+++ b/src/common/dates.py
@@ -37,6 +37,7 @@ def parse_month_year_date(text: str, formats: list[str] = frozenset([
 def parse_datetime(text: str, formats: list[str] = frozenset([
     "%Y-%m-%d %H:%M:%S",  # 2023-05-01 08:32:34
     "%Y-%m-%dT%H:%M:%S",  # 2023-05-01T08:32:34
+    "%d-%b-%Y %H:%M",  # 01-May-2023 08:32
     "%Y-%m-%d %H:%M:%S %z",  # 2023-05-01 08:32:34 +0900
     "%Y-%m-%dT%H:%M:%S%z",  # 2023-05-01T08:32:34+0900
     "%Y-%m-%dT%H:%M:%S.%f%z",  # 2023-05-01T08:32:34.123456Z
diff --git a/src/libreoffice.py b/src/libreoffice.py
new file mode 100644
index 00000000..f048f5bc
--- /dev/null
+++ b/src/libreoffice.py
@@ -0,0 +1,27 @@
+import re
+
+from bs4 import BeautifulSoup
+from common import dates, http, releasedata
+
+"""Fetches LibreOffice versions from https://downloadarchive.documentfoundation.org/libreoffice/old/"""
+
+VERSION_PATTERN = re.compile(r"^(?P<version>\d+(\.\d+)*)\/$")
+
+with releasedata.ProductData("libreoffice") as product_data:
+    response = http.fetch_url("https://downloadarchive.documentfoundation.org/libreoffice/old/")
+    soup = BeautifulSoup(response.text, features="html5lib")
+
+    for table in soup.find_all("table"):
+        for row in table.find_all("tr")[1:]:
+            cells = row.find_all("td")
+            if len(cells) < 4:
+                continue
+
+            version_str = cells[1].get_text().strip()
+            date_str = cells[2].get_text().strip()
+            version_match = VERSION_PATTERN.match(version_str)
+
+            if version_match:
+                version = version_match["version"]
+                date = dates.parse_datetime(date_str)
+                product_data.declare_version(version, date)