From 0d17306872c68abd3cd5110958d4aa2a8206f9c5 Mon Sep 17 00:00:00 2001 From: Marc Wrobel Date: Sun, 26 Nov 2023 21:01:35 +0100 Subject: [PATCH] Simplify date parsing (#195) Create common functions parse_date, parse_month_year_date and parse_datetime. Those functions support trying multiple formats, and come with default formats lists that support most of the date format encountered so far. Notable change: year-month dates are now set to the end of month (impacted couchbase-server and ibm-aix). --- releases/couchbase-server.json | 34 +++++++++++------------ releases/ibm-aix.json | 20 ++++++------- src/amazon-neptune.py | 4 +-- src/apache-http-server.py | 11 ++------ src/apple.py | 9 +++--- src/artifactory.py | 16 +++-------- src/cgit.py | 13 ++------- src/common/dates.py | 51 ++++++++++++++++++++++++++++++++++ src/confluence.py | 11 ++------ src/cos.py | 12 ++++---- src/couchbase-server.py | 4 +-- src/firefox.py | 10 ++----- src/gke.py | 4 +-- src/graalvm.py | 7 ++--- src/ibm-aix.py | 11 ++------ src/jira.py | 11 ++------ src/looker.py | 8 ++---- src/php.py | 14 ++-------- src/plesk.py | 4 +-- src/pypi.py | 4 +-- src/rds.py | 12 ++------ src/rockylinux.py | 10 +++---- src/sles.py | 13 ++------- src/splunk.py | 8 ++---- 24 files changed, 133 insertions(+), 168 deletions(-) create mode 100644 src/common/dates.py diff --git a/releases/couchbase-server.json b/releases/couchbase-server.json index 178a13b3..db4327f3 100644 --- a/releases/couchbase-server.json +++ b/releases/couchbase-server.json @@ -1,22 +1,22 @@ { - "7.2.3": "2023-11-15", - "7.1.6": "2023-11-15", - "7.2.2": "2023-09-15", - "7.2.1": "2023-09-15", - "7.1.5": "2023-08-15", + "7.2.3": "2023-11-30", + "7.1.6": "2023-11-30", + "7.2.2": "2023-09-30", + "7.2.1": "2023-09-30", + "7.1.5": "2023-08-31", "7.2.0": "2023-06-01", - "7.1.4": "2023-03-15", - "7.0.5": "2022-12-15", - "7.1.3": "2022-11-15", - "7.1.2": "2022-10-15", - "7.1.1": "2022-07-15", - "7.0.4": "2022-06-15", - "7.1.0": "2022-05-15", - "6.6.5": "2022-01-15", - "7.0.3": "2021-12-15", - "7.0.2": "2021-10-15", - "7.0.1": "2021-09-15", - "7.0.0": "2021-07-15", + "7.1.4": "2023-03-31", + "7.0.5": "2022-12-31", + "7.1.3": "2022-11-30", + "7.1.2": "2022-10-31", + "7.1.1": "2022-07-31", + "7.0.4": "2022-06-30", + "7.1.0": "2022-05-31", + "6.6.5": "2022-01-31", + "7.0.3": "2021-12-31", + "7.0.2": "2021-10-31", + "7.0.1": "2021-09-30", + "7.0.0": "2021-07-31", "6.6.0": "2020-08-12", "6.0.1": "2019-02-15", "6.0.0": "2018-10-31" diff --git a/releases/ibm-aix.json b/releases/ibm-aix.json index 67bb55da..0094f763 100644 --- a/releases/ibm-aix.json +++ b/releases/ibm-aix.json @@ -1,12 +1,12 @@ { - "7.3.2": "2023-11-01", - "7.3.1": "2022-12-01", - "7.3.0": "2021-12-01", - "7.2.5": "2020-11-01", - "7.2.4": "2019-11-01", - "7.2.3": "2018-09-01", - "7.2.2": "2017-10-01", - "7.1.5": "2017-10-01", - "7.2.1": "2016-11-01", - "7.2.0": "2015-12-01" + "7.3.2": "2023-11-30", + "7.3.1": "2022-12-31", + "7.3.0": "2021-12-31", + "7.2.5": "2020-11-30", + "7.2.4": "2019-11-30", + "7.2.3": "2018-09-30", + "7.2.2": "2017-10-31", + "7.1.5": "2017-10-31", + "7.2.1": "2016-11-30", + "7.2.0": "2015-12-31" } \ No newline at end of file diff --git a/src/amazon-neptune.py b/src/amazon-neptune.py index e556c319..7644576c 100644 --- a/src/amazon-neptune.py +++ b/src/amazon-neptune.py @@ -1,7 +1,7 @@ import re from xml.dom.minidom import parseString +from common import dates from common import endoflife -from datetime import datetime """Fetch versions with their dates from the RSS feed of https://docs.aws.amazon.com/neptune/latest/userguide/engine-releases.html. @@ -22,7 +22,7 @@ for item in rss.getElementsByTagName("item"): matches = re.match(REGEX, title) if matches: version = matches['version'] - date = datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S %Z").strftime("%Y-%m-%d") + date = dates.parse_datetime(pubDate).strftime("%Y-%m-%d") versions[version] = date print(f"{version}: {date}") diff --git a/src/apache-http-server.py b/src/apache-http-server.py index 69397bef..903031c1 100644 --- a/src/apache-http-server.py +++ b/src/apache-http-server.py @@ -1,6 +1,6 @@ import re -from datetime import datetime from pathlib import Path +from common import dates from common import endoflife from common.git import Git @@ -18,13 +18,8 @@ REPO_URL = "https://github.com/apache/httpd.git" def parse(date: str) -> str: date = date.replace("Feburary", "February") - for format in ["%B %d, %Y", "%B %d, %Y", "%b %d, %Y", "%b. %d, %Y"]: - try: - return datetime.strptime(date, format).strftime("%Y-%m-%d") - except ValueError: - pass - - raise ValueError(f"Unknown date format for '{date}'") + date = date.replace(". ", " ") + return dates.parse_date(date).strftime("%Y-%m-%d") def fetch_versions_from_file(release_notes_file: Path, versions: dict): diff --git a/src/apple.py b/src/apple.py index 42f2c95b..5dde4c79 100644 --- a/src/apple.py +++ b/src/apple.py @@ -1,6 +1,6 @@ -import datetime import re from bs4 import BeautifulSoup +from common import dates from common import endoflife URLS = [ @@ -46,10 +46,9 @@ CONFIG = { } -def parse_date(s): - d, m, y = s.strip().split(" ") - m = m[0:3].lower() # reduce months to 3 letters, such as "Sept" to "Sep", so it can be parsed - return datetime.datetime.strptime(f"{d} {m} {y}", "%d %b %Y") +def parse_date(date_str): + date_str = date_str.replace("Sept", "Sep") + return dates.parse_date(date_str) print("::group::apple") diff --git a/src/artifactory.py b/src/artifactory.py index e6855ac1..b4d0dc84 100644 --- a/src/artifactory.py +++ b/src/artifactory.py @@ -1,22 +1,14 @@ +from common import dates from common import endoflife -from datetime import datetime from requests_html import HTMLSession URL = "https://jfrog.com/help/r/jfrog-release-information/artifactory-end-of-life" PRODUCT = "artifactory" -def parse_date(text): - text = text.replace("Sept", "Sep").replace("_", "-") - date_formats = ['%d-%b-%Y', '%d-%B-%Y'] - - for date_format in date_formats: - try: - return datetime.strptime(text, date_format).strftime("%Y-%m-%d") - except ValueError: - pass - - raise ValueError("Cannot parse '" + text + "' with formats " + str(date_formats)) +def parse_date(date_str): + date_str = date_str.replace("Sept", "Sep").replace("_", "-") + return dates.parse_date(date_str).strftime("%Y-%m-%d") def fetch_releases(): diff --git a/src/cgit.py b/src/cgit.py index 9281954e..e5e823ea 100644 --- a/src/cgit.py +++ b/src/cgit.py @@ -1,8 +1,8 @@ import re import sys from bs4 import BeautifulSoup +from common import dates from common import endoflife -from datetime import datetime, timezone from liquid import Template """Fetch versions with their dates from a cgit repository, such as @@ -22,15 +22,6 @@ DEFAULT_VERSION_REGEX = ( ) -# Parse date with format 2023-05-01 08:32:34 +0900 and convert to UTC -def parse_date(d): - return ( - datetime.strptime(d, "%Y-%m-%d %H:%M:%S %z") - .astimezone(timezone.utc) - .strftime("%Y-%m-%d") - ) - - def make_bs_request(url): response = endoflife.fetch_url(url + '/refs/tags') return BeautifulSoup(response, features="html5lib") @@ -54,7 +45,7 @@ def fetch_releases(url, regex, template): if matches: match_data = matches.groupdict() version_string = l_template.render(**match_data) - date = parse_date(datetime_text) + date = dates.parse_datetime(datetime_text).strftime("%Y-%m-%d") print(f"{version_string} : {date}") releases[version_string] = date diff --git a/src/common/dates.py b/src/common/dates.py new file mode 100644 index 00000000..a72cad13 --- /dev/null +++ b/src/common/dates.py @@ -0,0 +1,51 @@ +from datetime import datetime, timezone +import calendar + + +def parse_date(text, formats=frozenset([ + "%B %d, %Y", # January 1, 2020 + "%b %d, %Y", # Jan 1, 2020 + "%B %d %Y", # January 1 2020 + "%b %d %Y", # Jan 1 2020 + "%d %B %Y", # 1 January 2020 + "%d %b %Y", # 1 Jan 2020 + "%d-%b-%Y", # 1-Jan-2020 + "%d-%B-%Y", # 1-January-2020 +])) -> datetime: + """Parse a given text representing a date using a list of formats. + """ + return parse_datetime(text, formats, to_utc=False) + + +def parse_month_year_date(text, formats=frozenset([ + "%B %Y", # January 2020 + "%b %Y", # Jan 2020 + ])) -> datetime: + """Parse a given text representing a partial date using a list of formats, + adjusting it to the last day of the month. + """ + date = parse_datetime(text, formats, to_utc=False) + _, last_day = calendar.monthrange(date.year, date.month) + return date.replace(day=last_day) + + +def parse_datetime(text, formats=frozenset([ + "%Y-%m-%d %H:%M:%S", # 2023-05-01 08:32:34 + "%Y-%m-%dT%H:%M:%S", # 2023-05-01T08:32:34 + "%Y-%m-%d %H:%M:%S %z", # 2023-05-01 08:32:34 +0900 + "%a, %d %b %Y %H:%M:%S %Z", # Wed, 01 Jan 2020 00:00:00 GMT + "%Y-%m-%dT%H:%M:%S%z", # 2023-05-01T08:32:34+0900 +]), to_utc=True) -> datetime: + """Parse a given text representing a datetime using a list of formats, + optionally converting it to UTC. + """ + text = text.strip() + for fmt in formats: + try: + date = datetime.strptime(text, fmt) + date = date.astimezone(timezone.utc) if to_utc else date + return date + except ValueError: + pass + + raise ValueError(f"'{text}' could not be parsed as a date with any of the formats: {str(formats)}") diff --git a/src/confluence.py b/src/confluence.py index 21841bae..ef1d9ce9 100644 --- a/src/confluence.py +++ b/src/confluence.py @@ -1,6 +1,6 @@ from requests_html import HTMLSession +from common import dates from common import endoflife -from datetime import datetime """Fetch Confluence versions with their dates from the Atlassian Website. @@ -11,13 +11,7 @@ because the page needs JavaScript to render correctly. PRODUCT = 'confluence' URL = 'https://www.atlassian.com/software/confluence/download-archives' - -def parse_date(text): - return datetime.strptime(text, "%d-%b-%Y").strftime("%Y-%m-%d") - - print(f"::group::{PRODUCT}") - session = HTMLSession() r = session.get(URL) r.html.render(sleep=1, scrolldown=3) @@ -25,7 +19,8 @@ r.html.render(sleep=1, scrolldown=3) versions = {} for version_block in r.html.find('.versions-list'): version = version_block.find('a.product-versions', first=True).attrs['data-version'] - date = parse_date(version_block.find('.release-date', first=True).text) + date_text = version_block.find('.release-date', first=True).text + date = dates.parse_date(date_text).strftime('%Y-%m-%d') print(f"{version}: {date}") versions[version] = date diff --git a/src/cos.py b/src/cos.py index 8022193a..d71937e9 100644 --- a/src/cos.py +++ b/src/cos.py @@ -1,10 +1,9 @@ import re from bs4 import BeautifulSoup +from common import dates from common import endoflife -from datetime import datetime URL = "https://cloud.google.com/container-optimized-os/docs/release-notes/" -DATE_FORMAT = '%b %d, %Y' REGEX = r"^(cos-\d+-\d+-\d+-\d+)" @@ -20,11 +19,10 @@ def fetch_milestones(milestones): return endoflife.fetch_urls(urls) -def parse_date(d): - # If the date begins with a >3 letter month name, trim it to just 3 letters - # Strip out the Date: section from the start - d = re.sub(r'(?:Date\: )?(\w{3})(?:\w{1,})? (\d{1,2}), (\d{4})', r'\1 \2, \3', d) - return datetime.strptime(d, DATE_FORMAT).strftime('%Y-%m-%d') +def parse_date(date_str): + date_str = date_str.strip().replace('Date: ', '') + date_str = re.sub(r'Sep[a-zA-Z]+', 'Sep', date_str) + return dates.parse_date(date_str).strftime('%Y-%m-%d') def find_versions(text): diff --git a/src/couchbase-server.py b/src/couchbase-server.py index be2a1c0c..49613b23 100644 --- a/src/couchbase-server.py +++ b/src/couchbase-server.py @@ -1,7 +1,7 @@ import re from bs4 import BeautifulSoup +from common import dates from common import endoflife -from datetime import datetime """Fetch versions with their dates from docs.couchbase.com. @@ -46,7 +46,7 @@ for response in endoflife.fetch_urls(minor_version_urls): m = re.match(REGEX, versionAndDate) if m: version = f"{m['version']}.0" if len(m['version'].split('.')) == 2 else m['version'] - date = datetime.strptime(m['date'], "%B %Y").strftime("%Y-%m-15") + date = dates.parse_month_year_date(m['date']).strftime("%Y-%m-%d") versions[version] = date print(f"{version}: {date}") diff --git a/src/firefox.py b/src/firefox.py index 9fb96cb3..03e4b070 100644 --- a/src/firefox.py +++ b/src/firefox.py @@ -1,8 +1,8 @@ import re import urllib.parse from bs4 import BeautifulSoup +from common import dates from common import endoflife -from datetime import datetime """Fetch Firefox versions with their dates from https://www.mozilla.org/""" @@ -12,13 +12,7 @@ PRODUCT = "firefox" def format_date(text: str) -> str: text = text.replace(')', '') - formats = ["%b %d, %Y", "%B %d, %Y"] - for f in formats: - try: - return datetime.strptime(text, f).strftime("%Y-%m-%d") - except ValueError: - pass - return "" + return dates.parse_date(text).strftime("%Y-%m-%d") print(f"::group::{PRODUCT}") diff --git a/src/gke.py b/src/gke.py index 6e8b6eb0..3f8ffc45 100644 --- a/src/gke.py +++ b/src/gke.py @@ -1,7 +1,7 @@ import re from bs4 import BeautifulSoup +from common import dates from common import endoflife -from datetime import datetime # https://regex101.com/r/zPxBqT/1 REGEX = r"\d.\d+\.\d+-gke\.\d+" @@ -22,7 +22,7 @@ def parse_soup_for_versions(soup): # h2 contains the date, which we parse for h2 in section.find_all('h2'): date = h2.get('data-text') - date = datetime.strptime(date, '%B %d, %Y').strftime('%Y-%m-%d') + date = dates.parse_date(date).strftime("%Y-%m-%d") # The div next to the h2 contains the notes about changes made # on that date next_div = h2.find_next('div') diff --git a/src/graalvm.py b/src/graalvm.py index 120208bc..d5b522e2 100644 --- a/src/graalvm.py +++ b/src/graalvm.py @@ -1,14 +1,11 @@ from bs4 import BeautifulSoup +from common import dates from common import endoflife -from datetime import datetime URL = "https://www.graalvm.org/release-calendar/" # https://regex101.com/r/877ibq/1 regex = r"RHEL (?P\d)(\. ?(?P\d+))?(( Update (?P\d))| GA)?" -def parse_date(text): - return datetime.strptime(text, "%B %d, %Y").strftime("%Y-%m-%d") - def split_versions(text): # GraalVM for JDK versions has to be prefixed as their release cycle collide # with older GraalVM release cycles. Example: GraalVM for JDK 20 and 20.0. @@ -21,7 +18,7 @@ soup = BeautifulSoup(response, features="html5lib") versions = {} for tr in soup.findAll("table")[1].find("tbody").findAll("tr"): td_list = tr.findAll("td") - date = parse_date(td_list[0].get_text()) + date = dates.parse_date(td_list[0].get_text()).strftime("%Y-%m-%d") for version in split_versions(td_list[2].get_text()): versions[version] = date diff --git a/src/ibm-aix.py b/src/ibm-aix.py index 11c4fc72..88163855 100644 --- a/src/ibm-aix.py +++ b/src/ibm-aix.py @@ -1,17 +1,10 @@ from bs4 import BeautifulSoup +from common import dates from common import endoflife -from datetime import datetime - PRODUCT = "ibm-aix" URL = "https://www.ibm.com/support/pages/aix-support-lifecycle-information" - -# Convert date from e.g. "November 2022" format to "2022-11-01" -def convert_date(date_str): - return datetime.strptime(date_str, "%B %Y").strftime("%Y-%m-%d") - - def fetch_releases(): response = endoflife.fetch_url(URL) soup = BeautifulSoup(response, features="html5lib") @@ -23,7 +16,7 @@ def fetch_releases(): for row in release_table.find_all("tr")[1:]: cells = row.find_all("td") version = cells[0].text.strip("AIX ").replace(' TL', '.') - date = convert_date(cells[1].text) + date = dates.parse_month_year_date(cells[1].text).strftime("%Y-%m-%d") print(f"{version} : {date}") releases[version] = date diff --git a/src/jira.py b/src/jira.py index 6009d39f..bc87a8bf 100644 --- a/src/jira.py +++ b/src/jira.py @@ -1,6 +1,6 @@ from requests_html import HTMLSession +from common import dates from common import endoflife -from datetime import datetime """Fetch Jira versions with their dates from the Atlassian Website. @@ -11,13 +11,7 @@ because the page needs JavaScript to render correctly. PRODUCT = 'jira' URL = 'https://www.atlassian.com/software/jira/update' - -def parse_date(text): - return datetime.strptime(text, "%d-%b-%Y").strftime("%Y-%m-%d") - - print(f"::group::{PRODUCT}") - session = HTMLSession() r = session.get(URL) r.html.render(sleep=1, scrolldown=3) @@ -25,7 +19,8 @@ r.html.render(sleep=1, scrolldown=3) versions = {} for version_block in r.html.find('.versions-list'): version = version_block.find('a.product-versions', first=True).attrs['data-version'] - date = parse_date(version_block.find('.release-date', first=True).text) + date_text = version_block.find('.release-date', first=True).text + date = dates.parse_date(date_text).strftime('%Y-%m-%d') print(f"{version}: {date}") versions[version] = date diff --git a/src/looker.py b/src/looker.py index 542890b4..345b0ddf 100644 --- a/src/looker.py +++ b/src/looker.py @@ -1,8 +1,7 @@ import re - from bs4 import BeautifulSoup +from common import dates from common import endoflife -from datetime import datetime, timezone from xml.dom.minidom import parseString """Fetch Looker versions with their dates from the Google Cloud release notes RSS feed. @@ -14,16 +13,13 @@ ANNOUNCEMENT_PATTERN = re.compile(r"includes\s+the\s+following\s+changes", re.IG VERSION_PATTERN = re.compile(r"Looker\s+(?P\d+\.\d+)", re.IGNORECASE) -def parse_date(date_str): - return datetime.fromisoformat(date_str).astimezone(timezone.utc).strftime("%Y-%m-%d") - print(f"::group::{PRODUCT}") versions = {} response = endoflife.fetch_url(URL) rss = parseString(response) for item in rss.getElementsByTagName("entry"): - date = parse_date(item.getElementsByTagName("updated")[0].firstChild.nodeValue) + date = dates.parse_datetime(item.getElementsByTagName("updated")[0].firstChild.nodeValue).strftime("%Y-%m-%d") content = item.getElementsByTagName("content")[0].firstChild.nodeValue soup = BeautifulSoup(content, features="html5lib") diff --git a/src/php.py b/src/php.py index 4cf10671..de4010cc 100644 --- a/src/php.py +++ b/src/php.py @@ -1,26 +1,16 @@ import json +from common import dates from common import endoflife -from datetime import datetime PHP_MAJOR_VERSIONS = [4, 5, 7, 8] -# Date format is 03 Nov 2022 -# With some versions using 03 November 2022 instead -# we return it as YYYY-MM-DD -def parse_date(date_str): - try: - return datetime.strptime(date_str, "%d %b %Y").strftime("%Y-%m-%d") - except ValueError: - return datetime.strptime(date_str, "%d %B %Y").strftime("%Y-%m-%d") - - def fetch_versions(major_version): url = f"https://www.php.net/releases/index.php?json&max=-1&version={major_version}" response = endoflife.fetch_url(url) data = json.loads(response) for v in data: - data[v] = parse_date(data[v]["date"]) + data[v] = dates.parse_date(data[v]["date"]).strftime("%Y-%m-%d") print(f"{v}: {data[v]}") return data diff --git a/src/plesk.py b/src/plesk.py index b8177374..5f833898 100644 --- a/src/plesk.py +++ b/src/plesk.py @@ -1,6 +1,6 @@ from bs4 import BeautifulSoup +from common import dates from common import endoflife -from datetime import datetime URL = "https://docs.plesk.com/release-notes/obsidian/change-log" PRODUCT = "plesk" @@ -27,7 +27,7 @@ def fetch_releases(): version = version.replace(' Update ', '.').replace('Plesk Obsidian ', '') if ' ' in version: continue - date = datetime.strptime(release.p.text.strip(), '%d %B %Y').strftime("%Y-%m-%d") + date = dates.parse_date(release.p.text).strftime("%Y-%m-%d") result[version] = date print(f"{version}: {date}") diff --git a/src/pypi.py b/src/pypi.py index b7104234..f2332ee2 100644 --- a/src/pypi.py +++ b/src/pypi.py @@ -1,8 +1,8 @@ import json import re import sys +from common import dates from common import endoflife -from datetime import datetime METHOD = "pypi" DEFAULT_TAG_TEMPLATE = ( # Same as used in Ruby (update.rb) @@ -27,7 +27,7 @@ def fetch_releases(pypi_id, regex): if re.match(r, version): matches = True if matches and R: - d = datetime.fromisoformat(R[0]["upload_time"]).strftime("%Y-%m-%d") + d = dates.parse_datetime(R[0]["upload_time"], to_utc=False).strftime("%Y-%m-%d") releases[version] = d print(f"{version}: {d}") diff --git a/src/rds.py b/src/rds.py index fde9a9b9..460141b1 100644 --- a/src/rds.py +++ b/src/rds.py @@ -1,7 +1,7 @@ import re from bs4 import BeautifulSoup +from common import dates from common import endoflife -from datetime import datetime VERSION_REGEX = r"(?P\d+(?:\.\d+)*)" # https://regex101.com/r/BY1vwV/1 DBS = { @@ -9,14 +9,6 @@ DBS = { "postgresql": "https://docs.aws.amazon.com/AmazonRDS/latest/PostgreSQLReleaseNotes/postgresql-release-calendar.html", } - -def parse_date(d): - try: - return datetime.strptime(d, "%d %B %Y").strftime("%Y-%m-%d") - except ValueError: - return None - - for db, url in DBS.items(): print(f"::group::{db}") versions = {} @@ -33,7 +25,7 @@ for db, url in DBS.items(): if len(columns) > 3: m = re.search(VERSION_REGEX, columns[0].text.strip(), flags=re.IGNORECASE) if m: - date = parse_date(columns[2].text.strip()) + date = dates.parse_date(columns[2].text).strftime("%Y-%m-%d") if date: version = m.group("v") print(f"{version} : {date}") diff --git a/src/rockylinux.py b/src/rockylinux.py index d527ac6f..135c375c 100644 --- a/src/rockylinux.py +++ b/src/rockylinux.py @@ -1,16 +1,15 @@ import re +from common import dates from common import endoflife -from datetime import datetime URL = "https://raw.githubusercontent.com/rocky-linux/wiki.rockylinux.org/development/docs/include/releng/version_table.md" REGEX = r"^(\d+\.\d+)$" + def parse_date(date_str): date_str = date_str.replace(',', '').strip() - try: - return datetime.strptime(date_str, "%B %d %Y").strftime("%Y-%m-%d") - except ValueError: - return datetime.strptime(date_str, "%b %d %Y").strftime("%Y-%m-%d") + return dates.parse_date(date_str).strftime("%Y-%m-%d") + def parse_markdown_table(table_text): lines = table_text.strip().split('\n') @@ -26,6 +25,7 @@ def parse_markdown_table(table_text): return versions + print("::group::rockylinux") response = endoflife.fetch_url(URL) versions = parse_markdown_table(response) diff --git a/src/sles.py b/src/sles.py index eb487a42..420ff137 100644 --- a/src/sles.py +++ b/src/sles.py @@ -1,19 +1,10 @@ import re from bs4 import BeautifulSoup +from common import dates from common import endoflife -from datetime import datetime PRODUCT = "sles" URL = "https://www.suse.com/lifecycle" -DATE_FORMAT = "%d %b %Y" - - -# Convert date from e.g. "16 Jul 2018" to "2018-07-16" -def convert_date(date_str): - # If the date begins with a >3 letter month name, trim it to just 3 letters - # Strip out the Date: section from the start - d = re.sub(r'(\d{1,2}) (\w{3})(?:\w{1,4})? (\d{4})', r'\1 \2 \3', date_str) - return datetime.strptime(d, DATE_FORMAT).strftime('%Y-%m-%d') def strip_version(version_str): @@ -47,7 +38,7 @@ def fetch_releases(): version = strip_version(cells[0].text) try: - release_date = convert_date(cells[1].text) + release_date = dates.parse_date(cells[1].text).strftime("%Y-%m-%d") versions[version] = release_date print(f"{version}: {release_date}") except ValueError as e: diff --git a/src/splunk.py b/src/splunk.py index b7b89218..670ea70b 100644 --- a/src/splunk.py +++ b/src/splunk.py @@ -1,7 +1,7 @@ import re from bs4 import BeautifulSoup +from common import dates from common import endoflife -from datetime import datetime PRODUCT = "splunk" URL = "https://docs.splunk.com/Documentation/Splunk" @@ -9,10 +9,6 @@ RELNOTES_URL_TEMPLATE = "https://docs.splunk.com/Documentation/Splunk/{version}/ PATTERN = r"Splunk Enterprise (?P\d+\.\d+(?:\.\d+)*) was (?:first )?released on (?P\w+\s\d\d?,\s\d{4})\." -def convert_date(date: str) -> str: - return datetime.strptime(date, "%B %d, %Y").strftime("%Y-%m-%d") - - def get_latest_minor_versions(versions): versions_split = [version.split('.') for version in versions] @@ -55,7 +51,7 @@ latest_minor_versions_urls = [RELNOTES_URL_TEMPLATE.format(version=v) for v in l for response in endoflife.fetch_urls(latest_minor_versions_urls): for (version, date_str) in re.findall(PATTERN, response.text, re.MULTILINE): version = f"{version}.0" if len(version.split(".")) == 2 else version # convert x.y to x.y.0 - date = convert_date(date_str) + date = dates.parse_date(date_str).strftime("%Y-%m-%d") versions[version] = date print(f"{version}: {date}")