From c94b4ed8bc2268db5643f673ef48c4fa8f257263 Mon Sep 17 00:00:00 2001 From: Marc Wrobel Date: Tue, 13 Feb 2024 22:10:08 +0100 Subject: [PATCH] Support more special date formats (#304) Add a few replacements to support dates containing th/st/rd or ending with *. --- src/common/dates.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/common/dates.py b/src/common/dates.py index e8e2ef5f..7ba54371 100644 --- a/src/common/dates.py +++ b/src/common/dates.py @@ -40,8 +40,20 @@ def parse_datetime(text: str, formats: list[str] = frozenset([ """Parse a given text representing a datetime using a list of formats, optionally converting it to UTC. """ - # so that we don't have to deal with some special characters in formats - text = text.strip().replace(", ", " ").replace(". ", " ").replace("(", "").replace(")", "") + # so that we don't have to deal with some special cases in formats + text = ( + text.strip() + .replace("th, ", " ") # November 10th, 2015 -> November 10, 2015 + .replace("st, ", " ") # March 31st, 2015 -> March 31, 2015 + .replace("Augu ", "August ") # 17 Augu 2023 -> 17 August 2023 - revert after st replacement + .replace("augu ", "August ") # 17 Augu 2023 -> 17 august 2023 - revert after st replacement + .replace("rd, ", " ") # March 3rd, 2015 -> March 3, 2015 + .replace(", ", " ") # November 10, 2015 -> November 10 2015 + .replace(". ", " ") # November 10. 2015 -> November 10 2015 + .replace("(", "") # (November 10 2015) -> November 10 2015) + .replace(")", "") # (November 10 2015) -> (November 10 2015 + .replace("*", "") # November 10 2015* -> November 10 2015 + ) for fmt in formats: try: dt = datetime.strptime(text, fmt) # NOQA: DTZ007, timezone is handled below