Support more special date formats (#304)

Add a few replacements to support dates containing th/st/rd suffixes and/or ending with *.
This commit is contained in:
Marc Wrobel
2024-02-13 22:10:08 +01:00
parent ba5a62b0e4
commit c94b4ed8bc

View File

@@ -40,8 +40,20 @@ def parse_datetime(text: str, formats: list[str] = frozenset([
"""Parse a given text representing a datetime using a list of formats,
optionally converting it to UTC.
"""
# so that we don't have to deal with some special characters in formats
text = text.strip().replace(", ", " ").replace(". ", " ").replace("(", "").replace(")", "")
# so that we don't have to deal with some special cases in formats
text = (
text.strip()
.replace("th, ", " ") # November 10th, 2015 -> November 10, 2015
.replace("st, ", " ") # March 31st, 2015 -> March 31, 2015
.replace("Augu ", "August ") # 17 Augu 2023 -> 17 August 2023 - revert after st replacement
.replace("augu ", "August ") # 17 Augu 2023 -> 17 august 2023 - revert after st replacement
.replace("rd, ", " ") # March 3rd, 2015 -> March 3, 2015
.replace(", ", " ") # November 10, 2015 -> November 10 2015
.replace(". ", " ") # November 10. 2015 -> November 10 2015
.replace("(", "") # (November 10 2015) -> November 10 2015)
.replace(")", "") # (November 10 2015) -> (November 10 2015
.replace("*", "") # November 10 2015* -> November 10 2015
)
for fmt in formats:
try:
dt = datetime.strptime(text, fmt) # NOQA: DTZ007, timezone is handled below