"""Collect UnrealIRCd release dates from the project's wiki.

Downloads the raw wikitext of the "History of UnrealIRCd releases" page,
walks every table row, and keeps the rows whose first cell is a dotted
version number and whose second cell starts with a ``YYYY-MM-DD`` date.
The resulting ``{version: date}`` mapping is written as indented JSON to
``releases/unrealircd.json``.
"""

import json
import re
import urllib.request

import mwparserfromhell

URL = "https://www.unrealircd.org/docwiki/index.php?title=History_of_UnrealIRCd_releases&action=raw"
REGEX = r"^(?:(\d+\.(?:\d+\.)*\d+))$"

# Compiled once so the per-row loop does not repeat the pattern lookup.
VERSION_PATTERN = re.compile(REGEX)
DATE_PATTERN = re.compile(r"\d{4}-\d{2}-\d{2}")


def _cell_text(cell):
    """Return the visible text of a table cell, without markup or padding.

    Uses the public ``strip_code()`` API on the cell contents instead of the
    private ``__strip__`` hook, which may return ``None`` for a cell and whose
    signature is an implementation detail of mwparserfromhell.
    """
    return cell.contents.strip_code().strip()


releases = {}

with urllib.request.urlopen(URL) as response:
    # Decode explicitly rather than handing raw bytes to the parser.
    text = response.read().decode("utf-8")

wikicode = mwparserfromhell.parse(text)

for tr in wikicode.ifilter_tags(matches=lambda node: node.tag == "tr"):
    cells = tr.contents.filter_tags(matches=lambda node: node.tag == "td")
    if len(cells) < 2:
        # Header rows or malformed rows carry no (version, date) pair.
        continue

    version = _cell_text(cells[0])
    if not VERSION_PATTERN.match(version):
        continue

    date_match = DATE_PATTERN.match(_cell_text(cells[1]))
    if date_match is None:
        continue

    # Keep only the date itself: the pattern is anchored at the start only,
    # so any trailing text in the cell must not end up in the output.
    # A version listed more than once keeps the date of its last row.
    releases[version] = date_match.group(0)

with open("releases/unrealircd.json", "w", encoding="utf-8") as f:
    f.write(json.dumps(releases, indent=2))