diff --git a/src/apple.py b/src/apple.py index 8c0a5802..81e38916 100644 --- a/src/apple.py +++ b/src/apple.py @@ -1,8 +1,8 @@ -import json -import urllib.request import datetime -from bs4 import BeautifulSoup +import json import re +from bs4 import BeautifulSoup +from common import endoflife URLS = [ "https://support.apple.com/en-us/HT201222", # latest @@ -55,45 +55,45 @@ def parse_date(s): for url in URLS: - with urllib.request.urlopen(url, data=None, timeout=5) as response: - soup = BeautifulSoup(response, features="html5lib") - table = soup.find(id="tableWraper") - for tr in reversed(table.findAll("tr")[1:]): - td_list = tr.findAll("td") - version_text = td_list[0].get_text() - for key, regexes in CONFIG.items(): - for regex in regexes: - matches = re.findall(regex, version_text, re.MULTILINE) - if matches: - for version in matches: - abs_date = None - try: - print("== %s" % version_text.strip()) - abs_date = parse_date(td_list[2].get_text()) - print_date = abs_date.strftime("%Y-%m-%d") - # Only update the date if we are adding first time - # or if the date is lower - if version not in release_lists[key]: - release_lists[key][version] = abs_date - print("%s-%s: %s" % (key, version, print_date)) - elif release_lists[key][version] < abs_date: - print( - "%s-%s: %s [IGNORED]" - % (key, version, print_date) - ) - elif release_lists[key][version] > abs_date: - # This is a lower date, so we mark it with a bang - print( - "%s-%s: %s [UPDATED]" - % (key, version, print_date) - ) - release_lists[key][version] = abs_date - except ValueError as e: + response = endoflife.fetch_url(url) + soup = BeautifulSoup(response, features="html5lib") + table = soup.find(id="tableWraper") + for tr in reversed(table.findAll("tr")[1:]): + td_list = tr.findAll("td") + version_text = td_list[0].get_text() + for key, regexes in CONFIG.items(): + for regex in regexes: + matches = re.findall(regex, version_text, re.MULTILINE) + if matches: + for version in matches: + abs_date = None + try: + 
print("== %s" % version_text.strip()) + abs_date = parse_date(td_list[2].get_text()) + print_date = abs_date.strftime("%Y-%m-%d") + # Only update the date if we are adding first time + # or if the date is lower + if version not in release_lists[key]: + release_lists[key][version] = abs_date + print("%s-%s: %s" % (key, version, print_date)) + elif release_lists[key][version] < abs_date: print( - "%s-%s Failed to parse Date (%s)" - % (key, version, td_list[2].get_text()) + "%s-%s: %s [IGNORED]" + % (key, version, print_date) ) - next + elif release_lists[key][version] > abs_date: + # This is a lower date, so we mark it with a bang + print( + "%s-%s: %s [UPDATED]" + % (key, version, print_date) + ) + release_lists[key][version] = abs_date + except ValueError as e: + print( + "%s-%s Failed to parse Date (%s)" + % (key, version, td_list[2].get_text()) + ) + next for k in CONFIG.keys(): diff --git a/src/common/endoflife.py b/src/common/endoflife.py index f3afb708..93832372 100644 --- a/src/common/endoflife.py +++ b/src/common/endoflife.py @@ -1,10 +1,14 @@ import frontmatter +import urllib.request from glob import glob from os import path +# See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent. +USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0' -def list_products(method, products_filter=None, pathname = "website/products"): + +def list_products(method, products_filter=None, pathname="website/products"): """Return a list of products that are using the same given update method. 
def fetch_url(url, retry_count=2, timeout=5, data=None, headers=None, encoding='utf-8'):
    """Fetch ``url`` and return the response body decoded as text.

    The request is attempted up to ``retry_count`` times; every failure is
    printed and the exception from the last attempt is re-raised once all
    attempts are exhausted.

    Args:
        url: The URL to fetch.
        retry_count: Maximum number of attempts. Values below 1 are treated
            as a single attempt so the function never ends on ``raise None``.
        timeout: Timeout in seconds, passed to ``urllib.request.urlopen``.
        data: Optional request body, passed to ``urllib.request.urlopen``.
        headers: Optional dict of extra request headers. They are merged on
            top of the default ``User-Agent`` header, so a caller only
            replaces the User-Agent by supplying one explicitly.
        encoding: Encoding used to decode the response bytes.

    Returns:
        The decoded response body as a ``str``.

    Raises:
        Exception: Whatever the last failed attempt raised.
    """
    # The parentheses matter: `a | b if c else d` parses as `(a | b) if c else d`,
    # which would discard the default User-Agent whenever headers are given.
    headers = {'User-Agent': USER_AGENT} | ({} if headers is None else headers)
    request = urllib.request.Request(url, headers=headers)

    last_exception = None
    for retry in range(max(1, retry_count)):
        try:
            # The context manager closes the connection even if read/decode fails.
            with urllib.request.urlopen(request, data=data, timeout=timeout) as resp:
                return resp.read().decode(encoding)
        except Exception as e:
            # Deliberately broad: any failure of an attempt is retried, and the
            # last one is re-raised below rather than swallowed.
            last_exception = e
            print(f"Fetch of {url} failed (retry={retry}), got: " + str(e))

    raise last_exception
+ response = endoflife.fetch_url(url, retry_count=10) + soup = BeautifulSoup(response, features="html5lib") milestones = soup.find_all('td', text=re.compile(r'COS \d+ LTS')) return [m.text.split(' ')[1] for m in milestones] + def fetch_milestone(channel): url = "https://cloud.google.com/container-optimized-os/docs/release-notes/m{}".format(channel) - # Google Docs website often returns SSL errors, retry the request in case of failures. - for i in range(0,5): - try: - with urllib.request.urlopen(url, data=None, timeout=5) as response: - return BeautifulSoup(response, features="html5lib") - except Exception as e: - print("Retrying Request") - continue - raise Exception("Failed to fetch COS milestone {}".format(channel)) + # Retry as Google Docs often returns SSL errors. + response = endoflife.fetch_url(url, retry_count=10) + return BeautifulSoup(response, features="html5lib") -""" -Takes soup, and returns a dictionary of versions and their release dates -""" def parse_soup_for_versions(soup): - """ Parse the soup """ + """Takes soup, and returns a dictionary of versions and their release dates + """ versions = {} for article in soup.find_all('article', class_='devsite-article'): def parse_date(d): @@ -67,21 +52,23 @@ def parse_soup_for_versions(soup): d = heading.find_previous('h2').get('data-text') date = parse_date(d) versions[version] = date + print("%s: %s" % (version, date)) return versions + def get_all_versions(): all_versions = {} all_milestones = fetch_all_milestones() + print("::group::cos") for milestone in all_milestones: soup = fetch_milestone(milestone) - print("::group::COS - {}".format(milestone)) versions = parse_soup_for_versions(soup) all_versions |= versions - print("::endgroup::") - + print("::endgroup::") return all_versions + if __name__ == '__main__': v = get_all_versions() with open('releases/cos.json', "w") as f: diff --git a/src/distrowatch.py b/src/distrowatch.py index 669ff6fe..7c3f3cc9 100644 --- a/src/distrowatch.py +++ 
b/src/distrowatch.py @@ -1,10 +1,9 @@ +import json import re import sys -import json -import urllib.request from bs4 import BeautifulSoup -from liquid import Template from common import endoflife +from liquid import Template METHOD = 'distrowatch' DEFAULT_TAG_TEMPLATE = ( # Same as used in Ruby (update.rb) @@ -28,15 +27,15 @@ def get_versions_from_headline(regex, headline, template): def fetch_releases(distrowatch_id, regex, template): releases = {} l_template = Template(template) - url = "https://distrowatch.com/index.php?distribution=%s" % distrowatch_id - with urllib.request.urlopen(url, data=None, timeout=5) as response: - soup = BeautifulSoup(response, features="html5lib") - for table in soup.select("td.News1>table.News"): - headline = table.select_one("td.NewsHeadline a[href]").get_text().strip() - date = table.select_one("td.NewsDate").get_text() - for v in get_versions_from_headline(regex, headline, l_template): - print("%s: %s" % (v, date)) - releases[v] = date + url = f"https://distrowatch.com/index.php?distribution={distrowatch_id}" + response = endoflife.fetch_url(url) + soup = BeautifulSoup(response, features="html5lib") + for table in soup.select("td.News1>table.News"): + headline = table.select_one("td.NewsHeadline a[href]").get_text().strip() + date = table.select_one("td.NewsDate").get_text() + for v in get_versions_from_headline(regex, headline, l_template): + print("%s: %s" % (v, date)) + releases[v] = date return releases diff --git a/src/eks.py b/src/eks.py index 60c2f31e..0b1b3f63 100644 --- a/src/eks.py +++ b/src/eks.py @@ -1,10 +1,10 @@ -import urllib.request import datetime +import json import markdown import re -import json -from datetime import datetime from bs4 import BeautifulSoup +from common import endoflife +from datetime import datetime URL = "https://raw.githubusercontent.com/awsdocs/amazon-eks-user-guide/master/doc_source/platform-versions.md" REGEX = r"^(?P\d+)\.(?P\d+)\.(?P\d+)$" @@ -12,20 +12,23 @@ REGEX = 
r"^(?P\d+)\.(?P\d+)\.(?P\d+)$" def parse_platforms_page(): all_versions = {} - with urllib.request.urlopen(URL, data=None, timeout=5) as contents: - html = markdown.markdown(contents.read().decode("utf-8"), extensions=["tables"]) - soup = BeautifulSoup(html, features="html5lib") - for tr in soup.findAll("tr"): - td = tr.find("td") - if td and re.match(REGEX, td.text): - data = tr.findAll("td") - date = data[-1].text - if len(date) > 0: - d = datetime.strptime(date, "%B %d, %Y").strftime("%Y-%m-%d") - k8s_version = ".".join(data[0].text.split(".")[:-1]) - eks_version = data[1].text.replace(".", "-") - version = "%s-%s" % (k8s_version, eks_version) - all_versions[version] = d + print("::group::eks") + response = endoflife.fetch_url(URL) + html = markdown.markdown(response, extensions=["tables"]) + soup = BeautifulSoup(html, features="html5lib") + for tr in soup.findAll("tr"): + td = tr.find("td") + if td and re.match(REGEX, td.text): + data = tr.findAll("td") + date = data[-1].text + if len(date) > 0: + d = datetime.strptime(date, "%B %d, %Y").strftime("%Y-%m-%d") + k8s_version = ".".join(data[0].text.split(".")[:-1]) + eks_version = data[1].text.replace(".", "-") + version = "%s-%s" % (k8s_version, eks_version) + all_versions[version] = d + print("%s: %s" % (version, d)) + print("::endgroup::") return all_versions diff --git a/src/firefox.py b/src/firefox.py index a3c59ddc..d4461f13 100644 --- a/src/firefox.py +++ b/src/firefox.py @@ -1,11 +1,11 @@ +import concurrent.futures import json -from typing import Tuple -from datetime import datetime import re import requests -import urllib.request from bs4 import BeautifulSoup -import concurrent.futures +from common import endoflife +from datetime import datetime +from typing import Tuple """Fetch Firefox versions with their dates from https://www.mozilla.org/en-US/firefox/releases/""" URL = "https://www.mozilla.org/en-US/firefox/releases/" @@ -107,22 +107,9 @@ def get_version_and_date(release_page: str, release_version: 
str) -> Tuple[str, raise UnsupportedReleasePageError("Unable to find version and date for %s" % release_page) def make_bs_request(url: str) -> BeautifulSoup: - """ Make a request to the given url and return a BeautifulSoup object """ - last_exception = None - headers = {"user-agent": "mozilla"} - # requests to www.mozilla.org often time out, retry in case of failures - for i in range(0, 5): - try: - req = urllib.request.Request(url, headers=headers) - with urllib.request.urlopen(req, timeout=5) as response: - return BeautifulSoup(response.read(), features="html5lib") - except TimeoutError as e: - last_exception = e - print(f"Request to {url} timed out, retrying ({i})...") - continue - - raise last_exception + response = endoflife.fetch_url(url, timeout=10, retry_count=5) + return BeautifulSoup(response, features="html5lib") def fetch_releases(): releases = {} diff --git a/src/gke.py b/src/gke.py index 2119eda7..841cf7d8 100644 --- a/src/gke.py +++ b/src/gke.py @@ -1,7 +1,7 @@ -import urllib.request -from bs4 import BeautifulSoup -import re import json +import re +from bs4 import BeautifulSoup +from common import endoflife from datetime import datetime # https://regex101.com/r/zPxBqT/1 @@ -9,8 +9,8 @@ REGEX = r"\d.\d+\.\d+-gke\.\d+" def fetch_channel(channel): url = "https://cloud.google.com/kubernetes-engine/docs/release-notes-{}".format(channel) - with urllib.request.urlopen(url, data=None, timeout=5) as response: - return BeautifulSoup(response, features="html5lib") + response = endoflife.fetch_url(url) + return BeautifulSoup(response, features="html5lib") """ Takes soup, and returns a dictionary of versions and their release dates diff --git a/src/haproxy.py b/src/haproxy.py index 4199024c..f82fa239 100644 --- a/src/haproxy.py +++ b/src/haproxy.py @@ -1,8 +1,7 @@ import json import re -import urllib.request - from bs4 import BeautifulSoup +from common import endoflife """Fetch HAProxy versions with their dates from https://www.haproxy.org/download/. 
""" @@ -16,16 +15,13 @@ VERSION_REGEX = r"^(\d{4})\/(\d{2})\/(\d{2})\s+:\s+(\d+\.\d+\.\d.?)$" def fetch_cycles(): cycles = [] - print("Fetching cycles") - with urllib.request.urlopen( - "https://www.haproxy.org/download/") as response: - soup = BeautifulSoup(response, features="html5lib") - for link in soup.select("a"): - m = re.match(CYCLE_REGEX, link.attrs["href"]) - if m: - cycle = m.groups()[0] - cycles.append(cycle) - print(f"Found {cycle}") + response = endoflife.fetch_url('https://www.haproxy.org/download/') + soup = BeautifulSoup(response, features="html5lib") + for link in soup.select("a"): + m = re.match(CYCLE_REGEX, link.attrs["href"]) + if m: + cycle = m.groups()[0] + cycles.append(cycle) # No changelog in https://www.haproxy.org/download/1.0/src cycles.remove("1.0") @@ -38,14 +34,13 @@ def fetch_releases(cycles): for cycle in cycles: url = f"https://www.haproxy.org/download/{cycle}/src/CHANGELOG" - print(f"Fetching version from {url}") - with urllib.request.urlopen(url) as response: - for line in response: - m = re.match(VERSION_REGEX, line.decode("utf-8")) - if m: - year, month, day, version = m.groups() - date = f"{year}-{month}-{day}" - releases[version] = date + response = endoflife.fetch_url(url) + for line in response.split('\n'): + m = re.match(VERSION_REGEX, line) + if m: + year, month, day, version = m.groups() + date = f"{year}-{month}-{day}" + releases[version] = date return releases diff --git a/src/linuxkernel.py b/src/linuxkernel.py index a82de136..9dd1b8ad 100644 --- a/src/linuxkernel.py +++ b/src/linuxkernel.py @@ -1,9 +1,8 @@ -import re import json -import urllib.request - -from datetime import datetime, timezone +import re from bs4 import BeautifulSoup +from common import endoflife +from datetime import datetime, timezone """Fetch Linux Kernel versions with their dates from https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/refs/tags. 
@@ -25,9 +24,8 @@ def parse_date(d): def make_bs_request(url): - req = urllib.request.Request(url) - with urllib.request.urlopen(req, timeout=5) as response: - return BeautifulSoup(response.read(), features="html5lib") + response = endoflife.fetch_url(url) + return BeautifulSoup(response, features="html5lib") def fetch_releases(): diff --git a/src/maven.py b/src/maven.py index 238ae07b..1c42b27d 100644 --- a/src/maven.py +++ b/src/maven.py @@ -1,8 +1,7 @@ -import sys -import json -import urllib.request import datetime +import json import re +import sys from common import endoflife METHOD = "maven" @@ -19,20 +18,8 @@ def valid_version(version): def fetch_json(group_id, artifact_id, start): url = f"https://search.maven.org/solrsearch/select?q=g:{group_id}+AND+a:{artifact_id}&core=gav&rows=100&wt=json&start={start}" - last_exception = None - - # search.maven.org often time out lately, retry the request in case of failures. - for i in range(0, 5): - try: - with urllib.request.urlopen(url, data=None, timeout=5) as response: - return json.load(response) - except Exception as e: - last_exception = e - message = getattr(e, 'message', repr(e)) # https://stackoverflow.com/a/45532289/374236 - print(f"Error while requesting {url} ({message}), retrying ({i})...") - continue - - raise last_exception + response = endoflife.fetch_url(url, retry_count=5) + return json.loads(response) def fetch_releases(package_identifier): diff --git a/src/npm.py b/src/npm.py index 9befb616..79ae13fc 100644 --- a/src/npm.py +++ b/src/npm.py @@ -1,7 +1,6 @@ +import json import re import sys -import json -import urllib.request from common import endoflife METHOD = "npm" @@ -18,18 +17,18 @@ def fetch_releases(npm_id, regex): regex = [regex] url = f"https://registry.npmjs.org/{npm_id}" - with urllib.request.urlopen(url, data=None, timeout=5) as response: - data = json.loads(response.read().decode("utf-8")) - for version in data["time"]: - matches = False - for r in regex: - if re.match(r, version): - 
matches = True + response = endoflife.fetch_url(url) + data = json.loads(response) + for version in data["time"]: + matches = False + for r in regex: + if re.match(r, version): + matches = True - release_datetime = data["time"][version] - if matches and release_datetime: - releases[version] = release_datetime.split("T")[0] - print(f"{version}: {releases[version]}") + release_datetime = data["time"][version] + if matches and release_datetime: + releases[version] = release_datetime.split("T")[0] + print(f"{version}: {releases[version]}") return releases diff --git a/src/palo-alto-networks.py b/src/palo-alto-networks.py index c03b4644..60c7737c 100644 --- a/src/palo-alto-networks.py +++ b/src/palo-alto-networks.py @@ -1,8 +1,8 @@ -import json -import urllib.request import datetime +import json import re from bs4 import BeautifulSoup +from common import endoflife URL = "https://www.paloaltonetworks.com/services/support/end-of-life-announcements/end-of-life-summary" @@ -15,33 +15,37 @@ ID_MAPPING = { def update_releases(html_identifier, file): versions = {} - with urllib.request.urlopen(URL, data=None, timeout=5) as response: - soup = BeautifulSoup(response, features="html5lib") - table = soup.find(id=html_identifier) - for tr in table.findAll("tr")[3:]: - td_list = tr.findAll("td") - version = ( - td_list[0].get_text().strip().lower().replace(" ", "-").replace("*", "") - ) - if file == "pan-xdr": - if "xdr" not in version: - continue - version = version.removesuffix("-(cortex-xdr-agent)") - version = version.removesuffix("-(vm-series-only)") - version = version.removesuffix("-(panorama-only)") - if len(td_list) > 1 and version != "": - # Date formats differ between different products - try: - month, date, year = td_list[1].get_text().split("/") - abs_date = f"{year}-{month:0>2}-{date:0>2}" - except Exception: - # A few dates have 1st, 2nd, 4th etc. 
Fix that: - d = td_list[1].get_text() - d = re.sub(r'(\w+) (\d{1,2})(?:\w{2}), (\d{4})', r'\1 \2, \3', d) - date = datetime.datetime.strptime(d, "%B %d, %Y") - abs_date = date.strftime("%Y-%m-%d") - versions[version] = abs_date + print(f"::group::{html_identifier}") + response = endoflife.fetch_url(URL) + soup = BeautifulSoup(response, features="html5lib") + table = soup.find(id=html_identifier) + for tr in table.findAll("tr")[3:]: + td_list = tr.findAll("td") + version = ( + td_list[0].get_text().strip().lower().replace(" ", "-").replace("*", "") + ) + if file == "pan-xdr": + if "xdr" not in version: + continue + version = version.removesuffix("-(cortex-xdr-agent)") + version = version.removesuffix("-(vm-series-only)") + version = version.removesuffix("-(panorama-only)") + if len(td_list) > 1 and version != "": + # Date formats differ between different products + try: + month, date, year = td_list[1].get_text().split("/") + abs_date = f"{year}-{month:0>2}-{date:0>2}" + except Exception: + # A few dates have 1st, 2nd, 4th etc. 
Fix that: + d = td_list[1].get_text() + d = re.sub(r'(\w+) (\d{1,2})(?:\w{2}), (\d{4})', r'\1 \2, \3', d) + date = datetime.datetime.strptime(d, "%B %d, %Y") + abs_date = date.strftime("%Y-%m-%d") + + versions[version] = abs_date + print("%s: %s" % (version, abs_date)) + print("::endgroup::") with open("releases/%s.json" % file, "w") as f: f.write(json.dumps(versions, indent=2)) diff --git a/src/php.py b/src/php.py index a16ff33e..4eb56f85 100644 --- a/src/php.py +++ b/src/php.py @@ -1,6 +1,6 @@ -import urllib.request import datetime import json +from common import endoflife PHP_MAJOR_VERSIONS = [4, 5, 7, 8] @@ -17,13 +17,13 @@ def parse_date(date_str): def fetch_versions(major_version): url = f"https://www.php.net/releases/index.php?json&max=-1&version={major_version}" - with urllib.request.urlopen(url, data=None, timeout=5) as response: - data = json.loads(response.read()) - for v in data: - data[v] = parse_date(data[v]["date"]) - print(f"{v}: {data[v]}") + response = endoflife.fetch_url(url) + data = json.loads(response) + for v in data: + data[v] = parse_date(data[v]["date"]) + print(f"{v}: {data[v]}") - return data + return data with open("releases/php.json", "w") as f: diff --git a/src/plesk.py b/src/plesk.py index a4bbd1d8..8d6e099f 100644 --- a/src/plesk.py +++ b/src/plesk.py @@ -1,16 +1,15 @@ import json -from datetime import datetime -import urllib.request from bs4 import BeautifulSoup +from common import endoflife +from datetime import datetime URL = "https://docs.plesk.com/release-notes/obsidian/change-log" PRODUCT = "plesk" def make_bs_request(url): - req = urllib.request.Request(url) - with urllib.request.urlopen(req, timeout=5) as response: - return BeautifulSoup(response.read(), features="html5lib") + response = endoflife.fetch_url(url) + return BeautifulSoup(response, features="html5lib") # Only 18.0.20.3 and later will be picked up : diff --git a/src/pypi.py b/src/pypi.py index 0d0f8e0a..c14f8acf 100644 --- a/src/pypi.py +++ b/src/pypi.py @@ -1,9 
+1,8 @@ +import json import re import sys -import json -import urllib.request -from datetime import datetime from common import endoflife +from datetime import datetime METHOD = "pypi" DEFAULT_TAG_TEMPLATE = ( # Same as used in Ruby (update.rb) @@ -19,18 +18,18 @@ def fetch_releases(pypi_id, regex): regex = [regex] url = "https://pypi.org/pypi/%s/json" % pypi_id - with urllib.request.urlopen(url, data=None, timeout=5) as response: - data = json.loads(response.read().decode("utf-8")) - for version in data["releases"]: - R = data["releases"][version] - matches = False - for r in regex: - if re.match(r, version): - matches = True - if matches and R: - d = datetime.fromisoformat(R[0]["upload_time"]).strftime("%Y-%m-%d") - releases[version] = d - print("%s: %s" % (version, d)) + response = endoflife.fetch_url(url) + data = json.loads(response) + for version in data["releases"]: + R = data["releases"][version] + matches = False + for r in regex: + if re.match(r, version): + matches = True + if matches and R: + d = datetime.fromisoformat(R[0]["upload_time"]).strftime("%Y-%m-%d") + releases[version] = d + print("%s: %s" % (version, d)) return releases diff --git a/src/rds.py b/src/rds.py index 0e179c78..e8e17d91 100644 --- a/src/rds.py +++ b/src/rds.py @@ -1,8 +1,8 @@ -import re -import urllib.request -from bs4 import BeautifulSoup -from datetime import datetime import json +import re +from bs4 import BeautifulSoup +from common import endoflife +from datetime import datetime dbs = { "mysql": "https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/MySQL.Concepts.VersionMgmt.html", @@ -18,24 +18,23 @@ for db, url in dbs.items(): print(f"::group::{db}") releases = {} - with urllib.request.urlopen(url, data=None, timeout=5) as contents: - html = contents.read().decode("utf-8") - soup = BeautifulSoup(html, features="html5lib") + response = endoflife.fetch_url(url) + soup = BeautifulSoup(response, features="html5lib") - for table in soup.find_all("table"): - for row in 
table.find_all("tr"): - columns = row.find_all("td") + for table in soup.find_all("table"): + for row in table.find_all("tr"): + columns = row.find_all("td") - # Must match both the 'Supported XXX minor versions' and - # 'Supported XXX major versions' to have correct release dates - if len(columns) > 3: - r = r"(?P\d+(?:\.\d+)*)" # https://regex101.com/r/BY1vwV/1 - m = re.search(r, columns[0].text.strip(), flags=re.IGNORECASE) - if m: - version = m.group("v") - date = parse_date(columns[2].text.strip()) - print(f"{version} : {date}") - releases[version] = date + # Must match both the 'Supported XXX minor versions' and + # 'Supported XXX major versions' to have correct release dates + if len(columns) > 3: + r = r"(?P\d+(?:\.\d+)*)" # https://regex101.com/r/BY1vwV/1 + m = re.search(r, columns[0].text.strip(), flags=re.IGNORECASE) + if m: + version = m.group("v") + date = parse_date(columns[2].text.strip()) + print(f"{version} : {date}") + releases[version] = date print("::endgroup::") with open(f"releases/amazon-rds-{db.lower()}.json", "w") as f: diff --git a/src/rhel.py b/src/rhel.py index 8ce16dc5..150000fe 100644 --- a/src/rhel.py +++ b/src/rhel.py @@ -1,29 +1,31 @@ import json -import urllib.request -from bs4 import BeautifulSoup import re +from bs4 import BeautifulSoup +from common import endoflife URL = "https://access.redhat.com/articles/3078" # https://regex101.com/r/877ibq/1 regex = r"RHEL (?P\d)(\. ?(?P\d+))?(( Update (?P\d))| GA)?" 
versions = {} -headers = {"user-agent": "mozilla"} -req = urllib.request.Request(URL, headers=headers) -with urllib.request.urlopen(req, timeout=5) as response: - soup = BeautifulSoup(response, features="html5lib") - for tr in soup.findAll("tr"): - td_list = tr.findAll("td") - if len(td_list) > 0: - version = td_list[0].get_text() - m = re.match(regex, version.strip()).groupdict() - version = m["major"] - if m["minor"]: - version += ".%s" % m["minor"] - if m["minor2"]: - version += ".%s" % m["minor2"] - versions[version] = td_list[1].get_text() +print("::group::rhel") +response = endoflife.fetch_url(URL) +soup = BeautifulSoup(response, features="html5lib") +for tr in soup.findAll("tr"): + td_list = tr.findAll("td") + if len(td_list) > 0: + version = td_list[0].get_text() + m = re.match(regex, version.strip()).groupdict() + version = m["major"] + if m["minor"]: + version += ".%s" % m["minor"] + if m["minor2"]: + version += ".%s" % m["minor2"] + date = td_list[1].get_text() + versions[version] = date + print("%s: %s" % (version, date)) +print("::endgroup::") with open("releases/redhat.json", "w") as f: f.write(json.dumps(versions, indent=2)) diff --git a/src/ros.py b/src/ros.py index 21088d5c..b7feebba 100644 --- a/src/ros.py +++ b/src/ros.py @@ -1,8 +1,8 @@ -import json -import urllib.request import datetime -from bs4 import BeautifulSoup +import json import re +from bs4 import BeautifulSoup +from common import endoflife URL = "https://wiki.ros.org/Distributions" # https://regex101.com/r/c1ribd/1 @@ -10,27 +10,29 @@ regex = r"^ROS (?P(\w| )+)" versions = {} -with urllib.request.urlopen(URL, timeout=5) as response: - soup = BeautifulSoup(response, features="html5lib") - for tr in soup.findAll("tr"): - td_list = tr.findAll("td") - if len(td_list) > 0: - version = td_list[0].get_text() +print("::group::ros") +response = endoflife.fetch_url(URL) +soup = BeautifulSoup(response, features="html5lib") +for tr in soup.findAll("tr"): + td_list = tr.findAll("td") + if 
len(td_list) > 0: + version = td_list[0].get_text() - m = re.match(regex, version.strip()) - if m: - version = td_list[0].findAll("a")[0]["href"][1:] - try: - date = datetime.datetime.strptime( - td_list[1].get_text().strip(), "%B %d, %Y" - ) - # The date is a suffix (May 23rd, 2020) - except Exception as e: - x = td_list[1].get_text().split(",") - date = datetime.datetime.strptime(x[0][:-2] + x[1], "%B %d %Y") - abs_date = date.strftime("%Y-%m-%d") - versions[version] = abs_date - print("%s: %s" % (version, abs_date)) + m = re.match(regex, version.strip()) + if m: + version = td_list[0].findAll("a")[0]["href"][1:] + try: + date = datetime.datetime.strptime( + td_list[1].get_text().strip(), "%B %d, %Y" + ) + # The date is a suffix (May 23rd, 2020) + except Exception as e: + x = td_list[1].get_text().split(",") + date = datetime.datetime.strptime(x[0][:-2] + x[1], "%B %d %Y") + abs_date = date.strftime("%Y-%m-%d") + versions[version] = abs_date + print("%s: %s" % (version, abs_date)) +print("::endgroup::") with open("releases/ros.json", "w") as f: f.write(json.dumps(versions, indent=2)) diff --git a/src/unrealircd.py b/src/unrealircd.py index f23e5272..0a3d6da3 100644 --- a/src/unrealircd.py +++ b/src/unrealircd.py @@ -1,24 +1,26 @@ -import mwparserfromhell import json +import mwparserfromhell import re -import urllib.request +from common import endoflife URL = "https://www.unrealircd.org/docwiki/index.php?title=History_of_UnrealIRCd_releases&action=raw" REGEX = r"^(?:(\d+\.(?:\d+\.)*\d+))$" -versions = {} -with urllib.request.urlopen(URL) as response: - text = response.read() - wikicode = mwparserfromhell.parse(text) - for tr in wikicode.ifilter_tags(matches=lambda node: node.tag == "tr"): - items = tr.contents.filter_tags(matches=lambda node: node.tag == "td") - if len(items) >= 2: - maybe_version = items[0].__strip__() - if re.match(REGEX, maybe_version): - maybe_date = items[1].__strip__() - if re.match(r"\d{4}-\d{2}-\d{2}", maybe_date): - 
versions[maybe_version] = maybe_date +print("::group::unrealircd") +response = endoflife.fetch_url(URL) +wikicode = mwparserfromhell.parse(response) +versions = {} +for tr in wikicode.ifilter_tags(matches=lambda node: node.tag == "tr"): + items = tr.contents.filter_tags(matches=lambda node: node.tag == "td") + if len(items) >= 2: + maybe_version = items[0].__strip__() + if re.match(REGEX, maybe_version): + maybe_date = items[1].__strip__() + if re.match(r"\d{4}-\d{2}-\d{2}", maybe_date): + versions[maybe_version] = maybe_date + print("%s: %s" % (maybe_version, maybe_date)) +print("::endgroup::") with open("releases/unrealircd.json", "w") as f: f.write(json.dumps(versions, indent=2))