From f0348a60c78c46a824d1522a947e04ca5d74aba9 Mon Sep 17 00:00:00 2001 From: Nemo Date: Thu, 8 Jun 2023 16:02:11 +0530 Subject: [PATCH] [eks] Switch to HTML website The GitHub source is getting retired and will be archived soon, with no further updates. This switches to the AWS docs website instead, parsing the HTML directly. https://aws.amazon.com/blogs/aws/retiring-the-aws-documentation-on-github/ --- src/eks.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/eks.py b/src/eks.py index 9dd5e7c0..bebecd38 100644 --- a/src/eks.py +++ b/src/eks.py @@ -1,11 +1,10 @@ import datetime -import markdown import re from bs4 import BeautifulSoup from common import endoflife from datetime import datetime -URL = "https://raw.githubusercontent.com/awsdocs/amazon-eks-user-guide/master/doc_source/platform-versions.md" +URL = "https://docs.aws.amazon.com/eks/latest/userguide/platform-versions.html" REGEX = r"^(?P\d+)\.(?P\d+)\.(?P\d+)$" @@ -13,17 +12,16 @@ def parse_platforms_page(): all_versions = {} print("::group::eks") response = endoflife.fetch_url(URL) - html = markdown.markdown(response, extensions=["tables"]) - soup = BeautifulSoup(html, features="html5lib") - for tr in soup.findAll("tr"): + soup = BeautifulSoup(response, features="html5lib") + for tr in soup.select("#main-col-body")[0].findAll("tr"): td = tr.find("td") - if td and re.match(REGEX, td.text): + if td and re.match(REGEX, td.text.strip()): data = tr.findAll("td") - date = data[-1].text + date = data[-1].text.strip() if len(date) > 0: d = datetime.strptime(date, "%B %d, %Y").strftime("%Y-%m-%d") - k8s_version = ".".join(data[0].text.split(".")[:-1]) - eks_version = data[1].text.replace(".", "-") + k8s_version = ".".join(data[0].text.strip().split(".")[:-1]) + eks_version = data[1].text.strip().replace(".", "-") version = f"{k8s_version}-{eks_version}" all_versions[version] = d print(f"{version}: {d}")