From 1e7d583a9a73ffad4520aa9f0ffcd4321d6c77a3 Mon Sep 17 00:00:00 2001
From: Nemo
Date: Tue, 13 Dec 2022 12:39:03 +0530
Subject: [PATCH] [gke] Adds GKE automation

This avoids using the GKE APIs, much cleaner as a result. The downside
is that we need to keep track of all channels separately. This also
tracks the date a version is made available on a channel, not the date
it is made the default. This is being slightly optimistic, but I think
works well
---
 src/gke.py | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 src/gke.py

diff --git a/src/gke.py b/src/gke.py
new file mode 100644
index 00000000..2119eda7
--- /dev/null
+++ b/src/gke.py
@@ -0,0 +1,57 @@
+import urllib.request
+from bs4 import BeautifulSoup
+import re
+import json
+from datetime import datetime
+
+# https://regex101.com/r/zPxBqT/1
+REGEX = r"\d.\d+\.\d+-gke\.\d+"
+
+def fetch_channel(channel):
+    url = "https://cloud.google.com/kubernetes-engine/docs/release-notes-{}".format(channel)
+    with urllib.request.urlopen(url, data=None, timeout=5) as response:
+        return BeautifulSoup(response, features="html5lib")
+
+"""
+Takes soup, and returns a dictionary of versions and their release dates
+"""
+def parse_soup_for_versions(soup):
+    """ Parse the soup """
+    versions = {}
+    for section in soup.find_all('section', class_='releases'):
+        # h2 contains the date, which we parse
+        for h2 in section.find_all('h2'):
+            date = h2.get('data-text')
+            date = datetime.strptime(date, '%B %d, %Y').strftime('%Y-%m-%d')
+            # The div next to the h2 contains the notes about changes made on that date
+            next_div = h2.find_next('div')
+            # New releases are noted in a nested list, so we look for that
+            # and parse it using the version regex
+            for li in next_div.find_all('li'):
+                # If the <li> text contains with "versions are now available:", get the