diff --git a/src/gke.py b/src/gke.py new file mode 100644 index 00000000..2119eda7 --- /dev/null +++ b/src/gke.py @@ -0,0 +1,57 @@ +import urllib.request +from bs4 import BeautifulSoup +import re +import json +from datetime import datetime + +# https://regex101.com/r/zPxBqT/1 +REGEX = r"\d.\d+\.\d+-gke\.\d+" + +def fetch_channel(channel): + url = "https://cloud.google.com/kubernetes-engine/docs/release-notes-{}".format(channel) + with urllib.request.urlopen(url, data=None, timeout=5) as response: + return BeautifulSoup(response, features="html5lib") + +""" +Takes soup, and returns a dictionary of versions and their release dates +""" +def parse_soup_for_versions(soup): + """ Parse the soup """ + versions = {} + for section in soup.find_all('section', class_='releases'): + # h2 contains the date, which we parse + for h2 in section.find_all('h2'): + date = h2.get('data-text') + date = datetime.strptime(date, '%B %d, %Y').strftime('%Y-%m-%d') + # The div next to the h2 contains the notes about changes made on that date + next_div = h2.find_next('div') + # New releases are noted in a nested list, so we look for that + # and parse it using the version regex + for li in next_div.find_all('li'): + # If the
<li> text contains "versions are now available:", get the