import re
from datetime import datetime

from bs4 import BeautifulSoup

from common import endoflife

# Pattern for GKE version strings such as "1.27.3-gke.100".
# Originally prototyped at https://regex101.com/r/zPxBqT/1 ; every separator
# dot is escaped here so that it only matches a literal ".".
REGEX = r"\d\.\d+\.\d+-gke\.\d+"

# Names of the release-notes pages; presumably each one is passed to
# fetch_channel() by a caller outside this section -- TODO confirm.
CHANNELS = ['nochannel', 'stable', 'regular', 'rapid']


def fetch_channel(channel: str) -> BeautifulSoup:
    """Download and parse the GKE release-notes page of one channel.

    Args:
        channel: Suffix of the release-notes URL (e.g. ``"stable"``); it is
            interpolated into the URL as-is, without validation.

    Returns:
        The page returned by ``endoflife.fetch_url`` parsed into a
        ``BeautifulSoup`` tree using the ``html5lib`` parser.
    """
    url = f"https://cloud.google.com/kubernetes-engine/docs/release-notes-{channel}"
    response = endoflife.fetch_url(url)
    return BeautifulSoup(response, features="html5lib")