This way the writing of the JSON file is handled automatically if the update does not fail. It paves the way for further global improvements, such as better error handling.
29 lines
1.4 KiB
Python
29 lines
1.4 KiB
Python
import re
|
|
|
|
from bs4 import BeautifulSoup
|
|
from common import dates, http, releasedata
|
|
|
|
# Matches GKE version strings such as "1.27.3-gke.100".
# Every dot is escaped so it only matches a literal ".", and the major component
# accepts one or more digits so a multi-digit major is not truncated.
# Earlier form of this pattern: https://regex101.com/r/zPxBqT/1
VERSION_PATTERN = re.compile(r"\d+\.\d+\.\d+-gke\.\d+")
|
|
# Release-notes page to scrape for each product name.
# Insertion order is the order in which the products are processed.
URL_BY_PRODUCT = {
    # "no channel" release notes
    "gke": "https://cloud.google.com/kubernetes-engine/docs/release-notes-nochannel",
    # per-channel release notes (stable / regular / rapid)
    "gke-stable": "https://cloud.google.com/kubernetes-engine/docs/release-notes-stable",
    "gke-regular": "https://cloud.google.com/kubernetes-engine/docs/release-notes-regular",
    "gke-rapid": "https://cloud.google.com/kubernetes-engine/docs/release-notes-rapid",
}
|
|
|
|
# For every product, fetch its release-notes page and declare each version
# announced there together with the date of the announcement.
for product_name, url in URL_BY_PRODUCT.items():
    # NOTE(review): ProductData presumably persists the collected versions when the
    # `with` block exits without an error - confirm in common.releasedata.
    with releasedata.ProductData(product_name) as product_data:
        relnotes = http.fetch_url(url)
        relnotes_soup = BeautifulSoup(relnotes.text, features="html5lib")

        for section in relnotes_soup.find_all('section', class_='releases'):
            for h2 in section.find_all('h2'):  # h2 contains the date
                date = dates.parse_date(h2.get('data-text'))

                # The div next to the h2 contains the notes about changes made on that date
                next_div = h2.find_next('div')
                for li in next_div.find_all('li'):
                    if "versions are now available" not in li.text:
                        continue

                    # The announced versions are listed in a <ul> nested inside the item.
                    # find() returns None when the phrase matched but there is no nested
                    # list; skip such items instead of failing on `None.text`.
                    version_list = li.find('ul')
                    if version_list is None:
                        continue

                    for version in VERSION_PATTERN.findall(version_list.text):
                        product_data.declare_version(version, date)
|