Apply various minor refactorings

Improve readability and fix a few Python warnings (line too long, exception too broad...) through various minor refactorings.
This commit is contained in:
Marc Wrobel
2023-05-20 12:45:14 +02:00
parent 70f20da616
commit 208ab8e2f8
19 changed files with 106 additions and 87 deletions

View File

@@ -3,6 +3,7 @@ from bs4 import BeautifulSoup
from common import endoflife
from datetime import datetime
# strptime() format of a normalized release date, e.g. "Sep 07, 2023".
DATE_FORMAT = '%b %d, %Y'
# Matches a version identifier at the start of a heading's text,
# e.g. "cos-101-17162-40-5"; group 1 is the identifier itself.
REGEX = r"^(cos-\d+-\d+-\d+-\d+)"
@@ -16,34 +17,35 @@ def fetch_all_milestones():
def fetch_milestone(channel):
    """Download and parse the release-notes page of one milestone.

    `channel` is interpolated into the release-notes URL right after the
    "m" prefix. The fetched document is returned as a BeautifulSoup tree
    built with the "html5lib" parser.
    """
    url = f"https://cloud.google.com/container-optimized-os/docs/release-notes/m{channel}"
    # Retry as Google Docs often returns SSL errors.
    response = endoflife.fetch_url(url, retry_count=10)
    return BeautifulSoup(response, features="html5lib")
def parse_date(d):
    """Normalize a release-note date string and return it as 'YYYY-MM-DD'.

    Accepts text such as "Sep 07, 2023", "September 7, 2023" or
    "Date: Sep 07, 2023". An optional leading "Date: " label is dropped and
    the month name is cut down to its first three letters so that the
    result can be parsed with DATE_FORMAT.

    Raises ValueError when the normalized text does not match DATE_FORMAT.
    """
    # Keep the first three letters of the month and discard the rest of the
    # name, whatever its length. A bounded tail such as `\w{1,4}` cannot
    # consume 8- and 9-letter month names (February, September, November,
    # December): the substitution then matches in the middle of the word
    # and leaves an unparsable string like "Septe 12, 2019".
    d = re.sub(r'(?:Date\: )?(\w{3})\w* (\d{1,2}), (\d{4})', r'\1 \2, \3', d)
    return datetime.strptime(d, DATE_FORMAT).strftime('%Y-%m-%d')
def parse_soup_for_versions(soup):
"""Takes soup, and returns a dictionary of versions and their release dates
"""
versions = {}
for article in soup.find_all('article', class_='devsite-article'):
def parse_date(d):
# If the date begins with a >3 letter month name, trim it to just 3 letters
# Strip out the Date: section from the start
d = re.sub(r'(?:Date\: )?(\w{3})(?:\w{1,4})? (\d{1,2}), (\d{4})', r'\1 \2, \3', d)
return datetime.strptime(d, date_format).strftime('%Y-%m-%d')
# h2 contains the date, which we parse
for heading in article.find_all(['h2', 'h3']):
version = heading.get('data-text')
m = re.match(REGEX, version)
if m:
version = m.group(1)
date_format = '%b %d, %Y'
try:
# The first row is the header, so we pick the first td in the second row
# 1st row is the header, so pick the first td in the 2nd row
d = heading.find_next('tr').find_next('tr').find_next('td').text
except:
# In some older releases, it is mentioned as Date: [Date] in the text
except AttributeError:
# In some older releases, it is mentioned as Date: [Date]
d = heading.find_next('i').text
try:
date = parse_date(d)
@@ -51,7 +53,7 @@ def parse_soup_for_versions(soup):
d = heading.find_previous('h2').get('data-text')
date = parse_date(d)
versions[version] = date
print("%s: %s" % (version, date))
print(f"{version}: {date}")
return versions