[cos] Refactor script (#219)
Make the script more readable, mostly by: using the endoflife.Product class; removing functions where they are unnecessary; and doing a little bit of renaming and documentation.
This commit is contained in:
80
src/cos.py
80
src/cos.py
@@ -4,62 +4,44 @@ from common import http
|
||||
from common import dates
|
||||
from common import endoflife
|
||||
|
||||
URL = "https://cloud.google.com/container-optimized-os/docs/release-notes/"
|
||||
REGEX = r"^(cos-\d+-\d+-\d+-\d+)"
|
||||
MILESTONE_PATTERN = re.compile(r'COS \d+ LTS')
|
||||
VERSION_PATTERN = re.compile(r"^(cos-\d+-\d+-\d+-\d+)")
|
||||
|
||||
|
||||
def list_milestones():
|
||||
response = http.fetch_url(URL)
|
||||
soup = BeautifulSoup(response.text, features="html5lib")
|
||||
milestones = soup.find_all('td', string=re.compile(r'COS \d+ LTS'))
|
||||
return [m.text.split(' ')[1] for m in milestones]
|
||||
def parse_date(date_text):
|
||||
date_text = date_text.strip().replace('Date: ', '')
|
||||
date_text = re.sub(r'Sep[a-zA-Z]+', 'Sep', date_text)
|
||||
return dates.parse_date(date_text)
|
||||
|
||||
|
||||
def fetch_milestones(milestones):
|
||||
urls = [f"{URL}m{channel}" for channel in milestones]
|
||||
return http.fetch_urls(urls)
|
||||
product = endoflife.Product("cos")
|
||||
print(f"::group::{product.name}")
|
||||
main = http.fetch_url("https://cloud.google.com/container-optimized-os/docs/release-notes/")
|
||||
main_soup = BeautifulSoup(main.text, features="html5lib")
|
||||
milestones = [cell.text.split(' ')[1] for cell in main_soup.find_all('td', string=MILESTONE_PATTERN)]
|
||||
|
||||
milestones_urls = [f"{main.url}m{milestone}" for milestone in milestones]
|
||||
for milestone in http.fetch_urls(milestones_urls):
|
||||
milestone_soup = BeautifulSoup(milestone.text, features="html5lib")
|
||||
for article in milestone_soup.find_all('article', class_='devsite-article'):
|
||||
for heading in article.find_all(['h2', 'h3']): # headings contains the date, which we parse
|
||||
version_str = heading.get('data-text')
|
||||
version_match = VERSION_PATTERN.match(version_str)
|
||||
if not version_match:
|
||||
continue
|
||||
|
||||
def parse_date(date_str):
|
||||
date_str = date_str.strip().replace('Date: ', '')
|
||||
date_str = re.sub(r'Sep[a-zA-Z]+', 'Sep', date_str)
|
||||
return dates.parse_date(date_str).strftime('%Y-%m-%d')
|
||||
try: # 1st row is the header, so pick the first td in the 2nd row
|
||||
date_str = heading.find_next('tr').find_next('tr').find_next('td').text
|
||||
except AttributeError: # In some older releases, it is mentioned as Date: [Date]
|
||||
date_str = heading.find_next('i').text
|
||||
|
||||
try:
|
||||
date = parse_date(date_str)
|
||||
except ValueError: # for some h3, the date is in the previous h2
|
||||
date_str = heading.find_previous('h2').get('data-text')
|
||||
date = parse_date(date_str)
|
||||
|
||||
def find_versions(text):
|
||||
"""Takes soup, and returns a dictionary of versions and their release dates
|
||||
"""
|
||||
versions = {}
|
||||
soup = BeautifulSoup(text, features="html5lib")
|
||||
for article in soup.find_all('article', class_='devsite-article'):
|
||||
# h2 contains the date, which we parse
|
||||
for heading in article.find_all(['h2', 'h3']):
|
||||
version = heading.get('data-text')
|
||||
m = re.match(REGEX, version)
|
||||
if m:
|
||||
version = m.group(1)
|
||||
try:
|
||||
# 1st row is the header, so pick the first td in the 2nd row
|
||||
d = heading.find_next('tr').find_next('tr').find_next('td').text
|
||||
except AttributeError:
|
||||
# In some older releases, it is mentioned as Date: [Date]
|
||||
d = heading.find_next('i').text
|
||||
try:
|
||||
date = parse_date(d)
|
||||
except ValueError:
|
||||
d = heading.find_previous('h2').get('data-text')
|
||||
date = parse_date(d)
|
||||
versions[version] = date
|
||||
print(f"{version}: {date}")
|
||||
product.declare_version(version_match.group(1), date)
|
||||
|
||||
return versions
|
||||
|
||||
|
||||
print("::group::cos")
|
||||
versions = {}
|
||||
|
||||
for response in fetch_milestones(list_milestones()):
|
||||
versions |= find_versions(response.text)
|
||||
|
||||
endoflife.write_releases('cos', versions)
|
||||
product.write()
|
||||
print("::endgroup::")
|
||||
|
||||
Reference in New Issue
Block a user