Files
endoflife-date-release-data/src/cos.py
Marc Wrobel f6a8349c46 Centralize GitHub Workflow groups declaration (#272)
It may not be the best place for that (gha.py would have been better), but it's the shortest / fastest way to do it for now.

Moreover, it now uses logging to write the groups. The logger format has been updated for this to work. This was done to fix issues in GitHub Actions logs, where groups were declared after the logs.
2023-12-31 17:00:11 +01:00

46 lines
1.9 KiB
Python

import datetime
import re
from bs4 import BeautifulSoup
from common import dates, endoflife, http
# Used to select release-notes table cells whose text contains an LTS milestone label
# of the form "COS <digits> LTS".
MILESTONE_PATTERN = re.compile(r'COS \d+ LTS')
# Matches heading text that starts with a version identifier of the form
# "cos-<digits>-<digits>-<digits>-<digits>"; group 1 captures that identifier.
VERSION_PATTERN = re.compile(r"^(cos-\d+-\d+-\d+-\d+)")
def parse_date(date_text: str) -> datetime.datetime:
    """Clean up a date string scraped from the release notes and parse it.

    Surrounding whitespace and any ``Date: `` label are removed, and month
    spellings made of ``Sep`` followed by more letters (e.g. ``Sept``,
    ``September``) are shortened to ``Sep`` before the text is handed to
    ``dates.parse_date``.

    Args:
        date_text: Raw date text taken from the page.

    Returns:
        The value returned by ``dates.parse_date`` for the cleaned text.
        NOTE(review): annotated as ``datetime.datetime`` (the previous
        annotation named the ``datetime`` module itself) -- confirm against
        ``common.dates``.

    Raises:
        Whatever ``dates.parse_date`` raises for text it cannot parse; the
        calling code in this script expects ``ValueError`` in that case.
    """
    date_text = date_text.strip().replace('Date: ', '')
    # Normalize longer September spellings to the three-letter abbreviation.
    date_text = re.sub(r'Sep[a-zA-Z]+', 'Sep', date_text)
    return dates.parse_date(date_text)
# Scrape the Container-Optimized OS release notes: find the LTS milestones on the
# main page, fetch each milestone's page, and declare one dated version per
# version heading found there.
product = endoflife.Product("cos")
main = http.fetch_url("https://cloud.google.com/container-optimized-os/docs/release-notes/")
main_soup = BeautifulSoup(main.text, features="html5lib")
# Keep the second space-separated token of each matching cell's text
# (the milestone number when the cell reads "COS <number> LTS").
milestones = [cell.text.split(' ')[1] for cell in main_soup.find_all('td', string=MILESTONE_PATTERN)]
# Each milestone page lives at "<main page URL>m<milestone>".
milestones_urls = [f"{main.url}m{milestone}" for milestone in milestones]

for milestone in http.fetch_urls(milestones_urls):
    milestone_soup = BeautifulSoup(milestone.text, features="html5lib")
    for article in milestone_soup.find_all('article', class_='devsite-article'):
        for heading in article.find_all(['h2', 'h3']):  # version ids are read from the headings' data-text
            version_str = heading.get('data-text')
            if not version_str:
                # No data-text attribute: get() returns None, and passing None to
                # Pattern.match would raise TypeError and abort the whole run.
                continue

            version_match = VERSION_PATTERN.match(version_str)
            if not version_match:
                continue

            try:  # 1st row is the header, so pick the first td in the 2nd row
                date_str = heading.find_next('tr').find_next('tr').find_next('td').text
            except AttributeError:  # In some older releases, it is mentioned as Date: [Date]
                date_str = heading.find_next('i').text

            try:
                date = parse_date(date_str)
            except ValueError:  # for some h3, the date is in the previous h2
                date_str = heading.find_previous('h2').get('data-text')
                date = parse_date(date_str)

            product.declare_version(version_match.group(1), date)

product.write()