[cos] Refactor script (#219)

Make the script more readable, mostly by:
- using the endoflife.Product class,
- removing the use of functions when unnecessary,
- a little bit of renaming and documentation.
2023-12-10 16:54:44 +01:00
parent 2c076ff5f1
commit c20b65124d
1 changed file with 31 additions and 49 deletions
--- a/src/cos.py
+++ b/src/cos.py
@@ -4,62 +4,44 @@ from common import http
 from common import dates
 from common import endoflife

-URL = "https://cloud.google.com/container-optimized-os/docs/release-notes/"
-REGEX = r"^(cos-\d+-\d+-\d+-\d+)"
+MILESTONE_PATTERN = re.compile(r'COS \d+ LTS')
+VERSION_PATTERN = re.compile(r"^(cos-\d+-\d+-\d+-\d+)")


-def list_milestones():
-    response = http.fetch_url(URL)
-    soup = BeautifulSoup(response.text, features="html5lib")
-    milestones = soup.find_all('td', string=re.compile(r'COS \d+ LTS'))
-    return [m.text.split(' ')[1] for m in milestones]
+def parse_date(date_text):
+    date_text = date_text.strip().replace('Date: ', '')
+    date_text = re.sub(r'Sep[a-zA-Z]+', 'Sep', date_text)
+    return dates.parse_date(date_text)


-def fetch_milestones(milestones):
-    urls = [f"{URL}m{channel}" for channel in milestones]
-    return http.fetch_urls(urls)
+product = endoflife.Product("cos")
+print(f"::group::{product.name}")
+main = http.fetch_url("https://cloud.google.com/container-optimized-os/docs/release-notes/")
+main_soup = BeautifulSoup(main.text, features="html5lib")
+milestones = [cell.text.split(' ')[1] for cell in main_soup.find_all('td', string=MILESTONE_PATTERN)]

+milestones_urls = [f"{main.url}m{milestone}" for milestone in milestones]
+for milestone in http.fetch_urls(milestones_urls):
+    milestone_soup = BeautifulSoup(milestone.text, features="html5lib")
+    for article in milestone_soup.find_all('article', class_='devsite-article'):
+        for heading in article.find_all(['h2', 'h3']):  # headings contains the date, which we parse
+            version_str = heading.get('data-text')
+            version_match = VERSION_PATTERN.match(version_str)
+            if not version_match:
+                continue

-def parse_date(date_str):
-    date_str = date_str.strip().replace('Date: ', '')
-    date_str = re.sub(r'Sep[a-zA-Z]+', 'Sep', date_str)
-    return dates.parse_date(date_str).strftime('%Y-%m-%d')
+            try:  # 1st row is the header, so pick the first td in the 2nd row
+                date_str = heading.find_next('tr').find_next('tr').find_next('td').text
+            except AttributeError:  # In some older releases, it is mentioned as Date: [Date]
+                date_str = heading.find_next('i').text

+            try:
+                date = parse_date(date_str)
+            except ValueError:  # for some h3, the date is in the previous h2
+                date_str = heading.find_previous('h2').get('data-text')
+                date = parse_date(date_str)

-def find_versions(text):
-    """Takes soup, and returns a dictionary of versions and their release dates
-    """
-    versions = {}
-    soup = BeautifulSoup(text, features="html5lib")
-    for article in soup.find_all('article', class_='devsite-article'):
-        # h2 contains the date, which we parse
-        for heading in article.find_all(['h2', 'h3']):
-            version = heading.get('data-text')
-            m = re.match(REGEX, version)
-            if m:
-                version = m.group(1)
-                try:
-                    # 1st row is the header, so pick the first td in the 2nd row
-                    d = heading.find_next('tr').find_next('tr').find_next('td').text
-                except AttributeError:
-                    # In some older releases, it is mentioned as Date: [Date]
-                    d = heading.find_next('i').text
-                try:
-                    date = parse_date(d)
-                except ValueError:
-                    d = heading.find_previous('h2').get('data-text')
-                    date = parse_date(d)
-                versions[version] = date
-                print(f"{version}: {date}")
+            product.declare_version(version_match.group(1), date)

-    return versions
-
-
-print("::group::cos")
-versions = {}
-
-for response in fetch_milestones(list_milestones()):
-    versions |= find_versions(response.text)
-
-endoflife.write_releases('cos', versions)
+product.write()
 print("::endgroup::")