Apply various minor refactorings

Improve readability and fix a few Python warnings (line too long, exception too broad...) through various minor refactorings.
This commit is contained in:
Marc Wrobel
2023-05-20 12:45:14 +02:00
parent 70f20da616
commit 208ab8e2f8
19 changed files with 106 additions and 87 deletions

View File

@@ -16,9 +16,9 @@ URLS = [
"https://support.apple.com/kb/HT1263", # 2005-2007
]
# If you are changing these, please
# use https://gist.githubusercontent.com/captn3m0/e7cb1f4fc3c07a5da0296ebda2b33e15/raw/5747e42ad611ec9ffdb7a2d1c0e3946bb87ab6d7/apple.txt as your corpus
# to validate your changes
# If you are changing these, please use
# https://gist.githubusercontent.com/captn3m0/e7cb1f4fc3c07a5da0296ebda2b33e15/raw/5747e42ad611ec9ffdb7a2d1c0e3946bb87ab6d7/apple.txt
# as your corpus to validate your changes
CONFIG = {
"macos": [
# This covers Sierra and beyond

View File

@@ -3,6 +3,7 @@ from bs4 import BeautifulSoup
from common import endoflife
from datetime import datetime
DATE_FORMAT = '%b %d, %Y'
REGEX = r"^(cos-\d+-\d+-\d+-\d+)"
@@ -16,34 +17,35 @@ def fetch_all_milestones():
def fetch_milestone(channel):
url = "https://cloud.google.com/container-optimized-os/docs/release-notes/m{}".format(channel)
url = f"https://cloud.google.com/container-optimized-os/docs/release-notes/m{channel}"
# Retry as Google Docs often returns SSL errors.
response = endoflife.fetch_url(url, retry_count=10)
return BeautifulSoup(response, features="html5lib")
def parse_date(d):
# If the date begins with a >3 letter month name, trim it to just 3 letters
# Strip out the Date: section from the start
d = re.sub(r'(?:Date\: )?(\w{3})(?:\w{1,4})? (\d{1,2}), (\d{4})', r'\1 \2, \3', d)
return datetime.strptime(d, DATE_FORMAT).strftime('%Y-%m-%d')
def parse_soup_for_versions(soup):
"""Takes soup, and returns a dictionary of versions and their release dates
"""
versions = {}
for article in soup.find_all('article', class_='devsite-article'):
def parse_date(d):
# If the date begins with a >3 letter month name, trim it to just 3 letters
# Strip out the Date: section from the start
d = re.sub(r'(?:Date\: )?(\w{3})(?:\w{1,4})? (\d{1,2}), (\d{4})', r'\1 \2, \3', d)
return datetime.strptime(d, date_format).strftime('%Y-%m-%d')
# h2 contains the date, which we parse
for heading in article.find_all(['h2', 'h3']):
version = heading.get('data-text')
m = re.match(REGEX, version)
if m:
version = m.group(1)
date_format = '%b %d, %Y'
try:
# The first row is the header, so we pick the first td in the second row
# 1st row is the header, so pick the first td in the 2nd row
d = heading.find_next('tr').find_next('tr').find_next('td').text
except:
# In some older releases, it is mentioned as Date: [Date] in the text
except AttributeError:
# In some older releases, it is mentioned as Date: [Date]
d = heading.find_next('i').text
try:
date = parse_date(d)
@@ -51,7 +53,7 @@ def parse_soup_for_versions(soup):
d = heading.find_previous('h2').get('data-text')
date = parse_date(d)
versions[version] = date
print("%s: %s" % (version, date))
print(f"{version}: {date}")
return versions

View File

@@ -30,6 +30,7 @@ def clone_repository():
ret_code = call(f"git {git_opts} pull --depth 1 origin master", shell=True)
exit(-ret_code) if ret_code < 0 else None
def extract_major_releases(releases):
child = subprocess.Popen(
f"grep -RhE -A 1 '<define-tag pagetitle>Debian [0-9]+.+</q> released' {REPO_DIR}/english/News "
@@ -48,8 +49,8 @@ def extract_major_releases(releases):
is_release_line = False
else:
date = line
print(f"{version}: {date}")
releases[version] = date
print(f"{version}: {date}")
is_release_line = True
@@ -75,11 +76,11 @@ def extract_point_releases(releases):
print(f"::group::{PRODUCT}")
clone_repository()
releases = {}
extract_major_releases(releases)
extract_point_releases(releases)
all_releases = {}
extract_major_releases(all_releases)
extract_point_releases(all_releases)
endoflife.write_releases(PRODUCT, dict(
# sort by date then version (desc)
sorted(releases.items(), key=lambda x: (x[1], x[0]), reverse=True)
sorted(all_releases.items(), key=lambda x: (x[1], x[0]), reverse=True)
))
print("::endgroup::")

View File

@@ -33,7 +33,7 @@ def fetch_releases(distrowatch_id, regex, template):
headline = table.select_one("td.NewsHeadline a[href]").get_text().strip()
date = table.select_one("td.NewsDate").get_text()
for v in get_versions_from_headline(regex, headline, l_template):
print("%s: %s" % (v, date))
print(f"{v}: {date}")
releases[v] = date
return releases
@@ -52,6 +52,6 @@ def update_product(product_name, configs):
p_filter = sys.argv[1] if len(sys.argv) > 1 else None
for product, configs in endoflife.list_products(METHOD, p_filter).items():
print("::group::%s" % product)
print(f"::group::{product}")
update_product(product, configs)
print("::endgroup::")

View File

@@ -24,9 +24,9 @@ def parse_platforms_page():
d = datetime.strptime(date, "%B %d, %Y").strftime("%Y-%m-%d")
k8s_version = ".".join(data[0].text.split(".")[:-1])
eks_version = data[1].text.replace(".", "-")
version = "%s-%s" % (k8s_version, eks_version)
version = f"{k8s_version}-{eks_version}"
all_versions[version] = d
print("%s: %s" % (version, d))
print(f"{version}: {d}")
print("::endgroup::")
return all_versions

View File

@@ -6,21 +6,25 @@ from common import endoflife
from datetime import datetime
from typing import Tuple
"""Fetch Firefox versions with their dates from https://www.mozilla.org/en-US/firefox/releases/"""
"""Fetch Firefox versions with their dates from https://www.mozilla.org/"""
URL = "https://www.mozilla.org/en-US/firefox/releases/"
PRODUCT = "firefox"
DATE_REGEX = r"(January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|July|Jul|August|Aug|September|Sept|October|Oct|November|Nov|December|Dec)\s+\d{1,2}(st|nd|rd|th)?,\s+\d{4}"
VERSION_REGEX = r"\d+(\.\d+)*"
class UnsupportedReleasePageError(Exception):
"Raised when a firefox release page is not supported"
class UnsupportedPageError(Exception):
"""Raised when a firefox release page is not supported"""
pass
class InvalidPageVariantError(Exception):
"Raised when an invalid variant is passed to get_version_and_date"
"""Raised when an invalid variant is passed to get_version_and_date"""
pass
def format_date(unformatted_date: str) -> str:
""" Format date from July 11, 2002 to 2002-07-11 """
date = re.sub(r'(\d)(st|nd|rd|th)', r'\1', unformatted_date)
@@ -32,7 +36,8 @@ def format_date(unformatted_date: str) -> str:
pass
return ""
def get_version_and_date_varant_1(soup: BeautifulSoup) -> Tuple[str, str]:
def get_version_and_date_variant_1(soup: BeautifulSoup) -> Tuple[str, str]:
""" Version matching for firefox versions >= 28.0 (usually) """
# get version
version = soup.find("div", class_="c-release-version").get_text()
@@ -41,7 +46,8 @@ def get_version_and_date_varant_1(soup: BeautifulSoup) -> Tuple[str, str]:
unformatted_date = soup.find("p", class_="c-release-date").get_text()
date = format_date(unformatted_date)
return (version, date)
return version, date
def get_version_and_date_variant_2(soup: BeautifulSoup) -> Tuple[str, str]:
""" Version matching for firefox versions >= 10.0 (usually) """
@@ -60,7 +66,8 @@ def get_version_and_date_variant_2(soup: BeautifulSoup) -> Tuple[str, str]:
unformatted_date = unformatted_date_match.group()
date = format_date(unformatted_date)
return (version, date)
return version, date
def get_version_and_date_variant_3(soup: BeautifulSoup) -> Tuple[str, str]:
""" Version matching for firefox versions >= 3.0 (usually) """
@@ -79,22 +86,27 @@ def get_version_and_date_variant_3(soup: BeautifulSoup) -> Tuple[str, str]:
unformatted_date = unformatted_date_match.group()
date = format_date(unformatted_date)
return (version, date)
return version, date
def get_version_and_date(release_page: str, release_version: str) -> Tuple[str, str]:
""" Get version and date from the given release page """
major = int(release_version.split(".")[0])
# firefox release pages for versions <3.0 don't include release dates so we
# firefox release pages for versions <3.0 don't include release dates, so we
# can't match these versions for now.
# example: https://www.mozilla.org/en-US/firefox/2.0/releasenotes/
if major < 3:
raise UnsupportedReleasePageError("Unsupported release page: %s" % release_page)
raise UnsupportedPageError(f"Unsupported release page: {release_page}")
# Firefox release pages come in 3 different variants. Unforunately, there is no
# consistent way to determine which variant a page is (say, by version number), so
# we have to try each variant until we find one that works.
functions = [get_version_and_date_varant_1, get_version_and_date_variant_2, get_version_and_date_variant_3]
# Firefox release pages come in 3 different variants. Unfortunately, there
# is no consistent way to determine which variant a page is (say, by version
# number), so we have to try each variant until we find one that works.
functions = [
get_version_and_date_variant_1,
get_version_and_date_variant_2,
get_version_and_date_variant_3
]
soup = make_bs_request(release_page)
for function in functions:
@@ -103,13 +115,15 @@ def get_version_and_date(release_page: str, release_version: str) -> Tuple[str,
except (InvalidPageVariantError, AttributeError, IndexError):
pass
raise UnsupportedReleasePageError("Unable to find version and date for %s" % release_page)
raise UnsupportedPageError(f"Unable to find version and date from {release_page}")
def make_bs_request(url: str) -> BeautifulSoup:
# requests to www.mozilla.org often time out; retry in case of failures
response = endoflife.fetch_url(url, timeout=10, retry_count=5)
return BeautifulSoup(response, features="html5lib")
def fetch_releases():
releases = {}
soup = make_bs_request(URL)
@@ -126,13 +140,14 @@ def fetch_releases():
for future in concurrent.futures.as_completed(future_to_url):
try:
(version, date) = future.result()
print("%s: %s" % (version, date))
print(f"{version}: {date}")
releases[version] = date
except UnsupportedReleasePageError:
print("Unsupported release page: %s" % future_to_url[future])
except UnsupportedPageError:
print(f"Unsupported release page: {future_to_url[future]}")
return releases
print(f"::group::{PRODUCT}")
releases = fetch_releases()
endoflife.write_releases(PRODUCT, dict(

View File

@@ -43,9 +43,10 @@ query($endCursor: String) {
def fetch_releases(repo_id, regex):
"""Returns this repository releases using https://docs.github.com/en/rest/releases/releases#list-releases.
Only the first page is fetched: there are rate limit rules in place on the GitHub API, and the most recent
releases are sufficient.
"""Returns this repository releases using
https://docs.github.com/en/rest/releases/releases#list-releases. Only the
first page is fetched: there are rate limit rules in place on the GitHub
API, and the most recent releases are sufficient.
"""
releases = {}
regex = [regex] if not isinstance(regex, list) else regex
@@ -59,7 +60,7 @@ def fetch_releases(repo_id, regex):
version = match.group(1)
date = raw_date.split("T")[0]
releases[version] = date
print("%s: %s" % (version, date))
print(f"{version}: {date}")
return releases
@@ -76,6 +77,6 @@ def update_product(product_name, configs):
p_filter = sys.argv[1] if len(sys.argv) > 1 else None
for product, configs in endoflife.list_products(METHOD, p_filter).items():
print("::group::%s" % product)
print(f"::group::{product}")
update_product(product, configs)
print("::endgroup::")

View File

@@ -5,42 +5,44 @@ from datetime import datetime
# https://regex101.com/r/zPxBqT/1
REGEX = r"\d.\d+\.\d+-gke\.\d+"
CHANNELS = ['nochannel', 'stable', 'regular', 'rapid']
def fetch_channel(channel):
url = "https://cloud.google.com/kubernetes-engine/docs/release-notes-{}".format(channel)
url = f"https://cloud.google.com/kubernetes-engine/docs/release-notes-{channel}"
response = endoflife.fetch_url(url)
return BeautifulSoup(response, features="html5lib")
"""
Takes soup, and returns a dictionary of versions and their release dates
"""
def parse_soup_for_versions(soup):
""" Parse the soup """
"""Takes soup, and returns a dictionary of versions and their release dates
"""
versions = {}
for section in soup.find_all('section', class_='releases'):
# h2 contains the date, which we parse
for h2 in section.find_all('h2'):
date = h2.get('data-text')
date = datetime.strptime(date, '%B %d, %Y').strftime('%Y-%m-%d')
# The div next to the h2 contains the notes about changes made on that date
# The div next to the h2 contains the notes about changes made
# on that date
next_div = h2.find_next('div')
# New releases are noted in a nested list, so we look for that
# and parse it using the version regex
for li in next_div.find_all('li'):
# If the <li> text contains with "versions are now available:", get the <ul> inside the li
# If the <li> text contains "versions are now available:",
# get the <ul> inside the li
if "versions are now available" in li.text:
ul = li.find('ul')
for version in re.findall(REGEX, ul.text):
versions[version] = date
print("%s: %s" % (version, date))
print(f"{version}: {date}")
return versions
CHANNELS = ['nochannel', 'stable', 'regular', 'rapid']
for channel in CHANNELS:
soup = fetch_channel(channel)
print("::group::GKE - {}".format(channel))
print(f"::group::GKE - {channel}")
versions = parse_soup_for_versions(soup)
name = 'gke' if channel == 'nochannel' else 'gke-{}'.format(channel)
name = 'gke' if channel == 'nochannel' else f'gke-{channel}'
endoflife.write_releases(name, versions)
print("::endgroup::")

View File

@@ -2,7 +2,7 @@ import re
from bs4 import BeautifulSoup
from common import endoflife
"""Fetch HAProxy versions with their dates from https://www.haproxy.org/download/.
"""Fetch HAProxy versions with their dates from https://www.haproxy.org/.
"""
PRODUCT = "haproxy"
@@ -51,11 +51,11 @@ def print_releases(releases):
print(f"::group::{PRODUCT}")
cycles = fetch_cycles()
releases = fetch_releases(cycles)
print_releases(releases)
all_cycles = fetch_cycles()
all_releases = fetch_releases(all_cycles)
print_releases(all_releases)
endoflife.write_releases(PRODUCT, dict(
# sort by date then version (desc)
sorted(releases.items(), key=lambda x: (x[1], x[0]), reverse=True)
sorted(all_releases.items(), key=lambda x: (x[1], x[0]), reverse=True)
))
print("::endgroup::")

View File

@@ -18,11 +18,12 @@ PRODUCT = "java"
URL = "https://www.java.com/releases/"
def fetch_releases(releases):
def fetch_releases():
session = HTMLSession()
r = session.get('https://www.java.com/releases/')
r.html.render(sleep=1, scrolldown=3)
releases = {}
previous_date = None
for row in r.html.find('#released tr'):
version_cell = row.find('td.anchor', first=True)
@@ -35,13 +36,14 @@ def fetch_releases(releases):
releases[version] = date
previous_date = date
return releases
print(f"::group::{PRODUCT}")
releases = {}
fetch_releases(releases)
releases.pop('1.0_alpha') # that's the only version we do not want, regex not needed
all_releases = fetch_releases()
all_releases.pop('1.0_alpha') # only version we don't want, regex not needed
endoflife.write_releases(PRODUCT, dict(
# sort by date then version (desc)
sorted(releases.items(), key=lambda x: (x[1], x[0]), reverse=True)
sorted(all_releases.items(), key=lambda x: (x[1], x[0]), reverse=True)
))
print("::endgroup::")

View File

@@ -35,7 +35,7 @@ def fetch_releases(package_identifier):
if valid_version(version):
date = datetime.datetime.utcfromtimestamp(row["timestamp"] / 1000).strftime("%Y-%m-%d")
releases[version] = date
print("%s: %s" % (version, date))
print(f"{version}: {date}")
start += 100
if data["response"]["numFound"] <= start:
@@ -58,6 +58,6 @@ def update_product(product_name, configs):
p_filter = sys.argv[1] if len(sys.argv) > 1 else None
for product, configs in endoflife.list_products(METHOD, p_filter).items():
print("::group::%s" % product)
print(f"::group::{product}")
update_product(product, configs)
print("::endgroup::")

View File

@@ -43,10 +43,8 @@ def update_product(product_name, configs):
endoflife.write_releases(product_name, releases)
p_filter = sys.argv[1] if len(sys.argv) > 1 else None
for product, configs in endoflife.list_products(METHOD, p_filter).items():
print("::group::%s" % product)
print(f"::group::{product}")
update_product(product, configs)
print("::endgroup::")

View File

@@ -4,7 +4,6 @@ from bs4 import BeautifulSoup
from common import endoflife
URL = "https://www.paloaltonetworks.com/services/support/end-of-life-announcements/end-of-life-summary"
ID_MAPPING = {
"pan-os-panorama": "pan-os",
"globalprotect": "pan-gp",
@@ -36,7 +35,7 @@ def update_releases(html_identifier, file):
try:
month, date, year = td_list[1].get_text().split("/")
abs_date = f"{year}-{month:0>2}-{date:0>2}"
except Exception:
except ValueError:
# A few dates have 1st, 2nd, 4th etc. Fix that:
d = td_list[1].get_text()
d = re.sub(r'(\w+) (\d{1,2})(?:\w{2}), (\d{4})', r'\1 \2, \3', d)
@@ -44,7 +43,7 @@ def update_releases(html_identifier, file):
abs_date = date.strftime("%Y-%m-%d")
versions[version] = abs_date
print("%s: %s" % (version, abs_date))
print(f"{version}: {abs_date}")
endoflife.write_releases(file, versions)
print("::endgroup::")

View File

@@ -1,6 +1,6 @@
import datetime
import json
from common import endoflife
from datetime import datetime
PHP_MAJOR_VERSIONS = [4, 5, 7, 8]
@@ -10,9 +10,9 @@ PHP_MAJOR_VERSIONS = [4, 5, 7, 8]
# we return it as YYYY-MM-DD
def parse_date(date_str):
try:
return datetime.datetime.strptime(date_str, "%d %b %Y").strftime("%Y-%m-%d")
return datetime.strptime(date_str, "%d %b %Y").strftime("%Y-%m-%d")
except ValueError:
return datetime.datetime.strptime(date_str, "%d %B %Y").strftime("%Y-%m-%d")
return datetime.strptime(date_str, "%d %B %Y").strftime("%Y-%m-%d")
def fetch_versions(major_version):

View File

@@ -17,7 +17,7 @@ def fetch_releases(pypi_id, regex):
if not isinstance(regex, list):
regex = [regex]
url = "https://pypi.org/pypi/%s/json" % pypi_id
url = f"https://pypi.org/pypi/{pypi_id}/json"
response = endoflife.fetch_url(url)
data = json.loads(response)
for version in data["releases"]:
@@ -29,7 +29,7 @@ def fetch_releases(pypi_id, regex):
if matches and R:
d = datetime.fromisoformat(R[0]["upload_time"]).strftime("%Y-%m-%d")
releases[version] = d
print("%s: %s" % (version, d))
print(f"{version}: {d}")
return releases
@@ -46,6 +46,6 @@ def update_product(product_name, configs):
p_filter = sys.argv[1] if len(sys.argv) > 1 else None
for product, configs in endoflife.list_products(METHOD, p_filter).items():
print("::group::%s" % product)
print(f"::group::{product}")
update_product(product, configs)
print("::endgroup::")

View File

@@ -27,7 +27,7 @@ for db, url in dbs.items():
# Must match both the 'Supported XXX minor versions' and
# 'Supported XXX major versions' to have correct release dates
if len(columns) > 3:
r = r"(?P<v>\d+(?:\.\d+)*)" # https://regex101.com/r/BY1vwV/1
r = r"(?P<v>\d+(?:\.\d+)*)" # https://regex101.com/r/BY1vwV/1
m = re.search(r, columns[0].text.strip(), flags=re.IGNORECASE)
if m:
version = m.group("v")

View File

@@ -23,7 +23,7 @@ for tr in soup.findAll("tr"):
version += ".%s" % m["minor2"]
date = td_list[1].get_text()
versions[version] = date
print("%s: %s" % (version, date))
print(f"{version}: {date}")
endoflife.write_releases('redhat', versions)
print("::endgroup::")

View File

@@ -25,12 +25,12 @@ for tr in soup.findAll("tr"):
td_list[1].get_text().strip(), "%B %d, %Y"
)
# The date is a suffix (May 23rd, 2020)
except Exception as e:
except ValueError as e:
x = td_list[1].get_text().split(",")
date = datetime.datetime.strptime(x[0][:-2] + x[1], "%B %d %Y")
abs_date = date.strftime("%Y-%m-%d")
versions[version] = abs_date
print("%s: %s" % (version, abs_date))
print(f"{version}: {abs_date}")
endoflife.write_releases('ros', versions)
print("::endgroup::")

View File

@@ -1,4 +1,3 @@
import json
import mwparserfromhell
import re
from common import endoflife
@@ -19,7 +18,7 @@ for tr in wikicode.ifilter_tags(matches=lambda node: node.tag == "tr"):
maybe_date = items[1].__strip__()
if re.match(r"\d{4}-\d{2}-\d{2}", maybe_date):
versions[maybe_version] = maybe_date
print("%s: %s" % (maybe_version, maybe_date))
print(f"{maybe_version}: {maybe_date}")
endoflife.write_releases('unrealircd', versions)
print("::endgroup::")