[distrowatch] Refactor script (#222)

Make the script more readable, mostly by:

- using the Product and AutoConfig classes,
- removing the use of functions when unnecessary,
- doing a little bit of renaming and adding some documentation.
This commit is contained in:
Marc Wrobel
2023-12-10 18:39:35 +01:00
committed by GitHub
parent cb4d988c45
commit 724dfc4808
2 changed files with 24 additions and 50 deletions

View File

@@ -11,7 +11,7 @@ logging.basicConfig(format=logging.BASIC_FORMAT, level=logging.INFO)
# Handle versions having at least 2 digits (ex. 1.2) and at most 4 digits (ex. 1.2.3.4), with an optional leading "v".
# Major version must be >= 1.
DEFAULT_VERSION_REGEX = r"^v?(?P<major>[1-9]\d*)\.(?P<minor>\d+)(\.(?P<patch>\d+)(\.(?P<tiny>\d+))?)?$"
# Template that renders the named groups of the regex above (major, minor, patch, tiny) as a dotted version string.
# NOTE(review): DEFAULT_TAG_TEMPLATE is assigned twice in a row; if both lines are kept, only the second one
# (which also makes `minor` optional) takes effect. This looks like the before/after lines of a diff - confirm.
DEFAULT_TAG_TEMPLATE = "{{major}}.{{minor}}{% if patch %}.{{patch}}{% if tiny %}.{{tiny}}{%endif%}{%endif%}"
DEFAULT_TAG_TEMPLATE = "{{major}}{% if minor %}.{{minor}}{% if patch %}.{{patch}}{% if tiny %}.{{tiny}}{%endif%}{%endif%}{%endif%}"
# Paths read from the environment variables of the same name, with the defaults given here.
PRODUCTS_PATH = os.environ.get("PRODUCTS_PATH", "website/products")
VERSIONS_PATH = os.environ.get("VERSIONS_PATH", "releases")

View File

@@ -1,58 +1,32 @@
import re
import sys
from bs4 import BeautifulSoup
from common import http
from common import dates
from common import endoflife
from liquid import Template
# Method name passed to endoflife.list_products; also the config key whose value is used as the DistroWatch id.
METHOD = 'distrowatch'
# Fallback template for configs that do not define their own "template" entry.
DEFAULT_TAG_TEMPLATE = ( # Same as used in Ruby (update.rb)
"{{major}}{% if minor %}.{{minor}}{% if patch %}.{{patch}}{%endif%}{%endif%}"
)
def get_versions_from_headline(regex, headline, template):
    """Extract the version(s) announced in a news headline.

    Args:
        regex: a pattern string, or a list of pattern strings. Patterns are tried
            in order with ``re.match`` after stripping surrounding whitespace.
        headline: the headline text to match against.
        template: an object exposing ``render(**kwargs)``; it is called with the
            named groups of the first pattern that matches.

    Returns:
        The rendered template split on newlines (one entry per line), or an
        empty list when no pattern matches.
    """
    patterns = regex if isinstance(regex, list) else [regex]
    for pattern in patterns:
        match = re.match(pattern.strip(), headline)
        if match:
            # The rendered text may span several lines; each line is one version.
            return template.render(**match.groupdict()).split("\n")

    # Empty list (not a dict) so the return type is the same whether or not
    # a pattern matched; callers that only iterate the result are unaffected.
    return []
def fetch_releases(distrowatch_id, regex, template):
    """Collect versions and their dates from a distribution's DistroWatch page.

    Args:
        distrowatch_id: value used for the ``distribution`` query parameter.
        regex: pattern (or list of patterns) forwarded to
            ``get_versions_from_headline``.
        template: template source, compiled once with ``Template`` and used to
            render the version string(s) of each matching headline.

    Returns:
        A dict mapping each version string to the raw text of the news date
        cell it was found next to.
    """
    found = {}
    compiled_template = Template(template)
    page = http.fetch_url(f"https://distrowatch.com/index.php?distribution={distrowatch_id}")
    document = BeautifulSoup(page.text, features="html5lib")

    # Each selected table is read as one news entry: a headline link plus a date cell.
    for news in document.select("td.News1>table.News"):
        title = news.select_one("td.NewsHeadline a[href]").get_text().strip()
        published = news.select_one("td.NewsDate").get_text()
        for version in get_versions_from_headline(regex, title, compiled_template):
            print(f"{version}: {published}")
            found[version] = published

    return found
def update_product(product_name, configs):
    """Fetch the releases described by each config and write them for the product.

    Args:
        product_name: name handed to ``endoflife.write_releases``.
        configs: iterable of config mappings; each one used must hold the
            DistroWatch id under the ``METHOD`` key and a ``regex`` entry, and
            may hold a ``template`` entry (``DEFAULT_TAG_TEMPLATE`` otherwise).
    """
    collected = {}
    for cfg in configs:
        # Only configs that declare a regex trigger a fetch.
        if "regex" not in cfg:
            continue
        tag_template = cfg.get("template", DEFAULT_TAG_TEMPLATE)
        # Later configs win when the same version is found more than once.
        collected |= fetch_releases(cfg[METHOD], cfg["regex"], tag_template)

    endoflife.write_releases(product_name, collected)
# Optional first command-line argument, passed to endoflife.list_products as a filter (None when absent).
p_filter = sys.argv[1] if len(sys.argv) > 1 else None
# NOTE(review): two loop headers over list_products follow; this looks like the old and new sides of a diff
# shown together (indentation is also missing) - confirm which loop is the current one.
for product, configs in endoflife.list_products(METHOD, p_filter).items():
# "::group::" / "::endgroup::" are presumably CI log-grouping markers - confirm.
print(f"::group::{product}")
update_product(product, configs)
for product_name, configs in endoflife.list_products(METHOD, p_filter).items():
print(f"::group::{product_name}")
product = endoflife.Product(product_name, load_product_data=True)
for config in product.get_auto_configs(METHOD):
# config.url is interpolated as the `distribution` query parameter of the DistroWatch page.
response = http.fetch_url(f"https://distrowatch.com/index.php?distribution={config.url}")
soup = BeautifulSoup(response.text, features="html5lib")
# Each selected table is read as one news entry: a headline link plus a date cell.
for table in soup.select("td.News1>table.News"):
headline = table.select_one("td.NewsHeadline a[href]").get_text().strip()
versions_match = config.first_match(headline)
# Skip headlines for which first_match returned a falsy value.
if not versions_match:
continue
# multiple versions may be released at once (e.g. Ubuntu 16.04.7 and 18.04.5)
versions = config.render(versions_match).split("\n")
date = dates.parse_date(table.select_one("td.NewsDate").get_text())
# Every version rendered from this headline is declared with the same date.
for version in versions:
product.declare_version(version, date)
product.write()
print("::endgroup::")