This way, writing the JSON file is handled automatically as long as the update does not fail. It also paves the way for further global improvements, such as better error handling.
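For context, the context-manager behaviour described above could look roughly like this. This is a minimal sketch, not the actual releasedata.ProductData implementation; the versions attribute, the write path, and the JSON layout shown here are illustrative assumptions:

import json
from pathlib import Path


class ProductData:
    """Collects versions for a product and writes them out on clean exit."""

    def __init__(self, product: str) -> None:
        self.product = product
        self.versions: dict[str, str] = {}  # version -> release date (ISO format)

    def declare_version(self, version: str, date) -> None:
        self.versions[version] = date.isoformat()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Only write the JSON file if the update succeeded.
        if exc_type is None:
            path = Path("releases") / f"{self.product}.json"  # illustrative path
            path.write_text(json.dumps(self.versions, indent=2, sort_keys=True))
        return False  # propagate any exception to the caller

The key design point is the exc_type check in __exit__: if the update script raises anywhere inside the with block, nothing is written, so a partial scrape cannot clobber previously committed data.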
import re

from bs4 import BeautifulSoup

from common import dates, http, releasedata

# Matches minor release directory links such as "2.9/".
CYCLE_PATTERN = re.compile(r"^(\d+\.\d+)/$")
DATE_AND_VERSION_PATTERN = re.compile(r"^(\d{4})/(\d{2})/(\d{2})\s+:\s+(\d+\.\d+\.\d.?)$")  # https://regex101.com/r/1JCnFC/1

with releasedata.ProductData("haproxy") as product_data:
    # First, get all minor releases from the download page.
    download = http.fetch_url('https://www.haproxy.org/download/')
    download_soup = BeautifulSoup(download.text, features="html5lib")

    minor_versions = []
    for link in download_soup.select("a"):
        minor_version_match = CYCLE_PATTERN.match(link.attrs["href"])
        if not minor_version_match:
            continue

        minor_version = minor_version_match.groups()[0]
        if minor_version != "1.0":  # No changelog in https://www.haproxy.org/download/1.0/src
            minor_versions.append(minor_version)

    # Then, fetch all versions from each changelog.
    changelog_urls = [f"https://www.haproxy.org/download/{minor_version}/src/CHANGELOG" for minor_version in minor_versions]
    for changelog in http.fetch_urls(changelog_urls):
        for line in changelog.text.split('\n'):
            date_and_version_match = DATE_AND_VERSION_PATTERN.match(line)
            if date_and_version_match:
                year, month, day, version = date_and_version_match.groups()
                product_data.declare_version(version, dates.date(int(year), int(month), int(day)))
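For illustration, DATE_AND_VERSION_PATTERN is meant to match CHANGELOG header lines of the "YYYY/MM/DD : version" shape. The sample line below is illustrative, not copied from an actual HAProxy CHANGELOG:

import re

DATE_AND_VERSION_PATTERN = re.compile(r"^(\d{4})/(\d{2})/(\d{2})\s+:\s+(\d+\.\d+\.\d.?)$")

# Illustrative header line in the expected "YYYY/MM/DD : version" shape.
sample = "2019/06/16 : 2.0.0"
match = DATE_AND_VERSION_PATTERN.match(sample)
if match:
    year, month, day, version = match.groups()
    print(year, month, day, version)  # 2019 06 16 2.0.0

Lines that do not match this shape (the bulk of a changelog) simply fail the match and are skipped, which is why the script can feed every line through the pattern without any pre-filtering.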