Refactor HTTP URL fetching scripts

This creates a common function for fetching HTTP URLs, with enhanced capabilities (retries, a known User-Agent).
It simplifies the scripts that already needed those capabilities and brings them to the scripts that did not.

This commit also fixes some scripts that did not log properly (cos.py, eks.py, haproxy.py, palo-alto-networks.py, rhel.py, ros.py, unrealircd.py).
Author: Marc Wrobel
Date: 2023-05-14 09:35:28 +02:00
parent 5176abd4d4
commit a16d9090d3
19 changed files with 295 additions and 311 deletions
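In essence, every call site swaps a hand-rolled urllib.request block (often with its own retry loop) for a single helper call. A minimal before/after sketch of the pattern, assuming a url variable and an HTML-scraping script; the names match the diffs below:

    # Before: each script built its own request, timeout and retry handling.
    import urllib.request
    from bs4 import BeautifulSoup

    with urllib.request.urlopen(url, data=None, timeout=5) as response:
        soup = BeautifulSoup(response, features="html5lib")

    # After: the shared helper returns the decoded body as a string.
    from common import endoflife

    response = endoflife.fetch_url(url)
    soup = BeautifulSoup(response, features="html5lib")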

View File

@@ -1,8 +1,8 @@
-import json
-import urllib.request
 import datetime
-from bs4 import BeautifulSoup
+import json
+import re
+from bs4 import BeautifulSoup
+from common import endoflife
 
 URLS = [
     "https://support.apple.com/en-us/HT201222",  # latest
@@ -55,45 +55,45 @@ def parse_date(s):
 for url in URLS:
-    with urllib.request.urlopen(url, data=None, timeout=5) as response:
-        soup = BeautifulSoup(response, features="html5lib")
-        table = soup.find(id="tableWraper")
-        for tr in reversed(table.findAll("tr")[1:]):
-            td_list = tr.findAll("td")
-            version_text = td_list[0].get_text()
-            for key, regexes in CONFIG.items():
-                for regex in regexes:
-                    matches = re.findall(regex, version_text, re.MULTILINE)
-                    if matches:
-                        for version in matches:
-                            abs_date = None
-                            try:
-                                print("== %s" % version_text.strip())
-                                abs_date = parse_date(td_list[2].get_text())
-                                print_date = abs_date.strftime("%Y-%m-%d")
-                                # Only update the date if we are adding first time
-                                # or if the date is lower
-                                if version not in release_lists[key]:
-                                    release_lists[key][version] = abs_date
-                                    print("%s-%s: %s" % (key, version, print_date))
-                                elif release_lists[key][version] < abs_date:
-                                    print(
-                                        "%s-%s: %s [IGNORED]"
-                                        % (key, version, print_date)
-                                    )
-                                elif release_lists[key][version] > abs_date:
-                                    # This is a lower date, so we mark it with a bang
-                                    print(
-                                        "%s-%s: %s [UPDATED]"
-                                        % (key, version, print_date)
-                                    )
-                                    release_lists[key][version] = abs_date
-                            except ValueError as e:
-                                print(
-                                    "%s-%s Failed to parse Date (%s)"
-                                    % (key, version, td_list[2].get_text())
-                                )
-                                next
+    response = endoflife.fetch_url(url)
+    soup = BeautifulSoup(response, features="html5lib")
+    table = soup.find(id="tableWraper")
+    for tr in reversed(table.findAll("tr")[1:]):
+        td_list = tr.findAll("td")
+        version_text = td_list[0].get_text()
+        for key, regexes in CONFIG.items():
+            for regex in regexes:
+                matches = re.findall(regex, version_text, re.MULTILINE)
+                if matches:
+                    for version in matches:
+                        abs_date = None
+                        try:
+                            print("== %s" % version_text.strip())
+                            abs_date = parse_date(td_list[2].get_text())
+                            print_date = abs_date.strftime("%Y-%m-%d")
+                            # Only update the date if we are adding first time
+                            # or if the date is lower
+                            if version not in release_lists[key]:
+                                release_lists[key][version] = abs_date
+                                print("%s-%s: %s" % (key, version, print_date))
+                            elif release_lists[key][version] < abs_date:
+                                print(
+                                    "%s-%s: %s [IGNORED]"
+                                    % (key, version, print_date)
+                                )
+                            elif release_lists[key][version] > abs_date:
+                                # This is a lower date, so we mark it with a bang
+                                print(
+                                    "%s-%s: %s [UPDATED]"
+                                    % (key, version, print_date)
+                                )
+                                release_lists[key][version] = abs_date
+                        except ValueError as e:
+                            print(
+                                "%s-%s Failed to parse Date (%s)"
+                                % (key, version, td_list[2].get_text())
+                            )
+                            next
 
 for k in CONFIG.keys():

View File

@@ -1,10 +1,14 @@
 import frontmatter
+import urllib.request
 from glob import glob
 from os import path
 
+# See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent.
+USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0'
+
-def list_products(method, products_filter=None, pathname = "website/products"):
+def list_products(method, products_filter=None, pathname="website/products"):
     """Return a list of products that are using the same given update method.
     """
     products_with_method = {}
@@ -25,3 +29,21 @@ def list_products(method, products_filter=None, pathname = "website/products"):
             products_with_method[product_name] = configs
 
     return products_with_method
+
+
+def fetch_url(url, retry_count=2, timeout=5, data=None, headers=None, encoding='utf-8'):
+    last_exception = None
+    # Merge caller-supplied headers over the default User-Agent.
+    headers = {'User-Agent': USER_AGENT} | ({} if headers is None else headers)
+    request = urllib.request.Request(url, headers=headers)
+
+    for retry in range(0, retry_count):
+        try:
+            resp = urllib.request.urlopen(request, data=data, timeout=timeout)
+            return resp.read().decode(encoding)
+        except Exception as e:
+            last_exception = e
+            print(f"Fetch of {url} failed (retry={retry}), got: " + str(e))
+            continue
+
+    raise last_exception
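For reference, a few hypothetical calls showing how the keyword arguments above are meant to be combined; the URLs are placeholders, not from this commit:

    from common import endoflife

    # Defaults: 2 attempts, 5-second timeout, the known User-Agent.
    body = endoflife.fetch_url("https://example.com/releases.html")

    # A flaky endpoint: more attempts and a longer timeout, as cos.py and
    # firefox.py do below.
    body = endoflife.fetch_url("https://example.com/releases.html",
                               retry_count=10, timeout=10)

    # Extra headers are merged over the default User-Agent.
    body = endoflife.fetch_url("https://example.com/api",
                               headers={"Accept": "application/json"})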

View File

@@ -1,46 +1,31 @@
-import urllib.request
-from bs4 import BeautifulSoup
-import re
 import json
+import re
+from bs4 import BeautifulSoup
+from common import endoflife
 from datetime import datetime
 
 REGEX = r"^(cos-\d+-\d+-\d+-\d+)"
 
 
 def fetch_all_milestones():
     url = "https://cloud.google.com/container-optimized-os/docs/release-notes/"
-    # Google Docs website often returns SSL errors, retry the request in case of failures.
-    for i in range(0,10):
-        try:
-            with urllib.request.urlopen(url, data=None, timeout=5) as response:
-                soup = BeautifulSoup(response, features="html5lib")
-            break
-        except Exception as e:
-            print("Retrying Request, got error: " + str(e))
-            continue
-    else:
-        raise Exception("Failed to fetch COS milestones")
+    # Retry as Google Docs often returns SSL errors.
+    response = endoflife.fetch_url(url, retry_count=10)
+    soup = BeautifulSoup(response, features="html5lib")
 
     milestones = soup.find_all('td', text=re.compile(r'COS \d+ LTS'))
     return [m.text.split(' ')[1] for m in milestones]
 
 
 def fetch_milestone(channel):
     url = "https://cloud.google.com/container-optimized-os/docs/release-notes/m{}".format(channel)
-    # Google Docs website often returns SSL errors, retry the request in case of failures.
-    for i in range(0,5):
-        try:
-            with urllib.request.urlopen(url, data=None, timeout=5) as response:
-                return BeautifulSoup(response, features="html5lib")
-        except Exception as e:
-            print("Retrying Request")
-            continue
-    raise Exception("Failed to fetch COS milestone {}".format(channel))
+    # Retry as Google Docs often returns SSL errors.
+    response = endoflife.fetch_url(url, retry_count=10)
+    return BeautifulSoup(response, features="html5lib")
 
 
-"""
-Takes soup, and returns a dictionary of versions and their release dates
-"""
 def parse_soup_for_versions(soup):
-    """ Parse the soup """
+    """Takes soup, and returns a dictionary of versions and their release dates
+    """
     versions = {}
     for article in soup.find_all('article', class_='devsite-article'):
         def parse_date(d):
@@ -67,21 +52,23 @@ def parse_soup_for_versions(soup):
             d = heading.find_previous('h2').get('data-text')
             date = parse_date(d)
             versions[version] = date
+            print("%s: %s" % (version, date))
 
     return versions
 
 
 def get_all_versions():
     all_versions = {}
     all_milestones = fetch_all_milestones()
-    print("::group::cos")
     for milestone in all_milestones:
         soup = fetch_milestone(milestone)
+        print("::group::COS - {}".format(milestone))
         versions = parse_soup_for_versions(soup)
         all_versions |= versions
+        print("::endgroup::")
-    print("::endgroup::")
     return all_versions
 
 
 if __name__ == '__main__':
     v = get_all_versions()
     with open('releases/cos.json', "w") as f:
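The logging fixes visible here and in the scripts below all follow the same pattern: print each version as it is found, and wrap each unit of work in ::group::/::endgroup:: markers, which GitHub Actions renders as collapsible log sections. A minimal sketch of the pattern; the product name and dict are illustrative:

    print("::group::myproduct")        # collapsible section in the Actions log
    for version, date in versions.items():
        print("%s: %s" % (version, date))
    print("::endgroup::")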

View File

@@ -1,10 +1,9 @@
+import json
 import re
 import sys
-import json
-import urllib.request
 from bs4 import BeautifulSoup
-from liquid import Template
 from common import endoflife
+from liquid import Template
 
 METHOD = 'distrowatch'
 DEFAULT_TAG_TEMPLATE = ( # Same as used in Ruby (update.rb)
@@ -28,15 +27,15 @@ def get_versions_from_headline(regex, headline, template):
 def fetch_releases(distrowatch_id, regex, template):
     releases = {}
     l_template = Template(template)
 
-    url = "https://distrowatch.com/index.php?distribution=%s" % distrowatch_id
-    with urllib.request.urlopen(url, data=None, timeout=5) as response:
-        soup = BeautifulSoup(response, features="html5lib")
-        for table in soup.select("td.News1>table.News"):
-            headline = table.select_one("td.NewsHeadline a[href]").get_text().strip()
-            date = table.select_one("td.NewsDate").get_text()
-            for v in get_versions_from_headline(regex, headline, l_template):
-                print("%s: %s" % (v, date))
-                releases[v] = date
+    url = f"https://distrowatch.com/index.php?distribution={distrowatch_id}"
+    response = endoflife.fetch_url(url)
+    soup = BeautifulSoup(response, features="html5lib")
+    for table in soup.select("td.News1>table.News"):
+        headline = table.select_one("td.NewsHeadline a[href]").get_text().strip()
+        date = table.select_one("td.NewsDate").get_text()
+        for v in get_versions_from_headline(regex, headline, l_template):
+            print("%s: %s" % (v, date))
+            releases[v] = date
 
     return releases

View File

@@ -1,10 +1,10 @@
-import urllib.request
-import datetime
-import json
 import markdown
 import re
+import json
-from datetime import datetime
 from bs4 import BeautifulSoup
 from common import endoflife
+from datetime import datetime
 
 URL = "https://raw.githubusercontent.com/awsdocs/amazon-eks-user-guide/master/doc_source/platform-versions.md"
 REGEX = r"^(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)$"
@@ -12,20 +12,23 @@ REGEX = r"^(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)$"
 def parse_platforms_page():
     all_versions = {}
-    with urllib.request.urlopen(URL, data=None, timeout=5) as contents:
-        html = markdown.markdown(contents.read().decode("utf-8"), extensions=["tables"])
-        soup = BeautifulSoup(html, features="html5lib")
-        for tr in soup.findAll("tr"):
-            td = tr.find("td")
-            if td and re.match(REGEX, td.text):
-                data = tr.findAll("td")
-                date = data[-1].text
-                if len(date) > 0:
-                    d = datetime.strptime(date, "%B %d, %Y").strftime("%Y-%m-%d")
-                    k8s_version = ".".join(data[0].text.split(".")[:-1])
-                    eks_version = data[1].text.replace(".", "-")
-                    version = "%s-%s" % (k8s_version, eks_version)
-                    all_versions[version] = d
+    print("::group::eks")
+    response = endoflife.fetch_url(URL)
+    html = markdown.markdown(response, extensions=["tables"])
+    soup = BeautifulSoup(html, features="html5lib")
+    for tr in soup.findAll("tr"):
+        td = tr.find("td")
+        if td and re.match(REGEX, td.text):
+            data = tr.findAll("td")
+            date = data[-1].text
+            if len(date) > 0:
+                d = datetime.strptime(date, "%B %d, %Y").strftime("%Y-%m-%d")
+                k8s_version = ".".join(data[0].text.split(".")[:-1])
+                eks_version = data[1].text.replace(".", "-")
+                version = "%s-%s" % (k8s_version, eks_version)
+                all_versions[version] = d
+                print("%s: %s" % (version, d))
+    print("::endgroup::")
 
     return all_versions

View File

@@ -1,11 +1,11 @@
+import concurrent.futures
 import json
-from typing import Tuple
-from datetime import datetime
 import re
 import requests
-import urllib.request
 from bs4 import BeautifulSoup
-import concurrent.futures
+from common import endoflife
+from datetime import datetime
+from typing import Tuple
 
 """Fetch Firefox versions with their dates from https://www.mozilla.org/en-US/firefox/releases/"""
 
 URL = "https://www.mozilla.org/en-US/firefox/releases/"
@@ -107,22 +107,9 @@ def get_version_and_date(release_page: str, release_version: str) -> Tuple[str,
     raise UnsupportedReleasePageError("Unable to find version and date for %s" % release_page)
 
 
 def make_bs_request(url: str) -> BeautifulSoup:
     """ Make a request to the given url and return a BeautifulSoup object """
-    last_exception = None
-    headers = {"user-agent": "mozilla"}
-
-    # requests to www.mozilla.org often time out, retry in case of failures
-    for i in range(0, 5):
-        try:
-            req = urllib.request.Request(url, headers=headers)
-            with urllib.request.urlopen(req, timeout=5) as response:
-                return BeautifulSoup(response.read(), features="html5lib")
-        except TimeoutError as e:
-            last_exception = e
-            print(f"Request to {url} timed out, retrying ({i})...")
-            continue
-
-    raise last_exception
+    response = endoflife.fetch_url(url, timeout=10, retry_count=5)
+    return BeautifulSoup(response, features="html5lib")
 
 
 def fetch_releases():
     releases = {}

View File

@@ -1,7 +1,7 @@
-import urllib.request
-from bs4 import BeautifulSoup
-import re
 import json
+import re
+from bs4 import BeautifulSoup
+from common import endoflife
 from datetime import datetime
 
 # https://regex101.com/r/zPxBqT/1
@@ -9,8 +9,8 @@ REGEX = r"\d.\d+\.\d+-gke\.\d+"
 def fetch_channel(channel):
     url = "https://cloud.google.com/kubernetes-engine/docs/release-notes-{}".format(channel)
-    with urllib.request.urlopen(url, data=None, timeout=5) as response:
-        return BeautifulSoup(response, features="html5lib")
+    response = endoflife.fetch_url(url)
+    return BeautifulSoup(response, features="html5lib")
 
 
 """
 Takes soup, and returns a dictionary of versions and their release dates

View File

@@ -1,8 +1,7 @@
 import json
 import re
-import urllib.request
 from bs4 import BeautifulSoup
+from common import endoflife
 
 """Fetch HAProxy versions with their dates from https://www.haproxy.org/download/.
 """
@@ -16,16 +15,13 @@ VERSION_REGEX = r"^(\d{4})\/(\d{2})\/(\d{2})\s+:\s+(\d+\.\d+\.\d.?)$"
 def fetch_cycles():
     cycles = []
     print("Fetching cycles")
-    with urllib.request.urlopen(
-            "https://www.haproxy.org/download/") as response:
-        soup = BeautifulSoup(response, features="html5lib")
-        for link in soup.select("a"):
-            m = re.match(CYCLE_REGEX, link.attrs["href"])
-            if m:
-                cycle = m.groups()[0]
-                cycles.append(cycle)
-                print(f"Found {cycle}")
+    response = endoflife.fetch_url('https://www.haproxy.org/download/')
+    soup = BeautifulSoup(response, features="html5lib")
+    for link in soup.select("a"):
+        m = re.match(CYCLE_REGEX, link.attrs["href"])
+        if m:
+            cycle = m.groups()[0]
+            cycles.append(cycle)
 
     # No changelog in https://www.haproxy.org/download/1.0/src
     cycles.remove("1.0")
@@ -38,14 +34,13 @@ def fetch_releases(cycles):
     for cycle in cycles:
         url = f"https://www.haproxy.org/download/{cycle}/src/CHANGELOG"
         print(f"Fetching version from {url}")
-        with urllib.request.urlopen(url) as response:
-            for line in response:
-                m = re.match(VERSION_REGEX, line.decode("utf-8"))
-                if m:
-                    year, month, day, version = m.groups()
-                    date = f"{year}-{month}-{day}"
-                    releases[version] = date
+        response = endoflife.fetch_url(url)
+        for line in response.split('\n'):
+            m = re.match(VERSION_REGEX, line)
+            if m:
+                year, month, day, version = m.groups()
+                date = f"{year}-{month}-{day}"
+                releases[version] = date
 
     return releases

View File

@@ -1,9 +1,8 @@
-import re
 import json
-import urllib.request
-from datetime import datetime, timezone
+import re
 from bs4 import BeautifulSoup
+from common import endoflife
+from datetime import datetime, timezone
 
 """Fetch Linux Kernel versions with their dates from
 https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/refs/tags.
@@ -25,9 +24,8 @@ def parse_date(d):
 def make_bs_request(url):
-    req = urllib.request.Request(url)
-    with urllib.request.urlopen(req, timeout=5) as response:
-        return BeautifulSoup(response.read(), features="html5lib")
+    response = endoflife.fetch_url(url)
+    return BeautifulSoup(response, features="html5lib")
 
 
 def fetch_releases():

View File

@@ -1,8 +1,7 @@
-import sys
-import json
-import urllib.request
 import datetime
+import json
+import re
+import sys
 from common import endoflife
 
 METHOD = "maven"
@@ -19,20 +18,8 @@ def valid_version(version):
 def fetch_json(group_id, artifact_id, start):
     url = f"https://search.maven.org/solrsearch/select?q=g:{group_id}+AND+a:{artifact_id}&core=gav&rows=100&wt=json&start={start}"
-    last_exception = None
-
-    # search.maven.org often time out lately, retry the request in case of failures.
-    for i in range(0, 5):
-        try:
-            with urllib.request.urlopen(url, data=None, timeout=5) as response:
-                return json.load(response)
-        except Exception as e:
-            last_exception = e
-            message = getattr(e, 'message', repr(e)) # https://stackoverflow.com/a/45532289/374236
-            print(f"Error while requesting {url} ({message}), retrying ({i})...")
-            continue
-
-    raise last_exception
+    response = endoflife.fetch_url(url, retry_count=5)
+    return json.loads(response)
 
 
 def fetch_releases(package_identifier):

View File

@@ -1,7 +1,6 @@
+import json
 import re
 import sys
-import json
-import urllib.request
 from common import endoflife
 
 METHOD = "npm"
@@ -18,18 +17,18 @@ def fetch_releases(npm_id, regex):
         regex = [regex]
 
     url = f"https://registry.npmjs.org/{npm_id}"
-    with urllib.request.urlopen(url, data=None, timeout=5) as response:
-        data = json.loads(response.read().decode("utf-8"))
-        for version in data["time"]:
-            matches = False
-            for r in regex:
-                if re.match(r, version):
-                    matches = True
+    response = endoflife.fetch_url(url)
+    data = json.loads(response)
+    for version in data["time"]:
+        matches = False
+        for r in regex:
+            if re.match(r, version):
+                matches = True
 
-            release_datetime = data["time"][version]
-            if matches and release_datetime:
-                releases[version] = release_datetime.split("T")[0]
-                print(f"{version}: {releases[version]}")
+        release_datetime = data["time"][version]
+        if matches and release_datetime:
+            releases[version] = release_datetime.split("T")[0]
+            print(f"{version}: {releases[version]}")
 
     return releases

View File

@@ -1,8 +1,8 @@
-import json
-import urllib.request
 import datetime
+import json
 import re
 from bs4 import BeautifulSoup
+from common import endoflife
 
 URL = "https://www.paloaltonetworks.com/services/support/end-of-life-announcements/end-of-life-summary"
@@ -15,33 +15,37 @@ ID_MAPPING = {
 def update_releases(html_identifier, file):
     versions = {}
-    with urllib.request.urlopen(URL, data=None, timeout=5) as response:
-        soup = BeautifulSoup(response, features="html5lib")
-        table = soup.find(id=html_identifier)
-        for tr in table.findAll("tr")[3:]:
-            td_list = tr.findAll("td")
-            version = (
-                td_list[0].get_text().strip().lower().replace(" ", "-").replace("*", "")
-            )
-            if file == "pan-xdr":
-                if "xdr" not in version:
-                    continue
-                version = version.removesuffix("-(cortex-xdr-agent)")
-            version = version.removesuffix("-(vm-series-only)")
-            version = version.removesuffix("-(panorama-only)")
-            if len(td_list) > 1 and version != "":
-                # Date formats differ between different products
-                try:
-                    month, date, year = td_list[1].get_text().split("/")
-                    abs_date = f"{year}-{month:0>2}-{date:0>2}"
-                except Exception:
-                    # A few dates have 1st, 2nd, 4th etc. Fix that:
-                    d = td_list[1].get_text()
-                    d = re.sub(r'(\w+) (\d{1,2})(?:\w{2}), (\d{4})', r'\1 \2, \3', d)
-                    date = datetime.datetime.strptime(d, "%B %d, %Y")
-                    abs_date = date.strftime("%Y-%m-%d")
-                versions[version] = abs_date
+    print(f"::group::{html_identifier}")
+    response = endoflife.fetch_url(URL)
+    soup = BeautifulSoup(response, features="html5lib")
+    table = soup.find(id=html_identifier)
+    for tr in table.findAll("tr")[3:]:
+        td_list = tr.findAll("td")
+        version = (
+            td_list[0].get_text().strip().lower().replace(" ", "-").replace("*", "")
+        )
+        if file == "pan-xdr":
+            if "xdr" not in version:
+                continue
+            version = version.removesuffix("-(cortex-xdr-agent)")
+        version = version.removesuffix("-(vm-series-only)")
+        version = version.removesuffix("-(panorama-only)")
+        if len(td_list) > 1 and version != "":
+            # Date formats differ between different products
+            try:
+                month, date, year = td_list[1].get_text().split("/")
+                abs_date = f"{year}-{month:0>2}-{date:0>2}"
+            except Exception:
+                # A few dates have 1st, 2nd, 4th etc. Fix that:
+                d = td_list[1].get_text()
+                d = re.sub(r'(\w+) (\d{1,2})(?:\w{2}), (\d{4})', r'\1 \2, \3', d)
+                date = datetime.datetime.strptime(d, "%B %d, %Y")
+                abs_date = date.strftime("%Y-%m-%d")
+            versions[version] = abs_date
+            print("%s: %s" % (version, abs_date))
+    print("::endgroup::")
 
     with open("releases/%s.json" % file, "w") as f:
         f.write(json.dumps(versions, indent=2))

View File

@@ -1,6 +1,6 @@
-import urllib.request
 import datetime
 import json
+from common import endoflife
 
 PHP_MAJOR_VERSIONS = [4, 5, 7, 8]
@@ -17,13 +17,13 @@ def parse_date(date_str):
 def fetch_versions(major_version):
     url = f"https://www.php.net/releases/index.php?json&max=-1&version={major_version}"
-    with urllib.request.urlopen(url, data=None, timeout=5) as response:
-        data = json.loads(response.read())
-        for v in data:
-            data[v] = parse_date(data[v]["date"])
-            print(f"{v}: {data[v]}")
+    response = endoflife.fetch_url(url)
+    data = json.loads(response)
+    for v in data:
+        data[v] = parse_date(data[v]["date"])
+        print(f"{v}: {data[v]}")
 
-        return data
+    return data
 
 
 with open("releases/php.json", "w") as f:

View File

@@ -1,16 +1,15 @@
 import json
-from datetime import datetime
-import urllib.request
 from bs4 import BeautifulSoup
+from common import endoflife
+from datetime import datetime
 
 URL = "https://docs.plesk.com/release-notes/obsidian/change-log"
 PRODUCT = "plesk"
 
 
 def make_bs_request(url):
-    req = urllib.request.Request(url)
-    with urllib.request.urlopen(req, timeout=5) as response:
-        return BeautifulSoup(response.read(), features="html5lib")
+    response = endoflife.fetch_url(url)
+    return BeautifulSoup(response, features="html5lib")
 
 
 # Only 18.0.20.3 and later will be picked up :

View File

@@ -1,9 +1,8 @@
+import json
 import re
 import sys
-import json
-import urllib.request
-from datetime import datetime
 from common import endoflife
+from datetime import datetime
 
 METHOD = "pypi"
 DEFAULT_TAG_TEMPLATE = ( # Same as used in Ruby (update.rb)
@@ -19,18 +18,18 @@ def fetch_releases(pypi_id, regex):
         regex = [regex]
 
     url = "https://pypi.org/pypi/%s/json" % pypi_id
-    with urllib.request.urlopen(url, data=None, timeout=5) as response:
-        data = json.loads(response.read().decode("utf-8"))
-        for version in data["releases"]:
-            R = data["releases"][version]
-            matches = False
-            for r in regex:
-                if re.match(r, version):
-                    matches = True
-            if matches and R:
-                d = datetime.fromisoformat(R[0]["upload_time"]).strftime("%Y-%m-%d")
-                releases[version] = d
-                print("%s: %s" % (version, d))
+    response = endoflife.fetch_url(url)
+    data = json.loads(response)
+    for version in data["releases"]:
+        R = data["releases"][version]
+        matches = False
+        for r in regex:
+            if re.match(r, version):
+                matches = True
+        if matches and R:
+            d = datetime.fromisoformat(R[0]["upload_time"]).strftime("%Y-%m-%d")
+            releases[version] = d
+            print("%s: %s" % (version, d))
 
     return releases

View File

@@ -1,8 +1,8 @@
-import re
-import urllib.request
-from bs4 import BeautifulSoup
-from datetime import datetime
 import json
+import re
+from bs4 import BeautifulSoup
+from common import endoflife
+from datetime import datetime
 
 dbs = {
     "mysql": "https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/MySQL.Concepts.VersionMgmt.html",
@@ -18,24 +18,23 @@ for db, url in dbs.items():
print(f"::group::{db}")
releases = {}
with urllib.request.urlopen(url, data=None, timeout=5) as contents:
html = contents.read().decode("utf-8")
soup = BeautifulSoup(html, features="html5lib")
response = endoflife.fetch_url(url)
soup = BeautifulSoup(response, features="html5lib")
for table in soup.find_all("table"):
for row in table.find_all("tr"):
columns = row.find_all("td")
for table in soup.find_all("table"):
for row in table.find_all("tr"):
columns = row.find_all("td")
# Must match both the 'Supported XXX minor versions' and
# 'Supported XXX major versions' to have correct release dates
if len(columns) > 3:
r = r"(?P<v>\d+(?:\.\d+)*)" # https://regex101.com/r/BY1vwV/1
m = re.search(r, columns[0].text.strip(), flags=re.IGNORECASE)
if m:
version = m.group("v")
date = parse_date(columns[2].text.strip())
print(f"{version} : {date}")
releases[version] = date
# Must match both the 'Supported XXX minor versions' and
# 'Supported XXX major versions' to have correct release dates
if len(columns) > 3:
r = r"(?P<v>\d+(?:\.\d+)*)" # https://regex101.com/r/BY1vwV/1
m = re.search(r, columns[0].text.strip(), flags=re.IGNORECASE)
if m:
version = m.group("v")
date = parse_date(columns[2].text.strip())
print(f"{version} : {date}")
releases[version] = date
print("::endgroup::")
with open(f"releases/amazon-rds-{db.lower()}.json", "w") as f:

View File

@@ -1,29 +1,31 @@
 import json
-import urllib.request
-from bs4 import BeautifulSoup
 import re
+from bs4 import BeautifulSoup
+from common import endoflife
 
 URL = "https://access.redhat.com/articles/3078"
 
 # https://regex101.com/r/877ibq/1
 regex = r"RHEL (?P<major>\d)(\. ?(?P<minor>\d+))?(( Update (?P<minor2>\d))| GA)?"
 
 versions = {}
-headers = {"user-agent": "mozilla"}
-req = urllib.request.Request(URL, headers=headers)
-with urllib.request.urlopen(req, timeout=5) as response:
-    soup = BeautifulSoup(response, features="html5lib")
-    for tr in soup.findAll("tr"):
-        td_list = tr.findAll("td")
-        if len(td_list) > 0:
-            version = td_list[0].get_text()
-            m = re.match(regex, version.strip()).groupdict()
-            version = m["major"]
-            if m["minor"]:
-                version += ".%s" % m["minor"]
-            if m["minor2"]:
-                version += ".%s" % m["minor2"]
-            versions[version] = td_list[1].get_text()
+print("::group::rhel")
+response = endoflife.fetch_url(URL)
+soup = BeautifulSoup(response, features="html5lib")
+for tr in soup.findAll("tr"):
+    td_list = tr.findAll("td")
+    if len(td_list) > 0:
+        version = td_list[0].get_text()
+        m = re.match(regex, version.strip()).groupdict()
+        version = m["major"]
+        if m["minor"]:
+            version += ".%s" % m["minor"]
+        if m["minor2"]:
+            version += ".%s" % m["minor2"]
+        date = td_list[1].get_text()
+        versions[version] = date
+        print("%s: %s" % (version, date))
+print("::endgroup::")
 
 with open("releases/redhat.json", "w") as f:
     f.write(json.dumps(versions, indent=2))

View File

@@ -1,8 +1,8 @@
-import json
-import urllib.request
 import datetime
-from bs4 import BeautifulSoup
+import json
 import re
+from bs4 import BeautifulSoup
+from common import endoflife
 
 URL = "https://wiki.ros.org/Distributions"
# https://regex101.com/r/c1ribd/1
@@ -10,27 +10,29 @@ regex = r"^ROS (?P<name>(\w| )+)"
 versions = {}
-with urllib.request.urlopen(URL, timeout=5) as response:
-    soup = BeautifulSoup(response, features="html5lib")
-    for tr in soup.findAll("tr"):
-        td_list = tr.findAll("td")
-        if len(td_list) > 0:
-            version = td_list[0].get_text()
-            m = re.match(regex, version.strip())
-            if m:
-                version = td_list[0].findAll("a")[0]["href"][1:]
-                try:
-                    date = datetime.datetime.strptime(
-                        td_list[1].get_text().strip(), "%B %d, %Y"
-                    )
-                # The date is a suffix (May 23rd, 2020)
-                except Exception as e:
-                    x = td_list[1].get_text().split(",")
-                    date = datetime.datetime.strptime(x[0][:-2] + x[1], "%B %d %Y")
-                abs_date = date.strftime("%Y-%m-%d")
-                versions[version] = abs_date
-                print("%s: %s" % (version, abs_date))
+print("::group::ros")
+response = endoflife.fetch_url(URL)
+soup = BeautifulSoup(response, features="html5lib")
+for tr in soup.findAll("tr"):
+    td_list = tr.findAll("td")
+    if len(td_list) > 0:
+        version = td_list[0].get_text()
+        m = re.match(regex, version.strip())
+        if m:
+            version = td_list[0].findAll("a")[0]["href"][1:]
+            try:
+                date = datetime.datetime.strptime(
+                    td_list[1].get_text().strip(), "%B %d, %Y"
+                )
+            # The date is a suffix (May 23rd, 2020)
+            except Exception as e:
+                x = td_list[1].get_text().split(",")
+                date = datetime.datetime.strptime(x[0][:-2] + x[1], "%B %d %Y")
+            abs_date = date.strftime("%Y-%m-%d")
+            versions[version] = abs_date
+            print("%s: %s" % (version, abs_date))
+print("::endgroup::")
 
 with open("releases/ros.json", "w") as f:
     f.write(json.dumps(versions, indent=2))

View File

@@ -1,24 +1,26 @@
-import mwparserfromhell
 import json
+import mwparserfromhell
 import re
-import urllib.request
+from common import endoflife
 
 URL = "https://www.unrealircd.org/docwiki/index.php?title=History_of_UnrealIRCd_releases&action=raw"
 REGEX = r"^(?:(\d+\.(?:\d+\.)*\d+))$"
 
-versions = {}
-with urllib.request.urlopen(URL) as response:
-    text = response.read()
-    wikicode = mwparserfromhell.parse(text)
-    for tr in wikicode.ifilter_tags(matches=lambda node: node.tag == "tr"):
-        items = tr.contents.filter_tags(matches=lambda node: node.tag == "td")
-        if len(items) >= 2:
-            maybe_version = items[0].__strip__()
-            if re.match(REGEX, maybe_version):
-                maybe_date = items[1].__strip__()
-                if re.match(r"\d{4}-\d{2}-\d{2}", maybe_date):
-                    versions[maybe_version] = maybe_date
+print("::group::unrealircd")
+response = endoflife.fetch_url(URL)
+wikicode = mwparserfromhell.parse(response)
+
+versions = {}
+for tr in wikicode.ifilter_tags(matches=lambda node: node.tag == "tr"):
+    items = tr.contents.filter_tags(matches=lambda node: node.tag == "td")
+    if len(items) >= 2:
+        maybe_version = items[0].__strip__()
+        if re.match(REGEX, maybe_version):
+            maybe_date = items[1].__strip__()
+            if re.match(r"\d{4}-\d{2}-\d{2}", maybe_date):
+                versions[maybe_version] = maybe_date
+                print("%s: %s" % (maybe_version, maybe_date))
+print("::endgroup::")
 
 with open("releases/unrealircd.json", "w") as f:
     f.write(json.dumps(versions, indent=2))