[distrowatch] Distrowatch update script

This commit is contained in:
Nemo
2022-07-10 15:20:48 +05:30
parent 31abee5c21
commit 5962146a16
2 changed files with 66 additions and 5 deletions

View File

@@ -39,15 +39,15 @@ jobs:
with:
ruby-version: 3.1
bundler-cache: true
- name: Update Release data
- name: Setup Release Script
run: |
git config --global init.defaultBranch main
git config --global extensions.partialClone true
pip install -r requirements.txt
for i in src/*.py; do python $i;done
bundle exec ruby update.rb ./website ~/.cache releases
- name: Custom Updates
run: for i in src/*.py; do python $i;done
- name: Git and DockerHub Updates
run: bundle exec ruby update.rb ./website ~/.cache releases
id: update_releases
- uses: stefanzweifel/git-auto-commit-action@v4
name: Commit and update new releases

61
src/distowatch.py Normal file
View File

@@ -0,0 +1,61 @@
from glob import glob
import os
import re
import json
import frontmatter
import urllib.request
from bs4 import BeautifulSoup
from html.parser import HTMLParser
from liquid import Template
# Default Liquid template used to render a version string from the named
# regex groups `major`, `minor` and `patch` when a product config supplies no
# `template` of its own. Same as used in Ruby (update.rb).
DEFAULT_TAG_TEMPLATE = "{{major}}{% if minor %}.{{minor}}{% if patch %}.{{patch}}{%endif%}{%endif%}"
def get_versions_from_headline(regex, headline, template):
    """Extract version strings from a news headline.

    Args:
        regex: A pattern string, or a list of pattern strings, tried in
            order. Each one is stripped of surrounding whitespace and
            matched at the start of ``headline`` with ``re.match``.
        headline: The headline text to inspect.
        template: An object whose ``render(**groups)`` method turns the
            named groups of the first successful match into text.

    Returns:
        The rendered text split on newlines (one entry per line), or an
        empty list when none of the patterns match.
    """
    patterns = regex if isinstance(regex, list) else [regex]
    for pattern in patterns:
        match = re.match(pattern.strip(), headline)
        if match:
            rendered = template.render(**match.groupdict())
            return rendered.split("\n")
    # No pattern matched: return an empty list so the result has the same
    # type as on the success path (the original returned an empty dict).
    return []
def fetch_releases(distrowatch_id, regex, template):
    """Scrape release versions and dates from a distribution's DistroWatch page.

    Args:
        distrowatch_id: Value substituted into the ``distribution`` query
            parameter of the DistroWatch URL.
        regex: Pattern, or list of patterns, handed to
            ``get_versions_from_headline`` for every news headline.
        template: Liquid template source used to render the matched groups
            into version strings.

    Returns:
        A dict mapping each version string to the text of the news entry's
        date cell. If a version is produced by several news entries, the
        entry processed last wins.

    Errors raised by ``urllib.request.urlopen`` (the request uses a
    5-second timeout) propagate to the caller.
    """
    releases = {}
    l_template = Template(template)
    url = "https://distrowatch.com/index.php?distribution=%s" % distrowatch_id
    with urllib.request.urlopen(url, data=None, timeout=5) as response:
        soup = BeautifulSoup(response, features="html5lib")

    for table in soup.select("td.News1>table.News"):
        headline_link = table.select_one("td.NewsHeadline a[href]")
        date_cell = table.select_one("td.NewsDate")
        # select_one() returns None when nothing matches; skip news tables
        # that lack a headline link or a date cell instead of crashing the
        # whole scrape with an AttributeError.
        if headline_link is None or date_cell is None:
            continue
        headline = headline_link.get_text().strip()
        date = date_cell.get_text()
        for v in get_versions_from_headline(regex, headline, l_template):
            print("%s: %s" % (v, date))
            releases[v] = date
    return releases
def update_releases():
    """Refresh release data for every product configured for DistroWatch.

    Loads the front matter of each ``website/products/*.md`` file and, for
    every entry under its ``auto`` key that contains a ``distrowatch`` key,
    calls ``update_product`` with the product name (the file name without
    its extension) and that entry.
    """
    for product_file in glob("website/products/*.md"):
        product_name = os.path.splitext(os.path.basename(product_file))[0]
        # Explicit encoding so parsing does not depend on the locale default.
        with open(product_file, "r", encoding="utf-8") as f:
            data = frontmatter.load(f)
        if "auto" not in data:
            continue
        for config in data["auto"]:
            # A membership test replaces iterating over every key/value pair
            # just to find this one key.
            if "distrowatch" in config:
                update_product(product_name, config)
def update_product(product_name, config):
    """Fetch releases for one product and write them to its JSON file.

    Args:
        product_name: Name used in the log group title and in the output
            path ``releases/<product_name>.json``.
        config: Mapping with the ``distrowatch`` id, an optional ``regex``
            and an optional ``template`` (``DEFAULT_TAG_TEMPLATE`` is used
            when absent).

    Nothing is fetched or written when ``config`` has no ``regex`` key.
    """
    tag_template = config.get("template", DEFAULT_TAG_TEMPLATE)
    if "regex" not in config:
        return

    # ::group:: / ::endgroup:: lines bracket this product's log output.
    print("::group::%s" % product_name)
    releases = fetch_releases(config['distrowatch'], config["regex"], tag_template)
    output_path = "releases/%s.json" % product_name
    with open(output_path, "w") as out:
        serialized = json.dumps(releases, indent=2)
        out.write(serialized)
    print("::endgroup::")
# Run the update only when this file is executed as a script, not on import.
if __name__ == "__main__":
    update_releases()