# Provenance: recovered from a git patch whose line breaks had been lost.
#   commit:  5962146a1681f43af56a9239f6b026ee65d58431
#   author:  Nemo, Sun, 10 Jul 2022 15:20:48 +0530
#   subject: [PATCH] [distrowatch] Distrowatch update script
# The patch created this module as src/distowatch.py and also edited
# .github/workflows/update.yml: the "Update Release data" step was renamed
# "Setup Release Script", the `for i in src/*.py; do python $i;done` loop
# moved into a new "Custom Updates" step, and
# `bundle exec ruby update.rb ./website ~/.cache releases` moved into a new
# "Git and DockerHub Updates" step (id: update_releases).
"""Collect release dates from DistroWatch news headlines.

For every product file under ``website/products/*.md`` whose front matter
has an ``auto`` entry containing a ``distrowatch`` key, the DistroWatch page
for that distribution is downloaded, each news headline is matched against
the configured regex(es), and the resulting ``{version: date}`` mapping is
written to ``releases/<product>.json``.
"""
import json
import os
import re
import urllib.request
from glob import glob
from html.parser import HTMLParser  # not referenced below; kept from the original import list

import frontmatter
from bs4 import BeautifulSoup
from liquid import Template

# Same as used in Ruby (update.rb)
DEFAULT_TAG_TEMPLATE = "{{major}}{% if minor %}.{{minor}}{% if patch %}.{{patch}}{%endif%}{%endif%}"


def get_versions_from_headline(regex, headline, template):
    """Return the version strings announced by a single news headline.

    Args:
        regex: One pattern, or a list of patterns, tried in order with
            ``re.match`` (surrounding whitespace of each pattern is stripped).
        headline: Headline text to match.
        template: Object exposing ``render(**kwargs)``; it receives the named
            groups of the first matching pattern.

    Returns:
        A list with one entry per non-blank line of the rendered template,
        or an empty list when no pattern matches.
    """
    patterns = regex if isinstance(regex, list) else [regex]
    for pattern in patterns:
        matches = re.match(pattern.strip(), headline)
        if matches:
            rendered = template.render(**matches.groupdict())
            # A template may render several versions, one per line. Blank
            # lines (e.g. from a trailing newline) are dropped so that no
            # empty-string "version" ends up in the output.
            return [line.strip() for line in rendered.split("\n") if line.strip()]

    # Same type as the matching branch; callers only iterate over the result.
    return []


def fetch_releases(distrowatch_id, regex, template):
    """Download a distribution's DistroWatch page and extract its releases.

    Args:
        distrowatch_id: Value for the ``distribution`` query parameter.
        regex: Pattern(s) forwarded to ``get_versions_from_headline``.
        template: Liquid template source used to render a version string from
            the named groups of a matching headline.

    Returns:
        Dict mapping each version string to the text of the news date cell.
        When several news items render the same version, the item appearing
        later on the page wins.

    Raises:
        urllib.error.URLError: If the page cannot be fetched (5 s timeout).
    """
    releases = {}
    l_template = Template(template)
    url = f"https://distrowatch.com/index.php?distribution={distrowatch_id}"
    with urllib.request.urlopen(url, data=None, timeout=5) as response:
        soup = BeautifulSoup(response, features="html5lib")

    for table in soup.select("td.News1>table.News"):
        headline_link = table.select_one("td.NewsHeadline a[href]")
        date_cell = table.select_one("td.NewsDate")
        # select_one() returns None when nothing matches; skip news tables
        # that lack either element instead of aborting the whole scrape.
        if headline_link is None or date_cell is None:
            continue
        headline = headline_link.get_text().strip()
        date = date_cell.get_text().strip()
        for v in get_versions_from_headline(regex, headline, l_template):
            print(f"{v}: {date}")
            releases[v] = date
    return releases


def update_releases():
    """Run ``update_product`` for every product configured for DistroWatch.

    A product qualifies when its front matter has an ``auto`` list and an
    entry of that list contains the ``distrowatch`` key.
    """
    for product_file in glob("website/products/*.md"):
        product_name = os.path.splitext(os.path.basename(product_file))[0]
        with open(product_file, "r", encoding="utf-8") as f:
            data = frontmatter.load(f)
        if "auto" not in data:
            continue
        for config in data["auto"]:
            if "distrowatch" in config:
                update_product(product_name, config)


def update_product(product_name, config):
    """Fetch one product's releases and write ``releases/<product_name>.json``.

    Nothing is fetched or written when ``config`` has no ``regex`` key.

    Args:
        product_name: Product file name without its extension.
        config: The ``auto`` entry holding ``distrowatch``, ``regex`` and an
            optional ``template`` (defaults to ``DEFAULT_TAG_TEMPLATE``).
    """
    if "regex" not in config:
        return
    t = config.get("template", DEFAULT_TAG_TEMPLATE)
    # The workflow runs this script on GitHub Actions; ::group::/::endgroup::
    # fold the per-product output in the job log.
    print(f"::group::{product_name}")
    try:
        r = fetch_releases(config["distrowatch"], config["regex"], t)
        with open(f"releases/{product_name}.json", "w", encoding="utf-8") as f:
            f.write(json.dumps(r, indent=2))
    finally:
        # Always close the log group, even if fetching or writing fails.
        print("::endgroup::")


if __name__ == "__main__":
    update_releases()