From 86860715ab6f1a2fdd925bdc21b630f8b281ea2d Mon Sep 17 00:00:00 2001 From: Marc Wrobel Date: Sat, 30 Dec 2023 00:07:22 +0100 Subject: [PATCH] Rewrite update.rb in Python (#263) Reasons were: - a single language makes maintenance easier, - workflow simplification, - not stopping when a script fails anymore (it's better to have a partial update than nothing), - use `GITHUB_OUTPUT` instead of the deprecated `set-output` command, - display a workflow summary with statistics about scripts and information about updated products. The generated commit message is not as good as it used to be, but it makes the diff process agnostic of the file format (which will be needed soon, when the format changes), and it handles version updates / removals, which were not supported by the previous script. --- .github/dependabot.yml | 11 ---- .github/workflows/update.yml | 25 ++++---- Gemfile | 5 -- Gemfile.lock | 13 ---- HACKING.md | 3 - requirements.txt | 1 + update.py | 114 +++++++++++++++++++++++++++++++++++ update.rb | 37 ------------ 8 files changed, 127 insertions(+), 82 deletions(-) delete mode 100644 Gemfile delete mode 100644 Gemfile.lock delete mode 100644 HACKING.md create mode 100644 update.py delete mode 100644 update.rb diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 5af34223..0b845d3b 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,6 +1,5 @@ version: 2 updates: - - package-ecosystem: "github-actions" directory: "/" schedule: @@ -10,13 +9,3 @@ updates: directory: "/" schedule: interval: "monthly" - - - package-ecosystem: "npm" - directory: "/" - schedule: - interval: "monthly" - - - package-ecosystem: "bundler" - directory: "/" - schedule: - interval: "monthly" diff --git a/.github/workflows/update.yml b/.github/workflows/update.yml index 8af507b8..c5cc0f58 100644 --- a/.github/workflows/update.yml +++ b/.github/workflows/update.yml @@ -55,29 +55,28 @@ jobs: path: website submodules: false - - uses: ruby/setup-ruby@v1 - with: - ruby-version: '3.1' 
- bundler-cache: true - - uses: actions/setup-python@v4 with: python-version: '3.11' cache: 'pip' - run: pip install -r requirements.txt - - name: Custom Updates + - name: Update data + id: update_data env: PYPPETEER_HOME: /home/runner/.cache/pyppeteer # Add chromium downloaded by pyppeteer to the cache. GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: for i in src/*.py; do python $i;done + continue-on-error: true # commit even if the data was not fully updated + run: python update.py - - name: Build commit message - id: update_releases - run: bundle exec ruby update.rb - - - name: Commit and update new releases + - name: Commit changes uses: stefanzweifel/git-auto-commit-action@v5 + if: steps.update_data.outputs.commit_message != '' with: - commit_message: ${{ fromJSON(steps.update_releases.outputs.commit_message)}} + commit_message: ${{ steps.update_data.outputs.commit_message }} commit_author: 'github-actions[bot] ' + + # we still want to easily know if something went wrong + - name: Set job status + if: steps.update_data.outcome != 'success' + run: exit 1 diff --git a/Gemfile b/Gemfile deleted file mode 100644 index 6e005a1a..00000000 --- a/Gemfile +++ /dev/null @@ -1,5 +0,0 @@ -# frozen_string_literal: true - -source "https://rubygems.org" - -gem "rugged", "~> 1.5.1" diff --git a/Gemfile.lock b/Gemfile.lock deleted file mode 100644 index 3916f0ac..00000000 --- a/Gemfile.lock +++ /dev/null @@ -1,13 +0,0 @@ -GEM - remote: https://rubygems.org/ - specs: - rugged (1.5.1) - -PLATFORMS - x86_64-linux - -DEPENDENCIES - rugged (~> 1.5.1) - -BUNDLED WITH - 2.3.5 diff --git a/HACKING.md b/HACKING.md deleted file mode 100644 index 77439670..00000000 --- a/HACKING.md +++ /dev/null @@ -1,3 +0,0 @@ -# Hacking on Release Data - -TODO diff --git a/requirements.txt b/requirements.txt index c25424f8..7ebd47ab 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ beautifulsoup4==4.12.2 # used by a lot of script to parse html +deepdiff==6.7.1 # used in update.py 
"""Run every update script, then summarize and publish what changed.

Each ``*.py`` file of ``SRC_DIR`` is executed in its own process. The files
modified under ``DATA_DIR`` are then compared with their previous content to
build a commit message, which is exposed as the ``commit_message`` output of
the GitHub Actions step. A Markdown report is appended to the step summary.

The process exits with status 1 when at least one script failed, 0 otherwise.
"""
import json
import logging
import os
import subprocess
import sys
import time
from base64 import b64encode
from pathlib import Path

SRC_DIR = 'src'
DATA_DIR = 'releases'
SCRIPT_TIMEOUT_SECONDS = 300
GIT_TIMEOUT_SECONDS = 10


def github_output(name, value):
    """Append ``name=value`` to the file designated by ``GITHUB_OUTPUT``.

    Values containing a newline are written with the ``name<<delimiter``
    multiline syntax. When ``GITHUB_OUTPUT`` is not set, nothing is written
    and the value is only logged at debug level.
    """
    if "GITHUB_OUTPUT" not in os.environ:
        logging.debug(f"GITHUB_OUTPUT does not exist, but would have written: {name}={value.strip()}")
        return

    if "\n" in value:
        # https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#multiline-strings
        # A random delimiter cannot be guessed, so the value cannot end the block early.
        delimiter = b64encode(os.urandom(16)).decode()
        command = f"{name}<<{delimiter}\n{value}\n{delimiter}"
    else:
        command = f"{name}={value}"

    # Explicit encoding: the commit message contains non-ASCII characters.
    with open(os.environ["GITHUB_OUTPUT"], 'a', encoding='utf-8') as github_output_var:
        print(command, file=github_output_var)
    logging.debug(f"Wrote to GITHUB_OUTPUT: {name}={value.strip()}")


def add_summary_line(line):
    """Append ``line`` to the file designated by ``GITHUB_STEP_SUMMARY``.

    When ``GITHUB_STEP_SUMMARY`` is not set, nothing is written and the line
    is only logged at debug level.
    """
    if "GITHUB_STEP_SUMMARY" not in os.environ:
        logging.debug(f"GITHUB_STEP_SUMMARY does not exist, but would have written: {line}")
        return

    with open(os.environ["GITHUB_STEP_SUMMARY"], 'a', encoding='utf-8') as github_step_summary:
        print(line, file=github_step_summary)


def run_scripts(src_dir=SRC_DIR, timeout=SCRIPT_TIMEOUT_SECONDS):
    """Run every ``*.py`` file of ``src_dir`` in name order.

    One row per script is added to the step summary. A script that exits with
    a non-zero status, or that runs for more than ``timeout`` seconds, is
    reported as failed and the remaining scripts are still executed.

    Returns True when at least one script failed, False otherwise.
    """
    scripts = sorted(os.path.join(src_dir, file) for file in os.listdir(src_dir) if file.endswith('.py'))
    some_script_failed = False

    add_summary_line("## Script execution summary\n")
    add_summary_line("| Name | Duration | Succeeded |")
    add_summary_line("|------|----------|-----------|")
    for script in scripts:
        logging.info(f"start running {script}")

        start = time.perf_counter()
        try:
            child = subprocess.run([sys.executable, script], timeout=timeout)
            succeeded = child.returncode == 0
        except subprocess.TimeoutExpired:
            # A hanging script must not abort the whole update: count it as a
            # failure and carry on with the next one.
            succeeded = False
            logging.error(f"{script} did not finish within {timeout}s")
        elapsed_seconds = time.perf_counter() - start

        if succeeded:
            logging.info(f"Finished running {script}, took {elapsed_seconds:.2f}s")
            add_summary_line(f"| {script} | {elapsed_seconds:.2f}s | ✅ |")
        else:
            some_script_failed = True
            add_summary_line(f"| {script} | {elapsed_seconds:.2f}s | ❌ |")
            logging.error(f"Error while running {script} after {elapsed_seconds:.2f}s, update will only be partial")

    return some_script_failed


def run_git(*args, capture_output=False):
    """Run ``git`` with ``args``, raising if it fails or exceeds the git timeout."""
    return subprocess.run(['git', *args], capture_output=capture_output, timeout=GIT_TIMEOUT_SECONDS, check=True)


def list_updated_files():
    """Stage all changes and return the sorted paths of the changed files of ``DATA_DIR``."""
    run_git('add', '--all')  # to also get new files in git diff
    git_diff = run_git('diff', '--name-only', '--staged', capture_output=True)
    file_names = git_diff.stdout.decode('utf-8').splitlines()
    # The trailing slash keeps out siblings that merely share the prefix.
    return sorted(Path(file) for file in file_names if file.startswith(f"{DATA_DIR}/"))


def load_json(path):
    """Return the parsed JSON content of ``path``, or ``{}`` if the file does not exist."""
    try:
        with open(path, encoding='utf-8') as file:
            return json.load(file)
    except FileNotFoundError:  # file added (old side) or deleted (new side)
        return {}


def load_original_contents(paths):
    """Return the content each of ``paths`` had before the current changes.

    The changes are stashed while the files are read and are always restored
    afterwards, even when reading fails.
    """
    run_git('stash', '--all', '--quiet')
    try:
        return {path: load_json(path) for path in paths}
    finally:
        run_git('stash', 'pop', '--quiet')


def main():
    """Run the scripts, publish the summary and the commit message, return the exit status."""
    logging.basicConfig(format=logging.BASIC_FORMAT, level=logging.INFO)

    # Imported here rather than at module level so that the helpers above can
    # be imported with the standard library only; done before anything else so
    # that a missing dependency is reported before any script is run.
    from deepdiff import DeepDiff

    some_script_failed = run_scripts()

    updated_files = list_updated_files()
    logging.info(f"Updated files: {updated_files}")

    add_summary_line("## Update summary\n")
    if updated_files:
        new_files_content = {path: load_json(path) for path in updated_files}
        old_files_content = load_original_contents(updated_files)

        product_names = ', '.join(path.stem for path in updated_files)
        commit_message_parts = [f"🤖: {product_names}\n\n"]
        add_summary_line(f"Updated {len(updated_files)} products: {product_names}.")

        for path in updated_files:
            add_summary_line(f"### {path.stem}\n")
            commit_message_parts.append(f"{path.stem}:\n")

            diff = DeepDiff(old_files_content[path], new_files_content[path], ignore_order=True)
            for line in diff.pretty().split('\n'):
                if not line:  # nothing to report, e.g. only the formatting changed
                    continue
                add_summary_line(f"- {line}")
                commit_message_parts.append(f"- {line}\n")
                logging.info(f"{path.stem}: {line}")

            commit_message_parts.append("\n")
            add_summary_line("")

        github_output('commit_message', ''.join(commit_message_parts))
    else:
        add_summary_line("No update")

    return 1 if some_script_failed else 0


if __name__ == "__main__":
    sys.exit(main())