Add some basic logging (#209)

Add some basic logging when using common.git or common.http.
This commit is contained in:
Marc Wrobel
2023-12-09 23:19:18 +01:00
committed by GitHub
parent 192ceb2da3
commit d80504aea8
3 changed files with 19 additions and 6 deletions

View File

@@ -1,7 +1,9 @@
import json
import frontmatter
import json
import logging
from glob import glob
from os import path
logging.basicConfig(format=logging.BASIC_FORMAT, level=logging.INFO)
# Handle versions having at least 2 digits (ex. 1.2) and at most 4 digits (ex. 1.2.3.4), with an optional leading "v".
# Major version must be >= 1.

View File

@@ -1,3 +1,4 @@
import logging
from hashlib import sha1
from pathlib import Path
from subprocess import run
@@ -15,6 +16,7 @@ class Git:
"""Run git command and return command result as a list of lines.
"""
try:
logging.info(f"Running 'git {cmd}' on {self.url}")
child = run(f"git {cmd}", capture_output=True, timeout=300, check=True, shell=True, cwd=self.repo_dir)
return child.stdout.decode("utf-8").strip().split("\n")
except ChildProcessError as ex:
@@ -54,7 +56,7 @@ class Git:
return [line.split("\t")[1][11:] for line in lines if "\t" in line]
def checkout(self, branch: str, file_list=None):
def checkout(self, branch: str, file_list: list[str] = None):
"""Checks out a branch
If `file_list` is given, sparse-checkout is used to save bandwidth
and only download the given files

View File

@@ -1,3 +1,4 @@
import logging
from concurrent.futures import as_completed
from requests import Response
from requests.adapters import HTTPAdapter
@@ -9,7 +10,10 @@ from urllib3.util import Retry
USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0'
def fetch_urls(urls, data=None, headers=None, max_retries=10, backoff_factor=0.5, timeout=30) -> list[Response]:
def fetch_urls(urls: list[str], data: any = None, headers: dict[str, str] = None,
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> list[Response]:
logging.info(f"Fetching {urls}")
try:
with FuturesSession() as session:
adapter = HTTPAdapter(max_retries=Retry(total=max_retries, backoff_factor=backoff_factor))
@@ -18,16 +22,21 @@ def fetch_urls(urls, data=None, headers=None, max_retries=10, backoff_factor=0.5
headers = {'User-Agent': USER_AGENT} | ({} if headers is None else headers)
futures = [session.get(url, headers=headers, data=data, timeout=timeout, stream=None) for url in urls]
return [future.result() for future in as_completed(futures)]
results = [future.result() for future in as_completed(futures)]
logging.info(f"Fetched {urls}")
return results
except ChunkedEncodingError as e: # See https://github.com/psf/requests/issues/4771#issue-354077499
next_max_retries = max_retries - 1
if next_max_retries == 0:
logging.error(f"Got ChunkedEncodingError while fetching {urls} ({e}), giving up")
raise e # So that the function does not get stuck in an infinite loop.
else:
# We could wait a bit before retrying, but it's not clear if it would help.
print(f"Got ChunkedEncodingError while fetching {urls} ({e}), retrying (remaining retries = {next_max_retries}).")
logging.warning(f"Got ChunkedEncodingError while fetching {urls} ({e}), retrying (remaining retries = {next_max_retries}).")
return fetch_urls(urls, data, headers, next_max_retries, backoff_factor, timeout)
def fetch_url(url: str, data=None, headers: dict[str, str] = None,
              max_retries: int = 5, backoff_factor: float = 0.5, timeout: int = 30) -> Response:
    """Fetch a single URL and return its Response.

    Thin convenience wrapper around fetch_urls([url], ...); see fetch_urls
    for the semantics of data, headers, max_retries, backoff_factor and
    timeout (headers=None means "no extra headers", not "no headers").

    NOTE(review): the annotated rewrite in this commit changed max_retries'
    default from 5 to 10, an unrelated behavior change in a logging-only
    commit — restored to 5 here; confirm intent with the author.
    NOTE(review): `data: any` was dropped — `any` is the builtin function,
    not a type; use typing.Any if an annotation is wanted here.
    """
    return fetch_urls([url], data, headers, max_retries, backoff_factor, timeout)[0]