Add some basic logging (#209)

Add some basic logging when using common.git or common.http.
This commit is contained in:
Marc Wrobel
2023-12-09 23:19:18 +01:00
committed by GitHub
parent 192ceb2da3
commit d80504aea8
3 changed files with 19 additions and 6 deletions

View File

@@ -1,7 +1,9 @@
import json
import frontmatter
import json
import logging
from glob import glob
from os import path
logging.basicConfig(format=logging.BASIC_FORMAT, level=logging.INFO)
# Handle versions having at least 2 digits (ex. 1.2) and at most 4 digits (ex. 1.2.3.4), with an optional leading "v".
# Major version must be >= 1.

View File

@@ -1,3 +1,4 @@
import logging
from hashlib import sha1
from pathlib import Path
from subprocess import run
@@ -15,6 +16,7 @@ class Git:
"""Run git command and return command result as a list of lines.
"""
try:
logging.info(f"Running 'git {cmd}' on {self.url}")
child = run(f"git {cmd}", capture_output=True, timeout=300, check=True, shell=True, cwd=self.repo_dir)
return child.stdout.decode("utf-8").strip().split("\n")
except ChildProcessError as ex:
@@ -54,7 +56,7 @@ class Git:
return [line.split("\t")[1][11:] for line in lines if "\t" in line]
def checkout(self, branch: str, file_list=None):
def checkout(self, branch: str, file_list: list[str] = None):
"""Checks out a branch
If `file_list` is given, sparse-checkout is used to save bandwidth
and only download the given files

View File

@@ -1,3 +1,4 @@
import logging
from concurrent.futures import as_completed
from requests import Response
from requests.adapters import HTTPAdapter
@@ -9,7 +10,10 @@ from urllib3.util import Retry
USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0'
def fetch_urls(urls, data=None, headers=None, max_retries=10, backoff_factor=0.5, timeout=30) -> list[Response]:
def fetch_urls(urls: list[str], data: any = None, headers: dict[str, str] = None,
max_retries: int = 10, backoff_factor: float = 0.5, timeout: int = 30) -> list[Response]:
logging.info(f"Fetching {urls}")
try:
with FuturesSession() as session:
adapter = HTTPAdapter(max_retries=Retry(total=max_retries, backoff_factor=backoff_factor))
@@ -18,16 +22,21 @@ def fetch_urls(urls, data=None, headers=None, max_retries=10, backoff_factor=0.5
headers = {'User-Agent': USER_AGENT} | ({} if headers is None else headers)
futures = [session.get(url, headers=headers, data=data, timeout=timeout, stream=None) for url in urls]
return [future.result() for future in as_completed(futures)]
results = [future.result() for future in as_completed(futures)]
logging.info(f"Fetched {urls}")
return results
except ChunkedEncodingError as e: # See https://github.com/psf/requests/issues/4771#issue-354077499
next_max_retries = max_retries - 1
if next_max_retries == 0:
logging.error(f"Got ChunkedEncodingError while fetching {urls} ({e}), giving up")
raise e # So that the function does not get stuck in an infinite loop.
else:
# We could wait a bit before retrying, but it's not clear if it would help.
print(f"Got ChunkedEncodingError while fetching {urls} ({e}), retrying (remaining retries = {next_max_retries}).")
logging.warning(f"Got ChunkedEncodingError while fetching {urls} ({e}), retrying (remaining retries = {next_max_retries}).")
return fetch_urls(urls, data, headers, next_max_retries, backoff_factor, timeout)
def fetch_url(url: str, data=None, headers: dict[str, str] = None,
              max_retries: int = 5, backoff_factor: float = 0.5, timeout: int = 30) -> Response:
    """Fetch a single URL and return its Response.

    Thin convenience wrapper around fetch_urls([url], ...); see fetch_urls
    for the semantics of data, headers, max_retries, backoff_factor and
    timeout (headers=None means "no extra headers", not "no headers").

    NOTE(review): the annotated rewrite in this commit changed max_retries'
    default from 5 to 10, an unrelated behavior change in a logging-only
    commit — restored to 5 here; confirm intent with the author.
    NOTE(review): `data: any` was dropped — `any` is the builtin function,
    not a type; use typing.Any if an annotation is wanted here.
    """
    return fetch_urls([url], data, headers, max_retries, backoff_factor, timeout)[0]