Use requests instead of urllib for HTTP requests

It's simpler to use, and provides better retry capabilities.
2023-11-24 23:41:07 +01:00
parent 552b129cf9
commit f0324372d9
2 changed files with 9 additions and 16 deletions
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,5 +13,6 @@ six==1.16.0
 soupsieve==2.5
 typing_extensions==4.8.0
 webencodings==0.5.1
+requests==2.31.0
 requests-html==0.10.0
 regex==2023.10.3
--- a/src/common/endoflife.py
+++ b/src/common/endoflife.py
@@ -1,6 +1,8 @@
 import json
 import frontmatter
-import urllib.request
+from requests import Session
+from requests.adapters import HTTPAdapter
+from urllib3.util import Retry
 from glob import glob
 from os import path

@@ -39,22 +41,12 @@ def list_products(method, products_filter=None, pathname="website/products"):


 # Keep the default timeout high enough to avoid errors with web.archive.org.
-def fetch_url(url, retry_count=5, timeout=30, data=None, headers=None, encoding='utf-8'):
-    last_exception = None
-
+def fetch_url(url, retry_count=5, timeout=30, data=None, headers=None):
    headers = {'User-Agent': USER_AGENT} | {} if headers is None else headers
-    request = urllib.request.Request(url, headers=headers)
-
-    for retry in range(0, retry_count):
-        try:
-            resp = urllib.request.urlopen(request, data=data, timeout=timeout)
-            return resp.read().decode(encoding)
-        except Exception as e:
-            last_exception = e
-            print(f"Fetch of {url} failed (retry={retry}), got: " + str(e))
-            continue
-
-    raise last_exception
+    with Session() as s:
+        s.mount('https://', HTTPAdapter(max_retries=Retry(total=retry_count, backoff_factor=0.2)))
+        r = s.get(url, headers=headers, data=data, timeout=timeout)
+        return r.text


 def write_releases(product, releases, pathname="releases"):