Use requests instead of urllib for HTTP requests

It's simpler to use, and provides better retry capabilities.
This commit is contained in:
Marc Wrobel
2023-11-24 23:41:07 +01:00
parent 552b129cf9
commit f0324372d9
2 changed files with 9 additions and 16 deletions

View File

@@ -13,5 +13,6 @@ six==1.16.0
soupsieve==2.5 soupsieve==2.5
typing_extensions==4.8.0 typing_extensions==4.8.0
webencodings==0.5.1 webencodings==0.5.1
requests==2.31.0
requests-html==0.10.0 requests-html==0.10.0
regex==2023.10.3 regex==2023.10.3

View File

@@ -1,6 +1,8 @@
import json import json
import frontmatter import frontmatter
import urllib.request from requests import Session
from requests.adapters import HTTPAdapter
from urllib3.util import Retry
from glob import glob from glob import glob
from os import path from os import path
@@ -39,22 +41,12 @@ def list_products(method, products_filter=None, pathname="website/products"):
# Keep the default timeout high enough to avoid errors with web.archive.org. # Keep the default timeout high enough to avoid errors with web.archive.org.
def fetch_url(url, retry_count=5, timeout=30, data=None, headers=None, encoding='utf-8'): def fetch_url(url, retry_count=5, timeout=30, data=None, headers=None):
last_exception = None
headers = {'User-Agent': USER_AGENT} | {} if headers is None else headers headers = {'User-Agent': USER_AGENT} | {} if headers is None else headers
request = urllib.request.Request(url, headers=headers) with Session() as s:
s.mount('https://', HTTPAdapter(max_retries=Retry(total=retry_count, backoff_factor=0.2)))
for retry in range(0, retry_count): r = s.get(url, headers=headers, data=data, timeout=timeout)
try: return r.text
resp = urllib.request.urlopen(request, data=data, timeout=timeout)
return resp.read().decode(encoding)
except Exception as e:
last_exception = e
print(f"Fetch of {url} failed (retry={retry}), got: " + str(e))
continue
raise last_exception
def write_releases(product, releases, pathname="releases"): def write_releases(product, releases, pathname="releases"):