Use requests instead of urllib for HTTP requests
It's simpler to use, and provides better retry capabilities.
This commit is contained in:
@@ -13,5 +13,6 @@ six==1.16.0
|
|||||||
soupsieve==2.5
|
soupsieve==2.5
|
||||||
typing_extensions==4.8.0
|
typing_extensions==4.8.0
|
||||||
webencodings==0.5.1
|
webencodings==0.5.1
|
||||||
|
requests==2.31.0
|
||||||
requests-html==0.10.0
|
requests-html==0.10.0
|
||||||
regex==2023.10.3
|
regex==2023.10.3
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
import json
|
import json
|
||||||
import frontmatter
|
import frontmatter
|
||||||
import urllib.request
|
from requests import Session
|
||||||
|
from requests.adapters import HTTPAdapter
|
||||||
|
from urllib3.util import Retry
|
||||||
from glob import glob
|
from glob import glob
|
||||||
from os import path
|
from os import path
|
||||||
|
|
||||||
@@ -39,22 +41,12 @@ def list_products(method, products_filter=None, pathname="website/products"):
|
|||||||
|
|
||||||
|
|
||||||
# Keep the default timeout high enough to avoid errors with web.archive.org.
|
# Keep the default timeout high enough to avoid errors with web.archive.org.
|
||||||
def fetch_url(url, retry_count=5, timeout=30, data=None, headers=None, encoding='utf-8'):
|
def fetch_url(url, retry_count=5, timeout=30, data=None, headers=None):
|
||||||
last_exception = None
|
|
||||||
|
|
||||||
headers = {'User-Agent': USER_AGENT} | {} if headers is None else headers
|
headers = {'User-Agent': USER_AGENT} | {} if headers is None else headers
|
||||||
request = urllib.request.Request(url, headers=headers)
|
with Session() as s:
|
||||||
|
s.mount('https://', HTTPAdapter(max_retries=Retry(total=retry_count, backoff_factor=0.2)))
|
||||||
for retry in range(0, retry_count):
|
r = s.get(url, headers=headers, data=data, timeout=timeout)
|
||||||
try:
|
return r.text
|
||||||
resp = urllib.request.urlopen(request, data=data, timeout=timeout)
|
|
||||||
return resp.read().decode(encoding)
|
|
||||||
except Exception as e:
|
|
||||||
last_exception = e
|
|
||||||
print(f"Fetch of {url} failed (retry={retry}), got: " + str(e))
|
|
||||||
continue
|
|
||||||
|
|
||||||
raise last_exception
|
|
||||||
|
|
||||||
|
|
||||||
def write_releases(product, releases, pathname="releases"):
|
def write_releases(product, releases, pathname="releases"):
|
||||||
|
|||||||
Reference in New Issue
Block a user