[patreon] improve hash extraction (#693, #713)

Instead of accessing a specific part of a download URL, potentially
causing an exception if it doesn't exist, we're now searching through
all parts for a potential MD5 hash without ever raising an exception.
This commit is contained in:
Mike Fährmann
2020-04-28 21:40:22 +02:00
parent c56a751dae
commit 46b9a4d8ff

View File

@@ -47,8 +47,8 @@ class PatreonExtractor(Extractor):
self._attachments(post),
self._content(post),
):
fhash = url.split("/")[9].partition("?")[0]
if fhash not in hashes:
fhash = self._filehash(url)
if fhash not in hashes or not fhash:
hashes.add(fhash)
post["hash"] = fhash
post["type"] = kind
@@ -158,11 +158,22 @@ class PatreonExtractor(Extractor):
return attr
def _filename(self, url):
"""Fetch filename from its Content-Disposition header"""
"""Fetch filename from an URL's Content-Disposition header"""
response = self.request(url, method="HEAD", fatal=False)
cd = response.headers.get("Content-Disposition")
return text.extract(cd, 'filename="', '"')[0]
@staticmethod
def _filehash(url):
    """Extract MD5 hash from a download URL

    Everything from the first "?" onwards is discarded, and the remaining
    text is split on "/". The segments are inspected from last to first,
    and the first one that looks like an MD5 hex digest (exactly 32
    hexadecimal characters) is returned unchanged.

    Returns an empty string when no segment qualifies; this function
    itself never raises for a 'str' argument.
    """
    hexdigits = frozenset("0123456789abcdefABCDEF")
    path = url.partition("?")[0]

    for part in reversed(path.split("/")):
        # The length check alone is not enough: a 32-character file name
        # at the end of the path would otherwise shadow the real hash.
        if len(part) == 32 and hexdigits.issuperset(part):
            return part
    return ""
@staticmethod
def _build_url(endpoint, query):
return (