Instead of indexing a fixed path segment of a download URL, which raises an exception when that segment doesn't exist, we now search through all path segments for a potential MD5 hash and never raise an exception.
This commit is contained in:
@@ -47,8 +47,8 @@ class PatreonExtractor(Extractor):
|
|||||||
self._attachments(post),
|
self._attachments(post),
|
||||||
self._content(post),
|
self._content(post),
|
||||||
):
|
):
|
||||||
fhash = url.split("/")[9].partition("?")[0]
|
fhash = self._filehash(url)
|
||||||
if fhash not in hashes:
|
if fhash not in hashes or not fhash:
|
||||||
hashes.add(fhash)
|
hashes.add(fhash)
|
||||||
post["hash"] = fhash
|
post["hash"] = fhash
|
||||||
post["type"] = kind
|
post["type"] = kind
|
||||||
@@ -158,11 +158,22 @@ class PatreonExtractor(Extractor):
|
|||||||
return attr
|
return attr
|
||||||
|
|
||||||
def _filename(self, url):
|
def _filename(self, url):
|
||||||
"""Fetch filename from its Content-Disposition header"""
|
"""Fetch filename from an URL's Content-Disposition header"""
|
||||||
response = self.request(url, method="HEAD", fatal=False)
|
response = self.request(url, method="HEAD", fatal=False)
|
||||||
cd = response.headers.get("Content-Disposition")
|
cd = response.headers.get("Content-Disposition")
|
||||||
return text.extract(cd, 'filename="', '"')[0]
|
return text.extract(cd, 'filename="', '"')[0]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _filehash(url):
|
||||||
|
"""Extract MD5 hash from a download URL"""
|
||||||
|
parts = url.partition("?")[0].split("/")
|
||||||
|
parts.reverse()
|
||||||
|
|
||||||
|
for part in parts:
|
||||||
|
if len(part) == 32:
|
||||||
|
return part
|
||||||
|
return ""
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _build_url(endpoint, query):
|
def _build_url(endpoint, query):
|
||||||
return (
|
return (
|
||||||
|
|||||||
Reference in New Issue
Block a user