[downloader:http] refactor file signature checks
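use functions/lambdas instead of startswith(), so that a check can inspect bytes beyond the leading prefix (e.g. telling "webp" and "wav" apart, which both start with b"RIFF" and differ only at bytes 8-12)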
This commit is contained in:
@@ -189,7 +189,7 @@ class HttpDownloader(DownloaderBase):
|
|||||||
|
|
||||||
# check filename extension against file header
|
# check filename extension against file header
|
||||||
if adjust_extension and not offset and \
|
if adjust_extension and not offset and \
|
||||||
pathfmt.extension in FILE_SIGNATURES:
|
pathfmt.extension in SIGNATURE_CHECKS:
|
||||||
try:
|
try:
|
||||||
file_header = next(
|
file_header = next(
|
||||||
content if response.raw.chunked
|
content if response.raw.chunked
|
||||||
@@ -220,7 +220,7 @@ class HttpDownloader(DownloaderBase):
|
|||||||
offset += len(file_header)
|
offset += len(file_header)
|
||||||
elif offset:
|
elif offset:
|
||||||
if adjust_extension and \
|
if adjust_extension and \
|
||||||
pathfmt.extension in FILE_SIGNATURES:
|
pathfmt.extension in SIGNATURE_CHECKS:
|
||||||
self._adjust_extension(pathfmt, fp.read(16))
|
self._adjust_extension(pathfmt, fp.read(16))
|
||||||
fp.seek(offset)
|
fp.seek(offset)
|
||||||
|
|
||||||
@@ -308,10 +308,9 @@ class HttpDownloader(DownloaderBase):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def _adjust_extension(pathfmt, file_header):
|
def _adjust_extension(pathfmt, file_header):
|
||||||
"""Check filename extension against file header"""
|
"""Check filename extension against file header"""
|
||||||
sig = FILE_SIGNATURES[pathfmt.extension]
|
if not SIGNATURE_CHECKS[pathfmt.extension](file_header):
|
||||||
if not file_header.startswith(sig):
|
for ext, check in SIGNATURE_CHECKS.items():
|
||||||
for ext, sig in FILE_SIGNATURES.items():
|
if check(file_header):
|
||||||
if file_header.startswith(sig):
|
|
||||||
pathfmt.set_extension(ext)
|
pathfmt.set_extension(ext)
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
@@ -362,27 +361,30 @@ MIME_TYPES = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
# https://en.wikipedia.org/wiki/List_of_file_signatures
|
# https://en.wikipedia.org/wiki/List_of_file_signatures
|
||||||
FILE_SIGNATURES = {
|
SIGNATURE_CHECKS = {
|
||||||
"jpg" : b"\xFF\xD8\xFF",
|
"jpg" : lambda s: s[0:3] == b"\xFF\xD8\xFF",
|
||||||
"png" : b"\x89PNG\r\n\x1A\n",
|
"png" : lambda s: s[0:8] == b"\x89PNG\r\n\x1A\n",
|
||||||
"gif" : (b"GIF87a", b"GIF89a"),
|
"gif" : lambda s: s[0:6] in (b"GIF87a", b"GIF89a"),
|
||||||
"bmp" : b"BM",
|
"bmp" : lambda s: s[0:2] == b"BM",
|
||||||
"webp": b"RIFF",
|
"webp": lambda s: (s[0:4] == b"RIFF" and
|
||||||
"svg" : b"<?xml",
|
s[8:12] == b"WEBP"),
|
||||||
"ico" : b"\x00\x00\x01\x00",
|
"svg" : lambda s: s[0:5] == b"<?xml",
|
||||||
"cur" : b"\x00\x00\x02\x00",
|
"ico" : lambda s: s[0:4] == b"\x00\x00\x01\x00",
|
||||||
"psd" : b"8BPS",
|
"cur" : lambda s: s[0:4] == b"\x00\x00\x02\x00",
|
||||||
"webm": b"\x1A\x45\xDF\xA3",
|
"psd" : lambda s: s[0:4] == b"8BPS",
|
||||||
"ogg" : b"OggS",
|
"webm": lambda s: s[0:4] == b"\x1A\x45\xDF\xA3",
|
||||||
"wav" : b"RIFF",
|
"ogg" : lambda s: s[0:4] == b"OggS",
|
||||||
"mp3" : (b"\xFF\xFB", b"\xFF\xF3", b"\xFF\xF2", b"ID3"),
|
"wav" : lambda s: (s[0:4] == b"RIFF" and
|
||||||
"zip" : (b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08"),
|
s[8:12] == b"WAVE"),
|
||||||
"rar" : b"\x52\x61\x72\x21\x1A\x07",
|
"mp3" : lambda s: (s[0:3] == b"ID3" or
|
||||||
"7z" : b"\x37\x7A\xBC\xAF\x27\x1C",
|
s[0:2] in (b"\xFF\xFB", b"\xFF\xF3", b"\xFF\xF2")),
|
||||||
"pdf" : b"%PDF-",
|
"zip" : lambda s: s[0:4] in (b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08"),
|
||||||
"swf" : (b"CWS", b"FWS"),
|
"rar" : lambda s: s[0:6] == b"\x52\x61\x72\x21\x1A\x07",
|
||||||
|
"7z" : lambda s: s[0:6] == b"\x37\x7A\xBC\xAF\x27\x1C",
|
||||||
|
"pdf" : lambda s: s[0:5] == b"%PDF-",
|
||||||
|
"swf" : lambda s: s[0:3] in (b"CWS", b"FWS"),
|
||||||
# check 'bin' files against all other file signatures
|
# check 'bin' files against all other file signatures
|
||||||
"bin" : b"\x00\x00\x00\x00\x00\x00\x00\x00",
|
"bin" : lambda s: False,
|
||||||
}
|
}
|
||||||
|
|
||||||
__downloader__ = HttpDownloader
|
__downloader__ = HttpDownloader
|
||||||
|
|||||||
Reference in New Issue
Block a user