update HashJob to generate hashes for downloaded content

This commit is contained in:
Mike Fährmann
2015-12-21 22:49:04 +01:00
parent ecc6542fc8
commit 565ea042c9
2 changed files with 21 additions and 2 deletions

View File

@@ -174,14 +174,27 @@ class UrlJob(Job):
class HashJob(DownloadJob):
"""Generate SHA1 hashes for extractor results"""
def __init__(self, url):
class HashIO:
    """Write-only, file-like adapter that feeds written data into a hash.

    Anything passed to ``write()`` is forwarded to ``hashobj.update()``,
    so the object can stand in for an output file when only a digest of
    the written data is needed.
    """

    def __init__(self, hashobj):
        # hashobj: any object exposing ``update(data)`` (e.g. hashlib.sha1())
        self.hashobj = hashobj

    def write(self, content):
        """Add ``content`` to the hash and return its length.

        Returning the consumed length mirrors the contract of regular
        file objects, whose ``write()`` reports how much was written.
        """
        self.hashobj.update(content)
        return len(content)
def __init__(self, url, content=False):
    """Prepare a hashing job for ``url``.

    ``url`` is handed to ``DownloadJob.__init__``. ``content`` selects
    whether downloaded data is hashed as well: when it is truthy, a
    ``HashIO`` sink wrapping ``hash_content`` is stored as ``fileobj``.
    """
    DownloadJob.__init__(self, url)

    # Separate SHA1 accumulators for URLs, keyword data and file content
    self.hash_url = hashlib.sha1()
    self.hash_keyword = hashlib.sha1()
    self.hash_content = hashlib.sha1()

    self.content = content
    if content:
        # Downloads get written into the content hash instead of a file
        self.fileobj = self.HashIO(self.hash_content)
def download(self, msg):
    """Fold a download message into the URL, keyword and content hashes.

    ``msg[1]`` is passed to ``update_url`` and ``update_content``;
    ``msg[2]`` is passed to ``update_keyword``.
    NOTE(review): presumably msg is (type, url, keywords) - confirm
    against the message producer.
    """
    url = msg[1]
    self.update_url(url)
    self.update_keyword(msg[2])
    self.update_content(url)
def set_directory(self, msg):
self.update_keyword(msg[1])
@@ -196,3 +209,7 @@ class HashJob(DownloadJob):
self.hash_keyword.update(
json.dumps(kwdict, sort_keys=True).encode()
)
def update_content(self, url):
    """Hash the data behind ``url`` when content hashing is enabled.

    Does nothing unless ``self.content`` is truthy. Otherwise the
    downloader returned by ``get_downloader(url)`` is asked to download
    ``url`` into ``self.fileobj``.
    """
    if not self.content:
        return
    downloader = self.get_downloader(url)
    downloader.download(url, self.fileobj)

View File

@@ -23,12 +23,14 @@ class TestExtractors(unittest.TestCase):
self.run_test(url, result)
def run_test(self, url, result):
    """Run a HashJob for ``url`` and compare its digests with ``result``.

    ``result`` is a mapping that may contain the keys ``"url"``,
    ``"keyword"`` and ``"content"``; each present key holds the expected
    hex digest of the corresponding hash. Content hashing is only enabled
    when an expected ``"content"`` digest is given.
    """
    # Build the job exactly once, with content hashing chosen up front
    hjob = jobs.HashJob(url, "content" in result)
    hjob.run()
    if "url" in result:
        self.assertEqual(hjob.hash_url.hexdigest(), result["url"])
    if "keyword" in result:
        self.assertEqual(hjob.hash_keyword.hexdigest(), result["keyword"])
    if "content" in result:
        self.assertEqual(hjob.hash_content.hexdigest(), result["content"])
if __name__ == '__main__':
unittest.main(warnings='ignore')