update HashJob to generate hashes for downloaded content
This commit is contained in:
@@ -174,14 +174,27 @@ class UrlJob(Job):
|
|||||||
class HashJob(DownloadJob):
|
class HashJob(DownloadJob):
|
||||||
"""Generate SHA1 hashes for extractor results"""
|
"""Generate SHA1 hashes for extractor results"""
|
||||||
|
|
||||||
def __init__(self, url):
|
class HashIO():
|
||||||
|
|
||||||
|
def __init__(self, hashobj):
|
||||||
|
self.hashobj = hashobj
|
||||||
|
|
||||||
|
def write(self, content):
|
||||||
|
self.hashobj.update(content)
|
||||||
|
|
||||||
|
def __init__(self, url, content=False):
|
||||||
DownloadJob.__init__(self, url)
|
DownloadJob.__init__(self, url)
|
||||||
|
self.content = content
|
||||||
self.hash_url = hashlib.sha1()
|
self.hash_url = hashlib.sha1()
|
||||||
self.hash_keyword = hashlib.sha1()
|
self.hash_keyword = hashlib.sha1()
|
||||||
|
self.hash_content = hashlib.sha1()
|
||||||
|
if content:
|
||||||
|
self.fileobj = self.HashIO(self.hash_content)
|
||||||
|
|
||||||
def download(self, msg):
|
def download(self, msg):
|
||||||
self.update_url(msg[1])
|
self.update_url(msg[1])
|
||||||
self.update_keyword(msg[2])
|
self.update_keyword(msg[2])
|
||||||
|
self.update_content(msg[1])
|
||||||
|
|
||||||
def set_directory(self, msg):
|
def set_directory(self, msg):
|
||||||
self.update_keyword(msg[1])
|
self.update_keyword(msg[1])
|
||||||
@@ -196,3 +209,7 @@ class HashJob(DownloadJob):
|
|||||||
self.hash_keyword.update(
|
self.hash_keyword.update(
|
||||||
json.dumps(kwdict, sort_keys=True).encode()
|
json.dumps(kwdict, sort_keys=True).encode()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def update_content(self, url):
|
||||||
|
if self.content:
|
||||||
|
self.get_downloader(url).download(url, self.fileobj)
|
||||||
|
|||||||
@@ -23,12 +23,14 @@ class TestExtractors(unittest.TestCase):
|
|||||||
self.run_test(url, result)
|
self.run_test(url, result)
|
||||||
|
|
||||||
def run_test(self, url, result):
|
def run_test(self, url, result):
|
||||||
hjob = jobs.HashJob(url)
|
hjob = jobs.HashJob(url, "content" in result)
|
||||||
hjob.run()
|
hjob.run()
|
||||||
if "url" in result:
|
if "url" in result:
|
||||||
self.assertEqual(hjob.hash_url.hexdigest(), result["url"])
|
self.assertEqual(hjob.hash_url.hexdigest(), result["url"])
|
||||||
if "keyword" in result:
|
if "keyword" in result:
|
||||||
self.assertEqual(hjob.hash_keyword.hexdigest(), result["keyword"])
|
self.assertEqual(hjob.hash_keyword.hexdigest(), result["keyword"])
|
||||||
|
if "content" in result:
|
||||||
|
self.assertEqual(hjob.hash_content.hexdigest(), result["content"])
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main(warnings='ignore')
|
unittest.main(warnings='ignore')
|
||||||
|
|||||||
Reference in New Issue
Block a user