[4chan] detect files containing only null bytes (#7883)
This commit is contained in:
@@ -44,11 +44,23 @@ class _4chanThreadExtractor(Extractor):
|
||||
post.update(data)
|
||||
post["extension"] = post["ext"][1:]
|
||||
post["filename"] = text.unescape(post["filename"])
|
||||
post["_http_signature"] = _detect_null_byte
|
||||
url = (f"https://i.4cdn.org"
|
||||
f"/{post['board']}/{post['tim']}{post['ext']}")
|
||||
yield Message.Url, url, post
|
||||
|
||||
|
||||
def _detect_null_byte(signature):
|
||||
"""Return False if all file signature bytes are null"""
|
||||
if signature:
|
||||
if signature[0]:
|
||||
return True
|
||||
for byte in signature:
|
||||
if byte:
|
||||
return True
|
||||
return "File data consists of null bytes"
|
||||
|
||||
|
||||
class _4chanBoardExtractor(Extractor):
|
||||
"""Extractor for 4chan boards"""
|
||||
category = "4chan"
|
||||
|
||||
@@ -26,6 +26,16 @@ __tests__ = (
|
||||
"#sha1_metadata": "2cadd32796492baca25f5060dc95e9f4e24a0ff2",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://boards.4chan.org/wg/thread/8010591",
|
||||
"#comment" : "file contents filled with null bytes (#7883)",
|
||||
"#class" : _4chan._4chanThreadExtractor,
|
||||
"#range" : "1",
|
||||
"#log" : "File data consists of null bytes",
|
||||
"#results" : "https://i.4cdn.org/wg/1694023485631944.jpg",
|
||||
"#sha1_content": "da39a3ee5e6b4b0d3255bfef95601890afd80709",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://boards.4channel.org/po/",
|
||||
"#category": ("", "4chan", "board"),
|
||||
|
||||
Reference in New Issue
Block a user