diff --git a/gallery_dl/extractor/4chan.py b/gallery_dl/extractor/4chan.py
index dcc42227..d81f3056 100644
--- a/gallery_dl/extractor/4chan.py
+++ b/gallery_dl/extractor/4chan.py
@@ -44,11 +44,21 @@ class _4chanThreadExtractor(Extractor):
                 post.update(data)
                 post["extension"] = post["ext"][1:]
                 post["filename"] = text.unescape(post["filename"])
+                post["_http_signature"] = _detect_null_byte
                 url = (f"https://i.4cdn.org"
                        f"/{post['board']}/{post['tim']}{post['ext']}")
                 yield Message.Url, url, post
 
 
+def _detect_null_byte(signature):
+    """Return True if 'signature' has a non-null byte, else an error message"""
+    # an empty or missing signature has no non-null byte either and falls
+    # through to the message; any() stops at the first non-null byte
+    if signature and any(signature):
+        return True
+    return "File data consists of null bytes"
+
+
 class _4chanBoardExtractor(Extractor):
     """Extractor for 4chan boards"""
     category = "4chan"
diff --git a/test/results/4chan.py b/test/results/4chan.py
index 231928ac..2453bb23 100644
--- a/test/results/4chan.py
+++ b/test/results/4chan.py
@@ -26,6 +26,16 @@ __tests__ = (
     "#sha1_metadata": "2cadd32796492baca25f5060dc95e9f4e24a0ff2",
 },
 
+{
+    "#url"     : "https://boards.4chan.org/wg/thread/8010591",
+    "#comment" : "file contents filled with null bytes (#7883)",
+    "#class"   : _4chan._4chanThreadExtractor,
+    "#range"   : "1",
+    "#log"     : "File data consists of null bytes",
+    "#results" : "https://i.4cdn.org/wg/1694023485631944.jpg",
+    "#sha1_content": "da39a3ee5e6b4b0d3255bfef95601890afd80709",
+},
+
 {
     "#url"     : "https://boards.4channel.org/po/",
     "#category": ("", "4chan", "board"),