[text] reject long filename extensions (#8491)

fixes regression introduced in 3252ead7c7
ref bc868e7bb8
This commit is contained in:
Mike Fährmann
2025-11-01 10:34:11 +01:00
parent 3848a912dc
commit 37aa7337dc
3 changed files with 16 additions and 1 deletions

View File

@@ -125,7 +125,7 @@ def nameext_from_name(filename, data=None):
data = {}
name, _, ext = filename.rpartition(".")
if name:
if name and len(ext) <= 16:
data["filename"] = name
data["extension"] = ext.lower()
else:

View File

@@ -549,6 +549,17 @@ __tests__ = (
"username" : util.NONE,
},
{
"#url" : "https://kemono.cr/patreon/user/2570882/post/79311665",
"#comment" : "patreon file URL as 'name' / long 'extension' (#8491)",
"#category": ("", "kemono", "patreon"),
"#class" : kemono.KemonoPostExtractor,
"name" : "https://www.patreon.com/media-u/Z0FBQUFBQmpfWFNLWHpRakFlYjVNeWpuTlRuRnJBdHY3VVA2UmRhVHFpOFBHMW9QZUdVOHQ3b2pXSV9XMkJlaHFuN2JyVk5VNDBqdV9lZVRLR2NkUXUwSjgwdndDQlk3VzBCUXI5TW5iejlVWVZaUmJoTktIX3B5aGVCS3dUQk11a2hxajd4TUx2MFN2UHpKa0pfOWZQeS1UeDlzNEhpbG9pRzJsZE54MG5OcnZDOUllTGhyY01rNjVRaGgyaVFycjFSUUFIaV92OU9wdktuVjlMeFJNLXhYejdDNWZTVXZEc2l0TVZCR1A0YXM3RVMzbmsxSjh2ND0=#190833153_",
"filename" : "https://www.patreon.com/media-u/Z0FBQUFBQmpfWFNLWHpRakFlYjVNeWpuTlRuRnJBdHY3VVA2UmRhVHFpOFBHMW9QZUdVOHQ3b2pXSV9XMkJlaHFuN2JyVk5VNDBqdV9lZVRLR2NkUXUwSjgwdndDQlk3VzBCUXI5TW5iejlVWVZaUmJoTktIX3B5aGVCS3dUQk11a2hxajd4TUx2MFN2UHpKa0pfOWZQeS1UeDlzNEhpbG9pRzJsZE54MG5OcnZDOUllTGhyY01rNjVRaGgyaVFycjFSUUFIaV92OU9wdktuVjlMeFJNLXhYejdDNWZTVXZEc2l0TVZCR1A0YXM3RVMzbmsxSjh2ND0=#190833153_",
"extension": "jpg",
},
{
"#url" : "https://kemono.cr/discord/server/488668827274444803/608504710906904576",
"#category": ("", "kemono", "discord"),

View File

@@ -228,6 +228,10 @@ class TestText(unittest.TestCase):
{"filename": "foo%202?bar&<>", "extension": "ext"},
)
# long "extension" (more than 16 characters after the last "."): keep the whole name as filename
fn = "httpswww.example.orgpath-path-path-path-path-path-path-path"
self.assertEqual(f(fn), {"filename": fn, "extension": ""})
def test_extract(self, f=text.extract):
txt = "<a><b>"
self.assertEqual(f(txt, "<", ">"), ("a" , 3))