[bellazon] improve 'filename' & 'extension' (#8544)

https://github.com/mikf/gallery-dl/issues/8544#issuecomment-3522617104

- use 'extension' from file URL (except attachments)
- strip the first '.' and everything after it from non-URL filenames
This commit is contained in:
Mike Fährmann
2025-11-12 18:38:44 +01:00
parent da47214ca0
commit d7e1351987
2 changed files with 19 additions and 4 deletions

View File

@@ -62,11 +62,12 @@ class BellazonExtractor(Extractor):
data["num_internal"] += 1
if not (alt := text.extr(info, ' alt="', '"')) or (
alt.startswith("post-") and "_thumb." in alt):
name = url
dc = text.nameext_from_url(url, data.copy())
else:
name = text.unescape(alt)
dc = data.copy()
dc["name"] = name = text.unescape(alt)
dc["filename"] = name.partition(".")[0]
dc = text.nameext_from_url(name, data.copy())
dc["id"] = text.extr(info, 'data-fileid="', '"')
if ext := text.extr(info, 'data-fileext="', '"'):
dc["extension"] = ext
@@ -75,7 +76,10 @@ class BellazonExtractor(Extractor):
dc["id"] = \
url.rpartition("?id=")[2].partition("&")[0]
if name := text.extr(info, ">", "<").strip():
text.nameext_from_url(name, dc)
dc["name"] = name = text.unescape(name)
text.nameext_from_name(name, dc)
else:
dc["extension"] = text.ext_from_url(url)
if url[0] == "/":
url = f"https:{url}"

View File

@@ -237,6 +237,17 @@ __tests__ = (
"id" : "14418097",
},
{
"#url" : "https://www.bellazon.com/main/topic/4322-candids/page/1066/#comment-3956772",
"#comment" : "weird/wrong 'filename' & 'extension' (#8544)",
"#class" : bellazon.BellazonPostExtractor,
"#count" : 16,
"extension" : "jpg",
"filename" : r"re:^[^.]+$",
"id" : r"re:^\d+$",
},
{
"#url" : "https://www.bellazon.com/main/topic/57872-millie-brady/",
"#class" : bellazon.BellazonThreadExtractor,