[xenforo] improve 'attachment' extraction (#8947)

This commit is contained in:
Mike Fährmann
2026-01-28 11:47:38 +01:00
parent aa8610c11c
commit d9917ec630
4 changed files with 42 additions and 11 deletions

View File

@@ -46,7 +46,9 @@ class XenforoExtractor(BaseExtractor):
for post in self.posts():
urls = extract_urls(post["content"])
if post["attachments"]:
- urls.extend(extract_urls(post["attachments"]))
+ for att in text.extract_iter(
+ post["attachments"], "<li", "</li>"):
+ urls.append((None, att[att.find('href="')+6:], None, None))
data = {"post": post}
post["count"] = data["count"] = len(urls)
@@ -93,7 +95,8 @@ class XenforoExtractor(BaseExtractor):
continue
else:
id_last = id
- if alt := text.extr(inline, 'alt="', '"'):
+ if alt := (text.extr(inline, 'alt="', '"') or
+ text.extr(inline, 'title="', '"')):
text.nameext_from_name(alt, data)
if not data["extension"]:
data["extension"] = name.rpartition("-")[2]