[xenforo] improve 'attachment' extraction (#8947)

This commit is contained in:
Mike Fährmann
2026-01-28 11:47:38 +01:00
parent aa8610c11c
commit d9917ec630
4 changed files with 42 additions and 11 deletions

View File

@@ -46,7 +46,9 @@ class XenforoExtractor(BaseExtractor):
for post in self.posts():
urls = extract_urls(post["content"])
if post["attachments"]:
urls.extend(extract_urls(post["attachments"]))
for att in text.extract_iter(
post["attachments"], "<li", "</li>"):
urls.append((None, att[att.find('href="')+6:], None, None))
data = {"post": post}
post["count"] = data["count"] = len(urls)
@@ -93,7 +95,8 @@ class XenforoExtractor(BaseExtractor):
continue
else:
id_last = id
if alt := text.extr(inline, 'alt="', '"'):
if alt := (text.extr(inline, 'alt="', '"') or
text.extr(inline, 'title="', '"')):
text.nameext_from_name(alt, data)
if not data["extension"]:
data["extension"] = name.rpartition("-")[2]

View File

@@ -37,9 +37,9 @@ __tests__ = (
"content" : str,
},
"thread" : {
"author" : "Iomflj",
"author" : str,
"author_id" : "",
"author_slug": "iomflj",
"author_slug": str,
"author_url" : "",
"date" : "dt:2024-01-29 19:56:27",
"id" : "84947",
@@ -75,9 +75,9 @@ __tests__ = (
"content" : str
},
"thread" : {
"author" : "Iomflj",
"author" : str,
"author_id" : "",
"author_slug": "iomflj",
"author_slug": str,
"author_url" : "",
"date" : "dt:2024-01-29 19:56:27",
"id" : "84947",

View File

@@ -68,10 +68,10 @@ __tests__ = (
"#auth" : True,
"#results" : (
"https://jpg6.su/img/NNFssUg",
"https://turbovid.cr/embed/nPy1kG3w55V",
"https://turbovid.cr/embed/c0KhPjU4-F3",
"https://turbovid.cr/embed/sZWnVZ_mQsV",
"https://turbovid.cr/embed/MEBiLx6DETQ",
"https://turbo.cr/embed/nPy1kG3w55V",
"https://turbo.cr/embed/c0KhPjU4-F3",
"https://turbo.cr/embed/sZWnVZ_mQsV",
"https://turbo.cr/embed/MEBiLx6DETQ",
),
},
@@ -245,7 +245,7 @@ __tests__ = (
"#category": ("xenforo", "simpcity", "thread"),
"#class" : xenforo.XenforoThreadExtractor,
"#auth" : True,
"#pattern" : r"https://(jpg6\.su/img/\w+|bunkr\.\w+/[fiv]/\w+|pixeldrain.com/l/\w+|alua.com/tatakai)|turbovid.cr/embed",
"#pattern" : r"https://(jpg6\.su/img/\w+|bunkr\.\w+/[fiv]/\w+|pixeldrain.com/l/\w+|alua.com/tatakai)|turbo(vid)?.cr/embed",
"#count" : range(100, 300),
"count" : int,

View File

@@ -60,6 +60,34 @@ __tests__ = (
},
},
{
"#url" : "https://titsintops.com/phpBB2/threads/sofi_zeus-sexy-curvy-big-titty-camgirl.13586747/post-3443278",
"#comment" : "video attachment (#8947)",
"#category": ("xenforo", "titsintops", "post"),
"#class" : xenforo.XenforoPostExtractor,
"#auth" : True,
"#results" : (
"https://chaturbate.com/sofi_zeus/",
"https://titsintops.com/phpBB2/data/video/6436/6436292-e23925aebd8cd253097bbee0f884cf52.mp4?hash=wbvMEhEJZC",
"https://titsintops.com/phpBB2/attachments/sofi-webp.6512452/",
),
"thread" : {
"author" : "monsieurmoose",
"author_id" : "1358306",
"author_slug": "monsieurmoose",
"author_url" : "https://titsintops.com/phpBB2/members/monsieurmoose.1358306/",
"date" : "dt:2026-01-16 16:34:59",
"id" : "13586747",
"posts" : int,
"section" : "Busty Amateurs",
"tags" : (),
"title" : "sofi_zeus - sexy curvy big titty camgirl",
"url" : "https://titsintops.com/phpBB2/threads/sofi_zeus-sexy-curvy-big-titty-camgirl.13586747/",
"views" : int,
},
},
{
"#url" : "https://titsintops.com/phpBB2/threads/mia-big-titty-boston-blonde.13575039/",
"#category": ("xenforo", "titsintops", "thread"),