[xenforo] improve 'attachment' extraction (#8947)

This commit is contained in:
Mike Fährmann
2026-01-28 11:47:38 +01:00
parent aa8610c11c
commit d9917ec630
4 changed files with 42 additions and 11 deletions

View File

@@ -46,7 +46,9 @@ class XenforoExtractor(BaseExtractor):
for post in self.posts(): for post in self.posts():
urls = extract_urls(post["content"]) urls = extract_urls(post["content"])
if post["attachments"]: if post["attachments"]:
urls.extend(extract_urls(post["attachments"])) for att in text.extract_iter(
post["attachments"], "<li", "</li>"):
urls.append((None, att[att.find('href="')+6:], None, None))
data = {"post": post} data = {"post": post}
post["count"] = data["count"] = len(urls) post["count"] = data["count"] = len(urls)
@@ -93,7 +95,8 @@ class XenforoExtractor(BaseExtractor):
continue continue
else: else:
id_last = id id_last = id
if alt := text.extr(inline, 'alt="', '"'): if alt := (text.extr(inline, 'alt="', '"') or
text.extr(inline, 'title="', '"')):
text.nameext_from_name(alt, data) text.nameext_from_name(alt, data)
if not data["extension"]: if not data["extension"]:
data["extension"] = name.rpartition("-")[2] data["extension"] = name.rpartition("-")[2]

View File

@@ -37,9 +37,9 @@ __tests__ = (
"content" : str, "content" : str,
}, },
"thread" : { "thread" : {
"author" : "Iomflj", "author" : str,
"author_id" : "", "author_id" : "",
"author_slug": "iomflj", "author_slug": str,
"author_url" : "", "author_url" : "",
"date" : "dt:2024-01-29 19:56:27", "date" : "dt:2024-01-29 19:56:27",
"id" : "84947", "id" : "84947",
@@ -75,9 +75,9 @@ __tests__ = (
"content" : str "content" : str
}, },
"thread" : { "thread" : {
"author" : "Iomflj", "author" : str,
"author_id" : "", "author_id" : "",
"author_slug": "iomflj", "author_slug": str,
"author_url" : "", "author_url" : "",
"date" : "dt:2024-01-29 19:56:27", "date" : "dt:2024-01-29 19:56:27",
"id" : "84947", "id" : "84947",

View File

@@ -68,10 +68,10 @@ __tests__ = (
"#auth" : True, "#auth" : True,
"#results" : ( "#results" : (
"https://jpg6.su/img/NNFssUg", "https://jpg6.su/img/NNFssUg",
"https://turbovid.cr/embed/nPy1kG3w55V", "https://turbo.cr/embed/nPy1kG3w55V",
"https://turbovid.cr/embed/c0KhPjU4-F3", "https://turbo.cr/embed/c0KhPjU4-F3",
"https://turbovid.cr/embed/sZWnVZ_mQsV", "https://turbo.cr/embed/sZWnVZ_mQsV",
"https://turbovid.cr/embed/MEBiLx6DETQ", "https://turbo.cr/embed/MEBiLx6DETQ",
), ),
}, },
@@ -245,7 +245,7 @@ __tests__ = (
"#category": ("xenforo", "simpcity", "thread"), "#category": ("xenforo", "simpcity", "thread"),
"#class" : xenforo.XenforoThreadExtractor, "#class" : xenforo.XenforoThreadExtractor,
"#auth" : True, "#auth" : True,
"#pattern" : r"https://(jpg6\.su/img/\w+|bunkr\.\w+/[fiv]/\w+|pixeldrain.com/l/\w+|alua.com/tatakai)|turbovid.cr/embed", "#pattern" : r"https://(jpg6\.su/img/\w+|bunkr\.\w+/[fiv]/\w+|pixeldrain.com/l/\w+|alua.com/tatakai)|turbo(vid)?.cr/embed",
"#count" : range(100, 300), "#count" : range(100, 300),
"count" : int, "count" : int,

View File

@@ -60,6 +60,34 @@ __tests__ = (
}, },
}, },
{
"#url" : "https://titsintops.com/phpBB2/threads/sofi_zeus-sexy-curvy-big-titty-camgirl.13586747/post-3443278",
"#comment" : "video attachment (#8947)",
"#category": ("xenforo", "titsintops", "post"),
"#class" : xenforo.XenforoPostExtractor,
"#auth" : True,
"#results" : (
"https://chaturbate.com/sofi_zeus/",
"https://titsintops.com/phpBB2/data/video/6436/6436292-e23925aebd8cd253097bbee0f884cf52.mp4?hash=wbvMEhEJZC",
"https://titsintops.com/phpBB2/attachments/sofi-webp.6512452/",
),
"thread" : {
"author" : "monsieurmoose",
"author_id" : "1358306",
"author_slug": "monsieurmoose",
"author_url" : "https://titsintops.com/phpBB2/members/monsieurmoose.1358306/",
"date" : "dt:2026-01-16 16:34:59",
"id" : "13586747",
"posts" : int,
"section" : "Busty Amateurs",
"tags" : (),
"title" : "sofi_zeus - sexy curvy big titty camgirl",
"url" : "https://titsintops.com/phpBB2/threads/sofi_zeus-sexy-curvy-big-titty-camgirl.13586747/",
"views" : int,
},
},
{ {
"#url" : "https://titsintops.com/phpBB2/threads/mia-big-titty-boston-blonde.13575039/", "#url" : "https://titsintops.com/phpBB2/threads/mia-big-titty-boston-blonde.13575039/",
"#category": ("xenforo", "titsintops", "thread"), "#category": ("xenforo", "titsintops", "thread"),