[xenforo] improve 'attachment' extraction (#8947)
This commit is contained in:
@@ -46,7 +46,9 @@ class XenforoExtractor(BaseExtractor):
|
||||
for post in self.posts():
|
||||
urls = extract_urls(post["content"])
|
||||
if post["attachments"]:
|
||||
urls.extend(extract_urls(post["attachments"]))
|
||||
for att in text.extract_iter(
|
||||
post["attachments"], "<li", "</li>"):
|
||||
urls.append((None, att[att.find('href="')+6:], None, None))
|
||||
|
||||
data = {"post": post}
|
||||
post["count"] = data["count"] = len(urls)
|
||||
@@ -93,7 +95,8 @@ class XenforoExtractor(BaseExtractor):
|
||||
continue
|
||||
else:
|
||||
id_last = id
|
||||
if alt := text.extr(inline, 'alt="', '"'):
|
||||
if alt := (text.extr(inline, 'alt="', '"') or
|
||||
text.extr(inline, 'title="', '"')):
|
||||
text.nameext_from_name(alt, data)
|
||||
if not data["extension"]:
|
||||
data["extension"] = name.rpartition("-")[2]
|
||||
|
||||
@@ -37,9 +37,9 @@ __tests__ = (
|
||||
"content" : str,
|
||||
},
|
||||
"thread" : {
|
||||
"author" : "Iomflj",
|
||||
"author" : str,
|
||||
"author_id" : "",
|
||||
"author_slug": "iomflj",
|
||||
"author_slug": str,
|
||||
"author_url" : "",
|
||||
"date" : "dt:2024-01-29 19:56:27",
|
||||
"id" : "84947",
|
||||
@@ -75,9 +75,9 @@ __tests__ = (
|
||||
"content" : str
|
||||
},
|
||||
"thread" : {
|
||||
"author" : "Iomflj",
|
||||
"author" : str,
|
||||
"author_id" : "",
|
||||
"author_slug": "iomflj",
|
||||
"author_slug": str,
|
||||
"author_url" : "",
|
||||
"date" : "dt:2024-01-29 19:56:27",
|
||||
"id" : "84947",
|
||||
|
||||
@@ -68,10 +68,10 @@ __tests__ = (
|
||||
"#auth" : True,
|
||||
"#results" : (
|
||||
"https://jpg6.su/img/NNFssUg",
|
||||
"https://turbovid.cr/embed/nPy1kG3w55V",
|
||||
"https://turbovid.cr/embed/c0KhPjU4-F3",
|
||||
"https://turbovid.cr/embed/sZWnVZ_mQsV",
|
||||
"https://turbovid.cr/embed/MEBiLx6DETQ",
|
||||
"https://turbo.cr/embed/nPy1kG3w55V",
|
||||
"https://turbo.cr/embed/c0KhPjU4-F3",
|
||||
"https://turbo.cr/embed/sZWnVZ_mQsV",
|
||||
"https://turbo.cr/embed/MEBiLx6DETQ",
|
||||
),
|
||||
},
|
||||
|
||||
@@ -245,7 +245,7 @@ __tests__ = (
|
||||
"#category": ("xenforo", "simpcity", "thread"),
|
||||
"#class" : xenforo.XenforoThreadExtractor,
|
||||
"#auth" : True,
|
||||
"#pattern" : r"https://(jpg6\.su/img/\w+|bunkr\.\w+/[fiv]/\w+|pixeldrain.com/l/\w+|alua.com/tatakai)|turbovid.cr/embed",
|
||||
"#pattern" : r"https://(jpg6\.su/img/\w+|bunkr\.\w+/[fiv]/\w+|pixeldrain.com/l/\w+|alua.com/tatakai)|turbo(vid)?.cr/embed",
|
||||
"#count" : range(100, 300),
|
||||
|
||||
"count" : int,
|
||||
|
||||
@@ -60,6 +60,34 @@ __tests__ = (
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://titsintops.com/phpBB2/threads/sofi_zeus-sexy-curvy-big-titty-camgirl.13586747/post-3443278",
|
||||
"#comment" : "video attachment (#8947)",
|
||||
"#category": ("xenforo", "titsintops", "post"),
|
||||
"#class" : xenforo.XenforoPostExtractor,
|
||||
"#auth" : True,
|
||||
"#results" : (
|
||||
"https://chaturbate.com/sofi_zeus/",
|
||||
"https://titsintops.com/phpBB2/data/video/6436/6436292-e23925aebd8cd253097bbee0f884cf52.mp4?hash=wbvMEhEJZC",
|
||||
"https://titsintops.com/phpBB2/attachments/sofi-webp.6512452/",
|
||||
),
|
||||
|
||||
"thread" : {
|
||||
"author" : "monsieurmoose",
|
||||
"author_id" : "1358306",
|
||||
"author_slug": "monsieurmoose",
|
||||
"author_url" : "https://titsintops.com/phpBB2/members/monsieurmoose.1358306/",
|
||||
"date" : "dt:2026-01-16 16:34:59",
|
||||
"id" : "13586747",
|
||||
"posts" : int,
|
||||
"section" : "Busty Amateurs",
|
||||
"tags" : (),
|
||||
"title" : "sofi_zeus - sexy curvy big titty camgirl",
|
||||
"url" : "https://titsintops.com/phpBB2/threads/sofi_zeus-sexy-curvy-big-titty-camgirl.13586747/",
|
||||
"views" : int,
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://titsintops.com/phpBB2/threads/mia-big-titty-boston-blonde.13575039/",
|
||||
"#category": ("xenforo", "titsintops", "thread"),
|
||||
|
||||
Reference in New Issue
Block a user