[xenforo] improve 'attachment' extraction (#8947)
This commit is contained in:
@@ -46,7 +46,9 @@ class XenforoExtractor(BaseExtractor):
|
|||||||
for post in self.posts():
|
for post in self.posts():
|
||||||
urls = extract_urls(post["content"])
|
urls = extract_urls(post["content"])
|
||||||
if post["attachments"]:
|
if post["attachments"]:
|
||||||
urls.extend(extract_urls(post["attachments"]))
|
for att in text.extract_iter(
|
||||||
|
post["attachments"], "<li", "</li>"):
|
||||||
|
urls.append((None, att[att.find('href="')+6:], None, None))
|
||||||
|
|
||||||
data = {"post": post}
|
data = {"post": post}
|
||||||
post["count"] = data["count"] = len(urls)
|
post["count"] = data["count"] = len(urls)
|
||||||
@@ -93,7 +95,8 @@ class XenforoExtractor(BaseExtractor):
|
|||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
id_last = id
|
id_last = id
|
||||||
if alt := text.extr(inline, 'alt="', '"'):
|
if alt := (text.extr(inline, 'alt="', '"') or
|
||||||
|
text.extr(inline, 'title="', '"')):
|
||||||
text.nameext_from_name(alt, data)
|
text.nameext_from_name(alt, data)
|
||||||
if not data["extension"]:
|
if not data["extension"]:
|
||||||
data["extension"] = name.rpartition("-")[2]
|
data["extension"] = name.rpartition("-")[2]
|
||||||
|
|||||||
@@ -37,9 +37,9 @@ __tests__ = (
|
|||||||
"content" : str,
|
"content" : str,
|
||||||
},
|
},
|
||||||
"thread" : {
|
"thread" : {
|
||||||
"author" : "Iomflj",
|
"author" : str,
|
||||||
"author_id" : "",
|
"author_id" : "",
|
||||||
"author_slug": "iomflj",
|
"author_slug": str,
|
||||||
"author_url" : "",
|
"author_url" : "",
|
||||||
"date" : "dt:2024-01-29 19:56:27",
|
"date" : "dt:2024-01-29 19:56:27",
|
||||||
"id" : "84947",
|
"id" : "84947",
|
||||||
@@ -75,9 +75,9 @@ __tests__ = (
|
|||||||
"content" : str
|
"content" : str
|
||||||
},
|
},
|
||||||
"thread" : {
|
"thread" : {
|
||||||
"author" : "Iomflj",
|
"author" : str,
|
||||||
"author_id" : "",
|
"author_id" : "",
|
||||||
"author_slug": "iomflj",
|
"author_slug": str,
|
||||||
"author_url" : "",
|
"author_url" : "",
|
||||||
"date" : "dt:2024-01-29 19:56:27",
|
"date" : "dt:2024-01-29 19:56:27",
|
||||||
"id" : "84947",
|
"id" : "84947",
|
||||||
|
|||||||
@@ -68,10 +68,10 @@ __tests__ = (
|
|||||||
"#auth" : True,
|
"#auth" : True,
|
||||||
"#results" : (
|
"#results" : (
|
||||||
"https://jpg6.su/img/NNFssUg",
|
"https://jpg6.su/img/NNFssUg",
|
||||||
"https://turbovid.cr/embed/nPy1kG3w55V",
|
"https://turbo.cr/embed/nPy1kG3w55V",
|
||||||
"https://turbovid.cr/embed/c0KhPjU4-F3",
|
"https://turbo.cr/embed/c0KhPjU4-F3",
|
||||||
"https://turbovid.cr/embed/sZWnVZ_mQsV",
|
"https://turbo.cr/embed/sZWnVZ_mQsV",
|
||||||
"https://turbovid.cr/embed/MEBiLx6DETQ",
|
"https://turbo.cr/embed/MEBiLx6DETQ",
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
|
|
||||||
@@ -245,7 +245,7 @@ __tests__ = (
|
|||||||
"#category": ("xenforo", "simpcity", "thread"),
|
"#category": ("xenforo", "simpcity", "thread"),
|
||||||
"#class" : xenforo.XenforoThreadExtractor,
|
"#class" : xenforo.XenforoThreadExtractor,
|
||||||
"#auth" : True,
|
"#auth" : True,
|
||||||
"#pattern" : r"https://(jpg6\.su/img/\w+|bunkr\.\w+/[fiv]/\w+|pixeldrain.com/l/\w+|alua.com/tatakai)|turbovid.cr/embed",
|
"#pattern" : r"https://(jpg6\.su/img/\w+|bunkr\.\w+/[fiv]/\w+|pixeldrain.com/l/\w+|alua.com/tatakai)|turbo(vid)?.cr/embed",
|
||||||
"#count" : range(100, 300),
|
"#count" : range(100, 300),
|
||||||
|
|
||||||
"count" : int,
|
"count" : int,
|
||||||
|
|||||||
@@ -60,6 +60,34 @@ __tests__ = (
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://titsintops.com/phpBB2/threads/sofi_zeus-sexy-curvy-big-titty-camgirl.13586747/post-3443278",
|
||||||
|
"#comment" : "video attachment (#8947)",
|
||||||
|
"#category": ("xenforo", "titsintops", "post"),
|
||||||
|
"#class" : xenforo.XenforoPostExtractor,
|
||||||
|
"#auth" : True,
|
||||||
|
"#results" : (
|
||||||
|
"https://chaturbate.com/sofi_zeus/",
|
||||||
|
"https://titsintops.com/phpBB2/data/video/6436/6436292-e23925aebd8cd253097bbee0f884cf52.mp4?hash=wbvMEhEJZC",
|
||||||
|
"https://titsintops.com/phpBB2/attachments/sofi-webp.6512452/",
|
||||||
|
),
|
||||||
|
|
||||||
|
"thread" : {
|
||||||
|
"author" : "monsieurmoose",
|
||||||
|
"author_id" : "1358306",
|
||||||
|
"author_slug": "monsieurmoose",
|
||||||
|
"author_url" : "https://titsintops.com/phpBB2/members/monsieurmoose.1358306/",
|
||||||
|
"date" : "dt:2026-01-16 16:34:59",
|
||||||
|
"id" : "13586747",
|
||||||
|
"posts" : int,
|
||||||
|
"section" : "Busty Amateurs",
|
||||||
|
"tags" : (),
|
||||||
|
"title" : "sofi_zeus - sexy curvy big titty camgirl",
|
||||||
|
"url" : "https://titsintops.com/phpBB2/threads/sofi_zeus-sexy-curvy-big-titty-camgirl.13586747/",
|
||||||
|
"views" : int,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://titsintops.com/phpBB2/threads/mia-big-titty-boston-blonde.13575039/",
|
"#url" : "https://titsintops.com/phpBB2/threads/mia-big-titty-boston-blonde.13575039/",
|
||||||
"#category": ("xenforo", "titsintops", "thread"),
|
"#category": ("xenforo", "titsintops", "thread"),
|
||||||
|
|||||||
Reference in New Issue
Block a user