[simpcity] extract 'tiktok' media embeds (#8994)

This commit is contained in:
Mike Fährmann
2026-02-04 11:16:32 +01:00
parent b4351b8193
commit 9379397eec
2 changed files with 44 additions and 4 deletions

View File

@@ -9,7 +9,7 @@
"""Extractors for XenForo forums"""
from .common import BaseExtractor, Message
from .. import text, exception
from .. import text, util, exception
from ..cache import cache
import binascii
@@ -46,10 +46,10 @@ class XenforoExtractor(BaseExtractor):
base = root if (pos := root.find("/", 8)) < 0 else root[:pos]
for post in self.posts():
urls = extract_urls(post["content"])
if "data-s9e-mediaembed-iframe=" in post["content"]:
self._extract_embeds(urls, post)
if post["attachments"]:
for att in text.extract_iter(
post["attachments"], "<li", "</li>"):
urls.append((None, att[att.find('href="')+6:], None, None))
self._extract_attachments(urls, post)
data = {"post": post}
post["count"] = data["count"] = len(urls)
@@ -340,6 +340,37 @@ class XenforoExtractor(BaseExtractor):
data["author_id"] = data["author"][15:]
return data
def _extract_attachments(self, urls, post):
    """Append one entry to 'urls' per linked item in post["attachments"].

    post["attachments"] is scanned for '<li ... </li>' items. For every
    item containing an 'href="' attribute, a 4-tuple is appended whose
    second field is everything that follows 'href="' in that item
    (the remainder of the item is kept, not only the URL itself).
    Items without 'href="' are logged at debug level and skipped.
    """
    marker = 'href="'
    for item in text.extract_iter(post["attachments"], "<li", "</li>"):
        pos = item.find(marker)
        if pos < 0:
            # str.find() returns -1 when the marker is missing; slicing
            # at -1 + len(marker) would append an arbitrary piece of
            # markup instead of a link
            self.log.debug("Skipping attachment without 'href': %s", item)
            continue
        urls.append((None, item[pos + len(marker):], None, None))
def _extract_embeds(self, urls, post):
    """Append URLs of supported media embeds in post["content"] to 'urls'.

    The value of every "data-s9e-mediaembed-iframe='...'" attribute is
    parsed as a JSON array of alternating names and values and turned
    into a dict. Only embeds whose 'data-s9e-mediaembed' value is
    'tiktok' are supported: the video ID is taken from the part of 'src'
    after its last '#'. For each resolved embed, a 4-tuple with the
    resulting URL as fourth field is appended to 'urls'.

    Embeds that cannot be parsed, have no usable 'src', have no video ID,
    or are of an unsupported type are logged and skipped.
    """
    for embed in text.extract_iter(
            post["content"], "data-s9e-mediaembed-iframe='", "'"):
        try:
            items = iter(util.json_loads(embed))
            # pair up [name, value, name, value, ...];
            # a trailing name without a value is dropped by zip()
            data = dict(zip(items, items))
        except (ValueError, TypeError) as exc:
            # one malformed attribute must not abort the remaining embeds
            self.log.debug("%s: Invalid media embed data (%s: %s)",
                           post["id"], exc.__class__.__name__, exc)
            continue

        src = data.get("src")
        if not src or not isinstance(src, str):
            self.log.debug(data)
            continue

        embed_type = data.get("data-s9e-mediaembed")
        if embed_type == "tiktok":
            # the video ID is the fragment of the iframe URL; without
            # a '#' the whole 'src' would end up in the video URL
            _, sep, video_id = src.rpartition("#")
            if not sep or not video_id:
                self.log.warning(
                    "%s: Unable to extract 'tiktok' video ID from '%s'",
                    post["id"], src)
                continue
            url = "https://www.tiktok.com/@/video/" + video_id
        else:
            self.log.warning("%s: Unsupported media embed type '%s'",
                             post["id"], embed_type)
            continue

        urls.append((None, None, None, url))
def _extract_media(self, url, file):
media = {}
name, _, media["id"] = file.rpartition(".")

View File

@@ -240,6 +240,15 @@ __tests__ = (
),
},
{
"#url" : "https://simpcity.cr/threads/arianaskyeshelby-itsarianaskyebaby-busty.1237895/post-40205575",
"#comment" : "tiktok s9e media embed iframe (#8994)",
"#category": ("xenforo", "simpcity", "post"),
"#class" : xenforo.XenforoPostExtractor,
"#auth" : True,
"#results" : "https://www.tiktok.com/@/video/7556556034794425631",
},
{
"#url" : "https://simpcity.cr/threads/alua-tatakai.89490/",
"#category": ("xenforo", "simpcity", "thread"),