From 9379397eec2cc9dcf6e2107bf69f2bf382992e7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Wed, 4 Feb 2026 11:16:32 +0100
Subject: [PATCH] [simpcity] extract 'tiktok' media embeds (#8994)

---
 gallery_dl/extractor/xenforo.py | 39 +++++++++++++++++++++++++++++++++++----
 test/results/simpcity.py        |  9 ++++++++
 2 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/gallery_dl/extractor/xenforo.py b/gallery_dl/extractor/xenforo.py
index dc25fa15..0d32a2b5 100644
--- a/gallery_dl/extractor/xenforo.py
+++ b/gallery_dl/extractor/xenforo.py
@@ -9,7 +9,7 @@
 """Extractors for XenForo forums"""
 
 from .common import BaseExtractor, Message
-from .. import text, exception
+from .. import text, util, exception
 from ..cache import cache
 import binascii
 
@@ -46,10 +46,10 @@ class XenforoExtractor(BaseExtractor):
         base = root if (pos := root.find("/", 8)) < 0 else root[:pos]
         for post in self.posts():
             urls = extract_urls(post["content"])
+            if "data-s9e-mediaembed-iframe=" in post["content"]:
+                self._extract_embeds(urls, post)
             if post["attachments"]:
-                for att in text.extract_iter(
-                        post["attachments"], "<li", "</li>"):
-                    urls.append((None, att[att.find('href="')+6:], None, None))
+                self._extract_attachments(urls, post)
 
             data = {"post": post}
             post["count"] = data["count"] = len(urls)
@@ -340,6 +340,37 @@ class XenforoExtractor(BaseExtractor):
             data["author_id"] = data["author"][15:]
         return data
 
+    def _extract_attachments(self, urls, post):
+        for att in text.extract_iter(post["attachments"], "<li", "</li>"):
+            urls.append((None, att[att.find('href="')+6:], None, None))
+
+    def _extract_embeds(self, urls, post):
+        for embed in text.extract_iter(
+                post["content"], "data-s9e-mediaembed-iframe='", "'"):
+            data = {}
+            key = None
+            for value in util.json_loads(embed):
+                if key is None:
+                    key = value
+                else:
+                    data[key] = value
+                    key = None
+
+            src = data.get("src")
+            if not src:
+                self.log.debug(data)
+                continue
+
+            type = data.get("data-s9e-mediaembed")
+            if type == "tiktok":
+                url = ("https://www.tiktok.com/@/video/" +
+                       src[src.rfind("#")+1:])
+            else:
+                self.log.warning("%s: Unsupported media embed type '%s'",
+                                 post["id"], type)
+                continue
+            urls.append((None, None, None, url))
+
     def _extract_media(self, url, file):
         media = {}
         name, _, media["id"] = file.rpartition(".")
diff --git a/test/results/simpcity.py b/test/results/simpcity.py
index a7ecaa2d..1b65a538 100644
--- a/test/results/simpcity.py
+++ b/test/results/simpcity.py
@@ -240,6 +240,15 @@ __tests__ = (
     ),
 },
 
+{
+    "#url"     : "https://simpcity.cr/threads/arianaskyeshelby-itsarianaskyebaby-busty.1237895/post-40205575",
+    "#comment" : "tiktok s9e media embed iframe (#8994)",
+    "#category": ("xenforo", "simpcity", "post"),
+    "#class"   : xenforo.XenforoPostExtractor,
+    "#auth"    : True,
+    "#results" : "https://www.tiktok.com/@/video/7556556034794425631",
+},
+
 {
     "#url"     : "https://simpcity.cr/threads/alua-tatakai.89490/",
     "#category": ("xenforo", "simpcity", "thread"),