[simpcity] extract 'tiktok' media embeds (#8994)
This commit is contained in:
@@ -9,7 +9,7 @@
|
||||
"""Extractors for XenForo forums"""
|
||||
|
||||
from .common import BaseExtractor, Message
|
||||
from .. import text, exception
|
||||
from .. import text, util, exception
|
||||
from ..cache import cache
|
||||
import binascii
|
||||
|
||||
@@ -46,10 +46,10 @@ class XenforoExtractor(BaseExtractor):
|
||||
base = root if (pos := root.find("/", 8)) < 0 else root[:pos]
|
||||
for post in self.posts():
|
||||
urls = extract_urls(post["content"])
|
||||
if "data-s9e-mediaembed-iframe=" in post["content"]:
|
||||
self._extract_embeds(urls, post)
|
||||
if post["attachments"]:
|
||||
for att in text.extract_iter(
|
||||
post["attachments"], "<li", "</li>"):
|
||||
urls.append((None, att[att.find('href="')+6:], None, None))
|
||||
self._extract_attachments(urls, post)
|
||||
|
||||
data = {"post": post}
|
||||
post["count"] = data["count"] = len(urls)
|
||||
@@ -340,6 +340,37 @@ class XenforoExtractor(BaseExtractor):
|
||||
data["author_id"] = data["author"][15:]
|
||||
return data
|
||||
|
||||
def _extract_attachments(self, urls, post):
    """Add one entry to 'urls' per attachment list item of 'post'.

    post["attachments"] is scanned for every span between "<li" and
    "</li>". For each span, a 4-tuple (None, tail, None, None) is
    appended, where 'tail' is the span's text from 6 characters past
    the position reported by find('href="') onwards.
    """
    items = text.extract_iter(post["attachments"], "<li", "</li>")
    urls.extend(
        (None, item[item.find('href="') + 6:], None, None)
        for item in items
    )
|
||||
|
||||
def _extract_embeds(self, urls, post):
    """Add URLs for s9e media embed iframes in a post to 'urls'.

    Every "data-s9e-mediaembed-iframe='...'" attribute value found in
    post["content"] is parsed with util.json_loads() and read as a flat
    sequence of alternating names and values.

    - Embeds without a truthy 'src' are debug-logged and skipped.
    - 'tiktok' embeds append (None, None, None, url), where 'url' is
      the TikTok video URL prefix followed by everything after the
      last '#' of 'src'.
    - Any other embed type is reported with a warning and skipped.
    """
    marker = "data-s9e-mediaembed-iframe='"

    for raw in text.extract_iter(post["content"], marker, "'"):
        # rebuild a mapping from [name1, value1, name2, value2, ...]
        attrs = {}
        pending = None
        for item in util.json_loads(raw):
            if pending is not None:
                attrs[pending] = item
                pending = None
            else:
                pending = item

        source = attrs.get("src")
        if not source:
            self.log.debug(attrs)
            continue

        site = attrs.get("data-s9e-mediaembed")
        if site != "tiktok":
            self.log.warning("%s: Unsupported media embed type '%s'",
                             post["id"], site)
            continue

        # the part of 'src' after its last '#' is used as the video ID
        video_id = source[source.rfind("#") + 1:]
        urls.append(
            (None, None, None, "https://www.tiktok.com/@/video/" + video_id))
|
||||
|
||||
def _extract_media(self, url, file):
|
||||
media = {}
|
||||
name, _, media["id"] = file.rpartition(".")
|
||||
|
||||
@@ -240,6 +240,15 @@ __tests__ = (
|
||||
),
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://simpcity.cr/threads/arianaskyeshelby-itsarianaskyebaby-busty.1237895/post-40205575",
|
||||
"#comment" : "tiktok s9e media embed iframe (#8994)",
|
||||
"#category": ("xenforo", "simpcity", "post"),
|
||||
"#class" : xenforo.XenforoPostExtractor,
|
||||
"#auth" : True,
|
||||
"#results" : "https://www.tiktok.com/@/video/7556556034794425631",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://simpcity.cr/threads/alua-tatakai.89490/",
|
||||
"#category": ("xenforo", "simpcity", "thread"),
|
||||
|
||||
Reference in New Issue
Block a user