[simpcity] extract 'tiktok' media embeds (#8994)
This commit is contained in:
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for XenForo forums"""
|
"""Extractors for XenForo forums"""
|
||||||
|
|
||||||
from .common import BaseExtractor, Message
|
from .common import BaseExtractor, Message
|
||||||
from .. import text, exception
|
from .. import text, util, exception
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
import binascii
|
import binascii
|
||||||
|
|
||||||
@@ -46,10 +46,10 @@ class XenforoExtractor(BaseExtractor):
|
|||||||
base = root if (pos := root.find("/", 8)) < 0 else root[:pos]
|
base = root if (pos := root.find("/", 8)) < 0 else root[:pos]
|
||||||
for post in self.posts():
|
for post in self.posts():
|
||||||
urls = extract_urls(post["content"])
|
urls = extract_urls(post["content"])
|
||||||
|
if "data-s9e-mediaembed-iframe=" in post["content"]:
|
||||||
|
self._extract_embeds(urls, post)
|
||||||
if post["attachments"]:
|
if post["attachments"]:
|
||||||
for att in text.extract_iter(
|
self._extract_attachments(urls, post)
|
||||||
post["attachments"], "<li", "</li>"):
|
|
||||||
urls.append((None, att[att.find('href="')+6:], None, None))
|
|
||||||
|
|
||||||
data = {"post": post}
|
data = {"post": post}
|
||||||
post["count"] = data["count"] = len(urls)
|
post["count"] = data["count"] = len(urls)
|
||||||
@@ -340,6 +340,37 @@ class XenforoExtractor(BaseExtractor):
|
|||||||
data["author_id"] = data["author"][15:]
|
data["author_id"] = data["author"][15:]
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
def _extract_attachments(self, urls, post):
    """Add one entry to 'urls' for every '<li>' item in post["attachments"]

    Each entry is a 4-tuple whose second slot holds everything that
    follows the first 'href="' marker of the item; the other three
    slots are None.
    """
    marker = 'href="'
    skip = len(marker)
    items = text.extract_iter(post["attachments"], "<li", "</li>")
    # NOTE: no check is made that the marker is present; when find()
    # returns -1 the slice simply starts at index 'skip - 1'.
    urls.extend(
        (None, item[item.find(marker) + skip:], None, None)
        for item in items
    )
|
||||||
|
|
||||||
|
def _extract_embeds(self, urls, post):
    """Add URLs of s9e media embeds found in post["content"] to 'urls'

    Every "data-s9e-mediaembed-iframe='...'" attribute value is parsed
    as JSON and read as an alternating key/value sequence.
    Only 'tiktok' embeds are converted into a URL; any other embed type
    is reported with a warning and skipped.
    """
    embeds = text.extract_iter(
        post["content"], "data-s9e-mediaembed-iframe='", "'")

    for raw in embeds:
        # rebuild a mapping from the flat [key, value, key, value, ...]
        # sequence; a trailing key without a value is dropped
        attrs = {}
        pending = None
        for item in util.json_loads(raw):
            if pending is not None:
                attrs[pending] = item
                pending = None
            else:
                pending = item

        src = attrs.get("src")
        if not src:
            # nothing to build a URL from
            self.log.debug(attrs)
            continue

        embed_type = attrs.get("data-s9e-mediaembed")
        if embed_type != "tiktok":
            self.log.warning("%s: Unsupported media embed type '%s'",
                             post["id"], embed_type)
            continue

        # the part of 'src' after its last '#' is used as the video ID
        video_id = src[src.rfind("#")+1:]
        urls.append(
            (None, None, None, "https://www.tiktok.com/@/video/" + video_id))
|
||||||
|
|
||||||
def _extract_media(self, url, file):
|
def _extract_media(self, url, file):
|
||||||
media = {}
|
media = {}
|
||||||
name, _, media["id"] = file.rpartition(".")
|
name, _, media["id"] = file.rpartition(".")
|
||||||
|
|||||||
@@ -240,6 +240,15 @@ __tests__ = (
|
|||||||
),
|
),
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://simpcity.cr/threads/arianaskyeshelby-itsarianaskyebaby-busty.1237895/post-40205575",
|
||||||
|
"#comment" : "tiktok s9e media embed iframe (#8994)",
|
||||||
|
"#category": ("xenforo", "simpcity", "post"),
|
||||||
|
"#class" : xenforo.XenforoPostExtractor,
|
||||||
|
"#auth" : True,
|
||||||
|
"#results" : "https://www.tiktok.com/@/video/7556556034794425631",
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://simpcity.cr/threads/alua-tatakai.89490/",
|
"#url" : "https://simpcity.cr/threads/alua-tatakai.89490/",
|
||||||
"#category": ("xenforo", "simpcity", "thread"),
|
"#category": ("xenforo", "simpcity", "thread"),
|
||||||
|
|||||||
Reference in New Issue
Block a user