[tiktok] Queue links from user profiles instead of returning a list of links
[tiktok] Move avatar download to the user extractor, which results in more accurate metadata output (it previously wrote the metadata of the video from which the avatar was scraped)
[tiktok] Fix tests and remove redundant user profile test
This commit is contained in:
@@ -26,14 +26,8 @@ class TiktokExtractor(Extractor):
|
|||||||
def _init(self):
|
def _init(self):
|
||||||
self.audio = self.config("audio", True)
|
self.audio = self.config("audio", True)
|
||||||
self.video = self.config("videos", True)
|
self.video = self.config("videos", True)
|
||||||
if not self.config("avatar", True):
|
|
||||||
self.avatar = util.false
|
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
# We assume that all of the URLs served by urls() come from the same
|
|
||||||
# author.
|
|
||||||
downloaded_avatar = not self.avatar()
|
|
||||||
|
|
||||||
for tiktok_url in self.urls():
|
for tiktok_url in self.urls():
|
||||||
tiktok_url = self._sanitize_url(tiktok_url)
|
tiktok_url = self._sanitize_url(tiktok_url)
|
||||||
data = self._extract_rehydration_data(tiktok_url)
|
data = self._extract_rehydration_data(tiktok_url)
|
||||||
@@ -50,18 +44,10 @@ class TiktokExtractor(Extractor):
|
|||||||
|
|
||||||
post = video_detail["itemInfo"]["itemStruct"]
|
post = video_detail["itemInfo"]["itemStruct"]
|
||||||
author = post["author"]
|
author = post["author"]
|
||||||
post["user"] = user = author["uniqueId"]
|
post["user"] = author["uniqueId"]
|
||||||
post["date"] = text.parse_timestamp(post["createTime"])
|
post["date"] = text.parse_timestamp(post["createTime"])
|
||||||
original_title = title = post["desc"]
|
original_title = title = post["desc"]
|
||||||
|
|
||||||
if not downloaded_avatar:
|
|
||||||
avatar_url = author["avatarLarger"]
|
|
||||||
avatar = self._generate_avatar(
|
|
||||||
avatar_url, post, user, author["id"])
|
|
||||||
yield Message.Directory, avatar
|
|
||||||
yield Message.Url, avatar_url, avatar
|
|
||||||
downloaded_avatar = True
|
|
||||||
|
|
||||||
yield Message.Directory, post
|
yield Message.Directory, post
|
||||||
ytdl_media = False
|
ytdl_media = False
|
||||||
|
|
||||||
@@ -112,35 +98,6 @@ class TiktokExtractor(Extractor):
|
|||||||
})
|
})
|
||||||
yield Message.Url, "ytdl:" + tiktok_url, post
|
yield Message.Url, "ytdl:" + tiktok_url, post
|
||||||
|
|
||||||
# If we couldn't download the avatar because the given user has no
|
|
||||||
# posts, we'll need to make a separate request for the user's page
|
|
||||||
# and download the avatar that way.
|
|
||||||
if not downloaded_avatar:
|
|
||||||
user_name = self.avatar()
|
|
||||||
profile_url = "https://www.tiktok.com/@{}".format(user_name)
|
|
||||||
data = self._extract_rehydration_data(profile_url)
|
|
||||||
data = data["webapp.user-detail"]["userInfo"]["user"]
|
|
||||||
data["user"] = user_name
|
|
||||||
avatar_url = data["avatarLarger"]
|
|
||||||
avatar = self._generate_avatar(
|
|
||||||
avatar_url, data, user_name, data["id"])
|
|
||||||
yield Message.Directory, avatar
|
|
||||||
yield Message.Url, avatar_url, avatar
|
|
||||||
|
|
||||||
def avatar(self):
|
|
||||||
return False
|
|
||||||
|
|
||||||
def _generate_avatar(self, avatar_url, data, user_name, user_id):
|
|
||||||
avatar = text.nameext_from_url(avatar_url, data.copy())
|
|
||||||
avatar.update({
|
|
||||||
"type" : "avatar",
|
|
||||||
"title" : "@" + user_name,
|
|
||||||
"id" : user_id,
|
|
||||||
"img_id": avatar["filename"].partition("~")[0],
|
|
||||||
"num" : 0,
|
|
||||||
})
|
|
||||||
return avatar
|
|
||||||
|
|
||||||
def _sanitize_url(self, url):
|
def _sanitize_url(self, url):
|
||||||
return text.ensure_http_scheme(url.replace("/photo/", "/video/", 1))
|
return text.ensure_http_scheme(url.replace("/photo/", "/video/", 1))
|
||||||
|
|
||||||
@@ -245,7 +202,10 @@ class TiktokUserExtractor(TiktokExtractor):
|
|||||||
pattern = BASE_PATTERN + r"/@([\w_.-]+)/?(?:$|\?|#)"
|
pattern = BASE_PATTERN + r"/@([\w_.-]+)/?(?:$|\?|#)"
|
||||||
example = "https://www.tiktok.com/@USER"
|
example = "https://www.tiktok.com/@USER"
|
||||||
|
|
||||||
def urls(self):
|
def _init(self):
|
||||||
|
self.avatar = self.config("avatar", True)
|
||||||
|
|
||||||
|
def items(self):
|
||||||
"""Attempt to use yt-dlp/youtube-dl to extract links from a
|
"""Attempt to use yt-dlp/youtube-dl to extract links from a
|
||||||
user's page"""
|
user's page"""
|
||||||
|
|
||||||
@@ -278,19 +238,39 @@ class TiktokUserExtractor(TiktokExtractor):
|
|||||||
ytdl_instance = ytdl.construct_YoutubeDL(
|
ytdl_instance = ytdl.construct_YoutubeDL(
|
||||||
module, self, user_opts, extr_opts)
|
module, self, user_opts, extr_opts)
|
||||||
|
|
||||||
# transfer cookies to ytdl
|
# Transfer cookies to ytdl.
|
||||||
if self.cookies:
|
if self.cookies:
|
||||||
set_cookie = ytdl_instance.cookiejar.set_cookie
|
set_cookie = ytdl_instance.cookiejar.set_cookie
|
||||||
for cookie in self.cookies:
|
for cookie in self.cookies:
|
||||||
set_cookie(cookie)
|
set_cookie(cookie)
|
||||||
|
|
||||||
|
user_name = self.groups[0]
|
||||||
|
profile_url = "{}/@{}".format(self.root, user_name)
|
||||||
|
if self.avatar:
|
||||||
|
avatar_url, avatar = self._generate_avatar(user_name, profile_url)
|
||||||
|
yield Message.Directory, avatar
|
||||||
|
yield Message.Url, avatar_url, avatar
|
||||||
|
|
||||||
with ytdl_instance as ydl:
|
with ytdl_instance as ydl:
|
||||||
info_dict = ydl._YoutubeDL__extract_info(
|
info_dict = ydl._YoutubeDL__extract_info(
|
||||||
"{}/@{}".format(self.root, self.groups[0]),
|
profile_url, ydl.get_info_extractor("TikTokUser"),
|
||||||
ydl.get_info_extractor("TikTokUser"),
|
|
||||||
False, {}, True)
|
False, {}, True)
|
||||||
# This should include video and photo posts in /video/ URL form.
|
# This should include video and photo posts in /video/ URL form.
|
||||||
return [video["url"] for video in info_dict["entries"]]
|
for video in info_dict["entries"]:
|
||||||
|
data = {"_extractor": TiktokPostExtractor}
|
||||||
|
yield Message.Queue, video["url"].partition("?")[0], data
|
||||||
|
|
||||||
def avatar(self):
|
def _generate_avatar(self, user_name, profile_url):
|
||||||
return self.groups[0]
|
data = self._extract_rehydration_data(profile_url)
|
||||||
|
data = data["webapp.user-detail"]["userInfo"]["user"]
|
||||||
|
data["user"] = user_name
|
||||||
|
avatar_url = data["avatarLarger"]
|
||||||
|
avatar = text.nameext_from_url(avatar_url, data.copy())
|
||||||
|
avatar.update({
|
||||||
|
"type" : "avatar",
|
||||||
|
"title" : "@" + user_name,
|
||||||
|
"id" : data["id"],
|
||||||
|
"img_id": avatar["filename"].partition("~")[0],
|
||||||
|
"num" : 0,
|
||||||
|
})
|
||||||
|
return (avatar_url, avatar)
|
||||||
|
|||||||
@@ -7,7 +7,8 @@
|
|||||||
from gallery_dl.extractor import tiktok
|
from gallery_dl.extractor import tiktok
|
||||||
|
|
||||||
PATTERN = r"https://p1[69]-[^/?#.]+\.tiktokcdn[^/?#.]*\.com/[^/?#]+/\w+~.*\.jpe?g"
|
PATTERN = r"https://p1[69]-[^/?#.]+\.tiktokcdn[^/?#.]*\.com/[^/?#]+/\w+~.*\.jpe?g"
|
||||||
PATTERN_WITH_AUDIO = r"(?:" + PATTERN + r"|ytdl:http.+)"
|
PATTERN_WITH_AUDIO = r"(?:" + PATTERN + r"|https://v\d+m?\.tiktokcdn[^/?#.]*\.com/[^?#]+\?[^/?#]+)"
|
||||||
|
USER_PATTERN = r"(https://www.tiktok.com/@([\w_.-]+)/video/(\d+)|" + PATTERN + r")"
|
||||||
|
|
||||||
|
|
||||||
__tests__ = (
|
__tests__ = (
|
||||||
@@ -17,7 +18,7 @@ __tests__ = (
|
|||||||
"#category" : ("", "tiktok", "post"),
|
"#category" : ("", "tiktok", "post"),
|
||||||
"#class" : tiktok.TiktokPostExtractor,
|
"#class" : tiktok.TiktokPostExtractor,
|
||||||
"#pattern" : PATTERN,
|
"#pattern" : PATTERN,
|
||||||
"#options" : {"videos": False},
|
"#options" : {"videos": False, "audio": False},
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -26,7 +27,7 @@ __tests__ = (
|
|||||||
"#category" : ("", "tiktok", "post"),
|
"#category" : ("", "tiktok", "post"),
|
||||||
"#class" : tiktok.TiktokPostExtractor,
|
"#class" : tiktok.TiktokPostExtractor,
|
||||||
"#pattern" : PATTERN,
|
"#pattern" : PATTERN,
|
||||||
"#options" : {"videos": False},
|
"#options" : {"videos": False, "audio": False},
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -35,7 +36,7 @@ __tests__ = (
|
|||||||
"#category" : ("", "tiktok", "post"),
|
"#category" : ("", "tiktok", "post"),
|
||||||
"#class" : tiktok.TiktokPostExtractor,
|
"#class" : tiktok.TiktokPostExtractor,
|
||||||
"#pattern" : PATTERN,
|
"#pattern" : PATTERN,
|
||||||
"#options" : {"videos": False},
|
"#options" : {"videos": False, "audio": False},
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -44,7 +45,7 @@ __tests__ = (
|
|||||||
"#category" : ("", "tiktok", "post"),
|
"#category" : ("", "tiktok", "post"),
|
||||||
"#class" : tiktok.TiktokPostExtractor,
|
"#class" : tiktok.TiktokPostExtractor,
|
||||||
"#pattern" : PATTERN,
|
"#pattern" : PATTERN,
|
||||||
"#options" : {"videos": False},
|
"#options" : {"videos": False, "audio": False},
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -53,7 +54,7 @@ __tests__ = (
|
|||||||
"#category" : ("", "tiktok", "post"),
|
"#category" : ("", "tiktok", "post"),
|
||||||
"#class" : tiktok.TiktokPostExtractor,
|
"#class" : tiktok.TiktokPostExtractor,
|
||||||
"#pattern" : PATTERN,
|
"#pattern" : PATTERN,
|
||||||
"#options" : {"videos": False},
|
"#options" : {"videos": False, "audio": False},
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -62,7 +63,7 @@ __tests__ = (
|
|||||||
"#category" : ("", "tiktok", "post"),
|
"#category" : ("", "tiktok", "post"),
|
||||||
"#class" : tiktok.TiktokPostExtractor,
|
"#class" : tiktok.TiktokPostExtractor,
|
||||||
"#pattern" : PATTERN,
|
"#pattern" : PATTERN,
|
||||||
"#options" : {"videos": False},
|
"#options" : {"videos": False, "audio": False},
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -71,7 +72,7 @@ __tests__ = (
|
|||||||
"#category" : ("", "tiktok", "post"),
|
"#category" : ("", "tiktok", "post"),
|
||||||
"#class" : tiktok.TiktokPostExtractor,
|
"#class" : tiktok.TiktokPostExtractor,
|
||||||
"#pattern" : PATTERN,
|
"#pattern" : PATTERN,
|
||||||
"#options" : {"videos": False},
|
"#options" : {"videos": False, "audio": False},
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -80,7 +81,7 @@ __tests__ = (
|
|||||||
"#category" : ("", "tiktok", "post"),
|
"#category" : ("", "tiktok", "post"),
|
||||||
"#class" : tiktok.TiktokPostExtractor,
|
"#class" : tiktok.TiktokPostExtractor,
|
||||||
"#pattern" : PATTERN,
|
"#pattern" : PATTERN,
|
||||||
"#options" : {"videos": False},
|
"#options" : {"videos": False, "audio": False},
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -89,7 +90,7 @@ __tests__ = (
|
|||||||
"#category" : ("", "tiktok", "post"),
|
"#category" : ("", "tiktok", "post"),
|
||||||
"#class" : tiktok.TiktokPostExtractor,
|
"#class" : tiktok.TiktokPostExtractor,
|
||||||
"#pattern" : PATTERN,
|
"#pattern" : PATTERN,
|
||||||
"#options" : {"videos": False},
|
"#options" : {"videos": False, "audio": False},
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -97,7 +98,7 @@ __tests__ = (
|
|||||||
"#comment" : "deleted post",
|
"#comment" : "deleted post",
|
||||||
"#category" : ("", "tiktok", "post"),
|
"#category" : ("", "tiktok", "post"),
|
||||||
"#class" : tiktok.TiktokPostExtractor,
|
"#class" : tiktok.TiktokPostExtractor,
|
||||||
"#options" : {"videos": False},
|
"#options" : {"videos": False, "audio": False},
|
||||||
"count" : 0,
|
"count" : 0,
|
||||||
},
|
},
|
||||||
|
|
||||||
@@ -107,7 +108,7 @@ __tests__ = (
|
|||||||
"#category" : ("", "tiktok", "post"),
|
"#category" : ("", "tiktok", "post"),
|
||||||
"#class" : tiktok.TiktokPostExtractor,
|
"#class" : tiktok.TiktokPostExtractor,
|
||||||
"#urls" : "ytdl:https://www.tiktok.com/@memezar/video/7449708266168274208",
|
"#urls" : "ytdl:https://www.tiktok.com/@memezar/video/7449708266168274208",
|
||||||
"#options" : {"videos": True},
|
"#options" : {"videos": True, "audio": True},
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -116,7 +117,7 @@ __tests__ = (
|
|||||||
"#category" : ("", "tiktok", "post"),
|
"#category" : ("", "tiktok", "post"),
|
||||||
"#class" : tiktok.TiktokPostExtractor,
|
"#class" : tiktok.TiktokPostExtractor,
|
||||||
"#urls" : "ytdl:https://www.tiktok.com/@memezar/video/7449708266168274208",
|
"#urls" : "ytdl:https://www.tiktok.com/@memezar/video/7449708266168274208",
|
||||||
"#options" : {"videos": True},
|
"#options" : {"videos": True, "audio": True},
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -241,17 +242,8 @@ __tests__ = (
|
|||||||
"#comment" : "User profile",
|
"#comment" : "User profile",
|
||||||
"#category" : ("", "tiktok", "user"),
|
"#category" : ("", "tiktok", "user"),
|
||||||
"#class" : tiktok.TiktokUserExtractor,
|
"#class" : tiktok.TiktokUserExtractor,
|
||||||
"#pattern" : PATTERN_WITH_AUDIO,
|
"#pattern" : USER_PATTERN,
|
||||||
"#options" : {"videos": True, "tiktok-range": "1-10"},
|
"#options" : {"videos": True, "audio": True, "tiktok-range": "1-10"},
|
||||||
},
|
|
||||||
|
|
||||||
{
|
|
||||||
"#url" : "https://www.tiktok.com/@chillezy/",
|
|
||||||
"#comment" : "User profile without audio or videos",
|
|
||||||
"#category" : ("", "tiktok", "user"),
|
|
||||||
"#class" : tiktok.TiktokUserExtractor,
|
|
||||||
"#pattern" : PATTERN,
|
|
||||||
"#options" : {"videos": False, "tiktok-range": "1-10"},
|
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user