[tiktok] Queue links from user profiles instead of returning a list of links

[tiktok] Move avatar download to the user extractor, which results in more accurate metadata output (it would previously write the metadata of the video from which the avatar was scraped).

[tiktok] Fix tests and remove redundant user profile test.
2025-03-19 18:50:06 +00:00
parent c7685bdfc7
commit d6d2b1fba0
2 changed files with 47 additions and 75 deletions
--- a/gallery_dl/extractor/tiktok.py
+++ b/gallery_dl/extractor/tiktok.py
@@ -26,14 +26,8 @@ class TiktokExtractor(Extractor):
    def _init(self):
        self.audio = self.config("audio", True)
        self.video = self.config("videos", True)
-        if not self.config("avatar", True):
-            self.avatar = util.false

    def items(self):
-        # We assume that all of the URLs served by urls() come from the same
-        # author.
-        downloaded_avatar = not self.avatar()
-
        for tiktok_url in self.urls():
            tiktok_url = self._sanitize_url(tiktok_url)
            data = self._extract_rehydration_data(tiktok_url)
@@ -50,18 +44,10 @@ class TiktokExtractor(Extractor):

            post = video_detail["itemInfo"]["itemStruct"]
            author = post["author"]
-            post["user"] = user = author["uniqueId"]
+            post["user"] = author["uniqueId"]
            post["date"] = text.parse_timestamp(post["createTime"])
            original_title = title = post["desc"]

-            if not downloaded_avatar:
-                avatar_url = author["avatarLarger"]
-                avatar = self._generate_avatar(
-                    avatar_url, post, user, author["id"])
-                yield Message.Directory, avatar
-                yield Message.Url, avatar_url, avatar
-                downloaded_avatar = True
-
            yield Message.Directory, post
            ytdl_media = False

@@ -112,35 +98,6 @@ class TiktokExtractor(Extractor):
                })
                yield Message.Url, "ytdl:" + tiktok_url, post

-        # If we couldn't download the avatar because the given user has no
-        # posts, we'll need to make a separate request for the user's page
-        # and download the avatar that way.
-        if not downloaded_avatar:
-            user_name = self.avatar()
-            profile_url = "https://www.tiktok.com/@{}".format(user_name)
-            data = self._extract_rehydration_data(profile_url)
-            data = data["webapp.user-detail"]["userInfo"]["user"]
-            data["user"] = user_name
-            avatar_url = data["avatarLarger"]
-            avatar = self._generate_avatar(
-                avatar_url, data, user_name, data["id"])
-            yield Message.Directory, avatar
-            yield Message.Url, avatar_url, avatar
-
-    def avatar(self):
-        return False
-
-    def _generate_avatar(self, avatar_url, data, user_name, user_id):
-        avatar = text.nameext_from_url(avatar_url, data.copy())
-        avatar.update({
-            "type"  : "avatar",
-            "title" : "@" + user_name,
-            "id"    : user_id,
-            "img_id": avatar["filename"].partition("~")[0],
-            "num"   : 0,
-        })
-        return avatar
-
    def _sanitize_url(self, url):
        return text.ensure_http_scheme(url.replace("/photo/", "/video/", 1))

@@ -245,7 +202,10 @@ class TiktokUserExtractor(TiktokExtractor):
    pattern = BASE_PATTERN + r"/@([\w_.-]+)/?(?:$|\?|#)"
    example = "https://www.tiktok.com/@USER"

-    def urls(self):
+    def _init(self):
+        self.avatar = self.config("avatar", True)
+
+    def items(self):
        """Attempt to use yt-dlp/youtube-dl to extract links from a
        user's page"""

@@ -278,19 +238,39 @@ class TiktokUserExtractor(TiktokExtractor):
        ytdl_instance = ytdl.construct_YoutubeDL(
            module, self, user_opts, extr_opts)

-        # transfer cookies to ytdl
+        # Transfer cookies to ytdl.
        if self.cookies:
            set_cookie = ytdl_instance.cookiejar.set_cookie
            for cookie in self.cookies:
                set_cookie(cookie)

+        user_name = self.groups[0]
+        profile_url = "{}/@{}".format(self.root, user_name)
+        if self.avatar:
+            avatar_url, avatar = self._generate_avatar(user_name, profile_url)
+            yield Message.Directory, avatar
+            yield Message.Url, avatar_url, avatar
+
        with ytdl_instance as ydl:
            info_dict = ydl._YoutubeDL__extract_info(
-                "{}/@{}".format(self.root, self.groups[0]),
-                ydl.get_info_extractor("TikTokUser"),
+                profile_url, ydl.get_info_extractor("TikTokUser"),
                False, {}, True)
            # This should include video and photo posts in /video/ URL form.
-            return [video["url"] for video in info_dict["entries"]]
+            for video in info_dict["entries"]:
+                data = {"_extractor": TiktokPostExtractor}
+                yield Message.Queue, video["url"].partition("?")[0], data

-    def avatar(self):
-        return self.groups[0]
+    def _generate_avatar(self, user_name, profile_url):
+        data = self._extract_rehydration_data(profile_url)
+        data = data["webapp.user-detail"]["userInfo"]["user"]
+        data["user"] = user_name
+        avatar_url = data["avatarLarger"]
+        avatar = text.nameext_from_url(avatar_url, data.copy())
+        avatar.update({
+            "type"  : "avatar",
+            "title" : "@" + user_name,
+            "id"    : data["id"],
+            "img_id": avatar["filename"].partition("~")[0],
+            "num"   : 0,
+        })
+        return (avatar_url, avatar)
--- a/test/results/tiktok.py
+++ b/test/results/tiktok.py
@@ -7,7 +7,8 @@
 from gallery_dl.extractor import tiktok

 PATTERN = r"https://p1[69]-[^/?#.]+\.tiktokcdn[^/?#.]*\.com/[^/?#]+/\w+~.*\.jpe?g"
-PATTERN_WITH_AUDIO = r"(?:" + PATTERN + r"|ytdl:http.+)"
+PATTERN_WITH_AUDIO = r"(?:" + PATTERN + r"|https://v\d+m?\.tiktokcdn[^/?#.]*\.com/[^?#]+\?[^/?#]+)"
+USER_PATTERN = r"(https://www.tiktok.com/@([\w_.-]+)/video/(\d+)|" + PATTERN + r")"


 __tests__ = (
@@ -17,7 +18,7 @@ __tests__ = (
    "#category" : ("", "tiktok", "post"),
    "#class"    : tiktok.TiktokPostExtractor,
    "#pattern"  : PATTERN,
-    "#options"  : {"videos": False},
+    "#options"  : {"videos": False, "audio": False},
 },

 {
@@ -26,7 +27,7 @@ __tests__ = (
    "#category" : ("", "tiktok", "post"),
    "#class"    : tiktok.TiktokPostExtractor,
    "#pattern"  : PATTERN,
-    "#options"  : {"videos": False},
+    "#options"  : {"videos": False, "audio": False},
 },

 {
@@ -35,7 +36,7 @@ __tests__ = (
    "#category" : ("", "tiktok", "post"),
    "#class"    : tiktok.TiktokPostExtractor,
    "#pattern"  : PATTERN,
-    "#options"  : {"videos": False},
+    "#options"  : {"videos": False, "audio": False},
 },

 {
@@ -44,7 +45,7 @@ __tests__ = (
    "#category" : ("", "tiktok", "post"),
    "#class"    : tiktok.TiktokPostExtractor,
    "#pattern"  : PATTERN,
-    "#options"  : {"videos": False},
+    "#options"  : {"videos": False, "audio": False},
 },

 {
@@ -53,7 +54,7 @@ __tests__ = (
    "#category" : ("", "tiktok", "post"),
    "#class"    : tiktok.TiktokPostExtractor,
    "#pattern"  : PATTERN,
-    "#options"  : {"videos": False},
+    "#options"  : {"videos": False, "audio": False},
 },

 {
@@ -62,7 +63,7 @@ __tests__ = (
    "#category" : ("", "tiktok", "post"),
    "#class"    : tiktok.TiktokPostExtractor,
    "#pattern"  : PATTERN,
-    "#options"  : {"videos": False},
+    "#options"  : {"videos": False, "audio": False},
 },

 {
@@ -71,7 +72,7 @@ __tests__ = (
    "#category" : ("", "tiktok", "post"),
    "#class"    : tiktok.TiktokPostExtractor,
    "#pattern"  : PATTERN,
-    "#options"  : {"videos": False},
+    "#options"  : {"videos": False, "audio": False},
 },

 {
@@ -80,7 +81,7 @@ __tests__ = (
    "#category" : ("", "tiktok", "post"),
    "#class"    : tiktok.TiktokPostExtractor,
    "#pattern"  : PATTERN,
-    "#options"  : {"videos": False},
+    "#options"  : {"videos": False, "audio": False},
 },

 {
@@ -89,7 +90,7 @@ __tests__ = (
    "#category" : ("", "tiktok", "post"),
    "#class"    : tiktok.TiktokPostExtractor,
    "#pattern"  : PATTERN,
-    "#options"  : {"videos": False},
+    "#options"  : {"videos": False, "audio": False},
 },

 {
@@ -97,7 +98,7 @@ __tests__ = (
    "#comment"   : "deleted post",
    "#category"  : ("", "tiktok", "post"),
    "#class"     : tiktok.TiktokPostExtractor,
-    "#options"   : {"videos": False},
+    "#options"   : {"videos": False, "audio": False},
    "count"      : 0,
 },

@@ -107,7 +108,7 @@ __tests__ = (
    "#category" : ("", "tiktok", "post"),
    "#class"    : tiktok.TiktokPostExtractor,
    "#urls"     : "ytdl:https://www.tiktok.com/@memezar/video/7449708266168274208",
-    "#options"  : {"videos": True},
+    "#options"  : {"videos": True, "audio": True},
 },

 {
@@ -116,7 +117,7 @@ __tests__ = (
    "#category" : ("", "tiktok", "post"),
    "#class"    : tiktok.TiktokPostExtractor,
    "#urls"     : "ytdl:https://www.tiktok.com/@memezar/video/7449708266168274208",
-    "#options"  : {"videos": True},
+    "#options"  : {"videos": True, "audio": True},
 },

 {
@@ -241,17 +242,8 @@ __tests__ = (
    "#comment"  : "User profile",
    "#category" : ("", "tiktok", "user"),
    "#class"    : tiktok.TiktokUserExtractor,
-    "#pattern"  : PATTERN_WITH_AUDIO,
-    "#options"  : {"videos": True, "tiktok-range": "1-10"},
-},
-
-{
-    "#url"      : "https://www.tiktok.com/@chillezy/",
-    "#comment"  : "User profile without audio or videos",
-    "#category" : ("", "tiktok", "user"),
-    "#class"    : tiktok.TiktokUserExtractor,
-    "#pattern"  : PATTERN,
-    "#options"  : {"videos": False, "tiktok-range": "1-10"},
+    "#pattern"  : USER_PATTERN,
+    "#options"  : {"videos": True, "audio": True, "tiktok-range": "1-10"},
 },

 {