diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py index 30f310d6..4c1da7ae 100644 --- a/gallery_dl/extractor/tiktok.py +++ b/gallery_dl/extractor/tiktok.py @@ -25,14 +25,8 @@ class TiktokExtractor(Extractor): def _init(self): self.audio = self.config("audio", True) self.video = self.config("videos", True) - if not self.config("avatar", True): - self.avatar = util.false def items(self): - # We assume that all of the URLs served by urls() come from the same - # author. - downloaded_avatar = not self.avatar() - for tiktok_url in self.urls(): tiktok_url = self._sanitize_url(tiktok_url) data = self._extract_rehydration_data(tiktok_url) @@ -49,18 +43,10 @@ class TiktokExtractor(Extractor): post = video_detail["itemInfo"]["itemStruct"] author = post["author"] - post["user"] = user = author["uniqueId"] + post["user"] = author["uniqueId"] post["date"] = text.parse_timestamp(post["createTime"]) original_title = title = post["desc"] - if not downloaded_avatar: - avatar_url = author["avatarLarger"] - avatar = self._generate_avatar( - avatar_url, post, user, author["id"]) - yield Message.Directory, avatar - yield Message.Url, avatar_url, avatar - downloaded_avatar = True - yield Message.Directory, post ytdl_media = False @@ -111,44 +97,29 @@ class TiktokExtractor(Extractor): }) yield Message.Url, "ytdl:" + tiktok_url, post - # If we couldn't download the avatar because the given user has no - # posts, we'll need to make a separate request for the user's page - # and download the avatar that way. - if not downloaded_avatar: - user_name = self.avatar() - profile_url = "https://www.tiktok.com/@{}".format(user_name) - data = self._extract_rehydration_data(profile_url) - data = data["webapp.user-detail"]["userInfo"]["user"] - data["user"] = user_name - avatar_url = data["avatarLarger"] - avatar = self._generate_avatar( - avatar_url, data, user_name, data["id"]) - yield Message.Directory, avatar - yield Message.Url, avatar_url, avatar - - def avatar(self): - return False - - def _generate_avatar(self, avatar_url, data, user_name, user_id): - avatar = text.nameext_from_url(avatar_url, data.copy()) - avatar.update({ - "type" : "avatar", - "title" : "@" + user_name, - "id" : user_id, - "img_id": avatar["filename"].partition("~")[0], - "num" : 0, - }) - return avatar - def _sanitize_url(self, url): return text.ensure_http_scheme(url.replace("/photo/", "/video/", 1)) def _extract_rehydration_data(self, url): - html = self.request(url).text - data = text.extr( - html, '') - return util.json_loads(data)["__DEFAULT_SCOPE__"] + tries = 0 + while True: + try: + html = self.request(url).text + data = text.extr( + html, '') + return util.json_loads(data)["__DEFAULT_SCOPE__"] + except ValueError: + # We failed to retrieve rehydration data. This happens + # relatively frequently when making many requests, so + # retry. + if tries >= self._retries: + raise + tries += 1 + self.log.warning("%s: Failed to retrieve rehydration data " + "(%s/%s)", url.rpartition("/")[2], tries, + self._retries) + self.sleep(self._timeout, "retry") def _extract_audio(self, post): audio = post["music"] @@ -179,7 +150,7 @@ class TiktokExtractor(Extractor): elif status == 10204: self.log.error("%s: Requested post not available", url) elif status == 10231: - self.log.error("%s: Region locked - Try downloading with a" + self.log.error("%s: Region locked - Try downloading with a " "VPN/proxy connection", url) else: self.log.error( @@ -230,7 +201,10 @@ class TiktokUserExtractor(TiktokExtractor): pattern = BASE_PATTERN + r"/@([\w_.-]+)/?(?:$|\?|#)" example = "https://www.tiktok.com/@USER" - def urls(self): + def _init(self): + self.avatar = self.config("avatar", True) + + def items(self): """Attempt to use yt-dlp/youtube-dl to extract links from a user's page""" @@ -263,19 +237,39 @@ class TiktokUserExtractor(TiktokExtractor): ytdl_instance = ytdl.construct_YoutubeDL( module, self, user_opts, extr_opts) - # transfer cookies to ytdl + # Transfer cookies to ytdl. if self.cookies: set_cookie = ytdl_instance.cookiejar.set_cookie for cookie in self.cookies: set_cookie(cookie) + user_name = self.groups[0] + profile_url = "{}/@{}".format(self.root, user_name) + if self.avatar: + avatar_url, avatar = self._generate_avatar(user_name, profile_url) + yield Message.Directory, avatar + yield Message.Url, avatar_url, avatar + with ytdl_instance as ydl: info_dict = ydl._YoutubeDL__extract_info( - "{}/@{}".format(self.root, self.groups[0]), - ydl.get_info_extractor("TikTokUser"), + profile_url, ydl.get_info_extractor("TikTokUser"), False, {}, True) # This should include video and photo posts in /video/ URL form. - return [video["url"] for video in info_dict["entries"]] + for video in info_dict["entries"]: + data = {"_extractor": TiktokPostExtractor} + yield Message.Queue, video["url"].partition("?")[0], data - def avatar(self): - return self.groups[0] + def _generate_avatar(self, user_name, profile_url): + data = self._extract_rehydration_data(profile_url) + data = data["webapp.user-detail"]["userInfo"]["user"] + data["user"] = user_name + avatar_url = data["avatarLarger"] + avatar = text.nameext_from_url(avatar_url, data.copy()) + avatar.update({ + "type" : "avatar", + "title" : "@" + user_name, + "id" : data["id"], + "img_id": avatar["filename"].partition("~")[0], + "num" : 0, + }) + return (avatar_url, avatar) diff --git a/test/results/tiktok.py b/test/results/tiktok.py index 9cd73a92..d38540b5 100644 --- a/test/results/tiktok.py +++ b/test/results/tiktok.py @@ -7,7 +7,8 @@ from gallery_dl.extractor import tiktok PATTERN = r"https://p1[69]-[^/?#.]+\.tiktokcdn[^/?#.]*\.com/[^/?#]+/\w+~.*\.jpe?g" -PATTERN_WITH_AUDIO = r"(?:" + PATTERN + r"|ytdl:http.+)" +PATTERN_WITH_AUDIO = r"(?:" + PATTERN + r"|https://v\d+m?\.tiktokcdn[^/?#.]*\.com/[^?#]+\?[^/?#]+)" +USER_PATTERN = r"(https://www.tiktok.com/@([\w_.-]+)/video/(\d+)|" + PATTERN + r")" __tests__ = ( @@ -17,7 +18,7 @@ __tests__ = ( "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, "#pattern" : PATTERN, - "#options" : {"videos": False}, + "#options" : {"videos": False, "audio": False}, }, { @@ -26,7 +27,7 @@ __tests__ = ( "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, "#pattern" : PATTERN, - "#options" : {"videos": False}, + "#options" : {"videos": False, "audio": False}, }, { @@ -35,7 +36,7 @@ __tests__ = ( "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, "#pattern" : PATTERN, - "#options" : {"videos": False}, + "#options" : {"videos": False, "audio": False}, }, { @@ -44,7 +45,7 @@ __tests__ = ( "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, "#pattern" : PATTERN, - "#options" : {"videos": False}, + "#options" : {"videos": False, "audio": False}, }, { @@ -53,7 +54,7 @@ __tests__ = ( "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, "#pattern" : PATTERN, - "#options" : {"videos": False}, + "#options" : {"videos": False, "audio": False}, }, { @@ -62,7 +63,7 @@ __tests__ = ( "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, "#pattern" : PATTERN, - "#options" : {"videos": False}, + "#options" : {"videos": False, "audio": False}, }, { @@ -71,7 +72,7 @@ __tests__ = ( "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, "#pattern" : PATTERN, - "#options" : {"videos": False}, + "#options" : {"videos": False, "audio": False}, }, { @@ -80,7 +81,7 @@ __tests__ = ( "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, "#pattern" : PATTERN, - "#options" : {"videos": False}, + "#options" : {"videos": False, "audio": False}, }, { @@ -89,7 +90,7 @@ __tests__ = ( "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, "#pattern" : PATTERN, - "#options" : {"videos": False}, + "#options" : {"videos": False, "audio": False}, }, { @@ -97,7 +98,7 @@ __tests__ = ( "#comment" : "deleted post", "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, - "#options" : {"videos": False}, + "#options" : {"videos": False, "audio": False}, "count" : 0, }, @@ -107,7 +108,7 @@ __tests__ = ( "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, "#urls" : "ytdl:https://www.tiktok.com/@memezar/video/7449708266168274208", - "#options" : {"videos": True}, + "#options" : {"videos": True, "audio": True}, }, { @@ -116,7 +117,7 @@ __tests__ = ( "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, "#urls" : "ytdl:https://www.tiktok.com/@memezar/video/7449708266168274208", - "#options" : {"videos": True}, + "#options" : {"videos": True, "audio": True}, }, { @@ -241,17 +242,8 @@ __tests__ = ( "#comment" : "User profile", "#category" : ("", "tiktok", "user"), "#class" : tiktok.TiktokUserExtractor, - "#pattern" : PATTERN_WITH_AUDIO, - "#options" : {"videos": True, "tiktok-range": "1-10"}, -}, - -{ - "#url" : "https://www.tiktok.com/@chillezy/", - "#comment" : "User profile without audio or videos", - "#category" : ("", "tiktok", "user"), - "#class" : tiktok.TiktokUserExtractor, - "#pattern" : PATTERN, - "#options" : {"videos": False, "tiktok-range": "1-10"}, + "#pattern" : USER_PATTERN, + "#options" : {"videos": True, "audio": True, "tiktok-range": "1-10"}, }, {