[tiktok] Restructure to allow user extractors to provide their own rehydration data (#8848)

This commit is contained in:
CasualYouTuber31
2026-01-30 14:18:56 +00:00
committed by GitHub
parent 3445c51ca4
commit 2d01fef300

View File

@@ -43,6 +43,7 @@ class TiktokExtractor(Extractor):
def items(self):
for tiktok_url in self.posts():
tiktok_url = self._sanitize_url(tiktok_url)
data = self._extract_rehydration_data(tiktok_url)
if "webapp.video-detail" not in data:
# Only /video/ links result in the video-detail dict we need.
@@ -51,11 +52,10 @@ class TiktokExtractor(Extractor):
data["seo.abtest"]["canonical"])
data = self._extract_rehydration_data(tiktok_url)
video_detail = data["webapp.video-detail"]
if not self._check_status_code(video_detail, tiktok_url, "post"):
continue
post = video_detail["itemInfo"]["itemStruct"]
post["user"] = (a := post.get("author")) and a["uniqueId"] or ""
post["date"] = self.parse_timestamp(post["createTime"])
post["post_type"] = "image" if "imagePost" in post else "video"
@@ -414,7 +414,7 @@ class TiktokPostExtractor(TiktokExtractor):
def posts(self):
user, post_id = self.groups
url = f"{self.root}/@{user or ''}/video/{post_id}"
-return (url,)
+return {url: None}
class TiktokVmpostExtractor(TiktokExtractor):
@@ -709,13 +709,13 @@ class TiktokSavedExtractor(TiktokExtractor):
self.audio)
-class TiktokFollowingExtractor(TiktokUserExtractor):
+class TiktokFollowingExtractor(TiktokExtractor):
"""Extract all of the stories of all of the users you follow"""
subcategory = "following"
pattern = rf"{BASE_PATTERN}/following"
example = "https://www.tiktok.com/following"
-def items(self):
+def posts(self):
"""Attempt to extract all of the stories of all of the accounts
the user follows"""
@@ -732,7 +732,7 @@ class TiktokFollowingExtractor(TiktokUserExtractor):
self.log.warning("%s: No followers with stories could be "
"extracted", self.url)
-entries = []
+entries = {}
# Batch all of the users up into groups of at most ten and use the
# batch endpoint to improve performance. The response to the story user
# list request may also include the user themselves, so skip them if
@@ -769,13 +769,11 @@ class TiktokFollowingExtractor(TiktokUserExtractor):
request.execute(self, f"Batch {batch_number}", query_parameters)
# We technically don't need to have the correct user name in the
# URL and it's easier to just ignore it here.
-entries += request.generate_urls("https://www.tiktok.com/@_",
-self.video, self.photo,
-self.audio)
+entries.update(request.generate_urls("https://www.tiktok.com/@_",
+self.video, self.photo,
+self.audio))
-for video in entries:
-data = {"_extractor": TiktokPostExtractor}
-yield Message.Queue, video, data
+return entries
def _is_current_user(self, user_id):
self._ensure_rehydration_data_app_context_cache_is_populated()
@@ -1084,9 +1082,10 @@ class TiktokPaginationRequest:
Returns
-------
-list
-Ideally one URL for each item, although subclasses are
-permitted to return a list of any format they wish.
+dict
+Ideally one URL for each item, that points to a video detail
+object, although subclasses are permitted to return a list
+or dict of any format they wish.
"""
return []
@@ -1193,7 +1192,7 @@ class TiktokItemListRequest(TiktokPaginationRequest):
return len(self.items) > max(r.stop for r in self.range_predicate) - 1
def generate_urls(self, profile_url, video, photo, audio):
-urls = []
+urls = {}
for index, id in enumerate(self.items.keys()):
if not self._matches_filters(self.items.get(id), index + 1, video,
photo, audio):
@@ -1207,7 +1206,7 @@ class TiktokItemListRequest(TiktokPaginationRequest):
except KeyError:
# Use the given profile URL as a back up.
url = f"{profile_url}/video/{id}"
-urls.append(url)
+urls[url] = self.items.get(id)
return urls
def _matches_filters(self, item, index, video, photo, audio):