[tiktok] solve JS challenges (#8850)

* [tiktok] First draft of a challenge resolver
* use stdlib sha256 implementation
* simplify 'resolve_challenge()' code
* set cookie domain and expires timestamp
* base64 -> binascii
* Avoid incorrect padding exceptions

---------

Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
This commit is contained in:
CasualYouTuber31
2026-01-26 08:55:53 +00:00
committed by GitHub
parent d19d5c8b6e
commit 702814654a

View File

@@ -10,6 +10,8 @@ from .common import Extractor, Message, Dispatch
from .. import text, util, ytdl, exception from .. import text, util, ytdl, exception
import functools import functools
import itertools import itertools
import binascii
import hashlib
import random import random
import time import time
@@ -121,6 +123,8 @@ class TiktokExtractor(Extractor):
def _extract_rehydration_data(self, url, additional_keys=[], *, def _extract_rehydration_data(self, url, additional_keys=[], *,
has_keys=[]): has_keys=[]):
tries = 0 tries = 0
html = None
challenge_attempt = False
while True: while True:
try: try:
response = self.request(url) response = self.request(url)
@@ -141,15 +145,31 @@ class TiktokExtractor(Extractor):
return data return data
except (ValueError, KeyError): except (ValueError, KeyError):
# We failed to retrieve rehydration data. This happens # We failed to retrieve rehydration data. This happens
# relatively frequently when making many requests, so # relatively frequently when making many requests, so retry.
# retry.
if tries >= self._retries: if tries >= self._retries:
raise raise
tries += 1 tries += 1
self.log.warning("%s: Failed to retrieve rehydration data " self.log.warning("%s: Failed to retrieve rehydration data "
"(%s/%s)", url.rpartition("/")[2], tries, "(%s/%s)", url.rpartition("/")[2], tries,
self._retries) self._retries)
self.sleep(self._timeout, "retry") if challenge_attempt:
self.sleep(self._timeout, "retry")
challenge_attempt = False
else:
self.log.info("Solving JavaScript challenge")
try:
self._solve_challenge(html)
except Exception as exc:
self.log.traceback(exc)
self.log.warning(
"%s: Failed to solve JavaScript challenge. If you "
"keep encountering this issue, please try again "
"with the --write-pages option and include the "
"resulting page in your bug report",
url.rpartition("/")[2])
self.sleep(self._timeout, "retry")
html = None
challenge_attempt = True
def _extract_rehydration_data_user(self, profile_url, additional_keys=()): def _extract_rehydration_data_user(self, profile_url, additional_keys=()):
if profile_url in self.rehydration_data_cache: if profile_url in self.rehydration_data_cache:
@@ -183,6 +203,35 @@ class TiktokExtractor(Extractor):
self._extract_rehydration_data( self._extract_rehydration_data(
"https://www.tiktok.com/", ["webapp.app-context"]) "https://www.tiktok.com/", ["webapp.app-context"])
def _solve_challenge(self, html):
    """Solve the JavaScript challenge found in 'html' and set its cookies

    The challenge is read from the 'class' attribute of the element with
    id "cs" as base64-encoded JSON. Its solution is the integer below
    1,000,000 whose decimal string, appended to the bytes decoded from
    ["v"]["a"], produces the SHA-256 digest decoded from ["v"]["c"].

    Raises exception.ExtractionError if no such integer exists.
    """
    def class_value(marker):
        # 'class' attribute value of the tag containing 'marker'
        return text.extr(text.extr(html, marker, '>'), 'class="', '"')

    def decode_b64(data):
        # append "==" so input without padding does not raise
        # an 'Incorrect padding' error
        return binascii.a2b_base64(data + "==")

    def encode_b64(data):
        return binascii.b2a_base64(data.encode(), newline=False)

    challenge = util.json_loads(decode_b64(class_value('id="cs"')).decode())

    # find index of expected digest
    target = decode_b64(challenge["v"]["c"])
    seed = hashlib.sha256(decode_b64(challenge["v"]["a"]))

    def matches(number):
        # hash a copy, so the seeded state can be reused for each candidate
        attempt = seed.copy()
        attempt.update(str(number).encode())
        return attempt.digest() == target

    solution = next(filter(matches, range(1_000_000)), None)
    if solution is None:
        raise exception.ExtractionError("failed to find matching digest")

    # extract cookie names
    name_result = class_value('id="wci"')
    name_extra = class_value('id="rci"')
    value_extra = class_value('id="rs"')

    # set cookie values
    cookie_args = {
        "domain" : self.cookies_domain,
        "expires": int(time.time()) + 5,
    }
    challenge["d"] = encode_b64(str(solution)).decode()
    answer = encode_b64(util.json_dumps(challenge))
    self.cookies.set(name_result, answer.decode(), **cookie_args)
    if value_extra:
        self.cookies.set(name_extra, value_extra, **cookie_args)
def _extract_sec_uid(self, profile_url, user_name): def _extract_sec_uid(self, profile_url, user_name):
sec_uid = self._extract_id( sec_uid = self._extract_id(
profile_url, user_name, r"MS4wLjABAAAA[\w-]{64}", "secUid") profile_url, user_name, r"MS4wLjABAAAA[\w-]{64}", "secUid")