[tiktok] solve JS challenges (#8850)

* [tiktok] First draft of a challenge resolver
* use stdlib sha256 implementation
* simplify 'resolve_challenge()' code
* set cookie domain and expires timestamp
* base64 -> binascii
* Avoid incorrect padding exceptions

---------

Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
This commit is contained in:
CasualYouTuber31
2026-01-26 08:55:53 +00:00
committed by GitHub
parent d19d5c8b6e
commit 702814654a

View File

@@ -10,6 +10,8 @@ from .common import Extractor, Message, Dispatch
from .. import text, util, ytdl, exception
import functools
import itertools
import binascii
import hashlib
import random
import time
@@ -121,6 +123,8 @@ class TiktokExtractor(Extractor):
def _extract_rehydration_data(self, url, additional_keys=[], *,
has_keys=[]):
tries = 0
html = None
challenge_attempt = False
while True:
try:
response = self.request(url)
@@ -141,15 +145,31 @@ class TiktokExtractor(Extractor):
return data
except (ValueError, KeyError):
# We failed to retrieve rehydration data. This happens
# relatively frequently when making many requests, so
# retry.
# relatively frequently when making many requests, so retry.
if tries >= self._retries:
raise
tries += 1
self.log.warning("%s: Failed to retrieve rehydration data "
"(%s/%s)", url.rpartition("/")[2], tries,
self._retries)
self.sleep(self._timeout, "retry")
if challenge_attempt:
self.sleep(self._timeout, "retry")
challenge_attempt = False
else:
self.log.info("Solving JavaScript challenge")
try:
self._solve_challenge(html)
except Exception as exc:
self.log.traceback(exc)
self.log.warning(
"%s: Failed to solve JavaScript challenge. If you "
"keep encountering this issue, please try again "
"with the --write-pages option and include the "
"resulting page in your bug report",
url.rpartition("/")[2])
self.sleep(self._timeout, "retry")
html = None
challenge_attempt = True
def _extract_rehydration_data_user(self, profile_url, additional_keys=()):
if profile_url in self.rehydration_data_cache:
@@ -183,6 +203,35 @@ class TiktokExtractor(Extractor):
self._extract_rehydration_data(
"https://www.tiktok.com/", ["webapp.app-context"])
def _solve_challenge(self, html):
    """Solve the challenge embedded in 'html' and store the answer as cookies

    The page carries a base64-encoded JSON object in the 'class' attribute
    of its 'id="cs"' element. Its "v" entry holds a base64 prefix ("a")
    and a base64 SHA-256 digest ("c"). This method searches for the
    integer below 1,000,000 whose decimal representation, appended to the
    decoded prefix, hashes to that digest. The answer is written back into
    the JSON object as "d", and the re-encoded object is set as a cookie
    named after the 'class' attribute of the 'id="wci"' element.
    If the 'id="rs"' element yields a non-empty value, it is set as a
    second cookie named after the 'id="rci"' element.

    Raises 'exception.ExtractionError' when no integer in the search
    range produces the expected digest.
    """
    def class_attr(marker):
        # 'class' attribute value of the tag that contains 'marker'
        return text.extr(text.extr(html, marker, '>'), 'class="', '"')

    def b64decode(data):
        # extra '==' avoids "Incorrect padding" errors on unpadded input
        return binascii.a2b_base64(data + "==")

    challenge = util.json_loads(b64decode(class_attr('id="cs"')).decode())
    params = challenge["v"]

    # brute-force the suffix that produces the expected digest
    target = b64decode(params["c"])
    seed = hashlib.sha256(b64decode(params["a"]))

    def digest_for(candidate):
        # copy the pre-seeded hash so the prefix is only hashed once
        probe = seed.copy()
        probe.update(str(candidate).encode())
        return probe.digest()

    answer = next(
        (n for n in range(1_000_000) if digest_for(n) == target), None)
    if answer is None:
        raise exception.ExtractionError("failed to find matching digest")

    # cookie names (and the optional second cookie's value) from the page
    answer_cookie = class_attr('id="wci"')
    extra_cookie = class_attr('id="rci"')
    extra_value = class_attr('id="rs"')

    # cookies expire 5 seconds from now
    cookie_args = {
        "domain" : self.cookies_domain,
        "expires": int(time.time()) + 5,
    }

    # store the answer in the challenge object and send it back re-encoded
    challenge["d"] = binascii.b2a_base64(
        str(answer).encode(), newline=False).decode()
    payload = binascii.b2a_base64(
        util.json_dumps(challenge).encode(), newline=False).decode()

    self.cookies.set(answer_cookie, payload, **cookie_args)
    if extra_value:
        self.cookies.set(extra_cookie, extra_value, **cookie_args)
def _extract_sec_uid(self, profile_url, user_name):
sec_uid = self._extract_id(
profile_url, user_name, r"MS4wLjABAAAA[\w-]{64}", "secUid")