[tiktok] solve JS challenges (#8850)

* [tiktok] First draft of a challenge resolver
* use stdlib sha256 implementation
* simplify 'resolve_challenge()' code
* set cookie domain and expires timestamp
* base64 -> binascii
* Avoid incorrect padding exceptions

---------

Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
2026-01-26 08:55:53 +00:00
parent d19d5c8b6e
commit 702814654a
1 changed files with 52 additions and 3 deletions
--- a/gallery_dl/extractor/tiktok.py
+++ b/gallery_dl/extractor/tiktok.py
@@ -10,6 +10,8 @@ from .common import Extractor, Message, Dispatch
 from .. import text, util, ytdl, exception
 import functools
 import itertools
 import binascii
 import hashlib
 import random
 import time
@@ -121,6 +123,8 @@ class TiktokExtractor(Extractor):
    def _extract_rehydration_data(self, url, additional_keys=[], *,
                                  has_keys=[]):
        tries = 0
        html = None
        challenge_attempt = False
        while True:
            try:
                response = self.request(url)
@@ -141,15 +145,31 @@ class TiktokExtractor(Extractor):
                return data
            except (ValueError, KeyError):
                # We failed to retrieve rehydration data. This happens
-                # relatively frequently when making many requests, so
+                # relatively frequently when making many requests, so retry.
                # retry.
                if tries >= self._retries:
                    raise
                tries += 1
                self.log.warning("%s: Failed to retrieve rehydration data "
                                 "(%s/%s)", url.rpartition("/")[2], tries,
                                 self._retries)
-                self.sleep(self._timeout, "retry")
+                if challenge_attempt:
                    self.sleep(self._timeout, "retry")
                    challenge_attempt = False
                else:
                    self.log.info("Solving JavaScript challenge")
                    try:
                        self._solve_challenge(html)
                    except Exception as exc:
                        self.log.traceback(exc)
                        self.log.warning(
                            "%s: Failed to solve JavaScript challenge. If you "
                            "keep encountering this issue, please try again "
                            "with the --write-pages option and include the "
                            "resulting page in your bug report",
                            url.rpartition("/")[2])
                        self.sleep(self._timeout, "retry")
                    html = None
                    challenge_attempt = True
    def _extract_rehydration_data_user(self, profile_url, additional_keys=()):
        if profile_url in self.rehydration_data_cache:
@@ -183,6 +203,35 @@ class TiktokExtractor(Extractor):
                self._extract_rehydration_data(
                    "https://www.tiktok.com/", ["webapp.app-context"])
    def _solve_challenge(self, html):
        """Solve the JavaScript challenge found in 'html' and set its cookies

        The challenge is read from the 'class' attribute of the tag that
        contains 'id="cs"': a base64-encoded JSON object whose "v" entry
        holds a base64 prefix ("a") and a base64 SHA-256 digest ("c").

        The decimal strings of 0 through 999999 are tried in order until
        sha256(prefix + number) equals the digest. The matching number is
        stored base64-encoded under "d", and the whole object is written,
        base64-encoded, to the cookie named by the 'id="wci"' tag.
        If the 'id="rs"' tag has a non-empty class value, that value is
        additionally written to the cookie named by the 'id="rci"' tag.

        Raises exception.ExtractionError when no number matches. Errors
        raised while decoding the challenge payload are not caught here.
        """
        def class_attr(marker):
            # 'class' value of the first tag containing 'marker'
            return text.extr(text.extr(html, marker, '>'), 'class="', '"')

        def b64decode(data):
            # "==" is appended so that input lacking its padding
            # does not trigger an 'incorrect padding' error
            return binascii.a2b_base64(data + "==")

        def b64encode(data):
            return binascii.b2a_base64(data, newline=False)

        challenge = util.json_loads(b64decode(class_attr('id="cs"')).decode())

        # search for the number whose digest matches the expected one
        target = b64decode(challenge["v"]["c"])
        seed = hashlib.sha256(b64decode(challenge["v"]["a"]))
        solution = None
        for number in range(1_000_000):
            candidate = str(number).encode()
            # copy the pre-fed hash object instead of re-hashing the prefix
            attempt = seed.copy()
            attempt.update(candidate)
            if attempt.digest() == target:
                solution = candidate
                break
        if solution is None:
            raise exception.ExtractionError("failed to find matching digest")

        # cookie names (and the optional second cookie's value)
        # are taken from the page as well
        cookie_name = class_attr('id="wci"')
        extra_name = class_attr('id="rci"')
        extra_value = class_attr('id="rs"')

        # both cookies share the same domain and
        # an 'expires' timestamp five seconds from now
        cookie_args = {
            "domain": self.cookies_domain,
            "expires": int(time.time()) + 5,
        }

        challenge["d"] = b64encode(solution).decode()
        payload = b64encode(util.json_dumps(challenge).encode()).decode()
        self.cookies.set(cookie_name, payload, **cookie_args)
        if extra_value:
            self.cookies.set(extra_name, extra_value, **cookie_args)
    def _extract_sec_uid(self, profile_url, user_name):
        sec_uid = self._extract_id(
            profile_url, user_name, r"MS4wLjABAAAA[\w-]{64}", "secUid")