[tiktok] solve JS challenges (#8850)
* [tiktok] First draft of a challenge resolver
* use stdlib sha256 implementation
* simplify 'resolve_challenge()' code
* set cookie domain and expires timestamp
* base64 -> binascii
* Avoid incorrect padding exceptions

---------

Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
This commit is contained in:
@@ -10,6 +10,8 @@ from .common import Extractor, Message, Dispatch
|
|||||||
from .. import text, util, ytdl, exception
|
from .. import text, util, ytdl, exception
|
||||||
import functools
|
import functools
|
||||||
import itertools
|
import itertools
|
||||||
|
import binascii
|
||||||
|
import hashlib
|
||||||
import random
|
import random
|
||||||
import time
|
import time
|
||||||
|
|
||||||
@@ -121,6 +123,8 @@ class TiktokExtractor(Extractor):
|
|||||||
def _extract_rehydration_data(self, url, additional_keys=[], *,
|
def _extract_rehydration_data(self, url, additional_keys=[], *,
|
||||||
has_keys=[]):
|
has_keys=[]):
|
||||||
tries = 0
|
tries = 0
|
||||||
|
html = None
|
||||||
|
challenge_attempt = False
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
response = self.request(url)
|
response = self.request(url)
|
||||||
@@ -141,15 +145,31 @@ class TiktokExtractor(Extractor):
|
|||||||
return data
|
return data
|
||||||
except (ValueError, KeyError):
|
except (ValueError, KeyError):
|
||||||
# We failed to retrieve rehydration data. This happens
|
# We failed to retrieve rehydration data. This happens
|
||||||
# relatively frequently when making many requests, so
|
# relatively frequently when making many requests, so retry.
|
||||||
# retry.
|
|
||||||
if tries >= self._retries:
|
if tries >= self._retries:
|
||||||
raise
|
raise
|
||||||
tries += 1
|
tries += 1
|
||||||
self.log.warning("%s: Failed to retrieve rehydration data "
|
self.log.warning("%s: Failed to retrieve rehydration data "
|
||||||
"(%s/%s)", url.rpartition("/")[2], tries,
|
"(%s/%s)", url.rpartition("/")[2], tries,
|
||||||
self._retries)
|
self._retries)
|
||||||
self.sleep(self._timeout, "retry")
|
if challenge_attempt:
|
||||||
|
self.sleep(self._timeout, "retry")
|
||||||
|
challenge_attempt = False
|
||||||
|
else:
|
||||||
|
self.log.info("Solving JavaScript challenge")
|
||||||
|
try:
|
||||||
|
self._solve_challenge(html)
|
||||||
|
except Exception as exc:
|
||||||
|
self.log.traceback(exc)
|
||||||
|
self.log.warning(
|
||||||
|
"%s: Failed to solve JavaScript challenge. If you "
|
||||||
|
"keep encountering this issue, please try again "
|
||||||
|
"with the --write-pages option and include the "
|
||||||
|
"resulting page in your bug report",
|
||||||
|
url.rpartition("/")[2])
|
||||||
|
self.sleep(self._timeout, "retry")
|
||||||
|
html = None
|
||||||
|
challenge_attempt = True
|
||||||
|
|
||||||
def _extract_rehydration_data_user(self, profile_url, additional_keys=()):
|
def _extract_rehydration_data_user(self, profile_url, additional_keys=()):
|
||||||
if profile_url in self.rehydration_data_cache:
|
if profile_url in self.rehydration_data_cache:
|
||||||
@@ -183,6 +203,35 @@ class TiktokExtractor(Extractor):
|
|||||||
self._extract_rehydration_data(
|
self._extract_rehydration_data(
|
||||||
"https://www.tiktok.com/", ["webapp.app-context"])
|
"https://www.tiktok.com/", ["webapp.app-context"])
|
||||||
|
|
||||||
|
def _solve_challenge(self, html):
    """Solve the hash challenge found in 'html' and store the answer cookies

    The challenge parameters are taken from the 'class' attribute of the
    elements with the IDs 'cs', 'wci', 'rci' and 'rs':

    cs  -- base64-encoded JSON object; its "v" member holds the
           base64-encoded hash prefix ("a") and expected digest ("c")
    wci -- name of the cookie that receives the solved challenge
    rci -- name of the cookie that receives the 'rs' value
    rs  -- value for the 'rci' cookie (that cookie is skipped when empty)

    The solution is the first integer below 1,000,000 whose decimal
    representation, appended to the prefix, produces a SHA-256 digest
    equal to the expected one.

    Raises exception.ExtractionError when no such integer exists.
    Malformed challenge data (invalid base64/JSON, missing keys) raises
    whatever the decoding step in question raises.
    """
    def class_attr(element_id):
        # NOTE(review): presumably text.extr() returns the text between
        # the two markers - confirm against the 'text' module
        tag = text.extr(html, 'id="' + element_id + '"', '>')
        return text.extr(tag, 'class="', '"')

    # "==" is appended before every decode so that values delivered
    # without padding do not trigger "incorrect padding" errors
    encoded = class_attr("cs")
    challenge = util.json_loads(
        binascii.a2b_base64(encoded + "==").decode())

    # search for the number that produces the expected digest
    params = challenge["v"]
    target = binascii.a2b_base64(params["c"] + "==")
    prefix = hashlib.sha256(binascii.a2b_base64(params["a"] + "=="))
    solution = None
    for candidate in map(str, range(1_000_000)):
        # copy the prefix state instead of re-hashing it each round
        hasher = prefix.copy()
        hasher.update(candidate.encode())
        if hasher.digest() == target:
            solution = candidate
            break
    if solution is None:
        raise exception.ExtractionError("failed to find matching digest")

    # cookie names and the value of the second cookie
    name_solved = class_attr("wci")
    name_extra = class_attr("rci")
    value_extra = class_attr("rs")

    # both cookies share the same domain and expire 5 seconds from now
    cookie_args = {
        "domain" : self.cookies_domain,
        "expires": int(time.time()) + 5,
    }

    # add the solution to the challenge object and send it back
    # base64-encoded
    challenge["d"] = binascii.b2a_base64(
        solution.encode(), newline=False).decode()
    value_solved = binascii.b2a_base64(
        util.json_dumps(challenge).encode(), newline=False).decode()
    self.cookies.set(name_solved, value_solved, **cookie_args)

    if value_extra:
        self.cookies.set(name_extra, value_extra, **cookie_args)
|
||||||
|
|
||||||
def _extract_sec_uid(self, profile_url, user_name):
|
def _extract_sec_uid(self, profile_url, user_name):
|
||||||
sec_uid = self._extract_id(
|
sec_uid = self._extract_id(
|
||||||
profile_url, user_name, r"MS4wLjABAAAA[\w-]{64}", "secUid")
|
profile_url, user_name, r"MS4wLjABAAAA[\w-]{64}", "secUid")
|
||||||
|
|||||||
Reference in New Issue
Block a user