update cloudflare bypass (wip)
This commit adds support for the two new JS expressions embedded in the overall challenge code. It does compute the correct 'js_answer' value, but the HTTP request to /cdn-cgi/l/chk_jschl to get the 'cf_clearance' cookie always results in a 403 response with a CAPTCHA inside (hence 'wip'). All steps taken to make this HTTP request indistinguishable from that of a regular web browser (which passes the test) have shown no effect. This includes: using the exact same HTTP headers as a web browser; following the same query argument order; and trying different wait times.
This commit is contained in:
@@ -12,7 +12,7 @@ import re
|
||||
import time
|
||||
import operator
|
||||
import urllib.parse
|
||||
from . import text
|
||||
from . import text, exception
|
||||
from .cache import memcache
|
||||
|
||||
|
||||
@@ -22,6 +22,11 @@ def is_challenge(response):
|
||||
b"jschl-answer" in response.content)
|
||||
|
||||
|
||||
def is_captcha(response):
|
||||
return (response.status_code == 403 and
|
||||
b'name="captcha-bypass"' in response.content)
|
||||
|
||||
|
||||
def solve_challenge(session, response, kwargs):
|
||||
"""Solve Cloudflare challenge and get cfclearance cookie"""
|
||||
parsed = urllib.parse.urlsplit(response.url)
|
||||
@@ -35,8 +40,8 @@ def solve_challenge(session, response, kwargs):
|
||||
|
||||
page = response.text
|
||||
params["s"] = text.extract(page, 'name="s" value="', '"')[0]
|
||||
params["pass"] = text.extract(page, 'name="pass" value="', '"')[0]
|
||||
params["jschl_vc"] = text.extract(page, 'name="jschl_vc" value="', '"')[0]
|
||||
params["pass"] = text.extract(page, 'name="pass" value="', '"')[0]
|
||||
params["jschl_answer"] = solve_js_challenge(page, parsed.netloc)
|
||||
headers["Referer"] = response.url
|
||||
|
||||
@@ -46,7 +51,15 @@ def solve_challenge(session, response, kwargs):
|
||||
cf_kwargs["allow_redirects"] = False
|
||||
cf_response = session.request(response.request.method, url, **cf_kwargs)
|
||||
|
||||
location = cf_response.headers["Location"]
|
||||
location = cf_response.headers.get("Location")
|
||||
if not location:
|
||||
import logging
|
||||
log = logging.getLogger("cloudflare")
|
||||
rtype = "CAPTCHA" if is_captcha(cf_response) else "Unexpected"
|
||||
log.error("%s response", rtype)
|
||||
log.debug("Headers:\n%s", cf_response.headers)
|
||||
log.debug("Content:\n%s", cf_response.text)
|
||||
raise exception.StopExtraction()
|
||||
if location[0] == "/":
|
||||
location = root + location
|
||||
|
||||
@@ -73,7 +86,7 @@ def solve_js_challenge(page, netloc):
|
||||
vlength = len(variable)
|
||||
|
||||
# evaluate the initial expression
|
||||
solution = evaluate_expression(data["expr"])
|
||||
solution = evaluate_expression(data["expr"], page, netloc)
|
||||
|
||||
# iterate over all remaining expressions
|
||||
# and combine their values in 'solution'
|
||||
@@ -85,37 +98,55 @@ def solve_js_challenge(page, netloc):
|
||||
# select arithmetic function based on operator (+/-/*)
|
||||
func = OPERATORS[expr[vlength]]
|
||||
# evaluate the rest of the expression
|
||||
value = evaluate_expression(expr[vlength+2:])
|
||||
value = evaluate_expression(expr[vlength+2:], page, netloc)
|
||||
# combine expression value with our current solution
|
||||
solution = func(solution, value)
|
||||
|
||||
elif expr.startswith("a.value"):
|
||||
# add length of hostname
|
||||
solution += len(netloc)
|
||||
|
||||
if "t.length)" in expr:
|
||||
# add length of hostname
|
||||
solution += len(netloc)
|
||||
if ".toFixed(" in expr:
|
||||
# trim solution to 10 decimal places
|
||||
# and strip trailing zeros
|
||||
solution = "{:.10f}".format(solution).rstrip("0")
|
||||
|
||||
return solution
|
||||
|
||||
|
||||
def evaluate_expression(expr, split_re=re.compile(r"\(+([^)]*)\)")):
|
||||
def evaluate_expression(expr, page, netloc, *,
|
||||
split_re=re.compile(r"[(+]+([^)]*)\)")):
|
||||
"""Evaluate a single Javascript expression for the challenge"""
|
||||
|
||||
if expr.startswith("function(p)"):
|
||||
# get HTML element with ID k and evaluate the expression inside
|
||||
# 'eval(eval("document.getElementById(k).innerHTML"))'
|
||||
k, pos = text.extract(page, "k = '", "'")
|
||||
e, pos = text.extract(page, 'id="'+k+'"', '<')
|
||||
return evaluate_expression(e.partition(">")[2], page, netloc)
|
||||
|
||||
if "/" in expr:
|
||||
# split the expression into numerator and denominator subexpressions,
|
||||
# evaluate them separately,
|
||||
# and return their fraction-result
|
||||
num, _, denom = expr.partition("/")
|
||||
return evaluate_expression(num) / evaluate_expression(denom)
|
||||
num = evaluate_expression(num, page, netloc)
|
||||
denom = evaluate_expression(denom, page, netloc)
|
||||
return num / denom
|
||||
|
||||
if "function(p)" in expr:
|
||||
# split initial expression and function code
|
||||
initial, _, func = expr.partition("function(p)")
|
||||
# evaluate said expression
|
||||
initial = evaluate_expression(initial, page, netloc)
|
||||
# get function argument and use it as index into 'netloc'
|
||||
index = evaluate_expression(func[func.index("}")+1:], page, netloc)
|
||||
return initial + ord(netloc[int(index)])
|
||||
|
||||
# iterate over all subexpressions,
|
||||
# evaluate them,
|
||||
# and accumulate their values in 'result'
|
||||
result = ""
|
||||
for subexpr in split_re.findall(expr):
|
||||
for subexpr in split_re.findall(expr) or (expr,):
|
||||
result += str(sum(
|
||||
VALUES[part]
|
||||
for part in subexpr.split("[]")
|
||||
@@ -133,6 +164,7 @@ VALUES = {
|
||||
"": 0,
|
||||
"+": 0,
|
||||
"!+": 1,
|
||||
"!!": 1,
|
||||
"+!!": 1,
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user