update cloudflare bypass (wip)

This commit adds support for the two new JS expressions embedded in the
overall challenge code.

It does compute the correct 'jschl_answer' value, but the HTTP request to
/cdn-cgi/l/chk_jschl to get the 'cf_clearance' cookie always results in
a 403 response with a CAPTCHA inside (hence 'wip').

All attempts to make this HTTP request indistinguishable from one sent by a
regular web browser (which passes the test) have had no effect. These include:
- using the exact same HTTP headers as a web browser
- following the same query argument order
- trying different wait times
This commit is contained in:
Mike Fährmann
2019-04-01 15:14:59 +02:00
parent 0f02e85961
commit 9ebd29fcc1

View File

@@ -12,7 +12,7 @@ import re
import time
import operator
import urllib.parse
from . import text
from . import text, exception
from .cache import memcache
@@ -22,6 +22,11 @@ def is_challenge(response):
b"jschl-answer" in response.content)
def is_captcha(response):
    """Return True if 'response' is a 403 containing a captcha-bypass marker"""
    # anything other than a 403 is never treated as a CAPTCHA page;
    # the body is not inspected in that case
    if response.status_code != 403:
        return False
    return b'name="captcha-bypass"' in response.content
def solve_challenge(session, response, kwargs):
"""Solve Cloudflare challenge and get cfclearance cookie"""
parsed = urllib.parse.urlsplit(response.url)
@@ -35,8 +40,8 @@ def solve_challenge(session, response, kwargs):
page = response.text
params["s"] = text.extract(page, 'name="s" value="', '"')[0]
params["pass"] = text.extract(page, 'name="pass" value="', '"')[0]
params["jschl_vc"] = text.extract(page, 'name="jschl_vc" value="', '"')[0]
params["pass"] = text.extract(page, 'name="pass" value="', '"')[0]
params["jschl_answer"] = solve_js_challenge(page, parsed.netloc)
headers["Referer"] = response.url
@@ -46,7 +51,15 @@ def solve_challenge(session, response, kwargs):
cf_kwargs["allow_redirects"] = False
cf_response = session.request(response.request.method, url, **cf_kwargs)
location = cf_response.headers["Location"]
location = cf_response.headers.get("Location")
if not location:
import logging
log = logging.getLogger("cloudflare")
rtype = "CAPTCHA" if is_captcha(cf_response) else "Unexpected"
log.error("%s response", rtype)
log.debug("Headers:\n%s", cf_response.headers)
log.debug("Content:\n%s", cf_response.text)
raise exception.StopExtraction()
if location[0] == "/":
location = root + location
@@ -73,7 +86,7 @@ def solve_js_challenge(page, netloc):
vlength = len(variable)
# evaluate the initial expression
solution = evaluate_expression(data["expr"])
solution = evaluate_expression(data["expr"], page, netloc)
# iterate over all remaining expressions
# and combine their values in 'solution'
@@ -85,37 +98,55 @@ def solve_js_challenge(page, netloc):
# select arithmetic function based on operator (+/-/*)
func = OPERATORS[expr[vlength]]
# evaluate the rest of the expression
value = evaluate_expression(expr[vlength+2:])
value = evaluate_expression(expr[vlength+2:], page, netloc)
# combine expression value with our current solution
solution = func(solution, value)
elif expr.startswith("a.value"):
# add length of hostname
solution += len(netloc)
if "t.length)" in expr:
# add length of hostname
solution += len(netloc)
if ".toFixed(" in expr:
# trim solution to 10 decimal places
# and strip trailing zeros
solution = "{:.10f}".format(solution).rstrip("0")
return solution
def evaluate_expression(expr, split_re=re.compile(r"\(+([^)]*)\)")):
def evaluate_expression(expr, page, netloc, *,
split_re=re.compile(r"[(+]+([^)]*)\)")):
"""Evaluate a single Javascript expression for the challenge"""
if expr.startswith("function(p)"):
# get HTML element with ID k and evaluate the expression inside
# 'eval(eval("document.getElementById(k).innerHTML"))'
k, pos = text.extract(page, "k = '", "'")
e, pos = text.extract(page, 'id="'+k+'"', '<')
return evaluate_expression(e.partition(">")[2], page, netloc)
if "/" in expr:
# split the expression in numerator and denominator subexpressions,
# evaluate them separately,
# and return their fraction-result
num, _, denom = expr.partition("/")
return evaluate_expression(num) / evaluate_expression(denom)
num = evaluate_expression(num, page, netloc)
denom = evaluate_expression(denom, page, netloc)
return num / denom
if "function(p)" in expr:
# split initial expression and function code
initial, _, func = expr.partition("function(p)")
# evaluate said expression
initial = evaluate_expression(initial, page, netloc)
# get function argument and use it as index into 'netloc'
index = evaluate_expression(func[func.index("}")+1:], page, netloc)
return initial + ord(netloc[int(index)])
# iterate over all subexpressions,
# evaluate them,
# and accumulate their values in 'result'
result = ""
for subexpr in split_re.findall(expr):
for subexpr in split_re.findall(expr) or (expr,):
result += str(sum(
VALUES[part]
for part in subexpr.split("[]")
@@ -133,6 +164,7 @@ VALUES = {
"": 0,
"+": 0,
"!+": 1,
"!!": 1,
"+!!": 1,
}