diff --git a/gallery_dl/cloudflare.py b/gallery_dl/cloudflare.py
index 43ccdeb1..0cf5a57b 100644
--- a/gallery_dl/cloudflare.py
+++ b/gallery_dl/cloudflare.py
@@ -8,11 +8,11 @@
"""Methods to access sites behind Cloudflare protection"""
-import re
import time
import operator
import collections
import urllib.parse
+from xml.etree import ElementTree
from . import text
from .cache import memcache
@@ -41,12 +41,16 @@ def solve_challenge(session, response, kwargs):
url = root + text.unescape(text.extract(page, 'action="', '"')[0])
headers["Referer"] = response.url
- for inpt in text.extract_iter(page, ""):
- name = text.extract(inpt, 'name="', '"')[0]
+ form = text.extract(page, 'id="challenge-form"', '')[0]
+ for element in ElementTree.fromstring(
+ "" + form + "").findall("input"):
+ name = element.attrib.get("name")
+ if not name:
+ continue
if name == "jschl_answer":
value = solve_js_challenge(page, parsed.netloc)
else:
- value = text.unescape(text.extract(inpt, 'value="', '"')[0])
+ value = element.attrib.get("value")
params[name] = value
time.sleep(4)
@@ -84,6 +88,8 @@ def solve_js_challenge(page, netloc):
variable = "{}.{}".format(data["var"], data["key"])
vlength = len(variable)
+ k = text.extract(page, "k = '", "'")[0]
+
# evaluate the initial expression
solution = evaluate_expression(data["expr"], page, netloc)
@@ -97,7 +103,7 @@ def solve_js_challenge(page, netloc):
# select arithmetic function based on operator (+/-/*)
func = OPERATORS[expr[vlength]]
# evaluate the rest of the expression
- value = evaluate_expression(expr[vlength+2:], page, netloc)
+ value = evaluate_expression(expr[vlength+2:], page, netloc, k)
# combine expression value with our current solution
solution = func(solution, value)
@@ -110,17 +116,18 @@ def solve_js_challenge(page, netloc):
solution = "{:.10f}".format(solution)
return solution
+ elif expr.startswith("k+="):
+ k += str(evaluate_expression(expr[3:], page, netloc))
-def evaluate_expression(expr, page, netloc, *,
- split_re=re.compile(r"[(+]+([^)]*)\)")):
+
+def evaluate_expression(expr, page, netloc, k=""):
"""Evaluate a single Javascript expression for the challenge"""
if expr.startswith("function(p)"):
# get HTML element with ID k and evaluate the expression inside
# 'eval(eval("document.getElementById(k).innerHTML"))'
- k, pos = text.extract(page, "k = '", "'")
- e, pos = text.extract(page, 'id="'+k+'"', '<')
- return evaluate_expression(e.partition(">")[2], page, netloc)
+ expr = text.extract(page, 'id="'+k+'"', '<')[0]
+ return evaluate_expression(expr.partition(">")[2], page, netloc)
if "/" in expr:
# split the expression in numerator and denominator subexpressions,