From dbf841ebd1c5b9d2fb44d23e16c2e05380ed7682 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 8 Jul 2020 23:22:33 +0200 Subject: [PATCH] prevent unhandled exception on Cloudflare challenges (#868) The relatively new v2 challenges aren't supported (*), but retrying often enough may yield a v1 challenge which can be solved. (*) and probably never will. They are far too complicated to do without a real browser. --- gallery_dl/cloudflare.py | 41 +++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/gallery_dl/cloudflare.py b/gallery_dl/cloudflare.py index 88068d54..0f49d611 100644 --- a/gallery_dl/cloudflare.py +++ b/gallery_dl/cloudflare.py @@ -32,8 +32,28 @@ def solve_challenge(session, response, kwargs): """Solve Cloudflare challenge and get cfclearance cookie""" parsed = urllib.parse.urlsplit(response.url) root = parsed.scheme + "://" + parsed.netloc - page = response.text + + cf_kwargs = {} + headers = cf_kwargs["headers"] = collections.OrderedDict() + params = cf_kwargs["data"] = collections.OrderedDict() + headers["Referer"] = response.url + + form = text.extract(page, 'id="challenge-form"', '')[0] + for element in ElementTree.fromstring( + "" + form + "").findall("input"): + name = element.attrib.get("name") + if not name: + continue + if name == "jschl_answer": + try: + value = solve_js_challenge(page, parsed.netloc) + except Exception: + return response, None, None + else: + value = element.attrib.get("value") + params[name] = value + try: params = {"ray": text.extract(page, '?ray=', '"')[0]} @@ -45,25 +65,8 @@ def solve_challenge(session, response, kwargs): except Exception: pass - cf_kwargs = {} - headers = cf_kwargs["headers"] = collections.OrderedDict() - params = cf_kwargs["data"] = collections.OrderedDict() - url = root + text.unescape(text.extract(page, 'action="', '"')[0]) - headers["Referer"] = response.url - - form = text.extract(page, 'id="challenge-form"', '')[0] - for element in ElementTree.fromstring( - "" + form + "").findall("input"): - name = element.attrib.get("name") - if not name: - continue - if name == "jschl_answer": - value = solve_js_challenge(page, parsed.netloc) - else: - value = element.attrib.get("value") - params[name] = value - time.sleep(4) + url = root + text.unescape(text.extract(page, 'action="', '"')[0]) cf_response = session.request("POST", url, **cf_kwargs) if cf_response.history: