From b0b1feaa674c37e023121da301c6202d6b248cc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 10 Jun 2020 21:04:33 +0200 Subject: [PATCH] request 'transparent.gif' when solving Cloudflare challenges Solving the challenge currently also works without these requests, but Cloudflare might use them to detect potential bots in the future. --- gallery_dl/cloudflare.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/gallery_dl/cloudflare.py b/gallery_dl/cloudflare.py index 0cf5a57b..857b4f31 100644 --- a/gallery_dl/cloudflare.py +++ b/gallery_dl/cloudflare.py @@ -33,11 +33,21 @@ def solve_challenge(session, response, kwargs): parsed = urllib.parse.urlsplit(response.url) root = parsed.scheme + "://" + parsed.netloc + page = response.text + try: + params = {"ray": text.extract(page, '?ray=', '"')[0]} + + url = root + "/cdn-cgi/images/trace/jschal/nojs/transparent.gif" + session.request("GET", url, params=params) + + url = root + "/cdn-cgi/images/trace/jschal/js/nocookie/transparent.gif" + session.request("GET", url, params=params) + except Exception: + pass + cf_kwargs = {} headers = cf_kwargs["headers"] = collections.OrderedDict() params = cf_kwargs["data"] = collections.OrderedDict() - - page = response.text url = root + text.unescape(text.extract(page, 'action="', '"')[0]) headers["Referer"] = response.url