From 5d7ca76885ed99f74d18a9b2048cd58d933ef2e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 24 Apr 2020 22:47:27 +0200 Subject: [PATCH] retry Cloudflare challenges --- gallery_dl/cloudflare.py | 7 +++---- gallery_dl/extractor/common.py | 12 +++++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/gallery_dl/cloudflare.py b/gallery_dl/cloudflare.py index 7010eaa3..e3ebd1a4 100644 --- a/gallery_dl/cloudflare.py +++ b/gallery_dl/cloudflare.py @@ -13,7 +13,7 @@ import time import operator import collections import urllib.parse -from . import text, exception +from . import text from .cache import memcache @@ -58,14 +58,13 @@ def solve_challenge(session, response, kwargs): cookie.name: cookie.value for cookie in cf_response.cookies } + if not cookies: import logging log = logging.getLogger("cloudflare") - rtype = "CAPTCHA" if is_captcha(cf_response) else "Unexpected" - log.error("%s response", rtype) log.debug("Headers:\n%s", cf_response.headers) log.debug("Content:\n%s", cf_response.text) - raise exception.StopExtraction() + return cf_response, None, None domain = next(iter(cf_response.cookies)).domain cookies["__cfduid"] = response.cookies.get("__cfduid", "") diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 8986c997..3a282c28 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -99,18 +99,20 @@ class Extractor(): return response if notfound and code == 404: raise exception.NotFoundError(notfound) + + reason = response.reason if cloudflare.is_challenge(response): self.log.info("Solving Cloudflare challenge") response, domain, cookies = cloudflare.solve_challenge( session, response, kwargs) - if response.status_code >= 400: - continue - cloudflare.cookies.update(self.category, (domain, cookies)) - return response + if cookies: + cloudflare.cookies.update( + self.category, (domain, cookies)) + return response if cloudflare.is_captcha(response): self.log.warning("Cloudflare CAPTCHA") - msg = "'{} {}' for '{}'".format(code, response.reason, url) + msg = "'{} {}' for '{}'".format(code, reason, url) if code < 500 and code != 429 and code != 430: break