retry Cloudflare challenges

This commit is contained in:
Mike Fährmann
2020-04-24 22:47:27 +02:00
parent 3eab07739f
commit 5d7ca76885
2 changed files with 10 additions and 9 deletions

View File

@@ -13,7 +13,7 @@ import time
import operator
import collections
import urllib.parse
from . import text, exception
from . import text
from .cache import memcache
@@ -58,14 +58,13 @@ def solve_challenge(session, response, kwargs):
cookie.name: cookie.value
for cookie in cf_response.cookies
}
if not cookies:
import logging
log = logging.getLogger("cloudflare")
rtype = "CAPTCHA" if is_captcha(cf_response) else "Unexpected"
log.error("%s response", rtype)
log.debug("Headers:\n%s", cf_response.headers)
log.debug("Content:\n%s", cf_response.text)
raise exception.StopExtraction()
return cf_response, None, None
domain = next(iter(cf_response.cookies)).domain
cookies["__cfduid"] = response.cookies.get("__cfduid", "")

View File

@@ -99,18 +99,20 @@ class Extractor():
return response
if notfound and code == 404:
raise exception.NotFoundError(notfound)
reason = response.reason
if cloudflare.is_challenge(response):
self.log.info("Solving Cloudflare challenge")
response, domain, cookies = cloudflare.solve_challenge(
session, response, kwargs)
if response.status_code >= 400:
continue
cloudflare.cookies.update(self.category, (domain, cookies))
return response
if cookies:
cloudflare.cookies.update(
self.category, (domain, cookies))
return response
if cloudflare.is_captcha(response):
self.log.warning("Cloudflare CAPTCHA")
msg = "'{} {}' for '{}'".format(code, response.reason, url)
msg = "'{} {}' for '{}'".format(code, reason, url)
if code < 500 and code != 429 and code != 430:
break