retry Cloudflare challenges
This commit is contained in:
@@ -13,7 +13,7 @@ import time
|
|||||||
import operator
|
import operator
|
||||||
import collections
|
import collections
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from . import text, exception
|
from . import text
|
||||||
from .cache import memcache
|
from .cache import memcache
|
||||||
|
|
||||||
|
|
||||||
@@ -58,14 +58,13 @@ def solve_challenge(session, response, kwargs):
|
|||||||
cookie.name: cookie.value
|
cookie.name: cookie.value
|
||||||
for cookie in cf_response.cookies
|
for cookie in cf_response.cookies
|
||||||
}
|
}
|
||||||
|
|
||||||
if not cookies:
|
if not cookies:
|
||||||
import logging
|
import logging
|
||||||
log = logging.getLogger("cloudflare")
|
log = logging.getLogger("cloudflare")
|
||||||
rtype = "CAPTCHA" if is_captcha(cf_response) else "Unexpected"
|
|
||||||
log.error("%s response", rtype)
|
|
||||||
log.debug("Headers:\n%s", cf_response.headers)
|
log.debug("Headers:\n%s", cf_response.headers)
|
||||||
log.debug("Content:\n%s", cf_response.text)
|
log.debug("Content:\n%s", cf_response.text)
|
||||||
raise exception.StopExtraction()
|
return cf_response, None, None
|
||||||
|
|
||||||
domain = next(iter(cf_response.cookies)).domain
|
domain = next(iter(cf_response.cookies)).domain
|
||||||
cookies["__cfduid"] = response.cookies.get("__cfduid", "")
|
cookies["__cfduid"] = response.cookies.get("__cfduid", "")
|
||||||
|
|||||||
@@ -99,18 +99,20 @@ class Extractor():
|
|||||||
return response
|
return response
|
||||||
if notfound and code == 404:
|
if notfound and code == 404:
|
||||||
raise exception.NotFoundError(notfound)
|
raise exception.NotFoundError(notfound)
|
||||||
|
|
||||||
|
reason = response.reason
|
||||||
if cloudflare.is_challenge(response):
|
if cloudflare.is_challenge(response):
|
||||||
self.log.info("Solving Cloudflare challenge")
|
self.log.info("Solving Cloudflare challenge")
|
||||||
response, domain, cookies = cloudflare.solve_challenge(
|
response, domain, cookies = cloudflare.solve_challenge(
|
||||||
session, response, kwargs)
|
session, response, kwargs)
|
||||||
if response.status_code >= 400:
|
if cookies:
|
||||||
continue
|
cloudflare.cookies.update(
|
||||||
cloudflare.cookies.update(self.category, (domain, cookies))
|
self.category, (domain, cookies))
|
||||||
return response
|
return response
|
||||||
if cloudflare.is_captcha(response):
|
if cloudflare.is_captcha(response):
|
||||||
self.log.warning("Cloudflare CAPTCHA")
|
self.log.warning("Cloudflare CAPTCHA")
|
||||||
|
|
||||||
msg = "'{} {}' for '{}'".format(code, response.reason, url)
|
msg = "'{} {}' for '{}'".format(code, reason, url)
|
||||||
if code < 500 and code != 429 and code != 430:
|
if code < 500 and code != 429 and code != 430:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user