Automatically detect and bypass Cloudflare challenge pages

TODO: cache and re-apply cf_clearance cookies
2019-03-10 15:31:33 +01:00
parent 25aaf55514
commit 6dae6bee37
5 changed files with 55 additions and 57 deletions
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -18,7 +18,7 @@ import requests
 import threading
 import http.cookiejar
 from .message import Message
-from .. import config, text, exception
+from .. import config, text, exception, cloudflare


 class Extractor():
@@ -86,6 +86,10 @@ class Extractor():
                    if encoding:
                        response.encoding = encoding
                    return response
+                if cloudflare.is_challenge(response):
+                    self.log.info("Solving Cloudflare challenge")
+                    url = cloudflare.solve_challenge(session, response, kwargs)
+                    continue

                msg = "{}: {} for url: {}".format(code, response.reason, url)
                if code < 500 and code != 429:
--- a/gallery_dl/extractor/kissmanga.py
+++ b/gallery_dl/extractor/kissmanga.py
@@ -9,17 +9,12 @@
 """Extract manga-chapters and entire manga from https://kissmanga.com/"""

 from .common import ChapterExtractor, MangaExtractor
-from .. import text, cloudflare, aes, exception
+from .. import text, aes, exception
 from ..cache import cache
 import hashlib
 import ast
 import re

-IV = [
-    0xa5, 0xe8, 0xe2, 0xe9, 0xc2, 0x72, 0x1b, 0xe0,
-    0xa8, 0x4a, 0xd6, 0x60, 0xc4, 0x72, 0xc1, 0xf3
-]
-

 class KissmangaBase():
    """Base class for kissmanga extractors"""
@@ -28,10 +23,10 @@ class KissmangaBase():
    root = "https://kissmanga.com"

    def request(self, url):
-        response = cloudflare.request_func(self, url)
+        response = super().request(url)
        if response.history and "/Message/AreYouHuman?" in response.url:
            self.log.error("Requesting too many pages caused a redirect to %s."
-                           " Try visiting this URL in your browser and solving"
+                           " Try visiting this URL in your browser and solve"
                           " the CAPTCHA to continue.", response.url)
            raise exception.StopExtraction()
        return response
@@ -112,8 +107,10 @@ class KissmangaChapterExtractor(KissmangaBase, ChapterExtractor):
        self.session.headers["Referer"] = None
        try:
            key = self.build_aes_key(page)
+            iv = (0xa5, 0xe8, 0xe2, 0xe9, 0xc2, 0x72, 0x1b, 0xe0,
+                  0xa8, 0x4a, 0xd6, 0x60, 0xc4, 0x72, 0xc1, 0xf3)
            return [
-                (aes.aes_cbc_decrypt_text(data, key, IV), None)
+                (aes.aes_cbc_decrypt_text(data, key, iv), None)
                for data in text.extract_iter(
                    page, 'lstImages.push(wrapKA("', '"'
                )
--- a/gallery_dl/extractor/komikcast.py
+++ b/gallery_dl/extractor/komikcast.py
@@ -9,7 +9,7 @@
 """Extract manga-chapters and entire manga from https://komikcast.com/"""

 from .common import ChapterExtractor, MangaExtractor
-from .. import text, cloudflare
+from .. import text
 import re


@@ -18,8 +18,6 @@ class KomikcastBase():
    category = "komikcast"
    root = "https://komikcast.com"

-    request = cloudflare.request_func
-
    @staticmethod
    def parse_chapter_string(chapter_string, data=None):
        """Parse 'chapter_string' value and add its info to 'data'"""
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -9,7 +9,7 @@
 """Extract comic-issues and entire comics from https://readcomiconline.to/"""

 from .common import ChapterExtractor, MangaExtractor
-from .. import text, cloudflare
+from .. import text
 import re


@@ -21,8 +21,6 @@ class ReadcomiconlineBase():
    archive_fmt = "{issue_id}_{page}"
    root = "https://readcomiconline.to"

-    request = cloudflare.request_func
-

 class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
    """Extractor for comic-issues from readcomiconline.to"""