[kissmanga][readcomiconline] add 'captcha' option (#279)

Adds an option to configure how CAPTCHA page redirects are handled: either interactively wait for the user to solve the CAPTCHA, or raise StopExtraction like before.
2019-05-27 22:24:48 +02:00
parent e30ada162d
commit 4465a3ea68
4 changed files with 60 additions and 22 deletions
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -593,6 +593,18 @@ Description Controls whether to choose the GIF or MP4 version of an animation.
 =========== =====
 extractor.kissmanga.captcha
 ---------------------------
 =========== =====
 Type        ``string``
 Default     ``"stop"``
 Description Controls how to handle redirects to CAPTCHA pages.
            * ``"stop"``: Stop the current extractor run.
            * ``"wait"``: Ask the user to solve the CAPTCHA and wait.
 =========== =====
 extractor.oauth.browser
 -----------------------
 =========== =====
@@ -646,6 +658,18 @@ Description Minimum and maximum wait time in seconds between HTTP requests
 =========== =====
 extractor.readcomiconline.captcha
 ---------------------------------
 =========== =====
 Type        ``string``
 Default     ``"stop"``
 Description Controls how to handle redirects to CAPTCHA pages.
            * ``"stop"``: Stop the current extractor run.
            * ``"wait"``: Ask the user to solve the CAPTCHA and wait.
 =========== =====
 extractor.recursive.blacklist
 -----------------------------
 =========== =====
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -62,6 +62,10 @@
        {
            "mp4": true
        },
        "kissmanga":
        {
            "captcha": "stop"
        },
        "nijie":
        {
            "username": null,
@@ -82,6 +86,10 @@
            "wait-min": 3.0,
            "wait-max": 6.0
        },
        "readcomiconline":
        {
            "captcha": "stop"
        },
        "recursive":
        {
            "blacklist": ["directlink", "oauth", "recursive", "test"]
--- a/gallery_dl/extractor/kissmanga.py
+++ b/gallery_dl/extractor/kissmanga.py
@@ -8,7 +8,7 @@
 """Extract manga-chapters and entire manga from https://kissmanga.com/"""
-from .common import ChapterExtractor, MangaExtractor
+from .common import ChapterExtractor, MangaExtractor, Extractor
 from .. import text, aes, exception
 from ..cache import cache
 import hashlib
@@ -16,21 +16,35 @@ import ast
 import re
-class KissmangaBase():
+class RedirectMixin():
    """Detect and handle redirects to CAPTCHA pages"""
    def request(self, url):
        """Request 'url' and handle redirects to a CAPTCHA page

        The request is repeated until the response is not a redirect to
        an "/AreYouHuman" URL; that response is then returned.

        What happens on a CAPTCHA redirect depends on the 'captcha'
        config value (default "stop"):
        - "wait": log a warning with the CAPTCHA URL, block until the
          user presses ENTER, and send the request again
        - anything else: log an error with the CAPTCHA URL and
          raise exception.StopExtraction
        """
        while True:
            response = Extractor.request(self, url)
            # Only a response that was redirected (non-empty history) AND
            # ended up on an "/AreYouHuman" URL counts as a CAPTCHA page.
            if not response.history or "/AreYouHuman" not in response.url:
                return response
            if self.config("captcha", "stop") == "wait":
                self.log.warning(
                    "Redirect to \n%s\nVisit this URL in your browser, solve "
                    "the CAPTCHA, and press ENTER to continue", response.url)
                try:
                    # Blocks until the user confirms; the entered text
                    # itself is discarded.
                    input()
                except (EOFError, OSError):
                    # NOTE(review): with a closed or unreadable stdin,
                    # input() raises right away and the loop retries with
                    # no pause in between -- confirm this is intended.
                    pass
            else:
                self.log.error(
                    "Redirect to \n%s\nVisit this URL in your browser and "
                    "solve the CAPTCHA to continue", response.url)
                raise exception.StopExtraction()
 class KissmangaBase(RedirectMixin):
    """Base class for kissmanga extractors"""
    category = "kissmanga"
    archive_fmt = "{chapter_id}_{page}"
    root = "https://kissmanga.com"
    def request(self, url):
        response = super().request(url)
        if response.history and "/AreYouHuman" in response.url:
            self.log.error("Redirect to \n%s\n"
                           "Visit this URL in your browser and solve "
                           "the CAPTCHA to continue.", response.url)
            raise exception.StopExtraction()
        return response
    @staticmethod
    def parse_chapter_string(data):
        """Parse 'chapter_string' value contained in 'data'"""
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -9,11 +9,12 @@
 """Extract comic-issues and entire comics from https://readcomiconline.to/"""
 from .common import ChapterExtractor, MangaExtractor
-from .. import text, exception
+from .kissmanga import RedirectMixin
 from .. import text
 import re
-class ReadcomiconlineBase():
+class ReadcomiconlineBase(RedirectMixin):
    """Base class for readcomiconline extractors"""
    category = "readcomiconline"
    directory_fmt = ("{category}", "{comic}", "{issue:>03}")
@@ -21,15 +22,6 @@ class ReadcomiconlineBase():
    archive_fmt = "{issue_id}_{page}"
    root = "https://readcomiconline.to"
    def request(self, url):
        response = super().request(url)
        if response.history and "/AreYouHuman" in response.url:
            self.log.error("Redirect to \n%s\n"
                           "Visit this URL in your browser and solve "
                           "the CAPTCHA to continue.", response.url)
            raise exception.StopExtraction()
        return response
 class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
    """Extractor for comic-issues from readcomiconline.to"""