[kissmanga][readcomiconline] add 'captcha' option (#279)

to configure how to handle CAPTCHA page redirects:
- either interactively wait for the user to solve the CAPTCHA
- or raise StopExtraction like before
This commit is contained in:
Mike Fährmann
2019-05-27 22:24:48 +02:00
parent e30ada162d
commit 4465a3ea68
4 changed files with 60 additions and 22 deletions

View File

@@ -8,7 +8,7 @@
"""Extract manga-chapters and entire manga from https://kissmanga.com/"""
from .common import ChapterExtractor, MangaExtractor
from .common import ChapterExtractor, MangaExtractor, Extractor
from .. import text, aes, exception
from ..cache import cache
import hashlib
@@ -16,21 +16,35 @@ import ast
import re
class KissmangaBase():
class RedirectMixin():
    """Detect and handle redirects to CAPTCHA pages.

    Provides a request() method that delegates to Extractor.request()
    and reacts when the response was redirected to an '/AreYouHuman' URL.
    """

    def request(self, url, **kwargs):
        """Fetch 'url' and deal with redirects to a CAPTCHA page.

        Any extra keyword arguments are forwarded unchanged to
        Extractor.request().

        The reaction to a CAPTCHA redirect depends on the 'captcha'
        option (default: "stop"):
        - "wait": log a warning, wait for the user to press ENTER,
          then repeat the request
        - any other value: log an error and raise StopExtraction

        Returns the first response that is not a CAPTCHA redirect.
        Raises exception.StopExtraction when the redirect cannot be
        resolved.
        """
        while True:
            response = Extractor.request(self, url, **kwargs)

            # Only a response that was actually redirected to the
            # CAPTCHA page needs special treatment.
            if not response.history or "/AreYouHuman" not in response.url:
                return response

            if self.config("captcha", "stop") == "wait":
                self.log.warning(
                    "Redirect to \n%s\nVisit this URL in your browser, solve "
                    "the CAPTCHA, and press ENTER to continue", response.url)
                try:
                    input()
                except (EOFError, OSError):
                    # No usable stdin: there is nothing to wait on, and
                    # retrying right away would run into the same redirect
                    # again and again without ever terminating.
                    raise exception.StopExtraction()
                continue

            self.log.error(
                "Redirect to \n%s\nVisit this URL in your browser and "
                "solve the CAPTCHA to continue", response.url)
            raise exception.StopExtraction()
class KissmangaBase(RedirectMixin):
"""Base class for kissmanga extractors"""
category = "kissmanga"
archive_fmt = "{chapter_id}_{page}"
root = "https://kissmanga.com"
def request(self, url):
response = super().request(url)
if response.history and "/AreYouHuman" in response.url:
self.log.error("Redirect to \n%s\n"
"Visit this URL in your browser and solve "
"the CAPTCHA to continue.", response.url)
raise exception.StopExtraction()
return response
@staticmethod
def parse_chapter_string(data):
"""Parse 'chapter_string' value contained in 'data'"""

View File

@@ -9,11 +9,12 @@
"""Extract comic-issues and entire comics from https://readcomiconline.to/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text, exception
from .kissmanga import RedirectMixin
from .. import text
import re
class ReadcomiconlineBase():
class ReadcomiconlineBase(RedirectMixin):
"""Base class for readcomiconline extractors"""
category = "readcomiconline"
directory_fmt = ("{category}", "{comic}", "{issue:>03}")
@@ -21,15 +22,6 @@ class ReadcomiconlineBase():
archive_fmt = "{issue_id}_{page}"
root = "https://readcomiconline.to"
def request(self, url):
response = super().request(url)
if response.history and "/AreYouHuman" in response.url:
self.log.error("Redirect to \n%s\n"
"Visit this URL in your browser and solve "
"the CAPTCHA to continue.", response.url)
raise exception.StopExtraction()
return response
class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
"""Extractor for comic-issues from readcomiconline.to"""