From 2dd3aeeeae737857b2016df9087171cde49441d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sun, 4 Feb 2018 16:27:44 +0100 Subject: [PATCH] [komikcast] add chapter- and manga-extractor (#70) --- docs/supportedsites.rst | 1 + gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/komikcast.py | 120 ++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+) create mode 100644 gallery_dl/extractor/komikcast.py diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index c82d51ec..df7fddb0 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -40,6 +40,7 @@ Jaimini's Box https://jaiminisbox.com/ Chapters, Manga Khinsider https://downloads.khinsider.com/ Soundtracks Kirei Cake https://reader.kireicake.com/ Chapters, Manga KissManga http://kissmanga.com/ Chapters, Manga +Komikcast https://komikcast.com/ Chapters, Manga Konachan https://konachan.com/ Pools, Popular Images, Posts, Tag-Searches Love is Over Archive https://archive.loveisover.me/ Threads Luscious https://luscious.net/ Albums diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index d9478485..8b1cb814 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -47,6 +47,7 @@ modules = [ "khinsider", "kireicake", "kissmanga", + "komikcast", "konachan", "loveisover", "luscious", diff --git a/gallery_dl/extractor/komikcast.py b/gallery_dl/extractor/komikcast.py new file mode 100644 index 00000000..96951c83 --- /dev/null +++ b/gallery_dl/extractor/komikcast.py @@ -0,0 +1,120 @@ +# -*- coding: utf-8 -*- + +# Copyright 2018 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extract manga-chapters and entire manga from https://komikcast.com/""" + +from .common import ChapterExtractor, MangaExtractor +from .. import text, util +import re + + +class KomikcastBase(): + """Base class for komikcast extractors""" + category = "komikcast" + scheme = "https" + root = "https://komikcast.com" + + @staticmethod + def parse_chapter_string(chapter_string, data=None): + """Parse 'chapter_string' value and add its info to 'data'""" + if not data: + data = {} + + match = re.match( + r"(?:(.*) Chapter )?0*(\d+)([^ ]*)(?: (?:- )?(.+))?", + text.unescape(chapter_string), + ) + manga, chapter, data["chapter_minor"], title = match.groups() + + if manga: + data["manga"] = manga.partition(" Chapter ")[0] + if title and title.lower() != "bahasa indonesia": + data["title"] = title.strip() + else: + data["title"] = "" + data["chapter"] = util.safe_int(chapter) + data["lang"] = "id" + data["language"] = "Indonesian" + + return data + + +class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor): + """Extractor for manga-chapters from komikcast.com""" + pattern = [r"(?:https?://)?(?:www\.)?komikcast\.com(/chapter/[^/?&#]+/)"] + test = [ + (("https://komikcast.com/chapter/" + "apotheosis-chapter-02-2-bahasa-indonesia/"), { + "url": "978d3c053d34a77f6ea6e60cbba3deda1e369be8", + "keyword": "9964a7ce7c8a518aebdccdea0e05858439c7ad92", + }), + (("https://komikcast.com/chapter/" + "tonari-no-kashiwagi-san-chapter-18b/"), { + "url": "db5594b025f9d81e4987da538b8599b8dee8851b", + "keyword": "94bb85aec6654ab5af0c10419ca388fcd9c73b47", + }), + ] + + def __init__(self, match): + ChapterExtractor.__init__(self, self.root + match.group(1)) + + def get_metadata(self, page): + info = text.extract(page, '', "")[0] + return self.parse_chapter_string(info) + + @staticmethod + def get_images(page): + readerarea = text.extract( + page, '
', '