NOTE(review): this patch was reconstructed from a whitespace-collapsed copy
in which every HTML-tag-like span inside string literals had been stripped.
Line structure was rebuilt so that the added-line counts match the hunk
headers (142 and 54). The literals below were lost in the damaged copy and
are best-guess restorations; confirm each against the live site markup
before merging:

  manga_data():  "<h1>" and "<h2>"   (start delimiters for title / alt titles)
  manga_data():  "</p>"              (end delimiter for Authors/Status/Genres)
  manga_data():  '</div>'            (end delimiter for the description)
  manga_data():  the "if desc_html:" line and the text.remove_html( call
                 that wraps desc_html.partition(">")[2]
  chapters():    '</ul>'             (end delimiter of the chapter list)
  chapters():    "<li", "</li>"      (per-chapter item delimiters)

The "Authors :", "Status :" and "Genres :" start delimiters are kept exactly
as they appeared; they may originally have carried surrounding inline tags.

diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index f017da58..b57d7517 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -109,6 +109,7 @@ modules = [
     "iwara",
     "jschan",
     "kabeuchi",
+    "kaliscan",
     "keenspot",
     "kemono",
     "khinsider",
diff --git a/gallery_dl/extractor/kaliscan.py b/gallery_dl/extractor/kaliscan.py
new file mode 100644
index 00000000..6c7ed5f1
--- /dev/null
+++ b/gallery_dl/extractor/kaliscan.py
@@ -0,0 +1,142 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://kaliscan.me/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text
+from ..cache import memcache
+
+BASE_PATTERN = r"(?:https?://)?kaliscan\.me"
+
+
+class KaliscanBase():
+    """Base class for kaliscan extractors"""
+    category = "kaliscan"
+    root = "https://kaliscan.me"
+
+    @memcache(keyarg=1)
+    def manga_data(self, manga_slug, page=None):
+        if not page:
+            url = "{}/manga/{}".format(self.root, manga_slug)
+            page = self.request(url).text
+        extr = text.extract_from(page)
+
+        title = text.unescape(extr("<h1>", "<"))
+        alt_titles = extr("<h2>", "<")
+        if alt_titles:
+            alt_titles = [t.strip() for t in alt_titles.split(",")]
+        else:
+            alt_titles = []
+
+        author = text.remove_html(extr(
+            "Authors :", "</p>"))
+        status = text.remove_html(extr(
+            "Status :", "</p>"))
+        genres = [g.strip(" ,") for g in text.split_html(extr(
+            "Genres :", "</p>"))]
+
+        desc_html = extr('class="content"', '</div>')
+        if desc_html:
+            description = text.remove_html(
+                desc_html.partition(">")[2]).strip()
+        else:
+            description = ""
+
+        manga_id = text.parse_int(text.extr(page, "bookId =", ";"))
+
+        return {
+            "manga"       : title,
+            "manga_id"    : manga_id,
+            "manga_slug"  : manga_slug,
+            "manga_titles": alt_titles,
+            "author"      : author,
+            "status"      : status,
+            "genres"      : genres,
+            "description" : description,
+            "lang"        : "en",
+            "language"    : "English",
+        }
+
+
+class KaliscanChapterExtractor(KaliscanBase, ChapterExtractor):
+    """Extractor for kaliscan manga chapters"""
+    pattern = BASE_PATTERN + r"(/manga/([\w-]+)/chapter-([\d.]+))"
+    example = "https://kaliscan.me/manga/ID-MANGA/chapter-1"
+
+    def __init__(self, match):
+        ChapterExtractor.__init__(self, match)
+        self.manga_slug = self.groups[1]
+        self.chapter_string = self.groups[2]
+
+    def metadata(self, page):
+        extr = text.extract_from(page)
+
+        manga_id = text.parse_int(extr("bookId =", ";"))
+        extr("bookSlug =", ";")
+        chapter_id = text.parse_int(extr("chapterId =", ";"))
+        extr("chapterSlug =", ";")
+        chapter_number = extr("chapterNumber =", ";").strip(' "\'')
+
+        chapter, sep, minor = chapter_number.partition(".")
+
+        data = {
+            "chapter"      : text.parse_int(chapter),
+            "chapter_minor": sep + minor,
+            "chapter_id"   : chapter_id,
+        }
+        data.update(self.manga_data(self.manga_slug))
+        if manga_id:
+            data["manga_id"] = manga_id
+        return data
+
+    def images(self, page):
+        images_str = text.extr(page, 'var chapImages = "', '"')
+        if not images_str:
+            return []
+        return [
+            (url.strip(), None)
+            for url in images_str.split(",")
+            if url.strip()
+        ]
+
+
+class KaliscanMangaExtractor(KaliscanBase, MangaExtractor):
+    """Extractor for kaliscan manga"""
+    chapterclass = KaliscanChapterExtractor
+    pattern = BASE_PATTERN + r"(/manga/([\w-]+))/?$"
+    example = "https://kaliscan.me/manga/ID-MANGA"
+
+    def __init__(self, match):
+        MangaExtractor.__init__(self, match)
+        self.manga_slug = self.groups[1]
+
+    def chapters(self, page):
+        data = self.manga_data(self.manga_slug, page)
+
+        chapter_list = text.extr(page, 'id="chapter-list">', '</ul>')
+        if not chapter_list:
+            return []
+
+        results = []
+        for li in text.extract_iter(chapter_list, "<li", "</li>"):
+            url = text.extr(li, 'href="', '"')
+            if not url:
+                continue
+            if url[0] == "/":
+                url = self.root + url
+
+            chapter, sep, minor = url.rpartition(
+                "/chapter-")[2].partition(".")
+
+            results.append((url, {
+                "chapter"      : text.parse_int(chapter),
+                "chapter_minor": sep + minor,
+                **data,
+            }))
+        return results
diff --git a/test/results/kaliscan.py b/test/results/kaliscan.py
new file mode 100644
index 00000000..11769d2a
--- /dev/null
+++ b/test/results/kaliscan.py
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import kaliscan
+
+
+__tests__ = (
+{
+    "#url"     : "https://kaliscan.me/manga/2142-whats-wrong-with-secretary-kim/chapter-1",
+    "#class"   : kaliscan.KaliscanChapterExtractor,
+    "#pattern" : r"https://s\d+\.1stmggv\d*\.\w+/.+\.\w+",
+    "#count"   : 13,
+
+    "author"       : "Jeong gyeong yun",
+    "chapter"      : 1,
+    "chapter_minor": "",
+    "chapter_id"   : 68134,
+    "count"        : 13,
+    "genres"       : ["Comedy", "Josei", "Manhwa", "Romance", "Webtoons"],
+    "lang"         : "en",
+    "language"     : "English",
+    "manga"        : "What's Wrong with Secretary Kim?",
+    "manga_id"     : 2142,
+    "manga_slug"   : "2142-whats-wrong-with-secretary-kim",
+    "status"       : "Completed",
+},
+
+{
+    "#url"     : "https://kaliscan.me/manga/2142-whats-wrong-with-secretary-kim/chapter-14.5",
+    "#class"   : kaliscan.KaliscanChapterExtractor,
+
+    "chapter"      : 14,
+    "chapter_minor": ".5",
+},
+
+{
+    "#url"     : "https://kaliscan.me/manga/2142-whats-wrong-with-secretary-kim",
+    "#class"   : kaliscan.KaliscanMangaExtractor,
+    "#pattern" : kaliscan.KaliscanChapterExtractor.pattern,
+    "#count"   : range(100, 200),
+
+    "author"  : "Jeong gyeong yun",
+    "chapter" : int,
+    "genres"  : ["Comedy", "Josei", "Manhwa", "Romance", "Webtoons"],
+    "lang"    : "en",
+    "manga"   : "What's Wrong with Secretary Kim?",
+    "manga_id": 2142,
+    "status"  : "Completed",
+},
+
+)