feat(kaliscan): add extractor for kaliscan.me

Support chapter and manga extractors with metadata extraction.
2026-01-23 19:54:04 +01:00
parent f869085476
commit 0b0bcb1640
3 changed files with 197 additions and 0 deletions
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -109,6 +109,7 @@ modules = [
    "iwara",
    "jschan",
    "kabeuchi",
+    "kaliscan",
    "keenspot",
    "kemono",
    "khinsider",
--- a/gallery_dl/extractor/kaliscan.py
+++ b/gallery_dl/extractor/kaliscan.py
@@ -0,0 +1,142 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://kaliscan.me/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text
+from ..cache import memcache
+
+BASE_PATTERN = r"(?:https?://)?kaliscan\.me"  # scheme prefix is optional
+
+
+class KaliscanBase():
+    """Base class for kaliscan extractors"""
+    category = "kaliscan"
+    root = "https://kaliscan.me"
+
+    # NOTE(review): keyarg=1 presumably caches results per 'manga_slug'
+    @memcache(keyarg=1)
+    def manga_data(self, manga_slug, page=None):
+        """Return manga metadata parsed from 'page' (fetched if not given)"""
+        if not page:
+            url = "{}/manga/{}".format(self.root, manga_slug)
+            page = self.request(url).text
+        extr = text.extract_from(page)
+
+        title = text.unescape(extr("<h1>", "<"))
+        # unescape alternative titles like the main one and drop empty
+        # entries left behind by stray or trailing commas
+        names = (
+            text.unescape(name).strip()
+            for name in (extr("<h2>", "<") or "").split(","))
+        alt_titles = [name for name in names if name]
+
+        author = text.remove_html(extr("Authors :</strong>", "</p>"))
+        status = text.remove_html(extr("Status :</strong>", "</p>"))
+        genres = [g.strip(" ,") for g in text.split_html(extr(
+            "Genres :</strong>", "</p>"))]
+
+        # the match starts inside the opening tag: skip up to its first ">"
+        desc_html = extr('class="content"', '<div class="readmore"')
+        description = text.remove_html(
+            desc_html.partition(">")[2]).strip() if desc_html else ""
+
+        # separate lookup on the whole page, independent of 'extr'
+        manga_id = text.parse_int(text.extr(page, "bookId =", ";"))
+
+        return {
+            "manga"       : title,
+            "manga_id"    : manga_id,
+            "manga_slug"  : manga_slug,
+            "manga_titles": alt_titles,
+            "author"      : author,
+            "status"      : status,
+            "genres"      : genres,
+            "description" : description,
+            "lang"        : "en",
+            "language"    : "English",
+        }
+
+
+class KaliscanChapterExtractor(KaliscanBase, ChapterExtractor):
+    """Extractor for kaliscan manga chapters"""
+    pattern = BASE_PATTERN + r"(/manga/([\w-]+)/chapter-([\d.]+))"
+    example = "https://kaliscan.me/manga/ID-MANGA/chapter-1"
+
+    def __init__(self, match):
+        ChapterExtractor.__init__(self, match)
+        self.manga_slug = self.groups[1]
+        self.chapter_string = self.groups[2]
+
+    def metadata(self, page):
+        """Return chapter metadata merged with the manga metadata"""
+        extr = text.extract_from(page)
+
+        # values are read in page order; the two slug values are skipped
+        manga_id = text.parse_int(extr("bookId =", ";"))
+        extr("bookSlug =", ";")
+        chapter_id = text.parse_int(extr("chapterId =", ";"))
+        extr("chapterSlug =", ";")
+        # fall back to the number from the URL if the page provides none
+        chapter_number = (extr("chapterNumber =", ";").strip(' "\'') or
+                          self.chapter_string)
+        chapter, sep, minor = chapter_number.partition(".")
+
+        data = {
+            "chapter"      : text.parse_int(chapter),
+            "chapter_minor": sep + minor,
+            "chapter_id"   : chapter_id,
+        }
+        data.update(self.manga_data(self.manga_slug))
+        if manga_id:  # prefer the ID found on the chapter page itself
+            data["manga_id"] = manga_id
+        return data
+
+    def images(self, page):
+        """Return a list of (image URL, None) tuples for a chapter page"""
+        images_str = text.extr(page, 'var chapImages = "', '"')
+        # comma-separated URL list; blank entries are dropped
+        urls = (url.strip() for url in (images_str or "").split(","))
+        return [(url, None) for url in urls if url]
+
+
+class KaliscanMangaExtractor(KaliscanBase, MangaExtractor):
+    """Extractor for kaliscan manga"""
+    chapterclass = KaliscanChapterExtractor
+    pattern = BASE_PATTERN + r"(/manga/([\w-]+))/?$"
+    example = "https://kaliscan.me/manga/ID-MANGA"
+
+    def __init__(self, match):
+        MangaExtractor.__init__(self, match)
+        self.manga_slug = self.groups[1]
+
+    def chapters(self, page):
+        """Return a list of (chapter URL, metadata) tuples for a manga page"""
+        manga = self.manga_data(self.manga_slug, page)
+        listing = text.extr(page, 'id="chapter-list">', '</ul>')
+        if not listing:
+            return []
+
+        chapters = []
+        for item in text.extract_iter(listing, "<li", "</li>"):
+            href = text.extr(item, 'href="', '"')
+            if not href:
+                continue
+            if href.startswith("/"):
+                # make site-relative links absolute
+                href = self.root + href
+
+            # the chapter number is whatever follows the last "/chapter-"
+            number = href.rpartition("/chapter-")[2]
+            major, dot, minor = number.partition(".")
+            info = {"chapter": text.parse_int(major),
+                    "chapter_minor": dot + minor}
+            info.update(manga)
+            chapters.append((href, info))
+        return chapters
--- a/test/results/kaliscan.py
+++ b/test/results/kaliscan.py
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import kaliscan
+
+
+__tests__ = (
+{  # single chapter: image URLs plus chapter and manga metadata
+    "#url"     : "https://kaliscan.me/manga/2142-whats-wrong-with-secretary-kim/chapter-1",
+    "#class"   : kaliscan.KaliscanChapterExtractor,
+    "#pattern" : r"https://s\d+\.1stmggv\d*\.\w+/.+\.\w+",
+    "#count"   : 13,
+
+    "author"       : "Jeong gyeong yun",
+    "chapter"      : 1,
+    "chapter_minor": "",
+    "chapter_id"   : 68134,
+    "count"        : 13,
+    "genres"       : ["Comedy", "Josei", "Manhwa", "Romance", "Webtoons"],
+    "lang"         : "en",
+    "language"     : "English",
+    "manga"        : "What's Wrong with Secretary Kim?",
+    "manga_id"     : 2142,
+    "manga_slug"   : "2142-whats-wrong-with-secretary-kim",
+    "status"       : "Completed",
+},
+
+{  # fractional chapter: ".5" is expected in "chapter_minor"
+    "#url"     : "https://kaliscan.me/manga/2142-whats-wrong-with-secretary-kim/chapter-14.5",
+    "#class"   : kaliscan.KaliscanChapterExtractor,
+
+    "chapter"      : 14,
+    "chapter_minor": ".5",
+},
+
+{  # manga overview: results are checked against the chapter pattern
+    "#url"     : "https://kaliscan.me/manga/2142-whats-wrong-with-secretary-kim",
+    "#class"   : kaliscan.KaliscanMangaExtractor,
+    "#pattern" : kaliscan.KaliscanChapterExtractor.pattern,
+    "#count"   : range(100, 200),
+
+    "author"   : "Jeong gyeong yun",
+    "chapter"  : int,
+    "genres"   : ["Comedy", "Josei", "Manhwa", "Romance", "Webtoons"],
+    "lang"     : "en",
+    "manga"    : "What's Wrong with Secretary Kim?",
+    "manga_id" : 2142,
+    "status"   : "Completed",
+},
+
+)