feat(kaliscan): add extractor for kaliscan.me

Support chapter and manga extractors with metadata extraction.
This commit is contained in:
Duy Nguyen
2026-01-23 19:54:04 +01:00
parent f869085476
commit 0b0bcb1640
3 changed files with 197 additions and 0 deletions

View File

@@ -109,6 +109,7 @@ modules = [
"iwara",
"jschan",
"kabeuchi",
"kaliscan",
"keenspot",
"kemono",
"khinsider",

View File

@@ -0,0 +1,142 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://kaliscan.me/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text
from ..cache import memcache
# Common URL prefix for the extractor patterns in this module:
# an optional "http://" or "https://" scheme followed by the kaliscan.me host
BASE_PATTERN = r"(?:https?://)?kaliscan\.me"
class KaliscanBase():
    """Base class for kaliscan extractors

    Provides the shared 'category' and 'root' values as well as
    manga_data(), which scrapes a manga's overview page.
    """
    category = "kaliscan"
    root = "https://kaliscan.me"

    @memcache(keyarg=1)
    def manga_data(self, manga_slug, page=None):
        """Return a dict of metadata scraped from a manga overview page

        manga_slug: path segment following '/manga/' in the manga's URL
        page: HTML of the overview page, if the caller already has it;
              when empty, '<root>/manga/<manga_slug>' is requested

        Decorated with memcache(keyarg=1); presumably results are cached
        per 'manga_slug' (argument index 1) - confirm against the cache
        module.
        """
        if not page:
            url = "{}/manga/{}".format(self.root, manga_slug)
            page = self.request(url).text

        # 'extr' is called once per field, in the order
        # h1, h2, Authors, Status, Genres, content
        extr = text.extract_from(page)

        title = text.unescape(extr("<h1>", "<"))

        # The title is passed through text.unescape(); the remaining
        # free-text fields get the same treatment so that all of them
        # are handled consistently. Empty list entries caused by
        # stray separators are dropped.
        alt_titles = []
        for alt in extr("<h2>", "<").split(","):
            alt = text.unescape(alt.strip())
            if alt:
                alt_titles.append(alt)

        author = text.unescape(text.remove_html(extr(
            "Authors :</strong>", "</p>")))
        status = text.unescape(text.remove_html(extr(
            "Status :</strong>", "</p>")))

        genres = []
        for genre in text.split_html(extr("Genres :</strong>", "</p>")):
            genre = genre.strip(" ,")
            if genre:
                genres.append(genre)

        desc_html = extr('class="content"', '<div class="readmore"')
        if desc_html:
            # partition(">") drops the remainder of the opening tag
            # that follows the 'class="content"' marker
            description = text.unescape(text.remove_html(
                desc_html.partition(">")[2])).strip()
        else:
            description = ""

        # searched from the start of the page, independent of 'extr'
        manga_id = text.parse_int(text.extr(page, "bookId =", ";"))

        return {
            "manga"       : title,
            "manga_id"    : manga_id,
            "manga_slug"  : manga_slug,
            "manga_titles": alt_titles,
            "author"      : author,
            "status"      : status,
            "genres"      : genres,
            "description" : description,
            "lang"        : "en",
            "language"    : "English",
        }
class KaliscanChapterExtractor(KaliscanBase, ChapterExtractor):
    """Extractor for kaliscan manga chapters"""
    pattern = BASE_PATTERN + r"(/manga/([\w-]+)/chapter-([\d.]+))"
    example = "https://kaliscan.me/manga/ID-MANGA/chapter-1"

    def __init__(self, match):
        ChapterExtractor.__init__(self, match)
        # pattern groups: [0] full path, [1] manga slug, [2] chapter number
        self.manga_slug = self.groups[1]
        self.chapter_string = self.groups[2]

    def metadata(self, page):
        """Return chapter and manga metadata for a chapter page

        Chapter values are read from the 'bookId', 'chapterId' and
        'chapterNumber' assignments in 'page'; manga-level values come
        from manga_data(). When the page provides no 'chapterNumber'
        value, the chapter number from the URL is used instead.
        """
        extr = text.extract_from(page)

        # The values are read in this fixed order; 'bookSlug' and
        # 'chapterSlug' are consumed but not used.
        manga_id = text.parse_int(extr("bookId =", ";"))
        extr("bookSlug =", ";")
        chapter_id = text.parse_int(extr("chapterId =", ";"))
        extr("chapterSlug =", ";")
        chapter_number = extr("chapterNumber =", ";").strip(' "\'')

        if not chapter_number:
            # fall back to the number captured from the URL instead of
            # reporting chapter 0
            chapter_number = self.chapter_string

        chapter, sep, minor = chapter_number.partition(".")

        data = {
            "chapter"      : text.parse_int(chapter),
            "chapter_minor": sep + minor,
            "chapter_id"   : chapter_id,
        }
        data.update(self.manga_data(self.manga_slug))

        # prefer the ID found on the chapter page itself, if any
        if manga_id:
            data["manga_id"] = manga_id
        return data

    def images(self, page):
        """Return (url, None) tuples for the URLs in 'chapImages'

        'chapImages' is expected to be a double-quoted, comma-separated
        string of image URLs; empty entries are skipped.
        """
        images_str = text.extr(page, 'var chapImages = "', '"')
        if not images_str:
            return []

        return [
            (url.strip(), None)
            for url in images_str.split(",")
            if url.strip()
        ]
class KaliscanMangaExtractor(KaliscanBase, MangaExtractor):
    """Extractor for kaliscan manga"""
    chapterclass = KaliscanChapterExtractor
    pattern = BASE_PATTERN + r"(/manga/([\w-]+))/?$"
    example = "https://kaliscan.me/manga/ID-MANGA"

    def __init__(self, match):
        MangaExtractor.__init__(self, match)
        # second pattern group: the slug following '/manga/'
        self.manga_slug = self.groups[1]

    def chapters(self, page):
        """Return a list of (chapter URL, metadata) tuples

        Chapter links are taken from the <li> elements of the
        'chapter-list' block in 'page'; chapter numbers are derived
        from the '/chapter-<number>' part of each URL.
        """
        manga = self.manga_data(self.manga_slug, page)

        listing = text.extr(page, 'id="chapter-list">', '</ul>')
        if not listing:
            return []

        results = []
        for item in text.extract_iter(listing, "<li", "</li>"):
            href = text.extr(item, 'href="', '"')
            if not href:
                continue

            # site-relative links get prefixed with the root URL
            url = self.root + href if href.startswith("/") else href

            number = url.rpartition("/chapter-")[2]
            major, dot, minor = number.partition(".")

            info = {
                "chapter"      : text.parse_int(major),
                "chapter_minor": dot + minor,
            }
            info.update(manga)
            results.append((url, info))

        return results

54
test/results/kaliscan.py Normal file
View File

@@ -0,0 +1,54 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import kaliscan
__tests__ = (
# single chapter: image URLs/count plus full metadata
{
    "#url"    : "https://kaliscan.me/manga/2142-whats-wrong-with-secretary-kim/chapter-1",
    "#class"  : kaliscan.KaliscanChapterExtractor,
    "#pattern": r"https://s\d+\.1stmggv\d*\.\w+/.+\.\w+",
    "#count"  : 13,

    "author"       : "Jeong gyeong yun",
    "chapter"      : 1,
    "chapter_minor": "",
    "chapter_id"   : 68134,
    "count"        : 13,
    "genres"       : ["Comedy", "Josei", "Manhwa", "Romance", "Webtoons"],
    "lang"         : "en",
    "language"     : "English",
    "manga"        : "What's Wrong with Secretary Kim?",
    "manga_id"     : 2142,
    "manga_slug"   : "2142-whats-wrong-with-secretary-kim",
    "status"       : "Completed",
},

# chapter with a fractional number: split into chapter / chapter_minor
{
    "#url"  : "https://kaliscan.me/manga/2142-whats-wrong-with-secretary-kim/chapter-14.5",
    "#class": kaliscan.KaliscanChapterExtractor,

    "chapter"      : 14,
    "chapter_minor": ".5",
},

# manga overview: every result must match the chapter pattern
{
    "#url"    : "https://kaliscan.me/manga/2142-whats-wrong-with-secretary-kim",
    "#class"  : kaliscan.KaliscanMangaExtractor,
    "#pattern": kaliscan.KaliscanChapterExtractor.pattern,
    "#count"  : range(100, 200),

    "author"  : "Jeong gyeong yun",
    "chapter" : int,
    "genres"  : ["Comedy", "Josei", "Manhwa", "Romance", "Webtoons"],
    "lang"    : "en",
    "manga"   : "What's Wrong with Secretary Kim?",
    "manga_id": 2142,
    "status"  : "Completed",
},
)