Files
gallery-dl/gallery_dl/extractor/manganelo.py
Mike Fährmann 53cdfaac37 [common] add reference to 'exception' module to Extractor class
- remove 'exception' imports
- replace with 'self.exc'
2026-02-15 10:57:22 +01:00

161 lines
5.3 KiB
Python

# -*- coding: utf-8 -*-
# Copyright 2020 Jake Mannens
# Copyright 2021-2026 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://www.mangakakalot.gg/ and mirror sites"""
from .common import BaseExtractor, ChapterExtractor, MangaExtractor, Message
from .. import text, util
class ManganeloExtractor(BaseExtractor):
    """Base class for extractors of manganelo-style manga sites"""
    basecategory = "manganelo"


# Map each supported mirror to its canonical root URL and the regex
# fragment used to recognize it; update() registers these instances
# and returns the combined URL pattern prefix.
BASE_PATTERN = ManganeloExtractor.update({
    "nelomanga": {
        "root"   : "https://www.nelomanga.net",
        "pattern": r"(?:www\.)?nelomanga\.net",
    },
    "natomanga": {
        "root"   : "https://www.natomanga.com",
        "pattern": r"(?:www\.)?natomanga\.com",
    },
    "manganato": {
        "root"   : "https://www.manganato.gg",
        "pattern": r"(?:www\.)?manganato\.gg",
    },
    "mangakakalot": {
        "root"   : "https://www.mangakakalot.gg",
        "pattern": r"(?:www\.)?mangakakalot\.gg",
    },
})
class ManganeloChapterExtractor(ManganeloExtractor, ChapterExtractor):
    """Extractor for manganelo manga chapters"""
    pattern = BASE_PATTERN + r"(/manga/[^/?#]+/chapter-[^/?#]+)"
    example = "https://www.mangakakalot.gg/manga/MANGA_NAME/chapter-123"

    def __init__(self, match):
        ManganeloExtractor.__init__(self, match)
        # last regex group holds the /manga/.../chapter-... path
        self.page_url = self.root + self.groups[-1]

    def metadata(self, page):
        """Collect chapter metadata from the embedded JSON-LD and JS vars"""
        extr = text.extract_from(page)

        data = {}
        # ISO timestamps are truncated to seconds before parsing
        data["date"] = self.parse_datetime_iso(
            extr('"datePublished": "', '"')[:19])
        data["date_updated"] = self.parse_datetime_iso(
            extr('"dateModified": "', '"')[:19])
        data["manga_id"] = text.parse_int(extr("comic_id =", ";"))
        data["chapter_id"] = text.parse_int(extr("chapter_id =", ";"))
        data["manga"] = extr("comic_name =", ";").strip('" ')
        data["lang"] = "en"
        data["language"] = "English"

        # chapter_name looks like 'Chapter 12.5'; split off a minor part
        name = extr("chapter_name =", ";").strip('" ')
        major, sep, minor = name.rpartition(" ")[2].partition(".")
        data["chapter"] = text.parse_int(major)
        data["chapter_minor"] = sep + minor
        data["author"] = extr(". Author:", " already has ").strip()
        return data

    def images(self, page):
        """Return the list of (image URL, metadata) pairs for the chapter"""
        extr = text.extract_from(page)
        base = util.json_loads(extr("var cdns =", ";"))[0]
        paths = util.json_loads(extr("var chapterImages =", ";"))
        if not base.endswith("/"):
            base += "/"
        return [(base + path, None) for path in paths]
class ManganeloMangaExtractor(ManganeloExtractor, MangaExtractor):
    """Extractor for manganelo manga"""
    chapterclass = ManganeloChapterExtractor
    pattern = BASE_PATTERN + r"(/manga/[^/?#]+)$"
    example = "https://www.mangakakalot.gg/manga/MANGA_NAME"

    def __init__(self, match):
        ManganeloExtractor.__init__(self, match)
        self.page_url = self.root + self.groups[-1]

    def chapters(self, page):
        """Yield (chapter URL, metadata) pairs for every chapter"""
        extr = text.extract_from(page)

        manga_url = extr('property="og:url" content="', '"')
        manga_slug = manga_url[manga_url.rfind("/")+1:]

        shared = {
            "manga"     : text.unescape(extr("<h1>", "<")),
            "manga_url" : manga_url,
            "manga_slug": manga_slug,
            "author": text.remove_html(extr("<li>Author(s) :", "</li>")),
            "status": extr("<li>Status :", "<").strip(),
            "date_updated": self.parse_datetime(extr(
                "<li>Last updated :", "<").strip(), "%b-%d-%Y %I:%M:%S %p"),
            # split_html yields alternating name/separator entries
            "tags"  : text.split_html(extr(">Genres :", "</li>"))[::2],
            "lang"  : "en",
        }
        shared["tags"].sort()

        # the chapter list comes from a JSON API, not the HTML page
        base = manga_url + "/"
        api_url = f"{self.root}/api/manga/{manga_slug}/chapters?limit=-1"
        chapters = self.request_json(api_url)["data"]["chapters"]

        results = []
        for chapter in chapters:
            cslug = chapter["chapter_slug"]
            # slug is 'chapter-NUM' or 'chapter-NUM-MINOR'
            major, sep, minor = cslug[8:].partition("-")
            results.append((base + cslug, {
                **shared,
                "chapter": text.parse_int(major),
                "chapter_minor": (sep and ".") + minor,
                "date" : self.parse_datetime_iso(chapter["updated_at"]),
                "views": chapter["view"],
            }))
        return results
class ManganeloBookmarkExtractor(ManganeloExtractor):
    """Extractor for manganelo bookmarks"""
    subcategory = "bookmark"
    pattern = BASE_PATTERN + r"/bookmark"
    example = "https://www.mangakakalot.gg/bookmark"

    def items(self):
        queue_data = {"_extractor": ManganeloMangaExtractor}
        url = self.root + "/bookmark"

        response = self.request(url, params={"page": 1})
        # a redirect means we were bounced to the login page
        if response.history:
            raise self.exc.AuthRequired(
                "authenticated cookies", "your bookmarks")

        page = response.text
        pnum = 1
        # total number of pages, taken from the '>Last(N)' pagination link
        last = text.parse_int(text.extr(page, ">Last(", ")"))

        while True:
            for bookmark in text.extract_iter(
                    page, 'class="user-bookmark-item ', '</a>'):
                manga_url = text.extr(bookmark, ' href="', '"')
                yield Message.Queue, manga_url, queue_data

            if pnum >= last:
                return
            pnum += 1
            page = self.request(url, params={"page": pnum}).text