Python's 'ast' module and its 'NodeVisitor' class were incredibly helpful in identifying these
294 lines
9.7 KiB
Python
294 lines
9.7 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
# Copyright 2025 Mike Fährmann
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
# published by the Free Software Foundation.
|
|
|
|
"""Extractors for https://comick.io/"""
|
|
|
|
from .common import GalleryExtractor, ChapterExtractor, MangaExtractor, Message
|
|
from .. import text, exception
|
|
from ..cache import memcache
|
|
|
|
# Regex prefix shared by the URL patterns below:
# optional "http(s)://", optional "www.", then the literal host "comick.io"
BASE_PATTERN = r"(?:https?://)?(?:www\.)?comick\.io"
|
|
|
|
|
|
class ComickBase:
    """Common attributes shared by every comick.io extractor"""
    # site root used to build page URLs
    root = "https://comick.io"
    # category name under which these extractors are grouped
    category = "comick"
|
|
|
|
|
|
class ComickCoversExtractor(ComickBase, GalleryExtractor):
    """Extractor for the cover images of a comick.io manga"""
    subcategory = "covers"
    directory_fmt = ("{category}", "{manga}", "Covers")
    filename_fmt = "{volume:>02}_{lang}.{extension}"
    archive_fmt = "c_{id}"
    pattern = BASE_PATTERN + r"/comic/([\w-]+)/cover"
    example = "https://comick.io/comic/MANGA/cover"

    def metadata(self, page):
        """Return the manga info dict and remember the manga's slug

        The slug reported by the manga info is stored as 'self.slug'
        so that images() can build the cover page URL from it.
        """
        info = _manga_info(self, self.groups[0])
        self.slug = info['manga_slug']
        return info

    def images(self, page):
        """Return a list of (URL, metadata) tuples, one per cover

        Covers are read from the '__NEXT_DATA__'-style page data of the
        manga's '/cover' page and returned in the reverse of the order
        in which that data lists them.
        """
        response = self.request(f"{self.root}/comic/{self.slug}/cover")
        nextdata = self._extract_nextdata(response.text)
        comic = nextdata["props"]["pageProps"]["comic"]

        results = []
        for cover in reversed(comic["md_covers"]):
            url = "https://meo.comick.pictures/" + cover["b2key"]
            results.append((url, {
                "id": cover["id"],
                "width": cover["w"],
                "height": cover["h"],
                "size": cover["s"],
                "lang": cover["locale"],
                "volume": text.parse_int(cover["vol"]),
                # keep the unmodified cover object available as well
                "cover": cover,
            }))
        return results
|
|
|
|
|
|
class ComickChapterExtractor(ComickBase, ChapterExtractor):
    """Extractor for comick.io manga chapters"""
    archive_fmt = "{chapter_hid}_{page}"
    pattern = (BASE_PATTERN + r"/comic/([\w-]+)"
               r"/(\w+(?:-(?:chapter|volume)-[^/?#]+)?)")
    example = "https://comick.io/comic/MANGA/ID-chapter-123-en"

    def metadata(self, page):
        """Collect manga and chapter metadata for the requested chapter

        Fetches the chapter's Next.js data via _chapter_info(), following
        '__N_REDIRECT' entries and refreshing the cached build ID once
        when the data URL answers with an HTML 404 page.

        Stores the chapter's 'md_images' list in 'self._images' for
        images() and returns the manga info dict extended with
        chapter-specific fields.

        Raises exception.NotFoundError when a 404 response body contains
        '"notFound":true'; any other HttpError is re-raised unchanged.
        """
        slug, chstr = self.groups
        manga = _manga_info(self, slug)

        # Set after the cached build ID has been refreshed once, so that a
        # second HTML 404 is raised instead of being retried indefinitely.
        buildid_refreshed = False

        while True:
            try:
                props = _chapter_info(self, manga, chstr)
            except exception.HttpError as exc:
                if exc.response.status_code != 404:
                    raise
                if exc.response.headers.get(
                        "Content-Type", "").startswith("text/html"):
                    # HTML instead of JSON: treated as a sign that the
                    # build ID in the cached manga info is no longer valid
                    if buildid_refreshed:
                        raise
                    self.log.debug("Updating Next.js build ID")
                    buildid_refreshed = True
                    _manga_info.cache.clear()
                    manga = _manga_info(self, slug)
                    continue
                if b'"notFound":true' in exc.response.content:
                    raise exception.NotFoundError("chapter")
                raise

            if "__N_REDIRECT" in props:
                path = props["__N_REDIRECT"]
                self.log.debug("Following redirect to %s", path)
                # the last two path components are the new manga slug and
                # chapter string
                # NOTE(review): 'manga' is not re-fetched here, so the new
                # 'slug' only takes effect on a later build ID refresh
                _, slug, chstr = path.rsplit("/", 2)
                continue

            ch = props["chapter"]
            break

        self._images = ch["md_images"]

        # split e.g. "12.5" into chapter "12" and minor part ".5"
        if chapter := ch["chap"]:
            chapter, sep, minor = chapter.partition(".")
        else:
            chapter = 0
            sep = minor = ""

        return {
            **manga,
            "title": props["chapTitle"],
            "volume": text.parse_int(ch["vol"]),
            "chapter": text.parse_int(chapter),
            "chapter_minor": sep + minor,
            "chapter_id": ch["id"],
            "chapter_hid": ch["hid"],
            "chapter_string": chstr,
            "group": ch["group_name"],
            # only the first 19 characters of each timestamp are parsed
            "date": self.parse_datetime_iso(ch["created_at"][:19]),
            "date_updated": self.parse_datetime_iso(ch["updated_at"][:19]),
            "lang": ch["lang"],
        }

    def images(self, page):
        """Return a list of (URL, metadata) tuples, one per chapter page

        Logs an error and returns an empty tuple when no entry of
        'self._images' has a 'b2key' value; this includes an empty
        image list, which previously raised an IndexError.
        """
        if not any(img.get("b2key") for img in self._images):
            self.log.error(
                "%s: Broken Chapter (missing 'b2key' for all pages)",
                self.groups[1])
            return ()

        return [
            ("https://meo.comick.pictures/" + img["b2key"], {
                "width": img["w"],
                "height": img["h"],
                "size": img["s"],
                "optimized": img["optimized"],
            })
            for img in self._images
        ]
|
|
|
|
|
|
class ComickMangaExtractor(ComickBase, MangaExtractor):
    """Extractor for all chapters of a comick.io manga"""
    pattern = BASE_PATTERN + r"/comic/([\w-]+)/?(?:\?([^#]+))?"
    example = "https://comick.io/comic/MANGA"

    def items(self):
        """Yield one Message.Queue entry per chapter of the manga

        Each chapter object from chapters() is merged with the manga
        info, tagged with ComickChapterExtractor as its '_extractor',
        and given 'volume', 'chapter', and 'chapter_minor' fields.
        """
        manga = _manga_info(self, self.groups[0])
        slug = manga["manga_slug"]
        # register the info under the slug it reports for itself
        # (presumably so later chapter extractors can reuse it - confirm
        # against the memcache implementation)
        _manga_info.update(slug, manga)

        comic_url = f"{self.root}/comic/{slug}"

        for ch in self.chapters(manga):
            ch.update(manga)
            ch["_extractor"] = ComickChapterExtractor

            number = ch["chap"]
            volume = ch["vol"]

            if number:
                # regular chapter, possibly with a minor part ("12.5")
                url = (f"{comic_url}"
                       f"/{ch['hid']}-chapter-{number}-{ch['lang']}")
                major, sep, minor = number.partition(".")
                ch["volume"] = text.parse_int(volume)
                ch["chapter"] = text.parse_int(major)
                ch["chapter_minor"] = sep + minor
            elif volume:
                # entry with a volume but no chapter number
                url = (f"{comic_url}"
                       f"/{ch['hid']}-volume-{volume}-{ch['lang']}")
                ch["volume"] = text.parse_int(volume)
                ch["chapter"] = 0
                ch["chapter_minor"] = ""
            else:
                # neither chapter nor volume: address it by 'hid' alone
                url = f"{comic_url}/{ch['hid']}"
                ch["volume"] = 0
                ch["chapter"] = 0
                ch["chapter_minor"] = ""

            yield Message.Queue, url, ch

    def chapters(self, manga):
        """Yield chapter objects from the api.comick.io chapters endpoint

        Request parameters are derived from the URL's query string
        ('lang', 'page', 'date-order', 'chap-order', 'group') with the
        'lang' and 'chapter-reverse' config options as fallbacks.
        Result pages are requested until 'total' is covered.
        """
        announce = True
        _, query = self.groups

        url = f"https://api.comick.io/comic/{manga['manga_hid']}/chapters"
        headers = {
            "Origin": "https://comick.io",
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-site",
        }

        query = text.parse_query_list(query, ("lang",))

        # the query string takes precedence over the 'lang' config option;
        # non-string values are joined into a comma-separated string
        lang = query.get("lang") or self.config("lang")
        if not lang:
            lang = None
        elif not isinstance(lang, str):
            lang = ",".join(lang)

        params = {"lang": lang}
        page = text.parse_int(query.get("page"), 1)
        params["page"] = page

        # use the first ordering parameter present in the query string,
        # or fall back to 'chap-order' based on the 'chapter-reverse' option
        for key in ("date-order", "chap-order"):
            if value := query.get(key):
                params[key] = value
                break
        else:
            params["chap-order"] = (
                "0" if self.config("chapter-reverse", False) else "1")

        # a 'group' value of "0" is treated the same as no group filter
        group = query.get("group")
        filter_by_group = group is not None and group != "0"

        while True:
            data = self.request_json(url, params=params, headers=headers)
            limit = data["limit"]
            covered = limit * page

            if announce:
                # only evaluated for the first response
                announce = False
                total = data["total"] - covered
                if total > limit:
                    self.log.info("Collecting %s chapters", total)

            if filter_by_group:
                for ch in data["chapters"]:
                    names = ch["group_name"]
                    if names and group in names:
                        yield ch
            else:
                yield from data["chapters"]

            if data["total"] <= covered:
                return
            page += 1
            params["page"] = page
|
|
|
|
|
|
@memcache(keyarg=1)
def _manga_info(self, slug):
    """Fetch a manga's main page and return its metadata as a dict

    'self' is the calling extractor; its request helpers and 'root'
    are used to load and parse the page. Results are wrapped by
    memcache with 'keyarg=1' (presumably cached per 'slug' - confirm
    against the cache module).

    The returned dict also carries the page's Next.js build ID as
    '_build_id', which _chapter_info() needs to build data URLs.
    """
    response = self.request(f"{self.root}/comic/{slug}")
    data = self._extract_nextdata(response.text)
    props = data["props"]["pageProps"]
    comic = props["comic"]

    # sort genre entries by their group; the name of any entry that is
    # neither "Genre" nor "Theme" is used as the format (last one wins)
    genre = []
    theme = []
    comic_format = ""
    for entry in comic["md_comic_md_genres"]:
        details = entry["md_genres"]
        kind = details["group"]
        name = details["name"]
        if kind == "Genre":
            genre.append(name)
        elif kind == "Theme":
            theme.append(name)
        else:
            comic_format = name

    # tags and publishers are only available with 'mu_comics' data
    tags = publisher = ()
    if mu := comic["mu_comics"]:
        tags = [category["mu_categories"]["title"]
                for category in mu["mu_comic_categories"]]
        publisher = [entry["mu_publishers"]["title"]
                     for entry in mu["mu_comic_publishers"]]

    return {
        "manga": comic["title"],
        "manga_id": comic["id"],
        "manga_hid": comic["hid"],
        "manga_slug": comic["slug"],
        "manga_titles": [title["title"] for title in comic["md_titles"]],
        "artist": [artist["name"] for artist in props["artists"]],
        "author": [author["name"] for author in props["authors"]],
        "genre": genre,
        "theme": theme,
        "format": comic_format,
        "tags": tags,
        "publisher": publisher,
        "published": text.parse_int(comic["year"]),
        "description": comic["desc"],
        "demographic": props["demographic"],
        "origin": comic["iso639_1"],
        "mature": props["matureContent"],
        "rating": comic["content_rating"],
        "rank": comic["follow_rank"],
        "score": text.parse_float(comic["bayesian_rating"]),
        # a status value of 2 is reported as "Complete",
        # everything else as "Ongoing"
        "status": "Complete" if comic["status"] == 2 else "Ongoing",
        "links": comic["links"],
        "_build_id": data["buildId"],
    }
|
|
|
|
|
|
def _chapter_info(self, manga, chstr):
    """Request a chapter's Next.js data file and return its 'pageProps'

    The URL is built from the extractor's 'root', the build ID and slug
    stored in 'manga', and the chapter string 'chstr'.
    """
    slug = manga['manga_slug']
    build_id = manga['_build_id']
    data_url = (f"{self.root}/_next/data/{build_id}"
                f"/comic/{slug}/{chstr}.json")
    response = self.request_json(
        data_url, params={"slug": slug, "chapter": chstr})
    return response["pageProps"]
|