[comick] handle volume-only chapters (#8043)
This commit is contained in:
@@ -20,79 +20,18 @@ class ComickBase():
|
|||||||
category = "comick"
|
category = "comick"
|
||||||
root = "https://comick.io"
|
root = "https://comick.io"
|
||||||
|
|
||||||
@memcache(keyarg=1)
def _manga_info(self, slug):
    """Collect metadata for the comic identified by 'slug'.

    Requests '{self.root}/comic/{slug}', extracts the embedded page data
    through self._extract_nextdata(), and returns a flat dict of manga
    fields plus the '_build_id' that _chapter_info() reads.

    NOTE(review): memcache(keyarg=1) presumably caches the result per
    'slug' -- confirm against the cache module.
    """
    html = self.request(f"{self.root}/comic/{slug}").text
    nextdata = self._extract_nextdata(html)
    page_props = nextdata["props"]["pageProps"]
    comic = page_props["comic"]

    # Sort the genre entries into buckets by their 'group' value.
    # Anything that is neither 'Genre' nor 'Theme' sets the format;
    # if several such entries exist, the last one wins.
    genres = []
    themes = []
    fmt = ""
    for entry in comic["md_comic_md_genres"]:
        info = entry["md_genres"]
        kind = info["group"]
        name = info["name"]
        if kind == "Genre":
            genres.append(name)
        elif kind == "Theme":
            themes.append(name)
        else:
            fmt = name

    # 'tags' and 'publisher' fall back to one shared empty tuple
    # when the comic has no 'mu_comics' data.
    mu = comic["mu_comics"]
    if not mu:
        tags = publishers = ()
    else:
        tags = [cat["mu_categories"]["title"]
                for cat in mu["mu_comic_categories"]]
        publishers = [pub["mu_publishers"]["title"]
                      for pub in mu["mu_comic_publishers"]]

    return {
        "manga": comic["title"],
        "manga_id": comic["id"],
        "manga_hid": comic["hid"],
        "manga_slug": slug,
        "manga_titles": [alt["title"] for alt in comic["md_titles"]],
        "artist": [person["name"] for person in page_props["artists"]],
        "author": [person["name"] for person in page_props["authors"]],
        "genre" : genres,
        "theme" : themes,
        "format": fmt,
        "tags"  : tags,
        "publisher": publishers,
        "published": text.parse_int(comic["year"]),
        "description": comic["desc"],
        "demographic": page_props["demographic"],
        "origin": comic["iso639_1"],
        "mature": page_props["matureContent"],
        "rating": comic["content_rating"],
        "rank"  : comic["follow_rank"],
        "score" : text.parse_float(comic["bayesian_rating"]),
        # a status value of 2 is reported as 'Complete',
        # everything else as 'Ongoing'
        "status": "Complete" if comic["status"] == 2 else "Ongoing",
        "links" : comic["links"],
        "_build_id": nextdata["buildId"],
    }
|
|
||||||
|
|
||||||
def _chapter_info(self, manga, chstr):
|
|
||||||
slug = manga['manga_slug']
|
|
||||||
url = (f"{self.root}/_next/data/{manga['_build_id']}"
|
|
||||||
f"/comic/{slug}/{chstr}.json")
|
|
||||||
params = {"slug": slug, "chapter": chstr}
|
|
||||||
return self.request_json(url, params=params)["pageProps"]
|
|
||||||
|
|
||||||
|
|
||||||
class ComickChapterExtractor(ComickBase, ChapterExtractor):
|
class ComickChapterExtractor(ComickBase, ChapterExtractor):
|
||||||
"""Extractor for comick.io manga chapters"""
|
"""Extractor for comick.io manga chapters"""
|
||||||
archive_fmt = "{chapter_hid}_{page}"
|
archive_fmt = "{chapter_hid}_{page}"
|
||||||
pattern = BASE_PATTERN + r"/comic/([\w-]+)/(\w+(?:-chapter-[^/?#]+)?)"
|
pattern = (BASE_PATTERN + r"/comic/([\w-]+)"
|
||||||
|
r"/(\w+(?:-(?:chapter|volume)-[^/?#]+)?)")
|
||||||
example = "https://comick.io/comic/MANGA/ID-chapter-123-en"
|
example = "https://comick.io/comic/MANGA/ID-chapter-123-en"
|
||||||
|
|
||||||
def metadata(self, page):
|
def metadata(self, page):
|
||||||
slug, chstr = self.groups
|
slug, chstr = self.groups
|
||||||
manga = self._manga_info(slug)
|
manga = _manga_info(self, slug)
|
||||||
props = self._chapter_info(manga, chstr)
|
props = _chapter_info(self, manga, chstr)
|
||||||
|
|
||||||
ch = props["chapter"]
|
ch = props["chapter"]
|
||||||
self._images = ch["md_images"]
|
self._images = ch["md_images"]
|
||||||
@@ -138,8 +77,9 @@ class ComickMangaExtractor(ComickBase, MangaExtractor):
|
|||||||
example = "https://comick.io/comic/MANGA"
|
example = "https://comick.io/comic/MANGA"
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
slug = self.groups[0]
|
manga = _manga_info(self, self.groups[0])
|
||||||
manga = self._manga_info(slug)
|
slug = manga["manga_slug"]
|
||||||
|
_manga_info.update(slug, manga)
|
||||||
|
|
||||||
for ch in self.chapters(manga):
|
for ch in self.chapters(manga):
|
||||||
ch.update(manga)
|
ch.update(manga)
|
||||||
@@ -149,11 +89,18 @@ class ComickMangaExtractor(ComickBase, MangaExtractor):
|
|||||||
url = (f"{self.root}/comic/{slug}"
|
url = (f"{self.root}/comic/{slug}"
|
||||||
f"/{ch['hid']}-chapter-{chapter}-{ch['lang']}")
|
f"/{ch['hid']}-chapter-{chapter}-{ch['lang']}")
|
||||||
chapter, sep, minor = chapter.partition(".")
|
chapter, sep, minor = chapter.partition(".")
|
||||||
|
ch["volume"] = text.parse_int(ch["vol"])
|
||||||
ch["chapter"] = text.parse_int(chapter)
|
ch["chapter"] = text.parse_int(chapter)
|
||||||
ch["chapter_minor"] = sep + minor
|
ch["chapter_minor"] = sep + minor
|
||||||
|
elif volume := ch["vol"]:
|
||||||
|
url = (f"{self.root}/comic/{slug}"
|
||||||
|
f"/{ch['hid']}-volume-{volume}-{ch['lang']}")
|
||||||
|
ch["volume"] = text.parse_int(volume)
|
||||||
|
ch["chapter"] = 0
|
||||||
|
ch["chapter_minor"] = ""
|
||||||
else:
|
else:
|
||||||
url = f"{self.root}/comic/{slug}/{ch['hid']}"
|
url = f"{self.root}/comic/{slug}/{ch['hid']}"
|
||||||
ch["chapter"] = 0
|
ch["volume"] = ch["chapter"] = 0
|
||||||
ch["chapter_minor"] = ""
|
ch["chapter_minor"] = ""
|
||||||
|
|
||||||
yield Message.Queue, url, ch
|
yield Message.Queue, url, ch
|
||||||
@@ -213,3 +160,67 @@ class ComickMangaExtractor(ComickBase, MangaExtractor):
|
|||||||
if data["total"] <= limit * page:
|
if data["total"] <= limit * page:
|
||||||
return
|
return
|
||||||
params["page"] = page = page + 1
|
params["page"] = page = page + 1
|
||||||
|
|
||||||
|
|
||||||
|
@memcache(keyarg=1)
def _manga_info(self, slug):
    """Collect metadata for the comic identified by 'slug'.

    Requests '{self.root}/comic/{slug}', extracts the embedded page data
    through self._extract_nextdata(), and returns a flat dict of manga
    fields plus the '_build_id' that _chapter_info() reads.

    'manga_slug' in the result is taken from the page data, not from
    the 'slug' argument.

    NOTE(review): memcache(keyarg=1) presumably caches the result per
    'slug' -- confirm against the cache module.
    """
    html = self.request(f"{self.root}/comic/{slug}").text
    nextdata = self._extract_nextdata(html)
    page_props = nextdata["props"]["pageProps"]
    comic = page_props["comic"]

    # Sort the genre entries into buckets by their 'group' value.
    # Anything that is neither 'Genre' nor 'Theme' sets the format;
    # if several such entries exist, the last one wins.
    genres = []
    themes = []
    fmt = ""
    for entry in comic["md_comic_md_genres"]:
        info = entry["md_genres"]
        kind = info["group"]
        name = info["name"]
        if kind == "Genre":
            genres.append(name)
        elif kind == "Theme":
            themes.append(name)
        else:
            fmt = name

    # 'tags' and 'publisher' fall back to one shared empty tuple
    # when the comic has no 'mu_comics' data.
    mu = comic["mu_comics"]
    if not mu:
        tags = publishers = ()
    else:
        tags = [cat["mu_categories"]["title"]
                for cat in mu["mu_comic_categories"]]
        publishers = [pub["mu_publishers"]["title"]
                      for pub in mu["mu_comic_publishers"]]

    return {
        "manga": comic["title"],
        "manga_id": comic["id"],
        "manga_hid": comic["hid"],
        "manga_slug": comic["slug"],
        "manga_titles": [alt["title"] for alt in comic["md_titles"]],
        "artist": [person["name"] for person in page_props["artists"]],
        "author": [person["name"] for person in page_props["authors"]],
        "genre" : genres,
        "theme" : themes,
        "format": fmt,
        "tags"  : tags,
        "publisher": publishers,
        "published": text.parse_int(comic["year"]),
        "description": comic["desc"],
        "demographic": page_props["demographic"],
        "origin": comic["iso639_1"],
        "mature": page_props["matureContent"],
        "rating": comic["content_rating"],
        "rank"  : comic["follow_rank"],
        "score" : text.parse_float(comic["bayesian_rating"]),
        # a status value of 2 is reported as 'Complete',
        # everything else as 'Ongoing'
        "status": "Complete" if comic["status"] == 2 else "Ongoing",
        "links" : comic["links"],
        "_build_id": nextdata["buildId"],
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _chapter_info(self, manga, chstr):
|
||||||
|
slug = manga['manga_slug']
|
||||||
|
url = (f"{self.root}/_next/data/{manga['_build_id']}"
|
||||||
|
f"/comic/{slug}/{chstr}.json")
|
||||||
|
params = {"slug": slug, "chapter": chstr}
|
||||||
|
return self.request_json(url, params=params)["pageProps"]
|
||||||
|
|||||||
@@ -49,7 +49,7 @@ __tests__ = (
|
|||||||
"origin": "ja",
|
"origin": "ja",
|
||||||
"published": 2023,
|
"published": 2023,
|
||||||
"publisher": (),
|
"publisher": (),
|
||||||
"rank": range(29_000, 32_000),
|
"rank": range(20_000, 40_000),
|
||||||
"rating": "safe",
|
"rating": "safe",
|
||||||
"score": float,
|
"score": float,
|
||||||
"status": "Ongoing",
|
"status": "Ongoing",
|
||||||
@@ -163,12 +163,55 @@ __tests__ = (
|
|||||||
"volume" : 0,
|
"volume" : 0,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://comick.io/comic/00-boku-no-hero-academia/0nJzK-volume-1-en",
|
||||||
|
"#comment" : "volume-only chapter (#8043)",
|
||||||
|
"#class" : comick.ComickChapterExtractor,
|
||||||
|
"#pattern" : r"https://meo.comick.pictures/\d+-[\w-]+\.(jpg|png)",
|
||||||
|
"#count" : 187,
|
||||||
|
|
||||||
|
"manga" : "Boku no Hero Academia",
|
||||||
|
"manga_hid" : "q1hZ1dbv",
|
||||||
|
"manga_id" : 11359,
|
||||||
|
"manga_slug" : "00-boku-no-hero-academia",
|
||||||
|
"volume" : 1,
|
||||||
|
"chapter" : 0,
|
||||||
|
"chapter_hid" : "0nJzK",
|
||||||
|
"chapter_id" : 2285787,
|
||||||
|
"chapter_minor" : "",
|
||||||
|
"chapter_string": "0nJzK-volume-1-en",
|
||||||
|
"title" : "",
|
||||||
|
"lang" : "en",
|
||||||
|
"artist" : ["Horikoshi Kouhei"],
|
||||||
|
"author" : ["Horikoshi Kouhei"],
|
||||||
|
"group" : ["Official"],
|
||||||
|
"count" : 187,
|
||||||
|
"date" : "dt:2022-10-08 06:07:50",
|
||||||
|
"date_updated" : "dt:2025-01-01 18:56:24",
|
||||||
|
"demographic" : "Shounen",
|
||||||
|
"extension" : {"jpg", "png"},
|
||||||
|
"filename" : str,
|
||||||
|
"width" : int,
|
||||||
|
"height" : int,
|
||||||
|
"mature" : True,
|
||||||
|
"origin" : "ja",
|
||||||
|
"published" : 2014,
|
||||||
|
"rating" : "safe",
|
||||||
|
"score" : float,
|
||||||
|
"status" : "Complete",
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://comick.io/comic/kobayashi-san-chi-no-maid-dragon",
|
"#url" : "https://comick.io/comic/kobayashi-san-chi-no-maid-dragon",
|
||||||
"#comment" : "all chapters",
|
"#comment" : "all chapters",
|
||||||
"#class" : comick.ComickMangaExtractor,
|
"#class" : comick.ComickMangaExtractor,
|
||||||
"#pattern" : comick.ComickChapterExtractor.pattern,
|
"#pattern" : comick.ComickChapterExtractor.pattern,
|
||||||
"#count" : range(890, 1000),
|
"#count" : range(890, 1000),
|
||||||
|
|
||||||
|
"volume" : int,
|
||||||
|
"chapter": int,
|
||||||
|
"chapter_minor": str,
|
||||||
|
"lang" : "iso:639",
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -242,4 +285,23 @@ __tests__ = (
|
|||||||
"#count" : range(50, 100),
|
"#count" : range(50, 100),
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://comick.io/comic/q1hZ1dbv",
|
||||||
|
"#comment" : "volume-only 'chapters' (#8043)",
|
||||||
|
"#class" : comick.ComickMangaExtractor,
|
||||||
|
"#range" : "1-5",
|
||||||
|
"#results" : (
|
||||||
|
"https://comick.io/comic/00-boku-no-hero-academia/0nJzK-volume-1-en",
|
||||||
|
"https://comick.io/comic/00-boku-no-hero-academia/oBxML-volume-1-en",
|
||||||
|
"https://comick.io/comic/00-boku-no-hero-academia/lyq4r-volume-2-en",
|
||||||
|
"https://comick.io/comic/00-boku-no-hero-academia/wNJYr-volume-2-en",
|
||||||
|
"https://comick.io/comic/00-boku-no-hero-academia/nAv4E-volume-3-en",
|
||||||
|
),
|
||||||
|
|
||||||
|
"volume" : {1, 2, 3},
|
||||||
|
"chapter": 0,
|
||||||
|
"chapter_minor": "",
|
||||||
|
"lang" : "iso:639",
|
||||||
|
},
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -148,7 +148,7 @@ __tests__ = (
|
|||||||
"#comment" : "pfbid user ID; 'This content isn't available right now' profile",
|
"#comment" : "pfbid user ID; 'This content isn't available right now' profile",
|
||||||
"#class" : facebook.FacebookSetExtractor,
|
"#class" : facebook.FacebookSetExtractor,
|
||||||
"#metadata": "post",
|
"#metadata": "post",
|
||||||
"#range" : 0,
|
"#range" : "0",
|
||||||
|
|
||||||
"caption" : "Amarte es mi hábito favorito",
|
"caption" : "Amarte es mi hábito favorito",
|
||||||
"date" : "dt:2025-05-03 03:42:52",
|
"date" : "dt:2025-05-03 03:42:52",
|
||||||
|
|||||||
Reference in New Issue
Block a user