diff --git a/gallery_dl/extractor/comick.py b/gallery_dl/extractor/comick.py index 49aef1b1..6c541560 100644 --- a/gallery_dl/extractor/comick.py +++ b/gallery_dl/extractor/comick.py @@ -20,79 +20,18 @@ class ComickBase(): category = "comick" root = "https://comick.io" - @memcache(keyarg=1) - def _manga_info(self, slug): - url = f"{self.root}/comic/{slug}" - page = self.request(url).text - data = self._extract_nextdata(page) - props = data["props"]["pageProps"] - comic = props["comic"] - - genre = [] - theme = [] - format = "" - for item in comic["md_comic_md_genres"]: - item = item["md_genres"] - group = item["group"] - if group == "Genre": - genre.append(item["name"]) - elif group == "Theme": - theme.append(item["name"]) - else: - format = item["name"] - - if mu := comic["mu_comics"]: - tags = [c["mu_categories"]["title"] - for c in mu["mu_comic_categories"]] - publisher = [p["mu_publishers"]["title"] - for p in mu["mu_comic_publishers"]] - else: - tags = publisher = () - - return { - "manga": comic["title"], - "manga_id": comic["id"], - "manga_hid": comic["hid"], - "manga_slug": slug, - "manga_titles": [t["title"] for t in comic["md_titles"]], - "artist": [a["name"] for a in props["artists"]], - "author": [a["name"] for a in props["authors"]], - "genre" : genre, - "theme" : theme, - "format": format, - "tags" : tags, - "publisher": publisher, - "published": text.parse_int(comic["year"]), - "description": comic["desc"], - "demographic": props["demographic"], - "origin": comic["iso639_1"], - "mature": props["matureContent"], - "rating": comic["content_rating"], - "rank" : comic["follow_rank"], - "score" : text.parse_float(comic["bayesian_rating"]), - "status": "Complete" if comic["status"] == 2 else "Ongoing", - "links" : comic["links"], - "_build_id": data["buildId"], - } - - def _chapter_info(self, manga, chstr): - slug = manga['manga_slug'] - url = (f"{self.root}/_next/data/{manga['_build_id']}" - f"/comic/{slug}/{chstr}.json") - params = {"slug": slug, "chapter": chstr} - return self.request_json(url, params=params)["pageProps"] - class ComickChapterExtractor(ComickBase, ChapterExtractor): """Extractor for comick.io manga chapters""" archive_fmt = "{chapter_hid}_{page}" - pattern = BASE_PATTERN + r"/comic/([\w-]+)/(\w+(?:-chapter-[^/?#]+)?)" + pattern = (BASE_PATTERN + r"/comic/([\w-]+)" + r"/(\w+(?:-(?:chapter|volume)-[^/?#]+)?)") example = "https://comick.io/comic/MANGA/ID-chapter-123-en" def metadata(self, page): slug, chstr = self.groups - manga = self._manga_info(slug) - props = self._chapter_info(manga, chstr) + manga = _manga_info(self, slug) + props = _chapter_info(self, manga, chstr) ch = props["chapter"] self._images = ch["md_images"] @@ -138,8 +77,9 @@ class ComickMangaExtractor(ComickBase, MangaExtractor): example = "https://comick.io/comic/MANGA" def items(self): - slug = self.groups[0] - manga = self._manga_info(slug) + manga = _manga_info(self, self.groups[0]) + slug = manga["manga_slug"] + _manga_info.update(slug, manga) for ch in self.chapters(manga): ch.update(manga) @@ -149,11 +89,18 @@ class ComickMangaExtractor(ComickBase, MangaExtractor): url = (f"{self.root}/comic/{slug}" f"/{ch['hid']}-chapter-{chapter}-{ch['lang']}") chapter, sep, minor = chapter.partition(".") + ch["volume"] = text.parse_int(ch["vol"]) ch["chapter"] = text.parse_int(chapter) ch["chapter_minor"] = sep + minor + elif volume := ch["vol"]: + url = (f"{self.root}/comic/{slug}" + f"/{ch['hid']}-volume-{volume}-{ch['lang']}") + ch["volume"] = text.parse_int(volume) + ch["chapter"] = 0 + ch["chapter_minor"] = "" else: url = f"{self.root}/comic/{slug}/{ch['hid']}" - ch["chapter"] = 0 + ch["volume"] = ch["chapter"] = 0 ch["chapter_minor"] = "" yield Message.Queue, url, ch @@ -213,3 +160,67 @@ class ComickMangaExtractor(ComickBase, MangaExtractor): if data["total"] <= limit * page: return params["page"] = page = page + 1 + + +@memcache(keyarg=1) +def _manga_info(self, slug): + url = f"{self.root}/comic/{slug}" + page = self.request(url).text + data = self._extract_nextdata(page) + props = data["props"]["pageProps"] + comic = props["comic"] + + genre = [] + theme = [] + format = "" + for item in comic["md_comic_md_genres"]: + item = item["md_genres"] + group = item["group"] + if group == "Genre": + genre.append(item["name"]) + elif group == "Theme": + theme.append(item["name"]) + else: + format = item["name"] + + if mu := comic["mu_comics"]: + tags = [c["mu_categories"]["title"] + for c in mu["mu_comic_categories"]] + publisher = [p["mu_publishers"]["title"] + for p in mu["mu_comic_publishers"]] + else: + tags = publisher = () + + return { + "manga": comic["title"], + "manga_id": comic["id"], + "manga_hid": comic["hid"], + "manga_slug": comic["slug"], + "manga_titles": [t["title"] for t in comic["md_titles"]], + "artist": [a["name"] for a in props["artists"]], + "author": [a["name"] for a in props["authors"]], + "genre" : genre, + "theme" : theme, + "format": format, + "tags" : tags, + "publisher": publisher, + "published": text.parse_int(comic["year"]), + "description": comic["desc"], + "demographic": props["demographic"], + "origin": comic["iso639_1"], + "mature": props["matureContent"], + "rating": comic["content_rating"], + "rank" : comic["follow_rank"], + "score" : text.parse_float(comic["bayesian_rating"]), + "status": "Complete" if comic["status"] == 2 else "Ongoing", + "links" : comic["links"], + "_build_id": data["buildId"], + } + + +def _chapter_info(self, manga, chstr): + slug = manga['manga_slug'] + url = (f"{self.root}/_next/data/{manga['_build_id']}" + f"/comic/{slug}/{chstr}.json") + params = {"slug": slug, "chapter": chstr} + return self.request_json(url, params=params)["pageProps"] diff --git a/test/results/comick.py b/test/results/comick.py index 07498602..124f6075 100644 --- a/test/results/comick.py +++ b/test/results/comick.py @@ -49,7 +49,7 @@ __tests__ = ( "origin": "ja", "published": 2023, "publisher": (), - "rank": range(29_000, 32_000), + "rank": range(20_000, 40_000), "rating": "safe", "score": float, "status": "Ongoing", @@ -163,12 +163,55 @@ __tests__ = ( "volume" : 0, }, +{ + "#url" : "https://comick.io/comic/00-boku-no-hero-academia/0nJzK-volume-1-en", + "#comment" : "volume-only chapter (#8043)", + "#class" : comick.ComickChapterExtractor, + "#pattern" : r"https://meo.comick.pictures/\d+-[\w-]+\.(jpg|png)", + "#count" : 187, + + "manga" : "Boku no Hero Academia", + "manga_hid" : "q1hZ1dbv", + "manga_id" : 11359, + "manga_slug" : "00-boku-no-hero-academia", + "volume" : 1, + "chapter" : 0, + "chapter_hid" : "0nJzK", + "chapter_id" : 2285787, + "chapter_minor" : "", + "chapter_string": "0nJzK-volume-1-en", + "title" : "", + "lang" : "en", + "artist" : ["Horikoshi Kouhei"], + "author" : ["Horikoshi Kouhei"], + "group" : ["Official"], + "count" : 187, + "date" : "dt:2022-10-08 06:07:50", + "date_updated" : "dt:2025-01-01 18:56:24", + "demographic" : "Shounen", + "extension" : {"jpg", "png"}, + "filename" : str, + "width" : int, + "height" : int, + "mature" : True, + "origin" : "ja", + "published" : 2014, + "rating" : "safe", + "score" : float, + "status" : "Complete", +}, + { "#url" : "https://comick.io/comic/kobayashi-san-chi-no-maid-dragon", "#comment" : "all chapters", "#class" : comick.ComickMangaExtractor, "#pattern" : comick.ComickChapterExtractor.pattern, "#count" : range(890, 1000), + + "volume" : int, + "chapter": int, + "chapter_minor": str, + "lang" : "iso:639", }, { @@ -242,4 +285,23 @@ __tests__ = ( "#count" : range(50, 100), }, +{ + "#url" : "https://comick.io/comic/q1hZ1dbv", + "#comment" : "volume-only 'chapters' (#8043)", + "#class" : comick.ComickMangaExtractor, + "#range" : "1-5", + "#results" : ( + "https://comick.io/comic/00-boku-no-hero-academia/0nJzK-volume-1-en", + "https://comick.io/comic/00-boku-no-hero-academia/oBxML-volume-1-en", + "https://comick.io/comic/00-boku-no-hero-academia/lyq4r-volume-2-en", + "https://comick.io/comic/00-boku-no-hero-academia/wNJYr-volume-2-en", + "https://comick.io/comic/00-boku-no-hero-academia/nAv4E-volume-3-en", + ), + + "volume" : {1, 2, 3}, + "chapter": 0, + "chapter_minor": "", + "lang" : "iso:639", +}, + ) diff --git a/test/results/facebook.py b/test/results/facebook.py index cc9f5ef9..8067def3 100644 --- a/test/results/facebook.py +++ b/test/results/facebook.py @@ -148,7 +148,7 @@ __tests__ = ( "#comment" : "pfbid user ID; 'This content isn't available right now' profile", "#class" : facebook.FacebookSetExtractor, "#metadata": "post", - "#range" : 0, + "#range" : "0", "caption" : "Amarte es mi hábito favorito", "date" : "dt:2025-05-03 03:42:52",