diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index c7828ba3..85d7ee78 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -763,6 +763,12 @@ Consider all listed sites to potentially be NSFW.
     <td>Playlists, Posts, Tag Searches</td>
     <td></td>
 </tr>
+<tr>
+    <td>Rawkuma</td>
+    <td>https://rawkuma.net/</td>
+    <td>Chapters, Manga</td>
+    <td></td>
+</tr>
 <tr>
     <td>Read Comic Online</td>
     <td>https://readcomiconline.li/</td>
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 4dc8c67c..fcf27c8a 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -140,6 +140,7 @@ modules = [
     "pornhub",
     "pornpics",
     "postmill",
+    "rawkuma",
     "reactor",
     "readcomiconline",
     "realbooru",
diff --git a/gallery_dl/extractor/rawkuma.py b/gallery_dl/extractor/rawkuma.py
new file mode 100644
index 00000000..0196a2f1
--- /dev/null
+++ b/gallery_dl/extractor/rawkuma.py
@@ -0,0 +1,94 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://rawkuma.net/"""
+
+from .common import MangaExtractor, ChapterExtractor
+from .. import text, util
+
+BASE_PATTERN = r"(?:https?://)?rawkuma\.(?:net|com)"
+
+
+class RawkumaBase():
+    """Base class for rawkuma extractors"""
+    category = "rawkuma"
+    root = "https://rawkuma.net"
+
+
+class RawkumaChapterExtractor(RawkumaBase, ChapterExtractor):
+    """Extractor for manga chapters from rawkuma.net"""
+    archive_fmt = "{chapter_id}_{page}"
+    pattern = BASE_PATTERN + r"/([^/?#]+-chapter-\d+(?:-\d+)?)"
+    example = "https://rawkuma.net/TITLE-chapter-123/"
+
+    def __init__(self, match):
+        # rebuild the URL on 'root', whichever domain the pattern matched
+        url = "{}/{}/".format(self.root, match.group(1))
+        ChapterExtractor.__init__(self, match, url)
+
+    def metadata(self, page):
+        """Return chapter metadata parsed from a chapter page"""
+        # most fields come from the JSON object embedded after ',"item":'
+        item = util.json_loads(text.extr(page, ',"item":', "}};"))
+        title = text.rextr(
+            page, '<h1 class="entry-title', "</h1>").partition(" &#8211; ")[2]
+        date = text.extr(page, 'datetime="', '"')
+        # "8.1" -> chapter "8", sep ".", minor "1"
+        chapter, sep, minor = item["c"].partition(".")
+
+        return {
+            "manga"        : item["s"],
+            "manga_id"     : text.parse_int(item["mid"]),
+            "chapter"      : text.parse_int(chapter),
+            "chapter_minor": sep + minor,
+            "chapter_id"   : text.parse_int(item["cid"]),
+            "title"        : text.unescape(title),
+            "date"         : text.parse_datetime(
+                date, "%Y-%m-%dWIB%H:%M:%S%z"),
+            "thumbnail"    : item.get("t"),
+            "lang"         : "ja",
+            "language"     : "Japanese",
+        }
+
+    def images(self, page):
+        """Return a list of (image URL, None) tuples for a chapter page"""
+        images = util.json_loads(text.extr(page, '","images":', '}'))
+        return [(url, None) for url in images]
+
+
+class RawkumaMangaExtractor(RawkumaBase, MangaExtractor):
+    """Extractor for manga from rawkuma.net"""
+    chapterclass = RawkumaChapterExtractor
+    pattern = BASE_PATTERN + r"/manga/([^/?#]+)"
+    example = "https://rawkuma.net/manga/TITLE/"
+
+    def __init__(self, match):
+        url = "{}/manga/{}/".format(self.root, match.group(1))
+        MangaExtractor.__init__(self, match, url)
+
+    def chapters(self, page):
+        """Return a list of (chapter URL, metadata) tuples for a manga"""
+        manga = text.unescape(text.extr(page, "<title>", " &#8211; "))
+
+        results = []
+        for chbox in text.extract_iter(
+                page, '<li data-num="', "</a>"):
+            # 'chbox' begins with the value of the data-num attribute
+            info = text.extr(chbox, '', '"')
+            chapter, _, title = info.partition(" - ")
+            chapter, sep, minor = chapter.partition(".")
+            minor = sep + minor
+
+            results.append((text.extr(chbox, 'href="', '"'), {
+                "manga"        : manga,
+                "chapter"      : text.parse_int(chapter),
+                # same key as in RawkumaChapterExtractor.metadata()
+                "chapter_minor": minor,
+                # hyphenated spelling kept for backward compatibility
+                "chapter-minor": minor,
+                "title"        : title,
+            }))
+        return results
diff --git a/test/results/rawkuma.py b/test/results/rawkuma.py
new file mode 100644
index 00000000..dab14b7a
--- /dev/null
+++ b/test/results/rawkuma.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import rawkuma
+
+
+__tests__ = (
+{
+    "#url"     : "https://rawkuma.net/hitman-chapter-127/",
+    "#class"   : rawkuma.RawkumaChapterExtractor,
+    "#pattern" : r"https://cdn.kumacdn.club/wp-content/uploads/images/h/hitman/chapter-127/.+\.jpg$",
+    "#count"   : 18,
+
+    "chapter"      : 127,
+    "chapter_id"   : 313750,
+    "chapter_minor": "",
+    "count"        : 18,
+    "date"         : "dt:2021-07-01 07:07:27",
+    "extension"    : "jpg",
+    "filename"     : str,
+    "lang"         : "ja",
+    "language"     : "Japanese",
+    "manga"        : "Hitman",
+    "manga_id"     : 47920,
+    "page"         : range(1, 18),
+    "thumbnail"    : "https://rawkuma.net/wp-content/uploads/2020/10/Hitman-10.jpg",
+    "title"        : "End",
+},
+
+{
+    "#url"     : "https://rawkuma.net/saikyou-inyoushi-no-isekai-tenseiki-chapter-8-1/",
+    "#class"   : rawkuma.RawkumaChapterExtractor,
+    "#pattern" : r"https://cdn.kumacdn.club/wp-content/uploads/images/s/saikyou-inyoushi-no-isekai-tenseiki/chapter-8-1/.+\.jpg$",
+
+    "chapter"      : 8,
+    "chapter_id"   : 85076,
+    "chapter_minor": ".1",
+    "count"        : 11,
+    "date"         : "dt:2023-11-21 06:27:19",
+    "extension"    : "jpg",
+    "filename"     : str,
+    "lang"         : "ja",
+    "language"     : "Japanese",
+    "manga"        : "Saikyou Inyoushi no Isekai Tenseiki",
+    "manga_id"     : 20781,
+    "page"         : range(1, 11),
+    "thumbnail"    : "https://rawkuma.net/wp-content/uploads/2020/06/Saikyou-Inyoushi-no-Isekai-Tenseiki-cover.jpg",
+    "title"        : "",
+},
+
+{
+    "#url"     : "https://rawkuma.net/manga/hitman/",
+    "#class"   : rawkuma.RawkumaMangaExtractor,
+    "#pattern" : rawkuma.RawkumaChapterExtractor.pattern,
+
+    "chapter"      : range(1, 127),
+    "chapter-minor": {"", ".5"},
+    "manga"        : "Hitman",
+    "title"        : {"", "End"},
+},
+
+)