From 039d06c8f61f7ed0a81aabda120c29168d9003ff Mon Sep 17 00:00:00 2001 From: enduser420 Date: Sun, 13 Nov 2022 16:00:29 +0530 Subject: [PATCH 1/2] [mangaread] add 'chapter' and 'manga' extractors --- docs/supportedsites.md | 6 + gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/mangaread.py | 189 ++++++++++++++++++++++++++++++ scripts/supportedsites.py | 1 + 4 files changed, 197 insertions(+) create mode 100644 gallery_dl/extractor/mangaread.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 7d10d969..c8ebbec9 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -511,6 +511,12 @@ Consider all sites to be NSFW unless otherwise known. Chapters, Manga + + MangaRead + https://mangaread.org/ + Chapters, Manga + + MangaSee https://mangasee123.com/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 6998a21e..b62671a3 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -86,6 +86,7 @@ modules = [ "mangakakalot", "manganelo", "mangapark", + "mangaread", "mangasee", "mangoxo", "mememuseum", diff --git a/gallery_dl/extractor/mangaread.py b/gallery_dl/extractor/mangaread.py new file mode 100644 index 00000000..b8cf1e88 --- /dev/null +++ b/gallery_dl/extractor/mangaread.py @@ -0,0 +1,189 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://mangaread.org/""" + +from .common import ChapterExtractor, MangaExtractor +from .. 
class MangareadBase():
    """Base class for Mangaread extractors"""
    category = "mangaread"
    root = "https://www.mangaread.org"

    @staticmethod
    def parse_chapter_string(chapter_string, data):
        """Parse a chapter heading and store its fields in 'data'

        'chapter_string' is matched against the form
        "[<manga> - ]Chapter <number>[.<minor>][ - <title>]"
        after HTML entities are unescaped and surrounding whitespace
        is stripped.

        'data' is updated in place and nothing is returned. The keys
        "manga", "chapter", "chapter_minor", "title", "lang", and
        "language" are set; a "manga" value already present in 'data'
        takes precedence over the one parsed from the heading.

        A heading without a recognizable "Chapter <number>" part does
        not raise: its text is stored as "title", "chapter" becomes 0,
        and "chapter_minor" becomes an empty string.
        """
        heading = text.unescape(chapter_string).strip()
        match = re.match(
            r"(?:(.+)\s*-\s*)?[Cc]hapter\s*(\d+)(\.\d+)?(?:\s*-\s*(.+))?",
            heading)

        if match:
            manga, chapter, minor, title = match.groups()
        else:
            # re.match() returns None when the heading does not fit the
            # pattern; keep the raw heading as title instead of failing
            # with an AttributeError on 'None.groups()'
            manga = chapter = minor = None
            title = heading

        manga = manga.strip() if manga else ""
        # pop + reassign keeps an existing "manga" value
        # and only falls back to the parsed one
        data["manga"] = data.pop("manga", manga)
        data["chapter"] = text.parse_int(chapter) if chapter else 0
        data["chapter_minor"] = minor or ""
        data["title"] = title or ""
        data["lang"] = "en"
        data["language"] = "English"
+ "count": 6, + "keyword": { + "manga" : "Above All Gods", + "title" : "", + "chapter" : 146, + "chapter_minor": ".5", + "tags" : list, + "lang" : "en", + "language": "English", + } + }), + ) + + def metadata(self, page): + data = {"tags": list(text.extract_iter(page, 'class="">', "<"))} + info = text.extr(page, '

', "

") + if not info: + raise exception.NotFoundError("chapter") + self.parse_chapter_string(info, data) + return data + + def images(self, page): + page = text.extr( + page, '
', '
= 13", + "keyword": { + "manga" : "Kanan-sama wa Akumade Choroi", + "author" : ["nonco"], + "artist" : ["nonco"], + "type" : "Manga", + "genres" : ["Comedy", "Romance", "Shounen", "Supernatural"], + "rating" : float, + "release": 2022, + "status" : "OnGoing", + "lang" : "en", + "language" : "English", + "manga_alt" : list, + "description": str, + } + }), + ("https://www.mangaread.org/manga/one-piece", { + "pattern": (r"https://www\.mangaread\.org/manga" + r"/one-piece/chapter-\d+(-.+)?/"), + "count" : ">= 1066", + "keyword": { + "manga" : "One Piece", + "author" : ["Oda Eiichiro"], + "artist" : ["Oda Eiichiro"], + "type" : "Manga", + "genres" : list, + "rating" : float, + "release": 1997, + "status" : "OnGoing", + "lang" : "en", + "language" : "English", + "manga_alt" : ["One Piece"], + "description": str, + } + }), + ("https://www.mangaread.org/manga/doesnotexist", { + "exception": exception.HttpError, + }), + ) + + def chapters(self, page): + data = self.metadata(page) + result = [] + for chapter in text.extract_iter( + page, '
  • "): + url , pos = text.extract(chapter, '", "", pos) + self.parse_chapter_string(info, data) + result.append((url, data.copy())) + return result + + def metadata(self, page): + extr = text.extract_from(text.extr( + page, 'class="summary_content">', 'class="manga-action"')) + return { + "manga" : text.extr(page, "

    ", "

    ").strip(), + "description": text.unescape(text.remove_html(text.extract( + page, ">", "
  • ", page.index("summary__content"))[0])), + "rating" : text.parse_float( + extr('total_votes">', "").strip()), + "manga_alt" : text.remove_html( + extr("Alternative \n
    ", "")).split("; "), + "author" : list(text.extract_iter( + extr('class="author-content">', ""), '"tag">', "")), + "artist" : list(text.extract_iter( + extr('class="artist-content">', ""), '"tag">', "")), + "genres" : list(text.extract_iter( + extr('class="genres-content">', ""), '"tag">', "")), + "type" : text.remove_html( + extr("Type \n", "")), + "release" : text.parse_int(text.remove_html( + extr("Release \n", ""))), + "status" : text.remove_html( + extr("Status \n", "")), + } diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 65a759d9..770bf358 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -72,6 +72,7 @@ CATEGORY_MAP = { "mangalife" : "MangaLife", "manganelo" : "Manganato", "mangapark" : "MangaPark", + "mangaread" : "MangaRead", "mangasee" : "MangaSee", "mastodon.social": "mastodon.social", "mememuseum" : "meme.museum", From ade9789b3eb5fda667c792fea8e9808c291a7a2c Mon Sep 17 00:00:00 2001 From: enduser420 Date: Sun, 13 Nov 2022 16:04:17 +0530 Subject: [PATCH 2/2] [mangaread] update regex --- gallery_dl/extractor/mangaread.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery_dl/extractor/mangaread.py b/gallery_dl/extractor/mangaread.py index b8cf1e88..4af90e09 100644 --- a/gallery_dl/extractor/mangaread.py +++ b/gallery_dl/extractor/mangaread.py @@ -106,7 +106,7 @@ class MangareadChapterExtractor(MangareadBase, ChapterExtractor): class MangareadMangaExtractor(MangareadBase, MangaExtractor): """Extractor for manga from mangaread.org""" chapterclass = MangareadChapterExtractor - pattern = r"(?:https?://)?(?:www\.)?mangaread\.org(/manga/[^/?#]+)" + pattern = r"(?:https?://)?(?:www\.)?mangaread\.org(/manga/[^/?#]+)/?$" test = ( ("https://www.mangaread.org/manga/kanan-sama-wa-akumade-choroi", { "pattern": (r"https://www\.mangaread\.org/manga"