diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py index 9e3f072d..f16d7e43 100644 --- a/gallery_dl/extractor/mangapark.py +++ b/gallery_dl/extractor/mangapark.py @@ -8,7 +8,7 @@ """Extractors for https://mangapark.net/""" -from .common import ChapterExtractor, MangaExtractor +from .common import ChapterExtractor, Extractor, Message from .. import text, util import re @@ -18,34 +18,18 @@ BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangapark\.(?:net|com|org|io|me)" class MangaparkBase(): """Base class for mangapark extractors""" category = "mangapark" + _match_title = None - @staticmethod - def parse_chapter_path(path, data): - """Get volume/chapter information from url-path of a chapter""" - data["volume"], data["chapter_minor"] = 0, "" - for part in path.split("/")[1:]: - key, value = part[0], part[1:] - if key == "c": - chapter, dot, minor = value.partition(".") - data["chapter"] = text.parse_int(chapter) - data["chapter_minor"] = dot + minor - elif key == "i": - data["chapter_id"] = text.parse_int(value) - elif key == "v": - data["volume"] = text.parse_int(value) - elif key == "s": - data["stream"] = text.parse_int(value) - elif key == "e": - data["chapter_minor"] = "v" + value - - @staticmethod - def parse_chapter_title(title, data): - match = re.search(r"(?i)(?:vol(?:ume)?[ .]*(\d+) )?" - r"ch(?:apter)?[ .]*(\d+)(\.\w+)?", title) - if match: - vol, ch, data["chapter_minor"] = match.groups() - data["volume"] = text.parse_int(vol) - data["chapter"] = text.parse_int(ch) + def _parse_chapter_title(self, title): + if not self._match_title: + MangaparkBase._match_title = re.compile( + r"(?i)" + r"(?:vol(?:\.|ume)?\s*(\d+)\s*)?" + r"ch(?:\.|apter)?\s*(\d+)([^\s:]*)" + r"(?:\s*:\s*(.*))?" + ).match + match = self._match_title(title) + return match.groups() if match else (0, 0, "", "") class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor): @@ -100,13 +84,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor): self._urls = chapter["imageSet"]["httpLis"] self._params = chapter["imageSet"]["wordLis"] - - match = re.match( - r"(?i)" - r"(?:vol(?:\.|ume)?\s*(\d+)\s*)?" - r"ch(?:\.|apter)?\s*(\d+)([^\s:]*)" - r"(?:\s*:\s*(.*))?", chapter["dname"]) - vol, ch, minor, title = match.groups() if match else (0, 0, "", "") + vol, ch, minor, title = self._parse_chapter_title(chapter["dname"]) return { "manga" : manga["name"], @@ -132,50 +110,271 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor): ] -class MangaparkMangaExtractor(MangaparkBase, MangaExtractor): +class MangaparkMangaExtractor(MangaparkBase, Extractor): """Extractor for manga from mangapark.net""" - chapterclass = MangaparkChapterExtractor - pattern = (r"(?:https?://)?(?:www\.|v2\.)?mangapark\.(me|net|com)" - r"(/manga/[^/?#]+)/?$") + subcategory = "manga" + pattern = BASE_PATTERN + r"/title/(\d+)(?:-[^/?#]*)?/?$" test = ( - ("https://mangapark.net/manga/aria", { - "url": "51c6d82aed5c3c78e0d3f980b09a998e6a2a83ee", - "keyword": "cabc60cf2efa82749d27ac92c495945961e4b73c", + ("https://mangapark.net/title/114972-aria", { + "count": 141, + "pattern": MangaparkChapterExtractor.pattern, + "keyword": { + "chapter": int, + "chapter_id": int, + "chapter_minor": str, + "date": "type:datetime", + "lang": "en", + "language": "English", + "manga_id": 114972, + "source": "re:Horse|Koala", + "title": str, + "volume": int, + }, }), - ("https://mangapark.me/manga/aria"), - ("https://mangapark.com/manga/aria"), + ("https://mangapark.com/title/114972-"), + ("https://mangapark.com/title/114972"), + ("https://mangapark.com/title/114972-aria"), + ("https://mangapark.org/title/114972-aria"), + ("https://mangapark.io/title/114972-aria"), + ("https://mangapark.me/title/114972-aria"), ) def __init__(self, match): - self.root = self.root_fmt.format(match.group(1)) - MangaExtractor.__init__(self, match, self.root + match.group(2)) + self.root = text.root_from_url(match.group(0)) + self.manga_id = int(match.group(1)) + Extractor.__init__(self, match) - def chapters(self, page): - results = [] - data = {"lang": "en", "language": "English"} - data["manga"] = text.unescape( - text.extr(page, '