diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py index bd6e3c69..4d2057ce 100644 --- a/gallery_dl/extractor/mangapark.py +++ b/gallery_dl/extractor/mangapark.py @@ -10,7 +10,33 @@ from .common import Extractor, Message from .. import text -import re + +class MangaparkMangaExtractor(Extractor): + """Extract all chapters of a manga from mangapark""" + category = "mangapark" + subcategory = "manga" + pattern = [r"(?:https?://)?(?:www\.)?mangapark\.me/manga/([^/]+)"] + url_base = "http://mangapark.me" + + def __init__(self, match): + Extractor.__init__(self) + self.url_title = match.group(1) + + def items(self): + yield Message.Version, 1 + for chapter in self.get_chapters(): + print(self.url_base + chapter) + yield Message.Queue, self.url_base + chapter + + def get_chapters(self): + """Return a list of all chapter urls""" + page = self.request(self.url_base + "/manga/" + self.url_title).text + needle = '') + return reversed(list( + text.extract_iter(page, needle, '"', pos) + )) + class MangaparkChapterExtractor(Extractor): """Extract a single manga-chapter from mangapark""" @@ -18,7 +44,8 @@ class MangaparkChapterExtractor(Extractor): subcategory = "chapter" directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor}"] filename_fmt = "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}" - pattern = [r"(?:https?://)?(?:www\.)?mangapark\.me/manga/([^/]+/s(\d+)(?:/v(\d+))?/c(\d+)(\.\d+)?)"] + pattern = [(r"(?:https?://)?(?:www\.)?mangapark\.me/manga/" + r"([^/]+/s(\d+)(?:/v(\d+))?/c(\d+)(\.\d+)?)")] def __init__(self, match): Extractor.__init__(self) @@ -35,6 +62,7 @@ class MangaparkChapterExtractor(Extractor): yield Message.Url, data["url"], text.nameext_from_url(data["url"], data) def get_job_metadata(self, page): + """Collect metadata for extractor-job""" data = { "category": self.category, "version": self.version, @@ -55,7 +83,9 @@ class MangaparkChapterExtractor(Extractor): data["manga"] = data["manga"][:pos] return data - def get_images(self, page): + @staticmethod + def get_images(page): + """Collect image-urls, -widths and -heights""" pos = 0 while True: url , pos = text.extract(page, ' target="_blank" href="', '"', pos)