[mangareader] add manga-extractor (all chapters)

This commit is contained in:
Mike Fährmann
2015-11-26 23:07:12 +01:00
parent 3a93faa372
commit d5349c8cb5
2 changed files with 28 additions and 4 deletions

View File

@@ -8,9 +8,9 @@
"""Extract manga pages from http://www.mangapanda.com/"""
from .mangareader import MangaReaderExtractor
from .mangareader import MangaReaderChapterExtractor
class MangaPandaExtractor(MangaReaderExtractor):
class MangaPandaExtractor(MangaReaderChapterExtractor):
category = "mangapanda"
directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"]

View File

@@ -8,11 +8,35 @@
"""Extract manga pages from http://www.mangareader.net/"""
from .common import AsynchronousExtractor, Message
from .common import AsynchronousExtractor, Extractor, Message
from .. import text
class MangaReaderExtractor(AsynchronousExtractor):
class MangaReaderExtractor(Extractor):
    """Extractor for a whole manga on mangareader.net.

    Requests the manga's page and emits one ``Message.Queue`` entry per
    chapter link found after the ``readmangasum`` marker.
    """

    category = "mangareader"
    directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"]
    filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
    pattern = [r"(?:https?://)?(?:www\.)?mangareader\.net(/[^/]+)$"]
    url_base = "http://www.mangareader.net"

    def __init__(self, match):
        """Remember the manga's URL path (group 1 of ``pattern``)."""
        Extractor.__init__(self)
        self.url_title = match.group(1)

    def items(self):
        """Yield a version message, then a queue message per chapter URL.

        Raises:
            ValueError: from ``str.index`` when the fetched page does not
                contain the '<div id="readmangasum">' marker.
        """
        yield Message.Version, 1
        url = self.url_base + self.url_title
        page = self.request(url).text
        # Chapter links start with the manga's own path, so whatever follows
        # it inside the href is appended to the manga URL below.
        needle = '<a href="' + self.url_title
        # Start scanning at the marker so links before it are ignored.
        pos = page.index('<div id="readmangasum">')
        while True:
            # NOTE(review): text.extract presumably returns the text between
            # `needle` and the next '"' plus the updated position - confirm.
            chapter, pos = text.extract(page, needle, '"', pos)
            if not chapter:
                # Falsy result ends the scan (no further match, or an empty
                # suffix).
                return
            yield Message.Queue, url + chapter
class MangaReaderChapterExtractor(AsynchronousExtractor):
category = "mangareader"
directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"]
filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"