change Chapter and MangaExtractor classes
- unify and simplify constructors
- rename get_metadata and get_images to just metadata() and images()
- rename self.url to chapter_url and manga_url
This commit is contained in:
@@ -32,31 +32,6 @@ class MangareaderBase():
|
||||
return data
|
||||
|
||||
|
||||
class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
    """Extractor for manga from mangareader.net"""
    # The single capture group spans the optional "www." prefix, the host
    # name and the manga's path segment.
    pattern = r"(?:https?://)?((?:www\.)?mangareader\.net/[^/?&#]+)/?$"
    # NOTE(review): presumably tells the base class not to reverse the
    # chapter list returned by chapters() -- confirm against MangaExtractor.
    reverse = False
    # Sample URL plus two hash strings; presumably consumed by the
    # project's extractor test-suite -- confirm.
    test = ("https://www.mangareader.net/mushishi", {
        "url": "bc203b858b4ad76e5d77e39118a7be0350e357da",
        "keyword": "031b3ea085921c552de017ecbb9b906e462229c9",
    })

    def chapters(self, page):
        """Return a list of (chapter-url, metadata) tuples found in 'page'

        Metadata shared by all chapters comes from parse_page(); each
        entry additionally carries its own "title", "date" and "chapter"
        values. Raises ValueError (from str.index) if 'page' contains no
        '<div id="chapterlist">' marker.
        """
        shared = self.parse_page(page, {"lang": "en", "language": "English"})
        link_marker = '<div class="chico_manga"></div>\n<a href="'
        # Start scanning at the chapter list.
        offset = page.index('<div id="chapterlist">')
        found = []

        while True:
            href, offset = text.extract(page, link_marker, '"', offset)
            if not href:
                # No further chapter link could be extracted.
                break

            entry = shared.copy()
            entry["title"], offset = text.extract(
                page, '</a> : ', '</td>', offset)
            entry["date"], offset = text.extract(
                page, '<td>', '</td>', offset)
            # The chapter number is the last path component of the link.
            last_segment = href.rpartition("/")[2]
            entry["chapter"] = text.parse_int(last_segment)
            found.append((self.root + href, entry))

        return found
|
||||
|
||||
|
||||
class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
|
||||
"""Extractor for manga-chapters from mangareader.net"""
|
||||
archive_fmt = "{manga}_{chapter}_{page}"
|
||||
@@ -68,11 +43,10 @@ class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
|
||||
})
|
||||
|
||||
def __init__(self, match):
|
||||
self.part, self.url_title, self.chapter = match.groups()
|
||||
ChapterExtractor.__init__(self, match, self.root + self.part)
|
||||
path, self.url_title, self.chapter = match.groups()
|
||||
ChapterExtractor.__init__(self, match, self.root + path)
|
||||
|
||||
def get_metadata(self, chapter_page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
def metadata(self, chapter_page):
|
||||
page = self.request(self.root + self.url_title).text
|
||||
data = self.parse_page(page, {
|
||||
"chapter": text.parse_int(self.chapter),
|
||||
@@ -88,7 +62,7 @@ class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
|
||||
)
|
||||
return data
|
||||
|
||||
def get_images(self, page):
|
||||
def images(self, page):
|
||||
while True:
|
||||
next_url, image_url, image_data = self.get_image_metadata(page)
|
||||
yield image_url, image_data
|
||||
@@ -117,3 +91,28 @@ class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
|
||||
"width": text.parse_int(width),
|
||||
"height": text.parse_int(height),
|
||||
}
|
||||
|
||||
|
||||
class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
    """Extractor for manga from mangareader.net"""
    # The single capture group is the manga's path on the site,
    # including its leading slash (e.g. "/mushishi").
    pattern = r"(?:https?://)?(?:www\.)?mangareader\.net(/[^/?&#]+)/?$"
    # NOTE(review): presumably tells the base class not to reverse the
    # chapter list returned by chapters() -- confirm against MangaExtractor.
    reverse = False
    # Sample URL plus two hash strings; presumably consumed by the
    # project's extractor test-suite -- confirm.
    test = ("https://www.mangareader.net/mushishi", {
        "url": "bc203b858b4ad76e5d77e39118a7be0350e357da",
        "keyword": "031b3ea085921c552de017ecbb9b906e462229c9",
    })

    def chapters(self, page):
        """Collect (chapter-url, metadata) tuples for every listed chapter

        'page' is the text of the manga's overview page. Metadata common
        to all chapters is taken from parse_page(); "title", "date" and
        "chapter" are filled in per chapter. Raises ValueError (from
        str.index) if the '<div id="chapterlist">' marker is missing.
        """
        common = self.parse_page(page, {"lang": "en", "language": "English"})
        anchor = '<div class="chico_manga"></div>\n<a href="'
        # Skip everything in front of the chapter list.
        cursor = page.index('<div id="chapterlist">')
        chapter_list = []

        while True:
            path, cursor = text.extract(page, anchor, '"', cursor)
            if not path:
                # Nothing more could be extracted: the list is complete.
                return chapter_list

            title, cursor = text.extract(page, '</a> : ', '</td>', cursor)
            date, cursor = text.extract(page, '<td>', '</td>', cursor)

            info = common.copy()
            info["title"] = title
            info["date"] = date
            # The chapter number is whatever follows the final "/".
            info["chapter"] = text.parse_int(path.rpartition("/")[2])
            chapter_list.append((self.root + path, info))
|
||||
|
||||
Reference in New Issue
Block a user