[komikcast] fix extraction

This commit is contained in:
Mike Fährmann
2018-09-29 16:37:30 +02:00
parent 8080071174
commit 5507f5ce2e
2 changed files with 9 additions and 8 deletions

View File

@@ -66,13 +66,13 @@ class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
ChapterExtractor.__init__(self, self.root + match.group(1)) ChapterExtractor.__init__(self, self.root + match.group(1))
def get_metadata(self, page): def get_metadata(self, page):
info = text.extract(page, '<b>', "</b>")[0] info = text.extract(page, '<title>', " - Komikcast</title>")[0]
return self.parse_chapter_string(info) return self.parse_chapter_string(info)
@staticmethod @staticmethod
def get_images(page): def get_images(page):
readerarea = text.extract( readerarea = text.extract(
page, '<div id="readerarea">', '<div class="navig">')[0] page, '<div class="lexot">', '</center>')[0]
return [ return [
(text.unescape(url), { (text.unescape(url), {
"width": text.parse_int(width), "width": text.parse_int(width),
@@ -104,9 +104,11 @@ class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
results = [] results = []
data = self.get_metadata(page) data = self.get_metadata(page)
page = text.extract(
page, "<span>Chapter List</span>", "<span>Download</span>")[0]
for item in text.extract_iter( for item in text.extract_iter(
page, '<span class="leftoff"><a href="', '</a>'): page, ' href="', '" rel="'):
url, _, chapter_string = item.rpartition('">Chapter ') url, _, chapter_string = item.rpartition('" title="')
self.parse_chapter_string(chapter_string, data) self.parse_chapter_string(chapter_string, data)
results.append((url, data.copy())) results.append((url, data.copy()))
return results return results
@@ -115,9 +117,9 @@ class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
def get_metadata(page): def get_metadata(page):
"""Return a dict with general metadata""" """Return a dict with general metadata"""
manga , pos = text.extract(page, "<title>", "</title>") manga , pos = text.extract(page, "<title>", "</title>")
author, pos = text.extract(page, "<th>Author</th><td>", "</td>", pos) author, pos = text.extract(page, "<b>Author</b>: ", "</span>", pos)
genres, pos = text.extract(page, "<th>Genres </th><td>", "</td>", pos) genres, pos = text.extract(page, "<b>Genres </b>: ", "</span>", pos)
mtype , pos = text.extract(page, "<th>Type </th><td>", "</td>", pos) mtype , pos = text.extract(page, "<b>Type </b>: ", "</span>", pos)
return { return {
"manga": text.unescape(manga.rpartition(" - ")[0]), "manga": text.unescape(manga.rpartition(" - ")[0]),

View File

@@ -24,7 +24,6 @@ TRAVIS_SKIP = {
# temporary issues, etc. # temporary issues, etc.
BROKEN = { BROKEN = {
"seaotterscans", "seaotterscans",
"komikcast",
} }