[batoto] fix extraction of chapter URLs

This commit is contained in:
Mike Fährmann
2017-08-25 16:34:42 +02:00
parent 18e6ed1c7e
commit c7ec103e15
3 changed files with 5 additions and 5 deletions

View File

@@ -69,10 +69,10 @@ class BatotoMangaExtractor(BatotoExtractor, MangaExtractor):
     def chapters(self, page):
         # TODO: filter by language / translator
-        needle = ('<td style="border-top:0;">\n '
-                  '<a href="https://bato.to/reader#')
+        pattern = (r'<td style="border-top:0;">\s+'
+                   r'<a href="https?://bato\.to/reader#([^"]+)')
         return [self.root + "/reader#" + mangahash
-                for mangahash in text.extract_iter(page, needle, '"')]
+                for mangahash in re.findall(pattern, page)]
 class BatotoChapterExtractor(BatotoExtractor, AsynchronousExtractor):

View File

@@ -291,7 +291,7 @@ class DeviantartCollectionExtractor(DeviantartExtractor):
                r"/favourites/(\d+)/([^/?&#]+)"]
     test = [("http://rosuuri.deviantart.com/favourites/58951174/Useful", {
         "url": "22a3858a1efb150d11c3f4e63cf9082ad70c6ea0",
-        "keyword": "b4abbad60f87a42fb6c1a021cb3a8efd9d31bfb7",
+        "keyword": "e0ed920fb3dfdad9294be592be2eeb3dc1258a6a",
     })]
     def __init__(self, match):

View File

@@ -58,7 +58,7 @@ skip = [
     "exhentai", "kissmanga", "mangafox", "dynastyscans", "nijie",
     "archivedmoe", "archiveofsins", "thebarchive",
     # temporary issues
-    "mangapark",
+    "mangapark", "danbooru", "yandere", "pixiv",
 ]
 # enable selective testing for direct calls
 if __name__ == '__main__' and len(sys.argv) > 1: