From cf147dfee9271dca6c1d2396f8553ecf38b6e497 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 9 Feb 2018 22:24:34 +0100 Subject: [PATCH] [hentai2read] fix manga extraction - site changed its HTML structure --- gallery_dl/extractor/hentai2read.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/gallery_dl/extractor/hentai2read.py b/gallery_dl/extractor/hentai2read.py index c960daa4..29af07c9 100644 --- a/gallery_dl/extractor/hentai2read.py +++ b/gallery_dl/extractor/hentai2read.py @@ -38,10 +38,14 @@ class Hentai2readMangaExtractor(MangaExtractor): page, '[', ']', pos) manga_id = util.safe_int(text.extract(page, 'data-mid="', '"', pos)[0]) - for url, chapter_id, chapter in re.findall( - r'
  • \s+]+data-cid="([^"]+)">' - r'\s+([^<]+)<', page): + while True: + chapter_id, pos = text.extract(page, ' data-cid="', '"', pos) + if not chapter_id: + return results + _ , pos = text.extract(page, ' href="', '"', pos) + url, pos = text.extract(page, ' href="', '"', pos) + chapter, pos = text.extract(page, '>', '<', pos) + chapter, _, title = text.unescape(chapter).strip().partition(" - ") results.append((url, { "manga_id": manga_id, "manga": manga, "type": mtype, @@ -49,7 +53,6 @@ class Hentai2readMangaExtractor(MangaExtractor): "chapter": util.safe_int(chapter), "title": title, "lang": "en", "language": "English", })) - return results class Hentai2readChapterExtractor(ChapterExtractor):