From 5499934ae2e783dd1ecadfe3a5256fce17efc9b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Thu, 30 May 2019 19:51:34 +0200
Subject: [PATCH] [ngomik] fix extraction

---
 gallery_dl/extractor/flickr.py |  2 +-
 gallery_dl/extractor/ngomik.py | 18 ++++++++++++------
 test/test_results.py           |  3 +++
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py
index 1f0f8d9c..d941d766 100644
--- a/gallery_dl/extractor/flickr.py
+++ b/gallery_dl/extractor/flickr.py
@@ -176,7 +176,7 @@ class FlickrGalleryExtractor(FlickrExtractor):
     test = (("https://www.flickr.com/photos/flickr/"
              "galleries/72157681572514792/"), {
         "pattern": FlickrImageExtractor.pattern,
-        "count": 12,
+        "count": ">= 10",
     })
 
     def __init__(self, match):
diff --git a/gallery_dl/extractor/ngomik.py b/gallery_dl/extractor/ngomik.py
index 7412deac..8135a8ae 100644
--- a/gallery_dl/extractor/ngomik.py
+++ b/gallery_dl/extractor/ngomik.py
@@ -10,6 +10,7 @@
 
 from .common import ChapterExtractor
 from .. import text
+import re
 
 
 class NgomikChapterExtractor(ChapterExtractor):
@@ -18,10 +19,15 @@ class NgomikChapterExtractor(ChapterExtractor):
     root = "http://ngomik.in"
     pattern = (r"(?:https?://)?(?:www\.)?ngomik\.in"
                r"(/[^/?&#]+-chapter-[^/?&#]+)")
-    test = ("https://www.ngomik.in/14-sai-no-koi-chapter-1-6/", {
-        "url": "8e67fdf751bbc79bc6f4dead7675008ddb8e32a4",
-        "keyword": "204d177f09d438fd50c9c28d98c73289194640d8",
-    })
+    test = (
+        ("https://www.ngomik.in/14-sai-no-koi-chapter-1-6/", {
+            "url": "8e67fdf751bbc79bc6f4dead7675008ddb8e32a4",
+            "keyword": "204d177f09d438fd50c9c28d98c73289194640d8",
+        }),
+        ("https://ngomik.in/break-blade-chapter-26/", {
+            "count": 34,
+        }),
+    )
 
     def metadata(self, page):
         info = text.extract(page, '', "")[0]
@@ -38,8 +44,8 @@ class NgomikChapterExtractor(ChapterExtractor):
 
     @staticmethod
     def images(page):
-        readerarea = text.extract(page, 'id="readerarea"', 'class="chnav"')[0]
+        readerarea = text.extract(page, 'id=readerarea', 'class=chnav')[0]
         return [
             (text.unescape(url), None)
-            for url in text.extract_iter(readerarea, ' src="', '"')
+            for url in re.findall(r"\ssrc=[\"']?([^\"' >]+)", readerarea)
         ]
diff --git a/test/test_results.py b/test/test_results.py
index 05d4291a..baef10d5 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -21,11 +21,14 @@ TRAVIS_SKIP = {
     "exhentai", "kissmanga", "mangafox", "dynastyscans", "nijie", "bobx",
     "archivedmoe", "archiveofsins", "thebarchive", "fireden", "4plebs",
     "sankaku", "idolcomplex", "mangahere", "readcomiconline", "mangadex",
+    "sankakucomplex",
 }
 
 # temporary issues, etc.
 BROKEN = {
     "mangapark",
+    "simplyhentai",
+    "wallhaven",
 }
 
 