From 9fc1d0c901d7f9aabd0e3dd54e2e598c0ec5e9df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sun, 24 Sep 2017 15:59:25 +0200 Subject: [PATCH] implement and use 'util.safe_int()' same as Python's 'int()', except it doesn't raise any exceptions and accepts a default value --- gallery_dl/extractor/batoto.py | 22 +++++++++++----------- gallery_dl/extractor/deviantart.py | 4 ++-- gallery_dl/extractor/exhentai.py | 8 +++++--- gallery_dl/extractor/fallenangels.py | 8 +++++--- gallery_dl/extractor/hbrowse.py | 23 ++++++++++++++--------- gallery_dl/extractor/hentai2read.py | 7 ++++--- gallery_dl/extractor/hentaifoundry.py | 14 +++++++------- gallery_dl/extractor/hentaihere.py | 8 +++++--- gallery_dl/extractor/imagefap.py | 4 ++-- gallery_dl/extractor/kissmanga.py | 24 ++++++++++++------------ gallery_dl/extractor/mangafox.py | 22 ++++++++++++---------- gallery_dl/extractor/mangahere.py | 19 ++++++++++--------- gallery_dl/extractor/mangapark.py | 19 ++++++++++--------- gallery_dl/extractor/mangareader.py | 24 ++++++++++++------------ gallery_dl/extractor/mangastream.py | 16 +++++++--------- gallery_dl/extractor/mangazuki.py | 2 +- gallery_dl/extractor/spectrumnexus.py | 18 ++++++++++-------- gallery_dl/util.py | 10 ++++++++++ test/test_util.py | 10 ++++++++++ 19 files changed, 149 insertions(+), 113 deletions(-) diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py index 20476e54..92cb1726 100644 --- a/gallery_dl/extractor/batoto.py +++ b/gallery_dl/extractor/batoto.py @@ -60,15 +60,17 @@ class BatotoExtractor(): return {c: response.cookies[c] for c in self.cookienames} @staticmethod - def _parse_chapter_string(data): + def parse_chapter_string(data): + """Parse 'chapter_string' value contained in 'data'""" data["chapter_string"] = text.unescape(data["chapter_string"]) pattern = r"(?:Vol.(\d+) )?Ch\.(\d+)([^ :]*)(?::? (.+))" match = re.match(pattern, data["chapter_string"]) volume, chapter, data["chapter_minor"], title = match.groups() - data["volume"] = int(volume) if volume else 0 - data["chapter"] = int(chapter) + data["volume"] = util.safe_int(volume) + data["chapter"] = util.safe_int(chapter) data["title"] = title if title != "Read Online" else "" + return data class BatotoMangaExtractor(BatotoExtractor, MangaExtractor): @@ -99,7 +101,7 @@ class BatotoMangaExtractor(BatotoExtractor, MangaExtractor): if not data["token"]: return results - self._parse_chapter_string(data) + self.parse_chapter_string(data) data["lang"] = util.language_to_code(data["language"]) data["group"] = text.unescape(data["group"]) data["contributor"] = text.unescape(data["contributor"]) @@ -117,7 +119,7 @@ class BatotoChapterExtractor(BatotoExtractor, AsynchronousExtractor): test = [ ("http://bato.to/reader#459878c8fda07502", { "url": "432d7958506ad913b0a9e42664a89e46a63e9296", - "keyword": "a6ca65532ad5653d0690b0ccc83f53b6e952f1bf", + "keyword": "96598b6f94d2b26d11c2780f8173cd6ab5fe9906", }), ("http://bato.to/reader#459878c8fda07503", { "exception": exception.NotFoundError, @@ -148,15 +150,14 @@ class BatotoChapterExtractor(BatotoExtractor, AsynchronousExtractor): elif error == "10020": raise exception.NotFoundError("chapter") else: - raise Exception("[batoto] unexpected error code: " + error) + raise Exception("error code: " + error) page = response.text data = self.get_job_metadata(page) yield Message.Version, 1 yield Message.Directory, data.copy() - for i in range(int(data["count"])): + for data["page"] in range(1, data["count"]+1): next_url, image_url = self.get_page_urls(page) text.nameext_from_url(image_url, data) - data["page"] = i+1 yield Message.Url, image_url, data.copy() if next_url: params["p"] += 1 @@ -181,10 +182,9 @@ class BatotoChapterExtractor(BatotoExtractor, AsynchronousExtractor): "group": text.unescape(group), "lang": util.language_to_code(lang), "language": lang, - "count": count, + "count": util.safe_int(count), } - self._parse_chapter_string(data) - return data + return self.parse_chapter_string(data) @staticmethod def get_page_urls(page): diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index f9343c84..2d3670f1 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -9,7 +9,7 @@ """Extract images from https://www.deviantart.com/""" from .common import Extractor, Message -from .. import text, exception +from .. import text, util, exception from ..cache import cache, memcache import itertools import datetime @@ -57,7 +57,7 @@ class DeviantartExtractor(Extractor): if "videos" in deviation: video = max(deviation["videos"], - key=lambda x: int(x["quality"][:-1])) + key=lambda x: util.safe_int(x["quality"][:-1])) yield self.commit(deviation, video) if "flash" in deviation: diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index a66d2754..a03c6945 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -25,7 +25,7 @@ class ExhentaiGalleryExtractor(Extractor): pattern = [r"(?:https?://)?(g\.e-|e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})"] test = [ ("https://exhentai.org/g/960460/4f0e369d82/", { - "keyword": "d837276b02c4e91e96c1b40fe4415cbb73b56577", + "keyword": "173277161e28162dcc755d2e7a88e6cd750f2477", "content": "493d759de534355c9f55f8e365565b62411de146", }), ("https://exhentai.org/g/960461/4f0e369d82/", { @@ -44,6 +44,7 @@ class ExhentaiGalleryExtractor(Extractor): self.key = {} self.count = 0 self.version, self.gid, self.token = match.groups() + self.gid = util.safe_int(self.gid) self.original = self.config("original", True) self.wait_min = self.config("wait-min", 3) self.wait_max = self.config("wait-max", 6) @@ -72,7 +73,7 @@ class ExhentaiGalleryExtractor(Extractor): raise exception.NotFoundError("gallery") data = self.get_job_metadata(page) - self.count = int(data["count"]) + self.count = data["count"] yield Message.Directory, data for url, image in self.get_images(page): @@ -100,6 +101,7 @@ class ExhentaiGalleryExtractor(Extractor): data["lang"] = util.language_to_code(data["language"]) data["title"] = text.unescape(data["title"]) data["title_jp"] = text.unescape(data["title_jp"]) + data["count"] = util.safe_int(data["count"]) return data def get_images(self, page): @@ -141,7 +143,7 @@ class ExhentaiGalleryExtractor(Extractor): nextkey = self.key["next"] request = { "method" : "showpage", - "gid" : int(self.gid), + "gid" : self.gid, "imgkey" : nextkey, "showkey": self.key["show"], } diff --git a/gallery_dl/extractor/fallenangels.py b/gallery_dl/extractor/fallenangels.py index 6f7209a7..0fa05ff0 100644 --- a/gallery_dl/extractor/fallenangels.py +++ b/gallery_dl/extractor/fallenangels.py @@ -108,9 +108,11 @@ class FallenangelsMangaExtractor(MangaExtractor): title , pos = text.extract(page, '', '', pos) manga, _, chapter = chapter.rpartition(" ") - chapter, _, minor = chapter.partition(".") + chapter, dot, minor = chapter.partition(".") results.append((url, { - "manga": manga, "title": title, "volume": int(volume), - "chapter": int(chapter), "chapter_minor": minor, + "manga": manga, "title": title, + "volume": util.safe_int(volume), + "chapter": util.safe_int(chapter), + "chapter_minor": dot + minor, "lang": self.lang, "language": language, })) diff --git a/gallery_dl/extractor/hbrowse.py b/gallery_dl/extractor/hbrowse.py index 79098061..87aa565b 100644 --- a/gallery_dl/extractor/hbrowse.py +++ b/gallery_dl/extractor/hbrowse.py @@ -9,7 +9,7 @@ """Extract images from http://www.hbrowse.com/""" from .common import Extractor, MangaExtractor, Message -from .. import text +from .. import text, util import json @@ -19,7 +19,8 @@ class HbrowseExtractor(Extractor): root = "http://www.hbrowse.com" @staticmethod - def _parse_page(page, data): + def parse_page(page, data): + """Parse metadata on 'page' and add it to 'data'""" text.extract_all(page, ( ('manga' , '', ''), ('artist', '', ''), @@ -28,9 +29,10 @@ class HbrowseExtractor(Extractor): ), values=data) data["manga"] = text.unescape(data["manga"]) - data["total"] = int(data["total"]) + data["total"] = util.safe_int(data["total"]) data["artist"] = text.remove_html(data["artist"]) data["origin"] = text.remove_html(data["origin"]) + return data class HbrowseMangaExtractor(MangaExtractor, HbrowseExtractor): @@ -44,8 +46,10 @@ class HbrowseMangaExtractor(MangaExtractor, HbrowseExtractor): def chapters(self, page): results = [] - data = {"manga_id": int(self.url.rstrip("/").rpartition("/")[2])} - self._parse_page(page, data) + data = self.parse_page(page, { + "manga_id": util.safe_int( + self.url.rstrip("/").rpartition("/")[2]) + }) pos = 0 needle = '\n', '') mtype, pos = text.extract( page, '[', ']', pos) - manga_id = int(text.extract(page, 'data-mid="', '"', pos)[0]) + manga_id = util.safe_int(text.extract(page, 'data-mid="', '"', pos)[0]) page, pos = text.extract( page, '\n') @@ -51,7 +51,8 @@ class Hentai2readMangaExtractor(MangaExtractor): chapter, _, title = text.unescape(chapter).strip().partition(" - ") results.append((url, { "manga_id": manga_id, "manga": manga, "type": mtype, - "chapter_id": int(chapter_id), "chapter": int(chapter), + "chapter_id": util.safe_int(chapter_id), + "chapter": util.safe_int(chapter), "title": title, "lang": "en", "language": "English", })) diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index 30d6e47f..2cc3fa7d 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -9,7 +9,7 @@ """Extract images from https://www.hentai-foundry.com/""" from .common import Extractor, Message -from .. import text, exception +from .. import text, util, exception class HentaifoundryUserExtractor(Extractor): @@ -23,7 +23,7 @@ class HentaifoundryUserExtractor(Extractor): test = [ ("https://www.hentai-foundry.com/pictures/user/Tenpura", { "url": "ebbc981a85073745e3ca64a0f2ab31fab967fc28", - "keyword": "6e9a549feb9bafebd9d9342ef3c8ccad33a7031c", + "keyword": "f8fecc8aa89978ecf402ec221243978fe791bd54", }), ("http://www.hentai-foundry.com/user/asdq/profile", { "exception": exception.NotFoundError, @@ -40,7 +40,7 @@ class HentaifoundryUserExtractor(Extractor): self.set_filters(token) yield Message.Version, 1 yield Message.Directory, data - for url, image in self.get_images(int(data["count"])): + for url, image in self.get_images(data["count"]): image.update(data) yield Message.Url, url, image @@ -68,7 +68,7 @@ class HentaifoundryUserExtractor(Extractor): page = response.text token, pos = text.extract(page, 'hidden" value="', '"') count, pos = text.extract(page, 'class="active" >Pictures (', ')', pos) - return {"artist": self.artist, "count": count}, token + return {"artist": self.artist, "count": util.safe_int(count)}, token def get_image_metadata(self, url): """Collect metadata for an image""" @@ -79,7 +79,7 @@ class HentaifoundryUserExtractor(Extractor): page, 'Pictures » ', '<') url, pos = text.extract( page, '//pictures.hentai-foundry.com', '"', pos) - data = {"index": index, "title": text.unescape(title)} + data = {"index": util.safe_int(index), "title": text.unescape(title)} text.nameext_from_url(url, data) return "https://pictures.hentai-foundry.com" + url, data @@ -127,7 +127,7 @@ class HentaifoundryImageExtractor(Extractor): (("http://www.hentai-foundry.com/" "pictures/user/Tenpura/407501/shimakaze"), { "url": "fbf2fd74906738094e2575d2728e8dc3de18a8a3", - "keyword": "304479cfe00fbb723886be78b2bd6b9306a31d8a", + "keyword": "85b8e26fa93d00ae1333cb7b418078f1792dc4a8", "content": "91bf01497c39254b6dfb234a18e8f01629c77fd1", }), ("http://www.hentai-foundry.com/pictures/user/Tenpura/340853/", { @@ -160,7 +160,7 @@ class HentaifoundryImageExtractor(Extractor): url , pos = extr(page, '//pictures.hentai-foundry.com', '"', pos) data = { "artist": artist, - "index": self.index, + "index": util.safe_int(self.index), "title": text.unescape(title), } text.nameext_from_url(url, data) diff --git a/gallery_dl/extractor/hentaihere.py b/gallery_dl/extractor/hentaihere.py index 37eba05a..c31f0525 100644 --- a/gallery_dl/extractor/hentaihere.py +++ b/gallery_dl/extractor/hentaihere.py @@ -9,7 +9,7 @@ """Extract hentai-manga from https://hentaihere.com/""" from .common import MangaExtractor -from .. import text +from .. import text, util from . import hentaicdn import re @@ -32,7 +32,8 @@ class HentaihereMangaExtractor(MangaExtractor): def chapters(self, page): results = [] - manga_id = int(self.url.rstrip("/").rpartition("/")[2][1:]) + manga_id = util.safe_int( + self.url.rstrip("/").rpartition("/")[2][1:]) manga, pos = text.extract( page, '', '') mtype, pos = text.extract( @@ -48,7 +49,8 @@ class HentaihereMangaExtractor(MangaExtractor): chapter, _, title = text.unescape(chapter).strip().partition(" - ") results.append((url, { "manga_id": manga_id, "manga": manga, "type": mtype, - "chapter_id": int(chapter_id), "chapter": int(chapter), + "chapter_id": util.safe_int(chapter_id), + "chapter": util.safe_int(chapter), "title": title, "lang": "en", "language": "English", })) diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py index 003559a4..f6c00009 100644 --- a/gallery_dl/extractor/imagefap.py +++ b/gallery_dl/extractor/imagefap.py @@ -9,7 +9,7 @@ """Extract images from http://imagefap.com/""" from .common import Extractor, Message -from .. import text +from .. import text, util import json @@ -159,7 +159,7 @@ class ImagefapUserExtractor(Extractor): yield Message.Version, 1 for gid, name in self.get_gallery_data(): url = "http://www.imagefap.com/gallery/" + gid - data = {"gallery_id": int(gid), "name": name} + data = {"gallery_id": util.safe_int(gid), "name": name} yield Message.Queue, url, data def get_gallery_data(self): diff --git a/gallery_dl/extractor/kissmanga.py b/gallery_dl/extractor/kissmanga.py index a38084a8..761a3bff 100644 --- a/gallery_dl/extractor/kissmanga.py +++ b/gallery_dl/extractor/kissmanga.py @@ -9,7 +9,7 @@ """Extract manga-chapters and entire manga from http://kissmanga.com/""" from .common import Extractor, MangaExtractor, Message -from .. import text, cloudflare, aes +from .. import text, util, cloudflare, aes from ..cache import cache import re import hashlib @@ -38,7 +38,8 @@ class KissmangaExtractor(Extractor): request = cloudflare.request_func @staticmethod - def _parse_chapter_string(data): + def parse_chapter_string(data): + """Parse 'chapter_string' value contained in 'data'""" data["chapter_string"] = text.unescape(data["chapter_string"]) match = re.match(( @@ -49,16 +50,16 @@ class KissmangaExtractor(Extractor): ), data["chapter_string"]) if not match: - match = re.match(( - r"[\w ]+?(?: -)? 0*()(\d+)()(?: *[:-]? *(.+))?" - # r"[\w ]+?(?: -)? 0*()(\d+)(?: (.+))?(?: - (.+))?" - ), data["chapter_string"]) + match = re.match( + r"[\w ]+?(?: -)? 0*()(\d+)()(?: *[:-]? *(.+))?", + data["chapter_string"]) volume, chapter, minor, title = match.groups() - data["volume"] = int(volume) if volume else 0 - data["chapter"] = int(chapter) if chapter else 0 + data["volume"] = util.safe_int(volume) + data["chapter"] = util.safe_int(chapter) data["chapter_minor"] = "." + minor if minor else "" data["title"] = title if title and title != "Read Online" else "" + return data class KissmangaMangaExtractor(KissmangaExtractor, MangaExtractor): @@ -87,7 +88,7 @@ class KissmangaMangaExtractor(KissmangaExtractor, MangaExtractor): "manga": manga, "id": url.rpartition("=")[2], "chapter_string": chapter, "lang": "en", "language": "English", } - self._parse_chapter_string(data) + self.parse_chapter_string(data) results.append((self.root + url, data)) return results @@ -133,8 +134,7 @@ class KissmangaChapterExtractor(KissmangaExtractor): "lang": "en", "language": "English", } - self._parse_chapter_string(data) - return data + return self.parse_chapter_string(data) def get_image_urls(self, page): """Extract list of all image-urls for a manga chapter""" @@ -148,7 +148,7 @@ class KissmangaChapterExtractor(KissmangaExtractor): ] except UnicodeDecodeError: self.log.error("Failed to decrypt image URls") - except (ValueError, IndexError) as e: + except (ValueError, IndexError): self.log.error("Failed to get AES key") return [] diff --git a/gallery_dl/extractor/mangafox.py b/gallery_dl/extractor/mangafox.py index d7061c5c..386f2e8b 100644 --- a/gallery_dl/extractor/mangafox.py +++ b/gallery_dl/extractor/mangafox.py @@ -9,7 +9,7 @@ """Extract manga-chapters and entire manga from http://www.mangafox.me/""" from .common import AsynchronousExtractor, Message -from .. import text, exception +from .. import text, util, exception import re @@ -24,7 +24,7 @@ class MangafoxChapterExtractor(AsynchronousExtractor): r"[^/]+/(v\d+/)?c\d+[^/]*)")] test = [(("http://mangafox.me/manga/kidou_keisatsu_patlabor/" "v05/c006.2/1.html"), { - "keyword": "ef2757d6136ef6b02eafe12d98a05f189fe8b2ba", + "keyword": "36b570e9ef11b4748407324fe08bebbe4856e6fd", "content": "5c50c252dcf12ffecf68801f4db8a2167265f66c", })] @@ -38,7 +38,7 @@ class MangafoxChapterExtractor(AsynchronousExtractor): raise exception.AuthorizationError() data = self.get_metadata(page) urls = zip( - range(1, int(data["count"])+1), + range(1, data["count"]+1), self.get_image_urls(page), ) yield Message.Version, 1 @@ -50,17 +50,19 @@ class MangafoxChapterExtractor(AsynchronousExtractor): def get_metadata(self, page): """Collect metadata for extractor-job""" data = text.extract_all(page, ( - ("manga" , " - Read ", " Manga Scans "), - ("sid" , "var sid=", ";"), - ("cid" , "var cid=", ";"), - ("count" , "var total_pages=", ";"), - ("chapter", 'var current_chapter="', '";'), + ("manga" , " - Read ", " Manga Scans "), + ("sid" , "var sid=", ";"), + ("cid" , "var cid=", ";"), + ("count" , "var total_pages=", ";"), + ("chapter_string", 'var current_chapter="', '"'), ))[0] - match = re.match(r"(v0*(\d+)/)?c0*(\d+)(.*)", data["chapter"]) - data["volume"] = match.group(2) or "" + match = re.match(r"(v0*(\d+)/)?c0*(\d+)(.*)", data["chapter_string"]) + data["volume"] = match.group(2) data["chapter"] = match.group(3) data["chapter_minor"] = match.group(4) or "" data["manga"] = data["manga"].rpartition(" ")[0] + for key in ("sid", "cid", "count", "volume", "chapter"): + data[key] = util.safe_int(data[key]) return data def get_image_urls(self, page): diff --git a/gallery_dl/extractor/mangahere.py b/gallery_dl/extractor/mangahere.py index 8e3d4077..43a599db 100644 --- a/gallery_dl/extractor/mangahere.py +++ b/gallery_dl/extractor/mangahere.py @@ -9,7 +9,7 @@ """Extract manga-chapters and entire manga from http://www.mangahere.co/""" from .common import MangaExtractor, AsynchronousExtractor, Message -from .. import text +from .. import text, util import re @@ -46,8 +46,9 @@ class MangahereMangaExtractor(MangaExtractor): date, pos = text.extract(page, 'class="right">', '', pos) results.append((url, { "manga": manga, "title": title, "date": date, - "chapter": int(chapter), "chapter_minor": dot + minor, - "volume": int(volume.rpartition(" ")[2]) if volume else 0, + "volume": util.safe_int(volume.rpartition(" ")[2]), + "chapter": util.safe_int(chapter), + "chapter_minor": dot + minor, "lang": "en", "language": "English", })) @@ -62,7 +63,7 @@ class MangahereChapterExtractor(AsynchronousExtractor): pattern = [(r"(?:https?://)?(?:www\.)?mangahere\.co/manga/" r"([^/]+(?:/v0*(\d+))?/c0*(\d+)(\.\d+)?)")] test = [("http://www.mangahere.co/manga/dongguo_xiaojie/c003.2/", { - "keyword": "8cb9f9512b68d2cdcbea2419592b9247304c149b", + "keyword": "0c263b83f803524baa8717d2b4d841617aa8d775", "content": "dd8454469429c6c717cbc3cad228e76ef8c6e420", })] url_fmt = "http://www.mangahere.co/manga/{}/{}.html" @@ -75,7 +76,7 @@ class MangahereChapterExtractor(AsynchronousExtractor): page = self.request(self.url_fmt.format(self.part, 1)).text data = self.get_job_metadata(page) urls = zip( - range(1, int(data["count"])+1), + range(1, data["count"]+1), self.get_image_urls(page), ) yield Message.Version, 1 @@ -96,11 +97,11 @@ class MangahereChapterExtractor(AsynchronousExtractor): return { "manga": text.unescape(manga), # "title": TODO, - "volume": self.volume or "", - "chapter": self.chapter, + "volume": util.safe_int(self.volume), + "chapter": util.safe_int(self.chapter), "chapter_minor": self.chminor or "", - "chapter_id": chid, - "count": count, + "chapter_id": util.safe_int(chid), + "count": util.safe_int(count), "lang": "en", "language": "English", } diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py index b0054151..7bb70e8b 100644 --- a/gallery_dl/extractor/mangapark.py +++ b/gallery_dl/extractor/mangapark.py @@ -9,7 +9,7 @@ """Extract manga-chapters and entire manga from http://mangapark.me/""" from .common import Extractor, MangaExtractor, Message -from .. import text +from .. import text, util class MangaparkExtractor(Extractor): @@ -18,17 +18,18 @@ class MangaparkExtractor(Extractor): root = "http://mangapark.me" @staticmethod - def _parse_chapter_path(path, data): + def parse_chapter_path(path, data): + """Get volume/chapter information from url-path of a chapter""" data["volume"], data["chapter_minor"] = 0, "" for part in path.split("/")[3:]: key, value = part[0], part[1:] if key == "s": - data["version"] = int(value) + data["version"] = util.safe_int(value) elif key == "v": - data["volume"] = int(value) + data["volume"] = util.safe_int(value) elif key == "c": chapter, dot, minor = value.partition(".") - data["chapter"] = int(chapter) + data["chapter"] = util.safe_int(chapter) data["chapter_minor"] = dot + minor elif key == "e": data["chapter_minor"] = "v" + value @@ -59,10 +60,10 @@ class MangaparkMangaExtractor(MangaparkExtractor, MangaExtractor): date , pos = text.extract(page, '', '', pos) count, pos = text.extract(page, '\tof ', ' ', pos) - self._parse_chapter_path(path, data) + self.parse_chapter_path(path, data) data["title"] = title[3:].strip() data["date"] = date - data["count"] = int(count) + data["count"] = util.safe_int(count) results.append((self.root + path, data.copy())) @@ -107,7 +108,7 @@ class MangaparkChapterExtractor(MangaparkExtractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" data = {"lang": "en", "language": "English"} - self._parse_chapter_path(self.path, data) + self.parse_chapter_path(self.path, data) text.extract_all(page, ( ("manga_id" , "var _manga_id = '", "'"), ("chapter_id", "var _book_id = '", "'"), @@ -119,7 +120,7 @@ class MangaparkChapterExtractor(MangaparkExtractor): data["manga"], _, data["type"] = data["manga"].rpartition(" ") data["manga"] = text.unescape(data["manga"]) data["title"] = data["title"].partition(": ")[2] - data["count"] = int(data["count"]) + data["count"] = util.safe_int(data["count"]) return data @staticmethod diff --git a/gallery_dl/extractor/mangareader.py b/gallery_dl/extractor/mangareader.py index ce669439..350b62ac 100644 --- a/gallery_dl/extractor/mangareader.py +++ b/gallery_dl/extractor/mangareader.py @@ -9,7 +9,7 @@ """Extract manga-chapters and entire manga from http://www.mangareader.net/""" from .common import AsynchronousExtractor, MangaExtractor, Message -from .. import text +from .. import text, util class MangareaderBase(): @@ -20,7 +20,8 @@ class MangareaderBase(): root = "http://www.mangareader.net" @staticmethod - def _parse_page(page, data): + def parse_page(page, data): + """Parse metadata on 'page' and add it to 'data'""" text.extract_all(page, ( ("manga" , '

', '

'), ("release", '>Year of Release:\n', ''), @@ -30,6 +31,7 @@ class MangareaderBase(): data["manga"] = data["manga"].strip() data["author"] = text.unescape(data["author"]) data["artist"] = text.unescape(data["artist"]) + return data class MangareaderMangaExtractor(MangareaderBase, MangaExtractor): @@ -43,8 +45,7 @@ class MangareaderMangaExtractor(MangareaderBase, MangaExtractor): def chapters(self, page): results = [] - data = {"lang": "en", "language": "English"} - self._parse_page(page, data) + data = self.parse_page(page, {"lang": "en", "language": "English"}) needle = '
\n') @@ -54,7 +55,7 @@ class MangareaderMangaExtractor(MangareaderBase, MangaExtractor): return results data["title"], pos = text.extract(page, ' : ', '', pos) data["date"] , pos = text.extract(page, '', '', pos) - data["chapter"] = int(url.rpartition("/")[2]) + data["chapter"] = util.safe_int(url.rpartition("/")[2]) results.append((self.root + url, data.copy())) @@ -91,17 +92,16 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor): def get_job_metadata(self, chapter_page): """Collect metadata for extractor-job""" page = self.request(self.root + self.url_title).text - data = { - "chapter": int(self.chapter), + data = self.parse_page(page, { + "chapter": util.safe_int(self.chapter), "lang": "en", "language": "English", - } - self._parse_page(page, data) + }) text.extract_all(page, ( ('title', ' ' + self.chapter + ' : ', ''), ('date', '', ''), ), page.index('
'), data) - data["count"] = int(text.extract( + data["count"] = util.safe_int(text.extract( chapter_page, ' of ', '<')[0] ) return data @@ -123,6 +123,6 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor): height, pos = extr(page, ' height="', '"', pos) image, pos = extr(page, ' src="', '"', pos) return self.root + url, image, text.nameext_from_url(image, { - "width": int(width), - "height": int(height), + "width": util.safe_int(width), + "height": util.safe_int(height), }) diff --git a/gallery_dl/extractor/mangastream.py b/gallery_dl/extractor/mangastream.py index f01b025f..48d0a5f1 100644 --- a/gallery_dl/extractor/mangastream.py +++ b/gallery_dl/extractor/mangastream.py @@ -9,7 +9,7 @@ """Extract manga-chapters from https://mangastream.com/""" from .common import AsynchronousExtractor, Message -from .. import text +from .. import text, util from urllib.parse import urljoin @@ -32,8 +32,8 @@ class MangastreamChapterExtractor(AsynchronousExtractor): data = self.get_job_metadata(page) next_url = None yield Message.Version, 1 - yield Message.Directory, data - for data["page"] in range(1, int(data["count"])+1): + yield Message.Directory, data.copy() + for data["page"] in range(1, data["count"]+1): if next_url: page = self.request(next_url).text next_url, image_url = self.get_page_metadata(page) @@ -44,21 +44,19 @@ class MangastreamChapterExtractor(AsynchronousExtractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" manga, pos = text.extract( - page, '