implement and use 'util.safe_int()'
Works like Python's 'int()', but returns a default value (0 unless one is given) instead of raising an exception when the conversion fails.
This commit is contained in:
@@ -60,15 +60,17 @@ class BatotoExtractor():
|
||||
return {c: response.cookies[c] for c in self.cookienames}
|
||||
|
||||
@staticmethod
|
||||
def _parse_chapter_string(data):
|
||||
def parse_chapter_string(data):
|
||||
"""Parse 'chapter_string' value contained in 'data'"""
|
||||
data["chapter_string"] = text.unescape(data["chapter_string"])
|
||||
pattern = r"(?:Vol.(\d+) )?Ch\.(\d+)([^ :]*)(?::? (.+))"
|
||||
match = re.match(pattern, data["chapter_string"])
|
||||
|
||||
volume, chapter, data["chapter_minor"], title = match.groups()
|
||||
data["volume"] = int(volume) if volume else 0
|
||||
data["chapter"] = int(chapter)
|
||||
data["volume"] = util.safe_int(volume)
|
||||
data["chapter"] = util.safe_int(chapter)
|
||||
data["title"] = title if title != "Read Online" else ""
|
||||
return data
|
||||
|
||||
|
||||
class BatotoMangaExtractor(BatotoExtractor, MangaExtractor):
|
||||
@@ -99,7 +101,7 @@ class BatotoMangaExtractor(BatotoExtractor, MangaExtractor):
|
||||
if not data["token"]:
|
||||
return results
|
||||
|
||||
self._parse_chapter_string(data)
|
||||
self.parse_chapter_string(data)
|
||||
data["lang"] = util.language_to_code(data["language"])
|
||||
data["group"] = text.unescape(data["group"])
|
||||
data["contributor"] = text.unescape(data["contributor"])
|
||||
@@ -117,7 +119,7 @@ class BatotoChapterExtractor(BatotoExtractor, AsynchronousExtractor):
|
||||
test = [
|
||||
("http://bato.to/reader#459878c8fda07502", {
|
||||
"url": "432d7958506ad913b0a9e42664a89e46a63e9296",
|
||||
"keyword": "a6ca65532ad5653d0690b0ccc83f53b6e952f1bf",
|
||||
"keyword": "96598b6f94d2b26d11c2780f8173cd6ab5fe9906",
|
||||
}),
|
||||
("http://bato.to/reader#459878c8fda07503", {
|
||||
"exception": exception.NotFoundError,
|
||||
@@ -148,15 +150,14 @@ class BatotoChapterExtractor(BatotoExtractor, AsynchronousExtractor):
|
||||
elif error == "10020":
|
||||
raise exception.NotFoundError("chapter")
|
||||
else:
|
||||
raise Exception("[batoto] unexpected error code: " + error)
|
||||
raise Exception("error code: " + error)
|
||||
page = response.text
|
||||
data = self.get_job_metadata(page)
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data.copy()
|
||||
for i in range(int(data["count"])):
|
||||
for data["page"] in range(1, data["count"]+1):
|
||||
next_url, image_url = self.get_page_urls(page)
|
||||
text.nameext_from_url(image_url, data)
|
||||
data["page"] = i+1
|
||||
yield Message.Url, image_url, data.copy()
|
||||
if next_url:
|
||||
params["p"] += 1
|
||||
@@ -181,10 +182,9 @@ class BatotoChapterExtractor(BatotoExtractor, AsynchronousExtractor):
|
||||
"group": text.unescape(group),
|
||||
"lang": util.language_to_code(lang),
|
||||
"language": lang,
|
||||
"count": count,
|
||||
"count": util.safe_int(count),
|
||||
}
|
||||
self._parse_chapter_string(data)
|
||||
return data
|
||||
return self.parse_chapter_string(data)
|
||||
|
||||
@staticmethod
|
||||
def get_page_urls(page):
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extract images from https://www.deviantart.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, exception
|
||||
from .. import text, util, exception
|
||||
from ..cache import cache, memcache
|
||||
import itertools
|
||||
import datetime
|
||||
@@ -57,7 +57,7 @@ class DeviantartExtractor(Extractor):
|
||||
|
||||
if "videos" in deviation:
|
||||
video = max(deviation["videos"],
|
||||
key=lambda x: int(x["quality"][:-1]))
|
||||
key=lambda x: util.safe_int(x["quality"][:-1]))
|
||||
yield self.commit(deviation, video)
|
||||
|
||||
if "flash" in deviation:
|
||||
|
||||
@@ -25,7 +25,7 @@ class ExhentaiGalleryExtractor(Extractor):
|
||||
pattern = [r"(?:https?://)?(g\.e-|e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})"]
|
||||
test = [
|
||||
("https://exhentai.org/g/960460/4f0e369d82/", {
|
||||
"keyword": "d837276b02c4e91e96c1b40fe4415cbb73b56577",
|
||||
"keyword": "173277161e28162dcc755d2e7a88e6cd750f2477",
|
||||
"content": "493d759de534355c9f55f8e365565b62411de146",
|
||||
}),
|
||||
("https://exhentai.org/g/960461/4f0e369d82/", {
|
||||
@@ -44,6 +44,7 @@ class ExhentaiGalleryExtractor(Extractor):
|
||||
self.key = {}
|
||||
self.count = 0
|
||||
self.version, self.gid, self.token = match.groups()
|
||||
self.gid = util.safe_int(self.gid)
|
||||
self.original = self.config("original", True)
|
||||
self.wait_min = self.config("wait-min", 3)
|
||||
self.wait_max = self.config("wait-max", 6)
|
||||
@@ -72,7 +73,7 @@ class ExhentaiGalleryExtractor(Extractor):
|
||||
raise exception.NotFoundError("gallery")
|
||||
|
||||
data = self.get_job_metadata(page)
|
||||
self.count = int(data["count"])
|
||||
self.count = data["count"]
|
||||
yield Message.Directory, data
|
||||
|
||||
for url, image in self.get_images(page):
|
||||
@@ -100,6 +101,7 @@ class ExhentaiGalleryExtractor(Extractor):
|
||||
data["lang"] = util.language_to_code(data["language"])
|
||||
data["title"] = text.unescape(data["title"])
|
||||
data["title_jp"] = text.unescape(data["title_jp"])
|
||||
data["count"] = util.safe_int(data["count"])
|
||||
return data
|
||||
|
||||
def get_images(self, page):
|
||||
@@ -141,7 +143,7 @@ class ExhentaiGalleryExtractor(Extractor):
|
||||
nextkey = self.key["next"]
|
||||
request = {
|
||||
"method" : "showpage",
|
||||
"gid" : int(self.gid),
|
||||
"gid" : self.gid,
|
||||
"imgkey" : nextkey,
|
||||
"showkey": self.key["show"],
|
||||
}
|
||||
|
||||
@@ -108,9 +108,11 @@ class FallenangelsMangaExtractor(MangaExtractor):
|
||||
title , pos = text.extract(page, '<em>', '</em>', pos)
|
||||
|
||||
manga, _, chapter = chapter.rpartition(" ")
|
||||
chapter, _, minor = chapter.partition(".")
|
||||
chapter, dot, minor = chapter.partition(".")
|
||||
results.append((url, {
|
||||
"manga": manga, "title": title, "volume": int(volume),
|
||||
"chapter": int(chapter), "chapter_minor": minor,
|
||||
"manga": manga, "title": title,
|
||||
"volume": util.safe_int(volume),
|
||||
"chapter": util.safe_int(chapter),
|
||||
"chapter_minor": dot + minor,
|
||||
"lang": self.lang, "language": language,
|
||||
}))
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extract images from http://www.hbrowse.com/"""
|
||||
|
||||
from .common import Extractor, MangaExtractor, Message
|
||||
from .. import text
|
||||
from .. import text, util
|
||||
import json
|
||||
|
||||
|
||||
@@ -19,7 +19,8 @@ class HbrowseExtractor(Extractor):
|
||||
root = "http://www.hbrowse.com"
|
||||
|
||||
@staticmethod
|
||||
def _parse_page(page, data):
|
||||
def parse_page(page, data):
|
||||
"""Parse metadata on 'page' and add it to 'data'"""
|
||||
text.extract_all(page, (
|
||||
('manga' , '<td class="listLong">', '</td>'),
|
||||
('artist', '<td class="listLong">', '</td>'),
|
||||
@@ -28,9 +29,10 @@ class HbrowseExtractor(Extractor):
|
||||
), values=data)
|
||||
|
||||
data["manga"] = text.unescape(data["manga"])
|
||||
data["total"] = int(data["total"])
|
||||
data["total"] = util.safe_int(data["total"])
|
||||
data["artist"] = text.remove_html(data["artist"])
|
||||
data["origin"] = text.remove_html(data["origin"])
|
||||
return data
|
||||
|
||||
|
||||
class HbrowseMangaExtractor(MangaExtractor, HbrowseExtractor):
|
||||
@@ -44,8 +46,10 @@ class HbrowseMangaExtractor(MangaExtractor, HbrowseExtractor):
|
||||
|
||||
def chapters(self, page):
|
||||
results = []
|
||||
data = {"manga_id": int(self.url.rstrip("/").rpartition("/")[2])}
|
||||
self._parse_page(page, data)
|
||||
data = self.parse_page(page, {
|
||||
"manga_id": util.safe_int(
|
||||
self.url.rstrip("/").rpartition("/")[2])
|
||||
})
|
||||
|
||||
pos = 0
|
||||
needle = '<td class="listMiddle">\n<a class="listLink" href="'
|
||||
@@ -54,7 +58,7 @@ class HbrowseMangaExtractor(MangaExtractor, HbrowseExtractor):
|
||||
if not url:
|
||||
return results
|
||||
title, pos = text.extract(page, '>View ', '<', pos)
|
||||
data["chapter"] = int(url.rpartition("/")[2][1:])
|
||||
data["chapter"] = util.safe_int(url.rpartition("/")[2][1:])
|
||||
data["title"] = title
|
||||
results.append((url, data.copy()))
|
||||
|
||||
@@ -87,9 +91,10 @@ class HbrowseChapterExtractor(HbrowseExtractor):
|
||||
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = {"manga_id": int(self.gid), "chapter": int(self.chapter)}
|
||||
self._parse_page(page, data)
|
||||
return data
|
||||
return self.parse_page(page, {
|
||||
"manga_id": util.safe_int(self.gid),
|
||||
"chapter": util.safe_int(self.chapter)
|
||||
})
|
||||
|
||||
def get_image_urls(self, page):
|
||||
"""Yield all image-urls for a 'chapter'"""
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extract hentai-manga from https://hentai2read.com/"""
|
||||
|
||||
from .common import MangaExtractor
|
||||
from .. import text
|
||||
from .. import text, util
|
||||
from . import hentaicdn
|
||||
import re
|
||||
import json
|
||||
@@ -37,7 +37,7 @@ class Hentai2readMangaExtractor(MangaExtractor):
|
||||
page, '<span itemprop="itemreviewed">', '</span>')
|
||||
mtype, pos = text.extract(
|
||||
page, '<small class="text-danger">[', ']</small>', pos)
|
||||
manga_id = int(text.extract(page, 'data-mid="', '"', pos)[0])
|
||||
manga_id = util.safe_int(text.extract(page, 'data-mid="', '"', pos)[0])
|
||||
page, pos = text.extract(
|
||||
page, '<ul class="nav-chapters remove-margin-b">', '</ul>\n</div>')
|
||||
|
||||
@@ -51,7 +51,8 @@ class Hentai2readMangaExtractor(MangaExtractor):
|
||||
chapter, _, title = text.unescape(chapter).strip().partition(" - ")
|
||||
results.append((url, {
|
||||
"manga_id": manga_id, "manga": manga, "type": mtype,
|
||||
"chapter_id": int(chapter_id), "chapter": int(chapter),
|
||||
"chapter_id": util.safe_int(chapter_id),
|
||||
"chapter": util.safe_int(chapter),
|
||||
"title": title, "lang": "en", "language": "English",
|
||||
}))
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extract images from https://www.hentai-foundry.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, exception
|
||||
from .. import text, util, exception
|
||||
|
||||
|
||||
class HentaifoundryUserExtractor(Extractor):
|
||||
@@ -23,7 +23,7 @@ class HentaifoundryUserExtractor(Extractor):
|
||||
test = [
|
||||
("https://www.hentai-foundry.com/pictures/user/Tenpura", {
|
||||
"url": "ebbc981a85073745e3ca64a0f2ab31fab967fc28",
|
||||
"keyword": "6e9a549feb9bafebd9d9342ef3c8ccad33a7031c",
|
||||
"keyword": "f8fecc8aa89978ecf402ec221243978fe791bd54",
|
||||
}),
|
||||
("http://www.hentai-foundry.com/user/asdq/profile", {
|
||||
"exception": exception.NotFoundError,
|
||||
@@ -40,7 +40,7 @@ class HentaifoundryUserExtractor(Extractor):
|
||||
self.set_filters(token)
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
for url, image in self.get_images(int(data["count"])):
|
||||
for url, image in self.get_images(data["count"]):
|
||||
image.update(data)
|
||||
yield Message.Url, url, image
|
||||
|
||||
@@ -68,7 +68,7 @@ class HentaifoundryUserExtractor(Extractor):
|
||||
page = response.text
|
||||
token, pos = text.extract(page, 'hidden" value="', '"')
|
||||
count, pos = text.extract(page, 'class="active" >Pictures (', ')', pos)
|
||||
return {"artist": self.artist, "count": count}, token
|
||||
return {"artist": self.artist, "count": util.safe_int(count)}, token
|
||||
|
||||
def get_image_metadata(self, url):
|
||||
"""Collect metadata for an image"""
|
||||
@@ -79,7 +79,7 @@ class HentaifoundryUserExtractor(Extractor):
|
||||
page, 'Pictures</a> » <span>', '<')
|
||||
url, pos = text.extract(
|
||||
page, '//pictures.hentai-foundry.com', '"', pos)
|
||||
data = {"index": index, "title": text.unescape(title)}
|
||||
data = {"index": util.safe_int(index), "title": text.unescape(title)}
|
||||
text.nameext_from_url(url, data)
|
||||
return "https://pictures.hentai-foundry.com" + url, data
|
||||
|
||||
@@ -127,7 +127,7 @@ class HentaifoundryImageExtractor(Extractor):
|
||||
(("http://www.hentai-foundry.com/"
|
||||
"pictures/user/Tenpura/407501/shimakaze"), {
|
||||
"url": "fbf2fd74906738094e2575d2728e8dc3de18a8a3",
|
||||
"keyword": "304479cfe00fbb723886be78b2bd6b9306a31d8a",
|
||||
"keyword": "85b8e26fa93d00ae1333cb7b418078f1792dc4a8",
|
||||
"content": "91bf01497c39254b6dfb234a18e8f01629c77fd1",
|
||||
}),
|
||||
("http://www.hentai-foundry.com/pictures/user/Tenpura/340853/", {
|
||||
@@ -160,7 +160,7 @@ class HentaifoundryImageExtractor(Extractor):
|
||||
url , pos = extr(page, '//pictures.hentai-foundry.com', '"', pos)
|
||||
data = {
|
||||
"artist": artist,
|
||||
"index": self.index,
|
||||
"index": util.safe_int(self.index),
|
||||
"title": text.unescape(title),
|
||||
}
|
||||
text.nameext_from_url(url, data)
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extract hentai-manga from https://hentaihere.com/"""
|
||||
|
||||
from .common import MangaExtractor
|
||||
from .. import text
|
||||
from .. import text, util
|
||||
from . import hentaicdn
|
||||
import re
|
||||
|
||||
@@ -32,7 +32,8 @@ class HentaihereMangaExtractor(MangaExtractor):
|
||||
|
||||
def chapters(self, page):
|
||||
results = []
|
||||
manga_id = int(self.url.rstrip("/").rpartition("/")[2][1:])
|
||||
manga_id = util.safe_int(
|
||||
self.url.rstrip("/").rpartition("/")[2][1:])
|
||||
manga, pos = text.extract(
|
||||
page, '<span itemprop="name">', '</span>')
|
||||
mtype, pos = text.extract(
|
||||
@@ -48,7 +49,8 @@ class HentaihereMangaExtractor(MangaExtractor):
|
||||
chapter, _, title = text.unescape(chapter).strip().partition(" - ")
|
||||
results.append((url, {
|
||||
"manga_id": manga_id, "manga": manga, "type": mtype,
|
||||
"chapter_id": int(chapter_id), "chapter": int(chapter),
|
||||
"chapter_id": util.safe_int(chapter_id),
|
||||
"chapter": util.safe_int(chapter),
|
||||
"title": title, "lang": "en", "language": "English",
|
||||
}))
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extract images from http://imagefap.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text
|
||||
from .. import text, util
|
||||
import json
|
||||
|
||||
|
||||
@@ -159,7 +159,7 @@ class ImagefapUserExtractor(Extractor):
|
||||
yield Message.Version, 1
|
||||
for gid, name in self.get_gallery_data():
|
||||
url = "http://www.imagefap.com/gallery/" + gid
|
||||
data = {"gallery_id": int(gid), "name": name}
|
||||
data = {"gallery_id": util.safe_int(gid), "name": name}
|
||||
yield Message.Queue, url, data
|
||||
|
||||
def get_gallery_data(self):
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extract manga-chapters and entire manga from http://kissmanga.com/"""
|
||||
|
||||
from .common import Extractor, MangaExtractor, Message
|
||||
from .. import text, cloudflare, aes
|
||||
from .. import text, util, cloudflare, aes
|
||||
from ..cache import cache
|
||||
import re
|
||||
import hashlib
|
||||
@@ -38,7 +38,8 @@ class KissmangaExtractor(Extractor):
|
||||
request = cloudflare.request_func
|
||||
|
||||
@staticmethod
|
||||
def _parse_chapter_string(data):
|
||||
def parse_chapter_string(data):
|
||||
"""Parse 'chapter_string' value contained in 'data'"""
|
||||
data["chapter_string"] = text.unescape(data["chapter_string"])
|
||||
|
||||
match = re.match((
|
||||
@@ -49,16 +50,16 @@ class KissmangaExtractor(Extractor):
|
||||
), data["chapter_string"])
|
||||
|
||||
if not match:
|
||||
match = re.match((
|
||||
r"[\w ]+?(?: -)? 0*()(\d+)()(?: *[:-]? *(.+))?"
|
||||
# r"[\w ]+?(?: -)? 0*()(\d+)(?: (.+))?(?: - (.+))?"
|
||||
), data["chapter_string"])
|
||||
match = re.match(
|
||||
r"[\w ]+?(?: -)? 0*()(\d+)()(?: *[:-]? *(.+))?",
|
||||
data["chapter_string"])
|
||||
|
||||
volume, chapter, minor, title = match.groups()
|
||||
data["volume"] = int(volume) if volume else 0
|
||||
data["chapter"] = int(chapter) if chapter else 0
|
||||
data["volume"] = util.safe_int(volume)
|
||||
data["chapter"] = util.safe_int(chapter)
|
||||
data["chapter_minor"] = "." + minor if minor else ""
|
||||
data["title"] = title if title and title != "Read Online" else ""
|
||||
return data
|
||||
|
||||
|
||||
class KissmangaMangaExtractor(KissmangaExtractor, MangaExtractor):
|
||||
@@ -87,7 +88,7 @@ class KissmangaMangaExtractor(KissmangaExtractor, MangaExtractor):
|
||||
"manga": manga, "id": url.rpartition("=")[2],
|
||||
"chapter_string": chapter, "lang": "en", "language": "English",
|
||||
}
|
||||
self._parse_chapter_string(data)
|
||||
self.parse_chapter_string(data)
|
||||
results.append((self.root + url, data))
|
||||
return results
|
||||
|
||||
@@ -133,8 +134,7 @@ class KissmangaChapterExtractor(KissmangaExtractor):
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
}
|
||||
self._parse_chapter_string(data)
|
||||
return data
|
||||
return self.parse_chapter_string(data)
|
||||
|
||||
def get_image_urls(self, page):
|
||||
"""Extract list of all image-urls for a manga chapter"""
|
||||
@@ -148,7 +148,7 @@ class KissmangaChapterExtractor(KissmangaExtractor):
|
||||
]
|
||||
except UnicodeDecodeError:
|
||||
self.log.error("Failed to decrypt image URls")
|
||||
except (ValueError, IndexError) as e:
|
||||
except (ValueError, IndexError):
|
||||
self.log.error("Failed to get AES key")
|
||||
return []
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extract manga-chapters and entire manga from http://www.mangafox.me/"""
|
||||
|
||||
from .common import AsynchronousExtractor, Message
|
||||
from .. import text, exception
|
||||
from .. import text, util, exception
|
||||
import re
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ class MangafoxChapterExtractor(AsynchronousExtractor):
|
||||
r"[^/]+/(v\d+/)?c\d+[^/]*)")]
|
||||
test = [(("http://mangafox.me/manga/kidou_keisatsu_patlabor/"
|
||||
"v05/c006.2/1.html"), {
|
||||
"keyword": "ef2757d6136ef6b02eafe12d98a05f189fe8b2ba",
|
||||
"keyword": "36b570e9ef11b4748407324fe08bebbe4856e6fd",
|
||||
"content": "5c50c252dcf12ffecf68801f4db8a2167265f66c",
|
||||
})]
|
||||
|
||||
@@ -38,7 +38,7 @@ class MangafoxChapterExtractor(AsynchronousExtractor):
|
||||
raise exception.AuthorizationError()
|
||||
data = self.get_metadata(page)
|
||||
urls = zip(
|
||||
range(1, int(data["count"])+1),
|
||||
range(1, data["count"]+1),
|
||||
self.get_image_urls(page),
|
||||
)
|
||||
yield Message.Version, 1
|
||||
@@ -50,17 +50,19 @@ class MangafoxChapterExtractor(AsynchronousExtractor):
|
||||
def get_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = text.extract_all(page, (
|
||||
("manga" , " - Read ", " Manga Scans "),
|
||||
("sid" , "var sid=", ";"),
|
||||
("cid" , "var cid=", ";"),
|
||||
("count" , "var total_pages=", ";"),
|
||||
("chapter", 'var current_chapter="', '";'),
|
||||
("manga" , " - Read ", " Manga Scans "),
|
||||
("sid" , "var sid=", ";"),
|
||||
("cid" , "var cid=", ";"),
|
||||
("count" , "var total_pages=", ";"),
|
||||
("chapter_string", 'var current_chapter="', '"'),
|
||||
))[0]
|
||||
match = re.match(r"(v0*(\d+)/)?c0*(\d+)(.*)", data["chapter"])
|
||||
data["volume"] = match.group(2) or ""
|
||||
match = re.match(r"(v0*(\d+)/)?c0*(\d+)(.*)", data["chapter_string"])
|
||||
data["volume"] = match.group(2)
|
||||
data["chapter"] = match.group(3)
|
||||
data["chapter_minor"] = match.group(4) or ""
|
||||
data["manga"] = data["manga"].rpartition(" ")[0]
|
||||
for key in ("sid", "cid", "count", "volume", "chapter"):
|
||||
data[key] = util.safe_int(data[key])
|
||||
return data
|
||||
|
||||
def get_image_urls(self, page):
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extract manga-chapters and entire manga from http://www.mangahere.co/"""
|
||||
|
||||
from .common import MangaExtractor, AsynchronousExtractor, Message
|
||||
from .. import text
|
||||
from .. import text, util
|
||||
import re
|
||||
|
||||
|
||||
@@ -46,8 +46,9 @@ class MangahereMangaExtractor(MangaExtractor):
|
||||
date, pos = text.extract(page, 'class="right">', '</span>', pos)
|
||||
results.append((url, {
|
||||
"manga": manga, "title": title, "date": date,
|
||||
"chapter": int(chapter), "chapter_minor": dot + minor,
|
||||
"volume": int(volume.rpartition(" ")[2]) if volume else 0,
|
||||
"volume": util.safe_int(volume.rpartition(" ")[2]),
|
||||
"chapter": util.safe_int(chapter),
|
||||
"chapter_minor": dot + minor,
|
||||
"lang": "en", "language": "English",
|
||||
}))
|
||||
|
||||
@@ -62,7 +63,7 @@ class MangahereChapterExtractor(AsynchronousExtractor):
|
||||
pattern = [(r"(?:https?://)?(?:www\.)?mangahere\.co/manga/"
|
||||
r"([^/]+(?:/v0*(\d+))?/c0*(\d+)(\.\d+)?)")]
|
||||
test = [("http://www.mangahere.co/manga/dongguo_xiaojie/c003.2/", {
|
||||
"keyword": "8cb9f9512b68d2cdcbea2419592b9247304c149b",
|
||||
"keyword": "0c263b83f803524baa8717d2b4d841617aa8d775",
|
||||
"content": "dd8454469429c6c717cbc3cad228e76ef8c6e420",
|
||||
})]
|
||||
url_fmt = "http://www.mangahere.co/manga/{}/{}.html"
|
||||
@@ -75,7 +76,7 @@ class MangahereChapterExtractor(AsynchronousExtractor):
|
||||
page = self.request(self.url_fmt.format(self.part, 1)).text
|
||||
data = self.get_job_metadata(page)
|
||||
urls = zip(
|
||||
range(1, int(data["count"])+1),
|
||||
range(1, data["count"]+1),
|
||||
self.get_image_urls(page),
|
||||
)
|
||||
yield Message.Version, 1
|
||||
@@ -96,11 +97,11 @@ class MangahereChapterExtractor(AsynchronousExtractor):
|
||||
return {
|
||||
"manga": text.unescape(manga),
|
||||
# "title": TODO,
|
||||
"volume": self.volume or "",
|
||||
"chapter": self.chapter,
|
||||
"volume": util.safe_int(self.volume),
|
||||
"chapter": util.safe_int(self.chapter),
|
||||
"chapter_minor": self.chminor or "",
|
||||
"chapter_id": chid,
|
||||
"count": count,
|
||||
"chapter_id": util.safe_int(chid),
|
||||
"count": util.safe_int(count),
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extract manga-chapters and entire manga from http://mangapark.me/"""
|
||||
|
||||
from .common import Extractor, MangaExtractor, Message
|
||||
from .. import text
|
||||
from .. import text, util
|
||||
|
||||
|
||||
class MangaparkExtractor(Extractor):
|
||||
@@ -18,17 +18,18 @@ class MangaparkExtractor(Extractor):
|
||||
root = "http://mangapark.me"
|
||||
|
||||
@staticmethod
|
||||
def _parse_chapter_path(path, data):
|
||||
def parse_chapter_path(path, data):
|
||||
"""Get volume/chapter information from url-path of a chapter"""
|
||||
data["volume"], data["chapter_minor"] = 0, ""
|
||||
for part in path.split("/")[3:]:
|
||||
key, value = part[0], part[1:]
|
||||
if key == "s":
|
||||
data["version"] = int(value)
|
||||
data["version"] = util.safe_int(value)
|
||||
elif key == "v":
|
||||
data["volume"] = int(value)
|
||||
data["volume"] = util.safe_int(value)
|
||||
elif key == "c":
|
||||
chapter, dot, minor = value.partition(".")
|
||||
data["chapter"] = int(chapter)
|
||||
data["chapter"] = util.safe_int(chapter)
|
||||
data["chapter_minor"] = dot + minor
|
||||
elif key == "e":
|
||||
data["chapter_minor"] = "v" + value
|
||||
@@ -59,10 +60,10 @@ class MangaparkMangaExtractor(MangaparkExtractor, MangaExtractor):
|
||||
date , pos = text.extract(page, '<i>', '</i>', pos)
|
||||
count, pos = text.extract(page, '\tof ', ' ', pos)
|
||||
|
||||
self._parse_chapter_path(path, data)
|
||||
self.parse_chapter_path(path, data)
|
||||
data["title"] = title[3:].strip()
|
||||
data["date"] = date
|
||||
data["count"] = int(count)
|
||||
data["count"] = util.safe_int(count)
|
||||
results.append((self.root + path, data.copy()))
|
||||
|
||||
|
||||
@@ -107,7 +108,7 @@ class MangaparkChapterExtractor(MangaparkExtractor):
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = {"lang": "en", "language": "English"}
|
||||
self._parse_chapter_path(self.path, data)
|
||||
self.parse_chapter_path(self.path, data)
|
||||
text.extract_all(page, (
|
||||
("manga_id" , "var _manga_id = '", "'"),
|
||||
("chapter_id", "var _book_id = '", "'"),
|
||||
@@ -119,7 +120,7 @@ class MangaparkChapterExtractor(MangaparkExtractor):
|
||||
data["manga"], _, data["type"] = data["manga"].rpartition(" ")
|
||||
data["manga"] = text.unescape(data["manga"])
|
||||
data["title"] = data["title"].partition(": ")[2]
|
||||
data["count"] = int(data["count"])
|
||||
data["count"] = util.safe_int(data["count"])
|
||||
return data
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extract manga-chapters and entire manga from http://www.mangareader.net/"""
|
||||
|
||||
from .common import AsynchronousExtractor, MangaExtractor, Message
|
||||
from .. import text
|
||||
from .. import text, util
|
||||
|
||||
|
||||
class MangareaderBase():
|
||||
@@ -20,7 +20,8 @@ class MangareaderBase():
|
||||
root = "http://www.mangareader.net"
|
||||
|
||||
@staticmethod
|
||||
def _parse_page(page, data):
|
||||
def parse_page(page, data):
|
||||
"""Parse metadata on 'page' and add it to 'data'"""
|
||||
text.extract_all(page, (
|
||||
("manga" , '<h2 class="aname">', '</h2>'),
|
||||
("release", '>Year of Release:</td>\n<td>', '</td>'),
|
||||
@@ -30,6 +31,7 @@ class MangareaderBase():
|
||||
data["manga"] = data["manga"].strip()
|
||||
data["author"] = text.unescape(data["author"])
|
||||
data["artist"] = text.unescape(data["artist"])
|
||||
return data
|
||||
|
||||
|
||||
class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
|
||||
@@ -43,8 +45,7 @@ class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
|
||||
|
||||
def chapters(self, page):
|
||||
results = []
|
||||
data = {"lang": "en", "language": "English"}
|
||||
self._parse_page(page, data)
|
||||
data = self.parse_page(page, {"lang": "en", "language": "English"})
|
||||
|
||||
needle = '<div class="chico_manga"></div>\n<a href="'
|
||||
pos = page.index('<div id="chapterlist">')
|
||||
@@ -54,7 +55,7 @@ class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
|
||||
return results
|
||||
data["title"], pos = text.extract(page, '</a> : ', '</td>', pos)
|
||||
data["date"] , pos = text.extract(page, '<td>', '</td>', pos)
|
||||
data["chapter"] = int(url.rpartition("/")[2])
|
||||
data["chapter"] = util.safe_int(url.rpartition("/")[2])
|
||||
results.append((self.root + url, data.copy()))
|
||||
|
||||
|
||||
@@ -91,17 +92,16 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
|
||||
def get_job_metadata(self, chapter_page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
page = self.request(self.root + self.url_title).text
|
||||
data = {
|
||||
"chapter": int(self.chapter),
|
||||
data = self.parse_page(page, {
|
||||
"chapter": util.safe_int(self.chapter),
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
}
|
||||
self._parse_page(page, data)
|
||||
})
|
||||
text.extract_all(page, (
|
||||
('title', ' ' + self.chapter + '</a> : ', '</td>'),
|
||||
('date', '<td>', '</td>'),
|
||||
), page.index('<div id="chapterlist">'), data)
|
||||
data["count"] = int(text.extract(
|
||||
data["count"] = util.safe_int(text.extract(
|
||||
chapter_page, '</select> of ', '<')[0]
|
||||
)
|
||||
return data
|
||||
@@ -123,6 +123,6 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
|
||||
height, pos = extr(page, ' height="', '"', pos)
|
||||
image, pos = extr(page, ' src="', '"', pos)
|
||||
return self.root + url, image, text.nameext_from_url(image, {
|
||||
"width": int(width),
|
||||
"height": int(height),
|
||||
"width": util.safe_int(width),
|
||||
"height": util.safe_int(height),
|
||||
})
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extract manga-chapters from https://mangastream.com/"""
|
||||
|
||||
from .common import AsynchronousExtractor, Message
|
||||
from .. import text
|
||||
from .. import text, util
|
||||
from urllib.parse import urljoin
|
||||
|
||||
|
||||
@@ -32,8 +32,8 @@ class MangastreamChapterExtractor(AsynchronousExtractor):
|
||||
data = self.get_job_metadata(page)
|
||||
next_url = None
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
for data["page"] in range(1, int(data["count"])+1):
|
||||
yield Message.Directory, data.copy()
|
||||
for data["page"] in range(1, data["count"]+1):
|
||||
if next_url:
|
||||
page = self.request(next_url).text
|
||||
next_url, image_url = self.get_page_metadata(page)
|
||||
@@ -44,21 +44,19 @@ class MangastreamChapterExtractor(AsynchronousExtractor):
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
manga, pos = text.extract(
|
||||
page, '<span class="hidden-xs hidden-sm">', "<"
|
||||
)
|
||||
page, '<span class="hidden-xs hidden-sm">', "<")
|
||||
pos = page.find(self.part, pos)
|
||||
title, pos = text.extract(page, ' - ', '<', pos)
|
||||
count, pos = text.extract(page, 'Last Page (', ')', pos)
|
||||
data = {
|
||||
return {
|
||||
"manga": manga,
|
||||
"chapter": text.unquote(self.chapter),
|
||||
"chapter-id": self.ch_id,
|
||||
"chapter_id": util.safe_int(self.ch_id),
|
||||
"title": title,
|
||||
"count": count,
|
||||
"count": util.safe_int(count, 1),
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
}
|
||||
return data
|
||||
|
||||
@staticmethod
|
||||
def get_page_metadata(page):
|
||||
|
||||
@@ -105,7 +105,7 @@ class MangazukiMangaExtractor(MangaExtractor):
|
||||
for url in urls:
|
||||
chapter = url.rpartition("/")[2]
|
||||
chapter, dot, minor = chapter.partition(".")
|
||||
data["chapter"] = int(chapter)
|
||||
data["chapter"] = util.safe_int(chapter)
|
||||
data["chapter_minor"] = dot + minor
|
||||
results.append((url, data.copy()))
|
||||
if 'class="next disabled"' in page:
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extract manga pages from http://www.thespectrum.net/manga_scans/"""
|
||||
|
||||
from .common import MangaExtractor, AsynchronousExtractor, Message
|
||||
from .. import text
|
||||
from .. import text, util
|
||||
|
||||
|
||||
class SpectrumnexusMangaExtractor(MangaExtractor):
|
||||
@@ -19,6 +19,7 @@ class SpectrumnexusMangaExtractor(MangaExtractor):
|
||||
reverse = False
|
||||
test = [("http://view.thespectrum.net/series/kare-kano-volume-01.html", {
|
||||
"url": "b2b175aad5ef1701cc4aee7c24f1ca3a93aba9cb",
|
||||
"keyword": "5ed9d5c7c69d2d03417c853c4e8eae30f1e5febf",
|
||||
})]
|
||||
|
||||
def chapters(self, page):
|
||||
@@ -47,7 +48,7 @@ class SpectrumnexusChapterExtractor(AsynchronousExtractor):
|
||||
test = [(("http://view.thespectrum.net/series/"
|
||||
"toriko.html?ch=Chapter+343&page=1"), {
|
||||
"url": "c0fc7dc594841217cc622a67edd79f06e9900333",
|
||||
"keyword": "8499166b62db0c87e7109cc5f9aa837b4815dd9c",
|
||||
"keyword": "3d0cb57b6b1c2cbecc7aed33f83c24891a4ff53f",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -66,27 +67,28 @@ class SpectrumnexusChapterExtractor(AsynchronousExtractor):
|
||||
data = self.get_job_metadata(page)
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data.copy()
|
||||
count = int(data["count"])
|
||||
for i in range(1, count+1):
|
||||
for i in range(1, data["count"]+1):
|
||||
url = self.get_image_url(page)
|
||||
text.nameext_from_url(url, data)
|
||||
data["page"] = i
|
||||
yield Message.Url, url, data.copy()
|
||||
if i < count:
|
||||
if i < data["count"]:
|
||||
params["page"] += 1
|
||||
page = self.request(self.url, params=params).text
|
||||
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = {
|
||||
"chapter": self.chapter or "",
|
||||
"volume": self.volume or "",
|
||||
"chapter": util.safe_int(self.chapter),
|
||||
"volume": util.safe_int(self.volume),
|
||||
"identifier": self.identifier.replace("+", " "),
|
||||
}
|
||||
return text.extract_all(page, (
|
||||
data = text.extract_all(page, (
|
||||
('manga', '<title>', ' · SPECTRUM NEXUS </title>'),
|
||||
('count', '<div class="viewerLabel"> of ', '<'),
|
||||
), values=data)[0]
|
||||
data["count"] = util.safe_int(data["count"])
|
||||
return data
|
||||
|
||||
@staticmethod
|
||||
def get_image_url(page):
|
||||
|
||||
@@ -90,6 +90,16 @@ def combine_dict(a, b):
|
||||
return a
|
||||
|
||||
|
||||
def safe_int(value, default=0):
    """Safely convert 'value' to an integer

    Works like Python's 'int()', but returns 'default' instead of raising
    an exception when 'value' cannot be converted.

    Arguments:
        value: object to convert (e.g. a numeric string, a number or None)
        default: result for unconvertible input; it is returned unchanged
            and therefore does not have to be an integer itself

    Returns:
        'int(value)' on success, 'default' otherwise
    """
    # cheap shortcut for the most common "missing value" inputs,
    # which avoids raising and catching an exception for them
    if value is None or value == "":
        return default
    try:
        return int(value)
    except (ValueError, TypeError, OverflowError):
        # OverflowError is raised for infinite floats: int(float("inf"))
        return default
|
||||
|
||||
|
||||
def code_to_language(code, default=None):
|
||||
"""Map an ISO 639-1 language code to its actual name"""
|
||||
return CODES.get((code or "").lower(), default)
|
||||
|
||||
@@ -160,6 +160,16 @@ class TestOther(unittest.TestCase):
|
||||
{1: {2: {3: {4: {"1": "A", "3": "C"}}}}}),
|
||||
{1: {2: {3: {4: {"1": "A", "2": "b", "3": "C"}}}}})
|
||||
|
||||
def test_safe_int(self):
    """Check 'util.safe_int()' for valid and unconvertible input"""
    # (input value, expected result) when relying on the implicit default
    implicit_default = (
        (123, 123),
        ("123", 123),
        ("zzz", 0),
        ("", 0),
        (None, 0),
    )
    for value, expected in implicit_default:
        self.assertEqual(util.safe_int(value), expected)

    # an explicit default is handed back unchanged for bad input
    for value in ("zzz", "", None):
        self.assertEqual(util.safe_int(value, "default"), "default")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user