diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index b7c550f6..5914aa67 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -168,9 +168,9 @@ class ChapterExtractor(Extractor):
subcategory = "chapter"
directory_fmt = [
"{category}", "{manga}",
- "{volume:?v/ />02}c{chapter:>03}{chapter_minor}{title:?: //}"]
+ "{volume:?v/ />02}c{chapter:>03}{chapter_minor:?//}{title:?: //}"]
filename_fmt = (
- "{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}")
+ "{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}")
def __init__(self, url):
Extractor.__init__(self)
@@ -182,7 +182,10 @@ class ChapterExtractor(Extractor):
imgs = self.get_images(page)
if "count" in data:
- images = zip(range(1, data["count"]+1), imgs)
+ images = zip(
+ range(1, data["count"]+1),
+ imgs
+ )
else:
try:
data["count"] = len(imgs)
diff --git a/gallery_dl/extractor/hentai2read.py b/gallery_dl/extractor/hentai2read.py
index 5b369e47..c960daa4 100644
--- a/gallery_dl/extractor/hentai2read.py
+++ b/gallery_dl/extractor/hentai2read.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2017 Mike Fährmann
+# Copyright 2016-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -8,9 +8,8 @@
"""Extract hentai-manga from https://hentai2read.com/"""
-from .common import MangaExtractor
+from .common import ChapterExtractor, MangaExtractor
from .. import text, util
-from . import hentaicdn
import re
import json
@@ -53,42 +52,43 @@ class Hentai2readMangaExtractor(MangaExtractor):
return results
-class Hentai2readChapterExtractor(hentaicdn.HentaicdnChapterExtractor):
+class Hentai2readChapterExtractor(ChapterExtractor):
"""Extractor for a single manga chapter from hentai2read.com"""
category = "hentai2read"
pattern = [r"(?:https?://)?(?:www\.)?hentai2read\.com/([^/]+)/(\d+)"]
test = [("http://hentai2read.com/amazon_elixir/1/", {
"url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
- "keyword": "0f6408d462a14bfe58030117dc295b84666843d0",
+ "keyword": "9845105898d28c6a540cffdea60a1a20fab52431",
})]
def __init__(self, match):
- hentaicdn.HentaicdnChapterExtractor.__init__(self)
url_title, self.chapter = match.groups()
- self.url = "https://hentai2read.com/{}/{}/".format(
- url_title, self.chapter
- )
+ url = "https://hentai2read.com/{}/{}/".format(url_title, self.chapter)
+ ChapterExtractor.__init__(self, url)
- def get_job_metadata(self, page, images):
- title = text.extract(page, "<title>", "</title>")[0]
- chapter_id = text.extract(page, 'data-cid="', '"')[0]
+ def get_metadata(self, page):
+ title, pos = text.extract(page, "<title>", "</title>")
+ manga_id, pos = text.extract(page, 'data-mid="', '"', pos)
+ chapter_id, pos = text.extract(page, 'data-cid="', '"', pos)
match = re.match(r"Reading (.+) \(([^)]+)\) Hentai(?: by (.+))? - "
r"(\d+): (.+) . Page 1 ", title)
return {
- "manga_id": images[0].split("/")[-3],
"manga": match.group(1),
+ "manga_id": util.safe_int(manga_id),
+ "chapter": util.safe_int(self.chapter),
+ "chapter_id": util.safe_int(chapter_id),
"type": match.group(2),
- "chapter_id": chapter_id,
- "chapter": self.chapter,
"author": match.group(3),
"title": match.group(5),
- "count": len(images),
"lang": "en",
"language": "English",
}
@staticmethod
- def get_image_urls(page):
+ def get_images(page):
"""Extract and return a list of all image-urls"""
images = text.extract(page, "'images' : ", ",\n")[0]
- return json.loads(images)
+ return [
+ ("https://hentaicdn.com/hentai" + part, None)
+ for part in json.loads(images)
+ ]
diff --git a/gallery_dl/extractor/hentaicdn.py b/gallery_dl/extractor/hentaicdn.py
deleted file mode 100644
index 0ef55456..00000000
--- a/gallery_dl/extractor/hentaicdn.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2016-2017 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Base classes for extractors from sites based on hentaicdn"""
-
-from .common import Extractor, Message
-from .. import text
-import json
-
-
-class HentaicdnChapterExtractor(Extractor):
- """Base class for extractors for a single manga chapter"""
- subcategory = "chapter"
- directory_fmt = ["{category}", "{manga_id} {title}"]
- filename_fmt = ("{category}_{manga_id}_{chapter:>02}_"
- "{num:>03}.{extension}")
- url = ""
-
- def items(self):
- page = self.request(self.url).text
- images = self.get_image_urls(page)
- data = self.get_job_metadata(page, images)
- yield Message.Version, 1
- yield Message.Directory, data
- for data["num"], part in enumerate(images, 1):
- url = "https://hentaicdn.com/hentai" + part
- yield Message.Url, url, text.nameext_from_url(url, data)
-
- def get_job_metadata(self, page, images):
- """Collect metadata for extractor-job"""
-
- @staticmethod
- def get_image_urls(page):
- """Extract and return a list of all image-urls"""
- images = text.extract(page, "var rff_imageList = ", ";")[0]
- return json.loads(images)
diff --git a/gallery_dl/extractor/hentaihere.py b/gallery_dl/extractor/hentaihere.py
index 48ebb2ea..9937cf8b 100644
--- a/gallery_dl/extractor/hentaihere.py
+++ b/gallery_dl/extractor/hentaihere.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2017 Mike Fährmann
+# Copyright 2016-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -8,10 +8,10 @@
"""Extract hentai-manga from https://hentaihere.com/"""
-from .common import MangaExtractor
+from .common import ChapterExtractor, MangaExtractor
from .. import text, util
-from . import hentaicdn
import re
+import json
class HentaihereMangaExtractor(MangaExtractor):
@@ -56,36 +56,42 @@ class HentaihereMangaExtractor(MangaExtractor):
}))
-class HentaihereChapterExtractor(hentaicdn.HentaicdnChapterExtractor):
+class HentaihereChapterExtractor(ChapterExtractor):
"""Extractor for a single manga chapter from hentaihere.com"""
category = "hentaihere"
pattern = [r"(?:https?://)?(?:www\.)?hentaihere\.com/m/S(\d+)/(\d+)"]
test = [("https://hentaihere.com/m/S13812/1/1/", {
"url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
- "keyword": "a07753f655210525a80ff62607261715746f3273",
+ "keyword": "e9382a9be337abce3db2b1132e85751379dc05c5",
})]
def __init__(self, match):
- hentaicdn.HentaicdnChapterExtractor.__init__(self)
- self.gid, self.chapter = match.groups()
- self.url = "https://hentaihere.com/m/S{}/{}/1".format(
- self.gid, self.chapter
- )
+ self.manga_id, self.chapter = match.groups()
+ url = "https://hentaihere.com/m/S{}/{}/1".format(
+ self.manga_id, self.chapter)
+ ChapterExtractor.__init__(self, url)
- def get_job_metadata(self, page, images):
+ def get_metadata(self, page):
title = text.extract(page, "<title>", "</title>")[0]
chapter_id = text.extract(page, 'report/C', '"')[0]
pattern = r"Page 1 \| (.+) \(([^)]+)\) - Chapter \d+: (.+) by (.+) at "
match = re.match(pattern, title)
return {
- "manga_id": self.gid,
"manga": match.group(1),
+ "manga_id": util.safe_int(self.manga_id),
+ "chapter": util.safe_int(self.chapter),
+ "chapter_id": util.safe_int(chapter_id),
"type": match.group(2),
- "chapter_id": chapter_id,
- "chapter": self.chapter,
"title": match.group(3),
"author": match.group(4),
- "count": len(images),
"lang": "en",
"language": "English",
}
+
+ @staticmethod
+ def get_images(page):
+ images = text.extract(page, "var rff_imageList = ", ";")[0]
+ return [
+ ("https://hentaicdn.com/hentai" + part, None)
+ for part in json.loads(images)
+ ]
diff --git a/gallery_dl/extractor/kissmanga.py b/gallery_dl/extractor/kissmanga.py
index fe9f1ae6..69a6aaeb 100644
--- a/gallery_dl/extractor/kissmanga.py
+++ b/gallery_dl/extractor/kissmanga.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2017 Mike Fährmann
+# Copyright 2015-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -8,7 +8,7 @@
"""Extract manga-chapters and entire manga from http://kissmanga.com/"""
-from .common import Extractor, MangaExtractor, Message
+from .common import ChapterExtractor, MangaExtractor
from .. import text, util, cloudflare, aes, exception
from ..cache import cache
import re
@@ -21,21 +21,11 @@ IV = [
]
-class KissmangaExtractor(Extractor):
+class KissmangaBase():
"""Base class for kissmanga extractors"""
category = "kissmanga"
- directory_fmt = [
- "{category}", "{manga}",
- "{volume:?v/ />02}c{chapter:>03}{chapter_minor}{title:?: //}"]
- filename_fmt = (
- "{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}")
root = "http://kissmanga.com"
- def __init__(self, match):
- Extractor.__init__(self)
- self.url = match.group(0)
- self.session.headers["Referer"] = self.root
-
def request(self, url):
response = cloudflare.request_func(self, url)
if response.history and "/Message/AreYouHuman?" in response.url:
@@ -72,10 +62,10 @@ class KissmangaExtractor(Extractor):
return data
-class KissmangaMangaExtractor(KissmangaExtractor, MangaExtractor):
+class KissmangaMangaExtractor(KissmangaBase, MangaExtractor):
"""Extractor for manga from kissmanga.com"""
- pattern = [r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com/"
- r"Manga/[^/?]+/?$"]
+ pattern = [r"(?i)(?:https?://)?(?:www\.)?(kissmanga\.com"
+ r"/Manga/[^/?]+/?)$"]
test = [
("http://kissmanga.com/Manga/Dropout", {
"url": "992befdd64e178fe5af67de53f8b510860d968ca",
@@ -105,11 +95,10 @@ class KissmangaMangaExtractor(KissmangaExtractor, MangaExtractor):
return results
-class KissmangaChapterExtractor(KissmangaExtractor):
+class KissmangaChapterExtractor(KissmangaBase, ChapterExtractor):
"""Extractor for manga-chapters from kissmanga.com"""
- subcategory = "chapter"
- pattern = [r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com/"
- r"Manga/[^/?]+/[^/?]+\?id=\d+"]
+ pattern = [r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com"
+ r"/Manga/[^/?]+/[^/?]+\?id=\d+"]
test = [
("http://kissmanga.com/Manga/Dropout/Ch-000---Oneshot-?id=145847", {
"url": "4136bcd1c6cecbca8cc2bc965d54f33ef0a97cc0",
@@ -126,18 +115,11 @@ class KissmangaChapterExtractor(KissmangaExtractor):
("http://kissmanga.com/mAnGa/mOnStEr/Monster-79?id=7608", None),
]
- def items(self):
- page = self.request(self.url).text
- data = self.get_job_metadata(page)
- imgs = self.get_image_urls(page)
- data["count"] = len(imgs)
- yield Message.Version, 1
- yield Message.Directory, data
- for data["page"], url in enumerate(imgs, 1):
- yield Message.Url, url, text.nameext_from_url(url, data)
+ def __init__(self, match):
+ ChapterExtractor.__init__(self, match.group(0))
+ self.session.headers["Referer"] = self.root
- def get_job_metadata(self, page):
- """Collect metadata for extractor-job"""
+ def get_metadata(self, page):
title = text.extract(page, "<title>", "</title>")[0].strip()
manga, cinfo = title.split("\n")[1:3]
data = {
@@ -148,12 +130,11 @@ class KissmangaChapterExtractor(KissmangaExtractor):
}
return self.parse_chapter_string(data)
- def get_image_urls(self, page):
- """Extract list of all image-urls for a manga chapter"""
+ def get_images(self, page):
try:
key = self.build_aes_key(page)
return [
- aes.aes_cbc_decrypt_text(data, key, IV)
+ (aes.aes_cbc_decrypt_text(data, key, IV), None)
for data in text.extract_iter(
page, 'lstImages.push(wrapKA("', '"'
)
diff --git a/gallery_dl/extractor/mangafox.py b/gallery_dl/extractor/mangafox.py
index ab681e74..906c2372 100644
--- a/gallery_dl/extractor/mangafox.py
+++ b/gallery_dl/extractor/mangafox.py
@@ -1,56 +1,39 @@
# -*- coding: utf-8 -*-
-# Copyright 2017 Mike Fährmann
+# Copyright 2017-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract manga-chapters and entire manga from http://www.mangafox.me/"""
+"""Extract manga-chapters and entire manga from http://fanfox.net/"""
-from .common import AsynchronousExtractor, Message
+from .common import ChapterExtractor
from .. import text, util, exception
import re
-class MangafoxChapterExtractor(AsynchronousExtractor):
- """Extractor for manga-chapters from mangafox.me"""
+class MangafoxChapterExtractor(ChapterExtractor):
+ """Extractor for manga-chapters from fanfox.net"""
category = "mangafox"
- subcategory = "chapter"
- directory_fmt = [
- "{category}", "{manga}",
- "{volume:?v/ />02}c{chapter:>03}{chapter_minor}"]
- filename_fmt = (
- "{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}")
- pattern = [(r"(?:https?://)?(?:www\.)?(mangafox\.me/manga/"
- r"[^/]+/(v\d+/)?c\d+[^/]*)")]
- test = [(("http://mangafox.me/manga/kidou_keisatsu_patlabor/"
- "v05/c006.2/1.html"), {
- "keyword": "36b570e9ef11b4748407324fe08bebbe4856e6fd",
- "content": "5c50c252dcf12ffecf68801f4db8a2167265f66c",
- })]
+ pattern = [(r"(?:https?://)?(?:www\.)?(?:mangafox\.me|fanfox\.net)"
+ r"(/manga/[^/]+/(?:v\d+/)?c\d+[^/?]*)")]
+ test = [
+ ("http://fanfox.net/manga/kidou_keisatsu_patlabor/v05/c006.2/1.html", {
+ "keyword": "36b570e9ef11b4748407324fe08bebbe4856e6fd",
+ "content": "5c50c252dcf12ffecf68801f4db8a2167265f66c",
+ }),
+ ("http://mangafox.me/manga/kidou_keisatsu_patlabor/v05/c006.2/", None),
+ ]
+ root = "http://fanfox.net"
def __init__(self, match):
- AsynchronousExtractor.__init__(self)
- self.url = "http://" + match.group(1)
-
- def items(self):
- page = self.request(self.url + "/1.html").text
- if "Sorry, its licensed, and not available." in page:
- raise exception.AuthorizationError()
- data = self.get_metadata(page)
- urls = zip(
- range(1, data["count"]+1),
- self.get_image_urls(page),
- )
- yield Message.Version, 1
- yield Message.Directory, data.copy()
- for data["page"], url in urls:
- text.nameext_from_url(url, data)
- yield Message.Url, url, data.copy()
+ self.urlbase = self.root + match.group(1)
+ ChapterExtractor.__init__(self, self.urlbase + "/1.html")
def get_metadata(self, page):
- """Collect metadata for extractor-job"""
+ if "Sorry, its licensed, and not available." in page:
+ raise exception.AuthorizationError()
data = text.extract_all(page, (
("manga" , " - Read ", " Manga Scans "),
("sid" , "var sid=", ";"),
@@ -67,14 +50,14 @@ class MangafoxChapterExtractor(AsynchronousExtractor):
data[key] = util.safe_int(data[key])
return data
- def get_image_urls(self, page):
- """Yield all image-urls for this chapter"""
+ def get_images(self, page):
pnum = 1
while True:
url, pos = text.extract(page, '
02}c{chapter:>03}{chapter_minor}"]
- filename_fmt = (
- "{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}")
pattern = [(r"(?:https?://)?(?:www\.|m\.)?mangahere\.c[co]/manga/"
r"([^/]+(?:/v0*(\d+))?/c0*(\d+)(\.\d+)?)")]
test = [
@@ -82,27 +76,12 @@ class MangahereChapterExtractor(AsynchronousExtractor):
url_fmt = "http://www.mangahere.cc/manga/{}/{}.html"
def __init__(self, match):
- AsynchronousExtractor.__init__(self)
self.part, self.volume, self.chapter, self.chminor = match.groups()
-
- def items(self):
# remove ".html" for the first chapter page to avoid redirects
url = self.url_fmt.format(self.part, "")[:-5]
+ ChapterExtractor.__init__(self, url)
- page = self.request(url).text
- data = self.get_job_metadata(page)
- urls = zip(
- range(1, data["count"]+1),
- self.get_image_urls(page),
- )
-
- yield Message.Version, 1
- yield Message.Directory, data.copy()
- for data["page"], url in urls:
- text.nameext_from_url(url, data)
- yield Message.Url, url, data.copy()
-
- def get_job_metadata(self, page):
+ def get_metadata(self, page):
"""Collect metadata for extractor-job"""
manga, pos = text.extract(page, '<title>', '</title>')
chid , pos = text.extract(page, '.net/store/manga/', '/', pos)
@@ -122,15 +101,16 @@ class MangahereChapterExtractor(AsynchronousExtractor):
"language": "English",
}
- def get_image_urls(self, page):
+ def get_images(self, page):
"""Yield all image-urls for this chapter"""
pnum = 1
while True:
url, pos = text.extract(page, '
03}{title:?: //}"]
- filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
pattern = [
(r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/?]+)/(\d+))"),
(r"(?:https?://)?(?:www\.)?mangareader\.net"
@@ -70,26 +67,14 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
test = [(("http://www.mangareader.net/"
"karate-shoukoushi-kohinata-minoru/11"), {
"url": "84ffaab4c027ef9022695c53163c3aeabd07ca58",
- "keyword": "2038e6a780a0028eee0067985b55debb1d4a6aab",
+ "keyword": "2893cfcd1916859fb498f3345f1929f868fe667f",
})]
def __init__(self, match):
- AsynchronousExtractor.__init__(self)
self.part, self.url_title, self.chapter = match.groups()
+ ChapterExtractor.__init__(self, self.root + self.part)
- def items(self):
- page = self.request(self.root + self.part).text
- data = self.get_job_metadata(page)
- yield Message.Version, 1
- yield Message.Directory, data
- for data["page"] in range(1, data["count"]+1):
- next_url, image_url, image_data = self.get_page_metadata(page)
- image_data.update(data)
- yield Message.Url, image_url, image_data
- if next_url:
- page = self.request(next_url).text
-
- def get_job_metadata(self, chapter_page):
+ def get_metadata(self, chapter_page):
"""Collect metadata for extractor-job"""
page = self.request(self.root + self.url_title).text
data = self.parse_page(page, {
@@ -106,7 +91,16 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
)
return data
- def get_page_metadata(self, page):
+ def get_images(self, page):
+ while True:
+ next_url, image_url, image_data = self.get_image_metadata(page)
+ yield image_url, image_data
+
+ if not next_url:
+ return
+ page = self.request(next_url).text
+
+ def get_image_metadata(self, page):
"""Collect next url, image-url and metadata for one manga-page"""
extr = text.extract
width = None
@@ -122,7 +116,7 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
width , pos = extr(page, '
', "<")
pos = page.find(self.part, pos)
@@ -59,9 +40,11 @@ class MangastreamChapterExtractor(AsynchronousExtractor):
"language": "English",
}
- @staticmethod
- def get_page_metadata(page):
- """Collect next url, image-url and metadata for one manga-page"""
- nurl, pos = text.extract(page, '\n
\n03}"]
filename_fmt = "{comic}_{issue:>03}_{page:>03}.{extension}"
root = "http://readcomiconline.to"
+ useragent = "Wget/1.19.2 (linux-gnu)"
- def __init__(self, match):
- kissmanga.KissmangaExtractor.__init__(self, match)
- self.session.headers["User-Agent"] = "Wget/1.19.2 (linux-gnu)"
+ request = cloudflare.request_func
-class ReadcomiconlineComicExtractor(ReadcomiconlineExtractor,
- kissmanga.KissmangaMangaExtractor):
+class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
"""Extractor for comics from readcomiconline.to"""
subcategory = "comic"
- pattern = [r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to/"
- r"Comic/[^/?]+/?$"]
+ pattern = [r"(?i)(?:https?://)?(?:www\.)?(readcomiconline\.to"
+ r"/Comic/[^/?]+/?)$"]
test = [
("http://readcomiconline.to/Comic/W-i-t-c-h", {
"url": "c5a530538a30b176916e30cbe223a93d83cb2691",
@@ -42,6 +40,10 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineExtractor,
}),
]
+ def __init__(self, match):
+ MangaExtractor.__init__(self, match)
+ self.session.headers["User-Agent"] = self.useragent
+
def chapters(self, page):
results = []
comic, pos = text.extract(page, '', '<')
@@ -58,19 +60,21 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineExtractor,
return results
-class ReadcomiconlineIssueExtractor(ReadcomiconlineExtractor,
- kissmanga.KissmangaChapterExtractor):
+class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
"""Extractor for comic-issues from readcomiconline.to"""
subcategory = "issue"
- pattern = [r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to/"
- r"Comic/[^/?]+/[^/?]+\?id=\d+"]
+ pattern = [r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to"
+ r"/Comic/[^/?]+/[^/?]+\?id=\d+"]
test = [("http://readcomiconline.to/Comic/W-i-t-c-h/Issue-130?id=22289", {
"url": "a45c77f8fbde66091fe2346d6341f9cf3c6b1bc5",
"keyword": "dee8a8a44659825afe1d69e1d809a48b03e98c68",
})]
- def get_job_metadata(self, page):
- """Collect metadata for extractor-job"""
+ def __init__(self, match):
+ ChapterExtractor.__init__(self, match.group(0))
+ self.session.headers["User-Agent"] = self.useragent
+
+ def get_metadata(self, page):
comic, pos = text.extract(page, " - Read\r\n ", "\r\n")
iinfo, pos = text.extract(page, " ", "\r\n", pos)
match = re.match(r"(?:Issue )?#(\d+)|(.+)", iinfo)
@@ -82,6 +86,10 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineExtractor,
}
@staticmethod
- def get_image_urls(page):
- """Extract list of all image-urls for a manga chapter"""
- return list(text.extract_iter(page, 'lstImages.push("', '"'))
+ def get_images(page):
+ return [
+ (url, None)
+ for url in text.extract_iter(
+ page, 'lstImages.push("', '"'
+ )
+ ]
diff --git a/gallery_dl/extractor/spectrumnexus.py b/gallery_dl/extractor/spectrumnexus.py
index 2cfc1d99..93630196 100644
--- a/gallery_dl/extractor/spectrumnexus.py
+++ b/gallery_dl/extractor/spectrumnexus.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2017 Mike Fährmann
+# Copyright 2015-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -8,7 +8,7 @@
"""Extract manga pages from http://www.thespectrum.net/manga_scans/"""
-from .common import MangaExtractor, AsynchronousExtractor, Message
+from .common import ChapterExtractor, MangaExtractor
from .. import text, util
@@ -33,55 +33,31 @@ class SpectrumnexusMangaExtractor(MangaExtractor):
return results
-class SpectrumnexusChapterExtractor(AsynchronousExtractor):
+class SpectrumnexusChapterExtractor(ChapterExtractor):
"""Extractor for manga-chapters or -volumes from thespectrum.net"""
category = "spectrumnexus"
- subcategory = "chapter"
- directory_fmt = ["{category}", "{manga}", "{identifier}"]
- filename_fmt = "{manga} {identifier} {page:>03}.{extension}"
- pattern = [
- (r"(?:https?://)?(view\.thespectrum\.net/series/"
- r"[^\.]+\.html)\?ch=(Chapter\+(\d+)|Volume\+(\d+))"),
- (r"(?:https?://)?(view\.thespectrum\.net/series/"
- r"[^/]+-chapter-(\d+)\.html)"),
- ]
+ directory_fmt = ["{category}", "{manga}", "{chapter_string}"]
+ filename_fmt = "{manga}_{chapter_string}_{page:>03}.{extension}"
+
+ pattern = [r"(?:https?://)?view\.thespectrum\.net/series/"
+ r"([^\.]+\.html)\?ch=(Chapter\+(\d+)|Volume\+(\d+))"]
test = [(("http://view.thespectrum.net/series/"
"toriko.html?ch=Chapter+343&page=1"), {
"url": "c0fc7dc594841217cc622a67edd79f06e9900333",
- "keyword": "3d0cb57b6b1c2cbecc7aed33f83c24891a4ff53f",
+ "keyword": "a8abe126cbc5fc798148b0b155242a470c1ba9d1",
})]
def __init__(self, match):
- AsynchronousExtractor.__init__(self)
- self.url = "http://" + match.group(1)
- self.identifier = match.group(2)
- self.chapter = match.group(3)
- self.volume = match.group(4)
+ path, self.chapter_string, self.chapter, self.volume = match.groups()
+ url = "http://view.thespectrum.net/series/{}?ch={}".format(
+ path, self.chapter_string)
+ ChapterExtractor.__init__(self, url)
- def items(self):
- params = {
- "ch": self.identifier,
- "page": 1,
- }
- page = self.request(self.url, params=params).text
- data = self.get_job_metadata(page)
- yield Message.Version, 1
- yield Message.Directory, data.copy()
- for i in range(1, data["count"]+1):
- url = self.get_image_url(page)
- text.nameext_from_url(url, data)
- data["page"] = i
- yield Message.Url, url, data.copy()
- if i < data["count"]:
- params["page"] += 1
- page = self.request(self.url, params=params).text
-
- def get_job_metadata(self, page):
- """Collect metadata for extractor-job"""
+ def get_metadata(self, page):
data = {
"chapter": util.safe_int(self.chapter),
+ "chapter_string": self.chapter_string.replace("+", " "),
"volume": util.safe_int(self.volume),
- "identifier": self.identifier.replace("+", " "),
}
data = text.extract_all(page, (
('manga', '<title>', ' &middot; SPECTRUM NEXUS '),
@@ -90,7 +66,9 @@ class SpectrumnexusChapterExtractor(AsynchronousExtractor):
data["count"] = util.safe_int(data["count"])
return data
- @staticmethod
- def get_image_url(page):
- """Extract url of one manga page"""
- return text.extract(page, '