add manga extractors to all foolslide-based modules

This commit is contained in:
Mike Fährmann
2017-04-11 21:03:40 +02:00
parent bd95fea82c
commit c9a5650cf8
11 changed files with 152 additions and 22 deletions

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract manga-chapters from https://kobato.hologfx.com/"""
"""Extractors for https://kobato.hologfx.com/"""
from . import foolslide
@@ -14,8 +14,18 @@ from . import foolslide
class DokireaderChapterExtractor(foolslide.FoolslideChapterExtractor):
    """Extractor for manga-chapters from kobato.hologfx.com"""
    category = "dokireader"
    # Single assignment using a raw string, so "\." reaches the regex engine
    # as an escaped dot instead of being an invalid string escape sequence.
    pattern = foolslide.chapter_pattern(r"kobato\.hologfx\.com/reader")
    # (url, expected-results) pairs consumed by the test suite
    test = [(("https://kobato.hologfx.com/reader/read/"
              "hitoribocchi_no_oo_seikatsu/en/3/34"), {
        "keyword": "f28811c01b64031671108a4a3d6eea1040816b82",
    })]
class DokireaderMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Manga (series overview) extractor for kobato.hologfx.com"""
    category = "dokireader"
    pattern = foolslide.manga_pattern(r"kobato\.hologfx\.com/reader")
    # (url, expected-results) pairs consumed by the test suite
    test = [
        (
            "https://kobato.hologfx.com/reader/series/"
            "boku_ha_ohimesama_ni_narenai/",
            {"url": "1c1f5a7258ce4f631f5fc32be548d78a6a57990d"},
        ),
    ]

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Base classes for extractors for FoolSlide based sites"""
"""Base classes for extractors for FoOlSlide based sites"""
from .common import Extractor, Message
from .. import text, util
@@ -21,13 +21,21 @@ CHAPTER_RE = (
r"(?:/(?P<chapter_minor>\d+))?)"
)
# Regex tail for a series overview path ("/series/<name>", optional trailing
# slash, end of string).  The final ")" closes a group that the pattern
# builder opens in front of the domain expression.
MANGA_RE = r"/series/[^/]+/?$)"
def chapter_pattern(domain_re):
    """Build the chapter URL pattern list for one site

    'domain_re' is a regex fragment for the site's address.  It is placed
    between an optional "http(s)://" prefix (which also opens a capture
    group) and CHAPTER_RE.  The result is a single-element list.
    """
    prefix = r"(?:https?://)?("
    return ["".join((prefix, domain_re, CHAPTER_RE))]
def manga_pattern(domain_re):
    """Build the manga (series overview) URL pattern list for one site

    'domain_re' is a regex fragment for the site's address.  It is placed
    between an optional "http(s)://" prefix (which also opens a capture
    group) and MANGA_RE.  The result is a single-element list.
    """
    prefix = r"(?:https?://)?("
    return ["".join((prefix, domain_re, MANGA_RE))]
class FoolslideChapterExtractor(Extractor):
"""Base class for chapter extractors on foolslide based sites"""
"""Base class for chapter extractors for FoOlSlide based sites"""
subcategory = "chapter"
directory_fmt = ["{category}", "{manga}", "{chapter_string}"]
filename_fmt = "{manga}_{chapter:>03}_{page:>03}.{extension}"
@@ -42,7 +50,7 @@ class FoolslideChapterExtractor(Extractor):
def items(self):
page = self.request(self.url, encoding="utf-8",
method="post", data={"adult": "true"}).text
data = self.get_job_metadata(page)
data = self.get_metadata(page)
imgs = self.get_images(page)
data["count"] = len(imgs)
@@ -61,7 +69,7 @@ class FoolslideChapterExtractor(Extractor):
text.nameext_from_url(data["filename"], data)
yield Message.Url, url, data
def get_job_metadata(self, page):
def get_metadata(self, page):
"""Collect metadata for extractor-job"""
_ , pos = text.extract(page, '<h1 class="tbtitle dnone">', '')
manga , pos = text.extract(page, 'title="', '"', pos)
@@ -86,3 +94,26 @@ class FoolslideChapterExtractor(Extractor):
pos = page.find("[{")
needle = " = "
return json.loads(text.extract(page, needle, ";", pos)[0])
class FoolslideMangaExtractor(Extractor):
    """Base class for manga extractors for FoOlSlide based sites"""
    subcategory = "manga"
    # Scheme used to build the URL when no explicit 'url' is passed in
    scheme = "https"

    def __init__(self, match, url=None):
        """Remember the series URL

        Uses 'url' when it is truthy; otherwise the URL is assembled from
        the class' scheme and the first group of 'match'.
        """
        Extractor.__init__(self)
        if not url:
            url = self.scheme + "://" + match.group(1)
        self.url = url

    def items(self):
        """Yield a version message followed by one queue message per chapter"""
        yield Message.Version, 1
        for chapter_url in self.chapters():
            yield Message.Queue, chapter_url

    def chapters(self):
        """Return an iterator over all chapter urls, in reverse page order"""
        # POST with adult=true, same as the chapter extractor's page request
        response = self.request(
            self.url, encoding="utf-8",
            method="post", data={"adult": "true"},
        )
        chapter_urls = list(text.extract_iter(
            response.text, '<div class="title"><a href="', '"'
        ))
        return reversed(chapter_urls)

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract manga-chapters from https://gomanga.co/"""
"""Extractors for https://gomanga.co/"""
from . import foolslide
@@ -26,3 +26,13 @@ class GomangaChapterExtractor(foolslide.FoolslideChapterExtractor):
}),
]
single = False
class GomangaMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Manga (series overview) extractor for gomanga.co"""
    category = "gomanga"
    pattern = foolslide.manga_pattern(r"(?:www\.)?gomanga\.co/reader")
    # (url, expected-results) pairs consumed by the test suite
    test = [
        (
            "https://gomanga.co/reader/series/pastel/",
            {"url": "bd1c82d70838d54140a8209296e789f27ceab7cd"},
        ),
    ]
    # NOTE(review): the meaning of 'single' is defined outside this view;
    # the value matches the one set on the gomanga chapter extractor.
    single = False

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract manga-chapters from https://jaiminisbox.com/"""
"""Extractors for https://jaiminisbox.com/"""
from . import foolslide
@@ -14,8 +14,17 @@ from . import foolslide
class JaiminisboxChapterExtractor(foolslide.FoolslideChapterExtractor):
    """Extractor for manga-chapters from jaiminisbox.com"""
    category = "jaiminisbox"
    # Single assignment with the dot before "com" escaped, so the pattern
    # only matches the literal domain rather than any character there.
    pattern = foolslide.chapter_pattern(r"(?:www\.)?jaiminisbox\.com/reader")
    # (url, expected-results) pairs consumed by the test suite
    test = [("https://jaiminisbox.com/reader/read/uratarou/en/0/1/", {
        "url": "f021de7f31ee3a3f688fdf3e8183aef4226c2b50",
        "keyword": "d187df3e3b6dbe09ec163626f6fd7c57133ab163",
    })]
class JaiminisboxMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Manga (series overview) extractor for jaiminisbox.com"""
    category = "jaiminisbox"
    pattern = foolslide.manga_pattern(r"(?:www\.)?jaiminisbox\.com/reader")
    # (url, expected-results) pairs consumed by the test suite
    test = [
        (
            "https://jaiminisbox.com/reader/series/sora_no_kian/",
            {"url": "66612be177dc3b3fa1d1f537ef02f4f701b163ea"},
        ),
    ]

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract manga-chapters from https://reader.kireicake.com/"""
"""Extractors for https://reader.kireicake.com/"""
from . import foolslide
@@ -14,8 +14,17 @@ from . import foolslide
class KireicakeChapterExtractor(foolslide.FoolslideChapterExtractor):
    """Extractor for manga-chapters from reader.kireicake.com"""
    category = "kireicake"
    # Single assignment using a raw string, so "\." reaches the regex engine
    # as an escaped dot instead of being an invalid string escape sequence.
    pattern = foolslide.chapter_pattern(r"reader\.kireicake\.com")
    # (url, expected-results) pairs consumed by the test suite
    test = [("https://reader.kireicake.com/read/wonderland/en/1/1/", {
        "url": "b2d36bc0bc67e4c461c3a4d6444a2fd339f5d07e",
        "keyword": "17d04e3bb24f6ad593463ecb7f90667f0df5326f",
    })]
class KireicakeMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Manga (series overview) extractor for reader.kireicake.com"""
    category = "kireicake"
    pattern = foolslide.manga_pattern(r"reader\.kireicake\.com")
    # (url, expected-results) pairs consumed by the test suite
    test = [
        (
            "https://reader.kireicake.com/series/wonderland/",
            {"url": "d067b649af1cc88fa8c8b698fde04a10909fd169"},
        ),
    ]

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract manga-chapters from http://powermanga.org/"""
"""Extractors for http://powermanga.org/"""
from . import foolslide
@@ -19,3 +19,12 @@ class PowermangaChapterExtractor(foolslide.FoolslideChapterExtractor):
"url": "e6179c1565068f99180620281f86bdd25be166b4",
"keyword": "203ea5d0ef7759f4517316f0678f3592fc27cdbe",
})]
class PowermangaMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Manga (series overview) extractor for powermanga.org"""
    category = "powermanga"
    pattern = foolslide.manga_pattern(r"read\.powermanga\.org")
    # (url, expected-results) pairs consumed by the test suite
    test = [
        (
            "http://read.powermanga.org/series/my_hero_academia/",
            {"url": "3c7004eea7eefc8d365af3ec95ba98f8cc359553"},
        ),
    ]

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract manga-chapters from https://reader.seaotterscans.com/"""
"""Extractors for https://reader.seaotterscans.com/"""
from . import foolslide
@@ -19,3 +19,12 @@ class SeaotterscansChapterExtractor(foolslide.FoolslideChapterExtractor):
"url": "63d46b8883cc652dfe8bd5be4492160dd31f06a8",
"keyword": "4d92576e23ee2a5058fd150690230091ee091868",
})]
class SeaotterscansMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Extractor for manga from reader.seaotterscans.com"""
    category = "seaotterscans"
    # Raw string: "\." is a regex escape here; in a plain string literal it
    # is an invalid escape sequence that newer Python versions warn about.
    pattern = foolslide.manga_pattern(r"reader\.seaotterscans\.com")
    # (url, expected-results) pairs consumed by the test suite
    test = [("https://reader.seaotterscans.com/series/marry_me/", {
        "url": "fdbacabfa566a6baeb3f01bb46cbda0577bd4bbe",
    })]

View File

@@ -6,14 +6,23 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract manga-chapters from http://sensescans.com/"""
"""Extractors for http://sensescans.com/"""
from . import foolslide
class SensescansChapterExtractor(foolslide.FoolslideChapterExtractor):
"""Extractor for manga-chapters from sensescans.com"""
class SensescansExtractor:
    """Mixin with the shared setup of all sensescans.com extractors"""
    category = "sensescans"

    def __init__(self, match):
        """Build a fixed-host reader URL and hand it to the next initializer

        The first group of 'match' is appended to
        "http://sensescans.com/reader"; the result is passed as the second
        argument to the next __init__ in the MRO.
        """
        path = match.group(1)
        super().__init__(match, "http://sensescans.com/reader" + path)
class SensescansChapterExtractor(SensescansExtractor,
foolslide.FoolslideChapterExtractor):
"""Extractor for manga-chapters from sensescans.com"""
pattern = [(r"(?:https?://)?(?:www\.|reader\.)?sensescans\.com"
r"(?:/reader)?(" + foolslide.CHAPTER_RE)]
test = [
@@ -29,6 +38,12 @@ class SensescansChapterExtractor(foolslide.FoolslideChapterExtractor):
}),
]
def __init__(self, match):
url = "http://sensescans.com/reader" + match.group(1)
super().__init__(match, url)
class SensescansMangaExtractor(SensescansExtractor,
                               foolslide.FoolslideMangaExtractor):
    """Manga (series overview) extractor for sensescans.com"""
    # Accepts the bare, "www." and "reader." hosts with an optional "/reader"
    # prefix; the capture group starts after that prefix.
    pattern = [
        r"(?:https?://)?(?:www\.|reader\.)?sensescans\.com"
        r"(?:/reader)?(" + foolslide.MANGA_RE
    ]
    # (url, expected-results) pairs consumed by the test suite
    test = [
        (
            "http://sensescans.com/reader/series/hakkenden/",
            {"url": "2360ccb0ead0ff2f5e27b7aef7eb17b9329de2f2"},
        ),
    ]

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract manga-chapters from http://www.slide.world-three.org/"""
"""Extractors for http://www.slide.world-three.org/"""
from . import foolslide
@@ -28,3 +28,13 @@ class WorldthreeChapterExtractor(foolslide.FoolslideChapterExtractor):
}),
]
scheme = "http"
class WorldthreeMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Extractor for manga from slide.world-three.org"""
    category = "worldthree"
    # Raw string: "\." is a regex escape here; in a plain string literal it
    # is an invalid escape sequence that newer Python versions warn about.
    pattern = foolslide.manga_pattern(r"(?:www\.)?slide\.world-three\.org")
    # (url, expected-results) pairs consumed by the test suite
    test = [("http://www.slide.world-three.org/series/black_bullet/", {
        "url": "5743b93512d26e6b540d90a7a5d69208b6d4a738",
    })]
    # Overrides the scheme attribute; the test URL above is plain http
    scheme = "http"

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract manga-chapters from https://yomanga.co/"""
"""Extractors for https://yomanga.co/"""
from . import foolslide
@@ -19,3 +19,12 @@ class YomangaChapterExtractor(foolslide.FoolslideChapterExtractor):
"url": "4b5d8fc5902f03647cc876cf6643849e5bc05455",
})]
single = False
class YomangaMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Manga (series overview) extractor for yomanga.co"""
    category = "yomanga"
    pattern = foolslide.manga_pattern(r"(?:www\.)?yomanga\.co/reader")
    # (url, expected-results) pairs consumed by the test suite
    test = [
        (
            "https://yomanga.co/reader/series/6_weapons/",
            {"url": "19a4828d3a06a4c89c885847c83af54ec1add0f7"},
        ),
    ]

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract manga-chapters from https://yonkouprod.com/"""
"""Extractors for https://yonkouprod.com/"""
from . import foolslide
@@ -19,3 +19,12 @@ class YonkouprodChapterExtractor(foolslide.FoolslideChapterExtractor):
"url": "7647850e2b1ad11c2baa9628755bf7f186350a0b",
"keyword": "dc1b5764c71e9d93b2d4b18547feb372cd76f730",
})]
class YonkouprodMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Manga (series overview) extractor for yonkouprod.com"""
    category = "yonkouprod"
    pattern = foolslide.manga_pattern(r"(?:www\.)?yonkouprod\.com/reader")
    # (url, expected-results) pairs consumed by the test suite
    test = [
        (
            "https://yonkouprod.com/reader/series/attack-on-titan/",
            {"url": "33bc7a08a6fbf41cf609bdd000d16893d55a3f29"},
        ),
    ]