add manga extractors to all foolslide-based modules

This commit is contained in:
Mike Fährmann
2017-04-11 21:03:40 +02:00
parent bd95fea82c
commit c9a5650cf8
11 changed files with 152 additions and 22 deletions

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract manga-chapters from https://kobato.hologfx.com/""" """Extractors for https://kobato.hologfx.com/"""
from . import foolslide from . import foolslide
@@ -14,8 +14,18 @@ from . import foolslide
class DokireaderChapterExtractor(foolslide.FoolslideChapterExtractor): class DokireaderChapterExtractor(foolslide.FoolslideChapterExtractor):
"""Extractor for manga-chapters from kobato.hologfx.com""" """Extractor for manga-chapters from kobato.hologfx.com"""
category = "dokireader" category = "dokireader"
pattern = foolslide.chapter_pattern("kobato\.hologfx\.com/reader") pattern = foolslide.chapter_pattern(r"kobato\.hologfx\.com/reader")
test = [(("https://kobato.hologfx.com/reader/read/" test = [(("https://kobato.hologfx.com/reader/read/"
"hitoribocchi_no_oo_seikatsu/en/3/34"), { "hitoribocchi_no_oo_seikatsu/en/3/34"), {
"keyword": "f28811c01b64031671108a4a3d6eea1040816b82", "keyword": "f28811c01b64031671108a4a3d6eea1040816b82",
})] })]
class DokireaderMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Manga (series overview) extractor for kobato.hologfx.com.

    All behaviour is inherited from foolslide.FoolslideMangaExtractor;
    this class only supplies the site-specific category, URL pattern
    and test data.
    """
    category = "dokireader"
    # The reader lives below the "/reader" path on this host.
    pattern = foolslide.manga_pattern(
        r"kobato\.hologfx\.com/reader"
    )
    # One (series URL, expected results) pair; the "url" value is
    # presumably a hash checked by the test suite -- confirm there.
    test = [
        (
            "https://kobato.hologfx.com/reader/series/"
            "boku_ha_ohimesama_ni_narenai/",
            {"url": "1c1f5a7258ce4f631f5fc32be548d78a6a57990d"},
        ),
    ]

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Base classes for extractors for FoolSlide based sites""" """Base classes for extractors for FoOlSlide based sites"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util from .. import text, util
@@ -21,13 +21,21 @@ CHAPTER_RE = (
r"(?:/(?P<chapter_minor>\d+))?)" r"(?:/(?P<chapter_minor>\d+))?)"
) )
# Regex tail for manga (series overview) URLs: "/series/", then one or
# more non-slash characters, an optional trailing slash and end of string.
# The closing ")" is intentional: it terminates the capture group that the
# caller opens in front of the domain part (see manga_pattern()).
MANGA_RE = (
r"/series/[^/]+/?$)"
)
def chapter_pattern(domain_re): def chapter_pattern(domain_re):
return [r"(?:https?://)?(" + domain_re + CHAPTER_RE] return [r"(?:https?://)?(" + domain_re + CHAPTER_RE]
def manga_pattern(domain_re):
    """Build the URL pattern list for a manga extractor.

    'domain_re' is a regex fragment matching the site's host (and any
    path prefix of its reader).  The result is a one-element list whose
    pattern accepts an optional http(s) scheme and captures the domain
    part plus the MANGA_RE tail in a single group.
    """
    scheme_and_group_start = r"(?:https?://)?("
    full_pattern = scheme_and_group_start + domain_re + MANGA_RE
    return [full_pattern]
class FoolslideChapterExtractor(Extractor): class FoolslideChapterExtractor(Extractor):
"""Base class for chapter extractors on foolslide based sites""" """Base class for chapter extractors for FoOlSlide based sites"""
subcategory = "chapter" subcategory = "chapter"
directory_fmt = ["{category}", "{manga}", "{chapter_string}"] directory_fmt = ["{category}", "{manga}", "{chapter_string}"]
filename_fmt = "{manga}_{chapter:>03}_{page:>03}.{extension}" filename_fmt = "{manga}_{chapter:>03}_{page:>03}.{extension}"
@@ -42,7 +50,7 @@ class FoolslideChapterExtractor(Extractor):
def items(self): def items(self):
page = self.request(self.url, encoding="utf-8", page = self.request(self.url, encoding="utf-8",
method="post", data={"adult": "true"}).text method="post", data={"adult": "true"}).text
data = self.get_job_metadata(page) data = self.get_metadata(page)
imgs = self.get_images(page) imgs = self.get_images(page)
data["count"] = len(imgs) data["count"] = len(imgs)
@@ -61,7 +69,7 @@ class FoolslideChapterExtractor(Extractor):
text.nameext_from_url(data["filename"], data) text.nameext_from_url(data["filename"], data)
yield Message.Url, url, data yield Message.Url, url, data
def get_job_metadata(self, page): def get_metadata(self, page):
"""Collect metadata for extractor-job""" """Collect metadata for extractor-job"""
_ , pos = text.extract(page, '<h1 class="tbtitle dnone">', '') _ , pos = text.extract(page, '<h1 class="tbtitle dnone">', '')
manga , pos = text.extract(page, 'title="', '"', pos) manga , pos = text.extract(page, 'title="', '"', pos)
@@ -86,3 +94,26 @@ class FoolslideChapterExtractor(Extractor):
pos = page.find("[{") pos = page.find("[{")
needle = " = " needle = " = "
return json.loads(text.extract(page, needle, ";", pos)[0]) return json.loads(text.extract(page, needle, ";", pos)[0])
class FoolslideMangaExtractor(Extractor):
    """Base class for manga extractors for FoOlSlide based sites

    Subclasses provide 'category' and 'pattern' (and may override
    'scheme'); this class requests the series page and queues every
    chapter URL found on it.
    """
    subcategory = "manga"
    # URL scheme used when the target URL is built from the pattern match.
    scheme = "https"

    def __init__(self, match, url=None):
        """Store the series URL.

        An explicitly passed, non-empty 'url' is used as is; otherwise
        the URL is assembled from 'scheme' and the first group of 'match'.
        """
        Extractor.__init__(self)
        if not url:
            url = self.scheme + "://" + match.group(1)
        self.url = url

    def items(self):
        """Yield a version message followed by one queue message per chapter"""
        yield Message.Version, 1
        for chapter_url in self.chapters():
            yield Message.Queue, chapter_url

    def chapters(self):
        """Return all chapter urls of the series page in reversed order

        The urls are collected in the order they appear on the page and
        handed back through reversed(), i.e. as an iterator, not a list.
        """
        response = self.request(
            self.url, encoding="utf-8", method="post", data={"adult": "true"}
        )
        links = list(text.extract_iter(
            response.text, '<div class="title"><a href="', '"'
        ))
        return reversed(links)

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract manga-chapters from https://gomanga.co/""" """Extractors for https://gomanga.co/"""
from . import foolslide from . import foolslide
@@ -26,3 +26,13 @@ class GomangaChapterExtractor(foolslide.FoolslideChapterExtractor):
}), }),
] ]
single = False single = False
class GomangaMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Manga (series overview) extractor for gomanga.co.

    Only site-specific attributes are defined here; the extraction
    logic comes from foolslide.FoolslideMangaExtractor.
    """
    category = "gomanga"
    # Optional "www." prefix; the reader lives below "/reader".
    pattern = foolslide.manga_pattern(
        r"(?:www\.)?gomanga\.co/reader"
    )
    test = [
        (
            "https://gomanga.co/reader/series/pastel/",
            {"url": "bd1c82d70838d54140a8209296e789f27ceab7cd"},
        ),
    ]
    # NOTE(review): the meaning of 'single' is not visible in this
    # module; it mirrors the flag on the gomanga chapter extractor.
    single = False

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract manga-chapters from https://jaiminisbox.com/""" """Extractors for https://jaiminisbox.com/"""
from . import foolslide from . import foolslide
@@ -14,8 +14,17 @@ from . import foolslide
class JaiminisboxChapterExtractor(foolslide.FoolslideChapterExtractor): class JaiminisboxChapterExtractor(foolslide.FoolslideChapterExtractor):
"""Extractor for manga-chapters from jaiminisbox.com""" """Extractor for manga-chapters from jaiminisbox.com"""
category = "jaiminisbox" category = "jaiminisbox"
pattern = foolslide.chapter_pattern(r"(?:www\.)?jaiminisbox.com/reader") pattern = foolslide.chapter_pattern(r"(?:www\.)?jaiminisbox\.com/reader")
test = [("https://jaiminisbox.com/reader/read/uratarou/en/0/1/", { test = [("https://jaiminisbox.com/reader/read/uratarou/en/0/1/", {
"url": "f021de7f31ee3a3f688fdf3e8183aef4226c2b50", "url": "f021de7f31ee3a3f688fdf3e8183aef4226c2b50",
"keyword": "d187df3e3b6dbe09ec163626f6fd7c57133ab163", "keyword": "d187df3e3b6dbe09ec163626f6fd7c57133ab163",
})] })]
class JaiminisboxMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Manga (series overview) extractor for jaiminisbox.com.

    Only site-specific attributes are defined here; the extraction
    logic comes from foolslide.FoolslideMangaExtractor.
    """
    category = "jaiminisbox"
    # Optional "www." prefix; the reader lives below "/reader".
    pattern = foolslide.manga_pattern(
        r"(?:www\.)?jaiminisbox\.com/reader"
    )
    test = [
        (
            "https://jaiminisbox.com/reader/series/sora_no_kian/",
            {"url": "66612be177dc3b3fa1d1f537ef02f4f701b163ea"},
        ),
    ]

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract manga-chapters from https://reader.kireicake.com/""" """Extractors for https://reader.kireicake.com/"""
from . import foolslide from . import foolslide
@@ -14,8 +14,17 @@ from . import foolslide
class KireicakeChapterExtractor(foolslide.FoolslideChapterExtractor): class KireicakeChapterExtractor(foolslide.FoolslideChapterExtractor):
"""Extractor for manga-chapters from reader.kireicake.com""" """Extractor for manga-chapters from reader.kireicake.com"""
category = "kireicake" category = "kireicake"
pattern = foolslide.chapter_pattern("reader\.kireicake\.com") pattern = foolslide.chapter_pattern(r"reader\.kireicake\.com")
test = [("https://reader.kireicake.com/read/wonderland/en/1/1/", { test = [("https://reader.kireicake.com/read/wonderland/en/1/1/", {
"url": "b2d36bc0bc67e4c461c3a4d6444a2fd339f5d07e", "url": "b2d36bc0bc67e4c461c3a4d6444a2fd339f5d07e",
"keyword": "17d04e3bb24f6ad593463ecb7f90667f0df5326f", "keyword": "17d04e3bb24f6ad593463ecb7f90667f0df5326f",
})] })]
class KireicakeMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Manga (series overview) extractor for reader.kireicake.com.

    Only site-specific attributes are defined here; the extraction
    logic comes from foolslide.FoolslideMangaExtractor.
    """
    category = "kireicake"
    # The reader is served from the host root, so no path prefix here.
    pattern = foolslide.manga_pattern(
        r"reader\.kireicake\.com"
    )
    test = [
        (
            "https://reader.kireicake.com/series/wonderland/",
            {"url": "d067b649af1cc88fa8c8b698fde04a10909fd169"},
        ),
    ]

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract manga-chapters from http://powermanga.org/""" """Extractors for http://powermanga.org/"""
from . import foolslide from . import foolslide
@@ -19,3 +19,12 @@ class PowermangaChapterExtractor(foolslide.FoolslideChapterExtractor):
"url": "e6179c1565068f99180620281f86bdd25be166b4", "url": "e6179c1565068f99180620281f86bdd25be166b4",
"keyword": "203ea5d0ef7759f4517316f0678f3592fc27cdbe", "keyword": "203ea5d0ef7759f4517316f0678f3592fc27cdbe",
})] })]
class PowermangaMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Manga (series overview) extractor for read.powermanga.org.

    Only site-specific attributes are defined here; the extraction
    logic comes from foolslide.FoolslideMangaExtractor.
    """
    category = "powermanga"
    pattern = foolslide.manga_pattern(
        r"read\.powermanga\.org"
    )
    # NOTE(review): the test URL uses "http", but 'scheme' is not
    # overridden here, so the inherited base-class scheme is used when
    # the request URL is built -- confirm this is intended.
    test = [
        (
            "http://read.powermanga.org/series/my_hero_academia/",
            {"url": "3c7004eea7eefc8d365af3ec95ba98f8cc359553"},
        ),
    ]

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract manga-chapters from https://reader.seaotterscans.com/""" """Extractors for https://reader.seaotterscans.com/"""
from . import foolslide from . import foolslide
@@ -19,3 +19,12 @@ class SeaotterscansChapterExtractor(foolslide.FoolslideChapterExtractor):
"url": "63d46b8883cc652dfe8bd5be4492160dd31f06a8", "url": "63d46b8883cc652dfe8bd5be4492160dd31f06a8",
"keyword": "4d92576e23ee2a5058fd150690230091ee091868", "keyword": "4d92576e23ee2a5058fd150690230091ee091868",
})] })]
class SeaotterscansMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Extractor for manga from reader.seaotterscans.com

    Only site-specific attributes are defined here; the extraction
    logic comes from foolslide.FoolslideMangaExtractor.
    """
    category = "seaotterscans"
    # Raw string literal: "\." is not a valid escape sequence in a normal
    # string and triggers a warning on newer Python versions.  The regex
    # text itself is unchanged.
    pattern = foolslide.manga_pattern(r"reader\.seaotterscans\.com")
    test = [("https://reader.seaotterscans.com/series/marry_me/", {
        "url": "fdbacabfa566a6baeb3f01bb46cbda0577bd4bbe",
    })]

View File

@@ -6,14 +6,23 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract manga-chapters from http://sensescans.com/""" """Extractors for http://sensescans.com/"""
from . import foolslide from . import foolslide
class SensescansChapterExtractor(foolslide.FoolslideChapterExtractor): class SensescansExtractor():
"""Extractor for manga-chapters from sensescans.com""" """Base class for extractors for sensescans.com"""
category = "sensescans" category = "sensescans"
def __init__(self, match):
url = "http://sensescans.com/reader" + match.group(1)
super().__init__(match, url)
class SensescansChapterExtractor(SensescansExtractor,
foolslide.FoolslideChapterExtractor):
"""Extractor for manga-chapters from sensescans.com"""
pattern = [(r"(?:https?://)?(?:www\.|reader\.)?sensescans\.com" pattern = [(r"(?:https?://)?(?:www\.|reader\.)?sensescans\.com"
r"(?:/reader)?(" + foolslide.CHAPTER_RE)] r"(?:/reader)?(" + foolslide.CHAPTER_RE)]
test = [ test = [
@@ -29,6 +38,12 @@ class SensescansChapterExtractor(foolslide.FoolslideChapterExtractor):
}), }),
] ]
def __init__(self, match):
url = "http://sensescans.com/reader" + match.group(1) class SensescansMangaExtractor(SensescansExtractor,
super().__init__(match, url) foolslide.FoolslideMangaExtractor):
"""Extractor for manga from sensescans.com"""
pattern = [(r"(?:https?://)?(?:www\.|reader\.)?sensescans\.com"
r"(?:/reader)?(" + foolslide.MANGA_RE)]
test = [("http://sensescans.com/reader/series/hakkenden/", {
"url": "2360ccb0ead0ff2f5e27b7aef7eb17b9329de2f2",
})]

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract manga-chapters from http://www.slide.world-three.org/""" """Extractors for http://www.slide.world-three.org/"""
from . import foolslide from . import foolslide
@@ -28,3 +28,13 @@ class WorldthreeChapterExtractor(foolslide.FoolslideChapterExtractor):
}), }),
] ]
scheme = "http" scheme = "http"
class WorldthreeMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Extractor for manga from slide.world-three.org

    Only site-specific attributes are defined here; the extraction
    logic comes from foolslide.FoolslideMangaExtractor.
    """
    category = "worldthree"
    # Raw string literal: "\." is not a valid escape sequence in a normal
    # string and triggers a warning on newer Python versions.  The regex
    # text itself is unchanged.
    pattern = foolslide.manga_pattern(r"(?:www\.)?slide\.world-three\.org")
    test = [("http://www.slide.world-three.org/series/black_bullet/", {
        "url": "5743b93512d26e6b540d90a7a5d69208b6d4a738",
    })]
    # Build request URLs with plain http instead of the base-class scheme.
    scheme = "http"

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract manga-chapters from https://yomanga.co/""" """Extractors for https://yomanga.co/"""
from . import foolslide from . import foolslide
@@ -19,3 +19,12 @@ class YomangaChapterExtractor(foolslide.FoolslideChapterExtractor):
"url": "4b5d8fc5902f03647cc876cf6643849e5bc05455", "url": "4b5d8fc5902f03647cc876cf6643849e5bc05455",
})] })]
single = False single = False
class YomangaMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Manga (series overview) extractor for yomanga.co.

    Only site-specific attributes are defined here; the extraction
    logic comes from foolslide.FoolslideMangaExtractor.
    """
    category = "yomanga"
    # Optional "www." prefix; the reader lives below "/reader".
    pattern = foolslide.manga_pattern(
        r"(?:www\.)?yomanga\.co/reader"
    )
    test = [
        (
            "https://yomanga.co/reader/series/6_weapons/",
            {"url": "19a4828d3a06a4c89c885847c83af54ec1add0f7"},
        ),
    ]

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract manga-chapters from https://yonkouprod.com/""" """Extractors for https://yonkouprod.com/"""
from . import foolslide from . import foolslide
@@ -19,3 +19,12 @@ class YonkouprodChapterExtractor(foolslide.FoolslideChapterExtractor):
"url": "7647850e2b1ad11c2baa9628755bf7f186350a0b", "url": "7647850e2b1ad11c2baa9628755bf7f186350a0b",
"keyword": "dc1b5764c71e9d93b2d4b18547feb372cd76f730", "keyword": "dc1b5764c71e9d93b2d4b18547feb372cd76f730",
})] })]
class YonkouprodMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Manga (series overview) extractor for yonkouprod.com.

    Only site-specific attributes are defined here; the extraction
    logic comes from foolslide.FoolslideMangaExtractor.
    """
    category = "yonkouprod"
    # Optional "www." prefix; the reader lives below "/reader".
    pattern = foolslide.manga_pattern(
        r"(?:www\.)?yonkouprod\.com/reader"
    )
    test = [
        (
            "https://yonkouprod.com/reader/series/attack-on-titan/",
            {"url": "33bc7a08a6fbf41cf609bdd000d16893d55a3f29"},
        ),
    ]