simplify code by using a MangaExtractor base class

This commit is contained in:
Mike Fährmann
2017-05-20 11:27:43 +02:00
parent 2974d782a3
commit f226417420
15 changed files with 134 additions and 216 deletions

View File

@@ -8,15 +8,16 @@
"""Extract manga chapters from https://bato.to/""" """Extract manga chapters from https://bato.to/"""
from .common import Extractor, AsynchronousExtractor, Message from .common import MangaExtractor, AsynchronousExtractor, Message
from .. import text, util, exception from .. import text, util, exception
from ..cache import cache from ..cache import cache
import re import re
class BatotoExtractor(Extractor): class BatotoExtractor():
"""Base class for batoto extractors""" """Base class for batoto extractors"""
category = "batoto" category = "batoto"
scheme = "https"
root = "https://bato.to" root = "https://bato.to"
def login(self): def login(self):
@@ -56,34 +57,19 @@ class BatotoExtractor(Extractor):
return {c: response.cookies[c] for c in ("member_id", "pass_hash")} return {c: response.cookies[c] for c in ("member_id", "pass_hash")}
class BatotoMangaExtractor(BatotoExtractor): class BatotoMangaExtractor(BatotoExtractor, MangaExtractor):
"""Extractor for mangas from bato.to""" """Extractor for manga from bato.to"""
subcategory = "manga" pattern = [r"(?:https?://)?(?:www\.)?(bato\.to/comic/_/comics/.*-r\d+)"]
pattern = [r"(?:https?://)?(?:www\.)?bato\.to/comic/_/comics/.*-r\d+"]
test = [("http://bato.to/comic/_/comics/aria-r2007", { test = [("http://bato.to/comic/_/comics/aria-r2007", {
"url": "a38585b0339587666d772ee06f2a60abdbf42a97", "url": "a38585b0339587666d772ee06f2a60abdbf42a97",
})] })]
def __init__(self, match): def chapters(self, page):
BatotoExtractor.__init__(self)
self.url = match.group(0)
def items(self):
self.login()
yield Message.Version, 1
for chapter in self.get_chapters():
yield Message.Queue, chapter
def get_chapters(self):
"""Return a list of all chapter urls"""
# TODO: filter by language / translator # TODO: filter by language / translator
needle = ('<td style="border-top:0;">\n ' needle = ('<td style="border-top:0;">\n '
'<a href="http://bato.to/reader#') '<a href="http://bato.to/reader#')
page = self.request(self.url).text return [self.root + "/reader#" + mangahash
return reversed([ for mangahash in text.extract_iter(page, needle, '"')]
self.root + "/reader#" + mangahash
for mangahash in text.extract_iter(page, needle, '"')
])
class BatotoChapterExtractor(BatotoExtractor, AsynchronousExtractor): class BatotoChapterExtractor(BatotoExtractor, AsynchronousExtractor):

View File

@@ -81,6 +81,40 @@ class AsynchronousExtractor(Extractor):
put(None) put(None)
class MangaExtractor(Extractor):
subcategory = "manga"
scheme = "http"
root = ""
reverse = True
def __init__(self, match, url=None):
Extractor.__init__(self)
self.url = url or self.scheme + "://" + match.group(1)
def items(self):
self.login()
page = self.request(self.url).text
chapters = self.chapters(page)
if self.reverse:
chapters.reverse()
yield Message.Version, 1
for chapter in chapters:
yield Message.Queue, chapter
def login(self):
"""Login and set necessary cookies"""
def chapters(self, page):
"""Return a list of all chapter urls"""
return [self.root + path for path in self.chapter_paths(page)]
def chapter_paths(self, page):
return []
def safe_request(session, url, method="GET", *args, **kwargs): def safe_request(session, url, method="GET", *args, **kwargs):
tries = 0 tries = 0
while True: while True:

View File

@@ -8,7 +8,7 @@
"""Base classes for extractors for FoOlSlide based sites""" """Base classes for extractors for FoOlSlide based sites"""
from .common import Extractor, Message from .common import Extractor, MangaExtractor, Message
from .. import text, util from .. import text, util
import json import json
@@ -96,24 +96,17 @@ class FoolslideChapterExtractor(Extractor):
return json.loads(text.extract(page, needle, ";", pos)[0]) return json.loads(text.extract(page, needle, ";", pos)[0])
class FoolslideMangaExtractor(Extractor): class FoolslideMangaExtractor(MangaExtractor):
"""Base class for manga extractors for FoOlSlide based sites""" """Base class for manga extractors for FoOlSlide based sites"""
subcategory = "manga"
scheme = "https" scheme = "https"
def __init__(self, match, url=None): def request(self, url):
Extractor.__init__(self) return MangaExtractor.request(
self.url = url or self.scheme + "://" + match.group(1) self, url, encoding="utf-8", method="post", data={"adult": "true"}
)
def items(self): def chapters(self, page):
yield Message.Version, 1
for url in self.chapters():
yield Message.Queue, url
def chapters(self):
"""Return a list of all chapter urls""" """Return a list of all chapter urls"""
page = self.request(self.url, encoding="utf-8", return list(text.extract_iter(
method="post", data={"adult": "true"}).text
return reversed(list(text.extract_iter(
page, '<div class="title"><a href="', '"' page, '<div class="title"><a href="', '"'
))) ))

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2015,2016 Mike Fährmann # Copyright 2015-2017 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -8,32 +8,21 @@
"""Extract images from http://www.hbrowse.com/""" """Extract images from http://www.hbrowse.com/"""
from .common import Extractor, Message from .common import Extractor, MangaExtractor, Message
from .. import text from .. import text
import json import json
class HbrowseMangaExtractor(Extractor): class HbrowseMangaExtractor(MangaExtractor):
"""Extractor for mangas from hbrowse.com""" """Extractor for manga from hbrowse.com"""
category = "hbrowse" category = "hbrowse"
subcategory = "manga" pattern = [r"(?:https?://)?((?:www\.)?hbrowse\.com/\d+)/?$"]
pattern = [r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/?$"] reverse = False
test = [("http://www.hbrowse.com/10363", { test = [("http://www.hbrowse.com/10363", {
"url": "4d9def5df21c23f8c3d36de2076c189c02ea43bd", "url": "4d9def5df21c23f8c3d36de2076c189c02ea43bd",
})] })]
def __init__(self, match): def chapters(self, page):
Extractor.__init__(self)
self.gid = match.group(1)
def items(self):
yield Message.Version, 1
for url in self.get_chapters():
yield Message.Queue, url
def get_chapters(self):
"""Return a list of all chapter urls"""
page = self.request("http://www.hbrowse.com/" + self.gid).text
needle = '<td class="listMiddle">\n<a class="listLink" href="' needle = '<td class="listMiddle">\n<a class="listLink" href="'
return list(text.extract_iter(page, needle, '"')) return list(text.extract_iter(page, needle, '"'))

View File

@@ -1,23 +1,24 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2016 Mike Fährmann # Copyright 2016-2017 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract hentaimanga from https://hentai2read.com/""" """Extract hentai-manga from https://hentai2read.com/"""
from .common import MangaExtractor
from .. import text from .. import text
from . import hentaicdn from . import hentaicdn
import re import re
import json import json
class Hentai2readMangaExtractor(hentaicdn.HentaicdnMangaExtractor): class Hentai2readMangaExtractor(MangaExtractor):
"""Extractor for mangas from hentai2read.com""" """Extractor for hmanga from hentai2read.com"""
category = "hentai2read" category = "hentai2read"
pattern = [r"(?:https?://)?(?:www\.)?hentai2read\.com/([^/]+)/?$"] pattern = [r"(?:https?://)?(?:www\.)?(hentai2read\.com/[^/]+/?)$"]
test = [ test = [
("http://hentai2read.com/amazon_elixir/", { ("http://hentai2read.com/amazon_elixir/", {
"url": "d1f87b71d3c97b49a478cdfb6ae96b2d9520ab78", "url": "d1f87b71d3c97b49a478cdfb6ae96b2d9520ab78",
@@ -27,16 +28,11 @@ class Hentai2readMangaExtractor(hentaicdn.HentaicdnMangaExtractor):
}) })
] ]
def __init__(self, match): def chapters(self, page):
hentaicdn.HentaicdnMangaExtractor.__init__(self)
self.url_title = match.group(1)
def get_chapters(self):
page = text.extract( page = text.extract(
self.request("http://hentai2read.com/" + self.url_title).text, page, '<ul class="nav-chapters remove-margin-b">', '</ul>\n</div>'
'<ul class="nav-chapters remove-margin-b">', '</ul>\n</div>'
)[0] )[0]
return text.extract_iter(page, '<li>\n<a href="', '"') return list(text.extract_iter(page, '<li>\n<a href="', '"'))
class Hentai2readChapterExtractor(hentaicdn.HentaicdnChapterExtractor): class Hentai2readChapterExtractor(hentaicdn.HentaicdnChapterExtractor):

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2016 Mike Fährmann # Copyright 2016-2017 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -13,19 +13,6 @@ from .. import text
import json import json
class HentaicdnMangaExtractor(Extractor):
"""Base class for extractors for mangas"""
subcategory = "manga"
def items(self):
yield Message.Version, 1
for chapter in reversed(list(self.get_chapters())):
yield Message.Queue, chapter
def get_chapters(self):
"""Return a list of all chapter urls"""
class HentaicdnChapterExtractor(Extractor): class HentaicdnChapterExtractor(Extractor):
"""Base class for extractors for a single manga chapter""" """Base class for extractors for a single manga chapter"""
subcategory = "chapter" subcategory = "chapter"

View File

@@ -6,17 +6,19 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract hentaimanga from https://hentaihere.com/""" """Extract hentai-manga from https://hentaihere.com/"""
from .common import MangaExtractor
from .. import text from .. import text
from . import hentaicdn from . import hentaicdn
import re import re
class HentaihereMangaExtractor(hentaicdn.HentaicdnMangaExtractor): class HentaihereMangaExtractor(MangaExtractor):
"""Extractor for mangas from hentaihere.com""" """Extractor for hmanga from hentaihere.com"""
category = "hentaihere" category = "hentaihere"
pattern = [r"(?:https?://)?(?:www\.)?hentaihere\.com/m/S(\d+)/?$"] pattern = [r"(?:https?://)?(?:www\.)?(hentaihere\.com/m/S\d+)/?$"]
scheme = "https"
test = [ test = [
("https://hentaihere.com/m/S13812", { ("https://hentaihere.com/m/S13812", {
"url": "d1ba6e28bb2162e844f8559c2b2725ba0a093559", "url": "d1ba6e28bb2162e844f8559c2b2725ba0a093559",
@@ -26,15 +28,10 @@ class HentaihereMangaExtractor(hentaicdn.HentaicdnMangaExtractor):
}), }),
] ]
def __init__(self, match): def chapters(self, page):
hentaicdn.HentaicdnMangaExtractor.__init__(self) return list(text.extract_iter(
self.gid = match.group(1) page, '<li class="sub-chp clearfix">\n<a href="', '"'
))
def get_chapters(self):
return text.extract_iter(
self.request("https://hentaihere.com/m/S" + self.gid).text,
'<li class="sub-chp clearfix">\n<a href="', '"'
)
class HentaihereChapterExtractor(hentaicdn.HentaicdnChapterExtractor): class HentaihereChapterExtractor(hentaicdn.HentaicdnChapterExtractor):

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2016 Mike Fährmann # Copyright 2016-2017 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -29,13 +29,9 @@ class KisscomicComicExtractor(KisscomicExtractor,
"url": "8c180e2ec2492712b089ca091c54909cb0fe3d4a", "url": "8c180e2ec2492712b089ca091c54909cb0fe3d4a",
})] })]
def get_chapters(self): def chapter_paths(self, page):
"""Return a list of all chapter urls"""
page = self.request(self.url).text
pos = page.find('<div class="list-chapter mCustomScrollbar">') pos = page.find('<div class="list-chapter mCustomScrollbar">')
return reversed(list( return text.extract_iter(page, '<li><a href="', '"', pos)
text.extract_iter(page, '<li><a href="', '"', pos)
))
class KisscomicIssueExtractor(KisscomicExtractor, class KisscomicIssueExtractor(KisscomicExtractor,

View File

@@ -8,7 +8,7 @@
"""Extract manga-chapters and entire manga from http://kissmanga.com/""" """Extract manga-chapters and entire manga from http://kissmanga.com/"""
from .common import Extractor, Message from .common import Extractor, MangaExtractor, Message
from .. import text, cloudflare, aes from .. import text, cloudflare, aes
from ..cache import cache from ..cache import cache
import re import re
@@ -38,25 +38,15 @@ class KissmangaExtractor(Extractor):
request = cloudflare.request_func request = cloudflare.request_func
class KissmangaMangaExtractor(KissmangaExtractor): class KissmangaMangaExtractor(KissmangaExtractor, MangaExtractor):
"""Extractor for mangas from kissmanga.com""" """Extractor for manga from kissmanga.com"""
subcategory = "manga"
pattern = [r"(?:https?://)?(?:www\.)?kissmanga\.com/Manga/[^/]+/?$"] pattern = [r"(?:https?://)?(?:www\.)?kissmanga\.com/Manga/[^/]+/?$"]
test = [("http://kissmanga.com/Manga/Dropout", { test = [("http://kissmanga.com/Manga/Dropout", {
"url": "992befdd64e178fe5af67de53f8b510860d968ca", "url": "992befdd64e178fe5af67de53f8b510860d968ca",
})] })]
def items(self): def chapter_paths(self, page):
yield Message.Version, 1 return text.extract_iter(page, '<td>\n<a href="', '"')
for chapter in self.get_chapters():
yield Message.Queue, self.root + chapter
def get_chapters(self):
"""Return a list of all chapter urls"""
page = self.request(self.url).text
return reversed(list(
text.extract_iter(page, '<td>\n<a href="', '"')
))
class KissmangaChapterExtractor(KissmangaExtractor): class KissmangaChapterExtractor(KissmangaExtractor):

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2015 Mike Fährmann # Copyright 2015-2017 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -8,36 +8,24 @@
"""Extract manga-chapters and entire manga from http://www.mangahere.co/""" """Extract manga-chapters and entire manga from http://www.mangahere.co/"""
from .common import Extractor, AsynchronousExtractor, Message from .common import MangaExtractor, AsynchronousExtractor, Message
from .. import text from .. import text
import re import re
class MangahereMangaExtractor(Extractor): class MangahereMangaExtractor(MangaExtractor):
"""Extractor for mangas from mangahere.co""" """Extractor for manga from mangahere.co"""
category = "mangahere" category = "mangahere"
subcategory = "manga" pattern = [r"(?:https?://)?((?:www\.)?mangahere\.co/manga/[^/]+/?)$"]
pattern = [r"(?:https?://)?(?:www\.)?mangahere\.co/manga/([^/]+)/?$"]
test = [("http://www.mangahere.co/manga/aria/", { test = [("http://www.mangahere.co/manga/aria/", {
"url": "77d96842292a6a341e8937816ed45cc09b538cf0", "url": "77d96842292a6a341e8937816ed45cc09b538cf0",
})] })]
def __init__(self, match): def chapters(self, page):
Extractor.__init__(self) return list(text.extract_iter(
self.url = match.group(0) + "/"
def items(self):
yield Message.Version, 1
for chapter in self.get_chapters():
yield Message.Queue, chapter
def get_chapters(self):
"""Return a list of all chapter urls"""
page = self.request(self.url).text
return reversed(list(text.extract_iter(
page, '<a class="color_0077" href="', '"', page, '<a class="color_0077" href="', '"',
page.index('<div class="detail_list">') page.index('<div class="detail_list">')
))) ))
class MangahereChapterExtractor(AsynchronousExtractor): class MangahereChapterExtractor(AsynchronousExtractor):

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2015 Mike Fährmann # Copyright 2015-2017 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -14,13 +14,12 @@ from .mangareader import MangareaderMangaExtractor, MangareaderChapterExtractor
class MangapandaBase(): class MangapandaBase():
"""Base class for mangapanda extractors""" """Base class for mangapanda extractors"""
category = "mangapanda" category = "mangapanda"
url_base = "http://www.mangapanda.com" root = "http://www.mangapanda.com"
class MangapandaMangaExtractor(MangapandaBase, MangareaderMangaExtractor): class MangapandaMangaExtractor(MangapandaBase, MangareaderMangaExtractor):
"""Extractor for mangas from mangapanda.com""" """Extractor for manga from mangapanda.com"""
subcategory = "manga" pattern = [r"(?:https?://)?((?:www\.)?mangapanda\.com/[^/]+)$"]
pattern = [r"(?:https?://)?(?:www\.)?mangapanda\.com(/[^/]+)$"]
test = [("http://www.mangapanda.com/mushishi", { test = [("http://www.mangapanda.com/mushishi", {
"url": "50a1ba730b85426b904da256c80f68ba6a8a2566", "url": "50a1ba730b85426b904da256c80f68ba6a8a2566",
})] })]

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2015, 2016 Mike Fährmann # Copyright 2015-2017 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -8,37 +8,23 @@
"""Extract manga-chapters and entire manga from http://mangapark.me/""" """Extract manga-chapters and entire manga from http://mangapark.me/"""
from .common import Extractor, Message from .common import Extractor, MangaExtractor, Message
from .. import text from .. import text
class MangaparkMangaExtractor(Extractor): class MangaparkMangaExtractor(MangaExtractor):
"""Extractor for mangas from mangapark.me""" """Extractor for manga from mangapark.me"""
category = "mangapark" category = "mangapark"
subcategory = "manga" pattern = [r"(?:https?://)?(?:www\.)?(mangapark\.me/manga/[^/]+)$"]
pattern = [r"(?:https?://)?(?:www\.)?mangapark\.me/manga/([^/]+)$"] root = "http://mangapark.me"
test = [("http://mangapark.me/manga/mushishi", { test = [("http://mangapark.me/manga/mushishi", {
"url": "9902e342af71af19a5ac20fcd01950b165acf119", "url": "9902e342af71af19a5ac20fcd01950b165acf119",
})] })]
url_base = "http://mangapark.me"
def __init__(self, match): def chapter_paths(self, page):
Extractor.__init__(self)
self.url_title = match.group(1)
def items(self):
yield Message.Version, 1
for chapter in self.get_chapters():
yield Message.Queue, self.url_base + chapter
def get_chapters(self):
"""Return a list of all chapter urls"""
page = self.request(self.url_base + "/manga/" + self.url_title).text
needle = '<a class="ch sts sts_1" target="_blank" href="' needle = '<a class="ch sts sts_1" target="_blank" href="'
pos = page.index('<div id="list" class="book-list">') pos = page.index('<div id="list" class="book-list">')
return reversed(list( return text.extract_iter(page, needle, '"', pos)
text.extract_iter(page, needle, '"', pos)
))
class MangaparkChapterExtractor(Extractor): class MangaparkChapterExtractor(Extractor):

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2015 Mike Fährmann # Copyright 2015-2017 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -8,7 +8,7 @@
"""Extract manga-chapters and entire manga from http://www.mangareader.net/""" """Extract manga-chapters and entire manga from http://www.mangareader.net/"""
from .common import AsynchronousExtractor, Extractor, Message from .common import AsynchronousExtractor, MangaExtractor, Message
from .. import text from .. import text
@@ -17,29 +17,21 @@ class MangareaderBase():
category = "mangareader" category = "mangareader"
directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"] directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"]
filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}" filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
url_base = "http://www.mangareader.net" root = "http://www.mangareader.net"
class MangareaderMangaExtractor(MangareaderBase, Extractor): class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
"""Extractor for mangas from mangareader.net""" """Extractor for manga from mangareader.net"""
subcategory = "manga" pattern = [r"(?:https?://)?((?:www\.)?mangareader\.net/[^/]+)$"]
pattern = [r"(?:https?://)?(?:www\.)?mangareader\.net(/[^/]+)$"] reverse = False
test = [("http://www.mangareader.net/mushishi", { test = [("http://www.mangareader.net/mushishi", {
"url": "249042420b67a07b32e7f6be4c7410b6d810b808", "url": "249042420b67a07b32e7f6be4c7410b6d810b808",
})] })]
def __init__(self, match): def chapter_paths(self, page):
Extractor.__init__(self) needle = '<div class="chico_manga"></div>\n<a href="'
self.url_title = match.group(1)
def items(self):
yield Message.Version, 1
url = self.url_base + self.url_title
page = self.request(url).text
needle = '<a href="' + self.url_title
pos = page.index('<div id="readmangasum">') pos = page.index('<div id="readmangasum">')
for chapter in text.extract_iter(page, needle, '"', pos): return text.extract_iter(page, needle, '"', pos)
yield Message.Queue, url + chapter
class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor): class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
@@ -61,7 +53,7 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
self.part, self.url_title, self.chapter = match.groups() self.part, self.url_title, self.chapter = match.groups()
def items(self): def items(self):
page = self.request(self.url_base + self.part).text page = self.request(self.root + self.part).text
data = self.get_job_metadata(page) data = self.get_job_metadata(page)
yield Message.Version, 1 yield Message.Version, 1
yield Message.Directory, data yield Message.Directory, data
@@ -75,7 +67,7 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
def get_job_metadata(self, chapter_page): def get_job_metadata(self, chapter_page):
"""Collect metadata for extractor-job""" """Collect metadata for extractor-job"""
page = self.request(self.url_base + self.url_title).text page = self.request(self.root + self.url_title).text
data = { data = {
"chapter": self.chapter, "chapter": self.chapter,
"lang": "en", "lang": "en",
@@ -119,7 +111,7 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
width , pos = extr(page, '<img id="img" width="', '"', pos) width , pos = extr(page, '<img id="img" width="', '"', pos)
height, pos = extr(page, ' height="', '"', pos) height, pos = extr(page, ' height="', '"', pos)
image, pos = extr(page, ' src="', '"', pos) image, pos = extr(page, ' src="', '"', pos)
return self.url_base + url, image, text.nameext_from_url(image, { return self.root + url, image, text.nameext_from_url(image, {
"width": width, "width": width,
"height": height, "height": height,
}) })

View File

@@ -30,12 +30,8 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineExtractor,
"url": "c5a530538a30b176916e30cbe223a93d83cb2691", "url": "c5a530538a30b176916e30cbe223a93d83cb2691",
})] })]
def get_chapters(self): def chapter_paths(self, page):
"""Return a list of all chapter urls""" return text.extract_iter(page, ' <li><a href="', '"')
page = self.request(self.url).text
return reversed(list(
text.extract_iter(page, ' <li><a href="', '"')
))
class ReadcomiconlineIssueExtractor(ReadcomiconlineExtractor, class ReadcomiconlineIssueExtractor(ReadcomiconlineExtractor,

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2015 Mike Fährmann # Copyright 2015-2017 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -8,36 +8,25 @@
"""Extract manga pages from http://www.thespectrum.net/manga_scans/""" """Extract manga pages from http://www.thespectrum.net/manga_scans/"""
from .common import Extractor, AsynchronousExtractor, Message from .common import MangaExtractor, AsynchronousExtractor, Message
from .. import text from .. import text
class SpectrumnexusMangaExtractor(Extractor): class SpectrumnexusMangaExtractor(MangaExtractor):
"""Extractor for mangas from thespectrum.net""" """Extractor for manga from thespectrum.net"""
category = "spectrumnexus" category = "spectrumnexus"
subcategory = "manga" pattern = [r"(?:https?://)?(view\.thespectrum\.net/series/[^.]+\.html)#?$"]
pattern = [r"(?:https?://)?view\.thespectrum\.net/series/([^\.]+)\.html$"] reverse = False
test = [("http://view.thespectrum.net/series/kare-kano-volume-01.html", { test = [("http://view.thespectrum.net/series/kare-kano-volume-01.html", {
"url": "b2b175aad5ef1701cc4aee7c24f1ca3a93aba9cb", "url": "b2b175aad5ef1701cc4aee7c24f1ca3a93aba9cb",
})] })]
url_base = "http://view.thespectrum.net/series/"
def __init__(self, match): def chapters(self, page):
Extractor.__init__(self) page = text.extract(page, 'class="selectchapter"', '</select>')[0]
self.url = self.url_base + match.group(1) + ".html" return [
self.url + "?ch=" + chapter.replace(" ", "+")
def items(self): for chapter in text.extract_iter(page, '<option value="', '"')
yield Message.Version, 1 ]
for chapter in self.get_chapters():
yield Message.Queue, self.url + "?ch=" + chapter.replace(" ", "+")
def get_chapters(self):
"""Return a list of all chapter identifiers"""
page = self.request(self.url).text
page = text.extract(
page, '<select class="selectchapter"', '</select>'
)[0]
return text.extract_iter(page, '<option value="', '"')
class SpectrumnexusChapterExtractor(AsynchronousExtractor): class SpectrumnexusChapterExtractor(AsynchronousExtractor):