Merge branch 'multi-extractor'
This commit is contained in:
@@ -8,24 +8,25 @@
|
|||||||
|
|
||||||
"""Extract image-urls from http://behoimi.org/"""
|
"""Extract image-urls from http://behoimi.org/"""
|
||||||
|
|
||||||
from .booru import JSONBooruExtractor
|
from . import booru
|
||||||
|
|
||||||
info = {
|
class ThreeDeeBooruExtractor(booru.JSONBooruExtractor):
|
||||||
"category": "3dbooru",
|
"""Base class for 3dbooru extractors"""
|
||||||
"extractor": "ThreeDeeBooruExtractor",
|
category = "3dbooru"
|
||||||
"directory": ["{category}", "{tags}"],
|
api_url = "http://behoimi.org/post/index.json"
|
||||||
"filename": "{category}_{id}_{md5}.{extension}",
|
headers = {
|
||||||
"pattern": [
|
"Referer": "http://behoimi.org/post/show/",
|
||||||
r"(?:https?://)?(?:www\.)?behoimi\.org/post(?:/(?:index)?)?\?tags=([^&]+).*",
|
"User-Agent": "Mozilla/5.0",
|
||||||
],
|
}
|
||||||
}
|
|
||||||
|
|
||||||
class ThreeDeeBooruExtractor(JSONBooruExtractor):
|
class ThreeDeeBooruTagExtractor(ThreeDeeBooruExtractor, booru.BooruTagExtractor):
|
||||||
|
"""Extract images from 3dbooru based on search-tags"""
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post(?:/(?:index)?)?\?tags=([^&]+)"]
|
||||||
|
|
||||||
def __init__(self, match):
|
class ThreeDeeBooruPoolExtractor(ThreeDeeBooruExtractor, booru.BooruPoolExtractor):
|
||||||
JSONBooruExtractor.__init__(self, match, info)
|
"""Extract image-pools from 3dbooru"""
|
||||||
self.api_url = "http://behoimi.org/post/index.json"
|
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(\d+)"]
|
||||||
self.headers = {
|
|
||||||
"Referer": "http://behoimi.org/post/show/",
|
class ThreeDeeBooruPostExtractor(ThreeDeeBooruExtractor, booru.BooruPostExtractor):
|
||||||
"User-Agent": "Mozilla/5.0"
|
"""Extract single images from 3dbooru"""
|
||||||
}
|
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(\d+)"]
|
||||||
|
|||||||
@@ -10,23 +10,14 @@
|
|||||||
|
|
||||||
from .chan import ChanExtractor
|
from .chan import ChanExtractor
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "4chan",
|
|
||||||
"extractor": "FourChanExtractor",
|
|
||||||
"directory": ["{category}", "{board}-{thread}"],
|
|
||||||
"filename": "{tim}-{filename}{ext}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?boards\.4chan\.org/([^/]+)/thread/(\d+).*",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class FourChanExtractor(ChanExtractor):
|
class FourChanExtractor(ChanExtractor):
|
||||||
|
|
||||||
|
category = "4chan"
|
||||||
|
pattern = [r"(?:https?://)?boards\.4chan\.org/([^/]+)/thread/(\d+)"]
|
||||||
api_url = "https://a.4cdn.org/{board}/thread/{thread}.json"
|
api_url = "https://a.4cdn.org/{board}/thread/{thread}.json"
|
||||||
file_url = "https://i.4cdn.org/{board}/{tim}{ext}"
|
file_url = "https://i.4cdn.org/{board}/{tim}{ext}"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ChanExtractor.__init__(
|
ChanExtractor.__init__(
|
||||||
self, info["category"],
|
self, match.group(1), match.group(2)
|
||||||
match.group(1), match.group(2)
|
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -10,23 +10,14 @@
|
|||||||
|
|
||||||
from .chan import ChanExtractor
|
from .chan import ChanExtractor
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "8chan",
|
|
||||||
"extractor": "InfinityChanExtractor",
|
|
||||||
"directory": ["{category}", "{board}-{thread}"],
|
|
||||||
"filename": "{tim}-{filename}{ext}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?(?:www\.)?8ch\.net/([^/]+)/res/(\d+).*",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class InfinityChanExtractor(ChanExtractor):
|
class InfinityChanExtractor(ChanExtractor):
|
||||||
|
|
||||||
|
category = "8chan"
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?8ch\.net/([^/]+)/res/(\d+)"]
|
||||||
api_url = "https://8ch.net/{board}/res/{thread}.json"
|
api_url = "https://8ch.net/{board}/res/{thread}.json"
|
||||||
file_url = "https://8ch.net/{board}/src/{tim}{ext}"
|
file_url = "https://8ch.net/{board}/src/{tim}{ext}"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ChanExtractor.__init__(
|
ChanExtractor.__init__(
|
||||||
self, info["category"],
|
self, match.group(1), match.group(2)
|
||||||
match.group(1), match.group(2)
|
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -46,11 +46,11 @@ modules = [
|
|||||||
|
|
||||||
def find(url):
|
def find(url):
|
||||||
"""Find extractor suitable for handling the given url"""
|
"""Find extractor suitable for handling the given url"""
|
||||||
for pattern, module, klass in _list_patterns():
|
for pattern, klass in _list_patterns():
|
||||||
match = re.match(pattern, url)
|
match = re.match(pattern, url)
|
||||||
if match:
|
if match:
|
||||||
return klass(match), module.info
|
return klass(match)
|
||||||
return None, None
|
return None
|
||||||
|
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
# internals
|
# internals
|
||||||
@@ -59,15 +59,22 @@ _cache = []
|
|||||||
_module_iter = iter(modules)
|
_module_iter = iter(modules)
|
||||||
|
|
||||||
def _list_patterns():
|
def _list_patterns():
|
||||||
"""Yield all available (pattern, module, klass) tuples"""
|
"""Yield all available (pattern, class) tuples"""
|
||||||
for entry in _cache:
|
for entry in _cache:
|
||||||
yield entry
|
yield entry
|
||||||
|
|
||||||
for module_name in _module_iter:
|
for module_name in _module_iter:
|
||||||
module = importlib.import_module("."+module_name, __package__)
|
module = importlib.import_module("."+module_name, __package__)
|
||||||
klass = getattr(module, module.info["extractor"])
|
for klass in _get_classes(module):
|
||||||
userpatterns = config.get(("extractor", module_name, "pattern"), default=[])
|
for pattern in klass.pattern:
|
||||||
for pattern in userpatterns + module.info["pattern"]:
|
etuple = (pattern, klass)
|
||||||
etuple = (pattern, module, klass)
|
_cache.append(etuple)
|
||||||
_cache.append(etuple)
|
yield etuple
|
||||||
yield etuple
|
|
||||||
|
def _get_classes(module):
|
||||||
|
"""Return a list of all extractor classes in a module"""
|
||||||
|
return [
|
||||||
|
klass for klass in module.__dict__.values() if (
|
||||||
|
hasattr(klass, "pattern") and klass.__module__ == module.__name__
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|||||||
@@ -10,21 +10,14 @@
|
|||||||
|
|
||||||
from .common import AsynchronousExtractor, Message
|
from .common import AsynchronousExtractor, Message
|
||||||
from .. import text, iso639_1
|
from .. import text, iso639_1
|
||||||
import os.path
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "batoto",
|
|
||||||
"extractor": "BatotoExtractor",
|
|
||||||
"directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"],
|
|
||||||
"filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class BatotoExtractor(AsynchronousExtractor):
|
class BatotoExtractor(AsynchronousExtractor):
|
||||||
|
|
||||||
|
category = "batoto"
|
||||||
|
directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"]
|
||||||
|
filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)"]
|
||||||
url = "https://bato.to/areader"
|
url = "https://bato.to/areader"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
@@ -68,7 +61,7 @@ class BatotoExtractor(AsynchronousExtractor):
|
|||||||
manga, pos = extr(page, "document.title = '", " - ", pos)
|
manga, pos = extr(page, "document.title = '", " - ", pos)
|
||||||
match = re.match(r"(Vol.(\d+) )?Ch.(\d+)([^:]*)(: (.+))?", cinfo)
|
match = re.match(r"(Vol.(\d+) )?Ch.(\d+)([^:]*)(: (.+))?", cinfo)
|
||||||
return {
|
return {
|
||||||
"category": info["category"],
|
"category": self.category,
|
||||||
"token": self.token,
|
"token": self.token,
|
||||||
"manga": manga,
|
"manga": manga,
|
||||||
"volume": match.group(2) or "",
|
"volume": match.group(2) or "",
|
||||||
|
|||||||
@@ -12,20 +12,20 @@ from .common import Extractor, Message
|
|||||||
from .. import text
|
from .. import text
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
import json
|
import json
|
||||||
import os.path
|
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
class BooruExtractor(Extractor):
|
class BooruExtractor(Extractor):
|
||||||
|
|
||||||
|
info = {}
|
||||||
|
headers = {}
|
||||||
|
page = "page"
|
||||||
api_url = ""
|
api_url = ""
|
||||||
|
category = ""
|
||||||
|
|
||||||
def __init__(self, match, info):
|
def __init__(self):
|
||||||
Extractor.__init__(self)
|
Extractor.__init__(self)
|
||||||
self.info = info
|
self.params = {"limit": 50}
|
||||||
self.tags = text.unquote(match.group(1))
|
self.setup()
|
||||||
self.page = "page"
|
|
||||||
self.params = {"tags": self.tags}
|
|
||||||
self.headers = {}
|
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
yield Message.Version, 1
|
yield Message.Version, 1
|
||||||
@@ -40,6 +40,9 @@ class BooruExtractor(Extractor):
|
|||||||
def items_impl(self):
|
def items_impl(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def setup(self):
|
||||||
|
pass
|
||||||
|
|
||||||
def update_page(self, reset=False):
|
def update_page(self, reset=False):
|
||||||
"""Update the value of the 'page' parameter"""
|
"""Update the value of the 'page' parameter"""
|
||||||
# Override this method in derived classes if necessary.
|
# Override this method in derived classes if necessary.
|
||||||
@@ -51,14 +54,14 @@ class BooruExtractor(Extractor):
|
|||||||
|
|
||||||
def get_job_metadata(self):
|
def get_job_metadata(self):
|
||||||
"""Collect metadata for extractor-job"""
|
"""Collect metadata for extractor-job"""
|
||||||
|
# Override this method in derived classes
|
||||||
return {
|
return {
|
||||||
"category": self.info["category"],
|
"category": self.category,
|
||||||
"tags": self.tags
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_file_metadata(self, data):
|
def get_file_metadata(self, data):
|
||||||
"""Collect metadata for a downloadable file"""
|
"""Collect metadata for a downloadable file"""
|
||||||
data["category"] = self.info["category"]
|
data["category"] = self.category
|
||||||
return text.nameext_from_url(self.get_file_url(data), data)
|
return text.nameext_from_url(self.get_file_url(data), data)
|
||||||
|
|
||||||
def get_file_url(self, data):
|
def get_file_url(self, data):
|
||||||
@@ -78,10 +81,10 @@ class JSONBooruExtractor(BooruExtractor):
|
|||||||
self.request(self.api_url, verify=True, params=self.params,
|
self.request(self.api_url, verify=True, params=self.params,
|
||||||
headers=self.headers).text
|
headers=self.headers).text
|
||||||
)
|
)
|
||||||
if len(images) == 0:
|
|
||||||
return
|
|
||||||
for data in images:
|
for data in images:
|
||||||
yield data
|
yield data
|
||||||
|
if len(images) < self.params["limit"]:
|
||||||
|
return
|
||||||
self.update_page()
|
self.update_page()
|
||||||
|
|
||||||
|
|
||||||
@@ -93,8 +96,56 @@ class XMLBooruExtractor(BooruExtractor):
|
|||||||
root = ET.fromstring(
|
root = ET.fromstring(
|
||||||
self.request(self.api_url, verify=True, params=self.params).text
|
self.request(self.api_url, verify=True, params=self.params).text
|
||||||
)
|
)
|
||||||
if len(root) == 0:
|
|
||||||
return
|
|
||||||
for item in root:
|
for item in root:
|
||||||
yield item.attrib
|
yield item.attrib
|
||||||
|
if len(root) < self.params["limit"]:
|
||||||
|
return
|
||||||
self.update_page()
|
self.update_page()
|
||||||
|
|
||||||
|
|
||||||
|
class BooruTagExtractor(BooruExtractor):
    """Extractor for all images matching a tag search"""

    directory_fmt = ["{category}", "{tags}"]
    filename_fmt = "{category}_{id}_{md5}.{extension}"

    def __init__(self, match):
        """Take the search-tags from the first group of 'match'"""
        BooruExtractor.__init__(self)
        tags = text.unquote(match.group(1))
        self.tags = tags
        # the tags are sent along as the 'tags' request parameter
        self.params["tags"] = tags

    def get_job_metadata(self):
        """Return the category and the searched tags as job metadata"""
        metadata = {"category": self.category}
        metadata["tags"] = self.tags
        return metadata
|
||||||
|
|
||||||
|
|
||||||
|
class BooruPoolExtractor(BooruExtractor):
    """Extractor for the images of a single pool"""

    directory_fmt = ["{category}", "pool", "{pool}"]
    filename_fmt = "{category}_{id}_{md5}.{extension}"

    def __init__(self, match):
        """Take the pool-id from the first group of 'match'"""
        BooruExtractor.__init__(self)
        pool_id = match.group(1)
        self.pool = pool_id
        # the pool is requested through the 'tags' parameter as 'pool:<id>'
        self.params["tags"] = "pool:" + pool_id

    def get_job_metadata(self):
        """Return the category and the pool-id as job metadata"""
        metadata = {"category": self.category}
        metadata["pool"] = self.pool
        return metadata
|
||||||
|
|
||||||
|
|
||||||
|
class BooruPostExtractor(BooruExtractor):
    """Extractor for a single post, selected by its id"""

    directory_fmt = ["{category}"]
    filename_fmt = "{category}_{id}_{md5}.{extension}"

    def __init__(self, match):
        """Take the post-id from the first group of 'match'"""
        BooruExtractor.__init__(self)
        post_id = match.group(1)
        self.post = post_id
        # the post is requested through the 'tags' parameter as 'id:<post>'
        self.params["tags"] = "id:" + post_id
|
||||||
|
|||||||
@@ -13,13 +13,15 @@ from .. import text
|
|||||||
|
|
||||||
class ChanExtractor(Extractor):
|
class ChanExtractor(Extractor):
|
||||||
|
|
||||||
|
directory_fmt = ["{category}", "{board}-{thread}"]
|
||||||
|
filename_fmt = "{tim}-{filename}{ext}"
|
||||||
api_url = ""
|
api_url = ""
|
||||||
file_url = ""
|
file_url = ""
|
||||||
|
|
||||||
def __init__(self, category, board, thread):
|
def __init__(self, board, thread):
|
||||||
Extractor.__init__(self)
|
Extractor.__init__(self)
|
||||||
self.metadata = {
|
self.metadata = {
|
||||||
"category": category,
|
"category": self.category,
|
||||||
"board": board,
|
"board": board,
|
||||||
"thread": thread,
|
"thread": thread,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,20 +8,21 @@
|
|||||||
|
|
||||||
"""Extract image-urls from https://danbooru.donmai.us/"""
|
"""Extract image-urls from https://danbooru.donmai.us/"""
|
||||||
|
|
||||||
from .booru import JSONBooruExtractor
|
from . import booru
|
||||||
|
|
||||||
info = {
|
class DanbooruExtractor(booru.JSONBooruExtractor):
|
||||||
"category": "danbooru",
|
"""Base class for danbooru extractors"""
|
||||||
"extractor": "DanbooruExtractor",
|
category = "danbooru"
|
||||||
"directory": ["{category}", "{tags}"],
|
api_url = "https://danbooru.donmai.us/posts.json"
|
||||||
"filename": "{category}_{id}_{md5}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts\?(?:utf8=%E2%9C%93&)?tags=([^&]+).*",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class DanbooruExtractor(JSONBooruExtractor):
|
class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor):
|
||||||
|
"""Extract images from danbooru based on search-tags"""
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts\?(?:utf8=%E2%9C%93&)?tags=([^&]+)"]
|
||||||
|
|
||||||
def __init__(self, match):
|
class DanbooruPoolExtractor(DanbooruExtractor, booru.BooruPoolExtractor):
|
||||||
JSONBooruExtractor.__init__(self, match, info)
|
"""Extract image-pools from danbooru"""
|
||||||
self.api_url = "https://danbooru.donmai.us/posts.json"
|
pattern = [r"(?:https?://)?(?:www\.)?danbooru.donmai.us/pools/(\d+)"]
|
||||||
|
|
||||||
|
class DanbooruPostExtractor(DanbooruExtractor, booru.BooruPostExtractor):
|
||||||
|
"""Extract single images from danbooru"""
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts/(\d+)"]
|
||||||
|
|||||||
@@ -10,21 +10,15 @@
|
|||||||
|
|
||||||
from .common import AsynchronousExtractor, Message
|
from .common import AsynchronousExtractor, Message
|
||||||
from .. import text
|
from .. import text
|
||||||
import os.path
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "deviantart",
|
|
||||||
"extractor": "DeviantArtExtractor",
|
|
||||||
"directory": ["{category}", "{artist}"],
|
|
||||||
"filename": "{category}_{index}_{title}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?([^\.]+)\.deviantart\.com/gallery/.*",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class DeviantArtExtractor(AsynchronousExtractor):
|
class DeviantArtExtractor(AsynchronousExtractor):
|
||||||
|
|
||||||
|
category = "deviantart"
|
||||||
|
directory_fmt = ["{category}", "{artist}"]
|
||||||
|
filename_fmt = "{category}_{index}_{title}.{extension}"
|
||||||
|
pattern = [r"(?:https?://)?([^\.]+)\.deviantart\.com/gallery/.*"]
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
AsynchronousExtractor.__init__(self)
|
AsynchronousExtractor.__init__(self)
|
||||||
self.session.cookies["agegate_state"] = "1"
|
self.session.cookies["agegate_state"] = "1"
|
||||||
@@ -57,14 +51,14 @@ class DeviantArtExtractor(AsynchronousExtractor):
|
|||||||
def get_job_metadata(self):
|
def get_job_metadata(self):
|
||||||
"""Collect metadata for extractor-job"""
|
"""Collect metadata for extractor-job"""
|
||||||
return {
|
return {
|
||||||
"category": info["category"],
|
"category": self.category,
|
||||||
"artist": self.artist,
|
"artist": self.artist,
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_image_metadata(self, image):
|
def get_image_metadata(self, image):
|
||||||
"""Collect metadata for an image"""
|
"""Collect metadata for an image"""
|
||||||
match = self.extract_data(image, 'title',
|
match = self.extract_data(image, 'title',
|
||||||
'(.+) by (.+), ([A-Z][a-z]{2} \d+, \d{4}) in')
|
r'(.+) by (.+), ([A-Z][a-z]{2} \d+, \d{4}) in')
|
||||||
if image.startswith(" ismature"):
|
if image.startswith(" ismature"):
|
||||||
# adult image
|
# adult image
|
||||||
url, _ = text.extract(image, 'href="', '"')
|
url, _ = text.extract(image, 'href="', '"')
|
||||||
@@ -76,7 +70,7 @@ class DeviantArtExtractor(AsynchronousExtractor):
|
|||||||
height, pos = text.extract(page, ' height="', '"', pos)
|
height, pos = text.extract(page, ' height="', '"', pos)
|
||||||
else:
|
else:
|
||||||
# normal image
|
# normal image
|
||||||
index = self.extract_data(image, 'href', '[^"]+-(\d+)').group(1)
|
index = self.extract_data(image, 'href', r'[^"]+-(\d+)').group(1)
|
||||||
url, pos = text.extract(image, ' data-super-full-img="', '"', match.end())
|
url, pos = text.extract(image, ' data-super-full-img="', '"', match.end())
|
||||||
if url:
|
if url:
|
||||||
width , pos = text.extract(image, ' data-super-full-width="', '"', pos)
|
width , pos = text.extract(image, ' data-super-full-width="', '"', pos)
|
||||||
|
|||||||
@@ -8,21 +8,24 @@
|
|||||||
|
|
||||||
"""Extract image-urls from https://e621.net/"""
|
"""Extract image-urls from https://e621.net/"""
|
||||||
|
|
||||||
from .booru import JSONBooruExtractor
|
from . import booru
|
||||||
|
|
||||||
info = {
|
class E621Extractor(booru.JSONBooruExtractor):
|
||||||
"category": "e621",
|
"""Base class for e621 extractors"""
|
||||||
"extractor": "E621Extractor",
|
category = "e621"
|
||||||
"directory": ["{category}", "{tags}"],
|
api_url = "https://e621.net/post/index.json"
|
||||||
"filename": "{category}_{id}_{md5}.{extension}",
|
|
||||||
"pattern": [
|
class E621TagExtractor(E621Extractor, booru.BooruTagExtractor):
|
||||||
|
"""Extract images from e621 based on search-tags"""
|
||||||
|
pattern = [
|
||||||
r"(?:https?://)?(?:www\.)?e621\.net/post/index/\d+/([^?]+)",
|
r"(?:https?://)?(?:www\.)?e621\.net/post/index/\d+/([^?]+)",
|
||||||
r"(?:https?://)?(?:www\.)?e621\.net/post\?tags=([^&]+).*"
|
r"(?:https?://)?(?:www\.)?e621\.net/post\?tags=([^&]+)",
|
||||||
],
|
]
|
||||||
}
|
|
||||||
|
|
||||||
class E621Extractor(JSONBooruExtractor):
|
class E621PoolExtractor(E621Extractor, booru.BooruPoolExtractor):
|
||||||
|
"""Extract image-pools from e621"""
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?e621\.net/pool/show/(\d+)"]
|
||||||
|
|
||||||
def __init__(self, match):
|
class E621PostExtractor(E621Extractor, booru.BooruPostExtractor):
|
||||||
JSONBooruExtractor.__init__(self, match, info)
|
"""Extract single images from e621"""
|
||||||
self.api_url = "https://e621.net/post/index.json"
|
pattern = [r"(?:https?://)?(?:www\.)?e621\.net/post/show/(\d+)"]
|
||||||
|
|||||||
@@ -13,18 +13,12 @@ from .. import config, text, iso639_1
|
|||||||
import time
|
import time
|
||||||
import random
|
import random
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "exhentai",
|
|
||||||
"extractor": "ExhentaiExtractor",
|
|
||||||
"directory": ["{category}", "{gallery-id}"],
|
|
||||||
"filename": "{gallery-id}_{num:>04}_{imgkey}_{name}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?(g\.e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class ExhentaiExtractor(Extractor):
|
class ExhentaiExtractor(Extractor):
|
||||||
|
|
||||||
|
category = "exhentai"
|
||||||
|
directory_fmt = ["{category}", "{gallery-id}"]
|
||||||
|
filename_fmt = "{gallery-id}_{num:>04}_{imgkey}_{name}.{extension}"
|
||||||
|
pattern = [r"(?:https?://)?(g\.e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})"]
|
||||||
api_url = "http://exhentai.org/api.php"
|
api_url = "http://exhentai.org/api.php"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
@@ -70,7 +64,7 @@ class ExhentaiExtractor(Extractor):
|
|||||||
def get_job_metadata(self, page):
|
def get_job_metadata(self, page):
|
||||||
"""Collect metadata for extractor-job"""
|
"""Collect metadata for extractor-job"""
|
||||||
data = {
|
data = {
|
||||||
"category" : info["category"],
|
"category" : self.category,
|
||||||
"gallery-id" : self.gid,
|
"gallery-id" : self.gid,
|
||||||
"gallery-token": self.token,
|
"gallery-token": self.token,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,27 +8,19 @@
|
|||||||
|
|
||||||
"""Extract image-urls from http://gelbooru.com/"""
|
"""Extract image-urls from http://gelbooru.com/"""
|
||||||
|
|
||||||
from .booru import XMLBooruExtractor
|
from . import booru
|
||||||
from .. import config
|
from .. import config
|
||||||
|
|
||||||
info = {
|
class GelbooruExtractor(booru.XMLBooruExtractor):
|
||||||
"category": "gelbooru",
|
"""Base class for gelbooru extractors"""
|
||||||
"extractor": "GelbooruExtractor",
|
|
||||||
"directory": ["{category}", "{tags}"],
|
|
||||||
"filename": "{category}_{id}_{md5}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?page=post&s=list&tags=([^&]+).*",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class GelbooruExtractor(XMLBooruExtractor):
|
category = "gelbooru"
|
||||||
|
api_url = "http://gelbooru.com/"
|
||||||
|
|
||||||
def __init__(self, match):
|
def setup(self):
|
||||||
XMLBooruExtractor.__init__(self, match, info)
|
self.params.update({"page":"dapi", "s":"post", "q":"index"})
|
||||||
self.api_url = "http://gelbooru.com/"
|
|
||||||
self.params = {"page":"dapi", "s":"post", "q":"index", "tags":self.tags}
|
|
||||||
self.session.cookies.update(
|
self.session.cookies.update(
|
||||||
config.get(("extractor", info["category"], "cookies"))
|
config.get(("extractor", self.category, "cookies"))
|
||||||
)
|
)
|
||||||
|
|
||||||
def update_page(self, reset=False):
|
def update_page(self, reset=False):
|
||||||
@@ -36,3 +28,16 @@ class GelbooruExtractor(XMLBooruExtractor):
|
|||||||
self.params["pid"] += 1
|
self.params["pid"] += 1
|
||||||
else:
|
else:
|
||||||
self.params["pid"] = 0
|
self.params["pid"] = 0
|
||||||
|
|
||||||
|
class GelbooruTagExtractor(GelbooruExtractor, booru.BooruTagExtractor):
    """Extractor for gelbooru images found by a search-tags query"""
    pattern = [
        r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?page=post&s=list&tags=([^&]+)",
    ]
|
||||||
|
|
||||||
|
# TODO: find out how to access pools via gelbooru-api
|
||||||
|
# class GelbooruPoolExtractor(GelbooruExtractor, booru.BooruPoolExtractor):
|
||||||
|
# """Extract image-pools from gelbooru"""
|
||||||
|
# pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?page=pool&s=show&id=(\d+)"]
|
||||||
|
|
||||||
|
class GelbooruPostExtractor(GelbooruExtractor, booru.BooruPostExtractor):
    """Extractor for one single gelbooru image, addressed by its post-id"""
    pattern = [
        r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?page=post&s=view&id=(\d+)",
    ]
|
||||||
|
|||||||
@@ -10,20 +10,13 @@
|
|||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text
|
from .. import text
|
||||||
import os.path
|
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "hbrowse",
|
|
||||||
"extractor": "HbrowseExtractor",
|
|
||||||
"directory": ["{category}", "{gallery-id} {title}"],
|
|
||||||
"filename": "{category}_{gallery-id}_{num:>03}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/(c\d+)",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class HbrowseExtractor(Extractor):
|
class HbrowseExtractor(Extractor):
|
||||||
|
|
||||||
|
category = "hbrowse"
|
||||||
|
directory_fmt = ["{category}", "{gallery-id} {title}"]
|
||||||
|
filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}"
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/(c\d+)"]
|
||||||
url_base = "http://www.hbrowse.com/thumbnails/"
|
url_base = "http://www.hbrowse.com/thumbnails/"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
@@ -43,7 +36,7 @@ class HbrowseExtractor(Extractor):
|
|||||||
def get_job_metadata(self, page):
|
def get_job_metadata(self, page):
|
||||||
"""Collect metadata for extractor-job"""
|
"""Collect metadata for extractor-job"""
|
||||||
data = {
|
data = {
|
||||||
"category": info["category"],
|
"category": self.category,
|
||||||
'gallery-id': self.gid,
|
'gallery-id': self.gid,
|
||||||
'chapter': int(self.chapter[1:]),
|
'chapter': int(self.chapter[1:]),
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,19 +12,15 @@ from .common import Extractor, Message
|
|||||||
from .. import text
|
from .. import text
|
||||||
import os.path
|
import os.path
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "hentaifoundry",
|
|
||||||
"extractor": "HentaiFoundryExtractor",
|
|
||||||
"directory": ["{category}", "{artist}"],
|
|
||||||
"filename": "{category}_{index}_{title}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?(?:www\.)?hentai-foundry\.com/pictures/user/([^/]+)",
|
|
||||||
r"(?:https?://)?(?:www\.)?hentai-foundry\.com/user/([^/]+)/profile",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class HentaiFoundryExtractor(Extractor):
|
class HentaiFoundryExtractor(Extractor):
|
||||||
|
|
||||||
|
category = "hentaifoundry"
|
||||||
|
directory_fmt = ["{category}", "{artist}"]
|
||||||
|
filename_fmt = "{category}_{index}_{title}.{extension}"
|
||||||
|
pattern = [
|
||||||
|
r"(?:https?://)?(?:www\.)?hentai-foundry\.com/pictures/user/([^/]+)",
|
||||||
|
r"(?:https?://)?(?:www\.)?hentai-foundry\.com/user/([^/]+)/profile",
|
||||||
|
]
|
||||||
url_base = "http://www.hentai-foundry.com/pictures/user/"
|
url_base = "http://www.hentai-foundry.com/pictures/user/"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
@@ -60,7 +56,7 @@ class HentaiFoundryExtractor(Extractor):
|
|||||||
token, pos = text.extract(page, 'hidden" value="', '"')
|
token, pos = text.extract(page, 'hidden" value="', '"')
|
||||||
count, pos = text.extract(page, 'class="active" >Pictures (', ')', pos)
|
count, pos = text.extract(page, 'class="active" >Pictures (', ')', pos)
|
||||||
return {
|
return {
|
||||||
"category": info["category"],
|
"category": self.category,
|
||||||
"artist": self.artist,
|
"artist": self.artist,
|
||||||
"count": count,
|
"count": count,
|
||||||
}, token
|
}, token
|
||||||
|
|||||||
@@ -10,21 +10,15 @@
|
|||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, iso639_1
|
from .. import text, iso639_1
|
||||||
import os.path
|
|
||||||
import string
|
import string
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "hitomi",
|
|
||||||
"extractor": "HitomiExtractor",
|
|
||||||
"directory": ["{category}", "{gallery-id} {title}"],
|
|
||||||
"filename": "{category}_{gallery-id}_{num:>03}_{name}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)\.html",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class HitomiExtractor(Extractor):
|
class HitomiExtractor(Extractor):
|
||||||
|
|
||||||
|
category = "hitomi"
|
||||||
|
directory_fmt = ["{category}", "{gallery-id} {title}"]
|
||||||
|
filename_fmt = "{category}_{gallery-id}_{num:>03}_{name}.{extension}"
|
||||||
|
pattern = [r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)\.html"]
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self)
|
Extractor.__init__(self)
|
||||||
self.gid = match.group(1)
|
self.gid = match.group(1)
|
||||||
@@ -62,7 +56,7 @@ class HitomiExtractor(Extractor):
|
|||||||
series, pos = text.extract(page, '.html">', '</a>', pos)
|
series, pos = text.extract(page, '.html">', '</a>', pos)
|
||||||
lang = lang.capitalize()
|
lang = lang.capitalize()
|
||||||
return {
|
return {
|
||||||
"category": info["category"],
|
"category": self.category,
|
||||||
"gallery-id": self.gid,
|
"gallery-id": self.gid,
|
||||||
"title": title,
|
"title": title,
|
||||||
"artist": string.capwords(artist),
|
"artist": string.capwords(artist),
|
||||||
|
|||||||
@@ -10,20 +10,13 @@
|
|||||||
|
|
||||||
from .common import AsynchronousExtractor, Message
|
from .common import AsynchronousExtractor, Message
|
||||||
from .. import text
|
from .. import text
|
||||||
import os.path
|
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "imagebam",
|
|
||||||
"extractor": "ImagebamExtractor",
|
|
||||||
"directory": ["{category}", "{title} - {gallery-key}"],
|
|
||||||
"filename": "{num:>03}-{filename}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([^/]+).*",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class ImagebamExtractor(AsynchronousExtractor):
|
class ImagebamExtractor(AsynchronousExtractor):
|
||||||
|
|
||||||
|
category = "imagebam"
|
||||||
|
directory_fmt = ["{category}", "{title} - {gallery-key}"]
|
||||||
|
filename_fmt = "{num:>03}-{filename}"
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([^/]+).*"]
|
||||||
url_base = "http://www.imagebam.com"
|
url_base = "http://www.imagebam.com"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
@@ -47,7 +40,7 @@ class ImagebamExtractor(AsynchronousExtractor):
|
|||||||
response.encoding = "utf-8"
|
response.encoding = "utf-8"
|
||||||
page = response.text
|
page = response.text
|
||||||
data = {
|
data = {
|
||||||
"category": info["category"],
|
"category": self.category,
|
||||||
"gallery-key": self.gkey,
|
"gallery-key": self.gkey,
|
||||||
}
|
}
|
||||||
data, _ = text.extract_all(page, (
|
data, _ = text.extract_all(page, (
|
||||||
|
|||||||
@@ -12,18 +12,12 @@ from .common import AsynchronousExtractor, Message
|
|||||||
from .. import text
|
from .. import text
|
||||||
import re
|
import re
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "imgbox",
|
|
||||||
"extractor": "ImgboxExtractor",
|
|
||||||
"directory": ["{category}", "{title} - {gallery-key}"],
|
|
||||||
"filename": "{num:>03}-{name}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?(?:www\.)?imgbox\.com/g/(.+)",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class ImgboxExtractor(AsynchronousExtractor):
|
class ImgboxExtractor(AsynchronousExtractor):
|
||||||
|
|
||||||
|
category = "imgbox"
|
||||||
|
directory_fmt = ["{category}", "{title} - {gallery-key}"]
|
||||||
|
filename_fmt = "{num:>03}-{name}"
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?imgbox\.com/g/(.+)"]
|
||||||
url_base = "http://imgbox.com"
|
url_base = "http://imgbox.com"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
@@ -44,7 +38,7 @@ class ImgboxExtractor(AsynchronousExtractor):
|
|||||||
"""Collect metadata for extractor-job"""
|
"""Collect metadata for extractor-job"""
|
||||||
match = re.search(r"<h1>(.+) \(([^ ]+) ([^ ]+) \w+\) - (\d+)", page)
|
match = re.search(r"<h1>(.+) \(([^ ]+) ([^ ]+) \w+\) - (\d+)", page)
|
||||||
return {
|
return {
|
||||||
"category": info["category"],
|
"category": self.category,
|
||||||
"gallery-key": self.key,
|
"gallery-key": self.key,
|
||||||
"title": match.group(1),
|
"title": match.group(1),
|
||||||
"date": match.group(2),
|
"date": match.group(2),
|
||||||
@@ -62,7 +56,8 @@ class ImgboxExtractor(AsynchronousExtractor):
|
|||||||
), values=data)
|
), values=data)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def get_file_url(self, page):
|
@staticmethod
|
||||||
|
def get_file_url(page):
|
||||||
"""Extract download-url"""
|
"""Extract download-url"""
|
||||||
base = "http://i.imgbox.com/"
|
base = "http://i.imgbox.com/"
|
||||||
path, _ = text.extract(page, base, '"')
|
path, _ = text.extract(page, base, '"')
|
||||||
|
|||||||
@@ -12,18 +12,13 @@ from .common import Extractor, Message
|
|||||||
from .. import text
|
from .. import text
|
||||||
import re
|
import re
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "imgchili",
|
|
||||||
"extractor": "ImgchiliExtractor",
|
|
||||||
"directory": ["{category}", "{title} - {key}"],
|
|
||||||
"filename": "{num:>03}-{name}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?(?:www\.)?imgchili\.net/album/([^/]+)",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class ImgchiliExtractor(Extractor):
|
class ImgchiliExtractor(Extractor):
|
||||||
|
|
||||||
|
category = "imgchili"
|
||||||
|
directory_fmt = ["{category}", "{title} - {key}"]
|
||||||
|
filename_fmt = "{num:>03}-{name}"
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?imgchili\.net/album/([^/]+)"]
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self)
|
Extractor.__init__(self)
|
||||||
self.match = match
|
self.match = match
|
||||||
|
|||||||
@@ -10,20 +10,14 @@
|
|||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text
|
from .. import text
|
||||||
import os.path
|
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "imgth",
|
|
||||||
"extractor": "ImgthExtractor",
|
|
||||||
"directory": ["{category}", "{gallery-id} {title}"],
|
|
||||||
"filename": "{category}_{gallery-id}_{num:>03}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?imgth\.com/gallery/(\d+)",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class ImgthExtractor(Extractor):
|
class ImgthExtractor(Extractor):
|
||||||
|
|
||||||
|
category = "imgth"
|
||||||
|
directory_fmt = ["{category}", "{gallery-id} {title}"]
|
||||||
|
filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}"
|
||||||
|
pattern = [r"(?:https?://)?imgth\.com/gallery/(\d+)"]
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self)
|
Extractor.__init__(self)
|
||||||
self.gid = match.group(1)
|
self.gid = match.group(1)
|
||||||
@@ -55,7 +49,7 @@ class ImgthExtractor(Extractor):
|
|||||||
def get_job_metadata(self, page):
|
def get_job_metadata(self, page):
|
||||||
"""Collect metadata for extractor-job"""
|
"""Collect metadata for extractor-job"""
|
||||||
data = {
|
data = {
|
||||||
"category": info["category"],
|
"category": self.category,
|
||||||
"gallery-id": self.gid,
|
"gallery-id": self.gid,
|
||||||
}
|
}
|
||||||
data, _ = text.extract_all(page, (
|
data, _ = text.extract_all(page, (
|
||||||
|
|||||||
@@ -12,18 +12,13 @@ from .common import Extractor, Message
|
|||||||
from .. import text
|
from .. import text
|
||||||
import os.path
|
import os.path
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "imgur",
|
|
||||||
"extractor": "ImgurExtractor",
|
|
||||||
"directory": ["{category}", "{album-key} - {title}"],
|
|
||||||
"filename": "{category}_{album-key}_{num:>03}_{name}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?(?:www\.)?imgur\.com/(?:a|gallery)/([^/?&#]+)",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class ImgurExtractor(Extractor):
|
class ImgurExtractor(Extractor):
|
||||||
|
|
||||||
|
category = "imgur"
|
||||||
|
directory_fmt = ["{category}", "{album-key} - {title}"]
|
||||||
|
filename_fmt = "{category}_{album-key}_{num:>03}_{name}.{extension}"
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?imgur\.com/(?:a|gallery)/([^/?&#]+)"]
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self)
|
Extractor.__init__(self)
|
||||||
self.album = match.group(1)
|
self.album = match.group(1)
|
||||||
@@ -43,7 +38,7 @@ class ImgurExtractor(Extractor):
|
|||||||
"""Collect metadata for extractor-job"""
|
"""Collect metadata for extractor-job"""
|
||||||
page = self.request("https://imgur.com/a/" + self.album).text
|
page = self.request("https://imgur.com/a/" + self.album).text
|
||||||
data = {
|
data = {
|
||||||
"category": info["category"],
|
"category": self.category,
|
||||||
"album-key": self.album,
|
"album-key": self.album,
|
||||||
}
|
}
|
||||||
return text.extract_all(page, (
|
return text.extract_all(page, (
|
||||||
|
|||||||
@@ -10,21 +10,15 @@
|
|||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, cloudflare
|
from .. import text, cloudflare
|
||||||
import os.path
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "kissmanga",
|
|
||||||
"extractor": "KissmangaExtractor",
|
|
||||||
"directory": ["{category}", "{manga}", "c{chapter:>03}{chapter-minor} - {title}"],
|
|
||||||
"filename": "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?(?:www\.)?kissmanga\.com/Manga/.+/.+\?id=\d+",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class KissmangaExtractor(Extractor):
|
class KissmangaExtractor(Extractor):
|
||||||
|
|
||||||
|
category = "kissmanga"
|
||||||
|
directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor} - {title}"]
|
||||||
|
filename_fmt = "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}"
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?kissmanga\.com/Manga/.+/.+\?id=\d+"]
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self)
|
Extractor.__init__(self)
|
||||||
self.url = match.group(0)
|
self.url = match.group(0)
|
||||||
@@ -41,8 +35,7 @@ class KissmangaExtractor(Extractor):
|
|||||||
data["page"] = num
|
data["page"] = num
|
||||||
yield Message.Url, url, text.nameext_from_url(url, data)
|
yield Message.Url, url, text.nameext_from_url(url, data)
|
||||||
|
|
||||||
@staticmethod
|
def get_job_metadata(self, page):
|
||||||
def get_job_metadata(page):
|
|
||||||
"""Collect metadata for extractor-job"""
|
"""Collect metadata for extractor-job"""
|
||||||
manga, pos = text.extract(page, "Read manga\n", "\n")
|
manga, pos = text.extract(page, "Read manga\n", "\n")
|
||||||
cinfo, pos = text.extract(page, "", "\n", pos)
|
cinfo, pos = text.extract(page, "", "\n", pos)
|
||||||
@@ -50,7 +43,7 @@ class KissmangaExtractor(Extractor):
|
|||||||
r"(?:Vol.0*(\d+) )?(?:Ch.)?0*(\d+)(?:\.0*(\d+))?(?:: (.+))?", cinfo)
|
r"(?:Vol.0*(\d+) )?(?:Ch.)?0*(\d+)(?:\.0*(\d+))?(?:: (.+))?", cinfo)
|
||||||
chminor = match.group(3)
|
chminor = match.group(3)
|
||||||
return {
|
return {
|
||||||
"category": info["category"],
|
"category": self.category,
|
||||||
"manga": manga,
|
"manga": manga,
|
||||||
"volume": match.group(1) or "",
|
"volume": match.group(1) or "",
|
||||||
"chapter": match.group(2),
|
"chapter": match.group(2),
|
||||||
|
|||||||
@@ -8,20 +8,21 @@
|
|||||||
|
|
||||||
"""Extract image-urls from https://konachan.com/"""
|
"""Extract image-urls from https://konachan.com/"""
|
||||||
|
|
||||||
from .booru import JSONBooruExtractor
|
from . import booru
|
||||||
|
|
||||||
info = {
|
class KonachanExtractor(booru.JSONBooruExtractor):
|
||||||
"category": "konachan",
|
"""Base class for konachan extractors"""
|
||||||
"extractor": "KonachanExtractor",
|
category = "konachan"
|
||||||
"directory": ["{category}", "{tags}"],
|
api_url = "https://konachan.com/post.json"
|
||||||
"filename": "{category}_{id}_{md5}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?(?:www\.)?konachan\.com/post\?tags=([^&]+).*",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class KonachanExtractor(JSONBooruExtractor):
|
class KonachanTagExtractor(KonachanExtractor, booru.BooruTagExtractor):
|
||||||
|
"""Extract images from konachan based on search-tags"""
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/post\?tags=([^&]+)"]
|
||||||
|
|
||||||
def __init__(self, match):
|
class KonachanPoolExtractor(KonachanExtractor, booru.BooruPoolExtractor):
|
||||||
JSONBooruExtractor.__init__(self, match, info)
|
"""Extract image-pools from konachan"""
|
||||||
self.api_url = "http://konachan.com/post.json"
|
pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/pool/show/(\d+)"]
|
||||||
|
|
||||||
|
class KonachanPostExtractor(KonachanExtractor, booru.BooruPostExtractor):
|
||||||
|
"""Extract single images from konachan"""
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/post/show/(\d+)"]
|
||||||
|
|||||||
@@ -10,18 +10,13 @@
|
|||||||
|
|
||||||
from .mangareader import MangaReaderExtractor
|
from .mangareader import MangaReaderExtractor
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "mangapanda",
|
|
||||||
"extractor": "MangaPandaExtractor",
|
|
||||||
"directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"],
|
|
||||||
"filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/]+)/(\d+))",
|
|
||||||
r"(?:https?://)?(?:www\.)?mangapanda\.com(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class MangaPandaExtractor(MangaReaderExtractor):
|
class MangaPandaExtractor(MangaReaderExtractor):
|
||||||
|
|
||||||
category = info["category"]
|
category = "mangapanda"
|
||||||
|
directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"]
|
||||||
|
filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
|
||||||
|
pattern = [
|
||||||
|
r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/]+)/(\d+))",
|
||||||
|
r"(?:https?://)?(?:www\.)?mangapanda\.com(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)",
|
||||||
|
]
|
||||||
url_base = "http://www.mangapanda.com"
|
url_base = "http://www.mangapanda.com"
|
||||||
|
|||||||
@@ -10,22 +10,16 @@
|
|||||||
|
|
||||||
from .common import AsynchronousExtractor, Message
|
from .common import AsynchronousExtractor, Message
|
||||||
from .. import text
|
from .. import text
|
||||||
import os.path
|
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "mangareader",
|
|
||||||
"extractor": "MangaReaderExtractor",
|
|
||||||
"directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"],
|
|
||||||
"filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/]+)/(\d+))",
|
|
||||||
r"(?:https?://)?(?:www\.)?mangareader\.net(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class MangaReaderExtractor(AsynchronousExtractor):
|
class MangaReaderExtractor(AsynchronousExtractor):
|
||||||
|
|
||||||
category = info["category"]
|
category = "mangareader"
|
||||||
|
directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"]
|
||||||
|
filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
|
||||||
|
pattern = [
|
||||||
|
r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/]+)/(\d+))",
|
||||||
|
r"(?:https?://)?(?:www\.)?mangareader\.net(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)",
|
||||||
|
]
|
||||||
url_base = "http://www.mangareader.net"
|
url_base = "http://www.mangareader.net"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
|
|||||||
@@ -10,20 +10,13 @@
|
|||||||
|
|
||||||
from .common import AsynchronousExtractor, Message
|
from .common import AsynchronousExtractor, Message
|
||||||
from .. import text
|
from .. import text
|
||||||
import os
|
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "mangashare",
|
|
||||||
"extractor": "MangaShareExtractor",
|
|
||||||
"directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"],
|
|
||||||
"filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?read\.mangashare\.com/([^/]+/chapter-\d+)",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class MangaShareExtractor(AsynchronousExtractor):
|
class MangaShareExtractor(AsynchronousExtractor):
|
||||||
|
|
||||||
|
category = "mangashare"
|
||||||
|
directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"]
|
||||||
|
filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
|
||||||
|
pattern = [r"(?:https?://)?read\.mangashare\.com/([^/]+/chapter-\d+)"]
|
||||||
url_fmt = "http://read.mangashare.com/{}/page{:>03}.html"
|
url_fmt = "http://read.mangashare.com/{}/page{:>03}.html"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
@@ -40,11 +33,10 @@ class MangaShareExtractor(AsynchronousExtractor):
|
|||||||
text.nameext_from_url(url, data)
|
text.nameext_from_url(url, data)
|
||||||
yield Message.Url, url, data.copy()
|
yield Message.Url, url, data.copy()
|
||||||
|
|
||||||
@staticmethod
|
def get_job_metadata(self, page):
|
||||||
def get_job_metadata(page):
|
|
||||||
"""Collect metadata for extractor-job"""
|
"""Collect metadata for extractor-job"""
|
||||||
data = {
|
data = {
|
||||||
"category": info["category"],
|
"category": self.category,
|
||||||
"lang": "en",
|
"lang": "en",
|
||||||
"language": "English",
|
"language": "English",
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,20 +10,13 @@
|
|||||||
|
|
||||||
from .common import AsynchronousExtractor, Message
|
from .common import AsynchronousExtractor, Message
|
||||||
from .. import text
|
from .. import text
|
||||||
import os.path
|
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "mangastream",
|
|
||||||
"extractor": "MangaStreamExtractor",
|
|
||||||
"directory": ["{category}", "{manga}", "c{chapter:>03}{chapter-minor} - {title}"],
|
|
||||||
"filename": "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?(?:www\.)?readms\.com/r/([^/]*/(\d+)([^/]*)?/(\d+))",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class MangaStreamExtractor(AsynchronousExtractor):
|
class MangaStreamExtractor(AsynchronousExtractor):
|
||||||
|
|
||||||
|
category = "mangastream"
|
||||||
|
directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor} - {title}"]
|
||||||
|
filename_fmt = "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}"
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?readms\.com/r/([^/]*/(\d+)([^/]*)?/(\d+))"]
|
||||||
url_base = "https://readms.com/r/"
|
url_base = "https://readms.com/r/"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
@@ -47,7 +40,7 @@ class MangaStreamExtractor(AsynchronousExtractor):
|
|||||||
def get_job_metadata(self, page):
|
def get_job_metadata(self, page):
|
||||||
"""Collect metadata for extractor-job"""
|
"""Collect metadata for extractor-job"""
|
||||||
data = {
|
data = {
|
||||||
"category": info["category"],
|
"category": self.category,
|
||||||
"chapter": self.chapter,
|
"chapter": self.chapter,
|
||||||
"chapter-minor": self.ch_minor,
|
"chapter-minor": self.ch_minor,
|
||||||
"chapter-id": self.ch_id,
|
"chapter-id": self.ch_id,
|
||||||
@@ -61,7 +54,8 @@ class MangaStreamExtractor(AsynchronousExtractor):
|
|||||||
), values=data)
|
), values=data)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def get_page_metadata(self, page):
|
@staticmethod
|
||||||
|
def get_page_metadata(page):
|
||||||
"""Collect next url, image-url and metadata for one manga-page"""
|
"""Collect next url, image-url and metadata for one manga-page"""
|
||||||
nurl, pos = text.extract(page, '<div class="page">\n<a href="', '"')
|
nurl, pos = text.extract(page, '<div class="page">\n<a href="', '"')
|
||||||
iurl, pos = text.extract(page, '<img id="manga-page" src="', '"', pos)
|
iurl, pos = text.extract(page, '<img id="manga-page" src="', '"', pos)
|
||||||
|
|||||||
@@ -12,18 +12,13 @@ from .common import Extractor, Message
|
|||||||
from .. import text
|
from .. import text
|
||||||
import json
|
import json
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "nhentai",
|
|
||||||
"extractor": "NhentaiExtractor",
|
|
||||||
"directory": ["{category}", "{gallery-id} {title}"],
|
|
||||||
"filename": "{category}_{gallery-id}_{num:>03}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?(?:www\.)?nhentai\.net/g/(\d+)",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class NhentaiExtractor(Extractor):
|
class NhentaiExtractor(Extractor):
|
||||||
|
|
||||||
|
category = "nhentai"
|
||||||
|
directory_fmt = ["{category}", "{gallery-id} {title}"]
|
||||||
|
filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}"
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?nhentai\.net/g/(\d+)"]
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self)
|
Extractor.__init__(self)
|
||||||
self.gid = match.group(1)
|
self.gid = match.group(1)
|
||||||
@@ -57,7 +52,7 @@ class NhentaiExtractor(Extractor):
|
|||||||
title_en = ginfo["title"].get("english", "")
|
title_en = ginfo["title"].get("english", "")
|
||||||
title_ja = ginfo["title"].get("japanese", "")
|
title_ja = ginfo["title"].get("japanese", "")
|
||||||
return {
|
return {
|
||||||
"category": info["category"],
|
"category": self.category,
|
||||||
"gallery-id": self.gid,
|
"gallery-id": self.gid,
|
||||||
"upload-date": ginfo["upload_date"],
|
"upload-date": ginfo["upload_date"],
|
||||||
"media-id": ginfo["media_id"],
|
"media-id": ginfo["media_id"],
|
||||||
|
|||||||
@@ -12,18 +12,12 @@ from .common import AsynchronousExtractor, Message
|
|||||||
from .. import config, text
|
from .. import config, text
|
||||||
import re
|
import re
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "nijie",
|
|
||||||
"extractor": "NijieExtractor",
|
|
||||||
"directory": ["{category}", "{artist-id}"],
|
|
||||||
"filename": "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?(?:www\.)?nijie\.info/members(?:_illust)?\.php\?id=(\d+)",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class NijieExtractor(AsynchronousExtractor):
|
class NijieExtractor(AsynchronousExtractor):
|
||||||
|
|
||||||
|
category = "nijie"
|
||||||
|
directory_fmt = ["{category}", "{artist-id}"]
|
||||||
|
filename_fmt = "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}"
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?nijie\.info/members(?:_illust)?\.php\?id=(\d+)"]
|
||||||
popup_url = "https://nijie.info/view_popup.php?id="
|
popup_url = "https://nijie.info/view_popup.php?id="
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
@@ -37,7 +31,7 @@ class NijieExtractor(AsynchronousExtractor):
|
|||||||
self.session.cookies["R18"] = "1"
|
self.session.cookies["R18"] = "1"
|
||||||
self.session.cookies["nijie_referer"] = "nijie.info"
|
self.session.cookies["nijie_referer"] = "nijie.info"
|
||||||
self.session.cookies.update(
|
self.session.cookies.update(
|
||||||
config.get(("extractor", info["category"], "cookies"))
|
config.get(("extractor", self.category, "cookies"))
|
||||||
)
|
)
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
@@ -52,7 +46,7 @@ class NijieExtractor(AsynchronousExtractor):
|
|||||||
def get_job_metadata(self):
|
def get_job_metadata(self):
|
||||||
"""Collect metadata for extractor-job"""
|
"""Collect metadata for extractor-job"""
|
||||||
return {
|
return {
|
||||||
"category": info["category"],
|
"category": self.category,
|
||||||
"artist-id": self.artist_id,
|
"artist-id": self.artist_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -14,19 +14,12 @@ import re
|
|||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "pixiv",
|
|
||||||
"extractor": "PixivExtractor",
|
|
||||||
"directory": ["{category}", "{artist-id}-{artist-nick}"],
|
|
||||||
"filename": "{category}_{artist-id}_{id}{num}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?(?:www\.)?pixiv\.net/member(?:_illust)?\.php\?id=(\d+)",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class PixivExtractor(Extractor):
|
class PixivExtractor(Extractor):
|
||||||
|
|
||||||
|
category = "pixiv"
|
||||||
|
directory_fmt = ["{category}", "{artist-id}-{artist-nick}"]
|
||||||
|
filename_fmt = "{category}_{artist-id}_{id}{num}.{extension}"
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?pixiv\.net/member(?:_illust)?\.php\?id=(\d+)"]
|
||||||
member_url = "http://www.pixiv.net/member_illust.php"
|
member_url = "http://www.pixiv.net/member_illust.php"
|
||||||
illust_url = "http://www.pixiv.net/member_illust.php?mode=medium"
|
illust_url = "http://www.pixiv.net/member_illust.php?mode=medium"
|
||||||
|
|
||||||
@@ -121,7 +114,7 @@ class PixivExtractor(Extractor):
|
|||||||
"""Collect metadata for extractor-job"""
|
"""Collect metadata for extractor-job"""
|
||||||
data = self.api.user(self.artist_id)["response"][0]
|
data = self.api.user(self.artist_id)["response"][0]
|
||||||
return {
|
return {
|
||||||
"category": info["category"],
|
"category": self.category,
|
||||||
"artist-id": self.artist_id,
|
"artist-id": self.artist_id,
|
||||||
"artist-name": data["name"],
|
"artist-name": data["name"],
|
||||||
"artist-nick": data["account"],
|
"artist-nick": data["account"],
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2014, 2015 Mike Fährmann
|
# Copyright 2015 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -8,27 +8,27 @@
|
|||||||
|
|
||||||
"""Extract image-urls from http://safebooru.org/"""
|
"""Extract image-urls from http://safebooru.org/"""
|
||||||
|
|
||||||
from .booru import XMLBooruExtractor
|
from . import booru
|
||||||
|
|
||||||
info = {
|
class SafebooruExtractor(booru.XMLBooruExtractor):
|
||||||
"category": "safebooru",
|
"""Base class for safebooru extractors"""
|
||||||
"extractor": "SafebooruExtractor",
|
|
||||||
"directory": ["{category}", "{tags}"],
|
|
||||||
"filename": "{category}_{id}_{md5}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?\?page=post&s=list&tags=([^&]+).*",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class SafebooruExtractor(XMLBooruExtractor):
|
category = "safebooru"
|
||||||
|
api_url = "http://safebooru.org/index.php"
|
||||||
|
|
||||||
def __init__(self, match):
|
def setup(self):
|
||||||
XMLBooruExtractor.__init__(self, match, info)
|
self.params.update({"page":"dapi", "s":"post", "q":"index"})
|
||||||
self.api_url = "http://safebooru.org/index.php"
|
|
||||||
self.params = {"page":"dapi", "s":"post", "q":"index", "tags":self.tags}
|
|
||||||
|
|
||||||
def update_page(self, reset=False):
|
def update_page(self, reset=False):
|
||||||
if reset is False:
|
if reset is False:
|
||||||
self.params["pid"] += 1
|
self.params["pid"] += 1
|
||||||
else:
|
else:
|
||||||
self.params["pid"] = 0
|
self.params["pid"] = 0
|
||||||
|
|
||||||
|
class SafebooruTagExtractor(SafebooruExtractor, booru.BooruTagExtractor):
|
||||||
|
"""Extract images from safebooru based on search-tags"""
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?\?page=post&s=list&tags=([^&]+)"]
|
||||||
|
|
||||||
|
class SafebooruPostExtractor(SafebooruExtractor, booru.BooruPostExtractor):
|
||||||
|
"""Extract single images from safebooru"""
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?\?page=post&s=view&id=(\d+)"]
|
||||||
|
|||||||
@@ -10,20 +10,13 @@
|
|||||||
|
|
||||||
from .common import AsynchronousExtractor, Message
|
from .common import AsynchronousExtractor, Message
|
||||||
from .. import text
|
from .. import text
|
||||||
import os.path
|
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "sankaku",
|
|
||||||
"extractor": "SankakuExtractor",
|
|
||||||
"directory": ["{category}", "{tags}"],
|
|
||||||
"filename": "{category}_{id}_{md5}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?chan\.sankakucomplex\.com/\?tags=([^&]+)",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class SankakuExtractor(AsynchronousExtractor):
|
class SankakuExtractor(AsynchronousExtractor):
|
||||||
|
|
||||||
|
category = "sankaku"
|
||||||
|
directory_fmt = ["{category}", "{tags}"]
|
||||||
|
filename_fmt = "{category}_{id}_{md5}.{extension}"
|
||||||
|
pattern = [r"(?:https?://)?chan\.sankakucomplex\.com/\?tags=([^&]+)"]
|
||||||
url = "https://chan.sankakucomplex.com/"
|
url = "https://chan.sankakucomplex.com/"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
@@ -45,7 +38,7 @@ class SankakuExtractor(AsynchronousExtractor):
|
|||||||
def get_job_metadata(self):
|
def get_job_metadata(self):
|
||||||
"""Collect metadata for extractor-job"""
|
"""Collect metadata for extractor-job"""
|
||||||
return {
|
return {
|
||||||
"category": info["category"],
|
"category": self.category,
|
||||||
"tags": self.tags,
|
"tags": self.tags,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -10,21 +10,17 @@
|
|||||||
|
|
||||||
from .common import AsynchronousExtractor, Message
|
from .common import AsynchronousExtractor, Message
|
||||||
from .. import text
|
from .. import text
|
||||||
import os.path
|
|
||||||
|
|
||||||
info = {
|
|
||||||
"category": "spectrumnexus",
|
|
||||||
"extractor": "SpectrumNexusExtractor",
|
|
||||||
"directory": ["{category}", "{manga}", "c{chapter:>03}"],
|
|
||||||
"filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?(view\.thespectrum\.net/series/[^\.]+.html)\?ch=Chapter\+(\d+)",
|
|
||||||
r"(?:https?://)?(view\.thespectrum\.net/series/[^/]+-chapter-(\d+)\.html)",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class SpectrumNexusExtractor(AsynchronousExtractor):
|
class SpectrumNexusExtractor(AsynchronousExtractor):
|
||||||
|
|
||||||
|
category = "spectrumnexus"
|
||||||
|
directory_fmt = ["{category}", "{manga}", "c{chapter:>03}"]
|
||||||
|
filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
|
||||||
|
pattern = [
|
||||||
|
r"(?:https?://)?(view\.thespectrum\.net/series/[^\.]+.html)\?ch=Chapter\+(\d+)",
|
||||||
|
r"(?:https?://)?(view\.thespectrum\.net/series/[^/]+-chapter-(\d+)\.html)",
|
||||||
|
]
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
AsynchronousExtractor.__init__(self)
|
AsynchronousExtractor.__init__(self)
|
||||||
self.url = "http://" + match.group(1)
|
self.url = "http://" + match.group(1)
|
||||||
@@ -52,7 +48,7 @@ class SpectrumNexusExtractor(AsynchronousExtractor):
|
|||||||
def get_job_metadata(self, page):
|
def get_job_metadata(self, page):
|
||||||
"""Collect metadata for extractor-job"""
|
"""Collect metadata for extractor-job"""
|
||||||
data = {
|
data = {
|
||||||
"category": info["category"],
|
"category": self.category,
|
||||||
"chapter": self.chapter,
|
"chapter": self.chapter,
|
||||||
}
|
}
|
||||||
return text.extract_all(page, (
|
return text.extract_all(page, (
|
||||||
|
|||||||
@@ -8,20 +8,21 @@
|
|||||||
|
|
||||||
"""Extract image-urls from https://yande.re/"""
|
"""Extract image-urls from https://yande.re/"""
|
||||||
|
|
||||||
from .booru import JSONBooruExtractor
|
from . import booru
|
||||||
|
|
||||||
info = {
|
class YandereExtractor(booru.JSONBooruExtractor):
|
||||||
"category": "yandere",
|
"""Base class for yandere extractors"""
|
||||||
"extractor": "YandereExtractor",
|
category = "yandere"
|
||||||
"directory": ["{category}", "{tags}"],
|
api_url = "https://yande.re/post.json"
|
||||||
"filename": "{category}_{id}_{md5}.{extension}",
|
|
||||||
"pattern": [
|
|
||||||
r"(?:https?://)?(?:www\.)?yande\.re/post\?tags=([^&]+).*",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
class YandereExtractor(JSONBooruExtractor):
|
class YandereTagExtractor(YandereExtractor, booru.BooruTagExtractor):
|
||||||
|
"""Extract images from yandere based on search-tags"""
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post\?tags=([^&]+)"]
|
||||||
|
|
||||||
def __init__(self, match):
|
class YanderePoolExtractor(YandereExtractor, booru.BooruPoolExtractor):
|
||||||
JSONBooruExtractor.__init__(self, match, info)
|
"""Extract image-pools from yandere"""
|
||||||
self.api_url = "https://yande.re/post.json"
|
pattern = [r"(?:https?://)?(?:www\.)?yande.re/pool/show/(\d+)"]
|
||||||
|
|
||||||
|
class YanderePostExtractor(YandereExtractor, booru.BooruPostExtractor):
|
||||||
|
"""Extract single images from yandere"""
|
||||||
|
pattern = [r"(?:https?://)?(?:www\.)?yande.re/post/show/(\d+)"]
|
||||||
|
|||||||
@@ -14,19 +14,19 @@ from .extractor.common import Message
|
|||||||
class DownloadJob():
|
class DownloadJob():
|
||||||
|
|
||||||
def __init__(self, url):
|
def __init__(self, url):
|
||||||
self.extractor, self.info = extractor.find(url)
|
self.extractor = extractor.find(url)
|
||||||
if self.extractor is None:
|
if self.extractor is None:
|
||||||
print(url, ": No extractor found", sep="", file=sys.stderr)
|
print(url, ": No extractor found", sep="", file=sys.stderr)
|
||||||
return
|
return
|
||||||
self.directory = self.get_base_directory()
|
self.directory = self.get_base_directory()
|
||||||
self.downloaders = {}
|
self.downloaders = {}
|
||||||
self.filename_fmt = config.get(
|
self.filename_fmt = config.get(
|
||||||
("extractor", self.info["category"], "filename"),
|
("extractor", self.extractor.category, "filename"),
|
||||||
default=self.info["filename"]
|
default=self.extractor.filename_fmt
|
||||||
)
|
)
|
||||||
segments = config.get(
|
segments = config.get(
|
||||||
("extractor", self.info["category"], "directory"),
|
("extractor", self.extractor.category, "directory"),
|
||||||
default=self.info["directory"]
|
default=self.extractor.directory_fmt
|
||||||
)
|
)
|
||||||
self.directory_fmt = os.path.join(*segments)
|
self.directory_fmt = os.path.join(*segments)
|
||||||
|
|
||||||
@@ -51,7 +51,7 @@ class DownloadJob():
|
|||||||
elif msg[0] == Message.Version:
|
elif msg[0] == Message.Version:
|
||||||
if msg[1] != 1:
|
if msg[1] != 1:
|
||||||
raise "unsupported message-version ({}, {})".format(
|
raise "unsupported message-version ({}, {})".format(
|
||||||
self.info.category, msg[1]
|
self.extractor.category, msg[1]
|
||||||
)
|
)
|
||||||
# TODO: support for multiple message versions
|
# TODO: support for multiple message versions
|
||||||
|
|
||||||
@@ -118,7 +118,7 @@ class DownloadJob():
|
|||||||
class KeywordJob():
|
class KeywordJob():
|
||||||
|
|
||||||
def __init__(self, url):
|
def __init__(self, url):
|
||||||
self.extractor, self.info = extractor.find(url)
|
self.extractor = extractor.find(url)
|
||||||
if self.extractor is None:
|
if self.extractor is None:
|
||||||
print(url, ": No extractor found", sep="", file=sys.stderr)
|
print(url, ": No extractor found", sep="", file=sys.stderr)
|
||||||
return
|
return
|
||||||
|
|||||||
Reference in New Issue
Block a user