Merge branch 'multi-extractor'

2015-11-21 04:29:38 +01:00
parent ad12180027 c0aaf230d5
commit 68e7f7427d
34 changed files with 348 additions and 419 deletions
--- a/gallery_dl/extractor/3dbooru.py
+++ b/gallery_dl/extractor/3dbooru.py
@@ -8,24 +8,25 @@

 """Extract image-urls from http://behoimi.org/"""

-from .booru import JSONBooruExtractor
+from . import booru

-info = {
-    "category": "3dbooru",
-    "extractor": "ThreeDeeBooruExtractor",
-    "directory": ["{category}", "{tags}"],
-    "filename": "{category}_{id}_{md5}.{extension}",
-    "pattern": [
-        r"(?:https?://)?(?:www\.)?behoimi\.org/post(?:/(?:index)?)?\?tags=([^&]+).*",
-    ],
-}
+class ThreeDeeBooruExtractor(booru.JSONBooruExtractor):
+    """Base class for 3dbooru extractors"""
+    category = "3dbooru"
+    api_url = "http://behoimi.org/post/index.json"
+    headers = {
+        "Referer": "http://behoimi.org/post/show/",
+        "User-Agent": "Mozilla/5.0",
+    }

-class ThreeDeeBooruExtractor(JSONBooruExtractor):
+class ThreeDeeBooruTagExtractor(ThreeDeeBooruExtractor, booru.BooruTagExtractor):
+    """Extract images from 3dbooru based on search-tags"""
+    pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post(?:/(?:index)?)?\?tags=([^&]+)"]

-    def __init__(self, match):
-        JSONBooruExtractor.__init__(self, match, info)
-        self.api_url = "http://behoimi.org/post/index.json"
-        self.headers = {
-            "Referer": "http://behoimi.org/post/show/",
-            "User-Agent": "Mozilla/5.0"
-        }
+class ThreeDeeBooruPoolExtractor(ThreeDeeBooruExtractor, booru.BooruPoolExtractor):
+    """Extract image-pools from 3dbooru"""
+    pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(\d+)"]
+
+class ThreeDeeBooruPostExtractor(ThreeDeeBooruExtractor, booru.BooruPostExtractor):
+    """Extract single images from 3dbooru"""
+    pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(\d+)"]
--- a/gallery_dl/extractor/4chan.py
+++ b/gallery_dl/extractor/4chan.py
@@ -10,23 +10,14 @@

 from .chan import ChanExtractor

-info = {
-    "category": "4chan",
-    "extractor": "FourChanExtractor",
-    "directory": ["{category}", "{board}-{thread}"],
-    "filename": "{tim}-{filename}{ext}",
-    "pattern": [
-        r"(?:https?://)?boards\.4chan\.org/([^/]+)/thread/(\d+).*",
-    ],
-}
-
 class FourChanExtractor(ChanExtractor):

+    category = "4chan"
+    pattern = [r"(?:https?://)?boards\.4chan\.org/([^/]+)/thread/(\d+)"]
    api_url = "https://a.4cdn.org/{board}/thread/{thread}.json"
    file_url = "https://i.4cdn.org/{board}/{tim}{ext}"

    def __init__(self, match):
        ChanExtractor.__init__(
-            self, info["category"],
-            match.group(1), match.group(2)
+            self, match.group(1), match.group(2)
        )
--- a/gallery_dl/extractor/8chan.py
+++ b/gallery_dl/extractor/8chan.py
@@ -10,23 +10,14 @@

 from .chan import ChanExtractor

-info = {
-    "category": "8chan",
-    "extractor": "InfinityChanExtractor",
-    "directory": ["{category}", "{board}-{thread}"],
-    "filename": "{tim}-{filename}{ext}",
-    "pattern": [
-        r"(?:https?://)?(?:www\.)?8ch\.net/([^/]+)/res/(\d+).*",
-    ],
-}
-
 class InfinityChanExtractor(ChanExtractor):

+    category = "8chan"
+    pattern = [r"(?:https?://)?(?:www\.)?8ch\.net/([^/]+)/res/(\d+)"]
    api_url = "https://8ch.net/{board}/res/{thread}.json"
    file_url = "https://8ch.net/{board}/src/{tim}{ext}"

    def __init__(self, match):
        ChanExtractor.__init__(
-            self, info["category"],
-            match.group(1), match.group(2)
+            self, match.group(1), match.group(2)
        )
--- a/gallery_dl/extractor/init.py
+++ b/gallery_dl/extractor/init.py
@@ -46,11 +46,11 @@ modules = [

 def find(url):
    """Find extractor suitable for handling the given url"""
-    for pattern, module, klass in _list_patterns():
+    for pattern, klass in _list_patterns():
        match = re.match(pattern, url)
        if match:
-            return klass(match), module.info
-    return None, None
+            return klass(match)
+    return None

 # --------------------------------------------------------------------
 # internals
@@ -59,15 +59,22 @@ _cache = []
 _module_iter = iter(modules)

 def _list_patterns():
-    """Yield all available (pattern, module, klass) tuples"""
+    """Yield all available (pattern, class) tuples"""
    for entry in _cache:
        yield entry

    for module_name in _module_iter:
        module = importlib.import_module("."+module_name, __package__)
-        klass = getattr(module, module.info["extractor"])
-        userpatterns = config.get(("extractor", module_name, "pattern"), default=[])
-        for pattern in userpatterns + module.info["pattern"]:
-            etuple = (pattern, module, klass)
-            _cache.append(etuple)
-            yield etuple
+        for klass in _get_classes(module):
+            for pattern in klass.pattern:
+                etuple = (pattern, klass)
+                _cache.append(etuple)
+                yield etuple
+
+def _get_classes(module):
+    """Return a list of all extractor classes in a module"""
+    return [
+        klass for klass in module.__dict__.values() if (
+            hasattr(klass, "pattern") and klass.__module__ == module.__name__
+        )
+    ]
--- a/gallery_dl/extractor/batoto.py
+++ b/gallery_dl/extractor/batoto.py
@@ -10,21 +10,14 @@

 from .common import AsynchronousExtractor, Message
 from .. import text, iso639_1
-import os.path
 import re

-info = {
-    "category": "batoto",
-    "extractor": "BatotoExtractor",
-    "directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"],
-    "filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}",
-    "pattern": [
-        r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)",
-    ],
-}
-
 class BatotoExtractor(AsynchronousExtractor):

+    category = "batoto"
+    directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"]
+    filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
+    pattern = [r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)"]
    url = "https://bato.to/areader"

    def __init__(self, match):
@@ -68,7 +61,7 @@ class BatotoExtractor(AsynchronousExtractor):
        manga, pos = extr(page, "document.title = '", " - ", pos)
        match = re.match(r"(Vol.(\d+) )?Ch.(\d+)([^:]*)(: (.+))?", cinfo)
        return {
-            "category": info["category"],
+            "category": self.category,
            "token": self.token,
            "manga": manga,
            "volume": match.group(2) or "",
--- a/gallery_dl/extractor/booru.py
+++ b/gallery_dl/extractor/booru.py
@@ -12,20 +12,20 @@ from .common import Extractor, Message
 from .. import text
 import xml.etree.ElementTree as ET
 import json
-import os.path
 import urllib.parse

 class BooruExtractor(Extractor):

+    info = {}
+    headers = {}
+    page = "page"
    api_url = ""
+    category = ""

-    def __init__(self, match, info):
+    def __init__(self):
        Extractor.__init__(self)
-        self.info = info
-        self.tags = text.unquote(match.group(1))
-        self.page = "page"
-        self.params = {"tags": self.tags}
-        self.headers = {}
+        self.params = {"limit": 50}
+        self.setup()

    def items(self):
        yield Message.Version, 1
@@ -40,6 +40,9 @@ class BooruExtractor(Extractor):
    def items_impl(self):
        pass

+    def setup(self):
+        pass
+
    def update_page(self, reset=False):
        """Update the value of the 'page' parameter"""
        # Override this method in derived classes if necessary.
@@ -51,14 +54,14 @@ class BooruExtractor(Extractor):

    def get_job_metadata(self):
        """Collect metadata for extractor-job"""
+        # Override this method in derived classes
        return {
-            "category": self.info["category"],
-            "tags": self.tags
+            "category": self.category,
        }

    def get_file_metadata(self, data):
        """Collect metadata for a downloadable file"""
-        data["category"] = self.info["category"]
+        data["category"] = self.category
        return text.nameext_from_url(self.get_file_url(data), data)

    def get_file_url(self, data):
@@ -78,10 +81,10 @@ class JSONBooruExtractor(BooruExtractor):
                self.request(self.api_url, verify=True, params=self.params,
                             headers=self.headers).text
            )
-            if len(images) == 0:
-                return
            for data in images:
                yield data
+            if len(images) < self.params["limit"]:
+                return
            self.update_page()


@@ -93,8 +96,56 @@ class XMLBooruExtractor(BooruExtractor):
            root = ET.fromstring(
                self.request(self.api_url, verify=True, params=self.params).text
            )
-            if len(root) == 0:
-                return
            for item in root:
                yield item.attrib
+            if len(root) < self.params["limit"]:
+                return
            self.update_page()
+
+
+class BooruTagExtractor(BooruExtractor):
+    """Extract images based on search-tags"""
+
+    directory_fmt = ["{category}", "{tags}"]
+    filename_fmt = "{category}_{id}_{md5}.{extension}"
+
+    def __init__(self, match):
+        BooruExtractor.__init__(self)
+        self.tags = text.unquote(match.group(1))
+        self.params["tags"] = self.tags
+
+    def get_job_metadata(self):
+        return {
+            "category": self.category,
+            "tags": self.tags,
+        }
+
+
+class BooruPoolExtractor(BooruExtractor):
+    """Extract image-pools"""
+
+    directory_fmt = ["{category}", "pool", "{pool}"]
+    filename_fmt = "{category}_{id}_{md5}.{extension}"
+
+    def __init__(self, match):
+        BooruExtractor.__init__(self)
+        self.pool = match.group(1)
+        self.params["tags"] = "pool:" + self.pool
+
+    def get_job_metadata(self):
+        return {
+            "category": self.category,
+            "pool": self.pool,
+        }
+
+
+class BooruPostExtractor(BooruExtractor):
+    """Extract single images"""
+
+    directory_fmt = ["{category}"]
+    filename_fmt = "{category}_{id}_{md5}.{extension}"
+
+    def __init__(self, match):
+        BooruExtractor.__init__(self)
+        self.post = match.group(1)
+        self.params["tags"] = "id:" + self.post
--- a/gallery_dl/extractor/chan.py
+++ b/gallery_dl/extractor/chan.py
@@ -13,13 +13,15 @@ from .. import text

 class ChanExtractor(Extractor):

+    directory_fmt = ["{category}", "{board}-{thread}"]
+    filename_fmt = "{tim}-{filename}{ext}"
    api_url = ""
    file_url = ""

-    def __init__(self, category, board, thread):
+    def __init__(self, board, thread):
        Extractor.__init__(self)
        self.metadata = {
-            "category": category,
+            "category": self.category,
            "board": board,
            "thread": thread,
        }
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -8,20 +8,21 @@

 """Extract image-urls from https://danbooru.donmai.us/"""

-from .booru import JSONBooruExtractor
+from . import booru

-info = {
-    "category": "danbooru",
-    "extractor": "DanbooruExtractor",
-    "directory": ["{category}", "{tags}"],
-    "filename": "{category}_{id}_{md5}.{extension}",
-    "pattern": [
-        r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts\?(?:utf8=%E2%9C%93&)?tags=([^&]+).*",
-    ],
-}
+class DanbooruExtractor(booru.JSONBooruExtractor):
+    """Base class for danbooru extractors"""
+    category = "danbooru"
+    api_url = "https://danbooru.donmai.us/posts.json"

-class DanbooruExtractor(JSONBooruExtractor):
+class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor):
+    """Extract images from danbooru based on search-tags"""
+    pattern = [r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts\?(?:utf8=%E2%9C%93&)?tags=([^&]+)"]

-    def __init__(self, match):
-        JSONBooruExtractor.__init__(self, match, info)
-        self.api_url = "https://danbooru.donmai.us/posts.json"
+class DanbooruPoolExtractor(DanbooruExtractor, booru.BooruPoolExtractor):
+    """Extract image-pools from danbooru"""
+    pattern = [r"(?:https?://)?(?:www\.)?danbooru.donmai.us/pools/(\d+)"]
+
+class DanbooruPostExtractor(DanbooruExtractor, booru.BooruPostExtractor):
+    """Extract single images from danbooru"""
+    pattern = [r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts/(\d+)"]
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -10,21 +10,15 @@

 from .common import AsynchronousExtractor, Message
 from .. import text
-import os.path
 import re

-info = {
-    "category": "deviantart",
-    "extractor": "DeviantArtExtractor",
-    "directory": ["{category}", "{artist}"],
-    "filename": "{category}_{index}_{title}.{extension}",
-    "pattern": [
-        r"(?:https?://)?([^\.]+)\.deviantart\.com/gallery/.*",
-    ],
-}
-
 class DeviantArtExtractor(AsynchronousExtractor):

+    category = "deviantart"
+    directory_fmt = ["{category}", "{artist}"]
+    filename_fmt = "{category}_{index}_{title}.{extension}"
+    pattern = [r"(?:https?://)?([^\.]+)\.deviantart\.com/gallery/.*"]
+
    def __init__(self, match):
        AsynchronousExtractor.__init__(self)
        self.session.cookies["agegate_state"] = "1"
@@ -57,14 +51,14 @@ class DeviantArtExtractor(AsynchronousExtractor):
    def get_job_metadata(self):
        """Collect metadata for extractor-job"""
        return {
-            "category": info["category"],
+            "category": self.category,
            "artist": self.artist,
        }

    def get_image_metadata(self, image):
        """Collect metadata for an image"""
        match = self.extract_data(image, 'title',
-            '(.+) by (.+), ([A-Z][a-z]{2} \d+, \d{4}) in')
+            r'(.+) by (.+), ([A-Z][a-z]{2} \d+, \d{4}) in')
        if image.startswith(" ismature"):
            # adult image
            url, _ = text.extract(image, 'href="', '"')
@@ -76,7 +70,7 @@ class DeviantArtExtractor(AsynchronousExtractor):
            height, pos = text.extract(page, ' height="', '"', pos)
        else:
            # normal image
-            index = self.extract_data(image, 'href', '[^"]+-(\d+)').group(1)
+            index = self.extract_data(image, 'href', r'[^"]+-(\d+)').group(1)
            url, pos = text.extract(image, ' data-super-full-img="', '"', match.end())
            if url:
                width , pos = text.extract(image, ' data-super-full-width="', '"', pos)
--- a/gallery_dl/extractor/e621.py
+++ b/gallery_dl/extractor/e621.py
@@ -8,21 +8,24 @@

 """Extract image-urls from https://e621.net/"""

-from .booru import JSONBooruExtractor
+from . import booru

-info = {
-    "category": "e621",
-    "extractor": "E621Extractor",
-    "directory": ["{category}", "{tags}"],
-    "filename": "{category}_{id}_{md5}.{extension}",
-    "pattern": [
+class E621Extractor(booru.JSONBooruExtractor):
+    """Base class for e621 extractors"""
+    category = "e621"
+    api_url = "https://e621.net/post/index.json"
+
+class E621TagExtractor(E621Extractor, booru.BooruTagExtractor):
+    """Extract images from e621 based on search-tags"""
+    pattern = [
        r"(?:https?://)?(?:www\.)?e621\.net/post/index/\d+/([^?]+)",
-        r"(?:https?://)?(?:www\.)?e621\.net/post\?tags=([^&]+).*"
-    ],
-}
+        r"(?:https?://)?(?:www\.)?e621\.net/post\?tags=([^&]+)",
+    ]

-class E621Extractor(JSONBooruExtractor):
+class E621PoolExtractor(E621Extractor, booru.BooruPoolExtractor):
+    """Extract image-pools from e621"""
+    pattern = [r"(?:https?://)?(?:www\.)?e621\.net/pool/show/(\d+)"]

-    def __init__(self, match):
-        JSONBooruExtractor.__init__(self, match, info)
-        self.api_url = "https://e621.net/post/index.json"
+class E621PostExtractor(E621Extractor, booru.BooruPostExtractor):
+    """Extract single images from e621"""
+    pattern = [r"(?:https?://)?(?:www\.)?e621\.net/post/show/(\d+)"]
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -13,18 +13,12 @@ from .. import config, text, iso639_1
 import time
 import random

-info = {
-    "category": "exhentai",
-    "extractor": "ExhentaiExtractor",
-    "directory": ["{category}", "{gallery-id}"],
-    "filename": "{gallery-id}_{num:>04}_{imgkey}_{name}.{extension}",
-    "pattern": [
-        r"(?:https?://)?(g\.e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})",
-    ],
-}
-
 class ExhentaiExtractor(Extractor):

+    category = "exhentai"
+    directory_fmt = ["{category}", "{gallery-id}"]
+    filename_fmt = "{gallery-id}_{num:>04}_{imgkey}_{name}.{extension}"
+    pattern = [r"(?:https?://)?(g\.e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})"]
    api_url = "http://exhentai.org/api.php"

    def __init__(self, match):
@@ -70,7 +64,7 @@ class ExhentaiExtractor(Extractor):
    def get_job_metadata(self, page):
        """Collect metadata for extractor-job"""
        data = {
-            "category"     : info["category"],
+            "category"     : self.category,
            "gallery-id"   : self.gid,
            "gallery-token": self.token,
        }
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -8,27 +8,19 @@

 """Extract image-urls from http://gelbooru.com/"""

-from .booru import XMLBooruExtractor
+from . import booru
 from .. import config

-info = {
-    "category": "gelbooru",
-    "extractor": "GelbooruExtractor",
-    "directory": ["{category}", "{tags}"],
-    "filename": "{category}_{id}_{md5}.{extension}",
-    "pattern": [
-        r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?page=post&s=list&tags=([^&]+).*",
-    ],
-}
+class GelbooruExtractor(booru.XMLBooruExtractor):
+    """Base class for gelbooru extractors"""

-class GelbooruExtractor(XMLBooruExtractor):
+    category = "gelbooru"
+    api_url = "http://gelbooru.com/"

-    def __init__(self, match):
-        XMLBooruExtractor.__init__(self, match, info)
-        self.api_url = "http://gelbooru.com/"
-        self.params = {"page":"dapi", "s":"post", "q":"index", "tags":self.tags}
+    def setup(self):
+        self.params.update({"page":"dapi", "s":"post", "q":"index"})
        self.session.cookies.update(
-            config.get(("extractor", info["category"], "cookies"))
+            config.get(("extractor", self.category, "cookies"))
        )

    def update_page(self, reset=False):
@@ -36,3 +28,16 @@ class GelbooruExtractor(XMLBooruExtractor):
            self.params["pid"] += 1
        else:
            self.params["pid"] = 0
+
+class GelbooruTagExtractor(GelbooruExtractor, booru.BooruTagExtractor):
+    """Extract images from gelbooru based on search-tags"""
+    pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?page=post&s=list&tags=([^&]+)"]
+
+# TODO: find out how to access pools via gelbooru-api
+# class GelbooruPoolExtractor(GelbooruExtractor, booru.BooruPoolExtractor):
+    # """Extract image-pools from gelbooru"""
+    # pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?page=pool&s=show&id=(\d+)"]
+
+class GelbooruPostExtractor(GelbooruExtractor, booru.BooruPostExtractor):
+    """Extract single images from gelbooru"""
+    pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?page=post&s=view&id=(\d+)"]
--- a/gallery_dl/extractor/hbrowse.py
+++ b/gallery_dl/extractor/hbrowse.py
@@ -10,20 +10,13 @@

 from .common import Extractor, Message
 from .. import text
-import os.path
-
-info = {
-    "category": "hbrowse",
-    "extractor": "HbrowseExtractor",
-    "directory": ["{category}", "{gallery-id} {title}"],
-    "filename": "{category}_{gallery-id}_{num:>03}.{extension}",
-    "pattern": [
-        r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/(c\d+)",
-    ],
-}

 class HbrowseExtractor(Extractor):

+    category = "hbrowse"
+    directory_fmt = ["{category}", "{gallery-id} {title}"]
+    filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}"
+    pattern = [r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/(c\d+)"]
    url_base = "http://www.hbrowse.com/thumbnails/"

    def __init__(self, match):
@@ -43,7 +36,7 @@ class HbrowseExtractor(Extractor):
    def get_job_metadata(self, page):
        """Collect metadata for extractor-job"""
        data = {
-            "category": info["category"],
+            "category": self.category,
            'gallery-id': self.gid,
            'chapter': int(self.chapter[1:]),
        }
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@@ -12,19 +12,15 @@ from .common import Extractor, Message
 from .. import text
 import os.path

-info = {
-    "category": "hentaifoundry",
-    "extractor": "HentaiFoundryExtractor",
-    "directory": ["{category}", "{artist}"],
-    "filename": "{category}_{index}_{title}.{extension}",
-    "pattern": [
-        r"(?:https?://)?(?:www\.)?hentai-foundry\.com/pictures/user/([^/]+)",
-        r"(?:https?://)?(?:www\.)?hentai-foundry\.com/user/([^/]+)/profile",
-    ],
-}
-
 class HentaiFoundryExtractor(Extractor):

+    category = "hentaifoundry"
+    directory_fmt = ["{category}", "{artist}"]
+    filename_fmt = "{category}_{index}_{title}.{extension}"
+    pattern = [
+        r"(?:https?://)?(?:www\.)?hentai-foundry\.com/pictures/user/([^/]+)",
+        r"(?:https?://)?(?:www\.)?hentai-foundry\.com/user/([^/]+)/profile",
+    ]
    url_base = "http://www.hentai-foundry.com/pictures/user/"

    def __init__(self, match):
@@ -60,7 +56,7 @@ class HentaiFoundryExtractor(Extractor):
        token, pos = text.extract(page, 'hidden" value="', '"')
        count, pos = text.extract(page, 'class="active" >Pictures (', ')', pos)
        return {
-            "category": info["category"],
+            "category": self.category,
            "artist": self.artist,
            "count": count,
        }, token
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -10,21 +10,15 @@

 from .common import Extractor, Message
 from .. import text, iso639_1
-import os.path
 import string

-info = {
-    "category": "hitomi",
-    "extractor": "HitomiExtractor",
-    "directory": ["{category}", "{gallery-id} {title}"],
-    "filename": "{category}_{gallery-id}_{num:>03}_{name}.{extension}",
-    "pattern": [
-        r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)\.html",
-    ],
-}
-
 class HitomiExtractor(Extractor):

+    category = "hitomi"
+    directory_fmt = ["{category}", "{gallery-id} {title}"]
+    filename_fmt = "{category}_{gallery-id}_{num:>03}_{name}.{extension}"
+    pattern = [r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)\.html"]
+
    def __init__(self, match):
        Extractor.__init__(self)
        self.gid = match.group(1)
@@ -62,7 +56,7 @@ class HitomiExtractor(Extractor):
            series, pos = text.extract(page, '.html">', '</a>', pos)
        lang = lang.capitalize()
        return {
-            "category": info["category"],
+            "category": self.category,
            "gallery-id": self.gid,
            "title": title,
            "artist": string.capwords(artist),
--- a/gallery_dl/extractor/imagebam.py
+++ b/gallery_dl/extractor/imagebam.py
@@ -10,20 +10,13 @@

 from .common import AsynchronousExtractor, Message
 from .. import text
-import os.path
-
-info = {
-    "category": "imagebam",
-    "extractor": "ImagebamExtractor",
-    "directory": ["{category}", "{title} - {gallery-key}"],
-    "filename": "{num:>03}-{filename}",
-    "pattern": [
-        r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([^/]+).*",
-    ],
-}

 class ImagebamExtractor(AsynchronousExtractor):

+    category = "imagebam"
+    directory_fmt = ["{category}", "{title} - {gallery-key}"]
+    filename_fmt = "{num:>03}-{filename}"
+    pattern = [r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([^/]+).*"]
    url_base = "http://www.imagebam.com"

    def __init__(self, match):
@@ -47,7 +40,7 @@ class ImagebamExtractor(AsynchronousExtractor):
        response.encoding = "utf-8"
        page = response.text
        data = {
-            "category": info["category"],
+            "category": self.category,
            "gallery-key": self.gkey,
        }
        data, _ = text.extract_all(page, (
--- a/gallery_dl/extractor/imgbox.py
+++ b/gallery_dl/extractor/imgbox.py
@@ -12,18 +12,12 @@ from .common import AsynchronousExtractor, Message
 from .. import text
 import re

-info = {
-    "category": "imgbox",
-    "extractor": "ImgboxExtractor",
-    "directory": ["{category}", "{title} - {gallery-key}"],
-    "filename": "{num:>03}-{name}",
-    "pattern": [
-        r"(?:https?://)?(?:www\.)?imgbox\.com/g/(.+)",
-    ],
-}
-
 class ImgboxExtractor(AsynchronousExtractor):

+    category = "imgbox"
+    directory_fmt = ["{category}", "{title} - {gallery-key}"]
+    filename_fmt = "{num:>03}-{name}"
+    pattern = [r"(?:https?://)?(?:www\.)?imgbox\.com/g/(.+)"]
    url_base = "http://imgbox.com"

    def __init__(self, match):
@@ -44,7 +38,7 @@ class ImgboxExtractor(AsynchronousExtractor):
        """Collect metadata for extractor-job"""
        match = re.search(r"<h1>(.+) \(([^ ]+) ([^ ]+) \w+\) - (\d+)", page)
        return {
-            "category": info["category"],
+            "category": self.category,
            "gallery-key": self.key,
            "title": match.group(1),
            "date": match.group(2),
@@ -62,7 +56,8 @@ class ImgboxExtractor(AsynchronousExtractor):
        ), values=data)
        return data

-    def get_file_url(self, page):
+    @staticmethod
+    def get_file_url(page):
        """Extract download-url"""
        base = "http://i.imgbox.com/"
        path, _ = text.extract(page, base, '"')
--- a/gallery_dl/extractor/imgchili.py
+++ b/gallery_dl/extractor/imgchili.py
@@ -12,18 +12,13 @@ from .common import Extractor, Message
 from .. import text
 import re

-info = {
-    "category": "imgchili",
-    "extractor": "ImgchiliExtractor",
-    "directory": ["{category}", "{title} - {key}"],
-    "filename": "{num:>03}-{name}",
-    "pattern": [
-        r"(?:https?://)?(?:www\.)?imgchili\.net/album/([^/]+)",
-    ],
-}
-
 class ImgchiliExtractor(Extractor):

+    category = "imgchili"
+    directory_fmt = ["{category}", "{title} - {key}"]
+    filename_fmt = "{num:>03}-{name}"
+    pattern = [r"(?:https?://)?(?:www\.)?imgchili\.net/album/([^/]+)"]
+
    def __init__(self, match):
        Extractor.__init__(self)
        self.match = match
--- a/gallery_dl/extractor/imgth.py
+++ b/gallery_dl/extractor/imgth.py
@@ -10,20 +10,14 @@

 from .common import Extractor, Message
 from .. import text
-import os.path
-
-info = {
-    "category": "imgth",
-    "extractor": "ImgthExtractor",
-    "directory": ["{category}", "{gallery-id} {title}"],
-    "filename": "{category}_{gallery-id}_{num:>03}.{extension}",
-    "pattern": [
-        r"(?:https?://)?imgth\.com/gallery/(\d+)",
-    ],
-}

 class ImgthExtractor(Extractor):

+    category = "imgth"
+    directory_fmt = ["{category}", "{gallery-id} {title}"]
+    filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}"
+    pattern = [r"(?:https?://)?imgth\.com/gallery/(\d+)"]
+
    def __init__(self, match):
        Extractor.__init__(self)
        self.gid = match.group(1)
@@ -55,7 +49,7 @@ class ImgthExtractor(Extractor):
    def get_job_metadata(self, page):
        """Collect metadata for extractor-job"""
        data = {
-            "category": info["category"],
+            "category": self.category,
            "gallery-id": self.gid,
        }
        data, _ = text.extract_all(page, (
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -12,18 +12,13 @@ from .common import Extractor, Message
 from .. import text
 import os.path

-info = {
-    "category": "imgur",
-    "extractor": "ImgurExtractor",
-    "directory": ["{category}", "{album-key} - {title}"],
-    "filename": "{category}_{album-key}_{num:>03}_{name}.{extension}",
-    "pattern": [
-        r"(?:https?://)?(?:www\.)?imgur\.com/(?:a|gallery)/([^/?&#]+)",
-    ],
-}
-
 class ImgurExtractor(Extractor):

+    category = "imgur"
+    directory_fmt = ["{category}", "{album-key} - {title}"]
+    filename_fmt = "{category}_{album-key}_{num:>03}_{name}.{extension}"
+    pattern = [r"(?:https?://)?(?:www\.)?imgur\.com/(?:a|gallery)/([^/?&#]+)"]
+
    def __init__(self, match):
        Extractor.__init__(self)
        self.album = match.group(1)
@@ -43,7 +38,7 @@ class ImgurExtractor(Extractor):
        """Collect metadata for extractor-job"""
        page = self.request("https://imgur.com/a/" + self.album).text
        data = {
-            "category": info["category"],
+            "category": self.category,
            "album-key": self.album,
        }
        return text.extract_all(page, (
--- a/gallery_dl/extractor/kissmanga.py
+++ b/gallery_dl/extractor/kissmanga.py
@@ -10,21 +10,15 @@

 from .common import Extractor, Message
 from .. import text, cloudflare
-import os.path
 import re

-info = {
-    "category": "kissmanga",
-    "extractor": "KissmangaExtractor",
-    "directory": ["{category}", "{manga}", "c{chapter:>03}{chapter-minor} - {title}"],
-    "filename": "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}",
-    "pattern": [
-        r"(?:https?://)?(?:www\.)?kissmanga\.com/Manga/.+/.+\?id=\d+",
-    ],
-}
-
 class KissmangaExtractor(Extractor):

+    category = "kissmanga"
+    directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor} - {title}"]
+    filename_fmt = "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}"
+    pattern = [r"(?:https?://)?(?:www\.)?kissmanga\.com/Manga/.+/.+\?id=\d+"]
+
    def __init__(self, match):
        Extractor.__init__(self)
        self.url = match.group(0)
@@ -41,8 +35,7 @@ class KissmangaExtractor(Extractor):
            data["page"] = num
            yield Message.Url, url, text.nameext_from_url(url, data)

-    @staticmethod
-    def get_job_metadata(page):
+    def get_job_metadata(self, page):
        """Collect metadata for extractor-job"""
        manga, pos = text.extract(page, "Read manga\n", "\n")
        cinfo, pos = text.extract(page, "", "\n", pos)
@@ -50,7 +43,7 @@ class KissmangaExtractor(Extractor):
            r"(?:Vol.0*(\d+) )?(?:Ch.)?0*(\d+)(?:\.0*(\d+))?(?:: (.+))?", cinfo)
        chminor = match.group(3)
        return {
-            "category": info["category"],
+            "category": self.category,
            "manga": manga,
            "volume": match.group(1) or "",
            "chapter": match.group(2),
--- a/gallery_dl/extractor/konachan.py
+++ b/gallery_dl/extractor/konachan.py
@@ -8,20 +8,21 @@

 """Extract image-urls from https://konachan.com/"""

-from .booru import JSONBooruExtractor
+from . import booru

-info = {
-    "category": "konachan",
-    "extractor": "KonachanExtractor",
-    "directory": ["{category}", "{tags}"],
-    "filename": "{category}_{id}_{md5}.{extension}",
-    "pattern": [
-        r"(?:https?://)?(?:www\.)?konachan\.com/post\?tags=([^&]+).*",
-    ],
-}
+class KonachanExtractor(booru.JSONBooruExtractor):
+    """Base class for konachan extractors"""
+    category = "konachan"
+    api_url = "https://konachan.com/post.json"

-class KonachanExtractor(JSONBooruExtractor):
+class KonachanTagExtractor(KonachanExtractor, booru.BooruTagExtractor):
+    """Extract images from konachan based on search-tags"""
+    pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/post\?tags=([^&]+)"]

-    def __init__(self, match):
-        JSONBooruExtractor.__init__(self, match, info)
-        self.api_url = "http://konachan.com/post.json"
+class KonachanPoolExtractor(KonachanExtractor, booru.BooruPoolExtractor):
+    """Extract image-pools from konachan"""
+    pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/pool/show/(\d+)"]
+
+class KonachanPostExtractor(KonachanExtractor, booru.BooruPostExtractor):
+    """Extract single images from konachan"""
+    pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/post/show/(\d+)"]
--- a/gallery_dl/extractor/mangapanda.py
+++ b/gallery_dl/extractor/mangapanda.py
@@ -10,18 +10,13 @@

 from .mangareader import MangaReaderExtractor

-info = {
-    "category": "mangapanda",
-    "extractor": "MangaPandaExtractor",
-    "directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"],
-    "filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}",
-    "pattern": [
-        r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/]+)/(\d+))",
-        r"(?:https?://)?(?:www\.)?mangapanda\.com(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)",
-    ],
-}
-
 class MangaPandaExtractor(MangaReaderExtractor):

-    category = info["category"]
+    category = "mangapanda"
+    directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"]
+    filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
+    pattern = [
+        r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/]+)/(\d+))",
+        r"(?:https?://)?(?:www\.)?mangapanda\.com(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+)\.html)",
+    ]
    url_base = "http://www.mangapanda.com"
--- a/gallery_dl/extractor/mangareader.py
+++ b/gallery_dl/extractor/mangareader.py
@@ -10,22 +10,16 @@

 from .common import AsynchronousExtractor, Message
 from .. import text
-import os.path
-
-info = {
-    "category": "mangareader",
-    "extractor": "MangaReaderExtractor",
-    "directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"],
-    "filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}",
-    "pattern": [
-        r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/]+)/(\d+))",
-        r"(?:https?://)?(?:www\.)?mangareader\.net(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)",
-    ],
-}

 class MangaReaderExtractor(AsynchronousExtractor):

-    category = info["category"]
+    category = "mangareader"
+    directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"]
+    filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
+    pattern = [
+        r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/]+)/(\d+))",
+        r"(?:https?://)?(?:www\.)?mangareader\.net(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+)\.html)",
+    ]
    url_base = "http://www.mangareader.net"

    def __init__(self, match):
--- a/gallery_dl/extractor/mangashare.py
+++ b/gallery_dl/extractor/mangashare.py
@@ -10,20 +10,13 @@

 from .common import AsynchronousExtractor, Message
 from .. import text
-import os
-
-info = {
-    "category": "mangashare",
-    "extractor": "MangaShareExtractor",
-    "directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"],
-    "filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}",
-    "pattern": [
-        r"(?:https?://)?read\.mangashare\.com/([^/]+/chapter-\d+)",
-    ],
-}

 class MangaShareExtractor(AsynchronousExtractor):

+    category = "mangashare"
+    directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"]
+    filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
+    pattern = [r"(?:https?://)?read\.mangashare\.com/([^/]+/chapter-\d+)"]
    url_fmt = "http://read.mangashare.com/{}/page{:>03}.html"

    def __init__(self, match):
@@ -40,11 +33,10 @@ class MangaShareExtractor(AsynchronousExtractor):
            text.nameext_from_url(url, data)
            yield Message.Url, url, data.copy()

-    @staticmethod
-    def get_job_metadata(page):
+    def get_job_metadata(self, page):
        """Collect metadata for extractor-job"""
        data = {
-            "category": info["category"],
+            "category": self.category,
            "lang": "en",
            "language": "English",
        }
--- a/gallery_dl/extractor/mangastream.py
+++ b/gallery_dl/extractor/mangastream.py
@@ -10,20 +10,13 @@

 from .common import AsynchronousExtractor, Message
 from .. import text
-import os.path
-
-info = {
-    "category": "mangastream",
-    "extractor": "MangaStreamExtractor",
-    "directory": ["{category}", "{manga}", "c{chapter:>03}{chapter-minor} - {title}"],
-    "filename": "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}",
-    "pattern": [
-        r"(?:https?://)?(?:www\.)?readms\.com/r/([^/]*/(\d+)([^/]*)?/(\d+))",
-    ],
-}

 class MangaStreamExtractor(AsynchronousExtractor):

+    category = "mangastream"
+    directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor} - {title}"]
+    filename_fmt = "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}"
+    pattern = [r"(?:https?://)?(?:www\.)?readms\.com/r/([^/]*/(\d+)([^/]*)?/(\d+))"]
    url_base = "https://readms.com/r/"

    def __init__(self, match):
@@ -47,7 +40,7 @@ class MangaStreamExtractor(AsynchronousExtractor):
    def get_job_metadata(self, page):
        """Collect metadata for extractor-job"""
        data = {
-            "category": info["category"],
+            "category": self.category,
            "chapter": self.chapter,
            "chapter-minor": self.ch_minor,
            "chapter-id": self.ch_id,
@@ -61,7 +54,8 @@ class MangaStreamExtractor(AsynchronousExtractor):
        ), values=data)
        return data

-    def get_page_metadata(self, page):
+    @staticmethod
+    def get_page_metadata(page):
        """Collect next url, image-url and metadata for one manga-page"""
        nurl, pos = text.extract(page, '<div class="page">\n<a href="', '"')
        iurl, pos = text.extract(page, '<img id="manga-page" src="', '"', pos)
--- a/gallery_dl/extractor/nhentai.py
+++ b/gallery_dl/extractor/nhentai.py
@@ -12,18 +12,13 @@ from .common import Extractor, Message
 from .. import text
 import json

-info = {
-    "category": "nhentai",
-    "extractor": "NhentaiExtractor",
-    "directory": ["{category}", "{gallery-id} {title}"],
-    "filename": "{category}_{gallery-id}_{num:>03}.{extension}",
-    "pattern": [
-        r"(?:https?://)?(?:www\.)?nhentai\.net/g/(\d+)",
-    ],
-}
-
 class NhentaiExtractor(Extractor):

+    category = "nhentai"
+    directory_fmt = ["{category}", "{gallery-id} {title}"]
+    filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}"
+    pattern = [r"(?:https?://)?(?:www\.)?nhentai\.net/g/(\d+)"]
+
    def __init__(self, match):
        Extractor.__init__(self)
        self.gid = match.group(1)
@@ -57,7 +52,7 @@ class NhentaiExtractor(Extractor):
        title_en = ginfo["title"].get("english", "")
        title_ja = ginfo["title"].get("japanese", "")
        return {
-            "category": info["category"],
+            "category": self.category,
            "gallery-id": self.gid,
            "upload-date": ginfo["upload_date"],
            "media-id": ginfo["media_id"],
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -12,18 +12,12 @@ from .common import AsynchronousExtractor, Message
 from .. import config, text
 import re

-info = {
-    "category": "nijie",
-    "extractor": "NijieExtractor",
-    "directory": ["{category}", "{artist-id}"],
-    "filename": "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}",
-    "pattern": [
-        r"(?:https?://)?(?:www\.)?nijie\.info/members(?:_illust)?\.php\?id=(\d+)",
-    ],
-}
-
 class NijieExtractor(AsynchronousExtractor):

+    category = "nijie"
+    directory_fmt = ["{category}", "{artist-id}"]
+    filename_fmt = "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}"
+    pattern = [r"(?:https?://)?(?:www\.)?nijie\.info/members(?:_illust)?\.php\?id=(\d+)"]
    popup_url = "https://nijie.info/view_popup.php?id="

    def __init__(self, match):
@@ -37,7 +31,7 @@ class NijieExtractor(AsynchronousExtractor):
        self.session.cookies["R18"] = "1"
        self.session.cookies["nijie_referer"] = "nijie.info"
        self.session.cookies.update(
-            config.get(("extractor", info["category"], "cookies"))
+            config.get(("extractor", self.category, "cookies"))
        )

    def items(self):
@@ -52,7 +46,7 @@ class NijieExtractor(AsynchronousExtractor):
    def get_job_metadata(self):
        """Collect metadata for extractor-job"""
        return {
-            "category": info["category"],
+            "category": self.category,
            "artist-id": self.artist_id,
        }

--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -14,19 +14,12 @@ import re
 import json
 import time

-info = {
-    "category": "pixiv",
-    "extractor": "PixivExtractor",
-    "directory": ["{category}", "{artist-id}-{artist-nick}"],
-    "filename": "{category}_{artist-id}_{id}{num}.{extension}",
-    "pattern": [
-        r"(?:https?://)?(?:www\.)?pixiv\.net/member(?:_illust)?\.php\?id=(\d+)",
-    ],
-}
-
-
 class PixivExtractor(Extractor):

+    category = "pixiv"
+    directory_fmt = ["{category}", "{artist-id}-{artist-nick}"]
+    filename_fmt = "{category}_{artist-id}_{id}{num}.{extension}"
+    pattern = [r"(?:https?://)?(?:www\.)?pixiv\.net/member(?:_illust)?\.php\?id=(\d+)"]
    member_url = "http://www.pixiv.net/member_illust.php"
    illust_url = "http://www.pixiv.net/member_illust.php?mode=medium"

@@ -121,7 +114,7 @@ class PixivExtractor(Extractor):
        """Collect metadata for extractor-job"""
        data = self.api.user(self.artist_id)["response"][0]
        return {
-            "category": info["category"],
+            "category": self.category,
            "artist-id": self.artist_id,
            "artist-name": data["name"],
            "artist-nick": data["account"],
--- a/gallery_dl/extractor/safebooru.py
+++ b/gallery_dl/extractor/safebooru.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-# Copyright 2014, 2015 Mike Fährmann
+# Copyright 2015 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -8,27 +8,27 @@

 """Extract image-urls from http://safebooru.org/"""

-from .booru import XMLBooruExtractor
+from . import booru

-info = {
-    "category": "safebooru",
-    "extractor": "SafebooruExtractor",
-    "directory": ["{category}", "{tags}"],
-    "filename": "{category}_{id}_{md5}.{extension}",
-    "pattern": [
-        r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?\?page=post&s=list&tags=([^&]+).*",
-    ],
-}
+class SafebooruExtractor(booru.XMLBooruExtractor):
+    """Base class for safebooru extractors"""

-class SafebooruExtractor(XMLBooruExtractor):
+    category = "safebooru"
+    api_url = "http://safebooru.org/index.php"

-    def __init__(self, match):
-        XMLBooruExtractor.__init__(self, match, info)
-        self.api_url = "http://safebooru.org/index.php"
-        self.params = {"page":"dapi", "s":"post", "q":"index", "tags":self.tags}
+    def setup(self):
+        self.params.update({"page":"dapi", "s":"post", "q":"index"})

    def update_page(self, reset=False):
        if reset is False:
            self.params["pid"] += 1
        else:
            self.params["pid"] = 0
+
+class SafebooruTagExtractor(SafebooruExtractor, booru.BooruTagExtractor):
+    """Extract images from safebooru based on search-tags"""
+    pattern = [r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?\?page=post&s=list&tags=([^&]+)"]
+
+class SafebooruPostExtractor(SafebooruExtractor, booru.BooruPostExtractor):
+    """Extract single images from safebooru"""
+    pattern = [r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?\?page=post&s=view&id=(\d+)"]
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -10,20 +10,13 @@

 from .common import AsynchronousExtractor, Message
 from .. import text
-import os.path
-
-info = {
-    "category": "sankaku",
-    "extractor": "SankakuExtractor",
-    "directory": ["{category}", "{tags}"],
-    "filename": "{category}_{id}_{md5}.{extension}",
-    "pattern": [
-        r"(?:https?://)?chan\.sankakucomplex\.com/\?tags=([^&]+)",
-    ],
-}

 class SankakuExtractor(AsynchronousExtractor):

+    category = "sankaku"
+    directory_fmt = ["{category}", "{tags}"]
+    filename_fmt = "{category}_{id}_{md5}.{extension}"
+    pattern = [r"(?:https?://)?chan\.sankakucomplex\.com/\?tags=([^&]+)"]
    url = "https://chan.sankakucomplex.com/"

    def __init__(self, match):
@@ -45,7 +38,7 @@ class SankakuExtractor(AsynchronousExtractor):
    def get_job_metadata(self):
        """Collect metadata for extractor-job"""
        return {
-            "category": info["category"],
+            "category": self.category,
            "tags": self.tags,
        }

--- a/gallery_dl/extractor/spectrumnexus.py
+++ b/gallery_dl/extractor/spectrumnexus.py
@@ -10,21 +10,17 @@

 from .common import AsynchronousExtractor, Message
 from .. import text
-import os.path
-
-info = {
-    "category": "spectrumnexus",
-    "extractor": "SpectrumNexusExtractor",
-    "directory": ["{category}", "{manga}", "c{chapter:>03}"],
-    "filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}",
-    "pattern": [
-        r"(?:https?://)?(view\.thespectrum\.net/series/[^\.]+.html)\?ch=Chapter\+(\d+)",
-        r"(?:https?://)?(view\.thespectrum\.net/series/[^/]+-chapter-(\d+)\.html)",
-    ],
-}

 class SpectrumNexusExtractor(AsynchronousExtractor):

+    category = "spectrumnexus"
+    directory_fmt = ["{category}", "{manga}", "c{chapter:>03}"]
+    filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
+    pattern = [
+        r"(?:https?://)?(view\.thespectrum\.net/series/[^\.]+\.html)\?ch=Chapter\+(\d+)",
+        r"(?:https?://)?(view\.thespectrum\.net/series/[^/]+-chapter-(\d+)\.html)",
+    ]
+
    def __init__(self, match):
        AsynchronousExtractor.__init__(self)
        self.url = "http://" + match.group(1)
@@ -52,7 +48,7 @@ class SpectrumNexusExtractor(AsynchronousExtractor):
    def get_job_metadata(self, page):
        """Collect metadata for extractor-job"""
        data = {
-            "category": info["category"],
+            "category": self.category,
            "chapter": self.chapter,
        }
        return text.extract_all(page, (
--- a/gallery_dl/extractor/yandere.py
+++ b/gallery_dl/extractor/yandere.py
@@ -8,20 +8,21 @@

 """Extract image-urls from https://yande.re/"""

-from .booru import JSONBooruExtractor
+from . import booru

-info = {
-    "category": "yandere",
-    "extractor": "YandereExtractor",
-    "directory": ["{category}", "{tags}"],
-    "filename": "{category}_{id}_{md5}.{extension}",
-    "pattern": [
-        r"(?:https?://)?(?:www\.)?yande\.re/post\?tags=([^&]+).*",
-    ],
-}
+class YandereExtractor(booru.JSONBooruExtractor):
+    """Base class for yandere extractors"""
+    category = "yandere"
+    api_url = "https://yande.re/post.json"

-class YandereExtractor(JSONBooruExtractor):
+class YandereTagExtractor(YandereExtractor, booru.BooruTagExtractor):
+    """Extract images from yandere based on search-tags"""
+    pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post\?tags=([^&]+)"]

-    def __init__(self, match):
-        JSONBooruExtractor.__init__(self, match, info)
-        self.api_url = "https://yande.re/post.json"
+class YanderePoolExtractor(YandereExtractor, booru.BooruPoolExtractor):
+    """Extract image-pools from yandere"""
+    pattern = [r"(?:https?://)?(?:www\.)?yande\.re/pool/show/(\d+)"]
+
+class YanderePostExtractor(YandereExtractor, booru.BooruPostExtractor):
+    """Extract single images from yandere"""
+    pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post/show/(\d+)"]
--- a/gallery_dl/jobs.py
+++ b/gallery_dl/jobs.py
@@ -14,19 +14,19 @@ from .extractor.common import Message
 class DownloadJob():

    def __init__(self, url):
-        self.extractor, self.info = extractor.find(url)
+        self.extractor = extractor.find(url)
        if self.extractor is None:
            print(url, ": No extractor found", sep="", file=sys.stderr)
            return
        self.directory = self.get_base_directory()
        self.downloaders = {}
        self.filename_fmt = config.get(
-            ("extractor", self.info["category"], "filename"),
-            default=self.info["filename"]
+            ("extractor", self.extractor.category, "filename"),
+            default=self.extractor.filename_fmt
        )
        segments = config.get(
-            ("extractor", self.info["category"], "directory"),
-            default=self.info["directory"]
+            ("extractor", self.extractor.category, "directory"),
+            default=self.extractor.directory_fmt
        )
        self.directory_fmt = os.path.join(*segments)

@@ -51,7 +51,7 @@ class DownloadJob():
            elif msg[0] == Message.Version:
                if msg[1] != 1:
                    raise "unsupported message-version ({}, {})".format(
-                        self.info.category, msg[1]
+                        self.extractor.category, msg[1]
                    )
                # TODO: support for multiple message versions

@@ -118,7 +118,7 @@ class DownloadJob():
 class KeywordJob():

    def __init__(self, url):
-        self.extractor, self.info = extractor.find(url)
+        self.extractor = extractor.find(url)
        if self.extractor is None:
            print(url, ": No extractor found", sep="", file=sys.stderr)
            return