[booru] rewrite using Mixin classes (#59)

- improved code structure
- improved URL patterns
- better pagination to work around page limits on
  - Danbooru
  - e621
  - 3dbooru
This commit is contained in:
Mike Fährmann
2018-01-03 23:52:01 +01:00
parent 0876541e43
commit 9e8a84ab6c
8 changed files with 298 additions and 224 deletions

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2017 Mike Fährmann
# Copyright 2015-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,57 +11,65 @@
from . import booru
class ThreedeebooruExtractor(booru.JSONBooruExtractor):
class ThreedeebooruExtractor(booru.JsonParserMixin,
booru.MoebooruPageMixin,
booru.BooruExtractor):
"""Base class for 3dbooru extractors"""
category = "3dbooru"
api_url = "http://behoimi.org/post/index.json"
headers = {
"Referer": "http://behoimi.org/post/show/",
"Accept-Encoding": "identity",
}
page_limit = 1000
def __init__(self, match):
super().__init__(match)
self.session.headers.update({
"Referer": "http://behoimi.org/post/show/",
"Accept-Encoding": "identity",
})
class ThreedeebooruTagExtractor(ThreedeebooruExtractor,
booru.BooruTagExtractor):
class ThreedeebooruTagExtractor(booru.TagMixin,
ThreedeebooruExtractor):
"""Extractor for images from behoimi.org based on search-tags"""
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post"
r"(?:/(?:index)?)?\?tags=([^&]+)"]
r"(?:/(?:index)?)?\?tags=(?P<tags>[^&#]+)"]
test = [("http://behoimi.org/post?tags=himekawa_azuru+dress", {
"url": "ecb30c6aaaf8a6ff8f55255737a9840832a483c1",
"content": "11cbda40c287e026c1ce4ca430810f761f2d0b2a",
})]
class ThreedeebooruPoolExtractor(ThreedeebooruExtractor,
booru.BooruPoolExtractor):
class ThreedeebooruPoolExtractor(booru.PoolMixin,
ThreedeebooruExtractor):
"""Extractor for image-pools from behoimi.org"""
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(\d+)"]
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(?P<pool>\d+)"]
test = [("http://behoimi.org/pool/show/27", {
"url": "da75d2d1475449d5ef0c266cb612683b110a30f2",
"content": "fd5b37c5c6c2de4b4d6f1facffdefa1e28176554",
})]
class ThreedeebooruPostExtractor(ThreedeebooruExtractor,
booru.BooruPostExtractor):
class ThreedeebooruPostExtractor(booru.PostMixin,
ThreedeebooruExtractor):
"""Extractor for single images from behoimi.org"""
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(\d+)"]
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(?P<post>\d+)"]
test = [("http://behoimi.org/post/show/140852", {
"url": "ce874ea26f01d6c94795f3cc3aaaaa9bc325f2f6",
"content": "26549d55b82aa9a6c1686b96af8bfcfa50805cd4",
})]
class ThreedeebooruPopularExtractor(ThreedeebooruExtractor,
booru.BooruPopularExtractor):
class ThreedeebooruPopularExtractor(booru.MoebooruPopularMixin,
ThreedeebooruExtractor):
"""Extractor for popular images from behoimi.org"""
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post/popular_"
r"(by_(?:day|week|month)|recent)(?:\?([^#]*))?"]
pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org"
r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)"
r"(?:\?(?P<query>[^#]*))?"]
test = [("http://behoimi.org/post/popular_by_month?month=2&year=2013", {
"url": "a447e115fdab60c25ab71c4fdb1b9f509bc23f99",
"count": 20,
})]
@property
def api_url(self):
return "http://behoimi.org/post/popular_" + self.scale + ".json"
def __init__(self, match):
super().__init__(match)
self.api_url = "http://behoimi.org/post/popular_{scale}.json".format(
scale=self.scale)

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2017 Mike Fährmann
# Copyright 2015-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -10,8 +10,8 @@
from .common import SharedConfigExtractor, Message
from .. import text
import xml.etree.ElementTree as ET
import urllib.parse
from urllib.parse import urljoin
from xml.etree import ElementTree
import datetime
import operator
@@ -20,166 +20,158 @@ class BooruExtractor(SharedConfigExtractor):
"""Base class for all booru extractors"""
basecategory = "booru"
filename_fmt = "{category}_{id}_{md5}.{extension}"
headers = {}
pagestart = 1
pagekey = "page"
api_url = ""
per_page = 50
page_start = 1
page_limit = None
sort = False
def __init__(self):
SharedConfigExtractor.__init__(self)
self.session.headers.update(self.headers)
self.params = {"limit": 50}
self.setup()
def __init__(self, match):
super().__init__()
self.params = {}
def skip(self, num):
    """Skip over 'num' posts without downloading them.

    Only whole pages are skipped; the number of posts actually
    skipped (a multiple of per_page) is returned.  For sites with a
    hard page limit, page_start is clamped so it never runs past
    page_limit.
    """
    full_pages = num // self.per_page
    if self.page_limit:
        # do not advance past the highest page the site will serve
        remaining = self.page_limit - self.page_start
        full_pages = min(full_pages, remaining)
    self.page_start += full_pages
    return full_pages * self.per_page
def items(self):
yield Message.Version, 1
yield Message.Directory, self.get_job_metadata()
for data in self.items_impl():
try:
url = self.get_file_url(data)
data = self.get_file_metadata(data)
yield Message.Url, url, data
except KeyError:
continue
yield Message.Directory, self.get_metadata()
def skip(self, num):
limit = self.params["limit"]
pages = num // limit
self.pagestart += pages
return pages * limit
self.reset_page()
while True:
images, count = self.parse_response(
self.request(self.api_url, params=self.params))
def items_impl(self):
pass
for data in images:
try:
url = data["file_url"]
if url.startswith("/"):
url = urljoin(self.api_url, url)
yield Message.Url, url, text.nameext_from_url(url, data)
except KeyError:
continue
def setup(self):
pass
if count < self.per_page:
return
self.update_page(data)
def update_page(self, reset=False):
"""Update the value of the 'page' parameter"""
# Override this method in derived classes if necessary.
# It is usually enough to just adjust the 'page' attribute
if reset is False:
self.params[self.pagekey] += 1
else:
self.params[self.pagekey] = self.pagestart
def reset_page(self):
    """Initialize params to point to the first page"""
    # NOTE(review): page_start may be None for popular extractors
    # (PopularMixin sets page_start = None) -- presumably the API then
    # falls back to its first page; confirm against the sites' APIs.
    self.params["page"] = self.page_start
def get_job_metadata(self):
def update_page(self, data):
"""Update params to point to the next page"""
def get_metadata(self):
"""Collect metadata for extractor-job"""
# Override this method in derived classes
return {}
def get_file_metadata(self, data):
"""Collect metadata for a downloadable file"""
return text.nameext_from_url(self.get_file_url(data), data)
def get_file_url(self, data):
"""Extract download-url from 'data'"""
url = data["file_url"]
if url.startswith("/"):
url = urllib.parse.urljoin(self.api_url, url)
return url
class JSONBooruExtractor(BooruExtractor):
"""Base class for JSON based API responses"""
class JsonParserMixin():
"""Class for JSON based API responses"""
sort = False
def items_impl(self):
self.update_page(reset=True)
while True:
images = self.request(self.api_url, params=self.params).json()
if self.sort:
images.sort(key=operator.itemgetter("score", "id"),
reverse=True)
yield from images
if len(images) < self.params["limit"]:
return
self.update_page()
def parse_response(self, response):
    """Decode a JSON API response.

    Returns a (images, count) pair, where 'images' is the decoded
    list of post dicts (sorted best-first when self.sort is set) and
    'count' is its length.
    """
    images = response.json()
    if self.sort:
        # highest score first; post id breaks ties
        images.sort(key=lambda image: (image["score"], image["id"]),
                    reverse=True)
    return images, len(images)
class XMLBooruExtractor(BooruExtractor):
"""Base class for XML based API responses"""
def items_impl(self):
self.update_page(reset=True)
while True:
root = ET.fromstring(
self.request(self.api_url, params=self.params).text
)
for item in root:
yield item.attrib
if len(root) < self.params["limit"]:
return
self.update_page()
class XmlParserMixin():
    """Mixin for XML based API responses"""

    def parse_response(self, response):
        """Parse an XML API response.

        Returns a (posts, count) pair, where 'posts' is a list of the
        attribute dicts of the root element's children and 'count' is
        the number of children.
        """
        root = ElementTree.fromstring(response.text)
        # a comprehension is more idiomatic than map(lambda x: ..., root)
        # and yields a concrete list instead of a one-shot iterator
        return [element.attrib for element in root], len(root)
class BooruTagExtractor(BooruExtractor):
"""Extractor for images based on search-tags"""
class DanbooruPageMixin():
    """Pagination for Danbooru v2"""

    def update_page(self, data):
        """Update params to point to the next page.

        A page value of 'b<id>' asks the Danbooru v2 API for the posts
        before the last post id seen on the current page.
        """
        self.params["page"] = "b" + str(data["id"])
class MoebooruPageMixin():
    """Pagination for Moebooru and Danbooru v1"""

    def update_page(self, data):
        """Update params to point to the next page.

        Sites with a hard page limit are paginated by 'before_id'
        (keyed on the last seen post id) so that results past the
        limit stay reachable; otherwise the page number is simply
        incremented.
        """
        # (removed a stray debug print of self.params left over
        # from development)
        if self.page_limit:
            self.params["page"] = None
            self.params["before_id"] = data["id"]
        else:
            self.params["page"] += 1
class GelbooruPageMixin():
    """Pagination for Gelbooru-like sites"""

    def reset_page(self):
        """Point params at the first page of results."""
        # the 'pid' parameter is zero-based, unlike page_start
        self.params["pid"] = self.page_start - 1

    def update_page(self, data):
        """Advance params to the next page of results."""
        self.params["pid"] = self.params["pid"] + 1
class TagMixin():
"""Extraction of images based on search-tags"""
subcategory = "tag"
directory_fmt = ["{category}", "{tags}"]
def __init__(self, match):
BooruExtractor.__init__(self)
self.tags = text.unquote(match.group(1).replace("+", " "))
super().__init__(match)
self.tags = text.unquote(match.group("tags").replace("+", " "))
self.params["tags"] = self.tags
self.params["limit"] = self.per_page
def get_job_metadata(self):
def get_metadata(self):
return {"tags": self.tags}
class BooruPoolExtractor(BooruExtractor):
"""Extractor for image-pools"""
class PoolMixin():
"""Extraction of image-pools"""
subcategory = "pool"
directory_fmt = ["{category}", "pool", "{pool}"]
def __init__(self, match):
BooruExtractor.__init__(self)
self.pool = match.group(1)
super().__init__(match)
self.pool = match.group("pool")
self.params["tags"] = "pool:" + self.pool
self.params["limit"] = self.per_page
def get_job_metadata(self):
def get_metadata(self):
return {"pool": self.pool}
class BooruPostExtractor(BooruExtractor):
"""Extractor for single images"""
class PostMixin():
"""Extraction of a single image-post"""
subcategory = "post"
def __init__(self, match):
BooruExtractor.__init__(self)
self.post = match.group(1)
super().__init__(match)
self.post = match.group("post")
self.params["tags"] = "id:" + self.post
def get_metadata(self):
return {}
class BooruPopularExtractor(BooruExtractor):
"""Extractor for popular images"""
class PopularMixin():
"""Extraction and metadata handling for Danbooru v2"""
subcategory = "popular"
directory_fmt = ["{category}", "popular", "{scale}", "{date}"]
page_start = None
def __init__(self, match):
BooruExtractor.__init__(self)
super().__init__(match)
self.sort = True
self.scale = match.group(1)
self.params.update(text.parse_query(match.group(2)))
self.params.update(text.parse_query(match.group("query")))
def get_job_metadata(self, fmt="%Y-%m-%d"):
if "scale" in self.params:
scale = self.params["scale"]
elif self.scale:
scale = self.scale
if scale.startswith("by_"):
scale = scale[3:]
else:
scale = "day"
if "date" in self.params:
date = self.params["date"][:10]
elif "year" in self.params:
date = "{:>04}-{:>02}-{:>02}".format(
self.params["year"],
self.params.get("month", "01"),
self.params.get("day", "01"))
else:
date = datetime.datetime.utcnow().strftime(fmt)
def get_metadata(self, fmt="%Y-%m-%d"):
date = self.get_date() or datetime.datetime.utcnow().strftime(fmt)
scale = self.get_scale() or "day"
if scale == "week":
dt = datetime.datetime.strptime(date, fmt)
@@ -189,3 +181,33 @@ class BooruPopularExtractor(BooruExtractor):
date = date[:-3]
return {"date": date, "scale": scale}
def get_scale(self):
    """Return the 'scale' request parameter, or None if not given."""
    # dict.get replaces the contains-then-index pattern
    return self.params.get("scale")

def get_date(self):
    """Return the date ('YYYY-MM-DD') from request parameters, or None."""
    date = self.params.get("date")
    return date[:10] if date is not None else None
class MoebooruPopularMixin(PopularMixin):
    """Extraction and metadata handling for Moebooru and Danbooru v1"""

    def __init__(self, match):
        super().__init__(match)
        self.scale = match.group("scale")

    def get_date(self):
        """Build a 'YYYY-MM-DD' date from year/month/day parameters."""
        params = self.params
        if "year" not in params:
            return None
        return "{:>04}-{:>02}-{:>02}".format(
            params["year"],
            params.get("month", "01"),
            params.get("day", "01"),
        )

    def get_scale(self):
        """Return the timescale from the URL, without a 'by_' prefix."""
        scale = self.scale
        if scale and scale.startswith("by_"):
            return scale[3:]
        return scale

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2017 Mike Fährmann
# Copyright 2014-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,16 +11,19 @@
from . import booru
class DanbooruExtractor(booru.JSONBooruExtractor):
class DanbooruExtractor(booru.JsonParserMixin,
booru.DanbooruPageMixin,
booru.BooruExtractor):
"""Base class for danbooru extractors"""
category = "danbooru"
api_url = "https://danbooru.donmai.us/posts.json"
page_limit = 1000
class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor):
class DanbooruTagExtractor(booru.TagMixin, DanbooruExtractor):
"""Extractor for images from danbooru based on search-tags"""
pattern = [r"(?:https?://)?(?:danbooru|hijiribe|sonohara)\.donmai\.us"
r"/posts\?(?:[^&#]*&)*tags=([^&#]+)"]
r"/posts\?(?:[^&#]*&)*tags=(?P<tags>[^&#]+)"]
test = [
("https://danbooru.donmai.us/posts?tags=bonocho", {
"content": "b196fb9f1668109d7774a0a82efea3ffdda07746",
@@ -30,28 +33,28 @@ class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor):
]
class DanbooruPoolExtractor(DanbooruExtractor, booru.BooruPoolExtractor):
class DanbooruPoolExtractor(booru.PoolMixin, DanbooruExtractor):
"""Extractor for image-pools from danbooru"""
pattern = [r"(?:https?://)?(?:danbooru|hijiribe|sonohara)\.donmai\.us"
r"/pools/(\d+)"]
r"/pools/(?P<pool>\d+)"]
test = [("https://danbooru.donmai.us/pools/7659", {
"content": "b16bab12bea5f7ea9e0a836bf8045f280e113d99",
})]
class DanbooruPostExtractor(DanbooruExtractor, booru.BooruPostExtractor):
class DanbooruPostExtractor(booru.PostMixin, DanbooruExtractor):
"""Extractor for single images from danbooru"""
pattern = [r"(?:https?://)?(?:danbooru|hijiribe|sonohara)\.donmai\.us"
r"/posts/(\d+)"]
r"/posts/(?P<post>\d+)"]
test = [("https://danbooru.donmai.us/posts/294929", {
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
})]
class DanbooruPopularExtractor(DanbooruExtractor, booru.BooruPopularExtractor):
class DanbooruPopularExtractor(booru.PopularMixin, DanbooruExtractor):
"""Extractor for popular images from danbooru"""
pattern = [r"(?:https?://)?(?:danbooru|hijiribe|sonohara)\.donmai\.us"
r"/explore/posts/popular()(?:\?([^#]*))?"]
r"/explore/posts/popular(?:\?(?P<query>[^#]*))?"]
test = [
("https://danbooru.donmai.us/explore/posts/popular", None),
(("https://danbooru.donmai.us/explore/posts/popular"

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2017 Mike Fährmann
# Copyright 2014-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,50 +11,58 @@
from . import booru
class E621Extractor(booru.JSONBooruExtractor):
class E621Extractor(booru.JsonParserMixin,
booru.MoebooruPageMixin,
booru.BooruExtractor):
"""Base class for e621 extractors"""
category = "e621"
api_url = "https://e621.net/post/index.json"
page_limit = 750
class E621TagExtractor(E621Extractor, booru.BooruTagExtractor):
class E621TagExtractor(booru.TagMixin, E621Extractor):
"""Extractor for images from e621.net based on search-tags"""
pattern = [
r"(?:https?://)?(?:www\.)?e621\.net/post/index/\d+/([^?]+)",
r"(?:https?://)?(?:www\.)?e621\.net/post\?tags=([^&]+)",
r"(?:https?://)?(?:www\.)?e621\.net/post/index/\d+/(?P<tags>[^/?&#]+)",
r"(?:https?://)?(?:www\.)?e621\.net/post\?tags=(?P<tags>[^&#]+)",
]
test = [
("https://e621.net/post/index/1/anry", {
"url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba",
"content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58",
}),
("https://e621.net/post?tags=anry", None),
]
test = [("https://e621.net/post/index/1/anry", {
"url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba",
"content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58",
})]
class E621PoolExtractor(E621Extractor, booru.BooruPoolExtractor):
class E621PoolExtractor(booru.PoolMixin, E621Extractor):
"""Extractor for image-pools from e621.net"""
pattern = [r"(?:https?://)?(?:www\.)?e621\.net/pool/show/(\d+)"]
pattern = [r"(?:https?://)?(?:www\.)?e621\.net/pool/show/(?P<pool>\d+)"]
test = [("https://e621.net/pool/show/73", {
"url": "842f2fb065c7c339486a9b1d689020b8569888ed",
"content": "c2c87b7a9150509496cddc75ccab08109922876a",
})]
class E621PostExtractor(E621Extractor, booru.BooruPostExtractor):
class E621PostExtractor(booru.PostMixin, E621Extractor):
"""Extractor for single images from e621.net"""
pattern = [r"(?:https?://)?(?:www\.)?e621\.net/post/show/(\d+)"]
pattern = [r"(?:https?://)?(?:www\.)?e621\.net/post/show/(?P<post>\d+)"]
test = [("https://e621.net/post/show/535", {
"url": "f7f78b44c9b88f8f09caac080adc8d6d9fdaa529",
"content": "66f46e96a893fba8e694c4e049b23c2acc9af462",
})]
class E621PopularExtractor(E621Extractor, booru.BooruPopularExtractor):
class E621PopularExtractor(booru.MoebooruPopularMixin, E621Extractor):
"""Extractor for popular images from 621.net"""
pattern = [r"(?:https?://)?(?:www\.)?e621\.net/post/popular_by_"
r"(day|week|month)(?:\?([^#]*))?"]
pattern = [r"(?:https?://)?(?:www\.)?e621\.net"
r"/post/popular_by_(?P<scale>day|week|month)"
r"(?:\?(?P<query>[^#]*))?"]
test = [("https://e621.net/post/popular_by_month?month=6&year=2013", {
"count": 32,
})]
@property
def api_url(self):
return "https://e621.net/post/popular_by_" + self.scale + ".json"
def __init__(self, match):
super().__init__(match)
self.api_url = "https://e621.net/post/popular_by_{scale}.json".format(
scale=self.scale)

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2017 Mike Fährmann
# Copyright 2015-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,44 +11,69 @@
from . import booru
class KonachanExtractor(booru.JSONBooruExtractor):
class KonachanExtractor(booru.JsonParserMixin,
booru.MoebooruPageMixin,
booru.BooruExtractor):
"""Base class for konachan extractors"""
category = "konachan"
api_url = "https://konachan.com/post.json"
def __init__(self, match):
super().__init__(match)
self.api_url = "https://konachan.{tld}/post.json".format(
tld=match.group("tld"))
class KonachanTagExtractor(KonachanExtractor, booru.BooruTagExtractor):
class KonachanTagExtractor(booru.TagMixin, KonachanExtractor):
"""Extractor for images from konachan.com based on search-tags"""
pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/post\?tags=([^&]+)"]
test = [("http://konachan.com/post?tags=patata", {
"content": "838cfb815e31f48160855435655ddf7bfc4ecb8d",
})]
pattern = [r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
r"/post\?(?:[^&#]*&)*tags=(?P<tags>[^&#]+)"]
test = [
("http://konachan.com/post?tags=patata", {
"content": "838cfb815e31f48160855435655ddf7bfc4ecb8d",
}),
("http://konachan.net/post?tags=patata", None),
]
class KonachanPoolExtractor(KonachanExtractor, booru.BooruPoolExtractor):
class KonachanPoolExtractor(booru.PoolMixin, KonachanExtractor):
"""Extractor for image-pools from konachan.com"""
pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/pool/show/(\d+)"]
test = [("http://konachan.com/pool/show/95", {
"content": "cf0546e38a93c2c510a478f8744e60687b7a8426",
})]
pattern = [r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
r"/pool/show/(?P<pool>\d+)"]
test = [
("http://konachan.com/pool/show/95", {
"content": "cf0546e38a93c2c510a478f8744e60687b7a8426",
}),
("http://konachan.net/pool/show/95", None),
]
class KonachanPostExtractor(KonachanExtractor, booru.BooruPostExtractor):
class KonachanPostExtractor(booru.PostMixin, KonachanExtractor):
"""Extractor for single images from konachan.com"""
pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/post/show/(\d+)"]
test = [("http://konachan.com/post/show/205189", {
"content": "674e75a753df82f5ad80803f575818b8e46e4b65",
})]
pattern = [r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
r"/post/show/(?P<post>\d+)"]
test = [
("http://konachan.com/post/show/205189", {
"content": "674e75a753df82f5ad80803f575818b8e46e4b65",
}),
("http://konachan.com/post/show/205189", None),
]
class KonachanPopularExtractor(KonachanExtractor, booru.BooruPopularExtractor):
class KonachanPopularExtractor(booru.MoebooruPopularMixin, KonachanExtractor):
"""Extractor for popular images from konachan.com"""
pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/post/popular_"
r"(by_(?:day|week|month)|recent)(?:\?([^#]*))?"]
test = [("https://konachan.com/post/popular_by_month?month=11&year=2010", {
"count": 20,
})]
pattern = [r"(?:https?://)?(?:www\.)?konachan\.(?P<tld>com|net)"
r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)"
r"(?:\?(?P<query>[^#]*))?"]
test = [
("https://konachan.com/post/popular_by_month?month=11&year=2010", {
"count": 20,
}),
("https://konachan.com/post/popular_recent", None),
("https://konachan.net/post/popular_recent", None),
]
@property
def api_url(self):
return "https://konachan.com/post/popular_" + self.scale + ".json"
def __init__(self, match):
super().__init__(match)
self.api_url = (
"https://konachan.{tld}/post/popular_{scale}.json".format(
tld=match.group("tld"), scale=self.scale))

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2016-2017 Mike Fährmann
# Copyright 2016-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,31 +11,33 @@
from . import booru
class Rule34Extractor(booru.XMLBooruExtractor):
class Rule34Extractor(booru.XmlParserMixin,
booru.GelbooruPageMixin,
booru.BooruExtractor):
"""Base class for rule34 extractors"""
category = "rule34"
api_url = "https://rule34.xxx/index.php"
pagestart = 0
pagekey = "pid"
page_limit = 4000
def setup(self):
def __init__(self, match):
super().__init__(match)
self.params.update({"page": "dapi", "s": "post", "q": "index"})
class Rule34TagExtractor(Rule34Extractor, booru.BooruTagExtractor):
class Rule34TagExtractor(booru.TagMixin, Rule34Extractor):
"""Extractor for images from rule34.xxx based on search-tags"""
pattern = [(r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
r"\?page=post&s=list&tags=([^&]+)")]
r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")]
test = [("http://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
"url": "104094495973edfe7e764c8f2dd42017163322aa",
"content": "a01768c6f86f32eb7ebbdeb87c30b0d9968d7f97",
})]
class Rule34PostExtractor(Rule34Extractor, booru.BooruPostExtractor):
class Rule34PostExtractor(booru.PostMixin, Rule34Extractor):
"""Extractor for single images from rule34.xxx"""
pattern = [(r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?"
r"\?page=post&s=view&id=(\d+)")]
r"\?page=post&s=view&id=(?P<post>\d+)")]
test = [("http://rule34.xxx/index.php?page=post&s=view&id=1974854", {
"url": "3b1f9817785868d1cd94d5376d20478eed591965",
"content": "fd2820df78fb937532da0a46f7af6cefc4dc94be",

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2017 Mike Fährmann
# Copyright 2015-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,31 +11,32 @@
from . import booru
class SafebooruExtractor(booru.XMLBooruExtractor):
class SafebooruExtractor(booru.XmlParserMixin,
booru.GelbooruPageMixin,
booru.BooruExtractor):
"""Base class for safebooru extractors"""
category = "safebooru"
api_url = "https://safebooru.org/index.php"
pagestart = 0
pagekey = "pid"
def setup(self):
def __init__(self, match):
super().__init__(match)
self.params.update({"page": "dapi", "s": "post", "q": "index"})
class SafebooruTagExtractor(SafebooruExtractor, booru.BooruTagExtractor):
class SafebooruTagExtractor(booru.TagMixin, SafebooruExtractor):
"""Extractor for images from safebooru.org based on search-tags"""
pattern = [(r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
r"\?page=post&s=list&tags=([^&]+)")]
r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")]
test = [("http://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
"url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
"content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
})]
class SafebooruPostExtractor(SafebooruExtractor, booru.BooruPostExtractor):
class SafebooruPostExtractor(booru.PostMixin, SafebooruExtractor):
"""Extractor for single images from safebooru.org"""
pattern = [(r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
r"\?page=post&s=view&id=(\d+)")]
r"\?page=post&s=view&id=(?P<post>\d+)")]
test = [("http://safebooru.org/index.php?page=post&s=view&id=1169132", {
"url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b",
"content": "93b293b27dabd198afafabbaf87c49863ac82f27",

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2017 Mike Fährmann
# Copyright 2015-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,40 +11,44 @@
from . import booru
class YandereExtractor(booru.JSONBooruExtractor):
class YandereExtractor(booru.JsonParserMixin,
booru.MoebooruPageMixin,
booru.BooruExtractor):
"""Base class for yandere extractors"""
category = "yandere"
api_url = "https://yande.re/post.json"
class YandereTagExtractor(YandereExtractor, booru.BooruTagExtractor):
class YandereTagExtractor(booru.TagMixin, YandereExtractor):
"""Extractor for images from yande.re based on search-tags"""
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post\?tags=([^&]+)"]
pattern = [r"(?:https?://)?(?:www\.)?yande\.re"
r"/post\?(?:[^&#]*&)*tags=(?P<tags>[^&#]+)"]
test = [("https://yande.re/post?tags=ouzoku+armor", {
"content": "59201811c728096b2d95ce6896fd0009235fe683",
})]
class YanderePoolExtractor(YandereExtractor, booru.BooruPoolExtractor):
class YanderePoolExtractor(booru.PoolMixin, YandereExtractor):
"""Extractor for image-pools from yande.re"""
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/pool/show/(\d+)"]
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/pool/show/(?P<pool>\d+)"]
test = [("https://yande.re/pool/show/318", {
"content": "2a35b9d6edecce11cc2918c6dce4de2198342b68",
})]
class YanderePostExtractor(YandereExtractor, booru.BooruPostExtractor):
class YanderePostExtractor(booru.PostMixin, YandereExtractor):
"""Extractor for single images from yande.re"""
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post/show/(\d+)"]
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post/show/(?P<post>\d+)"]
test = [("https://yande.re/post/show/51824", {
"content": "59201811c728096b2d95ce6896fd0009235fe683",
})]
class YanderePopularExtractor(YandereExtractor, booru.BooruPopularExtractor):
class YanderePopularExtractor(booru.MoebooruPopularMixin, YandereExtractor):
"""Extractor for popular images from yande.re"""
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post/popular_"
r"(by_(?:day|week|month)|recent)(?:\?([^#]*))?"]
pattern = [r"(?:https?://)?(?:www\.)?yande\.re"
r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)"
r"(?:\?(?P<query>[^#]*))?"]
test = [
("https://yande.re/post/popular_by_month?month=6&year=2014", {
"count": 40,
@@ -52,6 +56,7 @@ class YanderePopularExtractor(YandereExtractor, booru.BooruPopularExtractor):
("https://yande.re/post/popular_recent", None),
]
@property
def api_url(self):
return "https://yande.re/post/popular_" + self.scale + ".json"
def __init__(self, match):
super().__init__(match)
self.api_url = "https://yande.re/post/popular_{scale}.json".format(
scale=self.scale)