diff --git a/gallery_dl/extractor/3dbooru.py b/gallery_dl/extractor/3dbooru.py index 74c55346..3cad8363 100644 --- a/gallery_dl/extractor/3dbooru.py +++ b/gallery_dl/extractor/3dbooru.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2017 Mike Fährmann +# Copyright 2015-2018 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -11,57 +11,65 @@ from . import booru -class ThreedeebooruExtractor(booru.JSONBooruExtractor): +class ThreedeebooruExtractor(booru.JsonParserMixin, + booru.MoebooruPageMixin, + booru.BooruExtractor): """Base class for 3dbooru extractors""" category = "3dbooru" api_url = "http://behoimi.org/post/index.json" - headers = { - "Referer": "http://behoimi.org/post/show/", - "Accept-Encoding": "identity", - } + page_limit = 1000 + + def __init__(self, match): + super().__init__(match) + self.session.headers.update({ + "Referer": "http://behoimi.org/post/show/", + "Accept-Encoding": "identity", + }) -class ThreedeebooruTagExtractor(ThreedeebooruExtractor, - booru.BooruTagExtractor): +class ThreedeebooruTagExtractor(booru.TagMixin, + ThreedeebooruExtractor): """Extractor for images from behoimi.org based on search-tags""" pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post" - r"(?:/(?:index)?)?\?tags=([^&]+)"] + r"(?:/(?:index)?)?\?tags=(?P[^&#]+)"] test = [("http://behoimi.org/post?tags=himekawa_azuru+dress", { "url": "ecb30c6aaaf8a6ff8f55255737a9840832a483c1", "content": "11cbda40c287e026c1ce4ca430810f761f2d0b2a", })] -class ThreedeebooruPoolExtractor(ThreedeebooruExtractor, - booru.BooruPoolExtractor): +class ThreedeebooruPoolExtractor(booru.PoolMixin, + ThreedeebooruExtractor): """Extractor for image-pools from behoimi.org""" - pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(\d+)"] + pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(?P\d+)"] test = [("http://behoimi.org/pool/show/27", { "url": "da75d2d1475449d5ef0c266cb612683b110a30f2", "content": "fd5b37c5c6c2de4b4d6f1facffdefa1e28176554", })] -class ThreedeebooruPostExtractor(ThreedeebooruExtractor, - booru.BooruPostExtractor): +class ThreedeebooruPostExtractor(booru.PostMixin, + ThreedeebooruExtractor): """Extractor for single images from behoimi.org""" - pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(\d+)"] + pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(?P\d+)"] test = [("http://behoimi.org/post/show/140852", { "url": "ce874ea26f01d6c94795f3cc3aaaaa9bc325f2f6", "content": "26549d55b82aa9a6c1686b96af8bfcfa50805cd4", })] -class ThreedeebooruPopularExtractor(ThreedeebooruExtractor, - booru.BooruPopularExtractor): +class ThreedeebooruPopularExtractor(booru.MoebooruPopularMixin, + ThreedeebooruExtractor): """Extractor for popular images from behoimi.org""" - pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post/popular_" - r"(by_(?:day|week|month)|recent)(?:\?([^#]*))?"] + pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org" + r"/post/popular_(?Pby_(?:day|week|month)|recent)" + r"(?:\?(?P[^#]*))?"] test = [("http://behoimi.org/post/popular_by_month?month=2&year=2013", { "url": "a447e115fdab60c25ab71c4fdb1b9f509bc23f99", "count": 20, })] - @property - def api_url(self): - return "http://behoimi.org/post/popular_" + self.scale + ".json" + def __init__(self, match): + super().__init__(match) + self.api_url = "http://behoimi.org/post/popular_{scale}.json".format( + scale=self.scale) diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py index a7a28dec..66838808 100644 --- a/gallery_dl/extractor/booru.py +++ b/gallery_dl/extractor/booru.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2017 Mike Fährmann +# Copyright 2015-2018 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -10,8 +10,8 @@ from .common import SharedConfigExtractor, Message from .. import text -import xml.etree.ElementTree as ET -import urllib.parse +from urllib.parse import urljoin +from xml.etree import ElementTree import datetime import operator @@ -20,166 +20,158 @@ class BooruExtractor(SharedConfigExtractor): """Base class for all booru extractors""" basecategory = "booru" filename_fmt = "{category}_{id}_{md5}.{extension}" - headers = {} - pagestart = 1 - pagekey = "page" api_url = "" + per_page = 50 + page_start = 1 + page_limit = None + sort = False - def __init__(self): - SharedConfigExtractor.__init__(self) - self.session.headers.update(self.headers) - self.params = {"limit": 50} - self.setup() + def __init__(self, match): + super().__init__() + self.params = {} + + def skip(self, num): + pages = num // self.per_page + if self.page_limit and pages + self.page_start > self.page_limit: + pages = self.page_limit - self.page_start + self.page_start += pages + return pages * self.per_page def items(self): yield Message.Version, 1 - yield Message.Directory, self.get_job_metadata() - for data in self.items_impl(): - try: - url = self.get_file_url(data) - data = self.get_file_metadata(data) - yield Message.Url, url, data - except KeyError: - continue + yield Message.Directory, self.get_metadata() - def skip(self, num): - limit = self.params["limit"] - pages = num // limit - self.pagestart += pages - return pages * limit + self.reset_page() + while True: + images, count = self.parse_response( + self.request(self.api_url, params=self.params)) - def items_impl(self): - pass + for data in images: + try: + url = data["file_url"] + if url.startswith("/"): + url = urljoin(self.api_url, url) + yield Message.Url, url, text.nameext_from_url(url, data) + except KeyError: + continue - def setup(self): - pass + if count < self.per_page: + return + self.update_page(data) - def update_page(self, reset=False): - """Update the value of the 'page' parameter""" - # Override this method in derived classes if necessary. - # It is usually enough to just adjust the 'page' attribute - if reset is False: - self.params[self.pagekey] += 1 - else: - self.params[self.pagekey] = self.pagestart + def reset_page(self): + """Initialize params to point to the first page""" + self.params["page"] = self.page_start - def get_job_metadata(self): + def update_page(self, data): + """Update params to point to the next page""" + + def get_metadata(self): """Collect metadata for extractor-job""" - # Override this method in derived classes - return {} - - def get_file_metadata(self, data): - """Collect metadata for a downloadable file""" - return text.nameext_from_url(self.get_file_url(data), data) - - def get_file_url(self, data): - """Extract download-url from 'data'""" - url = data["file_url"] - if url.startswith("/"): - url = urllib.parse.urljoin(self.api_url, url) - return url -class JSONBooruExtractor(BooruExtractor): - """Base class for JSON based API responses""" +class JsonParserMixin(): + """Class for JSON based API responses""" sort = False - def items_impl(self): - self.update_page(reset=True) - while True: - images = self.request(self.api_url, params=self.params).json() - if self.sort: - images.sort(key=operator.itemgetter("score", "id"), - reverse=True) - yield from images - if len(images) < self.params["limit"]: - return - self.update_page() + def parse_response(self, response): + images = response.json() + if self.sort: + images.sort(key=operator.itemgetter("score", "id"), + reverse=True) + return images, len(images) -class XMLBooruExtractor(BooruExtractor): - """Base class for XML based API responses""" - def items_impl(self): - self.update_page(reset=True) - while True: - root = ET.fromstring( - self.request(self.api_url, params=self.params).text - ) - for item in root: - yield item.attrib - if len(root) < self.params["limit"]: - return - self.update_page() +class XmlParserMixin(): + """Class for XML based API responses""" + def parse_response(self, response): + root = ElementTree.fromstring(response.text) + return map(lambda x: x.attrib, root), len(root) -class BooruTagExtractor(BooruExtractor): - """Extractor for images based on search-tags""" +class DanbooruPageMixin(): + """Pagination for Danbooru v2""" + def update_page(self, data): + self.params["page"] = "b{}".format(data["id"]) + + +class MoebooruPageMixin(): + """Pagination for Moebooru and Danbooru v1""" + def update_page(self, data): + print("update:", self.params) + if self.page_limit: + self.params["page"] = None + self.params["before_id"] = data["id"] + else: + self.params["page"] += 1 + + +class GelbooruPageMixin(): + """Pagination for Gelbooru-like sites""" + def reset_page(self): + self.params["pid"] = self.page_start - 1 + + def update_page(self, data): + self.params["pid"] += 1 + + +class TagMixin(): + """Extraction of images based on search-tags""" subcategory = "tag" directory_fmt = ["{category}", "{tags}"] def __init__(self, match): - BooruExtractor.__init__(self) - self.tags = text.unquote(match.group(1).replace("+", " ")) + super().__init__(match) + self.tags = text.unquote(match.group("tags").replace("+", " ")) self.params["tags"] = self.tags + self.params["limit"] = self.per_page - def get_job_metadata(self): + def get_metadata(self): return {"tags": self.tags} -class BooruPoolExtractor(BooruExtractor): - """Extractor for image-pools""" +class PoolMixin(): + """Extraction of image-pools""" subcategory = "pool" directory_fmt = ["{category}", "pool", "{pool}"] def __init__(self, match): - BooruExtractor.__init__(self) - self.pool = match.group(1) + super().__init__(match) + self.pool = match.group("pool") self.params["tags"] = "pool:" + self.pool + self.params["limit"] = self.per_page - def get_job_metadata(self): + def get_metadata(self): return {"pool": self.pool} -class BooruPostExtractor(BooruExtractor): - """Extractor for single images""" +class PostMixin(): + """Extraction of a single image-post""" subcategory = "post" def __init__(self, match): - BooruExtractor.__init__(self) - self.post = match.group(1) + super().__init__(match) + self.post = match.group("post") self.params["tags"] = "id:" + self.post + def get_metadata(self): + return {} -class BooruPopularExtractor(BooruExtractor): - """Extractor for popular images""" + +class PopularMixin(): + """Extraction and metadata handling for Danbooru v2""" subcategory = "popular" directory_fmt = ["{category}", "popular", "{scale}", "{date}"] + page_start = None def __init__(self, match): - BooruExtractor.__init__(self) + super().__init__(match) self.sort = True - self.scale = match.group(1) - self.params.update(text.parse_query(match.group(2))) + self.params.update(text.parse_query(match.group("query"))) - def get_job_metadata(self, fmt="%Y-%m-%d"): - if "scale" in self.params: - scale = self.params["scale"] - elif self.scale: - scale = self.scale - if scale.startswith("by_"): - scale = scale[3:] - else: - scale = "day" - - if "date" in self.params: - date = self.params["date"][:10] - elif "year" in self.params: - date = "{:>04}-{:>02}-{:>02}".format( - self.params["year"], - self.params.get("month", "01"), - self.params.get("day", "01")) - else: - date = datetime.datetime.utcnow().strftime(fmt) + def get_metadata(self, fmt="%Y-%m-%d"): + date = self.get_date() or datetime.datetime.utcnow().strftime(fmt) + scale = self.get_scale() or "day" if scale == "week": dt = datetime.datetime.strptime(date, fmt) @@ -189,3 +181,33 @@ class BooruPopularExtractor(BooruExtractor): date = date[:-3] return {"date": date, "scale": scale} + + def get_scale(self): + if "scale" in self.params: + return self.params["scale"] + return None + + def get_date(self): + if "date" in self.params: + return self.params["date"][:10] + return None + + +class MoebooruPopularMixin(PopularMixin): + """Extraction and metadata handling for Moebooru and Danbooru v1""" + def __init__(self, match): + super().__init__(match) + self.scale = match.group("scale") + + def get_date(self): + if "year" in self.params: + return "{:>04}-{:>02}-{:>02}".format( + self.params["year"], + self.params.get("month", "01"), + self.params.get("day", "01")) + return None + + def get_scale(self): + if self.scale and self.scale.startswith("by_"): + return self.scale[3:] + return self.scale diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 5e5ac206..9b7e7fac 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2017 Mike Fährmann +# Copyright 2014-2018 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -11,16 +11,19 @@ from . import booru -class DanbooruExtractor(booru.JSONBooruExtractor): +class DanbooruExtractor(booru.JsonParserMixin, + booru.DanbooruPageMixin, + booru.BooruExtractor): """Base class for danbooru extractors""" category = "danbooru" api_url = "https://danbooru.donmai.us/posts.json" + page_limit = 1000 -class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor): +class DanbooruTagExtractor(booru.TagMixin, DanbooruExtractor): """Extractor for images from danbooru based on search-tags""" pattern = [r"(?:https?://)?(?:danbooru|hijiribe|sonohara)\.donmai\.us" - r"/posts\?(?:[^&#]*&)*tags=([^&#]+)"] + r"/posts\?(?:[^&#]*&)*tags=(?P[^&#]+)"] test = [ ("https://danbooru.donmai.us/posts?tags=bonocho", { "content": "b196fb9f1668109d7774a0a82efea3ffdda07746", @@ -30,28 +33,28 @@ class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor): ] -class DanbooruPoolExtractor(DanbooruExtractor, booru.BooruPoolExtractor): +class DanbooruPoolExtractor(booru.PoolMixin, DanbooruExtractor): """Extractor for image-pools from danbooru""" pattern = [r"(?:https?://)?(?:danbooru|hijiribe|sonohara)\.donmai\.us" - r"/pools/(\d+)"] + r"/pools/(?P\d+)"] test = [("https://danbooru.donmai.us/pools/7659", { "content": "b16bab12bea5f7ea9e0a836bf8045f280e113d99", })] -class DanbooruPostExtractor(DanbooruExtractor, booru.BooruPostExtractor): +class DanbooruPostExtractor(booru.PostMixin, DanbooruExtractor): """Extractor for single images from danbooru""" pattern = [r"(?:https?://)?(?:danbooru|hijiribe|sonohara)\.donmai\.us" - r"/posts/(\d+)"] + r"/posts/(?P\d+)"] test = [("https://danbooru.donmai.us/posts/294929", { "content": "5e255713cbf0a8e0801dc423563c34d896bb9229", })] -class DanbooruPopularExtractor(DanbooruExtractor, booru.BooruPopularExtractor): +class DanbooruPopularExtractor(booru.PopularMixin, DanbooruExtractor): """Extractor for popular images from danbooru""" pattern = [r"(?:https?://)?(?:danbooru|hijiribe|sonohara)\.donmai\.us" - r"/explore/posts/popular()(?:\?([^#]*))?"] + r"/explore/posts/popular(?:\?(?P[^#]*))?"] test = [ ("https://danbooru.donmai.us/explore/posts/popular", None), (("https://danbooru.donmai.us/explore/posts/popular" diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py index e786d594..97ac630f 100644 --- a/gallery_dl/extractor/e621.py +++ b/gallery_dl/extractor/e621.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2017 Mike Fährmann +# Copyright 2014-2018 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -11,50 +11,58 @@ from . import booru -class E621Extractor(booru.JSONBooruExtractor): +class E621Extractor(booru.JsonParserMixin, + booru.MoebooruPageMixin, + booru.BooruExtractor): """Base class for e621 extractors""" category = "e621" api_url = "https://e621.net/post/index.json" + page_limit = 750 -class E621TagExtractor(E621Extractor, booru.BooruTagExtractor): +class E621TagExtractor(booru.TagMixin, E621Extractor): """Extractor for images from e621.net based on search-tags""" pattern = [ - r"(?:https?://)?(?:www\.)?e621\.net/post/index/\d+/([^?]+)", - r"(?:https?://)?(?:www\.)?e621\.net/post\?tags=([^&]+)", + r"(?:https?://)?(?:www\.)?e621\.net/post/index/\d+/(?P[^/?&#]+)", + r"(?:https?://)?(?:www\.)?e621\.net/post\?tags=(?P[^&#]+)", + ] + test = [ + ("https://e621.net/post/index/1/anry", { + "url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba", + "content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58", + }), + ("https://e621.net/post?tags=anry", None), ] - test = [("https://e621.net/post/index/1/anry", { - "url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba", - "content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58", - })] -class E621PoolExtractor(E621Extractor, booru.BooruPoolExtractor): +class E621PoolExtractor(booru.PoolMixin, E621Extractor): """Extractor for image-pools from e621.net""" - pattern = [r"(?:https?://)?(?:www\.)?e621\.net/pool/show/(\d+)"] + pattern = [r"(?:https?://)?(?:www\.)?e621\.net/pool/show/(?P\d+)"] test = [("https://e621.net/pool/show/73", { "url": "842f2fb065c7c339486a9b1d689020b8569888ed", "content": "c2c87b7a9150509496cddc75ccab08109922876a", })] -class E621PostExtractor(E621Extractor, booru.BooruPostExtractor): +class E621PostExtractor(booru.PostMixin, E621Extractor): """Extractor for single images from e621.net""" - pattern = [r"(?:https?://)?(?:www\.)?e621\.net/post/show/(\d+)"] + pattern = [r"(?:https?://)?(?:www\.)?e621\.net/post/show/(?P\d+)"] test = [("https://e621.net/post/show/535", { "url": "f7f78b44c9b88f8f09caac080adc8d6d9fdaa529", "content": "66f46e96a893fba8e694c4e049b23c2acc9af462", })] -class E621PopularExtractor(E621Extractor, booru.BooruPopularExtractor): +class E621PopularExtractor(booru.MoebooruPopularMixin, E621Extractor): """Extractor for popular images from 621.net""" - pattern = [r"(?:https?://)?(?:www\.)?e621\.net/post/popular_by_" - r"(day|week|month)(?:\?([^#]*))?"] + pattern = [r"(?:https?://)?(?:www\.)?e621\.net" + r"/post/popular_by_(?Pday|week|month)" + r"(?:\?(?P[^#]*))?"] test = [("https://e621.net/post/popular_by_month?month=6&year=2013", { "count": 32, })] - @property - def api_url(self): - return "https://e621.net/post/popular_by_" + self.scale + ".json" + def __init__(self, match): + super().__init__(match) + self.api_url = "https://e621.net/post/popular_by_{scale}.json".format( + scale=self.scale) diff --git a/gallery_dl/extractor/konachan.py b/gallery_dl/extractor/konachan.py index cdcdd7db..5506ff94 100644 --- a/gallery_dl/extractor/konachan.py +++ b/gallery_dl/extractor/konachan.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2017 Mike Fährmann +# Copyright 2015-2018 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -11,44 +11,69 @@ from . import booru -class KonachanExtractor(booru.JSONBooruExtractor): +class KonachanExtractor(booru.JsonParserMixin, + booru.MoebooruPageMixin, + booru.BooruExtractor): """Base class for konachan extractors""" category = "konachan" - api_url = "https://konachan.com/post.json" + + def __init__(self, match): + super().__init__(match) + self.api_url = "https://konachan.{tld}/post.json".format( + tld=match.group("tld")) -class KonachanTagExtractor(KonachanExtractor, booru.BooruTagExtractor): +class KonachanTagExtractor(booru.TagMixin, KonachanExtractor): """Extractor for images from konachan.com based on search-tags""" - pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/post\?tags=([^&]+)"] - test = [("http://konachan.com/post?tags=patata", { - "content": "838cfb815e31f48160855435655ddf7bfc4ecb8d", - })] + pattern = [r"(?:https?://)?(?:www\.)?konachan\.(?Pcom|net)" + r"/post\?(?:[^&#]*&)*tags=(?P[^&#]+)"] + test = [ + ("http://konachan.com/post?tags=patata", { + "content": "838cfb815e31f48160855435655ddf7bfc4ecb8d", + }), + ("http://konachan.net/post?tags=patata", None), + ] -class KonachanPoolExtractor(KonachanExtractor, booru.BooruPoolExtractor): +class KonachanPoolExtractor(booru.PoolMixin, KonachanExtractor): """Extractor for image-pools from konachan.com""" - pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/pool/show/(\d+)"] - test = [("http://konachan.com/pool/show/95", { - "content": "cf0546e38a93c2c510a478f8744e60687b7a8426", - })] + pattern = [r"(?:https?://)?(?:www\.)?konachan\.(?Pcom|net)" + r"/pool/show/(?P\d+)"] + test = [ + ("http://konachan.com/pool/show/95", { + "content": "cf0546e38a93c2c510a478f8744e60687b7a8426", + }), + ("http://konachan.net/pool/show/95", None), + ] -class KonachanPostExtractor(KonachanExtractor, booru.BooruPostExtractor): +class KonachanPostExtractor(booru.PostMixin, KonachanExtractor): """Extractor for single images from konachan.com""" - pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/post/show/(\d+)"] - test = [("http://konachan.com/post/show/205189", { - "content": "674e75a753df82f5ad80803f575818b8e46e4b65", - })] + pattern = [r"(?:https?://)?(?:www\.)?konachan\.(?Pcom|net)" + r"/post/show/(?P\d+)"] + test = [ + ("http://konachan.com/post/show/205189", { + "content": "674e75a753df82f5ad80803f575818b8e46e4b65", + }), + ("http://konachan.com/post/show/205189", None), + ] -class KonachanPopularExtractor(KonachanExtractor, booru.BooruPopularExtractor): +class KonachanPopularExtractor(booru.MoebooruPopularMixin, KonachanExtractor): """Extractor for popular images from konachan.com""" - pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/post/popular_" - r"(by_(?:day|week|month)|recent)(?:\?([^#]*))?"] - test = [("https://konachan.com/post/popular_by_month?month=11&year=2010", { - "count": 20, - })] + pattern = [r"(?:https?://)?(?:www\.)?konachan\.(?Pcom|net)" + r"/post/popular_(?Pby_(?:day|week|month)|recent)" + r"(?:\?(?P[^#]*))?"] + test = [ + ("https://konachan.com/post/popular_by_month?month=11&year=2010", { + "count": 20, + }), + ("https://konachan.com/post/popular_recent", None), + ("https://konachan.net/post/popular_recent", None), + ] - @property - def api_url(self): - return "https://konachan.com/post/popular_" + self.scale + ".json" + def __init__(self, match): + super().__init__(match) + self.api_url = ( + "https://konachan.{tld}/post/popular_{scale}.json".format( + tld=match.group("tld"), scale=self.scale)) diff --git a/gallery_dl/extractor/rule34.py b/gallery_dl/extractor/rule34.py index 030e9201..08db9af8 100644 --- a/gallery_dl/extractor/rule34.py +++ b/gallery_dl/extractor/rule34.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2017 Mike Fährmann +# Copyright 2016-2018 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -11,31 +11,33 @@ from . import booru -class Rule34Extractor(booru.XMLBooruExtractor): +class Rule34Extractor(booru.XmlParserMixin, + booru.GelbooruPageMixin, + booru.BooruExtractor): """Base class for rule34 extractors""" category = "rule34" api_url = "https://rule34.xxx/index.php" - pagestart = 0 - pagekey = "pid" + page_limit = 4000 - def setup(self): + def __init__(self, match): + super().__init__(match) self.params.update({"page": "dapi", "s": "post", "q": "index"}) -class Rule34TagExtractor(Rule34Extractor, booru.BooruTagExtractor): +class Rule34TagExtractor(booru.TagMixin, Rule34Extractor): """Extractor for images from rule34.xxx based on search-tags""" pattern = [(r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?" - r"\?page=post&s=list&tags=([^&]+)")] + r"\?page=post&s=list&tags=(?P[^&#]+)")] test = [("http://rule34.xxx/index.php?page=post&s=list&tags=danraku", { "url": "104094495973edfe7e764c8f2dd42017163322aa", "content": "a01768c6f86f32eb7ebbdeb87c30b0d9968d7f97", })] -class Rule34PostExtractor(Rule34Extractor, booru.BooruPostExtractor): +class Rule34PostExtractor(booru.PostMixin, Rule34Extractor): """Extractor for single images from rule34.xxx""" pattern = [(r"(?:https?://)?(?:www\.)?rule34\.xxx/(?:index\.php)?" - r"\?page=post&s=view&id=(\d+)")] + r"\?page=post&s=view&id=(?P\d+)")] test = [("http://rule34.xxx/index.php?page=post&s=view&id=1974854", { "url": "3b1f9817785868d1cd94d5376d20478eed591965", "content": "fd2820df78fb937532da0a46f7af6cefc4dc94be", diff --git a/gallery_dl/extractor/safebooru.py b/gallery_dl/extractor/safebooru.py index 72068e7f..442748a4 100644 --- a/gallery_dl/extractor/safebooru.py +++ b/gallery_dl/extractor/safebooru.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2017 Mike Fährmann +# Copyright 2015-2018 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -11,31 +11,32 @@ from . import booru -class SafebooruExtractor(booru.XMLBooruExtractor): +class SafebooruExtractor(booru.XmlParserMixin, + booru.GelbooruPageMixin, + booru.BooruExtractor): """Base class for safebooru extractors""" category = "safebooru" api_url = "https://safebooru.org/index.php" - pagestart = 0 - pagekey = "pid" - def setup(self): + def __init__(self, match): + super().__init__(match) self.params.update({"page": "dapi", "s": "post", "q": "index"}) -class SafebooruTagExtractor(SafebooruExtractor, booru.BooruTagExtractor): +class SafebooruTagExtractor(booru.TagMixin, SafebooruExtractor): """Extractor for images from safebooru.org based on search-tags""" pattern = [(r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?" - r"\?page=post&s=list&tags=([^&]+)")] + r"\?page=post&s=list&tags=(?P[^&#]+)")] test = [("http://safebooru.org/index.php?page=post&s=list&tags=bonocho", { "url": "17c61b386530cf4c30842c9f580d15ef1cd09586", "content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb", })] -class SafebooruPostExtractor(SafebooruExtractor, booru.BooruPostExtractor): +class SafebooruPostExtractor(booru.PostMixin, SafebooruExtractor): """Extractor for single images from safebooru.org""" pattern = [(r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?" - r"\?page=post&s=view&id=(\d+)")] + r"\?page=post&s=view&id=(?P\d+)")] test = [("http://safebooru.org/index.php?page=post&s=view&id=1169132", { "url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b", "content": "93b293b27dabd198afafabbaf87c49863ac82f27", diff --git a/gallery_dl/extractor/yandere.py b/gallery_dl/extractor/yandere.py index 833c44dc..4b78dda6 100644 --- a/gallery_dl/extractor/yandere.py +++ b/gallery_dl/extractor/yandere.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2017 Mike Fährmann +# Copyright 2015-2018 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -11,40 +11,44 @@ from . import booru -class YandereExtractor(booru.JSONBooruExtractor): +class YandereExtractor(booru.JsonParserMixin, + booru.MoebooruPageMixin, + booru.BooruExtractor): """Base class for yandere extractors""" category = "yandere" api_url = "https://yande.re/post.json" -class YandereTagExtractor(YandereExtractor, booru.BooruTagExtractor): +class YandereTagExtractor(booru.TagMixin, YandereExtractor): """Extractor for images from yande.re based on search-tags""" - pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post\?tags=([^&]+)"] + pattern = [r"(?:https?://)?(?:www\.)?yande\.re" + r"/post\?(?:[^&#]*&)*tags=(?P[^&#]+)"] test = [("https://yande.re/post?tags=ouzoku+armor", { "content": "59201811c728096b2d95ce6896fd0009235fe683", })] -class YanderePoolExtractor(YandereExtractor, booru.BooruPoolExtractor): +class YanderePoolExtractor(booru.PoolMixin, YandereExtractor): """Extractor for image-pools from yande.re""" - pattern = [r"(?:https?://)?(?:www\.)?yande\.re/pool/show/(\d+)"] + pattern = [r"(?:https?://)?(?:www\.)?yande\.re/pool/show/(?P\d+)"] test = [("https://yande.re/pool/show/318", { "content": "2a35b9d6edecce11cc2918c6dce4de2198342b68", })] -class YanderePostExtractor(YandereExtractor, booru.BooruPostExtractor): +class YanderePostExtractor(booru.PostMixin, YandereExtractor): """Extractor for single images from yande.re""" - pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post/show/(\d+)"] + pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post/show/(?P\d+)"] test = [("https://yande.re/post/show/51824", { "content": "59201811c728096b2d95ce6896fd0009235fe683", })] -class YanderePopularExtractor(YandereExtractor, booru.BooruPopularExtractor): +class YanderePopularExtractor(booru.MoebooruPopularMixin, YandereExtractor): """Extractor for popular images from yande.re""" - pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post/popular_" - r"(by_(?:day|week|month)|recent)(?:\?([^#]*))?"] + pattern = [r"(?:https?://)?(?:www\.)?yande\.re" + r"/post/popular_(?Pby_(?:day|week|month)|recent)" + r"(?:\?(?P[^#]*))?"] test = [ ("https://yande.re/post/popular_by_month?month=6&year=2014", { "count": 40, @@ -52,6 +56,7 @@ class YanderePopularExtractor(YandereExtractor, booru.BooruPopularExtractor): ("https://yande.re/post/popular_recent", None), ] - @property - def api_url(self): - return "https://yande.re/post/popular_" + self.scale + ".json" + def __init__(self, match): + super().__init__(match) + self.api_url = "https://yande.re/post/popular_{scale}.json".format( + scale=self.scale)