diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py index 64cde807..86d2aa04 100644 --- a/gallery_dl/extractor/booru.py +++ b/gallery_dl/extractor/booru.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2020 Mike Fährmann +# Copyright 2015-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -8,7 +8,7 @@ """Extractors for *booru sites""" -from .common import Extractor, Message, generate_extractors +from .common import BaseExtractor, Message from .. import text, util, exception from xml.etree import ElementTree @@ -17,7 +17,7 @@ import operator import re -class BooruExtractor(Extractor): +class BooruExtractor(BaseExtractor): """Base class for *booru extractors""" basecategory = "booru" filename_fmt = "{category}_{id}_{md5}.{extension}" @@ -104,14 +104,55 @@ class BooruExtractor(Extractor): params["pid"] += 1 +BASE_PATTERN = BooruExtractor.update({ + "rule34": { + "root": "https://rule34.xxx", + }, + "safebooru": { + "root": "https://safebooru.org", + }, + "realbooru": { + "root": "https://realbooru.com", + }, +}) + + class BooruPostExtractor(BooruExtractor): subcategory = "post" archive_fmt = "{id}" - pattern_fmt = r"/index\.php\?page=post&s=view&id=(\d+)" + pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)" + test = ( + ("https://rule34.xxx/index.php?page=post&s=view&id=1995545", { + "content": "97e4bbf86c3860be18de384d02d544251afe1d45", + "options": (("tags", True),), + "keyword": { + "tags_artist": "danraku", + "tags_character": "kashima_(kantai_collection)", + "tags_copyright": "kantai_collection", + "tags_general": str, + "tags_metadata": str, + }, + }), + ("https://safebooru.org/index.php?page=post&s=view&id=1169132", { + "url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b", + "content": "93b293b27dabd198afafabbaf87c49863ac82f27", + "options": (("tags", True),), + "keyword": { + "tags_artist": "kawanakajima", + "tags_character": "heath_ledger ronald_mcdonald the_joker", + "tags_copyright": "dc_comics mcdonald's the_dark_knight", + "tags_general": str, + }, + }), + ("https://realbooru.com/index.php?page=post&s=view&id=668483", { + "url": "2421b5b0e15d5e20f9067090a8b0fd4114d3e7d9", + "content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da", + }), + ) def __init__(self, match): BooruExtractor.__init__(self, match) - self.post_id = match.group(1) + self.post_id = match.group(match.lastindex) def posts(self): return self._pagination({"id": self.post_id}) @@ -121,11 +162,26 @@ class BooruTagExtractor(BooruExtractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "t_{search_tags}_{id}" - pattern_fmt = r"/index\.php\?page=post&s=list&tags=([^&#]+)" + pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]+)" + test = ( + ("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", { + "content": "97e4bbf86c3860be18de384d02d544251afe1d45", + "pattern": r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg", + "count": 1, + }), + ("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", { + "url": "17c61b386530cf4c30842c9f580d15ef1cd09586", + "content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb", + }), + ("https://realbooru.com/index.php?page=post&s=list&tags=wine", { + "count": ">= 64", + }), + ) def __init__(self, match): BooruExtractor.__init__(self, match) - self.tags = text.unquote(match.group(1).replace("+", " ")) + tags = match.group(match.lastindex) + self.tags = text.unquote(tags.replace("+", " ")) def metadata(self): return {"search_tags": self.tags} @@ -138,11 +194,22 @@ class BooruPoolExtractor(BooruExtractor): subcategory = "pool" directory_fmt = ("{category}", "pool", "{pool}") archive_fmt = "p_{pool}_{id}" - pattern_fmt = r"/index\.php\?page=pool&s=show&id=(\d+)" + pattern = BASE_PATTERN + r"/index\.php\?page=pool&s=show&id=(\d+)" + test = ( + ("https://rule34.xxx/index.php?page=pool&s=show&id=179", { + "count": 3, + }), + ("https://safebooru.org/index.php?page=pool&s=show&id=11", { + "count": 5, + }), + ("https://realbooru.com/index.php?page=pool&s=show&id=1", { + "count": 3, + }), + ) def __init__(self, match): BooruExtractor.__init__(self, match) - self.pool_id = match.group(1) + self.pool_id = match.group(match.lastindex) self.post_ids = () def skip(self, num): @@ -170,87 +237,3 @@ class BooruPoolExtractor(BooruExtractor): for params["id"] in util.advance(self.post_ids, self.page_start): for post in self._api_request(params): yield post.attrib - - -EXTRACTORS = { - "rule34": { - "root": "https://rule34.xxx", - "test-tag": ( - ("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", { - "content": "97e4bbf86c3860be18de384d02d544251afe1d45", - "pattern": r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg", - "count": 1, - }), - ), - "test-pool": ( - ("https://rule34.xxx/index.php?page=pool&s=show&id=179", { - "count": 3, - }), - ), - "test-post": ( - ("https://rule34.xxx/index.php?page=post&s=view&id=1995545", { - "content": "97e4bbf86c3860be18de384d02d544251afe1d45", - "options": (("tags", True),), - "keyword": { - "tags_artist": "danraku", - "tags_character": "kashima_(kantai_collection)", - "tags_copyright": "kantai_collection", - "tags_general": str, - "tags_metadata": str, - }, - }), - ), - }, - "safebooru": { - "root": "https://safebooru.org", - "test-tag": ( - ("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", { - "url": "17c61b386530cf4c30842c9f580d15ef1cd09586", - "content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb", - }), - ), - "test-pool": ( - ("https://safebooru.org/index.php?page=pool&s=show&id=11", { - "count": 5, - }), - ), - "test-post": ( - ("https://safebooru.org/index.php?page=post&s=view&id=1169132", { - "url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b", - "content": "93b293b27dabd198afafabbaf87c49863ac82f27", - "options": (("tags", True),), - "keyword": { - "tags_artist": "kawanakajima", - "tags_character": "heath_ledger ronald_mcdonald the_joker", - "tags_copyright": "dc_comics mcdonald's the_dark_knight", - "tags_general": str, - }, - }), - ), - }, - "realbooru": { - "root": "https://realbooru.com", - "test-tag": ( - ("https://realbooru.com/index.php?page=post&s=list&tags=wine", { - "count": ">= 64", - }), - ), - "test-pool": ( - ("https://realbooru.com/index.php?page=pool&s=show&id=1", { - "count": 3, - }), - ), - "test-post": ( - ("https://realbooru.com/index.php?page=post&s=view&id=668483", { - "url": "2421b5b0e15d5e20f9067090a8b0fd4114d3e7d9", - "content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da", - }), - ), - }, -} - -generate_extractors(EXTRACTORS, globals(), ( - BooruTagExtractor, - BooruPoolExtractor, - BooruPostExtractor, -))