diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py index 64cde807..86d2aa04 100644 --- a/gallery_dl/extractor/booru.py +++ b/gallery_dl/extractor/booru.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2020 Mike Fährmann +# Copyright 2015-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -8,7 +8,7 @@ """Extractors for *booru sites""" -from .common import Extractor, Message, generate_extractors +from .common import BaseExtractor, Message from .. import text, util, exception from xml.etree import ElementTree @@ -17,7 +17,7 @@ import operator import re -class BooruExtractor(Extractor): +class BooruExtractor(BaseExtractor): """Base class for *booru extractors""" basecategory = "booru" filename_fmt = "{category}_{id}_{md5}.{extension}" @@ -104,14 +104,55 @@ class BooruExtractor(Extractor): params["pid"] += 1 +BASE_PATTERN = BooruExtractor.update({ + "rule34": { + "root": "https://rule34.xxx", + }, + "safebooru": { + "root": "https://safebooru.org", + }, + "realbooru": { + "root": "https://realbooru.com", + }, +}) + + class BooruPostExtractor(BooruExtractor): subcategory = "post" archive_fmt = "{id}" - pattern_fmt = r"/index\.php\?page=post&s=view&id=(\d+)" + pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)" + test = ( + ("https://rule34.xxx/index.php?page=post&s=view&id=1995545", { + "content": "97e4bbf86c3860be18de384d02d544251afe1d45", + "options": (("tags", True),), + "keyword": { + "tags_artist": "danraku", + "tags_character": "kashima_(kantai_collection)", + "tags_copyright": "kantai_collection", + "tags_general": str, + "tags_metadata": str, + }, + }), + ("https://safebooru.org/index.php?page=post&s=view&id=1169132", { + "url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b", + "content": "93b293b27dabd198afafabbaf87c49863ac82f27", + "options": (("tags", True),), + "keyword": { + "tags_artist": "kawanakajima", + 
"tags_character": "heath_ledger ronald_mcdonald the_joker", + "tags_copyright": "dc_comics mcdonald's the_dark_knight", + "tags_general": str, + }, + }), + ("https://realbooru.com/index.php?page=post&s=view&id=668483", { + "url": "2421b5b0e15d5e20f9067090a8b0fd4114d3e7d9", + "content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da", + }), + ) def __init__(self, match): BooruExtractor.__init__(self, match) - self.post_id = match.group(1) + self.post_id = match.group(match.lastindex) def posts(self): return self._pagination({"id": self.post_id}) @@ -121,11 +162,26 @@ class BooruTagExtractor(BooruExtractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "t_{search_tags}_{id}" - pattern_fmt = r"/index\.php\?page=post&s=list&tags=([^&#]+)" + pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]+)" + test = ( + ("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", { + "content": "97e4bbf86c3860be18de384d02d544251afe1d45", + "pattern": r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg", + "count": 1, + }), + ("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", { + "url": "17c61b386530cf4c30842c9f580d15ef1cd09586", + "content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb", + }), + ("https://realbooru.com/index.php?page=post&s=list&tags=wine", { + "count": ">= 64", + }), + ) def __init__(self, match): BooruExtractor.__init__(self, match) - self.tags = text.unquote(match.group(1).replace("+", " ")) + tags = match.group(match.lastindex) + self.tags = text.unquote(tags.replace("+", " ")) def metadata(self): return {"search_tags": self.tags} @@ -138,11 +194,22 @@ class BooruPoolExtractor(BooruExtractor): subcategory = "pool" directory_fmt = ("{category}", "pool", "{pool}") archive_fmt = "p_{pool}_{id}" - pattern_fmt = r"/index\.php\?page=pool&s=show&id=(\d+)" + pattern = BASE_PATTERN + r"/index\.php\?page=pool&s=show&id=(\d+)" + test = ( + ("https://rule34.xxx/index.php?page=pool&s=show&id=179", { + "count": 3, 
+ }), + ("https://safebooru.org/index.php?page=pool&s=show&id=11", { + "count": 5, + }), + ("https://realbooru.com/index.php?page=pool&s=show&id=1", { + "count": 3, + }), + ) def __init__(self, match): BooruExtractor.__init__(self, match) - self.pool_id = match.group(1) + self.pool_id = match.group(match.lastindex) self.post_ids = () def skip(self, num): @@ -170,87 +237,3 @@ class BooruPoolExtractor(BooruExtractor): for params["id"] in util.advance(self.post_ids, self.page_start): for post in self._api_request(params): yield post.attrib - - -EXTRACTORS = { - "rule34": { - "root": "https://rule34.xxx", - "test-tag": ( - ("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", { - "content": "97e4bbf86c3860be18de384d02d544251afe1d45", - "pattern": r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg", - "count": 1, - }), - ), - "test-pool": ( - ("https://rule34.xxx/index.php?page=pool&s=show&id=179", { - "count": 3, - }), - ), - "test-post": ( - ("https://rule34.xxx/index.php?page=post&s=view&id=1995545", { - "content": "97e4bbf86c3860be18de384d02d544251afe1d45", - "options": (("tags", True),), - "keyword": { - "tags_artist": "danraku", - "tags_character": "kashima_(kantai_collection)", - "tags_copyright": "kantai_collection", - "tags_general": str, - "tags_metadata": str, - }, - }), - ), - }, - "safebooru": { - "root": "https://safebooru.org", - "test-tag": ( - ("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", { - "url": "17c61b386530cf4c30842c9f580d15ef1cd09586", - "content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb", - }), - ), - "test-pool": ( - ("https://safebooru.org/index.php?page=pool&s=show&id=11", { - "count": 5, - }), - ), - "test-post": ( - ("https://safebooru.org/index.php?page=post&s=view&id=1169132", { - "url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b", - "content": "93b293b27dabd198afafabbaf87c49863ac82f27", - "options": (("tags", True),), - "keyword": { - "tags_artist": "kawanakajima", - "tags_character": "heath_ledger 
ronald_mcdonald the_joker", - "tags_copyright": "dc_comics mcdonald's the_dark_knight", - "tags_general": str, - }, - }), - ), - }, - "realbooru": { - "root": "https://realbooru.com", - "test-tag": ( - ("https://realbooru.com/index.php?page=post&s=list&tags=wine", { - "count": ">= 64", - }), - ), - "test-pool": ( - ("https://realbooru.com/index.php?page=pool&s=show&id=1", { - "count": 3, - }), - ), - "test-post": ( - ("https://realbooru.com/index.php?page=post&s=view&id=668483", { - "url": "2421b5b0e15d5e20f9067090a8b0fd4114d3e7d9", - "content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da", - }), - ), - }, -} - -generate_extractors(EXTRACTORS, globals(), ( - BooruTagExtractor, - BooruPoolExtractor, - BooruPostExtractor, -)) diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 15cc7768..ec88d94b 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2020 Mike Fährmann +# Copyright 2014-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -527,46 +527,37 @@ class AsynchronousMixin(): messages.put(None) -def generate_extractors(extractor_data, symtable, classes): - """Dynamically generate Extractor classes""" - extractors = config.get(("extractor",), classes[0].basecategory) - ckey = extractor_data.get("_ckey") - prev = None +class BaseExtractor(Extractor): + instances = None - if extractors: - extractor_data.update(extractors) + def __init__(self, match): + if not self.category: + for index, group in enumerate(match.groups()): + if group is not None: + self.category, self.root = self.instances[index] + break + Extractor.__init__(self, match) - for category, info in extractor_data.items(): + @classmethod + def update(cls, instances): + extra_instances = config.get(("extractor",), cls.basecategory) + if extra_instances: + for category, info in 
extra_instances.items(): + if isinstance(info, dict) and "root" in info: + instances[category] = info - if not isinstance(info, dict) or "root" not in info: - continue + pattern_list = [] + instance_list = cls.instances = [] + for category, info in instances.items(): + root = info["root"] + instance_list.append((category, root)) - root = info["root"] - domain = root[root.index(":") + 3:] - pattern = info.get("pattern") or re.escape(domain) - name = (info.get("name") or category).capitalize() + pattern = info.get("pattern") + if not pattern: + pattern = re.escape(root[root.index(":") + 3:]) + pattern_list.append(pattern + "()") - for cls in classes: - - class Extr(cls): - pass - Extr.__module__ = cls.__module__ - Extr.__name__ = Extr.__qualname__ = \ - name + cls.subcategory.capitalize() + "Extractor" - Extr.__doc__ = \ - "Extractor for " + cls.subcategory + "s from " + domain - Extr.category = category - Extr.pattern = r"(?:https?://)?" + pattern + cls.pattern_fmt - Extr.test = info.get("test-" + cls.subcategory) - Extr.root = root - - if "extra" in info: - for key, value in info["extra"].items(): - setattr(Extr, key, value) - if prev and ckey: - setattr(Extr, ckey, prev) - - symtable[Extr.__name__] = prev = Extr + return r"(?:https?://)?(?:" + "|".join(pattern_list) + r")" # Undo automatic pyOpenSSL injection by requests diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 319ebe2d..0bcec2b7 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -8,21 +8,21 @@ """Extractors for 4chan archives based on FoolFuuka""" -from .common import Extractor, Message, generate_extractors +from .common import BaseExtractor, Message from .. 
import text import itertools -class FoolfuukaExtractor(Extractor): +class FoolfuukaExtractor(BaseExtractor): """Base extractor for FoolFuuka based boards/archives""" basecategory = "foolfuuka" archive_fmt = "{board[shortname]}_{num}_{timestamp}" external = "default" def __init__(self, match): - Extractor.__init__(self, match) + BaseExtractor.__init__(self, match) self.session.headers["Referer"] = self.root - if self.external == "direct": + if self.category == "b4k": self.remote = self._remote_direct def items(self): @@ -43,7 +43,7 @@ class FoolfuukaExtractor(Extractor): yield Message.Url, url, post def metadata(self): - """ """ + """Return general metadata""" def posts(self): """Return an iterable with all relevant posts""" @@ -59,16 +59,90 @@ class FoolfuukaExtractor(Extractor): return media["remote_media_link"] +BASE_PATTERN = FoolfuukaExtractor.update({ + "4plebs": { + "root": "https://archive.4plebs.org", + "pattern": r"(?:archive\.)?4plebs\.org", + }, + "archivedmoe": { + "root": "https://archived.moe", + }, + "archiveofsins": { + "root": "https://archiveofsins.com", + "pattern": r"(?:www\.)?archiveofsins\.com", + }, + "b4k": { + "root": "https://arch.b4k.co", + }, + "desuarchive": { + "root": "https://desuarchive.org", + }, + "fireden": { + "root": "https://boards.fireden.net", + }, + "nyafuu": { + "root": "https://archive.nyafuu.org", + "pattern": r"(?:archive\.)?nyafuu\.org", + }, + "rbt": { + "root": "https://rbt.asia", + "pattern": r"(?:rbt\.asia|(?:archive\.)?rebeccablacktech\.com)", + }, + "thebarchive": { + "root": "https://thebarchive.com", + "pattern": r"thebarchive\.com", + }, +}) + + class FoolfuukaThreadExtractor(FoolfuukaExtractor): """Base extractor for threads on FoolFuuka based boards/archives""" subcategory = "thread" directory_fmt = ("{category}", "{board[shortname]}", "{thread_num}{title:? 
- //}") - pattern_fmt = r"/([^/?#]+)/thread/(\d+)" + pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)" + test = ( + ("https://archive.4plebs.org/tg/thread/54059290", { + "url": "07452944164b602502b02b24521f8cee5c484d2a", + }), + ("https://archived.moe/gd/thread/309639/", { + "url": "fdd533840e2d535abd162c02d6dfadbc12e2dcd8", + "content": "c27e2a7be3bc989b5dd859f7789cc854db3f5573", + }), + ("https://archived.moe/a/thread/159767162/", { + "url": "ffec05a1a1b906b5ca85992513671c9155ee9e87", + }), + ("https://archiveofsins.com/h/thread/4668813/", { + "url": "f612d287087e10a228ef69517cf811539db9a102", + "content": "0dd92d0d8a7bf6e2f7d1f5ac8954c1bcf18c22a4", + }), + ("https://arch.b4k.co/meta/thread/196/", { + "url": "d309713d2f838797096b3e9cb44fe514a9c9d07a", + }), + ("https://desuarchive.org/a/thread/159542679/", { + "url": "3ae1473f6916ac831efe5cc4d4e7d3298ce79406", + }), + ("https://boards.fireden.net/sci/thread/11264294/", { + "url": "3adfe181ee86a8c23021c705f623b3657a9b0a43", + }), + ("https://archive.nyafuu.org/c/thread/2849220/", { + "url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f", + }), + ("https://rbt.asia/g/thread/61487650/", { + "url": "61896d9d9a2edb556b619000a308a984307b6d30", + }), + ("https://archive.rebeccablacktech.com/g/thread/61487650/", { + "url": "61896d9d9a2edb556b619000a308a984307b6d30", + }), + ("https://thebarchive.com/b/thread/739772332/", { + "url": "e8b18001307d130d67db31740ce57c8561b5d80c", + }), + ) def __init__(self, match): FoolfuukaExtractor.__init__(self, match) - self.board, self.thread = match.groups() + self.board = match.group(match.lastindex-1) + self.thread = match.group(match.lastindex) self.data = None def metadata(self): @@ -78,23 +152,34 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor): return self.data["op"] def posts(self): + op = (self.data["op"],) posts = self.data.get("posts") if posts: posts = list(posts.values()) posts.sort(key=lambda p: p["timestamp"]) - else: - posts = () - return 
itertools.chain((self.data["op"],), posts)
+            return itertools.chain(op, posts)
+        return op
 
 
 class FoolfuukaBoardExtractor(FoolfuukaExtractor):
     """Base extractor for FoolFuuka based boards/archives"""
     subcategory = "board"
-    pattern_fmt = r"/([^/?#]+)/\d*$"
+    pattern = BASE_PATTERN + r"/([^/?#]+)/\d*$"
+    test = (
+        ("https://archive.4plebs.org/tg/"),
+        ("https://archived.moe/gd/"),
+        ("https://archiveofsins.com/h/"),
+        ("https://arch.b4k.co/meta/"),
+        ("https://desuarchive.org/a/"),
+        ("https://boards.fireden.net/sci/"),
+        ("https://archive.nyafuu.org/c/"),
+        ("https://rbt.asia/g/"),
+        ("https://thebarchive.com/b/"),
+    )
 
     def __init__(self, match):
         FoolfuukaExtractor.__init__(self, match)
-        self.board = match.group(1)
+        self.board = match.group(match.lastindex)
 
     def items(self):
         index_base = "{}/_/api/chan/index/?board={}&page=".format(
@@ -113,7 +198,7 @@
 
         for num, thread in threads.items():
             thread["url"] = thread_base + format(num)
-            thread["_extractor"] = self.childclass
+            thread["_extractor"] = FoolfuukaThreadExtractor
 
             yield Message.Queue, thread["url"], thread
 
@@ -121,15 +206,25 @@
 class FoolfuukaSearchExtractor(FoolfuukaExtractor):
     """Base extractor for search results on FoolFuuka based boards/archives"""
     subcategory = "search"
     directory_fmt = ("{category}", "search", "{search}")
-    pattern_fmt = r"/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)"
+    pattern = BASE_PATTERN + r"/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)"
     request_interval = 1.0
+    test = (
+        ("https://archive.4plebs.org/_/search/text/test/"),
+        ("https://archived.moe/_/search/text/test/"),
+        ("https://archiveofsins.com/_/search/text/test/"),
+        ("https://arch.b4k.co/_/search/text/test/"),
+        ("https://desuarchive.org/_/search/text/test/"),
+        ("https://boards.fireden.net/_/search/text/test/"),
+        ("https://archive.nyafuu.org/_/search/text/test/"),
+        ("https://rbt.asia/_/search/text/test/"),
+        ("https://thebarchive.com/_/search/text/test/"),
+    )
 
     def __init__(self, match):
         FoolfuukaExtractor.__init__(self, match)
-        board, search = match.groups()
-        self.params = params = {}
-        args = search.split("/")
+        self.params = params = {}
+        args = match.group(match.lastindex).split("/")
 
         key = None
         for arg in args:
@@ -138,6 +233,8 @@
                 key = None
             else:
                 key = arg
+
+        board = match.group(match.lastindex-1)
         if board != "_":
             params["boards"] = board
 
@@ -170,105 +267,3 @@
         if len(posts) <= 3:
             return
         params["page"] += 1
-
-
-EXTRACTORS = {
-    "4plebs": {
-        "name": "_4plebs",
-        "root": "https://archive.4plebs.org",
-        "pattern": r"(?:archive\.)?4plebs\.org",
-        "test-thread": ("https://archive.4plebs.org/tg/thread/54059290", {
-            "url": "07452944164b602502b02b24521f8cee5c484d2a",
-        }),
-        "test-board": ("https://archive.4plebs.org/tg/",),
-        "test-search": ("https://archive.4plebs.org/_/search/text/test/",),
-    },
-    "archivedmoe": {
-        "root": "https://archived.moe",
-        "test-thread": (
-            ("https://archived.moe/gd/thread/309639/", {
-                "url": "fdd533840e2d535abd162c02d6dfadbc12e2dcd8",
-                "content": "c27e2a7be3bc989b5dd859f7789cc854db3f5573",
-            }),
-            ("https://archived.moe/a/thread/159767162/", {
-                "url": "ffec05a1a1b906b5ca85992513671c9155ee9e87",
-            }),
-        ),
-        "test-board": ("https://archived.moe/gd/",),
-        "test-search": ("https://archived.moe/_/search/text/test/",),
-    },
-    "archiveofsins": {
-        "root": "https://archiveofsins.com",
-        "pattern": r"(?:www\.)?archiveofsins\.com",
-        "test-thread": ("https://archiveofsins.com/h/thread/4668813/", {
-            "url": "f612d287087e10a228ef69517cf811539db9a102",
-            "content": "0dd92d0d8a7bf6e2f7d1f5ac8954c1bcf18c22a4",
-        }),
-        "test-board": ("https://archiveofsins.com/h/",),
-        "test-search": ("https://archiveofsins.com/_/search/text/test/",),
-    },
-    "b4k": {
-        "root": "https://arch.b4k.co",
-        "extra": {"external": "direct"},
-        "test-thread": ("https://arch.b4k.co/meta/thread/196/", {
-            "url": "d309713d2f838797096b3e9cb44fe514a9c9d07a",
-
}), - "test-board": ("https://arch.b4k.co/meta/",), - "test-search": ("https://arch.b4k.co/_/search/text/test/",), - }, - "desuarchive": { - "root": "https://desuarchive.org", - "test-thread": ("https://desuarchive.org/a/thread/159542679/", { - "url": "3ae1473f6916ac831efe5cc4d4e7d3298ce79406", - }), - "test-board": ("https://desuarchive.org/a/",), - "test-search": ("https://desuarchive.org/_/search/text/test/",), - }, - "fireden": { - "root": "https://boards.fireden.net", - "test-thread": ("https://boards.fireden.net/sci/thread/11264294/", { - "url": "3adfe181ee86a8c23021c705f623b3657a9b0a43", - }), - "test-board": ("https://boards.fireden.net/sci/",), - "test-search": ("https://boards.fireden.net/_/search/text/test/",), - }, - "nyafuu": { - "root": "https://archive.nyafuu.org", - "pattern": r"(?:archive\.)?nyafuu\.org", - "test-thread": ("https://archive.nyafuu.org/c/thread/2849220/", { - "url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f", - }), - "test-board": ("https://archive.nyafuu.org/c/",), - "test-search": ("https://archive.nyafuu.org/_/search/text/test/",), - }, - "rbt": { - "root": "https://rbt.asia", - "pattern": r"(?:rbt\.asia|(?:archive\.)?rebeccablacktech\.com)", - "test-thread": ( - ("https://rbt.asia/g/thread/61487650/", { - "url": "61896d9d9a2edb556b619000a308a984307b6d30", - }), - ("https://archive.rebeccablacktech.com/g/thread/61487650/", { - "url": "61896d9d9a2edb556b619000a308a984307b6d30", - }), - ), - "test-board": ("https://rbt.asia/g/",), - "test-search": ("https://rbt.asia/_/search/text/test/",), - }, - "thebarchive": { - "root": "https://thebarchive.com", - "pattern": r"thebarchive\.com", - "test-thread": ("https://thebarchive.com/b/thread/739772332/", { - "url": "e8b18001307d130d67db31740ce57c8561b5d80c", - }), - "test-board": ("https://thebarchive.com/b/",), - "test-search": ("https://thebarchive.com/_/search/text/test/",), - }, - "_ckey": "childclass", -} - -generate_extractors(EXTRACTORS, globals(), ( - FoolfuukaThreadExtractor, - 
FoolfuukaBoardExtractor, - FoolfuukaSearchExtractor, -)) diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py index db5e250b..f8664e73 100644 --- a/gallery_dl/extractor/foolslide.py +++ b/gallery_dl/extractor/foolslide.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2020 Mike Fährmann +# Copyright 2016-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -8,23 +8,21 @@ """Extractors for FoOlSlide based sites""" -from .common import ( - Extractor, - ChapterExtractor, - MangaExtractor, - Message, - generate_extractors, -) +from .common import BaseExtractor, Message from .. import text, util import json -class FoolslideBase(): +class FoolslideExtractor(BaseExtractor): """Base class for FoOlSlide extractors""" basecategory = "foolslide" + def __init__(self, match): + BaseExtractor.__init__(self, match) + self.gallery_url = self.root + match.group(match.lastindex) + def request(self, url): - return Extractor.request( + return BaseExtractor.request( self, url, encoding="utf-8", method="POST", data={"adult": "true"}) @staticmethod @@ -40,12 +38,53 @@ class FoolslideBase(): return data -class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor): +BASE_PATTERN = FoolslideExtractor.update({ + "dokireader": { + "root": "https://kobato.hologfx.com/reader", + }, + "kireicake": { + "root": "https://reader.kireicake.com", + }, + "powermanga": { + "root": "https://read.powermanga.org", + "pattern": r"read(?:er)?\.powermanga\.org", + }, + "sensescans": { + "root": "https://sensescans.com/reader", + "pattern": r"(?:(?:www\.)?sensescans\.com/reader" + r"|reader\.sensescans\.com)", + }, +}) + + +class FoolslideChapterExtractor(FoolslideExtractor): """Base class for chapter extractors for FoOlSlide based sites""" + subcategory = "chapter" directory_fmt = ("{category}", "{manga}", "{chapter_string}") + filename_fmt = ( + 
"{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}") archive_fmt = "{id}" - pattern_fmt = r"(/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)" - decode = "default" + pattern = BASE_PATTERN + r"(/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)" + test = ( + (("https://kobato.hologfx.com/reader/read/" + "hitoribocchi_no_oo_seikatsu/en/3/34"), { + "keyword": "6e719ac86f0c6dab89390dd7e507e678459e0dbc", + }), + ("https://reader.kireicake.com/read/wonderland/en/1/1/", { + "url": "b2d36bc0bc67e4c461c3a4d6444a2fd339f5d07e", + "keyword": "9f80947920a325e33aea7f5cd69ea669171903b6", + }), + (("https://read.powermanga.org" + "/read/one_piece_digital_colour_comics/en/0/75/"), { + "url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384", + "keyword": "a60c42f2634b7387899299d411ff494ed0ad6dbe", + }), + ("https://sensescans.com/reader/read/ao_no_orchestra/en/0/26/", { + "url": "bbd428dc578f5055e9f86ad635b510386cd317cd", + "keyword": "083ef6f8831c84127fe4096fa340a249be9d1424", + }), + ("https://reader.sensescans.com/read/ao_no_orchestra/en/0/26/"), + ) def items(self): page = self.request(self.gallery_url).text @@ -83,9 +122,51 @@ class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor): return json.loads(text.extract(page, "var pages = ", ";")[0]) -class FoolslideMangaExtractor(FoolslideBase, MangaExtractor): +class FoolslideMangaExtractor(FoolslideExtractor): """Base class for manga extractors for FoOlSlide based sites""" - pattern_fmt = r"(/series/[^/?#]+)" + subcategory = "manga" + categorytransfer = True + pattern = BASE_PATTERN + r"(/series/[^/?#]+)" + test = ( + (("https://kobato.hologfx.com/reader/series/" + "boku_ha_ohimesama_ni_narenai/"), { + "url": "1c1f5a7258ce4f631f5fc32be548d78a6a57990d", + "keyword": "614d89a6045b85c822cbd3e67578ea7577dfc995", + }), + ("https://reader.kireicake.com/series/wonderland/", { + "url": "d067b649af1cc88fa8c8b698fde04a10909fd169", + "keyword": "268f43772fb239888ca5c5f6a4f65f99ffb3eefb", + }), + (("https://read.powermanga.org" + 
"/series/one_piece_digital_colour_comics/"), { + "count": ">= 1", + "keyword": { + "chapter": int, + "chapter_minor": str, + "chapter_string": str, + "group": "PowerManga", + "lang": "en", + "language": "English", + "manga": "One Piece Digital Colour Comics", + "title": str, + "volume": int, + }, + }), + ("https://sensescans.com/reader/series/yotsubato/", { + "count": ">= 3", + }), + ) + + def items(self): + page = self.request(self.gallery_url).text + + chapters = self.chapters(page) + if not self.config("chapter-reverse", False): + chapters.reverse() + + for chapter, data in chapters: + data["_extractor"] = FoolslideChapterExtractor + yield Message.Queue, chapter, data def chapters(self, page): extr = text.extract_from(page) @@ -103,82 +184,3 @@ class FoolslideMangaExtractor(FoolslideBase, MangaExtractor): "chapter_string": extr('title="', '"'), "group" : extr('title="', '"'), }))) - - -EXTRACTORS = { - "dokireader": { - "root": "https://kobato.hologfx.com/reader", - "test-chapter": - (("https://kobato.hologfx.com/reader/read/" - "hitoribocchi_no_oo_seikatsu/en/3/34"), { - "keyword": "6e719ac86f0c6dab89390dd7e507e678459e0dbc", - }), - "test-manga": - (("https://kobato.hologfx.com/reader/series/" - "boku_ha_ohimesama_ni_narenai/"), { - "url": "1c1f5a7258ce4f631f5fc32be548d78a6a57990d", - "keyword": "614d89a6045b85c822cbd3e67578ea7577dfc995", - }), - }, - "kireicake": { - "root": "https://reader.kireicake.com", - "test-chapter": - ("https://reader.kireicake.com/read/wonderland/en/1/1/", { - "url": "b2d36bc0bc67e4c461c3a4d6444a2fd339f5d07e", - "keyword": "9f80947920a325e33aea7f5cd69ea669171903b6", - }), - "test-manga": - ("https://reader.kireicake.com/series/wonderland/", { - "url": "d067b649af1cc88fa8c8b698fde04a10909fd169", - "keyword": "268f43772fb239888ca5c5f6a4f65f99ffb3eefb", - }), - }, - "powermanga": { - "root": "https://read.powermanga.org", - "pattern": r"read(?:er)?\.powermanga\.org", - "test-chapter": - (("https://read.powermanga.org" - 
"/read/one_piece_digital_colour_comics/en/0/75/"), { - "url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384", - "keyword": "a60c42f2634b7387899299d411ff494ed0ad6dbe", - }), - "test-manga": - (("https://read.powermanga.org" - "/series/one_piece_digital_colour_comics/"), { - "count": ">= 1", - "keyword": { - "chapter": int, - "chapter_minor": str, - "chapter_string": str, - "group": "PowerManga", - "lang": "en", - "language": "English", - "manga": "One Piece Digital Colour Comics", - "title": str, - "volume": int, - }, - }), - }, - "sensescans": { - "root": "https://sensescans.com/reader", - "pattern": r"(?:(?:www\.)?sensescans\.com/reader" - r"|reader\.sensescans\.com)", - "test-chapter": ( - ("https://sensescans.com/reader/read/ao_no_orchestra/en/0/26/", { - "url": "bbd428dc578f5055e9f86ad635b510386cd317cd", - "keyword": "083ef6f8831c84127fe4096fa340a249be9d1424", - }), - ("https://reader.sensescans.com/read/ao_no_orchestra/en/0/26/"), - ), - "test-manga": - ("https://sensescans.com/reader/series/yotsubato/", { - "count": ">= 3", - }), - }, - "_ckey": "chapterclass", -} - -generate_extractors(EXTRACTORS, globals(), ( - FoolslideChapterExtractor, - FoolslideMangaExtractor, -)) diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 84018a94..c62880b1 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -366,13 +366,6 @@ class InstagramUserExtractor(InstagramExtractor): ) def items(self): - if self.config("highlights"): - self.log.warning("'highlights' is deprecated, " - "use '\"include\": \"…,highlights\"' instead") - default = ("highlights", "posts") - else: - default = ("posts",) - base = "{}/{}/".format(self.root, self.item) stories = "{}/stories/{}/".format(self.root, self.item) return self._dispatch_extractors(( @@ -380,7 +373,7 @@ class InstagramUserExtractor(InstagramExtractor): (InstagramHighlightsExtractor, base + "highlights/"), (InstagramPostsExtractor , base + "posts/"), 
(InstagramChannelExtractor , base + "channel/"), - ), default) + ), ("posts",)) class InstagramPostsExtractor(InstagramExtractor): diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py index 0e063d53..daa3d658 100644 --- a/gallery_dl/extractor/mastodon.py +++ b/gallery_dl/extractor/mastodon.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2020 Mike Fährmann +# Copyright 2019-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -8,35 +8,25 @@ """Extractors for mastodon instances""" -from .common import Extractor, Message -from .. import text, util, config, exception -import re +from .common import BaseExtractor, Message +from .. import text, exception +from ..cache import cache -class MastodonExtractor(Extractor): +class MastodonExtractor(BaseExtractor): """Base class for mastodon extractors""" basecategory = "mastodon" directory_fmt = ("mastodon", "{instance}", "{account[username]}") filename_fmt = "{category}_{id}_{media[id]}.{extension}" archive_fmt = "{media[id]}" cookiedomain = None - instance = None - root = None def __init__(self, match): - Extractor.__init__(self, match) - self.api = MastodonAPI(self) - - def config(self, key, default=None): - return config.interpolate_common( - ("extractor",), ( - (self.category, self.subcategory), - (self.basecategory, self.instance, self.subcategory), - ), key, default, - ) + BaseExtractor.__init__(self, match) + self.instance = self.root.partition("://")[2] + self.item = match.group(match.lastindex) def items(self): - yield Message.Version, 1 for status in self.statuses(): attachments = status["media_attachments"] if attachments: @@ -60,34 +50,81 @@ class MastodonExtractor(Extractor): status["created_at"][:19], "%Y-%m-%dT%H:%M:%S") +INSTANCES = { + "mastodon.social": { + "root" : "https://mastodon.social", + "access-token" : "Y06R36SMvuXXN5_wiPKFAEFiQaMSQg0o_hGgc86Jj48", + 
"client-id" : "dBSHdpsnOUZgxOnjKSQrWEPakO3ctM7HmsyoOd4FcRo", + "client-secret": "DdrODTHs_XoeOsNVXnILTMabtdpWrWOAtrmw91wU1zI", + }, + "pawoo": { + "root" : "https://pawoo.net", + "access-token" : "c12c9d275050bce0dc92169a28db09d7" + "0d62d0a75a8525953098c167eacd3668", + "client-id" : "978a25f843ec01e53d09be2c290cd75c" + "782bc3b7fdbd7ea4164b9f3c3780c8ff", + "client-secret": "9208e3d4a7997032cf4f1b0e12e5df38" + "8428ef1fadb446dcfeb4f5ed6872d97b", + }, + "baraag": { + "root" : "https://baraag.net", + "access-token" : "53P1Mdigf4EJMH-RmeFOOSM9gdSDztmrAYFgabOKKE0", + "client-id" : "czxx2qilLElYHQ_sm-lO8yXuGwOHxLX9RYYaD0-nq1o", + "client-secret": "haMaFdMBgK_-BIxufakmI2gFgkYjqmgXGEO2tB-R2xY", + } +} + +BASE_PATTERN = MastodonExtractor.update(INSTANCES) + + class MastodonUserExtractor(MastodonExtractor): """Extractor for all images of an account/user""" subcategory = "user" - - def __init__(self, match): - MastodonExtractor.__init__(self, match) - self.account_name = match.group(1) + pattern = BASE_PATTERN + r"/@([^/?#]+)(?:/media)?/?$" + test = ( + ("https://mastodon.social/@jk", { + "pattern": r"https://files.mastodon.social/media_attachments" + r"/files/(\d+/){3,}original/\w+", + "range": "1-60", + "count": 60, + }), + ("https://pawoo.net/@yoru_nine/", { + "range": "1-60", + "count": 60, + }), + ("https://baraag.net/@pumpkinnsfw"), + ) def statuses(self): - handle = "@{}@{}".format(self.account_name, self.instance) - for account in self.api.account_search(handle, 1): - if account["username"] == self.account_name: + api = MastodonAPI(self) + username = self.item + handle = "@{}@{}".format(username, self.instance) + for account in api.account_search(handle, 1): + if account["username"] == username: break else: raise exception.NotFoundError("account") - return self.api.account_statuses(account["id"]) + return api.account_statuses(account["id"]) class MastodonStatusExtractor(MastodonExtractor): """Extractor for images from a status""" subcategory = "status" - - def 
__init__(self, match): - MastodonExtractor.__init__(self, match) - self.status_id = match.group(1) + pattern = BASE_PATTERN + r"/@[^/?#]+/(\d+)" + test = ( + ("https://mastodon.social/@jk/103794036899778366", { + "count": 4, + }), + ("https://pawoo.net/@yoru_nine/105038878897832922", { + "content": "b52e807f8ab548d6f896b09218ece01eba83987a", + }), + ("https://baraag.net/@pumpkinnsfw/104364170556898443", { + "content": "67748c1b828c58ad60d0fe5729b59fb29c872244", + }), + ) def statuses(self): - return (self.api.status(self.status_id),) + return (MastodonAPI(self).status(self.item),) class MastodonAPI(): @@ -97,35 +134,46 @@ class MastodonAPI(): https://github.com/tootsuite/documentation/blob/master/Using-the-API/API.md """ - def __init__(self, extractor, access_token=None): + def __init__(self, extractor): self.root = extractor.root self.extractor = extractor + access_token = extractor.config("access-token") + if access_token is None or access_token == "cache": + access_token = _access_token_cache(extractor.instance) if not access_token: - access_token = extractor.config( - "access-token", extractor.access_token) - self.headers = {"Authorization": "Bearer {}".format(access_token)} + try: + access_token = INSTANCES[extractor.category]["access-token"] + except (KeyError, TypeError): + raise exception.StopExtraction( + "Missing access token.\n" + "Run 'gallery-dl oauth:mastodon:%s' to obtain one.", + extractor.instance) + + self.headers = {"Authorization": "Bearer " + access_token} def account_search(self, query, limit=40): """Search for content""" + endpoint = "/v1/accounts/search" params = {"q": query, "limit": limit} - return self._call("accounts/search", params).json() + return self._call(endpoint, params).json() def account_statuses(self, account_id): """Get an account's statuses""" - endpoint = "accounts/{}/statuses".format(account_id) + endpoint = "/v1/accounts/{}/statuses".format(account_id) params = {"only_media": "1"} return self._pagination(endpoint, params) 
def status(self, status_id): - """Fetch a Status""" - return self._call("statuses/" + status_id).json() + """Fetch a status""" + endpoint = "/v1/statuses/" + status_id + return self._call(endpoint).json() def _call(self, endpoint, params=None): if endpoint.startswith("http"): url = endpoint else: - url = "{}/api/v1/{}".format(self.root, endpoint) + url = self.root + "/api" + endpoint while True: response = self.extractor.request( @@ -145,7 +193,7 @@ class MastodonAPI(): raise exception.StopExtraction(response.json().get("error")) def _pagination(self, endpoint, params): - url = "{}/api/v1/{}".format(self.root, endpoint) + url = endpoint while url: response = self._call(url, params) yield from response.json() @@ -156,86 +204,6 @@ class MastodonAPI(): url = url["url"] -def generate_extractors(): - """Dynamically generate Extractor classes for Mastodon instances""" - - symtable = globals() - extractors = config.get(("extractor",), "mastodon") - if extractors: - util.combine_dict(EXTRACTORS, extractors) - config.set(("extractor",), "mastodon", EXTRACTORS) - - for instance, info in EXTRACTORS.items(): - - if not isinstance(info, dict): - continue - - category = info.get("category") or instance.replace(".", "") - root = info.get("root") or "https://" + instance - name = (info.get("name") or category).capitalize() - token = info.get("access-token") - pattern = info.get("pattern") or re.escape(instance) - - class Extr(MastodonUserExtractor): - pass - - Extr.__name__ = Extr.__qualname__ = name + "UserExtractor" - Extr.__doc__ = "Extractor for all images of a user on " + instance - Extr.category = category - Extr.instance = instance - Extr.pattern = (r"(?:https?://)?" 
+ pattern + - r"/@([^/?#]+)(?:/media)?/?$") - Extr.test = info.get("test-user") - Extr.root = root - Extr.access_token = token - symtable[Extr.__name__] = Extr - - class Extr(MastodonStatusExtractor): - pass - - Extr.__name__ = Extr.__qualname__ = name + "StatusExtractor" - Extr.__doc__ = "Extractor for images from a status on " + instance - Extr.category = category - Extr.instance = instance - Extr.pattern = r"(?:https?://)?" + pattern + r"/@[^/?#]+/(\d+)" - Extr.test = info.get("test-status") - Extr.root = root - Extr.access_token = token - symtable[Extr.__name__] = Extr - - -EXTRACTORS = { - "mastodon.social": { - "category" : "mastodon.social", - "access-token" : "Y06R36SMvuXXN5_wiPKFAEFiQaMSQg0o_hGgc86Jj48", - "client-id" : "dBSHdpsnOUZgxOnjKSQrWEPakO3ctM7HmsyoOd4FcRo", - "client-secret": "DdrODTHs_XoeOsNVXnILTMabtdpWrWOAtrmw91wU1zI", - "test-user" : ("https://mastodon.social/@jk", { - "pattern": r"https://files.mastodon.social/media_attachments" - r"/files/(\d+/){3,}original/\w+", - "range": "1-60", - "count": 60, - }), - "test-status" : ("https://mastodon.social/@jk/103794036899778366", { - "count": 4, - }), - }, - "pawoo.net": { - "category" : "pawoo", - "access-token" : "c12c9d275050bce0dc92169a28db09d7" - "0d62d0a75a8525953098c167eacd3668", - "client-id" : "978a25f843ec01e53d09be2c290cd75c" - "782bc3b7fdbd7ea4164b9f3c3780c8ff", - "client-secret": "9208e3d4a7997032cf4f1b0e12e5df38" - "8428ef1fadb446dcfeb4f5ed6872d97b", - }, - "baraag.net": { - "category" : "baraag", - "access-token" : "53P1Mdigf4EJMH-RmeFOOSM9gdSDztmrAYFgabOKKE0", - "client-id" : "czxx2qilLElYHQ_sm-lO8yXuGwOHxLX9RYYaD0-nq1o", - "client-secret": "haMaFdMBgK_-BIxufakmI2gFgkYjqmgXGEO2tB-R2xY", - }, -} - - -generate_extractors() +@cache(maxage=100*365*24*3600, keyarg=0) +def _access_token_cache(instance): + return None diff --git a/gallery_dl/extractor/message.py b/gallery_dl/extractor/message.py index 7bf00841..d5c25546 100644 --- a/gallery_dl/extractor/message.py +++ 
b/gallery_dl/extractor/message.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2018 Mike Fährmann +# Copyright 2015-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -52,4 +52,4 @@ class Message(): # Cookies = 5 Queue = 6 # Urllist = 7 - Metadata = 8 + # Metadata = 8 diff --git a/gallery_dl/extractor/moebooru.py b/gallery_dl/extractor/moebooru.py index 0ac55cd0..df771100 100644 --- a/gallery_dl/extractor/moebooru.py +++ b/gallery_dl/extractor/moebooru.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2020 Mike Fährmann +# Copyright 2020-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -8,7 +8,6 @@ """Extractors for Moebooru based sites""" -from .common import generate_extractors from .booru import BooruExtractor from .. import text @@ -52,15 +51,93 @@ class MoebooruExtractor(BooruExtractor): params["page"] += 1 +BASE_PATTERN = MoebooruExtractor.update({ + "yandere": { + "root": "https://yande.re", + }, + "konachan": { + "root": "https://konachan.com", + "pattern": r"konachan\.(?:com|net)", + }, + "hypnohub": { + "root": "https://hypnohub.net", + }, + "sakugabooru": { + "root": "https://www.sakugabooru.com", + "pattern": r"(?:www\.)?sakugabooru\.com", + }, + "lolibooru": { + "root": "https://lolibooru.moe", + }, +}) + + +class MoebooruPostExtractor(MoebooruExtractor): + subcategory = "post" + archive_fmt = "{id}" + pattern = BASE_PATTERN + r"/post/show/(\d+)" + test = ( + ("https://yande.re/post/show/51824", { + "content": "59201811c728096b2d95ce6896fd0009235fe683", + "options": (("tags", True),), + "keyword": { + "tags_artist": "sasaki_tamaru", + "tags_circle": "softhouse_chara", + "tags_copyright": "ouzoku", + "tags_general": str, + }, + }), + ("https://konachan.com/post/show/205189", { + "content": 
"674e75a753df82f5ad80803f575818b8e46e4b65", + "options": (("tags", True),), + "keyword": { + "tags_artist": "patata", + "tags_character": "clownpiece", + "tags_copyright": "touhou", + "tags_general": str, + }, + }), + ("https://konachan.net/post/show/205189"), + ("https://hypnohub.net/post/show/73964", { + "content": "02d5f5a8396b621a6efc04c5f8ef1b7225dfc6ee", + }), + ("https://www.sakugabooru.com/post/show/125570"), + ("https://lolibooru.moe/post/show/287835"), + ) + + def __init__(self, match): + MoebooruExtractor.__init__(self, match) + self.post_id = match.group(match.lastindex) + + def posts(self): + params = {"tags": "id:" + self.post_id} + return self.request(self.root + "/post.json", params=params).json() + + class MoebooruTagExtractor(MoebooruExtractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "t_{search_tags}_{id}" - pattern_fmt = r"/post\?(?:[^&#]*&)*tags=([^&#]+)" + pattern = BASE_PATTERN + r"/post\?(?:[^&#]*&)*tags=([^&#]+)" + test = ( + ("https://yande.re/post?tags=ouzoku+armor", { + "content": "59201811c728096b2d95ce6896fd0009235fe683", + }), + ("https://konachan.com/post?tags=patata", { + "content": "838cfb815e31f48160855435655ddf7bfc4ecb8d", + }), + ("https://konachan.net/post?tags=patata"), + ("https://hypnohub.net/post?tags=gonoike_biwa", { + "url": "072330c34a1e773d0cafd00e64b8060d34b078b6", + }), + ("https://www.sakugabooru.com/post?tags=nichijou"), + ("https://lolibooru.moe/post?tags=ruu_%28tksymkw%29"), + ) def __init__(self, match): MoebooruExtractor.__init__(self, match) - self.tags = text.unquote(match.group(1).replace("+", " ")) + tags = match.group(match.lastindex) + self.tags = text.unquote(tags.replace("+", " ")) def metadata(self): return {"search_tags": self.tags} @@ -74,11 +151,25 @@ class MoebooruPoolExtractor(MoebooruExtractor): subcategory = "pool" directory_fmt = ("{category}", "pool", "{pool}") archive_fmt = "p_{pool}_{id}" - pattern_fmt = r"/pool/show/(\d+)" + pattern = BASE_PATTERN + 
r"/pool/show/(\d+)" + test = ( + ("https://yande.re/pool/show/318", { + "content": "2a35b9d6edecce11cc2918c6dce4de2198342b68", + }), + ("https://konachan.com/pool/show/95", { + "content": "cf0546e38a93c2c510a478f8744e60687b7a8426", + }), + ("https://konachan.net/pool/show/95"), + ("https://hypnohub.net/pool/show/61", { + "url": "fd74991c8729e77acd3c35eb6ddc4128ff445adf", + }), + ("https://www.sakugabooru.com/pool/show/54"), + ("https://lolibooru.moe/pool/show/239"), + ) def __init__(self, match): MoebooruExtractor.__init__(self, match) - self.pool_id = match.group(1) + self.pool_id = match.group(match.lastindex) def metadata(self): return {"pool": text.parse_int(self.pool_id)} @@ -88,29 +179,34 @@ class MoebooruPoolExtractor(MoebooruExtractor): return self._pagination(self.root + "/post.json", params) -class MoebooruPostExtractor(MoebooruExtractor): - subcategory = "post" - archive_fmt = "{id}" - pattern_fmt = r"/post/show/(\d+)" - - def __init__(self, match): - MoebooruExtractor.__init__(self, match) - self.post_id = match.group(1) - - def posts(self): - params = {"tags": "id:" + self.post_id} - return self.request(self.root + "/post.json", params=params).json() - - class MoebooruPopularExtractor(MoebooruExtractor): subcategory = "popular" directory_fmt = ("{category}", "popular", "{scale}", "{date}") archive_fmt = "P_{scale[0]}_{date}_{id}" - pattern_fmt = r"/post/popular_(by_(?:day|week|month)|recent)(?:\?([^#]*))?" + pattern = BASE_PATTERN + \ + r"/post/popular_(by_(?:day|week|month)|recent)(?:\?([^#]*))?" 
+ test = ( + ("https://yande.re/post/popular_by_month?month=6&year=2014", { + "count": 40, + }), + ("https://yande.re/post/popular_recent"), + ("https://konachan.com/post/popular_by_month?month=11&year=2010", { + "count": 20, + }), + ("https://konachan.com/post/popular_recent"), + ("https://konachan.net/post/popular_recent"), + ("https://hypnohub.net/post/popular_by_month?month=6&year=2014", { + "count": 20, + }), + ("https://hypnohub.net/post/popular_recent"), + ("https://www.sakugabooru.com/post/popular_recent"), + ("https://lolibooru.moe/post/popular_recent"), + ) def __init__(self, match): MoebooruExtractor.__init__(self, match) - self.scale, self.query = match.groups() + self.scale = match.group(match.lastindex-1) + self.query = match.group(match.lastindex) def metadata(self): self.params = params = text.parse_query(self.query) @@ -138,108 +234,3 @@ class MoebooruPopularExtractor(MoebooruExtractor): def posts(self): url = "{}/post/popular_{}.json".format(self.root, self.scale) return self.request(url, params=self.params).json() - - -EXTRACTORS = { - "yandere": { - "root": "https://yande.re", - "test-tag": ("https://yande.re/post?tags=ouzoku+armor", { - "content": "59201811c728096b2d95ce6896fd0009235fe683", - }), - "test-pool": ("https://yande.re/pool/show/318", { - "content": "2a35b9d6edecce11cc2918c6dce4de2198342b68", - }), - "test-post": ("https://yande.re/post/show/51824", { - "content": "59201811c728096b2d95ce6896fd0009235fe683", - "options": (("tags", True),), - "keyword": { - "tags_artist": "sasaki_tamaru", - "tags_circle": "softhouse_chara", - "tags_copyright": "ouzoku", - "tags_general": str, - }, - }), - "test-popular": ( - ("https://yande.re/post/popular_by_month?month=6&year=2014", { - "count": 40, - }), - ("https://yande.re/post/popular_recent"), - ), - }, - "konachan": { - "root": "https://konachan.com", - "pattern": r"konachan\.(?:com|net)", - "test-tag": ( - ("https://konachan.com/post?tags=patata", { - "content": 
"838cfb815e31f48160855435655ddf7bfc4ecb8d", - }), - ("https://konachan.net/post?tags=patata"), - ), - "test-pool": ( - ("https://konachan.com/pool/show/95", { - "content": "cf0546e38a93c2c510a478f8744e60687b7a8426", - }), - ("https://konachan.net/pool/show/95"), - ), - "test-post": ( - ("https://konachan.com/post/show/205189", { - "content": "674e75a753df82f5ad80803f575818b8e46e4b65", - "options": (("tags", True),), - "keyword": { - "tags_artist": "patata", - "tags_character": "clownpiece", - "tags_copyright": "touhou", - "tags_general": str, - }, - }), - ("https://konachan.net/post/show/205189"), - ), - "test-popular": ( - ("https://konachan.com/post/popular_by_month?month=11&year=2010", { - "count": 20, - }), - ("https://konachan.com/post/popular_recent"), - ("https://konachan.net/post/popular_recent"), - ), - }, - "hypnohub": { - "root": "https://hypnohub.net", - "test-tag": ("https://hypnohub.net/post?tags=gonoike_biwa", { - "url": "072330c34a1e773d0cafd00e64b8060d34b078b6", - }), - "test-pool": ("https://hypnohub.net/pool/show/61", { - "url": "fd74991c8729e77acd3c35eb6ddc4128ff445adf", - }), - "test-post": ("https://hypnohub.net/post/show/73964", { - "content": "02d5f5a8396b621a6efc04c5f8ef1b7225dfc6ee", - }), - "test-popular": ( - ("https://hypnohub.net/post/popular_by_month?month=6&year=2014", { - "count": 20, - }), - ("https://hypnohub.net/post/popular_recent"), - ), - }, - "lolibooru": { - "root": "https://lolibooru.moe", - "test-tag" : ("https://lolibooru.moe/post?tags=ruu_%28tksymkw%29",), - "test-pool" : ("https://lolibooru.moe/pool/show/239",), - "test-post" : ("https://lolibooru.moe/post/show/287835",), - "test-popular": ("https://lolibooru.moe/post/popular_recent",), - }, - "sakugabooru": { - "root": "https://www.sakugabooru.com", - "pattern": r"(?:www\.)?sakugabooru\.com", - "test-tag" : ("https://www.sakugabooru.com/post?tags=nichijou",), - "test-pool" : ("https://www.sakugabooru.com/pool/show/54",), - "test-post" : 
("https://www.sakugabooru.com/post/show/125570",), - "test-popular": ("https://www.sakugabooru.com/post/popular_recent",), - }, -} - -generate_extractors(EXTRACTORS, globals(), ( - MoebooruTagExtractor, - MoebooruPoolExtractor, - MoebooruPostExtractor, - MoebooruPopularExtractor, -)) diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py index 2ec71655..483c6570 100644 --- a/gallery_dl/extractor/oauth.py +++ b/gallery_dl/extractor/oauth.py @@ -9,7 +9,7 @@ """Utility classes to setup OAuth and link accounts to gallery-dl""" from .common import Extractor, Message -from . import deviantart, flickr, pixiv, reddit, smugmug, tumblr +from . import deviantart, flickr, mastodon, pixiv, reddit, smugmug, tumblr from .. import text, oauth, util, config, exception from ..cache import cache import urllib.parse @@ -106,9 +106,9 @@ class OAuthBase(Extractor): )) def _oauth2_authorization_code_grant( - self, client_id, client_secret, auth_url, token_url, + self, client_id, client_secret, auth_url, token_url, *, scope="read", key="refresh_token", auth=True, - message_template=None, cache=None): + cache=None, instance=None): """Perform an OAuth2 authorization code grant""" state = "gallery-dl_{}_{}".format( @@ -117,12 +117,12 @@ class OAuthBase(Extractor): ) auth_params = { - "client_id": client_id, + "client_id" : client_id, "response_type": "code", - "state": state, - "redirect_uri": self.redirect_uri, - "duration": "permanent", - "scope": scope, + "state" : state, + "redirect_uri" : self.redirect_uri, + "duration" : "permanent", + "scope" : scope, } # receive an authorization code @@ -140,8 +140,8 @@ class OAuthBase(Extractor): # exchange the authorization code for a token data = { - "grant_type": "authorization_code", - "code": params["code"], + "grant_type" : "authorization_code", + "code" : params["code"], "redirect_uri": self.redirect_uri, } @@ -159,27 +159,18 @@ class OAuthBase(Extractor): self.send(data["error"]) return + token = data[key] + token_name = 
key.replace("_", "-") + # write to cache if self.cache and cache: - cache.update("#" + str(client_id), data[key]) - self.log.info("Writing 'refresh-token' to cache") + cache.update(instance or ("#" + str(client_id)), token) + self.log.info("Writing '%s' to cache", token_name) # display token - if message_template: - msg = message_template.format( - category=self.subcategory, - key=key.partition("_")[0], - token=data[key], - instance=getattr(self, "instance", ""), - client_id=client_id, - client_secret=client_secret, - ) - else: - msg = self._generate_message( - ("refresh-token",), - (data[key],), - ) - self.send(msg) + self.send(self._generate_message( + (token_name,), (token,), + )) def _generate_message(self, names, values): _vh, _va, _is, _it = ( @@ -326,8 +317,10 @@ class OAuthMastodon(OAuthBase): def items(self): yield Message.Version, 1 - application = self.oauth_config(self.instance) - if not application: + for application in mastodon.INSTANCES.values(): + if self.instance == application["root"].partition("://")[2]: + break + else: application = self._register(self.instance) self._oauth2_authorization_code_grant( @@ -335,8 +328,9 @@ class OAuthMastodon(OAuthBase): application["client-secret"], "https://{}/oauth/authorize".format(self.instance), "https://{}/oauth/token".format(self.instance), + instance=self.instance, key="access_token", - message_template=MASTODON_MSG_TEMPLATE, + cache=mastodon._access_token_cache, ) @cache(maxage=10*365*24*3600, keyarg=1) @@ -425,29 +419,3 @@ class OAuthPixiv(OAuthBase): """) code = input("code: ") return code.rpartition("=")[2].strip() - - -MASTODON_MSG_TEMPLATE = """ -Your 'access-token' is - -{token} - -Put this value into your configuration file as -'extractor.mastodon.{instance}.{key}-token'. - -You can also add your 'client-id' and 'client-secret' values -if you want to register another account in the future. 
- -Example: -{{ - "extractor": {{ - "mastodon": {{ - "{instance}": {{ - "{key}-token": "{token}", - "client-id": "{client_id}", - "client-secret": "{client_secret}" - }} - }} - }} -}} -""" diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index 688c0055..cb189587 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2020 Mike Fährmann +# Copyright 2019-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -42,8 +42,6 @@ class PatreonExtractor(Extractor): hashes = set() yield Message.Directory, post - yield Message.Metadata, post - for kind, url, name in itertools.chain( self._images(post), self._attachments(post), diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py index d65f3344..ba1ab08a 100644 --- a/gallery_dl/extractor/shopify.py +++ b/gallery_dl/extractor/shopify.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2020 Mike Fährmann +# Copyright 2019-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -8,28 +8,23 @@ """Extractors for Shopify instances""" -from .common import Extractor, Message, generate_extractors +from .common import BaseExtractor, Message from .. 
import text import re -class ShopifyExtractor(Extractor): +class ShopifyExtractor(BaseExtractor): """Base class for Shopify extractors""" basecategory = "shopify" filename_fmt = "{product[title]}_{num:>02}_{id}.{extension}" archive_fmt = "{id}" def __init__(self, match): - Extractor.__init__(self, match) - self.item_url = self.root + match.group(1) - - def request(self, url, **kwargs): - kwargs["retries"] = float("inf") - return Extractor.request(self, url, **kwargs) + BaseExtractor.__init__(self, match) + self.item_url = self.root + match.group(match.lastindex) def items(self): data = self.metadata() - yield Message.Version, 1 yield Message.Directory, data headers = {"X-Requested-With": "XMLHttpRequest"} @@ -58,22 +53,34 @@ class ShopifyExtractor(Extractor): """Return an iterable with all relevant product URLs""" +BASE_PATTERN = ShopifyExtractor.update({ + "fashionnova": { + "root": "https://www.fashionnova.com", + "pattern": r"(?:www\.)?fashionnova\.com", + }, +}) + + class ShopifyCollectionExtractor(ShopifyExtractor): """Base class for collection extractors for Shopify based sites""" subcategory = "collection" directory_fmt = ("{category}", "{collection[title]}") - pattern_fmt = r"(/collections/[\w-]+)/?(?:\?([^#]+))?(?:$|#)" - - def __init__(self, match): - ShopifyExtractor.__init__(self, match) - self.params = match.group(2) + pattern = BASE_PATTERN + r"(/collections/[\w-]+)/?(?:$|[?#])" + test = ( + ("https://www.fashionnova.com/collections/mini-dresses", { + "range": "1-20", + "count": 20, + "archive": False, + }), + ("https://www.fashionnova.com/collections/mini-dresses/?page=1"), + ("https://www.fashionnova.com/collections/mini-dresses#1"), + ) def metadata(self): return self.request(self.item_url + ".json").json() def products(self): - params = text.parse_query(self.params) - params["page"] = text.parse_int(params.get("page"), 1) + params = {"page": 1} fetch = True last = None @@ -107,36 +114,14 @@ class ShopifyProductExtractor(ShopifyExtractor): """Base 
class for product extractors for Shopify based sites""" subcategory = "product" directory_fmt = ("{category}", "Products") - pattern_fmt = r"((?:/collections/[\w-]+)?/products/[\w-]+)" + pattern = BASE_PATTERN + r"((?:/collections/[\w-]+)?/products/[\w-]+)" + test = ( + ("https://www.fashionnova.com/products/essential-slide-red", { + "pattern": r"https?://cdn\d*\.shopify.com/", + "count": 3, + }), + ("https://www.fashionnova.com/collections/flats/products/name"), + ) def products(self): return (self.item_url,) - - -EXTRACTORS = { - "fashionnova": { - "root": "https://www.fashionnova.com", - "pattern": r"(?:www\.)?fashionnova\.com", - "test-product": ( - ("https://www.fashionnova.com/products/essential-slide-red", { - "pattern": r"https?://cdn\d*\.shopify.com/", - "count": 3, - }), - ("https://www.fashionnova.com/collections/flats/products/name"), - ), - "test-collection": ( - ("https://www.fashionnova.com/collections/mini-dresses", { - "range": "1-20", - "count": 20, - "archive": False, - }), - ("https://www.fashionnova.com/collections/mini-dresses/?page=1"), - ("https://www.fashionnova.com/collections/mini-dresses#1"), - ), - }, -} - -generate_extractors(EXTRACTORS, globals(), ( - ShopifyProductExtractor, - ShopifyCollectionExtractor, -)) diff --git a/gallery_dl/job.py b/gallery_dl/job.py index c1d32ef1..2f68c594 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2020 Mike Fährmann +# Copyright 2015-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -111,10 +111,6 @@ class Job(): if self.pred_queue(url, kwds): self.handle_queue(url, kwds) - elif msg[0] == Message.Metadata: - self.update_kwdict(msg[1]) - self.handle_metadata(msg[1]) - elif msg[0] == Message.Version: if msg[1] != 1: raise "unsupported message-version ({}, {})".format( @@ -128,9 +124,6 @@ class Job(): def handle_directory(self, 
kwdict): """Handle Message.Directory""" - def handle_metadata(self, kwdict): - """Handle Message.Metadata""" - def handle_queue(self, url, kwdict): """Handle Message.Queue""" @@ -280,15 +273,6 @@ class DownloadJob(Job): for callback in self.hooks["post"]: callback(self.pathfmt) - def handle_metadata(self, kwdict): - """Run postprocessors with metadata from 'kwdict'""" - if "metadata" in self.hooks: - kwdict["extension"] = "metadata" - pathfmt = self.pathfmt - pathfmt.set_filename(kwdict) - for callback in self.hooks["metadata"]: - callback(pathfmt) - def handle_queue(self, url, kwdict): if url in self.visited: return @@ -624,8 +608,5 @@ class DataJob(Job): def handle_directory(self, kwdict): self.data.append((Message.Directory, self.filter(kwdict))) - def handle_metadata(self, kwdict): - self.data.append((Message.Metadata, self.filter(kwdict))) - def handle_queue(self, url, kwdict): self.data.append((Message.Queue, url, self.filter(kwdict))) diff --git a/gallery_dl/option.py b/gallery_dl/option.py index 367b9342..0ca32aa6 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -136,9 +136,9 @@ def build_parser(): help="Print URLs instead of downloading", ) output.add_argument( - "-G", + "-G", "--resolve-urls", dest="list_urls", action="store_const", const=128, - help=argparse.SUPPRESS, + help="Print URLs instead of downloading; resolve intermediary URLs", ) output.add_argument( "-j", "--dump-json", diff --git a/gallery_dl/postprocessor/exec.py b/gallery_dl/postprocessor/exec.py index 5a54a77c..25142196 100644 --- a/gallery_dl/postprocessor/exec.py +++ b/gallery_dl/postprocessor/exec.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2020 Mike Fährmann +# Copyright 2018-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -39,10 +39,6 @@ class ExecPP(PostProcessor): events = options.get("event") if events is None: events = ("after",) - if 
options.get("final"): - self.log.warning("'final' is deprecated, " - "use '\"event\": \"finalize\"' instead") - events = ("finalize",) elif isinstance(events, str): events = events.split(",") for event in events: diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py index c08f111f..91cbca99 100644 --- a/gallery_dl/postprocessor/metadata.py +++ b/gallery_dl/postprocessor/metadata.py @@ -55,10 +55,6 @@ class MetadataPP(PostProcessor): events = options.get("event") if events is None: events = ("file",) - if options.get("bypost"): - self.log.warning("'bypost' is deprecated, use '\"event\": " - "\"post\"' and 'filename' instead") - events = ("metadata",) elif isinstance(events, str): events = events.split(",") for event in events: diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 8244a957..8b5e4afb 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.16.5" +__version__ = "1.17.0-dev" diff --git a/test/test_extractor.py b/test/test_extractor.py index 8bc3a278..f04e1c71 100644 --- a/test/test_extractor.py +++ b/test/test_extractor.py @@ -219,10 +219,6 @@ class TestExtractorWait(unittest.TestCase): class TextExtractorOAuth(unittest.TestCase): - @classmethod - def setUpClass(cls): - mastodon.generate_extractors() - def test_oauth1(self): for category in ("flickr", "smugmug", "tumblr"): extr = extractor.find("oauth:" + category)