From c2bc70593e962ed34dd5a15c0a63cc8b39ef687b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 30 Jan 2023 20:07:18 +0100 Subject: [PATCH] implement ability to load external extractor classes - -X/--extractors - extractor.module-sources --- docs/options.md | 5 ++-- gallery_dl/__init__.py | 48 ++++++++++++++++++++++++++------ gallery_dl/extractor/__init__.py | 36 ++++++++++++++++++------ gallery_dl/option.py | 13 ++++++--- gallery_dl/version.py | 2 +- test/test_extractor.py | 2 +- 6 files changed, 80 insertions(+), 26 deletions(-) diff --git a/docs/options.md b/docs/options.md index 7bffbd41..c0a82da7 100644 --- a/docs/options.md +++ b/docs/options.md @@ -8,10 +8,11 @@ --version Print program version and exit -i, --input-file FILE Download URLs found in FILE ('-' for stdin). More than one --input-file can be specified - -d, --destination PATH Target location for file downloads - -D, --directory PATH Exact location for file downloads -f, --filename FORMAT Filename format string for downloaded files ('/O' for "original" filenames) + -d, --destination PATH Target location for file downloads + -D, --directory PATH Exact location for file downloads + -X, --extractors PATH Load external extractors from PATH --proxy URL Use the specified proxy --source-address IP Client-side IP address to bind to --user-agent UA User-Agent request header diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py index 245dbf8b..973ee0bf 100644 --- a/gallery_dl/__init__.py +++ b/gallery_dl/__init__.py @@ -11,7 +11,7 @@ import logging from . import version, config, option, output, extractor, job, util, exception __author__ = "Mike Fährmann" -__copyright__ = "Copyright 2014-2022 Mike Fährmann" +__copyright__ = "Copyright 2014-2023 Mike Fährmann" __license__ = "GPLv2" __maintainer__ = "Mike Fährmann" __email__ = "mike_faehrmann@web.de" @@ -105,14 +105,6 @@ def main(): output.ANSI = True - # extractor modules - modules = config.get(("extractor",), "modules") - if modules is not None: - if isinstance(modules, str): - modules = modules.split(",") - extractor.modules = modules - extractor._module_iter = iter(modules) - # format string separator separator = config.get((), "format-separator") if separator: @@ -147,6 +139,44 @@ def main(): log.debug("Configuration Files %s", config._files) + # extractor modules + modules = config.get(("extractor",), "modules") + if modules is not None: + if isinstance(modules, str): + modules = modules.split(",") + extractor.modules = modules + + # external modules + if args.extractor_sources: + sources = args.extractor_sources + sources.append(None) + else: + sources = config.get(("extractor",), "module-sources") + + if sources: + import os + modules = [] + + for source in sources: + if source: + path = util.expand_path(source) + try: + files = os.listdir(path) + modules.append(extractor._modules_path(path, files)) + except Exception as exc: + log.warning("Unable to load modules from %s (%s: %s)", + path, exc.__class__.__name__, exc) + else: + modules.append(extractor._modules_internal()) + + if len(modules) > 1: + import itertools + extractor._module_iter = itertools.chain(*modules) + elif not modules: + extractor._module_iter = () + else: + extractor._module_iter = iter(modules[0]) + if args.list_modules: extractor.modules.append("") sys.stdout.write("\n".join(extractor.modules)) diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 6140c2ce..b4020eaa 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -1,11 +1,12 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2022 Mike Fährmann +# Copyright 2015-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. +import sys import re modules = [ @@ -217,20 +218,33 @@ def extractors(): # -------------------------------------------------------------------- # internals -_cache = [] -_module_iter = iter(modules) - def _list_classes(): - """Yield all available extractor classes""" + """Yield available extractor classes""" yield from _cache - globals_ = globals() - for module_name in _module_iter: - module = __import__(module_name, globals_, None, (), 1) + for module in _module_iter: yield from add_module(module) - globals_["_list_classes"] = lambda : _cache + globals()["_list_classes"] = lambda : _cache + + +def _modules_internal(): + globals_ = globals() + for module_name in modules: + yield __import__(module_name, globals_, None, (), 1) + + +def _modules_path(path, files): + sys.path.insert(0, path) + try: + return [ + __import__(name[:-3]) + for name in files + if name.endswith(".py") + ] + finally: + del sys.path[0] def _get_classes(module): @@ -240,3 +254,7 @@ def _get_classes(module): hasattr(cls, "pattern") and cls.__module__ == module.__name__ ) ] + + +_cache = [] +_module_iter = _modules_internal() diff --git a/gallery_dl/option.py b/gallery_dl/option.py index 213cd2d6..9b915505 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -110,6 +110,12 @@ def build_parser(): help=("Download URLs found in FILE ('-' for stdin). " "More than one --input-file can be specified"), ) + general.add_argument( + "-f", "--filename", + dest="filename", metavar="FORMAT", + help=("Filename format string for downloaded files " + "('/O' for \"original\" filenames)"), + ) general.add_argument( "-d", "--destination", dest="base-directory", metavar="PATH", action=ConfigAction, @@ -121,10 +127,9 @@ def build_parser(): help="Exact location for file downloads", ) general.add_argument( - "-f", "--filename", - dest="filename", metavar="FORMAT", - help=("Filename format string for downloaded files " - "('/O' for \"original\" filenames)"), + "-X", "--extractors", + dest="extractor_sources", metavar="PATH", action="append", + help="Load external extractors from PATH", ) general.add_argument( "--proxy", diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 6b52610e..19e49be0 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.24.5" +__version__ = "1.25.0-dev" diff --git a/test/test_extractor.py b/test/test_extractor.py index 144c6f9c..6516fa8f 100644 --- a/test/test_extractor.py +++ b/test/test_extractor.py @@ -46,7 +46,7 @@ class TestExtractorModule(unittest.TestCase): def setUp(self): extractor._cache.clear() - extractor._module_iter = iter(extractor.modules) + extractor._module_iter = extractor._modules_internal() extractor._list_classes = _list_classes def test_find(self):