implement ability to load external extractor classes

- -X/--extractors
- extractor.module-sources
This commit is contained in:
Mike Fährmann
2023-01-30 20:07:18 +01:00
parent 9ec627c760
commit c2bc70593e
6 changed files with 80 additions and 26 deletions

View File

@@ -8,10 +8,11 @@
--version Print program version and exit --version Print program version and exit
-i, --input-file FILE Download URLs found in FILE ('-' for stdin). -i, --input-file FILE Download URLs found in FILE ('-' for stdin).
More than one --input-file can be specified More than one --input-file can be specified
-d, --destination PATH Target location for file downloads
-D, --directory PATH Exact location for file downloads
-f, --filename FORMAT Filename format string for downloaded files -f, --filename FORMAT Filename format string for downloaded files
('/O' for "original" filenames) ('/O' for "original" filenames)
-d, --destination PATH Target location for file downloads
-D, --directory PATH Exact location for file downloads
-X, --extractors PATH Load external extractors from PATH
--proxy URL Use the specified proxy --proxy URL Use the specified proxy
--source-address IP Client-side IP address to bind to --source-address IP Client-side IP address to bind to
--user-agent UA User-Agent request header --user-agent UA User-Agent request header

View File

@@ -11,7 +11,7 @@ import logging
from . import version, config, option, output, extractor, job, util, exception from . import version, config, option, output, extractor, job, util, exception
__author__ = "Mike Fährmann" __author__ = "Mike Fährmann"
__copyright__ = "Copyright 2014-2022 Mike Fährmann" __copyright__ = "Copyright 2014-2023 Mike Fährmann"
__license__ = "GPLv2" __license__ = "GPLv2"
__maintainer__ = "Mike Fährmann" __maintainer__ = "Mike Fährmann"
__email__ = "mike_faehrmann@web.de" __email__ = "mike_faehrmann@web.de"
@@ -105,14 +105,6 @@ def main():
output.ANSI = True output.ANSI = True
# extractor modules
modules = config.get(("extractor",), "modules")
if modules is not None:
if isinstance(modules, str):
modules = modules.split(",")
extractor.modules = modules
extractor._module_iter = iter(modules)
# format string separator # format string separator
separator = config.get((), "format-separator") separator = config.get((), "format-separator")
if separator: if separator:
@@ -147,6 +139,44 @@ def main():
log.debug("Configuration Files %s", config._files) log.debug("Configuration Files %s", config._files)
# extractor modules
modules = config.get(("extractor",), "modules")
if modules is not None:
if isinstance(modules, str):
modules = modules.split(",")
extractor.modules = modules
# external modules
if args.extractor_sources:
sources = args.extractor_sources
sources.append(None)
else:
sources = config.get(("extractor",), "module-sources")
if sources:
import os
modules = []
for source in sources:
if source:
path = util.expand_path(source)
try:
files = os.listdir(path)
modules.append(extractor._modules_path(path, files))
except Exception as exc:
log.warning("Unable to load modules from %s (%s: %s)",
path, exc.__class__.__name__, exc)
else:
modules.append(extractor._modules_internal())
if len(modules) > 1:
import itertools
extractor._module_iter = itertools.chain(*modules)
elif not modules:
extractor._module_iter = ()
else:
extractor._module_iter = iter(modules[0])
if args.list_modules: if args.list_modules:
extractor.modules.append("") extractor.modules.append("")
sys.stdout.write("\n".join(extractor.modules)) sys.stdout.write("\n".join(extractor.modules))

View File

@@ -1,11 +1,12 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2015-2022 Mike Fährmann # Copyright 2015-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
import sys
import re import re
modules = [ modules = [
@@ -217,20 +218,33 @@ def extractors():
# -------------------------------------------------------------------- # --------------------------------------------------------------------
# internals # internals
_cache = []
_module_iter = iter(modules)
def _list_classes(): def _list_classes():
"""Yield all available extractor classes""" """Yield available extractor classes"""
yield from _cache yield from _cache
globals_ = globals() for module in _module_iter:
for module_name in _module_iter:
module = __import__(module_name, globals_, None, (), 1)
yield from add_module(module) yield from add_module(module)
globals_["_list_classes"] = lambda : _cache globals()["_list_classes"] = lambda : _cache
def _modules_internal():
    """Lazily import and yield the built-in extractor modules.

    The module-level 'modules' list is only looked up once iteration
    starts, so it can still be replaced after this generator was created.
    Each name is resolved as a relative import (level 1) using this
    module's globals.
    """
    namespace = globals()
    for name in modules:
        module = __import__(name, namespace, None, (), 1)
        yield module
def _modules_path(path, files):
    """Import every '*.py' entry of 'files' as a top-level module.

    'path' is temporarily put at the front of sys.path so that the bare
    module names (filename without '.py') resolve to files in that
    directory.

    Returns a list of the imported module objects in the order given by
    'files'; entries not ending in '.py' are ignored. Exceptions raised
    while importing propagate to the caller.
    """
    sys.path.insert(0, path)
    try:
        return [
            __import__(name[:-3])
            for name in files
            if name.endswith(".py")
        ]
    finally:
        # Remove by value, not by position: an imported module may have
        # modified sys.path itself, so index 0 is not guaranteed to still
        # be the entry inserted above.
        try:
            sys.path.remove(path)
        except ValueError:
            # entry already removed by imported code; nothing to undo
            pass
def _get_classes(module): def _get_classes(module):
@@ -240,3 +254,7 @@ def _get_classes(module):
hasattr(cls, "pattern") and cls.__module__ == module.__name__ hasattr(cls, "pattern") and cls.__module__ == module.__name__
) )
] ]
_cache = []
_module_iter = _modules_internal()

View File

@@ -110,6 +110,12 @@ def build_parser():
help=("Download URLs found in FILE ('-' for stdin). " help=("Download URLs found in FILE ('-' for stdin). "
"More than one --input-file can be specified"), "More than one --input-file can be specified"),
) )
general.add_argument(
"-f", "--filename",
dest="filename", metavar="FORMAT",
help=("Filename format string for downloaded files "
"('/O' for \"original\" filenames)"),
)
general.add_argument( general.add_argument(
"-d", "--destination", "-d", "--destination",
dest="base-directory", metavar="PATH", action=ConfigAction, dest="base-directory", metavar="PATH", action=ConfigAction,
@@ -121,10 +127,9 @@ def build_parser():
help="Exact location for file downloads", help="Exact location for file downloads",
) )
general.add_argument( general.add_argument(
"-f", "--filename", "-X", "--extractors",
dest="filename", metavar="FORMAT", dest="extractor_sources", metavar="PATH", action="append",
help=("Filename format string for downloaded files " help="Load external extractors from PATH",
"('/O' for \"original\" filenames)"),
) )
general.add_argument( general.add_argument(
"--proxy", "--proxy",

View File

@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
__version__ = "1.24.5" __version__ = "1.25.0-dev"

View File

@@ -46,7 +46,7 @@ class TestExtractorModule(unittest.TestCase):
def setUp(self): def setUp(self):
extractor._cache.clear() extractor._cache.clear()
extractor._module_iter = iter(extractor.modules) extractor._module_iter = extractor._modules_internal()
extractor._list_classes = _list_classes extractor._list_classes = _list_classes
def test_find(self): def test_find(self):