implement ability to load external extractor classes

- -X/--extractors
- extractor.module-sources
This commit is contained in:
Mike Fährmann
2023-01-30 20:07:18 +01:00
parent 9ec627c760
commit c2bc70593e
6 changed files with 80 additions and 26 deletions

View File

@@ -8,10 +8,11 @@
--version Print program version and exit
-i, --input-file FILE Download URLs found in FILE ('-' for stdin).
More than one --input-file can be specified
-d, --destination PATH Target location for file downloads
-D, --directory PATH Exact location for file downloads
-f, --filename FORMAT Filename format string for downloaded files
('/O' for "original" filenames)
-d, --destination PATH Target location for file downloads
-D, --directory PATH Exact location for file downloads
-X, --extractors PATH Load external extractors from PATH
--proxy URL Use the specified proxy
--source-address IP Client-side IP address to bind to
--user-agent UA User-Agent request header

View File

@@ -11,7 +11,7 @@ import logging
from . import version, config, option, output, extractor, job, util, exception
__author__ = "Mike Fährmann"
__copyright__ = "Copyright 2014-2022 Mike Fährmann"
__copyright__ = "Copyright 2014-2023 Mike Fährmann"
__license__ = "GPLv2"
__maintainer__ = "Mike Fährmann"
__email__ = "mike_faehrmann@web.de"
@@ -105,14 +105,6 @@ def main():
output.ANSI = True
# extractor modules
modules = config.get(("extractor",), "modules")
if modules is not None:
if isinstance(modules, str):
modules = modules.split(",")
extractor.modules = modules
extractor._module_iter = iter(modules)
# format string separator
separator = config.get((), "format-separator")
if separator:
@@ -147,6 +139,44 @@ def main():
log.debug("Configuration Files %s", config._files)
# extractor modules
modules = config.get(("extractor",), "modules")
if modules is not None:
if isinstance(modules, str):
modules = modules.split(",")
extractor.modules = modules
# external modules
if args.extractor_sources:
sources = args.extractor_sources
sources.append(None)
else:
sources = config.get(("extractor",), "module-sources")
if sources:
import os
modules = []
for source in sources:
if source:
path = util.expand_path(source)
try:
files = os.listdir(path)
modules.append(extractor._modules_path(path, files))
except Exception as exc:
log.warning("Unable to load modules from %s (%s: %s)",
path, exc.__class__.__name__, exc)
else:
modules.append(extractor._modules_internal())
if len(modules) > 1:
import itertools
extractor._module_iter = itertools.chain(*modules)
elif not modules:
extractor._module_iter = ()
else:
extractor._module_iter = iter(modules[0])
if args.list_modules:
extractor.modules.append("")
sys.stdout.write("\n".join(extractor.modules))

View File

@@ -1,11 +1,12 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2022 Mike Fährmann
# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
import sys
import re
modules = [
@@ -217,20 +218,33 @@ def extractors():
# --------------------------------------------------------------------
# internals
_cache = []
_module_iter = iter(modules)
def _list_classes():
"""Yield all available extractor classes"""
"""Yield available extractor classes"""
yield from _cache
globals_ = globals()
for module_name in _module_iter:
module = __import__(module_name, globals_, None, (), 1)
for module in _module_iter:
yield from add_module(module)
globals_["_list_classes"] = lambda : _cache
globals()["_list_classes"] = lambda : _cache
def _modules_internal():
    """Lazily import and yield the built-in extractor modules.

    Each name in the module-level 'modules' list is imported relative
    to this package (import level 1). Nothing is imported until the
    returned generator is iterated.
    """
    namespace = globals()
    yield from (
        __import__(name, namespace, None, (), 1)
        for name in modules
    )
def _modules_path(path, files):
sys.path.insert(0, path)
try:
return [
__import__(name[:-3])
for name in files
if name.endswith(".py")
]
finally:
del sys.path[0]
def _get_classes(module):
@@ -240,3 +254,7 @@ def _get_classes(module):
hasattr(cls, "pattern") and cls.__module__ == module.__name__
)
]
_cache = []
_module_iter = _modules_internal()

View File

@@ -110,6 +110,12 @@ def build_parser():
help=("Download URLs found in FILE ('-' for stdin). "
"More than one --input-file can be specified"),
)
general.add_argument(
"-f", "--filename",
dest="filename", metavar="FORMAT",
help=("Filename format string for downloaded files "
"('/O' for \"original\" filenames)"),
)
general.add_argument(
"-d", "--destination",
dest="base-directory", metavar="PATH", action=ConfigAction,
@@ -121,10 +127,9 @@ def build_parser():
help="Exact location for file downloads",
)
general.add_argument(
"-f", "--filename",
dest="filename", metavar="FORMAT",
help=("Filename format string for downloaded files "
"('/O' for \"original\" filenames)"),
"-X", "--extractors",
dest="extractor_sources", metavar="PATH", action="append",
help="Load external extractors from PATH",
)
general.add_argument(
"--proxy",

View File

@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
__version__ = "1.24.5"
__version__ = "1.25.0-dev"

View File

@@ -46,7 +46,7 @@ class TestExtractorModule(unittest.TestCase):
def setUp(self):
extractor._cache.clear()
extractor._module_iter = iter(extractor.modules)
extractor._module_iter = extractor._modules_internal()
extractor._list_classes = _list_classes
def test_find(self):