implement ability to load external extractor classes
- -X/--extractors (command-line option)
- extractor.module-sources (configuration option)
This commit is contained in:
@@ -8,10 +8,11 @@
|
|||||||
--version Print program version and exit
|
--version Print program version and exit
|
||||||
-i, --input-file FILE Download URLs found in FILE ('-' for stdin).
|
-i, --input-file FILE Download URLs found in FILE ('-' for stdin).
|
||||||
More than one --input-file can be specified
|
More than one --input-file can be specified
|
||||||
-d, --destination PATH Target location for file downloads
|
|
||||||
-D, --directory PATH Exact location for file downloads
|
|
||||||
-f, --filename FORMAT Filename format string for downloaded files
|
-f, --filename FORMAT Filename format string for downloaded files
|
||||||
('/O' for "original" filenames)
|
('/O' for "original" filenames)
|
||||||
|
-d, --destination PATH Target location for file downloads
|
||||||
|
-D, --directory PATH Exact location for file downloads
|
||||||
|
-X, --extractors PATH Load external extractors from PATH
|
||||||
--proxy URL Use the specified proxy
|
--proxy URL Use the specified proxy
|
||||||
--source-address IP Client-side IP address to bind to
|
--source-address IP Client-side IP address to bind to
|
||||||
--user-agent UA User-Agent request header
|
--user-agent UA User-Agent request header
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ import logging
|
|||||||
from . import version, config, option, output, extractor, job, util, exception
|
from . import version, config, option, output, extractor, job, util, exception
|
||||||
|
|
||||||
__author__ = "Mike Fährmann"
|
__author__ = "Mike Fährmann"
|
||||||
__copyright__ = "Copyright 2014-2022 Mike Fährmann"
|
__copyright__ = "Copyright 2014-2023 Mike Fährmann"
|
||||||
__license__ = "GPLv2"
|
__license__ = "GPLv2"
|
||||||
__maintainer__ = "Mike Fährmann"
|
__maintainer__ = "Mike Fährmann"
|
||||||
__email__ = "mike_faehrmann@web.de"
|
__email__ = "mike_faehrmann@web.de"
|
||||||
@@ -105,14 +105,6 @@ def main():
|
|||||||
|
|
||||||
output.ANSI = True
|
output.ANSI = True
|
||||||
|
|
||||||
# extractor modules
|
|
||||||
modules = config.get(("extractor",), "modules")
|
|
||||||
if modules is not None:
|
|
||||||
if isinstance(modules, str):
|
|
||||||
modules = modules.split(",")
|
|
||||||
extractor.modules = modules
|
|
||||||
extractor._module_iter = iter(modules)
|
|
||||||
|
|
||||||
# format string separator
|
# format string separator
|
||||||
separator = config.get((), "format-separator")
|
separator = config.get((), "format-separator")
|
||||||
if separator:
|
if separator:
|
||||||
@@ -147,6 +139,44 @@ def main():
|
|||||||
|
|
||||||
log.debug("Configuration Files %s", config._files)
|
log.debug("Configuration Files %s", config._files)
|
||||||
|
|
||||||
|
# extractor modules
|
||||||
|
modules = config.get(("extractor",), "modules")
|
||||||
|
if modules is not None:
|
||||||
|
if isinstance(modules, str):
|
||||||
|
modules = modules.split(",")
|
||||||
|
extractor.modules = modules
|
||||||
|
|
||||||
|
# external modules
|
||||||
|
if args.extractor_sources:
|
||||||
|
sources = args.extractor_sources
|
||||||
|
sources.append(None)
|
||||||
|
else:
|
||||||
|
sources = config.get(("extractor",), "module-sources")
|
||||||
|
|
||||||
|
if sources:
|
||||||
|
import os
|
||||||
|
modules = []
|
||||||
|
|
||||||
|
for source in sources:
|
||||||
|
if source:
|
||||||
|
path = util.expand_path(source)
|
||||||
|
try:
|
||||||
|
files = os.listdir(path)
|
||||||
|
modules.append(extractor._modules_path(path, files))
|
||||||
|
except Exception as exc:
|
||||||
|
log.warning("Unable to load modules from %s (%s: %s)",
|
||||||
|
path, exc.__class__.__name__, exc)
|
||||||
|
else:
|
||||||
|
modules.append(extractor._modules_internal())
|
||||||
|
|
||||||
|
if len(modules) > 1:
|
||||||
|
import itertools
|
||||||
|
extractor._module_iter = itertools.chain(*modules)
|
||||||
|
elif not modules:
|
||||||
|
extractor._module_iter = ()
|
||||||
|
else:
|
||||||
|
extractor._module_iter = iter(modules[0])
|
||||||
|
|
||||||
if args.list_modules:
|
if args.list_modules:
|
||||||
extractor.modules.append("")
|
extractor.modules.append("")
|
||||||
sys.stdout.write("\n".join(extractor.modules))
|
sys.stdout.write("\n".join(extractor.modules))
|
||||||
|
|||||||
@@ -1,11 +1,12 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2015-2022 Mike Fährmann
|
# Copyright 2015-2023 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
# published by the Free Software Foundation.
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
import sys
|
||||||
import re
|
import re
|
||||||
|
|
||||||
modules = [
|
modules = [
|
||||||
@@ -217,20 +218,33 @@ def extractors():
|
|||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
# internals
|
# internals
|
||||||
|
|
||||||
_cache = []
|
|
||||||
_module_iter = iter(modules)
|
|
||||||
|
|
||||||
|
|
||||||
def _list_classes():
|
def _list_classes():
|
||||||
"""Yield all available extractor classes"""
|
"""Yield available extractor classes"""
|
||||||
yield from _cache
|
yield from _cache
|
||||||
|
|
||||||
globals_ = globals()
|
for module in _module_iter:
|
||||||
for module_name in _module_iter:
|
|
||||||
module = __import__(module_name, globals_, None, (), 1)
|
|
||||||
yield from add_module(module)
|
yield from add_module(module)
|
||||||
|
|
||||||
globals_["_list_classes"] = lambda : _cache
|
globals()["_list_classes"] = lambda : _cache
|
||||||
|
|
||||||
|
|
||||||
|
def _modules_internal():
    """Lazily import and yield the extractor modules named in 'modules'.

    Each name is imported relative to this package (import level 1),
    one module per iteration step, so nothing is imported until the
    generator is actually consumed.
    """
    namespace = globals()
    yield from (
        __import__(name, namespace, None, (), 1)
        for name in modules
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _modules_path(path, files):
|
||||||
|
sys.path.insert(0, path)
|
||||||
|
try:
|
||||||
|
return [
|
||||||
|
__import__(name[:-3])
|
||||||
|
for name in files
|
||||||
|
if name.endswith(".py")
|
||||||
|
]
|
||||||
|
finally:
|
||||||
|
del sys.path[0]
|
||||||
|
|
||||||
|
|
||||||
def _get_classes(module):
|
def _get_classes(module):
|
||||||
@@ -240,3 +254,7 @@ def _get_classes(module):
|
|||||||
hasattr(cls, "pattern") and cls.__module__ == module.__name__
|
hasattr(cls, "pattern") and cls.__module__ == module.__name__
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
_cache = []
|
||||||
|
_module_iter = _modules_internal()
|
||||||
|
|||||||
@@ -110,6 +110,12 @@ def build_parser():
|
|||||||
help=("Download URLs found in FILE ('-' for stdin). "
|
help=("Download URLs found in FILE ('-' for stdin). "
|
||||||
"More than one --input-file can be specified"),
|
"More than one --input-file can be specified"),
|
||||||
)
|
)
|
||||||
|
general.add_argument(
|
||||||
|
"-f", "--filename",
|
||||||
|
dest="filename", metavar="FORMAT",
|
||||||
|
help=("Filename format string for downloaded files "
|
||||||
|
"('/O' for \"original\" filenames)"),
|
||||||
|
)
|
||||||
general.add_argument(
|
general.add_argument(
|
||||||
"-d", "--destination",
|
"-d", "--destination",
|
||||||
dest="base-directory", metavar="PATH", action=ConfigAction,
|
dest="base-directory", metavar="PATH", action=ConfigAction,
|
||||||
@@ -121,10 +127,9 @@ def build_parser():
|
|||||||
help="Exact location for file downloads",
|
help="Exact location for file downloads",
|
||||||
)
|
)
|
||||||
general.add_argument(
|
general.add_argument(
|
||||||
"-f", "--filename",
|
"-X", "--extractors",
|
||||||
dest="filename", metavar="FORMAT",
|
dest="extractor_sources", metavar="PATH", action="append",
|
||||||
help=("Filename format string for downloaded files "
|
help="Load external extractors from PATH",
|
||||||
"('/O' for \"original\" filenames)"),
|
|
||||||
)
|
)
|
||||||
general.add_argument(
|
general.add_argument(
|
||||||
"--proxy",
|
"--proxy",
|
||||||
|
|||||||
@@ -6,4 +6,4 @@
|
|||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
# published by the Free Software Foundation.
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
__version__ = "1.24.5"
|
__version__ = "1.25.0-dev"
|
||||||
|
|||||||
@@ -46,7 +46,7 @@ class TestExtractorModule(unittest.TestCase):
|
|||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
extractor._cache.clear()
|
extractor._cache.clear()
|
||||||
extractor._module_iter = iter(extractor.modules)
|
extractor._module_iter = extractor._modules_internal()
|
||||||
extractor._list_classes = _list_classes
|
extractor._list_classes = _list_classes
|
||||||
|
|
||||||
def test_find(self):
|
def test_find(self):
|
||||||
|
|||||||
Reference in New Issue
Block a user