Merge branch 'multi-extractor'

This commit is contained in:
Mike Fährmann
2015-11-21 04:29:38 +01:00
34 changed files with 348 additions and 419 deletions

View File

@@ -46,11 +46,11 @@ modules = [
def find(url):
"""Find extractor suitable for handling the given url"""
# NOTE(review): this span appears to be a diff rendering with the +/- markers
# and indentation stripped -- two loop headers and two pairs of return
# statements follow each other. Presumably the 3-name unpacking and the
# `module.info` returns are the older variant; confirm against the repository.
for pattern, module, klass in _list_patterns():
for pattern, klass in _list_patterns():
# re.match only matches at the beginning of `url`.
match = re.match(pattern, url)
if match:
# First matching pattern wins: build the result from the match object.
return klass(match), module.info
return None, None
return klass(match)
# Reached when no pattern matched.
return None
# --------------------------------------------------------------------
# internals
@@ -59,15 +59,22 @@ _cache = []
# Single module-level iterator over `modules`; _list_patterns() consumes it,
# so a module name is taken from it at most once.
_module_iter = iter(modules)
def _list_patterns():
"""Yield all available (pattern, module, klass) tuples"""
"""Yield all available (pattern, info, class) tuples"""
# NOTE(review): this span appears to be a diff rendering with the +/- markers
# and indentation stripped -- two docstrings and two loop bodies are
# interleaved. The second docstring names three tuple items, but the
# `_get_classes` loop below builds two-item (pattern, klass) tuples.
# Replay every entry already stored in `_cache` first.
for entry in _cache:
yield entry
# Then import the modules not yet taken from `_module_iter`, one at a time.
for module_name in _module_iter:
# Relative import of the module from within this package.
module = importlib.import_module("."+module_name, __package__)
# Variant driven by the module-level `info` dict: one class per module,
# with user-configured patterns placed before the module's own patterns.
klass = getattr(module, module.info["extractor"])
userpatterns = config.get(("extractor", module_name, "pattern"), default=[])
for pattern in userpatterns + module.info["pattern"]:
etuple = (pattern, module, klass)
_cache.append(etuple)
yield etuple
# Variant driven by class attributes: every class returned by
# _get_classes() contributes each entry of its own `pattern` attribute.
# NOTE(review): this loop does not read `userpatterns` -- confirm that
# dropping the user-configured patterns here is intended.
for klass in _get_classes(module):
for pattern in klass.pattern:
etuple = (pattern, klass)
# Store the tuple so later calls can replay it from `_cache`.
_cache.append(etuple)
yield etuple
def _get_classes(module):
"""Return a list of all extractor classes in a module"""
return [
klass for klass in module.__dict__.values() if (
hasattr(klass, "pattern") and klass.__module__ == module.__name__
)
]