[common] implement BaseExtractor class
Should be used when the same extractor logic applies to different instances/domains of several sites, e.g. FoolFuuka, Shopify, etc. This will replace the functionality of 'generate_extractors()' in a more efficient way, by condensing everything into 1 class and not dynamically generating an extractor class for each instance.
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2014-2020 Mike Fährmann
|
||||
# Copyright 2014-2021 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -527,6 +527,39 @@ class AsynchronousMixin():
|
||||
messages.put(None)
|
||||
|
||||
|
||||
class BaseExtractor(Extractor):
    """Extractor base class shared by several instances of one site type.

    'instances' is filled by update() with one (category, root) tuple per
    known instance. The pattern prefix returned by update() appends an
    empty capture group to every instance's sub-pattern, so the position
    of the first non-None match group identifies the matched instance.
    """
    instances = None

    def __init__(self, match):
        """Select category and root from the matched group, then defer
        to Extractor.__init__().

        The lookup is skipped when 'category' is already set to a truthy
        value. If no group matched, category and root are left untouched.
        """
        if not self.category:
            # index of the first capture group that took part in the match
            matched = (
                position
                for position, value in enumerate(match.groups())
                if value is not None
            )
            position = next(matched, None)
            if position is not None:
                self.category, self.root = self.instances[position]
        Extractor.__init__(self, match)

    @classmethod
    def update(cls, instances):
        """Register 'instances' and return the URL pattern prefix for them.

        'instances' maps a category name to an info dict with a "root"
        entry and an optional "pattern" entry. Entries obtained from
        config.get(("extractor",), cls.basecategory) are merged into
        'instances' in place (presumably user-defined instances -- TODO
        confirm against the config module) when they are dicts that
        contain "root".

        Returns a regex string with one alternative per instance, each
        followed by an empty capture group.
        """
        configured = config.get(("extractor",), cls.basecategory)
        if configured:
            for name, entry in configured.items():
                if not isinstance(entry, dict):
                    continue
                if "root" not in entry:
                    continue
                instances[name] = entry

        # bind the class attribute first; it is filled while iterating
        cls.instances = registered = []
        alternatives = []
        for name, entry in instances.items():
            root = entry["root"]
            registered.append((name, root))

            sub_pattern = entry.get("pattern")
            if not sub_pattern:
                # no explicit pattern: use the escaped root with its
                # "<scheme>://" prefix cut off
                start = root.index(":") + 3
                sub_pattern = re.escape(root[start:])
            # the empty group marks which instance matched
            alternatives.append(sub_pattern + "()")

        joined = "|".join(alternatives)
        return r"(?:https?://)?(?:" + joined + r")"
|
||||
|
||||
|
||||
def generate_extractors(extractor_data, symtable, classes):
|
||||
"""Dynamically generate Extractor classes"""
|
||||
extractors = config.get(("extractor",), classes[0].basecategory)
|
||||
|
||||
Reference in New Issue
Block a user