class BaseExtractor(Extractor):
    """Base class for extractors shared by several instances of a site

    One subclass handles every instance/domain running the same software.
    update() builds a combined URL pattern containing one empty marker
    group per instance, and __init__() uses the matched marker group to
    select the 'category' and 'root' of the instance a URL belongs to.
    """
    # List of (category, root) tuples, filled in by update().
    # An entry's position equals the index of its marker group
    # in the pattern returned by update().
    instances = None

    def __init__(self, match):
        """Select category and root from 'match', then initialize as usual

        The lookup only happens when 'self.category' is falsy. It maps the
        index of the first group that took part in the match onto
        'self.instances'.
        NOTE(review): this presumes the marker groups produced by update()
        are the first groups of the pattern that produced 'match' - confirm
        against the subclasses' patterns.
        """
        if not self.category:
            for index, group in enumerate(match.groups()):
                # groups of alternatives that did not match are None;
                # the marker group of the matching one is an empty string
                if group is not None:
                    self.category, self.root = self.instances[index]
                    break
        Extractor.__init__(self, match)

    @classmethod
    def update(cls, instances):
        """Register 'instances' and return a URL pattern matching them

        Arguments:
            instances: dict mapping a category name to an info dict with
                a "root" entry (root URL) and an optional "pattern" entry
                (regex for the instance's domain). Entries from the config
                value stored under ("extractor",) / cls.basecategory that
                are dicts with a "root" key get added to this dict
                in place, replacing entries with the same category name.

        Returns:
            A regex string with an optional "http(s)://" prefix followed
            by an alternation of all instance patterns, each one directly
            followed by an empty capture group "()".

        Side effects:
            Rebinds 'cls.instances' to a new list of (category, root)
            tuples in the same order as the alternation.
        """
        extra_instances = config.get(("extractor",), cls.basecategory)
        if extra_instances:
            for category, info in extra_instances.items():
                if isinstance(info, dict) and "root" in info:
                    instances[category] = info

        pattern_list = []
        instance_list = cls.instances = []
        for category, info in instances.items():
            root = info["root"]
            if root:
                # A trailing slash would end up in the generated pattern
                # and in every URL built from 'root' ("//path").
                root = root.rstrip("/")
            instance_list.append((category, root))

            pattern = info.get("pattern")
            if not pattern:
                # default: the escaped root without its scheme;
                # '+ 3' skips the "://" that follows the scheme name
                pattern = re.escape(root[root.index(":") + 3:])
            # the empty group marks which alternative matched
            pattern_list.append(pattern + "()")

        return r"(?:https?://)?(?:" + "|".join(pattern_list) + r")"