allow BaseExtractors to use the domain of the matched URL

2022-02-10 01:38:50 +01:00
parent c0fddcefc5
commit b4f8e15a1f
2 changed files with 12 additions and 1 deletions
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -607,6 +607,9 @@ class BaseExtractor(Extractor):
                if group is not None:
                    if index:
                        self.category, self.root = self.instances[index-1]
+                        if not self.root:
+                            url = text.ensure_http_scheme(match.group(0))
+                            self.root = url[:url.index("/", 8)]
                    else:
                        self.root = group
                        self.category = group.partition("://")[2]
@@ -624,7 +627,9 @@ class BaseExtractor(Extractor):
        pattern_list = []
        instance_list = cls.instances = []
        for category, info in instances.items():
-            root = info["root"].rstrip("/")
+            root = info["root"]
+            if root:
+                root = root.rstrip("/")
            instance_list.append((category, root))

            pattern = info.get("pattern")
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -349,6 +349,12 @@ def build_extractor_list():
            for category, root in extr.instances:
                base[category].append(extr.subcategory)
                if category not in domains:
+                    if not root:
+                        # use domain from first matching test
+                        for url, _ in extr._get_tests():
+                            if extr.from_url(url).category == category:
+                                root = url[:url.index("/", 8)]
+                                break
                    domains[category] = root + "/"

    # sort subcategory lists