allow BaseExtractors to use the domain of the matched URL
This commit is contained in:
@@ -607,6 +607,9 @@ class BaseExtractor(Extractor):
|
||||
if group is not None:
|
||||
if index:
|
||||
self.category, self.root = self.instances[index-1]
|
||||
if not self.root:
|
||||
url = text.ensure_http_scheme(match.group(0))
|
||||
self.root = url[:url.index("/", 8)]
|
||||
else:
|
||||
self.root = group
|
||||
self.category = group.partition("://")[2]
|
||||
@@ -624,7 +627,9 @@ class BaseExtractor(Extractor):
|
||||
pattern_list = []
|
||||
instance_list = cls.instances = []
|
||||
for category, info in instances.items():
|
||||
root = info["root"].rstrip("/")
|
||||
root = info["root"]
|
||||
if root:
|
||||
root = root.rstrip("/")
|
||||
instance_list.append((category, root))
|
||||
|
||||
pattern = info.get("pattern")
|
||||
|
||||
@@ -349,6 +349,12 @@ def build_extractor_list():
|
||||
for category, root in extr.instances:
|
||||
base[category].append(extr.subcategory)
|
||||
if category not in domains:
|
||||
if not root:
|
||||
# use domain from first matching test
|
||||
for url, _ in extr._get_tests():
|
||||
if extr.from_url(url).category == category:
|
||||
root = url[:url.index("/", 8)]
|
||||
break
|
||||
domains[category] = root + "/"
|
||||
|
||||
# sort subcategory lists
|
||||
|
||||
Reference in New Issue
Block a user