allow BaseExtractors to use the domain of the matched URL
This commit is contained in:
@@ -607,6 +607,9 @@ class BaseExtractor(Extractor):
|
|||||||
if group is not None:
|
if group is not None:
|
||||||
if index:
|
if index:
|
||||||
self.category, self.root = self.instances[index-1]
|
self.category, self.root = self.instances[index-1]
|
||||||
|
if not self.root:
|
||||||
|
url = text.ensure_http_scheme(match.group(0))
|
||||||
|
self.root = url[:url.index("/", 8)]
|
||||||
else:
|
else:
|
||||||
self.root = group
|
self.root = group
|
||||||
self.category = group.partition("://")[2]
|
self.category = group.partition("://")[2]
|
||||||
@@ -624,7 +627,9 @@ class BaseExtractor(Extractor):
|
|||||||
pattern_list = []
|
pattern_list = []
|
||||||
instance_list = cls.instances = []
|
instance_list = cls.instances = []
|
||||||
for category, info in instances.items():
|
for category, info in instances.items():
|
||||||
root = info["root"].rstrip("/")
|
root = info["root"]
|
||||||
|
if root:
|
||||||
|
root = root.rstrip("/")
|
||||||
instance_list.append((category, root))
|
instance_list.append((category, root))
|
||||||
|
|
||||||
pattern = info.get("pattern")
|
pattern = info.get("pattern")
|
||||||
|
|||||||
@@ -349,6 +349,12 @@ def build_extractor_list():
|
|||||||
for category, root in extr.instances:
|
for category, root in extr.instances:
|
||||||
base[category].append(extr.subcategory)
|
base[category].append(extr.subcategory)
|
||||||
if category not in domains:
|
if category not in domains:
|
||||||
|
if not root:
|
||||||
|
# use domain from first matching test
|
||||||
|
for url, _ in extr._get_tests():
|
||||||
|
if extr.from_url(url).category == category:
|
||||||
|
root = url[:url.index("/", 8)]
|
||||||
|
break
|
||||||
domains[category] = root + "/"
|
domains[category] = root + "/"
|
||||||
|
|
||||||
# sort subcategory lists
|
# sort subcategory lists
|
||||||
|
|||||||
Reference in New Issue
Block a user