update handling of extractor URL patterns

When extractor classes are loaded during 'extractor.find(…)', each class's
'pattern' attribute is replaced with a compiled version of itself.
This commit is contained in:
Mike Fährmann
2019-02-08 20:08:16 +01:00
parent 6284731107
commit abbd45d0f4
6 changed files with 43 additions and 41 deletions

View File

@@ -211,13 +211,15 @@ def get_domain(classes):
if hasattr(cls, "root") and cls.root:
return cls.root + "/"
if hasattr(cls, "test") and cls.test:
url = cls.test[0][0]
return url[:url.find("/", 8)+1]
if hasattr(cls, "https"):
scheme = "https" if cls.https else "http"
domain = cls.__doc__.split()[-1]
return "{}://{}/".format(scheme, domain)
scheme = "http" if hasattr(cls, "https") and not cls.https else "https"
host = cls.__doc__.split()[-1]
return scheme + "://" + host + "/"
test = next(cls._get_tests(), None)
if test:
url = test[0]
return url[:url.find("/", 8)+1]
except (IndexError, AttributeError):
pass
return ""