decouple extractor initialization

Introduce an 'initialize()' function that does the actual init
(session, cookies, config options) and can be called separately from
the constructor __init__().

This makes it possible, for example, to adjust config access inside a Job;
previously, most of it had already happened when calling 'extractor.find()'.
This commit is contained in:
Mike Fährmann
2023-07-25 20:09:44 +02:00
parent f0203b7559
commit a383eca7f6
71 changed files with 314 additions and 193 deletions

View File

@@ -23,12 +23,10 @@ class PinterestExtractor(Extractor):
archive_fmt = "{id}{media_id}"
root = "https://www.pinterest.com"
def __init__(self, match):
Extractor.__init__(self, match)
def _init(self):
domain = self.config("domain")
if not domain or domain == "auto" :
self.root = text.root_from_url(match.group(0))
self.root = text.root_from_url(self.url)
else:
self.root = text.ensure_http_scheme(domain)