decouple extractor initialization

Introduce an 'initialize()' function that does the actual init
(session, cookies, config options) and can be called separately from
the constructor __init__().

This makes it possible, for example, to adjust config access inside a Job
before most of it has already happened during the 'extractor.find()' call.
This commit is contained in:
Mike Fährmann
2023-07-25 20:09:44 +02:00
parent f0203b7559
commit a383eca7f6
71 changed files with 314 additions and 193 deletions

View File

@@ -31,17 +31,21 @@ class ExhentaiExtractor(Extractor):
LIMIT = False
def __init__(self, match):
# allow calling 'self.config()' before 'Extractor.__init__()'
self._cfgpath = ("extractor", self.category, self.subcategory)
Extractor.__init__(self, match)
self.version = match.group(1)
version = match.group(1)
def initialize(self):
domain = self.config("domain", "auto")
if domain == "auto":
domain = ("ex" if version == "ex" else "e-") + "hentai.org"
domain = ("ex" if self.version == "ex" else "e-") + "hentai.org"
self.root = "https://" + domain
self.cookies_domain = "." + domain
Extractor.__init__(self, match)
Extractor.initialize(self)
if self.version != "ex":
self.cookies.set("nw", "1", domain=self.cookies_domain)
self.session.headers["Referer"] = self.root + "/"
self.original = self.config("original", True)
limits = self.config("limits", False)
@@ -51,10 +55,6 @@ class ExhentaiExtractor(Extractor):
else:
self.limits = False
self.session.headers["Referer"] = self.root + "/"
if version != "ex":
self.cookies.set("nw", "1", domain=self.cookies_domain)
def request(self, url, **kwargs):
response = Extractor.request(self, url, **kwargs)
if response.history and response.headers.get("Content-Length") == "0":
@@ -174,6 +174,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
self.image_token = match.group(4)
self.image_num = text.parse_int(match.group(6), 1)
def _init(self):
source = self.config("source")
if source == "hitomi":
self.items = self._items_hitomi