decouple extractor initialization

Introduce an 'initialize()' function that does the actual init
(session, cookies, config options) and can called separately from
the constructor __init__().

This allows, for example, to adjust config access inside a Job
before most of it already happened when calling 'extractor.find()'.
This commit is contained in:
Mike Fährmann
2023-07-25 20:09:44 +02:00
parent f0203b7559
commit a383eca7f6
71 changed files with 314 additions and 193 deletions

View File

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright 2020 Jake Mannens
# Copyright 2021-2022 Mike Fährmann
# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -39,7 +39,9 @@ class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor):
def __init__(self, match):
self.path = match.group(1)
ChapterExtractor.__init__(self, match, self.root + self.path)
self.session.headers['Referer'] = self.root
def _init(self):
self.session.headers['Referer'] = self.root + "/"
def metadata(self, page):
_ , pos = text.extract(page, '<span itemprop="title">', '<')