From a383eca7f6ec0f08dcc854d594aecbfecc4f45be Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Tue, 25 Jul 2023 20:09:44 +0200
Subject: [PATCH] decouple extractor initialization

Introduce an 'initialize()' function that does the actual init
(session, cookies, config options) and can be called separately from
the constructor __init__().

This makes it possible, for example, to adjust config access inside a
Job before it takes place; previously, most of it had already happened
when calling 'extractor.find()'.
---
 gallery_dl/extractor/3dbooru.py         | 12 +++---
 gallery_dl/extractor/500px.py           |  3 +-
 gallery_dl/extractor/8chan.py           |  4 +-
 gallery_dl/extractor/artstation.py      |  6 +--
 gallery_dl/extractor/aryion.py          |  8 +++-
 gallery_dl/extractor/blogger.py         |  5 ++-
 gallery_dl/extractor/common.py          | 56 ++++++++++++++++---------
 gallery_dl/extractor/danbooru.py        |  3 +-
 gallery_dl/extractor/deviantart.py      | 25 +++++++----
 gallery_dl/extractor/exhentai.py        | 19 +++++----
 gallery_dl/extractor/fanbox.py          |  5 +--
 gallery_dl/extractor/flickr.py          |  4 +-
 gallery_dl/extractor/foolfuuka.py       |  4 +-
 gallery_dl/extractor/furaffinity.py     |  5 +++
 gallery_dl/extractor/gelbooru_v02.py    |  3 +-
 gallery_dl/extractor/gfycat.py          |  1 +
 gallery_dl/extractor/hentaicosplays.py  |  4 +-
 gallery_dl/extractor/hentaifoundry.py   |  3 ++
 gallery_dl/extractor/hitomi.py          |  9 ++--
 gallery_dl/extractor/hotleak.py         |  5 +--
 gallery_dl/extractor/idolcomplex.py     |  2 +
 gallery_dl/extractor/imagebam.py        |  4 +-
 gallery_dl/extractor/imagechest.py      |  9 ++--
 gallery_dl/extractor/imagefap.py        |  5 +--
 gallery_dl/extractor/imgur.py           |  4 +-
 gallery_dl/extractor/inkbunny.py        |  3 +-
 gallery_dl/extractor/instagram.py       | 23 ++++++----
 gallery_dl/extractor/itaku.py           |  4 +-
 gallery_dl/extractor/kemonoparty.py     |  7 ++--
 gallery_dl/extractor/lolisafe.py        |  3 +-
 gallery_dl/extractor/luscious.py        |  4 +-
 gallery_dl/extractor/mangadex.py        |  6 ++-
 gallery_dl/extractor/mangafox.py        |  4 +-
 gallery_dl/extractor/mangahere.py       |  4 +-
 gallery_dl/extractor/mangakakalot.py    |  6 ++-
 gallery_dl/extractor/manganelo.py       |  4 +-
 gallery_dl/extractor/mangasee.py        |  2 +
 gallery_dl/extractor/mastodon.py        |  4 +-
 gallery_dl/extractor/misskey.py         |  4 +-
 gallery_dl/extractor/myhentaigallery.py |  4 +-
 gallery_dl/extractor/newgrounds.py      |  5 +++
 gallery_dl/extractor/nijie.py           | 18 ++++----
 gallery_dl/extractor/oauth.py           |  2 +
 gallery_dl/extractor/paheal.py          |  1 +
 gallery_dl/extractor/philomena.py       |  3 +-
 gallery_dl/extractor/photobucket.py     |  7 +++-
 gallery_dl/extractor/pinterest.py       |  6 +--
 gallery_dl/extractor/pixiv.py           |  6 ++-
 gallery_dl/extractor/pornpics.py        |  4 +-
 gallery_dl/extractor/reactor.py         | 11 +++--
 gallery_dl/extractor/readcomiconline.py |  4 +-
 gallery_dl/extractor/redgifs.py         |  2 +
 gallery_dl/extractor/rule34us.py        |  5 +--
 gallery_dl/extractor/senmanga.py        |  6 +--
 gallery_dl/extractor/shimmie2.py        | 23 +++++-----
 gallery_dl/extractor/simplyhentai.py    |  4 +-
 gallery_dl/extractor/skeb.py            |  2 +
 gallery_dl/extractor/smugmug.py         |  3 +-
 gallery_dl/extractor/szurubooru.py      |  3 +-
 gallery_dl/extractor/tapas.py           |  3 +-
 gallery_dl/extractor/tumblr.py          |  1 +
 gallery_dl/extractor/twibooru.py        |  3 +-
 gallery_dl/extractor/twitter.py         |  5 +++
 gallery_dl/extractor/urlshortener.py    |  1 +
 gallery_dl/extractor/vipergirls.py      |  5 +--
 gallery_dl/extractor/wallhaven.py       |  6 ++-
 gallery_dl/extractor/weasyl.py          |  3 +-
 gallery_dl/extractor/webtoons.py        | 26 +++++++-----
 gallery_dl/extractor/weibo.py           |  5 +++
 test/test_cookies.py                    | 43 +++++++++++--------
 test/test_downloader.py                 |  1 +
 71 files changed, 314 insertions(+), 193 deletions(-)

diff --git a/gallery_dl/extractor/3dbooru.py b/gallery_dl/extractor/3dbooru.py
index e0066cb9..e83bca75 100644
--- a/gallery_dl/extractor/3dbooru.py
+++ b/gallery_dl/extractor/3dbooru.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2015-2020 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -17,12 +17,10 @@ class _3dbooruBase():
     basecategory = "booru"
     root = "http://behoimi.org"
 
-    def __init__(self, match):
-        super().__init__(match)
-        self.session.headers.update({
-            "Referer": "http://behoimi.org/post/show/",
-            "Accept-Encoding": "identity",
-        })
+    def _init(self):
+        headers = self.session.headers
+        headers["Referer"] = "http://behoimi.org/post/show/"
+        headers["Accept-Encoding"] = "identity"
 
 
 class _3dbooruTagExtractor(_3dbooruBase, moebooru.MoebooruTagExtractor):
diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py
index ac38b604..4d1307e1 100644
--- a/gallery_dl/extractor/500px.py
+++ b/gallery_dl/extractor/500px.py
@@ -23,8 +23,7 @@ class _500pxExtractor(Extractor):
     root = "https://500px.com"
     cookies_domain = ".500px.com"
 
-    def __init__(self, match):
-        Extractor.__init__(self, match)
+    def _init(self):
         self.session.headers["Referer"] = self.root + "/"
 
     def items(self):
diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py
index f098008a..2d043868 100644
--- a/gallery_dl/extractor/8chan.py
+++ b/gallery_dl/extractor/8chan.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -150,6 +150,8 @@ class _8chanBoardExtractor(_8chanExtractor):
     def __init__(self, match):
         _8chanExtractor.__init__(self, match)
         _, self.board, self.page = match.groups()
+
+    def _init(self):
         self.session.headers["Referer"] = self.root + "/"
 
     def items(self):
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index a3a7c1e1..77d5fbd2 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2018-2022 Mike Fährmann
+# Copyright 2018-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -27,12 +27,12 @@ class ArtstationExtractor(Extractor):
     def __init__(self, match):
         Extractor.__init__(self, match)
         self.user = match.group(1) or match.group(2)
-        self.external = self.config("external", False)
 
     def items(self):
         data = self.metadata()
 
         projects = self.projects()
+        external = self.config("external", False)
         max_posts = self.config("max-posts")
         if max_posts:
             projects = itertools.islice(projects, max_posts)
@@ -45,7 +45,7 @@ class ArtstationExtractor(Extractor):
                 asset["num"] = num
                 yield Message.Directory, asset
 
-                if adict["has_embedded_player"] and self.external:
+                if adict["has_embedded_player"] and external:
                     player = adict["player_embedded"]
                     url = (text.extr(player, 'src="', '"') or
                            text.extr(player, "src='", "'"))
diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
index ad0f9dc6..89a8319a 100644
--- a/gallery_dl/extractor/aryion.py
+++ b/gallery_dl/extractor/aryion.py
@@ -189,9 +189,11 @@ class AryionGalleryExtractor(AryionExtractor):
 
     def __init__(self, match):
         AryionExtractor.__init__(self, match)
-        self.recursive = self.config("recursive", True)
         self.offset = 0
 
+    def _init(self):
+        self.recursive = self.config("recursive", True)
+
     def skip(self, num):
         if self.recursive:
             return 0
@@ -217,9 +219,11 @@ class AryionTagExtractor(AryionExtractor):
         "count": ">= 5",
     })
 
-    def metadata(self):
+    def _init(self):
         self.params = text.parse_query(self.user)
         self.user = None
+
+    def metadata(self):
         return {"search_tags": self.params.get("tag")}
 
     def posts(self):
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index 3ceada8d..b25af8f9 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -28,12 +28,13 @@ class BloggerExtractor(Extractor):
 
     def __init__(self, match):
         Extractor.__init__(self, match)
-        self.videos = self.config("videos", True)
         self.blog = match.group(1) or match.group(2)
+
+    def _init(self):
         self.api = BloggerAPI(self)
+        self.videos = self.config("videos", True)
 
     def items(self):
-
         blog = self.api.blog_by_url("http://" + self.blog)
         blog["pages"] = blog["pages"]["totalItems"]
         blog["posts"] = blog["posts"]["totalItems"]
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 2e5ce4d4..fc6b197c 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -52,25 +52,6 @@ class Extractor():
         self._cfgpath = ("extractor", self.category, self.subcategory)
         self._parentdir = ""
 
-        self._write_pages = self.config("write-pages", False)
-        self._retry_codes = self.config("retry-codes")
-        self._retries = self.config("retries", 4)
-        self._timeout = self.config("timeout", 30)
-        self._verify = self.config("verify", True)
-        self._proxies = util.build_proxy_map(self.config("proxy"), self.log)
-        self._interval = util.build_duration_func(
-            self.config("sleep-request", self.request_interval),
-            self.request_interval_min,
-        )
-
-        if self._retries < 0:
-            self._retries = float("inf")
-        if not self._retry_codes:
-            self._retry_codes = ()
-
-        self._init_session()
-        self._init_cookies()
-
     @classmethod
     def from_url(cls, url):
         if isinstance(cls.pattern, str):
@@ -79,8 +60,16 @@ class Extractor():
         return cls(match) if match else None
 
     def __iter__(self):
+        self.initialize()
         return self.items()
 
+    def initialize(self):
+        self._init_options()
+        self._init_session()
+        self._init_cookies()
+        self._init()
+        self.initialize = util.noop
+
     def items(self):
         yield Message.Version, 1
 
@@ -245,6 +234,26 @@ class Extractor():
 
         return username, password
 
+    def _init(self):
+        pass
+
+    def _init_options(self):
+        self._write_pages = self.config("write-pages", False)
+        self._retry_codes = self.config("retry-codes")
+        self._retries = self.config("retries", 4)
+        self._timeout = self.config("timeout", 30)
+        self._verify = self.config("verify", True)
+        self._proxies = util.build_proxy_map(self.config("proxy"), self.log)
+        self._interval = util.build_duration_func(
+            self.config("sleep-request", self.request_interval),
+            self.request_interval_min,
+        )
+
+        if self._retries < 0:
+            self._retries = float("inf")
+        if not self._retry_codes:
+            self._retry_codes = ()
+
     def _init_session(self):
         self.session = session = requests.Session()
         headers = session.headers
@@ -454,6 +463,13 @@ class Extractor():
             self.cookies.set(
                 "__ddg2", util.generate_token(), domain=self.cookies_domain)
 
+    def _cache(self, func, maxage, keyarg=None):
+        #  return cache.DatabaseCacheDecorator(func, maxage, keyarg)
+        return cache.DatabaseCacheDecorator(func, keyarg, maxage)
+
+    def _cache_memory(self, func, maxage=None, keyarg=None):
+        return cache.Memcache()
+
     def _get_date_min_max(self, dmin=None, dmax=None):
         """Retrieve and parse 'date-min' and 'date-max' config values"""
         def get(key, default):
@@ -654,6 +670,8 @@ class AsynchronousMixin():
     """Run info extraction in a separate thread"""
 
     def __iter__(self):
+        self.initialize()
+
         messages = queue.Queue(5)
         thread = threading.Thread(
             target=self.async_items,
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 19a3aeff..b16d27a2 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -22,8 +22,7 @@ class DanbooruExtractor(BaseExtractor):
     per_page = 200
     request_interval = 1.0
 
-    def __init__(self, match):
-        BaseExtractor.__init__(self, match)
+    def _init(self):
         self.ugoira = self.config("ugoira", False)
         self.external = self.config("external", False)
         self.includes = False
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 9f16b334..3497b0c4 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -38,14 +38,18 @@ class DeviantartExtractor(Extractor):
 
     def __init__(self, match):
         Extractor.__init__(self, match)
+        self.user = match.group(1) or match.group(2)
+
+    def _init(self):
         self.flat = self.config("flat", True)
         self.extra = self.config("extra", False)
         self.original = self.config("original", True)
         self.comments = self.config("comments", False)
-        self.user = match.group(1) or match.group(2)
+
+        self.api = DeviantartOAuthAPI(self)
         self.group = False
         self.offset = 0
-        self.api = None
+        self._premium_cache = {}
 
         unwatch = self.config("auto-unwatch")
         if unwatch:
@@ -60,11 +64,13 @@ class DeviantartExtractor(Extractor):
             self._update_content = self._update_content_image
             self.original = True
 
-        self._premium_cache = {}
-        self.commit_journal = {
-            "html": self._commit_journal_html,
-            "text": self._commit_journal_text,
-        }.get(self.config("journals", "html"))
+        journals = self.config("journals", "html")
+        if journals == "html":
+            self.commit_journal = self._commit_journal_html
+        elif journals == "text":
+            self.commit_journal = self._commit_journal_text
+        else:
+            self.commit_journal = None
 
     def skip(self, num):
         self.offset += num
@@ -80,8 +86,6 @@ class DeviantartExtractor(Extractor):
             return True
 
     def items(self):
-        self.api = DeviantartOAuthAPI(self)
-
         if self.user and self.config("group", True):
             profile = self.api.user_profile(self.user)
             self.group = not profile
@@ -449,6 +453,9 @@ class DeviantartUserExtractor(DeviantartExtractor):
         ("https://shimoda7.deviantart.com/"),
     )
 
+    def initialize(self):
+        pass
+
     def items(self):
         base = "{}/{}/".format(self.root, self.user)
         return self._dispatch_extractors((
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 087ff51c..d5f1d02b 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -31,17 +31,21 @@ class ExhentaiExtractor(Extractor):
     LIMIT = False
 
     def __init__(self, match):
-        # allow calling 'self.config()' before 'Extractor.__init__()'
-        self._cfgpath = ("extractor", self.category, self.subcategory)
+        Extractor.__init__(self, match)
+        self.version = match.group(1)
 
-        version = match.group(1)
+    def initialize(self):
         domain = self.config("domain", "auto")
         if domain == "auto":
-            domain = ("ex" if version == "ex" else "e-") + "hentai.org"
+            domain = ("ex" if self.version == "ex" else "e-") + "hentai.org"
         self.root = "https://" + domain
         self.cookies_domain = "." + domain
 
-        Extractor.__init__(self, match)
+        Extractor.initialize(self)
+
+        if self.version != "ex":
+            self.cookies.set("nw", "1", domain=self.cookies_domain)
+        self.session.headers["Referer"] = self.root + "/"
         self.original = self.config("original", True)
 
         limits = self.config("limits", False)
@@ -51,10 +55,6 @@ class ExhentaiExtractor(Extractor):
         else:
             self.limits = False
 
-        self.session.headers["Referer"] = self.root + "/"
-        if version != "ex":
-            self.cookies.set("nw", "1", domain=self.cookies_domain)
-
     def request(self, url, **kwargs):
         response = Extractor.request(self, url, **kwargs)
         if response.history and response.headers.get("Content-Length") == "0":
@@ -174,6 +174,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
         self.image_token = match.group(4)
         self.image_num = text.parse_int(match.group(6), 1)
 
+    def _init(self):
         source = self.config("source")
         if source == "hitomi":
             self.items = self._items_hitomi
diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py
index 40ad8cdd..921ddb62 100644
--- a/gallery_dl/extractor/fanbox.py
+++ b/gallery_dl/extractor/fanbox.py
@@ -6,9 +6,9 @@
 
 """Extractors for https://www.fanbox.cc/"""
 
-import re
 from .common import Extractor, Message
 from .. import text
+import re
 
 
 BASE_PATTERN = (
@@ -27,8 +27,7 @@ class FanboxExtractor(Extractor):
     archive_fmt = "{id}_{num}"
     _warning = True
 
-    def __init__(self, match):
-        Extractor.__init__(self, match)
+    def _init(self):
         self.embeds = self.config("embeds", True)
 
     def items(self):
diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py
index cb7d1e81..3b18c63e 100644
--- a/gallery_dl/extractor/flickr.py
+++ b/gallery_dl/extractor/flickr.py
@@ -26,8 +26,10 @@ class FlickrExtractor(Extractor):
 
     def __init__(self, match):
         Extractor.__init__(self, match)
-        self.api = FlickrAPI(self)
         self.item_id = match.group(1)
+
+    def _init(self):
+        self.api = FlickrAPI(self)
         self.user = None
 
     def items(self):
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 76fb69eb..fefb2c4c 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -22,10 +22,12 @@ class FoolfuukaExtractor(BaseExtractor):
 
     def __init__(self, match):
         BaseExtractor.__init__(self, match)
-        self.session.headers["Referer"] = self.root
         if self.category == "b4k":
             self.remote = self._remote_direct
 
+    def _init(self):
+        self.session.headers["Referer"] = self.root + "/"
+
     def items(self):
         yield Message.Directory, self.metadata()
         for post in self.posts():
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index c03c89b2..8c3ef79d 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -28,6 +28,8 @@ class FuraffinityExtractor(Extractor):
     def __init__(self, match):
         Extractor.__init__(self, match)
         self.user = match.group(1)
+
+    def _init(self):
         self.offset = 0
 
         if self.config("descriptions") == "html":
@@ -384,6 +386,9 @@ class FuraffinityUserExtractor(FuraffinityExtractor):
         }),
     )
 
+    def initialize(self):
+        pass
+
     def items(self):
         base = "{}/{{}}/{}/".format(self.root, self.user)
         return self._dispatch_extractors((
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 958c4b58..1ef78efd 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -19,8 +19,7 @@ import re
 class GelbooruV02Extractor(booru.BooruExtractor):
     basecategory = "gelbooru_v02"
 
-    def __init__(self, match):
-        booru.BooruExtractor.__init__(self, match)
+    def _init(self):
         self.api_key = self.config("api-key")
         self.user_id = self.config("user-id")
 
diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py
index ccebdf98..53ef1180 100644
--- a/gallery_dl/extractor/gfycat.py
+++ b/gallery_dl/extractor/gfycat.py
@@ -24,6 +24,7 @@ class GfycatExtractor(Extractor):
         Extractor.__init__(self, match)
         self.key = match.group(1).lower()
 
+    def _init(self):
         formats = self.config("format")
         if formats is None:
             formats = ("mp4", "webm", "mobile", "gif")
diff --git a/gallery_dl/extractor/hentaicosplays.py b/gallery_dl/extractor/hentaicosplays.py
index 593a8464..ac03923f 100644
--- a/gallery_dl/extractor/hentaicosplays.py
+++ b/gallery_dl/extractor/hentaicosplays.py
@@ -57,7 +57,9 @@ class HentaicosplaysGalleryExtractor(GalleryExtractor):
         self.root = text.ensure_http_scheme(root)
         url = "{}/story/{}/".format(self.root, self.slug)
         GalleryExtractor.__init__(self, match, url)
-        self.session.headers["Referer"] = url
+
+    def _init(self):
+        self.session.headers["Referer"] = self.gallery_url
 
     def metadata(self, page):
         title = text.extr(page, "<title>", "</title>")
diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py
index 78a576df..56ea1d4d 100644
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@@ -170,6 +170,9 @@ class HentaifoundryUserExtractor(HentaifoundryExtractor):
     pattern = BASE_PATTERN + r"/user/([^/?#]+)/profile"
     test = ("https://www.hentai-foundry.com/user/Tenpura/profile",)
 
+    def initialize(self):
+        pass
+
     def items(self):
         root = self.root
         user = "/user/" + self.user
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 4e8d1cae..c012c665 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -66,12 +66,13 @@ class HitomiGalleryExtractor(GalleryExtractor):
     )
 
     def __init__(self, match):
-        gid = match.group(1)
-        url = "https://ltn.hitomi.la/galleries/{}.js".format(gid)
+        self.gid = match.group(1)
+        url = "https://ltn.hitomi.la/galleries/{}.js".format(self.gid)
         GalleryExtractor.__init__(self, match, url)
-        self.info = None
+
+    def _init(self):
         self.session.headers["Referer"] = "{}/reader/{}.html".format(
-            self.root, gid)
+            self.root, self.gid)
 
     def metadata(self, page):
         self.info = info = util.json_loads(page.partition("=")[2])
diff --git a/gallery_dl/extractor/hotleak.py b/gallery_dl/extractor/hotleak.py
index 30158b43..2ab9f3cc 100644
--- a/gallery_dl/extractor/hotleak.py
+++ b/gallery_dl/extractor/hotleak.py
@@ -21,9 +21,8 @@ class HotleakExtractor(Extractor):
     archive_fmt = "{type}_{creator}_{id}"
     root = "https://hotleak.vip"
 
-    def __init__(self, match):
-        Extractor.__init__(self, match)
-        self.session.headers["Referer"] = self.root
+    def _init(self):
+        self.session.headers["Referer"] = self.root + "/"
 
     def items(self):
         for post in self.posts():
diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py
index 02f037dd..fcac7fe2 100644
--- a/gallery_dl/extractor/idolcomplex.py
+++ b/gallery_dl/extractor/idolcomplex.py
@@ -29,6 +29,8 @@ class IdolcomplexExtractor(SankakuExtractor):
         self.logged_in = True
         self.start_page = 1
         self.start_post = 0
+
+    def _init(self):
         self.extags = self.config("tags", False)
 
     def items(self):
diff --git a/gallery_dl/extractor/imagebam.py b/gallery_dl/extractor/imagebam.py
index 67d0b110..9a3ea368 100644
--- a/gallery_dl/extractor/imagebam.py
+++ b/gallery_dl/extractor/imagebam.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2014-2022 Mike Fährmann
+# Copyright 2014-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -21,6 +21,8 @@ class ImagebamExtractor(Extractor):
     def __init__(self, match):
         Extractor.__init__(self, match)
         self.path = match.group(1)
+
+    def _init(self):
         self.cookies.set("nsfw_inter", "1", domain="www.imagebam.com")
 
     def _parse_image_page(self, path):
diff --git a/gallery_dl/extractor/imagechest.py b/gallery_dl/extractor/imagechest.py
index 9229617b..2babead8 100644
--- a/gallery_dl/extractor/imagechest.py
+++ b/gallery_dl/extractor/imagechest.py
@@ -47,8 +47,10 @@ class ImagechestGalleryExtractor(GalleryExtractor):
         url = self.root + "/p/" + self.gallery_id
         GalleryExtractor.__init__(self, match, url)
 
-        self.access_token = self.config("access-token")
-        if self.access_token:
+    def _init(self):
+        access_token = self.config("access-token")
+        if access_token:
+            self.api = ImagechestAPI(self, access_token)
             self.gallery_url = None
             self.metadata = self._metadata_api
             self.images = self._images_api
@@ -82,8 +84,7 @@ class ImagechestGalleryExtractor(GalleryExtractor):
         ]
 
     def _metadata_api(self, page):
-        api = ImagechestAPI(self, self.access_token)
-        post = api.post(self.gallery_id)
+        post = self.api.post(self.gallery_id)
 
         post["date"] = text.parse_datetime(
             post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py
index f5b69faa..43ac3a35 100644
--- a/gallery_dl/extractor/imagefap.py
+++ b/gallery_dl/extractor/imagefap.py
@@ -23,9 +23,8 @@ class ImagefapExtractor(Extractor):
     archive_fmt = "{gallery_id}_{image_id}"
     request_interval = (2.0, 4.0)
 
-    def __init__(self, match):
-        Extractor.__init__(self, match)
-        self.session.headers["Referer"] = self.root
+    def _init(self):
+        self.session.headers["Referer"] = self.root + "/"
 
     def request(self, url, **kwargs):
         response = Extractor.request(self, url, **kwargs)
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index 4c29d98f..ca9671c3 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -22,8 +22,10 @@ class ImgurExtractor(Extractor):
 
     def __init__(self, match):
         Extractor.__init__(self, match)
-        self.api = ImgurAPI(self)
         self.key = match.group(1)
+
+    def _init(self):
+        self.api = ImgurAPI(self)
         self.mp4 = self.config("mp4", True)
 
     def _prepare(self, image):
diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py
index 83a1a194..c6df16b0 100644
--- a/gallery_dl/extractor/inkbunny.py
+++ b/gallery_dl/extractor/inkbunny.py
@@ -24,8 +24,7 @@ class InkbunnyExtractor(Extractor):
     archive_fmt = "{file_id}"
     root = "https://inkbunny.net"
 
-    def __init__(self, match):
-        Extractor.__init__(self, match)
+    def _init(self):
         self.api = InkbunnyAPI(self)
 
     def items(self):
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 29208aef..cb77fa1c 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -34,16 +34,8 @@ class InstagramExtractor(Extractor):
     def __init__(self, match):
         Extractor.__init__(self, match)
         self.item = match.group(1)
-        self.api = None
-        self.www_claim = "0"
-        self.csrf_token = util.generate_token()
-        self._logged_in = True
-        self._find_tags = re.compile(r"#\w+").findall
-        self._cursor = None
-        self._user = None
 
-    def items(self):
-        self.login()
+    def _init(self):
         self.cookies.set(
             "csrftoken", self.csrf_token, domain=self.cookies_domain)
 
@@ -52,6 +44,16 @@ class InstagramExtractor(Extractor):
         else:
             self.api = InstagramRestAPI(self)
 
+        self.www_claim = "0"
+        self.csrf_token = util.generate_token()
+        self._find_tags = re.compile(r"#\w+").findall
+        self._logged_in = True
+        self._cursor = None
+        self._user = None
+
+    def items(self):
+        self.login()
+
         data = self.metadata()
         videos = self.config("videos", True)
         previews = self.config("previews", False)
@@ -400,6 +402,9 @@ class InstagramUserExtractor(InstagramExtractor):
         ("https://www.instagram.com/id:25025320/"),
     )
 
+    def initialize(self):
+        pass
+
     def items(self):
         base = "{}/{}/".format(self.root, self.item)
         stories = "{}/stories/{}/".format(self.root, self.item)
diff --git a/gallery_dl/extractor/itaku.py b/gallery_dl/extractor/itaku.py
index 4bcedae1..356a002b 100644
--- a/gallery_dl/extractor/itaku.py
+++ b/gallery_dl/extractor/itaku.py
@@ -26,8 +26,10 @@ class ItakuExtractor(Extractor):
 
     def __init__(self, match):
         Extractor.__init__(self, match)
-        self.api = ItakuAPI(self)
         self.item = match.group(1)
+
+    def _init(self):
+        self.api = ItakuAPI(self)
         self.videos = self.config("videos", True)
 
     def items(self):
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index d5d02c29..2ed73e9c 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -35,14 +35,15 @@ class KemonopartyExtractor(Extractor):
         self.root = text.root_from_url(match.group(0))
         self.cookies_domain = ".{}.{}".format(domain, tld)
         Extractor.__init__(self, match)
+
+    def _init(self):
         self.session.headers["Referer"] = self.root + "/"
-
-    def items(self):
         self._prepare_ddosguard_cookies()
-
         self._find_inline = re.compile(
             r'src="(?:https?://(?:kemono|coomer)\.(?:party|su))?(/inline/[^"]+'
             r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
+
+    def items(self):
         find_hash = re.compile(HASH_PATTERN).match
         generators = self._build_file_generators(self.config("files"))
         duplicates = self.config("duplicates")
diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py
index 5d236c37..9cebe3ae 100644
--- a/gallery_dl/extractor/lolisafe.py
+++ b/gallery_dl/extractor/lolisafe.py
@@ -46,9 +46,10 @@ class LolisafeAlbumExtractor(LolisafeExtractor):
         LolisafeExtractor.__init__(self, match)
         self.album_id = match.group(match.lastindex)
 
+    def _init(self):
         domain = self.config("domain")
         if domain == "auto":
-            self.root = text.root_from_url(match.group(0))
+            self.root = text.root_from_url(self.url)
         elif domain:
             self.root = text.ensure_http_scheme(domain)
 
diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py
index 80f8758c..dcf09d16 100644
--- a/gallery_dl/extractor/luscious.py
+++ b/gallery_dl/extractor/luscious.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2016-2022 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -118,6 +118,8 @@ class LusciousAlbumExtractor(LusciousExtractor):
     def __init__(self, match):
         LusciousExtractor.__init__(self, match)
         self.album_id = match.group(1)
+
+    def _init(self):
         self.gif = self.config("gif", False)
 
     def items(self):
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index e111fee3..b0c985de 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -30,9 +30,11 @@ class MangadexExtractor(Extractor):
 
     def __init__(self, match):
         Extractor.__init__(self, match)
+        self.uuid = match.group(1)
+
+    def _init(self):
         self.session.headers["User-Agent"] = util.USERAGENT
         self.api = MangadexAPI(self)
-        self.uuid = match.group(1)
 
     def items(self):
         for chapter in self.chapters():
@@ -202,7 +204,7 @@ class MangadexAPI():
         self.extractor = extr
         self.headers = {}
 
-        self.username, self.password = self.extractor._get_auth_info()
+        self.username, self.password = extr._get_auth_info()
         if not self.username:
             self.authenticate = util.noop
 
diff --git a/gallery_dl/extractor/mangafox.py b/gallery_dl/extractor/mangafox.py
index 0818fd90..8478b8de 100644
--- a/gallery_dl/extractor/mangafox.py
+++ b/gallery_dl/extractor/mangafox.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2017-2022 Mike Fährmann
+# Copyright 2017-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -33,6 +33,8 @@ class MangafoxChapterExtractor(ChapterExtractor):
         base, self.cstr, self.volume, self.chapter, self.minor = match.groups()
         self.urlbase = self.root + base
         ChapterExtractor.__init__(self, match, self.urlbase + "/1.html")
+
+    def _init(self):
         self.session.headers["Referer"] = self.root + "/"
 
     def metadata(self, page):
diff --git a/gallery_dl/extractor/mangahere.py b/gallery_dl/extractor/mangahere.py
index ccce09b4..97c26d47 100644
--- a/gallery_dl/extractor/mangahere.py
+++ b/gallery_dl/extractor/mangahere.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2015-2022 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -42,6 +42,8 @@ class MangahereChapterExtractor(MangahereBase, ChapterExtractor):
         self.part, self.volume, self.chapter = match.groups()
         url = self.url_fmt.format(self.part, 1)
         ChapterExtractor.__init__(self, match, url)
+
+    def _init(self):
         self.session.headers["Referer"] = self.root_mobile + "/"
 
     def metadata(self, page):
diff --git a/gallery_dl/extractor/mangakakalot.py b/gallery_dl/extractor/mangakakalot.py
index ba55ac16..e397586e 100644
--- a/gallery_dl/extractor/mangakakalot.py
+++ b/gallery_dl/extractor/mangakakalot.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 
 # Copyright 2020 Jake Mannens
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -39,7 +39,9 @@ class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor):
     def __init__(self, match):
         self.path = match.group(1)
         ChapterExtractor.__init__(self, match, self.root + self.path)
-        self.session.headers['Referer'] = self.root
+
+    def _init(self):
+        self.session.headers['Referer'] = self.root + "/"
 
     def metadata(self, page):
         _     , pos = text.extract(page, '<span itemprop="title">', '<')
diff --git a/gallery_dl/extractor/manganelo.py b/gallery_dl/extractor/manganelo.py
index 6fd9f495..807bc5ee 100644
--- a/gallery_dl/extractor/manganelo.py
+++ b/gallery_dl/extractor/manganelo.py
@@ -21,7 +21,9 @@ class ManganeloBase():
     def __init__(self, match):
         domain, path = match.groups()
         super().__init__(match, "https://" + domain + path)
-        self.session.headers['Referer'] = self.root
+
+    def _init(self):
+        self.session.headers['Referer'] = self.root + "/"
 
         if self._match_chapter is None:
             ManganeloBase._match_chapter = re.compile(
diff --git a/gallery_dl/extractor/mangasee.py b/gallery_dl/extractor/mangasee.py
index dfa9bdf0..00c89c1e 100644
--- a/gallery_dl/extractor/mangasee.py
+++ b/gallery_dl/extractor/mangasee.py
@@ -90,6 +90,8 @@ class MangaseeChapterExtractor(MangaseeBase, ChapterExtractor):
             self.category = "mangalife"
             self.root = "https://manga4life.com"
         ChapterExtractor.__init__(self, match, self.root + match.group(2))
+
+    def _init(self):
         self.session.headers["Referer"] = self.gallery_url
 
         domain = self.root.rpartition("/")[2]
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index ddd34f0d..3bed955c 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -23,8 +23,10 @@ class MastodonExtractor(BaseExtractor):
 
     def __init__(self, match):
         BaseExtractor.__init__(self, match)
-        self.instance = self.root.partition("://")[2]
         self.item = match.group(match.lastindex)
+
+    def _init(self):
+        self.instance = self.root.partition("://")[2]
         self.reblogs = self.config("reblogs", False)
         self.replies = self.config("replies", True)
 
diff --git a/gallery_dl/extractor/misskey.py b/gallery_dl/extractor/misskey.py
index 37efac07..8c717581 100644
--- a/gallery_dl/extractor/misskey.py
+++ b/gallery_dl/extractor/misskey.py
@@ -19,9 +19,11 @@ class MisskeyExtractor(BaseExtractor):
 
     def __init__(self, match):
         BaseExtractor.__init__(self, match)
+        self.item = match.group(match.lastindex)
+
+    def _init(self):
         self.api = MisskeyAPI(self)
         self.instance = self.root.rpartition("://")[2]
-        self.item = match.group(match.lastindex)
         self.renotes = self.config("renotes", False)
         self.replies = self.config("replies", True)
 
diff --git a/gallery_dl/extractor/myhentaigallery.py b/gallery_dl/extractor/myhentaigallery.py
index 5dc4cb60..3301da97 100644
--- a/gallery_dl/extractor/myhentaigallery.py
+++ b/gallery_dl/extractor/myhentaigallery.py
@@ -38,7 +38,9 @@ class MyhentaigalleryGalleryExtractor(GalleryExtractor):
         self.gallery_id = match.group(1)
         url = "{}/gallery/thumbnails/{}".format(self.root, self.gallery_id)
         GalleryExtractor.__init__(self, match, url)
-        self.session.headers["Referer"] = url
+
+    def _init(self):
+        self.session.headers["Referer"] = self.gallery_url
 
     def metadata(self, page):
         extr = text.extract_from(page)
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index e3ea3fc9..8a255280 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -29,6 +29,8 @@ class NewgroundsExtractor(Extractor):
         Extractor.__init__(self, match)
         self.user = match.group(1)
         self.user_root = "https://{}.newgrounds.com".format(self.user)
+
+    def _init(self):
         self.flash = self.config("flash", True)
 
         fmt = self.config("format", "original")
@@ -517,6 +519,9 @@ class NewgroundsUserExtractor(NewgroundsExtractor):
         }),
     )
 
+    def initialize(self):
+        pass
+
     def items(self):
         base = self.user_root + "/"
         return self._dispatch_extractors((
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index e822895b..66040d8a 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -21,19 +21,20 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
     archive_fmt = "{image_id}_{num}"
 
     def __init__(self, match):
-        self._init_category(match)
+        BaseExtractor.__init__(self, match)
+        self.user_id = text.parse_int(match.group(match.lastindex))
+
+    def initialize(self):
         self.cookies_domain = "." + self.root.rpartition("/")[2]
         self.cookies_names = (self.category + "_tok",)
 
+        BaseExtractor.initialize(self)
+
+        self.session.headers["Referer"] = self.root + "/"
+        self.user_name = None
         if self.category == "horne":
             self._extract_data = self._extract_data_horne
 
-        BaseExtractor.__init__(self, match)
-
-        self.user_id = text.parse_int(match.group(match.lastindex))
-        self.user_name = None
-        self.session.headers["Referer"] = self.root + "/"
-
     def items(self):
         self.login()
 
@@ -180,6 +181,9 @@ class NijieUserExtractor(NijieExtractor):
         ("https://horne.red/members.php?id=58000"),
     )
 
+    def initialize(self):
+        pass
+
     def items(self):
         fmt = "{}/{{}}.php?id={}".format(self.root, self.user_id).format
         return self._dispatch_extractors((
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 824757ce..f109d258 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -28,6 +28,8 @@ class OAuthBase(Extractor):
     def __init__(self, match):
         Extractor.__init__(self, match)
         self.client = None
+
+    def _init(self):
         self.cache = config.get(("extractor", self.category), "cache", True)
 
     def oauth_config(self, key, default=None):
diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py
index 7bccf838..6bc7b9a9 100644
--- a/gallery_dl/extractor/paheal.py
+++ b/gallery_dl/extractor/paheal.py
@@ -108,6 +108,7 @@ class PahealTagExtractor(PahealExtractor):
         PahealExtractor.__init__(self, match)
         self.tags = text.unquote(match.group(1))
 
+    def _init(self):
         if self.config("metadata"):
             self._extract_data = self._extract_data_ex
 
diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py
index e7188285..8fa5de24 100644
--- a/gallery_dl/extractor/philomena.py
+++ b/gallery_dl/extractor/philomena.py
@@ -22,8 +22,7 @@ class PhilomenaExtractor(BooruExtractor):
     page_start = 1
     per_page = 50
 
-    def __init__(self, match):
-        BooruExtractor.__init__(self, match)
+    def _init(self):
         self.api = PhilomenaAPI(self)
 
     _file_url = operator.itemgetter("view_url")
diff --git a/gallery_dl/extractor/photobucket.py b/gallery_dl/extractor/photobucket.py
index 6234e6a9..22aff884 100644
--- a/gallery_dl/extractor/photobucket.py
+++ b/gallery_dl/extractor/photobucket.py
@@ -48,9 +48,10 @@ class PhotobucketAlbumExtractor(Extractor):
     )
 
     def __init__(self, match):
-        Extractor.__init__(self, match)
-        self.album_path = ""
         self.root = "https://" + match.group(1)
+        Extractor.__init__(self, match)
+
+    def _init(self):
         self.session.headers["Referer"] = self.url
 
     def items(self):
@@ -129,6 +130,8 @@ class PhotobucketImageExtractor(Extractor):
         Extractor.__init__(self, match)
         self.user = match.group(1) or match.group(3)
         self.media_id = match.group(2)
+
+    def _init(self):
         self.session.headers["Referer"] = self.url
 
     def items(self):
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index 92e05886..be30705b 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -23,12 +23,10 @@ class PinterestExtractor(Extractor):
     archive_fmt = "{id}{media_id}"
     root = "https://www.pinterest.com"
 
-    def __init__(self, match):
-        Extractor.__init__(self, match)
-
+    def _init(self):
         domain = self.config("domain")
         if not domain or domain == "auto" :
-            self.root = text.root_from_url(match.group(0))
+            self.root = text.root_from_url(self.url)
         else:
             self.root = text.ensure_http_scheme(domain)
 
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 3cc59acf..ffe8030f 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -28,8 +28,7 @@ class PixivExtractor(Extractor):
     archive_fmt = "{id}{suffix}.{extension}"
     cookies_domain = None
 
-    def __init__(self, match):
-        Extractor.__init__(self, match)
+    def _init(self):
         self.api = PixivAppAPI(self)
         self.load_ugoira = self.config("ugoira", True)
         self.max_posts = self.config("max-posts", 0)
@@ -174,6 +173,9 @@ class PixivUserExtractor(PixivExtractor):
         PixivExtractor.__init__(self, match)
         self.user_id = match.group(1)
 
+    def initialize(self):
+        pass
+
     def items(self):
         base = "{}/users/{}/".format(self.root, self.user_id)
         return self._dispatch_extractors((
diff --git a/gallery_dl/extractor/pornpics.py b/gallery_dl/extractor/pornpics.py
index 783f3da9..929e0f58 100644
--- a/gallery_dl/extractor/pornpics.py
+++ b/gallery_dl/extractor/pornpics.py
@@ -23,7 +23,9 @@ class PornpicsExtractor(Extractor):
     def __init__(self, match):
         super().__init__(match)
         self.item = match.group(1)
-        self.session.headers["Referer"] = self.root
+
+    def _init(self):
+        self.session.headers["Referer"] = self.root + "/"
 
     def items(self):
         for gallery in self.galleries():
diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py
index 1800b68d..ba571bbd 100644
--- a/gallery_dl/extractor/reactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -22,18 +22,21 @@ class ReactorExtractor(BaseExtractor):
 
     def __init__(self, match):
         BaseExtractor.__init__(self, match)
+
         url = text.ensure_http_scheme(match.group(0), "http://")
         pos = url.index("/", 10)
-
-        self.root, self.path = url[:pos], url[pos:]
-        self.session.headers["Referer"] = self.root
-        self.gif = self.config("gif", False)
+        self.root = url[:pos]
+        self.path = url[pos:]
 
         if self.category == "reactor":
             # set category based on domain name
             netloc = urllib.parse.urlsplit(self.root).netloc
             self.category = netloc.rpartition(".")[0]
 
+    def _init(self):
+        self.session.headers["Referer"] = self.root
+        self.gif = self.config("gif", False)
+
     def items(self):
         data = self.metadata()
         yield Message.Directory, data
diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py
index c924e0a3..c68068cb 100644
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -57,8 +57,10 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
 
     def __init__(self, match):
         ChapterExtractor.__init__(self, match)
+        self.params = match.group(2)
 
-        params = text.parse_query(match.group(2))
+    def _init(self):
+        params = text.parse_query(self.params)
         quality = self.config("quality")
 
         if quality is None or quality == "auto":
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index 9109e8dc..abd21b30 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -24,6 +24,8 @@ class RedgifsExtractor(Extractor):
     def __init__(self, match):
         Extractor.__init__(self, match)
         self.key = match.group(1)
+
+    def _init(self):
         self.api = RedgifsAPI(self)
 
         formats = self.config("format")
diff --git a/gallery_dl/extractor/rule34us.py b/gallery_dl/extractor/rule34us.py
index 00b6972d..88331eaa 100644
--- a/gallery_dl/extractor/rule34us.py
+++ b/gallery_dl/extractor/rule34us.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -19,8 +19,7 @@ class Rule34usExtractor(BooruExtractor):
     root = "https://rule34.us"
     per_page = 42
 
-    def __init__(self, match):
-        BooruExtractor.__init__(self, match)
+    def _init(self):
         self._find_tags = re.compile(
             r'<li class="([^-"]+)-tag"[^>]*><a href="[^;"]+;q=([^"]+)').findall
 
diff --git a/gallery_dl/extractor/senmanga.py b/gallery_dl/extractor/senmanga.py
index b3b27462..c48be84e 100644
--- a/gallery_dl/extractor/senmanga.py
+++ b/gallery_dl/extractor/senmanga.py
@@ -66,13 +66,11 @@ class SenmangaChapterExtractor(ChapterExtractor):
         }),
     )
 
-    def __init__(self, match):
-        ChapterExtractor.__init__(self, match)
+    def _init(self):
         self.session.headers["Referer"] = self.gallery_url
 
         # select "All pages" viewer
-        self.cookies.set(
-            "viewer", "1", domain="raw.senmanga.com")
+        self.cookies.set("viewer", "1", domain="raw.senmanga.com")
 
     def metadata(self, page):
         title = text.extr(page, "<title>", "</title>")
diff --git a/gallery_dl/extractor/shimmie2.py b/gallery_dl/extractor/shimmie2.py
index b0dd9bbd..de6c8a15 100644
--- a/gallery_dl/extractor/shimmie2.py
+++ b/gallery_dl/extractor/shimmie2.py
@@ -18,21 +18,20 @@ class Shimmie2Extractor(BaseExtractor):
     filename_fmt = "{category}_{id}{md5:?_//}.{extension}"
     archive_fmt = "{id}"
 
-    def __init__(self, match):
-        BaseExtractor.__init__(self, match)
-
+    def _init(self):
         try:
             instance = INSTANCES[self.category]
         except KeyError:
-            pass
-        else:
-            cookies = instance.get("cookies")
-            if cookies:
-                domain = self.root.rpartition("/")[2]
-                self.cookies_update_dict(cookies, domain=domain)
-            file_url = instance.get("file_url")
-            if file_url:
-                self.file_url_fmt = file_url
+            return
+
+        cookies = instance.get("cookies")
+        if cookies:
+            domain = self.root.rpartition("/")[2]
+            self.cookies_update_dict(cookies, domain=domain)
+
+        file_url = instance.get("file_url")
+        if file_url:
+            self.file_url_fmt = file_url
 
     def items(self):
         data = self.metadata()
diff --git a/gallery_dl/extractor/simplyhentai.py b/gallery_dl/extractor/simplyhentai.py
index b5d116fd..d1ccc492 100644
--- a/gallery_dl/extractor/simplyhentai.py
+++ b/gallery_dl/extractor/simplyhentai.py
@@ -40,7 +40,9 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
             path = "/" + subdomain.rstrip(".") + path
         url = "https://old.simply-hentai.com" + path
         GalleryExtractor.__init__(self, match, url)
-        self.session.headers["Referer"] = url
+
+    def _init(self):
+        self.session.headers["Referer"] = self.gallery_url
 
     def metadata(self, page):
         extr = text.extract_from(page)
diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
index 3724c859..b643c6f2 100644
--- a/gallery_dl/extractor/skeb.py
+++ b/gallery_dl/extractor/skeb.py
@@ -22,6 +22,8 @@ class SkebExtractor(Extractor):
     def __init__(self, match):
         Extractor.__init__(self, match)
         self.user_name = match.group(1)
+
+    def _init(self):
         self.thumbnails = self.config("thumbnails", False)
         self.article = self.config("article", False)
 
diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py
index e30c4911..b9edd4ab 100644
--- a/gallery_dl/extractor/smugmug.py
+++ b/gallery_dl/extractor/smugmug.py
@@ -34,8 +34,7 @@ class SmugmugExtractor(Extractor):
         "Uris": None,
     }
 
-    def __init__(self, match):
-        Extractor.__init__(self, match)
+    def _init(self):
         self.api = SmugmugAPI(self)
         self.videos = self.config("videos", True)
         self.session = self.api.session
diff --git a/gallery_dl/extractor/szurubooru.py b/gallery_dl/extractor/szurubooru.py
index 4b15b144..8c816ad1 100644
--- a/gallery_dl/extractor/szurubooru.py
+++ b/gallery_dl/extractor/szurubooru.py
@@ -20,8 +20,7 @@ class SzurubooruExtractor(booru.BooruExtractor):
     filename_fmt = "{id}_{version}_{checksumMD5}.{extension}"
     per_page = 100
 
-    def __init__(self, match):
-        booru.BooruExtractor.__init__(self, match)
+    def _init(self):
         self.headers = {
             "Accept": "application/json",
             "Content-Type": "application/json",
diff --git a/gallery_dl/extractor/tapas.py b/gallery_dl/extractor/tapas.py
index ec4a249c..0e09e22a 100644
--- a/gallery_dl/extractor/tapas.py
+++ b/gallery_dl/extractor/tapas.py
@@ -26,8 +26,7 @@ class TapasExtractor(Extractor):
     cookies_names = ("_cpc_",)
     _cache = None
 
-    def __init__(self, match):
-        Extractor.__init__(self, match)
+    def _init(self):
         if self._cache is None:
             TapasExtractor._cache = {}
 
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index f42da488..12ea39f8 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -42,6 +42,7 @@ class TumblrExtractor(Extractor):
         else:
             self.blog = match.group(1) or match.group(3)
 
+    def _init(self):
         self.api = TumblrAPI(self)
         self.types = self._setup_posttypes()
         self.avatar = self.config("avatar", False)
diff --git a/gallery_dl/extractor/twibooru.py b/gallery_dl/extractor/twibooru.py
index a8acd319..c3e0a262 100644
--- a/gallery_dl/extractor/twibooru.py
+++ b/gallery_dl/extractor/twibooru.py
@@ -26,8 +26,7 @@ class TwibooruExtractor(BooruExtractor):
     per_page = 50
     root = "https://twibooru.org"
 
-    def __init__(self, match):
-        BooruExtractor.__init__(self, match)
+    def _init(self):
         self.api = TwibooruAPI(self)
 
     _file_url = operator.itemgetter("view_url")
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 7e420799..478b6d38 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -32,6 +32,8 @@ class TwitterExtractor(Extractor):
     def __init__(self, match):
         Extractor.__init__(self, match)
         self.user = match.group(1)
+
+    def _init(self):
         self.textonly = self.config("text-tweets", False)
         self.retweets = self.config("retweets", False)
         self.replies = self.config("replies", True)
@@ -490,6 +492,9 @@ class TwitterUserExtractor(TwitterExtractor):
         if user_id:
             self.user = "id:" + user_id
 
+    def initialize(self):
+        pass
+
     def items(self):
         base = "{}/{}/".format(self.root, self.user)
         return self._dispatch_extractors((
diff --git a/gallery_dl/extractor/urlshortener.py b/gallery_dl/extractor/urlshortener.py
index 972b508d..4b49a638 100644
--- a/gallery_dl/extractor/urlshortener.py
+++ b/gallery_dl/extractor/urlshortener.py
@@ -54,6 +54,7 @@ class UrlshortenerLinkExtractor(UrlshortenerExtractor):
         UrlshortenerExtractor.__init__(self, match)
         self.id = match.group(match.lastindex)
 
+    def _init(self):
         try:
             self.headers = INSTANCES[self.category]["headers"]
         except Exception:
diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py
index d8aa6cdb..084f9b25 100644
--- a/gallery_dl/extractor/vipergirls.py
+++ b/gallery_dl/extractor/vipergirls.py
@@ -26,9 +26,8 @@ class VipergirlsExtractor(Extractor):
     cookies_domain = ".vipergirls.to"
     cookies_names = ("vg_userid", "vg_password")
 
-    def __init__(self, match):
-        Extractor.__init__(self, match)
-        self.session.headers["Referer"] = self.root
+    def _init(self):
+        self.session.headers["Referer"] = self.root + "/"
 
     def items(self):
         self.login()
diff --git a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py
index 0ba0d910..9e271098 100644
--- a/gallery_dl/extractor/wallhaven.py
+++ b/gallery_dl/extractor/wallhaven.py
@@ -20,8 +20,7 @@ class WallhavenExtractor(Extractor):
     archive_fmt = "{id}"
     request_interval = 1.4
 
-    def __init__(self, match):
-        Extractor.__init__(self, match)
+    def _init(self):
         self.api = WallhavenAPI(self)
 
     def items(self):
@@ -109,6 +108,9 @@ class WallhavenUserExtractor(WallhavenExtractor):
         WallhavenExtractor.__init__(self, match)
         self.username = match.group(1)
 
+    def initialize(self):
+        pass
+
     def items(self):
         base = "{}/user/{}/".format(self.root, self.username)
         return self._dispatch_extractors((
diff --git a/gallery_dl/extractor/weasyl.py b/gallery_dl/extractor/weasyl.py
index eca4f1ad..c4d242a1 100644
--- a/gallery_dl/extractor/weasyl.py
+++ b/gallery_dl/extractor/weasyl.py
@@ -30,8 +30,7 @@ class WeasylExtractor(Extractor):
             return True
         return False
 
-    def __init__(self, match):
-        Extractor.__init__(self, match)
+    def _init(self):
         self.session.headers['X-Weasyl-API-Key'] = self.config("api-key")
 
     def request_submission(self, submitid):
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
index 7b3e8033..31592420 100644
--- a/gallery_dl/extractor/webtoons.py
+++ b/gallery_dl/extractor/webtoons.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 
 # Copyright 2020 Leonardo Taccari
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -71,15 +71,18 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
     )
 
     def __init__(self, match):
-        self.path, self.lang, self.genre, self.comic, query = match.groups()
+        self.path, self.lang, self.genre, self.comic, self.query = \
+            match.groups()
 
-        url = "{}/{}/viewer?{}".format(self.root, self.path, query)
+        url = "{}/{}/viewer?{}".format(self.root, self.path, self.query)
         GalleryExtractor.__init__(self, match, url)
+
+    def _init(self):
         self.setup_agegate_cookies()
 
-        query = text.parse_query(query)
-        self.title_no = query.get("title_no")
-        self.episode_no = query.get("episode_no")
+        params = text.parse_query(self.query)
+        self.title_no = params.get("title_no")
+        self.episode_no = params.get("episode_no")
 
     def metadata(self, page):
         keywords, pos = text.extract(
@@ -141,12 +144,15 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
 
     def __init__(self, match):
         Extractor.__init__(self, match)
+        self.path, self.lang, self.genre, self.comic, self.query = \
+            match.groups()
+
+    def _init(self):
         self.setup_agegate_cookies()
 
-        self.path, self.lang, self.genre, self.comic, query = match.groups()
-        query = text.parse_query(query)
-        self.title_no = query.get("title_no")
-        self.page_no = text.parse_int(query.get("page"), 1)
+        params = text.parse_query(self.query)
+        self.title_no = params.get("title_no")
+        self.page_no = text.parse_int(params.get("page"), 1)
 
     def items(self):
         page = None
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 2de7a2fc..ae0fc4e3 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -28,6 +28,8 @@ class WeiboExtractor(Extractor):
     def __init__(self, match):
         Extractor.__init__(self, match)
         self._prefix, self.user = match.groups()
+
+    def _init(self):
         self.retweets = self.config("retweets", True)
         self.videos = self.config("videos", True)
         self.livephoto = self.config("livephoto", True)
@@ -228,6 +230,9 @@ class WeiboUserExtractor(WeiboExtractor):
         ("https://www.weibo.com/p/1003062314621010/home"),
     )
 
+    def initialize(self):
+        pass
+
     def items(self):
         base = "{}/u/{}?tabtype=".format(self.root, self._user_id())
         return self._dispatch_extractors((
diff --git a/test/test_cookies.py b/test/test_cookies.py
index 5a4fbe65..a6ad05f1 100644
--- a/test/test_cookies.py
+++ b/test/test_cookies.py
@@ -46,8 +46,7 @@ class TestCookiejar(unittest.TestCase):
 
     def test_cookiefile(self):
         config.set((), "cookies", self.cookiefile)
-
-        cookies = extractor.find("test:").cookies
+        cookies = _get_extractor("test").cookies
         self.assertEqual(len(cookies), 1)
 
         cookie = next(iter(cookies))
@@ -65,12 +64,14 @@ class TestCookiejar(unittest.TestCase):
     def _test_warning(self, filename, exc):
         config.set((), "cookies", filename)
         log = logging.getLogger("test")
+
         with mock.patch.object(log, "warning") as mock_warning:
-            cookies = extractor.find("test:").cookies
-            self.assertEqual(len(cookies), 0)
-            self.assertEqual(mock_warning.call_count, 1)
-            self.assertEqual(mock_warning.call_args[0][0], "cookies: %s")
-            self.assertIsInstance(mock_warning.call_args[0][1], exc)
+            cookies = _get_extractor("test").cookies
+
+        self.assertEqual(len(cookies), 0)
+        self.assertEqual(mock_warning.call_count, 1)
+        self.assertEqual(mock_warning.call_args[0][0], "cookies: %s")
+        self.assertIsInstance(mock_warning.call_args[0][1], exc)
 
 
 class TestCookiedict(unittest.TestCase):
@@ -83,7 +84,8 @@ class TestCookiedict(unittest.TestCase):
         config.clear()
 
     def test_dict(self):
-        cookies = extractor.find("test:").cookies
+        cookies = _get_extractor("test").cookies
+
         self.assertEqual(len(cookies), len(self.cdict))
         self.assertEqual(sorted(cookies.keys()), sorted(self.cdict.keys()))
         self.assertEqual(sorted(cookies.values()), sorted(self.cdict.values()))
@@ -122,7 +124,7 @@ class TestCookieLogin(unittest.TestCase):
 class TestCookieUtils(unittest.TestCase):
 
     def test_check_cookies(self):
-        extr = extractor.find("test:")
+        extr = _get_extractor("test")
         self.assertFalse(extr.cookies, "empty")
         self.assertFalse(extr.cookies_domain, "empty")
 
@@ -144,7 +146,7 @@ class TestCookieUtils(unittest.TestCase):
         self.assertFalse(extr.cookies_check(("a", "b", "c")))
 
     def test_check_cookies_domain(self):
-        extr = extractor.find("test:")
+        extr = _get_extractor("test")
         self.assertFalse(extr.cookies, "empty")
         extr.cookies_domain = ".example.org"
 
@@ -166,7 +168,7 @@ class TestCookieUtils(unittest.TestCase):
         self.assertTrue(extr.cookies_check(("a", "b", "c")))
 
     def test_check_cookies_expires(self):
-        extr = extractor.find("test:")
+        extr = _get_extractor("test")
         self.assertFalse(extr.cookies, "empty")
         self.assertFalse(extr.cookies_domain, "empty")
 
@@ -200,13 +202,18 @@ class TestCookieUtils(unittest.TestCase):
 
 
 def _get_extractor(category):
-    URLS = {
-        "exhentai"   : "https://exhentai.org/g/1200119/d55c44d3d0/",
-        "idolcomplex": "https://idol.sankakucomplex.com/post/show/1",
-        "nijie"      : "https://nijie.info/view.php?id=1",
-        "horne"      : "https://horne.red/view.php?id=1",
-    }
-    return extractor.find(URLS[category])
+    extr = extractor.find(URLS[category])
+    extr.initialize()
+    return extr
+
+
+URLS = {
+    "exhentai"   : "https://exhentai.org/g/1200119/d55c44d3d0/",
+    "idolcomplex": "https://idol.sankakucomplex.com/post/show/1",
+    "nijie"      : "https://nijie.info/view.php?id=1",
+    "horne"      : "https://horne.red/view.php?id=1",
+    "test"       : "test:",
+}
 
 
 if __name__ == "__main__":
diff --git a/test/test_downloader.py b/test/test_downloader.py
index c65be952..840e0780 100644
--- a/test/test_downloader.py
+++ b/test/test_downloader.py
@@ -34,6 +34,7 @@ class FakeJob():
 
     def __init__(self):
         self.extractor = extractor.find("test:")
+        self.extractor.initialize()
         self.pathfmt = path.PathFormat(self.extractor)
         self.out = output.NullOutput()
         self.get_logger = logging.getLogger