From a383eca7f6ec0f08dcc854d594aecbfecc4f45be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 25 Jul 2023 20:09:44 +0200 Subject: [PATCH] decouple extractor initialization Introduce an 'initialize()' function that does the actual init (session, cookies, config options) and can be called separately from the constructor __init__(). This allows, for example, adjusting config access inside a Job before most of it has already happened when calling 'extractor.find()'. --- gallery_dl/extractor/3dbooru.py | 12 +++--- gallery_dl/extractor/500px.py | 3 +- gallery_dl/extractor/8chan.py | 4 +- gallery_dl/extractor/artstation.py | 6 +-- gallery_dl/extractor/aryion.py | 8 +++- gallery_dl/extractor/blogger.py | 5 ++- gallery_dl/extractor/common.py | 56 ++++++++++++++++--------- gallery_dl/extractor/danbooru.py | 3 +- gallery_dl/extractor/deviantart.py | 25 +++++++---- gallery_dl/extractor/exhentai.py | 19 +++++---- gallery_dl/extractor/fanbox.py | 5 +-- gallery_dl/extractor/flickr.py | 4 +- gallery_dl/extractor/foolfuuka.py | 4 +- gallery_dl/extractor/furaffinity.py | 5 +++ gallery_dl/extractor/gelbooru_v02.py | 3 +- gallery_dl/extractor/gfycat.py | 1 + gallery_dl/extractor/hentaicosplays.py | 4 +- gallery_dl/extractor/hentaifoundry.py | 3 ++ gallery_dl/extractor/hitomi.py | 9 ++-- gallery_dl/extractor/hotleak.py | 5 +-- gallery_dl/extractor/idolcomplex.py | 2 + gallery_dl/extractor/imagebam.py | 4 +- gallery_dl/extractor/imagechest.py | 9 ++-- gallery_dl/extractor/imagefap.py | 5 +-- gallery_dl/extractor/imgur.py | 4 +- gallery_dl/extractor/inkbunny.py | 3 +- gallery_dl/extractor/instagram.py | 23 ++++++---- gallery_dl/extractor/itaku.py | 4 +- gallery_dl/extractor/kemonoparty.py | 7 ++-- gallery_dl/extractor/lolisafe.py | 3 +- gallery_dl/extractor/luscious.py | 4 +- gallery_dl/extractor/mangadex.py | 6 ++- gallery_dl/extractor/mangafox.py | 4 +- gallery_dl/extractor/mangahere.py | 4 +- gallery_dl/extractor/mangakakalot.py | 6 ++- 
gallery_dl/extractor/manganelo.py | 4 +- gallery_dl/extractor/mangasee.py | 2 + gallery_dl/extractor/mastodon.py | 4 +- gallery_dl/extractor/misskey.py | 4 +- gallery_dl/extractor/myhentaigallery.py | 4 +- gallery_dl/extractor/newgrounds.py | 5 +++ gallery_dl/extractor/nijie.py | 18 ++++---- gallery_dl/extractor/oauth.py | 2 + gallery_dl/extractor/paheal.py | 1 + gallery_dl/extractor/philomena.py | 3 +- gallery_dl/extractor/photobucket.py | 7 +++- gallery_dl/extractor/pinterest.py | 6 +-- gallery_dl/extractor/pixiv.py | 6 ++- gallery_dl/extractor/pornpics.py | 4 +- gallery_dl/extractor/reactor.py | 11 +++-- gallery_dl/extractor/readcomiconline.py | 4 +- gallery_dl/extractor/redgifs.py | 2 + gallery_dl/extractor/rule34us.py | 5 +-- gallery_dl/extractor/senmanga.py | 6 +-- gallery_dl/extractor/shimmie2.py | 23 +++++----- gallery_dl/extractor/simplyhentai.py | 4 +- gallery_dl/extractor/skeb.py | 2 + gallery_dl/extractor/smugmug.py | 3 +- gallery_dl/extractor/szurubooru.py | 3 +- gallery_dl/extractor/tapas.py | 3 +- gallery_dl/extractor/tumblr.py | 1 + gallery_dl/extractor/twibooru.py | 3 +- gallery_dl/extractor/twitter.py | 5 +++ gallery_dl/extractor/urlshortener.py | 1 + gallery_dl/extractor/vipergirls.py | 5 +-- gallery_dl/extractor/wallhaven.py | 6 ++- gallery_dl/extractor/weasyl.py | 3 +- gallery_dl/extractor/webtoons.py | 26 +++++++----- gallery_dl/extractor/weibo.py | 5 +++ test/test_cookies.py | 43 +++++++++++-------- test/test_downloader.py | 1 + 71 files changed, 314 insertions(+), 193 deletions(-) diff --git a/gallery_dl/extractor/3dbooru.py b/gallery_dl/extractor/3dbooru.py index e0066cb9..e83bca75 100644 --- a/gallery_dl/extractor/3dbooru.py +++ b/gallery_dl/extractor/3dbooru.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2020 Mike Fährmann +# Copyright 2015-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -17,12 +17,10 @@ class 
_3dbooruBase(): basecategory = "booru" root = "http://behoimi.org" - def __init__(self, match): - super().__init__(match) - self.session.headers.update({ - "Referer": "http://behoimi.org/post/show/", - "Accept-Encoding": "identity", - }) + def _init(self): + headers = self.session.headers + headers["Referer"] = "http://behoimi.org/post/show/" + headers["Accept-Encoding"] = "identity" class _3dbooruTagExtractor(_3dbooruBase, moebooru.MoebooruTagExtractor): diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py index ac38b604..4d1307e1 100644 --- a/gallery_dl/extractor/500px.py +++ b/gallery_dl/extractor/500px.py @@ -23,8 +23,7 @@ class _500pxExtractor(Extractor): root = "https://500px.com" cookies_domain = ".500px.com" - def __init__(self, match): - Extractor.__init__(self, match) + def _init(self): self.session.headers["Referer"] = self.root + "/" def items(self): diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py index f098008a..2d043868 100644 --- a/gallery_dl/extractor/8chan.py +++ b/gallery_dl/extractor/8chan.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2022 Mike Fährmann +# Copyright 2022-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -150,6 +150,8 @@ class _8chanBoardExtractor(_8chanExtractor): def __init__(self, match): _8chanExtractor.__init__(self, match) _, self.board, self.page = match.groups() + + def _init(self): self.session.headers["Referer"] = self.root + "/" def items(self): diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py index a3a7c1e1..77d5fbd2 100644 --- a/gallery_dl/extractor/artstation.py +++ b/gallery_dl/extractor/artstation.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2022 Mike Fährmann +# Copyright 2018-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the 
GNU General Public License version 2 as @@ -27,12 +27,12 @@ class ArtstationExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) self.user = match.group(1) or match.group(2) - self.external = self.config("external", False) def items(self): data = self.metadata() projects = self.projects() + external = self.config("external", False) max_posts = self.config("max-posts") if max_posts: projects = itertools.islice(projects, max_posts) @@ -45,7 +45,7 @@ class ArtstationExtractor(Extractor): asset["num"] = num yield Message.Directory, asset - if adict["has_embedded_player"] and self.external: + if adict["has_embedded_player"] and external: player = adict["player_embedded"] url = (text.extr(player, 'src="', '"') or text.extr(player, "src='", "'")) diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py index ad0f9dc6..89a8319a 100644 --- a/gallery_dl/extractor/aryion.py +++ b/gallery_dl/extractor/aryion.py @@ -189,9 +189,11 @@ class AryionGalleryExtractor(AryionExtractor): def __init__(self, match): AryionExtractor.__init__(self, match) - self.recursive = self.config("recursive", True) self.offset = 0 + def _init(self): + self.recursive = self.config("recursive", True) + def skip(self, num): if self.recursive: return 0 @@ -217,9 +219,11 @@ class AryionTagExtractor(AryionExtractor): "count": ">= 5", }) - def metadata(self): + def _init(self): self.params = text.parse_query(self.user) self.user = None + + def metadata(self): return {"search_tags": self.params.get("tag")} def posts(self): diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py index 3ceada8d..b25af8f9 100644 --- a/gallery_dl/extractor/blogger.py +++ b/gallery_dl/extractor/blogger.py @@ -28,12 +28,13 @@ class BloggerExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) - self.videos = self.config("videos", True) self.blog = match.group(1) or match.group(2) + + def _init(self): self.api = BloggerAPI(self) + 
self.videos = self.config("videos", True) def items(self): - blog = self.api.blog_by_url("http://" + self.blog) blog["pages"] = blog["pages"]["totalItems"] blog["posts"] = blog["posts"]["totalItems"] diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 2e5ce4d4..fc6b197c 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -52,25 +52,6 @@ class Extractor(): self._cfgpath = ("extractor", self.category, self.subcategory) self._parentdir = "" - self._write_pages = self.config("write-pages", False) - self._retry_codes = self.config("retry-codes") - self._retries = self.config("retries", 4) - self._timeout = self.config("timeout", 30) - self._verify = self.config("verify", True) - self._proxies = util.build_proxy_map(self.config("proxy"), self.log) - self._interval = util.build_duration_func( - self.config("sleep-request", self.request_interval), - self.request_interval_min, - ) - - if self._retries < 0: - self._retries = float("inf") - if not self._retry_codes: - self._retry_codes = () - - self._init_session() - self._init_cookies() - @classmethod def from_url(cls, url): if isinstance(cls.pattern, str): @@ -79,8 +60,16 @@ class Extractor(): return cls(match) if match else None def __iter__(self): + self.initialize() return self.items() + def initialize(self): + self._init_options() + self._init_session() + self._init_cookies() + self._init() + self.initialize = util.noop + def items(self): yield Message.Version, 1 @@ -245,6 +234,26 @@ class Extractor(): return username, password + def _init(self): + pass + + def _init_options(self): + self._write_pages = self.config("write-pages", False) + self._retry_codes = self.config("retry-codes") + self._retries = self.config("retries", 4) + self._timeout = self.config("timeout", 30) + self._verify = self.config("verify", True) + self._proxies = util.build_proxy_map(self.config("proxy"), self.log) + self._interval = util.build_duration_func( + 
self.config("sleep-request", self.request_interval), + self.request_interval_min, + ) + + if self._retries < 0: + self._retries = float("inf") + if not self._retry_codes: + self._retry_codes = () + def _init_session(self): self.session = session = requests.Session() headers = session.headers @@ -454,6 +463,13 @@ class Extractor(): self.cookies.set( "__ddg2", util.generate_token(), domain=self.cookies_domain) + def _cache(self, func, maxage, keyarg=None): + # return cache.DatabaseCacheDecorator(func, maxage, keyarg) + return cache.DatabaseCacheDecorator(func, keyarg, maxage) + + def _cache_memory(self, func, maxage=None, keyarg=None): + return cache.Memcache() + def _get_date_min_max(self, dmin=None, dmax=None): """Retrieve and parse 'date-min' and 'date-max' config values""" def get(key, default): @@ -654,6 +670,8 @@ class AsynchronousMixin(): """Run info extraction in a separate thread""" def __iter__(self): + self.initialize() + messages = queue.Queue(5) thread = threading.Thread( target=self.async_items, diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 19a3aeff..b16d27a2 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -22,8 +22,7 @@ class DanbooruExtractor(BaseExtractor): per_page = 200 request_interval = 1.0 - def __init__(self, match): - BaseExtractor.__init__(self, match) + def _init(self): self.ugoira = self.config("ugoira", False) self.external = self.config("external", False) self.includes = False diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 9f16b334..3497b0c4 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -38,14 +38,18 @@ class DeviantartExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) + self.user = match.group(1) or match.group(2) + + def _init(self): self.flat = self.config("flat", True) self.extra = self.config("extra", False) self.original = 
self.config("original", True) self.comments = self.config("comments", False) - self.user = match.group(1) or match.group(2) + + self.api = DeviantartOAuthAPI(self) self.group = False self.offset = 0 - self.api = None + self._premium_cache = {} unwatch = self.config("auto-unwatch") if unwatch: @@ -60,11 +64,13 @@ class DeviantartExtractor(Extractor): self._update_content = self._update_content_image self.original = True - self._premium_cache = {} - self.commit_journal = { - "html": self._commit_journal_html, - "text": self._commit_journal_text, - }.get(self.config("journals", "html")) + journals = self.config("journals", "html") + if journals == "html": + self.commit_journal = self._commit_journal_html + elif journals == "text": + self.commit_journal = self._commit_journal_text + else: + self.commit_journal = None def skip(self, num): self.offset += num @@ -80,8 +86,6 @@ class DeviantartExtractor(Extractor): return True def items(self): - self.api = DeviantartOAuthAPI(self) - if self.user and self.config("group", True): profile = self.api.user_profile(self.user) self.group = not profile @@ -449,6 +453,9 @@ class DeviantartUserExtractor(DeviantartExtractor): ("https://shimoda7.deviantart.com/"), ) + def initialize(self): + pass + def items(self): base = "{}/{}/".format(self.root, self.user) return self._dispatch_extractors(( diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 087ff51c..d5f1d02b 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -31,17 +31,21 @@ class ExhentaiExtractor(Extractor): LIMIT = False def __init__(self, match): - # allow calling 'self.config()' before 'Extractor.__init__()' - self._cfgpath = ("extractor", self.category, self.subcategory) + Extractor.__init__(self, match) + self.version = match.group(1) - version = match.group(1) + def initialize(self): domain = self.config("domain", "auto") if domain == "auto": - domain = ("ex" if version == "ex" else "e-") + 
"hentai.org" + domain = ("ex" if self.version == "ex" else "e-") + "hentai.org" self.root = "https://" + domain self.cookies_domain = "." + domain - Extractor.__init__(self, match) + Extractor.initialize(self) + + if self.version != "ex": + self.cookies.set("nw", "1", domain=self.cookies_domain) + self.session.headers["Referer"] = self.root + "/" self.original = self.config("original", True) limits = self.config("limits", False) @@ -51,10 +55,6 @@ class ExhentaiExtractor(Extractor): else: self.limits = False - self.session.headers["Referer"] = self.root + "/" - if version != "ex": - self.cookies.set("nw", "1", domain=self.cookies_domain) - def request(self, url, **kwargs): response = Extractor.request(self, url, **kwargs) if response.history and response.headers.get("Content-Length") == "0": @@ -174,6 +174,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): self.image_token = match.group(4) self.image_num = text.parse_int(match.group(6), 1) + def _init(self): source = self.config("source") if source == "hitomi": self.items = self._items_hitomi diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py index 40ad8cdd..921ddb62 100644 --- a/gallery_dl/extractor/fanbox.py +++ b/gallery_dl/extractor/fanbox.py @@ -6,9 +6,9 @@ """Extractors for https://www.fanbox.cc/""" -import re from .common import Extractor, Message from .. 
import text +import re BASE_PATTERN = ( @@ -27,8 +27,7 @@ class FanboxExtractor(Extractor): archive_fmt = "{id}_{num}" _warning = True - def __init__(self, match): - Extractor.__init__(self, match) + def _init(self): self.embeds = self.config("embeds", True) def items(self): diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py index cb7d1e81..3b18c63e 100644 --- a/gallery_dl/extractor/flickr.py +++ b/gallery_dl/extractor/flickr.py @@ -26,8 +26,10 @@ class FlickrExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) - self.api = FlickrAPI(self) self.item_id = match.group(1) + + def _init(self): + self.api = FlickrAPI(self) self.user = None def items(self): diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 76fb69eb..fefb2c4c 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -22,10 +22,12 @@ class FoolfuukaExtractor(BaseExtractor): def __init__(self, match): BaseExtractor.__init__(self, match) - self.session.headers["Referer"] = self.root if self.category == "b4k": self.remote = self._remote_direct + def _init(self): + self.session.headers["Referer"] = self.root + "/" + def items(self): yield Message.Directory, self.metadata() for post in self.posts(): diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index c03c89b2..8c3ef79d 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -28,6 +28,8 @@ class FuraffinityExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) self.user = match.group(1) + + def _init(self): self.offset = 0 if self.config("descriptions") == "html": @@ -384,6 +386,9 @@ class FuraffinityUserExtractor(FuraffinityExtractor): }), ) + def initialize(self): + pass + def items(self): base = "{}/{{}}/{}/".format(self.root, self.user) return self._dispatch_extractors(( diff --git a/gallery_dl/extractor/gelbooru_v02.py 
b/gallery_dl/extractor/gelbooru_v02.py index 958c4b58..1ef78efd 100644 --- a/gallery_dl/extractor/gelbooru_v02.py +++ b/gallery_dl/extractor/gelbooru_v02.py @@ -19,8 +19,7 @@ import re class GelbooruV02Extractor(booru.BooruExtractor): basecategory = "gelbooru_v02" - def __init__(self, match): - booru.BooruExtractor.__init__(self, match) + def _init(self): self.api_key = self.config("api-key") self.user_id = self.config("user-id") diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py index ccebdf98..53ef1180 100644 --- a/gallery_dl/extractor/gfycat.py +++ b/gallery_dl/extractor/gfycat.py @@ -24,6 +24,7 @@ class GfycatExtractor(Extractor): Extractor.__init__(self, match) self.key = match.group(1).lower() + def _init(self): formats = self.config("format") if formats is None: formats = ("mp4", "webm", "mobile", "gif") diff --git a/gallery_dl/extractor/hentaicosplays.py b/gallery_dl/extractor/hentaicosplays.py index 593a8464..ac03923f 100644 --- a/gallery_dl/extractor/hentaicosplays.py +++ b/gallery_dl/extractor/hentaicosplays.py @@ -57,7 +57,9 @@ class HentaicosplaysGalleryExtractor(GalleryExtractor): self.root = text.ensure_http_scheme(root) url = "{}/story/{}/".format(self.root, self.slug) GalleryExtractor.__init__(self, match, url) - self.session.headers["Referer"] = url + + def _init(self): + self.session.headers["Referer"] = self.gallery_url def metadata(self, page): title = text.extr(page, "", "") diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index 78a576df..56ea1d4d 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -170,6 +170,9 @@ class HentaifoundryUserExtractor(HentaifoundryExtractor): pattern = BASE_PATTERN + r"/user/([^/?#]+)/profile" test = ("https://www.hentai-foundry.com/user/Tenpura/profile",) + def initialize(self): + pass + def items(self): root = self.root user = "/user/" + self.user diff --git a/gallery_dl/extractor/hitomi.py 
b/gallery_dl/extractor/hitomi.py index 4e8d1cae..c012c665 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -66,12 +66,13 @@ class HitomiGalleryExtractor(GalleryExtractor): ) def __init__(self, match): - gid = match.group(1) - url = "https://ltn.hitomi.la/galleries/{}.js".format(gid) + self.gid = match.group(1) + url = "https://ltn.hitomi.la/galleries/{}.js".format(self.gid) GalleryExtractor.__init__(self, match, url) - self.info = None + + def _init(self): self.session.headers["Referer"] = "{}/reader/{}.html".format( - self.root, gid) + self.root, self.gid) def metadata(self, page): self.info = info = util.json_loads(page.partition("=")[2]) diff --git a/gallery_dl/extractor/hotleak.py b/gallery_dl/extractor/hotleak.py index 30158b43..2ab9f3cc 100644 --- a/gallery_dl/extractor/hotleak.py +++ b/gallery_dl/extractor/hotleak.py @@ -21,9 +21,8 @@ class HotleakExtractor(Extractor): archive_fmt = "{type}_{creator}_{id}" root = "https://hotleak.vip" - def __init__(self, match): - Extractor.__init__(self, match) - self.session.headers["Referer"] = self.root + def _init(self): + self.session.headers["Referer"] = self.root + "/" def items(self): for post in self.posts(): diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py index 02f037dd..fcac7fe2 100644 --- a/gallery_dl/extractor/idolcomplex.py +++ b/gallery_dl/extractor/idolcomplex.py @@ -29,6 +29,8 @@ class IdolcomplexExtractor(SankakuExtractor): self.logged_in = True self.start_page = 1 self.start_post = 0 + + def _init(self): self.extags = self.config("tags", False) def items(self): diff --git a/gallery_dl/extractor/imagebam.py b/gallery_dl/extractor/imagebam.py index 67d0b110..9a3ea368 100644 --- a/gallery_dl/extractor/imagebam.py +++ b/gallery_dl/extractor/imagebam.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2022 Mike Fährmann +# Copyright 2014-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify 
# it under the terms of the GNU General Public License version 2 as @@ -21,6 +21,8 @@ class ImagebamExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) self.path = match.group(1) + + def _init(self): self.cookies.set("nsfw_inter", "1", domain="www.imagebam.com") def _parse_image_page(self, path): diff --git a/gallery_dl/extractor/imagechest.py b/gallery_dl/extractor/imagechest.py index 9229617b..2babead8 100644 --- a/gallery_dl/extractor/imagechest.py +++ b/gallery_dl/extractor/imagechest.py @@ -47,8 +47,10 @@ class ImagechestGalleryExtractor(GalleryExtractor): url = self.root + "/p/" + self.gallery_id GalleryExtractor.__init__(self, match, url) - self.access_token = self.config("access-token") - if self.access_token: + def _init(self): + access_token = self.config("access-token") + if access_token: + self.api = ImagechestAPI(self, access_token) self.gallery_url = None self.metadata = self._metadata_api self.images = self._images_api @@ -82,8 +84,7 @@ class ImagechestGalleryExtractor(GalleryExtractor): ] def _metadata_api(self, page): - api = ImagechestAPI(self, self.access_token) - post = api.post(self.gallery_id) + post = self.api.post(self.gallery_id) post["date"] = text.parse_datetime( post["created"], "%Y-%m-%dT%H:%M:%S.%fZ") diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py index f5b69faa..43ac3a35 100644 --- a/gallery_dl/extractor/imagefap.py +++ b/gallery_dl/extractor/imagefap.py @@ -23,9 +23,8 @@ class ImagefapExtractor(Extractor): archive_fmt = "{gallery_id}_{image_id}" request_interval = (2.0, 4.0) - def __init__(self, match): - Extractor.__init__(self, match) - self.session.headers["Referer"] = self.root + def _init(self): + self.session.headers["Referer"] = self.root + "/" def request(self, url, **kwargs): response = Extractor.request(self, url, **kwargs) diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index 4c29d98f..ca9671c3 100644 --- 
a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -22,8 +22,10 @@ class ImgurExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) - self.api = ImgurAPI(self) self.key = match.group(1) + + def _init(self): + self.api = ImgurAPI(self) self.mp4 = self.config("mp4", True) def _prepare(self, image): diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py index 83a1a194..c6df16b0 100644 --- a/gallery_dl/extractor/inkbunny.py +++ b/gallery_dl/extractor/inkbunny.py @@ -24,8 +24,7 @@ class InkbunnyExtractor(Extractor): archive_fmt = "{file_id}" root = "https://inkbunny.net" - def __init__(self, match): - Extractor.__init__(self, match) + def _init(self): self.api = InkbunnyAPI(self) def items(self): diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 29208aef..cb77fa1c 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -34,16 +34,8 @@ class InstagramExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) self.item = match.group(1) - self.api = None - self.www_claim = "0" - self.csrf_token = util.generate_token() - self._logged_in = True - self._find_tags = re.compile(r"#\w+").findall - self._cursor = None - self._user = None - def items(self): - self.login() + def _init(self): self.cookies.set( "csrftoken", self.csrf_token, domain=self.cookies_domain) @@ -52,6 +44,16 @@ class InstagramExtractor(Extractor): else: self.api = InstagramRestAPI(self) + self.www_claim = "0" + self.csrf_token = util.generate_token() + self._find_tags = re.compile(r"#\w+").findall + self._logged_in = True + self._cursor = None + self._user = None + + def items(self): + self.login() + data = self.metadata() videos = self.config("videos", True) previews = self.config("previews", False) @@ -400,6 +402,9 @@ class InstagramUserExtractor(InstagramExtractor): ("https://www.instagram.com/id:25025320/"), ) + def initialize(self): + 
pass + def items(self): base = "{}/{}/".format(self.root, self.item) stories = "{}/stories/{}/".format(self.root, self.item) diff --git a/gallery_dl/extractor/itaku.py b/gallery_dl/extractor/itaku.py index 4bcedae1..356a002b 100644 --- a/gallery_dl/extractor/itaku.py +++ b/gallery_dl/extractor/itaku.py @@ -26,8 +26,10 @@ class ItakuExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) - self.api = ItakuAPI(self) self.item = match.group(1) + + def _init(self): + self.api = ItakuAPI(self) self.videos = self.config("videos", True) def items(self): diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index d5d02c29..2ed73e9c 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -35,14 +35,15 @@ class KemonopartyExtractor(Extractor): self.root = text.root_from_url(match.group(0)) self.cookies_domain = ".{}.{}".format(domain, tld) Extractor.__init__(self, match) + + def _init(self): self.session.headers["Referer"] = self.root + "/" - - def items(self): self._prepare_ddosguard_cookies() - self._find_inline = re.compile( r'src="(?:https?://(?:kemono|coomer)\.(?:party|su))?(/inline/[^"]+' r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall + + def items(self): find_hash = re.compile(HASH_PATTERN).match generators = self._build_file_generators(self.config("files")) duplicates = self.config("duplicates") diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py index 5d236c37..9cebe3ae 100644 --- a/gallery_dl/extractor/lolisafe.py +++ b/gallery_dl/extractor/lolisafe.py @@ -46,9 +46,10 @@ class LolisafeAlbumExtractor(LolisafeExtractor): LolisafeExtractor.__init__(self, match) self.album_id = match.group(match.lastindex) + def _init(self): domain = self.config("domain") if domain == "auto": - self.root = text.root_from_url(match.group(0)) + self.root = text.root_from_url(self.url) elif domain: self.root = text.ensure_http_scheme(domain) diff --git 
a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py index 80f8758c..dcf09d16 100644 --- a/gallery_dl/extractor/luscious.py +++ b/gallery_dl/extractor/luscious.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2022 Mike Fährmann +# Copyright 2016-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -118,6 +118,8 @@ class LusciousAlbumExtractor(LusciousExtractor): def __init__(self, match): LusciousExtractor.__init__(self, match) self.album_id = match.group(1) + + def _init(self): self.gif = self.config("gif", False) def items(self): diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index e111fee3..b0c985de 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -30,9 +30,11 @@ class MangadexExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) + self.uuid = match.group(1) + + def _init(self): self.session.headers["User-Agent"] = util.USERAGENT self.api = MangadexAPI(self) - self.uuid = match.group(1) def items(self): for chapter in self.chapters(): @@ -202,7 +204,7 @@ class MangadexAPI(): self.extractor = extr self.headers = {} - self.username, self.password = self.extractor._get_auth_info() + self.username, self.password = extr._get_auth_info() if not self.username: self.authenticate = util.noop diff --git a/gallery_dl/extractor/mangafox.py b/gallery_dl/extractor/mangafox.py index 0818fd90..8478b8de 100644 --- a/gallery_dl/extractor/mangafox.py +++ b/gallery_dl/extractor/mangafox.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2017-2022 Mike Fährmann +# Copyright 2017-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -33,6 +33,8 @@ class MangafoxChapterExtractor(ChapterExtractor): base, self.cstr, self.volume, 
self.chapter, self.minor = match.groups() self.urlbase = self.root + base ChapterExtractor.__init__(self, match, self.urlbase + "/1.html") + + def _init(self): self.session.headers["Referer"] = self.root + "/" def metadata(self, page): diff --git a/gallery_dl/extractor/mangahere.py b/gallery_dl/extractor/mangahere.py index ccce09b4..97c26d47 100644 --- a/gallery_dl/extractor/mangahere.py +++ b/gallery_dl/extractor/mangahere.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2022 Mike Fährmann +# Copyright 2015-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -42,6 +42,8 @@ class MangahereChapterExtractor(MangahereBase, ChapterExtractor): self.part, self.volume, self.chapter = match.groups() url = self.url_fmt.format(self.part, 1) ChapterExtractor.__init__(self, match, url) + + def _init(self): self.session.headers["Referer"] = self.root_mobile + "/" def metadata(self, page): diff --git a/gallery_dl/extractor/mangakakalot.py b/gallery_dl/extractor/mangakakalot.py index ba55ac16..e397586e 100644 --- a/gallery_dl/extractor/mangakakalot.py +++ b/gallery_dl/extractor/mangakakalot.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2020 Jake Mannens -# Copyright 2021-2022 Mike Fährmann +# Copyright 2021-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -39,7 +39,9 @@ class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor): def __init__(self, match): self.path = match.group(1) ChapterExtractor.__init__(self, match, self.root + self.path) - self.session.headers['Referer'] = self.root + + def _init(self): + self.session.headers['Referer'] = self.root + "/" def metadata(self, page): _ , pos = text.extract(page, '', '<') diff --git a/gallery_dl/extractor/manganelo.py b/gallery_dl/extractor/manganelo.py index 
6fd9f495..807bc5ee 100644 --- a/gallery_dl/extractor/manganelo.py +++ b/gallery_dl/extractor/manganelo.py @@ -21,7 +21,9 @@ class ManganeloBase(): def __init__(self, match): domain, path = match.groups() super().__init__(match, "https://" + domain + path) - self.session.headers['Referer'] = self.root + + def _init(self): + self.session.headers['Referer'] = self.root + "/" if self._match_chapter is None: ManganeloBase._match_chapter = re.compile( diff --git a/gallery_dl/extractor/mangasee.py b/gallery_dl/extractor/mangasee.py index dfa9bdf0..00c89c1e 100644 --- a/gallery_dl/extractor/mangasee.py +++ b/gallery_dl/extractor/mangasee.py @@ -90,6 +90,8 @@ class MangaseeChapterExtractor(MangaseeBase, ChapterExtractor): self.category = "mangalife" self.root = "https://manga4life.com" ChapterExtractor.__init__(self, match, self.root + match.group(2)) + + def _init(self): self.session.headers["Referer"] = self.gallery_url domain = self.root.rpartition("/")[2] diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py index ddd34f0d..3bed955c 100644 --- a/gallery_dl/extractor/mastodon.py +++ b/gallery_dl/extractor/mastodon.py @@ -23,8 +23,10 @@ class MastodonExtractor(BaseExtractor): def __init__(self, match): BaseExtractor.__init__(self, match) - self.instance = self.root.partition("://")[2] self.item = match.group(match.lastindex) + + def _init(self): + self.instance = self.root.partition("://")[2] self.reblogs = self.config("reblogs", False) self.replies = self.config("replies", True) diff --git a/gallery_dl/extractor/misskey.py b/gallery_dl/extractor/misskey.py index 37efac07..8c717581 100644 --- a/gallery_dl/extractor/misskey.py +++ b/gallery_dl/extractor/misskey.py @@ -19,9 +19,11 @@ class MisskeyExtractor(BaseExtractor): def __init__(self, match): BaseExtractor.__init__(self, match) + self.item = match.group(match.lastindex) + + def _init(self): self.api = MisskeyAPI(self) self.instance = self.root.rpartition("://")[2] - self.item = 
match.group(match.lastindex) self.renotes = self.config("renotes", False) self.replies = self.config("replies", True) diff --git a/gallery_dl/extractor/myhentaigallery.py b/gallery_dl/extractor/myhentaigallery.py index 5dc4cb60..3301da97 100644 --- a/gallery_dl/extractor/myhentaigallery.py +++ b/gallery_dl/extractor/myhentaigallery.py @@ -38,7 +38,9 @@ class MyhentaigalleryGalleryExtractor(GalleryExtractor): self.gallery_id = match.group(1) url = "{}/gallery/thumbnails/{}".format(self.root, self.gallery_id) GalleryExtractor.__init__(self, match, url) - self.session.headers["Referer"] = url + + def _init(self): + self.session.headers["Referer"] = self.gallery_url def metadata(self, page): extr = text.extract_from(page) diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index e3ea3fc9..8a255280 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -29,6 +29,8 @@ class NewgroundsExtractor(Extractor): Extractor.__init__(self, match) self.user = match.group(1) self.user_root = "https://{}.newgrounds.com".format(self.user) + + def _init(self): self.flash = self.config("flash", True) fmt = self.config("format", "original") @@ -517,6 +519,9 @@ class NewgroundsUserExtractor(NewgroundsExtractor): }), ) + def initialize(self): + pass + def items(self): base = self.user_root + "/" return self._dispatch_extractors(( diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index e822895b..66040d8a 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -21,19 +21,20 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor): archive_fmt = "{image_id}_{num}" def __init__(self, match): - self._init_category(match) + BaseExtractor.__init__(self, match) + self.user_id = text.parse_int(match.group(match.lastindex)) + + def initialize(self): self.cookies_domain = "." 
+ self.root.rpartition("/")[2] self.cookies_names = (self.category + "_tok",) + BaseExtractor.initialize(self) + + self.session.headers["Referer"] = self.root + "/" + self.user_name = None if self.category == "horne": self._extract_data = self._extract_data_horne - BaseExtractor.__init__(self, match) - - self.user_id = text.parse_int(match.group(match.lastindex)) - self.user_name = None - self.session.headers["Referer"] = self.root + "/" - def items(self): self.login() @@ -180,6 +181,9 @@ class NijieUserExtractor(NijieExtractor): ("https://horne.red/members.php?id=58000"), ) + def initialize(self): + pass + def items(self): fmt = "{}/{{}}.php?id={}".format(self.root, self.user_id).format return self._dispatch_extractors(( diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py index 824757ce..f109d258 100644 --- a/gallery_dl/extractor/oauth.py +++ b/gallery_dl/extractor/oauth.py @@ -28,6 +28,8 @@ class OAuthBase(Extractor): def __init__(self, match): Extractor.__init__(self, match) self.client = None + + def _init(self): self.cache = config.get(("extractor", self.category), "cache", True) def oauth_config(self, key, default=None): diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py index 7bccf838..6bc7b9a9 100644 --- a/gallery_dl/extractor/paheal.py +++ b/gallery_dl/extractor/paheal.py @@ -108,6 +108,7 @@ class PahealTagExtractor(PahealExtractor): PahealExtractor.__init__(self, match) self.tags = text.unquote(match.group(1)) + def _init(self): if self.config("metadata"): self._extract_data = self._extract_data_ex diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py index e7188285..8fa5de24 100644 --- a/gallery_dl/extractor/philomena.py +++ b/gallery_dl/extractor/philomena.py @@ -22,8 +22,7 @@ class PhilomenaExtractor(BooruExtractor): page_start = 1 per_page = 50 - def __init__(self, match): - BooruExtractor.__init__(self, match) + def _init(self): self.api = PhilomenaAPI(self) _file_url = 
operator.itemgetter("view_url") diff --git a/gallery_dl/extractor/photobucket.py b/gallery_dl/extractor/photobucket.py index 6234e6a9..22aff884 100644 --- a/gallery_dl/extractor/photobucket.py +++ b/gallery_dl/extractor/photobucket.py @@ -48,9 +48,10 @@ class PhotobucketAlbumExtractor(Extractor): ) def __init__(self, match): - Extractor.__init__(self, match) - self.album_path = "" self.root = "https://" + match.group(1) + Extractor.__init__(self, match) + + def _init(self): self.session.headers["Referer"] = self.url def items(self): @@ -129,6 +130,8 @@ class PhotobucketImageExtractor(Extractor): Extractor.__init__(self, match) self.user = match.group(1) or match.group(3) self.media_id = match.group(2) + + def _init(self): self.session.headers["Referer"] = self.url def items(self): diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index 92e05886..be30705b 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -23,12 +23,10 @@ class PinterestExtractor(Extractor): archive_fmt = "{id}{media_id}" root = "https://www.pinterest.com" - def __init__(self, match): - Extractor.__init__(self, match) - + def _init(self): domain = self.config("domain") if not domain or domain == "auto" : - self.root = text.root_from_url(match.group(0)) + self.root = text.root_from_url(self.url) else: self.root = text.ensure_http_scheme(domain) diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 3cc59acf..ffe8030f 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -28,8 +28,7 @@ class PixivExtractor(Extractor): archive_fmt = "{id}{suffix}.{extension}" cookies_domain = None - def __init__(self, match): - Extractor.__init__(self, match) + def _init(self): self.api = PixivAppAPI(self) self.load_ugoira = self.config("ugoira", True) self.max_posts = self.config("max-posts", 0) @@ -174,6 +173,9 @@ class PixivUserExtractor(PixivExtractor): PixivExtractor.__init__(self, match) 
self.user_id = match.group(1) + def initialize(self): + pass + def items(self): base = "{}/users/{}/".format(self.root, self.user_id) return self._dispatch_extractors(( diff --git a/gallery_dl/extractor/pornpics.py b/gallery_dl/extractor/pornpics.py index 783f3da9..929e0f58 100644 --- a/gallery_dl/extractor/pornpics.py +++ b/gallery_dl/extractor/pornpics.py @@ -23,7 +23,9 @@ class PornpicsExtractor(Extractor): def __init__(self, match): super().__init__(match) self.item = match.group(1) - self.session.headers["Referer"] = self.root + + def _init(self): + self.session.headers["Referer"] = self.root + "/" def items(self): for gallery in self.galleries(): diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py index 1800b68d..ba571bbd 100644 --- a/gallery_dl/extractor/reactor.py +++ b/gallery_dl/extractor/reactor.py @@ -22,18 +22,21 @@ class ReactorExtractor(BaseExtractor): def __init__(self, match): BaseExtractor.__init__(self, match) + url = text.ensure_http_scheme(match.group(0), "http://") pos = url.index("/", 10) - - self.root, self.path = url[:pos], url[pos:] - self.session.headers["Referer"] = self.root - self.gif = self.config("gif", False) + self.root = url[:pos] + self.path = url[pos:] if self.category == "reactor": # set category based on domain name netloc = urllib.parse.urlsplit(self.root).netloc self.category = netloc.rpartition(".")[0] + def _init(self): + self.session.headers["Referer"] = self.root + self.gif = self.config("gif", False) + def items(self): data = self.metadata() yield Message.Directory, data diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py index c924e0a3..c68068cb 100644 --- a/gallery_dl/extractor/readcomiconline.py +++ b/gallery_dl/extractor/readcomiconline.py @@ -57,8 +57,10 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor): def __init__(self, match): ChapterExtractor.__init__(self, match) + self.params = match.group(2) - params = 
text.parse_query(match.group(2)) + def _init(self): + params = text.parse_query(self.params) quality = self.config("quality") if quality is None or quality == "auto": diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py index 9109e8dc..abd21b30 100644 --- a/gallery_dl/extractor/redgifs.py +++ b/gallery_dl/extractor/redgifs.py @@ -24,6 +24,8 @@ class RedgifsExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) self.key = match.group(1) + + def _init(self): self.api = RedgifsAPI(self) formats = self.config("format") diff --git a/gallery_dl/extractor/rule34us.py b/gallery_dl/extractor/rule34us.py index 00b6972d..88331eaa 100644 --- a/gallery_dl/extractor/rule34us.py +++ b/gallery_dl/extractor/rule34us.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Mike Fährmann +# Copyright 2021-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -19,8 +19,7 @@ class Rule34usExtractor(BooruExtractor): root = "https://rule34.us" per_page = 42 - def __init__(self, match): - BooruExtractor.__init__(self, match) + def _init(self): self._find_tags = re.compile( r'
  • ]*>", "") diff --git a/gallery_dl/extractor/shimmie2.py b/gallery_dl/extractor/shimmie2.py index b0dd9bbd..de6c8a15 100644 --- a/gallery_dl/extractor/shimmie2.py +++ b/gallery_dl/extractor/shimmie2.py @@ -18,21 +18,20 @@ class Shimmie2Extractor(BaseExtractor): filename_fmt = "{category}_{id}{md5:?_//}.{extension}" archive_fmt = "{id}" - def __init__(self, match): - BaseExtractor.__init__(self, match) - + def _init(self): try: instance = INSTANCES[self.category] except KeyError: - pass - else: - cookies = instance.get("cookies") - if cookies: - domain = self.root.rpartition("/")[2] - self.cookies_update_dict(cookies, domain=domain) - file_url = instance.get("file_url") - if file_url: - self.file_url_fmt = file_url + return + + cookies = instance.get("cookies") + if cookies: + domain = self.root.rpartition("/")[2] + self.cookies_update_dict(cookies, domain=domain) + + file_url = instance.get("file_url") + if file_url: + self.file_url_fmt = file_url def items(self): data = self.metadata() diff --git a/gallery_dl/extractor/simplyhentai.py b/gallery_dl/extractor/simplyhentai.py index b5d116fd..d1ccc492 100644 --- a/gallery_dl/extractor/simplyhentai.py +++ b/gallery_dl/extractor/simplyhentai.py @@ -40,7 +40,9 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor): path = "/" + subdomain.rstrip(".") + path url = "https://old.simply-hentai.com" + path GalleryExtractor.__init__(self, match, url) - self.session.headers["Referer"] = url + + def _init(self): + self.session.headers["Referer"] = self.gallery_url def metadata(self, page): extr = text.extract_from(page) diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py index 3724c859..b643c6f2 100644 --- a/gallery_dl/extractor/skeb.py +++ b/gallery_dl/extractor/skeb.py @@ -22,6 +22,8 @@ class SkebExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) self.user_name = match.group(1) + + def _init(self): self.thumbnails = self.config("thumbnails", False) self.article = 
self.config("article", False) diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py index e30c4911..b9edd4ab 100644 --- a/gallery_dl/extractor/smugmug.py +++ b/gallery_dl/extractor/smugmug.py @@ -34,8 +34,7 @@ class SmugmugExtractor(Extractor): "Uris": None, } - def __init__(self, match): - Extractor.__init__(self, match) + def _init(self): self.api = SmugmugAPI(self) self.videos = self.config("videos", True) self.session = self.api.session diff --git a/gallery_dl/extractor/szurubooru.py b/gallery_dl/extractor/szurubooru.py index 4b15b144..8c816ad1 100644 --- a/gallery_dl/extractor/szurubooru.py +++ b/gallery_dl/extractor/szurubooru.py @@ -20,8 +20,7 @@ class SzurubooruExtractor(booru.BooruExtractor): filename_fmt = "{id}_{version}_{checksumMD5}.{extension}" per_page = 100 - def __init__(self, match): - booru.BooruExtractor.__init__(self, match) + def _init(self): self.headers = { "Accept": "application/json", "Content-Type": "application/json", diff --git a/gallery_dl/extractor/tapas.py b/gallery_dl/extractor/tapas.py index ec4a249c..0e09e22a 100644 --- a/gallery_dl/extractor/tapas.py +++ b/gallery_dl/extractor/tapas.py @@ -26,8 +26,7 @@ class TapasExtractor(Extractor): cookies_names = ("_cpc_",) _cache = None - def __init__(self, match): - Extractor.__init__(self, match) + def _init(self): if self._cache is None: TapasExtractor._cache = {} diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index f42da488..12ea39f8 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -42,6 +42,7 @@ class TumblrExtractor(Extractor): else: self.blog = match.group(1) or match.group(3) + def _init(self): self.api = TumblrAPI(self) self.types = self._setup_posttypes() self.avatar = self.config("avatar", False) diff --git a/gallery_dl/extractor/twibooru.py b/gallery_dl/extractor/twibooru.py index a8acd319..c3e0a262 100644 --- a/gallery_dl/extractor/twibooru.py +++ b/gallery_dl/extractor/twibooru.py @@ 
-26,8 +26,7 @@ class TwibooruExtractor(BooruExtractor): per_page = 50 root = "https://twibooru.org" - def __init__(self, match): - BooruExtractor.__init__(self, match) + def _init(self): self.api = TwibooruAPI(self) _file_url = operator.itemgetter("view_url") diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 7e420799..478b6d38 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -32,6 +32,8 @@ class TwitterExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) self.user = match.group(1) + + def _init(self): self.textonly = self.config("text-tweets", False) self.retweets = self.config("retweets", False) self.replies = self.config("replies", True) @@ -490,6 +492,9 @@ class TwitterUserExtractor(TwitterExtractor): if user_id: self.user = "id:" + user_id + def initialize(self): + pass + def items(self): base = "{}/{}/".format(self.root, self.user) return self._dispatch_extractors(( diff --git a/gallery_dl/extractor/urlshortener.py b/gallery_dl/extractor/urlshortener.py index 972b508d..4b49a638 100644 --- a/gallery_dl/extractor/urlshortener.py +++ b/gallery_dl/extractor/urlshortener.py @@ -54,6 +54,7 @@ class UrlshortenerLinkExtractor(UrlshortenerExtractor): UrlshortenerExtractor.__init__(self, match) self.id = match.group(match.lastindex) + def _init(self): try: self.headers = INSTANCES[self.category]["headers"] except Exception: diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py index d8aa6cdb..084f9b25 100644 --- a/gallery_dl/extractor/vipergirls.py +++ b/gallery_dl/extractor/vipergirls.py @@ -26,9 +26,8 @@ class VipergirlsExtractor(Extractor): cookies_domain = ".vipergirls.to" cookies_names = ("vg_userid", "vg_password") - def __init__(self, match): - Extractor.__init__(self, match) - self.session.headers["Referer"] = self.root + def _init(self): + self.session.headers["Referer"] = self.root + "/" def items(self): self.login() diff --git 
a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py index 0ba0d910..9e271098 100644 --- a/gallery_dl/extractor/wallhaven.py +++ b/gallery_dl/extractor/wallhaven.py @@ -20,8 +20,7 @@ class WallhavenExtractor(Extractor): archive_fmt = "{id}" request_interval = 1.4 - def __init__(self, match): - Extractor.__init__(self, match) + def _init(self): self.api = WallhavenAPI(self) def items(self): @@ -109,6 +108,9 @@ class WallhavenUserExtractor(WallhavenExtractor): WallhavenExtractor.__init__(self, match) self.username = match.group(1) + def initialize(self): + pass + def items(self): base = "{}/user/{}/".format(self.root, self.username) return self._dispatch_extractors(( diff --git a/gallery_dl/extractor/weasyl.py b/gallery_dl/extractor/weasyl.py index eca4f1ad..c4d242a1 100644 --- a/gallery_dl/extractor/weasyl.py +++ b/gallery_dl/extractor/weasyl.py @@ -30,8 +30,7 @@ class WeasylExtractor(Extractor): return True return False - def __init__(self, match): - Extractor.__init__(self, match) + def _init(self): self.session.headers['X-Weasyl-API-Key'] = self.config("api-key") def request_submission(self, submitid): diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py index 7b3e8033..31592420 100644 --- a/gallery_dl/extractor/webtoons.py +++ b/gallery_dl/extractor/webtoons.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2020 Leonardo Taccari -# Copyright 2021-2022 Mike Fährmann +# Copyright 2021-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -71,15 +71,18 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): ) def __init__(self, match): - self.path, self.lang, self.genre, self.comic, query = match.groups() + self.path, self.lang, self.genre, self.comic, self.query = \ + match.groups() - url = "{}/{}/viewer?{}".format(self.root, self.path, query) + url = "{}/{}/viewer?{}".format(self.root, 
self.path, self.query) GalleryExtractor.__init__(self, match, url) + + def _init(self): self.setup_agegate_cookies() - query = text.parse_query(query) - self.title_no = query.get("title_no") - self.episode_no = query.get("episode_no") + params = text.parse_query(self.query) + self.title_no = params.get("title_no") + self.episode_no = params.get("episode_no") def metadata(self, page): keywords, pos = text.extract( @@ -141,12 +144,15 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor): def __init__(self, match): Extractor.__init__(self, match) + self.path, self.lang, self.genre, self.comic, self.query = \ + match.groups() + + def _init(self): self.setup_agegate_cookies() - self.path, self.lang, self.genre, self.comic, query = match.groups() - query = text.parse_query(query) - self.title_no = query.get("title_no") - self.page_no = text.parse_int(query.get("page"), 1) + params = text.parse_query(self.query) + self.title_no = params.get("title_no") + self.page_no = text.parse_int(params.get("page"), 1) def items(self): page = None diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py index 2de7a2fc..ae0fc4e3 100644 --- a/gallery_dl/extractor/weibo.py +++ b/gallery_dl/extractor/weibo.py @@ -28,6 +28,8 @@ class WeiboExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) self._prefix, self.user = match.groups() + + def _init(self): self.retweets = self.config("retweets", True) self.videos = self.config("videos", True) self.livephoto = self.config("livephoto", True) @@ -228,6 +230,9 @@ class WeiboUserExtractor(WeiboExtractor): ("https://www.weibo.com/p/1003062314621010/home"), ) + def initialize(self): + pass + def items(self): base = "{}/u/{}?tabtype=".format(self.root, self._user_id()) return self._dispatch_extractors(( diff --git a/test/test_cookies.py b/test/test_cookies.py index 5a4fbe65..a6ad05f1 100644 --- a/test/test_cookies.py +++ b/test/test_cookies.py @@ -46,8 +46,7 @@ class TestCookiejar(unittest.TestCase): def 
test_cookiefile(self): config.set((), "cookies", self.cookiefile) - - cookies = extractor.find("test:").cookies + cookies = _get_extractor("test").cookies self.assertEqual(len(cookies), 1) cookie = next(iter(cookies)) @@ -65,12 +64,14 @@ class TestCookiejar(unittest.TestCase): def _test_warning(self, filename, exc): config.set((), "cookies", filename) log = logging.getLogger("test") + with mock.patch.object(log, "warning") as mock_warning: - cookies = extractor.find("test:").cookies - self.assertEqual(len(cookies), 0) - self.assertEqual(mock_warning.call_count, 1) - self.assertEqual(mock_warning.call_args[0][0], "cookies: %s") - self.assertIsInstance(mock_warning.call_args[0][1], exc) + cookies = _get_extractor("test").cookies + + self.assertEqual(len(cookies), 0) + self.assertEqual(mock_warning.call_count, 1) + self.assertEqual(mock_warning.call_args[0][0], "cookies: %s") + self.assertIsInstance(mock_warning.call_args[0][1], exc) class TestCookiedict(unittest.TestCase): @@ -83,7 +84,8 @@ class TestCookiedict(unittest.TestCase): config.clear() def test_dict(self): - cookies = extractor.find("test:").cookies + cookies = _get_extractor("test").cookies + self.assertEqual(len(cookies), len(self.cdict)) self.assertEqual(sorted(cookies.keys()), sorted(self.cdict.keys())) self.assertEqual(sorted(cookies.values()), sorted(self.cdict.values())) @@ -122,7 +124,7 @@ class TestCookieLogin(unittest.TestCase): class TestCookieUtils(unittest.TestCase): def test_check_cookies(self): - extr = extractor.find("test:") + extr = _get_extractor("test") self.assertFalse(extr.cookies, "empty") self.assertFalse(extr.cookies_domain, "empty") @@ -144,7 +146,7 @@ class TestCookieUtils(unittest.TestCase): self.assertFalse(extr.cookies_check(("a", "b", "c"))) def test_check_cookies_domain(self): - extr = extractor.find("test:") + extr = _get_extractor("test") self.assertFalse(extr.cookies, "empty") extr.cookies_domain = ".example.org" @@ -166,7 +168,7 @@ class 
TestCookieUtils(unittest.TestCase): self.assertTrue(extr.cookies_check(("a", "b", "c"))) def test_check_cookies_expires(self): - extr = extractor.find("test:") + extr = _get_extractor("test") self.assertFalse(extr.cookies, "empty") self.assertFalse(extr.cookies_domain, "empty") @@ -200,13 +202,18 @@ class TestCookieUtils(unittest.TestCase): def _get_extractor(category): - URLS = { - "exhentai" : "https://exhentai.org/g/1200119/d55c44d3d0/", - "idolcomplex": "https://idol.sankakucomplex.com/post/show/1", - "nijie" : "https://nijie.info/view.php?id=1", - "horne" : "https://horne.red/view.php?id=1", - } - return extractor.find(URLS[category]) + extr = extractor.find(URLS[category]) + extr.initialize() + return extr + + +URLS = { + "exhentai" : "https://exhentai.org/g/1200119/d55c44d3d0/", + "idolcomplex": "https://idol.sankakucomplex.com/post/show/1", + "nijie" : "https://nijie.info/view.php?id=1", + "horne" : "https://horne.red/view.php?id=1", + "test" : "test:", +} if __name__ == "__main__": diff --git a/test/test_downloader.py b/test/test_downloader.py index c65be952..840e0780 100644 --- a/test/test_downloader.py +++ b/test/test_downloader.py @@ -34,6 +34,7 @@ class FakeJob(): def __init__(self): self.extractor = extractor.find("test:") + self.extractor.initialize() self.pathfmt = path.PathFormat(self.extractor) self.out = output.NullOutput() self.get_logger = logging.getLogger