implement 'downloader' options per extractor category

by setting options inside 'http' or 'ytdl' inside extractor options
or inside subcategory options

{
    "extractor": {
        "mastodon": {
            "http": {
                "rate": "10k"
            }
        },
        "mastodon.social": {
            "http": {
                "rate": "100k"
            }
        }
    },
    "downloader": {
        "rate": "100m"
    }
}

Sets download speed to
- 100k for mastodon.social URLs
-  10k for mastodon sites in general
- 100m for all other sites
This commit is contained in:
Mike Fährmann
2025-02-21 19:11:50 +01:00
parent 4906541f7d
commit 18ed39c1cf
2 changed files with 108 additions and 4 deletions

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2022 Mike Fährmann
# Copyright 2014-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -17,8 +17,15 @@ class DownloaderBase():
scheme = ""
def __init__(self, job):
extractor = job.extractor
opts = self._extractor_config(extractor)
if opts:
self.opts = opts
self.config = self.config_opts
self.out = job.out
self.session = job.extractor.session
self.session = extractor.session
self.part = self.config("part", True)
self.partdir = self.config("part-directory")
self.log = job.get_logger("downloader." + self.scheme)
@@ -29,7 +36,7 @@ class DownloaderBase():
proxies = self.config("proxy", util.SENTINEL)
if proxies is util.SENTINEL:
self.proxies = job.extractor._proxies
self.proxies = extractor._proxies
else:
self.proxies = util.build_proxy_map(proxies, self.log)
@@ -37,5 +44,43 @@ class DownloaderBase():
"""Interpolate downloader config value for 'key'"""
return config.interpolate(("downloader", self.scheme), key, default)
def config_opts(self, key, default=None):
    """Interpolate downloader config value for 'key',
    preferring a value stored in 'self.opts'"""
    override = self.opts.get(key, util.SENTINEL)
    if override is util.SENTINEL:
        # 'key' is not present in 'self.opts':
        # fall back to the regular downloader config lookup
        return config.interpolate(("downloader", self.scheme), key, default)
    return override
def _extractor_config(self, extractor):
    """Collect downloader options for 'extractor' from its config path"""
    cfgpath = extractor._cfgpath

    if isinstance(cfgpath, list):
        # A list holds (category, subcategory) pairs. Walk it back to
        # front so that options found for earlier entries overwrite
        # the ones found for later entries.
        merged = {}
        for category, subcategory in reversed(cfgpath):
            found = self._extractor_opts(category, subcategory)
            if found:
                merged.update(found)
        return merged

    # Any other path: its elements 1 and 2 are used as
    # category and subcategory for a single lookup
    return self._extractor_opts(cfgpath[1], cfgpath[2])
def _extractor_opts(self, category, subcategory):
    """Return downloader options found in the config of 'category'

    Reads the section named after 'self.scheme' from the
    ("extractor", category) config and from its 'subcategory' section.
    If both hold options, a copy of the category options updated with
    the subcategory options is returned; otherwise whichever one exists.
    The result is falsy (usually None) when no options are found.
    """
    cfg = config.get(("extractor",), category)
    if not cfg:
        return None

    copts = cfg.get(self.scheme)

    # A key named like the subcategory does not have to be a section;
    # it can just as well be a plain option value (string, bool, null).
    # Such a value has no '.get()', so treat it as "no subcategory
    # options" instead of raising AttributeError.
    # NOTE: nothing is logged here since this method runs from
    # '__init__' before 'self.log' has been assigned.
    scfg = cfg.get(subcategory)
    sopts = scfg.get(self.scheme) if isinstance(scfg, dict) else None

    if not copts:
        return sopts
    if not sopts:
        return copts

    # merge into a copy so the stored config objects stay untouched
    opts = copts.copy()
    opts.update(sopts)
    return opts
def download(self, url, pathfmt):
"""Write data from 'url' into the file specified by 'pathfmt'"""

View File

@@ -20,7 +20,6 @@ import tempfile
import threading
import http.server
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from gallery_dl import downloader, extractor, output, config, path # noqa E402
from gallery_dl.downloader.http import MIME_TYPES, SIGNATURE_CHECKS # noqa E402
@@ -55,6 +54,9 @@ class TestDownloaderModule(unittest.TestCase):
else:
del sys.modules["youtube_dl"]
# Empty 'downloader._cache' before each test method runs
def setUp(self):
downloader._cache.clear()
def tearDown(self):
downloader._cache.clear()
@@ -107,6 +109,63 @@ class TestDownloaderModule(unittest.TestCase):
self.assertEqual(import_module.call_count, 1)
class TestDownloaderConfig(unittest.TestCase):
    """Check the option values an 'http' downloader object ends up with"""

    def setUp(self):
        config.clear()

    def tearDown(self):
        config.clear()

    def test_default_http(self):
        """Downloader attributes with a cleared config"""
        job = FakeJob()
        extr = job.extractor
        dl = downloader.find("http")(job)

        # attributes compared by value
        expected = (
            ("adjust_extension", True),
            ("chunk_size", 32768),
            ("metadata", None),
            ("progress", 3.0),
            ("validate", True),
            ("headers", None),
            ("minsize", None),
            ("maxsize", None),
            ("mtime", True),
            ("rate", None),
            ("part", True),
            ("partdir", None),
        )
        for attr, value in expected:
            self.assertEqual(getattr(dl, attr), value)

        # attributes that must be the very objects stored on the
        # extractor under the same name with a leading underscore
        shared = (
            "interval_429",
            "retry_codes",
            "retries",
            "timeout",
            "proxies",
            "verify",
        )
        for attr in shared:
            self.assertIs(getattr(dl, attr), getattr(extr, "_" + attr))

    def test_config_http(self):
        """Downloader attributes with values set in several config sections"""
        settings = (
            ((), "rate", 42),
            ((), "mtime", False),
            ((), "headers", {"foo": "bar"}),
            (("downloader",), "retries", -1),
            (("downloader", "http"), "filesize-min", "10k"),
            (("extractor", "generic"), "verify", False),
            (("extractor", "generic", "example.org"), "timeout", 10),
            (("extractor", "generic", "http"), "rate", "1k"),
            (("extractor", "generic", "example.org", "http"), "headers", {}),
        )
        for path, key, value in settings:
            config.set(path, key, value)

        job = FakeJob()
        dl = downloader.find("http")(job)

        expected = (
            ("headers", {}),
            ("minsize", 10240),
            ("retries", float("inf")),
            ("timeout", 10),
            ("verify", False),
            ("mtime", False),
            ("rate", 1024),
        )
        for attr, value in expected:
            self.assertEqual(getattr(dl, attr), value)
class TestDownloaderBase(unittest.TestCase):
@classmethod