make 'path' and 'keywords' available in logging messages

Wrap all loggers used by job, extractor, downloader, and postprocessor
objects into a (custom) LoggerAdapter that provides access to the
underlying job, extractor, pathfmt, and kwdict objects and their
properties.

__init__() signatures for all downloader and postprocessor classes have
been changed to take the current Job object as their first argument,
instead of the current extractor or pathfmt.

(#574, #575)
This commit is contained in:
Mike Fährmann
2020-05-18 01:35:53 +02:00
parent 846d3a2466
commit ece73b5b2a
17 changed files with 149 additions and 97 deletions

View File

@@ -9,7 +9,6 @@
"""Common classes and constants used by downloader modules.""" """Common classes and constants used by downloader modules."""
import os import os
import logging
from .. import config, util from .. import config, util
@@ -17,15 +16,11 @@ class DownloaderBase():
"""Base class for downloaders""" """Base class for downloaders"""
scheme = "" scheme = ""
def __init__(self, extractor, output): def __init__(self, job):
self.session = extractor.session self.out = job.out
self.out = output
self.part = self.config("part", True) self.part = self.config("part", True)
self.partdir = self.config("part-directory") self.partdir = self.config("part-directory")
self.log = job.get_logger("downloader." + self.scheme)
self.log = logging.getLogger("downloader." + self.scheme)
self.log.job = extractor.log.job
self.log.extractor = extractor
if self.partdir: if self.partdir:
self.partdir = util.expand_path(self.partdir) self.partdir = util.expand_path(self.partdir)

View File

@@ -24,16 +24,19 @@ except ImportError:
class HttpDownloader(DownloaderBase): class HttpDownloader(DownloaderBase):
scheme = "http" scheme = "http"
def __init__(self, extractor, output): def __init__(self, job):
DownloaderBase.__init__(self, extractor, output) DownloaderBase.__init__(self, job)
extractor = job.extractor
self.session = extractor.session
self.chunk_size = 16384
self.downloading = False
self.adjust_extension = self.config("adjust-extensions", True) self.adjust_extension = self.config("adjust-extensions", True)
self.retries = self.config("retries", extractor._retries) self.retries = self.config("retries", extractor._retries)
self.timeout = self.config("timeout", extractor._timeout) self.timeout = self.config("timeout", extractor._timeout)
self.verify = self.config("verify", extractor._verify) self.verify = self.config("verify", extractor._verify)
self.mtime = self.config("mtime", True) self.mtime = self.config("mtime", True)
self.rate = self.config("rate") self.rate = self.config("rate")
self.downloading = False
self.chunk_size = 16384
if self.retries < 0: if self.retries < 0:
self.retries = float("inf") self.retries = float("inf")

View File

@@ -17,8 +17,9 @@ import os
class YoutubeDLDownloader(DownloaderBase): class YoutubeDLDownloader(DownloaderBase):
scheme = "ytdl" scheme = "ytdl"
def __init__(self, extractor, output): def __init__(self, job):
DownloaderBase.__init__(self, extractor, output) DownloaderBase.__init__(self, job)
extractor = job.extractor
retries = self.config("retries", extractor._retries) retries = self.config("retries", extractor._retries)
options = { options = {

View File

@@ -24,10 +24,16 @@ class Job():
extr = extractor.find(extr) extr = extractor.find(extr)
if not extr: if not extr:
raise exception.NoExtractorError() raise exception.NoExtractorError()
self.extractor = extr self.extractor = extr
extr.log.extractor = extr self.pathfmt = None
extr.log.job = self
self._logger_extra = {
"job" : self,
"extractor": extr,
"path" : output.PathfmtProxy(self),
"keywords" : output.KwdictProxy(self),
}
extr.log = self._wrap_logger(extr.log)
extr.log.debug("Using %s for '%s'", extr.__class__.__name__, extr.url) extr.log.debug("Using %s for '%s'", extr.__class__.__name__, extr.url)
self.status = 0 self.status = 0
@@ -171,6 +177,12 @@ class Job():
return util.build_predicate(predicates) return util.build_predicate(predicates)
def get_logger(self, name):
    """Return the logger called 'name', wrapped via _wrap_logger()"""
    logger = logging.getLogger(name)
    return self._wrap_logger(logger)
def _wrap_logger(self, logger):
    """Wrap 'logger' in an output.LoggerAdapter using '_logger_extra'"""
    adapter = output.LoggerAdapter(logger, self._logger_extra)
    return adapter
def _write_unsupported(self, url): def _write_unsupported(self, url):
if self.ulog: if self.ulog:
self.ulog.info(url) self.ulog.info(url)
@@ -181,8 +193,7 @@ class DownloadJob(Job):
def __init__(self, url, parent=None): def __init__(self, url, parent=None):
Job.__init__(self, url, parent) Job.__init__(self, url, parent)
self.log = logging.getLogger("download") self.log = self.get_logger("download")
self.pathfmt = None
self.archive = None self.archive = None
self.sleep = None self.sleep = None
self.downloaders = {} self.downloaders = {}
@@ -331,7 +342,7 @@ class DownloadJob(Job):
cls = downloader.find(scheme) cls = downloader.find(scheme)
if cls and config.get(("downloader", cls.scheme), "enabled", True): if cls and config.get(("downloader", cls.scheme), "enabled", True):
instance = cls(self.extractor, self.out) instance = cls(self)
else: else:
instance = None instance = None
self.log.error("'%s:' URLs are not supported/enabled", scheme) self.log.error("'%s:' URLs are not supported/enabled", scheme)
@@ -383,6 +394,7 @@ class DownloadJob(Job):
postprocessors = config("postprocessors") postprocessors = config("postprocessors")
if postprocessors: if postprocessors:
pp_log = self.get_logger("postprocessor")
pp_list = [] pp_list = []
category = self.extractor.category category = self.extractor.category
@@ -395,14 +407,13 @@ class DownloadJob(Job):
name = pp_dict.get("name") name = pp_dict.get("name")
pp_cls = postprocessor.find(name) pp_cls = postprocessor.find(name)
if not pp_cls: if not pp_cls:
postprocessor.log.warning("module '%s' not found", name) pp_log.warning("module '%s' not found", name)
continue continue
try: try:
pp_obj = pp_cls(pathfmt, pp_dict) pp_obj = pp_cls(self, pp_dict)
except Exception as exc: except Exception as exc:
postprocessor.log.error( pp_log.error("'%s' initialization failed: %s: %s",
"'%s' initialization failed: %s: %s", name, exc.__class__.__name__, exc)
name, exc.__class__.__name__, exc)
else: else:
pp_list.append(pp_obj) pp_list.append(pp_obj)

View File

@@ -22,19 +22,68 @@ LOG_LEVEL = logging.INFO
class Logger(logging.Logger): class Logger(logging.Logger):
"""Custom logger that includes extractor and job info in log records""" """Custom logger that includes extra info in log records"""
extractor = util.NONE
job = util.NONE
def makeRecord(self, name, level, fn, lno, msg, args, exc_info, def makeRecord(self, name, level, fn, lno, msg, args, exc_info,
func=None, extra=None, sinfo=None, func=None, extra=None, sinfo=None,
factory=logging._logRecordFactory): factory=logging._logRecordFactory):
rv = factory(name, level, fn, lno, msg, args, exc_info, func, sinfo) rv = factory(name, level, fn, lno, msg, args, exc_info, func, sinfo)
rv.extractor = self.extractor if extra:
rv.job = self.job rv.__dict__.update(extra)
return rv return rv
class LoggerAdapter():
    """Trimmed-down version of logging.LoggerAdapter

    Forwards debug(), info(), warning(), and error() calls to the wrapped
    logger and attaches the 'extra' mapping given at construction time
    to every log record created through this adapter.

    An 'extra' mapping passed to an individual call is no longer
    discarded: its keys are added to the record as well, while keys
    provided by the adapter itself keep their adapter-supplied values.
    """
    __slots__ = ("logger", "extra")

    def __init__(self, logger, extra):
        """Wrap 'logger' and remember 'extra' for all future log calls"""
        self.logger = logger
        self.extra = extra

    def _merge_extra(self, kwargs):
        """Set kwargs["extra"] to the effective mapping and return kwargs"""
        call_extra = kwargs.get("extra")
        if call_extra:
            # copy, so neither the caller's dict nor self.extra is mutated;
            # adapter values are applied last and therefore take precedence
            merged = dict(call_extra)
            merged.update(self.extra)
            kwargs["extra"] = merged
        else:
            kwargs["extra"] = self.extra
        return kwargs

    def debug(self, msg, *args, **kwargs):
        """Log 'msg % args' with level DEBUG"""
        if self.logger.isEnabledFor(logging.DEBUG):
            self.logger._log(
                logging.DEBUG, msg, args, **self._merge_extra(kwargs))

    def info(self, msg, *args, **kwargs):
        """Log 'msg % args' with level INFO"""
        if self.logger.isEnabledFor(logging.INFO):
            self.logger._log(
                logging.INFO, msg, args, **self._merge_extra(kwargs))

    def warning(self, msg, *args, **kwargs):
        """Log 'msg % args' with level WARNING"""
        if self.logger.isEnabledFor(logging.WARNING):
            self.logger._log(
                logging.WARNING, msg, args, **self._merge_extra(kwargs))

    def error(self, msg, *args, **kwargs):
        """Log 'msg % args' with level ERROR"""
        if self.logger.isEnabledFor(logging.ERROR):
            self.logger._log(
                logging.ERROR, msg, args, **self._merge_extra(kwargs))
class PathfmtProxy():
    """Attribute proxy for the 'pathfmt' object of a job

    Every attribute access is answered from the instance __dict__ of
    the job's current 'pathfmt'. The result is None if the job has no
    (truthy) 'pathfmt' or if it has no instance attribute of that name.
    """
    __slots__ = ("job",)

    def __init__(self, job):
        self.job = job

    def __getattribute__(self, name):
        # go through object.__getattribute__ to reach the real 'job' slot
        # instead of recursing into this method
        job = object.__getattribute__(self, "job")
        pathfmt = job.pathfmt
        if not pathfmt:
            return None
        return pathfmt.__dict__.get(name)
class KwdictProxy():
    """Attribute proxy for the 'kwdict' of a job's 'pathfmt' object

    Every attribute access is turned into a 'kwdict.get(name)' lookup
    on the job's current 'pathfmt'. The result is None if the job has no
    (truthy) 'pathfmt' or if 'kwdict' has no entry of that name.
    """
    __slots__ = ("job",)

    def __init__(self, job):
        self.job = job

    def __getattribute__(self, name):
        # go through object.__getattribute__ to reach the real 'job' slot
        # instead of recursing into this method
        job = object.__getattribute__(self, "job")
        pathfmt = job.pathfmt
        if not pathfmt:
            return None
        return pathfmt.kwdict.get(name)
class Formatter(logging.Formatter): class Formatter(logging.Formatter):
"""Custom formatter that supports different formats per loglevel""" """Custom formatter that supports different formats per loglevel"""

View File

@@ -9,7 +9,6 @@
"""Post-processing modules""" """Post-processing modules"""
import importlib import importlib
import logging
modules = [ modules = [
"classify", "classify",
@@ -21,8 +20,6 @@ modules = [
"zip", "zip",
] ]
log = logging.getLogger("postprocessor")
def find(name): def find(name):
"""Return a postprocessor class with the given name""" """Return a postprocessor class with the given name"""

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2018 Mike Fährmann # Copyright 2018-2020 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -22,8 +22,8 @@ class ClassifyPP(PostProcessor):
"Archives" : ("zip", "rar", "7z", "tar", "gz", "bz2"), "Archives" : ("zip", "rar", "7z", "tar", "gz", "bz2"),
} }
def __init__(self, pathfmt, options): def __init__(self, job, options):
PostProcessor.__init__(self) PostProcessor.__init__(self, job)
mapping = options.get("mapping", self.DEFAULT_MAPPING) mapping = options.get("mapping", self.DEFAULT_MAPPING)
self.mapping = { self.mapping = {

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2018-2019 Mike Fährmann # Copyright 2018-2020 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -8,15 +8,13 @@
"""Common classes and constants used by postprocessor modules.""" """Common classes and constants used by postprocessor modules."""
import logging
class PostProcessor(): class PostProcessor():
"""Base class for postprocessors""" """Base class for postprocessors"""
def __init__(self): def __init__(self, job):
name = self.__class__.__name__[:-2].lower() name = self.__class__.__name__[:-2].lower()
self.log = logging.getLogger("postprocessor." + name) self.log = job.get_logger("postprocessor." + name)
@staticmethod @staticmethod
def prepare(pathfmt): def prepare(pathfmt):

View File

@@ -14,8 +14,8 @@ import os
class ComparePP(PostProcessor): class ComparePP(PostProcessor):
def __init__(self, pathfmt, options): def __init__(self, job, options):
PostProcessor.__init__(self) PostProcessor.__init__(self, job)
if options.get("action") == "enumerate": if options.get("action") == "enumerate":
self.run = self._run_enumerate self.run = self._run_enumerate
if options.get("shallow"): if options.get("shallow"):

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2018-2019 Mike Fährmann # Copyright 2018-2020 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -23,8 +23,8 @@ else:
class ExecPP(PostProcessor): class ExecPP(PostProcessor):
def __init__(self, pathfmt, options): def __init__(self, job, options):
PostProcessor.__init__(self) PostProcessor.__init__(self, job)
args = options["command"] args = options["command"]
final = options.get("final", False) final = options.get("final", False)

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2019 Mike Fährmann # Copyright 2019-2020 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -15,8 +15,8 @@ import os
class MetadataPP(PostProcessor): class MetadataPP(PostProcessor):
def __init__(self, pathfmt, options): def __init__(self, job, options):
PostProcessor.__init__(self) PostProcessor.__init__(self, job)
mode = options.get("mode", "json") mode = options.get("mode", "json")
if mode == "custom": if mode == "custom":

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2019 Mike Fährmann # Copyright 2019-2020 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -14,8 +14,8 @@ from ..text import parse_int
class MtimePP(PostProcessor): class MtimePP(PostProcessor):
def __init__(self, pathfmt, options): def __init__(self, job, options):
PostProcessor.__init__(self) PostProcessor.__init__(self, job)
self.key = options.get("key", "date") self.key = options.get("key", "date")
def run(self, pathfmt): def run(self, pathfmt):

View File

@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2018 Mike Fährmann # Copyright 2018-2020 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Convert pixiv ugoira to webm""" """Convert Pixiv Ugoira to WebM"""
from .common import PostProcessor from .common import PostProcessor
from .. import util from .. import util
@@ -19,8 +19,8 @@ import os
class UgoiraPP(PostProcessor): class UgoiraPP(PostProcessor):
def __init__(self, pathfmt, options): def __init__(self, job, options):
PostProcessor.__init__(self) PostProcessor.__init__(self, job)
self.extension = options.get("extension") or "webm" self.extension = options.get("extension") or "webm"
self.args = options.get("ffmpeg-args") or () self.args = options.get("ffmpeg-args") or ()
self.twopass = options.get("ffmpeg-twopass", False) self.twopass = options.get("ffmpeg-twopass", False)

View File

@@ -22,8 +22,8 @@ class ZipPP(PostProcessor):
"lzma" : zipfile.ZIP_LZMA, "lzma" : zipfile.ZIP_LZMA,
} }
def __init__(self, pathfmt, options): def __init__(self, job, options):
PostProcessor.__init__(self) PostProcessor.__init__(self, job)
self.delete = not options.get("keep-files", False) self.delete = not options.get("keep-files", False)
ext = "." + options.get("extension", "zip") ext = "." + options.get("extension", "zip")
algorithm = options.get("compression", "store") algorithm = options.get("compression", "store")
@@ -33,7 +33,7 @@ class ZipPP(PostProcessor):
algorithm) algorithm)
algorithm = "store" algorithm = "store"
self.path = pathfmt.realdirectory self.path = job.pathfmt.realdirectory
args = (self.path[:-1] + ext, "a", args = (self.path[:-1] + ext, "a",
self.COMPRESSION_ALGORITHMS[algorithm], True) self.COMPRESSION_ALGORITHMS[algorithm], True)

View File

@@ -726,6 +726,7 @@ class PathFormat():
def set_directory(self, kwdict): def set_directory(self, kwdict):
"""Build directory path and create it if necessary""" """Build directory path and create it if necessary"""
self.kwdict = kwdict
windows = os.name == "nt" windows = os.name == "nt"
# Build path segments by applying 'kwdict' to directory format strings # Build path segments by applying 'kwdict' to directory format strings

View File

@@ -14,21 +14,30 @@ from unittest.mock import Mock, MagicMock, patch
import re import re
import base64 import base64
import logging
import os.path import os.path
import tempfile import tempfile
import threading import threading
import http.server import http.server
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from gallery_dl import downloader, extractor, config, util # noqa E402 from gallery_dl import downloader, extractor, output, config, util # noqa E402
from gallery_dl.downloader.common import DownloaderBase # noqa E402
from gallery_dl.output import NullOutput # noqa E402
class MockDownloaderModule(Mock): class MockDownloaderModule(Mock):
__downloader__ = "mock" __downloader__ = "mock"
class FakeJob():
    """Minimal stand-in for a Job object, as passed to downloaders"""

    def __init__(self):
        extr = extractor.find("test:")
        self.extractor = extr
        self.pathfmt = util.PathFormat(extr)
        self.out = output.NullOutput()
        # hand out plain loggers instead of wrapped ones
        self.get_logger = logging.getLogger
class TestDownloaderModule(unittest.TestCase): class TestDownloaderModule(unittest.TestCase):
@classmethod @classmethod
@@ -96,11 +105,10 @@ class TestDownloaderBase(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.extractor = extractor.find("test:")
cls.extractor.log.job = None
cls.dir = tempfile.TemporaryDirectory() cls.dir = tempfile.TemporaryDirectory()
cls.fnum = 0 cls.fnum = 0
config.set((), "base-directory", cls.dir.name) config.set((), "base-directory", cls.dir.name)
cls.job = FakeJob()
@classmethod @classmethod
def tearDownClass(cls): def tearDownClass(cls):
@@ -113,12 +121,13 @@ class TestDownloaderBase(unittest.TestCase):
cls.fnum += 1 cls.fnum += 1
kwdict = { kwdict = {
"category": "test", "category" : "test",
"subcategory": "test", "subcategory": "test",
"filename": name, "filename" : name,
"extension": extension, "extension" : extension,
} }
pathfmt = util.PathFormat(cls.extractor)
pathfmt = cls.job.pathfmt
pathfmt.set_directory(kwdict) pathfmt.set_directory(kwdict)
pathfmt.set_filename(kwdict) pathfmt.set_filename(kwdict)
@@ -159,7 +168,7 @@ class TestHTTPDownloader(TestDownloaderBase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
TestDownloaderBase.setUpClass() TestDownloaderBase.setUpClass()
cls.downloader = downloader.find("http")(cls.extractor, NullOutput()) cls.downloader = downloader.find("http")(cls.job)
port = 8088 port = 8088
cls.address = "http://127.0.0.1:{}".format(port) cls.address = "http://127.0.0.1:{}".format(port)
@@ -196,7 +205,7 @@ class TestTextDownloader(TestDownloaderBase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
TestDownloaderBase.setUpClass() TestDownloaderBase.setUpClass()
cls.downloader = downloader.find("text")(cls.extractor, NullOutput()) cls.downloader = downloader.find("text")(cls.job)
def test_text_download(self): def test_text_download(self):
self._run_test("text:foobar", None, "foobar", "txt", "txt") self._run_test("text:foobar", None, "foobar", "txt", "txt")
@@ -208,29 +217,6 @@ class TestTextDownloader(TestDownloaderBase):
self._run_test("text:", None, "", "txt", "txt") self._run_test("text:", None, "", "txt", "txt")
class FakeDownloader(DownloaderBase):
scheme = "fake"
def __init__(self, extractor, output):
DownloaderBase.__init__(self, extractor, output)
def connect(self, url, offset):
pass
def receive(self, file):
pass
def reset(self):
pass
def get_extension(self):
pass
@staticmethod
def _check_extension(file, pathfmt):
pass
class HttpRequestHandler(http.server.BaseHTTPRequestHandler): class HttpRequestHandler(http.server.BaseHTTPRequestHandler):
def do_GET(self): def do_GET(self):

View File

@@ -12,12 +12,14 @@ import sys
import unittest import unittest
from unittest.mock import Mock, mock_open, patch from unittest.mock import Mock, mock_open, patch
import logging
import zipfile import zipfile
import tempfile import tempfile
from datetime import datetime, timezone as tz from datetime import datetime, timezone as tz
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from gallery_dl import postprocessor, extractor, util, config # noqa E402 from gallery_dl import extractor, output, util # noqa E402
from gallery_dl import postprocessor, util, config # noqa E402
from gallery_dl.postprocessor.common import PostProcessor # noqa E402 from gallery_dl.postprocessor.common import PostProcessor # noqa E402
@@ -25,6 +27,15 @@ class MockPostprocessorModule(Mock):
__postprocessor__ = "mock" __postprocessor__ = "mock"
class FakeJob():
    """Minimal stand-in for a Job object, as passed to postprocessors"""

    def __init__(self):
        extr = extractor.find("test:")
        self.extractor = extr
        self.pathfmt = util.PathFormat(extr)
        self.out = output.NullOutput()
        # hand out plain loggers instead of wrapped ones
        self.get_logger = logging.getLogger
class TestPostprocessorModule(unittest.TestCase): class TestPostprocessorModule(unittest.TestCase):
def setUp(self): def setUp(self):
@@ -58,9 +69,9 @@ class BasePostprocessorTest(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.extractor = extractor.find("test:")
cls.dir = tempfile.TemporaryDirectory() cls.dir = tempfile.TemporaryDirectory()
config.set((), "base-directory", cls.dir.name) config.set((), "base-directory", cls.dir.name)
cls.job = FakeJob()
@classmethod @classmethod
def tearDownClass(cls): def tearDownClass(cls):
@@ -74,12 +85,12 @@ class BasePostprocessorTest(unittest.TestCase):
if data is not None: if data is not None:
kwdict.update(data) kwdict.update(data)
self.pathfmt = util.PathFormat(self.extractor) self.pathfmt = self.job.pathfmt
self.pathfmt.set_directory(kwdict) self.pathfmt.set_directory(kwdict)
self.pathfmt.set_filename(kwdict) self.pathfmt.set_filename(kwdict)
pp = postprocessor.find(self.__class__.__name__[:-4].lower()) pp = postprocessor.find(self.__class__.__name__[:-4].lower())
return pp(self.pathfmt, options) return pp(self.job, options)
class ClassifyTest(BasePostprocessorTest): class ClassifyTest(BasePostprocessorTest):