make 'path' and 'keywords' available in logging messages

Wrap all loggers used by job, extractor, downloader, and postprocessor
objects into a (custom) LoggerAdapter that provides access to the
underlying job, extractor, pathfmt, and kwdict objects and their
properties.

__init__() signatures for all downloader and postprocessor classes have
been changed to take the current Job object as their first argument,
instead of the current extractor or pathfmt.

(#574, #575)
This commit is contained in:
Mike Fährmann
2020-05-18 01:35:53 +02:00
parent 846d3a2466
commit ece73b5b2a
17 changed files with 149 additions and 97 deletions

View File

@@ -9,7 +9,6 @@
"""Common classes and constants used by downloader modules."""
import os
import logging
from .. import config, util
@@ -17,15 +16,11 @@ class DownloaderBase():
"""Base class for downloaders"""
scheme = ""
def __init__(self, extractor, output):
self.session = extractor.session
self.out = output
def __init__(self, job):
self.out = job.out
self.part = self.config("part", True)
self.partdir = self.config("part-directory")
self.log = logging.getLogger("downloader." + self.scheme)
self.log.job = extractor.log.job
self.log.extractor = extractor
self.log = job.get_logger("downloader." + self.scheme)
if self.partdir:
self.partdir = util.expand_path(self.partdir)

View File

@@ -24,16 +24,19 @@ except ImportError:
class HttpDownloader(DownloaderBase):
scheme = "http"
def __init__(self, extractor, output):
DownloaderBase.__init__(self, extractor, output)
def __init__(self, job):
DownloaderBase.__init__(self, job)
extractor = job.extractor
self.session = extractor.session
self.chunk_size = 16384
self.downloading = False
self.adjust_extension = self.config("adjust-extensions", True)
self.retries = self.config("retries", extractor._retries)
self.timeout = self.config("timeout", extractor._timeout)
self.verify = self.config("verify", extractor._verify)
self.mtime = self.config("mtime", True)
self.rate = self.config("rate")
self.downloading = False
self.chunk_size = 16384
if self.retries < 0:
self.retries = float("inf")

View File

@@ -17,8 +17,9 @@ import os
class YoutubeDLDownloader(DownloaderBase):
scheme = "ytdl"
def __init__(self, extractor, output):
DownloaderBase.__init__(self, extractor, output)
def __init__(self, job):
DownloaderBase.__init__(self, job)
extractor = job.extractor
retries = self.config("retries", extractor._retries)
options = {

View File

@@ -24,10 +24,16 @@ class Job():
extr = extractor.find(extr)
if not extr:
raise exception.NoExtractorError()
self.extractor = extr
extr.log.extractor = extr
extr.log.job = self
self.pathfmt = None
self._logger_extra = {
"job" : self,
"extractor": extr,
"path" : output.PathfmtProxy(self),
"keywords" : output.KwdictProxy(self),
}
extr.log = self._wrap_logger(extr.log)
extr.log.debug("Using %s for '%s'", extr.__class__.__name__, extr.url)
self.status = 0
@@ -171,6 +177,12 @@ class Job():
return util.build_predicate(predicates)
def get_logger(self, name):
    """Return the logger called 'name', wrapped by this job's adapter"""
    logger = logging.getLogger(name)
    return self._wrap_logger(logger)
def _wrap_logger(self, logger):
    """Wrap 'logger' in a LoggerAdapter that carries this job's extra info"""
    adapter_cls = output.LoggerAdapter
    return adapter_cls(logger, self._logger_extra)
def _write_unsupported(self, url):
if self.ulog:
self.ulog.info(url)
@@ -181,8 +193,7 @@ class DownloadJob(Job):
def __init__(self, url, parent=None):
Job.__init__(self, url, parent)
self.log = logging.getLogger("download")
self.pathfmt = None
self.log = self.get_logger("download")
self.archive = None
self.sleep = None
self.downloaders = {}
@@ -331,7 +342,7 @@ class DownloadJob(Job):
cls = downloader.find(scheme)
if cls and config.get(("downloader", cls.scheme), "enabled", True):
instance = cls(self.extractor, self.out)
instance = cls(self)
else:
instance = None
self.log.error("'%s:' URLs are not supported/enabled", scheme)
@@ -383,6 +394,7 @@ class DownloadJob(Job):
postprocessors = config("postprocessors")
if postprocessors:
pp_log = self.get_logger("postprocessor")
pp_list = []
category = self.extractor.category
@@ -395,14 +407,13 @@ class DownloadJob(Job):
name = pp_dict.get("name")
pp_cls = postprocessor.find(name)
if not pp_cls:
postprocessor.log.warning("module '%s' not found", name)
pp_log.warning("module '%s' not found", name)
continue
try:
pp_obj = pp_cls(pathfmt, pp_dict)
pp_obj = pp_cls(self, pp_dict)
except Exception as exc:
postprocessor.log.error(
"'%s' initialization failed: %s: %s",
name, exc.__class__.__name__, exc)
pp_log.error("'%s' initialization failed: %s: %s",
name, exc.__class__.__name__, exc)
else:
pp_list.append(pp_obj)

View File

@@ -22,19 +22,68 @@ LOG_LEVEL = logging.INFO
class Logger(logging.Logger):
"""Custom logger that includes extractor and job info in log records"""
extractor = util.NONE
job = util.NONE
"""Custom logger that includes extra info in log records"""
def makeRecord(self, name, level, fn, lno, msg, args, exc_info,
func=None, extra=None, sinfo=None,
factory=logging._logRecordFactory):
rv = factory(name, level, fn, lno, msg, args, exc_info, func, sinfo)
rv.extractor = self.extractor
rv.job = self.job
if extra:
rv.__dict__.update(extra)
return rv
class LoggerAdapter():
    """Minimal stand-in for logging.LoggerAdapter

    Forwards debug(), info(), warning(), and error() calls to the wrapped
    logger and hands the stored 'extra' mapping to every one of them.
    Any 'extra' keyword argument supplied by the caller is replaced.
    """
    __slots__ = ("logger", "extra")

    def __init__(self, logger, extra):
        self.logger = logger
        self.extra = extra

    def debug(self, msg, *args, **kwargs):
        log = self.logger
        if not log.isEnabledFor(logging.DEBUG):
            return
        kwargs["extra"] = self.extra
        log._log(logging.DEBUG, msg, args, **kwargs)

    def info(self, msg, *args, **kwargs):
        log = self.logger
        if not log.isEnabledFor(logging.INFO):
            return
        kwargs["extra"] = self.extra
        log._log(logging.INFO, msg, args, **kwargs)

    def warning(self, msg, *args, **kwargs):
        log = self.logger
        if not log.isEnabledFor(logging.WARNING):
            return
        kwargs["extra"] = self.extra
        log._log(logging.WARNING, msg, args, **kwargs)

    def error(self, msg, *args, **kwargs):
        log = self.logger
        if not log.isEnabledFor(logging.ERROR):
            return
        kwargs["extra"] = self.extra
        log._log(logging.ERROR, msg, args, **kwargs)
class PathfmtProxy():
    """Attribute proxy for the instance attributes of a job's 'pathfmt'

    Every attribute lookup is answered from the __dict__ of the job's
    current 'pathfmt' object; the result is None when the job has no
    (truthy) 'pathfmt' or when it has no such instance attribute.
    """
    __slots__ = ("job",)

    def __init__(self, job):
        self.job = job

    def __getattribute__(self, name):
        # bypass this very method to reach the stored job object
        job = object.__getattribute__(self, "job")
        pathfmt = job.pathfmt
        if not pathfmt:
            return None
        return pathfmt.__dict__.get(name)
class KwdictProxy():
    """Attribute proxy for the entries of a job's 'pathfmt.kwdict'

    Every attribute lookup is translated into a kwdict.get() call on the
    job's current 'pathfmt' object; the result is None when the job has
    no (truthy) 'pathfmt' or when the key is absent.
    """
    __slots__ = ("job",)

    def __init__(self, job):
        self.job = job

    def __getattribute__(self, name):
        # bypass this very method to reach the stored job object
        job = object.__getattribute__(self, "job")
        pathfmt = job.pathfmt
        if not pathfmt:
            return None
        return pathfmt.kwdict.get(name)
class Formatter(logging.Formatter):
"""Custom formatter that supports different formats per loglevel"""

View File

@@ -9,7 +9,6 @@
"""Post-processing modules"""
import importlib
import logging
modules = [
"classify",
@@ -21,8 +20,6 @@ modules = [
"zip",
]
log = logging.getLogger("postprocessor")
def find(name):
"""Return a postprocessor class with the given name"""

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2018 Mike Fährmann
# Copyright 2018-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -22,8 +22,8 @@ class ClassifyPP(PostProcessor):
"Archives" : ("zip", "rar", "7z", "tar", "gz", "bz2"),
}
def __init__(self, pathfmt, options):
PostProcessor.__init__(self)
def __init__(self, job, options):
PostProcessor.__init__(self, job)
mapping = options.get("mapping", self.DEFAULT_MAPPING)
self.mapping = {

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2018-2019 Mike Fährmann
# Copyright 2018-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -8,15 +8,13 @@
"""Common classes and constants used by postprocessor modules."""
import logging
class PostProcessor():
"""Base class for postprocessors"""
def __init__(self):
def __init__(self, job):
name = self.__class__.__name__[:-2].lower()
self.log = logging.getLogger("postprocessor." + name)
self.log = job.get_logger("postprocessor." + name)
@staticmethod
def prepare(pathfmt):

View File

@@ -14,8 +14,8 @@ import os
class ComparePP(PostProcessor):
def __init__(self, pathfmt, options):
PostProcessor.__init__(self)
def __init__(self, job, options):
PostProcessor.__init__(self, job)
if options.get("action") == "enumerate":
self.run = self._run_enumerate
if options.get("shallow"):

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2018-2019 Mike Fährmann
# Copyright 2018-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -23,8 +23,8 @@ else:
class ExecPP(PostProcessor):
def __init__(self, pathfmt, options):
PostProcessor.__init__(self)
def __init__(self, job, options):
PostProcessor.__init__(self, job)
args = options["command"]
final = options.get("final", False)

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2019 Mike Fährmann
# Copyright 2019-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -15,8 +15,8 @@ import os
class MetadataPP(PostProcessor):
def __init__(self, pathfmt, options):
PostProcessor.__init__(self)
def __init__(self, job, options):
PostProcessor.__init__(self, job)
mode = options.get("mode", "json")
if mode == "custom":

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2019 Mike Fährmann
# Copyright 2019-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -14,8 +14,8 @@ from ..text import parse_int
class MtimePP(PostProcessor):
def __init__(self, pathfmt, options):
PostProcessor.__init__(self)
def __init__(self, job, options):
PostProcessor.__init__(self, job)
self.key = options.get("key", "date")
def run(self, pathfmt):

View File

@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
# Copyright 2018 Mike Fährmann
# Copyright 2018-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Convert pixiv ugoira to webm"""
"""Convert Pixiv Ugoira to WebM"""
from .common import PostProcessor
from .. import util
@@ -19,8 +19,8 @@ import os
class UgoiraPP(PostProcessor):
def __init__(self, pathfmt, options):
PostProcessor.__init__(self)
def __init__(self, job, options):
PostProcessor.__init__(self, job)
self.extension = options.get("extension") or "webm"
self.args = options.get("ffmpeg-args") or ()
self.twopass = options.get("ffmpeg-twopass", False)

View File

@@ -22,8 +22,8 @@ class ZipPP(PostProcessor):
"lzma" : zipfile.ZIP_LZMA,
}
def __init__(self, pathfmt, options):
PostProcessor.__init__(self)
def __init__(self, job, options):
PostProcessor.__init__(self, job)
self.delete = not options.get("keep-files", False)
ext = "." + options.get("extension", "zip")
algorithm = options.get("compression", "store")
@@ -33,7 +33,7 @@ class ZipPP(PostProcessor):
algorithm)
algorithm = "store"
self.path = pathfmt.realdirectory
self.path = job.pathfmt.realdirectory
args = (self.path[:-1] + ext, "a",
self.COMPRESSION_ALGORITHMS[algorithm], True)

View File

@@ -726,6 +726,7 @@ class PathFormat():
def set_directory(self, kwdict):
"""Build directory path and create it if necessary"""
self.kwdict = kwdict
windows = os.name == "nt"
# Build path segments by applying 'kwdict' to directory format strings

View File

@@ -14,21 +14,30 @@ from unittest.mock import Mock, MagicMock, patch
import re
import base64
import logging
import os.path
import tempfile
import threading
import http.server
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from gallery_dl import downloader, extractor, config, util # noqa E402
from gallery_dl.downloader.common import DownloaderBase # noqa E402
from gallery_dl.output import NullOutput # noqa E402
from gallery_dl import downloader, extractor, output, config, util # noqa E402
class MockDownloaderModule(Mock):
__downloader__ = "mock"
class FakeJob():
    """Stand-in for a Job object, as handed to downloader constructors"""

    def __init__(self):
        extr = extractor.find("test:")
        self.extractor = extr
        self.pathfmt = util.PathFormat(extr)
        self.out = output.NullOutput()
        # plain loggers instead of adapter-wrapped ones
        self.get_logger = logging.getLogger
class TestDownloaderModule(unittest.TestCase):
@classmethod
@@ -96,11 +105,10 @@ class TestDownloaderBase(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.extractor = extractor.find("test:")
cls.extractor.log.job = None
cls.dir = tempfile.TemporaryDirectory()
cls.fnum = 0
config.set((), "base-directory", cls.dir.name)
cls.job = FakeJob()
@classmethod
def tearDownClass(cls):
@@ -113,12 +121,13 @@ class TestDownloaderBase(unittest.TestCase):
cls.fnum += 1
kwdict = {
"category": "test",
"category" : "test",
"subcategory": "test",
"filename": name,
"extension": extension,
"filename" : name,
"extension" : extension,
}
pathfmt = util.PathFormat(cls.extractor)
pathfmt = cls.job.pathfmt
pathfmt.set_directory(kwdict)
pathfmt.set_filename(kwdict)
@@ -159,7 +168,7 @@ class TestHTTPDownloader(TestDownloaderBase):
@classmethod
def setUpClass(cls):
TestDownloaderBase.setUpClass()
cls.downloader = downloader.find("http")(cls.extractor, NullOutput())
cls.downloader = downloader.find("http")(cls.job)
port = 8088
cls.address = "http://127.0.0.1:{}".format(port)
@@ -196,7 +205,7 @@ class TestTextDownloader(TestDownloaderBase):
@classmethod
def setUpClass(cls):
TestDownloaderBase.setUpClass()
cls.downloader = downloader.find("text")(cls.extractor, NullOutput())
cls.downloader = downloader.find("text")(cls.job)
def test_text_download(self):
self._run_test("text:foobar", None, "foobar", "txt", "txt")
@@ -208,29 +217,6 @@ class TestTextDownloader(TestDownloaderBase):
self._run_test("text:", None, "", "txt", "txt")
class FakeDownloader(DownloaderBase):
scheme = "fake"
def __init__(self, extractor, output):
DownloaderBase.__init__(self, extractor, output)
def connect(self, url, offset):
pass
def receive(self, file):
pass
def reset(self):
pass
def get_extension(self):
pass
@staticmethod
def _check_extension(file, pathfmt):
pass
class HttpRequestHandler(http.server.BaseHTTPRequestHandler):
def do_GET(self):

View File

@@ -12,12 +12,14 @@ import sys
import unittest
from unittest.mock import Mock, mock_open, patch
import logging
import zipfile
import tempfile
from datetime import datetime, timezone as tz
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from gallery_dl import postprocessor, extractor, util, config # noqa E402
from gallery_dl import extractor, output, util # noqa E402
from gallery_dl import postprocessor, util, config # noqa E402
from gallery_dl.postprocessor.common import PostProcessor # noqa E402
@@ -25,6 +27,15 @@ class MockPostprocessorModule(Mock):
__postprocessor__ = "mock"
class FakeJob():
    """Stand-in for a Job object, as handed to postprocessor constructors"""

    def __init__(self):
        extr = extractor.find("test:")
        self.extractor = extr
        self.pathfmt = util.PathFormat(extr)
        self.out = output.NullOutput()
        # plain loggers instead of adapter-wrapped ones
        self.get_logger = logging.getLogger
class TestPostprocessorModule(unittest.TestCase):
def setUp(self):
@@ -58,9 +69,9 @@ class BasePostprocessorTest(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.extractor = extractor.find("test:")
cls.dir = tempfile.TemporaryDirectory()
config.set((), "base-directory", cls.dir.name)
cls.job = FakeJob()
@classmethod
def tearDownClass(cls):
@@ -74,12 +85,12 @@ class BasePostprocessorTest(unittest.TestCase):
if data is not None:
kwdict.update(data)
self.pathfmt = util.PathFormat(self.extractor)
self.pathfmt = self.job.pathfmt
self.pathfmt.set_directory(kwdict)
self.pathfmt.set_filename(kwdict)
pp = postprocessor.find(self.__class__.__name__[:-4].lower())
return pp(self.pathfmt, options)
return pp(self.job, options)
class ClassifyTest(BasePostprocessorTest):