diff --git a/gallery_dl/downloader/common.py b/gallery_dl/downloader/common.py index 1c0d5b18..dcfff3b4 100644 --- a/gallery_dl/downloader/common.py +++ b/gallery_dl/downloader/common.py @@ -47,7 +47,7 @@ class DownloaderBase(): # remove file from incomplete downloads if self.downloading and not self.part: try: - os.remove(pathfmt.realpath) + os.remove(pathfmt.temppath) except (OSError, AttributeError): pass @@ -127,10 +127,7 @@ class DownloaderBase(): self.downloading = False if adj_ext: - pathfmt.adjust_extension(adj_ext) - if self.part: - pathfmt.part_move() - self.out.success(pathfmt.path, tries) + pathfmt.set_extension(adj_ext) return True def connect(self, url, offset): diff --git a/gallery_dl/job.py b/gallery_dl/job.py index c89743ee..7a8db0c6 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -11,7 +11,8 @@ import time import json import hashlib import logging -from . import extractor, downloader, config, util, output, exception +from . import extractor, downloader, postprocessor +from . 
import config, util, output, exception from .extractor.message import Message @@ -92,6 +93,7 @@ class Job(): "https://github.com/mikf/gallery-dl/issues ."), exc.__class__.__name__, exc) log.debug("Traceback", exc_info=True) + self.handle_finalize() def dispatch(self, msg): """Call the appropriate message handler""" @@ -136,6 +138,9 @@ class Job(): def handle_queue(self, url, keywords): """Handle Message.Queue""" + def handle_finalize(self): + """Handle job finalization""" + def update_kwdict(self, kwdict): """Update 'kwdict' with additional metadata""" kwdict["category"] = self.extractor.category @@ -158,6 +163,7 @@ class DownloadJob(Job): self.archive = None self.sleep = None self.downloaders = {} + self.postprocessors = None self.out = output.select() def handle_url(self, url, keywords, fallback=None): @@ -186,7 +192,14 @@ class DownloadJob(Job): "Failed to download %s", self.pathfmt.filename) return + # run post processors + if self.postprocessors: + for pp in self.postprocessors: + pp.run(self.pathfmt) + # download succeeded + self.pathfmt.finalize() + self.out.success(self.pathfmt.path, 0) if self.archive: self.archive.add(keywords) @@ -198,14 +211,40 @@ class DownloadJob(Job): def handle_directory(self, keywords): """Set and create the target directory for downloads""" - if not self.pathfmt: - self.pathfmt = util.PathFormat(self.extractor) - self.sleep = self.extractor.config("sleep") - archive = self.extractor.config("archive") - if archive: - path = util.expand_path(archive) - self.archive = util.DownloadArchive(path, self.extractor) + if self.pathfmt: + self.pathfmt.set_directory(keywords) + return + + # delayed initialization + self.pathfmt = util.PathFormat(self.extractor) self.pathfmt.set_directory(keywords) + self.sleep = self.extractor.config("sleep") + + archive = self.extractor.config("archive") + if archive: + path = util.expand_path(archive) + self.archive = util.DownloadArchive(path, self.extractor) + + postprocessors = 
self.extractor.config("postprocessors") + if postprocessors: + self.postprocessors = [] + for pp_dict in postprocessors: + if "name" not in pp_dict: + postprocessor.log.warning("no 'name' specified") + continue + name = pp_dict["name"] + pp_cls = postprocessor.find(name) + if not pp_cls: + postprocessor.log.warning("'%s' not found", name) + continue + try: + pp_obj = pp_cls(self.pathfmt, pp_dict) + except Exception as exc: + postprocessor.log.error( + "%s: initialization failed: %s %s", + name, exc.__class__.__name__, exc) + else: + self.postprocessors.append(pp_obj) def handle_queue(self, url, keywords): try: @@ -213,6 +252,11 @@ class DownloadJob(Job): except exception.NoExtractorError: self._write_unsupported(url) + def handle_finalize(self): + if self.postprocessors: + for pp in self.postprocessors: + pp.finalize() + def get_downloader(self, url): """Return, and possibly construct, a downloader suitable for 'url'""" pos = url.find(":") diff --git a/gallery_dl/postprocessor/__init__.py b/gallery_dl/postprocessor/__init__.py new file mode 100644 index 00000000..f01fca1b --- /dev/null +++ b/gallery_dl/postprocessor/__init__.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- + +# Copyright 2018 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. 
import importlib
import logging

log = logging.getLogger("postprocessor")


def find(name):
    """Return the postprocessor class with the given name.

    'name' is the module name inside this package (e.g. "zip"); the
    module is expected to expose its class as '__postprocessor__'.

    Returns None when 'name' is not a plain identifier string, when the
    module cannot be imported, or when it has no '__postprocessor__'
    attribute. Results (including misses) are remembered in '_cache',
    so a failing import is attempted only once per name.
    """
    try:
        return _cache[name]
    except KeyError:
        pass
    except TypeError:
        # unhashable value, e.g. a list from a malformed config entry
        return None

    cls = None
    # Only plain identifiers are imported: this rejects non-string values
    # and dotted names like ".util", which would make the relative import
    # below resolve to a module outside of this package.
    if isinstance(name, str) and name.isidentifier():
        try:
            module = importlib.import_module("." + name, __package__)
            cls = module.__postprocessor__
        except (ImportError, AttributeError, TypeError):
            # TypeError: relative import without a known parent package
            cls = None

    _cache[name] = cls
    return cls


# --------------------------------------------------------------------
# internals

_cache = {}


# ---- file: gallery_dl/postprocessor/classify.py ----

# -*- coding: utf-8 -*-

# Copyright 2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Categorize files by file extension"""

from .common import PostProcessor
import os


class ClassifyPP(PostProcessor):
    """Sort downloaded files into subdirectories by filename extension.

    The "mapping" option maps a directory name to the extensions that
    belong into it; DEFAULT_MAPPING is used when it is not given.
    """

    DEFAULT_MAPPING = {
        "Music"   : ("mp3", "aac", "flac", "ogg", "wma", "m4a", "wav"),
        "Video"   : ("flv", "ogv", "avi", "mp4", "mpg", "mpeg", "3gp", "mkv",
                     "webm", "vob", "wmv"),
        "Pictures": ("jpg", "jpeg", "png", "gif", "bmp", "svg", "webp"),
        "Archives": ("zip", "rar", "7z", "tar", "gz", "bz2"),
    }

    def __init__(self, pathfmt, options):
        """Build the reverse lookup table: extension -> directory name."""
        PostProcessor.__init__(self)
        mapping = options.get("mapping", self.DEFAULT_MAPPING)

        self.mapping = {}
        for directory, exts in mapping.items():
            if isinstance(exts, str):
                # a single extension given as a plain string; iterating
                # over it would register every character separately
                exts = (exts,)
            for ext in exts:
                self.mapping[ext] = directory

    def run(self, pathfmt):
        """Point 'pathfmt.realpath' into the directory for its extension.

        Files without an extension, or with one that is not part of the
        mapping, are left untouched.
        """
        ext = pathfmt.keywords.get("extension")
        if ext not in self.mapping:
            return

        path = pathfmt.realdirectory + os.sep + self.mapping[ext]
        # create the directory first, so 'realpath' is only changed
        # once its target location is known to exist
        os.makedirs(path, exist_ok=True)
        pathfmt.realpath = path + os.sep + pathfmt.filename


__postprocessor__ = ClassifyPP
# ---- file: gallery_dl/postprocessor/common.py ----

# -*- coding: utf-8 -*-

# Copyright 2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Common classes and constants used by postprocessor modules."""

from . import log


class PostProcessor():
    """Interface shared by all postprocessor classes.

    Both hooks are no-ops here; subclasses override the ones they need.
    """

    # package-wide logger, reachable as 'self.log' in subclasses
    log = log

    def finalize(self):
        """Release resources once no more files will be processed."""

    def run(self, pathfmt):
        """Process the single file described by 'pathfmt'."""


# ---- file: gallery_dl/postprocessor/exec.py ----

# -*- coding: utf-8 -*-

# Copyright 2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Execute processes"""

from .common import PostProcessor
import subprocess


class ExecPP(PostProcessor):
    """Run an external command for every downloaded file.

    Options:
        command: list of argument strings; each one is expanded with
                 str.format_map() using the file's keywords
        async:   if true, do not wait for the command to finish
    """

    def __init__(self, pathfmt, options):
        """Validate and store the command to execute.

        Raises KeyError if "command" is missing and TypeError if it is
        a string or not iterable.
        """
        PostProcessor.__init__(self)

        command = options["command"]
        if isinstance(command, str):
            # iterating over a string would pass every single character
            # as its own argument
            raise TypeError(
                "'command' must be a list of arguments, not a string")
        self.args = list(command)

        if options.get("async", False):
            # fire and forget: start the process without waiting for it
            self._exec = subprocess.Popen

    def run(self, pathfmt):
        """Expand the argument templates and execute the command."""
        args = [
            arg.format_map(pathfmt.keywords)
            for arg in self.args
        ]
        try:
            self._exec(args)
        except OSError as exc:
            # e.g. executable not found; a failing postprocessor command
            # should not abort the whole download job
            self.log.warning(
                "unable to execute '%s': %s", " ".join(args), exc)

    def _exec(self, args):
        """Run 'args', wait for it, and warn about non-zero exit codes."""
        retcode = subprocess.Popen(args).wait()
        if retcode:
            self.log.warning(
                "executing '%s' returned non-zero exit status %d",
                " ".join(args), retcode)


__postprocessor__ = ExecPP


# ---- file: gallery_dl/postprocessor/zip.py ----

# -*- coding: utf-8 -*-

# Copyright 2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Store files in ZIP archives"""

from .common import PostProcessor
import zipfile


class ZipPP(PostProcessor):
    """Collect downloaded files in a ZIP archive.

    Options:
        keep-files:  if true, also keep the files outside of the archive
        extension:   archive filename extension (default: "zip")
        compression: one of the COMPRESSION_ALGORITHMS keys
                     (default: "store")
    """

    COMPRESSION_ALGORITHMS = {
        "store": zipfile.ZIP_STORED,
        "zip"  : zipfile.ZIP_DEFLATED,
        "bzip2": zipfile.ZIP_BZIP2,
        "lzma" : zipfile.ZIP_LZMA,
    }

    def __init__(self, pathfmt, options):
        """Open '<realdirectory><extension>' as archive in append mode."""
        PostProcessor.__init__(self)
        self.delete = not options.get("keep-files", False)
        self.ext = "." + options.get("extension", "zip")
        algorithm = options.get("compression", "store")
        if algorithm not in self.COMPRESSION_ALGORITHMS:
            self.log.warning(
                "unknown compression algorithm '%s'; falling back to 'store'",
                algorithm)
            algorithm = "store"

        path = pathfmt.realdirectory + self.ext
        self.zfile = zipfile.ZipFile(
            path, "a", self.COMPRESSION_ALGORITHMS[algorithm], True)

    def run(self, pathfmt):
        """Add the file at 'pathfmt.temppath' to the archive."""
        # 'NameToInfo' is not officially documented, but it's available
        # for all supported Python versions and using it directly is a lot
        # better than calling getinfo()
        if pathfmt.filename not in self.zfile.NameToInfo:
            self.zfile.write(pathfmt.temppath, pathfmt.filename)

        # Set the flag for every file, not only for newly added ones;
        # otherwise a file whose name is already in the archive would
        # inherit whatever value the previous file left behind.
        pathfmt.delete = self.delete

    def finalize(self):
        """Close the archive, writing its central directory to disk."""
        self.zfile.close()


__postprocessor__ = ZipPP
self.keywords["extension"] = extension self.build_path() - def adjust_extension(self, extension): - """Change filename extension of existing file""" - oldpath = self.realpath - self.set_extension(extension) - if not self.partpath: - os.replace(oldpath, self.realpath) - def build_path(self): """Use filename-keywords and directory to build a full path""" try: @@ -453,38 +448,46 @@ class PathFormat(): filename = os.sep + self.filename self.path = self.directory + filename self.realpath = self.realdirectory + filename + if not self.temppath: + self.temppath = self.realpath def part_enable(self, part_directory=None): """Enable .part file usage""" if self.has_extension: - self.partpath = self.realpath + ".part" + self.temppath = self.realpath + ".part" else: self.set_extension("part", False) - self.partpath = self.realpath if part_directory: - self.partpath = os.path.join( + self.temppath = os.path.join( part_directory, - os.path.basename(self.partpath), + os.path.basename(self.temppath), ) def part_size(self): """Return size of .part file""" - if self.partpath: - try: - return os.stat(self.partpath).st_size - except OSError: - pass + try: + return os.stat(self.temppath).st_size + except OSError: + pass return 0 - def part_move(self): - """Rename .part file to its actual filename""" + def finalize(self): + """Move tempfile to its target location""" + if self.delete: + os.unlink(self.temppath) + return + + if self.temppath == self.realpath: + return + try: - os.replace(self.partpath, self.realpath) + os.replace(self.temppath, self.realpath) return except OSError: pass - shutil.copyfile(self.partpath, self.realpath) - os.unlink(self.partpath) + + shutil.copyfile(self.temppath, self.realpath) + os.unlink(self.temppath) @staticmethod def adjust_path(path): diff --git a/test/test_downloader.py b/test/test_downloader.py index 19cecb8b..21b0920f 100644 --- a/test/test_downloader.py +++ b/test/test_downloader.py @@ -54,9 +54,8 @@ class TestDownloaderBase(unittest.TestCase): 
pathfmt.set_keywords(kwdict) if content: - path = pathfmt.realpath + (".part" if part else "") mode = "w" + ("b" if isinstance(content, bytes) else "") - with open(path, mode) as file: + with pathfmt.open(mode) as file: file.write(content) return pathfmt @@ -65,20 +64,23 @@ class TestDownloaderBase(unittest.TestCase): extension, expected_extension=None): pathfmt = self._prepare_destination(input, extension=extension) success = self.downloader.download(url, pathfmt) - path = pathfmt.realpath # test successful download self.assertTrue(success, "downloading '{}' failed".format(url)) # test content mode = "r" + ("b" if isinstance(output, bytes) else "") - with open(path, mode) as file: + with pathfmt.open(mode) as file: content = file.read() self.assertEqual(content, output) # test filename extension self.assertEqual( - os.path.splitext(path)[1][1:], + pathfmt.keywords["extension"], + expected_extension, + ) + self.assertEqual( + os.path.splitext(pathfmt.realpath)[1][1:], expected_extension, )