From 76c32d58e59c40c89c355dcab357badc9666bf96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sun, 20 May 2018 22:03:57 +0200 Subject: [PATCH 01/11] [postprocessor] initial code --- gallery_dl/job.py | 24 ++++++++++++++++++++- gallery_dl/postprocessor/__init__.py | 32 ++++++++++++++++++++++++++++ gallery_dl/postprocessor/common.py | 15 +++++++++++++ 3 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 gallery_dl/postprocessor/__init__.py create mode 100644 gallery_dl/postprocessor/common.py diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 61755f3a..01771e60 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -11,7 +11,8 @@ import time import json import hashlib import logging -from . import extractor, downloader, config, util, output, exception +from . import extractor, downloader, postprocessor +from . import config, util, output, exception from .extractor.message import Message @@ -158,6 +159,7 @@ class DownloadJob(Job): self.archive = None self.sleep = None self.downloaders = {} + self.postprocessors = [] self.out = output.select() def handle_url(self, url, keywords, fallback=None): @@ -186,6 +188,10 @@ class DownloadJob(Job): "Failed to download %s", self.pathfmt.filename) return + # run post processors + for pp in self.postprocessors: + pp.run(self.pathfmt) + # download succeeded if self.archive: self.archive.add(keywords) @@ -201,10 +207,26 @@ class DownloadJob(Job): if not self.pathfmt: self.pathfmt = util.PathFormat(self.extractor) self.sleep = self.extractor.config("sleep") + postprocs = self.extractor.config("postprocessor") archive = self.extractor.config("archive") + + if postprocs: + for pp_dict in postprocs: + try: + name = pp_dict["name"] + pp_obj = postprocessor.find(name)(pp_dict) + except KeyError as exc: + postprocessor.log.warning("missing key %s", exc) + except Exception as exc: + postprocessor.log.warning( + "%s %s", exc.__class__.__name__, exc) + else: + self.postprocessors.append(pp_obj) + if archive: path = util.expand_path(archive) self.archive = util.DownloadArchive(path, self.extractor) + self.pathfmt.set_directory(keywords) def handle_queue(self, url, keywords): diff --git a/gallery_dl/postprocessor/__init__.py b/gallery_dl/postprocessor/__init__.py new file mode 100644 index 00000000..f01fca1b --- /dev/null +++ b/gallery_dl/postprocessor/__init__.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- + +# Copyright 2018 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +import importlib +import logging + +log = logging.getLogger("postprocessor") + + +def find(name): + """Return a postprocessor class with the given name""" + try: + return _cache[name] + except KeyError: + try: + module = importlib.import_module("."+name, __package__) + cls = module.__postprocessor__ + _cache[name] = cls + return cls + except (ImportError, AttributeError): + return None + + +# -------------------------------------------------------------------- +# internals + +_cache = {} diff --git a/gallery_dl/postprocessor/common.py b/gallery_dl/postprocessor/common.py new file mode 100644 index 00000000..2344611c --- /dev/null +++ b/gallery_dl/postprocessor/common.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- + +# Copyright 2018 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Common classes and constants used by postprocessor modules.""" + + +class PostProcessor(): + + def run(self, asd): + raise NotImplementedError() From d378c0a32345a49d02af9a08a15f8449d5231a06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 22 May 2018 15:00:31 +0200 Subject: [PATCH 02/11] [postprocessor] add 'exec' to execute user-defined processes --- gallery_dl/postprocessor/exec.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 gallery_dl/postprocessor/exec.py diff --git a/gallery_dl/postprocessor/exec.py b/gallery_dl/postprocessor/exec.py new file mode 100644 index 00000000..dcec5c03 --- /dev/null +++ b/gallery_dl/postprocessor/exec.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- + +# Copyright 2018 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Execute processes""" + +from .common import PostProcessor +import subprocess + + +class ExecPP(PostProcessor): + + def __init__(self, options): + PostProcessor.__init__(self) + self.args = options["args"] + + def run(self, pathfmt): + args = [ + arg.format_map(pathfmt.keywords) + for arg in self.args + ] + subprocess.Popen(args) + + +__postprocessor__ = ExecPP From ca4008e1c1b7e958a87935c39f666a78ebdb8165 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 22 May 2018 16:21:17 +0200 Subject: [PATCH 03/11] [postprocessor] add 'classify' to sort downloads by fileext --- gallery_dl/postprocessor/classify.py | 51 ++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 gallery_dl/postprocessor/classify.py diff --git a/gallery_dl/postprocessor/classify.py b/gallery_dl/postprocessor/classify.py new file mode 100644 index 00000000..f3a09c5a --- /dev/null +++ b/gallery_dl/postprocessor/classify.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- + +# Copyright 2018 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Categorize files by media type""" + +from .common import PostProcessor +import os + + +class ClassifyPP(PostProcessor): + + DEFAULT_MAP = { + "Music" : ("mp3", "aac", "flac", "ogg", "wma", "m4a", "wav"), + "Video" : ("flv", "ogv", "avi", "mp4", "mpg", "mpeg", "3gp", "mkv", + "webm", "vob", "wmv"), + "Pictures" : ("jpg", "jpeg", "png", "gif", "bmp", "svg", "webp"), + "Archives" : ("zip", "rar", "7z", "tar", "gz", "bz2"), + } + + def __init__(self, options): + PostProcessor.__init__(self) + mapping = options.get("map", self.DEFAULT_MAP) + + self.map = { + ext: directory + for directory, exts in mapping.items() + for ext in exts + } + + def run(self, pathfmt): + ext = pathfmt.keywords["extension"] + + if ext in self.map: + directory = self.map[ext] + path = os.path.join(pathfmt.realdirectory, directory) + try: + os.mkdir(path) + except FileExistsError: + pass + os.replace( + pathfmt.realpath, + os.path.join(path, pathfmt.filename) + ) + + +__postprocessor__ = ClassifyPP From d0ae3ed52c9d3403d5c30b4f89abfc4a45099f39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 22 May 2018 16:54:17 +0200 Subject: [PATCH 04/11] [postprocessor] add 'zip' to write files to a ZIP archive (#85) --- gallery_dl/postprocessor/zip.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 gallery_dl/postprocessor/zip.py diff --git a/gallery_dl/postprocessor/zip.py b/gallery_dl/postprocessor/zip.py new file mode 100644 index 00000000..bd39bdab --- /dev/null +++ b/gallery_dl/postprocessor/zip.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- + +# Copyright 2018 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Add files to ZIP archive""" + +from .common import PostProcessor +import zipfile + + +class ZipPP(PostProcessor): + + def __init__(self, options): + PostProcessor.__init__(self) + + def run(self, pathfmt): + with zipfile.ZipFile(pathfmt.realdirectory + ".zip", "a") as zfile: + zfile.write(pathfmt.realpath, pathfmt.filename) + + +__postprocessor__ = ZipPP From 821535b458f6e7752e94715faf5b291e397f6f08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 6 Jun 2018 20:17:17 +0200 Subject: [PATCH 05/11] adjust PathFormat class --- gallery_dl/downloader/common.py | 7 ++---- gallery_dl/job.py | 2 ++ gallery_dl/util.py | 44 ++++++++++++++++----------------- 3 files changed, 25 insertions(+), 28 deletions(-) diff --git a/gallery_dl/downloader/common.py b/gallery_dl/downloader/common.py index 1c0d5b18..dcfff3b4 100644 --- a/gallery_dl/downloader/common.py +++ b/gallery_dl/downloader/common.py @@ -47,7 +47,7 @@ class DownloaderBase(): # remove file from incomplete downloads if self.downloading and not self.part: try: - os.remove(pathfmt.realpath) + os.remove(pathfmt.temppath) except (OSError, AttributeError): pass @@ -127,10 +127,7 @@ class DownloaderBase(): self.downloading = False if adj_ext: - pathfmt.adjust_extension(adj_ext) - if self.part: - pathfmt.part_move() - self.out.success(pathfmt.path, tries) + pathfmt.set_extension(adj_ext) return True def connect(self, url, offset): diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 01771e60..a4f077e2 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -193,6 +193,8 @@ class DownloadJob(Job): pp.run(self.pathfmt) # download succeeded + self.pathfmt.finalize() + self.out.success(self.pathfmt.path, 0) if self.archive: self.archive.add(keywords) diff --git a/gallery_dl/util.py b/gallery_dl/util.py index d488d039..78b8f88c 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -363,7 +363,7 @@ class PathFormat(): self.keywords = {} self.filename = "" self.directory = self.realdirectory = "" - self.path = self.realpath = self.partpath = "" + self.path = self.realpath = self.temppath = "" self.basedirectory = expand_path( extractor.config("base-directory", (".", "gallery-dl"))) @@ -383,7 +383,7 @@ class PathFormat(): def open(self, mode="wb"): """Open file and return a corresponding file object""" - return open(self.partpath or self.realpath, mode) + return open(self.temppath, mode) def exists(self, archive=None): """Return True if the file exists on disk or in 'archive'""" @@ -435,13 +435,6 @@ class PathFormat(): self.keywords["extension"] = extension self.build_path() - def adjust_extension(self, extension): - """Change filename extension of existing file""" - oldpath = self.realpath - self.set_extension(extension) - if not self.partpath: - os.replace(oldpath, self.realpath) - def build_path(self): """Use filename-keywords and directory to build a full path""" try: @@ -453,38 +446,43 @@ class PathFormat(): filename = os.sep + self.filename self.path = self.directory + filename self.realpath = self.realdirectory + filename + if not self.temppath: + self.temppath = self.realpath def part_enable(self, part_directory=None): """Enable .part file usage""" if self.has_extension: - self.partpath = self.realpath + ".part" + self.temppath = self.realpath + ".part" else: self.set_extension("part", False) - self.partpath = self.realpath + self.temppath = self.realpath if part_directory: - self.partpath = os.path.join( + self.temppath = os.path.join( part_directory, - os.path.basename(self.partpath), + os.path.basename(self.temppath), ) def part_size(self): """Return size of .part file""" - if self.partpath: - try: - return os.stat(self.partpath).st_size - except OSError: - pass + try: + return os.stat(self.temppath).st_size + except OSError: + pass return 0 - def part_move(self): - """Rename .part file to its actual filename""" + def finalize(self): + """Move tempfile to its target location""" + if self.temppath == self.realpath: + return + try: - os.replace(self.partpath, self.realpath) + os.replace(self.temppath, self.realpath) return except OSError: pass - shutil.copyfile(self.partpath, self.realpath) - os.unlink(self.partpath) + + shutil.copyfile(self.temppath, self.realpath) + os.unlink(self.temppath) @staticmethod def adjust_path(path): From 97189e50cd96b8242dacf556fe9b9fd8f13e4cc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 6 Jun 2018 20:49:52 +0200 Subject: [PATCH 06/11] [pp:zip] use temppath; add options --- gallery_dl/postprocessor/zip.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/gallery_dl/postprocessor/zip.py b/gallery_dl/postprocessor/zip.py index bd39bdab..cf7bbf1b 100644 --- a/gallery_dl/postprocessor/zip.py +++ b/gallery_dl/postprocessor/zip.py @@ -6,7 +6,7 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Add files to ZIP archive""" +"""Add files to ZIP archives""" from .common import PostProcessor import zipfile @@ -14,12 +14,26 @@ import zipfile class ZipPP(PostProcessor): + COMPRESSION_ALGORITHMS = { + "store": zipfile.ZIP_STORED, + "zip": zipfile.ZIP_DEFLATED, + "bzip2": zipfile.ZIP_BZIP2, + "lzma": zipfile.ZIP_LZMA, + } + def __init__(self, options): PostProcessor.__init__(self) + self.ext = "." + options.get("extension", "zip") + + algorithm = options.get("compression", "store") + if algorithm not in self.COMPRESSION_ALGORITHMS: + algorithm = "store" + self.compression = self.COMPRESSION_ALGORITHMS[algorithm] def run(self, pathfmt): - with zipfile.ZipFile(pathfmt.realdirectory + ".zip", "a") as zfile: - zfile.write(pathfmt.realpath, pathfmt.filename) + archive = pathfmt.realdirectory + self.ext + with zipfile.ZipFile(archive, "a", self.compression, True) as zfile: + zfile.write(pathfmt.temppath, pathfmt.filename) __postprocessor__ = ZipPP From 37d97ff02c29d249a81ac91c6033ac39bdc3eabc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 6 Jun 2018 21:10:28 +0200 Subject: [PATCH 07/11] [pp:classify] use temppath --- gallery_dl/postprocessor/classify.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/gallery_dl/postprocessor/classify.py b/gallery_dl/postprocessor/classify.py index f3a09c5a..a43b2b98 100644 --- a/gallery_dl/postprocessor/classify.py +++ b/gallery_dl/postprocessor/classify.py @@ -6,7 +6,7 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Categorize files by media type""" +"""Categorize files by file extension""" from .common import PostProcessor import os @@ -24,9 +24,9 @@ class ClassifyPP(PostProcessor): def __init__(self, options): PostProcessor.__init__(self) - mapping = options.get("map", self.DEFAULT_MAP) + mapping = options.get("mapping", self.DEFAULT_MAP) - self.map = { + self.mapping = { ext: directory for directory, exts in mapping.items() for ext in exts @@ -35,17 +35,10 @@ class ClassifyPP(PostProcessor): def run(self, pathfmt): ext = pathfmt.keywords["extension"] - if ext in self.map: - directory = self.map[ext] - path = os.path.join(pathfmt.realdirectory, directory) - try: - os.mkdir(path) - except FileExistsError: - pass - os.replace( - pathfmt.realpath, - os.path.join(path, pathfmt.filename) - ) + if ext in self.mapping: + path = pathfmt.realdirectory + os.sep + self.mapping[ext] + pathfmt.realpath = path + os.sep + pathfmt.filename + os.makedirs(path, exist_ok=True) __postprocessor__ = ClassifyPP From b344f2290f1a003cefce702407b992b9df3b8254 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 7 Jun 2018 22:27:36 +0200 Subject: [PATCH 08/11] fix downloader tests --- test/test_downloader.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/test_downloader.py b/test/test_downloader.py index 19cecb8b..21b0920f 100644 --- a/test/test_downloader.py +++ b/test/test_downloader.py @@ -54,9 +54,8 @@ class TestDownloaderBase(unittest.TestCase): pathfmt.set_keywords(kwdict) if content: - path = pathfmt.realpath + (".part" if part else "") mode = "w" + ("b" if isinstance(content, bytes) else "") - with open(path, mode) as file: + with pathfmt.open(mode) as file: file.write(content) return pathfmt @@ -65,20 +64,23 @@ class TestDownloaderBase(unittest.TestCase): extension, expected_extension=None): pathfmt = self._prepare_destination(input, extension=extension) success = self.downloader.download(url, pathfmt) - path = pathfmt.realpath # test successful download self.assertTrue(success, "downloading '{}' failed".format(url)) # test content mode = "r" + ("b" if isinstance(output, bytes) else "") - with open(path, mode) as file: + with pathfmt.open(mode) as file: content = file.read() self.assertEqual(content, output) # test filename extension self.assertEqual( - os.path.splitext(path)[1][1:], + pathfmt.keywords["extension"], + expected_extension, + ) + self.assertEqual( + os.path.splitext(pathfmt.realpath)[1][1:], expected_extension, ) From 7646bdbcfd18db73fb05312a07d08692a04f7313 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 7 Jun 2018 22:29:54 +0200 Subject: [PATCH 09/11] improve postprocessor initialization code --- gallery_dl/job.py | 59 ++++++++++++++++++++++++++++------------------ gallery_dl/util.py | 2 +- 2 files changed, 37 insertions(+), 24 deletions(-) diff --git a/gallery_dl/job.py b/gallery_dl/job.py index a4f077e2..f3f8ca74 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -159,7 +159,7 @@ class DownloadJob(Job): self.archive = None self.sleep = None self.downloaders = {} - self.postprocessors = [] + self.postprocessors = None self.out = output.select() def handle_url(self, url, keywords, fallback=None): @@ -189,8 +189,9 @@ class DownloadJob(Job): return # run post processors - for pp in self.postprocessors: - pp.run(self.pathfmt) + if self.postprocessors: + for pp in self.postprocessors: + pp.run(self.pathfmt) # download succeeded self.pathfmt.finalize() @@ -209,26 +210,8 @@ class DownloadJob(Job): if not self.pathfmt: self.pathfmt = util.PathFormat(self.extractor) self.sleep = self.extractor.config("sleep") - postprocs = self.extractor.config("postprocessor") - archive = self.extractor.config("archive") - - if postprocs: - for pp_dict in postprocs: - try: - name = pp_dict["name"] - pp_obj = postprocessor.find(name)(pp_dict) - except KeyError as exc: - postprocessor.log.warning("missing key %s", exc) - except Exception as exc: - postprocessor.log.warning( - "%s %s", exc.__class__.__name__, exc) - else: - self.postprocessors.append(pp_obj) - - if archive: - path = util.expand_path(archive) - self.archive = util.DownloadArchive(path, self.extractor) - + self._init_archive(self.extractor.config("archive")) + self._init_postprocessors(self.extractor.config("postprocessor")) self.pathfmt.set_directory(keywords) def handle_queue(self, url, keywords): @@ -250,6 +233,36 @@ class DownloadJob(Job): self.downloaders[scheme] = instance return instance + def _init_archive(self, archive): + if archive: + path = util.expand_path(archive) + self.archive = util.DownloadArchive(path, self.extractor) + + def _init_postprocessors(self, postprocessors): + if not postprocessors: + return + + self.postprocessors = [] + for pp_dict in postprocessors: + if "name" not in pp_dict: + postprocessor.log.warning("no 'name' specified") + continue + + name = pp_dict["name"] + pp_cls = postprocessor.find(name) + if not pp_cls: + postprocessor.log.warning("'%s' not found", name) + continue + + try: + pp_obj = pp_cls(pp_dict) + except Exception as exc: + postprocessor.log.error( + "%s: initialization failed: %s %s", + name, exc.__class__.__name__, exc) + else: + self.postprocessors.append(pp_obj) + class KeywordJob(Job): """Print available keywords""" diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 78b8f88c..131b8730 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -425,6 +425,7 @@ class PathFormat(): def set_keywords(self, keywords): """Set filename keywords""" self.keywords = keywords + self.temppath = "" self.has_extension = bool(keywords.get("extension")) if self.has_extension: self.build_path() @@ -455,7 +456,6 @@ class PathFormat(): self.temppath = self.realpath + ".part" else: self.set_extension("part", False) - self.temppath = self.realpath if part_directory: self.temppath = os.path.join( part_directory, From 2628911ba0bed7e60b0313ff881bd797cf3164c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 7 Jun 2018 22:40:59 +0200 Subject: [PATCH 10/11] [pp:exec] add 'async' option --- gallery_dl/postprocessor/common.py | 5 ++++- gallery_dl/postprocessor/exec.py | 16 ++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/gallery_dl/postprocessor/common.py b/gallery_dl/postprocessor/common.py index 2344611c..2873703e 100644 --- a/gallery_dl/postprocessor/common.py +++ b/gallery_dl/postprocessor/common.py @@ -8,8 +8,11 @@ """Common classes and constants used by postprocessor modules.""" +from . import log + class PostProcessor(): + log = log - def run(self, asd): + def run(self, pathfmt): raise NotImplementedError() diff --git a/gallery_dl/postprocessor/exec.py b/gallery_dl/postprocessor/exec.py index dcec5c03..a5a9f7bd 100644 --- a/gallery_dl/postprocessor/exec.py +++ b/gallery_dl/postprocessor/exec.py @@ -16,14 +16,22 @@ class ExecPP(PostProcessor): def __init__(self, options): PostProcessor.__init__(self) - self.args = options["args"] + self.args = options["command"] + if options.get("async", False): + self._exec = subprocess.Popen def run(self, pathfmt): - args = [ + self._exec([ arg.format_map(pathfmt.keywords) for arg in self.args - ] - subprocess.Popen(args) + ]) + + def _exec(self, args): + retcode = subprocess.Popen(args).wait() + if retcode: + self.log.warning( + "executing '%s' returned non-zero exit status %d", + " ".join(args), retcode) __postprocessor__ = ExecPP From baccf8a9588fd6b2a4149c76c7501bf7c9029e3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 8 Jun 2018 17:39:02 +0200 Subject: [PATCH 11/11] improve postprocessor handling - add pathfmt argument for __init__() - add finalization step - add option to keep or delete zipped files --- gallery_dl/job.py | 77 +++++++++++++++------------- gallery_dl/postprocessor/classify.py | 6 +-- gallery_dl/postprocessor/common.py | 6 ++- gallery_dl/postprocessor/exec.py | 2 +- gallery_dl/postprocessor/zip.py | 26 +++++++--- gallery_dl/util.py | 5 ++ 6 files changed, 75 insertions(+), 47 deletions(-) diff --git a/gallery_dl/job.py b/gallery_dl/job.py index f3f8ca74..219e7302 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -93,6 +93,7 @@ class Job(): "https://github.com/mikf/gallery-dl/issues ."), exc.__class__.__name__, exc) log.debug("Traceback", exc_info=True) + self.handle_finalize() def dispatch(self, msg): """Call the appropriate message handler""" @@ -137,6 +138,9 @@ class Job(): def handle_queue(self, url, keywords): """Handle Message.Queue""" + def handle_finalize(self): + """Handle job finalization""" + def update_kwdict(self, kwdict): """Update 'kwdict' with additional metadata""" kwdict["category"] = self.extractor.category @@ -207,12 +211,40 @@ class DownloadJob(Job): def handle_directory(self, keywords): """Set and create the target directory for downloads""" - if not self.pathfmt: - self.pathfmt = util.PathFormat(self.extractor) - self.sleep = self.extractor.config("sleep") - self._init_archive(self.extractor.config("archive")) - self._init_postprocessors(self.extractor.config("postprocessor")) + if self.pathfmt: + self.pathfmt.set_directory(keywords) + return + + # delayed initialization + self.pathfmt = util.PathFormat(self.extractor) self.pathfmt.set_directory(keywords) + self.sleep = self.extractor.config("sleep") + + archive = self.extractor.config("archive") + if archive: + path = util.expand_path(archive) + self.archive = util.DownloadArchive(path, self.extractor) + + postprocessors = self.extractor.config("postprocessors") + if postprocessors: + self.postprocessors = [] + for pp_dict in postprocessors: + if "name" not in pp_dict: + postprocessor.log.warning("no 'name' specified") + continue + name = pp_dict["name"] + pp_cls = postprocessor.find(name) + if not pp_cls: + postprocessor.log.warning("'%s' not found", name) + continue + try: + pp_obj = pp_cls(self.pathfmt, pp_dict) + except Exception as exc: + postprocessor.log.error( + "%s: initialization failed: %s %s", + name, exc.__class__.__name__, exc) + else: + self.postprocessors.append(pp_obj) def handle_queue(self, url, keywords): try: @@ -220,6 +252,11 @@ class DownloadJob(Job): except exception.NoExtractorError: self._write_unsupported(url) + def handle_finalize(self): + if self.postprocessors: + for pp in self.postprocessors: + pp.finalize() + def get_downloader(self, url): """Return, and possibly construct, a downloader suitable for 'url'""" pos = url.find(":") @@ -233,36 +270,6 @@ class DownloadJob(Job): self.downloaders[scheme] = instance return instance - def _init_archive(self, archive): - if archive: - path = util.expand_path(archive) - self.archive = util.DownloadArchive(path, self.extractor) - - def _init_postprocessors(self, postprocessors): - if not postprocessors: - return - - self.postprocessors = [] - for pp_dict in postprocessors: - if "name" not in pp_dict: - postprocessor.log.warning("no 'name' specified") - continue - - name = pp_dict["name"] - pp_cls = postprocessor.find(name) - if not pp_cls: - postprocessor.log.warning("'%s' not found", name) - continue - - try: - pp_obj = pp_cls(pp_dict) - except Exception as exc: - postprocessor.log.error( - "%s: initialization failed: %s %s", - name, exc.__class__.__name__, exc) - else: - self.postprocessors.append(pp_obj) - class KeywordJob(Job): """Print available keywords""" diff --git a/gallery_dl/postprocessor/classify.py b/gallery_dl/postprocessor/classify.py index a43b2b98..3af73f07 100644 --- a/gallery_dl/postprocessor/classify.py +++ b/gallery_dl/postprocessor/classify.py @@ -14,7 +14,7 @@ import os class ClassifyPP(PostProcessor): - DEFAULT_MAP = { + DEFAULT_MAPPING = { "Music" : ("mp3", "aac", "flac", "ogg", "wma", "m4a", "wav"), "Video" : ("flv", "ogv", "avi", "mp4", "mpg", "mpeg", "3gp", "mkv", "webm", "vob", "wmv"), @@ -22,9 +22,9 @@ class ClassifyPP(PostProcessor): "Archives" : ("zip", "rar", "7z", "tar", "gz", "bz2"), } - def __init__(self, options): + def __init__(self, pathfmt, options): PostProcessor.__init__(self) - mapping = options.get("mapping", self.DEFAULT_MAP) + mapping = options.get("mapping", self.DEFAULT_MAPPING) self.mapping = { ext: directory diff --git a/gallery_dl/postprocessor/common.py b/gallery_dl/postprocessor/common.py index 2873703e..af1c1ef5 100644 --- a/gallery_dl/postprocessor/common.py +++ b/gallery_dl/postprocessor/common.py @@ -12,7 +12,11 @@ from . import log class PostProcessor(): + """Base class for postprocessors""" log = log def run(self, pathfmt): - raise NotImplementedError() + """Execute the postprocessor for a file""" + + def finalize(self): + """Cleanup""" diff --git a/gallery_dl/postprocessor/exec.py b/gallery_dl/postprocessor/exec.py index a5a9f7bd..7d190402 100644 --- a/gallery_dl/postprocessor/exec.py +++ b/gallery_dl/postprocessor/exec.py @@ -14,7 +14,7 @@ import subprocess class ExecPP(PostProcessor): - def __init__(self, options): + def __init__(self, pathfmt, options): PostProcessor.__init__(self) self.args = options["command"] if options.get("async", False): diff --git a/gallery_dl/postprocessor/zip.py b/gallery_dl/postprocessor/zip.py index cf7bbf1b..fc5636f3 100644 --- a/gallery_dl/postprocessor/zip.py +++ b/gallery_dl/postprocessor/zip.py @@ -6,7 +6,7 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Add files to ZIP archives""" +"""Store files in ZIP archives""" from .common import PostProcessor import zipfile @@ -21,19 +21,31 @@ class ZipPP(PostProcessor): "lzma": zipfile.ZIP_LZMA, } - def __init__(self, options): + def __init__(self, pathfmt, options): PostProcessor.__init__(self) + self.delete = not options.get("keep-files", False) self.ext = "." + options.get("extension", "zip") - algorithm = options.get("compression", "store") if algorithm not in self.COMPRESSION_ALGORITHMS: + self.log.warning( + "unknown compression algorithm '%s'; falling back to 'store'", + algorithm) algorithm = "store" - self.compression = self.COMPRESSION_ALGORITHMS[algorithm] + + path = pathfmt.realdirectory + self.ext + self.zfile = zipfile.ZipFile( + path, "a", self.COMPRESSION_ALGORITHMS[algorithm], True) def run(self, pathfmt): - archive = pathfmt.realdirectory + self.ext - with zipfile.ZipFile(archive, "a", self.compression, True) as zfile: - zfile.write(pathfmt.temppath, pathfmt.filename) + # 'NameToInfo' is not officially documented, but it's available + # for all supported Python versions and using it directly is a lot + # better than calling getinfo() + if pathfmt.filename not in self.zfile.NameToInfo: + self.zfile.write(pathfmt.temppath, pathfmt.filename) + pathfmt.delete = self.delete + + def finalize(self): + self.zfile.close() __postprocessor__ = ZipPP diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 131b8730..8ed3e225 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -359,6 +359,7 @@ class PathFormat(): "directory", extractor.directory_fmt) self.formatter = Formatter(extractor.config("keywords-default")) + self.delete = False self.has_extension = False self.keywords = {} self.filename = "" @@ -472,6 +473,10 @@ class PathFormat(): def finalize(self): """Move tempfile to its target location""" + if self.delete: + os.unlink(self.temppath) + return + if self.temppath == self.realpath: return