implement a post-processor module to compare file versions

(#530)
This commit is contained in:
Mike Fährmann
2020-01-18 21:08:25 +01:00
parent 32d7195d08
commit 254f7c3999
3 changed files with 91 additions and 1 deletions

View File

@@ -1421,6 +1421,33 @@ Description A mapping from directory names to filename extensions that should
=========== =====
compare
-------
| Compare versions of the same file and replace/enumerate them on mismatch
| (requires `downloader.*.part`_ = ``true`` and `extractor.*.skip`_ = ``false``)
compare.action
--------------
=========== =====
Type ``string``
Default ``"replace"``
Description The action to take when files do not compare as equal.
* ``"replace"``: Replace/Overwrite the old version with the new one
* ``"enumerate"``: Add an enumeration index to the filename of the new
version like `skip = "enumerate" <extractor.*.skip_>`__
=========== =====
compare.shallow
---------------
=========== =====
Type ``bool``
Default ``false``
Description Only compare file sizes. Do not read and compare their content.
=========== =====
exec
----

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2018-2019 Mike Fährmann
# Copyright 2018-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -13,6 +13,7 @@ import logging
modules = [
"classify",
"compare",
"exec",
"metadata",
"mtime",

View File

@@ -0,0 +1,62 @@
# -*- coding: utf-8 -*-
# Copyright 2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Compare versions of the same file and replace/enumerate them on mismatch"""
from .common import PostProcessor
import os
class ComparePP(PostProcessor):
    """Post-processor comparing a new download with the existing file.

    Options read from ``options``:

    * ``action``: when equal to ``"enumerate"``, ``run`` is swapped for the
      enumerating variant, which adds a numeric prefix on mismatch.
    * ``shallow``: when truthy, ``compare`` is swapped for the size-only
      check and file contents are never read.
    """

    def __init__(self, pathfmt, options):
        """Select the run/compare strategy from ``options``.

        ``pathfmt`` is accepted but not used by this constructor.
        """
        PostProcessor.__init__(self)

        # Both switches shadow the class-level method with an instance
        # attribute, so callers keep using run() / compare() unchanged.
        action = options.get("action")
        if action == "enumerate":
            self.run = self._run_enumerate

        shallow = options.get("shallow")
        if shallow:
            self.compare = self._compare_size

    def run(self, pathfmt):
        """Set ``pathfmt.delete`` when both file versions compare equal.

        An ``OSError`` raised while comparing (for instance when one of the
        two paths cannot be accessed) is swallowed and nothing is changed.
        """
        try:
            identical = self.compare(pathfmt.realpath, pathfmt.temppath)
            if identical:
                pathfmt.delete = True
        except OSError:
            return

    def _run_enumerate(self, pathfmt):
        """Try enumerated filenames until an identical file is found.

        For every mismatch the prefix ``"<n>."`` (n = 1, 2, ...) is stored
        in ``pathfmt.prefix`` and ``pathfmt.set_extension`` is called again
        with the current extension. Once a comparison succeeds,
        ``pathfmt.delete`` is set. An ``OSError`` ends the search without
        setting ``delete``.
        """
        index = 1
        try:
            while True:
                if self.compare(pathfmt.realpath, pathfmt.temppath):
                    break
                pathfmt.prefix = "{}.".format(index)
                # NOTE(review): presumably rebuilds realpath with the new
                # prefix -- confirm against the path formatter.
                pathfmt.set_extension(pathfmt.extension, False)
                index += 1
            pathfmt.delete = True
        except OSError:
            pass

    def compare(self, f1, f2):
        """Return a truthy value if ``f1`` and ``f2`` match in size and content.

        The content check only runs when the size check succeeds.
        """
        same_size = self._compare_size(f1, f2)
        if not same_size:
            return same_size
        return self._compare_content(f1, f2)

    @staticmethod
    def _compare_size(f1, f2):
        """Return True if both paths report the same ``st_size``."""
        first_size = os.stat(f1).st_size
        second_size = os.stat(f2).st_size
        return first_size == second_size

    @staticmethod
    def _compare_content(f1, f2):
        """Return True if both files hold identical bytes.

        The files are read in lockstep, 16384 bytes at a time; the first
        differing pair of chunks ends the comparison with False.
        """
        chunk_size = 16384
        with open(f1, "rb") as first, open(f2, "rb") as second:
            chunk1 = first.read(chunk_size)
            chunk2 = second.read(chunk_size)
            while chunk1 == chunk2:
                if not chunk1:
                    # Both files reached EOF without any difference.
                    return True
                chunk1 = first.read(chunk_size)
                chunk2 = second.read(chunk_size)
            return False
# Module-level name pointing at this module's post-processor class;
# presumably looked up by the package's module loader -- TODO confirm.
__postprocessor__ = ComparePP