[postprocessor:metadata] speed up JSON encoding
This commit is contained in:
@@ -10,6 +10,7 @@
|
||||
|
||||
from .common import PostProcessor
|
||||
from .. import util, formatter
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
|
||||
@@ -46,14 +47,20 @@ class MetadataPP(PostProcessor):
|
||||
ext = "txt"
|
||||
elif mode == "jsonl":
|
||||
self.write = self._write_json
|
||||
self.indent = None
|
||||
self.ascii = options.get("ascii", False)
|
||||
self._json_encode = json.JSONEncoder(
|
||||
ensure_ascii=options.get("ascii", False),
|
||||
sort_keys=True, indent=None, default=str,
|
||||
).encode
|
||||
omode = "a"
|
||||
filename = "data.jsonl"
|
||||
else:
|
||||
self.write = self._write_json
|
||||
self.indent = options.get("indent", 4)
|
||||
self.ascii = options.get("ascii", False)
|
||||
self._json_encode = json.JSONEncoder(
|
||||
ensure_ascii=options.get("ascii", False),
|
||||
indent=options.get("indent", 4),
|
||||
sort_keys=True,
|
||||
default=str,
|
||||
).encode
|
||||
ext = "json"
|
||||
|
||||
directory = options.get("directory")
|
||||
@@ -191,7 +198,7 @@ class MetadataPP(PostProcessor):
|
||||
def _write_json(self, fp, kwdict):
    """Write 'kwdict' to 'fp' as one JSON document followed by a newline.

    Unless 'self.private' is set, 'kwdict' is first passed through
    util.filter_dict() (presumably dropping underscore-prefixed keys;
    verify against util).  Serialization uses 'self._json_encode', the
    bound JSONEncoder.encode method prepared when the mode was selected.
    """
    if not self.private:
        kwdict = util.filter_dict(kwdict)
    # The former util.dump_json(kwdict, fp, self.ascii, self.indent) call
    # is superseded by the pre-built encoder.  Keeping both would emit
    # every record twice and rely on the replaced 'ascii'/'indent'
    # attributes.
    fp.write(self._json_encode(kwdict) + "\n")
|
||||
|
||||
|
||||
__postprocessor__ = MetadataPP
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2019-2022 Mike Fährmann
|
||||
# Copyright 2019-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -171,9 +171,8 @@ class MetadataTest(BasePostprocessorTest):
|
||||
|
||||
# default arguments
|
||||
self.assertEqual(pp.write , pp._write_json)
|
||||
self.assertEqual(pp.ascii , False)
|
||||
self.assertEqual(pp.indent , 4)
|
||||
self.assertEqual(pp.extension, "json")
|
||||
self.assertTrue(callable(pp._json_encode))
|
||||
|
||||
def test_metadata_json(self):
|
||||
pp = self._create({
|
||||
@@ -182,26 +181,56 @@ class MetadataTest(BasePostprocessorTest):
|
||||
"indent" : 2,
|
||||
"extension": "JSON",
|
||||
}, {
|
||||
"public" : "hello",
|
||||
"_private" : "world",
|
||||
"public" : "hello ワールド",
|
||||
"_private" : "foo バール",
|
||||
})
|
||||
|
||||
self.assertEqual(pp.write , pp._write_json)
|
||||
self.assertEqual(pp.ascii , True)
|
||||
self.assertEqual(pp.indent , 2)
|
||||
self.assertEqual(pp.extension, "JSON")
|
||||
self.assertTrue(callable(pp._json_encode))
|
||||
|
||||
with patch("builtins.open", mock_open()) as m:
|
||||
self._trigger()
|
||||
|
||||
path = self.pathfmt.realpath + ".JSON"
|
||||
m.assert_called_once_with(path, "w", encoding="utf-8")
|
||||
self.assertEqual(self._output(m), """{
|
||||
self.assertEqual(self._output(m), r"""{
|
||||
"category": "test",
|
||||
"extension": "ext",
|
||||
"filename": "file",
|
||||
"public": "hello"
|
||||
"public": "hello \u30ef\u30fc\u30eb\u30c9"
|
||||
}
|
||||
""")
|
||||
|
||||
def test_metadata_json_options(self):
    """JSON mode with non-default options.

    Uses no indentation, non-ASCII output, private keys included,
    append mode, and a custom encoding and extension.
    """
    options = {
        "mode"     : "json",
        "ascii"    : False,
        "private"  : True,
        "indent"   : None,
        "open"     : "a",
        "encoding" : "UTF-8",
        "extension": "JSON",
    }
    metadata = {
        "public"   : "hello ワールド",
        "_private" : "foo バール",
    }
    pp = self._create(options, metadata)

    self.assertEqual(pp.write    , pp._write_json)
    self.assertEqual(pp.extension, "JSON")
    self.assertTrue(callable(pp._json_encode))

    with patch("builtins.open", mock_open()) as opened:
        self._trigger()

    # the target path is read only after the postprocessor has run
    expected_path = self.pathfmt.realpath + ".JSON"
    opened.assert_called_once_with(expected_path, "a", encoding="UTF-8")

    # single-line document: sorted keys, raw Unicode, trailing newline
    self.assertEqual(self._output(opened), """{\
"_private": "foo バール", \
"category": "test", \
"extension": "ext", \
"filename": "file", \
"public": "hello ワールド"}
""")
|
||||
|
||||
def test_metadata_tags(self):
|
||||
|
||||
Reference in New Issue
Block a user