[postprocessor:metadata] add 'sort' and 'separators' options
This commit is contained in:
@@ -4240,6 +4240,20 @@ Description
|
|||||||
Note: Only applies for ``"mode": "custom"``.
|
Note: Only applies for ``"mode": "custom"``.
|
||||||
|
|
||||||
|
|
||||||
|
metadata.ascii
|
||||||
|
--------------
|
||||||
|
Type
|
||||||
|
``bool``
|
||||||
|
Default
|
||||||
|
``false``
|
||||||
|
Description
|
||||||
|
Escape all non-ASCII characters.
|
||||||
|
|
||||||
|
See the ``ensure_ascii`` argument of |json.dump()|_ for further details.
|
||||||
|
|
||||||
|
Note: Only applies for ``"mode": "json"`` and ``"jsonl"``.
|
||||||
|
|
||||||
|
|
||||||
metadata.indent
|
metadata.indent
|
||||||
---------------
|
---------------
|
||||||
Type
|
Type
|
||||||
@@ -4255,6 +4269,35 @@ Description
|
|||||||
Note: Only applies for ``"mode": "json"``.
|
Note: Only applies for ``"mode": "json"``.
|
||||||
|
|
||||||
|
|
||||||
|
metadata.separators
|
||||||
|
-------------------
|
||||||
|
Type
|
||||||
|
``list`` with two ``string`` elements
|
||||||
|
Default
|
||||||
|
``[", ", ": "]``
|
||||||
|
Description
|
||||||
|
``<item separator>`` - ``<key separator>`` pair
|
||||||
|
to separate JSON items from each other and keys from their values with.
|
||||||
|
|
||||||
|
See the ``separators`` argument of |json.dump()|_ for further details.
|
||||||
|
|
||||||
|
Note: Only applies for ``"mode": "json"`` and ``"jsonl"``.
|
||||||
|
|
||||||
|
|
||||||
|
metadata.sort
|
||||||
|
-------------
|
||||||
|
Type
|
||||||
|
``bool``
|
||||||
|
Default
|
||||||
|
``false``
|
||||||
|
Description
|
||||||
|
Sort the keys of each JSON object in the output.
|
||||||
|
|
||||||
|
See the ``sort_keys`` argument of |json.dump()|_ for further details.
|
||||||
|
|
||||||
|
Note: Only applies for ``"mode": "json"`` and ``"jsonl"``.
|
||||||
|
|
||||||
|
|
||||||
metadata.open
|
metadata.open
|
||||||
-------------
|
-------------
|
||||||
Type
|
Type
|
||||||
|
|||||||
@@ -47,20 +47,12 @@ class MetadataPP(PostProcessor):
|
|||||||
ext = "txt"
|
ext = "txt"
|
||||||
elif mode == "jsonl":
|
elif mode == "jsonl":
|
||||||
self.write = self._write_json
|
self.write = self._write_json
|
||||||
self._json_encode = json.JSONEncoder(
|
self._json_encode = self._make_encoder(options).encode
|
||||||
ensure_ascii=options.get("ascii", False),
|
|
||||||
sort_keys=True, indent=None, default=str,
|
|
||||||
).encode
|
|
||||||
omode = "a"
|
omode = "a"
|
||||||
filename = "data.jsonl"
|
filename = "data.jsonl"
|
||||||
else:
|
else:
|
||||||
self.write = self._write_json
|
self.write = self._write_json
|
||||||
self._json_encode = json.JSONEncoder(
|
self._json_encode = self._make_encoder(options, 4).encode
|
||||||
ensure_ascii=options.get("ascii", False),
|
|
||||||
indent=options.get("indent", 4),
|
|
||||||
sort_keys=True,
|
|
||||||
default=str,
|
|
||||||
).encode
|
|
||||||
ext = "json"
|
ext = "json"
|
||||||
|
|
||||||
directory = options.get("directory")
|
directory = options.get("directory")
|
||||||
@@ -200,5 +192,15 @@ class MetadataPP(PostProcessor):
|
|||||||
kwdict = util.filter_dict(kwdict)
|
kwdict = util.filter_dict(kwdict)
|
||||||
fp.write(self._json_encode(kwdict) + "\n")
|
fp.write(self._json_encode(kwdict) + "\n")
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _make_encoder(options, indent=None):
|
||||||
|
return json.JSONEncoder(
|
||||||
|
ensure_ascii=options.get("ascii", False),
|
||||||
|
sort_keys=options.get("sort", False),
|
||||||
|
separators=options.get("separators"),
|
||||||
|
indent=options.get("indent", indent),
|
||||||
|
check_circular=False, default=str,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
__postprocessor__ = MetadataPP
|
__postprocessor__ = MetadataPP
|
||||||
|
|||||||
@@ -176,13 +176,11 @@ class MetadataTest(BasePostprocessorTest):
|
|||||||
|
|
||||||
def test_metadata_json(self):
|
def test_metadata_json(self):
|
||||||
pp = self._create({
|
pp = self._create({
|
||||||
"mode" : "json",
|
"mode" : "json",
|
||||||
"ascii" : True,
|
"extension" : "JSON",
|
||||||
"indent" : 2,
|
|
||||||
"extension": "JSON",
|
|
||||||
}, {
|
}, {
|
||||||
"public" : "hello ワールド",
|
"public" : "hello ワールド",
|
||||||
"_private" : "foo バール",
|
"_private" : "foo バー",
|
||||||
})
|
})
|
||||||
|
|
||||||
self.assertEqual(pp.write , pp._write_json)
|
self.assertEqual(pp.write , pp._write_json)
|
||||||
@@ -194,26 +192,31 @@ class MetadataTest(BasePostprocessorTest):
|
|||||||
|
|
||||||
path = self.pathfmt.realpath + ".JSON"
|
path = self.pathfmt.realpath + ".JSON"
|
||||||
m.assert_called_once_with(path, "w", encoding="utf-8")
|
m.assert_called_once_with(path, "w", encoding="utf-8")
|
||||||
self.assertEqual(self._output(m), r"""{
|
|
||||||
"category": "test",
|
if sys.hexversion >= 0x3060000:
|
||||||
"extension": "ext",
|
# python 3.4 & 3.5 have random order without 'sort: True'
|
||||||
"filename": "file",
|
self.assertEqual(self._output(m), """{
|
||||||
"public": "hello \u30ef\u30fc\u30eb\u30c9"
|
"category": "test",
|
||||||
|
"filename": "file",
|
||||||
|
"extension": "ext",
|
||||||
|
"public": "hello ワールド"
|
||||||
}
|
}
|
||||||
""")
|
""")
|
||||||
|
|
||||||
def test_metadata_json_options(self):
|
def test_metadata_json_options(self):
|
||||||
pp = self._create({
|
pp = self._create({
|
||||||
"mode" : "json",
|
"mode" : "json",
|
||||||
"ascii" : False,
|
"ascii" : True,
|
||||||
"private" : True,
|
"sort" : True,
|
||||||
"indent" : None,
|
"separators": [",", " : "],
|
||||||
"open" : "a",
|
"private" : True,
|
||||||
"encoding" : "UTF-8",
|
"indent" : None,
|
||||||
"extension": "JSON",
|
"open" : "a",
|
||||||
|
"encoding" : "UTF-8",
|
||||||
|
"extension" : "JSON",
|
||||||
}, {
|
}, {
|
||||||
"public" : "hello ワールド",
|
"public" : "hello ワールド",
|
||||||
"_private" : "foo バール",
|
"_private" : "foo バー",
|
||||||
})
|
})
|
||||||
|
|
||||||
self.assertEqual(pp.write , pp._write_json)
|
self.assertEqual(pp.write , pp._write_json)
|
||||||
@@ -226,11 +229,11 @@ class MetadataTest(BasePostprocessorTest):
|
|||||||
path = self.pathfmt.realpath + ".JSON"
|
path = self.pathfmt.realpath + ".JSON"
|
||||||
m.assert_called_once_with(path, "a", encoding="UTF-8")
|
m.assert_called_once_with(path, "a", encoding="UTF-8")
|
||||||
self.assertEqual(self._output(m), """{\
|
self.assertEqual(self._output(m), """{\
|
||||||
"_private": "foo バール", \
|
"_private" : "foo \\u30d0\\u30fc",\
|
||||||
"category": "test", \
|
"category" : "test",\
|
||||||
"extension": "ext", \
|
"extension" : "ext",\
|
||||||
"filename": "file", \
|
"filename" : "file",\
|
||||||
"public": "hello ワールド"}
|
"public" : "hello \\u30ef\\u30fc\\u30eb\\u30c9"}
|
||||||
""")
|
""")
|
||||||
|
|
||||||
def test_metadata_tags(self):
|
def test_metadata_tags(self):
|
||||||
@@ -363,7 +366,7 @@ class MetadataTest(BasePostprocessorTest):
|
|||||||
m.assert_called_once_with(path, "w", encoding="utf-8")
|
m.assert_called_once_with(path, "w", encoding="utf-8")
|
||||||
|
|
||||||
def test_metadata_stdout(self):
|
def test_metadata_stdout(self):
|
||||||
self._create({"filename": "-", "indent": None})
|
self._create({"filename": "-", "indent": None, "sort": True})
|
||||||
|
|
||||||
with patch("sys.stdout", Mock()) as m:
|
with patch("sys.stdout", Mock()) as m:
|
||||||
self._trigger()
|
self._trigger()
|
||||||
|
|||||||
Reference in New Issue
Block a user