[postprocessor:metadata] add 'sort' and 'separators' options

This commit is contained in:
Mike Fährmann
2023-02-07 18:28:14 +01:00
parent 8805bd38ab
commit b7337d810e
3 changed files with 84 additions and 36 deletions

View File

@@ -4240,6 +4240,20 @@ Description
Note: Only applies for ``"mode": "custom"``. Note: Only applies for ``"mode": "custom"``.
metadata.ascii
--------------
Type
``bool``
Default
``false``
Description
Escape all non-ASCII characters.
See the ``ensure_ascii`` argument of |json.dump()|_ for further details.
Note: Only applies for ``"mode": "json"`` and ``"jsonl"``.
metadata.indent metadata.indent
--------------- ---------------
Type Type
@@ -4255,6 +4269,35 @@ Description
Note: Only applies for ``"mode": "json"``. Note: Only applies for ``"mode": "json"``.
metadata.separators
-------------------
Type
``list`` with two ``string`` elements
Default
``[", ", ": "]`` (``[",", ": "]`` when an indent is in effect, e.g. by default for ``"mode": "json"``)
Description
``<item separator>`` - ``<key separator>`` pair:
the first string is written between items, the second between a key and its value.
See the ``separators`` argument of |json.dump()|_ for further details.
Note: Only applies for ``"mode": "json"`` and ``"jsonl"``.
metadata.sort
-------------
Type
``bool``
Default
``false``
Description
Sort output dictionaries by key.
See the ``sort_keys`` argument of |json.dump()|_ for further details.
Note: Only applies for ``"mode": "json"`` and ``"jsonl"``.
metadata.open metadata.open
------------- -------------
Type Type

View File

@@ -47,20 +47,12 @@ class MetadataPP(PostProcessor):
ext = "txt" ext = "txt"
elif mode == "jsonl": elif mode == "jsonl":
self.write = self._write_json self.write = self._write_json
self._json_encode = json.JSONEncoder( self._json_encode = self._make_encoder(options).encode
ensure_ascii=options.get("ascii", False),
sort_keys=True, indent=None, default=str,
).encode
omode = "a" omode = "a"
filename = "data.jsonl" filename = "data.jsonl"
else: else:
self.write = self._write_json self.write = self._write_json
self._json_encode = json.JSONEncoder( self._json_encode = self._make_encoder(options, 4).encode
ensure_ascii=options.get("ascii", False),
indent=options.get("indent", 4),
sort_keys=True,
default=str,
).encode
ext = "json" ext = "json"
directory = options.get("directory") directory = options.get("directory")
@@ -200,5 +192,15 @@ class MetadataPP(PostProcessor):
kwdict = util.filter_dict(kwdict) kwdict = util.filter_dict(kwdict)
fp.write(self._json_encode(kwdict) + "\n") fp.write(self._json_encode(kwdict) + "\n")
@staticmethod
def _make_encoder(options, indent=None):
    """Create a json.JSONEncoder configured from post-processor options.

    options: mapping of user settings; the keys read here are
        "ascii", "sort", "separators" and "indent".
    indent: fallback indentation used when options has no "indent" entry.

    Returns the configured json.JSONEncoder instance.
    """
    encoder_settings = {
        "ensure_ascii": options.get("ascii", False),
        "sort_keys": options.get("sort", False),
        # None when unset, leaving the separator choice to JSONEncoder
        "separators": options.get("separators"),
        # an explicit "indent" option wins over the per-mode fallback
        "indent": options.get("indent", indent),
        "check_circular": False,
        # values json cannot serialize natively are written as str(value)
        "default": str,
    }
    return json.JSONEncoder(**encoder_settings)
__postprocessor__ = MetadataPP __postprocessor__ = MetadataPP

View File

@@ -176,13 +176,11 @@ class MetadataTest(BasePostprocessorTest):
def test_metadata_json(self): def test_metadata_json(self):
pp = self._create({ pp = self._create({
"mode" : "json", "mode" : "json",
"ascii" : True, "extension" : "JSON",
"indent" : 2,
"extension": "JSON",
}, { }, {
"public" : "hello ワールド", "public" : "hello ワールド",
"_private" : "foo バー", "_private" : "foo バー",
}) })
self.assertEqual(pp.write , pp._write_json) self.assertEqual(pp.write , pp._write_json)
@@ -194,26 +192,31 @@ class MetadataTest(BasePostprocessorTest):
path = self.pathfmt.realpath + ".JSON" path = self.pathfmt.realpath + ".JSON"
m.assert_called_once_with(path, "w", encoding="utf-8") m.assert_called_once_with(path, "w", encoding="utf-8")
self.assertEqual(self._output(m), r"""{
"category": "test", if sys.hexversion >= 0x3060000:
"extension": "ext", # python 3.4 & 3.5 have random order without 'sort: True'
"filename": "file", self.assertEqual(self._output(m), """{
"public": "hello \u30ef\u30fc\u30eb\u30c9" "category": "test",
"filename": "file",
"extension": "ext",
"public": "hello ワールド"
} }
""") """)
def test_metadata_json_options(self): def test_metadata_json_options(self):
pp = self._create({ pp = self._create({
"mode" : "json", "mode" : "json",
"ascii" : False, "ascii" : True,
"private" : True, "sort" : True,
"indent" : None, "separators": [",", " : "],
"open" : "a", "private" : True,
"encoding" : "UTF-8", "indent" : None,
"extension": "JSON", "open" : "a",
"encoding" : "UTF-8",
"extension" : "JSON",
}, { }, {
"public" : "hello ワールド", "public" : "hello ワールド",
"_private" : "foo バー", "_private" : "foo バー",
}) })
self.assertEqual(pp.write , pp._write_json) self.assertEqual(pp.write , pp._write_json)
@@ -226,11 +229,11 @@ class MetadataTest(BasePostprocessorTest):
path = self.pathfmt.realpath + ".JSON" path = self.pathfmt.realpath + ".JSON"
m.assert_called_once_with(path, "a", encoding="UTF-8") m.assert_called_once_with(path, "a", encoding="UTF-8")
self.assertEqual(self._output(m), """{\ self.assertEqual(self._output(m), """{\
"_private": "foo バール", \ "_private" : "foo \\u30d0\\u30fc",\
"category": "test", \ "category" : "test",\
"extension": "ext", \ "extension" : "ext",\
"filename": "file", \ "filename" : "file",\
"public": "hello ワールド"} "public" : "hello \\u30ef\\u30fc\\u30eb\\u30c9"}
""") """)
def test_metadata_tags(self): def test_metadata_tags(self):
@@ -363,7 +366,7 @@ class MetadataTest(BasePostprocessorTest):
m.assert_called_once_with(path, "w", encoding="utf-8") m.assert_called_once_with(path, "w", encoding="utf-8")
def test_metadata_stdout(self): def test_metadata_stdout(self):
self._create({"filename": "-", "indent": None}) self._create({"filename": "-", "indent": None, "sort": True})
with patch("sys.stdout", Mock()) as m: with patch("sys.stdout", Mock()) as m:
self._trigger() self._trigger()