[formatter] add 'Xb' format specifier - 'X' for bytes (#6582)

https://github.com/mikf/gallery-dl/issues/6582#issuecomment-3479362186
This commit is contained in:
Mike Fährmann
2025-11-03 12:35:47 +01:00
parent 0ecc1b6ead
commit 91a5fd85db
3 changed files with 42 additions and 8 deletions

View File

@@ -303,6 +303,16 @@ Format specifiers can be used for advanced formatting by using the options provi
<td><code>{foo:X6/&nbsp;.../}</code></td> <td><code>{foo:X6/&nbsp;.../}</code></td>
<td><code>Fo&nbsp;...</code></td> <td><code>Fo&nbsp;...</code></td>
</tr> </tr>
<tr>
<td rowspan="2"><code>Xb&lt;maxlen&gt;/&lt;ext&gt;/</code></td>
<td rowspan="2">Same as <code>X</code>, but applies to the <a href="https://docs.python.org/3/library/stdtypes.html#bytes"><code>bytes()</code></a> representation of a string in <a href="https://docs.python.org/3/library/sys.html#sys.getfilesystemencoding">filesystem encoding</a></td>
<td><code>{foo_ja:Xb15/〜/}</code></td>
<td><code>フー・バー</code></td>
</tr>
<tr>
<td><code>{foo_ja:Xb8/〜/}</code></td>
<td><code>フ〜</code></td>
</tr>
<tr> <tr>
<td><code>J&lt;separator&gt;/</code></td> <td><code>J&lt;separator&gt;/</code></td>
<td>Concatenates elements of a list with <code>&lt;separator&gt;</code> using <a href="https://docs.python.org/3/library/stdtypes.html#str.join" rel="nofollow"><code>str.join()</code></a></td> <td>Concatenates elements of a list with <code>&lt;separator&gt;</code> using <a href="https://docs.python.org/3/library/stdtypes.html#str.join" rel="nofollow"><code>str.join()</code></a></td>

View File

@@ -330,10 +330,10 @@ def _slice(indices):
) )
def _bytesgetter(slice):
    """Build a function applying 'slice' to the encoded bytes of a string

    The returned callable encodes its argument with the module-level
    _ENCODING, slices the resulting bytes, and decodes the slice again.
    Bytes that no longer form a complete character after slicing are
    dropped by the "ignore" error handler.
    """
    def apply_slice_bytes(obj):
        encoded = obj.encode(_ENCODING)
        selected = encoded[slice]
        return selected.decode(_ENCODING, "ignore")
    return apply_slice_bytes
@@ -512,14 +512,26 @@ def _parse_sort(format_spec, default):
def _parse_limit(format_spec, default):
    """Build the formatter for 'X<maxlen>/<ext>/' and 'Xb<maxlen>/<ext>/'

    'X'  limits a value to <maxlen> characters.
    'Xb' limits a value to <maxlen> bytes of its _ENCODING representation.

    A value exceeding the limit is cut short and <ext> is appended, with
    the cut position chosen so that the length of <ext> counts towards
    <maxlen>. The result is passed on to the formatter built from the
    remaining format specification.

    Raises ValueError when <maxlen> is missing or not an integer.
    """
    limit, hint, format_spec = format_spec.split(_SEPARATOR, 2)
    fmt = _build_format_func(format_spec, default)

    # Slice instead of indexing: a bare "X" must not raise IndexError here
    # but fall through to int("") below and raise ValueError like any
    # other malformed <maxlen>.
    if limit[1:2] == "b":
        limit = int(limit[2:])
        limit_hint = limit - len(hint.encode(_ENCODING))

        def apply_limit(obj):
            objb = obj.encode(_ENCODING)
            if len(objb) > limit:
                # Decode the truncated prefix on its own before appending
                # the hint. Decoding prefix and hint bytes together lets a
                # dangling lead byte of a cut multibyte character swallow
                # the first byte of the hint in encodings like Shift-JIS.
                obj = objb[:limit_hint].decode(_ENCODING, "ignore") + hint
            return fmt(obj)
    else:
        limit = int(limit[1:])
        limit_hint = limit - len(hint)

        def apply_limit(obj):
            if len(obj) > limit:
                obj = obj[:limit_hint] + hint
            return fmt(obj)

    return apply_limit
@@ -540,6 +552,7 @@ class Literal():
_literal = Literal() _literal = Literal()
_CACHE = {} _CACHE = {}
_ENCODING = sys.getfilesystemencoding()
_SEPARATOR = "/" _SEPARATOR = "/"
_FORMATTERS = { _FORMATTERS = {
"E" : ExpressionFormatter, "E" : ExpressionFormatter,

View File

@@ -332,6 +332,17 @@ class TestFormatter(unittest.TestCase):
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
self._run_test("{a:Xfoo/ */}", "hello wo *") self._run_test("{a:Xfoo/ */}", "hello wo *")
def test_specifier_limit_bytes(self):
    """Check the 'Xb' specifier against ASCII and multibyte values"""
    # (format string, expected result), checked in this order
    cases = (
        ("{a:Xb20/ */}", "hElLo wOrLd"),
        ("{a:Xb10/ */}", "hElLo wO *"),
        ("{j:Xb50/〜/}", "げんそうきょう"),
        ("{j:Xb20/〜/}", "げんそうき〜"),
        ("{j:Xb20/ */}", "げんそうきょ *"),
    )
    for format_string, expected in cases:
        self._run_test(format_string, expected)

    # a non-numeric limit is rejected
    with self.assertRaises(ValueError):
        self._run_test("{a:Xbfoo/ */}", "hello wo *")
def test_specifier_map(self): def test_specifier_map(self):
self._run_test("{L:Mname/}" , self._run_test("{L:Mname/}" ,
"['John Doe', 'Jane Smith', 'Max Mustermann']") "['John Doe', 'Jane Smith', 'Max Mustermann']")