[formatter] add 'Xb' format specifier - 'X' for bytes (#6582)

https://github.com/mikf/gallery-dl/issues/6582#issuecomment-3479362186
2025-11-03 12:35:47 +01:00
parent 0ecc1b6ead
commit 91a5fd85db
3 changed files with 42 additions and 8 deletions
--- a/docs/formatting.md
+++ b/docs/formatting.md
@@ -303,6 +303,16 @@ Format specifiers can be used for advanced formatting by using the options provi
    <td><code>{foo:X6/&nbsp;.../}</code></td>
    <td><code>Fo&nbsp;...</code></td>
 </tr>
+<tr>
+    <td rowspan="2"><code>Xb&lt;maxlen&gt;/&lt;ext&gt;/</code></td>
+    <td rowspan="2">Same as <code>X</code>, but <code>&lt;maxlen&gt;</code> is counted in bytes: the limit applies to the <a href="https://docs.python.org/3/library/stdtypes.html#bytes"><code>bytes()</code></a> representation of a string in <a href="https://docs.python.org/3/library/sys.html#sys.getfilesystemencoding">filesystem encoding</a></td>
+    <td><code>{foo_ja:Xb15/〜/}</code></td>
+    <td><code>フー・バー</code></td>
+</tr>
+<tr>
+    <td><code>{foo_ja:Xb8/〜/}</code></td>
+    <td><code>フ〜</code></td>
+</tr>
 <tr>
    <td><code>J&lt;separator&gt;/</code></td>
    <td>Concatenates elements of a list with <code>&lt;separator&gt;</code> using <a href="https://docs.python.org/3/library/stdtypes.html#str.join" rel="nofollow"><code>str.join()</code></a></td>
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -330,10 +330,10 @@ def _slice(indices):
    )


-def _bytesgetter(slice, encoding=sys.getfilesystemencoding()):
+def _bytesgetter(slice):

    def apply_slice_bytes(obj):
-        return obj.encode(encoding)[slice].decode(encoding, "ignore")
+        return obj.encode(_ENCODING)[slice].decode(_ENCODING, "ignore")

    return apply_slice_bytes

@@ -512,14 +512,26 @@ def _parse_sort(format_spec, default):

 def _parse_limit(format_spec, default):
    limit, hint, format_spec = format_spec.split(_SEPARATOR, 2)
-    limit = int(limit[1:])
-    limit_hint = limit - len(hint)
    fmt = _build_format_func(format_spec, default)

-    def apply_limit(obj):
-        if len(obj) > limit:
-            obj = obj[:limit_hint] + hint
-        return fmt(obj)
+    if limit[1:2] == "b":
+        limit = int(limit[2:])
+        limit_hint = limit - len(hint.encode(_ENCODING))
+
+        def apply_limit(obj):
+            objb = obj.encode(_ENCODING)
+            if len(objb) > limit:
+                # decode first: a cut character must not merge with hint
+                obj = objb[:limit_hint].decode(_ENCODING, "ignore") + hint
+            return fmt(obj)
+    else:
+        limit = int(limit[1:])
+        limit_hint = limit - len(hint)
+
+        def apply_limit(obj):
+            if len(obj) > limit:
+                obj = obj[:limit_hint] + hint
+            return fmt(obj)
    return apply_limit


@@ -540,6 +552,7 @@ class Literal():
 _literal = Literal()

 _CACHE = {}
+_ENCODING = sys.getfilesystemencoding()
 _SEPARATOR = "/"
 _FORMATTERS = {
    "E" : ExpressionFormatter,
--- a/test/test_formatter.py
+++ b/test/test_formatter.py
@@ -332,6 +332,17 @@ class TestFormatter(unittest.TestCase):
        with self.assertRaises(ValueError):
            self._run_test("{a:Xfoo/ */}", "hello wo *")

+    def test_specifier_limit_bytes(self):
+        self._run_test("{a:Xb20/ */}", "hElLo wOrLd")
+        self._run_test("{a:Xb10/ */}", "hElLo wO *")
+
+        self._run_test("{j:Xb50/〜/}", "げんそうきょう")
+        self._run_test("{j:Xb20/〜/}", "げんそうき〜")
+        self._run_test("{j:Xb20/ */}", "げんそうきょ *")
+
+        with self.assertRaises(ValueError):
+            self._run_test("{a:Xbfoo/ */}", "hello wo *")
+
    def test_specifier_map(self):
        self._run_test("{L:Mname/}" ,
                       "['John Doe', 'Jane Smith', 'Max Mustermann']")