From 91a5fd85db825e7ff9151867091b179fa9f6f830 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Mon, 3 Nov 2025 12:35:47 +0100
Subject: [PATCH] [formatter] add 'Xb' format specifier - 'X' for bytes (#6582)

https://github.com/mikf/gallery-dl/issues/6582#issuecomment-3479362186
---
 docs/formatting.md      | 10 ++++++++++
 gallery_dl/formatter.py | 29 +++++++++++++++++++++--------
 test/test_formatter.py  | 11 +++++++++++
 3 files changed, 42 insertions(+), 8 deletions(-)
diff --git a/docs/formatting.md b/docs/formatting.md
index 1368356b..b81069cf 100644
--- a/docs/formatting.md
+++ b/docs/formatting.md
@@ -303,6 +303,16 @@ Format specifiers can be used for advanced formatting by using the options provi
     <td><code>{foo:X6/&nbsp;.../}</code></td>
     <td><code>Fo&nbsp;...</code></td>
 </tr>
+<tr>
+    <td rowspan="2"><code>Xb&lt;maxlen&gt;/&lt;ext&gt;/</code></td>
+    <td rowspan="2">Same as <code>X</code>, but <code>&lt;maxlen&gt;</code> is measured in <a href="https://docs.python.org/3/library/stdtypes.html#bytes"><code>bytes()</code></a> of the string encoded in <a href="https://docs.python.org/3/library/sys.html#sys.getfilesystemencoding">filesystem encoding</a> instead of in characters</td>
+    <td><code>{foo_ja:Xb15/〜/}</code></td>
+    <td><code>フー・バー</code></td>
+</tr>
+<tr>
+    <td><code>{foo_ja:Xb8/〜/}</code></td>
+    <td><code>フ〜</code></td>
+</tr>
 <tr>
     <td><code>J&lt;separator&gt;/</code></td>
     <td>Concatenates elements of a list with <code>&lt;separator&gt;</code> using <a href="https://docs.python.org/3/library/stdtypes.html#str.join" rel="nofollow"><code>str.join()</code></a></td>
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index 8b5e7adf..04f5cc8d 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -330,10 +330,10 @@ def _slice(indices):
     )
 
 
-def _bytesgetter(slice, encoding=sys.getfilesystemencoding()):
+def _bytesgetter(slice):  # build a func that slices a string's encoded bytes
 
     def apply_slice_bytes(obj):
-        return obj.encode(encoding)[slice].decode(encoding, "ignore")
+        return obj.encode(_ENCODING)[slice].decode(_ENCODING, "ignore")
 
     return apply_slice_bytes
 
@@ -512,14 +512,26 @@ def _parse_sort(format_spec, default):
 
 def _parse_limit(format_spec, default):
     limit, hint, format_spec = format_spec.split(_SEPARATOR, 2)
-    limit = int(limit[1:])
-    limit_hint = limit - len(hint)
     fmt = _build_format_func(format_spec, default)
 
-    def apply_limit(obj):
-        if len(obj) > limit:
-            obj = obj[:limit_hint] + hint
-        return fmt(obj)
+    if limit[1:2] == "b":  # slice: a bare 'X' has no index 1
+        hint_len = len(hint.encode(_ENCODING))  # hint size in bytes
+        limit = int(limit[2:])
+        limit_hint = limit - hint_len  # bytes left for 'obj' itself
+
+        def apply_limit(obj):
+            objb = obj.encode(_ENCODING)
+            if len(objb) > limit:  # drop a split char before adding hint
+                obj = objb[:limit_hint].decode(_ENCODING, "ignore") + hint
+            return fmt(obj)
+    else:
+        limit = int(limit[1:])
+        limit_hint = limit - len(hint)
+
+        def apply_limit(obj):
+            if len(obj) > limit:
+                obj = obj[:limit_hint] + hint
+            return fmt(obj)
     return apply_limit
 
 
@@ -540,6 +552,7 @@ class Literal():
 _literal = Literal()
 
 _CACHE = {}
+_ENCODING = sys.getfilesystemencoding()
 _SEPARATOR = "/"
 _FORMATTERS = {
     "E" : ExpressionFormatter,
diff --git a/test/test_formatter.py b/test/test_formatter.py
index f08ae49c..b4603c06 100644
--- a/test/test_formatter.py
+++ b/test/test_formatter.py
@@ -332,6 +332,17 @@ class TestFormatter(unittest.TestCase):
         with self.assertRaises(ValueError):
             self._run_test("{a:Xfoo/ */}", "hello wo *")
 
+    def test_specifier_limit_bytes(self):  # 'Xb': limits counted in bytes
+        self._run_test("{a:Xb20/ */}", "hElLo wOrLd")
+        self._run_test("{a:Xb10/ */}", "hElLo wO *")
+
+        self._run_test("{j:Xb50/〜/}", "げんそうきょう")
+        self._run_test("{j:Xb20/〜/}", "げんそうき〜")  # NOTE(review): assumes UTF-8
+        self._run_test("{j:Xb20/ */}", "げんそうきょ *")
+
+        with self.assertRaises(ValueError):
+            self._run_test("{a:Xbfoo/ */}", "hello wo *")
+
     def test_specifier_map(self):
         self._run_test("{L:Mname/}" ,
                        "['John Doe', 'Jane Smith', 'Max Mustermann']")