From 91a5fd85db825e7ff9151867091b179fa9f6f830 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 3 Nov 2025 12:35:47 +0100 Subject: [PATCH] [formatter] add 'Xb' format specifier - 'X' for bytes (#6582) https://github.com/mikf/gallery-dl/issues/6582#issuecomment-3479362186 --- docs/formatting.md | 10 ++++++++++ gallery_dl/formatter.py | 29 +++++++++++++++++++++-------- test/test_formatter.py | 11 +++++++++++ 3 files changed, 42 insertions(+), 8 deletions(-) diff --git a/docs/formatting.md b/docs/formatting.md index 1368356b..b81069cf 100644 --- a/docs/formatting.md +++ b/docs/formatting.md @@ -303,6 +303,16 @@ Format specifiers can be used for advanced formatting by using the options provi {foo:X6/ .../} Fo ... + + Xb<maxlen>/<ext>/ + Same as X, but applies to the bytes() representation of a string in filesystem encoding + {foo_ja:Xb15/〜/} + フー・バー + + + {foo_ja:Xb8/〜/} + フ〜 + J<separator>/ Concatenates elements of a list with <separator> using str.join() diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py index 8b5e7adf..04f5cc8d 100644 --- a/gallery_dl/formatter.py +++ b/gallery_dl/formatter.py @@ -330,10 +330,10 @@ def _slice(indices): ) -def _bytesgetter(slice, encoding=sys.getfilesystemencoding()): +def _bytesgetter(slice): def apply_slice_bytes(obj): - return obj.encode(encoding)[slice].decode(encoding, "ignore") + return obj.encode(_ENCODING)[slice].decode(_ENCODING, "ignore") return apply_slice_bytes @@ -512,14 +512,26 @@ def _parse_sort(format_spec, default): def _parse_limit(format_spec, default): limit, hint, format_spec = format_spec.split(_SEPARATOR, 2) - limit = int(limit[1:]) - limit_hint = limit - len(hint) fmt = _build_format_func(format_spec, default) - def apply_limit(obj): - if len(obj) > limit: - obj = obj[:limit_hint] + hint - return fmt(obj) + if limit[1] == "b": + hint = hint.encode(_ENCODING) + limit = int(limit[2:]) + limit_hint = limit - len(hint) + + def apply_limit(obj): + objb = obj.encode(_ENCODING) + if len(objb) > limit: + obj = (objb[:limit_hint] + hint).decode(_ENCODING, "ignore") + return fmt(obj) + else: + limit = int(limit[1:]) + limit_hint = limit - len(hint) + + def apply_limit(obj): + if len(obj) > limit: + obj = obj[:limit_hint] + hint + return fmt(obj) return apply_limit @@ -540,6 +552,7 @@ class Literal(): _literal = Literal() _CACHE = {} +_ENCODING = sys.getfilesystemencoding() _SEPARATOR = "/" _FORMATTERS = { "E" : ExpressionFormatter, diff --git a/test/test_formatter.py b/test/test_formatter.py index f08ae49c..b4603c06 100644 --- a/test/test_formatter.py +++ b/test/test_formatter.py @@ -332,6 +332,17 @@ class TestFormatter(unittest.TestCase): with self.assertRaises(ValueError): self._run_test("{a:Xfoo/ */}", "hello wo *") + def test_specifier_limit_bytes(self): + self._run_test("{a:Xb20/ */}", "hElLo wOrLd") + self._run_test("{a:Xb10/ */}", "hElLo wO *") + + self._run_test("{j:Xb50/〜/}", "げんそうきょう") + self._run_test("{j:Xb20/〜/}", "げんそうき〜") + self._run_test("{j:Xb20/ */}", "げんそうきょ *") + + with self.assertRaises(ValueError): + self._run_test("{a:Xbfoo/ */}", "hello wo *") + def test_specifier_map(self): self._run_test("{L:Mname/}" , "['John Doe', 'Jane Smith', 'Max Mustermann']")