diff --git a/docs/formatting.md b/docs/formatting.md
index cc2703d2..7c571fdd 100644
--- a/docs/formatting.md
+++ b/docs/formatting.md
@@ -11,14 +11,15 @@ Field names select the metadata value to use in a replacement field.
While simple names are usually enough, more complex forms like accessing values by attribute, element index, or slicing are also supported.
-| | Example | Result |
-| -------------------- | ----------------- | ---------------------- |
-| Name | `{title}` | `Hello World` |
-| Element Index | `{title[6]}` | `W` |
-| Slicing | `{title[3:8]}` | `lo Wo` |
-| Alternatives | `{empty\|title}` | `Hello World` |
-| Element Access | `{user[name]}` | `John Doe` |
-| Attribute Access | `{extractor.url}` | `https://example.org/` |
+| | Example | Result |
+| -------------------- | ------------------- | ---------------------- |
+| Name | `{title}` | `Hello World` |
+| Element Index | `{title[6]}` | `W` |
+| Slicing | `{title[3:8]}` | `lo Wo` |
+| Slicing (Bytes) | `{title_ja[b3:18]}` | `ロー・ワー` |
+| Alternatives | `{empty\|title}` | `Hello World` |
+| Element Access | `{user[name]}` | `John Doe` |
+| Attribute Access | `{extractor.url}` | `https://example.org/` |
All of these methods can be combined as needed.
For example `{title[24]|empty|extractor.url[15:-1]}` would result in `.org`.
@@ -150,6 +151,12 @@ Format specifiers can be used for advanced formatting by using the options provi
{foo:[1:-1]} |
oo Ba |
+
+ [b<start>:<stop>] |
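Same as above, but slices the bytes() representation of a string in filesystem encoding instead of its characters; bytes of a multi-byte character that is cut apart by the slice are dropped from the result |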
+ {foo_ja:[b3:-1]} |
+ ー・バ |
+
L<maxlen>/<repl>/ |
Replaces the entire output with <repl> if its length exceeds <maxlen> |
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index fc36fa2c..2ff48c32 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -9,6 +9,7 @@
"""String formatters"""
import os
+import sys
import time
import string
import _string
@@ -255,7 +256,11 @@ def parse_field_name(field_name):
func = operator.itemgetter
try:
if ":" in key:
- key = _slice(key)
+ if key[0] == "b":
+ func = _bytesgetter
+ key = _slice(key[1:])
+ else:
+ key = _slice(key)
else:
key = key.strip("\"'")
except TypeError:
@@ -276,6 +281,14 @@ def _slice(indices):
)
+def _bytesgetter(slice, encoding=sys.getfilesystemencoding()):
+    """Return a function that slices a string by byte offsets
+
+    The returned function encodes its argument with 'encoding'
+    (by default the filesystem encoding, looked up once when this
+    function is defined), applies 'slice' to the resulting bytes, and
+    decodes them back to a string. Bytes that cannot be decoded after
+    slicing, e.g. the remains of a multi-byte character that was cut
+    apart, are ignored.
+    """
+
+    def apply_slice_bytes(obj):
+        encoded = obj.encode(encoding)
+        return encoded[slice].decode(encoding, "ignore")
+
+    return apply_slice_bytes
+
+
def _build_format_func(format_spec, default):
if format_spec:
return _FORMAT_SPECIFIERS.get(
@@ -295,11 +308,20 @@ def _parse_optional(format_spec, default):
def _parse_slice(format_spec, default):
+    """Build a format function for a '[<start>:<stop>]' slice specifier
+
+    Everything up to the first "]" is the slice expression; the rest of
+    'format_spec' is handed to _build_format_func() and applied to the
+    sliced value. A "b" directly after the first character selects
+    byte-wise slicing via _bytesgetter(); otherwise the object itself
+    is sliced.
+    """
    indices, _, format_spec = format_spec.partition("]")
-    slice = _slice(indices[1:])
    fmt = _build_format_func(format_spec, default)
-    def apply_slice(obj):
-        return fmt(obj[slice])
+    # indices[0] is skipped (presumably the opening "[").
+    # Use indices[1:2] rather than indices[1]: for a specifier with
+    # nothing after that first character (e.g. "[]") plain indexing
+    # raises IndexError, whereas the slice yields "" and falls through
+    # to the regular branch, like the code did before byte slicing
+    # was added.
+    if indices[1:2] == "b":
+        slice_bytes = _bytesgetter(_slice(indices[2:]))
+
+        def apply_slice(obj):
+            return fmt(slice_bytes(obj))
+
+    else:
+        # named 'slice_obj' to avoid shadowing the builtin slice()
+        slice_obj = _slice(indices[1:])
+
+        def apply_slice(obj):
+            return fmt(obj[slice_obj])
+
    return apply_slice
diff --git a/test/test_formatter.py b/test/test_formatter.py
index 22589668..1bda9d9c 100644
--- a/test/test_formatter.py
+++ b/test/test_formatter.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -23,6 +23,7 @@ class TestFormatter(unittest.TestCase):
kwdict = {
"a": "hElLo wOrLd",
"b": "äöü",
+ "j": "げんそうきょう",
"d": {"a": "foo", "b": 0, "c": None},
"l": ["a", "b", "c"],
"n": None,
@@ -133,7 +134,7 @@ class TestFormatter(unittest.TestCase):
self._run_test("{d['a']}", "foo")
self._run_test('{d["a"]}', "foo")
- def test_slicing(self):
+ def test_slice_str(self):
v = self.kwdict["a"]
self._run_test("{a[1:10]}" , v[1:10])
self._run_test("{a[-10:-1]}", v[-10:-1])
@@ -165,6 +166,26 @@ class TestFormatter(unittest.TestCase):
self._run_test("{a:[:50:2]}", v[:50:2])
self._run_test("{a:[::]}" , v)
+    def test_slice_bytes(self):
+        """Byte-offset slices in field names and format specifiers"""
+        v = self.kwdict["j"]
+
+        # (slice expression, expected result) pairs; the expected values
+        # presumably assume a filesystem encoding that uses 3 bytes per
+        # character of 'j' (e.g. UTF-8) -- TODO confirm
+        cases = (
+            ("b1:10"  , v[1:3]),
+            ("b-10:-1", v[-3:-1]),
+            ("b5:"    , v[2:]),
+            ("b50:"   , v[50:]),
+            ("b:5"    , v[:1]),
+            ("b:50"   , v[:50]),
+            ("b:"     , v),
+            ("b::"    , v),
+        )
+
+        # field-name form: {j[b<start>:<stop>]}
+        for expr, expected in cases:
+            self._run_test("{j[" + expr + "]}", expected)
+
+        # format-specifier form: {j:[b<start>:<stop>]}
+        for expr, expected in cases:
+            self._run_test("{j:[" + expr + "]}", expected)
+
+
def test_maxlen(self):
v = self.kwdict["a"]
self._run_test("{a:L5/foo/}" , "foo")
@@ -413,10 +434,10 @@ def noarg():
fmt4 = formatter.parse("\fM " + path + ":lengths")
self.assertEqual(fmt1.format_map(self.kwdict), "'Title' by Name")
- self.assertEqual(fmt2.format_map(self.kwdict), "89")
+ self.assertEqual(fmt2.format_map(self.kwdict), "96")
self.assertEqual(fmt3.format_map(self.kwdict), "'Title' by Name")
- self.assertEqual(fmt4.format_map(self.kwdict), "89")
+ self.assertEqual(fmt4.format_map(self.kwdict), "96")
with self.assertRaises(TypeError):
self.assertEqual(fmt0.format_map(self.kwdict), "")