diff --git a/docs/formatting.md b/docs/formatting.md index 8387a731..1368356b 100644 --- a/docs/formatting.md +++ b/docs/formatting.md @@ -201,6 +201,12 @@ Conversion specifiers allow to *convert* the value to a different form or type. {html!H} foo & bar + + R + Extract URLs + {lorem!R} + ["https://example.org/"] + s Convert value to str diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py index 7a49049b..b09203fb 100644 --- a/gallery_dl/formatter.py +++ b/gallery_dl/formatter.py @@ -565,6 +565,7 @@ _CONVERSIONS = { "U": text.unescape, "H": lambda s: text.unescape(text.remove_html(s)), "g": text.slugify, + "R": text.re(r"https?://[^\s\"']+").findall, "W": text.sanitize_whitespace, "S": util.to_string, "s": str, diff --git a/test/test_formatter.py b/test/test_formatter.py index f3ed9dd3..8b35a2bd 100644 --- a/test/test_formatter.py +++ b/test/test_formatter.py @@ -44,6 +44,10 @@ class TestFormatter(unittest.TestCase): "s": " \n\r\tSPACE ", "S": " \n\r\tS P A\tC\nE ", "h": "

foo

& bar

", + "H": """

+ Lorem ipsum dolor sit amet. + Duis aute irure dolor. +

""", "u": "'< / >'", "t": 1262304000, "ds": "2010-01-01T01:00:00+01:00", @@ -72,6 +76,9 @@ class TestFormatter(unittest.TestCase): self._run_test("{h!H}", "foo & bar") self._run_test("{u!H}", "'< / >'") self._run_test("{n!H}", "") + self._run_test("{h!R}", []) + self._run_test("{H!R}", ["http://www.example.com", + "http://blog.example.org"]) self._run_test("{a!s}", self.kwdict["a"]) self._run_test("{a!r}", f"'{self.kwdict['a']}'") self._run_test("{a!a}", f"'{self.kwdict['a']}'") @@ -590,10 +597,11 @@ def gentext(kwdict): def lengths(kwdict): a = 0 for k, v in kwdict.items(): - try: - a += len(v) - except TypeError: - pass + if k == k.lower(): + try: + a += len(v) + except TypeError: + pass return format(a) def noarg(): @@ -616,10 +624,10 @@ def noarg(): fmt4 = formatter.parse(f"\fM {path}:lengths") self.assertEqual(fmt1.format_map(self.kwdict), "'Title' by Name") - self.assertEqual(fmt2.format_map(self.kwdict), "168") + self.assertEqual(fmt2.format_map(self.kwdict), "139") self.assertEqual(fmt3.format_map(self.kwdict), "'Title' by Name") - self.assertEqual(fmt4.format_map(self.kwdict), "168") + self.assertEqual(fmt4.format_map(self.kwdict), "139") with self.assertRaises(TypeError): self.assertEqual(fmt0.format_map(self.kwdict), "")