[formatter] add 'R' conversion - extract URLs (#8125)

This commit is contained in:
Mike Fährmann
2025-08-26 16:44:22 +02:00
parent a953d3d316
commit 9309204d9b
3 changed files with 21 additions and 6 deletions

View File

@@ -201,6 +201,12 @@ Conversion specifiers allow to *convert* the value to a different form or type.
<td><code>{html!H}</code></td>
<td><code>foo &amp; bar</code></td>
</tr>
<tr>
<td align="center"><code>R</code></td>
<td>Extract URLs</td>
<td><code>{lorem!R}</code></td>
<td><code>["https://example.org/"]</code></td>
</tr>
<tr>
<td align="center"><code>s</code></td>
<td>Convert value to <a href="https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str" rel="nofollow"><code>str</code></a></td>

View File

@@ -565,6 +565,7 @@ _CONVERSIONS = {
"U": text.unescape,
"H": lambda s: text.unescape(text.remove_html(s)),
"g": text.slugify,
"R": text.re(r"https?://[^\s\"']+").findall,
"W": text.sanitize_whitespace,
"S": util.to_string,
"s": str,

View File

@@ -44,6 +44,10 @@ class TestFormatter(unittest.TestCase):
"s": " \n\r\tSPACE ",
"S": " \n\r\tS P A\tC\nE ",
"h": "<p>foo </p> &amp; bar <p> </p>",
"H": """<p>
<a href="http://www.example.com">Lorem ipsum dolor sit amet</a>.
Duis aute irure <a href="http://blog.example.org">dolor</a>.
</p>""",
"u": "&#x27;&lt; / &gt;&#x27;",
"t": 1262304000,
"ds": "2010-01-01T01:00:00+01:00",
@@ -72,6 +76,9 @@ class TestFormatter(unittest.TestCase):
self._run_test("{h!H}", "foo & bar")
self._run_test("{u!H}", "'< / >'")
self._run_test("{n!H}", "")
self._run_test("{h!R}", [])
self._run_test("{H!R}", ["http://www.example.com",
"http://blog.example.org"])
self._run_test("{a!s}", self.kwdict["a"])
self._run_test("{a!r}", f"'{self.kwdict['a']}'")
self._run_test("{a!a}", f"'{self.kwdict['a']}'")
@@ -590,10 +597,11 @@ def gentext(kwdict):
# Sum the lengths of all sized values stored under lowercase keys and
# return that total as a string.
#
# kwdict: mapping of field names (str) to arbitrary values.
# Returns: the accumulated length rendered through format(), e.g. "139".
#
# NOTE(review): reconstructed from a diff hunk that showed the removed and
# the added loop body back to back without indentation; this is the
# post-commit form (the one matching the updated "139" expectation).
# Comments use '#' only, since this helper presumably lives inside a string
# literal in the test module - confirm before adding a docstring.
def lengths(kwdict):
    total = 0
    for key, value in kwdict.items():
        # Keys containing uppercase characters (e.g. "H", "S") are skipped.
        if key != key.lower():
            continue
        try:
            total += len(value)
        except TypeError:
            # Values without a length (ints, None, ...) add nothing.
            pass
    return format(total)
def noarg():
@@ -616,10 +624,10 @@ def noarg():
fmt4 = formatter.parse(f"\fM {path}:lengths")
self.assertEqual(fmt1.format_map(self.kwdict), "'Title' by Name")
self.assertEqual(fmt2.format_map(self.kwdict), "168")
self.assertEqual(fmt2.format_map(self.kwdict), "139")
self.assertEqual(fmt3.format_map(self.kwdict), "'Title' by Name")
self.assertEqual(fmt4.format_map(self.kwdict), "168")
self.assertEqual(fmt4.format_map(self.kwdict), "139")
with self.assertRaises(TypeError):
self.assertEqual(fmt0.format_map(self.kwdict), "")