From bcf4d579b06a44067d3055e49a0e689668fe758d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 11 Sep 2025 08:25:23 +0200 Subject: [PATCH] [formatter] exclude '<>\' from '!R' URLs (#8180) --- gallery_dl/formatter.py | 2 +- test/test_formatter.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py index b09203fb..e1b26f9d 100644 --- a/gallery_dl/formatter.py +++ b/gallery_dl/formatter.py @@ -565,7 +565,7 @@ _CONVERSIONS = { "U": text.unescape, "H": lambda s: text.unescape(text.remove_html(s)), "g": text.slugify, - "R": text.re(r"https?://[^\s\"']+").findall, + "R": text.re(r"https?://[^\s\"'<>\\]+").findall, "W": text.sanitize_whitespace, "S": util.to_string, "s": str, diff --git a/test/test_formatter.py b/test/test_formatter.py index 8b35a2bd..a0a5224f 100644 --- a/test/test_formatter.py +++ b/test/test_formatter.py @@ -46,7 +46,8 @@ class TestFormatter(unittest.TestCase): "h": "

foo

& bar

", "H": """

Lorem ipsum dolor sit amet. - Duis aute irure dolor. + Duis aute irure + http://blog.example.org.

""", "u": "'< / >'", "t": 1262304000, @@ -78,6 +79,7 @@ class TestFormatter(unittest.TestCase): self._run_test("{n!H}", "") self._run_test("{h!R}", []) self._run_test("{H!R}", ["http://www.example.com", + "http://blog.example.org/lorem?foo=bar", "http://blog.example.org"]) self._run_test("{a!s}", self.kwdict["a"]) self._run_test("{a!r}", f"'{self.kwdict['a']}'")