From c978fe18d4e7e2d693494fe42cbef9e8de00061f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Fri, 6 Feb 2026 20:46:35 +0100
Subject: [PATCH] [text] add 'extract_urls()' helper

---
 gallery_dl/formatter.py |  2 +-
 gallery_dl/text.py      |  5 ++++-
 test/test_text.py       | 18 +++++++++++++++++-
 3 files changed, 22 insertions(+), 3 deletions(-)
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index 1afea5a4..0f9004a3 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -611,7 +611,7 @@ _CONVERSIONS = {
     "U": text.unescape,
     "H": lambda s: text.unescape(text.remove_html(s)),
     "g": text.slugify,
-    "R": text.re(r"https?://[^\s\"'<>\\]+").findall,
+    "R": text.extract_urls,
     "W": text.sanitize_whitespace,
     "S": util.to_string,
     "s": str,
diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index 5b074d9e..9fdf05bc 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2015-2025 Mike Fährmann
+# Copyright 2015-2026 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -231,6 +231,9 @@ def extract_from(txt, pos=None, default=""):
     return extr
 
 
+extract_urls = re(r"https?://[^\s\"'<>\\]+").findall  # find all http(s) URLs
+
+
 def parse_unicode_escapes(txt):
     """Convert JSON Unicode escapes in 'txt' into actual characters"""
     if "\\u" in txt:
diff --git a/test/test_text.py b/test/test_text.py
index eac79069..681dda6b 100644
--- a/test/test_text.py
+++ b/test/test_text.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-# Copyright 2015-2025 Mike Fährmann
+# Copyright 2015-2026 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -391,6 +391,22 @@ class TestText(unittest.TestCase):
         self.assertEqual(e("[", "]"), "END")
         self.assertEqual(e("[", "]"), "END")
 
+    def test_extract_urls(self, f=text.extract_urls):
+        # inputs without any URL produce an empty list
+        for txt in ("", "<p>foo </p> &amp; bar <p> </p>"):
+            self.assertEqual(f(txt), [])
+
+        # URLs are found in attribute values as well as in element text
+        html = """<p>
+  <a href="http://www.example.com">Lorem ipsum dolor sit amet</a>.
+  Duis aute irure <a href="http://blog.example.org/lorem?foo=bar">
+  http://blog.example.org</a>.
+</p>"""
+        urls = ["http://www.example.com",
+                "http://blog.example.org/lorem?foo=bar",
+                "http://blog.example.org"]
+        self.assertEqual(f(html), urls)
+
     def test_parse_unicode_escapes(self, f=text.parse_unicode_escapes):
         self.assertEqual(f(""), "")
         self.assertEqual(f("foobar"), "foobar")