From 2d64e7622388520155ea93850548447cc0e1bae9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Sat, 7 Feb 2026 11:41:32 +0100
Subject: [PATCH] [job] implement 'follow' option (#8752)

Follow and process URLs found in the given format string result.
---
 docs/configuration.rst | 16 +++++++++++++++-
 docs/gallery-dl.conf   |  1 +
 gallery_dl/job.py      | 34 ++++++++++++++++++++++++++++++++++
 test/test_job.py       | 42 ++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/docs/configuration.rst b/docs/configuration.rst
index 921342a2..3c32ab87 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -212,6 +212,20 @@ Description
     Specifying a default |Path|_ with ``""`` is required.
 
 
+extractor.*.follow
+------------------
+Type
+    `Format String`_
+Default
+    ``null``
+Example
+    * ``"{content}"``
+    * ``"\fE body or html or text"``
+Description
+    Follow URLs found in the result of the given `Format String`_
+    and process them with child extractors.
+
+
 extractor.*.parent
 ------------------
 Type
@@ -10230,7 +10244,7 @@ Example
     * ``"foo"``
     * ``"{username}"``
     * ``"{title} ({id}).{extension}"``
-    * ``"\fF {title.title()} ({num:>0:>0{len(str(a))}} / {count}).{extension}"``
+    * ``"\fF {title.title()} ({num:>0{len(str(count))}} / {count}).{extension}"``
 Description
     A `Format String`_ allows creating dynamic text
     by embedding metadata values directly into replacement fields
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index ffdd984a..62bfc4f6 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -14,6 +14,7 @@
         "postprocessors": null,
         "skip"          : true,
         "skip-filter"   : null,
+        "follow"        : null,
 
         "user-agent"    : "auto",
         "referer"       : true,
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index c70bbc2a..019c30b1 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -211,13 +211,31 @@ class Job():
         msg = None
         process = True
 
+        if follow := self.extractor.config("follow"):
+            follow = formatter.parse(follow, None, util.identity).format_map
+            follow_urls = follow_kwdict = None
+        else:
+            follow = follow_urls = None
+
         for msg, url, kwdict in messages:
 
             if msg == Message.Directory:
+                if follow_urls is not None:
+                    for furl in follow_urls:
+                        if self.metadata_url:
+                            follow_kwdict[self.metadata_url] = furl
+                        if self.pred_queue(furl, follow_kwdict):
+                            self.handle_queue(furl, follow_kwdict)
+                    follow_urls = None
+
                 self.update_kwdict(kwdict)
                 if self.pred_post(url, kwdict):
                     process = True
                     self.handle_directory(kwdict)
+                    if follow is not None:
+                        follow_urls = self._collect_urls(follow(kwdict))
+                        if follow_urls is not None:
+                            follow_kwdict = kwdict.copy()
                 else:
                     process = None
                 if FLAGS.POST is not None:
@@ -253,6 +271,13 @@ class Job():
                 if FLAGS.CHILD is not None:
                     FLAGS.process("CHILD")
 
+        if follow_urls is not None:
+            for furl in follow_urls:
+                if self.metadata_url:
+                    follow_kwdict[self.metadata_url] = furl
+                if self.pred_queue(furl, follow_kwdict):
+                    self.handle_queue(furl, follow_kwdict)
+
         return msg
 
     def handle_url(self, url, kwdict):
@@ -300,6 +325,15 @@ class Job():
         if init and init != "lazy":
             self.initialize()
 
+    def _collect_urls(self, source):
+        """Return a list of URLs from 'source' or None if there are none"""
+        urls = None
+        if source and isinstance(source, list):
+            urls = source
+        elif source and isinstance(source, str):
+            urls = text.extract_urls(source) or None
+        return urls
+
     def _prepare_predicates(self, target, alt=None, skip=None):
         predicates = []
         extr = self.extractor
diff --git a/test/test_job.py b/test/test_job.py
index c73577f3..92d15542 100644
--- a/test/test_job.py
+++ b/test/test_job.py
@@ -207,7 +207,7 @@ test:child
 
     def test_child(self):
         extr = TestExtractorParent.from_url("test:parent")
-        tjob = job.UrlJob(extr, depth=0)
+        tjob = self.jobclass(extr, depth=0)
         self.assertEqual(self._capture_stdout(tjob), 3 * """\
 https://example.org/1.jpg
 https://example.org/2.jpg
@@ -221,6 +221,20 @@ https://example.org/3.jpg
         tjob = self.jobclass(extr)
         tjob._init()
 
+    def test_opt_follow(self):
+        config.set((), "follow", "{user[bio]}")  # follow URLs in the bio
+
+        extr = TestExtractor.from_url("test:urls")  # sets user["bio"]
+        tjob = self.jobclass(extr)
+        self.assertEqual(self._capture_stdout(tjob), """\
+https://example.org/1.jpg
+https://example.org/2.jpg
+https://example.org/3.jpg
+https://example1.org/content/abc
+https://example2.org/content?query=123
+https://example3.org/content/#frag
+""")
+
 
 class TestInfoJob(TestJob):
     jobclass = job.InfoJob
@@ -448,6 +462,21 @@ class TestDataJob(TestJob):
         tjob = self.jobclass(extr)
         tjob._init()
 
+    def test_opt_follow(self):
+        config.set((), "follow", "{user[bio]!R}")
+
+        extr = TestExtractor.from_url("test:urls")  # sets user["bio"]
+        tjob = self.jobclass(extr, file=None)
+        tjob.run()
+        self.assertEqual(tjob.data_urls, [  # own URLs, then followed ones
+            "https://example.org/1.jpg",
+            "https://example.org/2.jpg",
+            "https://example.org/3.jpg",
+            "https://example1.org/content/abc",
+            "https://example2.org/content?query=123",
+            "https://example3.org/content/#frag"
+        ])
+
     def test_resolve(self):
         extr = TestExtractorParent.from_url("test:parent:3")
         tjob = self.jobclass(extr, file=None, resolve=0)
@@ -486,13 +515,22 @@ class TestExtractor(Extractor):
     subcategory = "test_subcategory"
     directory_fmt = ("{category}",)
     filename_fmt = "test_{filename}.{extension}"
-    pattern = r"test:(child|self)?$"
+    pattern = r"test:(child|self|urls)?$"
 
     def __init__(self, match):
         Extractor.__init__(self, match)
         self.user = {"id": 123, "name": "test"}
         if match[1] == "self":
             self.user["self"] = self.user
+        elif match[1] == "urls":  # bio mixing bare and <a href> URLs
+            self.user["bio"] = """
+Site 1:
+* https://example1.org/content/abc
+Site 2:
+* https://example2.org/content?query=123
+
+<a href="https://example3.org/content/#frag">Site 3</a>
+"""
 
     def items(self):
         root = "https://example.org"