[job] implement 'follow' option (#8752)
Follow and process URLs found in the given format string result.
This commit is contained in:
@@ -207,7 +207,7 @@ test:child
|
||||
|
||||
def test_child(self):
|
||||
extr = TestExtractorParent.from_url("test:parent")
|
||||
tjob = job.UrlJob(extr, depth=0)
|
||||
tjob = self.jobclass(extr, depth=0)
|
||||
self.assertEqual(self._capture_stdout(tjob), 3 * """\
|
||||
https://example.org/1.jpg
|
||||
https://example.org/2.jpg
|
||||
@@ -221,6 +221,20 @@ https://example.org/3.jpg
|
||||
tjob = self.jobclass(extr)
|
||||
tjob._init()
|
||||
|
||||
def test_opt_follow(self):
|
||||
config.set((), "follow", "{user[bio]}")
|
||||
|
||||
extr = TestExtractor.from_url("test:urls")
|
||||
tjob = self.jobclass(extr)
|
||||
self.assertEqual(self._capture_stdout(tjob), """\
|
||||
https://example.org/1.jpg
|
||||
https://example.org/2.jpg
|
||||
https://example.org/3.jpg
|
||||
https://example1.org/content/abc
|
||||
https://example2.org/content?query=123
|
||||
https://example3.org/content/#frag
|
||||
""")
|
||||
|
||||
|
||||
class TestInfoJob(TestJob):
|
||||
jobclass = job.InfoJob
|
||||
@@ -448,6 +462,21 @@ class TestDataJob(TestJob):
|
||||
tjob = self.jobclass(extr)
|
||||
tjob._init()
|
||||
|
||||
def test_opt_follow(self):
|
||||
config.set((), "follow", "{user[bio]!R}")
|
||||
|
||||
extr = TestExtractor.from_url("test:urls")
|
||||
tjob = self.jobclass(extr, file=None)
|
||||
tjob.run()
|
||||
self.assertEqual(tjob.data_urls, [
|
||||
"https://example.org/1.jpg",
|
||||
"https://example.org/2.jpg",
|
||||
"https://example.org/3.jpg",
|
||||
"https://example1.org/content/abc",
|
||||
"https://example2.org/content?query=123",
|
||||
"https://example3.org/content/#frag"
|
||||
])
|
||||
|
||||
def test_resolve(self):
|
||||
extr = TestExtractorParent.from_url("test:parent:3")
|
||||
tjob = self.jobclass(extr, file=None, resolve=0)
|
||||
@@ -486,13 +515,22 @@ class TestExtractor(Extractor):
|
||||
subcategory = "test_subcategory"
|
||||
directory_fmt = ("{category}",)
|
||||
filename_fmt = "test_{filename}.{extension}"
|
||||
pattern = r"test:(child|self)?$"
|
||||
pattern = r"test:(child|self|urls)?$"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
self.user = {"id": 123, "name": "test"}
|
||||
if match[1] == "self":
|
||||
self.user["self"] = self.user
|
||||
elif match[1] == "urls":
|
||||
self.user["bio"] = """
|
||||
Site 1:
|
||||
* https://example1.org/content/abc
|
||||
Site 2:
|
||||
* https://example2.org/content?query=123
|
||||
|
||||
<a href="https://example3.org/content/#frag">Site 3</a>
|
||||
"""
|
||||
|
||||
def items(self):
|
||||
root = "https://example.org"
|
||||
|
||||
Reference in New Issue
Block a user