diff --git a/test/test_job.py b/test/test_job.py
index a33738e7..2c6bcaf7 100644
--- a/test/test_job.py
+++ b/test/test_job.py
@@ -432,6 +432,38 @@ class TestDataJob(TestJob):
         tjob = self.jobclass(extr)
         tjob._init()
 
+    def test_resolve(self):
+        extr = TestExtractorParent.from_url("test:parent:3")
+        tjob = self.jobclass(extr, file=None, resolve=0)
+        tjob.run()
+        self.assertEqual(len(tjob.data_urls), 3)
+        for url in tjob.data_urls:
+            self.assertEqual(url, "test:parent:2")
+
+        extr = TestExtractorParent.from_url("test:parent:3")
+        tjob = self.jobclass(extr, file=None, resolve=1)
+        tjob.run()
+        self.assertEqual(len(tjob.data_urls), 9)
+        for url in tjob.data_urls:
+            self.assertEqual(url, "test:parent:1")
+
+        extr = TestExtractorParent.from_url("test:parent")
+        tjob = self.jobclass(extr, file=None, resolve=64)
+        tjob.run()
+        self.assertEqual(len(tjob.data_urls), 9)
+        for url in tjob.data_urls:
+            self.assertRegex(url, r"^https://example.org/\d\.jpg$")
+
+        extr = TestExtractorParent.from_url("test:parent:1")
+        tjob = self.jobclass(extr, file=None, resolve=64)
+        tjob.run()
+        self.assertEqual(len(tjob.data_urls), 27)
+
+        extr = TestExtractorParent.from_url("test:parent:2")
+        tjob = self.jobclass(extr, file=None, resolve=64)
+        tjob.run()
+        self.assertEqual(len(tjob.data_urls), 81)
+
 
 class TestExtractor(Extractor):
     category = "test_category"
@@ -469,16 +501,22 @@ class TestExtractor(Extractor):
 class TestExtractorParent(Extractor):
     category = "test_category"
     subcategory = "test_subcategory_parent"
-    pattern = r"test:parent"
+    pattern = r"test:parent(:\d+)?"
 
     def items(self):
-        url = "test:child"
+        level = self.groups[0]
+        if level in {None, ":0"}:
+            url = "test:child"
+            extr = TestExtractor
+        else:
+            url = f"test:parent:{int(level[1:])-1}"
+            extr = TestExtractorParent
 
         for i in range(11, 14):
             yield Message.Queue, url, {
                 "num" : i,
                 "tags": ["abc", "def"],
-                "_extractor": TestExtractor,
+                "_extractor": extr,
             }
 
 