use '_extractor' info in UrlJobs

This commit is contained in:
Mike Fährmann
2021-05-19 15:52:30 +02:00
parent 4fc9668922
commit adf4d661b3
2 changed files with 18 additions and 9 deletions

View File

@@ -597,10 +597,16 @@ class UrlJob(Job):
             for url in kwdict["_fallback"]:
                 print("|", url)
-    def handle_queue(self, url, _):
-        try:
-            UrlJob(url, self, self.depth + 1).run()
-        except exception.NoExtractorError:
+    def handle_queue(self, url, kwdict):
+        cls = kwdict.get("_extractor")
+        if cls:
+            extr = cls.from_url(url)
+        else:
+            extr = extractor.find(url)
+        if extr:
+            self.status |= self.__class__(extr, self).run()
+        else:
             self._write_unsupported(url)

View File

@@ -104,11 +104,14 @@ test:child
 test:child
 """)
-    # def test_child(self):
-    #     extr = TestExtractorParent.from_url("test:parent")
-    #     tjob = job.UrlJob(extr, depth=0)
-    #     self.assertEqual(self._capture_stdout(tjob), """\
-    # """)
+    def test_child(self):
+        extr = TestExtractorParent.from_url("test:parent")
+        tjob = job.UrlJob(extr, depth=0)
+        self.assertEqual(self._capture_stdout(tjob), 3 * """\
+https://example.org/1.jpg
+https://example.org/2.jpg
+https://example.org/3.jpg
+""")
 class TestInfoJob(TestJob):