[reddit] some small fixes

- filter or complete some URLs
- remove the 'nofollow:' scheme before printing URLs
- (#15)
This commit is contained in:
Mike Fährmann
2017-05-23 11:48:00 +02:00
parent a22892f494
commit e425243b1e
3 changed files with 19 additions and 7 deletions

View File

@@ -214,9 +214,10 @@ class UrlJob(Job):
Job.__init__(self, url)
self.depth = depth
if depth == self.maxdepth:
self.handle_queue = print
self.handle_queue = self._print
def handle_url(self, url, _):
@staticmethod
def handle_url(url, _):
    """Print 'url' to stdout; the second argument is ignored"""
    print(url)
def handle_queue(self, url):
@@ -225,6 +226,12 @@ class UrlJob(Job):
except exception.NoExtractorError:
pass
@staticmethod
def _print(url):
    """Print 'url' to stdout without a leading 'nofollow:' scheme"""
    scheme = "nofollow:"
    # drop the scheme marker only when it is actually at the front
    has_scheme = url.startswith(scheme)
    print(url[len(scheme):] if has_scheme else url)
class TestJob(DownloadJob):
"""Generate test-results for extractor runs"""