share extractor and downloader sessions

There was never any "good" reason for the strict separation
between extractors and downloaders. This change allows for
reduced resource usage (probably unnoticeable) and fewer lines
of code at the "cost" of tighter coupling.
This commit is contained in:
Mike Fährmann
2017-06-30 19:38:14 +02:00
parent 4414aefe97
commit 58e95a7487
9 changed files with 13 additions and 62 deletions

View File

@@ -79,12 +79,6 @@ class Job():
if self.pred_queue:
self.handle_queue(msg[1])
elif msg[0] == Message.Headers:
self.handle_headers(msg[1])
elif msg[0] == Message.Cookies:
self.handle_cookies(msg[1])
elif msg[0] == Message.Version:
if msg[1] != 1:
raise "unsupported message-version ({}, {})".format(
@@ -101,12 +95,6 @@ class Job():
def handle_queue(self, url):
"""Handle Message.Queue"""
def handle_headers(self, headers):
"""Handle Message.Headers"""
def handle_cookies(self, cookies):
"""Handle Message.Cookies"""
def update_kwdict(self, kwdict):
"""Add 'category' and 'subcategory' keywords"""
kwdict["category"] = self.extractor.category
@@ -145,12 +133,6 @@ class DownloadJob(Job):
except exception.NoExtractorError:
self._write_unsupported(url)
def handle_headers(self, headers):
self.get_downloader("http:").set_headers(headers)
def handle_cookies(self, cookies):
self.get_downloader("http:").set_cookies(cookies)
def get_downloader(self, url):
"""Return, and possibly construct, a downloader suitable for 'url'"""
pos = url.find(":")
@@ -160,7 +142,7 @@ class DownloadJob(Job):
instance = self.downloaders.get(scheme)
if instance is None:
klass = downloader.find(scheme)
instance = klass(self.out)
instance = klass(self.extractor.session, self.out)
self.downloaders[scheme] = instance
return instance
@@ -300,13 +282,10 @@ class DataJob(Job):
# collect data
try:
for msg in self.extractor:
if msg[0] in (Message.Headers, Message.Cookies):
copy = (msg[0], dict(msg[1]))
else:
copy = [
part.copy() if hasattr(part, "copy") else part
for part in msg
]
copy = [
part.copy() if hasattr(part, "copy") else part
for part in msg
]
self.data.append(copy)
except Exception as exc:
self.data.append((exc.__class__.__name__, str(exc)))