@@ -363,6 +363,20 @@ Description Transfer an extractor's (sub)category values to all child
|
|||||||
=========== =====
|
=========== =====
|
||||||
|
|
||||||
|
|
||||||
|
extractor.*.blacklist & .whitelist
|
||||||
|
----------------------------------
|
||||||
|
=========== =====
|
||||||
|
Type ``list`` of ``strings``
|
||||||
|
Default ``["oauth", "recursive", "test"]`` + current extractor category (``blacklist`` only)
|
||||||
|
Description A list of extractor categories to ignore (or allow)
|
||||||
|
when spawning child extractors for unknown URLs,
|
||||||
|
e.g. from ``reddit`` or ``plurk``.
|
||||||
|
|
||||||
|
Note: Any ``blacklist`` setting will automatically include
|
||||||
|
``"oauth"``, ``"recursive"``, and ``"test"``.
|
||||||
|
=========== =====
|
||||||
|
|
||||||
|
|
||||||
extractor.*.archive
|
extractor.*.archive
|
||||||
-------------------
|
-------------------
|
||||||
=========== =====
|
=========== =====
|
||||||
|
|||||||
@@ -197,6 +197,7 @@ class DownloadJob(Job):
|
|||||||
def __init__(self, url, parent=None):
|
def __init__(self, url, parent=None):
|
||||||
Job.__init__(self, url, parent)
|
Job.__init__(self, url, parent)
|
||||||
self.log = self.get_logger("download")
|
self.log = self.get_logger("download")
|
||||||
|
self.blacklist = None
|
||||||
self.archive = None
|
self.archive = None
|
||||||
self.sleep = None
|
self.sleep = None
|
||||||
self.downloaders = {}
|
self.downloaders = {}
|
||||||
@@ -308,6 +309,12 @@ class DownloadJob(Job):
|
|||||||
extr = kwdict["_extractor"].from_url(url)
|
extr = kwdict["_extractor"].from_url(url)
|
||||||
else:
|
else:
|
||||||
extr = extractor.find(url)
|
extr = extractor.find(url)
|
||||||
|
if extr:
|
||||||
|
if self.blacklist is None:
|
||||||
|
self.blacklist = self._build_blacklist()
|
||||||
|
if extr.category in self.blacklist:
|
||||||
|
extr = None
|
||||||
|
|
||||||
if extr:
|
if extr:
|
||||||
self.status |= self.__class__(extr, self).run()
|
self.status |= self.__class__(extr, self).run()
|
||||||
else:
|
else:
|
||||||
@@ -437,6 +444,25 @@ class DownloadJob(Job):
|
|||||||
self.extractor.log.debug(
|
self.extractor.log.debug(
|
||||||
"Active postprocessor modules: %s", pp_list)
|
"Active postprocessor modules: %s", pp_list)
|
||||||
|
|
||||||
|
def _build_blacklist(self):
    """Build the set of extractor categories that must not be spawned.

    The result is used to decide whether a child extractor found for a
    queued URL is allowed to run: any extractor whose ``category`` is in
    the returned set is discarded by the caller.

    Resolution order:

    1. If the ``whitelist`` option is set (non-empty), every known
       extractor category *except* the whitelisted ones is blocked, and
       the ``blacklist`` option is not consulted.
    2. Otherwise the ``blacklist`` option is used; when it is unset or
       empty, only the current extractor's own category is blocked.
       In both blacklist cases ``util.SPECIAL_EXTRACTORS`` is added.

    Both options accept either a list of category names or a single
    comma-separated string. Whitespace around the entries of such a
    string is stripped, so ``"reddit, plurk"`` and ``"reddit,plurk"``
    are equivalent.

    Returns:
        A ``set`` of category name strings to ignore.
    """
    wlist = self.extractor.config("whitelist")
    if wlist:
        if isinstance(wlist, str):
            # Strip each entry: " plurk" would never match a category
            # and the whitelisted extractor would stay blocked.
            wlist = [name.strip() for name in wlist.split(",")]
        # Start from every known category and remove the allowed ones.
        blist = {e.category for e in extractor._list_classes()}
        blist.difference_update(wlist)
        return blist

    blist = self.extractor.config("blacklist")
    if blist:
        if isinstance(blist, str):
            # Same whitespace handling as for the whitelist above.
            blist = [name.strip() for name in blist.split(",")]
        blist = set(blist)
    else:
        # No explicit setting: only block the current extractor's category.
        blist = {self.extractor.category}
    # Special-purpose extractors are always blocked in blacklist mode.
    blist |= util.SPECIAL_EXTRACTORS
    return blist
|
||||||
|
|
||||||
|
|
||||||
class SimulationJob(DownloadJob):
|
class SimulationJob(DownloadJob):
|
||||||
"""Simulate the extraction process without downloading anything"""
|
"""Simulate the extraction process without downloading anything"""
|
||||||
|
|||||||
Reference in New Issue
Block a user