From c78aa175064fde846674fcec239e74ed24122e40 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Thu, 10 Sep 2020 22:54:10 +0200
Subject: [PATCH] add general 'blacklist' and 'whitelist' options (#492, #844)

---
 docs/configuration.rst | 14 ++++++++++++++
 gallery_dl/job.py      | 26 ++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/docs/configuration.rst b/docs/configuration.rst
index d6ff67ce..c80f8f4e 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -363,6 +363,20 @@ Description Transfer an extractor's (sub)category values to all child
 =========== =====
 
 
+extractor.*.blacklist & .whitelist
+----------------------------------
+=========== =====
+Type        ``list`` of ``strings``
+Default     ``["oauth", "recursive", "test"]`` + current extractor category
+Description A list of extractor categories to ignore (or allow)
+            when spawning child extractors for unknown URLs,
+            e.g. from ``reddit`` or ``plurk``.
+
+            Note: Any ``blacklist`` setting will automatically include
+            ``"oauth"``, ``"recursive"``, and ``"test"``.
+=========== =====
+
+
 extractor.*.archive
 -------------------
 =========== =====
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 84ff160c..c9f89319 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -197,6 +197,7 @@ class DownloadJob(Job):
     def __init__(self, url, parent=None):
         Job.__init__(self, url, parent)
         self.log = self.get_logger("download")
+        self.blacklist = None
         self.archive = None
         self.sleep = None
         self.downloaders = {}
@@ -308,6 +309,12 @@ class DownloadJob(Job):
             extr = kwdict["_extractor"].from_url(url)
         else:
             extr = extractor.find(url)
+            if extr:
+                if self.blacklist is None:
+                    self.blacklist = self._build_blacklist()
+                if extr.category in self.blacklist:
+                    extr = None
+
         if extr:
             self.status |= self.__class__(extr, self).run()
         else:
@@ -437,6 +444,25 @@ class DownloadJob(Job):
                 self.extractor.log.debug(
                     "Active postprocessor modules: %s", pp_list)
 
+    def _build_blacklist(self):
+        wlist = self.extractor.config("whitelist")
+        if wlist:
+            if isinstance(wlist, str):
+                wlist = wlist.split(",")
+            blist = {e.category for e in extractor._list_classes()}
+            blist.difference_update(wlist)
+            return blist
+
+        blist = self.extractor.config("blacklist")
+        if blist:
+            if isinstance(blist, str):
+                blist = blist.split(",")
+            blist = set(blist)
+        else:
+            blist = {self.extractor.category}
+        blist |= util.SPECIAL_EXTRACTORS
+        return blist
+
 
 class SimulationJob(DownloadJob):
     """Simulate the extraction process without downloading anything"""