remove 'extractor.blacklist' context manager
This commit is contained in:
@@ -140,7 +140,7 @@ def find(url):
     """Find a suitable extractor for the given URL"""
     for cls in _list_classes():
         match = cls.pattern.match(url)
-        if match and cls not in _blacklist:
+        if match:
             return cls(match)
     return None

@@ -169,26 +169,10 @@ def extractors():
     )


-class blacklist():
-    """Context Manager to blacklist extractor modules"""
-
-    def __init__(self, categories, extractors=None):
-        self.extractors = extractors or []
-        for cls in _list_classes():
-            if cls.category in categories:
-                self.extractors.append(cls)
-
-    def __enter__(self):
-        _blacklist.update(self.extractors)
-
-    def __exit__(self, etype, value, traceback):
-        _blacklist.clear()
-
-
 # --------------------------------------------------------------------
 # internals

 _cache = []
-_blacklist = set()
 _module_iter = iter(modules)

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2020 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -9,7 +9,7 @@
 """Extractors for https://www.plurk.com/"""

 from .common import Extractor, Message
-from .. import text, extractor, exception
+from .. import text, exception
 import datetime
 import time
 import json

@@ -23,12 +23,9 @@ class PlurkExtractor(Extractor):

     def items(self):
         urls = self._urls_ex if self.config("comments", False) else self._urls

         yield Message.Version, 1
-        with extractor.blacklist(("plurk",)):
-            for plurk in self.plurks():
-                for url in urls(plurk):
-                    yield Message.Queue, url, plurk
+        for plurk in self.plurks():
+            for url in urls(plurk):
+                yield Message.Queue, url, plurk

     def plurks(self):
         """Return an iterable with all relevant 'plurk' objects"""

@@ -9,7 +9,6 @@
 """Recursive extractor"""

 from .common import Extractor, Message
-from .. import extractor, util
+from .. import util
 import requests
 import re

@@ -23,17 +22,12 @@ class RecursiveExtractor(Extractor):
         })

     def items(self):
         blist = self.config(
             "blacklist", {"directlink"} | util.SPECIAL_EXTRACTORS)

         self.session.mount("file://", FileAdapter())
         page = self.request(self.url.partition(":")[2]).text
         del self.session.adapters["file://"]

         yield Message.Version, 1
-        with extractor.blacklist(blist):
-            for match in re.finditer(r"https?://[^\s\"']+", page):
-                yield Message.Queue, match.group(0), {}
+        for match in re.finditer(r"https?://[^\s\"']+", page):
+            yield Message.Queue, match.group(0), {}


 class FileAdapter(requests.adapters.BaseAdapter):

@@ -9,7 +9,7 @@
 """Extract images from https://www.tumblr.com/"""

 from .common import Extractor, Message
-from .. import text, oauth, extractor, exception
+from .. import text, oauth, exception
 from datetime import datetime, timedelta
 import re

@@ -128,12 +128,9 @@ class TumblrExtractor(Extractor):

             if self.external:  # external links
                 post["extension"] = None
-                with extractor.blacklist(("tumblr",)):
-                    for key in ("permalink_url", "url"):
-                        url = post.get(key)
-                        if url:
-                            yield Message.Queue, url, post
-                            break
+                url = post.get("permalink_url") or post.get("url")
+                if url:
+                    yield Message.Queue, url, post

     def posts(self):
         """Return an iterable containing all relevant posts"""

Reference in New Issue
Block a user