update extractor blacklist to also allow classes
This commit is contained in:
@@ -98,7 +98,7 @@ def find(url):
     """Find suitable extractor for the given url"""
     for pattern, klass in _list_patterns():
         match = pattern.match(url)
-        if match and klass.category not in _blacklist:
+        if match and klass not in _blacklist:
             return klass(match)
     return None
 
@@ -113,11 +113,15 @@ def extractors():
 
 class blacklist():
     """Context Manager to blacklist extractor modules"""
-    def __init__(self, categories):
-        self.categories = categories
+    def __init__(self, categories, extractors=None):
+        self.extractors = extractors or []
+        for _, klass in _list_patterns():
+            if klass.category in categories:
+                self.extractors.append(klass)
+        print(self.extractors)
 
     def __enter__(self):
-        _blacklist.extend(self.categories)
+        _blacklist.update(self.extractors)
 
     def __exit__(self, etype, value, traceback):
         _blacklist.clear()
@@ -127,7 +131,7 @@ class blacklist():
 # internals
 
 _cache = []
-_blacklist = []
+_blacklist = set()
 _module_iter = iter(modules)
 
 
|
|||||||
@@ -28,7 +28,7 @@ class RecursiveExtractor(Extractor):
 
     def items(self):
         blist = self.config(
-            "blacklist", ("directlink",) + util.SPECIAL_EXTRACTORS)
+            "blacklist", {"directlink"} | util.SPECIAL_EXTRACTORS)
         page = self.request(self.url).text
         yield Message.Version, 1
         with extractor.blacklist(blist):
|
|||||||
@@ -183,7 +183,7 @@ CODES = {
     "zh": "Chinese",
 }
 
-SPECIAL_EXTRACTORS = ("oauth", "recursive", "test")
+SPECIAL_EXTRACTORS = {"oauth", "recursive", "test"}
 
 
 def build_predicate(predicates):
|
|||||||
@@ -20,7 +20,7 @@ SKIP = {
     # temporary issues
     "batoto", # R.I.P.
     "imgyt", # server maintenance
-    "luscious",
+    "gelbooru",
 }
 
 
|
|||||||
Reference in New Issue
Block a user