extend blacklist/whitelist syntax (#2025)

Each entry in such a list can now also include a subcategory,
written as '<category>:<subcategory>'.
Either part can be replaced by '*' or an empty string as a placeholder:
'*:<subcategory>', ':<subcategory>', '<category>:*'

For example
  "blacklist": "imgur,*:tag,gfycat:user" or
  "blacklist": ["imgur", "*:tag", "gfycat:user"]
will filter all 'imgur' extractors, all extractors with a 'tag'
subcategory (e.g. https://danbooru.donmai.us/posts?tags=bonocho),
and all 'gfycat' user extractors.
This commit is contained in:
Mike Fährmann
2021-11-23 19:23:02 +01:00
parent 11a3d96d13
commit 010d65dcec
4 changed files with 191 additions and 46 deletions

View File

@@ -37,6 +37,31 @@ class TestJob(unittest.TestCase):
return buffer.getvalue()
class TestDownloadJob(TestJob):
    # Job class exercised by the tests in this class.
    jobclass = job.DownloadJob

    def test_extractor_filter(self):
        # Verify the result of the job's extractor filter for three
        # extractor classes: first before this test sets any option,
        # then after setting "blacklist", then after also setting
        # "whitelist".
        candidates = (TestExtractor, TestExtractorParent, TestExtractorAlt)
        download_job = self.jobclass(TestExtractor.from_url("test:"))

        def verify(*expected):
            # Rebuild the filter on every call so that it is created
            # after the most recent config.set() in this test.
            extractor_filter = download_job._build_extractor_filter()
            for extractor_class, outcome in zip(candidates, expected):
                self.assertEqual(extractor_filter(extractor_class), outcome)

        # Before this test changes any blacklist/whitelist option.
        verify(False, False, True)

        # Entry with an empty category part and an explicit subcategory.
        config.set((), "blacklist", ":test_subcategory")
        verify(False, True, False)

        # The "blacklist" value set above is still in place here.
        config.set((), "whitelist", "test_category:test_subcategory")
        verify(True, False, False)
class TestKeywordJob(TestJob):
jobclass = job.KeywordJob
@@ -334,5 +359,10 @@ class TestExtractorException(Extractor):
return 1/0
class TestExtractorAlt(Extractor):
    # Extractor whose category is not "test_category" but whose
    # subcategory is "test_subcategory"; used by the extractor-filter
    # test to check entries that match on only one of the two parts.
    category = "test_category_alt"
    subcategory = "test_subcategory"
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()