extend blacklist/whitelist syntax (#2025)

Each entry in such a list can now also include a subcategory
'<category>:<subcategory>'
and it is possible to use '*' or an empty string as a placeholder:
'*:<subcategory>', ':<subcategory>', '<category>:*'

For example
  "blacklist": "imgur,*:tag,gfycat:user" or
  "blacklist": ["imgur", "*:tag", "gfycat:user"]
will filter all 'imgur' extractors, all extractors with a 'tag'
subcategory (e.g. https://danbooru.donmai.us/posts?tags=bonocho),
and all 'gfycat' user extractors.
This commit is contained in:
Mike Fährmann
2021-11-23 19:23:02 +01:00
parent 11a3d96d13
commit 010d65dcec
4 changed files with 191 additions and 46 deletions

View File

@@ -357,6 +357,58 @@ class TestOther(unittest.TestCase):
with self.assertRaises(exception.StopExtraction):
expr()
def test_extractor_filter(self):
    """Exercise util.build_extractor_filter() with the stub extractors.

    Every case builds one filter function and compares what it returns
    for TestExtractor, TestExtractorParent and TestExtractorAlt (always
    in that order) with the expected booleans.  The cases under
    "whitelist" pass ``negate=False``; all others use the default.
    """
    build = util.build_extractor_filter
    stubs = (TestExtractor, TestExtractorParent, TestExtractorAlt)

    def expect(predicate, *outcomes):
        # One equality assertion per stub class, checked in 'stubs' order.
        for extr, outcome in zip(stubs, outcomes):
            self.assertEqual(predicate(extr), outcome)

    # empty
    expect(build(""), True, True, True)

    # category
    expect(build("test_category"), False, False, True)

    # subcategory
    expect(build("*:test_subcategory"), False, True, False)

    # basecategory
    expect(build("test_basecategory"), False, False, False)

    # category-subcategory pair
    expect(build("test_category:test_subcategory"), False, True, True)

    # combination
    expect(
        build(["test_category", "*:test_subcategory"]),
        False, False, False,
    )

    # whitelist
    expect(
        build("test_category:test_subcategory", negate=False),
        True, False, False,
    )
    expect(
        build(
            ["test_category:test_subcategory", "*:test_subcategory_parent"],
            negate=False,
        ),
        True, True, False,
    )
def test_generate_token(self):
tokens = set()
for _ in range(100):
@@ -469,5 +521,21 @@ class TestOther(unittest.TestCase):
self.assertIs(obj["key"], obj)
class TestExtractor:
    """Minimal stand-in class handed to the filter functions under test.

    It only defines the three category attributes; the sibling stub
    classes derive from it and override some of them.
    """

    basecategory = "test_basecategory"
    category = "test_category"
    subcategory = "test_subcategory"
class TestExtractorParent(TestExtractor):
    """Stub with TestExtractor's category but a different subcategory.

    'basecategory' is not redefined here and comes from TestExtractor.
    """

    subcategory = "test_subcategory_parent"
    category = "test_category"
class TestExtractorAlt(TestExtractor):
    """Stub with a different category but TestExtractor's subcategory.

    'basecategory' is not redefined here and comes from TestExtractor.
    """

    subcategory = "test_subcategory"
    category = "test_category_alt"
# Run the test suite only when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()