[test] add unit tests for extractor module functions

2018-03-24 17:24:34 +01:00
parent a993d0ea90
commit dd314279fb
5 changed files with 143 additions and 3 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,7 @@
 # Changelog
 ## Unreleased
 ## 1.3.2 - 2018-03-23
 - Added extractors for `artstation` albums, challenges and search results
 - Improved URL and metadata extraction for `hitomi` and `nhentai`
--- a/gallery_dl/extractor/init.py
+++ b/gallery_dl/extractor/init.py
@@ -107,7 +107,7 @@ def find(url):
 def add(klass):
    """Add 'klass' to the list of available extractors"""
-    for pattern in klass:
+    for pattern in klass.pattern:
        _cache.append((re.compile(pattern), klass))
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
-__version__ = "1.3.2"
+__version__ = "1.3.3-dev"
--- a/scripts/run_tests.sh
+++ b/scripts/run_tests.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
-TESTS_CORE=(config cookies oauth text util)
+TESTS_CORE=(config cookies extractor oauth text util)
 TESTS_RESULTS=(results)
--- a/test/test_extractor.py
+++ b/test/test_extractor.py
@@ -0,0 +1,138 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 # Copyright 2018 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 import sys
 import unittest
 import gallery_dl.extractor as extractor
 from gallery_dl.extractor.common import Extractor, Message
 from gallery_dl.extractor.directlink import DirectlinkExtractor as DLExtractor
 class FakeExtractor(Extractor):
    """Stub extractor used by the tests in this file.

    Declares the single pattern "fake:", ignores the 'match' argument
    and emits one version message followed by one URL message.
    """
    pattern = ["fake:"]
    category = "fake"
    subcategory = "test"
    def __init__(self, match=None):
        # 'match' is accepted for signature compatibility but never used
        super().__init__()
    def items(self):
        messages = (
            (Message.Version, 1),
            (Message.Url, "text:foobar", {}),
        )
        yield from messages
 class TestExtractor(unittest.TestCase):
    """Tests for find(), add(), add_module(), blacklist() and the
    uniqueness of the registered URL patterns in gallery_dl.extractor."""
    def setUp(self):
        # every test starts with a fresh module iterator and an empty cache
        extractor._module_iter = iter(extractor.modules)
        extractor._cache.clear()
    # find(): Extractor instance for supported URIs, None for unsupported
    # strings, TypeError for non-string arguments
    def test_find(self):
        supported = (
            "https://example.org/file.jpg",
            "tumblr:foobar",
            "oauth:flickr",
            "test:pixiv:",
            "recursive:https://example.org/document.html",
        )
        for uri in supported:
            self.assertIsInstance(extractor.find(uri), Extractor, uri)
        unsupported = ("", "/tmp/file.ext")
        for uri in unsupported:
            self.assertIsNone(extractor.find(uri))
        wrong_types = (None, [], {}, 123, b"test:")
        for value in wrong_types:
            self.assertRaises(TypeError, extractor.find, value)
    # add(): a class becomes findable once it has been added
    def test_add(self):
        fake_uri = "fake:foobar"
        self.assertIsNone(extractor.find(fake_uri))
        extractor.add(FakeExtractor)
        found = extractor.find(fake_uri)
        self.assertIsInstance(found, FakeExtractor)
    # add_module(): passing this test module yields exactly one
    # (compiled pattern, class) entry, the one for FakeExtractor
    def test_add_module(self):
        fake_uri = "fake:foobar"
        self.assertIsNone(extractor.find(fake_uri))
        this_module = sys.modules[__name__]
        added = extractor.add_module(this_module)
        self.assertEqual(len(added), 1)
        entry = added[0]
        self.assertEqual(entry[0].pattern, FakeExtractor.pattern[0])
        self.assertEqual(entry[1], FakeExtractor)
        found = extractor.find(fake_uri)
        self.assertIsInstance(found, FakeExtractor)
    # blacklist(): lookups are checked outside of and inside three
    # different blacklist contexts
    def test_blacklist(self):
        link_uri = "https://example.org/file.jpg"
        test_uri = "test:"
        fake_uri = "fake:"
        def verify(link_cls, test_cls):
            # an expected class of None means "find() must return None";
            # fake_uri is never expected to be found in this test
            expectations = (
                (link_uri, link_cls),
                (test_uri, test_cls),
                (fake_uri, None),
            )
            for uri, expected in expectations:
                found = extractor.find(uri)
                if expected is None:
                    self.assertIsNone(found)
                else:
                    self.assertIsInstance(found, expected)
        verify(DLExtractor, Extractor)
        with extractor.blacklist(["directlink"]):
            verify(None, Extractor)
        with extractor.blacklist([], [DLExtractor, FakeExtractor]):
            verify(None, Extractor)
        with extractor.blacklist(["test"], [DLExtractor]):
            verify(None, None)
    # every testcase URL must be matched by exactly one pattern, and that
    # pattern must belong to the extractor declaring the testcase
    def test_unique_pattern_matches(self):
        # gather (URL, extractor) pairs from all extractors that have tests
        test_urls = [
            (testcase[0], extr)
            for extr in extractor.extractors()
            if hasattr(extr, "test")
            for testcase in extr.test
        ]
        # try every cached regex pattern on every testcase URL
        for url, owner in test_urls:
            hits = []
            for regex, candidate in extractor._cache:
                # the DirectlinkExtractor pattern only counts for its own tests
                if candidate == DLExtractor and owner != DLExtractor:
                    continue
                hit = regex.match(url)
                if hit:
                    self.assertEqual(owner, candidate)
                    hits.append(hit)
            # anything other than exactly one hit is a failure
            if not hits:
                self.fail("'{}' isn't matched by any pattern".format(url))
            if len(hits) > 1:
                header = "'{}' gets matched by more than one pattern:".format(url)
                listing = "".join("\n- " + hit.re.pattern for hit in hits)
                self.fail(header + listing)
 # Run the tests of this file through unittest when it is executed directly.
 if __name__ == "__main__":
    unittest.main()