rewrite text tests and improve functions
- test more edge cases
- consistently return an empty string for invalid arguments
- remove the non-greedy quantifier ('?') from the regex in 'remove_html()'
This commit is contained in:
@@ -36,17 +36,20 @@ def clean_xml(xmldata, repl=""):
|
|||||||
return xmldata
|
return xmldata
|
||||||
|
|
||||||
|
|
||||||
def remove_html(text):
|
def remove_html(txt):
|
||||||
"""Remove html-tags from a string"""
|
"""Remove html-tags from a string"""
|
||||||
return " ".join(re.sub("<[^>]+?>", " ", text).split())
|
try:
|
||||||
|
return " ".join(re.sub("<[^>]+>", " ", txt).split())
|
||||||
|
except TypeError:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
def filename_from_url(url):
|
def filename_from_url(url):
|
||||||
"""Extract the last part of an url to use as a filename"""
|
"""Extract the last part of an url to use as a filename"""
|
||||||
try:
|
try:
|
||||||
return urllib.parse.urlsplit(url).path.rpartition("/")[2]
|
return urllib.parse.urlsplit(url).path.rpartition("/")[2]
|
||||||
except ValueError:
|
except (TypeError, AttributeError):
|
||||||
return url
|
return ""
|
||||||
|
|
||||||
|
|
||||||
def nameext_from_url(url, data=None):
|
def nameext_from_url(url, data=None):
|
||||||
@@ -64,7 +67,7 @@ def clean_path_windows(path):
|
|||||||
try:
|
try:
|
||||||
return re.sub(r'[<>:"\\/|?*]', "_", path)
|
return re.sub(r'[<>:"\\/|?*]', "_", path)
|
||||||
except TypeError:
|
except TypeError:
|
||||||
return path
|
return ""
|
||||||
|
|
||||||
|
|
||||||
def clean_path_posix(path):
|
def clean_path_posix(path):
|
||||||
@@ -72,7 +75,7 @@ def clean_path_posix(path):
|
|||||||
try:
|
try:
|
||||||
return path.replace("/", "_")
|
return path.replace("/", "_")
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
return path
|
return ""
|
||||||
|
|
||||||
|
|
||||||
def shorten_path(path, limit=255, encoding=sys.getfilesystemencoding()):
|
def shorten_path(path, limit=255, encoding=sys.getfilesystemencoding()):
|
||||||
@@ -112,7 +115,7 @@ def extract(txt, begin, end, pos=0):
|
|||||||
first = txt.index(begin, pos) + len(begin)
|
first = txt.index(begin, pos) + len(begin)
|
||||||
last = txt.index(end, first)
|
last = txt.index(end, first)
|
||||||
return txt[first:last], last+len(end)
|
return txt[first:last], last+len(end)
|
||||||
except ValueError:
|
except (ValueError, TypeError, AttributeError):
|
||||||
return None, pos
|
return None, pos
|
||||||
|
|
||||||
|
|
||||||
@@ -139,9 +142,12 @@ def extract_iter(txt, begin, end, pos=0):
|
|||||||
def parse_query(qs):
|
def parse_query(qs):
|
||||||
"""Parse a query string into key-value pairs"""
|
"""Parse a query string into key-value pairs"""
|
||||||
result = {}
|
result = {}
|
||||||
for key, value in urllib.parse.parse_qsl(qs):
|
try:
|
||||||
if key not in result:
|
for key, value in urllib.parse.parse_qsl(qs):
|
||||||
result[key] = value
|
if key not in result:
|
||||||
|
result[key] = value
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -13,6 +13,9 @@ import sys
|
|||||||
from gallery_dl import text
|
from gallery_dl import text
|
||||||
|
|
||||||
|
|
||||||
|
INVALID = ((), [], {}, None, 1, 2.3)
|
||||||
|
|
||||||
|
|
||||||
class TestText(unittest.TestCase):
|
class TestText(unittest.TestCase):
|
||||||
|
|
||||||
def test_clean_xml(self, f=text.clean_xml):
|
def test_clean_xml(self, f=text.clean_xml):
|
||||||
@@ -35,59 +38,85 @@ class TestText(unittest.TestCase):
|
|||||||
self.assertEqual(f(value), "\t\n\r")
|
self.assertEqual(f(value), "\t\n\r")
|
||||||
|
|
||||||
# 'invalid' arguments
|
# 'invalid' arguments
|
||||||
for value in ((), [], {}, None, 1, 2.3):
|
for value in INVALID:
|
||||||
self.assertEqual(f(value), "")
|
self.assertEqual(f(value), "")
|
||||||
|
|
||||||
def test_remove_html(self):
|
def test_remove_html(self, f=text.remove_html):
|
||||||
cases = (
|
|
||||||
"Hello World.",
|
|
||||||
" Hello World. ",
|
|
||||||
"Hello<br/>World.",
|
|
||||||
"<div><span class='a'>Hello</span><strong>World.</strong></div>"
|
|
||||||
)
|
|
||||||
result = "Hello World."
|
result = "Hello World."
|
||||||
for case in cases:
|
|
||||||
self.assertEqual(text.remove_html(case), result)
|
|
||||||
|
|
||||||
def test_filename_from_url(self):
|
# standard usage
|
||||||
cases = (
|
self.assertEqual(f(""), "")
|
||||||
"http://example.org/v2/filename.ext",
|
self.assertEqual(f("Hello World."), result)
|
||||||
"http://example.org/v2/filename.ext?param=value#fragment",
|
self.assertEqual(f(" Hello World. "), result)
|
||||||
"example.org/filename.ext",
|
self.assertEqual(f("Hello<br/>World."), result)
|
||||||
"/filename.ext",
|
self.assertEqual(
|
||||||
"filename.ext",
|
f("<div><b class='a'>Hello</b><i>World.</i></div>"), result)
|
||||||
)
|
|
||||||
|
# empty HTML
|
||||||
|
self.assertEqual(f("<div></div>"), "")
|
||||||
|
self.assertEqual(f(" <div> </div> "), "")
|
||||||
|
|
||||||
|
# malformed HTML
|
||||||
|
self.assertEqual(f("<div</div>"), "")
|
||||||
|
self.assertEqual(f("<div<Hello World.</div>"), "")
|
||||||
|
|
||||||
|
# invalid arguments
|
||||||
|
for value in INVALID:
|
||||||
|
self.assertEqual(f(value), "")
|
||||||
|
|
||||||
|
def test_filename_from_url(self, f=text.filename_from_url):
|
||||||
result = "filename.ext"
|
result = "filename.ext"
|
||||||
for case in cases:
|
|
||||||
self.assertEqual(text.filename_from_url(case), result)
|
|
||||||
|
|
||||||
def test_nameext_from_url(self):
|
# standard usage
|
||||||
cases = (
|
self.assertEqual(f(""), "")
|
||||||
"http://example.org/v2/filename.ext",
|
self.assertEqual(f("filename.ext"), result)
|
||||||
"http://example.org/v2/filename.ext?param=value#fragment",
|
self.assertEqual(f("/filename.ext"), result)
|
||||||
"example.org/filename.ext",
|
self.assertEqual(f("example.org/filename.ext"), result)
|
||||||
"/filename.ext",
|
self.assertEqual(f("http://example.org/v2/filename.ext"), result)
|
||||||
"filename.ext",
|
self.assertEqual(
|
||||||
)
|
f("http://example.org/v2/filename.ext?param=value#frag"), result)
|
||||||
result = {
|
|
||||||
"filename" : "filename.ext",
|
|
||||||
"name" : "filename",
|
|
||||||
"extension": "ext",
|
|
||||||
}
|
|
||||||
for case in cases:
|
|
||||||
self.assertEqual(text.nameext_from_url(case), result)
|
|
||||||
|
|
||||||
def test_clean_path(self):
|
# invalid arguments
|
||||||
cases = {
|
for value in INVALID:
|
||||||
"Hello World." : ("Hello World.", "Hello World."),
|
self.assertEqual(f(value), "")
|
||||||
"Hello/World/.": ("Hello_World_.", "Hello_World_."),
|
|
||||||
r'<Hello>:|"World\*?': (
|
def test_nameext_from_url(self, f=text.nameext_from_url):
|
||||||
'_Hello____World___', r'<Hello>:|"World\*?'
|
empty = {"filename": "", "name": "", "extension": ""}
|
||||||
),
|
result = {"filename": "filename.ext",
|
||||||
}
|
"name": "filename", "extension": "ext"}
|
||||||
for case, result in cases.items():
|
|
||||||
self.assertEqual(text.clean_path_windows(case), result[0])
|
# standard usage
|
||||||
self.assertEqual(text.clean_path_posix(case), result[1])
|
self.assertEqual(f(""), empty)
|
||||||
|
self.assertEqual(f("filename.ext"), result)
|
||||||
|
self.assertEqual(f("/filename.ext"), result)
|
||||||
|
self.assertEqual(f("example.org/filename.ext"), result)
|
||||||
|
self.assertEqual(f("http://example.org/v2/filename.ext"), result)
|
||||||
|
self.assertEqual(
|
||||||
|
f("http://example.org/v2/filename.ext?param=value#frag"), result)
|
||||||
|
|
||||||
|
# invalid arguments
|
||||||
|
for value in INVALID:
|
||||||
|
self.assertEqual(f(value), empty)
|
||||||
|
|
||||||
|
def test_clean_path_windows(self, f=text.clean_path_windows):
|
||||||
|
self.assertEqual(f(""), "")
|
||||||
|
self.assertEqual(f("foo"), "foo")
|
||||||
|
self.assertEqual(f("foo/bar"), "foo_bar")
|
||||||
|
self.assertEqual(f("foo<>:\"\\/|?*bar"), "foo_________bar")
|
||||||
|
|
||||||
|
# invalid arguments
|
||||||
|
for value in INVALID:
|
||||||
|
self.assertEqual(f(value), "")
|
||||||
|
|
||||||
|
def test_clean_path_posix(self, f=text.clean_path_posix):
|
||||||
|
self.assertEqual(f(""), "")
|
||||||
|
self.assertEqual(f("foo"), "foo")
|
||||||
|
self.assertEqual(f("foo/bar"), "foo_bar")
|
||||||
|
self.assertEqual(f("foo<>:\"\\/|?*bar"), "foo<>:\"\\_|?*bar")
|
||||||
|
|
||||||
|
# invalid arguments
|
||||||
|
for value in INVALID:
|
||||||
|
self.assertEqual(f(value), "")
|
||||||
|
|
||||||
def test_shorten_path(self):
|
def test_shorten_path(self):
|
||||||
cases = {
|
cases = {
|
||||||
@@ -115,68 +144,104 @@ class TestText(unittest.TestCase):
|
|||||||
self.assertEqual(fname, result)
|
self.assertEqual(fname, result)
|
||||||
self.assertTrue(len(fname.encode(enc)) <= 255)
|
self.assertTrue(len(fname.encode(enc)) <= 255)
|
||||||
|
|
||||||
def test_extract(self):
|
def test_extract(self, f=text.extract):
|
||||||
cases = {
|
txt = "<a><b>"
|
||||||
("<a><b>", "<", ">") : ("a", 3),
|
self.assertEqual(f(txt, "<", ">"), ("a", 3))
|
||||||
("<a><b>", "X", ">") : (None, 0),
|
self.assertEqual(f(txt, "X", ">"), (None, 0))
|
||||||
("<a><b>", "<", "X") : (None, 0),
|
self.assertEqual(f(txt, "<", "X"), (None, 0))
|
||||||
("<a><b>", "<", ">", 3): ("b", 6),
|
|
||||||
("<a><b>", "X", ">", 3): (None, 3),
|
|
||||||
("<a><b>", "<", "X", 3): (None, 3),
|
|
||||||
}
|
|
||||||
for case, result in cases.items():
|
|
||||||
self.assertEqual(text.extract(*case), result)
|
|
||||||
|
|
||||||
def test_extract_all(self):
|
# 'pos' argument
|
||||||
|
for i in range(1, 4):
|
||||||
|
self.assertEqual(f(txt, "<", ">", i), ("b", 6))
|
||||||
|
for i in range(4, 10):
|
||||||
|
self.assertEqual(f(txt, "<", ">", i), (None, i))
|
||||||
|
|
||||||
|
# invalid arguments
|
||||||
|
for value in INVALID:
|
||||||
|
self.assertEqual(f(value , "<" , ">") , (None, 0))
|
||||||
|
self.assertEqual(f(txt, value, ">") , (None, 0))
|
||||||
|
self.assertEqual(f(txt, "<" , value), (None, 0))
|
||||||
|
|
||||||
|
def test_extract_all(self, f=text.extract_all):
|
||||||
txt = "[c][b][a]: xyz! [d][e"
|
txt = "[c][b][a]: xyz! [d][e"
|
||||||
result = ({
|
|
||||||
"A": "a",
|
|
||||||
"B": "b",
|
|
||||||
"X": "xyz",
|
|
||||||
"E": "xtra",
|
|
||||||
}, 15)
|
|
||||||
self.assertEqual(text.extract_all(txt, (
|
|
||||||
(None, "[", "]"),
|
|
||||||
("B" , "[", "]"),
|
|
||||||
("A" , "[", "]"),
|
|
||||||
("X" , ": ", "!"),
|
|
||||||
), values={"E": "xtra"}), result)
|
|
||||||
|
|
||||||
def test_extract_iter(self):
|
self.assertEqual(
|
||||||
|
f(txt, ()), ({}, 0))
|
||||||
|
self.assertEqual(
|
||||||
|
f(txt, (("C", "[", "]"), ("B", "[", "]"), ("A", "[", "]"))),
|
||||||
|
({"A": "a", "B": "b", "C": "c"}, 9),
|
||||||
|
)
|
||||||
|
|
||||||
|
# 'None' as field name
|
||||||
|
self.assertEqual(
|
||||||
|
f(txt, ((None, "[", "]"), (None, "[", "]"), ("A", "[", "]"))),
|
||||||
|
({"A": "a"}, 9),
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
f(txt, ((None, "[", "]"), (None, "[", "]"), (None, "[", "]"))),
|
||||||
|
({}, 9),
|
||||||
|
)
|
||||||
|
|
||||||
|
# failed matches
|
||||||
|
self.assertEqual(
|
||||||
|
f(txt, (("C", "[", "]"), ("X", "X", "X"), ("B", "[", "]"))),
|
||||||
|
({"B": "b", "C": "c", "X": None}, 6),
|
||||||
|
)
|
||||||
|
|
||||||
|
# 'pos' argument
|
||||||
|
self.assertEqual(
|
||||||
|
f(txt, (("B", "[", "]"), ("A", "[", "]")), pos=1),
|
||||||
|
({"A": "a", "B": "b"}, 9),
|
||||||
|
)
|
||||||
|
|
||||||
|
# 'values' argument
|
||||||
|
self.assertEqual(
|
||||||
|
f(txt, (("C", "[", "]"),), values={"A": "a", "B": "b"}),
|
||||||
|
({"A": "a", "B": "b", "C": "c"}, 3),
|
||||||
|
)
|
||||||
|
|
||||||
|
vdict = {}
|
||||||
|
rdict, pos = f(txt, (), values=vdict)
|
||||||
|
self.assertIs(vdict, rdict)
|
||||||
|
|
||||||
|
def test_extract_iter(self, f=text.extract_iter):
|
||||||
txt = "[c][b][a]: xyz! [d][e"
|
txt = "[c][b][a]: xyz! [d][e"
|
||||||
result = ["c", "b", "a", "d"]
|
|
||||||
self.assertEqual(list(text.extract_iter(txt, "[", "]")), result)
|
|
||||||
|
|
||||||
def test_parse_query(self):
|
def g(*args):
|
||||||
# standard stuff
|
return list(f(*args))
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
text.parse_query(""), {})
|
g("", "[", "]"), [])
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
text.parse_query("foo=1"), {"foo": "1"})
|
g("[a]", "[", "]"), ["a"])
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
text.parse_query("foo=1&bar=2"), {"foo": "1", "bar": "2"})
|
g(txt, "[", "]"), ["c", "b", "a", "d"])
|
||||||
|
self.assertEqual(
|
||||||
|
g(txt, "X", "X"), [])
|
||||||
|
self.assertEqual(
|
||||||
|
g(txt, "[", "]", 6), ["a", "d"])
|
||||||
|
|
||||||
|
def test_parse_query(self, f=text.parse_query):
|
||||||
|
# standard usage
|
||||||
|
self.assertEqual(f(""), {})
|
||||||
|
self.assertEqual(f("foo=1"), {"foo": "1"})
|
||||||
|
self.assertEqual(f("foo=1&bar=2"), {"foo": "1", "bar": "2"})
|
||||||
|
|
||||||
# missing value
|
# missing value
|
||||||
self.assertEqual(
|
self.assertEqual(f("bar"), {})
|
||||||
text.parse_query("bar"), {})
|
self.assertEqual(f("foo=1&bar"), {"foo": "1"})
|
||||||
self.assertEqual(
|
self.assertEqual(f("foo=1&bar&baz=3"), {"foo": "1", "baz": "3"})
|
||||||
text.parse_query("foo=1&bar"), {"foo": "1"})
|
|
||||||
self.assertEqual(
|
|
||||||
text.parse_query("foo=1&bar&baz=3"), {"foo": "1", "baz": "3"})
|
|
||||||
|
|
||||||
# keys with identical names
|
# keys with identical names
|
||||||
|
self.assertEqual(f("foo=1&foo=2"), {"foo": "1"})
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
text.parse_query("foo=1&foo=2"), {"foo": "1"})
|
f("foo=1&bar=2&foo=3&bar=4"),
|
||||||
self.assertEqual(
|
|
||||||
text.parse_query("foo=1&bar=2&foo=3&bar=4"),
|
|
||||||
{"foo": "1", "bar": "2"},
|
{"foo": "1", "bar": "2"},
|
||||||
)
|
)
|
||||||
|
|
||||||
# non-string arguments
|
# invalid arguments
|
||||||
self.assertEqual(text.parse_query(()), {})
|
for value in INVALID:
|
||||||
self.assertEqual(text.parse_query([]), {})
|
self.assertEqual(f(value), {})
|
||||||
self.assertEqual(text.parse_query({}), {})
|
|
||||||
self.assertEqual(text.parse_query(None), {})
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|||||||
Reference in New Issue
Block a user