implement text.split_html()
This commit is contained in:
@@ -43,6 +43,17 @@ def remove_html(txt):
|
|||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def split_html(txt, sep=None):
    """Return the text fragments of 'txt' that lie between HTML tags

    Everything matching '<...>' is treated as a tag and used as a
    delimiter. Fragments that are empty or whitespace-only are dropped;
    the remaining ones are returned unmodified (no stripping).
    An argument that cannot be split (TypeError) yields an empty list.

    Note: 'sep' is accepted but not used by this implementation.
    """
    try:
        fragments = re.split("<[^>]+>", txt)
    except TypeError:
        # 'txt' is not something a str pattern can be applied to
        return []

    pieces = []
    for fragment in fragments:
        if not fragment or fragment.isspace():
            continue
        pieces.append(fragment)
    return pieces
|
||||||
|
|
||||||
|
|
||||||
def filename_from_url(url):
|
def filename_from_url(url):
|
||||||
"""Extract the last part of an url to use as a filename"""
|
"""Extract the last part of an url to use as a filename"""
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -64,6 +64,30 @@ class TestText(unittest.TestCase):
|
|||||||
for value in INVALID:
|
for value in INVALID:
|
||||||
self.assertEqual(f(value), "")
|
self.assertEqual(f(value), "")
|
||||||
|
|
||||||
|
def test_split_html(self, f=text.split_html):
    """Check split_html() with plain, empty, malformed and invalid input"""
    expected = ["Hello", "World."]
    nothing = []

    # (input, expected output) pairs, checked in order
    cases = (
        # standard usage
        ("", nothing),
        ("Hello World.", ["Hello World."]),
        (" Hello World. ", [" Hello World. "]),
        ("Hello<br/>World.", expected),
        ("<div><b class='a'>Hello</b><i>World.</i></div>", expected),

        # empty HTML
        ("<div></div>", nothing),
        (" <div> </div> ", nothing),

        # malformed HTML
        ("<div</div>", nothing),
        ("<div<Hello World.</div>", nothing),
    )
    for markup, wanted in cases:
        self.assertEqual(f(markup), wanted)

    # invalid arguments
    for value in INVALID:
        self.assertEqual(f(value), nothing)
|
||||||
|
|
||||||
def test_filename_from_url(self, f=text.filename_from_url):
|
def test_filename_from_url(self, f=text.filename_from_url):
|
||||||
result = "filename.ext"
|
result = "filename.ext"
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user