implement text.split_html()

This commit is contained in:
Mike Fährmann
2018-05-27 15:00:41 +02:00
parent 53f36176fd
commit ae9a37a528
2 changed files with 35 additions and 0 deletions

View File

@@ -43,6 +43,17 @@ def remove_html(txt):
return ""
def split_html(txt, sep=None):
    """Cut 'txt' at every HTML tag and return the remaining text pieces.

    Pieces that are empty or consist solely of whitespace are discarded;
    all other pieces are returned unmodified and in their original order.
    An empty list is returned when 'txt' is not something re.split()
    accepts (for example None). The 'sep' argument is accepted but is
    not used by this function.
    """
    try:
        # Only this call can raise TypeError (non-string input).
        fragments = re.split("<[^>]+>", txt)
    except TypeError:
        return []

    kept = []
    for fragment in fragments:
        # Skip the empty strings produced by adjacent tags as well as
        # whitespace-only filler between tags.
        if not fragment or fragment.isspace():
            continue
        kept.append(fragment)
    return kept
def filename_from_url(url):
"""Extract the last part of an url to use as a filename"""
try: