unescape items in text.split_html()

This commit is contained in:
Mike Fährmann
2021-03-29 02:12:29 +02:00
parent 36291176bc
commit 387fe415d5
4 changed files with 13 additions and 9 deletions

View File

@@ -27,11 +27,12 @@ def remove_html(txt, repl=" ", sep=" "):
     return txt.strip()
 
 
-def split_html(txt, sep=None):
-    """Split input string by html-tags"""
+def split_html(txt):
+    """Split input string by HTML tags"""
     try:
         return [
-            x.strip() for x in HTML_RE.split(txt)
+            unescape(x).strip()
+            for x in HTML_RE.split(txt)
             if x and not x.isspace()
         ]
     except TypeError: