add 'repl' and 'sep' arguments to text.remove_html()

This commit is contained in:
Mike Fährmann
2019-07-17 14:48:24 +02:00
parent 8d1ae9b715
commit 1740086d8a

View File

@@ -36,12 +36,15 @@ def clean_xml(xmldata, repl=""):
return xmldata
def remove_html(txt, repl=" ", sep=" "):
    """Remove html-tags from a string

    Every substring matching "<[^>]+>" in 'txt' is replaced with 'repl'.

    If 'sep' is truthy, the result is split on runs of whitespace and the
    pieces are re-joined with 'sep', which also drops leading and trailing
    whitespace. If 'sep' is falsy (e.g. "" or None), interior whitespace is
    left as it is and only leading/trailing whitespace is stripped.

    Returns an empty string when the substitution raises TypeError,
    e.g. when 'txt' is None or otherwise not a string.
    """
    try:
        txt = re.sub("<[^>]+>", repl, txt)
    except TypeError:
        return ""
    if sep:
        # str.split() without arguments splits on any whitespace run and
        # discards empty strings, so this collapses whitespace to 'sep'
        return sep.join(txt.split())
    return txt.strip()
def split_html(txt, sep=None):