From f6c5edb76be125da0bd837a3f79affed8481b015 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Fri, 13 Mar 2020 23:30:16 +0100
Subject: [PATCH] pre-compile regex pattern for remove_html() and split_html()

---
 gallery_dl/text.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index 72dad5b1..a3f4e0ac 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -15,6 +15,8 @@ import datetime
 import urllib.parse
 
 
+HTML_RE = re.compile("<[^>]+>")
+
 INVALID_XML_CHARS = (
     "\x00", "\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07",
     "\x08", "\x0b", "\x0c", "\x0e", "\x0f", "\x10", "\x11", "\x12",
@@ -39,7 +41,7 @@ def clean_xml(xmldata, repl=""):
 def remove_html(txt, repl=" ", sep=" "):
     """Remove html-tags from a string"""
     try:
-        txt = re.sub("<[^>]+>", repl, txt)
+        txt = HTML_RE.sub(repl, txt)
     except TypeError:
         return ""
     if sep:
@@ -51,7 +53,7 @@ def split_html(txt, sep=None):
     """Split input string by html-tags"""
     try:
         return [
-            x.strip() for x in re.split("<[^>]+>", txt)
+            x.strip() for x in HTML_RE.split(txt)
             if x and not x.isspace()
         ]
     except TypeError: