rewrite text tests and improve functions

- test more edge cases
- consistently return an empty string for invalid arguments
- remove the non-greedy quantifier ('+?' -> '+') from the regex in 'remove_html()'
2018-04-14 22:09:42 +02:00
parent e3f2bd4087
commit 27eab4e467
2 changed files with 173 additions and 102 deletions
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -36,17 +36,20 @@ def clean_xml(xmldata, repl=""):
    return xmldata


-def remove_html(text):
+def remove_html(txt):
    """Remove html-tags from a string"""
-    return " ".join(re.sub("<[^>]+?>", " ", text).split())
+    try:
+        return " ".join(re.sub("<[^>]+>", " ", txt).split())
+    except TypeError:
+        return ""


 def filename_from_url(url):
    """Extract the last part of an url to use as a filename"""
    try:
        return urllib.parse.urlsplit(url).path.rpartition("/")[2]
-    except ValueError:
-        return url
+    except (TypeError, AttributeError):
+        return ""


 def nameext_from_url(url, data=None):
@@ -64,7 +67,7 @@ def clean_path_windows(path):
    try:
        return re.sub(r'[<>:"\\/|?*]', "_", path)
    except TypeError:
-        return path
+        return ""


 def clean_path_posix(path):
@@ -72,7 +75,7 @@ def clean_path_posix(path):
    try:
        return path.replace("/", "_")
    except AttributeError:
-        return path
+        return ""


 def shorten_path(path, limit=255, encoding=sys.getfilesystemencoding()):
@@ -112,7 +115,7 @@ def extract(txt, begin, end, pos=0):
        first = txt.index(begin, pos) + len(begin)
        last = txt.index(end, first)
        return txt[first:last], last+len(end)
-    except ValueError:
+    except (ValueError, TypeError, AttributeError):
        return None, pos


@@ -139,9 +142,12 @@ def extract_iter(txt, begin, end, pos=0):
 def parse_query(qs):
    """Parse a query string into key-value pairs"""
    result = {}
-    for key, value in urllib.parse.parse_qsl(qs):
-        if key not in result:
-            result[key] = value
+    try:
+        for key, value in urllib.parse.parse_qsl(qs):
+            if key not in result:
+                result[key] = value
+    except AttributeError:
+        pass
    return result