diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index e439c2b8..9ce7ef1f 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -36,17 +36,20 @@ def clean_xml(xmldata, repl=""):
return xmldata
-def remove_html(text):
+def remove_html(txt):
"""Remove html-tags from a string"""
- return " ".join(re.sub("<[^>]+?>", " ", text).split())
+ try:
+ return " ".join(re.sub("<[^>]+>", " ", txt).split())
+ except TypeError:
+ return ""
def filename_from_url(url):
"""Extract the last part of an url to use as a filename"""
try:
return urllib.parse.urlsplit(url).path.rpartition("/")[2]
-    except ValueError:
-        return url
+    except (TypeError, AttributeError, ValueError):  # e.g. invalid IPv6 URL
+        return ""
def nameext_from_url(url, data=None):
@@ -64,7 +67,7 @@ def clean_path_windows(path):
try:
return re.sub(r'[<>:"\\/|?*]', "_", path)
except TypeError:
- return path
+ return ""
def clean_path_posix(path):
@@ -72,7 +75,7 @@ def clean_path_posix(path):
try:
return path.replace("/", "_")
except AttributeError:
- return path
+ return ""
def shorten_path(path, limit=255, encoding=sys.getfilesystemencoding()):
@@ -112,7 +115,7 @@ def extract(txt, begin, end, pos=0):
first = txt.index(begin, pos) + len(begin)
last = txt.index(end, first)
return txt[first:last], last+len(end)
- except ValueError:
+ except (ValueError, TypeError, AttributeError):
return None, pos
@@ -139,9 +142,12 @@ def extract_iter(txt, begin, end, pos=0):
def parse_query(qs):
"""Parse a query string into key-value pairs"""
result = {}
-    for key, value in urllib.parse.parse_qsl(qs):
-        if key not in result:
-            result[key] = value
+    try:
+        for key, value in urllib.parse.parse_qsl(qs):
+            if key not in result:
+                result[key] = value
+    except (AttributeError, TypeError):  # not a str/bytes query string
+        pass
return result
diff --git a/test/test_text.py b/test/test_text.py
index c4b02969..4afa058d 100644
--- a/test/test_text.py
+++ b/test/test_text.py
@@ -13,6 +13,9 @@ import sys
from gallery_dl import text
+INVALID = ((), [], {}, None, 1, 2.3)
+
+
class TestText(unittest.TestCase):
def test_clean_xml(self, f=text.clean_xml):
@@ -35,59 +38,85 @@ class TestText(unittest.TestCase):
self.assertEqual(f(value), "\t\n\r")
# 'invalid' arguments
- for value in ((), [], {}, None, 1, 2.3):
+ for value in INVALID:
self.assertEqual(f(value), "")
- def test_remove_html(self):
- cases = (
- "Hello World.",
- " Hello World. ",
- "Hello
World.",
- "