diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index 23c6d0f2..5cd0be75 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -103,7 +103,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
"gallery_id": text.parse_int(self.gallery_id),
"title": text.unescape(title or ""),
"user": ", ".join(users),
- "fields": [f for f in text.split_html(fields) if f != ", "],
+ "fields": [f for f in text.split_html(fields) if f != ","],
"date": text.parse_int(date),
"views": text.parse_int(stats[0]),
"votes": text.parse_int(stats[1]),
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 1b78f601..d6df528c 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2018 Mike Fährmann
+# Copyright 2014-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -88,7 +88,7 @@ class SankakuExtractor(SharedConfigExtractor):
"id": text.parse_int(post_id),
"md5": file_url.rpartition("/")[2].partition(".")[0],
"tags": text.unescape(tags),
- "vote_average": float(vavg or 0),
+ "vote_average": text.parse_float(vavg),
"vote_count": text.parse_int(vcnt),
"created_at": created,
"rating": (rating or "?")[0].lower(),
diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index af906731..65e988ae 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2018 Mike Fährmann
+# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Collection of functions that work in strings/text"""
+"""Collection of functions that work on strings/text"""
import re
import html
@@ -47,7 +47,7 @@ def split_html(txt, sep=None):
"""Split input string by html-tags"""
try:
return [
- x for x in re.split("<[^>]+>", txt)
+ x.strip() for x in re.split("<[^>]+>", txt)
if x and not x.isspace()
]
except TypeError:
@@ -165,6 +165,16 @@ def parse_int(value, default=0):
return default
+def parse_float(value, default=0.0):
+    """Return 'value' as float, or 'default' if it is empty or invalid"""
+    if value:
+        try:
+            return float(value)
+        except (ValueError, TypeError):
+            pass
+    return default
+
+
def parse_query(qs):
"""Parse a query string into key-value pairs"""
result = {}
@@ -182,12 +192,11 @@ if os.name == "nt":
else:
clean_path = clean_path_posix
-urljoin = urllib.parse.urljoin
-unquote = urllib.parse.unquote
-escape = html.escape
-try:
- unescape = html.unescape
-except AttributeError:
- import html.parser
- unescape = html.parser.HTMLParser().unescape
+urljoin = urllib.parse.urljoin
+# URL percent-encoding helpers (thin aliases of urllib.parse)
+quote = urllib.parse.quote
+unquote = urllib.parse.unquote
+# HTML entity helpers (thin aliases of the html module)
+escape = html.escape
+unescape = html.unescape
diff --git a/test/test_text.py b/test/test_text.py
index 697d83b0..314578d0 100644
--- a/test/test_text.py
+++ b/test/test_text.py
@@ -71,8 +71,9 @@ class TestText(unittest.TestCase):
# standard usage
self.assertEqual(f(""), empty)
self.assertEqual(f("Hello World."), ["Hello World."])
- self.assertEqual(f(" Hello World. "), [" Hello World. "])
+ self.assertEqual(f(" Hello World. "), ["Hello World."])
self.assertEqual(f("Hello
World."), result)
+ self.assertEqual(f(" Hello
World. "), result)
self.assertEqual(
f("