add 'text.parse_float()' + cleanup in text.py
This commit is contained in:
@@ -103,7 +103,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
|
|||||||
"gallery_id": text.parse_int(self.gallery_id),
|
"gallery_id": text.parse_int(self.gallery_id),
|
||||||
"title": text.unescape(title or ""),
|
"title": text.unescape(title or ""),
|
||||||
"user": ", ".join(users),
|
"user": ", ".join(users),
|
||||||
"fields": [f for f in text.split_html(fields) if f != ", "],
|
"fields": [f for f in text.split_html(fields) if f != ","],
|
||||||
"date": text.parse_int(date),
|
"date": text.parse_int(date),
|
||||||
"views": text.parse_int(stats[0]),
|
"views": text.parse_int(stats[0]),
|
||||||
"votes": text.parse_int(stats[1]),
|
"votes": text.parse_int(stats[1]),
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2014-2018 Mike Fährmann
|
# Copyright 2014-2019 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -88,7 +88,7 @@ class SankakuExtractor(SharedConfigExtractor):
|
|||||||
"id": text.parse_int(post_id),
|
"id": text.parse_int(post_id),
|
||||||
"md5": file_url.rpartition("/")[2].partition(".")[0],
|
"md5": file_url.rpartition("/")[2].partition(".")[0],
|
||||||
"tags": text.unescape(tags),
|
"tags": text.unescape(tags),
|
||||||
"vote_average": float(vavg or 0),
|
"vote_average": text.parse_float(vavg),
|
||||||
"vote_count": text.parse_int(vcnt),
|
"vote_count": text.parse_int(vcnt),
|
||||||
"created_at": created,
|
"created_at": created,
|
||||||
"rating": (rating or "?")[0].lower(),
|
"rating": (rating or "?")[0].lower(),
|
||||||
|
|||||||
@@ -1,12 +1,12 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2015-2018 Mike Fährmann
|
# Copyright 2015-2019 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
# published by the Free Software Foundation.
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
"""Collection of functions that work in strings/text"""
|
"""Collection of functions that work on strings/text"""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import html
|
import html
|
||||||
@@ -47,7 +47,7 @@ def split_html(txt, sep=None):
|
|||||||
"""Split input string by html-tags"""
|
"""Split input string by html-tags"""
|
||||||
try:
|
try:
|
||||||
return [
|
return [
|
||||||
x for x in re.split("<[^>]+>", txt)
|
x.strip() for x in re.split("<[^>]+>", txt)
|
||||||
if x and not x.isspace()
|
if x and not x.isspace()
|
||||||
]
|
]
|
||||||
except TypeError:
|
except TypeError:
|
||||||
@@ -165,6 +165,16 @@ def parse_int(value, default=0):
|
|||||||
return default
|
return default
|
||||||
|
|
||||||
|
|
||||||
|
def parse_float(value, default=0.0):
|
||||||
|
"""Convert 'value' to float"""
|
||||||
|
if not value:
|
||||||
|
return default
|
||||||
|
try:
|
||||||
|
return float(value)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
return default
|
||||||
|
|
||||||
|
|
||||||
def parse_query(qs):
|
def parse_query(qs):
|
||||||
"""Parse a query string into key-value pairs"""
|
"""Parse a query string into key-value pairs"""
|
||||||
result = {}
|
result = {}
|
||||||
@@ -182,12 +192,11 @@ if os.name == "nt":
|
|||||||
else:
|
else:
|
||||||
clean_path = clean_path_posix
|
clean_path = clean_path_posix
|
||||||
|
|
||||||
urljoin = urllib.parse.urljoin
|
|
||||||
unquote = urllib.parse.unquote
|
|
||||||
escape = html.escape
|
|
||||||
|
|
||||||
try:
|
urljoin = urllib.parse.urljoin
|
||||||
unescape = html.unescape
|
|
||||||
except AttributeError:
|
quote = urllib.parse.quote
|
||||||
import html.parser
|
unquote = urllib.parse.unquote
|
||||||
unescape = html.parser.HTMLParser().unescape
|
|
||||||
|
escape = html.escape
|
||||||
|
unescape = html.unescape
|
||||||
|
|||||||
@@ -71,8 +71,9 @@ class TestText(unittest.TestCase):
|
|||||||
# standard usage
|
# standard usage
|
||||||
self.assertEqual(f(""), empty)
|
self.assertEqual(f(""), empty)
|
||||||
self.assertEqual(f("Hello World."), ["Hello World."])
|
self.assertEqual(f("Hello World."), ["Hello World."])
|
||||||
self.assertEqual(f(" Hello World. "), [" Hello World. "])
|
self.assertEqual(f(" Hello World. "), ["Hello World."])
|
||||||
self.assertEqual(f("Hello<br/>World."), result)
|
self.assertEqual(f("Hello<br/>World."), result)
|
||||||
|
self.assertEqual(f(" Hello <br/> World. "), result)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
f("<div><b class='a'>Hello</b><i>World.</i></div>"), result)
|
f("<div><b class='a'>Hello</b><i>World.</i></div>"), result)
|
||||||
|
|
||||||
@@ -260,6 +261,27 @@ class TestText(unittest.TestCase):
|
|||||||
self.assertEqual(f(value, default), default)
|
self.assertEqual(f(value, default), default)
|
||||||
self.assertEqual(f("zzz", default), default)
|
self.assertEqual(f("zzz", default), default)
|
||||||
|
|
||||||
|
def test_parse_float(self, f=text.parse_float):
|
||||||
|
self.assertEqual(f(0), 0.0)
|
||||||
|
self.assertEqual(f("0"), 0.0)
|
||||||
|
self.assertEqual(f(123), 123.0)
|
||||||
|
self.assertEqual(f("123"), 123.0)
|
||||||
|
self.assertEqual(f(123.456), 123.456)
|
||||||
|
self.assertEqual(f("123.456"), 123.456)
|
||||||
|
|
||||||
|
# invalid arguments
|
||||||
|
for value in INVALID_ALT:
|
||||||
|
self.assertEqual(f(value), 0.0)
|
||||||
|
self.assertEqual(f("zzz"), 0.0)
|
||||||
|
self.assertEqual(f([1, 2, 3]), 0.0)
|
||||||
|
self.assertEqual(f({1: 2, 3: 4}), 0.0)
|
||||||
|
|
||||||
|
# 'default' argument
|
||||||
|
default = "default"
|
||||||
|
for value in INVALID_ALT:
|
||||||
|
self.assertEqual(f(value, default), default)
|
||||||
|
self.assertEqual(f("zzz", default), default)
|
||||||
|
|
||||||
def test_parse_query(self, f=text.parse_query):
|
def test_parse_query(self, f=text.parse_query):
|
||||||
# standard usage
|
# standard usage
|
||||||
self.assertEqual(f(""), {})
|
self.assertEqual(f(""), {})
|
||||||
|
|||||||
Reference in New Issue
Block a user