[util] move 're' functions to text.py
This commit is contained in:
@@ -8,14 +8,29 @@
|
|||||||
|
|
||||||
"""Collection of functions that work on strings/text"""
|
"""Collection of functions that work on strings/text"""
|
||||||
|
|
||||||
import re
|
|
||||||
import sys
|
import sys
|
||||||
import html
|
import html
|
||||||
import time
|
import time
|
||||||
import datetime
|
import datetime
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
import re as re_module
|
||||||
|
|
||||||
HTML_RE = re.compile("<[^>]+>")
|
# Resolve the low-level compile function of the 're' module: try
# 're_module._compiler' first and fall back to 're_module.sre_compile'
# when that attribute does not exist.
# NOTE(review): presumably used instead of re.compile() so that every call
# returns a newly built pattern object -- confirm against the tests
try:
|
||||||
|
re_compile = re_module._compiler.compile
|
||||||
|
except AttributeError:
|
||||||
|
re_compile = re_module.sre_compile.compile
|
||||||
|
|
||||||
|
# Matches one HTML tag: '<', one or more characters other than '>', then '>'
HTML_RE = re_compile(r"<[^>]+>")
|
||||||
|
# Compiled patterns keyed by the pattern given to re(); filled on demand
PATTERN_CACHE = {}
|
||||||
|
|
||||||
|
|
||||||
|
def re(pattern):
    """Return the compiled form of 'pattern', reusing a cached object"""
    compiled = PATTERN_CACHE.get(pattern)
    if compiled is None:
        # first request for this pattern: compile it and remember the result
        compiled = PATTERN_CACHE[pattern] = re_compile(pattern)
    return compiled
|
||||||
|
|
||||||
|
|
||||||
def remove_html(txt, repl=" ", sep=" "):
|
def remove_html(txt, repl=" ", sep=" "):
|
||||||
@@ -47,8 +62,8 @@ def slugify(value):
|
|||||||
Adapted from:
|
Adapted from:
|
||||||
https://github.com/django/django/blob/master/django/utils/text.py
|
https://github.com/django/django/blob/master/django/utils/text.py
|
||||||
"""
|
"""
|
||||||
value = re.sub(r"[^\w\s-]", "", str(value).lower())
|
value = re(r"[^\w\s-]").sub("", str(value).lower())
|
||||||
return re.sub(r"[-\s]+", "-", value).strip("-_")
|
return re(r"[-\s]+").sub("-", value).strip("-_")
|
||||||
|
|
||||||
|
|
||||||
def ensure_http_scheme(url, scheme="https://"):
|
def ensure_http_scheme(url, scheme="https://"):
|
||||||
@@ -199,7 +214,7 @@ def extract_from(txt, pos=None, default=""):
|
|||||||
def parse_unicode_escapes(txt):
|
def parse_unicode_escapes(txt):
|
||||||
"""Convert JSON Unicode escapes in 'txt' into actual characters"""
|
"""Convert JSON Unicode escapes in 'txt' into actual characters"""
|
||||||
if "\\u" in txt:
|
if "\\u" in txt:
|
||||||
return re.sub(r"\\u([0-9a-fA-F]{4})", _hex_to_char, txt)
|
return re(r"\\u([0-9a-fA-F]{4})").sub(_hex_to_char, txt)
|
||||||
return txt
|
return txt
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,6 @@
|
|||||||
|
|
||||||
"""Utility functions and classes"""
|
"""Utility functions and classes"""
|
||||||
|
|
||||||
import re as re_module
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import json
|
import json
|
||||||
@@ -27,24 +26,6 @@ from http.cookiejar import Cookie
|
|||||||
from email.utils import mktime_tz, parsedate_tz
|
from email.utils import mktime_tz, parsedate_tz
|
||||||
from . import text, version, exception
|
from . import text, version, exception
|
||||||
|
|
||||||
# Pick the compile function of the 're' module: use
# 're_module._compiler.compile' when that attribute exists, otherwise
# fall back to 're_module.sre_compile.compile'.
try:
|
|
||||||
re_compile = re_module._compiler.compile
|
|
||||||
except AttributeError:
|
|
||||||
re_compile = re_module.sre_compile.compile
|
|
||||||
|
|
||||||
# Compiled patterns keyed by the pattern given to re(); filled on demand
CACHE_PATTERN = {}
|
|
||||||
|
|
||||||
|
|
||||||
def re(pattern):
    """Look up 'pattern' in the cache; compile and store it when absent"""
    if pattern not in CACHE_PATTERN:
        CACHE_PATTERN[pattern] = re_compile(pattern)
    return CACHE_PATTERN[pattern]
|
|
||||||
|
|
||||||
|
|
||||||
def bencode(num, alphabet="0123456789"):
|
def bencode(num, alphabet="0123456789"):
|
||||||
"""Encode an integer into a base-N encoded string"""
|
"""Encode an integer into a base-N encoded string"""
|
||||||
@@ -752,6 +733,9 @@ class CustomNone():
|
|||||||
_ff_ver = (datetime.date.today().toordinal() - 735506) // 28
|
_ff_ver = (datetime.date.today().toordinal() - 735506) // 28
|
||||||
# _ch_ver = _ff_ver - 2
|
# _ch_ver = _ff_ver - 2
|
||||||
|
|
||||||
|
re = text.re
|
||||||
|
re_compile = text.re_compile
|
||||||
|
|
||||||
NONE = CustomNone()
|
NONE = CustomNone()
|
||||||
EPOCH = datetime.datetime(1970, 1, 1)
|
EPOCH = datetime.datetime(1970, 1, 1)
|
||||||
SECOND = datetime.timedelta(0, 1)
|
SECOND = datetime.timedelta(0, 1)
|
||||||
@@ -784,7 +768,7 @@ GLOBALS = {
|
|||||||
"hash_sha1": sha1,
|
"hash_sha1": sha1,
|
||||||
"hash_md5" : md5,
|
"hash_md5" : md5,
|
||||||
"std" : ModuleProxy(),
|
"std" : ModuleProxy(),
|
||||||
"re" : re_module,
|
"re" : text.re_module,
|
||||||
"exts_image" : EXTS_IMAGE,
|
"exts_image" : EXTS_IMAGE,
|
||||||
"exts_video" : EXTS_VIDEO,
|
"exts_video" : EXTS_VIDEO,
|
||||||
"exts_archive": EXTS_ARCHIVE,
|
"exts_archive": EXTS_ARCHIVE,
|
||||||
|
|||||||
@@ -23,6 +23,20 @@ INVALID_ALT = ((), [], {}, None, "")
|
|||||||
|
|
||||||
class TestText(unittest.TestCase):
|
class TestText(unittest.TestCase):
|
||||||
|
|
||||||
|
def test_re(self):
    """text.re() caches compiled patterns; text.re_compile() does not"""
    uncached = text.re_compile("foo")
    first = text.re("foo")
    second = text.re("foo")

    pattern_type = text.re_module.Pattern
    for compiled in (uncached, first, second):
        self.assertIsInstance(compiled, pattern_type)

    # equal patterns, but only the two text.re() results are the same object
    self.assertEqual(uncached, first)
    self.assertIsNot(uncached, first)
    self.assertIs(first, second)
|
||||||
|
|
||||||
def test_remove_html(self, f=text.remove_html):
|
def test_remove_html(self, f=text.remove_html):
|
||||||
result = "Hello World."
|
result = "Hello World."
|
||||||
|
|
||||||
|
|||||||
@@ -13,7 +13,6 @@ import unittest
|
|||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
import io
|
import io
|
||||||
import re
|
|
||||||
import time
|
import time
|
||||||
import random
|
import random
|
||||||
import string
|
import string
|
||||||
@@ -1042,21 +1041,6 @@ value = 123
|
|||||||
self.assertEqual(response.links.get("next"), None)
|
self.assertEqual(response.links.get("next"), None)
|
||||||
self.assertEqual(response.close(), None)
|
self.assertEqual(response.close(), None)
|
||||||
|
|
||||||
def test_re(self):
    """util.re() caches compiled patterns; util.re_compile() does not"""
    pattern_type = type(re.compile(""))

    uncached = util.re_compile("foo")
    first = util.re("foo")
    second = util.re("foo")

    for compiled in (uncached, first, second):
        self.assertIsInstance(compiled, pattern_type)

    # only the two util.re() results are the same object
    self.assertIsNot(uncached, first)
    self.assertIs(first, second)
    self.assertEqual(uncached, first)
|
|
||||||
|
|
||||||
|
|
||||||
class TestExtractor():
|
class TestExtractor():
|
||||||
category = "test_category"
|
category = "test_category"
|
||||||
|
|||||||
Reference in New Issue
Block a user