[util] move 're' functions to text.py
This commit is contained in:
@@ -8,14 +8,29 @@
|
||||
|
||||
"""Collection of functions that work on strings/text"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
import html
|
||||
import time
|
||||
import datetime
|
||||
import urllib.parse
|
||||
import re as re_module
|
||||
|
||||
HTML_RE = re.compile("<[^>]+>")
|
||||
# Resolve the low-level pattern compiler exposed by the 're' package.
# The private submodule is looked up as '_compiler' first (its name since
# Python 3.11); if that attribute is missing, fall back to 'sre_compile'.
try:
|
||||
re_compile = re_module._compiler.compile
|
||||
except AttributeError:
|
||||
re_compile = re_module.sre_compile.compile
|
||||
|
||||
# Matches one tag-like span: '<', one or more non-'>' characters, then '>'
HTML_RE = re_compile(r"<[^>]+>")
|
||||
# Maps a pattern to its compiled pattern object; populated on demand by re()
PATTERN_CACHE = {}
|
||||
|
||||
|
||||
def re(pattern):
    """Return the compiled pattern object for 'pattern'

    Compiled objects are stored in PATTERN_CACHE, so asking for the same
    pattern again returns the very same object.
    """
    compiled = PATTERN_CACHE.get(pattern)
    if compiled is None:
        # first request for this pattern: compile it and remember the result
        compiled = PATTERN_CACHE[pattern] = re_compile(pattern)
    return compiled
|
||||
|
||||
|
||||
def remove_html(txt, repl=" ", sep=" "):
|
||||
@@ -47,8 +62,8 @@ def slugify(value):
|
||||
Adapted from:
|
||||
https://github.com/django/django/blob/master/django/utils/text.py
|
||||
"""
|
||||
value = re.sub(r"[^\w\s-]", "", str(value).lower())
|
||||
return re.sub(r"[-\s]+", "-", value).strip("-_")
|
||||
value = re(r"[^\w\s-]").sub("", str(value).lower())
|
||||
return re(r"[-\s]+").sub("-", value).strip("-_")
|
||||
|
||||
|
||||
def ensure_http_scheme(url, scheme="https://"):
|
||||
@@ -199,7 +214,7 @@ def extract_from(txt, pos=None, default=""):
|
||||
def parse_unicode_escapes(txt):
|
||||
"""Convert JSON Unicode escapes in 'txt' into actual characters"""
|
||||
# Cheap substring test first: the regex only runs when a backslash-u
# sequence is present at all
if "\\u" in txt:
|
||||
# Each backslash-u followed by exactly four hex digits is replaced with
# whatever _hex_to_char returns for that match.
# NOTE(review): two consecutive 'return' lines follow. They look like the
# before/after lines of this diff hunk (module-level re.sub vs. the cached
# pattern from re()); as written only the first is reachable - confirm
# which one is meant to be live.
return re.sub(r"\\u([0-9a-fA-F]{4})", _hex_to_char, txt)
|
||||
return re(r"\\u([0-9a-fA-F]{4})").sub(_hex_to_char, txt)
|
||||
# No escape sequence present: hand the input back unchanged
return txt
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
|
||||
"""Utility functions and classes"""
|
||||
|
||||
import re as re_module
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
@@ -27,24 +26,6 @@ from http.cookiejar import Cookie
|
||||
from email.utils import mktime_tz, parsedate_tz
|
||||
from . import text, version, exception
|
||||
|
||||
# Resolve the low-level pattern compiler exposed by the 're' package.
# The private submodule is looked up as '_compiler' first (its name since
# Python 3.11); if that attribute is missing, fall back to 'sre_compile'.
try:
|
||||
re_compile = re_module._compiler.compile
|
||||
except AttributeError:
|
||||
re_compile = re_module.sre_compile.compile
|
||||
|
||||
# Maps a pattern to its compiled pattern object; populated on demand by re()
CACHE_PATTERN = {}
|
||||
|
||||
|
||||
def re(pattern):
    """Look up 'pattern' in the pattern cache, compiling it on a miss"""
    if pattern in CACHE_PATTERN:
        return CACHE_PATTERN[pattern]

    # cache miss: compile once and store the object for later calls
    compiled = re_compile(pattern)
    CACHE_PATTERN[pattern] = compiled
    return compiled
|
||||
|
||||
|
||||
def bencode(num, alphabet="0123456789"):
|
||||
"""Encode an integer into a base-N encoded string"""
|
||||
@@ -752,6 +733,9 @@ class CustomNone():
|
||||
_ff_ver = (datetime.date.today().toordinal() - 735506) // 28
|
||||
# _ch_ver = _ff_ver - 2
|
||||
|
||||
# Module-level aliases for the helpers defined in the 'text' module, so the
# names 're' and 're_compile' remain available on this module as well
re = text.re
|
||||
re_compile = text.re_compile
|
||||
|
||||
NONE = CustomNone()
|
||||
EPOCH = datetime.datetime(1970, 1, 1)
|
||||
SECOND = datetime.timedelta(0, 1)
|
||||
@@ -784,7 +768,7 @@ GLOBALS = {
|
||||
"hash_sha1": sha1,
|
||||
"hash_md5" : md5,
|
||||
"std" : ModuleProxy(),
|
||||
"re" : re_module,
|
||||
"re" : text.re_module,
|
||||
"exts_image" : EXTS_IMAGE,
|
||||
"exts_video" : EXTS_VIDEO,
|
||||
"exts_archive": EXTS_ARCHIVE,
|
||||
|
||||
@@ -23,6 +23,20 @@ INVALID_ALT = ((), [], {}, None, "")
|
||||
|
||||
class TestText(unittest.TestCase):
|
||||
|
||||
def test_re(self):
    """text.re() reuses compiled patterns; text.re_compile() does not"""
    pattern_type = text.re_module.Pattern

    uncached = text.re_compile("foo")
    cached_first = text.re("foo")
    cached_again = text.re("foo")

    # every helper has to hand back a real compiled pattern object
    for compiled in (uncached, cached_first, cached_again):
        self.assertIsInstance(compiled, pattern_type)

    # equal pattern, but a distinct object when the cache is bypassed ...
    self.assertEqual(uncached, cached_first)
    self.assertIsNot(uncached, cached_first)
    # ... and the identical object when going through the cache twice
    self.assertIs(cached_first, cached_again)
|
||||
|
||||
def test_remove_html(self, f=text.remove_html):
|
||||
result = "Hello World."
|
||||
|
||||
|
||||
@@ -13,7 +13,6 @@ import unittest
|
||||
from unittest.mock import patch
|
||||
|
||||
import io
|
||||
import re
|
||||
import time
|
||||
import random
|
||||
import string
|
||||
@@ -1042,21 +1041,6 @@ value = 123
|
||||
self.assertEqual(response.links.get("next"), None)
|
||||
self.assertEqual(response.close(), None)
|
||||
|
||||
def test_re(self):
    """util.re() reuses compiled patterns; util.re_compile() does not"""
    pattern_type = type(re.compile(""))

    uncached = util.re_compile("foo")
    cached_first = util.re("foo")
    cached_again = util.re("foo")

    # every helper has to hand back a real compiled pattern object
    for compiled in (uncached, cached_first, cached_again):
        self.assertIsInstance(compiled, pattern_type)

    # bypassing the cache yields a distinct object ...
    self.assertIsNot(uncached, cached_first)
    # ... while two cached lookups yield the identical one
    self.assertIs(cached_first, cached_again)
    # both routes still describe the same pattern
    self.assertEqual(uncached, cached_first)
|
||||
|
||||
|
||||
class TestExtractor():
|
||||
category = "test_category"
|
||||
|
||||
Reference in New Issue
Block a user