add 'text.extr()'

a stripped-down version of text.extract() that
- always returns a string (like 'extract_from')
- returns only the string itself, not a (string, position) tuple
- does not deal with 'pos' arguments
- is ~20% faster
This commit is contained in:
Mike Fährmann
2022-11-04 21:37:36 +01:00
parent 597b63d922
commit eb33e6cf2d
2 changed files with 22 additions and 0 deletions

View File

@@ -120,6 +120,15 @@ def extract(txt, begin, end, pos=0):
return None, pos
def extr(txt, begin, end):
    """Stripped-down version of 'extract()'

    Return the part of 'txt' located between the first occurrence of
    'begin' and the first occurrence of 'end' that follows it.

    Unlike 'extract()', only the extracted value is returned and there
    is no 'pos' argument. An empty string is returned when either
    marker cannot be found or when an argument is of an unsuitable type.
    """
    try:
        # start right after the 'begin' marker ...
        start = txt.index(begin) + len(begin)
        # ... and stop at the first 'end' marker at or after that point
        stop = txt.index(end, start)
        return txt[start:stop]
    except (ValueError, TypeError, AttributeError):
        # ValueError: marker not found
        # TypeError / AttributeError: unsuitable argument types
        return ""
def rextract(txt, begin, end, pos=-1):
try:
lbeg = len(begin)

View File

@@ -203,6 +203,19 @@ class TestText(unittest.TestCase):
self.assertEqual(f(txt , value, ">") , (None, 0))
self.assertEqual(f(txt , "<" , value), (None, 0))
def test_extr(self, f=text.extr):
    """Check 'extr()' with matching, non-matching, and invalid arguments"""
    check = self.assertEqual
    sample = "<a><b>"

    # a missing 'begin' or 'end' marker results in an empty string
    check(f(sample, "X", ">"), "")
    check(f(sample, "<", "X"), "")

    # both markers present: the text between them is returned
    check(f(sample, "<", ">"), "a")
    check(f(sample, "><", ">"), "b")

    # invalid arguments
    # every value from INVALID, in each argument position,
    # is expected to produce an empty string
    for invalid in INVALID:
        check(f(invalid, "<", ">"), "")
        check(f(sample, invalid, ">"), "")
        check(f(sample, "<", invalid), "")
def test_rextract(self, f=text.rextract):
txt = "<a><b>"
self.assertEqual(f(txt, "<", ">"), ("b" , 3))