use internal, non-caching version of re.compile for extractor patterns

speeds up total compile time of extractor patterns by ~10ms
This commit is contained in:
Mike Fährmann
2025-04-13 21:37:32 +02:00
parent 26747051b0
commit 4c8c98a14d
4 changed files with 15 additions and 7 deletions

View File

@@ -7,7 +7,7 @@
# published by the Free Software Foundation.
import sys
import re
from ..util import re_compile
modules = [
"2ch",
@@ -234,7 +234,8 @@ def find(url):
def add(cls):
    """Add 'cls' to the list of available extractors"""
    # Compile only when the pattern is still a plain string: the
    # non-caching compiler is meant for pattern source text, so a pattern
    # that is already compiled is left untouched.
    if isinstance(cls.pattern, str):
        cls.pattern = re_compile(cls.pattern)
    _cache.append(cls)
    return cls
@@ -242,9 +243,11 @@ def add(cls):
def add_module(module):
    """Add all extractors in 'module' to the list of available extractors"""
    classes = _get_classes(module)
    # Check every class on its own instead of inferring the state of the
    # whole module from its first class, so a module mixing compiled and
    # uncompiled patterns is handled correctly.
    for cls in classes:
        if isinstance(cls.pattern, str):
            cls.pattern = re_compile(cls.pattern)
    # Register the classes exactly once; extending with an empty
    # collection is a no-op, so no emptiness guard is needed.
    _cache.extend(classes)
    return classes

View File

@@ -59,7 +59,7 @@ class Extractor():
@classmethod
def from_url(cls, url):
    """Return an instance of 'cls' for 'url', or None if it does not match"""
    # Patterns may still be plain strings here; compile on first use and
    # store the compiled object on the class so later calls skip this step.
    if isinstance(cls.pattern, str):
        cls.pattern = util.re_compile(cls.pattern)
    match = cls.pattern.match(url)
    return cls(match) if match else None

View File

@@ -27,6 +27,11 @@ from http.cookiejar import Cookie
from email.utils import mktime_tz, parsedate_tz
from . import text, version, exception
# Internal, non-caching counterpart of re.compile(). It lives in a private
# submodule of 're' whose name is not the same in every Python version,
# so both known locations are tried in turn.
try:
    re_compile = re._compiler.compile
except AttributeError:
    try:
        re_compile = re.sre_compile.compile
    except AttributeError:
        # Neither private module is available: fall back to the public
        # (caching) function rather than failing at import time.
        re_compile = re.compile
def bencode(num, alphabet="0123456789"):
"""Encode an integer into a base-N encoded string"""

View File

@@ -6,5 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
# Package version string and optional build variant.
__version__ = "1.29.5-dev"
__variant__ = None