[text] add 'extract_urls()' helper
This commit is contained in:
@@ -611,7 +611,7 @@ _CONVERSIONS = {
|
||||
"U": text.unescape,
|
||||
"H": lambda s: text.unescape(text.remove_html(s)),
|
||||
"g": text.slugify,
|
||||
"R": text.re(r"https?://[^\s\"'<>\\]+").findall,
|
||||
"R": text.extract_urls,
|
||||
"W": text.sanitize_whitespace,
|
||||
"S": util.to_string,
|
||||
"s": str,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015-2025 Mike Fährmann
|
||||
# Copyright 2015-2026 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -231,6 +231,9 @@ def extract_from(txt, pos=None, default=""):
|
||||
return extr
|
||||
|
||||
|
||||
# Callable that collects the HTTP(S) URLs found in a string: each match
# starts with "http://" or "https://" and continues up to the first
# whitespace character, single or double quote, angle bracket, or backslash.
# NOTE(review): 're' here is presumably this module's pattern-compiling
# helper (called as 'text.re' elsewhere), not the stdlib module -- confirm.
extract_urls = re(r"https?://[^\s\"'<>\\]+").findall
|
||||
|
||||
|
||||
def parse_unicode_escapes(txt):
|
||||
"""Convert JSON Unicode escapes in 'txt' into actual characters"""
|
||||
if "\\u" in txt:
|
||||
|
||||
Reference in New Issue
Block a user