replace 'util.re()' with 'text.re()'

remove 'util' imports from modules that no longer use 'util' after this change
This commit is contained in:
Mike Fährmann
2025-10-20 17:44:58 +02:00
parent c8fc790028
commit 9bf76c1352
42 changed files with 91 additions and 91 deletions

View File

@@ -9,7 +9,7 @@
"""Extractors for https://agn.ph/"""
from . import booru
from .. import text, util
from .. import text
import collections
BASE_PATTERN = r"(?:https?://)?agn\.ph"
@@ -70,7 +70,7 @@ class AgnphExtractor(booru.BooruExtractor):
return
tags = collections.defaultdict(list)
pattern = util.re(r'class="(.)typetag">([^<]+)')
pattern = text.re(r'class="(.)typetag">([^<]+)')
for tag_type, tag_name in pattern.findall(tag_container):
tags[tag_type].append(text.unquote(tag_name).replace(" ", "_"))
for key, value in tags.items():

View File

@@ -63,7 +63,7 @@ class ArcalivePostExtractor(ArcaliveExtractor):
def _extract_files(self, post):
files = []
for video, media in util.re(r"<(?:img|vide(o)) ([^>]+)").findall(
for video, media in text.re(r"<(?:img|vide(o)) ([^>]+)").findall(
post["content"]):
if not self.emoticons and 'class="arca-emoticon"' in media:
continue

View File

@@ -104,7 +104,7 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
info = text.remove_html(extr('link-hover">', "</"))
info = text.unescape(info)
match = util.re(
match = text.re(
r"(?i)(?:(?:Volume|S(?:eason)?)\s*(\d+)\s+)?"
r"(?:Chapter|Episode)\s*(\d+)([\w.]*)").match(info)
if match:

View File

@@ -13,7 +13,7 @@ from .. import text, util
def original(url):
return (util.re(r"(/|=)(?:[sw]\d+|w\d+-h\d+)(?=/|$)")
return (text.re(r"(/|=)(?:[sw]\d+|w\d+-h\d+)(?=/|$)")
.sub(r"\1s0", url)
.replace("http:", "https:", 1))
@@ -32,7 +32,7 @@ class BloggerExtractor(BaseExtractor):
self.videos = self.config("videos", True)
if self.videos:
self.findall_video = util.re(
self.findall_video = text.re(
r"""src=["'](https?://www\.blogger\.com"""
r"""/video\.g\?token=[^"']+)""").findall
@@ -43,7 +43,7 @@ class BloggerExtractor(BaseExtractor):
blog["date"] = self.parse_datetime_iso(blog["published"])
del blog["selfLink"]
findall_image = util.re(
findall_image = text.re(
r'src="(https?://(?:'
r'blogger\.googleusercontent\.com/img|'
r'lh\d+(?:-\w+)?\.googleusercontent\.com|'

View File

@@ -64,13 +64,13 @@ class DeviantartExtractor(Extractor):
if self.quality:
if self.quality == "png":
self.quality = "-fullview.png?"
self.quality_sub = util.re(r"-fullview\.[a-z0-9]+\?").sub
self.quality_sub = text.re(r"-fullview\.[a-z0-9]+\?").sub
else:
self.quality = f",q_{self.quality}"
self.quality_sub = util.re(r",q_\d+").sub
self.quality_sub = text.re(r",q_\d+").sub
if self.intermediary:
self.intermediary_subn = util.re(r"(/f/[^/]+/[^/]+)/v\d+/.*").subn
self.intermediary_subn = text.re(r"(/f/[^/]+/[^/]+)/v\d+/.*").subn
if isinstance(self.original, str) and \
self.original.lower().startswith("image"):
@@ -269,7 +269,7 @@ class DeviantartExtractor(Extractor):
)
# filename metadata
sub = util.re(r"\W").sub
sub = text.re(r"\W").sub
deviation["filename"] = "".join((
sub("_", deviation["title"].lower()), "_by_",
sub("_", deviation["author"]["username"].lower()), "-d",
@@ -675,7 +675,7 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\
def _find_folder(self, folders, name, uuid):
if uuid.isdecimal():
match = util.re(
match = text.re(
"(?i)" + name.replace("-", "[^a-z0-9]+") + "$").match
for folder in folders:
if match(folder["name"]):

View File

@@ -46,7 +46,7 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
def metadata(self, page):
extr = text.extract_from(page)
match = util.re(
match = text.re(
r"(?:<a[^>]*>)?([^<]+)(?:</a>)?" # manga name
r"(?: ch(\d+)([^:<]*))?" # chapter info
r"(?:: (.+))?" # title

View File

@@ -7,7 +7,7 @@
"""Extractors for https://everia.club"""
from .common import Extractor, Message
from .. import text, util
from .. import text
BASE_PATTERN = r"(?:https?://)?everia\.club"
@@ -25,7 +25,7 @@ class EveriaExtractor(Extractor):
return self._pagination(self.groups[0])
def _pagination(self, path, params=None, pnum=1):
find_posts = util.re(r'thumbnail">\s*<a href="([^"]+)').findall
find_posts = text.re(r'thumbnail">\s*<a href="([^"]+)').findall
while True:
if pnum == 1:
@@ -52,7 +52,7 @@ class EveriaPostExtractor(EveriaExtractor):
url = self.root + self.groups[0] + "/"
page = self.request(url).text
content = text.extr(page, 'itemprop="text">', "<h3")
urls = util.re(r'img.*?lazy-src="([^"]+)').findall(content)
urls = text.re(r'img.*?lazy-src="([^"]+)').findall(content)
data = {
"title": text.unescape(

View File

@@ -216,7 +216,7 @@ class FanboxExtractor(Extractor):
def _get_urls_from_post(self, content_body, post):
num = 0
if cover_image := post.get("coverImageUrl"):
cover_image = util.re("/c/[0-9a-z_]+").sub("", cover_image)
cover_image = text.re("/c/[0-9a-z_]+").sub("", cover_image)
final_post = post.copy()
final_post["isCoverImage"] = True
final_post["fileUrl"] = cover_image

View File

@@ -96,7 +96,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
params["pid"] = self.page_start * self.per_page
data = {}
find_ids = util.re(r"\sid=\"p(\d+)").findall
find_ids = text.re(r"\sid=\"p(\d+)").findall
while True:
page = self.request(url, params=params).text
@@ -136,7 +136,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
return
tags = collections.defaultdict(list)
pattern = util.re(r"(?s)tag-type-([^\"' ]+).*?[?;]tags=([^\"'&]+)")
pattern = text.re(r"(?s)tag-type-([^\"' ]+).*?[?;]tags=([^\"'&]+)")
for tag_type, tag_name in pattern.findall(tag_container):
tags[tag_type].append(text.unescape(text.unquote(tag_name)))
for key, value in tags.items():

View File

@@ -7,7 +7,7 @@
"""Generic information extractor"""
from .common import Extractor, Message
from .. import config, text, util
from .. import config, text
import os.path
@@ -171,8 +171,8 @@ class GenericExtractor(Extractor):
r"(?:[^\"'<>\s]*)?" # optional query and fragment
)
imageurls_src = util.re(imageurl_pattern_src).findall(page)
imageurls_ext = util.re(imageurl_pattern_ext).findall(page)
imageurls_src = text.re(imageurl_pattern_src).findall(page)
imageurls_ext = text.re(imageurl_pattern_ext).findall(page)
imageurls = imageurls_src + imageurls_ext
# Resolve relative urls
@@ -181,7 +181,7 @@ class GenericExtractor(Extractor):
# by prepending a suitable base url.
#
# If the page contains a <base> element, use it as base url
basematch = util.re(
basematch = text.re(
r"(?i)(?:<base\s.*?href=[\"']?)(?P<url>[^\"' >]+)").search(page)
if basematch:
self.baseurl = basematch['url'].rstrip('/')

View File

@@ -5,7 +5,7 @@
# published by the Free Software Foundation.
from .common import Extractor, Message
from .. import text, util, exception
from .. import text, exception
from ..cache import cache
BASE_PATTERN = r"(?:https?://)?(?:www\.)?girlswithmuscle\.com"
@@ -155,7 +155,7 @@ class GirlswithmuscleSearchExtractor(GirlswithmuscleExtractor):
raise exception.AuthorizationError(msg)
page = response.text
match = util.re(r"Page (\d+) of (\d+)").search(page)
match = text.re(r"Page (\d+) of (\d+)").search(page)
current, total = match.groups()
current, total = text.parse_int(current), text.parse_int(total)

View File

@@ -7,7 +7,7 @@
"""Extractors for https://hatenablog.com"""
from .common import Extractor, Message
from .. import text, util
from .. import text
BASE_PATTERN = (
@@ -30,7 +30,7 @@ class HatenablogExtractor(Extractor):
self.domain = match[1] or match[2]
def _init(self):
self._find_img = util.re(r'<img +([^>]+)').finditer
self._find_img = text.re(r'<img +([^>]+)').finditer
def _handle_article(self, article: str):
extr = text.extract_from(article)
@@ -73,7 +73,7 @@ class HatenablogEntriesExtractor(HatenablogExtractor):
def _init(self):
HatenablogExtractor._init(self)
self._find_pager_url = util.re(
self._find_pager_url = text.re(
r' class="pager-next">\s*<a href="([^"]+)').search
def items(self):

View File

@@ -30,7 +30,7 @@ class Hentai2readChapterExtractor(Hentai2readBase, ChapterExtractor):
chapter_id, pos = text.extract(page, 'data-cid="', '"', pos)
chapter, sep, minor = self.groups[1].partition(".")
match = util.re(
match = text.re(
r"Reading (.+) \(([^)]+)\) Hentai(?: by (.*))? - "
r"([^:]+): (.+) . Page 1 ").match(title)
if match:

View File

@@ -33,7 +33,7 @@ class HentaihereChapterExtractor(HentaihereBase, ChapterExtractor):
title = text.extr(page, "<title>", "</title>")
chapter_id = text.extr(page, 'report/C', '"')
chapter, sep, minor = self.chapter.partition(".")
match = util.re(
match = text.re(
r"Page 1 \| (.+) \(([^)]+)\) - Chapter \d+: (.+) by "
r"(.+) at ").match(title)
return {

View File

@@ -9,7 +9,7 @@
"""Extractors for https://hiperdex.com/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text, util
from .. import text
from ..cache import memcache
BASE_PATTERN = (r"((?:https?://)?(?:www\.)?"
@@ -79,7 +79,7 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
return self.chapter_data(self.chapter)
def images(self, page):
pattern = util.re(r'id="image-\d+"\s+(?:data-)?src="([^"]+)')
pattern = text.re(r'id="image-\d+"\s+(?:data-)?src="([^"]+)')
return [
(url.strip(), None)
for url in pattern.findall(page)

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.imagebam.com/"""
from .common import Extractor, Message
from .. import text, util
from .. import text
class ImagebamExtractor(Extractor):
@@ -69,7 +69,7 @@ class ImagebamGalleryExtractor(ImagebamExtractor):
page, 'id="gallery-name">', '<').strip())}
def images(self, page):
findall = util.re(r'<a href="https://www\.imagebam\.com'
findall = text.re(r'<a href="https://www\.imagebam\.com'
r'(/(?:image/|view/M)[a-zA-Z0-9]+)').findall
paths = []
while True:

View File

@@ -9,7 +9,7 @@
"""Extractors for https://imgbox.com/"""
from .common import Extractor, Message, AsynchronousMixin
from .. import text, util, exception
from .. import text, exception
class ImgboxExtractor(Extractor):
@@ -69,7 +69,7 @@ class ImgboxGalleryExtractor(AsynchronousMixin, ImgboxExtractor):
page = self.request(self.root + "/g/" + self.gallery_key).text
if "The specified gallery could not be found." in page:
raise exception.NotFoundError("gallery")
self.image_keys = util.re(
self.image_keys = text.re(
r'<a href="/([^"]+)"><img alt="').findall(page)
title = text.extr(page, "<h1>", "</h1>")

View File

@@ -38,7 +38,7 @@ class InstagramExtractor(Extractor):
def _init(self):
self.www_claim = "0"
self.csrf_token = util.generate_token()
self._find_tags = util.re(r"#\w+").findall
self._find_tags = text.re(r"#\w+").findall
self._logged_in = True
self._cursor = None
self._user = None

View File

@@ -44,7 +44,7 @@ class KemonoExtractor(Extractor):
order = self.config("order-revisions")
self.revisions_reverse = order[0] in ("r", "a") if order else False
self._find_inline = util.re(
self._find_inline = text.re(
r'src="(?:https?://(?:kemono\.cr|coomer\.st))?(/inline/[^"]+'
r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
self._json_dumps = json.JSONEncoder(
@@ -52,7 +52,7 @@ class KemonoExtractor(Extractor):
sort_keys=True, separators=(",", ":")).encode
def items(self):
find_hash = util.re(HASH_PATTERN).match
find_hash = text.re(HASH_PATTERN).match
generators = self._build_file_generators(self.config("files"))
announcements = True if self.config("announcements") else None
archives = True if self.config("archives") else False
@@ -413,10 +413,10 @@ class KemonoDiscordExtractor(KemonoExtractor):
"parent_id" : channel["parent_channel_id"],
}
find_inline = util.re(
find_inline = text.re(
r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)"
r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall
find_hash = util.re(HASH_PATTERN).match
find_hash = text.re(HASH_PATTERN).match
if (order := self.config("order-posts")) and order[0] in ("r", "d"):
posts = self.api.discord_channel(channel_id, channel["post_count"])

View File

@@ -9,7 +9,7 @@
"""Extractors for https://komikcast.li/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text, util
from .. import text
BASE_PATTERN = (r"(?:https?://)?(?:www\.)?"
r"komikcast\d*\.(?:l(?:i|a|ol)|com|cz|site|mo?e)")
@@ -25,7 +25,7 @@ class KomikcastBase():
if data is None:
data = {}
pattern = util.re(r"(?:(.*) Chapter )?0*(\d+)([^ ]*)(?: (?:- )?(.+))?")
pattern = text.re(r"(?:(.*) Chapter )?0*(\d+)([^ ]*)(?: (?:- )?(.+))?")
match = pattern.match(text.unescape(chapter_string))
manga, chapter, data["chapter_minor"], title = match.groups()
@@ -54,7 +54,7 @@ class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
def images(self, page):
readerarea = text.extr(
page, '<div class="main-reading-area', '</div')
pattern = util.re(r"<img[^>]* src=[\"']([^\"']+)")
pattern = text.re(r"<img[^>]* src=[\"']([^\"']+)")
return [
(text.unescape(url), None)
for url in pattern.findall(readerarea)

View File

@@ -9,7 +9,7 @@
"""Extractors for https://www.mangahere.cc/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text, util
from .. import text
class MangahereBase():
@@ -102,7 +102,7 @@ class MangahereMangaExtractor(MangahereBase, MangaExtractor):
info, pos = text.extract(page, 'class="title3">', '<', pos)
date, pos = text.extract(page, 'class="title2">', '<', pos)
match = util.re(
match = text.re(
r"(?:Vol\.0*(\d+) )?Ch\.0*(\d+)(\S*)(?: - (.*))?").match(info)
if match:
volume, chapter, minor, title = match.groups()

View File

@@ -23,7 +23,7 @@ class MangaparkBase():
category = "mangapark"
def _parse_chapter_title(self, title):
match = util.re(
match = text.re(
r"(?i)"
r"(?:vol(?:\.|ume)?\s*(\d+)\s*)?"
r"ch(?:\.|apter)?\s*(\d+)([^\s:]*)"

View File

@@ -7,7 +7,7 @@
"""Extractors for https://mangaread.org/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text, util, exception
from .. import text, exception
class MangareadBase():
@@ -16,7 +16,7 @@ class MangareadBase():
root = "https://www.mangaread.org"
def parse_chapter_string(self, chapter_string, data):
match = util.re(
match = text.re(
r"(?:(.+)\s*-\s*)?[Cc]hapter\s*(\d+)(\.\d+)?(?:\s*-\s*(.+))?"
).match(text.unescape(chapter_string).strip())
manga, chapter, minor, title = match.groups()

View File

@@ -9,7 +9,7 @@
"""Extractors for Moebooru based sites"""
from .booru import BooruExtractor
from .. import text, util, dt
from .. import text, dt
import collections
@@ -32,7 +32,7 @@ class MoebooruExtractor(BooruExtractor):
return
tags = collections.defaultdict(list)
pattern = util.re(r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'+]+)")
pattern = text.re(r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'+]+)")
for tag_type, tag_name in pattern.findall(tag_container):
tags[tag_type].append(text.unquote(tag_name))
for key, value in tags.items():

View File

@@ -34,7 +34,7 @@ class NewgroundsExtractor(Extractor):
self.user_root = f"https://{self.user}.newgrounds.com"
def _init(self):
self._extract_comment_urls = util.re(
self._extract_comment_urls = text.re(
r'(?:<img |data-smartload-)src="([^"]+)').findall
self.flash = self.config("flash", True)
@@ -321,7 +321,7 @@ class NewgroundsExtractor(Extractor):
def _video_formats(self, sources):
src = sources["360p"][0]["src"]
sub = util.re(r"\.360p\.\w+").sub
sub = text.re(r"\.360p\.\w+").sub
for fmt in self.format:
try:

View File

@@ -10,7 +10,7 @@
from .common import Extractor, Message
from ..cache import cache
from .. import text, util, exception
from .. import text, exception
BASE_PATTERN = r"(?:https?://)?www\.pillowfort\.social"
@@ -36,7 +36,7 @@ class PillowfortExtractor(Extractor):
external = self.config("external", False)
if inline:
inline = util.re(r'src="(https://img\d+\.pillowfort\.social'
inline = text.re(r'src="(https://img\d+\.pillowfort\.social'
r'/posts/[^"]+)').findall
for post in self.posts():

View File

@@ -43,7 +43,7 @@ class PixivExtractor(Extractor):
self.meta_captions = self.config("captions")
if self.sanity_workaround or self.meta_captions:
self.meta_captions_sub = util.re(
self.meta_captions_sub = text.re(
r'<a href="/jump\.php\?([^"]+)').sub
def items(self):

View File

@@ -61,7 +61,7 @@ class PlurkExtractor(Extractor):
if not data:
raise exception.NotFoundError("user")
return util.json_loads(
util.re(r"new Date\(([^)]+)\)").sub(r"\1", data))
text.re(r"new Date\(([^)]+)\)").sub(r"\1", data))
class PlurkTimelineExtractor(PlurkExtractor):

View File

@@ -7,7 +7,7 @@
"""Extractors for Postmill instances"""
from .common import BaseExtractor, Message
from .. import text, util, exception
from .. import text, exception
class PostmillExtractor(BaseExtractor):
@@ -20,8 +20,8 @@ class PostmillExtractor(BaseExtractor):
def _init(self):
self.instance = self.root.partition("://")[2]
self.save_link_post_body = self.config("save-link-post-body", False)
self._search_canonical_url = util.re(r"/f/([\w\d_]+)/(\d+)/").search
self._search_image_tag = util.re(
self._search_canonical_url = text.re(r"/f/([\w\d_]+)/(\d+)/").search
self._search_image_tag = text.re(
r'<a href="[^"]+"\n +class="submission__image-link"').search
def items(self):

View File

@@ -70,7 +70,7 @@ class RealbooruExtractor(booru.BooruExtractor):
page = post["_html"]
tag_container = text.extr(page, 'id="tagLink"', '</div>')
tags = collections.defaultdict(list)
pattern = util.re(r'<a class="(?:tag-type-)?([^"]+).*?;tags=([^"&]+)')
pattern = text.re(r'<a class="(?:tag-type-)?([^"]+).*?;tags=([^"&]+)')
for tag_type, tag_name in pattern.findall(tag_container):
tags[tag_type].append(text.unescape(text.unquote(tag_name)))
for key, value in tags.items():

View File

@@ -9,7 +9,7 @@
"""Recursive extractor"""
from .common import Extractor, Message
from .. import text, util
from .. import text
class RecursiveExtractor(Extractor):
@@ -27,5 +27,5 @@ class RecursiveExtractor(Extractor):
else:
page = self.request(text.ensure_http_scheme(url)).text
for match in util.re(r"https?://[^\s\"']+").finditer(page):
for match in text.re(r"https?://[^\s\"']+").finditer(page):
yield Message.Queue, match[0], {}

View File

@@ -9,7 +9,7 @@
"""Extractors for https://rule34.us/"""
from .booru import BooruExtractor
from .. import text, util
from .. import text
import collections
@@ -19,7 +19,7 @@ class Rule34usExtractor(BooruExtractor):
per_page = 42
def _init(self):
self._find_tags = util.re(
self._find_tags = text.re(
r'<li class="([^-"]+)-tag"[^>]*><a href="[^;"]+;q=([^"]+)').findall
def _parse_post(self, post_id):

View File

@@ -47,7 +47,7 @@ class SankakuExtractor(BooruExtractor):
self.api = SankakuAPI(self)
if self.config("tags") == "extended":
self._tags = self._tags_extended
self._tags_findall = util.re(
self._tags_findall = text.re(
r"tag-type-([^\"' ]+).*?\?tags=([^\"'&]+)").findall
def _file_url(self, post):
@@ -129,10 +129,10 @@ class SankakuTagExtractor(SankakuExtractor):
if "date:" in self.tags:
# rewrite 'date:' tags (#1790)
self.tags = util.re(
self.tags = text.re(
r"date:(\d\d)[.-](\d\d)[.-](\d\d\d\d)(?!T)").sub(
r"date:\3-\2-\1T00:00", self.tags)
self.tags = util.re(
self.tags = text.re(
r"date:(\d\d\d\d)[.-](\d\d)[.-](\d\d)(?!T)").sub(
r"date:\1-\2-\3T00:00", self.tags)

View File

@@ -64,19 +64,19 @@ class SankakucomplexArticleExtractor(SankakucomplexExtractor):
yield Message.Url, url, file
def _extract_images(self, content):
orig_sub = util.re(r"-\d+x\d+\.").sub
orig_sub = text.re(r"-\d+x\d+\.").sub
return [
orig_sub(".", url) for url in
util.unique(text.extract_iter(content, 'data-lazy-src="', '"'))
]
def _extract_videos(self, content):
return util.re(r"<source [^>]*src=[\"']([^\"']+)").findall(content)
return text.re(r"<source [^>]*src=[\"']([^\"']+)").findall(content)
def _extract_embeds(self, content):
return [
"ytdl:" + url for url in
util.re(r"<iframe [^>]*src=[\"']([^\"']+)").findall(content)
text.re(r"<iframe [^>]*src=[\"']([^\"']+)").findall(content)
]

View File

@@ -9,7 +9,7 @@
"""Extractors for https://speakerdeck.com/"""
from .common import GalleryExtractor
from .. import text, util
from .. import text
class SpeakerdeckPresentationExtractor(GalleryExtractor):
@@ -46,7 +46,7 @@ class SpeakerdeckPresentationExtractor(GalleryExtractor):
def images(self, _):
url = f"{self.root}/player/{self.presentation_id}"
page = self.request(url).text
page = util.re(r"\s+").sub(" ", page)
page = text.re(r"\s+").sub(" ", page)
return [
(url, None)
for url in text.extract_iter(page, 'js-sd-slide" data-url="', '"')

View File

@@ -161,7 +161,7 @@ class SubscribestarExtractor(Extractor):
attachments = text.extr(
html, 'class="uploads-docs"', 'class="post-edit_form"')
if attachments:
for att in util.re(r'class="doc_preview[" ]').split(
for att in text.re(r'class="doc_preview[" ]').split(
attachments)[1:]:
media.append({
"id" : text.parse_int(text.extr(
@@ -175,7 +175,7 @@ class SubscribestarExtractor(Extractor):
audios = text.extr(
html, 'class="uploads-audios"', 'class="post-edit_form"')
if audios:
for audio in util.re(r'class="audio_preview-data[" ]').split(
for audio in text.re(r'class="audio_preview-data[" ]').split(
audios)[1:]:
media.append({
"id" : text.parse_int(text.extr(

View File

@@ -60,16 +60,16 @@ class TumblrExtractor(Extractor):
blog = None
# pre-compile regular expressions
self._sub_video = util.re(
self._sub_video = text.re(
r"https?://((?:vt|vtt|ve)(?:\.media)?\.tumblr\.com"
r"/tumblr_[^_]+)_\d+\.([0-9a-z]+)").sub
if self.inline:
self._sub_image = util.re(
self._sub_image = text.re(
r"https?://(\d+\.media\.tumblr\.com(?:/[0-9a-f]+)?"
r"/tumblr(?:_inline)?_[^_]+)_\d+\.([0-9a-z]+)").sub
self._subn_orig_image = util.re(r"/s\d+x\d+/").subn
_findall_image = util.re('<img src="([^"]+)"').findall
_findall_video = util.re('<source src="([^"]+)"').findall
self._subn_orig_image = text.re(r"/s\d+x\d+/").subn
_findall_image = text.re('<img src="([^"]+)"').findall
_findall_video = text.re('<source src="([^"]+)"').findall
for post in self.posts():
if self.date_min > post["timestamp"]:

View File

@@ -76,7 +76,7 @@ class TwitterExtractor(Extractor):
seen_tweets = set() if self.config("unique", True) else None
if self.twitpic:
self._find_twitpic = util.re(
self._find_twitpic = text.re(
r"https?(://twitpic\.com/(?!photos/)\w+)").findall
tweets = self.tweets()

View File

@@ -9,7 +9,7 @@
"""Extractors for https://vk.com/"""
from .common import Extractor, Message
from .. import text, util, exception
from .. import text, exception
BASE_PATTERN = r"(?:https://)?(?:www\.|m\.)?vk\.com"
@@ -36,7 +36,7 @@ class VkExtractor(Extractor):
return num
def items(self):
subn = util.re(r"/imp[fg]/").subn
subn = text.re(r"/imp[fg]/").subn
sizes = "wzyxrqpo"
data = self.metadata()

View File

@@ -7,7 +7,7 @@
"""Extractors for https://www.xasiat.com"""
from .common import Extractor, Message
from .. import text, util
from .. import text
import time
BASE_PATTERN = r"(?:https?://)?(?:www\.)?xasiat\.com((?:/fr|/ja)?/albums"
@@ -29,7 +29,7 @@ class XasiatExtractor(Extractor):
def _pagination(self, path, pnum=1):
url = f"{self.root}{path}/"
find_posts = util.re(r'class="item ">\s*<a href="([^"]+)').findall
find_posts = text.re(r'class="item ">\s*<a href="([^"]+)').findall
while True:
params = {
@@ -69,11 +69,11 @@ class XasiatAlbumExtractor(XasiatExtractor):
data = {
"title": text.unescape(title),
"model": util.re(
"model": text.re(
r'top_models1"></i>\s*(.+)\s*</span').findall(info),
"tags": util.re(
"tags": text.re(
r'tags/[^"]+\">\s*(.+)\s*</a').findall(info),
"album_category": util.re(
"album_category": text.re(
r'categories/[^"]+\">\s*(.+)\s*</a').findall(info)[0],
"album_url": response.url,
"album_id": text.parse_int(album_id),

View File

@@ -128,7 +128,7 @@ class ZerochanExtractor(BooruExtractor):
return data
def _parse_json(self, txt):
txt = util.re(r"[\x00-\x1f\x7f]").sub("", txt)
txt = text.re(r"[\x00-\x1f\x7f]").sub("", txt)
main, _, tags = txt.partition('tags": [')
item = {}

View File

@@ -262,7 +262,7 @@ def parse_command_line(module, argv):
else module.match_filter_func(opts.match_filter))
if cookiesfrombrowser := getattr(opts, "cookiesfrombrowser", None):
pattern = util.re(r"""(?x)
pattern = text.re(r"""(?x)
(?P<name>[^+:]+)
(?:\s*\+\s*(?P<keyring>[^:]+))?
(?:\s*:\s*(?!:)(?P<profile>.+?))?
@@ -528,7 +528,7 @@ def legacy_postprocessors(opts, module, ytdlp, compat_opts):
if len(dur) == 2 and all(t is not None for t in dur):
remove_ranges.append(tuple(dur))
continue
remove_chapters_patterns.append(util.re(regex))
remove_chapters_patterns.append(text.re(regex))
if opts.remove_chapters or sponsorblock_query:
postprocessors.append({
"key": "ModifyChapters",