'match.group(N)' -> 'match[N]' (#7671)

2.5x faster
This commit is contained in:
Mike Fährmann
2025-06-18 12:59:37 +02:00
parent 475506cc39
commit 41191bb60a
135 changed files with 363 additions and 363 deletions

View File

@@ -36,28 +36,28 @@ class GenericExtractor(Extractor):
example = "generic:https://www.nongnu.org/lzip/"
def __init__(self, match):
-self.subcategory = match.group('domain')
+self.subcategory = match['domain']
Extractor.__init__(self, match)
# Strip the "g(eneric):" prefix
# and inform about "forced" or "fallback" mode
-if match.group('generic'):
-self.url = match.group(0).partition(":")[2]
+if match['generic']:
+self.url = match[0].partition(":")[2]
else:
self.log.info("Falling back on generic information extractor.")
-self.url = match.group(0)
+self.url = match[0]
# Make sure we have a scheme, or use https
-if match.group('scheme'):
-self.scheme = match.group('scheme')
+if match['scheme']:
+self.scheme = match['scheme']
else:
self.scheme = 'https://'
self.url = text.ensure_http_scheme(self.url, self.scheme)
-self.path = match.group('path')
+self.path = match['path']
# Used to resolve relative image urls
-self.root = self.scheme + match.group('domain')
+self.root = self.scheme + match['domain']
def items(self):
"""Get page, extract metadata & images, yield them in suitable messages
@@ -184,7 +184,7 @@ class GenericExtractor(Extractor):
basematch = util.re(
r"(?i)(?:<base\s.*?href=[\"']?)(?P<url>[^\"' >]+)").search(page)
if basematch:
-self.baseurl = basematch.group('url').rstrip('/')
+self.baseurl = basematch['url'].rstrip('/')
# Otherwise, extract the base url from self.url
else:
if self.url.endswith("/"):