'match.group(N)' -> 'match[N]' (#7671)

Indexing a match object (`match[N]`) is about 2.5x faster than calling `match.group(N)`.
This commit is contained in:
Mike Fährmann
2025-06-18 12:59:37 +02:00
parent 475506cc39
commit 41191bb60a
135 changed files with 363 additions and 363 deletions

View File

@@ -27,7 +27,7 @@ class HatenablogExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
- self.domain = match.group(1) or match.group(2)
+ self.domain = match[1] or match[2]
def _init(self):
self._find_img = util.re(r'<img +([^>]+)').finditer
@@ -42,8 +42,8 @@ class HatenablogExtractor(Extractor):
'<div class="entry-content hatenablog-entry">', '</div>')
images = []
- for i in self._find_img(content):
- attributes = i.group(1)
+ for match in self._find_img(content):
+ attributes = match[1]
if 'class="hatena-fotolife"' not in attributes:
continue
image = text.unescape(text.extr(attributes, 'src="', '"'))
@@ -67,9 +67,9 @@ class HatenablogEntriesExtractor(HatenablogExtractor):
def __init__(self, match):
HatenablogExtractor.__init__(self, match)
- self.path = match.group(3)
+ self.path = match[3]
self.query = {key: value for key, value in text.parse_query(
- match.group(4)).items() if self._acceptable_query(key)}
+ match[4]).items() if self._acceptable_query(key)}
def _init(self):
HatenablogExtractor._init(self)
@@ -91,7 +91,7 @@ class HatenablogEntriesExtractor(HatenablogExtractor):
yield from self._handle_full_articles(extr)
match = self._find_pager_url(page)
- url = text.unescape(match.group(1)) if match else None
+ url = text.unescape(match[1]) if match else None
query = None
def _handle_partial_articles(self, extr):
@@ -128,7 +128,7 @@ class HatenablogEntryExtractor(HatenablogExtractor):
def __init__(self, match):
HatenablogExtractor.__init__(self, match)
- self.path = match.group(3)
+ self.path = match[3]
def items(self):
url = "https://" + self.domain + "/entry/" + self.path