@@ -27,7 +27,7 @@ class HatenablogExtractor(Extractor):
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
self.domain = match.group(1) or match.group(2)
|
||||
self.domain = match[1] or match[2]
|
||||
|
||||
def _init(self):
|
||||
self._find_img = util.re(r'<img +([^>]+)').finditer
|
||||
@@ -42,8 +42,8 @@ class HatenablogExtractor(Extractor):
|
||||
'<div class="entry-content hatenablog-entry">', '</div>')
|
||||
|
||||
images = []
|
||||
for i in self._find_img(content):
|
||||
attributes = i.group(1)
|
||||
for match in self._find_img(content):
|
||||
attributes = match[1]
|
||||
if 'class="hatena-fotolife"' not in attributes:
|
||||
continue
|
||||
image = text.unescape(text.extr(attributes, 'src="', '"'))
|
||||
@@ -67,9 +67,9 @@ class HatenablogEntriesExtractor(HatenablogExtractor):
|
||||
|
||||
def __init__(self, match):
|
||||
HatenablogExtractor.__init__(self, match)
|
||||
self.path = match.group(3)
|
||||
self.path = match[3]
|
||||
self.query = {key: value for key, value in text.parse_query(
|
||||
match.group(4)).items() if self._acceptable_query(key)}
|
||||
match[4]).items() if self._acceptable_query(key)}
|
||||
|
||||
def _init(self):
|
||||
HatenablogExtractor._init(self)
|
||||
@@ -91,7 +91,7 @@ class HatenablogEntriesExtractor(HatenablogExtractor):
|
||||
yield from self._handle_full_articles(extr)
|
||||
|
||||
match = self._find_pager_url(page)
|
||||
url = text.unescape(match.group(1)) if match else None
|
||||
url = text.unescape(match[1]) if match else None
|
||||
query = None
|
||||
|
||||
def _handle_partial_articles(self, extr):
|
||||
@@ -128,7 +128,7 @@ class HatenablogEntryExtractor(HatenablogExtractor):
|
||||
|
||||
def __init__(self, match):
|
||||
HatenablogExtractor.__init__(self, match)
|
||||
self.path = match.group(3)
|
||||
self.path = match[3]
|
||||
|
||||
def items(self):
|
||||
url = "https://" + self.domain + "/entry/" + self.path
|
||||
|
||||
Reference in New Issue
Block a user