[simpcity] fix 'content' for first post of a thread
This commit is contained in:
@@ -161,8 +161,11 @@ class SimpcityExtractor(Extractor):
|
|||||||
"id": extr('data-content="post-', '"'),
|
"id": extr('data-content="post-', '"'),
|
||||||
"author_url": extr('itemprop="url" content="', '"'),
|
"author_url": extr('itemprop="url" content="', '"'),
|
||||||
"date": self.parse_datetime_iso(extr('datetime="', '"')),
|
"date": self.parse_datetime_iso(extr('datetime="', '"')),
|
||||||
"content": extr('<div itemprop="text">',
|
"content": (
|
||||||
'<div class="js-selectToQuote').strip(),
|
extr('<div itemprop="text">',
|
||||||
|
'<div class="js-selectToQuote') or
|
||||||
|
extr('<div >',
|
||||||
|
'<div class="js-selectToQuote')).strip(),
|
||||||
}
|
}
|
||||||
|
|
||||||
url_a = post["author_url"]
|
url_a = post["author_url"]
|
||||||
@@ -184,7 +187,7 @@ class SimpcityPostExtractor(SimpcityExtractor):
|
|||||||
pos = page.find(f'data-content="post-{post_id}"')
|
pos = page.find(f'data-content="post-{post_id}"')
|
||||||
if pos < 0:
|
if pos < 0:
|
||||||
raise exception.NotFoundError("post")
|
raise exception.NotFoundError("post")
|
||||||
html = text.extract(page, "<article ", "</article>", pos-200)[0]
|
html = text.extract(page, "<article ", "<footer", pos-200)[0]
|
||||||
|
|
||||||
self.kwdict["thread"] = self._parse_thread(page)
|
self.kwdict["thread"] = self._parse_thread(page)
|
||||||
return (self._parse_post(html),)
|
return (self._parse_post(html),)
|
||||||
@@ -207,7 +210,7 @@ class SimpcityThreadExtractor(SimpcityExtractor):
|
|||||||
for page in pages:
|
for page in pages:
|
||||||
if "thread" not in self.kwdict:
|
if "thread" not in self.kwdict:
|
||||||
self.kwdict["thread"] = self._parse_thread(page)
|
self.kwdict["thread"] = self._parse_thread(page)
|
||||||
posts = text.extract_iter(page, "<article ", "</article>")
|
posts = text.extract_iter(page, "<article ", "<footer")
|
||||||
if reverse:
|
if reverse:
|
||||||
posts = list(posts)
|
posts = list(posts)
|
||||||
posts.reverse()
|
posts.reverse()
|
||||||
|
|||||||
Reference in New Issue
Block a user