[simpcity] improve post content extraction (#8214)
Use a more reliable end marker so that content extraction does not stop prematurely when a post contains a quoted post.
This commit is contained in:
@@ -92,7 +92,8 @@ class SimpcityExtractor(Extractor):
|
||||
"id": extr('data-content="post-', '"'),
|
||||
"author_url": extr('itemprop="url" content="', '"'),
|
||||
"date": text.parse_datetime(extr('datetime="', '"')),
|
||||
"content": extr('<div itemprop="text">', "\t\t</div>").strip(),
|
||||
"content": extr('<div itemprop="text">',
|
||||
'<div class="js-selectToQuote').strip(),
|
||||
}
|
||||
|
||||
url_a = post["author_url"]
|
||||
|
||||
@@ -59,6 +59,28 @@ __tests__ = (
|
||||
"#exception": exception.AuthRequired,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://simpcity.cr/threads/puutin_cos.219873/post-26053409",
|
||||
"#comment" : "iframe embeds (#8214)",
|
||||
"#class" : simpcity.SimpcityPostExtractor,
|
||||
"#auth" : True,
|
||||
"#results" : (
|
||||
"https://jpg5.su/img/NNFssUg",
|
||||
"https://saint2.cr/embed/nPy1kG3w55V",
|
||||
"https://saint2.cr/embed/c0KhPjU4-F3",
|
||||
"https://saint2.cr/embed/sZWnVZ_mQsV",
|
||||
"https://saint2.cr/embed/MEBiLx6DETQ",
|
||||
),
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://simpcity.cr/threads/shinhashimoto00-shinhashimoto01.184378/post-13389764",
|
||||
"#comment" : "quote in post content (#8214)",
|
||||
"#class" : simpcity.SimpcityPostExtractor,
|
||||
"#auth" : True,
|
||||
"#results" : ("/goto/post?id=13358068", "https://cyberdrop.me/a/Sh9GlG38"),
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://simpcity.cr/threads/alua-tatakai.89490/",
|
||||
"#class" : simpcity.SimpcityThreadExtractor,
|
||||
|
||||
Reference in New Issue
Block a user