[simpcity] improve post content extraction (#8214)

use a better end marker to not stop prematurely
when a quoted post is present
This commit is contained in:
Mike Fährmann
2025-09-15 17:09:10 +02:00
parent a3b1100600
commit 835dfae345
2 changed files with 24 additions and 1 deletions

View File

@@ -92,7 +92,8 @@ class SimpcityExtractor(Extractor):
"id": extr('data-content="post-', '"'),
"author_url": extr('itemprop="url" content="', '"'),
"date": text.parse_datetime(extr('datetime="', '"')),
"content": extr('<div itemprop="text">', "\t\t</div>").strip(),
"content": extr('<div itemprop="text">',
'<div class="js-selectToQuote').strip(),
}
url_a = post["author_url"]

View File

@@ -59,6 +59,28 @@ __tests__ = (
"#exception": exception.AuthRequired,
},
{
"#url" : "https://simpcity.cr/threads/puutin_cos.219873/post-26053409",
"#comment" : "iframe embeds (#8214)",
"#class" : simpcity.SimpcityPostExtractor,
"#auth" : True,
"#results" : (
"https://jpg5.su/img/NNFssUg",
"https://saint2.cr/embed/nPy1kG3w55V",
"https://saint2.cr/embed/c0KhPjU4-F3",
"https://saint2.cr/embed/sZWnVZ_mQsV",
"https://saint2.cr/embed/MEBiLx6DETQ",
),
},
{
"#url" : "https://simpcity.cr/threads/shinhashimoto00-shinhashimoto01.184378/post-13389764",
"#comment" : "quote in post content (#8214)",
"#class" : simpcity.SimpcityPostExtractor,
"#auth" : True,
"#results" : ("/goto/post?id=13358068", "https://cyberdrop.me/a/Sh9GlG38"),
},
{
"#url" : "https://simpcity.cr/threads/alua-tatakai.89490/",
"#class" : simpcity.SimpcityThreadExtractor,