[simpcity] improve post content extraction (#8214)
Use a more reliable end marker so that content extraction does not stop prematurely when the post contains a quoted post.
This commit is contained in:
@@ -92,7 +92,8 @@ class SimpcityExtractor(Extractor):
|
|||||||
"id": extr('data-content="post-', '"'),
|
"id": extr('data-content="post-', '"'),
|
||||||
"author_url": extr('itemprop="url" content="', '"'),
|
"author_url": extr('itemprop="url" content="', '"'),
|
||||||
"date": text.parse_datetime(extr('datetime="', '"')),
|
"date": text.parse_datetime(extr('datetime="', '"')),
|
||||||
"content": extr('<div itemprop="text">', "\t\t</div>").strip(),
|
"content": extr('<div itemprop="text">',
|
||||||
|
'<div class="js-selectToQuote').strip(),
|
||||||
}
|
}
|
||||||
|
|
||||||
url_a = post["author_url"]
|
url_a = post["author_url"]
|
||||||
|
|||||||
@@ -59,6 +59,28 @@ __tests__ = (
|
|||||||
"#exception": exception.AuthRequired,
|
"#exception": exception.AuthRequired,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://simpcity.cr/threads/puutin_cos.219873/post-26053409",
|
||||||
|
"#comment" : "iframe embeds (#8214)",
|
||||||
|
"#class" : simpcity.SimpcityPostExtractor,
|
||||||
|
"#auth" : True,
|
||||||
|
"#results" : (
|
||||||
|
"https://jpg5.su/img/NNFssUg",
|
||||||
|
"https://saint2.cr/embed/nPy1kG3w55V",
|
||||||
|
"https://saint2.cr/embed/c0KhPjU4-F3",
|
||||||
|
"https://saint2.cr/embed/sZWnVZ_mQsV",
|
||||||
|
"https://saint2.cr/embed/MEBiLx6DETQ",
|
||||||
|
),
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://simpcity.cr/threads/shinhashimoto00-shinhashimoto01.184378/post-13389764",
|
||||||
|
"#comment" : "quote in post content (#8214)",
|
||||||
|
"#class" : simpcity.SimpcityPostExtractor,
|
||||||
|
"#auth" : True,
|
||||||
|
"#results" : ("/goto/post?id=13358068", "https://cyberdrop.me/a/Sh9GlG38"),
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://simpcity.cr/threads/alua-tatakai.89490/",
|
"#url" : "https://simpcity.cr/threads/alua-tatakai.89490/",
|
||||||
"#class" : simpcity.SimpcityThreadExtractor,
|
"#class" : simpcity.SimpcityThreadExtractor,
|
||||||
|
|||||||
Reference in New Issue
Block a user