[simpcity] extract "click to load media" URLs (#8609)

This commit is contained in:
Mike Fährmann
2025-11-26 18:35:58 +01:00
parent 25ac5099cf
commit 1dc7955ba2
2 changed files with 18 additions and 5 deletions

View File

@@ -30,11 +30,14 @@ class SimpcityExtractor(Extractor):
self.login()
extract_urls = text.re(
r'(?s)<(?:'
r'video (.*?\ssrc="[^"]+".*?)</video>'
r'|a [^>]*?href="(?:https://[^"]+)?(/attachments/[^"]+".*?)</a>'
r'|div [^>]*?ata-src="(?:https://[^"]+)?(/attachments/[^"]+".*?)/>'
r'|(?:a [^>]*?href|iframe [^>]*?src)="([^"]+)'
r'(?s)(?:'
r'<video (.*?\ssrc="[^"]+".*?)</video>'
r'|<a [^>]*?href="'
r'(?:https://[^"]+)?(/attachments/[^"]+".*?)</a>'
r'|<div [^>]*?data-src="'
r'(?:https://[^"]+)?(/attachments/[^"]+".*?)/>'
r'|(?:<a [^>]*?href="|<iframe [^>]*?src="|'
r'''onclick="loadMedia\(this, ')([^"']+)'''
r')'
).findall
@@ -53,6 +56,8 @@ class SimpcityExtractor(Extractor):
data["num"] += 1
data["num_external"] += 1
data["type"] = "external"
if ext.startswith("//"):
ext = "https:" + ext
yield Message.Queue, ext, data
elif video:

View File

@@ -205,6 +205,14 @@ __tests__ = (
},
},
{
"#url" : "https://simpcity.cr/threads/lustn4lexi-hot4lexi-lexi-2-legit-hott4lexi-lexi.175167/post-2512729",
"#comment" : "'Click here to load redgifs media' (#8609)",
"#class" : simpcity.SimpcityPostExtractor,
"#auth" : True,
"#results" : "https://redgifs.com/ifr/unusedsubmissivemullet",
},
{
"#url" : "https://simpcity.cr/threads/alua-tatakai.89490/",
"#class" : simpcity.SimpcityThreadExtractor,