[simpcity] extract "click to load media" URLs (#8609)
This commit is contained in:
@@ -30,11 +30,14 @@ class SimpcityExtractor(Extractor):
|
|||||||
self.login()
|
self.login()
|
||||||
|
|
||||||
extract_urls = text.re(
|
extract_urls = text.re(
|
||||||
r'(?s)<(?:'
|
r'(?s)(?:'
|
||||||
r'video (.*?\ssrc="[^"]+".*?)</video>'
|
r'<video (.*?\ssrc="[^"]+".*?)</video>'
|
||||||
r'|a [^>]*?href="(?:https://[^"]+)?(/attachments/[^"]+".*?)</a>'
|
r'|<a [^>]*?href="'
|
||||||
r'|div [^>]*?ata-src="(?:https://[^"]+)?(/attachments/[^"]+".*?)/>'
|
r'(?:https://[^"]+)?(/attachments/[^"]+".*?)</a>'
|
||||||
r'|(?:a [^>]*?href|iframe [^>]*?src)="([^"]+)'
|
r'|<div [^>]*?data-src="'
|
||||||
|
r'(?:https://[^"]+)?(/attachments/[^"]+".*?)/>'
|
||||||
|
r'|(?:<a [^>]*?href="|<iframe [^>]*?src="|'
|
||||||
|
r'''onclick="loadMedia\(this, ')([^"']+)'''
|
||||||
r')'
|
r')'
|
||||||
).findall
|
).findall
|
||||||
|
|
||||||
@@ -53,6 +56,8 @@ class SimpcityExtractor(Extractor):
|
|||||||
data["num"] += 1
|
data["num"] += 1
|
||||||
data["num_external"] += 1
|
data["num_external"] += 1
|
||||||
data["type"] = "external"
|
data["type"] = "external"
|
||||||
|
if ext.startswith("//"):
|
||||||
|
ext = "https:" + ext
|
||||||
yield Message.Queue, ext, data
|
yield Message.Queue, ext, data
|
||||||
|
|
||||||
elif video:
|
elif video:
|
||||||
|
|||||||
@@ -205,6 +205,14 @@ __tests__ = (
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://simpcity.cr/threads/lustn4lexi-hot4lexi-lexi-2-legit-hott4lexi-lexi.175167/post-2512729",
|
||||||
|
"#comment" : "'Click here to load redgifs media' (#8609)",
|
||||||
|
"#class" : simpcity.SimpcityPostExtractor,
|
||||||
|
"#auth" : True,
|
||||||
|
"#results" : "https://redgifs.com/ifr/unusedsubmissivemullet",
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://simpcity.cr/threads/alua-tatakai.89490/",
|
"#url" : "https://simpcity.cr/threads/alua-tatakai.89490/",
|
||||||
"#class" : simpcity.SimpcityThreadExtractor,
|
"#class" : simpcity.SimpcityThreadExtractor,
|
||||||
|
|||||||
Reference in New Issue
Block a user