[bellazon] improve video URL regex (#8392)
match <source> elements with attributes before 'src="..."'
This commit is contained in:
@@ -27,7 +27,7 @@ class BellazonExtractor(Extractor):
|
||||
native = (f"{self.root}/", f"{self.root[6:]}/")
|
||||
extract_urls = text.re(
|
||||
r'(?s)<('
|
||||
- r'(?:video .*?<source src|a [^>]*?href)="([^"]+).*?</a>'
|
||||
+ r'(?:video .*?<source [^>]*?src|a [^>]*?href)="([^"]+).*?</a>'
|
||||
r'|img [^>]*?src="([^"]+)"[^>]*>'
|
||||
r')'
|
||||
).findall
|
||||
|
||||
@@ -204,6 +204,16 @@ __tests__ = (
|
||||
"#results" : "https://www.bellazon.com/main/uploads/monthly_2018_04/30602369_1891291154222843_1650952189830496256_n.jpg.33e6ab78dd0e8723f790ad4f58f3761a.jpg",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.bellazon.com/main/topic/70367-elyzaveta-kovalenko/page/5/#comment-5464973",
|
||||
"#comment" : "(#8392)",
|
||||
"#class" : bellazon.BellazonPostExtractor,
|
||||
"#results" : (
|
||||
"https://www.bellazon.com/main/uploads/monthly_2022_05/917305269_LizaKovalenko-Instagram2021_04_19.mp4.467d190a54e1bcabc50767a69706501d.mp4",
|
||||
"https://www.bellazon.com/main/uploads/monthly_2022_05/2027180206_LizaKovalenko-Instagram2021_04_23.mp4.2eae87d7e9d6f1a993611fa1f73e8e7b.mp4",
|
||||
),
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.bellazon.com/main/topic/57872-millie-brady/",
|
||||
"#class" : bellazon.BellazonThreadExtractor,
|
||||
|
||||
Reference in New Issue
Block a user