[facebook] decode surrogate pairs in metadata values (#6599)
This commit is contained in:
@@ -40,7 +40,8 @@ class FacebookExtractor(Extractor):
     @staticmethod
     def decode_all(txt):
         return text.unescape(
-            txt.encode("utf-8").decode("unicode_escape")
+            txt.encode().decode("unicode_escape")
+            .encode("utf_16", "surrogatepass").decode("utf_16")
         ).replace("\\/", "/")
 
     @staticmethod
@@ -113,6 +113,15 @@ __tests__ = (
     "username" : "Facebook",
 },
 
+{
+    "#url" : "https://www.facebook.com/photo.php?fbid=1156625586261770",
+    "#comment" : "surrogate pair in 'caption' data (#6599)",
+    "#category": ("", "facebook", "photo"),
+    "#class" : facebook.FacebookPhotoExtractor,
+
+    "caption" : "A century of innovation parked side by side.\n\n📸: Vocabutesla via X",
+},
+
 {
     "#url" : "https://www.facebook.com/watch/?v=1165557851291824",
     "#category": ("", "facebook", "video"),
Reference in New Issue
Block a user