diff --git a/gallery_dl/extractor/facebook.py b/gallery_dl/extractor/facebook.py
index 04acfc52..2f3fdbf3 100644
--- a/gallery_dl/extractor/facebook.py
+++ b/gallery_dl/extractor/facebook.py
@@ -40,7 +40,8 @@ class FacebookExtractor(Extractor):
     @staticmethod
     def decode_all(txt):
         return text.unescape(
-            txt.encode("utf-8").decode("unicode_escape")
+            txt.encode().decode("unicode_escape")
+            .encode("utf_16", "surrogatepass").decode("utf_16")
         ).replace("\\/", "/")
 
     @staticmethod
diff --git a/test/results/facebook.py b/test/results/facebook.py
index 7b34a248..165a2239 100644
--- a/test/results/facebook.py
+++ b/test/results/facebook.py
@@ -113,6 +113,15 @@ __tests__ = (
     "username" : "Facebook",
 },
 
+{
+    "#url"     : "https://www.facebook.com/photo.php?fbid=1156625586261770",
+    "#comment" : "surrogate pair in 'caption' data (#6599)",
+    "#category": ("", "facebook", "photo"),
+    "#class"   : facebook.FacebookPhotoExtractor,
+
+    "caption"  : "A century of innovation parked side by side.\n\n📸: Vocabutesla via X",
+},
+
 {
     "#url"     : "https://www.facebook.com/watch/?v=1165557851291824",
     "#category": ("", "facebook", "video"),