[facebook] decode surrogate pairs in metadata values (#6599)

This commit is contained in:
Mike Fährmann
2024-12-12 20:20:30 +01:00
parent a33065be86
commit 85a37ca039
2 changed files with 11 additions and 1 deletion

View File

@@ -40,7 +40,8 @@ class FacebookExtractor(Extractor):
@staticmethod
def decode_all(txt):
    """Return 'txt' with backslash escapes and surrogate pairs decoded

    The result is additionally passed through text.unescape()
    (presumably HTML entity unescaping -- confirm against the 'text'
    module) and has every backslash-escaped slash replaced by "/".
    """
    return text.unescape(
        # "unicode_escape" turns escape sequences that are written out
        # in the text (e.g. \uXXXX) into the characters they stand for
        txt.encode().decode("unicode_escape")
        # an escaped surrogate pair comes out of the previous step as
        # two separate surrogate code points; the UTF-16 round trip
        # ("surrogatepass" allows encoding them) merges each pair
        # into the single character it represents
        .encode("utf_16", "surrogatepass").decode("utf_16")
    ).replace("\\/", "/")
@staticmethod