From 1fbc341e60c63615cf30a457bd2696d58b334fd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 2 Aug 2025 10:19:34 +0200 Subject: [PATCH] [facebook] ensure numeric 'user_id' values (#7953) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit handle 'pfbid…' IDs --- gallery_dl/extractor/facebook.py | 14 ++++++++++ test/results/facebook.py | 44 ++++++++++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/gallery_dl/extractor/facebook.py b/gallery_dl/extractor/facebook.py index 069ed994..546e9566 100644 --- a/gallery_dl/extractor/facebook.py +++ b/gallery_dl/extractor/facebook.py @@ -61,6 +61,7 @@ class FacebookExtractor(Extractor): "user_id": text.extr( set_page, '"owner":{"__typename":"User","id":"', '"' ), + "user_pfbid": "", "title": self.decode_all(text.extr( set_page, '"title":{"text":"', '"' )), @@ -74,6 +75,13 @@ class FacebookExtractor(Extractor): ) } + if directory["user_id"].startswith("pfbid"): + directory["user_pfbid"] = directory["user_id"] + directory["user_id"] = ( + text.extr( + set_page, '"actors":[{"__typename":"User","id":"', '"') or + directory["set_id"].split(".")[1]) + return directory def parse_photo_page(self, photo_page): @@ -92,6 +100,7 @@ class FacebookExtractor(Extractor): "user_id": text.extr( photo_page, '"owner":{"__typename":"User","id":"', '"' ), + "user_pfbid": "", "caption": self.decode_all(text.extr( photo_page, '"message":{"delight_ranges"', @@ -115,6 +124,11 @@ class FacebookExtractor(Extractor): ) } + if photo["user_id"].startswith("pfbid"): + photo["user_pfbid"] = photo["user_id"] + photo["user_id"] = text.extr( + photo_page, r'\"content_owner_id_new\":\"', r'\"') + text.nameext_from_url(photo["url"], photo) photo["followups_ids"] = [] diff --git a/test/results/facebook.py b/test/results/facebook.py index a4a554ff..fcb2e23c 100644 --- a/test/results/facebook.py +++ b/test/results/facebook.py @@ -43,6 +43,16 @@ __tests__ = ( "#count" : 3, }, +{ + "#url" : "https://www.facebook.com/profile.php?id=100074229772340/photos", + "#comment" : "pfbid user ID (#7953)", + "#class" : facebook.FacebookPhotosExtractor, + "#range" : "1", + + "user_id" : "100074229772340", + "user_pfbid": "pfbid0xCZArBYwZ9d4KLbXzSLF6YNUTY3rTJhPpVZ92gBpFQi1JK9RemJ5KhtqSJXkVnQ3l", +}, + { "#url" : "https://www.facebook.com/facebook/photos_by", "#class" : facebook.FacebookPhotosExtractor, @@ -124,15 +134,45 @@ __tests__ = ( "caption" : "", "date" : datetime.datetime(2014, 5, 3, 0, 44, 47), - "filename" : "10334445_10152716011076729_6502314875328401420_n", + "filename" : str, "extension": "png", "id" : "10152716011076729", "set_id" : "a.10152716010956729", "url" : str, "user_id" : "100064860875397", + "user_pfbid": "", "username" : "Facebook", }, +{ + "#url" : "https://www.facebook.com/photo.php?fbid=1143447107814264&set=pb.100064469571787.-2207520000&type=3", + "#class" : facebook.FacebookPhotoExtractor, + "#count" : 1, + + "caption" : "Wanting to post a pic on Stories but it’s too small? 😡❌\n\nTry using Meta AI to make the pic fit your screen 😇✅\n\n(Available in most of the US)", + "date" : "dt:2025-05-30 18:47:34", + "extension": "jpg", + "id" : "1143447107814264", + "set_id" : "a.596799269145720", + "user_id" : "100064469571787", + "user_pfbid": "", + "username" : "Instagram", +}, + +{ + "#url" : "https://www.facebook.com/photo/?fbid=221820450302279", + "#comment" : "pfbid user ID (#7953)", + "#class" : facebook.FacebookPhotoExtractor, + + "date" : "dt:2023-02-05 22:41:02", + "id" : "221820450302279", + "set_id" : "a.109762038174788", + "user_id" : "100074229772340", + "user_pfbid": "pfbid0xCZArBYwZ9d4KLbXzSLF6YNUTY3rTJhPpVZ92gBpFQi1JK9RemJ5KhtqSJXkVnQ3l", + "username": "Throwaway Kwon", + +}, + { "#url" : "https://www.facebook.com/photo.php?fbid=1156625586261770", "#comment" : "surrogate pair in 'caption' data (#6599)", @@ -155,7 +195,7 @@ __tests__ = ( "#count" : 1, "date" : datetime.datetime(2024, 4, 19, 17, 25, 48), - "filename" : "462125225_400524393116630_7457168924362807384_n", + "filename" : str, "id" : "1165557851291824", "url" : str, "user_id" : "100064860875397",