[facebook] improve 'date' extraction (#7151)

use 'created_time' as a fallback when 'publish_time' isn't available
This commit is contained in:
Mike Fährmann
2025-03-10 17:35:32 +01:00
parent 04464b6cf0
commit ce01835995
2 changed files with 13 additions and 3 deletions

View File

@@ -99,9 +99,10 @@ class FacebookExtractor(Extractor):
'"message":{"delight_ranges"',
'"},"message_preferred_body"'
).rsplit('],"text":"', 1)[-1]),
"date": text.parse_timestamp(text.extr(
photo_page, '\\"publish_time\\":', ','
)),
"date": text.parse_timestamp(
text.extr(photo_page, '\\"publish_time\\":', ',') or
text.extr(photo_page, '"created_time":', ',')
),
"url": FacebookExtractor.decode_all(text.extr(
photo_page, ',"image":{"uri":"', '","'
)),

View File

@@ -122,6 +122,15 @@ __tests__ = (
"caption" : "A century of innovation parked side by side.\n\n📸: Vocabutesla via X",
},
{
"#url" : "https://www.facebook.com/photo.php?fbid=989340003138066&set=pb.100061862277212.-2207520000&type=3",
"#comment" : "no 'publish_time' (#7151)",
"#category": ("", "facebook", "photo"),
"#class" : facebook.FacebookPhotoExtractor,
"date" : "dt:2025-02-25 15:00:09",
},
{
"#url" : "https://www.facebook.com/watch/?v=1165557851291824",
"#category": ("", "facebook", "video"),