[facebook] improve 'date' extraction (#7151)
Use 'created_time' as a fallback when 'publish_time' isn't available.
This commit is contained in:
@@ -99,9 +99,10 @@ class FacebookExtractor(Extractor):
|
||||
'"message":{"delight_ranges"',
|
||||
'"},"message_preferred_body"'
|
||||
).rsplit('],"text":"', 1)[-1]),
|
||||
- "date": text.parse_timestamp(text.extr(
|
||||
- photo_page, '\\"publish_time\\":', ','
|
||||
- )),
|
||||
+ "date": text.parse_timestamp(
|
||||
+ text.extr(photo_page, '\\"publish_time\\":', ',') or
|
||||
+ text.extr(photo_page, '"created_time":', ',')
|
||||
+ ),
|
||||
"url": FacebookExtractor.decode_all(text.extr(
|
||||
photo_page, ',"image":{"uri":"', '","'
|
||||
)),
|
||||
|
||||
@@ -122,6 +122,15 @@ __tests__ = (
|
||||
"caption" : "A century of innovation parked side by side.\n\n📸: Vocabutesla via X",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.facebook.com/photo.php?fbid=989340003138066&set=pb.100061862277212.-2207520000&type=3",
|
||||
"#comment" : "no 'publish_time' (#7151)",
|
||||
"#category": ("", "facebook", "photo"),
|
||||
"#class" : facebook.FacebookPhotoExtractor,
|
||||
|
||||
"date" : "dt:2025-02-25 15:00:09",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.facebook.com/watch/?v=1165557851291824",
|
||||
"#category": ("", "facebook", "video"),
|
||||
|
||||
Reference in New Issue
Block a user