[twitter] small metadata cleanup

- add 'date' field
- remove 'entities' and 'extended_entities'
- don't include 'focus_fields' from 'original_info'
This commit is contained in:
Mike Fährmann
2020-06-04 18:21:54 +02:00
parent 655c98cbef
commit 3eed5f52d7

View File

@@ -50,10 +50,17 @@ class TwitterExtractor(Extractor):
continue
tweet.update(metadata)
tweet["date"] = text.parse_datetime(
tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
entities = tweet["extended_entities"]
del tweet["extended_entities"]
del tweet["entities"]
yield Message.Directory, tweet
for tweet["num"], media in enumerate(
tweet["extended_entities"]["media"], 1):
tweet.update(media["original_info"])
for tweet["num"], media in enumerate(entities["media"], 1):
tweet["width"] = media["original_info"].get("width", 0)
tweet["height"] = media["original_info"].get("height", 0)
if "video_info" in media and self.videos: