[twitter] small metadata cleanup
- add 'date' field
- remove 'entities' and 'extended_entities'
- don't include 'focus_fields' from 'original_info'
This commit is contained in:
@@ -50,10 +50,17 @@ class TwitterExtractor(Extractor):
|
||||
continue
|
||||
|
||||
tweet.update(metadata)
|
||||
tweet["date"] = text.parse_datetime(
|
||||
tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
|
||||
entities = tweet["extended_entities"]
|
||||
del tweet["extended_entities"]
|
||||
del tweet["entities"]
|
||||
|
||||
yield Message.Directory, tweet
|
||||
for tweet["num"], media in enumerate(
|
||||
tweet["extended_entities"]["media"], 1):
|
||||
tweet.update(media["original_info"])
|
||||
for tweet["num"], media in enumerate(entities["media"], 1):
|
||||
|
||||
tweet["width"] = media["original_info"].get("width", 0)
|
||||
tweet["height"] = media["original_info"].get("height", 0)
|
||||
|
||||
if "video_info" in media and self.videos:
|
||||
|
||||
|
||||
Reference in New Issue
Block a user