[twitter] resolve t.co URLs in 'content' (#1532)

This commit is contained in:
Mike Fährmann
2021-05-15 02:46:46 +02:00
parent 2b5d80862e
commit 41457dbb1b

View File

@@ -168,7 +168,6 @@ class TwitterExtractor(Extractor):
tweet["created_at"], "%a %b %d %H:%M:%S %z %Y"),
"user" : self._transform_user(tweet["user"]),
"lang" : tweet["lang"],
"content" : tweet["full_text"],
"favorite_count": tweet["favorite_count"],
"quote_count" : tweet["quote_count"],
"reply_count" : tweet["reply_count"],
@@ -187,6 +186,13 @@ class TwitterExtractor(Extractor):
"nick": u["name"],
} for u in mentions]
content = tweet["full_text"]
urls = entities.get("urls")
if urls:
for url in urls:
content = content.replace(url["url"], url["expanded_url"])
tdata["content"] = content
if "in_reply_to_screen_name" in tweet:
tdata["reply_to"] = tweet["in_reply_to_screen_name"]