[twitter] resolve t.co URLs in 'content' (#1532)
This commit is contained in:
@@ -168,7 +168,6 @@ class TwitterExtractor(Extractor):
|
|||||||
tweet["created_at"], "%a %b %d %H:%M:%S %z %Y"),
|
tweet["created_at"], "%a %b %d %H:%M:%S %z %Y"),
|
||||||
"user" : self._transform_user(tweet["user"]),
|
"user" : self._transform_user(tweet["user"]),
|
||||||
"lang" : tweet["lang"],
|
"lang" : tweet["lang"],
|
||||||
"content" : tweet["full_text"],
|
|
||||||
"favorite_count": tweet["favorite_count"],
|
"favorite_count": tweet["favorite_count"],
|
||||||
"quote_count" : tweet["quote_count"],
|
"quote_count" : tweet["quote_count"],
|
||||||
"reply_count" : tweet["reply_count"],
|
"reply_count" : tweet["reply_count"],
|
||||||
@@ -187,6 +186,13 @@ class TwitterExtractor(Extractor):
|
|||||||
"nick": u["name"],
|
"nick": u["name"],
|
||||||
} for u in mentions]
|
} for u in mentions]
|
||||||
|
|
||||||
|
content = tweet["full_text"]
|
||||||
|
urls = entities.get("urls")
|
||||||
|
if urls:
|
||||||
|
for url in urls:
|
||||||
|
content = content.replace(url["url"], url["expanded_url"])
|
||||||
|
tdata["content"] = content
|
||||||
|
|
||||||
if "in_reply_to_screen_name" in tweet:
|
if "in_reply_to_screen_name" in tweet:
|
||||||
tdata["reply_to"] = tweet["in_reply_to_screen_name"]
|
tdata["reply_to"] = tweet["in_reply_to_screen_name"]
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user