[twitter] ignore previously seen Tweets (#2712)
Previously seen (duplicate) Tweets occur primarily in /with_replies results when logged in.
This commit is contained in:
@@ -64,6 +64,11 @@ class TwitterExtractor(Extractor):
|
||||
tweets = self._expand_tweets(self.tweets())
|
||||
self.tweets = lambda : tweets
|
||||
|
||||
if self.config("unique", True):
|
||||
seen_tweets = set()
|
||||
else:
|
||||
seen_tweets = None
|
||||
|
||||
for tweet in self.tweets():
|
||||
|
||||
if "legacy" in tweet:
|
||||
@@ -71,6 +76,11 @@ class TwitterExtractor(Extractor):
|
||||
else:
|
||||
data = tweet
|
||||
|
||||
if seen_tweets is not None:
|
||||
if data["id_str"] in seen_tweets:
|
||||
continue
|
||||
seen_tweets.add(data["id_str"])
|
||||
|
||||
if not self.retweets and "retweeted_status_id_str" in data:
|
||||
self.log.debug("Skipping %s (retweet)", data["id_str"])
|
||||
continue
|
||||
|
||||
Reference in New Issue
Block a user