[twitter] ignore previously seen Tweets (#2712)

Duplicate Tweets occur primarily in /with_replies results when logged in.
This commit is contained in:
Mike Fährmann
2022-07-03 16:07:07 +02:00
parent 4b2a0a0eda
commit 1d14928bd9
3 changed files with 21 additions and 0 deletions

View File

@@ -64,6 +64,11 @@ class TwitterExtractor(Extractor):
tweets = self._expand_tweets(self.tweets())
self.tweets = lambda : tweets
if self.config("unique", True):
seen_tweets = set()
else:
seen_tweets = None
for tweet in self.tweets():
if "legacy" in tweet:
@@ -71,6 +76,11 @@ class TwitterExtractor(Extractor):
else:
data = tweet
if seen_tweets is not None:
if data["id_str"] in seen_tweets:
continue
seen_tweets.add(data["id_str"])
if not self.retweets and "retweeted_status_id_str" in data:
self.log.debug("Skipping %s (retweet)", data["id_str"])
continue