merge #3455: [twitter] apply tweet type checks before uniqueness check
This commit is contained in:
@@ -76,11 +76,6 @@ class TwitterExtractor(Extractor):
             else:
                 data = tweet
 
-            if seen_tweets is not None:
-                if data["id_str"] in seen_tweets:
-                    continue
-                seen_tweets.add(data["id_str"])
-
             if not self.retweets and "retweeted_status_id_str" in data:
                 self.log.debug("Skipping %s (retweet)", data["id_str"])
                 continue
@@ -98,6 +93,13 @@ class TwitterExtractor(Extractor):
                 self.log.debug("Skipping %s (reply)", data["id_str"])
                 continue
 
+            if seen_tweets is not None:
+                if data["id_str"] in seen_tweets:
+                    self.log.debug(
+                        "Skipping %s (previously seen)", data["id_str"])
+                    continue
+                seen_tweets.add(data["id_str"])
+
             files = []
             if "extended_entities" in data:
                 self._extract_media(
Reference in New Issue
Block a user