From 1d14928bd9f979a5a5c3d77cf3fdb0d25a0d99e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sun, 3 Jul 2022 16:07:07 +0200 Subject: [PATCH] [twitter] ignore previously seen Tweets (#2712) occurs primarily for /with_replies results when logged in --- docs/configuration.rst | 10 ++++++++++ docs/gallery-dl.conf | 1 + gallery_dl/extractor/twitter.py | 10 ++++++++++ 3 files changed, 21 insertions(+) diff --git a/docs/configuration.rst b/docs/configuration.rst index e3388f77..43c453e9 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -2415,6 +2415,16 @@ Description Extract `TwitPic <https://twitpic.com/>`__ embeds. +extractor.twitter.unique +------------------------ +Type + ``bool`` +Default + ``true`` +Description + Ignore previously seen Tweets. + + extractor.twitter.users ----------------------- Type diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index d447bdbf..14926534 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -288,6 +288,7 @@ "strategy": null, "text-tweets": false, "twitpic": false, + "unique": true, "users": "timeline", "videos": true }, diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 33e9e10d..fc81f1cd 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -64,6 +64,11 @@ class TwitterExtractor(Extractor): tweets = self._expand_tweets(self.tweets()) self.tweets = lambda : tweets + if self.config("unique", True): + seen_tweets = set() + else: + seen_tweets = None + for tweet in self.tweets(): if "legacy" in tweet: @@ -71,6 +76,11 @@ class TwitterExtractor(Extractor): else: data = tweet + if seen_tweets is not None: + if data["id_str"] in seen_tweets: + continue + seen_tweets.add(data["id_str"]) + if not self.retweets and "retweeted_status_id_str" in data: self.log.debug("Skipping %s (retweet)", data["id_str"]) continue