diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 043940b6..acaad26b 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -22,7 +22,10 @@ jobs: runs-on: ubuntu-latest # on release commits, run only for tag event - if: ${{ ! startsWith( github.event.head_commit.message , 'release version ' ) || startsWith( github.ref , 'refs/tags/v' ) }} + if: | + github.repository == 'mikf/gallery-dl' && + ( ! startsWith( github.event.head_commit.message , 'release version ' ) || + startsWith( github.ref , 'refs/tags/v' ) ) steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/executables.yml b/.github/workflows/executables.yml index 2c27d643..78c95add 100644 --- a/.github/workflows/executables.yml +++ b/.github/workflows/executables.yml @@ -14,6 +14,7 @@ env: jobs: build: + if: github.repository == 'mikf/gallery-dl' runs-on: ${{ matrix.os }} defaults: run: diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index e0335a55..31fa63e8 100644 --- a/.github/workflows/pages.yml +++ b/.github/workflows/pages.yml @@ -20,6 +20,7 @@ concurrency: jobs: dispatch: + if: github.repository == 'mikf/gallery-dl' runs-on: ubuntu-latest steps: diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 2ef6d36e..3b2aa96e 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -51,6 +51,8 @@ class TwitterExtractor(Extractor): if not self.config("transform", True): self._transform_user = util.identity self._transform_tweet = util.identity + + self._cursor = None self._user = None self._user_obj = None self._user_cache = {} @@ -501,6 +503,14 @@ class TwitterExtractor(Extractor): }, } + def _init_cursor(self): + return self.config("cursor") or None + + def _update_cursor(self, cursor): + self.log.debug("Cursor: %s", cursor) + self._cursor = cursor + return cursor + def metadata(self): """Return general metadata""" return {} @@ -508,6 +518,11 @@ class TwitterExtractor(Extractor): def tweets(self): """Yield all relevant tweet objects""" + def finalize(self): + if self._cursor: + self.log.info("Use '-o cursor=%s' to continue downloading " + "from the current position", self._cursor) + def login(self): if self.cookies_check(self.cookies_names): return @@ -539,6 +554,9 @@ class TwitterUserExtractor(TwitterExtractor): def initialize(self): pass + def finalize(self): + pass + def items(self): base = "{}/{}/".format(self.root, self.user) return self._dispatch_extractors(( @@ -558,30 +576,76 @@ class TwitterTimelineExtractor(TwitterExtractor): pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/timeline(?!\w)" example = "https://x.com/USER/timeline" + def _init_cursor(self): + if self._cursor: + return self._cursor.partition("/")[2] or None + return None + + def _update_cursor(self, cursor): + if cursor: + self._cursor = self._cursor_prefix + cursor + self.log.debug("Cursor: %s", self._cursor) + else: + self._cursor = None + return cursor + def tweets(self): - # yield initial batch of (media) tweets - tweet = None - for tweet in self._select_tweet_source()(self.user): - yield tweet - if tweet is None: - return + self._cursor = cursor = self.config("cursor") or None + reset = False + + if cursor: + state = cursor.partition("/")[0] + state, _, tweet_id = state.partition("_") + state = text.parse_int(state, 1) + else: + state = 1 + + if state <= 1: + self._cursor_prefix = "1/" + + # yield initial batch of (media) tweets + tweet = None + for tweet in self._select_tweet_source()(self.user): + yield tweet + if tweet is None and not cursor: + return + + user = self._user["name"] + tweet_id = tweet["rest_id"] + + state = reset = 2 + else: + user = self.user # build search query - query = "from:{} max_id:{}".format( - self._user["name"], tweet["rest_id"]) + query = "from:{} max_id:{}".format(user, tweet_id) if self.retweets: query += " include:retweets include:nativeretweets" - if not self.textonly: - # try to search for media-only tweets - tweet = None - for tweet in self.api.search_timeline(query + " filter:links"): - yield tweet - if tweet is not None: - return + if state <= 2: + self._cursor_prefix = "2_{}/".format(tweet_id) + if reset: + self._cursor = self._cursor_prefix - # yield unfiltered search results - yield from self.api.search_timeline(query) + if not self.textonly: + # try to search for media-only tweets + tweet = None + for tweet in self.api.search_timeline(query + " filter:links"): + yield tweet + break + if tweet is not None: + return self._update_cursor(None) + + state = reset = 3 + + if state <= 3: + # yield unfiltered search results + self._cursor_prefix = "3_{}/".format(tweet_id) + if reset: + self._cursor = self._cursor_prefix + + yield from self.api.search_timeline(query) + return self._update_cursor(None) def _select_tweet_source(self): strategy = self.config("strategy") @@ -1415,7 +1479,9 @@ class TwitterAPI(): "%s %s (%s)", response.status_code, response.reason, errors) def _pagination_legacy(self, endpoint, params): - original_retweets = (self.extractor.retweets == "original") + extr = self.extractor + params["cursor"] = extr._init_cursor() + original_retweets = (extr.retweets == "original") bottom = ("cursor-bottom-", "sq-cursor-bottom") while True: @@ -1423,7 +1489,7 @@ class TwitterAPI(): instructions = data["timeline"]["instructions"] if not instructions: - return + return extr._update_cursor(None) tweets = data["globalObjects"]["tweets"] users = data["globalObjects"]["users"] @@ -1504,8 +1570,8 @@ class TwitterAPI(): # stop on empty response if not cursor or (not tweets and not tweet_id): - return - params["cursor"] = cursor + return extr._update_cursor(None) + params["cursor"] = extr._update_cursor(cursor) def _pagination_tweets(self, endpoint, variables, path=None, stop_tweets=True, features=None): @@ -1514,6 +1580,7 @@ class TwitterAPI(): pinned_tweet = extr.pinned params = {"variables": None} + variables["cursor"] = extr._init_cursor() if features is None: features = self.features_pagination if features: @@ -1550,7 +1617,7 @@ class TwitterAPI(): cursor = entry["content"]["value"] if entries is None: if not cursor: - return + return extr._update_cursor(None) entries = () except LookupError: @@ -1699,12 +1766,14 @@ class TwitterAPI(): continue if stop_tweets and not tweet: - return + return extr._update_cursor(None) if not cursor or cursor == variables.get("cursor"): - return - variables["cursor"] = cursor + return extr._update_cursor(None) + variables["cursor"] = extr._update_cursor(cursor) def _pagination_users(self, endpoint, variables, path=None): + extr = self.extractor + variables["cursor"] = extr._init_cursor() params = { "variables": None, "features" : self._json_dumps(self.features_pagination), @@ -1724,7 +1793,7 @@ class TwitterAPI(): data = data[key] instructions = data["instructions"] except KeyError: - return + return extr._update_cursor(None) for instr in instructions: if instr["type"] == "TimelineAddEntries": @@ -1742,8 +1811,8 @@ class TwitterAPI(): cursor = entry["content"]["value"] if not cursor or cursor.startswith(("-1|", "0|")) or not entry: - return - variables["cursor"] = cursor + return extr._update_cursor(None) + variables["cursor"] = extr._update_cursor(cursor) def _handle_ratelimit(self, response): rl = self.extractor.config("ratelimit")