diff --git a/docs/configuration.rst b/docs/configuration.rst index f0ae4d98..015c24b8 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -1860,6 +1860,20 @@ Description You can use ``"all"`` instead of listing all values separately. +extractor.nitter.videos +----------------------- +Type + ``bool`` or ``string`` +Default + ``true`` +Description + Control video download behavior. + + * ``true``: Download videos + * ``"ytdl"``: Download videos using `youtube-dl`_ + * ``false``: Skip video Tweets + + extractor.oauth.browser ----------------------- Type diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 2002c0f2..168131ed 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -204,6 +204,9 @@ "password": null, "include": "illustration,doujin" }, + "nitter": { + "videos": true + }, "oauth": { "browser": true, diff --git a/gallery_dl/extractor/nitter.py b/gallery_dl/extractor/nitter.py index 1ba8253b..05a5ae1b 100644 --- a/gallery_dl/extractor/nitter.py +++ b/gallery_dl/extractor/nitter.py @@ -24,31 +24,47 @@ class NitterExtractor(BaseExtractor): self.user = match.group(match.lastindex) def items(self): + videos = self.config("videos", True) + ytdl = (videos == "ytdl") + for tweet_html in self.tweets(): tweet = self._tweet_from_html(tweet_html) - attachments_html = tweet.pop("_attach", "") - if attachments_html: - attachments = list(text.extract_iter( - attachments_html, 'href="', '"')) - attachments.extend(text.extract_iter( - attachments_html, 'data-url="', '"')) + attachments = tweet.pop("_attach", "") + if attachments: + files = [] + append = files.append + + for url in text.extract_iter( + attachments, 'href="', '"'): + if url[0] == "/": + url = self.root + url + append({"url": url}) + + if videos and not files: + if ytdl: + append({ + "url": "ytdl:{}/i/status/{}".format( + self.root, tweet["tweet_id"]), + "extension": None, + }) + else: + for url in text.extract_iter( + attachments, 'data-url="', '"'): + if url[0] == "/": + url = self.root + url + append({"url": "ytdl:" + url}) else: - attachments = () - tweet["count"] = len(attachments) + files = () + tweet["count"] = len(files) yield Message.Directory, tweet - for tweet["num"], url in enumerate(attachments, 1): - if url[0] == "/": - url = self.root + url - if "/video/" in url: - url = "ytdl:" + url - tweet["filename"] = url.rpartition( - "%2F")[2].partition(".")[0] - tweet["extension"] = "mp4" - else: - text.nameext_from_url(url, tweet) - yield Message.Url, url, tweet + for tweet["num"], file in enumerate(files, 1): + url = file["url"] + file.update(tweet) + if "extension" not in file: + text.nameext_from_url(url, file) + yield Message.Url, url, file def _tweet_from_html(self, html): extr = text.extract_from(html)