diff --git a/docs/configuration.rst b/docs/configuration.rst
index acb03d42..a9ef5731 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -654,6 +654,15 @@ Description Extract images from retweets.
 =========== =====
 
 
+extractor.twitter.videos
+------------------------
+=========== =====
+Type        ``bool``
+Default     ``false``
+Description Output video tweets as unsupported URLs.
+=========== =====
+
+
 extractor.[booru].tags
 ----------------------
 =========== =====
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index fdb6ce9b..579154b0 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -110,7 +110,8 @@
         },
         "twitter":
         {
-            "retweets": true
+            "retweets": true,
+            "videos": false
         },
         "booru":
         {
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 6343b8b6..27fd2be5 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -9,7 +9,7 @@
 """Extract images from https://twitter.com/"""
 
 from .common import Extractor, Message
-from .. import text
+from .. import text, extractor
 
 
 class TwitterExtractor(Extractor):
@@ -24,32 +24,38 @@ class TwitterExtractor(Extractor):
         Extractor.__init__(self)
         self.user = match.group(1)
         self.retweets = self.config("retweets", True)
+        self.videos = self.config("videos", False)
+
+        if self.videos:
+            self._blacklist = extractor.blacklist(("twitter",))
 
     def items(self):
         yield Message.Version, 1
         yield Message.Directory, self.metadata()
 
         for tweet in self.tweets():
-            images = list(text.extract_iter(
-                tweet, 'data-image-url="', '"'))
-            if not images:
-                continue
-
             data = self._data_from_tweet(tweet)
             if not self.retweets and data["retweet_id"]:
                 continue
 
+            images = text.extract_iter(
+                tweet, 'data-image-url="', '"')
             for data["num"], url in enumerate(images, 1):
                 text.nameext_from_url(url, data)
                 yield Message.Url, url + ":orig", data
 
+            if self.videos and "-videoContainer" in tweet:
+                url = "{}/{}/status/{}".format(
+                    self.root, data["user"], data["tweet_id"])
+                with self._blacklist:
+                    yield Message.Queue, url, data
+
     def metadata(self):
         """Return general metadata"""
         return {"user": self.user}
 
     def tweets(self):
         """Yield HTML content of all relevant tweets"""
-        return ()
 
     @staticmethod
     def _data_from_tweet(tweet):