diff --git a/docs/configuration.rst b/docs/configuration.rst index 5c2d97d1..8534ca8b 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -1067,6 +1067,15 @@ Description Extract images from retweets. =========== ===== +extractor.twitter.twitpic +------------------------- +=========== ===== +Type ``bool`` +Default ``false`` +Description Extract `TwitPic <https://twitpic.com/>`__ embeds. +=========== ===== + + extractor.twitter.videos ------------------------ =========== ===== diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 69d1e036..326bd076 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -144,6 +144,7 @@ { "content": false, "retweets": true, + "twitpic": false, "videos": false }, "vsco": diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index d8682ed2..dc558c0c 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -30,6 +30,7 @@ class TwitterExtractor(Extractor): self._user_dict = None self.logged_in = False self.retweets = self.config("retweets", True) + self.twitpic = self.config("twitpic", False) self.content = self.config("content", False) self.videos = self.config("videos", False) @@ -79,6 +80,26 @@ class TwitterExtractor(Extractor): urls = [url + size for size in self.sizes] yield Message.Urllist, urls, data + if self.twitpic and "//twitpic.com/" in tweet: + urls = [ + url for url in text.extract_iter( + tweet, 'data-expanded-url="', '"') + if "//twitpic.com/" in url + ] + + if "num" not in data: + if urls: + yield Message.Directory, data + data["num"] = 0 + + for data["num"], url in enumerate(urls, data["num"]+1): + response = self.request(url, fatal=False) + if response.status_code >= 400: + continue + url = text.extract( + response.text, 'name="twitter:image" value="', '"')[0] + yield Message.Url, url, text.nameext_from_url(url, data) + def metadata(self): """Return general metadata""" return {} @@ -230,7 +251,7 @@ class TwitterExtractor(Extractor): for tweet in
text.extract_iter( data["items_html"], '