diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index 6f538811..542a4f2d 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -116,8 +116,10 @@ class TumblrExtractor(Extractor): if self.original and "/s2048x3072/" in photo["url"] and ( photo["width"] == 2048 or photo["height"] == 3072): - photo["url"] = self._original_photo(photo["url"]) - + try: + photo["url"] = self._original_photo(photo["url"]) + except Exception: + self._warn_original(photo["url"], post) del photo["original_size"] del photo["alt_sizes"] posts.append( @@ -138,7 +140,10 @@ class TumblrExtractor(Extractor): # API response, but they can't contain images/videos anyway body = post["reblog"]["comment"] + post["reblog"]["tree_html"] for url in _findall_image(body): - url = self._original_inline_image(url) + try: + url = self._original_inline_image(url) + except Exception: + self._warn_original(url, post) posts.append(self._prepare_image(url, post.copy())) for url in _findall_video(body): url = self._original_video(url) @@ -218,23 +223,33 @@ class TumblrExtractor(Extractor): return self.blog != post.get("reblogged_root_uuid") def _original_photo(self, url): - return self._update_image_token( - url.replace("/s2048x3072/", "/s99999x99999/", 1)) + resized = url.replace("/s2048x3072/", "/s99999x99999/", 1) + return self._update_image_token(resized) def _original_inline_image(self, url): if self.original: - url, n = self._subn_orig_image("/s99999x99999/", url, 1) + resized, n = self._subn_orig_image("/s99999x99999/", url, 1) if n: - return self._update_image_token(url) + return self._update_image_token(resized) return self._sub_image(r"https://\1_1280.\2", url) def _original_video(self, url): return self._sub_video(r"https://\1.\2", url) - def _update_image_token(self, url): + def _update_image_token(self, resized): headers = {"Accept": "text/html,*/*;q=0.8"} - response = self.request(url, headers=headers) - return text.extract(response.text, '" src="', '"')[0] + + for _ in range(3): + response = self.request(resized, headers=headers) + updated = text.extract(response.text, '" src="', '"')[0] + if updated != resized: + return updated + + raise RuntimeError("invalid token") + + def _warn_original(self, url, post): + self.log.warning("Unable to fetch higher-resolution " + "version of %s (%s)", url, post["id"]) class TumblrUserExtractor(TumblrExtractor):