[tumblr] warn when unable to fetch higher-resolution images (#2957)

and download the smaller version instead of failing with a 404 error
2022-09-26 12:05:34 +02:00
parent 08b0c4c0b1
commit 32c30754d1
1 changed files with 25 additions and 10 deletions
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -116,8 +116,10 @@ class TumblrExtractor(Extractor):
                    if self.original and "/s2048x3072/" in photo["url"] and (
                            photo["width"] == 2048 or photo["height"] == 3072):
-                        photo["url"] = self._original_photo(photo["url"])
+                        try:
-
+                            photo["url"] = self._original_photo(photo["url"])
                        except Exception:
                            self._warn_original(photo["url"], post)
                    del photo["original_size"]
                    del photo["alt_sizes"]
                    posts.append(
@@ -138,7 +140,10 @@ class TumblrExtractor(Extractor):
                # API response, but they can't contain images/videos anyway
                body = post["reblog"]["comment"] + post["reblog"]["tree_html"]
                for url in _findall_image(body):
-                    url = self._original_inline_image(url)
+                    try:
                        url = self._original_inline_image(url)
                    except Exception:
                        self._warn_original(url, post)
                    posts.append(self._prepare_image(url, post.copy()))
                for url in _findall_video(body):
                    url = self._original_video(url)
@@ -218,23 +223,33 @@ class TumblrExtractor(Extractor):
        return self.blog != post.get("reblogged_root_uuid")
    def _original_photo(self, url):
-        return self._update_image_token(
+        resized = url.replace("/s2048x3072/", "/s99999x99999/", 1)
-            url.replace("/s2048x3072/", "/s99999x99999/", 1))
+        return self._update_image_token(resized)
    def _original_inline_image(self, url):
        if self.original:
-            url, n = self._subn_orig_image("/s99999x99999/", url, 1)
+            resized, n = self._subn_orig_image("/s99999x99999/", url, 1)
            if n:
-                return self._update_image_token(url)
+                return self._update_image_token(resized)
        return self._sub_image(r"https://\1_1280.\2", url)
    def _original_video(self, url):
        return self._sub_video(r"https://\1.\2", url)
-    def _update_image_token(self, url):
+    def _update_image_token(self, resized):
        headers = {"Accept": "text/html,*/*;q=0.8"}
-        response = self.request(url, headers=headers)
+
-        return text.extract(response.text, '" src="', '"')[0]
+        for _ in range(3):
            response = self.request(resized, headers=headers)
            updated = text.extract(response.text, '" src="', '"')[0]
            if updated != resized:
                return updated
        raise RuntimeError("invalid token")
    def _warn_original(self, url, post):
        self.log.warning("Unable to fetch higher-resolution "
                         "version of %s (%s)", url, post["id"])
 class TumblrUserExtractor(TumblrExtractor):