[twitter] handle missing 'expanded_url' fields (#5463, #5490)

2024-04-19 21:42:22 +02:00
parent c9d3b5e5d9
commit 347af7f5c8
1 changed file with 10 additions and 4 deletions
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -243,8 +243,8 @@ class TwitterExtractor(Extractor):

        # collect URLs from entities
        for url in tweet["entities"].get("urls") or ():
-            url = url["expanded_url"]
-            if "//twitpic.com/" not in url or "/photos/" in url:
+            url = url.get("expanded_url") or url.get("url") or ""
+            if not url or "//twitpic.com/" not in url or "/photos/" in url:
                continue
            if url.startswith("http:"):
                url = "https" + url[4:]
@@ -336,7 +336,10 @@ class TwitterExtractor(Extractor):
        urls = entities.get("urls")
        if urls:
            for url in urls:
-                content = content.replace(url["url"], url["expanded_url"])
+                try:
+                    content = content.replace(url["url"], url["expanded_url"])
+                except KeyError:
+                    pass
        txt, _, tco = content.rpartition(" ")
        tdata["content"] = txt if tco.startswith("https://t.co/") else content

@@ -403,7 +406,10 @@ class TwitterExtractor(Extractor):
        urls = entities["description"].get("urls")
        if urls:
            for url in urls:
-                descr = descr.replace(url["url"], url["expanded_url"])
+                try:
+                    descr = descr.replace(url["url"], url["expanded_url"])
+                except KeyError:
+                    pass
        udata["description"] = descr

        if "url" in entities: