From 7af4d2047b2a63f518732cf9d9ce1fd05b3db23f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Fri, 24 Jun 2022 23:44:20 +0200
Subject: [PATCH] [instagram] improve metadata generated by _parse_post_api()

(#2695)
---
 gallery_dl/extractor/instagram.py | 55 +++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 18 deletions(-)

diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 8d76260a..31f5b320 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -250,14 +250,36 @@ class InstagramExtractor(Extractor):
         return data
 
     def _parse_post_api(self, post):
-
-        if "media" in post:
-            post = post["media"]
+        if "items" in post:
+            items = post["items"]
+            reel_id = str(post["id"]).rpartition(":")[2]
+            data = {
+                "expires": text.parse_timestamp(post.get("expiring_at")),
+                "post_id": reel_id,
+                "post_shortcode": shortcode_from_id(reel_id),
+            }
+        else:
             data = {
                 "post_id" : post["pk"],
-                "post_shortcode": shortcode_from_id(post["pk"]),
+                "post_shortcode": post["code"],
+                "likes": post["like_count"],
             }
 
+            caption = post["caption"]
+            data["description"] = caption["text"] if caption else ""
+
+            tags = self._find_tags(data["description"])
+            if tags:
+                data["tags"] = sorted(set(tags))
+
+            location = post.get("location")
+            if location:
+                slug = location["short_name"].replace(" ", "-").lower()
+                data["location_id"] = location["pk"]
+                data["location_slug"] = slug
+                data["location_url"] = "{}/explore/locations/{}/{}/".format(
+                    self.root, location["pk"], slug)
+
             if "carousel_media" in post:
                 items = post["carousel_media"]
                 data["sidecar_media_id"] = data["post_id"]
@@ -265,21 +287,13 @@ class InstagramExtractor(Extractor):
             else:
                 items = (post,)
 
-        else:
-            items = post["items"]
-            reel_id = str(post["id"]).rpartition(":")[2]
-            data = {
-                "expires" : text.parse_timestamp(post.get("expiring_at")),
-                "post_id" : reel_id,
-                "post_shortcode": shortcode_from_id(reel_id),
-            }
-
         owner = post["user"]
         data["owner_id"] = owner["pk"]
         data["username"] = owner.get("username")
         data["fullname"] = owner.get("full_name")
-        data["_files"] = files = []
+        data["post_url"] = "{}/p/{}/".format(self.root, data["post_shortcode"])
 
+        data["_files"] = files = []
         for num, item in enumerate(items, 1):
 
             image = item["image_versions2"]["candidates"][0]
@@ -307,6 +321,10 @@ class InstagramExtractor(Extractor):
                 "width"      : media["width"],
                 "height"     : media["height"],
             }
+
+            if "expiring_at" in item:
+                media["expires"] = text.parse_timestamp(post["expiring_at"])
+
             self._extract_tagged_users(item, media)
             files.append(media)
 
@@ -376,8 +394,7 @@ class InstagramExtractor(Extractor):
     def _pagination_api(self, endpoint, params=None):
         while True:
             data = self._request_api(endpoint, params=params)
-            for item in data["items"]:
-                yield {"media": item}
+            yield from data["items"]
 
             if not data["more_available"]:
                 return
@@ -386,7 +403,8 @@ class InstagramExtractor(Extractor):
     def _pagination_api_post(self, endpoint, params, post=False):
         while True:
             data = self._request_api(endpoint, method="POST", data=params)
-            yield from data["items"]
+            for item in data["items"]:
+                yield item["media"]
 
             info = data["paging_info"]
             if not info["more_available"]:
@@ -520,7 +538,8 @@ class InstagramTagExtractor(InstagramExtractor):
             info = self._request_api(endpoint, method="POST", data=data)
 
             for section in info["sections"]:
-                yield from section["layout_content"]["medias"]
+                for media in section["layout_content"]["medias"]:
+                    yield media["media"]
 
             if not info.get("more_available"):
                 return