From 7af4d2047b2a63f518732cf9d9ce1fd05b3db23f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 24 Jun 2022 23:44:20 +0200 Subject: [PATCH] [instagram] improve metadata generated by _parse_post_api() (#2695) --- gallery_dl/extractor/instagram.py | 55 +++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 8d76260a..31f5b320 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -250,14 +250,36 @@ class InstagramExtractor(Extractor): return data def _parse_post_api(self, post): - - if "media" in post: - post = post["media"] + if "items" in post: + items = post["items"] + reel_id = str(post["id"]).rpartition(":")[2] + data = { + "expires": text.parse_timestamp(post.get("expiring_at")), + "post_id": reel_id, + "post_shortcode": shortcode_from_id(reel_id), + } + else: data = { "post_id" : post["pk"], - "post_shortcode": shortcode_from_id(post["pk"]), + "post_shortcode": post["code"], + "likes": post["like_count"], } + caption = post["caption"] + data["description"] = caption["text"] if caption else "" + + tags = self._find_tags(data["description"]) + if tags: + data["tags"] = sorted(set(tags)) + + location = post.get("location") + if location: + slug = location["short_name"].replace(" ", "-").lower() + data["location_id"] = location["pk"] + data["location_slug"] = slug + data["location_url"] = "{}/explore/locations/{}/{}/".format( + self.root, location["pk"], slug) + if "carousel_media" in post: items = post["carousel_media"] data["sidecar_media_id"] = data["post_id"] @@ -265,21 +287,13 @@ class InstagramExtractor(Extractor): else: items = (post,) - else: - items = post["items"] - reel_id = str(post["id"]).rpartition(":")[2] - data = { - "expires" : text.parse_timestamp(post.get("expiring_at")), - "post_id" : reel_id, - "post_shortcode": shortcode_from_id(reel_id), - } - owner = post["user"] data["owner_id"] = owner["pk"] data["username"] = owner.get("username") data["fullname"] = owner.get("full_name") - data["_files"] = files = [] + data["post_url"] = "{}/p/{}/".format(self.root, data["post_shortcode"]) + data["_files"] = files = [] for num, item in enumerate(items, 1): image = item["image_versions2"]["candidates"][0] @@ -307,6 +321,10 @@ class InstagramExtractor(Extractor): "width" : media["width"], "height" : media["height"], } + + if "expiring_at" in item: + media["expires"] = text.parse_timestamp(post["expiring_at"]) + self._extract_tagged_users(item, media) files.append(media) @@ -376,8 +394,7 @@ class InstagramExtractor(Extractor): def _pagination_api(self, endpoint, params=None): while True: data = self._request_api(endpoint, params=params) - for item in data["items"]: - yield {"media": item} + yield from data["items"] if not data["more_available"]: return @@ -386,7 +403,8 @@ class InstagramExtractor(Extractor): def _pagination_api_post(self, endpoint, params, post=False): while True: data = self._request_api(endpoint, method="POST", data=params) - yield from data["items"] + for item in data["items"]: + yield item["media"] info = data["paging_info"] if not info["more_available"]: @@ -520,7 +538,8 @@ class InstagramTagExtractor(InstagramExtractor): info = self._request_api(endpoint, method="POST", data=data) for section in info["sections"]: - yield from section["layout_content"]["medias"] + for media in section["layout_content"]["medias"]: + yield media["media"] if not info.get("more_available"): return