class InstagramCollectionExtractor(InstagramExtractor):
    """Extractor for ProfilePage saved collection media

    Matches URLs of the form ``<user>/saved/<collection name>/<collection id>``
    and yields the media of every post returned by the collection feed
    endpoint.
    """
    subcategory = "collection"
    pattern = USER_PATTERN + r"/saved/([^/?#]+)/([^/?#]+)"
    test = (
        "https://www.instagram.com/instagram/saved/collection_name/123456789/",
    )

    def __init__(self, match):
        """Store user, collection name and collection ID from 'match'"""
        InstagramExtractor.__init__(self, match)
        # the last two groups are the path segments following "/saved/"
        self.user, self.collection_name, self.collection_id = match.groups()

    def metadata(self):
        """Return collection ID and name as additional metadata"""
        # NOTE(review): the name is taken from a URL path segment; confirm
        # that text.unescape() (rather than percent-decoding) is intended.
        return {
            "collection_id"  : self.collection_id,
            "collection_name": text.unescape(self.collection_name),
        }

    def posts(self):
        """Yield the 'media' object of each item in the collection feed"""
        endpoint = "/v1/feed/collection/{}/posts/".format(self.collection_id)
        # Pass a fresh dict instead of relying on _pagination_api()'s
        # 'params={}' default: that default is a single shared object, so
        # anything the helper stores in it (presumably a pagination cursor;
        # verify against the helper) would carry over into later calls.
        for item in self._pagination_api(endpoint, {}):
            yield item["media"]