From 160328d21c3e02697cac693d2f805b98e2531965 Mon Sep 17 00:00:00 2001 From: Leonardo Taccari Date: Mon, 16 Mar 2020 21:09:14 +0100 Subject: [PATCH] [instagram] Add support for user's saved medias (#644) * [instagram] Gracefully handle possible 'HttpErrorPage' in _extract_page() `HttpErrorPage' is returned in shared_data at least when not authenticated or when trying to fetch other users' saved media (i.e. `instagram.com/<username>/saved/'). Gracefully handle it by returning nothing. * [instagram] Add support for user's saved medias (Please note that this needs the user to be authenticated and they can only see their saved media (not other users' ones).) Close #643. * [instagram] Bump copyright year --- gallery_dl/extractor/instagram.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 96afea17..5c8004ea 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2019 Leonardo Taccari +# Copyright 2018-2020 Leonardo Taccari # Copyright 2018-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify @@ -268,6 +268,9 @@ class InstagramExtractor(Extractor): # Deal with different structure of pages: the first page # has interesting data in `entry_data', next pages in `data'. 
if 'entry_data' in shared_data: + if 'HttpErrorPage' in shared_data['entry_data']: + return + base_shared_data = shared_data['entry_data'][psdf['page']][0]['graphql'] # variables_id is available only in the first page @@ -404,6 +407,31 @@ class InstagramStoriesExtractor(InstagramExtractor): return self._extract_stories(url) +class InstagramSavedExtractor(InstagramExtractor): + """Extractor for ProfilePage saved media""" + subcategory = "saved" + pattern = (r"(?:https?://)?(?:www\.)?instagram\.com" + r"/(?!p/|explore/|directory/|accounts/|stories/|tv/)" + r"([^/?&#]+)/saved") + + def __init__(self, match): + InstagramExtractor.__init__(self, match) + self.username = match.group(1) + + def instagrams(self): + url = '{}/{}/saved/'.format(self.root, self.username) + shared_data = self._extract_shared_data(url) + + return self._extract_page(shared_data, { + 'page': 'ProfilePage', + 'node': 'user', + 'node_id': 'id', + 'variables_id': 'id', + 'edge_to_medias': 'edge_saved_media', + 'query_hash': '8c86fed24fa03a8a2eea2a70a80c7b6b', + }) + + class InstagramUserExtractor(InstagramExtractor): """Extractor for ProfilePage""" subcategory = "user"