From 5fa6ff04ddf1ef9145233237c635cce93b3a8687 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 29 Oct 2019 16:00:31 +0100 Subject: [PATCH] [instagram] extract '__additionalDataLoaded' (#391) The '_sharedData' of Post pages is missing its 'graphql' part for logged-in users. This data is now included in the parameters of a function call to '__additionalDataLoaded(...)'. And, of course, video extraction with youtube-dl broke because of this change as well. --- gallery_dl/extractor/instagram.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 8eee3905..a369b811 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -101,8 +101,16 @@ class InstagramExtractor(Extractor): def _extract_shared_data(self, url): page = self.request(url).text - data = text.extract(page, 'window._sharedData = ', ';')[0] - return json.loads(data) + shared_data, pos = text.extract( + page, 'window._sharedData =', ';') + additional_data, pos = text.extract( + page, 'window.__additionalDataLoaded(', ');', pos) + + data = json.loads(shared_data) + if additional_data: + next(iter(data['entry_data'].values()))[0] = \ + json.loads(additional_data.partition(',')[2]) + return data def _extract_postpage(self, url): shared_data = self._extract_shared_data(url)