[instagram] reimplement support for story highlights (#1149)
This commit is contained in:
@@ -12,6 +12,7 @@
|
||||
from .common import Extractor, Message
|
||||
from .. import text, util, exception
|
||||
from ..cache import cache
|
||||
import itertools
|
||||
import json
|
||||
import time
|
||||
import re
|
||||
@@ -40,7 +41,11 @@ class InstagramExtractor(Extractor):
|
||||
videos = self.config("videos", True)
|
||||
|
||||
for post in self.posts():
|
||||
post = self._parse_post(post)
|
||||
|
||||
if post["__typename"] == "GraphHighlightReel":
|
||||
post = self._parse_reel("highlight:" + post["id"])
|
||||
else:
|
||||
post = self._parse_post(post)
|
||||
post.update(data)
|
||||
files = post.pop("_files")
|
||||
|
||||
@@ -70,6 +75,20 @@ class InstagramExtractor(Extractor):
|
||||
self.www_claim = www_claim
|
||||
return response
|
||||
|
||||
def _api_request(self, endpoint, params):
    """Call an endpoint below https://i.instagram.com/api/ and return
    its response decoded from JSON.

    'endpoint' is appended verbatim to the API base URL and 'params'
    is forwarded as the 'params' argument of self.request().
    The CSRF token is sent both as a header and as a cookie.
    """
    token = self.csrf_token

    response = self.request(
        "https://i.instagram.com/api/" + endpoint,
        params=params,
        headers={
            "X-CSRFToken"   : token,
            "X-IG-App-ID"   : "936619743392459",
            "X-IG-WWW-Claim": self.www_claim,
        },
        cookies={"csrftoken": token},
    )
    return response.json()
|
||||
|
||||
def _graphql_request(self, query_hash, variables):
|
||||
url = self.root + "/graphql/query/"
|
||||
params = {
|
||||
@@ -201,6 +220,55 @@ class InstagramExtractor(Extractor):
|
||||
|
||||
return data
|
||||
|
||||
def _parse_reel(self, reel_id):
    """Fetch the reel 'reel_id' and convert it into a post-like dict.

    'reel_id' is sent as-is in the 'reel_ids' parameter of the
    "v1/feed/reels_media/" API endpoint (e.g. "highlight:<id>").

    Returns a dict with the reel's date, owner info, ID and shortcode,
    plus a '_files' list holding one dict per reel item.
    Raises exception.NotFoundError if the API returns no reel.
    """
    def quality(version):
        # rank video versions by dimensions first, then by 'type'
        return (version["width"], version["height"], version["type"])

    response = self._api_request(
        "v1/feed/reels_media/", {"reel_ids": reel_id})
    if not response["reels_media"]:
        raise exception.NotFoundError("reel")
    reel = response["reels_media"][0]

    # keep only the part after the last ':' ("highlight:123" -> "123")
    _, _, reel_id = reel_id.rpartition(":")
    user = reel["user"]

    data = {
        "date"          : text.parse_timestamp(reel["created_at"]),
        "owner_id"      : user["pk"],
        "username"      : user.get("username"),
        "fullname"      : user.get("full_name"),
        "post_id"       : reel_id,
        "post_shortcode": self._shortcode_from_id(reel_id),
    }

    files = data["_files"] = []
    for num, item in enumerate(reel["items"], 1):
        # first candidate is used as display image
        # (presumably the best available quality - TODO confirm)
        image = item["image_versions2"]["candidates"][0]

        if "video_versions" not in item:
            video = None
            media = image
        else:
            video = max(item["video_versions"], key=quality)
            media = video

        entry = {
            'num'              : num,
            'media_id'         : item["pk"],
            'shortcode'        : item["code"],
            'display_url'      : image["url"],
            'video_url'        : video["url"] if video else None,
            # dimensions describe the video when there is one,
            # and the image otherwise
            'width'            : media["width"],
            'height'           : media["height"],
            'sidecar_media_id' : reel_id,
            'sidecar_shortcode': data["post_shortcode"],
        }
        files.append(entry)

    return data
|
||||
|
||||
@staticmethod
|
||||
def _shortcode_from_id(post_id):
|
||||
return util.bencode(
|
||||
@@ -290,9 +358,30 @@ class InstagramUserExtractor(InstagramExtractor):
|
||||
user = self._extract_profile_page(url)
|
||||
edge = user["edge_owner_to_timeline_media"]
|
||||
|
||||
if user.get("highlight_reel_count") and self.config("highlights"):
|
||||
query_hash = "d4d88dc1500312af6f937f7b804c68c3"
|
||||
variables = {
|
||||
"user_id": user["id"],
|
||||
"include_chaining": False,
|
||||
"include_reel": True,
|
||||
"include_suggested_users": False,
|
||||
"include_logged_out_extras": False,
|
||||
"include_highlight_reels": True,
|
||||
"include_live_status": True,
|
||||
}
|
||||
data = self._graphql_request(query_hash, variables)
|
||||
highlights = [
|
||||
edge["node"]
|
||||
for edge in data["user"]["edge_highlight_reels"]["edges"]
|
||||
]
|
||||
else:
|
||||
highlights = None
|
||||
|
||||
query_hash = "003056d32c2554def87228bc3fd9668a"
|
||||
variables = {"id": user["id"], "first": 12}
|
||||
return self._pagination(query_hash, variables, edge)
|
||||
posts = self._pagination(query_hash, variables, edge)
|
||||
|
||||
return itertools.chain(highlights, posts) if highlights else posts
|
||||
|
||||
|
||||
class InstagramChannelExtractor(InstagramExtractor):
|
||||
@@ -504,3 +593,21 @@ class InstagramPostExtractor(InstagramExtractor):
|
||||
}
|
||||
data = self._graphql_request(query_hash, variables)
|
||||
return (data["shortcode_media"],)
|
||||
|
||||
|
||||
class InstagramHighlightExtractor(InstagramExtractor):
    """Extractor for Instagram story highlights"""
    subcategory = "highlight"
    # the only capture group is the numeric highlight ID
    pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
               r"/stories/highlights/(\d+)")
    test = (
        "https://www.instagram.com/stories/highlights/18042509488170095/",
    )

    def __init__(self, match):
        """Remember the highlight ID captured by 'pattern'."""
        super().__init__(match)
        self.highlight_id = match.group(1)

    def posts(self):
        """Return a 1-tuple with a minimal 'GraphHighlightReel' node
        that carries nothing but the requested highlight ID.
        """
        reel = {
            "__typename": "GraphHighlightReel",
            "id"        : self.highlight_id,
        }
        return (reel,)
|
||||
|
||||
Reference in New Issue
Block a user