[instagram] refactor reel handling
This commit is contained in:
@@ -46,10 +46,10 @@ class InstagramExtractor(Extractor):
|
|||||||
|
|
||||||
for post in self.posts():
|
for post in self.posts():
|
||||||
|
|
||||||
if post["__typename"] == "GraphReel":
|
if "__typename" in post:
|
||||||
post = self._parse_reel(post["id"])
|
post = self._parse_post_graphql(post)
|
||||||
else:
|
else:
|
||||||
post = self._parse_post(post)
|
post = self._parse_post_reel(post)
|
||||||
post.update(data)
|
post.update(data)
|
||||||
files = post.pop("_files")
|
files = post.pop("_files")
|
||||||
|
|
||||||
@@ -85,8 +85,8 @@ class InstagramExtractor(Extractor):
|
|||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
def _api_request(self, endpoint, params):
|
def _request_api(self, endpoint, params=None):
|
||||||
url = "https://i.instagram.com/api/" + endpoint
|
url = "https://i.instagram.com/api" + endpoint
|
||||||
headers = {
|
headers = {
|
||||||
"X-CSRFToken" : self.csrf_token,
|
"X-CSRFToken" : self.csrf_token,
|
||||||
"X-IG-App-ID" : "936619743392459",
|
"X-IG-App-ID" : "936619743392459",
|
||||||
@@ -99,7 +99,7 @@ class InstagramExtractor(Extractor):
|
|||||||
url, params=params, headers=headers, cookies=cookies,
|
url, params=params, headers=headers, cookies=cookies,
|
||||||
).json()
|
).json()
|
||||||
|
|
||||||
def _graphql_request(self, query_hash, variables):
|
def _request_graphql(self, query_hash, variables):
|
||||||
url = self.root + "/graphql/query/"
|
url = self.root + "/graphql/query/"
|
||||||
params = {
|
params = {
|
||||||
"query_hash": query_hash,
|
"query_hash": query_hash,
|
||||||
@@ -162,7 +162,7 @@ class InstagramExtractor(Extractor):
|
|||||||
for key in ("sessionid", "mid", "csrftoken")
|
for key in ("sessionid", "mid", "csrftoken")
|
||||||
}
|
}
|
||||||
|
|
||||||
def _parse_post(self, post):
|
def _parse_post_graphql(self, post):
|
||||||
if post.get("is_video") and "video_url" not in post:
|
if post.get("is_video") and "video_url" not in post:
|
||||||
url = "{}/tv/{}/".format(self.root, post["shortcode"])
|
url = "{}/tv/{}/".format(self.root, post["shortcode"])
|
||||||
post = self._extract_post_page(url)
|
post = self._extract_post_page(url)
|
||||||
@@ -230,18 +230,12 @@ class InstagramExtractor(Extractor):
|
|||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def _parse_reel(self, reel_id):
|
def _parse_post_reel(self, post):
|
||||||
params = {"reel_ids": reel_id}
|
reel_id = str(post["id"]).rpartition(":")[2]
|
||||||
data = self._api_request("v1/feed/reels_media/", params)
|
owner = post["user"]
|
||||||
if not data["reels_media"]:
|
|
||||||
raise exception.NotFoundError("reel")
|
|
||||||
reel = data["reels_media"][0]
|
|
||||||
|
|
||||||
reel_id = reel_id.rpartition(":")[2]
|
|
||||||
owner = reel["user"]
|
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
"expires" : text.parse_timestamp(reel.get("expiring_at")),
|
"expires" : text.parse_timestamp(post.get("expiring_at")),
|
||||||
"owner_id" : owner["pk"],
|
"owner_id" : owner["pk"],
|
||||||
"username" : owner.get("username"),
|
"username" : owner.get("username"),
|
||||||
"fullname" : owner.get("full_name"),
|
"fullname" : owner.get("full_name"),
|
||||||
@@ -250,7 +244,7 @@ class InstagramExtractor(Extractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
data["_files"] = files = []
|
data["_files"] = files = []
|
||||||
for num, item in enumerate(reel["items"], 1):
|
for num, item in enumerate(post["items"], 1):
|
||||||
|
|
||||||
image = item["image_versions2"]["candidates"][0]
|
image = item["image_versions2"]["candidates"][0]
|
||||||
|
|
||||||
@@ -337,7 +331,7 @@ class InstagramExtractor(Extractor):
|
|||||||
}
|
}
|
||||||
return user[key]
|
return user[key]
|
||||||
|
|
||||||
def _pagination(self, query_hash, variables, data):
|
def _pagination_graphql(self, query_hash, variables, data):
|
||||||
while True:
|
while True:
|
||||||
for edge in data["edges"]:
|
for edge in data["edges"]:
|
||||||
yield edge["node"]
|
yield edge["node"]
|
||||||
@@ -352,9 +346,19 @@ class InstagramExtractor(Extractor):
|
|||||||
|
|
||||||
variables["after"] = self._cursor = info["end_cursor"]
|
variables["after"] = self._cursor = info["end_cursor"]
|
||||||
self.log.debug("Cursor: %s", self._cursor)
|
self.log.debug("Cursor: %s", self._cursor)
|
||||||
data = next(iter(self._graphql_request(
|
data = next(iter(self._request_graphql(
|
||||||
query_hash, variables)["user"].values()))
|
query_hash, variables)["user"].values()))
|
||||||
|
|
||||||
|
def _pagination_api(self, endpoint, params):
|
||||||
|
while True:
|
||||||
|
data = self._request_api(endpoint, params)
|
||||||
|
yield from data["items"]
|
||||||
|
|
||||||
|
info = data["paging_info"]
|
||||||
|
if not info["more_available"]:
|
||||||
|
return
|
||||||
|
params["max_id"] = info["max_id"]
|
||||||
|
|
||||||
|
|
||||||
class InstagramUserExtractor(InstagramExtractor):
|
class InstagramUserExtractor(InstagramExtractor):
|
||||||
"""Extractor for an Instagram user profile"""
|
"""Extractor for an Instagram user profile"""
|
||||||
@@ -392,7 +396,7 @@ class InstagramPostsExtractor(InstagramExtractor):
|
|||||||
query_hash = "003056d32c2554def87228bc3fd9668a"
|
query_hash = "003056d32c2554def87228bc3fd9668a"
|
||||||
variables = {"id": user["id"], "first": 50}
|
variables = {"id": user["id"], "first": 50}
|
||||||
edge = self._get_edge_data(user, "edge_owner_to_timeline_media")
|
edge = self._get_edge_data(user, "edge_owner_to_timeline_media")
|
||||||
return self._pagination(query_hash, variables, edge)
|
return self._pagination_graphql(query_hash, variables, edge)
|
||||||
|
|
||||||
|
|
||||||
class InstagramChannelExtractor(InstagramExtractor):
|
class InstagramChannelExtractor(InstagramExtractor):
|
||||||
@@ -411,7 +415,7 @@ class InstagramChannelExtractor(InstagramExtractor):
|
|||||||
query_hash = "bc78b344a68ed16dd5d7f264681c4c76"
|
query_hash = "bc78b344a68ed16dd5d7f264681c4c76"
|
||||||
variables = {"id": user["id"], "first": 50}
|
variables = {"id": user["id"], "first": 50}
|
||||||
edge = self._get_edge_data(user, "edge_felix_video_timeline")
|
edge = self._get_edge_data(user, "edge_felix_video_timeline")
|
||||||
return self._pagination(query_hash, variables, edge)
|
return self._pagination_graphql(query_hash, variables, edge)
|
||||||
|
|
||||||
|
|
||||||
class InstagramSavedExtractor(InstagramExtractor):
|
class InstagramSavedExtractor(InstagramExtractor):
|
||||||
@@ -427,7 +431,7 @@ class InstagramSavedExtractor(InstagramExtractor):
|
|||||||
query_hash = "2ce1d673055b99250e93b6f88f878fde"
|
query_hash = "2ce1d673055b99250e93b6f88f878fde"
|
||||||
variables = {"id": user["id"], "first": 50}
|
variables = {"id": user["id"], "first": 50}
|
||||||
edge = self._get_edge_data(user, "edge_saved_media")
|
edge = self._get_edge_data(user, "edge_saved_media")
|
||||||
return self._pagination(query_hash, variables, edge)
|
return self._pagination_graphql(query_hash, variables, edge)
|
||||||
|
|
||||||
|
|
||||||
class InstagramTagExtractor(InstagramExtractor):
|
class InstagramTagExtractor(InstagramExtractor):
|
||||||
@@ -451,9 +455,9 @@ class InstagramTagExtractor(InstagramExtractor):
|
|||||||
query_hash = "9b498c08113f1e09617a1703c22b2f32"
|
query_hash = "9b498c08113f1e09617a1703c22b2f32"
|
||||||
variables = {"tag_name": hashtag["name"], "first": 50}
|
variables = {"tag_name": hashtag["name"], "first": 50}
|
||||||
edge = self._get_edge_data(hashtag, "edge_hashtag_to_media")
|
edge = self._get_edge_data(hashtag, "edge_hashtag_to_media")
|
||||||
return self._pagination(query_hash, variables, edge)
|
return self._pagination_graphql(query_hash, variables, edge)
|
||||||
|
|
||||||
def _pagination(self, query_hash, variables, data):
|
def _pagination_graphql(self, query_hash, variables, data):
|
||||||
while True:
|
while True:
|
||||||
for edge in data["edges"]:
|
for edge in data["edges"]:
|
||||||
yield edge["node"]
|
yield edge["node"]
|
||||||
@@ -464,7 +468,7 @@ class InstagramTagExtractor(InstagramExtractor):
|
|||||||
|
|
||||||
variables["after"] = self._cursor = info["end_cursor"]
|
variables["after"] = self._cursor = info["end_cursor"]
|
||||||
self.log.debug("Cursor: %s", self._cursor)
|
self.log.debug("Cursor: %s", self._cursor)
|
||||||
data = self._graphql_request(
|
data = self._request_graphql(
|
||||||
query_hash, variables)["hashtag"]["edge_hashtag_to_media"]
|
query_hash, variables)["hashtag"]["edge_hashtag_to_media"]
|
||||||
|
|
||||||
|
|
||||||
@@ -575,7 +579,7 @@ class InstagramPostExtractor(InstagramExtractor):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
query_hash = "a9441f24ac73000fa17fe6e6da11d59d"
|
query_hash = "2c4c2e343a8f64c625ba02b2aa12c7f8"
|
||||||
variables = {
|
variables = {
|
||||||
"shortcode" : self.item,
|
"shortcode" : self.item,
|
||||||
"child_comment_count" : 3,
|
"child_comment_count" : 3,
|
||||||
@@ -583,7 +587,7 @@ class InstagramPostExtractor(InstagramExtractor):
|
|||||||
"parent_comment_count" : 24,
|
"parent_comment_count" : 24,
|
||||||
"has_threaded_comments": True
|
"has_threaded_comments": True
|
||||||
}
|
}
|
||||||
data = self._graphql_request(query_hash, variables)
|
data = self._request_graphql(query_hash, variables)
|
||||||
media = data.get("shortcode_media")
|
media = data.get("shortcode_media")
|
||||||
if not media:
|
if not media:
|
||||||
raise exception.NotFoundError("post")
|
raise exception.NotFoundError("post")
|
||||||
@@ -619,7 +623,9 @@ class InstagramStoriesExtractor(InstagramExtractor):
|
|||||||
return ()
|
return ()
|
||||||
reel_id = user["id"]
|
reel_id = user["id"]
|
||||||
|
|
||||||
return ({"__typename": "GraphReel", "id": reel_id},)
|
endpoint = "/v1/feed/reels_media/"
|
||||||
|
params = {"reel_ids": reel_id}
|
||||||
|
return self._request_api(endpoint, params)["reels"].values()
|
||||||
|
|
||||||
|
|
||||||
class InstagramHighlightsExtractor(InstagramExtractor):
|
class InstagramHighlightsExtractor(InstagramExtractor):
|
||||||
@@ -642,12 +648,13 @@ class InstagramHighlightsExtractor(InstagramExtractor):
|
|||||||
"include_highlight_reels": True,
|
"include_highlight_reels": True,
|
||||||
"include_live_status": True,
|
"include_live_status": True,
|
||||||
}
|
}
|
||||||
data = self._graphql_request(query_hash, variables)
|
data = self._request_graphql(query_hash, variables)
|
||||||
|
edges = data["user"]["edge_highlight_reels"]["edges"]
|
||||||
|
if not edges:
|
||||||
|
return ()
|
||||||
|
|
||||||
return [
|
reel_ids = ["highlight:" + edge["node"]["id"] for edge in edges]
|
||||||
{
|
endpoint = "/v1/feed/reels_media/?reel_ids=" + \
|
||||||
"__typename": "GraphReel",
|
"&reel_ids=".join(text.quote(rid) for rid in reel_ids)
|
||||||
"id" : "highlight:" + edge["node"]["id"],
|
reels = self._request_api(endpoint)["reels"]
|
||||||
}
|
return [reels[rid] for rid in reel_ids]
|
||||||
for edge in data["user"]["edge_highlight_reels"]["edges"]
|
|
||||||
]
|
|
||||||
|
|||||||
Reference in New Issue
Block a user