[reddit] add ability to load more comments (#15)
The 'extractor.reddit.morecomments' option enables the use of the '/api/morechildren' API endpoint [1] to load more comments than a regular submission request provides. Possible values are the booleans 'true' and 'false' (default). Note: this feature costs 1 extra API call, counted towards the rate limit, for every batch of up to 100 extra comments. [1] https://www.reddit.com/dev/api/#GET_api_morechildren
This commit is contained in:
@@ -108,6 +108,7 @@ class RedditAPI():
|
|||||||
def __init__(self, extractor):
|
def __init__(self, extractor):
|
||||||
self.extractor = extractor
|
self.extractor = extractor
|
||||||
self.comments = extractor.config("comments", 500)
|
self.comments = extractor.config("comments", 500)
|
||||||
|
self.morecomments = extractor.config("morecomments", False)
|
||||||
self.refresh_token = extractor.config("refresh-token")
|
self.refresh_token = extractor.config("refresh-token")
|
||||||
self.log = extractor.log
|
self.log = extractor.log
|
||||||
self.session = extractor.session
|
self.session = extractor.session
|
||||||
@@ -116,9 +117,10 @@ class RedditAPI():
|
|||||||
def submission(self, submission_id):
|
def submission(self, submission_id):
|
||||||
"""Fetch the (submission, comments)-tuple for a submission id"""
|
"""Fetch the (submission, comments)-tuple for a submission id"""
|
||||||
endpoint = "/comments/" + submission_id + "/.json"
|
endpoint = "/comments/" + submission_id + "/.json"
|
||||||
|
link_id = "t3_" + submission_id if self.morecomments else None
|
||||||
submission, comments = self._call(endpoint, {"limit": self.comments})
|
submission, comments = self._call(endpoint, {"limit": self.comments})
|
||||||
return (submission["data"]["children"][0]["data"],
|
return (submission["data"]["children"][0]["data"],
|
||||||
self._unfold(comments))
|
self._flatten(comments, link_id))
|
||||||
|
|
||||||
def submissions_subreddit(self, subreddit, params):
|
def submissions_subreddit(self, subreddit, params):
|
||||||
"""Collect all (submission, comments)-tuples of a subreddit"""
|
"""Collect all (submission, comments)-tuples of a subreddit"""
|
||||||
@@ -126,6 +128,24 @@ class RedditAPI():
|
|||||||
params["limit"] = 100
|
params["limit"] = 100
|
||||||
return self._pagination(endpoint, params)
|
return self._pagination(endpoint, params)
|
||||||
|
|
||||||
|
def morechildren(self, link_id, children):
|
||||||
|
"""Load additional comments from a submission"""
|
||||||
|
endpoint = "/api/morechildren"
|
||||||
|
params = {"link_id": link_id, "api_type": "json"}
|
||||||
|
index, done = 0, False
|
||||||
|
while not done:
|
||||||
|
if len(children) - index < 100:
|
||||||
|
done = True
|
||||||
|
params["children"] = ",".join(children[index:index + 100])
|
||||||
|
index += 100
|
||||||
|
|
||||||
|
data = self._call(endpoint, params)["json"]
|
||||||
|
for thing in data["data"]["things"]:
|
||||||
|
if thing["kind"] == "more":
|
||||||
|
children.extend(thing["data"]["children"])
|
||||||
|
else:
|
||||||
|
yield thing["data"]
|
||||||
|
|
||||||
def authenticate(self):
|
def authenticate(self):
|
||||||
"""Authenticate the application by requesting an access token"""
|
"""Authenticate the application by requesting an access token"""
|
||||||
access_token = self._authenticate_impl(self.refresh_token)
|
access_token = self._authenticate_impl(self.refresh_token)
|
||||||
@@ -190,15 +210,18 @@ class RedditAPI():
|
|||||||
return
|
return
|
||||||
params["after"] = data["after"]
|
params["after"] = data["after"]
|
||||||
|
|
||||||
@staticmethod
|
def _flatten(self, comments, link_id=None):
|
||||||
def _unfold(comments):
|
extra = []
|
||||||
# TODO: order?
|
|
||||||
queue = comments["data"]["children"]
|
queue = comments["data"]["children"]
|
||||||
while queue:
|
while queue:
|
||||||
comment = queue.pop()
|
comment = queue.pop(0)
|
||||||
if comment["kind"] == "more":
|
if comment["kind"] == "more":
|
||||||
|
if link_id:
|
||||||
|
extra.extend(comment["data"]["children"])
|
||||||
continue
|
continue
|
||||||
comment = comment["data"]
|
comment = comment["data"]
|
||||||
yield comment
|
yield comment
|
||||||
if comment["replies"]:
|
if comment["replies"]:
|
||||||
queue += comment["replies"]["data"]["children"]
|
queue += comment["replies"]["data"]["children"]
|
||||||
|
if link_id and extra:
|
||||||
|
yield from self.morechildren(link_id, extra)
|
||||||
|
|||||||
Reference in New Issue
Block a user