[reddit] support filtering by timestamp (#15)
- Added the 'extractor.reddit.date-min' and 'extractor.reddit.date-max' config options. These values should be UTC timestamps (seconds since the Unix epoch).
- All submissions whose creation time T does not satisfy date-min <= T <= date-max will be ignored.
- Fixed the limit parameter for submission comments by setting it to its apparent maximum value (500).
This commit is contained in:
@@ -20,7 +20,7 @@ class RedditExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
Extractor.__init__(self)
|
Extractor.__init__(self)
|
||||||
self.api = RedditAPI(self.session, self.log)
|
self.api = RedditAPI(self)
|
||||||
self.max_depth = int(self.config("recursion", 0))
|
self.max_depth = int(self.config("recursion", 0))
|
||||||
self._visited = set()
|
self._visited = set()
|
||||||
|
|
||||||
@@ -101,24 +101,25 @@ class RedditSubmissionExtractor(RedditExtractor):
|
|||||||
|
|
||||||
class RedditAPI():
|
class RedditAPI():
|
||||||
"""Minimal interface for the reddit API"""
|
"""Minimal interface for the reddit API"""
|
||||||
def __init__(self, session, log, client_id="6N9uN0krSDE-ig"):
|
def __init__(self, extractor, client_id="6N9uN0krSDE-ig"):
|
||||||
self.session = session
|
self.session = extractor.session
|
||||||
self.log = log
|
self.date_min = int(extractor.config("date-min", 0))
|
||||||
|
# 253402210800 == datetime.max.timestamp()
|
||||||
|
self.date_max = int(extractor.config("date-max", 253402210800))
|
||||||
self.client_id = client_id
|
self.client_id = client_id
|
||||||
session.headers["User-Agent"] = "Python:gallery-dl:0.8.4 (by /u/mikf1)"
|
self.session.headers["User-Agent"] = ("Python:gallery-dl:0.8.4"
|
||||||
|
" (by /u/mikf1)")
|
||||||
|
|
||||||
def submission(self, submission_id):
|
def submission(self, submission_id):
|
||||||
"""Fetch the (submission, comments)=-tuple for a submission id"""
|
"""Fetch the (submission, comments)=-tuple for a submission id"""
|
||||||
endpoint = "/comments/" + submission_id + "/.json"
|
endpoint = "/comments/" + submission_id + "/.json"
|
||||||
params = {"raw_json": 1, "limit": 100}
|
submission, comments = self._call(endpoint, {"limit": 500})
|
||||||
submission, comments = self._call(endpoint, params)
|
|
||||||
return (submission["data"]["children"][0]["data"],
|
return (submission["data"]["children"][0]["data"],
|
||||||
self._unfold(comments))
|
self._unfold(comments))
|
||||||
|
|
||||||
def submissions_subreddit(self, subreddit, params):
|
def submissions_subreddit(self, subreddit, params):
|
||||||
"""Collect all (submission, comments)-tuples of a subreddit"""
|
"""Collect all (submission, comments)-tuples of a subreddit"""
|
||||||
endpoint = "/r/" + subreddit + "/.json"
|
endpoint = "/r/" + subreddit + "/.json"
|
||||||
params["raw_json"] = 1
|
|
||||||
params["limit"] = 100
|
params["limit"] = 100
|
||||||
return self._pagination(endpoint, params)
|
return self._pagination(endpoint, params)
|
||||||
|
|
||||||
@@ -142,6 +143,7 @@ class RedditAPI():
|
|||||||
|
|
||||||
def _call(self, endpoint, params):
|
def _call(self, endpoint, params):
|
||||||
url = "https://oauth.reddit.com" + endpoint
|
url = "https://oauth.reddit.com" + endpoint
|
||||||
|
params["raw_json"] = 1
|
||||||
self.authenticate()
|
self.authenticate()
|
||||||
data = self.session.get(url, params=params).json()
|
data = self.session.get(url, params=params).json()
|
||||||
if "error" in data:
|
if "error" in data:
|
||||||
@@ -158,10 +160,11 @@ class RedditAPI():
|
|||||||
|
|
||||||
for submission in data["children"]:
|
for submission in data["children"]:
|
||||||
submission = submission["data"]
|
submission = submission["data"]
|
||||||
if submission["num_comments"]:
|
if self.date_min <= submission["created_utc"] <= self.date_max:
|
||||||
yield self.submission(submission["id"])
|
if submission["num_comments"]:
|
||||||
else:
|
yield self.submission(submission["id"])
|
||||||
yield submission, _empty
|
else:
|
||||||
|
yield submission, _empty
|
||||||
|
|
||||||
if not data["after"]:
|
if not data["after"]:
|
||||||
return
|
return
|
||||||
|
|||||||
Reference in New Issue
Block a user