[reddit] add 'user' extractor (closes #350)

This commit is contained in:
Mike Fährmann
2019-09-22 22:10:18 +02:00
parent c14abb9fb8
commit 946f2751e2

View File

@@ -31,7 +31,8 @@ class RedditExtractor(Extractor):
yield Message.Version, 1
with extractor.blacklist(
util.SPECIAL_EXTRACTORS, [RedditSubredditExtractor]):
util.SPECIAL_EXTRACTORS,
(RedditSubredditExtractor, RedditUserExtractor)):
while True:
extra = []
for url, data in self._urls(submissions):
@@ -97,6 +98,29 @@ class RedditSubredditExtractor(RedditExtractor):
return self.api.submissions_subreddit(self.subreddit, self.params)
class RedditUserExtractor(RedditExtractor):
    """Extractor for the URLs found in posts by a reddit user

    Matches "/user/<name>/" and "/u/<name>/" URLs on reddit.com (with an
    optional subdomain), optionally followed by a single lowercase path
    segment such as "gilded" and by a query string.
    """
    subcategory = "user"
    pattern = (
        # optional scheme and subdomain, then "/u/" or "/user/"
        r"(?:https?://)?(?:\w+\.)?reddit\.com/u(?:ser)?/"
        # group 1: user name (+ optional path segment); group 2: query string
        r"([^/?&#]+(?:/[a-z]+)?)/?(?:\?([^#]*))?"
    )
    test = (
        ("https://www.reddit.com/user/username/", {"count": ">= 2"}),
        "https://www.reddit.com/user/username/gilded/?sort=top&t=month",
        "https://old.reddit.com/user/username/",
        "https://www.reddit.com/u/username/",
    )

    def __init__(self, match):
        RedditExtractor.__init__(self, match)
        # Fetch both capture groups in one call; 'query' is None when the
        # URL carries no query string.
        user, query = match.group(1, 2)
        self.user = user
        self.params = text.parse_query(query)

    def submissions(self):
        """Return the result of the API's user listing for this user"""
        api = self.api
        return api.submissions_user(self.user, self.params)
class RedditSubmissionExtractor(RedditExtractor):
"""Extractor for URLs from a submission on reddit.com"""
subcategory = "submission"
@@ -186,6 +210,12 @@ class RedditAPI():
params["limit"] = 100
return self._pagination(endpoint, params)
def submissions_user(self, user, params):
    """Collect all (submission, comments)-tuples posted by a user

    'user' is inserted verbatim into the "/user/<user>/.json" endpoint
    path. 'params' is modified in place: its "limit" entry is set to 100
    before the request is delegated to self._pagination(), whose return
    value is passed back unchanged.
    """
    path = "".join(("/user/", user, "/.json"))
    params.update(limit=100)
    return self._pagination(path, params)
def morechildren(self, link_id, children):
"""Load additional comments from a submission"""
endpoint = "/api/morechildren"