[reddit] fix errors with 't1_…' submissions

This commit is contained in:
Mike Fährmann
2019-12-20 16:49:44 +01:00
parent 58391d492d
commit 9c0928457a

View File

@@ -60,14 +60,16 @@ class RedditExtractor(Extractor):
def _urls(self, submissions): def _urls(self, submissions):
for submission, comments in submissions: for submission, comments in submissions:
self._visited.add(submission["id"])
if not submission["is_self"]: if submission:
yield submission["url"], submission self._visited.add(submission["id"])
for url in text.extract_iter( if not submission["is_self"]:
submission["selftext_html"] or "", ' href="', '"'): yield submission["url"], submission
yield url, submission
for url in text.extract_iter(
submission["selftext_html"] or "", ' href="', '"'):
yield url, submission
if comments: if comments:
for comment in comments: for comment in comments:
@@ -298,17 +300,24 @@ class RedditAPI():
while True: while True:
data = self._call(endpoint, params)["data"] data = self._call(endpoint, params)["data"]
for submission in data["children"]: for child in data["children"]:
submission = submission["data"] kind = child["kind"]
if (date_min <= submission["created_utc"] <= date_max and post = child["data"]
id_min <= self._decode(submission["id"]) <= id_max):
if submission["num_comments"] and self.comments: if (date_min <= post["created_utc"] <= date_max and
try: id_min <= self._decode(post["id"]) <= id_max):
yield self.submission(submission["id"])
except exception.AuthorizationError: if kind == "t3":
pass if post["num_comments"] and self.comments:
else: try:
yield submission, None yield self.submission(post["id"])
except exception.AuthorizationError:
pass
else:
yield post, None
elif kind == "t1" and self.comments:
yield None, (post,)
if not data["after"]: if not data["after"]:
return return