[reddit] add 'limit' option (#7997 #8012 #8092)

change default value from 100 to null,
i.e. don't send a 'limit' parameter
This commit is contained in:
Mike Fährmann
2025-08-28 22:55:29 +02:00
parent 2eca790c87
commit de05453707
3 changed files with 20 additions and 2 deletions

View File

@@ -4605,6 +4605,22 @@ Description
Ignore all submissions posted before/after the submission with this ID.
extractor.reddit.limit
----------------------
Type
``integer``
Default
``null``
Description
Number of results to return in a single API query.
This value specifies the ``limit`` parameter
used for API requests when retrieving paginated results.
``null`` means not including this parameter at all
and letting Reddit choose a default.
extractor.reddit.previews
-------------------------
Type

View File

@@ -632,6 +632,7 @@
"date-format" : "%Y-%m-%dT%H:%M:%S",
"id-min" : null,
"id-max" : null,
"limit" : null,
"previews" : true,
"recursion" : 0,
"selftext" : null,

View File

@@ -426,13 +426,11 @@ class RedditAPI():
def submissions_subreddit(self, subreddit, params):
"""Collect all (submission, comments)-tuples of a subreddit"""
endpoint = subreddit + "/.json"
params["limit"] = 100
return self._pagination(endpoint, params)
def submissions_user(self, user, params):
"""Collect all (submission, comments)-tuples posted by a user"""
endpoint = "/user/" + user + "/.json"
params["limit"] = 100
return self._pagination(endpoint, params)
def morechildren(self, link_id, children):
@@ -532,6 +530,9 @@ class RedditAPI():
id_max = float("inf")
date_min, date_max = self.extractor._get_date_min_max(0, 253402210800)
if limit := self.extractor.config("limit"):
params["limit"] = limit
while True:
data = self._call(endpoint, params)["data"]