[reddit] move date-min/-max handling into Extractor class
This commit is contained in:
@@ -13,6 +13,7 @@ import time
|
||||
import netrc
|
||||
import queue
|
||||
import logging
|
||||
import datetime
|
||||
import requests
|
||||
import threading
|
||||
import http.cookiejar
|
||||
@@ -217,6 +218,20 @@ class Extractor():
|
||||
return False
|
||||
return True
|
||||
|
||||
def _get_date_min_max(self, dmin=None, dmax=None):
    """Return the 'date-min' and 'date-max' config values as a 2-tuple.

    Each option is looked up via self.config(), using 'dmin' / 'dmax'
    as the respective default. String values are parsed with the
    'date-format' option (default "%Y-%m-%dT%H:%M:%S") and converted to
    an integer timestamp; values of any other type are passed through
    unchanged. If a string cannot be parsed, a warning is logged and
    the default for that option is returned instead.
    """
    date_format = self.config("date-format", "%Y-%m-%dT%H:%M:%S")

    def parse(option, fallback):
        value = self.config(option, fallback)
        if not isinstance(value, str):
            # already a timestamp (or the untouched default)
            return value
        try:
            parsed = datetime.datetime.strptime(value, date_format)
            return int(parsed.timestamp())
        except ValueError as exc:
            self.log.warning("Unable to parse '%s': %s", option, exc)
            return fallback

    lower = parse("date-min", dmin)
    upper = parse("date-max", dmax)
    return lower, upper
|
||||
|
||||
@classmethod
|
||||
def _get_tests(cls):
|
||||
"""Yield an extractor's test cases as (URL, RESULTS) tuples"""
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
from .common import Extractor, Message
|
||||
from .. import text, util, extractor, exception
|
||||
from ..cache import cache
|
||||
import datetime
|
||||
import time
|
||||
|
||||
|
||||
@@ -251,12 +250,9 @@ class RedditAPI():
|
||||
return data
|
||||
|
||||
def _pagination(self, endpoint, params, _empty=()):
|
||||
date_fmt = self.extractor.config("date-format", "%Y-%m-%dT%H:%M:%S")
|
||||
date_min = self._parse_datetime("date-min", 0, date_fmt)
|
||||
date_max = self._parse_datetime("date-max", 253402210800, date_fmt)
|
||||
|
||||
id_min = self._parse_id("id-min", 0)
|
||||
id_max = self._parse_id("id-max", 2147483647)
|
||||
date_min, date_max = self.extractor._get_date_min_max(0, 253402210800)
|
||||
|
||||
while True:
|
||||
data = self._call(endpoint, params)["data"]
|
||||
@@ -293,16 +289,6 @@ class RedditAPI():
|
||||
if link_id and extra:
|
||||
yield from self.morechildren(link_id, extra)
|
||||
|
||||
def _parse_datetime(self, key, default, fmt):
    """Return the extractor's config value for 'key' as a timestamp.

    The value is read via self.extractor.config() with 'default' as
    fallback. A string value is parsed according to 'fmt' and converted
    to an integer timestamp; any other type is returned unchanged. If
    parsing fails, a warning is logged and 'default' is returned.
    """
    value = self.extractor.config(key, default)
    if not isinstance(value, str):
        return value
    try:
        parsed = datetime.datetime.strptime(value, fmt)
        return int(parsed.timestamp())
    except ValueError as exc:
        self.log.warning("Unable to parse '%s': %s", key, exc)
        return default
|
||||
|
||||
def _parse_id(self, key, default):
    """Return the decoded form of the ID configured under 'key'.

    The value is read via self.extractor.config(). If it is unset or
    otherwise falsy, 'default' is returned. Otherwise everything up to
    and including the last underscore is dropped, the remainder is
    lowercased, and the result is passed through self._decode().
    """
    raw_id = self.extractor.config(key)
    if not raw_id:
        return default
    _, _, suffix = raw_id.rpartition("_")
    return self._decode(suffix.lower())
|
||||
|
||||
Reference in New Issue
Block a user