[reddit] move date-min/-max handling into Extractor class

This commit is contained in:
Mike Fährmann
2019-07-16 22:54:39 +02:00
parent fb875d1ab8
commit 09f37fde39
2 changed files with 16 additions and 15 deletions

View File

@@ -13,6 +13,7 @@ import time
import netrc
import queue
import logging
import datetime
import requests
import threading
import http.cookiejar
@@ -217,6 +218,20 @@ class Extractor():
return False
return True
def _get_date_min_max(self, dmin=None, dmax=None):
    """Retrieve and parse 'date-min' and 'date-max' config values

    Each value is read with self.config(). A string value is parsed
    with the 'date-format' config value (default "%Y-%m-%dT%H:%M:%S")
    and converted to an integer timestamp; any non-string value is
    returned as is. A string that cannot be converted is reported with
    a warning and replaced by the corresponding default.

    Returns a (date-min, date-max) tuple; 'dmin' and 'dmax' are the
    defaults used for the respective entry.
    """
    fmt = self.config("date-format", "%Y-%m-%dT%H:%M:%S")

    def get(key, default):
        ts = self.config(key, default)
        if not isinstance(ts, str):
            return ts
        try:
            # strptime() yields a naive datetime, which timestamp()
            # interprets as local time
            return int(datetime.datetime.strptime(ts, fmt).timestamp())
        except (ValueError, OverflowError, OSError) as exc:
            # ValueError: string does not match 'fmt'
            # OverflowError/OSError: timestamp() can raise these for
            # dates outside the range the platform supports
            self.log.warning("Unable to parse '%s': %s", key, exc)
            return default

    return get("date-min", dmin), get("date-max", dmax)
@classmethod
def _get_tests(cls):
"""Yield an extractor's test cases as (URL, RESULTS) tuples"""

View File

@@ -11,7 +11,6 @@
from .common import Extractor, Message
from .. import text, util, extractor, exception
from ..cache import cache
import datetime
import time
@@ -251,12 +250,9 @@ class RedditAPI():
return data
def _pagination(self, endpoint, params, _empty=()):
date_fmt = self.extractor.config("date-format", "%Y-%m-%dT%H:%M:%S")
date_min = self._parse_datetime("date-min", 0, date_fmt)
date_max = self._parse_datetime("date-max", 253402210800, date_fmt)
id_min = self._parse_id("id-min", 0)
id_max = self._parse_id("id-max", 2147483647)
date_min, date_max = self.extractor._get_date_min_max(0, 253402210800)
while True:
data = self._call(endpoint, params)["data"]
@@ -293,16 +289,6 @@ class RedditAPI():
if link_id and extra:
yield from self.morechildren(link_id, extra)
def _parse_datetime(self, key, default, fmt):
    """Return the extractor config value 'key' as a timestamp

    A string value is parsed with the strptime() format 'fmt' and
    converted to an integer timestamp; any non-string value is returned
    as is. A string that cannot be converted is reported with a warning
    and 'default' is returned instead.
    """
    ts = self.extractor.config(key, default)
    if not isinstance(ts, str):
        return ts
    try:
        # strptime() yields a naive datetime, which timestamp()
        # interprets as local time
        return int(datetime.datetime.strptime(ts, fmt).timestamp())
    except (ValueError, OverflowError, OSError) as exc:
        # ValueError: string does not match 'fmt'
        # OverflowError/OSError: timestamp() can raise these for dates
        # outside the range the platform supports
        self.log.warning("Unable to parse '%s': %s", key, exc)
        return default
def _parse_id(self, key, default):
    """Return the extractor config value 'key' passed through _decode()

    Returns 'default' when the config value is unset or otherwise falsy.
    """
    sid = self.extractor.config(key)
    if not sid:
        return default
    # only the part after the last "_" is decoded, in lower case
    _, _, ident = sid.rpartition("_")
    return self._decode(ident.lower())