[reddit] allow 'date-min/max' to be human readable dates

If the date-min/max config value is a string, try parsing it using
datetime.strptime [1] with 'date-format' as format string [2]
(default: "%Y-%m-%dT%H:%M:%S")

Example: get all submissions posted in 2016

$ gallery-dl reddit.com/r/... \
    -o date-format=%Y \
    -o date-min=\"2016\" \
    -o date-max=\"2017\"

[1] https://docs.python.org/3/library/datetime.html#datetime.datetime.strptime
[2] https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior
This commit is contained in:
Mike Fährmann
2017-07-01 18:46:38 +02:00
parent 4414aefe97
commit 80c2e03aaa
2 changed files with 19 additions and 16 deletions

View File

@@ -11,6 +11,7 @@
from .common import Extractor, Message
from .. import text, util, extractor, exception
from ..cache import cache
import datetime
import time
import re
@@ -189,15 +190,19 @@ class RedditAPI():
return data
def _pagination(self, endpoint, params, _empty=()):
ts_min, ts_max = self._parse_timestamps()
id_min, id_max = self._parse_ids()
date_fmt = self.extractor.config("date-format", "%Y-%m-%dT%H:%M:%S")
date_min = self._parse_datetime("date-min", 0, date_fmt)
date_max = self._parse_datetime("date-max", 253402210800, date_fmt)
id_min = self._parse_id("id-min", 0)
id_max = self._parse_id("id-max", 2147483647)
while True:
data = self._call(endpoint, params)["data"]
for submission in data["children"]:
submission = submission["data"]
if (ts_min <= submission["created_utc"] <= ts_max and
if (date_min <= submission["created_utc"] <= date_max and
id_min <= self._decode(submission["id"]) <= id_max):
if submission["num_comments"] and self.comments:
try:
@@ -227,19 +232,18 @@ class RedditAPI():
if link_id and extra:
yield from self.morechildren(link_id, extra)
def _parse_timestamps(self):
return (
int(self.extractor.config("date-min", 0)),
int(self.extractor.config("date-max", 253402210800)),
)
def _parse_datetime(self, key, default, fmt):
ts = self.extractor.config(key, default)
if isinstance(ts, str):
try:
ts = int(datetime.datetime.strptime(ts, fmt).timestamp())
except ValueError as exc:
self.warning("Unable to parse '%s': %s", key, exc)
return ts
def _parse_ids(self):
id_min = self.extractor.config("id-min")
id_max = self.extractor.config("id-max")
return (
self._decode(id_min.rpartition("_")[2]) if id_min else 0,
self._decode(id_max.rpartition("_")[2]) if id_max else 2147483647,
)
def _parse_id(self, key, default):
sid = self.extractor.config(key)
return self._decode(sid.rpartition("_")[2]) if sid else default
@staticmethod
def _decode(sid):

View File

@@ -23,7 +23,6 @@ class TumblrUserExtractor(Extractor):
test = [("http://demo.tumblr.com/", {
"url": "5c113da25a605b7449de8ca1606eec5502b4dc9f",
"keyword": "d2cf142bcaf1cbea29291f8c8ccb5f582962d8be",
"content": "31495fdb9f84edbb7f67972746a1521456f649e2",
})]
def __init__(self, match):