diff --git a/docs/configuration.rst b/docs/configuration.rst index 7fa7881e..95e47b4b 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -420,6 +420,18 @@ Description Like `image-unique`__, but applies to delegated URLs __ `extractor.*.image-unique`_ +extractor.*.date-format +---------------------------- +=========== ===== +Type ``string`` +Default ``"%Y-%m-%dT%H:%M:%S"`` +Description Format string used to parse ``string`` values of + `date-min` and `date-max`. + + See |strptime|_ for a list of formatting directives. +=========== ===== + + Extractor-specific Options ========================== @@ -776,24 +788,9 @@ Description Retrieve additional comments by resolving the ``more`` comment extractor.reddit.date-min & .date-max ------------------------------------- =========== ===== -Type ``integer`` or ``string`` +Type |Date|_ Default ``0`` and ``253402210800`` (timestamp of |datetime.max|_) Description Ignore all submissions posted before/after this date. - - * If this is an ``integer``, it represents the date as UTC timestamp. - * If this is a ``string``, it will get parsed according to date-format_. -=========== ===== - - -extractor.reddit.date-format ----------------------------- -=========== ===== -Type ``string`` -Default ``"%Y-%m-%dT%H:%M:%S"`` -Description An explicit format string used to parse the ``string`` values of - `date-min and date-max`_. - - See |strptime|_ for a list of formatting directives. =========== ===== @@ -870,6 +867,15 @@ Description Download blog avatars. =========== ===== +extractor.tumblr.date-min & .date-max +------------------------------------- +=========== ===== +Type |Date|_ +Default ``0`` and ``null`` +Description Ignore all posts published before/after this date. 
+=========== ===== + + extractor.tumblr.external ------------------------- =========== ===== @@ -1546,6 +1552,20 @@ Custom Types ============ +Date +---- +=========== ===== +Type ``string`` or ``integer`` +Examples * ``"2019-01-01T00:00:00"`` + * ``"2019"`` with ``"%Y"`` as date-format_ + * ``1546297200`` +Description A |Date|_ value represents a specific point in time. + + * If given as a ``string``, it is parsed according to date-format_. + * If given as an ``integer``, it is interpreted as a UTC timestamp. +=========== ===== + + Path ---- =========== ===== @@ -1667,6 +1687,7 @@ Description An object with the ``name`` of a post-processor and its options. .. |webbrowser.open()| replace:: ``webbrowser.open()`` .. |datetime| replace:: ``datetime`` .. |datetime.max| replace:: ``datetime.max`` +.. |Date| replace:: ``Date`` .. |Path| replace:: ``Path`` .. |Last-Modified| replace:: ``Last-Modified`` .. |Logging Configuration| replace:: ``Logging Configuration`` @@ -1675,8 +1696,7 @@ Description An object with the ``name`` of a post-processor and its options. .. _base-directory: `extractor.*.base-directory`_ .. _skipped: `extractor.*.skip`_ -.. _`date-min and date-max`: `extractor.reddit.date-min & .date-max`_ -.. _date-format: extractor.reddit.date-format_ +.. _date-format: `extractor.*.date-format`_ .. _deviantart.metadata: extractor.deviantart.metadata_ .. 
_.netrc: https://stackoverflow.com/tags/.netrc/info diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index 5679cdc2..37432406 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -65,11 +65,15 @@ class TumblrExtractor(Extractor): if self.reblogs == "same-blog": self._skip_reblog = self._skip_reblog_same_blog + self.date_min, self.api.before = self._get_date_min_max(0, None) + def items(self): blog = None yield Message.Version, 1 for post in self.posts(): + if self.date_min > post["timestamp"]: + return if post["type"] not in self.types: continue if not blog: @@ -223,6 +227,11 @@ class TumblrUserExtractor(TumblrExtractor): "count": 2, "keyword": {"tags": ["test", "private", "hidden"]}, }), + ("https://mikf123.tumblr.com/", { # date-min/-max/-format (#337) + "count": 4, + "options": (("date-min", "201804"), ("date-max", "201805"), + ("date-format", "%Y%m")) + }), ("https://demo.tumblr.com/page/2"), ("https://demo.tumblr.com/archive"), ("tumblr:http://www.b-authentique.com/"), @@ -280,6 +289,7 @@ class TumblrPostExtractor(TumblrExtractor): TumblrExtractor.__init__(self, match) self.post_id = match.group(3) self.reblogs = True + self.date_min = 0 def posts(self): return self.api.posts(self.blog, {"id": self.post_id}) @@ -328,7 +338,7 @@ class TumblrAPI(oauth.OAuth1API): def __init__(self, extractor): oauth.OAuth1API.__init__(self, extractor) - self.posts_type = None + self.posts_type = self.before = None def info(self, blog): """Return general information about a blog""" @@ -350,6 +360,8 @@ class TumblrAPI(oauth.OAuth1API): params.update({"offset": 0, "limit": 50, "reblog_info": "true"}) if self.posts_type: params["type"] = self.posts_type + if self.before: + params["before"] = self.before while True: data = self._call(blog, "posts", params) self.BLOG_CACHE[blog] = data["blog"] @@ -360,7 +372,7 @@ class TumblrAPI(oauth.OAuth1API): def likes(self, blog): """Retrieve liked posts""" - params = {"limit": 50} + 
params = {"limit": "50", "before": self.before} while True: posts = self._call(blog, "likes", params)["liked_posts"] if not posts: