From 7742cf860132f7cf318677236da2209223a3a0f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 10 Sep 2018 15:40:25 +0200 Subject: [PATCH] [tumblr] change 'reblogs' option (#103) - rename "deleted" to "same-blog" - change test for deleted original post to test if original post owner has the same UUID (full blog name) as the one being downloaded from - add 'blog[uuid]' metadata to allow comparison with 'reblogged_from_uuid' --- docs/configuration.rst | 5 ++--- gallery_dl/extractor/tumblr.py | 21 +++++++++------------ 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index 9cd86d8f..db1a1bbf 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -625,9 +625,8 @@ Type ``bool`` or ``string`` Default ``true`` Description * ``true``: Extract media from reblogged posts * ``false``: Skip reblogged posts - * ``"deleted"``: Skip reblogged posts, but download from them - anyway if the parent post has been deleted - (requires 1 additional API request per reblogged post) + * ``"same-blog"``: Skip reblogged posts unless the original post + is from the same blog =========== ===== diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index 4bf3705e..e9bec637 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -61,6 +61,9 @@ class TumblrExtractor(Extractor): elif not self.types: self.log.warning("no valid post types selected") + if self.reblogs == "same-blog": + self._skip_reblog = self._skip_reblog_same_blog + def items(self): blog = None yield Message.Version, 1 @@ -70,6 +73,7 @@ class TumblrExtractor(Extractor): continue if not blog: blog = self.api.info(self.blog) + blog["uuid"] = self.blog yield Message.Directory, blog.copy() reblog = "reblogged_from_id" in post @@ -158,18 +162,11 @@ class TumblrExtractor(Extractor): return Message.Url, url, post - def _skip_reblog(self, post): - if self.reblogs != "deleted": - return not self.reblogs - match = re.match( - TumblrPostExtractor.pattern[0], post["reblogged_root_url"]) - if match: - blog = match.group(1) or match.group(2) - try: - next(self.api.posts(blog, {"id": match.group(3)})) - except exception.NotFoundError: - return False - return True + def _skip_reblog(self, _): + return not self.reblogs + + def _skip_reblog_same_blog(self, post): + return self.blog != post["reblogged_root_uuid"] class TumblrUserExtractor(TumblrExtractor):