[tumblr] extend 'reblogs' functionality (#103)
Setting 'reblogs' to "deleted" checks, for every reblog, whether its parent post has been deleted. If it has, the reblog's media content is downloaded; otherwise the reblog is skipped. This is a rather costly operation (1 additional API request per reblogged post) and should therefore be used with care.
This commit is contained in:
@@ -614,16 +614,20 @@ extractor.tumblr.inline
|
|||||||
=========== =====
|
=========== =====
|
||||||
Type ``bool``
|
Type ``bool``
|
||||||
Default ``false``
|
Default ``false``
|
||||||
Description Search posts for inline images.
|
Description Search posts for inline images and videos.
|
||||||
=========== =====
|
=========== =====
|
||||||
|
|
||||||
|
|
||||||
extractor.tumblr.reblogs
|
extractor.tumblr.reblogs
|
||||||
------------------------
|
------------------------
|
||||||
=========== =====
|
=========== =====
|
||||||
Type ``bool``
|
Type ``bool`` or ``string``
|
||||||
Default ``true``
|
Default ``true``
|
||||||
Description Extract images from reblogged posts.
|
Description * ``true``: Extract media from reblogged posts
|
||||||
|
* ``false``: Skip reblogged posts
|
||||||
|
* ``"deleted"``: Skip reblogged posts, but download from them
|
||||||
|
anyway if the parent post has been deleted
|
||||||
|
(requires 1 additional API request per reblogged post)
|
||||||
=========== =====
|
=========== =====
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -73,7 +73,7 @@ class TumblrExtractor(Extractor):
|
|||||||
yield Message.Directory, blog.copy()
|
yield Message.Directory, blog.copy()
|
||||||
|
|
||||||
reblog = "reblogged_from_id" in post
|
reblog = "reblogged_from_id" in post
|
||||||
if reblog and not self.reblogs:
|
if reblog and self._skip_reblog(post):
|
||||||
continue
|
continue
|
||||||
post["reblogged"] = reblog
|
post["reblogged"] = reblog
|
||||||
|
|
||||||
@@ -158,6 +158,19 @@ class TumblrExtractor(Extractor):
|
|||||||
|
|
||||||
return Message.Url, url, post
|
return Message.Url, url, post
|
||||||
|
|
||||||
|
def _skip_reblog(self, post):
    """Return True if the reblogged 'post' should be skipped.

    Behavior depends on the 'reblogs' setting:

    * any value other than "deleted" is treated as a boolean:
      truthy keeps reblogs (returns False), falsy skips them (returns True)
    * "deleted" skips a reblog unless its parent post can no longer be
      retrieved; checking this costs one API request per reblogged post
    """
    if self.reblogs != "deleted":
        return not self.reblogs

    # A reblog without a usable root URL cannot be verified;
    # handle it like a URL that does not match the post pattern: skip it.
    root_url = post.get("reblogged_root_url")
    match = root_url and re.match(TumblrPostExtractor.pattern[0], root_url)
    if not match:
        return True

    blog = match.group(1) or match.group(2)
    try:
        # Pass a default so that an empty result does not raise
        # StopIteration, which would escape into the calling generator.
        parent = next(self.api.posts(blog, {"id": match.group(3)}), None)
    except exception.NotFoundError:
        # parent post is gone -> download from the reblog
        return False

    # No post returned is handled like "not found": do not skip.
    # Otherwise the parent still exists and the reblog is skipped.
    return parent is not None
|
||||||
|
|
||||||
|
|
||||||
class TumblrUserExtractor(TumblrExtractor):
|
class TumblrUserExtractor(TumblrExtractor):
|
||||||
"""Extractor for all images from a tumblr-user"""
|
"""Extractor for all images from a tumblr-user"""
|
||||||
|
|||||||
Reference in New Issue
Block a user