@@ -102,8 +102,7 @@ class TumblrExtractor(Extractor):
|
||||
del post["trail"]
|
||||
post["blog"] = blog
|
||||
post["date"] = text.parse_timestamp(post["timestamp"])
|
||||
yield Message.Directory, post
|
||||
post["num"] = 0
|
||||
posts = []
|
||||
|
||||
if "photos" in post: # type "photo" or "link"
|
||||
photos = post["photos"]
|
||||
@@ -125,15 +124,17 @@ class TumblrExtractor(Extractor):
|
||||
|
||||
del photo["original_size"]
|
||||
del photo["alt_sizes"]
|
||||
yield self._prepare_image(photo["url"], post)
|
||||
posts.append(
|
||||
self._prepare_image(photo["url"], post.copy()))
|
||||
del post["photo"]
|
||||
|
||||
url = post.get("audio_url") # type "audio"
|
||||
if url and url.startswith("https://a.tumblr.com/"):
|
||||
yield self._prepare(url, post)
|
||||
posts.append(self._prepare(url, post.copy()))
|
||||
|
||||
url = post.get("video_url") # type "video"
|
||||
if url:
|
||||
yield self._prepare(_original_video(url), post)
|
||||
posts.append(self._prepare(_original_video(url), post.copy()))
|
||||
|
||||
if self.inline and "reblog" in post: # inline media
|
||||
# only "chat" posts are missing a "reblog" key in their
|
||||
@@ -141,16 +142,25 @@ class TumblrExtractor(Extractor):
|
||||
body = post["reblog"]["comment"] + post["reblog"]["tree_html"]
|
||||
for url in re.findall('<img src="([^"]+)"', body):
|
||||
url = _original_inline_image(url)
|
||||
yield self._prepare_image(url, post)
|
||||
posts.append(self._prepare_image(url, post.copy()))
|
||||
for url in re.findall('<source src="([^"]+)"', body):
|
||||
url = _original_video(url)
|
||||
yield self._prepare(url, post)
|
||||
posts.append(self._prepare(url, post.copy()))
|
||||
|
||||
if self.external: # external links
|
||||
post["extension"] = None
|
||||
url = post.get("permalink_url") or post.get("url")
|
||||
if url:
|
||||
yield Message.Queue, url, post
|
||||
post["extension"] = None
|
||||
posts.append((Message.Queue, url, post.copy()))
|
||||
del post["extension"]
|
||||
|
||||
post["count"] = len(posts)
|
||||
yield Message.Directory, post
|
||||
|
||||
for num, (msg, url, post) in enumerate(posts, 1):
|
||||
post["num"] = num
|
||||
post["count"] = len(posts)
|
||||
yield msg, url, post
|
||||
|
||||
def posts(self):
|
||||
"""Return an iterable containing all relevant posts"""
|
||||
@@ -179,14 +189,12 @@ class TumblrExtractor(Extractor):
|
||||
@staticmethod
|
||||
def _prepare(url, post):
|
||||
text.nameext_from_url(url, post)
|
||||
post["num"] += 1
|
||||
post["hash"] = post["filename"].partition("_")[2]
|
||||
return Message.Url, url, post
|
||||
|
||||
@staticmethod
|
||||
def _prepare_image(url, post):
|
||||
text.nameext_from_url(url, post)
|
||||
post["num"] += 1
|
||||
|
||||
parts = post["filename"].split("_")
|
||||
try:
|
||||
@@ -200,7 +208,7 @@ class TumblrExtractor(Extractor):
|
||||
@staticmethod
|
||||
def _prepare_avatar(url, post, blog):
|
||||
text.nameext_from_url(url, post)
|
||||
post["num"] = 1
|
||||
post["num"] = post["count"] = 1
|
||||
post["blog"] = blog
|
||||
post["reblogged"] = False
|
||||
post["type"] = post["id"] = post["hash"] = "avatar"
|
||||
|
||||
Reference in New Issue
Block a user