[tumblr] improve inline extraction (#137)
This commit is contained in:
@@ -104,16 +104,16 @@ class TumblrExtractor(Extractor):
|
||||
if "video_url" in post: # type: "video"
|
||||
yield self._prepare(_original_video(post["video_url"]), post)
|
||||
|
||||
if self.inline: # inline media
|
||||
for key in ("body", "description", "source"):
|
||||
if key not in post:
|
||||
continue
|
||||
for url in re.findall('<img src="([^"]+)"', post[key]):
|
||||
url = _original_inline_image(url)
|
||||
yield self._prepare_image(url, post)
|
||||
for url in re.findall('<source src="([^"]+)"', post[key]):
|
||||
url = _original_video(url)
|
||||
yield self._prepare(url, post)
|
||||
if self.inline and "reblog" in post: # inline media
|
||||
# only "chat" posts are missing a "reblog" key in their
|
||||
# API response, but they can't contain images/videos anyway
|
||||
body = post["reblog"]["comment"]
|
||||
for url in re.findall('<img src="([^"]+)"', body):
|
||||
url = _original_inline_image(url)
|
||||
yield self._prepare_image(url, post)
|
||||
for url in re.findall('<source src="([^"]+)"', body):
|
||||
url = _original_video(url)
|
||||
yield self._prepare(url, post)
|
||||
|
||||
if self.external: # external links
|
||||
post["extension"] = None
|
||||
|
||||
Reference in New Issue
Block a user