[tumblr] improve 'inline' extraction

'quote' posts store their HTML content in the 'source' field, so that field is now searched for inline images as well.
This commit is contained in:
Mike Fährmann
2018-03-02 06:59:44 +01:00
parent 1d54a8e07d
commit 858fdbdb22
2 changed files with 2 additions and 2 deletions

View File

@@ -77,7 +77,7 @@ class ImgchiliAlbumExtractor(ImgchiliExtractor):
pattern = [r"(?:https?://)?(?:www\.)?imgchili\.net/album/([^/]+)"] pattern = [r"(?:https?://)?(?:www\.)?imgchili\.net/album/([^/]+)"]
test = [("http://imgchili.net/album/7a3824c59f77c8d39b260f9168d4b49b", { test = [("http://imgchili.net/album/7a3824c59f77c8d39b260f9168d4b49b", {
"url": "995e32b62c36d48b02ef4c7a7a19463924391e2a", "url": "995e32b62c36d48b02ef4c7a7a19463924391e2a",
"keyword": "ae0c56cfd1fe032e5bc22f1188767b2a923ae25e", "keyword": "14e903b320702faae0f057af910f2fa04ef2ad66",
})] })]
def get_job_metadata(self, page): def get_job_metadata(self, page):

View File

@@ -108,7 +108,7 @@ class TumblrExtractor(Extractor):
yield self._prepare(_original_video(post["video_url"]), post) yield self._prepare(_original_video(post["video_url"]), post)
if self.inline: # inline images if self.inline: # inline images
for key in ("body", "description"): for key in ("body", "description", "source"):
if key in post: if key in post:
for url in re.findall('<img src="([^"]+)"', post[key]): for url in re.findall('<img src="([^"]+)"', post[key]):
yield self._prepare_image(url, post) yield self._prepare_image(url, post)