[tumblr] improve 'inline' extraction

'quote' posts store their HTML content in the 'source' field, so that field is now searched for inline images as well.
This commit is contained in:
Mike Fährmann
2018-03-02 06:59:44 +01:00
parent 1d54a8e07d
commit 858fdbdb22
2 changed files with 2 additions and 2 deletions

View File

@@ -77,7 +77,7 @@ class ImgchiliAlbumExtractor(ImgchiliExtractor):
pattern = [r"(?:https?://)?(?:www\.)?imgchili\.net/album/([^/]+)"]
test = [("http://imgchili.net/album/7a3824c59f77c8d39b260f9168d4b49b", {
"url": "995e32b62c36d48b02ef4c7a7a19463924391e2a",
"keyword": "ae0c56cfd1fe032e5bc22f1188767b2a923ae25e",
"keyword": "14e903b320702faae0f057af910f2fa04ef2ad66",
})]
def get_job_metadata(self, page):

View File

@@ -108,7 +108,7 @@ class TumblrExtractor(Extractor):
yield self._prepare(_original_video(post["video_url"]), post)
if self.inline: # inline images
for key in ("body", "description"):
for key in ("body", "description", "source"):
if key in post:
for url in re.findall('<img src="([^"]+)"', post[key]):
yield self._prepare_image(url, post)