[tumblr] improve 'inline' extraction
'quote' posts store their HTML content in the 'source' field, so that field is now scanned for inline images alongside 'body' and 'description'.
This commit is contained in:
@@ -77,7 +77,7 @@ class ImgchiliAlbumExtractor(ImgchiliExtractor):
|
|||||||
pattern = [r"(?:https?://)?(?:www\.)?imgchili\.net/album/([^/]+)"]
|
pattern = [r"(?:https?://)?(?:www\.)?imgchili\.net/album/([^/]+)"]
|
||||||
test = [("http://imgchili.net/album/7a3824c59f77c8d39b260f9168d4b49b", {
|
test = [("http://imgchili.net/album/7a3824c59f77c8d39b260f9168d4b49b", {
|
||||||
"url": "995e32b62c36d48b02ef4c7a7a19463924391e2a",
|
"url": "995e32b62c36d48b02ef4c7a7a19463924391e2a",
|
||||||
"keyword": "ae0c56cfd1fe032e5bc22f1188767b2a923ae25e",
|
"keyword": "14e903b320702faae0f057af910f2fa04ef2ad66",
|
||||||
})]
|
})]
|
||||||
|
|
||||||
def get_job_metadata(self, page):
|
def get_job_metadata(self, page):
|
||||||
|
|||||||
@@ -108,7 +108,7 @@ class TumblrExtractor(Extractor):
|
|||||||
yield self._prepare(_original_video(post["video_url"]), post)
|
yield self._prepare(_original_video(post["video_url"]), post)
|
||||||
|
|
||||||
if self.inline: # inline images
|
if self.inline: # inline images
|
||||||
for key in ("body", "description"):
|
for key in ("body", "description", "source"):
|
||||||
if key in post:
|
if key in post:
|
||||||
for url in re.findall('<img src="([^"]+)"', post[key]):
|
for url in re.findall('<img src="([^"]+)"', post[key]):
|
||||||
yield self._prepare_image(url, post)
|
yield self._prepare_image(url, post)
|
||||||
|
|||||||
Reference in New Issue
Block a user