[twitter] fix image extraction

This commit is contained in:
Mike Fährmann
2017-02-09 02:09:23 +01:00
parent 0af02007a9
commit c84e975dcb
2 changed files with 5 additions and 4 deletions

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2016 Mike Fährmann
# Copyright 2016-2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -35,7 +35,7 @@ class TwitterTweetExtractor(Extractor):
"Mozilla/5.0 (X11; Linux x86_64; rv:48.0) "
"Gecko/20100101 Firefox/48.0"
)
page = self.request("https://mobile.twitter.com/" + self.path).text
page = self.request("https://twitter.com/" + self.path).text
data = self.get_job_metadata()
imgs = self.get_image_urls(page)
data["count"] = len(imgs)
@@ -54,4 +54,5 @@ class TwitterTweetExtractor(Extractor):
@staticmethod
def get_image_urls(page):
"""Extract and return a list of all image-urls"""
return list(text.extract_iter(page, 'alt="Embedded image" src="', '"'))
needle = '<img data-aria-label-part src="'
return list(text.extract_iter(page, needle, '"'))

View File

@@ -81,7 +81,7 @@ class WhentaiImageExtractor(Extractor):
test = [("http://whentai.com/view/2089/", {
"url": "116761cdd3a4d78f9ebe22c18efae1465b2e4d1b",
"keyword": "686bb5af1694efb326d597d4c98ee12d88078455",
"content": "b4545aaeb3bc7d94c8941bdfdcb768261f2579b3",
"content": "31909f3d52d14ba6bf79bf303eb31d6807588f25",
})]
def __init__(self, match):