[twitter] fix image extraction
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2016 Mike Fährmann
|
# Copyright 2016-2017 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -35,7 +35,7 @@ class TwitterTweetExtractor(Extractor):
|
|||||||
"Mozilla/5.0 (X11; Linux x86_64; rv:48.0) "
|
"Mozilla/5.0 (X11; Linux x86_64; rv:48.0) "
|
||||||
"Gecko/20100101 Firefox/48.0"
|
"Gecko/20100101 Firefox/48.0"
|
||||||
)
|
)
|
||||||
page = self.request("https://mobile.twitter.com/" + self.path).text
|
page = self.request("https://twitter.com/" + self.path).text
|
||||||
data = self.get_job_metadata()
|
data = self.get_job_metadata()
|
||||||
imgs = self.get_image_urls(page)
|
imgs = self.get_image_urls(page)
|
||||||
data["count"] = len(imgs)
|
data["count"] = len(imgs)
|
||||||
@@ -54,4 +54,5 @@ class TwitterTweetExtractor(Extractor):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def get_image_urls(page):
|
def get_image_urls(page):
|
||||||
"""Extract and return a list of all image-urls"""
|
"""Extract and return a list of all image-urls"""
|
||||||
return list(text.extract_iter(page, 'alt="Embedded image" src="', '"'))
|
needle = '<img data-aria-label-part src="'
|
||||||
|
return list(text.extract_iter(page, needle, '"'))
|
||||||
|
|||||||
@@ -81,7 +81,7 @@ class WhentaiImageExtractor(Extractor):
|
|||||||
test = [("http://whentai.com/view/2089/", {
|
test = [("http://whentai.com/view/2089/", {
|
||||||
"url": "116761cdd3a4d78f9ebe22c18efae1465b2e4d1b",
|
"url": "116761cdd3a4d78f9ebe22c18efae1465b2e4d1b",
|
||||||
"keyword": "686bb5af1694efb326d597d4c98ee12d88078455",
|
"keyword": "686bb5af1694efb326d597d4c98ee12d88078455",
|
||||||
"content": "b4545aaeb3bc7d94c8941bdfdcb768261f2579b3",
|
"content": "31909f3d52d14ba6bf79bf303eb31d6807588f25",
|
||||||
})]
|
})]
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
|
|||||||
Reference in New Issue
Block a user