[imagetwist] detect removed images (#8415)
This commit is contained in:
@@ -53,6 +53,8 @@ class ImagehostImageExtractor(Extractor):
|
|||||||
).text
|
).text
|
||||||
|
|
||||||
url, filename = self.get_info(page)
|
url, filename = self.get_info(page)
|
||||||
|
if not url:
|
||||||
|
return
|
||||||
data = text.nameext_from_url(filename, {"token": self.token})
|
data = text.nameext_from_url(filename, {"token": self.token})
|
||||||
data.update(self.metadata(page))
|
data.update(self.metadata(page))
|
||||||
if self._https and url.startswith("http:"):
|
if self._https and url.startswith("http:"):
|
||||||
@@ -199,6 +201,8 @@ class ImagetwistImageExtractor(ImagehostImageExtractor):
|
|||||||
|
|
||||||
def get_info(self, page):
|
def get_info(self, page):
|
||||||
url , pos = text.extract(page, '<img src="', '"')
|
url , pos = text.extract(page, '<img src="', '"')
|
||||||
|
if url and url.startswith("/imgs/"):
|
||||||
|
return None, None
|
||||||
filename, pos = text.extract(page, ' alt="', '"', pos)
|
filename, pos = text.extract(page, ' alt="', '"', pos)
|
||||||
return url, filename
|
return url, filename
|
||||||
|
|
||||||
|
|||||||
@@ -41,6 +41,14 @@ __tests__ = (
|
|||||||
"#class" : imagehosts.ImagetwistImageExtractor,
|
"#class" : imagehosts.ImagetwistImageExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://imagetwist.com/tynhxt4ay9rl/9g09tq0e2i1b.jpg",
|
||||||
|
"#comment" : "'Image not found' (#8415)",
|
||||||
|
"#category": ("imagehost", "imagetwist", "image"),
|
||||||
|
"#class" : imagehosts.ImagetwistImageExtractor,
|
||||||
|
"#count" : 0,
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://imagetwist.com/p/gdldev/747223/digits",
|
"#url" : "https://imagetwist.com/p/gdldev/747223/digits",
|
||||||
"#category": ("imagehost", "imagetwist", "gallery"),
|
"#category": ("imagehost", "imagetwist", "gallery"),
|
||||||
|
|||||||
Reference in New Issue
Block a user