[postimg] fix extraction (#8505)

This commit is contained in:
Mike Fährmann
2025-11-05 11:15:05 +01:00
parent 617bd7a657
commit 9b03990be0
2 changed files with 26 additions and 4 deletions

View File

@@ -320,8 +320,8 @@ class PostimgImageExtractor(ImagehostImageExtractor):
# Extract the download URL and the display filename from an image page
# and return them as a (url, filename) pair.
# NOTE(review): this span is a rendered diff hunk without +/- markers and
# with indentation stripped; it appears to contain both the pre-change and
# the post-change version of the last two statements - confirm against the
# real file before treating it as one function body.
def get_info(self, page):
# str.index() raises ValueError if the marker is absent (assumes page is a str)
pos = page.index(' id="download"')
# presumably rextract searches backwards from pos for the preceding
# href value - confirm against the text module
url , pos = text.rextract(page, ' href="', '"', pos)
# presumably the removed (old) lookup, keyed on the 'imagename' class
filename, pos = text.extract(page, 'class="imagename">', '<', pos)
return url, text.unescape(filename)
# presumably the added (new) lookup, keyed on the 'my-4' class
filename, pos = text.extract(page, ' class="my-4">', '<', pos)
# the conditional expression binds tighter than the comma: the result is
# (url, unescaped filename) or (url, None) when filename is falsy
return url, text.unescape(filename) if filename else None
class PostimgGalleryExtractor(ImagehostImageExtractor):
@@ -335,8 +335,16 @@ class PostimgGalleryExtractor(ImagehostImageExtractor):
# Request the gallery page and yield one Message.Queue entry per image URL
# found in it, each tagged with PostimgImageExtractor as '_extractor'.
# NOTE(review): this span is a rendered diff hunk without +/- markers and
# with indentation stripped; it appears to contain both the pre-change and
# the post-change version of the metadata dict and of the loop header -
# confirm against the real file before treating it as one function body.
def items(self):
page = self.request(self.page_url).text
# presumably the removed (old) lines: metadata without a title, and
# image URLs taken directly from the 'thumb' anchors
data = {"_extractor": PostimgImageExtractor}
for url in text.extract_iter(page, ' class="thumb"><a href="', '"'):
# presumably the added (new) lines start here
# title is read from between the og:title content attribute and the
# ' — Postimages' suffix (assumes text.extr returns the enclosed text)
title = text.extr(
page, 'property="og:title" content="', ' — Postimages"')
# built once before the loop, so every yielded message carries this
# same dict object
data = {
"_extractor" : PostimgImageExtractor,
"gallery_title": text.unescape(title),
}
# each data-image attribute value is used as a path token under self.root
for token in text.extract_iter(page, 'data-image="', '"'):
url = f"{self.root}/{token}"
# presumably an unchanged context line shared by both loop versions
yield Message.Queue, url, data

View File

@@ -39,12 +39,26 @@ __tests__ = (
"token" : "Wtn2b3hC",
},
{
"#url" : "http://postimg.org/image/5l1cogxcr/",
"#comment" : "no 'imagename' (#8505)",
"#category": ("imagehost", "postimg", "image"),
"#class" : imagehosts.PostimgImageExtractor,
"#results" : "https://i.postimg.cc/08bm81zX/fashion-show-dream-angels-fantasy-bra-2014-adria.jpg?dl=1",
"extension": "jpg",
"filename" : "fashion-show-dream-angels-fantasy-bra-2014-adria",
"token" : "5l1cogxcr",
},
{
"#url" : "https://postimg.cc/gallery/wxpDLgX",
"#category": ("imagehost", "postimg", "gallery"),
"#class" : imagehosts.PostimgGalleryExtractor,
"#pattern" : imagehosts.PostimgImageExtractor.pattern,
"#count" : 22,
"gallery_title": "My Gallery",
},
)