[imagefap] fix metadata extraction
This commit is contained in:
@@ -89,22 +89,21 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
|
||||
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
descr, pos = text.extract(
|
||||
page, '<meta name="description" content="Browse ', '"')
|
||||
count, pos = text.extract(page, ' 1 of ', ' pics"', pos)
|
||||
self.image_id = text.extract(page, 'id="img_ed_', '"', pos)[0]
|
||||
extr = text.extract_from(page)
|
||||
|
||||
title, _, descr = descr.partition(" porn picture gallery by ")
|
||||
uploader, _, tags = descr.partition(" to see hottest ")
|
||||
self._count = text.parse_int(count)
|
||||
return {
|
||||
data = {
|
||||
"gallery_id": text.parse_int(self.gid),
|
||||
"title": text.unescape(title),
|
||||
"uploader": uploader,
|
||||
"tags": tags[:-11].split(", "),
|
||||
"count": self._count,
|
||||
"tags": extr('name="keywords" content="', '"').split(", "),
|
||||
"uploader": extr("porn picture gallery by ", " to see hottest"),
|
||||
"title": text.unescape(extr("<title>", "<")),
|
||||
"count": text.parse_int(extr(' 1 of ', ' pics"')),
|
||||
}
|
||||
|
||||
self.image_id = extr('id="img_ed_', '"')
|
||||
self._count = data["count"]
|
||||
|
||||
return data
|
||||
|
||||
def get_images(self):
|
||||
"""Collect image-urls and -metadata"""
|
||||
url = "{}/photo/{}/".format(self.root, self.image_id)
|
||||
|
||||
Reference in New Issue
Block a user