[behance] fix title extraction

This commit is contained in:
Mike Fährmann
2018-08-08 10:48:58 +02:00
parent 3fc2f269fa
commit 2a9f3341a2
2 changed files with 3 additions and 2 deletions

View File

@@ -67,7 +67,7 @@ class BehanceGalleryExtractor(Extractor):
users, pos = text.extract(
page, 'class="project-owner-info ', 'class="project-owner-actions')
title, pos = text.extract(
-page, '<div class="project-title">', '</div>', pos)
+page, 'project-title">', '</div>', pos)
fields, pos = text.extract(
page, '<ul id="project-fields-list">', '</ul>', pos)
stats, pos = text.extract(
@@ -80,7 +80,7 @@ class BehanceGalleryExtractor(Extractor):
return {
"gallery_id": text.parse_int(self.gallery_id),
-"title": text.unescape(title),
+"title": text.unescape(title or ""),
"user": ", ".join(users),
"fields": [f for f in text.split_html(fields) if f != ", "],
"date": text.parse_int(date),