merge #5094: [webtoons] fix extraction of comic and episode names that contain commas

This commit is contained in:
Mike Fährmann
2024-01-21 00:47:26 +01:00
2 changed files with 41 additions and 4 deletions

View File

@@ -88,10 +88,20 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
def metadata(self, page):
extr = text.extract_from(page)
keywords = extr('<meta name="keywords" content="', '"').split(", ")
title = extr('<meta property="og:title" content="', '"')
descr = extr('<meta property="og:description" content="', '"')
if extr('<div class="subj_info"', '\n'):
comic_name = extr('>', '<')
episode_name = extr('<h1 class="subj_episode" title="', '"')
else:
comic_name = episode_name = ""
if extr('<span class="tx _btnOpenEpisodeList ', '"'):
episode = extr('>#', '<')
else:
episode = ""
if extr('<div class="author_area"', '\n'):
username = extr('/creator/', '"')
author_name = extr('<span>', '</span>')
@@ -104,9 +114,9 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
"title_no" : self.title_no,
"episode_no" : self.episode_no,
"title" : text.unescape(title),
"episode" : keywords[1],
"comic_name" : text.unescape(keywords[0]),
"episode_name": text.unescape(keywords[2]),
"episode" : episode,
"comic_name" : text.unescape(comic_name),
"episode_name": text.unescape(episode_name),
"username" : username,
"author_name" : text.unescape(author_name),
"description" : text.unescape(descr),