[webtoons] extract more metadata
This commit is contained in:
@@ -87,23 +87,30 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
         self.episode_no = params.get("episode_no")
 
     def metadata(self, page):
-        keywords, pos = text.extract(
-            page, '<meta name="keywords" content="', '"')
-        title, pos = text.extract(
-            page, '<meta property="og:title" content="', '"', pos)
-        descr, pos = text.extract(
-            page, '<meta property="og:description" content="', '"', pos)
+        extr = text.extract_from(page)
+        keywords = extr('<meta name="keywords" content="', '"').split(", ")
+        title = extr('<meta property="og:title" content="', '"')
+        descr = extr('<meta property="og:description" content="', '"')
+
+        author_area = extr('<div class="author_area">', '</div>')
+        aa_extr = text.extract_from(author_area)
+        username = aa_extr('/creator/', '"')
+        author_name = aa_extr('<span>', '</span>')
 
         return {
-            "genre"      : self.genre,
-            "comic"      : self.comic,
-            "title_no"   : self.title_no,
-            "episode_no" : self.episode_no,
-            "title"      : text.unescape(title),
-            "episode"    : keywords.split(", ")[1],
-            "description": text.unescape(descr),
-            "lang"       : self.lang,
-            "language"   : util.code_to_language(self.lang),
+            "genre"       : self.genre,
+            "comic"       : self.comic,
+            "title_no"    : self.title_no,
+            "episode_no"  : self.episode_no,
+            "title"       : text.unescape(title),
+            "episode"     : keywords[1],
+            "comic_name"  : text.unescape(keywords[0]),
+            "episode_name": text.unescape(keywords[2]),
+            "username"    : username,
+            "author_name" : text.unescape(author_name),
+            "description" : text.unescape(descr),
+            "lang"        : self.lang,
+            "language"    : util.code_to_language(self.lang),
         }
 
     @staticmethod
@@ -37,6 +37,18 @@ __tests__ = (
     "title_no" : "312584",
 },
 
+{
+    "#url"     : "https://www.webtoons.com/en/canvas/i-want-to-be-a-cute-anime-girl/209-the-storys-story/viewer?title_no=349416&episode_no=214",
+    "#category": ("", "webtoons", "episode"),
+    "#class"   : webtoons.WebtoonsEpisodeExtractor,
+    "#count"   : 4,
+
+    "comic_name"  : "I want to be a cute anime girl",
+    "episode_name": "209 - The story's story",
+    "username"    : "m9huj",
+    "author_name" : "Azul Crescent",
+},
+
 {
     "#url" : "https://www.webtoons.com/en/comedy/live-with-yourself/list?title_no=919",
     "#comment" : "english",
Reference in New Issue
Block a user