[hitomi] simplify data extraction code
This commit is contained in:
@@ -40,32 +40,23 @@ class HitomiGalleryExtractor(GalleryExtractor):
|
|||||||
GalleryExtractor.__init__(self, match, url)
|
GalleryExtractor.__init__(self, match, url)
|
||||||
|
|
||||||
def metadata(self, page):
|
def metadata(self, page):
|
||||||
pos = page.index('<h1><a href="/reader/')
|
extr = text.extract_from(page, page.index('<h1><a href="/reader/'))
|
||||||
extr = text.extract
|
data = {
|
||||||
title , pos = extr(page, '.html">', '<', pos)
|
|
||||||
artist, pos = extr(page, '<h2>', '</h2>', pos)
|
|
||||||
group , pos = extr(page, '<td>Group</td><td>', '</td>', pos)
|
|
||||||
gtype , pos = extr(page, '<td>Type</td><td>', '</td>', pos)
|
|
||||||
lang , pos = extr(page, '<td>Language</td><td>', '</td>', pos)
|
|
||||||
series, pos = extr(page, '<td>Series</td><td>', '</td>', pos)
|
|
||||||
chars , pos = extr(page, '<td>Characters</td><td>', '</td>', pos)
|
|
||||||
tags , pos = extr(page, '<td>Tags</td><td>', '</td>', pos)
|
|
||||||
date , pos = extr(page, '<span class="date">', '</span>', pos)
|
|
||||||
lang = None if lang == "N/A" else text.remove_html(lang)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"gallery_id": self.gallery_id,
|
"gallery_id": self.gallery_id,
|
||||||
"title" : text.unescape(title.strip()),
|
"title" : text.unescape(extr('.html">', '<').strip()),
|
||||||
"artist" : self._prepare(artist),
|
"artist" : self._prep(extr('<h2>', '</h2>')),
|
||||||
"group" : self._prepare(group),
|
"group" : self._prep(extr('<td>Group</td><td>', '</td>')),
|
||||||
"parody" : self._prepare(series),
|
"type" : self._prep_1(extr('<td>Type</td><td>', '</td>')),
|
||||||
"characters": self._prepare(chars),
|
"language" : self._prep_1(extr('<td>Language</td><td>', '</td>')),
|
||||||
"tags" : self._prepare(tags),
|
"parody" : self._prep(extr('<td>Series</td><td>', '</td>')),
|
||||||
"type" : text.remove_html(gtype).capitalize(),
|
"characters": self._prep(extr('<td>Characters</td><td>', '</td>')),
|
||||||
"lang" : util.language_to_code(lang),
|
"tags" : self._prep(extr('<td>Tags</td><td>', '</td>')),
|
||||||
"language" : lang,
|
"date" : extr('<span class="date">', '</span>'),
|
||||||
"date" : date,
|
|
||||||
}
|
}
|
||||||
|
if data["language"] == "N/A":
|
||||||
|
data["language"] = None
|
||||||
|
data["lang"] = util.language_to_code(data["language"])
|
||||||
|
return data
|
||||||
|
|
||||||
def images(self, page):
|
def images(self, page):
|
||||||
# see https://ltn.hitomi.la/common.js
|
# see https://ltn.hitomi.la/common.js
|
||||||
@@ -84,8 +75,12 @@ class HitomiGalleryExtractor(GalleryExtractor):
|
|||||||
]
|
]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _prepare(value):
|
def _prep(value):
|
||||||
return [
|
return [
|
||||||
text.unescape(string.capwords(v))
|
text.unescape(string.capwords(v))
|
||||||
for v in text.extract_iter(value or "", '.html">', '<')
|
for v in text.extract_iter(value or "", '.html">', '<')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _prep_1(value):
|
||||||
|
return text.remove_html(value).capitalize()
|
||||||
|
|||||||
Reference in New Issue
Block a user