diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index 1511e211..24dea67f 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -40,32 +40,23 @@ class HitomiGalleryExtractor(GalleryExtractor): GalleryExtractor.__init__(self, match, url) def metadata(self, page): - pos = page.index('

', '<', pos) - artist, pos = extr(page, '

', '

', pos) - group , pos = extr(page, 'Group', '', pos) - gtype , pos = extr(page, 'Type', '', pos) - lang , pos = extr(page, 'Language', '', pos) - series, pos = extr(page, 'Series', '', pos) - chars , pos = extr(page, 'Characters', '', pos) - tags , pos = extr(page, 'Tags', '', pos) - date , pos = extr(page, '', '', pos) - lang = None if lang == "N/A" else text.remove_html(lang) - - return { + extr = text.extract_from(page, page.index('

', '<').strip()), + "artist" : self._prep(extr('

', '

')), + "group" : self._prep(extr('Group', '')), + "type" : self._prep_1(extr('Type', '')), + "language" : self._prep_1(extr('Language', '')), + "parody" : self._prep(extr('Series', '')), + "characters": self._prep(extr('Characters', '')), + "tags" : self._prep(extr('Tags', '')), + "date" : extr('', ''), } + if data["language"] == "N/A": + data["language"] = None + data["lang"] = util.language_to_code(data["language"]) + return data def images(self, page): # see https://ltn.hitomi.la/common.js @@ -84,8 +75,12 @@ class HitomiGalleryExtractor(GalleryExtractor): ] @staticmethod - def _prepare(value): + def _prep(value): return [ text.unescape(string.capwords(v)) for v in text.extract_iter(value or "", '.html">', '<') ] + + @staticmethod + def _prep_1(value): + return text.remove_html(value).capitalize()