diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 83c33a98..26243a54 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -110,7 +110,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): r"|/s/([\da-f]{10})/(\d+)-(\d+))") test = ( ("https://exhentai.org/g/960460/4f0e369d82/", { - "keyword": "993bfaf68b4823084fbd0d3339564666463b1432", + "keyword": "1532ca4d0e4e0738dc994ca725a228af04a4e480", "content": "493d759de534355c9f55f8e365565b62411de146", }), ("https://exhentai.org/g/960461/4f0e369d82/", { @@ -169,57 +169,55 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): def get_metadata(self, page): """Extract gallery metadata""" - data, pos = text.extract_all(page, ( - ("title" , '

', '

'), - ("title_jp" , '

', '

'), - ("date" , '>Posted:', ''), - ("parent" , '>Parent:Visible:', '<'), - ("language" , '>Language:', ' '), - ("gallery_size", '>File Size:', '<'), - ("count" , '>Length:', ' '), - )) + extr = text.extract_from(page) + data = { + "gallery_id" : self.gallery_id, + "gallery_token": self.gallery_token, + "title" : text.unescape(extr('

', '

')), + "title_jp" : text.unescape(extr('

', '

')), + "date" : text.parse_datetime(extr( + '>Posted:', ''), "%Y-%m-%d %H:%M"), + "parent" : extr( + '>Parent:
Visible:', '<'), + "language" : extr( + '>Language:', ' '), + "gallery_size" : text.parse_bytes(extr( + '>File Size:', '<').rstrip("Bb")), + "count" : text.parse_int(extr( + '>Length:', ' ')), + } data["lang"] = util.language_to_code(data["language"]) - data["title"] = text.unescape(data["title"]) - data["title_jp"] = text.unescape(data["title_jp"]) - data["count"] = text.parse_int(data["count"]) - data["gallery_id"] = self.gallery_id - data["gallery_token"] = self.gallery_token - data["gallery_size"] = text.parse_bytes( - data["gallery_size"].rstrip("Bb")) data["tags"] = [ text.unquote(tag) - for tag in text.extract_iter(page, 'hentai.org/tag/', '"', pos) + for tag in text.extract_iter(page, 'hentai.org/tag/', '"') ] + return data def image_from_page(self, page): """Get image url and data from webpage""" - info = text.extract_all(page, ( - (None , '