diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 83c33a98..26243a54 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -110,7 +110,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
r"|/s/([\da-f]{10})/(\d+)-(\d+))")
test = (
("https://exhentai.org/g/960460/4f0e369d82/", {
- "keyword": "993bfaf68b4823084fbd0d3339564666463b1432",
+ "keyword": "1532ca4d0e4e0738dc994ca725a228af04a4e480",
"content": "493d759de534355c9f55f8e365565b62411de146",
}),
("https://exhentai.org/g/960461/4f0e369d82/", {
@@ -169,57 +169,55 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
def get_metadata(self, page):
"""Extract gallery metadata"""
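- data, pos = text.extract_all(page, (
- ("title" , '<h1 id="gn">', '</h1>'),
- ("title_jp" , '<h1 id="gj">', '</h1>'),
- ("date" , '>Posted:</td><td class="gdt2">', '</td>'),
- ("parent" , '>Parent:</td><td class="gdt2"><a href="', '"'),
- ("visible" , '>Visible:</td><td class="gdt2">', '<'),
- ("language" , '>Language:</td><td class="gdt2">', ' '),
- ("gallery_size", '>File Size:</td><td class="gdt2">', '<'),
- ("count" , '>Length:</td><td class="gdt2">', ' '),
- ))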
+ extr = text.extract_from(page)
+ data = {
+ "gallery_id" : self.gallery_id,
+ "gallery_token": self.gallery_token,
+ "title" : text.unescape(extr('<h1 id="gn">', '</h1>')),
+ "title_jp" : text.unescape(extr('<h1 id="gj">', '</h1>')),
+ "date" : text.parse_datetime(extr(
+ '>Posted:</td><td class="gdt2">', '</td>'), "%Y-%m-%d %H:%M"),
+ "parent" : extr(
+ '>Parent:</td><td class="gdt2"><a href="', '"'),
+ "visible" : extr(
+ '>Visible:</td><td class="gdt2">', '<'),
+ "language" : extr(
+ '>Language:</td><td class="gdt2">', ' '),
+ "gallery_size" : text.parse_bytes(extr(
+ '>File Size:</td><td class="gdt2">', '<').rstrip("Bb")),
+ "count" : text.parse_int(extr(
+ '>Length:</td><td class="gdt2">', ' ')),
+ }
data["lang"] = util.language_to_code(data["language"])
- data["title"] = text.unescape(data["title"])
- data["title_jp"] = text.unescape(data["title_jp"])
- data["count"] = text.parse_int(data["count"])
- data["gallery_id"] = self.gallery_id
- data["gallery_token"] = self.gallery_token
- data["gallery_size"] = text.parse_bytes(
- data["gallery_size"].rstrip("Bb"))
data["tags"] = [
text.unquote(tag)
- for tag in text.extract_iter(page, 'hentai.org/tag/', '"', pos)
+ for tag in text.extract_iter(page, 'hentai.org/tag/', '"')
]
+
return data
def image_from_page(self, page):
"""Get image url and data from webpage"""
- info = text.extract_all(page, (
- (None , ' |