diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 8cb2efa1..cb98b999 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2018 Mike Fährmann +# Copyright 2014-2019 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -15,6 +15,9 @@ import time import random +BASE_PATTERN = r"(?:https?://)?(e[x-]|g\.e-)hentai\.org" + + class ExhentaiExtractor(Extractor): """Base class for exhentai extractors""" category = "exhentai" @@ -99,10 +102,10 @@ class ExhentaiExtractor(Extractor): class ExhentaiGalleryExtractor(ExhentaiExtractor): """Extractor for image galleries from exhentai.org""" subcategory = "gallery" - pattern = [r"(?:https?://)?(g\.e-|e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})"] + pattern = [BASE_PATTERN + r"/g/(\d+)/([\da-f]{10})"] test = [ ("https://exhentai.org/g/960460/4f0e369d82/", { - "keyword": "15b755fd3e2c710d7fd7ff112a5cdbf4333201b2", + "keyword": "900b8dccd23c41a76e915a8df70ae77c4e0f52c7", "content": "493d759de534355c9f55f8e365565b62411de146", }), ("https://exhentai.org/g/960461/4f0e369d82/", { @@ -152,7 +155,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): "gallery_id" : self.gid, "gallery_token": self.token, } - text.extract_all(page, ( + data, pos = text.extract_all(page, ( ("title" , '

', '

'), ("title_jp" , '

', '

'), ("date" , '>Posted:', ''), @@ -160,12 +163,17 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): ("gallery_size", '>File Size:', '<'), ("count" , '>Length:', ' '), ), values=data) + data["lang"] = util.language_to_code(data["language"]) data["title"] = text.unescape(data["title"]) data["title_jp"] = text.unescape(data["title_jp"]) data["count"] = text.parse_int(data["count"]) data["gallery_size"] = text.parse_bytes( data["gallery_size"].rstrip("Bb")) + data["tags"] = [ + text.unquote(tag) + for tag in text.extract_iter(page, 'hentai.org/tag/', '"', pos) + ] return data def get_images(self, page): @@ -258,12 +266,16 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): class ExhentaiSearchExtractor(ExhentaiExtractor): """Extractor for exhentai search results""" subcategory = "search" - pattern = [r"(?:https?://)?(?:g\.e-|e-|ex)hentai\.org/?\?(.*)$"] + pattern = [BASE_PATTERN + r"/?\?(.*)$"] test = [ ("https://exhentai.org/?f_search=touhou", None), - ("https://exhentai.org/?f_doujinshi=0&f_manga=0&f_artistcg=0" - "&f_gamecg=0&f_western=0&f_non-h=1&f_imageset=0&f_cosplay=0" - "&f_asianporn=0&f_misc=0&f_search=touhou&f_apply=Apply+Filter", None), + (("https://exhentai.org/?f_doujinshi=0&f_manga=0&f_artistcg=0" + "&f_gamecg=0&f_western=0&f_non-h=1&f_imageset=0&f_cosplay=0" + "&f_asianporn=0&f_misc=0&f_search=touhou&f_apply=Apply+Filter"), { + "pattern": ExhentaiGalleryExtractor.pattern[0], + "range": "1-30", + "count": 30, + }), ] def __init__(self, match): @@ -318,8 +330,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor): class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor): """Extractor for favorited exhentai galleries""" subcategory = "favorite" - pattern = [r"(?:https?://)?(?:g\.e-|e-|ex)hentai\.org" - r"/favorites\.php(?:\?(.*))?"] + pattern = [BASE_PATTERN + r"/favorites\.php(?:\?(.*))?"] test = [ ("https://exhentai.org/favorites.php", None), ("https://exhentai.org/favorites.php?favcat=1&f_search=touhou"