diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index cb98b999..ee7e70db 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -11,8 +11,10 @@ from .common import Extractor, Message from .. import text, util, exception from ..cache import cache -import time +import itertools import random +import time +import math BASE_PATTERN = r"(?:https?://)?(e[x-]|g\.e-)hentai\.org" @@ -102,10 +104,12 @@ class ExhentaiExtractor(Extractor): class ExhentaiGalleryExtractor(ExhentaiExtractor): """Extractor for image galleries from exhentai.org""" subcategory = "gallery" - pattern = [BASE_PATTERN + r"/g/(\d+)/([\da-f]{10})"] + pattern = [BASE_PATTERN + + r"(?:/g/(\d+)/([\da-f]{10})" + r"|/s/([\da-f]{10})/(\d+)-(\d+))"] test = [ ("https://exhentai.org/g/960460/4f0e369d82/", { - "keyword": "900b8dccd23c41a76e915a8df70ae77c4e0f52c7", + "keyword": "ba0785e49e3877cfa3f91c1ad9a5ac7816339bf5", "content": "493d759de534355c9f55f8e365565b62411de146", }), ("https://exhentai.org/g/960461/4f0e369d82/", { @@ -114,6 +118,9 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): ("http://exhentai.org/g/962698/7f02358e00/", { "exception": exception.AuthorizationError, }), + ("https://exhentai.org/s/3957343c3b/960460-5", { + "count": 2, + }), ("https://e-hentai.org/g/960460/4f0e369d82/", None), ("https://g.e-hentai.org/g/960460/4f0e369d82/", None), ] @@ -122,52 +129,60 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): ExhentaiExtractor.__init__(self) self.key = {} self.count = 0 - self.version, self.gid, self.token = match.groups() - self.gid = text.parse_int(self.gid) + self.gallery_id = text.parse_int(match.group(2) or match.group(5)) + self.gallery_token = match.group(3) + self.image_token = match.group(4) + self.image_num = text.parse_int(match.group(6), 1) def items(self): self.login() - yield Message.Version, 1 - url = "{}/g/{}/{}/".format(self.root, self.gid, self.token) - response = self.request(url, expect=range(400, 500)) - page = response.text + if self.gallery_token: + gpage = self._gallery_page() + self.image_token = text.extract(gpage, 'hentai.org/s/', '"')[0] + self.wait() + ipage = self._image_page() + else: + ipage = self._image_page() + part = text.extract(ipage, 'hentai.org/g/', '"')[0] + self.gallery_token = part.split("/")[1] + self.wait() + gpage = self._gallery_page() - if response.status_code == 404 and "Gallery Not Available" in page: - raise exception.AuthorizationError() - if page.startswith(("Key missing", "Gallery not found")): - raise exception.NotFoundError("gallery") - - data = self.get_job_metadata(page) + data = self.get_metadata(gpage) self.count = data["count"] + + yield Message.Version, 1 yield Message.Directory, data - for url, image in self.get_images(page): + images = itertools.chain( + (self.image_from_page(ipage),), self.images_from_api()) + for url, image in images: data.update(image) if "/fullimg.php" in url: data["extension"] = "" self.wait(1.5) yield Message.Url, url, data - def get_job_metadata(self, page): - """Collect metadata for extractor-job""" - data = { - "gallery_id" : self.gid, - "gallery_token": self.token, - } + def get_metadata(self, page): + """Extract gallery metadata""" data, pos = text.extract_all(page, ( - ("title" , '