From 134487ffb0c093c0439798939a572d48580f7b87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 26 Jan 2019 18:40:39 +0100 Subject: [PATCH] [exhentai] stop extraction if image limit is exceeded (#141) can be turned off with the `exhentai.limits' option --- docs/configuration.rst | 10 ++++++++++ gallery_dl/extractor/exhentai.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/docs/configuration.rst b/docs/configuration.rst index d6d5067e..d7709883 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -475,6 +475,16 @@ Description Minimum wait time in seconds before API requests. =========== ===== +extractor.exhentai.limits +------------------------- +=========== ===== +Type ``bool`` +Default ``true`` +Description Check image download limits + and stop extraction when they are exceeded. +=========== ===== + + extractor.exhentai.original --------------------------- =========== ===== diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index ee7e70db..1ca81a89 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -32,9 +32,12 @@ class ExhentaiExtractor(Extractor): def __init__(self): Extractor.__init__(self) + self.limits = self.config("limits", True) self.original = self.config("original", True) self.wait_min = self.config("wait-min", 3) self.wait_max = self.config("wait-max", 6) + + self._remaining = 0 if self.wait_max < self.wait_min: self.wait_max = self.wait_min self.session.headers["Referer"] = self.root + "/" @@ -63,6 +66,7 @@ class ExhentaiExtractor(Extractor): self.log.info("no username given; using e-hentai.org") self.root = "https://e-hentai.org" self.original = False + self.limits = False self.session.cookies["nw"] = "1" return cookies = self._login_impl(username, password) @@ -159,6 +163,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): (self.image_from_page(ipage),), self.images_from_api()) for url, image in 
images: data.update(image) + if self.limits: + self._check_limits(data) if "/fullimg.php" in url: data["extension"] = "" self.wait(1.5) @@ -271,6 +277,32 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): raise exception.NotFoundError("image page") return page + def _check_limits(self, data): + if not self._remaining or data["num"] % 20 == 0: + self._update_limits() + self._remaining -= data["cost"] + + if self._remaining <= 0: + url = "{}/s/{}/{}-{}".format( + self.root, data["image_token"], self.gallery_id, data["num"]) + self.log.error( + "Image limit reached! Reset it and continue with " + "'%s' as URL.", url) + raise exception.StopExtraction() + + def _update_limits(self): + url = "https://e-hentai.org/home.php" + cookies = { + cookie.name: cookie.value + for cookie in self.session.cookies + if cookie.domain == self.cookiedomain and cookie.name != "igneous" + } + + page = self.request(url, cookies=cookies).text + current, pos = text.extract(page, "<strong>", "</strong>") + maximum, pos = text.extract(page, "<strong>", "</strong>", pos) + self._remaining = text.parse_int(maximum) - text.parse_int(current) + @staticmethod def _parse_image_info(url): parts = url.split("/")[4].split("-")