From e95b24f056d9998ecdbf0e7a7cda1f3e9dcff39c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 7 Jan 2019 18:04:16 +0100 Subject: [PATCH] [reactor] add wait-min & -max options (#148) --- docs/configuration.rst | 10 ++++++++++ gallery_dl/extractor/reactor.py | 8 ++++++++ 2 files changed, 18 insertions(+) diff --git a/docs/configuration.rst b/docs/configuration.rst index 0ec15634..f5e188cd 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -583,6 +583,16 @@ Description Download Pixiv's Ugoira animations or ignore them. =========== ===== +extractor.reactor.wait-min & .wait-max +-------------------------------------- +=========== ===== +Type ``float`` +Default ``3.0`` and ``6.0`` +Description Minimum and maximum wait time in seconds between HTTP requests + during the extraction process. +=========== ===== + + extractor.recursive.blacklist ----------------------------- =========== ===== diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py index a8dae15e..6a3abbeb 100644 --- a/gallery_dl/extractor/reactor.py +++ b/gallery_dl/extractor/reactor.py @@ -11,6 +11,8 @@ from .common import SharedConfigExtractor, Message from .. import text import urllib.parse +import random +import time import json @@ -30,6 +32,11 @@ class ReactorExtractor(SharedConfigExtractor): self.root = "http://" + match.group(1) self.session.headers["Referer"] = self.root + self.wait_min = self.config("wait-min", 3) + self.wait_max = self.config("wait-max", 6) + if self.wait_max < self.wait_min: + self.wait_max = self.wait_min + if not self.category: # set category based on domain name netloc = urllib.parse.urlsplit(self.root).netloc @@ -55,6 +62,7 @@ class ReactorExtractor(SharedConfigExtractor): def _pagination(self, url): while True: + time.sleep(random.uniform(self.wait_min, self.wait_max)) page = self.request(url).text yield from text.extract_iter(