replace 'wait-min/-max' with 'sleep-request'

on exhentai, idolcomplex, reactor
This commit is contained in:
Mike Fährmann
2021-03-02 22:55:45 +01:00
parent e165e6c265
commit bae874f370
4 changed files with 5 additions and 70 deletions

View File

@@ -949,21 +949,6 @@ Description
Download full-sized original images if available. Download full-sized original images if available.
extractor.exhentai.wait-min & .wait-max
---------------------------------------
Type
``float``
Default
``3.0`` and ``6.0``
Description
Minimum and maximum wait time in seconds between images.
ExHentai detects and blocks automated downloaders.
*gallery-dl* waits a randomly selected number of
seconds between ``wait-min`` and ``wait-max`` after
each image to prevent getting blocked.
extractor.flickr.access-token & .access-token-secret extractor.flickr.access-token & .access-token-secret
---------------------------------------------------- ----------------------------------------------------
Type Type
@@ -1353,17 +1338,6 @@ Description
Also search Plurk comments for URLs. Also search Plurk comments for URLs.
extractor.reactor.wait-min & .wait-max
--------------------------------------
Type
``float``
Default
``3.0`` and ``6.0``
Description
Minimum and maximum wait time in seconds between HTTP requests
during the extraction process.
extractor.readcomiconline.captcha extractor.readcomiconline.captcha
--------------------------------- ---------------------------------
Type Type

View File

@@ -12,8 +12,6 @@ from .common import Extractor, Message
from .. import text, util, exception from .. import text, util, exception
from ..cache import cache from ..cache import cache
import itertools import itertools
import random
import time
import math import math
BASE_PATTERN = r"(?:https?://)?(e[x-]|g\.e-)hentai\.org" BASE_PATTERN = r"(?:https?://)?(e[x-]|g\.e-)hentai\.org"
@@ -29,6 +27,7 @@ class ExhentaiExtractor(Extractor):
cookienames = ("ipb_member_id", "ipb_pass_hash") cookienames = ("ipb_member_id", "ipb_pass_hash")
cookiedomain = ".exhentai.org" cookiedomain = ".exhentai.org"
root = "https://exhentai.org" root = "https://exhentai.org"
request_interval = 5.0
LIMIT = False LIMIT = False
@@ -46,8 +45,6 @@ class ExhentaiExtractor(Extractor):
Extractor.__init__(self, match) Extractor.__init__(self, match)
self.limits = self.config("limits", True) self.limits = self.config("limits", True)
self.original = self.config("original", True) self.original = self.config("original", True)
self.wait_min = self.config("wait-min", 3)
self.wait_max = self.config("wait-max", 6)
if type(self.limits) is int: if type(self.limits) is int:
self._limit_max = self.limits self._limit_max = self.limits
@@ -56,8 +53,6 @@ class ExhentaiExtractor(Extractor):
self._limit_max = 0 self._limit_max = 0
self._remaining = 0 self._remaining = 0
if self.wait_max < self.wait_min:
self.wait_max = self.wait_min
self.session.headers["Referer"] = self.root + "/" self.session.headers["Referer"] = self.root + "/"
if version != "ex": if version != "ex":
self.session.cookies.set("nw", "1", domain=self.cookiedomain) self.session.cookies.set("nw", "1", domain=self.cookiedomain)
@@ -69,14 +64,6 @@ class ExhentaiExtractor(Extractor):
raise exception.AuthorizationError() raise exception.AuthorizationError()
return response return response
def wait(self, waittime=None):
"""Wait for a randomly chosen amount of seconds"""
if not waittime:
waittime = random.uniform(self.wait_min, self.wait_max)
else:
waittime = random.uniform(waittime * 0.66, waittime * 1.33)
time.sleep(waittime)
def login(self): def login(self):
"""Login and set necessary cookies""" """Login and set necessary cookies"""
if self.LIMIT: if self.LIMIT:
@@ -200,7 +187,6 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
self.log.error("Failed to extract initial image token") self.log.error("Failed to extract initial image token")
self.log.debug("Page content:\n%s", gpage) self.log.debug("Page content:\n%s", gpage)
return return
self.wait()
ipage = self._image_page() ipage = self._image_page()
else: else:
ipage = self._image_page() ipage = self._image_page()
@@ -210,7 +196,6 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
self.log.debug("Page content:\n%s", ipage) self.log.debug("Page content:\n%s", ipage)
return return
self.gallery_token = part.split("/")[1] self.gallery_token = part.split("/")[1]
self.wait()
gpage = self._gallery_page() gpage = self._gallery_page()
data = self.get_metadata(gpage) data = self.get_metadata(gpage)
@@ -225,7 +210,6 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
self._check_limits(data) self._check_limits(data)
if "/fullimg.php" in url: if "/fullimg.php" in url:
data["extension"] = "" data["extension"] = ""
self.wait(self.wait_max / 4)
yield Message.Url, url, data yield Message.Url, url, data
def get_metadata(self, page): def get_metadata(self, page):
@@ -322,7 +306,6 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
"showkey": self.key["show"], "showkey": self.key["show"],
} }
for request["page"] in range(self.image_num + 1, self.count + 1): for request["page"] in range(self.image_num + 1, self.count + 1):
self.wait()
page = self.request(api_url, method="POST", json=request).json() page = self.request(api_url, method="POST", json=request).json()
imgkey = nextkey imgkey = nextkey
nextkey, pos = text.extract(page["i3"], "'", "'") nextkey, pos = text.extract(page["i3"], "'", "'")
@@ -372,7 +355,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
return page return page
def _check_limits(self, data): def _check_limits(self, data):
if not self._remaining or data["num"] % 20 == 0: if not self._remaining or data["num"] % 25 == 0:
self._update_limits() self._update_limits()
self._remaining -= data["cost"] self._remaining -= data["cost"]
@@ -455,7 +438,6 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
def items(self): def items(self):
self.login() self.login()
yield Message.Version, 1
data = {"_extractor": ExhentaiGalleryExtractor} data = {"_extractor": ExhentaiGalleryExtractor}
while True: while True:
@@ -472,7 +454,6 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
if 'class="ptdd">&gt;<' in page or ">No hits found</p>" in page: if 'class="ptdd">&gt;<' in page or ">No hits found</p>" in page:
return return
self.params["page"] += 1 self.params["page"] += 1
self.wait()
class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor): class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor):

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2018-2020 Mike Fährmann # Copyright 2018-2021 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -13,8 +13,6 @@ from .common import Message
from ..cache import cache from ..cache import cache
from .. import text, util, exception from .. import text, util, exception
import collections import collections
import random
import time
import re import re
@@ -24,6 +22,7 @@ class IdolcomplexExtractor(SankakuExtractor):
cookienames = ("login", "pass_hash") cookienames = ("login", "pass_hash")
cookiedomain = "idol.sankakucomplex.com" cookiedomain = "idol.sankakucomplex.com"
root = "https://" + cookiedomain root = "https://" + cookiedomain
request_interval = 5.0
def __init__(self, match): def __init__(self, match):
SankakuExtractor.__init__(self, match) SankakuExtractor.__init__(self, match)
@@ -31,17 +30,12 @@ class IdolcomplexExtractor(SankakuExtractor):
self.start_page = 1 self.start_page = 1
self.start_post = 0 self.start_post = 0
self.extags = self.config("tags", False) self.extags = self.config("tags", False)
self.wait_min = self.config("wait-min", 3.0)
self.wait_max = self.config("wait-max", 6.0)
if self.wait_max < self.wait_min:
self.wait_max = self.wait_min
def items(self): def items(self):
self.login() self.login()
data = self.metadata() data = self.metadata()
for post_id in util.advance(self.post_ids(), self.start_post): for post_id in util.advance(self.post_ids(), self.start_post):
self.wait()
post = self._parse_post(post_id) post = self._parse_post(post_id)
url = post["file_url"] url = post["file_url"]
post.update(data) post.update(data)
@@ -130,10 +124,6 @@ class IdolcomplexExtractor(SankakuExtractor):
return data return data
def wait(self):
"""Wait for a randomly chosen amount of seconds"""
time.sleep(random.uniform(self.wait_min, self.wait_max))
class IdolcomplexTagExtractor(IdolcomplexExtractor): class IdolcomplexTagExtractor(IdolcomplexExtractor):
"""Extractor for images from idol.sankakucomplex.com by search-tags""" """Extractor for images from idol.sankakucomplex.com by search-tags"""
@@ -192,7 +182,6 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
params["page"] = self.start_page params["page"] = self.start_page
while True: while True:
self.wait()
page = self.request(self.root, params=params, retries=10).text page = self.request(self.root, params=params, retries=10).text
pos = page.find("<div id=more-popular-posts-link>") + 1 pos = page.find("<div id=more-popular-posts-link>") + 1

View File

@@ -11,11 +11,8 @@
from .common import Extractor, Message from .common import Extractor, Message
from .. import text from .. import text
import urllib.parse import urllib.parse
import random
import time
import json import json
BASE_PATTERN = r"(?:https?://)?((?:[^/.]+\.)?reactor\.cc)" BASE_PATTERN = r"(?:https?://)?((?:[^/.]+\.)?reactor\.cc)"
@@ -25,17 +22,13 @@ class ReactorExtractor(Extractor):
filename_fmt = "{post_id}_{num:>02}{title[:100]:?_//}.{extension}" filename_fmt = "{post_id}_{num:>02}{title[:100]:?_//}.{extension}"
archive_fmt = "{post_id}_{num}" archive_fmt = "{post_id}_{num}"
instances = () instances = ()
request_interval = 5.0
def __init__(self, match): def __init__(self, match):
Extractor.__init__(self, match) Extractor.__init__(self, match)
self.root = "http://" + match.group(1) self.root = "http://" + match.group(1)
self.session.headers["Referer"] = self.root self.session.headers["Referer"] = self.root
self.wait_min = self.config("wait-min", 3)
self.wait_max = self.config("wait-max", 6)
if self.wait_max < self.wait_min:
self.wait_max = self.wait_min
if not self.category: if not self.category:
# set category based on domain name # set category based on domain name
netloc = urllib.parse.urlsplit(self.root).netloc netloc = urllib.parse.urlsplit(self.root).netloc
@@ -61,8 +54,6 @@ class ReactorExtractor(Extractor):
def _pagination(self, url): def _pagination(self, url):
while True: while True:
time.sleep(random.uniform(self.wait_min, self.wait_max))
response = self.request(url) response = self.request(url)
if response.history: if response.history:
# sometimes there is a redirect from # sometimes there is a redirect from