replace 'wait-min/-max' with 'sleep-request'
on exhentai, idolcomplex, reactor
This commit is contained in:
@@ -949,21 +949,6 @@ Description
|
|||||||
Download full-sized original images if available.
|
Download full-sized original images if available.
|
||||||
|
|
||||||
|
|
||||||
extractor.exhentai.wait-min & .wait-max
|
|
||||||
---------------------------------------
|
|
||||||
Type
|
|
||||||
``float``
|
|
||||||
Default
|
|
||||||
``3.0`` and ``6.0``
|
|
||||||
Description
|
|
||||||
Minimum and maximum wait time in seconds between each image
|
|
||||||
|
|
||||||
ExHentai detects and blocks automated downloaders.
|
|
||||||
*gallery-dl* waits a randomly selected number of
|
|
||||||
seconds between ``wait-min`` and ``wait-max`` after
|
|
||||||
each image to prevent getting blocked.
|
|
||||||
|
|
||||||
|
|
||||||
extractor.flickr.access-token & .access-token-secret
|
extractor.flickr.access-token & .access-token-secret
|
||||||
----------------------------------------------------
|
----------------------------------------------------
|
||||||
Type
|
Type
|
||||||
@@ -1353,17 +1338,6 @@ Description
|
|||||||
Also search Plurk comments for URLs.
|
Also search Plurk comments for URLs.
|
||||||
|
|
||||||
|
|
||||||
extractor.reactor.wait-min & .wait-max
|
|
||||||
--------------------------------------
|
|
||||||
Type
|
|
||||||
``float``
|
|
||||||
Default
|
|
||||||
``3.0`` and ``6.0``
|
|
||||||
Description
|
|
||||||
Minimum and maximum wait time in seconds between HTTP requests
|
|
||||||
during the extraction process.
|
|
||||||
|
|
||||||
|
|
||||||
extractor.readcomiconline.captcha
|
extractor.readcomiconline.captcha
|
||||||
---------------------------------
|
---------------------------------
|
||||||
Type
|
Type
|
||||||
|
|||||||
@@ -12,8 +12,6 @@ from .common import Extractor, Message
|
|||||||
from .. import text, util, exception
|
from .. import text, util, exception
|
||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
import itertools
|
import itertools
|
||||||
import random
|
|
||||||
import time
|
|
||||||
import math
|
import math
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(e[x-]|g\.e-)hentai\.org"
|
BASE_PATTERN = r"(?:https?://)?(e[x-]|g\.e-)hentai\.org"
|
||||||
@@ -29,6 +27,7 @@ class ExhentaiExtractor(Extractor):
|
|||||||
cookienames = ("ipb_member_id", "ipb_pass_hash")
|
cookienames = ("ipb_member_id", "ipb_pass_hash")
|
||||||
cookiedomain = ".exhentai.org"
|
cookiedomain = ".exhentai.org"
|
||||||
root = "https://exhentai.org"
|
root = "https://exhentai.org"
|
||||||
|
request_interval = 5.0
|
||||||
|
|
||||||
LIMIT = False
|
LIMIT = False
|
||||||
|
|
||||||
@@ -46,8 +45,6 @@ class ExhentaiExtractor(Extractor):
|
|||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.limits = self.config("limits", True)
|
self.limits = self.config("limits", True)
|
||||||
self.original = self.config("original", True)
|
self.original = self.config("original", True)
|
||||||
self.wait_min = self.config("wait-min", 3)
|
|
||||||
self.wait_max = self.config("wait-max", 6)
|
|
||||||
|
|
||||||
if type(self.limits) is int:
|
if type(self.limits) is int:
|
||||||
self._limit_max = self.limits
|
self._limit_max = self.limits
|
||||||
@@ -56,8 +53,6 @@ class ExhentaiExtractor(Extractor):
|
|||||||
self._limit_max = 0
|
self._limit_max = 0
|
||||||
|
|
||||||
self._remaining = 0
|
self._remaining = 0
|
||||||
if self.wait_max < self.wait_min:
|
|
||||||
self.wait_max = self.wait_min
|
|
||||||
self.session.headers["Referer"] = self.root + "/"
|
self.session.headers["Referer"] = self.root + "/"
|
||||||
if version != "ex":
|
if version != "ex":
|
||||||
self.session.cookies.set("nw", "1", domain=self.cookiedomain)
|
self.session.cookies.set("nw", "1", domain=self.cookiedomain)
|
||||||
@@ -69,14 +64,6 @@ class ExhentaiExtractor(Extractor):
|
|||||||
raise exception.AuthorizationError()
|
raise exception.AuthorizationError()
|
||||||
return response
|
return response
|
||||||
|
|
||||||
def wait(self, waittime=None):
|
|
||||||
"""Wait for a randomly chosen amount of seconds"""
|
|
||||||
if not waittime:
|
|
||||||
waittime = random.uniform(self.wait_min, self.wait_max)
|
|
||||||
else:
|
|
||||||
waittime = random.uniform(waittime * 0.66, waittime * 1.33)
|
|
||||||
time.sleep(waittime)
|
|
||||||
|
|
||||||
def login(self):
|
def login(self):
|
||||||
"""Login and set necessary cookies"""
|
"""Login and set necessary cookies"""
|
||||||
if self.LIMIT:
|
if self.LIMIT:
|
||||||
@@ -200,7 +187,6 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|||||||
self.log.error("Failed to extract initial image token")
|
self.log.error("Failed to extract initial image token")
|
||||||
self.log.debug("Page content:\n%s", gpage)
|
self.log.debug("Page content:\n%s", gpage)
|
||||||
return
|
return
|
||||||
self.wait()
|
|
||||||
ipage = self._image_page()
|
ipage = self._image_page()
|
||||||
else:
|
else:
|
||||||
ipage = self._image_page()
|
ipage = self._image_page()
|
||||||
@@ -210,7 +196,6 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|||||||
self.log.debug("Page content:\n%s", ipage)
|
self.log.debug("Page content:\n%s", ipage)
|
||||||
return
|
return
|
||||||
self.gallery_token = part.split("/")[1]
|
self.gallery_token = part.split("/")[1]
|
||||||
self.wait()
|
|
||||||
gpage = self._gallery_page()
|
gpage = self._gallery_page()
|
||||||
|
|
||||||
data = self.get_metadata(gpage)
|
data = self.get_metadata(gpage)
|
||||||
@@ -225,7 +210,6 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|||||||
self._check_limits(data)
|
self._check_limits(data)
|
||||||
if "/fullimg.php" in url:
|
if "/fullimg.php" in url:
|
||||||
data["extension"] = ""
|
data["extension"] = ""
|
||||||
self.wait(self.wait_max / 4)
|
|
||||||
yield Message.Url, url, data
|
yield Message.Url, url, data
|
||||||
|
|
||||||
def get_metadata(self, page):
|
def get_metadata(self, page):
|
||||||
@@ -322,7 +306,6 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|||||||
"showkey": self.key["show"],
|
"showkey": self.key["show"],
|
||||||
}
|
}
|
||||||
for request["page"] in range(self.image_num + 1, self.count + 1):
|
for request["page"] in range(self.image_num + 1, self.count + 1):
|
||||||
self.wait()
|
|
||||||
page = self.request(api_url, method="POST", json=request).json()
|
page = self.request(api_url, method="POST", json=request).json()
|
||||||
imgkey = nextkey
|
imgkey = nextkey
|
||||||
nextkey, pos = text.extract(page["i3"], "'", "'")
|
nextkey, pos = text.extract(page["i3"], "'", "'")
|
||||||
@@ -372,7 +355,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|||||||
return page
|
return page
|
||||||
|
|
||||||
def _check_limits(self, data):
|
def _check_limits(self, data):
|
||||||
if not self._remaining or data["num"] % 20 == 0:
|
if not self._remaining or data["num"] % 25 == 0:
|
||||||
self._update_limits()
|
self._update_limits()
|
||||||
self._remaining -= data["cost"]
|
self._remaining -= data["cost"]
|
||||||
|
|
||||||
@@ -455,7 +438,6 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
|
|||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
self.login()
|
self.login()
|
||||||
yield Message.Version, 1
|
|
||||||
data = {"_extractor": ExhentaiGalleryExtractor}
|
data = {"_extractor": ExhentaiGalleryExtractor}
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
@@ -472,7 +454,6 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
|
|||||||
if 'class="ptdd">><' in page or ">No hits found</p>" in page:
|
if 'class="ptdd">><' in page or ">No hits found</p>" in page:
|
||||||
return
|
return
|
||||||
self.params["page"] += 1
|
self.params["page"] += 1
|
||||||
self.wait()
|
|
||||||
|
|
||||||
|
|
||||||
class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor):
|
class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor):
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2018-2020 Mike Fährmann
|
# Copyright 2018-2021 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -13,8 +13,6 @@ from .common import Message
|
|||||||
from ..cache import cache
|
from ..cache import cache
|
||||||
from .. import text, util, exception
|
from .. import text, util, exception
|
||||||
import collections
|
import collections
|
||||||
import random
|
|
||||||
import time
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
@@ -24,6 +22,7 @@ class IdolcomplexExtractor(SankakuExtractor):
|
|||||||
cookienames = ("login", "pass_hash")
|
cookienames = ("login", "pass_hash")
|
||||||
cookiedomain = "idol.sankakucomplex.com"
|
cookiedomain = "idol.sankakucomplex.com"
|
||||||
root = "https://" + cookiedomain
|
root = "https://" + cookiedomain
|
||||||
|
request_interval = 5.0
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
SankakuExtractor.__init__(self, match)
|
SankakuExtractor.__init__(self, match)
|
||||||
@@ -31,17 +30,12 @@ class IdolcomplexExtractor(SankakuExtractor):
|
|||||||
self.start_page = 1
|
self.start_page = 1
|
||||||
self.start_post = 0
|
self.start_post = 0
|
||||||
self.extags = self.config("tags", False)
|
self.extags = self.config("tags", False)
|
||||||
self.wait_min = self.config("wait-min", 3.0)
|
|
||||||
self.wait_max = self.config("wait-max", 6.0)
|
|
||||||
if self.wait_max < self.wait_min:
|
|
||||||
self.wait_max = self.wait_min
|
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
self.login()
|
self.login()
|
||||||
data = self.metadata()
|
data = self.metadata()
|
||||||
|
|
||||||
for post_id in util.advance(self.post_ids(), self.start_post):
|
for post_id in util.advance(self.post_ids(), self.start_post):
|
||||||
self.wait()
|
|
||||||
post = self._parse_post(post_id)
|
post = self._parse_post(post_id)
|
||||||
url = post["file_url"]
|
url = post["file_url"]
|
||||||
post.update(data)
|
post.update(data)
|
||||||
@@ -130,10 +124,6 @@ class IdolcomplexExtractor(SankakuExtractor):
|
|||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def wait(self):
|
|
||||||
"""Wait for a randomly chosen amount of seconds"""
|
|
||||||
time.sleep(random.uniform(self.wait_min, self.wait_max))
|
|
||||||
|
|
||||||
|
|
||||||
class IdolcomplexTagExtractor(IdolcomplexExtractor):
|
class IdolcomplexTagExtractor(IdolcomplexExtractor):
|
||||||
"""Extractor for images from idol.sankakucomplex.com by search-tags"""
|
"""Extractor for images from idol.sankakucomplex.com by search-tags"""
|
||||||
@@ -192,7 +182,6 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
|
|||||||
params["page"] = self.start_page
|
params["page"] = self.start_page
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
self.wait()
|
|
||||||
page = self.request(self.root, params=params, retries=10).text
|
page = self.request(self.root, params=params, retries=10).text
|
||||||
pos = page.find("<div id=more-popular-posts-link>") + 1
|
pos = page.find("<div id=more-popular-posts-link>") + 1
|
||||||
|
|
||||||
|
|||||||
@@ -11,11 +11,8 @@
|
|||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text
|
from .. import text
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import random
|
|
||||||
import time
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?((?:[^/.]+\.)?reactor\.cc)"
|
BASE_PATTERN = r"(?:https?://)?((?:[^/.]+\.)?reactor\.cc)"
|
||||||
|
|
||||||
|
|
||||||
@@ -25,17 +22,13 @@ class ReactorExtractor(Extractor):
|
|||||||
filename_fmt = "{post_id}_{num:>02}{title[:100]:?_//}.{extension}"
|
filename_fmt = "{post_id}_{num:>02}{title[:100]:?_//}.{extension}"
|
||||||
archive_fmt = "{post_id}_{num}"
|
archive_fmt = "{post_id}_{num}"
|
||||||
instances = ()
|
instances = ()
|
||||||
|
request_interval = 5.0
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.root = "http://" + match.group(1)
|
self.root = "http://" + match.group(1)
|
||||||
self.session.headers["Referer"] = self.root
|
self.session.headers["Referer"] = self.root
|
||||||
|
|
||||||
self.wait_min = self.config("wait-min", 3)
|
|
||||||
self.wait_max = self.config("wait-max", 6)
|
|
||||||
if self.wait_max < self.wait_min:
|
|
||||||
self.wait_max = self.wait_min
|
|
||||||
|
|
||||||
if not self.category:
|
if not self.category:
|
||||||
# set category based on domain name
|
# set category based on domain name
|
||||||
netloc = urllib.parse.urlsplit(self.root).netloc
|
netloc = urllib.parse.urlsplit(self.root).netloc
|
||||||
@@ -61,8 +54,6 @@ class ReactorExtractor(Extractor):
|
|||||||
|
|
||||||
def _pagination(self, url):
|
def _pagination(self, url):
|
||||||
while True:
|
while True:
|
||||||
time.sleep(random.uniform(self.wait_min, self.wait_max))
|
|
||||||
|
|
||||||
response = self.request(url)
|
response = self.request(url)
|
||||||
if response.history:
|
if response.history:
|
||||||
# sometimes there is a redirect from
|
# sometimes there is a redirect from
|
||||||
|
|||||||
Reference in New Issue
Block a user