implement extractor.wait()

This commit is contained in:
Mike Fährmann
2020-01-04 23:21:45 +01:00
parent 5532e9c158
commit 569747a78d
3 changed files with 28 additions and 21 deletions

View File

@@ -122,6 +122,23 @@ class Extractor():
raise exception.HttpError(msg)
def wait(self, *, seconds=None, until=None, reason=None, adjust=1):
    """Sleep for a duration or until a given point in time.

    seconds: duration to wait; any value accepted by float()
    until:   Unix timestamp to wait for; only used if 'seconds' is falsy
    reason:  if given, an info message with the target time is logged
    adjust:  number of seconds added to the computed duration

    Raises ValueError if neither 'seconds' nor 'until' is given.
    Returns immediately, without logging, if the resulting duration
    is not positive (e.g. 'until' is already in the past).
    """
    now = datetime.datetime.now()

    if seconds:
        seconds = float(seconds)
        until = now + datetime.timedelta(seconds=seconds)
    elif until:
        until = datetime.datetime.fromtimestamp(float(until))
        seconds = (until - now).total_seconds()
    else:
        raise ValueError("Either 'seconds' or 'until' is required")

    # A deadline that has already passed yields a negative duration,
    # and time.sleep() raises ValueError for negative arguments.
    seconds += adjust
    if seconds <= 0:
        return

    if reason:
        isotime = until.time().isoformat("seconds")
        self.log.info("Waiting until %s for %s.", isotime, reason)
    time.sleep(seconds)
def _get_auth_info(self):
"""Return authentication information as (username, password) tuple"""
username = self.config("username")

View File

@@ -11,7 +11,6 @@
from .common import Extractor, Message
from .. import text, util, extractor, exception
from ..cache import cache
import time
class RedditExtractor(Extractor):
@@ -278,11 +277,13 @@ class RedditAPI():
params["raw_json"] = 1
self.authenticate()
response = self.extractor.request(url, params=params, fatal=None)
remaining = response.headers.get("x-ratelimit-remaining")
if remaining and float(remaining) < 2:
wait = int(response.headers["x-ratelimit-reset"])
self.log.info("Waiting %d seconds for ratelimit reset", wait)
time.sleep(wait)
reset = response.headers["x-ratelimit-reset"]
self.extractor.wait(seconds=reset, reason="rate limit reset")
return self._call(endpoint, params)
data = response.json()
if "error" in data:
if data["error"] == 403:

View File

@@ -12,7 +12,6 @@ from .common import Extractor, Message
from .. import text, oauth, extractor, exception
from datetime import datetime, timedelta
import re
import time
def _original_inline_image(url):
@@ -408,27 +407,17 @@ class TumblrAPI(oauth.OAuth1API):
# daily rate limit
if response.headers.get("x-ratelimit-perday-remaining") == "0":
reset = response.headers.get("x-ratelimit-perday-reset")
until = datetime.now() + timedelta(seconds=float(reset))
self.log.error("Daily API rate limit exceeded")
raise exception.StopExtraction(
"Daily API rate limit exceeded: aborting; "
"rate limit will reset at %s", self._to_time(reset),
)
"Aborting - Rate limit will reset at %s",
until.time().isoformat("seconds"))
# hourly rate limit
reset = response.headers.get("x-ratelimit-perhour-reset")
if reset:
self.log.info(
"Hourly API rate limit exceeded; waiting until "
"%s for rate limit reset", self._to_time(reset),
)
time.sleep(int(reset) + 1)
self.log.info("Hourly API rate limit exceeded")
self.extractor.wait(seconds=reset, reason="rate limit reset")
return self._call(blog, endpoint, params)
raise exception.StopExtraction(data)
@staticmethod
def _to_time(reset):
try:
reset_time = datetime.now() + timedelta(seconds=int(reset))
except (ValueError, TypeError):
return "?"
return reset_time.strftime("%H:%M:%S")