From 61775d90831704d012b9f3d6c5453ca738bc0724 Mon Sep 17 00:00:00 2001 From: Lorenz Diener Date: Fri, 25 Nov 2016 23:14:00 +0100 Subject: Rate limiting now works. --- docs/index.rst | 27 +++++++++++++++++ mastodon/Mastodon.py | 86 ++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 87 insertions(+), 26 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index e7c9366..02676a4 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -39,6 +39,33 @@ as a single python module. By default, it talks to the `Mastodon flagship instance`_, but it can be set to talk to any node running Mastodon. +A note about rate limits +------------------------ +Mastodon's API rate-limits per IP. Mastodon.py has three modes for dealing +with rate limiting that you can pass to the constructor, "throw", "wait" +and "pace", "wait" being the default. + +In "throw" mode, Mastodon.py makes no attempt to stick to rate limits. When +a request hits the rate limit, it simply throws a MastodonRatelimitError. This is +for applications that need to handle all rate limiting themselves (e.g. interactive apps), +or applications wanting to use Mastodon.py in a multi-threaded context ("wait" and "pace" +modes are not thread safe). + +In "wait" mode, once a request hits the rate limit, Mastodon.py will wait until +the rate limit resets and then try again, until the request succeeds or an error +is encountered. This mode is for applications that would rather just not worry about rate limits +much, don't poll the API all that often, and are okay with a call sometimes just taking +a while. + +In "pace" mode, Mastodon.py will delay each new request after the first one such that, +if requests were to continue at the same rate, only a certain fraction (set in the +constructor as ratelimit_pacefactor) of the rate limit will be used up. The fraction can +be (and by default, is) greater than one. If the rate limit is hit, "pace" behaves like +"wait". 
This mode is probably the most advanced one and allows you to just poll in +a loop without ever sleeping at all yourself. It is for applications that would rather +just pretend there is no such thing as a rate limit and are fine with sometimes not +being very interactive. + A note about IDs ---------------- Mastodons API uses IDs in several places: User IDs, Toot IDs, ... diff --git a/mastodon/Mastodon.py b/mastodon/Mastodon.py index bc1c52b..bb16d95 100644 --- a/mastodon/Mastodon.py +++ b/mastodon/Mastodon.py @@ -7,7 +7,10 @@ import mimetypes import time import random import string -from datetime import datetime +import pytz +import datetime +import dateutil +import dateutil.parser class Mastodon: """ @@ -62,12 +65,12 @@ class Mastodon: ### # Authentication, including constructor ### - def __init__(self, client_id, client_secret = None, access_token = None, api_base_url = __DEFAULT_BASE_URL, debug_requests = False, ratelimit_method = "wait", ratelimit_pacefactor = 0.9): + def __init__(self, client_id, client_secret = None, access_token = None, api_base_url = __DEFAULT_BASE_URL, debug_requests = False, ratelimit_method = "wait", ratelimit_pacefactor = 1.1): """ Create a new API wrapper instance based on the given client_secret and client_id. If you give a client_id and it is not a file, you must also give a secret. - You can also directly specify an access_token, directly or as a file. + You can also specify an access_token, directly or as a file (as written by log_in). Mastodon.py can try to respect rate limits in several ways, controlled by ratelimit_method. 
"throw" makes functions throw a MastodonRatelimitError when the rate @@ -92,7 +95,7 @@ class Mastodon: self.ratelimit_reset = time.time() self.ratelimit_remaining = 150 self.ratelimit_lastcall = time.time() - self.ratelimit_pacefactor = 0.9 + self.ratelimit_pacefactor = ratelimit_pacefactor if os.path.isfile(self.client_id): with open(self.client_id, 'r') as secret_file: @@ -426,15 +429,26 @@ class Mastodon: ### # Internal helpers, dragons probably ### - def __api_request(self, method, endpoint, params = {}, files = {}, do_ratelimiting = True): + def __datetime_to_epoch(self, date_time): """ - Internal API request helper. + Converts a python datetime to unix epoch, accounting for + time zones and such. - TODO FIXME: time.time() does not match server time neccesarily. Using the time from the request - would be correct. + Assumes UTC if timezone is not given. + """ + date_time_utc = None + if date_time.tzinfo == None: + date_time_utc = date_time.replace(tzinfo = pytz.utc) + else: + date_time_utc = date_time.astimezone(pytz.utc) - TODO FIXME: Date parsing can fail. Should probably use a proper "date parsing" module rather than - rely on the server to return the right thing. + epoch_utc = datetime.datetime.utcfromtimestamp(0).replace(tzinfo = pytz.utc) + + return (date_time_utc - epoch_utc).total_seconds() + + def __api_request(self, method, endpoint, params = {}, files = {}, do_ratelimiting = True): + """ + Internal API request helper. 
""" response = None headers = None @@ -445,6 +459,8 @@ class Mastodon: if self.ratelimit_remaining == 0: to_next = self.ratelimit_reset - time.time() if to_next > 0: + # As a precaution, never sleep longer than 5 minutes + to_next = min(to_next, 5 * 60) time.sleep(to_next) else: time_waited = time.time() - self.ratelimit_lastcall @@ -452,7 +468,9 @@ class Mastodon: remaining_wait = time_wait - time_waited if remaining_wait > 0: - time.sleep(remaining_wait * self.ratelimit_pacefactor) + to_next = remaining_wait / self.ratelimit_pacefactor + to_next = min(to_next, 5 * 60) + time.sleep(to_next) # Generate request headers if self.access_token != None: @@ -503,21 +521,34 @@ class Mastodon: raise MastodonAPIError("Could not parse response as JSON, respose code was " + str(response_object.status_code)) # Handle rate limiting - if 'X-RateLimit-Remaining' in response_object.headers and do_ratelimiting: - self.ratelimit_remaining = int(response_object.headers['X-RateLimit-Remaining']) - self.ratelimit_limit = int(response_object.headers['X-RateLimit-Limit']) - self.ratelimit_reset = (datetime.strptime(response_object.headers['X-RateLimit-Reset'], "%Y-%m-%dT%H:%M:%S.%fZ") - datetime(1970, 1, 1)).total_seconds() - self.ratelimit_lastcall = time.time() - - if "error" in response and response["error"] == "Throttled": - if self.ratelimit_method == "throw": - raise MastodonRatelimitError("Hit rate limit.") - - if self.ratelimit_method == "wait" or self.ratelimit_method == "pace": - to_next = self.ratelimit_reset - time.time() - if to_next > 0: - time.sleep(to_next) - request_complete = False + try: + if 'X-RateLimit-Remaining' in response_object.headers and do_ratelimiting: + self.ratelimit_remaining = int(response_object.headers['X-RateLimit-Remaining']) + self.ratelimit_limit = int(response_object.headers['X-RateLimit-Limit']) + + ratelimit_reset_datetime = dateutil.parser.parse(response_object.headers['X-RateLimit-Reset']) + self.ratelimit_reset = 
self.__datetime_to_epoch(ratelimit_reset_datetime) + + # Adjust server time to local clock + server_time_datetime = dateutil.parser.parse(response_object.headers['Date']) + server_time = self.__datetime_to_epoch(server_time_datetime) + server_time_diff = time.time() - server_time + self.ratelimit_reset += server_time_diff + self.ratelimit_lastcall = time.time() + + if "error" in response and response["error"] == "Throttled": + if self.ratelimit_method == "throw": + raise MastodonRatelimitError("Hit rate limit.") + + if self.ratelimit_method == "wait" or self.ratelimit_method == "pace": + to_next = self.ratelimit_reset - time.time() + if to_next > 0: + # As a precaution, never sleep longer than 5 minutes + to_next = min(to_next, 5 * 60) + time.sleep(to_next) + request_complete = False + except: + raise MastodonRatelimitError("Rate limit time calculations failed.") return response @@ -547,6 +578,9 @@ class Mastodon: class MastodonIllegalArgumentError(ValueError): pass +class MastodonFileNotFoundError(IOError): + pass + class MastodonNetworkError(IOError): pass -- cgit v1.2.3