2 files changed, 136 insertions, 5 deletions
diff --git a/docs/index.rst b/docs/index.rst
index de14554..48cea67 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -73,6 +73,28 @@ a loop without ever sleeping at all yourself. It is for applications that would
 just pretend there is no such thing as a rate limit and are fine with sometimes not
 being very interactive.
+A note about pagination
+-----------------------
+Many of Mastodon's API endpoints are paginated. What this means is that if you request
+data from them, you might not get all the data at once - instead, you might only get the
+first few results.
+All endpoints that are paginated have three parameters: since_id, max_id and limit.
+since_id allows you to specify the smallest id you want in the returned data. max_id,
+similarly, allows you to specify the largest. By specifying either one (generally,
+only one, not both) of them you can go through pages forwards and backwards.
+limit allows you to specify how many results you would like returned. Note that an
+instance may choose to return fewer results than you requested.
+The responses returned by paginated endpoints contain a "link" header that specifies
+which parameters to use to get the next and previous pages. Mastodon.py parses these
+and stores them (if present) in the first (for the previous page) and last (for the 
+next page) item of the returned list as _pagination_prev and _pagination_next.
+There are convenience functions available for fetching the previous and next page of
+a paginated request as well as for fetching all pages starting from a first page.
 A note about IDs
 ----------------
 Mastodons API uses IDs in several places: User IDs, Toot IDs, ...
@@ -257,8 +279,9 @@ you can simply pass them to the constructor of the class, too!
 Note that while it is perfectly reasonable to log back in whenever 
 your app starts, registering a new application on every 
 startup is not, so don't do that - instead, register an application 
-once, and then persist your client id and secret. Convenience
+once, and then persist your client id and secret. A convenient method
-methods for this are provided.
+for this is provided by the functions dealing with registering the app,
+logging in and the Mastodon class's constructor.
 To talk to an instance different from the flagship instance, specify
 the api_base_url (usually, just the URL of the instance, i.e. 
@@ -405,12 +428,20 @@ Writing data: Reports
 Writing data: Domain blocks
 ---------------------------
-These methods allow you to block and unblock all statuses from a domain
+These functions allow you to block and unblock all statuses from a domain
 for the logged-in user.
 .. automethod:: Mastodon.domain_block
 .. automethod:: Mastodon.domain_unblock
+Pagination
+----------
+These functions allow for convenient retrieval of paginated data.
+.. automethod:: Mastodon.fetch_next
+.. automethod:: Mastodon.fetch_previous
+.. automethod:: Mastodon.fetch_remaining
 Streaming
 ---------
 These functions allow access to the streaming API.
diff --git a/mastodon/Mastodon.py b/mastodon/Mastodon.py
index be6ea3f..26294f7 100644
--- a/mastodon/Mastodon.py
+++ b/mastodon/Mastodon.py
@@ -14,10 +14,12 @@ import requests
 from requests.models import urlencode
 import dateutil
 import dateutil.parser
+import re
+import copy
 class Mastodon:
    """
-    Super basic but thorough and easy to use mastodon.social
+    Super basic but thorough and easy to use Mastodon
    api wrapper in python.
    If anything is unclear, check the official API docs at
@@ -744,6 +746,76 @@ class Mastodon:
        return self.__api_request('DELETE', '/api/v1/domain_blocks', params)
    
    ###
+    # Pagination
+    ###
+    def fetch_next(self, previous_page):
+        """
+        Fetches the next page of results of a paginated request. Pass in the
+        previous page in its entirety, or the pagination information dict
+        returned as a part of that page's last status ('_pagination_next').
+
+        The pagination information that is passed in is not modified, so the
+        same page (or dict) can be passed in again later.
+
+        Returns the next page or None if no further data is available
+        (this includes being handed an empty page).
+        """
+        if isinstance(previous_page, list):
+            # An empty page has no last item that could carry pagination info.
+            if not previous_page or '_pagination_next' not in previous_page[-1]:
+                return None
+            params = previous_page[-1]['_pagination_next']
+        else:
+            params = previous_page
+
+        # Work on a copy: removing the bookkeeping keys from the caller's dict
+        # would make a second call with the same page fail with a KeyError.
+        params = dict(params)
+        method = params.pop('_pagination_method')
+        endpoint = params.pop('_pagination_endpoint')
+
+        return self.__api_request(method, endpoint, params)
+    
+    def fetch_previous(self, next_page):
+        """
+        Fetches the previous page of results of a paginated request. Pass in the
+        next page in its entirety, or the pagination information dict
+        returned as a part of that page's first status ('_pagination_prev').
+
+        The pagination information that is passed in is not modified, so the
+        same page (or dict) can be passed in again later.
+
+        Returns the previous page or None if no further data is available
+        (this includes being handed an empty page).
+        """
+        if isinstance(next_page, list):
+            # '_pagination_prev' is attached to the FIRST item of a page, not
+            # the last one. An empty page has no first item to look at.
+            if not next_page or '_pagination_prev' not in next_page[0]:
+                return None
+            params = next_page[0]['_pagination_prev']
+        else:
+            params = next_page
+
+        # Work on a copy: removing the bookkeeping keys from the caller's dict
+        # would make a second call with the same page fail with a KeyError.
+        params = dict(params)
+        method = params.pop('_pagination_method')
+        endpoint = params.pop('_pagination_endpoint')
+
+        return self.__api_request(method, endpoint, params)
+    
+    def fetch_remaining(self, first_page):
+        """
+        Fetches all the remaining pages of a paginated request starting from a
+        first page and returns the entire set of results (including the first page
+        that was passed in) as a big list.
+
+        The page that is passed in is deep-copied first, so it is left untouched.
+        Fetching stops as soon as a page is empty or no next page is available.
+
+        Be careful, as this might generate a lot of requests, depending on what you are
+        fetching, and might cause you to run into rate limits very quickly.
+        """
+        # Copy so that nothing done while paging can alter the caller's data.
+        current_page = copy.deepcopy(first_page)
+
+        all_pages = []
+        # Stop on None (no further page) as well as on an empty page, which
+        # has no last item to read the next-page information from.
+        while current_page:
+            all_pages.extend(current_page)
+            current_page = self.fetch_next(current_page)
+
+        return all_pages
+    
+    ###
    # Streaming
    ###
    def user_stream(self, listener):
@@ -786,7 +858,7 @@ class Mastodon:
        incoming events.
        """
        return self.__stream('/api/v1/streaming/hashtag', listener, params={'tag': tag})
+    
    ###
    # Internal helpers, dragons probably
    ###
@@ -884,6 +956,34 @@ class Mastodon:
            except:
                raise MastodonAPIError("Could not parse response as JSON, response code was %s, bad json content was '%s'" % (response_object.status_code, response_object.content))
+            # Parse link headers
+            if isinstance(response, list) and 'Link' in response_object.headers:
+                tmp_urls = requests.utils.parse_header_links(response_object.headers['Link'].rstrip('>').replace('>,<', ',<'))   
+                for url in tmp_urls:
+                    if url['rel'] == 'next':
+                        # Be paranoid and extract max_id specifically
+                        next_url = url['url']
+                        matchgroups = re.search(r"max_id=([0-9]*)", next_url)
+                        
+                        if matchgroups:
+                            next_params = copy.deepcopy(params)
+                            next_params['_pagination_method'] = method
+                            next_params['_pagination_endpoint'] = endpoint
+                            next_params['max_id'] = int(matchgroups.group(1))
+                            response[-1]['_pagination_next'] = next_params
+                            
+                    if url['rel'] == 'prev':
+                        # Be paranoid and extract since_id specifically
+                        prev_url = url['url']
+                        matchgroups = re.search(r"since_id=([0-9]*)", prev_url)
+                        
+                        if matchgroups:
+                            prev_params = copy.deepcopy(params)
+                            prev_params['_pagination_method'] = method
+                            prev_params['_pagination_endpoint'] = endpoint
+                            prev_params['max_id'] = int(matchgroups.group(1))
+                            response[0]['_pagination_prev'] = prev_params
+                
            # Handle rate limiting
            if 'X-RateLimit-Remaining' in response_object.headers and do_ratelimiting:
                self.ratelimit_remaining = int(response_object.headers['X-RateLimit-Remaining'])

diff --git a/docs/index.rst b/docs/index.rst index de14554..48cea67 100644 --- a/docs/index.rst +++ b/docs/index.rst
@@ -73,6 +73,28 @@ a loop without ever sleeping at all yourself. It is for applications that would
73	just pretend there is no such thing as a rate limit and are fine with sometimes not	73	just pretend there is no such thing as a rate limit and are fine with sometimes not
74	being very interactive.	74	being very interactive.
75		75
		76	A note about pagination
		77	-----------------------
		78	Many of Mastodon's API endpoints are paginated. What this means is that if you request
		79	data from them, you might not get all the data at once - instead, you might only get the
		80	first few results.
		81
		82	All endpoints that are paginated have three parameters: since_id, max_id and limit.
		83	since_id allows you to specify the smallest id you want in the returned data. max_id,
		84	similarly, allows you to specify the largest. By specifying either one (generally,
		85	only one, not both) of them you can go through pages forwards and backwards.
		86
		87	limit allows you to specify how many results you would like returned. Note that an
		88	instance may choose to return fewer results than you requested.
		89
		90	The responses returned by paginated endpoints contain a "link" header that specifies
		91	which parameters to use to get the next and previous pages. Mastodon.py parses these
		92	and stores them (if present) in the first (for the previous page) and last (for the
		93	next page) item of the returned list as _pagination_prev and _pagination_next.
		94
		95	There are convenience functions available for fetching the previous and next page of
		96	a paginated request as well as for fetching all pages starting from a first page.
		97
76	A note about IDs	98	A note about IDs
77	----------------	99	----------------
78	Mastodons API uses IDs in several places: User IDs, Toot IDs, ...	100	Mastodons API uses IDs in several places: User IDs, Toot IDs, ...
@@ -257,8 +279,9 @@ you can simply pass them to the constructor of the class, too!
257	Note that while it is perfectly reasonable to log back in whenever	279	Note that while it is perfectly reasonable to log back in whenever
258	your app starts, registering a new application on every	280	your app starts, registering a new application on every
259	startup is not, so don't do that - instead, register an application	281	startup is not, so don't do that - instead, register an application
260	once, and then persist your client id and secret. Convenience	282	once, and then persist your client id and secret. A convenient method
261	methods for this are provided.	283	for this is provided by the functions dealing with registering the app,
		284	logging in and the Mastodon class's constructor.
262		285
263	To talk to an instance different from the flagship instance, specify	286	To talk to an instance different from the flagship instance, specify
264	the api_base_url (usually, just the URL of the instance, i.e.	287	the api_base_url (usually, just the URL of the instance, i.e.
@@ -405,12 +428,20 @@ Writing data: Reports
405		428
406	Writing data: Domain blocks	429	Writing data: Domain blocks
407	---------------------------	430	---------------------------
408	These methods allow you to block and unblock all statuses from a domain	431	These functions allow you to block and unblock all statuses from a domain
409	for the logged-in user.	432	for the logged-in user.
410		433
411	.. automethod:: Mastodon.domain_block	434	.. automethod:: Mastodon.domain_block
412	.. automethod:: Mastodon.domain_unblock	435	.. automethod:: Mastodon.domain_unblock
413		436
		437	Pagination
		438	----------
		439	These functions allow for convenient retrieval of paginated data.
		440
		441	.. automethod:: Mastodon.fetch_next
		442	.. automethod:: Mastodon.fetch_previous
		443	.. automethod:: Mastodon.fetch_remaining
		444
414	Streaming	445	Streaming
415	---------	446	---------
416	These functions allow access to the streaming API.	447	These functions allow access to the streaming API.


diff --git a/mastodon/Mastodon.py b/mastodon/Mastodon.py index be6ea3f..26294f7 100644 --- a/mastodon/Mastodon.py +++ b/mastodon/Mastodon.py
@@ -14,10 +14,12 @@ import requests
14	from requests.models import urlencode	14	from requests.models import urlencode
15	import dateutil	15	import dateutil
16	import dateutil.parser	16	import dateutil.parser
		17	import re
		18	import copy
17		19
18	class Mastodon:	20	class Mastodon:
19	"""	21	"""
20	Super basic but thorough and easy to use mastodon.social	22	Super basic but thorough and easy to use Mastodon
21	api wrapper in python.	23	api wrapper in python.
22		24
23	If anything is unclear, check the official API docs at	25	If anything is unclear, check the official API docs at
@@ -744,6 +746,76 @@ class Mastodon:
744	return self.__api_request('DELETE', '/api/v1/domain_blocks', params)	746	return self.__api_request('DELETE', '/api/v1/domain_blocks', params)
745		747
746	###	748	###
		749	# Pagination
		750	###
		751	def fetch_next(self, previous_page):
		752	"""
		753	Fetches the next page of results of a paginated request. Pass in the
		754	previous page in its entirety, or the pagination information dict
		755	returned as a part of that pages last status ('_pagination_next').
		756
		757	Returns the next page or None if no further data is available.
		758	"""
		759	if isinstance(previous_page, list):
		760	if '_pagination_next' in previous_page[-1]:
		761	params = previous_page[-1]['_pagination_next']
		762	else:
		763	return None
		764	else:
		765	params = previous_page
		766
		767	method = params['_pagination_method']
		768	del params['_pagination_method']
		769
		770	endpoint = params['_pagination_endpoint']
		771	del params['_pagination_endpoint']
		772
		773	return self.__api_request(method, endpoint, params)
		774
		775	def fetch_previous(self, next_page):
		776	"""
		777	Fetches the previous page of results of a paginated request. Pass in the
		778	previous page in its entirety, or the pagination information dict
		779	returned as a part of that pages first status ('_pagination_prev').
		780
		781	Returns the previous page or None if no further data is available.
		782	"""
		783	if isinstance(next_page, list):
		784	if '_pagination_prev' in next_page[-1]:
		785	params = next_page[-1]['_pagination_prev']
		786	else:
		787	return None
		788	else:
		789	params = next_page
		790
		791	method = params['_pagination_method']
		792	del params['_pagination_method']
		793
		794	endpoint = params['_pagination_endpoint']
		795	del params['_pagination_endpoint']
		796
		797	return self.__api_request(method, endpoint, params)
		798
		799	def fetch_remaining(self, first_page):
		800	"""
		801	Fetches all the remaining pages of a paginated request starting from a
		802	first page and returns the entire set of results (including the first page
		803	that was passed in) as a big list.
		804
		805	Be careful, as this might generate a lot of requests, depending on what you are
		806	fetching, and might cause you to run into rate limits very quickly.
		807	"""
		808	first_page = copy.deepcopy(first_page)
		809
		810	all_pages = []
		811	current_page = first_page
		812	while current_page != None:
		813	all_pages.extend(current_page)
		814	current_page = self.fetch_next(current_page)
		815
		816	return all_pages
		817
		818	###
747	# Streaming	819	# Streaming
748	###	820	###
749	def user_stream(self, listener):	821	def user_stream(self, listener):
@@ -786,7 +858,7 @@ class Mastodon:
786	incoming events.	858	incoming events.
787	"""	859	"""
788	return self.__stream('/api/v1/streaming/hashtag', listener, params={'tag': tag})	860	return self.__stream('/api/v1/streaming/hashtag', listener, params={'tag': tag})
789		861
790	###	862	###
791	# Internal helpers, dragons probably	863	# Internal helpers, dragons probably
792	###	864	###
@@ -884,6 +956,34 @@ class Mastodon:
884	except:	956	except:
885	raise MastodonAPIError("Could not parse response as JSON, response code was %s, bad json content was '%s'" % (response_object.status_code, response_object.content))	957	raise MastodonAPIError("Could not parse response as JSON, response code was %s, bad json content was '%s'" % (response_object.status_code, response_object.content))
886		958
		959	# Parse link headers
		960	if isinstance(response, list) and 'Link' in response_object.headers:
		961	tmp_urls = requests.utils.parse_header_links(response_object.headers['Link'].rstrip('>').replace('>,<', ',<'))
		962	for url in tmp_urls:
		963	if url['rel'] == 'next':
		964	# Be paranoid and extract max_id specifically
		965	next_url = url['url']
		966	matchgroups = re.search(r"max_id=([0-9]*)", next_url)
		967
		968	if matchgroups:
		969	next_params = copy.deepcopy(params)
		970	next_params['_pagination_method'] = method
		971	next_params['_pagination_endpoint'] = endpoint
		972	next_params['max_id'] = int(matchgroups.group(1))
		973	response[-1]['_pagination_next'] = next_params
		974
		975	if url['rel'] == 'prev':
		976	# Be paranoid and extract since_id specifically
		977	prev_url = url['url']
		978	matchgroups = re.search(r"since_id=([0-9]*)", prev_url)
		979
		980	if matchgroups:
		981	prev_params = copy.deepcopy(params)
		982	prev_params['_pagination_method'] = method
		983	prev_params['_pagination_endpoint'] = endpoint
		984	prev_params['max_id'] = int(matchgroups.group(1))
		985	response[0]['_pagination_prev'] = prev_params
		986
887	# Handle rate limiting	987	# Handle rate limiting
888	if 'X-RateLimit-Remaining' in response_object.headers and do_ratelimiting:	988	if 'X-RateLimit-Remaining' in response_object.headers and do_ratelimiting:
889	self.ratelimit_remaining = int(response_object.headers['X-RateLimit-Remaining'])	989	self.ratelimit_remaining = int(response_object.headers['X-RateLimit-Remaining'])