From ad96297a0608bfd5196598cc3bbcce1f4aa03bc9 Mon Sep 17 00:00:00 2001 From: codl Date: Mon, 15 Apr 2019 03:23:04 +0200 Subject: more robust handling of pagination Link headers During a cursory investigation for #163, I found that the code handling Link headers would not handle non-numeric post IDs like Pleroma's flakeIDs correctly: IDs starting with a number would be truncated at the first non-digit, and IDs not starting with a number would throw. Thankfully, all flakeIDs generated so far start with 9. Maybe 8 for the earliest ones, I'm not sure. Either way, so far it would only have misbehaved when using the pagination functions or accessing the _pagination_prev and _pagination_next attributes directly. --- mastodon/Mastodon.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'mastodon') diff --git a/mastodon/Mastodon.py b/mastodon/Mastodon.py index fc585ba..0550d85 100644 --- a/mastodon/Mastodon.py +++ b/mastodon/Mastodon.py @@ -2272,13 +2272,17 @@ class Mastodon: if url['rel'] == 'next': # Be paranoid and extract max_id specifically next_url = url['url'] - matchgroups = re.search(r"max_id=([0-9]*)", next_url) + matchgroups = re.search(r"max_id=([^&]+)", next_url) if matchgroups: next_params = copy.deepcopy(params) next_params['_pagination_method'] = method next_params['_pagination_endpoint'] = endpoint - next_params['max_id'] = int(matchgroups.group(1)) + max_id = matchgroups.group(1) + if max_id.is_digit(): + next_params['max_id'] = int(max_id) + else: + next_params['max_id'] = max_id if "since_id" in next_params: del next_params['since_id'] response[-1]._pagination_next = next_params @@ -2286,13 +2290,17 @@ class Mastodon: if url['rel'] == 'prev': # Be paranoid and extract since_id specifically prev_url = url['url'] - matchgroups = re.search(r"since_id=([0-9]*)", prev_url) + matchgroups = re.search(r"since_id=([^&]+)", prev_url) if matchgroups: prev_params = copy.deepcopy(params) prev_params['_pagination_method'] = method 
prev_params['_pagination_endpoint'] = endpoint - prev_params['since_id'] = int(matchgroups.group(1)) + since_id = matchgroups.group(1) + if since_id.is_digit(): + prev_params['since_id'] = int(since_id) + else: + prev_params['since_id'] = since_id if "max_id" in prev_params: del prev_params['max_id'] response[0]._pagination_prev = prev_params -- cgit v1.2.3 From 62a47f4e92bd58639beeb682cb7cc53ebba8b803 Mon Sep 17 00:00:00 2001 From: codl Date: Mon, 15 Apr 2019 14:19:45 +0200 Subject: oops. str.is_digit() -> str.isdigit() --- mastodon/Mastodon.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mastodon') diff --git a/mastodon/Mastodon.py b/mastodon/Mastodon.py index 0550d85..123d1a6 100644 --- a/mastodon/Mastodon.py +++ b/mastodon/Mastodon.py @@ -2279,7 +2279,7 @@ class Mastodon: next_params['_pagination_method'] = method next_params['_pagination_endpoint'] = endpoint max_id = matchgroups.group(1) - if max_id.is_digit(): + if max_id.isdigit(): next_params['max_id'] = int(max_id) else: next_params['max_id'] = max_id @@ -2297,7 +2297,7 @@ class Mastodon: prev_params['_pagination_method'] = method prev_params['_pagination_endpoint'] = endpoint since_id = matchgroups.group(1) - if since_id.is_digit(): + if since_id.isdigit(): prev_params['since_id'] = int(since_id) else: prev_params['since_id'] = since_id -- cgit v1.2.3 From eca31ea732d1c2c8f0491a138b2940e828a45973 Mon Sep 17 00:00:00 2001 From: codl Date: Mon, 15 Apr 2019 14:26:43 +0200 Subject: improve link header parameter extraction regex --- mastodon/Mastodon.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mastodon') diff --git a/mastodon/Mastodon.py b/mastodon/Mastodon.py index 123d1a6..3f594cf 100644 --- a/mastodon/Mastodon.py +++ b/mastodon/Mastodon.py @@ -2272,7 +2272,7 @@ class Mastodon: if url['rel'] == 'next': # Be paranoid and extract max_id specifically next_url = url['url'] - matchgroups = re.search(r"max_id=([^&]+)", next_url) + matchgroups = 
re.search(r"[?&]max_id=([^&]+)", next_url) if matchgroups: next_params = copy.deepcopy(params) @@ -2290,7 +2290,7 @@ class Mastodon: if url['rel'] == 'prev': # Be paranoid and extract since_id specifically prev_url = url['url'] - matchgroups = re.search(r"since_id=([^&]+)", prev_url) + matchgroups = re.search(r"[?&]since_id=([^&]+)", prev_url) if matchgroups: prev_params = copy.deepcopy(params) -- cgit v1.2.3