Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Create new array pagination, and apply it to path contents #1009

Merged
merged 9 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 121 additions & 9 deletions graphql_api/helpers/connection.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import enum
from dataclasses import dataclass
from functools import cached_property
from typing import Any, Dict, List, Optional

from cursor_pagination import CursorPage, CursorPaginator
from django.db.models import QuerySet
Expand Down Expand Up @@ -68,6 +69,110 @@ def page_info(self, *args, **kwargs):
}


class ArrayPaginator:
"""Cursor-based paginator for in-memory arrays."""
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was debating between using stringified random cursors and numeric ones for pagination. I decided to go with numeric cursors because they are simple, efficient, and directly map to the array indices (we have cases where customers have over 1,000 files), making pagination faster and easier to debug. Since the dataset is static, there’s no need for the added complexity or security of random strings. If you have other perspectives, lmk


def __init__(
self,
data: List[Any],
first: Optional[int] = None,
last: Optional[int] = None,
after: Optional[str] = None,
before: Optional[str] = None,
):
self.data = data
self.start_index = 0
self.end_index = len(data)

if first and last:
raise ValueError("Cannot provide both 'first' and 'last'")
Copy link
Contributor

@JerrySentry JerrySentry Nov 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use ValidationError instead so it can be handled well in the response. I have a feeling that, worst case, this will be an internal server error or, best case, something ugly in the response for the user.


if after is not None:
self.start_index = int(after) + 1

if before is not None:
self.end_index = min(self.end_index, int(before))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's also validate that before and after can be cast to int; we can wrap this in try/except or use isdecimal() or something.


# Ensure valid bounds after 'after' and 'before'
self.start_index = max(self.start_index, 0)
self.end_index = min(self.end_index, len(data))

if first is not None:
self.end_index = min(self.start_index + first, len(data))

if last is not None:
range_length = self.end_index - self.start_index
if range_length > last:
self.start_index = self.end_index - last

# Ensure bounds remain valid
self.start_index = max(self.start_index, 0)
self.end_index = min(self.end_index, len(data))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This final safe guard is great!


def cursor(self, position: int) -> str:
    """Return the cursor for ``position``: the index rendered as a string."""
    as_text = str(position)
    return as_text

@property
def page(self) -> List[Any]:
    """Return the items of ``data`` in the half-open range [start_index, end_index)."""
    window = slice(self.start_index, self.end_index)
    return self.data[window]

@property
def has_next(self) -> bool:
    """Report whether any items lie beyond the current page's end index."""
    total = len(self.data)
    return total > self.end_index

@property
def has_previous(self) -> bool:
    """Report whether the current page starts past the first item."""
    return 0 < self.start_index


class ArrayConnection:
"""Connection wrapper for array pagination."""

def __init__(self, data: List[Any], paginator: ArrayPaginator, page: List[Any]):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't actually need data and page right?
Total count will be len(self.paginator.data) and references of self.page will be self.paginator.page.
IMO adding these two params to the ArrayConnection class just adds more confusion

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right, I was trying to decouple the two to some extent, but it's not necessary at this point, as I doubt we'll use it anywhere besides the query_to_connection function

self.data = data
self.paginator = paginator
self.page = page

@property
def edges(self) -> List[Dict[str, Any]]:
    """Build the edge list for the current page.

    Returns:
        One dict per item in ``self.page``, each with a ``"cursor"`` and a
        ``"node"`` key. The cursor is generated from the item's absolute
        index (``paginator.start_index`` plus its offset in the page), so it
        agrees with ``start_cursor``/``end_cursor``.
    """
    # Enumerating from 0 would hand out page-relative cursors ("0", "1", ...)
    # on every page; offset by the page's first absolute index instead.
    first_index = self.paginator.start_index
    return [
        {"cursor": self.paginator.cursor(index), "node": node}
        for index, node in enumerate(self.page, start=first_index)
    ]

@property
def total_count(self) -> int:
    """Return the length of the full ``data`` list held by this connection."""
    everything = self.data
    return len(everything)

@property
def start_cursor(self) -> Optional[str]:
    """Return the cursor for the paginator's start index, or None if the page is empty."""
    if not self.page:
        return None
    pager = self.paginator
    return pager.cursor(pager.start_index)

@property
def end_cursor(self) -> Optional[str]:
    """Return the cursor for the paginator's last included index, or None if the page is empty."""
    if not self.page:
        return None
    pager = self.paginator
    # end_index is exclusive, so the last item on the page sits one before it.
    last_index = pager.end_index - 1
    return pager.cursor(last_index)

@property
def page_info(self) -> Dict[str, Any]:
    """Assemble the page-info dict: next/previous flags plus start and end cursors."""
    pager = self.paginator
    info: Dict[str, Any] = {}
    info["has_next_page"] = pager.has_next
    info["has_previous_page"] = pager.has_previous
    info["start_cursor"] = self.start_cursor
    info["end_cursor"] = self.end_cursor
    return info


class DictCursorPaginator(CursorPaginator):
"""
WARNING: DictCursorPaginator does not work for dict objects where a key contains the following string: "__"
Expand Down Expand Up @@ -112,26 +217,33 @@ def position_from_instance(self, instance):


def queryset_to_connection_sync(
    data: QuerySet | list,
    *,
    ordering=None,
    ordering_direction=None,
    first=None,
    after=None,
    last=None,
    before=None,
):
    """
    A method to take a queryset or an array and return it in paginated order based on the cursor pattern.
    Handles both QuerySets (database queries) and arrays (in-memory data).

    When neither ``first`` nor ``last`` is given (or both are falsy), ``first``
    defaults to 25.

    Lists are paginated with ``ArrayPaginator`` and returned as an
    ``ArrayConnection``; ``ordering`` and ``ordering_direction`` are not used
    on that path. Anything else is paginated with ``DictCursorPaginator`` and
    returned as a ``Connection``, which requires ``ordering``.

    Raises:
        TypeError: if ``data`` is not a list and ``ordering`` is omitted.
    """
    if not first and not last:
        first = 25

    if isinstance(data, list):
        array_paginator = ArrayPaginator(
            data, first=first, last=last, after=after, before=before
        )
        return ArrayConnection(data, array_paginator, array_paginator.page)

    # ``ordering`` is only optional for lists; fail with a clear message rather
    # than the incidental "'NoneType' object is not iterable".
    if ordering is None:
        raise TypeError("'ordering' is required when paginating a QuerySet")

    ordering = tuple(field_order(field, ordering_direction) for field in ordering)
    paginator = DictCursorPaginator(data, ordering=ordering)
    page = paginator.page(first=first, after=after, last=last, before=before)
    return Connection(data, paginator, page)


@sync_to_async
Expand Down
78 changes: 78 additions & 0 deletions graphql_api/helpers/tests/test_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,81 @@ def test_queryset_to_connection_defers_count(self):

count = async_to_sync(connection.total_count)()
assert count == 3

def test_array_pagination_first_after(self):
    """Forward pagination over a list: ``first`` alone, then ``first`` with ``after``."""
    from graphql_api.helpers.connection import queryset_to_connection_sync

    items = [1, 2, 3, 4, 5]

    # No cursor: the page starts at the head of the list.
    head = queryset_to_connection_sync(items, first=2)
    head_nodes = [edge["node"] for edge in head.edges]
    self.assertEqual(head_nodes, [1, 2])
    self.assertTrue(head.page_info["has_next_page"])
    self.assertFalse(head.page_info["has_previous_page"])

    # With after="1": the page resumes just past index 1.
    resumed = queryset_to_connection_sync(items, first=2, after="1")
    resumed_nodes = [edge["node"] for edge in resumed.edges]
    self.assertEqual(resumed_nodes, [3, 4])
    self.assertTrue(resumed.page_info["has_next_page"])
    self.assertTrue(resumed.page_info["has_previous_page"])

def test_array_pagination_last_before(self):
    """Backward pagination over a list: ``last`` alone, then ``last`` with ``before``."""
    from graphql_api.helpers.connection import queryset_to_connection_sync

    items = [1, 2, 3, 4, 5]

    # No cursor: the page is the tail of the list.
    tail = queryset_to_connection_sync(items, last=2)
    tail_nodes = [edge["node"] for edge in tail.edges]
    self.assertEqual(tail_nodes, [4, 5])
    self.assertFalse(tail.page_info["has_next_page"])
    self.assertTrue(tail.page_info["has_previous_page"])

    # With before="4": the page ends just ahead of index 4.
    bounded = queryset_to_connection_sync(items, last=2, before="4")
    bounded_nodes = [edge["node"] for edge in bounded.edges]
    self.assertEqual(bounded_nodes, [3, 4])
    self.assertTrue(bounded.page_info["has_next_page"])
    self.assertTrue(bounded.page_info["has_previous_page"])

def test_array_pagination_edge_cases(self):
    """Empty input, and ``first``/``last`` values larger than the list."""
    from graphql_api.helpers.connection import queryset_to_connection_sync

    items = [1, 2, 3]

    # An empty list yields no edges and a zero total.
    nothing = queryset_to_connection_sync([], first=2)
    self.assertEqual(nothing.edges, [])
    self.assertEqual(nothing.total_count, 0)

    # Asking for more than exists from the front returns everything.
    oversized_first = queryset_to_connection_sync(items, first=5)
    first_nodes = [edge["node"] for edge in oversized_first.edges]
    self.assertEqual(first_nodes, [1, 2, 3])
    self.assertEqual(oversized_first.total_count, 3)

    # Asking for more than exists from the back returns everything.
    oversized_last = queryset_to_connection_sync(items, last=5)
    last_nodes = [edge["node"] for edge in oversized_last.edges]
    self.assertEqual(last_nodes, [1, 2, 3])
    self.assertEqual(oversized_last.total_count, 3)

def test_array_pagination_edge_cases_with_before_cursor_2(self):
    """``last`` equal to the number of items ahead of ``before`` returns all of them."""
    from graphql_api.helpers.connection import queryset_to_connection_sync

    items = [1, 2, 3, 4, 5]

    result = queryset_to_connection_sync(items, last=3, before="3")
    nodes = [edge["node"] for edge in result.edges]
    self.assertEqual(nodes, [1, 2, 3])

def test_array_pagination_edge_cases_with_before_and_after(self):
    """``before`` and ``after`` together bound the window that ``last`` picks from."""
    from graphql_api.helpers.connection import queryset_to_connection_sync

    items = [1, 2, 3, 4, 5]

    result = queryset_to_connection_sync(items, last=3, before="3", after="0")
    nodes = [edge["node"] for edge in result.edges]
    self.assertEqual(nodes, [2, 3])

def test_both_first_and_last(self):
    """Passing both ``first`` and ``last`` for a list raises ValueError."""
    from graphql_api.helpers.connection import queryset_to_connection_sync

    items = [1, 2, 3, 4, 5]

    self.assertRaises(
        ValueError, queryset_to_connection_sync, items, last=3, first=2
    )
Loading
Loading