Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Create new array pagination, and apply it to path contents #1009

Merged
merged 9 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 127 additions & 8 deletions graphql_api/helpers/connection.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import enum
from dataclasses import dataclass
from functools import cached_property
from typing import Any, Dict, List, Optional

from cursor_pagination import CursorPage, CursorPaginator
from django.db.models import QuerySet

from codecov.commands.exceptions import ValidationError
from codecov.db import sync_to_async
from graphql_api.types.enums import OrderingDirection

Expand Down Expand Up @@ -68,6 +70,116 @@ def page_info(self, *args, **kwargs):
}


class ArrayPaginator:
    """Cursor-based paginator for in-memory arrays.

    A cursor is the stringified zero-based index of an item in ``data``.
    Numeric cursors were chosen over opaque random strings because they map
    directly onto array indices, which keeps paging over large, static lists
    simple, efficient and easy to debug.

    The page is the half-open window ``data[start_index:end_index]``. After
    construction ``0 <= start_index <= end_index <= len(data)`` always holds.
    """

    def __init__(
        self,
        data: List[Any],
        first: Optional[int] = None,
        last: Optional[int] = None,
        after: Optional[str] = None,
        before: Optional[str] = None,
    ):
        """Compute the page window for the given pagination arguments.

        Args:
            data: The full in-memory sequence to paginate.
            first: Maximum number of items to keep from the start of the window.
            last: Maximum number of items to keep from the end of the window.
            after: Cursor of the item immediately before the window.
            before: Cursor of the item immediately after the window.

        Raises:
            ValidationError: If both ``first`` and ``last`` are provided, if
                either of them is negative, or if a cursor is not an integer.
        """
        self.data = data
        total = len(data)

        if first and last:
            raise ValidationError("Cannot provide both 'first' and 'last'")

        # A negative count would turn into a negative slice bound below and
        # silently return the wrong items, so reject it up front.
        for name, count in (("first", first), ("last", last)):
            if count is not None and count < 0:
                raise ValidationError(f"'{name}' must be a non-negative integer")

        start = 0
        end = total

        if after is not None:
            start = self._parse_cursor(after, "after") + 1

        if before is not None:
            end = self._parse_cursor(before, "before")

        # Clamp both bounds into [0, total] and keep the window well-formed.
        # Without the lower clamp on `end`, a negative `before` cursor would
        # be interpreted by Python as "count from the end of the list".
        start = min(max(start, 0), total)
        end = min(max(end, start), total)

        if first is not None:
            # Shrink from the right, but never past the bound set by `before`.
            end = min(start + first, end)

        if last is not None:
            # Shrink from the left, but never past the bound set by `after`.
            start = max(end - last, start)

        self.start_index = start
        self.end_index = end

    @staticmethod
    def _parse_cursor(value: Any, name: str) -> int:
        """Convert a cursor to its integer index or raise ValidationError."""
        try:
            return int(value)
        except (TypeError, ValueError) as err:
            raise ValidationError(f"'{name}' cursor must be an integer") from err

    def cursor(self, position: int) -> str:
        """Generate a cursor based on the position (index)."""
        return str(position)

    @property
    def page(self) -> List[Any]:
        """Returns the sliced page of data."""
        return self.data[self.start_index : self.end_index]

    @property
    def has_next(self) -> bool:
        """Check if there's a next page."""
        return self.end_index < len(self.data)

    @property
    def has_previous(self) -> bool:
        """Check if there's a previous page."""
        return self.start_index > 0


class ArrayConnection:
    """Connection wrapper for array pagination.

    Exposes the page computed by an ``ArrayPaginator`` as ``edges``,
    ``total_count`` and ``page_info``. Every cursor handed out here is the
    item's absolute index in the original data, so any edge cursor can be
    passed back as ``after`` / ``before`` on a later request.
    """

    def __init__(self, paginator: "ArrayPaginator"):
        self.data = paginator.data
        self.paginator = paginator
        # Snapshot the page once; the properties below all read this copy.
        self.page = paginator.page

    @property
    def edges(self) -> List[Dict[str, Any]]:
        """Generate edges with cursor and node information"""
        # Number from the page's start index rather than from 0, so edge
        # cursors agree with `start_cursor` / `end_cursor`.
        return [
            {"cursor": self.paginator.cursor(pos), "node": node}
            for pos, node in enumerate(self.page, start=self.paginator.start_index)
        ]

    @property
    def total_count(self) -> int:
        """Total number of items in the original data"""
        return len(self.data)

    @property
    def start_cursor(self) -> Optional[str]:
        """Cursor for the first item in the page, or None for an empty page"""
        return self.paginator.cursor(self.paginator.start_index) if self.page else None

    @property
    def end_cursor(self) -> Optional[str]:
        """Cursor for the last item in the page, or None for an empty page"""
        return (
            self.paginator.cursor(self.paginator.end_index - 1) if self.page else None
        )

    @property
    def page_info(self) -> Dict[str, Any]:
        """Pagination information"""
        return {
            "has_next_page": self.paginator.has_next,
            "has_previous_page": self.paginator.has_previous,
            "start_cursor": self.start_cursor,
            "end_cursor": self.end_cursor,
        }


class DictCursorPaginator(CursorPaginator):
"""
WARNING: DictCursorPaginator does not work for dict objects where a key contains the following string: "__"
Expand Down Expand Up @@ -112,26 +224,33 @@ def position_from_instance(self, instance):


def queryset_to_connection_sync(
    data: QuerySet | list,
    *,
    ordering=None,
    ordering_direction=None,
    first=None,
    after=None,
    last=None,
    before=None,
):
    """
    Take a queryset or an array and return it in paginated order based on the cursor pattern.

    Handles both QuerySets (database queries) and arrays (in-memory data):

    - a ``list`` is paginated by index with ``ArrayPaginator`` and returned as
      an ``ArrayConnection``; ``ordering`` and ``ordering_direction`` are not
      used on this path.
    - anything else is paginated with ``DictCursorPaginator`` using the given
      ``ordering`` / ``ordering_direction`` and returned as a ``Connection``.

    When neither ``first`` nor ``last`` is given, the page size defaults to
    the first 25 items.

    Raises:
        TypeError: If ``data`` is not a list and ``ordering`` was not provided.
    """
    if not first and not last:
        first = 25

    if isinstance(data, list):
        array_paginator = ArrayPaginator(
            data, first=first, last=last, after=after, before=before
        )
        return ArrayConnection(array_paginator)

    # `ordering` defaults to None so list callers can omit it; on the QuerySet
    # path it is still mandatory. Fail with a clear message instead of the
    # opaque "'NoneType' object is not iterable" from the generator below.
    if ordering is None:
        raise TypeError("'ordering' is required when paginating a QuerySet")

    ordering = tuple(field_order(field, ordering_direction) for field in ordering)
    paginator = DictCursorPaginator(data, ordering=ordering)
    page = paginator.page(first=first, after=after, last=last, before=before)
    return Connection(data, paginator, page)


@sync_to_async
Expand Down
90 changes: 90 additions & 0 deletions graphql_api/helpers/tests/test_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from django.test import TransactionTestCase
from shared.django_apps.core.tests.factories import RepositoryFactory

from codecov.commands.exceptions import ValidationError
from core.models import Repository
from graphql_api.types.enums import OrderingDirection, RepositoryOrdering

Expand Down Expand Up @@ -65,3 +66,92 @@ def test_queryset_to_connection_defers_count(self):

count = async_to_sync(connection.total_count)()
assert count == 3

def test_array_pagination_first_after(self):
    """Forward paging over a list: `first` alone, then `first` with `after`."""
    from graphql_api.helpers.connection import queryset_to_connection_sync

    items = [1, 2, 3, 4, 5]

    # Only `first`: the two leading items, more ahead, nothing behind.
    leading = queryset_to_connection_sync(items, first=2)
    leading_nodes = [edge["node"] for edge in leading.edges]
    self.assertEqual(leading_nodes, [1, 2])
    self.assertTrue(leading.page_info["has_next_page"])
    self.assertFalse(leading.page_info["has_previous_page"])

    # `first` with `after`: the window starts right after index 1.
    following = queryset_to_connection_sync(items, first=2, after="1")
    following_nodes = [edge["node"] for edge in following.edges]
    self.assertEqual(following_nodes, [3, 4])
    self.assertTrue(following.page_info["has_next_page"])
    self.assertTrue(following.page_info["has_previous_page"])

def test_array_pagination_last_before(self):
    """Backward paging over a list: `last` alone, then `last` with `before`."""
    from graphql_api.helpers.connection import queryset_to_connection_sync

    items = [1, 2, 3, 4, 5]

    # Only `last`: the two trailing items, nothing ahead, more behind.
    trailing = queryset_to_connection_sync(items, last=2)
    trailing_nodes = [edge["node"] for edge in trailing.edges]
    self.assertEqual(trailing_nodes, [4, 5])
    self.assertFalse(trailing.page_info["has_next_page"])
    self.assertTrue(trailing.page_info["has_previous_page"])

    # `last` with `before`: the window ends just before index 4.
    preceding = queryset_to_connection_sync(items, last=2, before="4")
    preceding_nodes = [edge["node"] for edge in preceding.edges]
    self.assertEqual(preceding_nodes, [3, 4])
    self.assertTrue(preceding.page_info["has_next_page"])
    self.assertTrue(preceding.page_info["has_previous_page"])

def test_array_pagination_edge_cases(self):
    """Empty input and page sizes larger than the list are handled."""
    from graphql_api.helpers.connection import queryset_to_connection_sync

    items = [1, 2, 3]

    # Empty array
    empty = queryset_to_connection_sync([], first=2)
    self.assertEqual(empty.edges, [])
    self.assertEqual(empty.total_count, 0)

    # First greater than array length
    oversized_first = queryset_to_connection_sync(items, first=5)
    first_nodes = [edge["node"] for edge in oversized_first.edges]
    self.assertEqual(first_nodes, [1, 2, 3])
    self.assertEqual(oversized_first.total_count, 3)

    # Last greater than array length
    oversized_last = queryset_to_connection_sync(items, last=5)
    last_nodes = [edge["node"] for edge in oversized_last.edges]
    self.assertEqual(last_nodes, [1, 2, 3])
    self.assertEqual(oversized_last.total_count, 3)

def test_array_pagination_edge_cases_with_before_cursor_2(self):
    """`last` equal to the number of items before the cursor returns them all."""
    from graphql_api.helpers.connection import queryset_to_connection_sync

    items = [1, 2, 3, 4, 5]

    result = queryset_to_connection_sync(items, last=3, before="3")
    nodes = [edge["node"] for edge in result.edges]
    self.assertEqual(nodes, [1, 2, 3])

def test_array_pagination_edge_cases_with_before_and_after(self):
    """`after` and `before` together bound the window that `last` trims."""
    from graphql_api.helpers.connection import queryset_to_connection_sync

    items = [1, 2, 3, 4, 5]

    result = queryset_to_connection_sync(items, last=3, before="3", after="0")
    nodes = [edge["node"] for edge in result.edges]
    self.assertEqual(nodes, [2, 3])

def test_both_first_and_last(self):
    """Passing `first` and `last` together is rejected."""
    from graphql_api.helpers.connection import queryset_to_connection_sync

    items = [1, 2, 3, 4, 5]

    # Callable form of assertRaises; same check as the context-manager form.
    self.assertRaises(
        ValidationError, queryset_to_connection_sync, items, last=3, first=2
    )

def test_invalid_cursors(self):
    """Non-integer `before` / `after` cursors are rejected."""
    from graphql_api.helpers.connection import queryset_to_connection_sync

    items = [1, 2, 3, 4, 5]

    # Same two calls as before, in the same order: `before` first, then `after`.
    bad_cursor_args = (
        {"last": 3, "before": "invalid"},
        {"first": 3, "after": "invalid"},
    )
    for kwargs in bad_cursor_args:
        with self.assertRaises(ValidationError):
            queryset_to_connection_sync(items, **kwargs)
Loading
Loading