Skip to content

Commit

Permalink
Add unique functions to algorithms.nim (#645)
Browse files Browse the repository at this point in the history
These functions are similar to but not as fully featured as `numpy.unique`.
They lack a way to return the number of occurrences or the indices of the unique elements.
However, they make it possible to (optionally) sort the output, or to use a more efficient algorithm if the input is already sorted.
  • Loading branch information
AngelEzquerra authored Apr 17, 2024
1 parent 05ae049 commit 1448698
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 0 deletions.
77 changes: 77 additions & 0 deletions src/arraymancer/tensor/algorithms.nim
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,80 @@ proc argsort*[T](t: Tensor[T], order = SortOrder.Ascending, toCopy = false): Ten
result = newTensorUninit[int](t.shape)
for i in 0 ..< t.size:
result[i] = tups[i][1]

proc unique*[T](t: Tensor[T], isSorted=false): Tensor[T] =
  ## Return a new Tensor with the unique elements of the input Tensor in the
  ## order in which they first appear.
  ##
  ## Note that this is the *"unsorted"* version of this procedure, which
  ## returns the unique values in the order in which they first appear in the
  ## input. Do not get confused by the `isSorted` argument: it is not used to
  ## sort the output, but to make the algorithm more efficient when the input
  ## tensor is already sorted.
  ##
  ## There is another version of this procedure which takes an `order`
  ## argument that lets you sort the output (in ascending or descending
  ## order).
  ##
  ## Inputs:
  ##   - t: The input Tensor
  ##   - isSorted: Set this to `true` if the input tensor is already sorted,
  ##               in order to use a more efficient algorithm for finding the
  ##               unique elements of the input Tensor. Be careful when using
  ##               this option: if the input tensor is not really sorted,
  ##               the output will be wrong.
  ##
  ## Result:
  ##   - A new Tensor with the unique elements of the input Tensor in the
  ##     order in which they first appear in the input Tensor.
  ##
  ## Examples:
  ## ```nim
  ## let
  ##   dup = [1, 3, 2, 4, 1, 8, 2, 1, 4].toTensor
  ## assert dup.unique == [1, 3, 2, 4, 8].toTensor
  ##
  ## # Use `isSorted = true` only if the input tensor is already sorted.
  ## # The input is sorted here, so the unique values come out sorted too.
  ## assert dup.sorted.unique(isSorted = true) == [1, 2, 3, 4, 8].toTensor
  ## ```

  if t.is_C_contiguous:
    # Note that since deduplicate returns a new sequence, it is safe to apply
    # it to a view of the raw data of the input tensor
    toOpenArray(t.toUnsafeView, 0, t.size - 1)
      .deduplicate(isSorted = isSorted).toTensor
  else:
    # Clone the tensor in order to make it C contiguous and then recurse, so
    # that the raw-data view used by the branch above is valid
    unique(t.clone(), isSorted = isSorted)

proc unique*[T](t: Tensor[T], order: SortOrder): Tensor[T] =
  ## Return a new sorted Tensor with the unique elements of the input Tensor.
  ##
  ## Note that this is the "sorted" version of this procedure. There is
  ## another version which does not take an `order` argument and returns the
  ## unique elements in the order in which they first appear in the input.
  ##
  ## Inputs:
  ##   - t: The input Tensor
  ##   - order: The order in which elements are sorted
  ##            (`SortOrder.Ascending` or `SortOrder.Descending`)
  ##
  ## Result:
  ##   - A new Tensor with the unique elements of the input Tensor sorted in
  ##     the specified order.
  ##
  ## Examples:
  ## ```nim
  ## let
  ##   dup = [1, 3, 2, 4, 1, 8, 2, 1, 4].toTensor
  ##   unique_ascending_sort = dup.unique(order = SortOrder.Ascending)
  ##   unique_descending_sort = dup.unique(order = SortOrder.Descending)
  ## assert unique_ascending_sort == [1, 2, 3, 4, 8].toTensor
  ## assert unique_descending_sort == [8, 4, 3, 2, 1].toTensor
  ## ```

  if t.is_C_contiguous:
    # Note that since sorted returns a new sequence, it is safe to apply it
    # to a view of the raw data of the input tensor
    let sortedValues = sorted(toOpenArray(t.toUnsafeView, 0, t.size - 1),
                              order = order)
    # The values are sorted at this point, so equal elements are adjacent and
    # the faster `isSorted = true` deduplication can be used
    result = sortedValues.deduplicate(isSorted = true).toTensor
  else:
    # Clone the tensor in order to make it C contiguous and recurse, so that
    # non-contiguous inputs go through exactly the same sort + deduplicate
    # path as contiguous ones (mirrors the `isSorted` overload of `unique`)
    result = unique(t.clone(), order = order)
21 changes: 21 additions & 0 deletions tests/tensor/test_algorithms.nim
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,24 @@ suite "[Core] Testing algorithm functions":
let idxSorted = t.argsort(order = SortOrder.Descending)
check idxSorted == exp
check t[idxSorted] == @[7, 4, 3, 2, 1].toTensor()

test "Unique":
  block:
    let
      dup = [1, 3, 2, 4, 1, 8, 2, 1, 4].toTensor
      unique_unsorted = dup.unique
      # Sort the input *with its duplicates* before calling
      # `unique(isSorted = true)`, so that the pre-sorted fast path actually
      # has repeated elements to remove
      unique_presorted_ascending = sorted(dup).unique(isSorted = true)
      unique_presorted_descending =
        sorted(dup, order = SortOrder.Descending).unique(isSorted = true)
      unique_sorted_ascending = dup.unique(order = SortOrder.Ascending)
      unique_sorted_descending = dup.unique(order = SortOrder.Descending)
      # Every second element: a strided (non C contiguous) view of `dup`
      dup_not_C_continuous = dup[_ | 2]
      unique_not_c_continuous = dup_not_C_continuous.unique
      unique_sorted_not_c_continuous =
        dup_not_C_continuous.unique(order = SortOrder.Descending)

    check unique_unsorted == [1, 3, 2, 4, 8].toTensor
    check unique_presorted_ascending == [1, 2, 3, 4, 8].toTensor
    check unique_presorted_descending == [8, 4, 3, 2, 1].toTensor
    check unique_sorted_ascending == [1, 2, 3, 4, 8].toTensor
    check unique_sorted_descending == [8, 4, 3, 2, 1].toTensor
    check unique_not_c_continuous == [1, 2, 4].toTensor
    check unique_sorted_not_c_continuous == [4, 2, 1].toTensor

0 comments on commit 1448698

Please sign in to comment.