Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

【Hackathon 7th No.25】为 Paddle 新增 is_coalesced -part #68334

Merged
merged 18 commits into from
Oct 28, 2024
57 changes: 57 additions & 0 deletions paddle/fluid/pybind/eager_method.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2735,6 +2735,59 @@ static PyObject* tensor_method_to_sparse_csr(TensorObject* self,
EAGER_CATCH_AND_THROW_RETURN_NULL
}

// Python-level docstring for Tensor.is_coalesced(); bound to the method in
// the variable_methods table via PyDoc_STRVAR.
PyDoc_STRVAR(tensor_is_coalesced__doc__, // NOLINT
R"DOC(is_coalesced($self, /)
--

Check whether the Tensor is a coalesced SparseCooTensor. If not it will return False.
Any Tensor type among DenseTensor/SparseCooTensor/SparseCsrTensor are supported.

Args:
x (Tensor): The input tensor. It can be DenseTensor/SparseCooTensor/SparseCsrTensor.

Returns:
bool: True if the Tensor is a coalesced SparseCooTensor, and False otherwise.

Examples:

.. code-block:: python

>>> import paddle

>>> indices = [[0, 0, 1], [1, 1, 2]]
>>> values = [1.0, 2.0, 3.0]
>>> x = paddle.sparse.sparse_coo_tensor(indices, values)

>>> x.is_coalesced()
False
>>> x = x.coalesce()
>>> x.is_coalesced()
True

>>> x = paddle.to_tensor([[1., 2., 3.]])
>>> x.is_coalesced()
False

>>> x = x.to_sparse_csr()
>>> x.is_coalesced()
False

)DOC"); // NOLINT

// Implementation of Tensor.is_coalesced().
// Returns Python True only when the underlying tensor is a SparseCooTensor
// whose `coalesced` flag is set; every other tensor kind (dense, CSR, ...)
// reports False, matching the contract in tensor_is_coalesced__doc__.
// NOTE(review): the PR discussion considered raising for non-COO input (as
// PyTorch does); the merged behavior documented and tested here is to
// return False instead.
static PyObject* tensor_method_is_coalesced(TensorObject* self,
                                            PyObject* args,
                                            PyObject* kwargs) {
  EAGER_TRY
  if (self->tensor.is_sparse_coo_tensor()) {
    // Only SparseCooTensor carries the coalesced flag.
    auto sparse_coo_tensor =
        std::dynamic_pointer_cast<phi::SparseCooTensor>(self->tensor.impl());
    return ToPyObject(sparse_coo_tensor->coalesced());
  } else {
    return ToPyObject(false);
  }
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

PyDoc_STRVAR(tensor_is_same_shape__doc__, // NOLINT
R"DOC(is_same_shape($self, y, /)
--
Expand Down Expand Up @@ -3503,6 +3556,10 @@ PyMethodDef variable_methods[] = { // NOLINT
(PyCFunction)(void (*)())tensor_method_to_sparse_csr,
METH_VARARGS | METH_KEYWORDS,
tensor_to_sparse_csr__doc__},
{"is_coalesced",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

需要在 python/paddle/tensor/tensor.prototype.pyi stub 中补充新 Tensor API 类型

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

好的,已补充

(PyCFunction)(void (*)())tensor_method_is_coalesced,
METH_VARARGS | METH_KEYWORDS,
tensor_is_coalesced__doc__},
/***the method of sparse tensor****/
{"element_size",
(PyCFunction)(void (*)())tensor_method_element_size,
Expand Down
1 change: 1 addition & 0 deletions test/dygraph_to_static/test_tensor_attr_consistency.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
'grad_fn',
'gradient',
'inplace_version',
'is_coalesced',
'is_dense',
'is_dist',
'is_leaf',
Expand Down
267 changes: 267 additions & 0 deletions test/legacy_test/test_sparse_is_coalesced.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import paddle
import paddle.sparse
from paddle.base import core


def is_coalesced_naive(x):
    """Reference oracle for ``Tensor.is_coalesced``.

    A tensor counts as coalesced only when it is a sparse COO tensor whose
    index columns are all distinct; any other tensor kind yields ``False``.
    """
    if not x.is_sparse_coo():
        return False
    columns = list(zip(*x.indices().numpy()))
    # No duplicates <=> turning the columns into a set loses nothing.
    return len(columns) == len(set(columns))


def is_coalesced_naive_static(indices):
    """Static-graph counterpart of the oracle: ``True`` iff no index column
    of ``indices`` repeats.

    NOTE(review): no caller is visible in this file — per the PR discussion
    this helper was slated for removal; confirm before relying on it.
    """
    columns = list(zip(*indices))
    return len(columns) == len(set(columns))
Copy link
Contributor

@jeff41404 jeff41404 Sep 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this function seem to be unused?

Copy link
Contributor Author

@NKNaN NKNaN Sep 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. It has been removed.



class TestSparseIsCoalescedAPI(unittest.TestCase):
    def setUp(self):
        """Build one tensor of each kind: COO (uncoalesced), CSR, dense."""
        self.dtype = "float32"
        coo_tensor = paddle.sparse.sparse_coo_tensor(
            [[0, 0, 0, 1], [0, 0, 1, 2]],
            [1.0, 2.0, 3.0, 4.0],
            dtype=self.dtype,
        )
        csr_tensor = paddle.sparse.sparse_csr_tensor(
            [0, 2, 3, 5],
            [1, 3, 2, 0, 1],
            [1, 2, 3, 4, 5],
            [3, 4],
            dtype=self.dtype,
        )
        dense_tensor = paddle.to_tensor([1, 2, 3, 4], dtype=self.dtype)
        self.tensors = [coo_tensor, csr_tensor, dense_tensor]

    def test_is_coalesced(self):
        """Compare Tensor.is_coalesced() against the naive oracle on every place."""
        places = [core.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(core.CUDAPlace(0))

        expected = [is_coalesced_naive(t) for t in self.tensors]
        for place in places:
            paddle.disable_static(place)
            for tensor, want in zip(self.tensors, expected):
                self.assertEqual(tensor.is_coalesced(), want)

        paddle.enable_static()


class TestSparseIsCoalescedAPI1(TestSparseIsCoalescedAPI):
    """float64 variant with distinct COO indices.

    Subclasses TestSparseIsCoalescedAPI (not unittest.TestCase) so the
    inherited test_is_coalesced actually runs; with TestCase as the base
    this class defined no test method and asserted nothing.
    """

    def setUp(self):
        self.dtype = "float64"
        coo_indices = [[0, 0, 1, 2], [0, 1, 1, 2]]
        coo_values = [1.0, 2.0, 3.0, 4.0]
        coo_tensor = paddle.sparse.sparse_coo_tensor(
            coo_indices, coo_values, dtype=self.dtype
        )
        csr_crows = [0, 2, 3, 5]
        csr_cols = [1, 3, 2, 0, 1]
        csr_values = [1, 2, 3, 4, 5]
        csr_shape = [3, 4]
        csr_tensor = paddle.sparse.sparse_csr_tensor(
            csr_crows, csr_cols, csr_values, csr_shape, dtype=self.dtype
        )
        other_tensor = paddle.to_tensor([1, 2, 3, 4], dtype=self.dtype)
        self.tensors = [coo_tensor, csr_tensor, other_tensor]


class TestSparseIsCoalescedAPI2(TestSparseIsCoalescedAPI):
    """int8 variant with 3-D COO indices.

    Subclasses TestSparseIsCoalescedAPI (not unittest.TestCase) so the
    inherited test_is_coalesced actually runs; with TestCase as the base
    this class defined no test method and asserted nothing.
    """

    def setUp(self):
        coo_indices = [[0, 0, 1, 2], [0, 1, 1, 2], [0, 1, 1, 2]]
        coo_values = [1.0, 2.0, 3.0, 4.0]
        self.dtype = "int8"
        coo_tensor = paddle.sparse.sparse_coo_tensor(
            coo_indices, coo_values, dtype=self.dtype
        )
        csr_crows = [0, 2, 3, 5]
        csr_cols = [1, 3, 2, 0, 1]
        csr_values = [1, 2, 3, 4, 5]
        csr_shape = [3, 4]
        csr_tensor = paddle.sparse.sparse_csr_tensor(
            csr_crows, csr_cols, csr_values, csr_shape, dtype=self.dtype
        )
        other_tensor = paddle.to_tensor([1, 2, 3, 4], dtype=self.dtype)
        self.tensors = [coo_tensor, csr_tensor, other_tensor]


class TestSparseIsCoalescedAPI3(TestSparseIsCoalescedAPI):
    """int16 variant whose COO tensor is explicitly coalesced.

    Subclasses TestSparseIsCoalescedAPI (not unittest.TestCase) so the
    inherited test_is_coalesced actually runs; with TestCase as the base
    this class defined no test method and asserted nothing.
    """

    def setUp(self):
        coo_indices = [[0, 0, 1, 2], [0, 2, 0, 2], [0, 1, 1, 0]]
        coo_values = [1.0, 2.0, 3.0, 4.0]
        self.dtype = "int16"
        coo_tensor = paddle.sparse.sparse_coo_tensor(
            coo_indices, coo_values, dtype=self.dtype
        ).coalesce()
        csr_crows = [0, 2, 3, 5]
        csr_cols = [1, 3, 2, 0, 1]
        csr_values = [1, 2, 3, 4, 5]
        csr_shape = [3, 4]
        csr_tensor = paddle.sparse.sparse_csr_tensor(
            csr_crows, csr_cols, csr_values, csr_shape, dtype=self.dtype
        )
        other_tensor = paddle.to_tensor([1, 2, 3, 4], dtype=self.dtype)
        self.tensors = [coo_tensor, csr_tensor, other_tensor]


class TestSparseIsCoalescedAPI4(TestSparseIsCoalescedAPI):
    """int32 variant with duplicated COO indices, then coalesced.

    Subclasses TestSparseIsCoalescedAPI (not unittest.TestCase) so the
    inherited test_is_coalesced actually runs; with TestCase as the base
    this class defined no test method and asserted nothing.
    """

    def setUp(self):
        coo_indices = [[0, 0, 0, 1], [0, 0, 1, 2]]
        coo_values = [1.0, 2.0, 3.0, 4.0]
        self.dtype = "int32"
        coo_tensor = paddle.sparse.sparse_coo_tensor(
            coo_indices, coo_values, dtype=self.dtype
        ).coalesce()
        csr_crows = [0, 2, 3, 5]
        csr_cols = [1, 3, 2, 0, 1]
        csr_values = [1, 2, 3, 4, 5]
        csr_shape = [3, 4]
        csr_tensor = paddle.sparse.sparse_csr_tensor(
            csr_crows, csr_cols, csr_values, csr_shape, dtype=self.dtype
        )
        other_tensor = paddle.to_tensor([1, 2, 3, 4], dtype=self.dtype)
        self.tensors = [coo_tensor, csr_tensor, other_tensor]


class TestSparseIsCoalescedAPI5(TestSparseIsCoalescedAPI):
    """int64 variant with duplicated, uncoalesced COO indices.

    Subclasses TestSparseIsCoalescedAPI (not unittest.TestCase) so the
    inherited test_is_coalesced actually runs; with TestCase as the base
    this class defined no test method and asserted nothing.
    """

    def setUp(self):
        coo_indices = [[0, 0, 0, 1], [0, 0, 1, 2]]
        coo_values = [1.0, 2.0, 3.0, 4.0]
        self.dtype = "int64"
        coo_tensor = paddle.sparse.sparse_coo_tensor(
            coo_indices, coo_values, dtype=self.dtype
        )
        csr_crows = [0, 2, 3, 5]
        csr_cols = [1, 3, 2, 0, 1]
        csr_values = [1, 2, 3, 4, 5]
        csr_shape = [3, 4]
        csr_tensor = paddle.sparse.sparse_csr_tensor(
            csr_crows, csr_cols, csr_values, csr_shape, dtype=self.dtype
        )
        other_tensor = paddle.to_tensor([1, 2, 3, 4], dtype=self.dtype)
        self.tensors = [coo_tensor, csr_tensor, other_tensor]


class TestSparseIsCoalescedAPI6(TestSparseIsCoalescedAPI):
    """uint8 variant with duplicated, uncoalesced COO indices.

    Subclasses TestSparseIsCoalescedAPI (not unittest.TestCase) so the
    inherited test_is_coalesced actually runs; with TestCase as the base
    this class defined no test method and asserted nothing.
    """

    def setUp(self):
        coo_indices = [[0, 0, 0, 1], [0, 0, 1, 2]]
        coo_values = [1.0, 2.0, 3.0, 4.0]
        self.dtype = "uint8"
        coo_tensor = paddle.sparse.sparse_coo_tensor(
            coo_indices, coo_values, dtype=self.dtype
        )
        csr_crows = [0, 2, 3, 5]
        csr_cols = [1, 3, 2, 0, 1]
        csr_values = [1, 2, 3, 4, 5]
        csr_shape = [3, 4]
        csr_tensor = paddle.sparse.sparse_csr_tensor(
            csr_crows, csr_cols, csr_values, csr_shape, dtype=self.dtype
        )
        other_tensor = paddle.to_tensor([1, 2, 3, 4], dtype=self.dtype)
        self.tensors = [coo_tensor, csr_tensor, other_tensor]


class TestSparseIsCoalescedAPI7(TestSparseIsCoalescedAPI):
    """bool variant with 3-D COO indices.

    Subclasses TestSparseIsCoalescedAPI (not unittest.TestCase) so the
    inherited test_is_coalesced actually runs; with TestCase as the base
    this class defined no test method and asserted nothing.
    """

    def setUp(self):
        coo_indices = [[0, 0, 1, 2], [0, 1, 1, 2], [0, 1, 1, 2]]
        coo_values = [1.0, 0.0, 0.0, 1.0]
        self.dtype = "bool"
        coo_tensor = paddle.sparse.sparse_coo_tensor(
            coo_indices, coo_values, dtype=self.dtype
        )
        csr_crows = [0, 2, 3, 5]
        csr_cols = [1, 3, 2, 0, 1]
        csr_values = [1, 0, 1, 0, 0]
        csr_shape = [3, 4]
        csr_tensor = paddle.sparse.sparse_csr_tensor(
            csr_crows, csr_cols, csr_values, csr_shape, dtype=self.dtype
        )
        other_tensor = paddle.to_tensor([1, 2, 3, 4], dtype=self.dtype)
        self.tensors = [coo_tensor, csr_tensor, other_tensor]


class TestSparseIsCoalescedAPI8(TestSparseIsCoalescedAPI):
    """complex64 variant with 3-D COO indices.

    Subclasses TestSparseIsCoalescedAPI (not unittest.TestCase) so the
    inherited test_is_coalesced actually runs; with TestCase as the base
    this class defined no test method and asserted nothing.
    """

    def setUp(self):
        coo_indices = [[0, 0, 1, 2], [0, 1, 1, 2], [0, 1, 1, 2]]
        coo_values = [1.0, 2.0, 3.0, 4.0]
        self.dtype = "complex64"
        coo_tensor = paddle.sparse.sparse_coo_tensor(
            coo_indices, coo_values, dtype=self.dtype
        )
        csr_crows = [0, 2, 3, 5]
        csr_cols = [1, 3, 2, 0, 1]
        csr_values = [1, 2, 3, 4, 5]
        csr_shape = [3, 4]
        csr_tensor = paddle.sparse.sparse_csr_tensor(
            csr_crows, csr_cols, csr_values, csr_shape, dtype=self.dtype
        )
        other_tensor = paddle.to_tensor([1, 2, 3, 4], dtype=self.dtype)
        self.tensors = [coo_tensor, csr_tensor, other_tensor]


class TestSparseIsCoalescedAPI9(TestSparseIsCoalescedAPI):
    """complex128 variant with distinct 3-D COO indices.

    Subclasses TestSparseIsCoalescedAPI (not unittest.TestCase) so the
    inherited test_is_coalesced actually runs; with TestCase as the base
    this class defined no test method and asserted nothing.
    """

    def setUp(self):
        coo_indices = [[0, 0, 1, 2], [0, 1, 1, 2], [1, 0, 1, 2]]
        coo_values = [1.0, 2.0, 3.0, 4.0]
        self.dtype = "complex128"
        coo_tensor = paddle.sparse.sparse_coo_tensor(
            coo_indices, coo_values, dtype=self.dtype
        )
        csr_crows = [0, 2, 3, 5]
        csr_cols = [1, 3, 2, 0, 1]
        csr_values = [1, 2, 3, 4, 5]
        csr_shape = [3, 4]
        csr_tensor = paddle.sparse.sparse_csr_tensor(
            csr_crows, csr_cols, csr_values, csr_shape, dtype=self.dtype
        )
        other_tensor = paddle.to_tensor([1, 2, 3, 4], dtype=self.dtype)
        self.tensors = [coo_tensor, csr_tensor, other_tensor]


@unittest.skipIf(
    not core.is_compiled_with_cuda()
    or not core.is_float16_supported(core.CUDAPlace(0)),
    "core is not compiled with CUDA and not support the float16",
)
class TestSparseIsCoalescedFP16API(TestSparseIsCoalescedAPI):
    """float16 variant (CUDA only), COO tensor explicitly coalesced.

    Subclasses TestSparseIsCoalescedAPI (not unittest.TestCase) so the
    inherited test_is_coalesced actually runs; with TestCase as the base
    this class defined no test method and asserted nothing.
    """

    def setUp(self):
        self.dtype = "float16"
        coo_indices = [[0, 0, 0, 1], [0, 0, 1, 2]]
        coo_values = [1.0, 2.0, 3.0, 4.0]
        coo_tensor = paddle.sparse.sparse_coo_tensor(
            coo_indices, coo_values, dtype=self.dtype
        ).coalesce()
        csr_crows = [0, 2, 3, 5]
        csr_cols = [1, 3, 2, 0, 1]
        csr_values = [1, 2, 3, 4, 5]
        csr_shape = [3, 4]
        csr_tensor = paddle.sparse.sparse_csr_tensor(
            csr_crows, csr_cols, csr_values, csr_shape, dtype=self.dtype
        )
        other_tensor = paddle.to_tensor([1, 2, 3, 4], dtype=self.dtype)
        self.tensors = [coo_tensor, csr_tensor, other_tensor]


# Allow running this test file directly (outside the CI test runner).
if __name__ == "__main__":
    unittest.main()