-
Notifications
You must be signed in to change notification settings - Fork 5.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
【PaddlePaddle Hackathon 2】3、为 Paddle 新增 corrcoef(皮尔逊积矩相关系数) API #40690
Changes from all commits
001a5f1
45a53eb
d2aa0fb
bb5c04d
6d88d9a
2c1cfe8
1cf83e8
2d654fd
e5b66e9
6efaca6
53ee671
9a74568
fe98fcd
f39ad07
4ee155e
794e368
02c9ef9
af6b514
f90d599
3d8e0b0
1991f92
760b858
694b895
510c6e7
74969a4
677ba6f
6b8e3d3
87ba181
49227fb
84c65a7
a80e7a5
d63e2dc
3c1dd13
9b66604
46b9021
299d4c0
937a4fe
f631db9
dde2566
f59b0ef
7eddd43
67be88e
607eb71
552c07b
ae572c4
3652bbd
18339f5
decb986
189a29f
69064d2
7c3b09d
7c5efcf
e97d91c
4fca073
51da5d6
2255acf
9cbac57
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import paddle.fluid as fluid | ||
import unittest | ||
import numpy as np | ||
import six | ||
import paddle | ||
import warnings | ||
|
||
|
||
def numpy_corr(np_arr, rowvar=True, dtype='float64'):
    """Return the NumPy reference correlation matrix used by the tests.

    Args:
        np_arr: Input array handed to ``np.corrcoef``.
        rowvar: Forwarded to ``np.corrcoef``; when True each row is treated
            as a variable.
        dtype: Forwarded to ``np.corrcoef`` as the result data type.

    Returns:
        Whatever ``np.corrcoef`` returns for the given arguments.
    """
    return np.corrcoef(np_arr, rowvar=rowvar, dtype=dtype)
|
||
|
||
class Corr_Test(unittest.TestCase):
    """Compare ``paddle.linalg.corrcoef`` with ``np.corrcoef`` on a 2-D input.

    Subclasses override ``setUp`` to run the same checks on another
    ``self.shape``.
    """

    def setUp(self):
        self.shape = [4, 5]

    def _check_against_numpy(self, expected_rowvar, **corrcoef_kwargs):
        """Run corrcoef on every available device/dtype and compare to NumPy.

        Args:
            expected_rowvar: ``rowvar`` value given to the NumPy reference.
            **corrcoef_kwargs: Keyword arguments forwarded to
                ``paddle.linalg.corrcoef``; left empty to exercise its
                defaults.
        """
        # Pair each device name with its place so the two cannot drift apart.
        places = [('cpu', fluid.CPUPlace())]
        if fluid.core.is_compiled_with_cuda():
            places.append(('gpu', fluid.CUDAPlace(0)))

        for device, place in places:
            paddle.set_device(device)

            for dtype in ['float64', 'float32']:
                np_arr = np.random.rand(*self.shape).astype(dtype)
                tensor = paddle.to_tensor(np_arr, place=place)
                corr = paddle.linalg.corrcoef(tensor, **corrcoef_kwargs)
                np_corr = numpy_corr(
                    np_arr, rowvar=expected_rowvar, dtype=dtype)
                # float32 gets a looser absolute tolerance; 1.e-8 is the
                # np.allclose default, so float64 is compared as before.
                atol = 1.e-5 if dtype == 'float32' else 1.e-8
                self.assertTrue(
                    np.allclose(
                        np_corr, corr.numpy(), atol=atol),
                    "corrcoef mismatch on %s with dtype %s" % (device, dtype))

    def test_tensor_corr_default(self):
        """Default arguments must match NumPy with ``rowvar=True``."""
        self._check_against_numpy(True)

    def test_tensor_corr_rowvar(self):
        """``rowvar=False`` must match NumPy with ``rowvar=False``."""
        self._check_against_numpy(False, rowvar=False)
|
||
|
||
# Input(x) only support N-D (1<=N<=2) tensor
class Corr_Test2(Corr_Test):
    """Re-run the ``Corr_Test`` checks on a 1-D input of shape ``[10]``."""

    def setUp(self):
        self.shape = [10]
|
||
|
||
class Corr_Test3(Corr_Test):
    """Re-run the ``Corr_Test`` checks on a 2-D input of shape ``[4, 5]``.

    NOTE(review): this is the same shape as the base class, so it currently
    repeats the base checks on fresh random data.
    """

    def setUp(self):
        self.shape = [4, 5]
|
||
|
||
# Input(x) only support N-D (1<=N<=2) tensor
class Corr_Test4(unittest.TestCase):
    """A 3-D input must be rejected by ``corrcoef`` with ``ValueError``."""

    def setUp(self):
        self.shape = [2, 5, 2]

    def test_errors(self):
        np_arr = np.random.rand(*self.shape).astype('float64')
        tensor = paddle.to_tensor(np_arr)

        # Only the corrcoef call sits inside the context manager, so an
        # error raised while building the tensor cannot satisfy the check.
        with self.assertRaises(ValueError):
            paddle.linalg.corrcoef(tensor)
|
||
|
||
# test unsupported complex input
class Corr_Comeplex_Test(unittest.TestCase):
    """In static graph mode a complex input must raise ``TypeError``.

    Subclasses override ``setUp`` to check another complex dtype.
    """

    def setUp(self):
        self.dtype = 'complex128'

    def test_errors(self):
        paddle.enable_static()
        try:
            x1 = fluid.data(name=self.dtype, shape=[2], dtype=self.dtype)
            self.assertRaises(TypeError, paddle.linalg.corrcoef, x=x1)
        finally:
            # Always return to dynamic mode, even when the assertion fails,
            # so a failure here cannot leak static mode into other tests.
            paddle.disable_static()
|
||
|
||
class Corr_Test5(Corr_Comeplex_Test):
    """Re-run the unsupported-dtype check with ``complex64``."""

    def setUp(self):
        self.dtype = 'complex64'
|
||
|
||
# Run the test cases when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,7 @@ | |
from .creation import full | ||
|
||
import paddle | ||
import warnings | ||
from paddle.common_ops_import import core | ||
from paddle.common_ops_import import VarDesc | ||
from paddle import _C_ops | ||
|
@@ -3181,3 +3182,72 @@ def lstsq(x, y, rcond=None, driver=None, name=None): | |
singular_values = paddle.static.data(name='singular_values', shape=[0]) | ||
|
||
return solution, residuals, rank, singular_values | ||
|
||
|
||
def corrcoef(x, rowvar=True, name=None):
    """

    A correlation coefficient matrix indicates the correlation of each pair of variables in the input matrix.
    For example, for N-dimensional samples X=[x1,x2,…xN]T, the correlation coefficient matrix
    element Rij is the correlation of xi and xj. The diagonal element Rii is the correlation of xi with itself.

    The relationship between the correlation coefficient matrix `R` and the
    covariance matrix `C`, is

    .. math:: R_{ij} = \\frac{ C_{ij} } { \\sqrt{ C_{ii} * C_{jj} } }

    The values of `R` are between -1 and 1.

    Note:
        Unlike ``numpy.corrcoef`` there is no ``y`` parameter, because the
        underlying ``cov`` does not take one either.

    Parameters:

        x(Tensor): A N-D(N<=2) Tensor containing multiple variables and observations. By default, each row of x represents a variable. Also see rowvar below.
        rowvar(Bool, optional): If rowvar is True (default), then each row represents a variable, with observations in the columns. Default: True.
        name(str, optional): Name of the output. Default is None. It's used to print debug info for developers. Details: :ref:`api_guide_Name`.

    Returns:

        The correlation coefficient matrix of the variables.

    Raises:

        ValueError: If ``x`` is not a 1-D or 2-D tensor.

    Examples:
        .. code-block:: python
            :name: code-example1

            import paddle

            xt = paddle.rand((3,4))
            print(paddle.linalg.corrcoef(xt))

            # Tensor(shape=[3, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
            # [[ 1.        , -0.73702252,  0.66228950],
            # [-0.73702258,  1.        , -0.77104872],
            # [ 0.66228974, -0.77104825,  1.        ]])

    """
    ndim = len(x.shape)
    if ndim > 2 or ndim < 1:
        raise ValueError(
            "Input(x) only support N-D (1<=N<=2) tensor in corrcoef, but received "
            "length of Input(x) is %s." % ndim)
    # The second argument is the input's name as it appears in the error
    # message, so it has to be 'x'.
    check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'corrcoef')

    c = cov(x, rowvar)
    if c.ndim == 0:
        # scalar covariance
        # nan if incorrect value (nan, inf, 0), 1 otherwise
        return c / c

    # The diagonal of the covariance matrix holds the variances.
    d = paddle.diag(c)

    if paddle.is_complex(d):
        d = d.real()
    stddev = paddle.sqrt(d)
    # Divide row i and column j by the matching standard deviations,
    # i.e. C_ij / (stddev_i * stddev_j).
    c = c / stddev[:, None]
    c = c / stddev[None, :]

    # Clip the result to [-1, 1]. For complex values the real and imaginary
    # parts are clipped separately, which bounds each part but not the modulus.
    if paddle.is_complex(c):
        return paddle.complex(
            paddle.clip(c.real(), -1, 1), paddle.clip(c.imag(), -1, 1))

    return paddle.clip(c, -1, 1)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
还是过不了,float32精度是要差一些,就测float64精度吧,你找找最小的atol
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
好的好的 我调整一下