From 7d2e856ec0d1af7fb5b0cbe561574cab7ebb6db9 Mon Sep 17 00:00:00 2001 From: coco <1228759711@qq.com> Date: Thu, 14 Sep 2023 02:52:59 +0800 Subject: [PATCH 01/13] add copysign rfc --- rfcs/APIs/20230914_api_design_for_copysign.md | 257 ++++++++++++++++++ 1 file changed, 257 insertions(+) create mode 100644 rfcs/APIs/20230914_api_design_for_copysign.md diff --git a/rfcs/APIs/20230914_api_design_for_copysign.md b/rfcs/APIs/20230914_api_design_for_copysign.md new file mode 100644 index 000000000..d320f5efc --- /dev/null +++ b/rfcs/APIs/20230914_api_design_for_copysign.md @@ -0,0 +1,257 @@ +# paddle.copysign 设计文档 + +| API 名称 | paddle.copysign | +| ------------ | -------------------------------- | +| 提交作者 | coco | +| 提交时间 | 2023-09-14 | +| 版本号 | V1.0 | +| 依赖飞桨版本 | develop | +| 文件名 | 20230914_api_design_for_copysign | + +# 一、概述 + +## 1、相关背景 + +为了提升飞桨API丰富度,Paddle需要扩充API,调用路径为: + +- paddle.copysign 作为独立的函数调用,非 inplace +- paddle.copysign_,作为独立的函数,inplace 地修改输入; +- Tensor.copysign作为 Tensor 的方法使用,非 inplace; +- Tensor.copysign_作为 Tensor 的方法使用, inplace 修改输入; + +## 2、功能目标 + +根据两个输入逐元素地计算结果张量,其结果由第一个输入的绝对值大小及第二个输入的符号组成。 + +## 3、意义 + +飞桨支持直接通过张量进行批量正负符号复制 + +# 二、飞桨现状 + +目前paddle缺少相关功能实现。 + +# 三、业内方案调研 + +## PyTorch + +PyTorch中有API `torch.copysign(input, other, *, out=None) → [Tensor]` 以及对应的`torch.Tensor.copysign` + +在PyTorch中介绍为: + +``` +Create a new floating-point tensor with the magnitude of input and the sign of other, elementwise. + +Supports broadcasting to a common shape, and integer and float inputs. 
+``` + +## 实现方法 + +从实现方法上,PyTorch是通过c++实现的,[CPU kernel代码位置](https://github.com/pytorch/pytorch/blob/main/aten/src/ATen/native/cpu/BinaryOpsKernel.cpp#L1148-L1158) + +```cpp +void copysign_kernel(TensorIteratorBase& iter) { + AT_DISPATCH_FLOATING_TYPES_AND2(kBFloat16, kHalf, iter.common_dtype(), "copysign_cpu", [&]() { + cpu_kernel_vec(iter, + [](scalar_t a, scalar_t b) -> scalar_t { + return c10::copysign(a, b); + }, + [](Vectorized<scalar_t> a, Vectorized<scalar_t> b) -> Vectorized<scalar_t> { + return a.copysign(b); + }); + }); +} +``` + +在c10 namespace中,[代码位置](https://github.com/pytorch/pytorch/blob/main/c10/util/copysign.h#L12-L15): + +```cpp +namespace c10 { + +// Note: Explicit implementation of copysign for Half and BFloat16 +// is needed to workaround g++-7/8 crash on aarch64, but also makes +// copysign faster for the half-precision types +template <typename T, typename U> +inline auto copysign(const T& a, const U& b) { + return std::copysign(a, b); +} +... +} // namespace c10 +``` + +[cuda kernel代码位置](https://github.com/pytorch/pytorch/blob/main/aten/src/ATen/native/cuda/CopysignKernel.cu#L23-L29) + +```cpp +namespace at::native { + +void copysign_kernel_cuda(TensorIteratorBase& iter) { + AT_DISPATCH_FLOATING_TYPES_AND2(kBFloat16, kHalf, iter.common_dtype(), "copysign_cuda", [&]() { + gpu_kernel_with_scalars(iter, []GPU_LAMBDA(scalar_t a, scalar_t b) -> scalar_t { + return c10::cuda::compat::copysign(a, b); + }); + }); +} + +REGISTER_DISPATCH(copysign_stub, &copysign_kernel_cuda); + +} // namespace at::native +``` + +namespace中的`copysign`调用,[代码位置](https://github.com/pytorch/pytorch/blob/main/c10/cuda/CUDAMathCompat.h#L46-L65) + +```cpp +__MATH_FUNCTIONS_DECL__ float copysign(float x, float y) { +#if defined(__CUDA_ARCH__) || defined(__HIPCC__) + return ::copysignf(x, y); +#else + // std::copysign gets ICE/Segfaults with gcc 7.5/8 on arm64 + // (e.g. 
Jetson), see PyTorch PR #51834 + // This host function needs to be here for the compiler but is never used + TORCH_INTERNAL_ASSERT( + false, "CUDAMathCompat copysign should not run on the CPU"); +#endif +} +__MATH_FUNCTIONS_DECL__ double copysign(double x, double y) { +#if defined(__CUDA_ARCH__) || defined(__HIPCC__) + return ::copysign(x, y); +#else + // see above + TORCH_INTERNAL_ASSERT( + false, "CUDAMathCompat copysign should not run on the CPU"); +#endif +} +``` + +方法都是底层cpp调用copysign函数 + + + +**反向backward:** + +算子配置[代码位置](https://github.com/pytorch/pytorch/blob/main/tools/autograd/derivatives.yaml#L474-L481C28) + +```yaml +- name: copysign.Tensor(Tensor self, Tensor other) -> Tensor + self: copysign_tensor_self_backward(grad, self, result) + other: zeros_like(other) + result: copysign_tensor_self_backward(self_t, self_p, result) + +- name: copysign.Scalar(Tensor self, Scalar other) -> Tensor + self: copysign_tensor_self_backward(grad, self, result) + result: auto_element_wise +``` + +backward 反向[代码位置](https://github.com/pytorch/pytorch/blob/main/torch/csrc/autograd/FunctionsManual.cpp#L94-L101) + +```cpp +Tensor copysign_tensor_self_backward( + const Tensor& grad, + const Tensor& self, + const Tensor& result) { + auto ratio = result / self; + ratio.masked_fill_(self == 0, 0); + return grad * ratio; +} +``` + +## TensorFlow + +无`copysign`实现 + +## Numpy + +numpy.**copysign**(*x1*, *x2*, */*, *out=None*, ***, *where=True*, *casting='same_kind'*, *order='K'*, *dtype=None*, *subok=True*[, *signature*, *extobj*]) *= <ufunc 'copysign'>* + +Change the sign of x1 to that of x2, element-wise. If *x2* is a scalar, its sign will be copied to all elements of *x1*. 
+ +### 实现方法 + +先模板生成函数,底层cpp调用实现[代码位置](https://github.com/numpy/numpy/blob/main/numpy/core/src/umath/loops.c.src#L1213-L1221) + +``` +NPY_NO_EXPORT void +@TYPE@_copysign(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) +{ + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + *((@type@ *)op1)= npy_copysign@c@(in1, in2); + } +} +``` + +实际调用cpp的math库[代码位置](https://github.com/numpy/numpy/blob/main/numpy/core/include/numpy/npy_math.h#L199) + +```cpp +#include + +... +#define npy_copysign copysign +... +``` + + + +# 四、对比分析 + +PyTorch和Numpy实现方式基本一致,都是底层调用cpp的math库实现`copysign`,PyTorch可进行backward。 + +# 五、设计思路与实现方案 + +## 命名与参数设计 + +API的设计为: + +- paddle.copysign(x, y) 作为独立的函数调用,非 inplace +- paddle.copysign_(x, y),作为独立的函数,inplace 地修改输入; +- Tensor.copysign(y)做为 Tensor 的方法使用,非 inplace; +- Tensor.copysign_(y)做为 Tensor 的方法使用, inplace 修改输入; + +其中 + ++ x(Tensor) - 需要取用绝对值作为输出数值部分的Tensor ++ y(Tensor, int, float 等 number) + +## 底层OP设计 + +参考PyTorch与Numpy中的设计,调用底层cpp实现OP + +## API实现方案 + +1. 配置算子的yaml,注意配置inplace +2. 实现`CopySignInferMeta`,在调用kernel之前计算好`out`的`shape`和`dtype` +3. 实现`CopySignKernel`的CPU和GPU代码以及forward、backward +4. 封装Python的API,支持动态图和静态图,编写文档 +5. 
编写单测 + +# 六、测试和验收的考量 + +测试考虑的case如下: + ++ **编程范式场景**:常规覆盖动态图和静态图的测试场景 + ++ **硬件场景**:常规需覆盖 CPU、GPU 两种测试场景 ++ **参数组合场景**:常规覆盖 API 的全部入参,需要对全部入参进行参数有效性和边界值测试,同时可选参数也需有相应的测试覆盖 ++ **计算精度**:需要保证前向计算、反向计算的精度正确性 + + 前向计算:通过 numpy 实现的函数的对比结果 + + 反向计算:通过 numpy 推导,计算反向结果的正确性 ++ **维度测试**:Paddle API 支持的最低维度为 0 维,单测中应编写相应的 0 维尺寸测试 case ++ **边界测试**:y为0、+0、-0时,测试与numpy结果的一致性 + +# 七、可行性分析及规划排期 + +有业内方案实现作为参考,工期上可以满足在当前版本周期内开发完成。 + +# 八、影响面 + +为独立新增API,对其他模块没有影响 + +# 名词解释 + +无 + +# 附件及参考资料 + +[PyTorch文档](https://pytorch.org/docs/stable/generated/torch.copysign.html?highlight=copysign#torch.copysign) + +[Numpy文档](https://numpy.org/doc/stable/reference/generated/numpy.copysign.html#numpy-copysign) \ No newline at end of file From 4c0022db6fef9cd3c0029f630a265bc9ac64caed Mon Sep 17 00:00:00 2001 From: coco <1228759711@qq.com> Date: Fri, 15 Sep 2023 12:02:14 +0800 Subject: [PATCH 02/13] fix input args, add backward kernel, fix python api --- rfcs/APIs/20230914_api_design_for_copysign.md | 50 ++++++++++++++++--- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/rfcs/APIs/20230914_api_design_for_copysign.md b/rfcs/APIs/20230914_api_design_for_copysign.md index d320f5efc..68a47cb3e 100644 --- a/rfcs/APIs/20230914_api_design_for_copysign.md +++ b/rfcs/APIs/20230914_api_design_for_copysign.md @@ -202,19 +202,55 @@ PyTorch和Numpy实现方式基本一致,都是底层调用cpp的math库实现` API的设计为: -- paddle.copysign(x, y) 作为独立的函数调用,非 inplace -- paddle.copysign_(x, y),作为独立的函数,inplace 地修改输入; -- Tensor.copysign(y)做为 Tensor 的方法使用,非 inplace; -- Tensor.copysign_(y)做为 Tensor 的方法使用, inplace 修改输入; +- paddle.copysign(x, y, name=None) 作为独立的函数调用,非 inplace; +- paddle.copysign_(x, y, name=None),作为独立的函数,inplace 地修改输入; +- Tensor.copysign(y, name=None)做为 Tensor 的方法使用,非 inplace; +- Tensor.copysign_(y, name=None)做为 Tensor 的方法使用, inplace 修改输入; 其中 -+ x(Tensor) - 需要取用绝对值作为输出数值部分的Tensor -+ y(Tensor, int, float 等 number) ++ x(Tensor) - 需要取用绝对值作为输出数值部分的 Tensor , 支持 `int32`、`int64`、`float32`、`float64` ++ y(Tensor | Number) - 为 Tensor 
时,shape 需要与 x 相同,或者可广播成 x.shape;为 Number 时,支持 `int32`、`int64`、`float32`、`float64` ## 底层OP设计 -参考PyTorch与Numpy中的设计,调用底层cpp实现OP +参考PyTorch与Numpy中的设计,调用底层cpp实现OP,反向 kernel impl 大致如下: + +```cpp +template <typename T> +struct CopySignGradFunctor { + CopySignGradFunctor(const T* x_data, const T* y_data, const T* dout, T* dx, int64_t numel) + : x_data_(x_data), y_data_(y_data), dout_(dout), dx_(dx), numel_(numel) {} + + // backward 逻辑如下 + HOSTDEVICE void operator()(int64_t idx) const { + if (x_data_[idx] == T(0)) dx_[idx] = T(0); + else dx_[idx] = T(dout_[idx]) * (T(std::copysign(x_data_[idx], y_data_[idx]) / x_data_[idx])); + } + + const T* x_data_; + const T* y_data_; + const T* dout_; + T* dx_; + int64_t numel_; +}; + +template <typename T, typename Context> +void CopySignGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& out_grad, + DenseTensor* x_grad) { + dev_ctx.template Alloc<T>(x_grad); + auto x_data = x.data<T>(), y_data = y.data<T>(), out_grad_data = out_grad.data<T>(); + auto x_grad_data = x_grad->data<T>(); + phi::funcs::ForRange<Context> for_range(dev_ctx, x.numel()); + phi::CopySignGradFunctor<T> functor(x_data, y_data, out_grad_data, x_grad_data, x.numel()); + for_range(functor); +} +``` + + ## API实现方案 From 5fff675576d71b8f17f07f1cad7bbd28a9c0412e Mon Sep 17 00:00:00 2001 From: coco <1228759711@qq.com> Date: Fri, 15 Sep 2023 14:37:32 +0800 Subject: [PATCH 03/13] fix types --- rfcs/APIs/20230914_api_design_for_copysign.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rfcs/APIs/20230914_api_design_for_copysign.md b/rfcs/APIs/20230914_api_design_for_copysign.md index 68a47cb3e..742299e43 100644 --- a/rfcs/APIs/20230914_api_design_for_copysign.md +++ b/rfcs/APIs/20230914_api_design_for_copysign.md @@ -209,8 +209,8 @@ API的设计为: 其中 -+ x(Tensor) - 需要取用绝对值作为输出数值部分的 Tensor , 支持 `int32`、`int64`、`float32`、`float64` -+ y(Tensor | Number) - 为 Tensor 时,shape 需要与 x 相同,或者可广播成 x.shape;为 Number 时,支持 `int32`、`int64`、`float32`、`float64` ++ x(Tensor) - 需要取用绝对值作为输出数值部分的 
Tensor , 支持 `bool`、`float16`、`float32`、`float64`、`uint8`、`int8`、`int16`、`int32`、`int64`、`bfloat16` ++ y(Tensor | Number) - 为 Tensor 时,shape 需要与 x 相同,或者可广播成 x.shape;为 Number 时,支持 `bool`、`float16`、`float32`、`float64`、`uint8`、`int8`、`int16`、`int32`、`int64`、`bfloat16` ## 底层OP设计 From bfa46fec6518b3784294cfd2ad6faaeb3c814f1e Mon Sep 17 00:00:00 2001 From: coco <1228759711@qq.com> Date: Fri, 15 Sep 2023 14:56:00 +0800 Subject: [PATCH 04/13] fix Number types --- rfcs/APIs/20230914_api_design_for_copysign.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rfcs/APIs/20230914_api_design_for_copysign.md b/rfcs/APIs/20230914_api_design_for_copysign.md index 742299e43..f2a816d4d 100644 --- a/rfcs/APIs/20230914_api_design_for_copysign.md +++ b/rfcs/APIs/20230914_api_design_for_copysign.md @@ -210,7 +210,7 @@ API的设计为: 其中 + x(Tensor) - 需要取用绝对值作为输出数值部分的 Tensor , 支持 `bool`、`float16`、`float32`、`float64`、`uint8`、`int8`、`int16`、`int32`、`int64`、`bfloat16` -+ y(Tensor | Number) - 为 Tensor 时,shape 需要与 x 相同,或者可广播成 x.shape;为 Number 时,支持 `bool`、`float16`、`float32`、`float64`、`uint8`、`int8`、`int16`、`int32`、`int64`、`bfloat16` ++ y(Tensor | Number) - 为 Tensor 时,shape 需要与 x 相同,或者可广播成 x.shape,支持 `bool`、`float16`、`float32`、`float64`、`uint8`、`int8`、`int16`、`int32`、`int64`、`bfloat16`;为 Number 时,支持 `bool`、`int`、`float` ## 底层OP设计 From 72fbcfc9ede2814b78780372175eaf35402187b9 Mon Sep 17 00:00:00 2001 From: coco <1228759711@qq.com> Date: Wed, 27 Sep 2023 06:40:36 +0800 Subject: [PATCH 05/13] add pdist api design --- rfcs/20230926_api_design_for_pdist.md | 409 ++++++++++++++++++++++++++ 1 file changed, 409 insertions(+) create mode 100644 rfcs/20230926_api_design_for_pdist.md diff --git a/rfcs/20230926_api_design_for_pdist.md b/rfcs/20230926_api_design_for_pdist.md new file mode 100644 index 000000000..4ffe30875 --- /dev/null +++ b/rfcs/20230926_api_design_for_pdist.md @@ -0,0 +1,409 @@ +# paddle.pdist设计文档 + +| API 名称 | paddle.pdist | +| ------------ | ----------------------------- | +| 
提交作者 | coco | +| 提交时间 | 2023-09-26 | +| 版本号 | V1.0 | +| 依赖飞桨版本 | develop | +| 文件名 | 20230926_api_defign_for_pdist | + +# 一、概述 + +## 1、相关背景 + +为paddle新增该API,为计算N个向量两两之间的p-norm距离。 + +## 2、功能目标 + +一个矩阵`A`的大小为`MxN`,那么`B=pdist(A)`得到的矩阵B的大小为1行`M*(M-1)/2`列,表示的意义是M行数据,每两行计算一下p-norm距离,默认欧式距离。例如a = [[0.0, 1.0],[2.0,3.0],[4.0,5.0],[6.0,7.0]],输出为[2.8284, 5.6569, 8.4853, 2.8284, 5.6569, 2.8284]。输出顺序为distance(第一行,第二行), distance(第一行,第三行), ... distance(第二行,第三行)... + +## 3、意义 + +飞桨支持直接两两计算向量间的距离。 + +# 二、飞桨现状 + +目前paddle缺少相关功能实现。 + +# 三、业内方案调研 + +## Scipy + +Scipy中有API`scipy.spatial.distance.pdist` + +在Scipy中介绍为: + +``` +Pairwise distances between observations in n-dimensional space. +``` + +## 实现方法 + +从实现方法上,Scipy是通过py实现的,[代码位置](https://github.com/scipy/scipy/blob/v1.11.2/scipy/spatial/distance.py#L2195-L2233) + +```python + X = _asarray_validated(X, sparse_ok=False, objects_ok=True, mask_ok=True, + check_finite=False) + + s = X.shape + if len(s) != 2: + raise ValueError('A 2-dimensional array must be passed.') + + m, n = s + + if callable(metric): + mstr = getattr(metric, '__name__', 'UnknownCustomMetric') + metric_info = _METRIC_ALIAS.get(mstr, None) + + if metric_info is not None: + X, typ, kwargs = _validate_pdist_input( + X, m, n, metric_info, **kwargs) + + return _pdist_callable(X, metric=metric, out=out, **kwargs) + elif isinstance(metric, str): + mstr = metric.lower() + metric_info = _METRIC_ALIAS.get(mstr, None) + + if metric_info is not None: + pdist_fn = metric_info.pdist_func + _extra_windows_error_checks(X, out, (m * (m - 1) / 2,), **kwargs) + return pdist_fn(X, out=out, **kwargs) + elif mstr.startswith("test_"): + metric_info = _TEST_METRICS.get(mstr, None) + if metric_info is None: + raise ValueError(f'Unknown "Test" Distance Metric: {mstr[5:]}') + X, typ, kwargs = _validate_pdist_input( + X, m, n, metric_info, **kwargs) + return _pdist_callable( + X, metric=metric_info.dist_func, out=out, **kwargs) + else: + raise ValueError('Unknown Distance Metric: %s' % mstr) + 
else: + raise TypeError('2nd argument metric must be a string identifier ' + 'or a function.') +``` + +先找到`metric`对应的函数,然后call调用,例如`metric`为`euclidean`时,调用`euclidean`的函数。[代码位置](https://github.com/scipy/scipy/blob/v1.11.2/scipy/spatial/distance.py#L1781C1-L1787C7) + + + +```python + MetricInfo( + canonical_name='euclidean', + aka={'euclidean', 'euclid', 'eu', 'e'}, + dist_func=euclidean, + cdist_func=_distance_pybind.cdist_euclidean, + pdist_func=_distance_pybind.pdist_euclidean, + ), +``` + +[euclidean调用minkowski](https://github.com/scipy/scipy/blob/v1.11.2/scipy/spatial/distance.py#L500-L536)和[minkowski实现](https://github.com/scipy/scipy/blob/v1.11.2/scipy/spatial/distance.py#L429-L497) + +```python +def euclidean(u, v, w=None): + return minkowski(u, v, p=2, w=w) + + +def minkowski(u, v, p=2, w=None): + u = _validate_vector(u) + v = _validate_vector(v) + if p <= 0: + raise ValueError("p must be greater than 0") + u_v = u - v + if w is not None: + w = _validate_weights(w) + if p == 1: + root_w = w + elif p == 2: + # better precision and speed + root_w = np.sqrt(w) + elif p == np.inf: + root_w = (w != 0) + else: + root_w = np.power(w, 1/p) + u_v = root_w * u_v + dist = norm(u_v, ord=p) + return dist +``` + +主要是调用`norm`实现计算 + +```python +def norm(x, ord=None, axis=None): + if not issparse(x): + raise TypeError("input is not sparse. use numpy.linalg.norm") + + # Check the default case first and handle it immediately. + if axis is None and ord in (None, 'fro', 'f'): + return _sparse_frobenius_norm(x) + + # Some norms require functions that are not implemented for all types. 
+ x = x.tocsr() + + if axis is None: + axis = (0, 1) + elif not isinstance(axis, tuple): + msg = "'axis' must be None, an integer or a tuple of integers" + try: + int_axis = int(axis) + except TypeError as e: + raise TypeError(msg) from e + if axis != int_axis: + raise TypeError(msg) + axis = (int_axis,) + + nd = 2 + if len(axis) == 2: + row_axis, col_axis = axis + if not (-nd <= row_axis < nd and -nd <= col_axis < nd): + raise ValueError('Invalid axis %r for an array with shape %r' % + (axis, x.shape)) + if row_axis % nd == col_axis % nd: + raise ValueError('Duplicate axes given.') + if ord == 2: + # Only solver="lobpcg" supports all numpy dtypes + _, s, _ = svds(x, k=1, solver="lobpcg") + return s[0] + elif ord == -2: + raise NotImplementedError + #return _multi_svd_norm(x, row_axis, col_axis, amin) + elif ord == 1: + return abs(x).sum(axis=row_axis).max(axis=col_axis)[0,0] + elif ord == np.inf: + return abs(x).sum(axis=col_axis).max(axis=row_axis)[0,0] + elif ord == -1: + return abs(x).sum(axis=row_axis).min(axis=col_axis)[0,0] + elif ord == -np.inf: + return abs(x).sum(axis=col_axis).min(axis=row_axis)[0,0] + elif ord in (None, 'f', 'fro'): + # The axis order does not matter for this norm. 
+ return _sparse_frobenius_norm(x) + else: + raise ValueError("Invalid norm order for matrices.") + elif len(axis) == 1: + a, = axis + if not (-nd <= a < nd): + raise ValueError('Invalid axis %r for an array with shape %r' % + (axis, x.shape)) + if ord == np.inf: + M = abs(x).max(axis=a) + elif ord == -np.inf: + M = abs(x).min(axis=a) + elif ord == 0: + # Zero norm + M = (x != 0).sum(axis=a) + elif ord == 1: + # special case for speedup + M = abs(x).sum(axis=a) + elif ord in (2, None): + M = sqrt(abs(x).power(2).sum(axis=a)) + else: + try: + ord + 1 + except TypeError as e: + raise ValueError('Invalid norm order for vectors.') from e + M = np.power(abs(x).power(ord).sum(axis=a), 1 / ord) + if hasattr(M, 'toarray'): + return M.toarray().ravel() + elif hasattr(M, 'A'): + return M.A.ravel() + else: + return M.ravel() + else: + raise ValueError("Improper number of dimensions to norm.") +``` + + + + + + + + + +## PyTorch + +Parameters: + +- **input** – input tensor of shape N×M. +- **p** – p value for the p-norm distance to calculate between each vector pair ∈[0,∞]∈[0,∞]. + +并且有相关描述: + +This function is equivalent to `scipy.spatial.distance.pdist(input, 'minkowski', p=p)` if p∈(0,∞). When p=0 it is equivalent to `scipy.spatial.distance.pdist(input, 'hamming') * M`. When p=∞, the closest scipy function is `scipy.spatial.distance.pdist(xn, lambda x, y: np.abs(x - y).max())`. 
+ + + +相关[实现位置](https://github.com/pytorch/pytorch/blob/d0f82cd082fad7243226e0ab68fd995873ea7d76/aten/src/ATen/native/Distance.cpp#L58-L64) + +```cpp +Tensor pdist(const Tensor& self, const double p) { + TORCH_CHECK(self.dim() == 2, + "pdist only supports 2D tensors, got: ", self.dim(), "D"); + TORCH_CHECK(at::isFloatingType(self.scalar_type()), "pdist only supports floating-point dtypes"); + TORCH_CHECK(p >= 0, "pdist only supports non-negative p values"); + return at::_pdist_forward(self.contiguous(), p); +} +``` + +调用`_pdist_forward`,[实现位置](https://github.com/pytorch/pytorch/blob/d0f82cd082fad7243226e0ab68fd995873ea7d76/aten/src/ATen/native/Distance.cpp#L244-L262) + +```cpp +Tensor _pdist_forward(const Tensor& self, const double p) { + TORCH_CHECK(self.is_contiguous(), "_pdist_forward requires contiguous input"); + auto device = self.device().type(); + TORCH_CHECK(device == kCPU || device == kCUDA, "_pdist_forward only supports CPU and CUDA devices, got: ", device); + Tensor result = at::empty({0}, self.options(), LEGACY_CONTIGUOUS_MEMORY_FORMAT); + if (self.size(0) <= 1) { + result.resize_({0}); + } else { + int64_t n = self.size(0); + int64_t c = n * (n - 1) / 2; + result.resize_({c}); + if (self.size(1) == 0) { + result.fill_(0); + } else { + pdist_forward_stub(device, result, self, p); + } + } + return result; +} +``` + +主要调用`pdist_forward_stub`,绑定了具体的`pdist_forward_kernel_impl` + +```cpp +REGISTER_DISPATCH(pdist_forward_stub, &pdist_forward_kernel_impl); +``` + +([CPU](https://github.com/pytorch/pytorch/blob/d0f82cd082fad7243226e0ab68fd995873ea7d76/aten/src/ATen/native/cpu/DistanceOpsKernel.cpp#L446)和[CUDA](https://github.com/pytorch/pytorch/blob/d0f82cd082fad7243226e0ab68fd995873ea7d76/aten/src/ATen/native/cuda/DistanceKernel.cu#L360)实现绑定了同一个`pdist_forward_kernel_impl`) + +而后`pdist_forward_kernel_impl`的[实现位置](https://github.com/pytorch/pytorch/blob/d0f82cd082fad7243226e0ab68fd995873ea7d76/aten/src/ATen/native/cpu/DistanceOpsKernel.cpp#L419C1-L423C2) + 
+```cpp +void pdist_forward_kernel_impl(Tensor& result, const Tensor& self, const double p) { + AT_DISPATCH_FLOATING_TYPES(self.scalar_type(), "pdist", [&] { + Dist<scalar_t>::apply_pdist(result, self, p); + }); +} +``` + +调用`apply_pdist`,[代码位置](https://github.com/pytorch/pytorch/blob/d0f82cd082fad7243226e0ab68fd995873ea7d76/aten/src/ATen/native/cpu/DistanceOpsKernel.cpp#L190-L202) + +```cpp + // Assumes self is nonempty, contiguous, and 2D + static void apply_pdist(Tensor& result, const Tensor& self, const scalar_t p) { + if (p == 0.0) { + run_parallel_pdist<zdist_calc<Vec>>(result, self, p); + } else if (p == 1.0) { + run_parallel_pdist<odist_calc<Vec>>(result, self, p); + } else if (p == 2.0) { + run_parallel_pdist<tdist_calc<Vec>>(result, self, p); + } else if (std::isinf(p)) { + run_parallel_pdist<idist_calc<Vec>>(result, self, p); + } else { + run_parallel_pdist<pdist_calc<Vec>>(result, self, p); + } + } +``` + +`run_parallel_pdist`具体实现 + +```cpp + template <typename F> + static void run_parallel_pdist(Tensor& result, const Tensor& self, const scalar_t p) { + const scalar_t * const self_start = self.data_ptr<scalar_t>(); + const scalar_t * const self_end = self_start + self.numel(); + int64_t n = self.size(0); + int64_t m = self.size(1); + + scalar_t * const res_start = result.data_ptr<scalar_t>(); + int64_t combs = result.numel(); // n * (n - 1) / 2 + + // We conceptually iterate over tuples of (i, j, k) where i is the first + // vector from the input, j is the second, and k is the result index. This + // parallelizes over the range of k and infers what i and j are from the + // value of k. 
+ parallel_for(0, combs, internal::GRAIN_SIZE / (16 * m), [p, self_start, self_end, n, m, res_start](int64_t k, int64_t end) { + const Vec pvec(p); + double n2 = n - .5; + // The -1 accounts for floating point truncation issues + // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) + int64_t i = static_cast<int64_t>((n2 - std::sqrt(n2 * n2 - 2 * k - 1))); + int64_t j = k - n * i + i * (i + 1) / 2 + i + 1; + + const scalar_t * self_i = self_start + i * m; + const scalar_t * self_j = self_start + j * m; + scalar_t * res = res_start + k; + const scalar_t * const res_end = res_start + end; + + while (res != res_end) { + *res = F::finish(vec::map2_reduce_all<scalar_t>( + [&pvec](Vec a, Vec b) { return F::map((a - b).abs(), pvec); }, + F::red, self_i, self_j, m), p); + + res += 1; + self_j += m; + if (self_j == self_end) { + self_i += m; + self_j = self_i + m; + } + } + }); + } +``` + + + +# 四、对比分析 + +Scipy利用现有API组合实现,PyTorch则在底层重写cpp算子。 + +# 五、设计思路与实现方案 + +## 命名与参数设计 + +API的设计为paddle.cdist(x, y, p=2.0),其中 `x` 严格为 shape=[M, N] 的 Tensor,`p` 为p-范数对应的p值,输出为一行 `Mx(M-1)/2` 列的 Tensor + +## API实现方案 + +参考`Paddle.cdist`和与`Scipy`中的设计,组合已有API实现功能 + +# 六、测试和验收的考量 + +测试考虑的case如下: + +1. 当 `x` 不是 2D 的 Tensor 时,如 PyTorch 一样给出合理提示 + + ```python + >>> a = [] + >>> a = torch.tensor(a) + >>> b = torch.nn.functional.pdist(a) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + RuntimeError: pdist only supports 2D tensors, got: 1D + >>> b + ``` + + + +2. 
结果一致性,和 SciPy 以及 PyTorch 结果的数值的一致性 + +# 七、可行性分析及规划排期 + +有业内方案实现作为参考,工期上可以满足在当前版本周期内开发完成。 + +# 八、影响面 + +为独立新增API,对其他模块没有影响 + +# 名词解释 + +无 + +# 附件及参考资料 + +[PyTorch文档](https://pytorch.org/docs/stable/generated/torch.nn.functional.pdist.html?highlight=pdist#torch.nn.functional.pdist) + +[Scipy文档](https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html) \ No newline at end of file From 9d2918be55daeb72b367b116ae266d6827e2c83c Mon Sep 17 00:00:00 2001 From: coco <1228759711@qq.com> Date: Wed, 27 Sep 2023 16:42:51 +0800 Subject: [PATCH 06/13] fix typo --- rfcs/{ => APIs}/20230926_api_design_for_pdist.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename rfcs/{ => APIs}/20230926_api_design_for_pdist.md (98%) diff --git a/rfcs/20230926_api_design_for_pdist.md b/rfcs/APIs/20230926_api_design_for_pdist.md similarity index 98% rename from rfcs/20230926_api_design_for_pdist.md rename to rfcs/APIs/20230926_api_design_for_pdist.md index 4ffe30875..ee64c3148 100644 --- a/rfcs/20230926_api_design_for_pdist.md +++ b/rfcs/APIs/20230926_api_design_for_pdist.md @@ -364,11 +364,11 @@ Scipy利用现有API组合实现,PyTorch则在底层重写cpp算子。 ## 命名与参数设计 -API的设计为paddle.cdist(x, y, p=2.0),其中 `x` 严格为 shape=[M, N] 的 Tensor,`p` 为p-范数对应的p值,输出为一行 `Mx(M-1)/2` 列的 Tensor +API的设计为paddle.pdist(x, p=2.0),其中 `x` 严格为 shape=[M, N] 的 Tensor,`p` 为p-范数对应的p值,输出为一行 `Mx(M-1)/2` 列的 Tensor ## API实现方案 -参考`Paddle.cdist`和与`Scipy`中的设计,组合已有API实现功能 +参考`Paddle.pdist`和与`Scipy`中的设计,组合已有API实现功能 # 六、测试和验收的考量 From 3b3e350c49fe09b99f96cee32d4e2e46c5f4e2c7 Mon Sep 17 00:00:00 2001 From: coco <1228759711@qq.com> Date: Thu, 28 Sep 2023 06:39:54 +0800 Subject: [PATCH 07/13] fix --- rfcs/APIs/20230926_api_design_for_pdist.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rfcs/APIs/20230926_api_design_for_pdist.md b/rfcs/APIs/20230926_api_design_for_pdist.md index ee64c3148..71b46d1d3 100644 --- a/rfcs/APIs/20230926_api_design_for_pdist.md +++ b/rfcs/APIs/20230926_api_design_for_pdist.md 
@@ -368,7 +368,7 @@ API的设计为paddle.pdist(x, p=2.0),其中 `x` 严格为 shape=[M, N] 的 Te ## API实现方案 -参考`Paddle.pdist`和与`Scipy`中的设计,组合已有API实现功能 +参考`PyTorch`与`Scipy`中的设计,组合已有API实现功能 # 六、测试和验收的考量 From ab440c29329bba7e83f161da48b8a6183110a3de Mon Sep 17 00:00:00 2001 From: coco <1228759711@qq.com> Date: Thu, 28 Sep 2023 06:58:23 +0800 Subject: [PATCH 08/13] add bitwise_shift rfc --- .../20230927_api_design_for_bitwise_shift.md | 186 ++++++++++++++++++ 1 file changed, 186 insertions(+) create mode 100644 rfcs/APIs/20230927_api_design_for_bitwise_shift.md diff --git a/rfcs/APIs/20230927_api_design_for_bitwise_shift.md b/rfcs/APIs/20230927_api_design_for_bitwise_shift.md new file mode 100644 index 000000000..2f1f6911b --- /dev/null +++ b/rfcs/APIs/20230927_api_design_for_bitwise_shift.md @@ -0,0 +1,186 @@ +# paddle.pdist设计文档 + +| API 名称 | paddle.bitwise_right_shift
paddle.bitwise_left_shift | +| ------------ | --------------------------------------------------------- | +| 提交作者 | coco | +| 提交时间 | 2023-09-27 | +| 版本号 | V1.0 | +| 依赖飞桨版本 | develop | +| 文件名 | 20230927_api_defign_for_bitwise_shift | + +# 一、概述 + +## 1、相关背景 + +为paddle新增该API,给 Tensor 做 element wise 的算数(或逻辑)左移/右移。 + +## 2、功能目标 + +通过一个Tensor给定的bits计算另一个Tensor的的算术(或逻辑)右移/左移。 + +## 3、意义 + +飞桨支持直接对Tensor进行元素粒度的左移右移。 + +# 二、飞桨现状 + +目前paddle缺少相关功能实现。 + +# 三、业内方案调研 + +## PyTorch + +PyTorch中有API`torch.bitwise_right_shift(input, other, *, out=None) → Tensor` + +介绍为: + +``` +Computes the right arithmetic shift of input by other bits. The input tensor must be of integral type. This operator supports broadcasting to a common shape and type promotion. +``` + +## 实现方法 + +从实现方法上,PyTorch是将位运算注册到element_wise系列中实现的,[代码位置](https://github.com/pytorch/pytorch/blob/main/torch/_prims/__init__.py#L1144-L1149) + +```python +shift_right_arithmetic = _make_elementwise_binary_prim( + "shift_right_arithmetic", + impl_aten=torch.bitwise_right_shift, + doc="", + type_promotion=ELEMENTWISE_PRIM_TYPE_PROMOTION_KIND.DEFAULT, +) +``` + +具体元素尺度的实现,[代码位置](https://github.com/pytorch/pytorch/blob/main/torch/_inductor/codegen/common.py#L401-L405): + +```python +# TODO(fdrocha): this is currently not being used anywhere, +# pending on moving triton pin past 972b761 +@staticmethod +def bitwise_right_shift(x, y): + return f"{ExprPrinter.paren(x)} >> {ExprPrinter.paren(y)}" +``` + + + +## Numpy + +- Parameters: + + - **x1**:array_like, int + + Input values. + + - **x2**:array_like, int + + Number of bits to remove at the right of *x1*. If `x1.shape != x2.shape`, they must be broadcastable to a common shape (which becomes the shape of the output). + + - **out**:ndarray, None, or tuple of ndarray and None, optional + + A location into which the result is stored. If provided, it must have a shape that the inputs broadcast to. If not provided or None, a freshly-allocated array is returned. 
A tuple (possible only as a keyword argument) must have length equal to the number of outputs. + + - **where**:array_like, optional + + This condition is broadcast over the input. At locations where the condition is True, the *out* array will be set to the ufunc result. Elsewhere, the *out* array will retain its original value. Note that if an uninitialized *out* array is created via the default `out=None`, locations within it where the condition is False will remain uninitialized. + + - **kwargs: + + For other keyword-only arguments, see the [ufunc docs](https://numpy.org/doc/stable/reference/ufuncs.html#ufuncs-kwargs). + +Returns: + +- **out**:ndarray, int + + Return *x1* with bits shifted *x2* times to the right. This is a scalar if both *x1* and *x2* are scalars. + + + +相关[实现位置](https://github.com/numpy/numpy/blob/9d4c1484b96ed2b7dff49c479e9d0822a4b91f80/numpy/core/src/umath/loops_autovec.dispatch.c.src#L81-L105) + +```cpp +NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_left_shift) +(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) +{ + BINARY_LOOP_FAST(@type@, @type@, *out = npy_lshift@c@(in1, in2)); +#ifdef @TYPE@_left_shift_needs_clear_floatstatus + // For some reason, our macOS CI sets an "invalid" flag here, but only + // for some types. 
+ npy_clear_floatstatus_barrier((char*)dimensions); +#endif +} + +NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_right_shift) +(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) +{ +#ifndef NPY_DO_NOT_OPTIMIZE_@TYPE@_right_shift + BINARY_LOOP_FAST(@type@, @type@, *out = npy_rshift@c@(in1, in2)); +#else + BINARY_LOOP { + @type@ in1 = *(@type@ *)ip1; + @type@ in2 = *(@type@ *)ip2; + *(@type@ *)op1 = npy_rshift@c@(in1, in2); + } +#endif +} +``` + +`npy_rshift`相关调用 + +```cpp +NPY_INPLACE npy_@u@@type@ +npy_rshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b) +{ + if (NPY_LIKELY((size_t)b < sizeof(a) * CHAR_BIT)) { + return a >> b; + } +#if @is_signed@ + else if (a < 0) { + return (npy_@u@@type@)-1; /* preserve the sign bit */ + } +#endif + else { + return 0; + } +} +``` + +# 四、对比分析 + +PyTorch是将算子注册到element wise系列中,Numpy也类似地`BINARY_LOOP`来做element wise的shift操作。 + +# 五、设计思路与实现方案 + +## 命名与参数设计 + +API的设计为`paddle.bitwise_right_shift(x, y)`,其余几个shift操作同理,其中 `x` 与 `y` 需要有相同的shape或者能够进行广播,且类型都必须为int。 + +## API实现方案 + +参考`PyTorch`和与`Numpy`中的设计,组合已有API实现功能 + +# 六、测试和验收的考量 + +测试考虑的case如下: + +1. 对 `x`、`y`的 shape 和 dtype 有限制,并给出合理提示 + +2. 
结果一致性,和 PyTorch、Numpy 结果的数值的一致性 + +# 七、可行性分析及规划排期 + +有业内方案实现作为参考,工期上可以满足在当前版本周期内开发完成。 + +# 八、影响面 + +为独立新增API,对其他模块没有影响 + +# 名词解释 + +无 + +# 附件及参考资料 + +[PyTorch文档](https://pytorch.org/docs/stable/generated/torch.bitwise_right_shift.html?highlight=bitwise_right_shift#torch.bitwise_right_shift) + +[Numpy文档](https://numpy.org/doc/stable/reference/generated/numpy.right_shift.html#numpy.right_shift) \ No newline at end of file From e3fe25c69c4dd868f98a3474a72afe70c132130a Mon Sep 17 00:00:00 2001 From: coco <1228759711@qq.com> Date: Thu, 28 Sep 2023 07:24:06 +0800 Subject: [PATCH 09/13] update --- .../20230927_api_design_for_bitwise_shift.md | 186 ------------------ 1 file changed, 186 deletions(-) delete mode 100644 rfcs/APIs/20230927_api_design_for_bitwise_shift.md diff --git a/rfcs/APIs/20230927_api_design_for_bitwise_shift.md b/rfcs/APIs/20230927_api_design_for_bitwise_shift.md deleted file mode 100644 index 2f1f6911b..000000000 --- a/rfcs/APIs/20230927_api_design_for_bitwise_shift.md +++ /dev/null @@ -1,186 +0,0 @@ -# paddle.pdist设计文档 - -| API 名称 | paddle.bitwise_right_shift
paddle.bitwise_left_shift | -| ------------ | --------------------------------------------------------- | -| 提交作者 | coco | -| 提交时间 | 2023-09-27 | -| 版本号 | V1.0 | -| 依赖飞桨版本 | develop | -| 文件名 | 20230927_api_defign_for_bitwise_shift | - -# 一、概述 - -## 1、相关背景 - -为paddle新增该API,给 Tensor 做 element wise 的算数(或逻辑)左移/右移。 - -## 2、功能目标 - -通过一个Tensor给定的bits计算另一个Tensor的的算术(或逻辑)右移/左移。 - -## 3、意义 - -飞桨支持直接对Tensor进行元素粒度的左移右移。 - -# 二、飞桨现状 - -目前paddle缺少相关功能实现。 - -# 三、业内方案调研 - -## PyTorch - -PyTorch中有API`torch.bitwise_right_shift(input, other, *, out=None) → Tensor` - -介绍为: - -``` -Computes the right arithmetic shift of input by other bits. The input tensor must be of integral type. This operator supports broadcasting to a common shape and type promotion. -``` - -## 实现方法 - -从实现方法上,PyTorch是将位运算注册到element_wise系列中实现的,[代码位置](https://github.com/pytorch/pytorch/blob/main/torch/_prims/__init__.py#L1144-L1149) - -```python -shift_right_arithmetic = _make_elementwise_binary_prim( - "shift_right_arithmetic", - impl_aten=torch.bitwise_right_shift, - doc="", - type_promotion=ELEMENTWISE_PRIM_TYPE_PROMOTION_KIND.DEFAULT, -) -``` - -具体元素尺度的实现,[代码位置](https://github.com/pytorch/pytorch/blob/main/torch/_inductor/codegen/common.py#L401-L405): - -```python -# TODO(fdrocha): this is currently not being used anywhere, -# pending on moving triton pin past 972b761 -@staticmethod -def bitwise_right_shift(x, y): - return f"{ExprPrinter.paren(x)} >> {ExprPrinter.paren(y)}" -``` - - - -## Numpy - -- Parameters: - - - **x1**:array_like, int - - Input values. - - - **x2**:array_like, int - - Number of bits to remove at the right of *x1*. If `x1.shape != x2.shape`, they must be broadcastable to a common shape (which becomes the shape of the output). - - - **out**:ndarray, None, or tuple of ndarray and None, optional - - A location into which the result is stored. If provided, it must have a shape that the inputs broadcast to. If not provided or None, a freshly-allocated array is returned. 
A tuple (possible only as a keyword argument) must have length equal to the number of outputs. - - - **where**:array_like, optional - - This condition is broadcast over the input. At locations where the condition is True, the *out* array will be set to the ufunc result. Elsewhere, the *out* array will retain its original value. Note that if an uninitialized *out* array is created via the default `out=None`, locations within it where the condition is False will remain uninitialized. - - - **kwargs: - - For other keyword-only arguments, see the [ufunc docs](https://numpy.org/doc/stable/reference/ufuncs.html#ufuncs-kwargs). - -Returns: - -- **out**:ndarray, int - - Return *x1* with bits shifted *x2* times to the right. This is a scalar if both *x1* and *x2* are scalars. - - - -相关[实现位置](https://github.com/numpy/numpy/blob/9d4c1484b96ed2b7dff49c479e9d0822a4b91f80/numpy/core/src/umath/loops_autovec.dispatch.c.src#L81-L105) - -```cpp -NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_left_shift) -(char **args, npy_intp const *dimensions, npy_intp const *steps, - void *NPY_UNUSED(func)) -{ - BINARY_LOOP_FAST(@type@, @type@, *out = npy_lshift@c@(in1, in2)); -#ifdef @TYPE@_left_shift_needs_clear_floatstatus - // For some reason, our macOS CI sets an "invalid" flag here, but only - // for some types. 
- npy_clear_floatstatus_barrier((char*)dimensions); -#endif -} - -NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_right_shift) -(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) -{ -#ifndef NPY_DO_NOT_OPTIMIZE_@TYPE@_right_shift - BINARY_LOOP_FAST(@type@, @type@, *out = npy_rshift@c@(in1, in2)); -#else - BINARY_LOOP { - @type@ in1 = *(@type@ *)ip1; - @type@ in2 = *(@type@ *)ip2; - *(@type@ *)op1 = npy_rshift@c@(in1, in2); - } -#endif -} -``` - -`npy_rshift`相关调用 - -```cpp -NPY_INPLACE npy_@u@@type@ -npy_rshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b) -{ - if (NPY_LIKELY((size_t)b < sizeof(a) * CHAR_BIT)) { - return a >> b; - } -#if @is_signed@ - else if (a < 0) { - return (npy_@u@@type@)-1; /* preserve the sign bit */ - } -#endif - else { - return 0; - } -} -``` - -# 四、对比分析 - -PyTorch是将算子注册到element wise系列中,Numpy也类似地`BINARY_LOOP`来做element wise的shift操作。 - -# 五、设计思路与实现方案 - -## 命名与参数设计 - -API的设计为`paddle.bitwise_right_shift(x, y)`,其余几个shift操作同理,其中 `x` 与 `y` 需要有相同的shape或者能够进行广播,且类型都必须为int。 - -## API实现方案 - -参考`PyTorch`和与`Numpy`中的设计,组合已有API实现功能 - -# 六、测试和验收的考量 - -测试考虑的case如下: - -1. 对 `x`、`y`的 shape 和 dtype 有限制,并给出合理提示 - -2. 
结果一致性,和 PyTorch、Numpy 结果的数值的一致性 - -# 七、可行性分析及规划排期 - -有业内方案实现作为参考,工期上可以满足在当前版本周期内开发完成。 - -# 八、影响面 - -为独立新增API,对其他模块没有影响 - -# 名词解释 - -无 - -# 附件及参考资料 - -[PyTorch文档](https://pytorch.org/docs/stable/generated/torch.bitwise_right_shift.html?highlight=bitwise_right_shift#torch.bitwise_right_shift) - -[Numpy文档](https://numpy.org/doc/stable/reference/generated/numpy.right_shift.html#numpy.right_shift) \ No newline at end of file From 933ee4e49aec49437a487d8fd9ec8c1f773c9aeb Mon Sep 17 00:00:00 2001 From: coco <1228759711@qq.com> Date: Sat, 7 Oct 2023 15:07:41 +0800 Subject: [PATCH 10/13] fix --- rfcs/APIs/20230926_api_design_for_pdist.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/rfcs/APIs/20230926_api_design_for_pdist.md b/rfcs/APIs/20230926_api_design_for_pdist.md index 71b46d1d3..92a9b2662 100644 --- a/rfcs/APIs/20230926_api_design_for_pdist.md +++ b/rfcs/APIs/20230926_api_design_for_pdist.md @@ -20,7 +20,7 @@ ## 3、意义 -飞桨支持直接两两计算向量间的距离。 +飞桨支持计算形状为 [M, N] 的矩阵中,M 个行向量两两之间的 p-norm 距离。 # 二、飞桨现状 @@ -368,7 +368,11 @@ API的设计为paddle.pdist(x, p=2.0),其中 `x` 严格为 shape=[M, N] 的 Te ## API实现方案 -参考`PyTorch`与`Scipy`中的设计,组合已有API实现功能 +参考`PyTorch`与`Scipy`中的设计,组合已有API实现功能: + +在 Paddle repo 的 `python/paddle/nn/functional/distance.py` 文件中实现;并在 `python/paddle/nn/functional/__init__.py` 中添加 pdist API,以支持 paddle.Tensor.pdist 的调用方式; + +使用的API:`paddle.cdist`,`paddle.tril`,`paddle.masked_select` # 六、测试和验收的考量 
+单测代码位置:Paddle repo 的 paddle/test/legacy_test/test_pdist.py 文件 + 测试考虑的case如下: 1. 当`x`、`y` 2D 的 Tensor,并如PyTorch给出合理提示 From 4ff2616c5d127a55e9f715111f08b98217f9bf67 Mon Sep 17 00:00:00 2001 From: coco <1228759711@qq.com> Date: Sat, 7 Oct 2023 19:02:19 +0800 Subject: [PATCH 12/13] add args details --- rfcs/APIs/20230926_api_design_for_pdist.md | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/rfcs/APIs/20230926_api_design_for_pdist.md b/rfcs/APIs/20230926_api_design_for_pdist.md index 7480e9e6f..0b7172a72 100644 --- a/rfcs/APIs/20230926_api_design_for_pdist.md +++ b/rfcs/APIs/20230926_api_design_for_pdist.md @@ -364,7 +364,25 @@ Scipy利用现有API组合实现,PyTorch则在底层重写cpp算子。 ## 命名与参数设计 -API的设计为paddle.pdist(x, p=2.0),其中 `x` 严格为 shape=[M, N] 的 Tensor,`p` 为p-范数对应的p值,输出为一行 `Mx(M-1)/2` 列的 Tensor +API的设计为: + +`paddle.pdist(x, p=2.0, compute_mode="use_mm_for_euclid_dist_if_necessary", name=None)` + +Args: + ++ x(Tensor): 严格为 shape=[M, N] 的 Tensor ++ p(float, optional): 为p-范数对应的p值,默认为2.0 ++ compute_mode(str, optional): 默认为`use_mm_for_euclid_dist_if_necessary`(组合已有API过程中用到了`paddle.cdist`,当`p=2.0`时,可以设置`compute_mode`利用矩阵运算进行优化) + + `compute_mode=use_mm_for_euclid_dist_if_necessary`时,当p=2.0且M>25时使用矩阵乘法计算距离 + + `compute_mode=use_mm_for_euclid_dist`时,当p=2.0时使用矩阵乘法计算距离 + + `compute_mode=donot_use_mm_for_euclid_dist`时,不使用矩阵乘法计算距离 ++ name(str, 可选): 操作的名称(默认为None) + +Return: + ++ 一行 `Mx(M-1)/2` 列的 Tensor + + ## API实现方案 From e574498f968ae58a3a55bcfcc86b313f73b9fa30 Mon Sep 17 00:00:00 2001 From: coco <1228759711@qq.com> Date: Sat, 7 Oct 2023 19:04:53 +0800 Subject: [PATCH 13/13] typo --- rfcs/APIs/20230926_api_design_for_pdist.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rfcs/APIs/20230926_api_design_for_pdist.md b/rfcs/APIs/20230926_api_design_for_pdist.md index 0b7172a72..89d9b92b9 100644 --- a/rfcs/APIs/20230926_api_design_for_pdist.md +++ b/rfcs/APIs/20230926_api_design_for_pdist.md @@ -371,8 +371,8 @@ API的设计为: Args: + 
x(Tensor): 严格为 shape=[M, N] 的 Tensor -+ p(float, optional): 为p-范数对应的p值,默认为2.0 -+ compute_mode(str, optional): 默认为`use_mm_for_euclid_dist_if_necessary`(组合已有API过程中用到了`paddle.cdist`,当`p=2.0`时,可以设置`compute_mode`利用矩阵运算进行优化) ++ p(float, 可选): 为p-范数对应的p值,默认为2.0 ++ compute_mode(str, 可选): 默认为`use_mm_for_euclid_dist_if_necessary`(组合已有API过程中用到了`paddle.cdist`,当`p=2.0`时,可以设置`compute_mode`利用矩阵运算进行优化) + `compute_mode=use_mm_for_euclid_dist_if_necessary`时,当p=2.0且M>25时使用矩阵乘法计算距离 + `compute_mode=use_mm_for_euclid_dist`时,当p=2.0时使用矩阵乘法计算距离 + `compute_mode=donot_use_mm_for_euclid_dist`时,不使用矩阵乘法计算距离