Skip to content
This repository has been archived by the owner on Jan 24, 2024. It is now read-only.

Commit

Permalink
op unittest for sort & enhance test helper (#1411)
Browse files Browse the repository at this point in the history
* fix typo select unittest

* op unittest for sort

* op unittest for sort

* enhance TestCaseHelper & add special case for sort op

* fix sort bug for duplicate element

* fix index typo

* refine hard code testcase

* reduce array size to avoid large cuda memory occupation

* add not passed test case

* reduce array size again

* fix magic number

* remove headers

* remove cpp style code from .cuh
  • Loading branch information
zzk0 authored May 24, 2023
1 parent 169126b commit 500ff05
Show file tree
Hide file tree
Showing 9 changed files with 252 additions and 54 deletions.
2 changes: 1 addition & 1 deletion cinn/hlir/op/contrib/argmax_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ void TestGenerateCodeCpu_Argmax_Keep(void* _args, int32_t num_args)
for (int32_t j = 0; j < 3; j += 1) {
for (int32_t k = 0; k < 28; k += 1) {
for (int32_t a = 0; a < 28; a += 1) {
test_argmax_in_index[((2352 * i) + ((784 * j) + ((28 * k) + a)))] = cinn_host_find_int_nd(_test_argmax_in_index_temp, 3, j, ((2352 * i) + ((28 * k) + a)), 784);
test_argmax_in_index[((2352 * i) + ((784 * j) + ((28 * k) + a)))] = cinn_host_next_smallest_int32(_test_argmax_in_index_temp, 3, j, ((2352 * i) + ((28 * k) + a)), 784);
};
};
};
Expand Down
2 changes: 1 addition & 1 deletion cinn/hlir/op/contrib/argmin_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ void TestGenerateCodeCpu_Argmin_Keep(void* _args, int32_t num_args)
for (int32_t j = 0; j < 3; j += 1) {
for (int32_t k = 0; k < 28; k += 1) {
for (int32_t a = 0; a < 28; a += 1) {
test_argmin_in_index[((2352 * i) + ((784 * j) + ((28 * k) + a)))] = cinn_host_find_int_nd(_test_argmin_in_index_temp, 3, j, ((2352 * i) + ((28 * k) + a)), 784);
test_argmin_in_index[((2352 * i) + ((784 * j) + ((28 * k) + a)))] = cinn_host_next_smallest_int32(_test_argmin_in_index_temp, 3, j, ((2352 * i) + ((28 * k) + a)), 784);
};
};
};
Expand Down
4 changes: 2 additions & 2 deletions cinn/hlir/op/contrib/sort.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ std::vector<ir::Tensor> ArgSort(const ir::Tensor &A,
std::string find_func_name;
std::string index_func_name;
if (target.arch == common::Target::Arch::NVGPU) {
find_func_name.assign("cinn_cuda_find_int_nd");
find_func_name.assign("cinn_nvgpu_next_smallest_int32");
} else if (target.arch == common::Target::Arch::X86) {
find_func_name.assign("cinn_host_find_int_nd");
find_func_name.assign("cinn_host_next_smallest_int32");
} else {
LOG(FATAL) << "ArgSort only supports X86 and NVGPU ! Please Check.\n";
}
Expand Down
2 changes: 1 addition & 1 deletion cinn/hlir/op/contrib/sort_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ void TestGenerateCodeCpu_Sort(void* _args, int32_t num_args)
};
for (int32_t i = 0; i < 4; i += 1) {
for (int32_t j = 0; j < 28; j += 1) {
test_sort_out_index[((28 * i) + j)] = cinn_host_find_int_nd(_test_sort_out_index_temp, 28, j, (28 * i), 1);
test_sort_out_index[((28 * i) + j)] = cinn_host_next_smallest_int32(_test_sort_out_index_temp, 28, j, (28 * i), 1);
};
};
for (int32_t i = 0; i < 4; i += 1) {
Expand Down
23 changes: 23 additions & 0 deletions cinn/runtime/cpu/host_intrinsics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,20 @@ inline int cinn_host_find_float_nd(const cinn_buffer_t* buf, int size, float num

#undef __cinn_host_find_kernel

inline int cinn_host_next_smallest_int32(cinn_buffer_t* buf, int size, int num, int begin, int stride) {
int id = -1;
for (int i = begin; i < begin + size * stride; i += stride) {
if (id == -1 || reinterpret_cast<int*>(buf->memory)[i] < reinterpret_cast<int*>(buf->memory)[id]) {
id = i;
}
}
if (id != -1) {
reinterpret_cast<int*>(buf->memory)[id] = 2147483647;
return (id - begin) / stride;
}
return -1;
}

#define CINN_HOST_LT_NUM(TYPE_SUFFIX, TYPE) \
inline int cinn_host_lt_num_##TYPE_SUFFIX( \
const cinn_buffer_t* buf, const int size, const TYPE num, const int offset, const int stride) { \
Expand Down Expand Up @@ -349,6 +363,15 @@ CINN_REGISTER_HELPER(host_intrinsics) {
.AddInputType<int>()
.End();

REGISTER_EXTERN_FUNC_HELPER(cinn_host_next_smallest_int32, host_target)
.SetRetType<int>()
.AddInputType<cinn_buffer_t*>()
.AddInputType<int>()
.AddInputType<int>()
.AddInputType<int>()
.AddInputType<int>()
.End();

#define _REGISTER_CINN_HOST_LT_NUM(TYPE_SUFFIX, TYPE) \
REGISTER_EXTERN_FUNC_HELPER(cinn_host_lt_num_##TYPE_SUFFIX, host_target) \
.SetRetType<int>() \
Expand Down
25 changes: 23 additions & 2 deletions cinn/runtime/cuda/cinn_cuda_runtime_source.cuh
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
/**
* \file This file contains all the intrinsics available to be used in CUDA code generated by CodeGen.
*/

extern "C" {

#define CINN_INT32_MAX 2147483647
#define CINN_INT32_MIN -2147483648

// *************************************************************** //
// bool unary and binary operator
#define FN_BOOL(func) cinn_nvgpu_##func##_bool
Expand Down Expand Up @@ -364,8 +369,8 @@ __device__ inline float16 FN_FP16(pow)(float16 a, float16 b) {
#define EXPAND_REDUCE_INT32_MARCO(MARCO, ...) \
MARCO(sum_int32, 0, int, ##__VA_ARGS__) \
MARCO(prod_int32, 1, int, ##__VA_ARGS__) \
MARCO(max_int32, -2147483648, int, ##__VA_ARGS__) \
MARCO(min_int32, 2147483647, int, ##__VA_ARGS__)
MARCO(max_int32, CINN_INT32_MIN, int, ##__VA_ARGS__) \
MARCO(min_int32, CINN_INT32_MAX, int, ##__VA_ARGS__)

__device__ inline int cinn_sum_int32(const int left, const int right) { return left + right; }
__device__ inline int cinn_prod_int32(const int left, const int right) { return left * right; }
Expand Down Expand Up @@ -634,6 +639,20 @@ __device__ inline int cinn_cuda_find_float_nd(const float *buf, int size, float

#undef __cinn_cuda_find_kernel

__device__ inline int cinn_nvgpu_next_smallest_int32(int *buf, int size, int num, int begin, int stride) {
int id = -1;
for (int i = begin; i < begin + size * stride; i += stride) {
if (id == -1 || buf[i] < buf[id]) {
id = i;
}
}
if (id != -1) {
buf[id] = CINN_INT32_MAX;
return (id - begin) / stride;
}
return -1;
}

#define __cinn_cuda_find_from_kernel(buf, size, num, begin) \
do { \
for (int i = begin; i < size; ++i) { \
Expand Down Expand Up @@ -812,6 +831,8 @@ __device__ int cinn_cuda_resize_bicubic(const int *buf,

// *************************************************************** //
// end of macro undef
#undef CINN_INT32_MAX
#undef CINN_INT32_MIN
#undef FN_BOOL
#undef FN_UINT8
#undef FN_INT8
Expand Down
9 changes: 9 additions & 0 deletions cinn/runtime/cuda/cuda_intrinsics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,15 @@ CINN_REGISTER_HELPER(cuda_intrinsics) {
.AddInputType<int>()
.End();

REGISTER_FACKED_EXTERN_FUNC_HELPER(cinn_nvgpu_next_smallest_int32, target)
.SetRetType<int>()
.AddInputType<cinn_buffer_t *>()
.AddInputType<int>()
.AddInputType<int>()
.AddInputType<int>()
.AddInputType<int>()
.End();

#define _REGISTER_CINN_NVGPU_LT_NUM(TYPE_SUFFIX, TYPE) \
REGISTER_FACKED_EXTERN_FUNC_HELPER(cinn_nvgpu_lt_num_##TYPE_SUFFIX, target) \
.SetRetType<int>() \
Expand Down
17 changes: 17 additions & 0 deletions python/tests/ops/op_test_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
import unittest
import re

from unittest import suite
from typing import Union, List

parser = argparse.ArgumentParser(description="Argparse for op test helper")
parser.add_argument(
"--case",
Expand Down Expand Up @@ -104,3 +107,17 @@ def run(self):
res = runner.run(test_suite)
if not res.wasSuccessful():
sys.exit(not res.wasSuccessful())


def run_test(test_class: Union[suite.TestSuite, List[suite.TestSuite]]):
test_suite = unittest.TestSuite()
test_loader = unittest.TestLoader()
if isinstance(test_class, type):
test_suite.addTests(test_loader.loadTestsFromTestCase(test_class))
else:
for cls in test_class:
test_suite.addTests(test_loader.loadTestsFromTestCase(cls))
runner = unittest.TextTestRunner()
res = runner.run(test_suite)
if not res.wasSuccessful():
sys.exit(not res.wasSuccessful())
Loading

0 comments on commit 500ff05

Please sign in to comment.