Skip to content

Commit

Permalink
fixed transpose test
Browse files Browse the repository at this point in the history
  • Loading branch information
t4c1 committed Mar 19, 2020
1 parent 805a293 commit bf60476
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 31 deletions.
36 changes: 6 additions & 30 deletions test/unit/math/opencl/kernel_generator/reference_kernels/a+aT.cl
Original file line number Diff line number Diff line change
@@ -1,34 +1,10 @@
kernel void calculate(__global double* var3_global, int var3_rows, int var3_view, int var4, __global double* var7_global, int var7_rows, int var7_view, int var8, __global double* var9_global, int var9_rows, int var9_view, const int rows, const int cols){
const int gid_i = get_global_id(0);
const int lid_i = get_local_id(0);
const int lsize_i = get_local_size(0);
const int wg_id_i = get_group_id(0);
const int n_groups_i = get_num_groups(0);
const int blocks_rows = (rows + lsize_i - 1) / lsize_i;
const int blocks_cols = (cols + lsize_i - 1) / lsize_i;
for (int idx = wg_id_i; idx < blocks_rows * blocks_cols; idx += n_groups_i){
const int i0 = lsize_i * (idx % blocks_rows);
const int i = i0 + lid_i;
const int j0 = lsize_i * (idx / blocks_rows);
__local double var5_local[LOCAL_SIZE_ * (LOCAL_SIZE_ + 1)];
for(int lid_j = 0; lid_j < min(rows - i0, lsize_i); lid_j++){
const int j = j0 + lid_j;
if(j0 + lid_i < cols){
double var7 = 0; if (!((!contains_nonzero(var7_view, LOWER) && (i - lid_i + lid_j) < (j - lid_j + lid_i)) || (!contains_nonzero(var7_view, UPPER) && (i - lid_i + lid_j) > (j - lid_j + lid_i)))) {var7 = var7_global[(j - lid_j + lid_i) + var7_rows * (i - lid_i + lid_j)];}
double var6 = var7 + var8;
var5_local[lid_i + lid_j * (LOCAL_SIZE_ + 1)] = var6;
}
}
barrier(CLK_LOCAL_MEM_FENCE);
for(int lid_j = 0; lid_j < min(cols - j0, lsize_i); lid_j++){
const int j = j0 + lid_j;
if(i < rows){
kernel void calculate(__global double* var3_global, int var3_rows, int var3_view, int var4, __global double* var7_global, int var7_rows, int var7_view, int var8, __global double* var9_global, int var9_rows, int var9_view){
int i = get_global_id(0);
int j = get_global_id(1);
double var3 = 0; if (!((!contains_nonzero(var3_view, LOWER) && j < i) || (!contains_nonzero(var3_view, UPPER) && j > i))) {var3 = var3_global[i + var3_rows * j];}
double var2 = var3 + var4;
double var5 = var5_local[lid_j + lid_i * (LOCAL_SIZE_ + 1)];
double var1 = var2 + var5;
double var7 = 0; if (!((!contains_nonzero(var7_view, LOWER) && i < j) || (!contains_nonzero(var7_view, UPPER) && i > j))) {var7 = var7_global[j + var7_rows * i];}
double var6 = var7 + var8;
double var1 = var2 + var6;
var9_global[i + var9_rows * j] = var1;
}
}
}
}
2 changes: 1 addition & 1 deletion test/unit/math/opencl/kernel_generator/transpose_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ TEST(MathMatrixCL, block_of_transpose_test) {
}

TEST(MathMatrixCL, a_plus_a_transpose_test) {
std::string kernel_filename = "a+aT_simple.cl";
std::string kernel_filename = "a+aT.cl";
MatrixXd m(3, 3);
m << 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9;

Expand Down

0 comments on commit bf60476

Please sign in to comment.