diff --git a/GRiDCodeGenerator b/GRiDCodeGenerator index 45d15b4..67ca959 160000 --- a/GRiDCodeGenerator +++ b/GRiDCodeGenerator @@ -1 +1 @@ -Subproject commit 45d15b4cfc8e5cda03be6e1fc1365a7ce791efbd +Subproject commit 67ca959ea6b3af6f33121c3de93b54e16b4d8542 diff --git a/test/finite_diff/finite_diff.cuh b/test/finite_diff/finite_diff.cuh new file mode 100644 index 0000000..02e1366 --- /dev/null +++ b/test/finite_diff/finite_diff.cuh @@ -0,0 +1,1124 @@ +// #include "grid.cuh" +#include "/home/a2rlab4/GRiDBenchmarks/GRiD/grid.cuh" +#include +#include +#include +#include +#include + +using namespace grid; + +template + __device__ void finite_diff(T *input, T *output, int (*func)(T* input, T* output, const grid::robotModel *d_robotMode, const T gravity), T eps, const grid::robotModel *d_robotModel, const T gravity){ +// void finite_diff(T *input, T *output, int (*func)(T* input, T* output, const grid::robotModel *d_robotModel*, const T gravity), T eps){ + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < I_N; ind += blockDim.x*blockDim.y){ + T *val_plus[O_R * O_C]; + T *val_minus[O_R * O_C]; + + unsigned input_ind = ind/I_N; + + // T *input_plus = input; + // T *input_minus = input; + // input_plus[input_ind] += eps; + // input_minus[input_ind] -= eps; + + input->h_q_qd_u[input_ind] += eps; + val_plus[0] = func(input, output, d_robotModel,gravity); + input->h_q_qd_u[input_ind] -= (2*eps); + val_minus[0] = func(input, output, d_robotModel,gravity); + + for(int rc = 0; rc < (O_R*O_C); rc++){ + output[(ind*O_C*O_R) + rc] = (val_plus[rc] - val_minus[rc])/ (2* eps); + } + printMat(&output[0],7); + } +} + +template +__device__ void finite_diff_aba(T *s_q_qd_tau, T *s_fdoutput, T eps, const grid::robotModel *d_robotModel, const T gravity){ + __shared__ T s_qdd_plus[O_R]; + __shared__ T s_qdd_minus[O_R]; + T *s_tau = &s_q_qd_tau[2 * O_R]; + T *qqd_plus = s_q_qd_tau; + T *qqd_minus = s_q_qd_tau; + + __shared__ T s_va[12*O_R]; + extern __shared__ T s_XITemp[]; T *s_XImats = s_XITemp; T *s_temp = &s_XITemp[504]; + // load_update_XImats_helpers(s_XImats, s_q, d_robotModel, s_temp); + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < I_N*2; ind += blockDim.x*blockDim.y){ + + if(threadIdx.x == 0 && blockIdx.x == 0){ + qqd_plus[ind] += eps; + } + + grid::load_update_XImats_helpers(s_XImats, &qqd_plus[0], d_robotModel, s_temp); + __syncthreads(); + grid::aba_inner(s_qdd_plus, s_va, &qqd_plus[0], &qqd_plus[7], s_tau, s_XImats, s_temp, gravity); + __syncthreads(); + + if(threadIdx.x == 0 && blockIdx.x == 0){ + qqd_minus[ind] -= (2*eps); + } + // __syncthreads(); + grid::load_update_XImats_helpers(s_XImats, &qqd_minus[0], d_robotModel, s_temp); + __syncthreads(); + grid::aba_inner(s_qdd_minus, s_va, &qqd_minus[0], &qqd_minus[7], s_tau, s_XImats, s_temp, gravity); + __syncthreads(); + + + if(threadIdx.x == 0 && blockIdx.x == 0){ + qqd_minus[ind] += (eps); + } + __syncthreads(); + + for(int i = 0; i < (O_R); i++){ + s_fdoutput[(ind*O_R) + i] = (s_qdd_plus[i] - s_qdd_minus[i])/ (2* eps); + } + __syncthreads(); + + } + + __syncthreads(); + if(threadIdx.x == 0 && blockIdx.x == 0){ + printf("s_fdoutput \n"); + printMat(&s_fdoutput[0],7); + } + __syncthreads(); + +} + +template +__device__ void finite_diff_fdsva(T *s_q_qd_tau, T *s_fdoutput, T eps, const grid::robotModel *d_robotModel, const T gravity){ + __shared__ T s_df_du_plus[O_R*O_R*3*21]; + __shared__ T s_df_du_minus[O_R*O_R*3*21]; + __shared__ T s_qdd_plus[O_R]; + __shared__ T s_qdd_minus[O_R]; + T *s_tau = &s_q_qd_tau[2 * O_R]; + T *qqd_plus = s_q_qd_tau; + T *qqd_minus = s_q_qd_tau; + + __shared__ T s_vaf[12*O_R]; + __shared__ T s_va[12*O_R]; + extern __shared__ T s_XITemp[]; T *s_XImats = s_XITemp; T *s_temp = &s_XITemp[504]; + // load_update_XImats_helpers(s_XImats, s_q, d_robotModel, s_temp); + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < I_N*3; ind += blockDim.x*blockDim.y){ + // __syncthreads(); + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("ind %d \n", ind); + // } + // __syncthreads(); + + qqd_plus[ind] += eps; + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // // qqd_plus[ind] += eps; + // printf("qqd_plus[ind] %f \n", qqd_plus[ind]); + // printMat(&qqd_plus[0],1); + // } + + grid::load_update_XImats_helpers(s_XImats, &qqd_plus[0], d_robotModel, s_temp); + __syncthreads(); + // void inverse_dynamics_gradient_inner(T *s_dc_du, const T *s_q, const T *s_qd, const T *s_vaf, T *s_XImats, T *s_temp, const T gravity) + // grid::inverse_dynamics_gradient_inner(s_dc_du_plus, &qqd_plus[0], &qqd_plus[O_R], s_vaf, s_XImats, s_temp, gravity); + grid::aba_inner(s_qdd_plus, s_va, &qqd_plus[0], &qqd_plus[7], &qqd_plus[14], s_XImats, s_temp, gravity); + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // // qqd_plus[ind] += eps; + // printf("s_qdd_plus\n"); + // printMat(&s_qdd_plus[0],1); + // } + // grid::fdsva_device(&qqd_plus[0], &qqd_plus[7], s_qdd_plus, const T *s_tau,const robotModel *d_robotModel, const T gravity) + grid::fdsva_inner(&s_df_du_plus[21*7*ind], &qqd_plus[0], &qqd_plus[7], s_qdd_plus, &qqd_plus[14], s_XImats, s_temp, gravity); + // (T *s_fddq_fddqd_fddt, T *s_q, T *s_qd, const T *s_qdd, const T *s_tau, T *s_XImats, T *s_temp, const T gravity) + // __syncthreads(); + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // // if(ind < 7){ + // printf("s_df_du_plus %d \n", ind); + // printMat(&s_df_du_plus[21*7*ind],O_R); + // // } + // } + // __syncthreads(); + // __syncthreads(); + + qqd_minus[ind] -= (2*eps); + + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // // if(ind < 7){ + // // qqd_minus[ind] -= (2*eps); + // printf("qqd_minus\n"); + // printMat(&qqd_minus[0],1); + // // } + // } + __syncthreads(); + grid::load_update_XImats_helpers(s_XImats, &qqd_minus[0], d_robotModel, s_temp); + __syncthreads(); + // grid::inverse_dynamics_gradient_inner(s_dc_du_minus, &qqd_minus[0], &qqd_minus[O_R], s_vaf, s_XImats, s_temp, gravity); + grid::aba_inner(s_qdd_minus, s_va, &qqd_minus[0], &qqd_minus[7], &qqd_minus[14], s_XImats, s_temp, gravity); + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // // qqd_plus[ind] += eps; + // printf("s_qdd_minus\n"); + // printMat(&s_qdd_minus[0],1); + // } + grid::fdsva_inner(&s_df_du_minus[21*7*ind], &qqd_minus[0], &qqd_minus[7], s_qdd_minus, &qqd_minus[14], s_XImats, s_temp, gravity); + + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // // if(ind < 7){ + + // printf("s_df_du_minus %d \n", ind); + // printMat(&s_df_du_minus[21*7*ind],O_R); + // // } + // } + // __syncthreads(); + + // if(threadIdx.x == 0 && blockIdx.x == 0){ + qqd_minus[ind] += eps; + // } + __syncthreads(); + + } + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R*3*21; ind += blockDim.x*blockDim.y){ + s_fdoutput[ind] = (s_df_du_plus[ind] - s_df_du_minus[ind])/ (2* eps); + + } + + __syncthreads(); + if(threadIdx.x == 0 && blockIdx.x == 0){ + for(int i=0; i<21; i++){ + printf("s_fdoutput %d\n", i); + printMat(&s_fdoutput[21*7*i],O_R); + } + } + __syncthreads(); + +} + +template +__device__ void finite_diff_for_ddp(T *s_q_qd_tau, T *s_df2, T eps, const grid::robotModel *d_robotModel, const T gravity){ + __shared__ T s_Minv[49]; + __shared__ T s_qdd[49]; + __shared__ T s_dc_du_plus[98*7]; + __shared__ T s_vaf_plus[84]; + __shared__ T s_qdd1_plus[7]; + __shared__ T s_M_plus[49*7]; + __shared__ T s_dc_du_minus[98*7]; + __shared__ T s_vaf_minus[84]; + __shared__ T s_qdd1_minus[7]; + __shared__ T s_M_minus[49*7]; + __shared__ T s_di_du[1372]; + __shared__ T s_df_du[147]; + __shared__ T s_XImats_plus[504]; + __shared__ T s_XImats_minus[504]; + extern __shared__ T s_XITemp[]; T *s_XImats = s_XITemp; T *s_temp = &s_XITemp[504]; + + // T *s_di_dqq = s_di_du; T *s_di_dqdqd = &s_di_du[343]; T *s_di_dqqd = &s_di_du[686]; T *s_dm_dq = &s_di_du[1029]; + // grid::load_update_XImats_helpers(s_XImats, &s_q_qd_tau[0], d_robotModel, s_temp); + // T *s_df_dq = s_df_du; T *s_df_dqd = &s_df_du[49]; T *s_df_tau = &s_df_du[98]; + // grid::direct_minv_inner(s_Minv, &s_q_qd_tau[0], s_XImats, s_temp); + // grid::forward_dynamics_finish(s_qdd, &s_q_qd_tau[14], s_temp, s_Minv); + // grid::fdsva_inner(s_df_du, &s_q_qd_tau[0], &s_q_qd_tau[7], s_qdd, &s_q_qd_tau[14], s_XImats, s_temp, gravity); + + T *s_tau = &s_q_qd_tau[2 * O_R]; + T *qqd_plus = s_q_qd_tau; + T *qqd_minus = s_q_qd_tau; + + // solves for di_dq + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < I_N; ind += blockDim.x*blockDim.y){ + qqd_plus[ind] += eps; + + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("qqd_plus[%d] = \n", ind); + // printMat(&qqd_plus[0],1); + + // } + + load_update_XImats_helpers(s_XImats_plus, &qqd_plus[0], d_robotModel, s_temp); + __syncthreads(); + + inverse_dynamics_inner(s_temp, s_vaf_plus, &qqd_plus[0], &qqd_plus[7], s_XImats_plus, s_temp, gravity); + // direct_minv_inner(s_Minv, &qqd_plus[0], s_XImats, s_temp); + forward_dynamics_finish(s_qdd1_plus, s_tau, s_temp, s_Minv); + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("s_Minv \n"); + // printMat(&s_Minv[0],7); + + // } + inverse_dynamics_inner_vaf(s_vaf_plus, &qqd_plus[0], &qqd_plus[7], s_qdd1_minus, s_XImats_plus, s_temp, gravity); + inverse_dynamics_gradient_inner(&s_dc_du_plus[98*ind], &qqd_plus[0], &qqd_plus[7], s_vaf_plus, s_XImats_plus, s_temp, gravity); + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("s_dc_du_plus[%d] = \n", ind); + // printMat(&s_dc_du_plus[ind*98],7); + // printMat(&s_dc_du_plus[ind*98 + 49],7); + + // } + + qqd_minus[ind] -= (2*eps); + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("qqd_plus[%d] = \n", ind); + // printMat(&qqd_plus[0],1); + + // } + load_update_XImats_helpers(s_XImats_minus, &qqd_minus[0], d_robotModel, s_temp); + __syncthreads(); + + inverse_dynamics_inner(s_temp, s_vaf_plus, &qqd_minus[0], &qqd_minus[7], s_XImats_minus, s_temp, gravity); + // direct_minv_inner(s_Minv, &qqd_plus[0], s_XImats, s_temp); + forward_dynamics_finish(s_qdd1_minus, s_tau, s_temp, s_Minv); + inverse_dynamics_inner_vaf(s_vaf_minus, &qqd_minus[0], &qqd_minus[7], s_qdd1_minus, s_XImats_minus, s_temp, gravity); + inverse_dynamics_gradient_inner(&s_dc_du_minus[98*ind], &qqd_minus[0], &qqd_minus[7], s_vaf_minus, s_XImats_minus, s_temp, gravity); + + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("s_dc_du_minus[%d] = \n", ind); + // printMat(&s_dc_du_minus[ind*98],7); + // printMat(&s_dc_du_minus[ind*98 + 49],7); + + // } + qqd_minus[ind] += eps; + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("qqd_minus[%d] = \n", ind); + // printMat(&qqd_minus[0],1); + // printf("qqd_plus[%d] = \n", ind); + // printMat(&qqd_plus[0],1); + + // } + + + } + __syncthreads(); + // __syncthreads(); + // if(threadIdx.x == 0 && blockIdx.x == 0){ + + // for(int i=0; i<7; i++){ + // printf("s_dc_du_plus[%d] = \n", i*49); + // printMat(&s_dc_du_plus[i*49],7); + // printf("s_dc_du_minus[%d] = \n", i*49); + // printMat(&s_dc_du_minus[i*49],7); + // } + // } + + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R; ind += blockDim.x*blockDim.y){ + // if (ind % 7 != 0){ + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("ind = %d and s_dc_du_plus[ind] = %f and s_dc_du_minus[ind] = %f \n", ind, s_dc_du_plus[ind], s_dc_du_minus[ind]); + // } + s_di_du[ind] = (s_dc_du_plus[ind] - s_dc_du_minus[ind])/ (eps* 2); + // } + } + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R; ind += blockDim.x*blockDim.y){ + // if (ind % 7 != 0){ + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("ind = %d and s_dc_du_plus[ind] = %f and s_dc_du_minus[ind] = %f \n", ind, s_dc_du_plus[ind], s_dc_du_minus[ind]); + // } + s_di_du[49 + ind] = (s_dc_du_plus[98 +ind] - s_dc_du_minus[98 +ind])/ (eps* 2); + // } + } + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R; ind += blockDim.x*blockDim.y){ + // if (ind % 7 != 0){ + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("ind = %d and s_dc_du_plus[ind] = %f and s_dc_du_minus[ind] = %f \n", ind, s_dc_du_plus[ind], s_dc_du_minus[ind]); + // } + s_di_du[98+ ind] = (s_dc_du_plus[196+ind] - s_dc_du_minus[196+ind])/ (eps* 2); + // } + } + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R; ind += blockDim.x*blockDim.y){ + // if (ind % 7 != 0){ + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("ind = %d and s_dc_du_plus[ind] = %f and s_dc_du_minus[ind] = %f \n", ind, s_dc_du_plus[ind], s_dc_du_minus[ind]); + // } + s_di_du[147 + ind] = (s_dc_du_plus[294 +ind] - s_dc_du_minus[294 +ind])/ (eps* 2); + // } + } + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R; ind += blockDim.x*blockDim.y){ + // if (ind % 7 != 0){ + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("ind = %d and s_dc_du_plus[ind] = %f and s_dc_du_minus[ind] = %f \n", ind, s_dc_du_plus[ind], s_dc_du_minus[ind]); + // } + s_di_du[196 + ind] = (s_dc_du_plus[392 +ind] - s_dc_du_minus[392 +ind])/ (eps* 2); + // } + } + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R; ind += blockDim.x*blockDim.y){ + // if (ind % 7 != 0){ + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("ind = %d and s_dc_du_plus[ind] = %f and s_dc_du_minus[ind] = %f \n", ind, s_dc_du_plus[ind], s_dc_du_minus[ind]); + // } + s_di_du[245 + ind] = (s_dc_du_plus[490 +ind] - s_dc_du_minus[490 +ind])/ (eps* 2); + // } + } + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R; ind += blockDim.x*blockDim.y){ + // if (ind % 7 != 0){ + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("ind = %d and s_dc_du_plus[ind] = %f and s_dc_du_minus[ind] = %f \n", ind, s_dc_du_plus[ind], s_dc_du_minus[ind]); + // } + s_di_du[294+ ind] = (s_dc_du_plus[588+ind] - s_dc_du_minus[ 588+ind])/ (eps* 2); + // } + } + + __syncthreads(); + // solves for di_dqd + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < I_N; ind += blockDim.x*blockDim.y){ + qqd_plus[7 + ind] += eps; + + load_update_XImats_helpers(s_XImats_plus, &qqd_plus[0], d_robotModel, s_temp); + __syncthreads(); + + inverse_dynamics_inner(s_temp, s_vaf_plus, &qqd_plus[0], &qqd_plus[7], s_XImats_plus, &s_temp[7], gravity); + // direct_minv_inner(s_Minv, &qqd_plus[0], s_XImats, s_temp); + forward_dynamics_finish(s_qdd1_plus, s_tau, s_temp, s_Minv); + inverse_dynamics_inner_vaf(s_vaf_plus, &qqd_plus[0], &qqd_plus[7], s_qdd1_minus, s_XImats_plus, s_temp, gravity); + inverse_dynamics_gradient_inner(&s_dc_du_plus[98*ind], &qqd_plus[0], &qqd_plus[7], s_vaf_plus, s_XImats_plus, s_temp, gravity); + + qqd_minus[7 + ind] -= (2*eps); + load_update_XImats_helpers(s_XImats_minus, &qqd_minus[0], d_robotModel, s_temp); + __syncthreads(); + + inverse_dynamics_inner(s_temp, s_vaf_plus, &qqd_minus[0], &qqd_minus[7], s_XImats_minus, &s_temp[7], gravity); + // direct_minv_inner(s_Minv, &qqd_plus[0], s_XImats, s_temp); + forward_dynamics_finish(s_qdd1_minus, s_tau, s_temp, s_Minv); + inverse_dynamics_inner_vaf(s_vaf_minus, &qqd_minus[0], &qqd_minus[7], s_qdd1_minus, s_XImats_minus, s_temp, gravity); + inverse_dynamics_gradient_inner(&s_dc_du_minus[98*ind], &qqd_minus[0], &qqd_minus[7], s_vaf_minus, s_XImats_minus, s_temp, gravity); + + + qqd_minus[7 + ind] += eps; + + } + __syncthreads(); + __syncthreads(); + // if(threadIdx.x == 0 && blockIdx.x == 0){ + + // for(int i=0; i<7; i++){ + // printf("s_dc_du_plus[%d] = \n", i*49); + // printMat(&s_dc_du_plus[i*49],7); + // printf("s_dc_du_minus[%d] = \n", i*49); + // printMat(&s_dc_du_minus[i*49],7); + // } + // } + + // for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R*O_R; ind += blockDim.x*blockDim.y){ + // // if(threadIdx.x == 0 && blockIdx.x == 0){ + // // printf("ind = %d and s_dc_du_plus[ind] = %f and s_dc_du_minus[ind] = %f \n", ind, s_dc_du_plus[ind], s_dc_du_minus[ind]); + // // } + + // s_di_du[343 + ind] = (s_dc_du_plus[ind] - s_dc_du_minus[ind])/ (2* eps); + // } + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R; ind += blockDim.x*blockDim.y){ + // if (ind % 7 != 0){ + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("ind = %d and s_dc_du_plus[ind] = %f and s_dc_du_minus[ind] = %f \n", ind, s_dc_du_plus[ind], s_dc_du_minus[ind]); + // } + s_di_du[343+ ind] = (s_dc_du_plus[49 +ind] - s_dc_du_minus[49+ ind])/ (eps* 2); + // s_di_du[343+ ind] = 3; + + // } + } + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R; ind += blockDim.x*blockDim.y){ + // if (ind % 7 != 0){ + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("ind = %d and s_dc_du_plus[ind] = %f and s_dc_du_minus[ind] = %f \n", ind, s_dc_du_plus[ind], s_dc_du_minus[ind]); + // } + s_di_du[343+ 49 + ind] = (s_dc_du_plus[147 +ind] - s_dc_du_minus[147 +ind])/ (eps* 2); + // } + } + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R; ind += blockDim.x*blockDim.y){ + // if (ind % 7 != 0){ + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("ind = %d and s_dc_du_plus[ind] = %f and s_dc_du_minus[ind] = %f \n", ind, s_dc_du_plus[ind], s_dc_du_minus[ind]); + // } + s_di_du[343+ 98+ ind] = (s_dc_du_plus[245+ind] - s_dc_du_minus[245+ind])/ (eps* 2); + // } + } + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R; ind += blockDim.x*blockDim.y){ + // if (ind % 7 != 0){ + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("ind = %d and s_dc_du_plus[ind] = %f and s_dc_du_minus[ind] = %f \n", ind, s_dc_du_plus[ind], s_dc_du_minus[ind]); + // } + s_di_du[343+ 147 + ind] = (s_dc_du_plus[343 +ind] - s_dc_du_minus[343 +ind])/ (eps* 2); + // } + } + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R; ind += blockDim.x*blockDim.y){ + // if (ind % 7 != 0){ + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("ind = %d and s_dc_du_plus[ind] = %f and s_dc_du_minus[ind] = %f \n", ind, s_dc_du_plus[ind], s_dc_du_minus[ind]); + // } + s_di_du[343+ 196 + ind] = (s_dc_du_plus[441 +ind] - s_dc_du_minus[441 +ind])/ (eps* 2); + // } + } + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R; ind += blockDim.x*blockDim.y){ + // if (ind % 7 != 0){ + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("ind = %d and s_dc_du_plus[ind] = %f and s_dc_du_minus[ind] = %f \n", ind, s_dc_du_plus[ind], s_dc_du_minus[ind]); + // } + s_di_du[343+ 245 + ind] = (s_dc_du_plus[539 +ind] - s_dc_du_minus[539 +ind])/ (eps* 2); + // } + } + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R; ind += blockDim.x*blockDim.y){ + // if (ind % 7 != 0){ + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("ind = %d and s_dc_du_plus[ind] = %f and s_dc_du_minus[ind] = %f \n", ind, s_dc_du_plus[ind], s_dc_du_minus[ind]); + // } + s_di_du[343+ 294+ ind] = (s_dc_du_plus[637+ind] - s_dc_du_minus[ 637+ind])/ (eps* 2); + // } + } + __syncthreads(); + + // solves for di_dqd_dq + + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < I_N; ind += blockDim.x*blockDim.y){ + qqd_plus[ind] += eps; + // qqd_plus[7 + ind] += eps; + + load_update_XImats_helpers(s_XImats_plus, &qqd_plus[0], d_robotModel, s_temp); + __syncthreads(); + + inverse_dynamics_inner(s_temp, s_vaf_plus, &qqd_plus[0], &qqd_plus[7], s_XImats_plus, &s_temp[7], gravity); + // direct_minv_inner(s_Minv, &qqd_plus[0], s_XImats, s_temp); + forward_dynamics_finish(s_qdd1_plus, s_tau, s_temp, s_Minv); + inverse_dynamics_inner_vaf(s_vaf_plus, &qqd_plus[0], &qqd_plus[7], s_qdd1_minus, s_XImats_plus, s_temp, gravity); + inverse_dynamics_gradient_inner(&s_dc_du_plus[98*ind], &qqd_plus[0], &qqd_plus[7], s_vaf_plus, s_XImats_plus, s_temp, gravity); + + qqd_minus[ind] -= (2*eps); + // qqd_minus[7 + ind] -= (2*eps); + load_update_XImats_helpers(s_XImats_minus, &qqd_minus[0], d_robotModel, s_temp); + __syncthreads(); + + inverse_dynamics_inner(s_temp, s_vaf_plus, &qqd_minus[0], &qqd_minus[7], s_XImats_minus, &s_temp[7], gravity); + // direct_minv_inner(s_Minv, &qqd_plus[0], s_XImats, s_temp); + forward_dynamics_finish(s_qdd1_minus, s_tau, s_temp, s_Minv); + inverse_dynamics_inner_vaf(s_vaf_minus, &qqd_minus[0], &qqd_minus[7], s_qdd1_minus, s_XImats_minus, s_temp, gravity); + inverse_dynamics_gradient_inner(&s_dc_du_minus[98*ind], &qqd_minus[0], &qqd_minus[7], s_vaf_minus, s_XImats_minus, s_temp, gravity); + + + qqd_minus[ind] += eps; + // qqd_minus[7 + ind] += eps; + + } + __syncthreads(); + // __syncthreads(); + // if(threadIdx.x == 0 && blockIdx.x == 0){ + + // for(int i=0; i<7; i++){ + // printf("s_dc_du_plus[%d] = \n", i*49); + // printMat(&s_dc_du_plus[i*49],7); + // printf("s_dc_du_minus[%d] = \n", i*49); + // printMat(&s_dc_du_minus[i*49],7); + // } + // } + __syncthreads(); + + // for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R*O_R; ind += blockDim.x*blockDim.y){ + // s_di_du[686 + ind] = (s_dc_du_plus[ind] - s_dc_du_minus[ind])/ (2* eps); + // } + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R; ind += blockDim.x*blockDim.y){ + // if (ind % 7 != 0){ + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("ind = %d and s_dc_du_plus[ind] = %f and s_dc_du_minus[ind] = %f \n", ind, s_dc_du_plus[ind], s_dc_du_minus[ind]); + // } + s_di_du[686 +ind] = (s_dc_du_plus[49 +ind] - s_dc_du_minus[49 +ind])/ (eps* 2); + // } + } + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R; ind += blockDim.x*blockDim.y){ + // if (ind % 7 != 0){ + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("ind = %d and s_dc_du_plus[ind] = %f and s_dc_du_minus[ind] = %f \n", ind, s_dc_du_plus[ind], s_dc_du_minus[ind]); + // } + s_di_du[686 +49 + ind] = (s_dc_du_plus[147 +ind] - s_dc_du_minus[147 +ind])/ (eps* 2); + // } + } + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R; ind += blockDim.x*blockDim.y){ + // if (ind % 7 != 0){ + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("ind = %d and s_dc_du_plus[ind] = %f and s_dc_du_minus[ind] = %f \n", ind, s_dc_du_plus[ind], s_dc_du_minus[ind]); + // } + s_di_du[686 +98+ ind] = (s_dc_du_plus[245+ind] - s_dc_du_minus[245+ind])/ (eps* 2); + // } + } + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R; ind += blockDim.x*blockDim.y){ + // if (ind % 7 != 0){ + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("ind = %d and s_dc_du_plus[ind] = %f and s_dc_du_minus[ind] = %f \n", ind, s_dc_du_plus[ind], s_dc_du_minus[ind]); + // } + s_di_du[686 +147 + ind] = (s_dc_du_plus[343 +ind] - s_dc_du_minus[343 +ind])/ (eps* 2); + // } + } + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R; ind += blockDim.x*blockDim.y){ + // if (ind % 7 != 0){ + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("ind = %d and s_dc_du_plus[ind] = %f and s_dc_du_minus[ind] = %f \n", ind, s_dc_du_plus[ind], s_dc_du_minus[ind]); + // } + s_di_du[686 +196 + ind] = (s_dc_du_plus[441 +ind] - s_dc_du_minus[441 +ind])/ (eps* 2); + // } + } + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R; ind += blockDim.x*blockDim.y){ + // if (ind % 7 != 0){ + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("ind = %d and s_dc_du_plus[ind] = %f and s_dc_du_minus[ind] = %f \n", ind, s_dc_du_plus[ind], s_dc_du_minus[ind]); + // } + s_di_du[686 +245 + ind] = (s_dc_du_plus[539 +ind] - s_dc_du_minus[539 +ind])/ (eps* 2); + // } + } + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R; ind += blockDim.x*blockDim.y){ + // if (ind % 7 != 0){ + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("ind = %d and s_dc_du_plus[ind] = %f and s_dc_du_minus[ind] = %f \n", ind, s_dc_du_plus[ind], s_dc_du_minus[ind]); + // } + s_di_du[686 +294+ ind] = (s_dc_du_plus[637+ind] - s_dc_du_minus[ 637+ind])/ (eps* 2); + // } + } + + __syncthreads(); + __syncthreads(); + + // solves for dM_dq + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < I_N; ind += blockDim.x*blockDim.y){ + qqd_plus[ind] += eps; + + load_update_XImats_helpers(s_XImats_plus, &qqd_plus[0], d_robotModel, s_temp); + // __syncthreads(); + + // grid::direct_minv_inner(&s_Minv_plus[ind*49], &qqd_plus[0], s_XImats, s_temp); + crba_inner(&s_M_plus[ind*49], &qqd_plus[0], &qqd_plus[7], s_XImats_plus, s_temp, gravity); + // __syncthreads(); + + + qqd_minus[ind] -= (2*eps); + load_update_XImats_helpers(s_XImats_minus, &qqd_minus[0], d_robotModel, s_temp); + // __syncthreads(); + + // grid::direct_minv_inner(&s_Minv_minus[ind*49], &qqd_plus[0], s_XImats, s_temp); + crba_inner(&s_M_minus[ind*49], &qqd_minus[0], &qqd_minus[7], s_XImats_minus, s_temp, gravity); + // __syncthreads(); + + + qqd_minus[ind] += eps; + + } + __syncthreads(); + + // __syncthreads(); + // if(threadIdx.x == 0 && blockIdx.x == 0){ + + // for(int i=0; i<7; i++){ + // printf("s_M_plus[%d] = \n", i*49); + // printMat(&s_M_plus[i*49],7); + // printf("s_M_minus[%d] = \n", i*49); + // printMat(&s_M_minus[i*49],7); + // } + // } + // __syncthreads(); + + + for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R*O_R; ind += blockDim.x*blockDim.y){ + s_di_du[1029 + ind] = (s_M_plus[ind] - s_M_minus[ind])/ (2* eps); + // if(threadIdx.x == 0 && blockIdx.x == 0){ + // printf("s_di_du[1029 + ind]%d = %f bc s_M_plus[ind] = %f and s_M_minus[ind] = %f\n", ind,s_di_du[1029 +ind], s_M_plus[ind], s_M_minus[ind]); + // } + + } + + if(threadIdx.x == 0 && blockIdx.x == 0){ + for(int i=0; i<28; i++){ + printf("s_di_du %d\n", i); + printMat(&s_di_du[49*i],7); + } + } + __syncthreads(); + + load_update_XImats_helpers(s_XImats, &s_q_qd_tau[0], d_robotModel, s_temp); + T *s_df_dq = s_df_du; T *s_df_dqd = &s_df_du[49]; T *s_df_tau = &s_df_du[98]; + direct_minv_inner(s_Minv, &s_q_qd_tau[0], s_XImats, s_temp); + forward_dynamics_finish(s_qdd, &s_q_qd_tau[14], s_temp, s_Minv); + fdsva_inner(s_df_du, &s_q_qd_tau[0], &s_q_qd_tau[7], s_qdd, &s_q_qd_tau[14], s_XImats, s_temp, gravity); + + __syncthreads(); + T *s_dm_dq = &s_di_du[1029]; + + + T *dM_dqxfd_dq = s_temp; + for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 343; ind += blockDim.x*blockDim.y){ + int page = ind / 49; + int row = ind % 7; + int col = ind % 49 / 7; + dM_dqxfd_dq[ind] = dot_prod(&s_dm_dq[49*page + row], &s_df_dq[7*col]); + } + __syncthreads(); + T *rot_dM_dqxfd_dqd = &s_temp[0]; + for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 49; ind += blockDim.x*blockDim.y){ + int page = 0; + int row = ind % 7; + int col = ind / 7; + rot_dM_dqxfd_dqd[49*col + row + 7*page] = dM_dqxfd_dq[49*page + ind]; + } + __syncthreads(); + for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 49; ind += blockDim.x*blockDim.y){ + int page = 1; + int row = ind % 7; + int col = ind / 7; + rot_dM_dqxfd_dqd[49*col + row + 7*page] = dM_dqxfd_dq[49*page + ind]; + } + __syncthreads(); + for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 49; ind += blockDim.x*blockDim.y){ + int page = 2; + int row = ind % 7; + int col = ind / 7; + rot_dM_dqxfd_dqd[49*col + row + 7*page] = dM_dqxfd_dq[49*page + ind]; + } + __syncthreads(); + for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 49; ind += blockDim.x*blockDim.y){ + int page = 3; + int row = ind % 7; + int col = ind / 7; + rot_dM_dqxfd_dqd[49*col + row + 7*page] = dM_dqxfd_dq[49*page + ind]; + } + __syncthreads(); + for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 49; ind += blockDim.x*blockDim.y){ + int page = 4; + int row = ind % 7; + int col = ind / 7; + rot_dM_dqxfd_dqd[49*col + row + 7*page] = dM_dqxfd_dq[49*page + ind]; + } + __syncthreads(); + for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 49; ind += blockDim.x*blockDim.y){ + int page = 5; + int row = ind % 7; + int col = ind / 7; + rot_dM_dqxfd_dqd[49*col + row + 7*page] = dM_dqxfd_dq[49*page + ind]; + } + __syncthreads(); + for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 49; ind += blockDim.x*blockDim.y){ + int page = 6; + int row = ind % 7; + int col = ind / 7; + rot_dM_dqxfd_dqd[49*col + row + 7*page] = dM_dqxfd_dq[49*page + ind]; + } + __syncthreads(); + for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 343; ind += blockDim.x*blockDim.y){ + s_df2[ind] = s_di_du[ind] + dM_dqxfd_dq[ind] + rot_dM_dqxfd_dqd[ind]; + } + __syncthreads(); + T *dM_dqxfd_dqd = s_temp; + for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 343; ind += blockDim.x*blockDim.y){ + int page = ind / 49; + int row = ind % 7; + int col = ind % 49 / 7; + dM_dqxfd_dqd[ind] = dot_prod(&s_dm_dq[49*page + row], &s_df_dqd[7*col]); + } + __syncthreads(); + for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 343; ind += blockDim.x*blockDim.y){ + s_df2[343+ ind] = s_di_du[343 + ind] + dM_dqxfd_dqd[ind]; + s_df2[686+ ind] = s_di_du[686 + ind]; + } + __syncthreads(); + s_Minv[1] = s_Minv[7]; + s_Minv[2] = s_Minv[14]; + s_Minv[9] = s_Minv[15]; + s_Minv[3] = s_Minv[21]; + s_Minv[10] = s_Minv[22]; + s_Minv[17] = s_Minv[23]; + s_Minv[4] = s_Minv[28]; + s_Minv[11] = s_Minv[29]; + s_Minv[18] = s_Minv[30]; + s_Minv[25] = s_Minv[31]; + s_Minv[5] = s_Minv[35]; + s_Minv[12] = s_Minv[36]; + s_Minv[19] = s_Minv[37]; + s_Minv[26] = s_Minv[38]; + s_Minv[33] = s_Minv[39]; + s_Minv[6] = s_Minv[42]; + s_Minv[13] = s_Minv[43]; + s_Minv[20] = s_Minv[44]; + s_Minv[27] = s_Minv[45]; + s_Minv[34] = s_Minv[46]; + s_Minv[41] = s_Minv[47]; + for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 343; ind += blockDim.x*blockDim.y){ + int page = ind / 49; + int row = ind % 7; + int col = ind % 49 / 7; + s_df2[1029+ ind] = dot_prod(&s_dm_dq[49*page + row], &s_Minv[7*col]); + } + __syncthreads(); + for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 1372; ind += blockDim.x*blockDim.y){ + int page = ind / 49; + int row = ind % 7; + int col = ind % 49 / 7; + s_df2[ind] = dot_prod(&s_Minv[row], &s_df2[49*page + 7*col]); + s_df2[ind] *= (-1); + } + __syncthreads(); + + + __syncthreads(); + if(threadIdx.x == 0 && blockIdx.x == 0){ + + for(int i=0; i<28; i++){ + if (i == 0){ + printf("solving for fd_dq \n"); + } + if (i == 7){ + printf("solving for fd_dqd \n"); + } + if (i == 14){ + printf("solving for fd_dqd_dq \n"); + } + if (i == 21){ + printf("solving for fd_dtau_dq \n"); + } + printf("s_df2[%d] = \n", i*49); + printMat(&s_df2[i*49],7); + } + } + +} + +// template +// __device__ void finite_diff_for_ddp(T *s_q_qd_tau, T *s_df2, T eps, const grid::robotModel *d_robotModel, const T gravity){ +// __shared__ T s_Minv[49]; +// __shared__ T s_qdd[49]; +// __shared__ T s_dc_du_plus[98*7]; +// __shared__ T s_vaf_plus[84]; +// __shared__ T s_qdd1_plus[7]; +// __shared__ T s_M_plus[49*7]; +// __shared__ T s_dc_du_minus[98*7]; +// __shared__ T s_vaf_minus[84]; +// __shared__ T s_qdd1_minus[7]; +// __shared__ T s_M_minus[49*7]; +// __shared__ T s_di_du[1372]; +// __shared__ T s_df_du[147]; +// extern __shared__ T s_XITemp[]; T *s_XImats = s_XITemp; T *s_temp = &s_XITemp[504]; + +// // T *s_di_dqq = s_di_du; T *s_di_dqdqd = &s_di_du[343]; T *s_di_dqqd = &s_di_du[686]; T *s_dm_dq = &s_di_du[1029]; +// // grid::load_update_XImats_helpers(s_XImats, &s_q_qd_tau[0], d_robotModel, s_temp); +// // T *s_df_dq = s_df_du; T *s_df_dqd = &s_df_du[49]; T *s_df_tau = &s_df_du[98]; +// // grid::direct_minv_inner(s_Minv, &s_q_qd_tau[0], s_XImats, s_temp); +// // grid::forward_dynamics_finish(s_qdd, &s_q_qd_tau[14], s_temp, s_Minv); +// // grid::fdsva_inner(s_df_du, &s_q_qd_tau[0], &s_q_qd_tau[7], s_qdd, &s_q_qd_tau[14], s_XImats, s_temp, gravity); + +// // fd of idsva +// T *s_tau = &s_q_qd_tau[2 * O_R]; +// T *qqd_plus = s_q_qd_tau; +// T *qqd_minus = s_q_qd_tau; + +// // solves for di_dq +// for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < I_N; ind += blockDim.x*blockDim.y){ +// qqd_plus[ind] += eps; + +// grid::load_update_XImats_helpers(s_XImats, &qqd_plus[0], d_robotModel, s_temp); +// __syncthreads(); + +// inverse_dynamics_inner(s_temp, s_vaf_plus, &qqd_plus[0], &qqd_plus[7], s_XImats, &s_temp[7], gravity); +// // direct_minv_inner(s_Minv, &qqd_plus[0], s_XImats, s_temp); +// forward_dynamics_finish(s_qdd1_plus, s_tau, s_temp, s_Minv); +// // if(threadIdx.x == 0 && blockIdx.x == 0){ +// // printf("s_Minv \n"); +// // printMat(&s_Minv[0],7); + +// // } +// inverse_dynamics_inner_vaf(s_vaf_plus, &qqd_plus[0], &qqd_plus[7], s_qdd1_minus, s_XImats, s_temp, gravity); +// inverse_dynamics_gradient_inner(&s_dc_du_plus[49*ind], &qqd_plus[0], &qqd_plus[7], s_vaf_plus, s_XImats, s_temp, gravity); + +// qqd_minus[ind] -= (2*eps); +// grid::load_update_XImats_helpers(s_XImats, &qqd_minus[0], d_robotModel, s_temp); +// __syncthreads(); + +// inverse_dynamics_inner(s_temp, s_vaf_plus, &qqd_minus[0], &qqd_minus[7], s_XImats, &s_temp[7], gravity); +// // direct_minv_inner(s_Minv, &qqd_plus[0], s_XImats, s_temp); +// forward_dynamics_finish(s_qdd1_minus, s_tau, s_temp, s_Minv); +// inverse_dynamics_inner_vaf(s_vaf_minus, &qqd_minus[0], &qqd_minus[7], s_qdd1_minus, s_XImats, s_temp, gravity); +// inverse_dynamics_gradient_inner(&s_dc_du_minus[49*ind], &qqd_minus[0], &qqd_minus[7], s_vaf_minus, s_XImats, s_temp, gravity); + + +// qqd_minus[ind] += eps; + +// } +// __syncthreads(); +// __syncthreads(); +// if(threadIdx.x == 0 && blockIdx.x == 0){ + +// for(int i=0; i<7; i++){ +// printf("s_dc_du_plus[%d] = \n", i*49); +// printMat(&s_dc_du_plus[i*49],7); +// printf("s_dc_du_minus[%d] = \n", i*49); +// printMat(&s_dc_du_minus[i*49],7); +// } +// } + +// for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R*O_R; ind += blockDim.x*blockDim.y){ +// s_di_du[ind] = (s_dc_du_plus[ind] - s_dc_du_minus[ind])/ (2* eps); +// } + +// __syncthreads(); +// // solves for di_dqd +// for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < I_N; ind += blockDim.x*blockDim.y){ +// qqd_plus[7 + ind] += eps; + +// grid::load_update_XImats_helpers(s_XImats, &qqd_plus[0], d_robotModel, s_temp); +// __syncthreads(); + +// inverse_dynamics_inner(s_temp, s_vaf_plus, &qqd_plus[0], &qqd_plus[7], s_XImats, &s_temp[7], gravity); +// // direct_minv_inner(s_Minv, &qqd_plus[0], s_XImats, s_temp); +// forward_dynamics_finish(s_qdd1_plus, s_tau, s_temp, s_Minv); +// inverse_dynamics_inner_vaf(s_vaf_plus, &qqd_plus[0], &qqd_plus[7], s_qdd1_minus, s_XImats, s_temp, gravity); +// inverse_dynamics_gradient_inner(&s_dc_du_plus[49*ind], &qqd_plus[0], &qqd_plus[7], s_vaf_plus, s_XImats, s_temp, gravity); + +// qqd_minus[7 + ind] -= (2*eps); +// grid::load_update_XImats_helpers(s_XImats, &qqd_minus[0], d_robotModel, s_temp); +// __syncthreads(); + +// inverse_dynamics_inner(s_temp, s_vaf_plus, &qqd_minus[0], &qqd_minus[7], s_XImats, &s_temp[7], gravity); +// // direct_minv_inner(s_Minv, &qqd_plus[0], s_XImats, s_temp); +// forward_dynamics_finish(s_qdd1_minus, s_tau, s_temp, s_Minv); +// inverse_dynamics_inner_vaf(s_vaf_minus, &qqd_minus[0], &qqd_minus[7], s_qdd1_minus, s_XImats, s_temp, gravity); +// inverse_dynamics_gradient_inner(&s_dc_du_minus[49*ind], &qqd_minus[0], &qqd_minus[7], s_vaf_minus, s_XImats, s_temp, gravity); + + +// qqd_minus[7 + ind] += eps; + +// } +// __syncthreads(); +// __syncthreads(); +// // if(threadIdx.x == 0 && blockIdx.x == 0){ + +// // for(int i=0; i<7; i++){ +// // printf("s_dc_du_plus[%d] = \n", i*49); +// // printMat(&s_dc_du_plus[i*49],7); +// // printf("s_dc_du_minus[%d] = \n", i*49); +// // printMat(&s_dc_du_minus[i*49],7); +// // } +// // } + +// for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R*O_R; ind += blockDim.x*blockDim.y){ +// s_di_du[343 + ind] = (s_dc_du_plus[ind] - s_dc_du_minus[ind])/ (2* eps); +// } +// __syncthreads(); + +// // solves for di_dq_dqd + +// for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < I_N; ind += blockDim.x*blockDim.y){ +// qqd_plus[ind] += eps; +// qqd_plus[7 + ind] += eps; + +// grid::load_update_XImats_helpers(s_XImats, &qqd_plus[0], d_robotModel, s_temp); +// __syncthreads(); + +// inverse_dynamics_inner(s_temp, s_vaf_plus, &qqd_plus[0], &qqd_plus[7], s_XImats, &s_temp[7], gravity); +// // direct_minv_inner(s_Minv, &qqd_plus[0], s_XImats, s_temp); +// forward_dynamics_finish(s_qdd1_plus, s_tau, s_temp, s_Minv); +// inverse_dynamics_inner_vaf(s_vaf_plus, &qqd_plus[0], &qqd_plus[7], s_qdd1_minus, s_XImats, s_temp, gravity); +// inverse_dynamics_gradient_inner(&s_dc_du_plus[49*ind], &qqd_plus[0], &qqd_plus[7], s_vaf_plus, s_XImats, s_temp, gravity); + +// qqd_minus[ind] -= (2*eps); +// qqd_minus[7 + ind] -= (2*eps); +// grid::load_update_XImats_helpers(s_XImats, &qqd_minus[0], d_robotModel, s_temp); +// __syncthreads(); + +// inverse_dynamics_inner(s_temp, s_vaf_plus, &qqd_minus[0], &qqd_minus[7], s_XImats, &s_temp[7], gravity); +// // direct_minv_inner(s_Minv, &qqd_plus[0], s_XImats, s_temp); +// forward_dynamics_finish(s_qdd1_minus, s_tau, s_temp, s_Minv); +// inverse_dynamics_inner_vaf(s_vaf_minus, &qqd_minus[0], &qqd_minus[7], s_qdd1_minus, s_XImats, s_temp, gravity); +// inverse_dynamics_gradient_inner(&s_dc_du_minus[49*ind], &qqd_minus[0], &qqd_minus[7], s_vaf_minus, s_XImats, s_temp, gravity); + + +// qqd_minus[ind] += eps; +// qqd_minus[7 + ind] += eps; + +// } +// __syncthreads(); +// // __syncthreads(); +// // if(threadIdx.x == 0 && blockIdx.x == 0){ + +// // for(int i=0; i<7; i++){ +// // printf("s_dc_du_plus[%d] = \n", i*49); +// // printMat(&s_dc_du_plus[i*49],7); +// // printf("s_dc_du_minus[%d] = \n", i*49); +// // printMat(&s_dc_du_minus[i*49],7); +// // } +// // } +// __syncthreads(); + +// for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R*O_R; ind += blockDim.x*blockDim.y){ +// s_di_du[686 + ind] = (s_dc_du_plus[ind] - s_dc_du_minus[ind])/ (2* eps); +// } +// __syncthreads(); + +// // solves for dM_dq +// for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < I_N; ind += blockDim.x*blockDim.y){ +// qqd_plus[ind] += eps; + +// grid::load_update_XImats_helpers(s_XImats, &qqd_plus[0], d_robotModel, s_temp); +// __syncthreads(); + +// // grid::direct_minv_inner(&s_Minv_plus[ind*49], &qqd_plus[0], s_XImats, s_temp); +// crba_inner(&s_M_plus[ind*49], &qqd_plus[0], &qqd_plus[7], s_XImats, s_temp, gravity); +// __syncthreads(); + + +// qqd_minus[ind] -= (2*eps); +// grid::load_update_XImats_helpers(s_XImats, &qqd_minus[0], d_robotModel, s_temp); +// __syncthreads(); + +// // grid::direct_minv_inner(&s_Minv_minus[ind*49], &qqd_plus[0], s_XImats, s_temp); +// crba_inner(&s_M_minus[ind*49], &qqd_minus[0], &qqd_minus[7], s_XImats, s_temp, gravity); +// __syncthreads(); + + +// qqd_minus[ind] += eps; + +// } +// __syncthreads(); +// if(threadIdx.x == 0 && blockIdx.x == 0){ + +// for(int i=0; i<7; i++){ +// printf("s_M_plus[%d] = \n", i*49); +// printMat(&s_M_plus[i*49],7); +// printf("s_M_minus[%d] = \n", i*49); +// printMat(&s_M_minus[i*49],7); +// } +// } +// __syncthreads(); + + +// for (int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < O_R*O_R*O_R; ind += blockDim.x*blockDim.y){ +// s_di_du[1029 + ind] = (s_M_plus[ind] - s_M_minus[ind])/ (2* eps); +// if(threadIdx.x == 0 && blockIdx.x == 0){ +// printf("s_di_du[1029 + ind]%d = %f bc s_M_plus[ind] = %f and s_M_minus[ind] = %f\n", ind,s_di_du[1029 +ind], s_M_plus[ind], s_M_minus[ind]); + +// } + +// } + +// if(threadIdx.x == 0 && blockIdx.x == 0){ +// for(int i=0; i<28; i++){ +// printf("s_di_du %d\n", i); +// printMat(&s_di_du[49*i],7); +// } +// } +// __syncthreads(); + +// load_update_XImats_helpers(s_XImats, &s_q_qd_tau[0], d_robotModel, s_temp); +// T *s_df_dq = s_df_du; T *s_df_dqd = &s_df_du[49]; T *s_df_tau = &s_df_du[98]; +// direct_minv_inner(s_Minv, &s_q_qd_tau[0], s_XImats, s_temp); +// forward_dynamics_finish(s_qdd, &s_q_qd_tau[14], s_temp, s_Minv); +// fdsva_inner(s_df_du, &s_q_qd_tau[0], &s_q_qd_tau[7], s_qdd, &s_q_qd_tau[14], s_XImats, s_temp, gravity); + +// __syncthreads(); +// T *s_dm_dq = &s_di_du[1029]; + + +// T *dM_dqxfd_dq = s_temp; +// for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 343; ind += blockDim.x*blockDim.y){ +// int page = ind / 49; +// int row = ind % 7; +// int col = ind % 49 / 7; +// dM_dqxfd_dq[ind] = dot_prod(&s_dm_dq[49*page + row], &s_df_dq[7*col]); +// } +// __syncthreads(); +// T *rot_dM_dqxfd_dqd = &s_temp[0]; +// for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 49; ind += blockDim.x*blockDim.y){ +// int page = 0; +// int row = ind % 7; +// int col = ind / 7; +// rot_dM_dqxfd_dqd[49*col + row + 7*page] = dM_dqxfd_dq[49*page + ind]; +// } +// __syncthreads(); +// for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 49; ind += blockDim.x*blockDim.y){ +// int page = 1; +// int row = ind % 7; +// int col = ind / 7; +// rot_dM_dqxfd_dqd[49*col + row + 7*page] = dM_dqxfd_dq[49*page + ind]; +// } +// __syncthreads(); +// for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 49; ind += blockDim.x*blockDim.y){ +// int page = 2; +// int row = ind % 7; +// int col = ind / 7; +// rot_dM_dqxfd_dqd[49*col + row + 7*page] = dM_dqxfd_dq[49*page + ind]; +// } +// __syncthreads(); +// for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 49; ind += blockDim.x*blockDim.y){ +// int page = 3; +// int row = ind % 7; +// int col = ind / 7; +// rot_dM_dqxfd_dqd[49*col + row + 7*page] = dM_dqxfd_dq[49*page + ind]; +// } +// __syncthreads(); +// for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 49; ind += blockDim.x*blockDim.y){ +// int page = 4; +// int row = ind % 7; +// int col = ind / 7; +// rot_dM_dqxfd_dqd[49*col + row + 7*page] = dM_dqxfd_dq[49*page + ind]; +// } +// __syncthreads(); +// for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 49; ind += blockDim.x*blockDim.y){ +// int page = 5; +// int row = ind % 7; +// int col = ind / 7; +// rot_dM_dqxfd_dqd[49*col + row + 7*page] = dM_dqxfd_dq[49*page + ind]; +// } +// __syncthreads(); +// for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 49; ind += blockDim.x*blockDim.y){ +// int page = 6; +// int row = ind % 7; +// int col = ind / 7; +// rot_dM_dqxfd_dqd[49*col + row + 7*page] = dM_dqxfd_dq[49*page + ind]; +// } +// __syncthreads(); +// for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 343; ind += blockDim.x*blockDim.y){ +// s_df2[ind] = s_di_du[ind] + dM_dqxfd_dq[ind] + rot_dM_dqxfd_dqd[ind]; +// } +// __syncthreads(); +// T *dM_dqxfd_dqd = s_temp; +// for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 343; ind += blockDim.x*blockDim.y){ +// int page = ind / 49; +// int row = ind % 7; +// int col = ind % 49 / 7; +// dM_dqxfd_dqd[ind] = dot_prod(&s_dm_dq[49*page + row], &s_df_dqd[7*col]); +// } +// __syncthreads(); +// for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 343; ind += blockDim.x*blockDim.y){ +// s_df2[343+ ind] = s_di_du[343 + ind] + dM_dqxfd_dqd[ind]; +// s_df2[686+ ind] = s_di_du[686 + ind]; +// } +// __syncthreads(); +// s_Minv[1] = s_Minv[7]; +// s_Minv[2] = s_Minv[14]; +// s_Minv[9] = s_Minv[15]; +// s_Minv[3] = s_Minv[21]; +// s_Minv[10] = s_Minv[22]; +// s_Minv[17] = s_Minv[23]; +// s_Minv[4] = s_Minv[28]; +// s_Minv[11] = s_Minv[29]; +// s_Minv[18] = s_Minv[30]; +// s_Minv[25] = s_Minv[31]; +// s_Minv[5] = s_Minv[35]; +// s_Minv[12] = s_Minv[36]; +// s_Minv[19] = s_Minv[37]; +// s_Minv[26] = s_Minv[38]; +// s_Minv[33] = s_Minv[39]; +// s_Minv[6] = s_Minv[42]; +// s_Minv[13] = s_Minv[43]; +// s_Minv[20] = s_Minv[44]; +// s_Minv[27] = s_Minv[45]; +// s_Minv[34] = s_Minv[46]; +// s_Minv[41] = s_Minv[47]; +// for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 343; ind += blockDim.x*blockDim.y){ +// int page = ind / 49; +// int row = ind % 7; +// int col = ind % 49 / 7; +// s_df2[1029+ ind] = dot_prod(&s_dm_dq[49*page + row], &s_Minv[7*col]); +// } +// __syncthreads(); +// for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 1372; ind += blockDim.x*blockDim.y){ +// int page = ind / 49; +// int row = ind % 7; +// int col = ind % 49 / 7; +// s_df2[ind] = dot_prod(&s_Minv[row], &s_df2[49*page + 7*col]); +// s_df2[ind] *= (-1); +// } +// __syncthreads(); + + +// // __syncthreads(); +// // if(threadIdx.x == 0 && blockIdx.x == 0){ + +// // for(int i=0; i<28; i++){ +// // printf("s_df2[%d] = \n", i*49); +// // printMat(&s_df2[i*49],7); +// // } +// // } + +// } \ No newline at end of file diff --git a/test/finite_diff/finite_diff_tester.cu b/test/finite_diff/finite_diff_tester.cu new file mode 100644 index 0000000..17eb403 --- /dev/null +++ b/test/finite_diff/finite_diff_tester.cu @@ -0,0 +1,118 @@ +#include +#include "finite_diff.cuh" +// #include "/home/a2rlab4/GRiDBenchmarks/GRiD/grid.cuh" +#include +#include +#include +#include +#include + +using namespace grid; + +int example_func(float *input, float *func_output, const grid::robotModel *d_robotModel, const float gravity) { + + // grid::aba(func_output, va, input, &input[7], &input[14], &input[21], temp, gravity); + dim3 dimms(grid::SUGGESTED_THREADS,1,1); + cudaStream_t *streams = grid::init_grid(); + grid::gridData *hd_data = grid::init_gridData(); + + hd_data->h_q_qd_u[0] = 1.24; + hd_data->h_q_qd_u[1] = 0.13; + hd_data->h_q_qd_u[2] = -0.17; + hd_data->h_q_qd_u[3] = 1.33; + hd_data->h_q_qd_u[4] = 0.22; + hd_data->h_q_qd_u[5] = -0.56; + hd_data->h_q_qd_u[6] = 0.99; + + hd_data->h_q_qd_u[7] = 0; + hd_data->h_q_qd_u[8] = 0; + hd_data->h_q_qd_u[9] = 0; + hd_data->h_q_qd_u[10] = 0; + hd_data->h_q_qd_u[11] = 0; + hd_data->h_q_qd_u[12] = 0; + hd_data->h_q_qd_u[13] = 0; + + grid::aba(hd_data, d_robotModel, gravity, 1, dim3(1,1,1), dimms, streams); + return 0; + +} + +template +__global__ void fd_kernel(T *d_q_qd_u, T *d_fdoutput, T eps, const robotModel *d_robotModel, const T gravity) { + __shared__ T s_fdoutput[49*7*4]; + // __shared__ T s_fdoutput[98]; + __shared__ T s_qdd_plus[O_R]; + __shared__ T s_qdd_minus[O_R]; + __shared__ T s_q_qd_tau[3*O_R]; + + for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 3*O_R; ind += blockDim.x*blockDim.y){ + s_q_qd_tau[ind] = d_q_qd_u[ind]; + } + // finite_diff_aba(s_q_qd_tau, s_fdoutput, eps, d_robotModel, gravity); + finite_diff_for_ddp(s_q_qd_tau, s_fdoutput, eps, d_robotModel, gravity); + // finite_diff(input, output, example_func(input, func_output, d_robotModel, gravity), eps); + + for(int ind = threadIdx.x + threadIdx.y*blockDim.x; ind < 49*7*4; ind += blockDim.x*blockDim.y){ + d_fdoutput[ind] = s_fdoutput[ind]; + } + __syncthreads(); +} + +int main() { + grid::gridData *hd_data = grid::init_gridData(); + grid::robotModel *d_robotModel = grid::init_robotModel(); + float gravity = static_cast(9.81); + // dim3 dimms(grid::SUGGESTED_THREADS,1,1); + cudaStream_t *streams = grid::init_grid(); + + // q + hd_data->h_q_qd_u[0] = 1.24; + hd_data->h_q_qd_u[1] = 0.13; + hd_data->h_q_qd_u[2] = -0.17; + hd_data->h_q_qd_u[3] = 1.33; + hd_data->h_q_qd_u[4] = 0.22; + hd_data->h_q_qd_u[5] = -0.56; + hd_data->h_q_qd_u[6] = 0.99; + //qd + hd_data->h_q_qd_u[7] = 0.0; + hd_data->h_q_qd_u[8] = 0.0; + hd_data->h_q_qd_u[9] = 0.0; + hd_data->h_q_qd_u[10] = 0.0; + hd_data->h_q_qd_u[11] = 0.0; + hd_data->h_q_qd_u[12] = 0.0; + hd_data->h_q_qd_u[13] = 0.0; + + //tau + hd_data->h_q_qd_u[14] = 0.0; + hd_data->h_q_qd_u[15] = 0.0; + hd_data->h_q_qd_u[16] = 0.0; + hd_data->h_q_qd_u[17] = 0.0; + hd_data->h_q_qd_u[18] = 0.0; + hd_data->h_q_qd_u[19] = 0.0; + hd_data->h_q_qd_u[20] = 0.0; + + gpuErrchk(cudaMemcpyAsync(hd_data->d_q_qd_u,hd_data->h_q_qd_u,21*sizeof(float),cudaMemcpyHostToDevice,streams[0])); + gpuErrchk(cudaDeviceSynchronize()); + + float eps = 0.1; + // T *input, T *output, T (*func)(T*), T eps, const robotModel *d_robotModel, const float gravity, const int NUM_TIMESTEPS) + // fd_kernel<<<1,1>>>(input, func_output, output, eps, d_robotModel, gravity); + // printf("in main to kernel\n"); + // fd_kernel<<<1,1,grid::ABA_DYNAMIC_SHARED_MEM_COUNT*sizeof(float)>>>(hd_data->d_q_qd_u, hd_data->d_fdoutput, eps, d_robotModel, gravity); + fd_kernel<<<1,1,grid:: ID_DU_DYNAMIC_SHARED_MEM_COUNT*sizeof(float)>>>(hd_data->d_q_qd_u, hd_data->d_fdoutput, eps, d_robotModel, gravity); + gpuErrchk(cudaDeviceSynchronize()); + + gpuErrchk(cudaMemcpy(hd_data->h_fdoutput,hd_data->d_fdoutput,49*7*4*sizeof(float),cudaMemcpyDeviceToHost)); + gpuErrchk(cudaDeviceSynchronize()); + + + gpuErrchk(cudaFree(d_robotModel)); + gpuErrchk(cudaFree(hd_data->d_q_qd_u)); + gpuErrchk(cudaFree(hd_data->d_fdoutput)); + free(hd_data->h_fdoutput); + free(hd_data->h_q_qd_u); + for(int i=0; i<3; i++){gpuErrchk(cudaStreamDestroy(streams[i]));} free(streams); + + return 0; + +}; \ No newline at end of file diff --git a/test/test_aba.cu b/test/test_aba.cu index d6381e0..ccf4e85 100644 --- a/test/test_aba.cu +++ b/test/test_aba.cu @@ -1,11 +1,58 @@ -#include "grid.cuh" +// #include "grid.cuh" +#include "/home/a2rlab4/GRiDBenchmarks/GRiD/grid.cuh" #include #include #include #include #include -int main() { + +int iiwa(){ + grid::gridData *hd_data = grid::init_gridData(); + grid::robotModel *d_robotModel = grid::init_robotModel();; + const int num_timesteps = 1; + float gravity = static_cast(9.81); + dim3 dimms(grid::SUGGESTED_THREADS,1,1); + cudaStream_t *streams = grid::init_grid(); + hd_data->h_q_qd_u[0] = 1.24; + hd_data->h_q_qd_u[1] = 0.13; + hd_data->h_q_qd_u[2] = -0.17; + hd_data->h_q_qd_u[3] = 1.33; + hd_data->h_q_qd_u[4] = 0.22; + hd_data->h_q_qd_u[5] = -0.56; + hd_data->h_q_qd_u[6] = 0.99; + + hd_data->h_q_qd_u[7] = 0; + hd_data->h_q_qd_u[8] = 0; + hd_data->h_q_qd_u[9] = 0; + hd_data->h_q_qd_u[10] = 0; + hd_data->h_q_qd_u[11] = 0; + hd_data->h_q_qd_u[12] = 0; + hd_data->h_q_qd_u[13] = 0; + + hd_data->h_q_qd_u[14] = 0; + hd_data->h_q_qd_u[15] = 0; + hd_data->h_q_qd_u[16] = 0; + hd_data->h_q_qd_u[17] = 0; + hd_data->h_q_qd_u[18] = 0; + hd_data->h_q_qd_u[19] = 0; + hd_data->h_q_qd_u[20] = 0; + + gpuErrchk(cudaMemcpy(hd_data->d_q_qd_u,hd_data->h_q_qd_u,3*grid::NUM_JOINTS*sizeof(float),cudaMemcpyHostToDevice)); + gpuErrchk(cudaDeviceSynchronize()); + + printf("q,qd,u\n"); + printMat(hd_data->h_q_qd_u,1); + printMat(&hd_data->h_q_qd_u[grid::NUM_JOINTS],1); + printMat(&hd_data->h_q_qd_u[2*grid::NUM_JOINTS],1); + + printf("aba\n"); + grid::aba(hd_data, d_robotModel, gravity, 1, dim3(1,1,1), dimms, streams); + printMat(hd_data->h_qdd,1); + return 0; +} + +int hyq(){ grid::gridData *hd_data = grid::init_gridData(); grid::robotModel *d_robotModel = grid::init_robotModel();; const int num_timesteps = 1; @@ -19,15 +66,73 @@ int main() { hd_data->h_q_qd_u[4] = 0.7; hd_data->h_q_qd_u[5] = 0.6; hd_data->h_q_qd_u[6] = 0.4; + hd_data->h_q_qd_u[7] = 0.5; + hd_data->h_q_qd_u[8] = 0.3; + hd_data->h_q_qd_u[9] = 0.8; + hd_data->h_q_qd_u[10] = 0.1; + hd_data->h_q_qd_u[11] = 0.2; - hd_data->h_q_qd_u[7] = 0; - hd_data->h_q_qd_u[8] = 0; - hd_data->h_q_qd_u[9] = 0; - hd_data->h_q_qd_u[10] = 0; + hd_data->h_q_qd_u[12] = 0.1; + hd_data->h_q_qd_u[13] = 0.1; + hd_data->h_q_qd_u[14] = 0.1; + hd_data->h_q_qd_u[15] = 0.1; + hd_data->h_q_qd_u[16] = 0.1; + hd_data->h_q_qd_u[17] = 0.1; + hd_data->h_q_qd_u[18] = 0.1; + hd_data->h_q_qd_u[19] = 0.1; + hd_data->h_q_qd_u[20] = 0.1; + hd_data->h_q_qd_u[21] = 0.1; + hd_data->h_q_qd_u[22] = 0.1; + + hd_data->h_q_qd_u[23] = 0.1; + hd_data->h_q_qd_u[24] = 0.1; + hd_data->h_q_qd_u[25] = 0.1; + hd_data->h_q_qd_u[26] = 0.1; + hd_data->h_q_qd_u[27] = 0.1; + hd_data->h_q_qd_u[28] = 0.1; + hd_data->h_q_qd_u[29] = 0.1; + hd_data->h_q_qd_u[30] = 0.1; + hd_data->h_q_qd_u[31] = 0.1; + hd_data->h_q_qd_u[32] = 0.1; + + + gpuErrchk(cudaMemcpy(hd_data->d_q_qd_u,hd_data->h_q_qd_u,3*grid::NUM_JOINTS*sizeof(float),cudaMemcpyHostToDevice)); + gpuErrchk(cudaDeviceSynchronize()); + + printf("q,qd,u\n"); + printMat(hd_data->h_q_qd_u,1); + printMat(&hd_data->h_q_qd_u[grid::NUM_JOINTS],1); + printMat(&hd_data->h_q_qd_u[2*grid::NUM_JOINTS],1); + + printf("aba\n"); + grid::aba(hd_data, d_robotModel, gravity, 1, dim3(1,1,1), dimms, streams); + printMat(hd_data->h_qdd,1); + + return 0; +} + +int hyq_missing_limb(){ + grid::gridData *hd_data = grid::init_gridData(); + grid::robotModel *d_robotModel = grid::init_robotModel();; + const int num_timesteps = 1; + float gravity = static_cast(9.81); + dim3 dimms(grid::SUGGESTED_THREADS,1,1); + cudaStream_t *streams = grid::init_grid(); + hd_data->h_q_qd_u[0] = 0.8; + hd_data->h_q_qd_u[1] = 0.3; + hd_data->h_q_qd_u[2] = 1; + hd_data->h_q_qd_u[3] = 0.2; + hd_data->h_q_qd_u[4] = 0.7; + hd_data->h_q_qd_u[5] = 0.6; + hd_data->h_q_qd_u[6] = 0.4; + hd_data->h_q_qd_u[7] = 0.4; + hd_data->h_q_qd_u[8] = 0.3; + hd_data->h_q_qd_u[9] = 0.8; + hd_data->h_q_qd_u[10] = 0.1; + hd_data->h_q_qd_u[11] = 0; hd_data->h_q_qd_u[12] = 0; hd_data->h_q_qd_u[13] = 0; - hd_data->h_q_qd_u[14] = 0; hd_data->h_q_qd_u[15] = 0; hd_data->h_q_qd_u[16] = 0; @@ -35,6 +140,20 @@ int main() { hd_data->h_q_qd_u[18] = 0; hd_data->h_q_qd_u[19] = 0; hd_data->h_q_qd_u[20] = 0; + hd_data->h_q_qd_u[21] = 0; + + hd_data->h_q_qd_u[22] = 0; + hd_data->h_q_qd_u[23] = 0; + hd_data->h_q_qd_u[24] = 0; + hd_data->h_q_qd_u[25] = 0; + hd_data->h_q_qd_u[26] = 0; + hd_data->h_q_qd_u[27] = 0; + hd_data->h_q_qd_u[28] = 0; + hd_data->h_q_qd_u[29] = 0; + hd_data->h_q_qd_u[30] = 0; + hd_data->h_q_qd_u[31] = 0; + hd_data->h_q_qd_u[32] = 0; + gpuErrchk(cudaMemcpy(hd_data->d_q_qd_u,hd_data->h_q_qd_u,3*grid::NUM_JOINTS*sizeof(float),cudaMemcpyHostToDevice)); gpuErrchk(cudaDeviceSynchronize()); @@ -47,5 +166,178 @@ int main() { printf("aba\n"); grid::aba(hd_data, d_robotModel, gravity, 1, dim3(1,1,1), dimms, streams); printMat(hd_data->h_qdd,1); + return 0; -} \ No newline at end of file +} + +int atlas(){ + grid::gridData *hd_data = grid::init_gridData(); + grid::robotModel *d_robotModel = grid::init_robotModel();; + const int num_timesteps = 1; + float gravity = static_cast(9.81); + dim3 dimms(grid::SUGGESTED_THREADS,1,1); + cudaStream_t *streams = grid::init_grid(); + hd_data->h_q_qd_u[0] = 0.8; + hd_data->h_q_qd_u[1] = 0.3; + hd_data->h_q_qd_u[2] = 1; + hd_data->h_q_qd_u[3] = 0.2; + hd_data->h_q_qd_u[4] = 0.7; + hd_data->h_q_qd_u[5] = 0.6; + hd_data->h_q_qd_u[6] = 0.4; + hd_data->h_q_qd_u[7] = 0.4; + hd_data->h_q_qd_u[8] = 0.3; + hd_data->h_q_qd_u[9] = 0.8; + hd_data->h_q_qd_u[10] = 0.1; + hd_data->h_q_qd_u[11] = 0.2; + hd_data->h_q_qd_u[12] = 0.3; + hd_data->h_q_qd_u[13] = 0.5; + hd_data->h_q_qd_u[14] = 0.4; + hd_data->h_q_qd_u[15] = 0.37; + hd_data->h_q_qd_u[16] = 0.9; + hd_data->h_q_qd_u[17] = 0.53; + hd_data->h_q_qd_u[18] = 0.2; + hd_data->h_q_qd_u[19] = 1; + hd_data->h_q_qd_u[20] = 0.6; + hd_data->h_q_qd_u[21] = 0.8; + hd_data->h_q_qd_u[22] = 0.9; + hd_data->h_q_qd_u[23] = 0.3; + hd_data->h_q_qd_u[24] = 0.7; + hd_data->h_q_qd_u[25] = 0.44; + hd_data->h_q_qd_u[26] = 0.28; + hd_data->h_q_qd_u[27] = 0.6; + hd_data->h_q_qd_u[28] = 1; + hd_data->h_q_qd_u[29] = 0.1; + + hd_data->h_q_qd_u[60] = 0.1; + hd_data->h_q_qd_u[61] = 0.1; + hd_data->h_q_qd_u[62] = 0.1; + hd_data->h_q_qd_u[63] = 0.1; + hd_data->h_q_qd_u[64] = 0.1; + hd_data->h_q_qd_u[65] = 0.1; + hd_data->h_q_qd_u[66] = 0.1; + hd_data->h_q_qd_u[67] = 0.1; + hd_data->h_q_qd_u[68] = 0.1; + hd_data->h_q_qd_u[69] = 0.1; + hd_data->h_q_qd_u[70] = 0.1; + hd_data->h_q_qd_u[71] = 0.1; + hd_data->h_q_qd_u[72] = 0.1; + hd_data->h_q_qd_u[73] = 0.1; + hd_data->h_q_qd_u[74] = 0.1; + hd_data->h_q_qd_u[75] = 0.1; + hd_data->h_q_qd_u[76] = 0.1; + hd_data->h_q_qd_u[77] = 0.1; + hd_data->h_q_qd_u[78] = 0.1; + hd_data->h_q_qd_u[79] = 0.1; + hd_data->h_q_qd_u[80] = 0.1; + hd_data->h_q_qd_u[81] = 0.1; + hd_data->h_q_qd_u[82] = 0.1; + hd_data->h_q_qd_u[83] = 0.1; + hd_data->h_q_qd_u[84] = 0.1; + hd_data->h_q_qd_u[85] = 0.1; + hd_data->h_q_qd_u[86] = 0.1; + hd_data->h_q_qd_u[87] = 0.1; + hd_data->h_q_qd_u[88] = 0.1; + hd_data->h_q_qd_u[89] = 0.1; + + hd_data->h_q_qd_u[30] = 0.1; + hd_data->h_q_qd_u[31] = 0.1; + hd_data->h_q_qd_u[32] = 0.1; + hd_data->h_q_qd_u[33] = 0.1; + hd_data->h_q_qd_u[34] = 0.1; + hd_data->h_q_qd_u[35] = 0.1; + hd_data->h_q_qd_u[36] = 0.1; + hd_data->h_q_qd_u[37] = 0.1; + hd_data->h_q_qd_u[38] = 0.1; + hd_data->h_q_qd_u[39] = 0.1; + hd_data->h_q_qd_u[40] = 0.1; + hd_data->h_q_qd_u[41] = 0.1; + hd_data->h_q_qd_u[42] = 0.1; + hd_data->h_q_qd_u[43] = 0.1; + hd_data->h_q_qd_u[44] = 0.1; + hd_data->h_q_qd_u[45] = 0.1; + hd_data->h_q_qd_u[46] = 0.1; + hd_data->h_q_qd_u[47] = 0.1; + hd_data->h_q_qd_u[48] = 0.1; + hd_data->h_q_qd_u[49] = 0.1; + hd_data->h_q_qd_u[50] = 0.1; + hd_data->h_q_qd_u[51] = 0.1; + hd_data->h_q_qd_u[52] = 0.1; + hd_data->h_q_qd_u[53] = 0.1; + hd_data->h_q_qd_u[54] = 0.1; + hd_data->h_q_qd_u[55] = 0.1; + hd_data->h_q_qd_u[56] = 0.1; + hd_data->h_q_qd_u[57] = 0.1; + hd_data->h_q_qd_u[58] = 0.1; + hd_data->h_q_qd_u[59] = 0.1; + + + + gpuErrchk(cudaMemcpy(hd_data->d_q_qd_u,hd_data->h_q_qd_u,3*grid::NUM_JOINTS*sizeof(float),cudaMemcpyHostToDevice)); + gpuErrchk(cudaDeviceSynchronize()); + + printf("q,qd,u\n"); + printMat(hd_data->h_q_qd_u,1); + printMat(&hd_data->h_q_qd_u[grid::NUM_JOINTS],1); + printMat(&hd_data->h_q_qd_u[2*grid::NUM_JOINTS],1); + + printf("aba\n"); + grid::aba(hd_data, d_robotModel, gravity, 1, dim3(1,1,1), dimms, streams); + printMat(hd_data->h_qdd,1); + + return 0; +} + +int main() { + //uncomment the line for the URDF you are using + + iiwa(); + //hyq(); + //hyq_missing_limb(); + // atlas(); + return 0; +} + +// int main() { +// grid::gridData *hd_data = grid::init_gridData(); +// grid::robotModel *d_robotModel = grid::init_robotModel();; +// const int num_timesteps = 1; +// float gravity = static_cast(9.81); +// dim3 dimms(grid::SUGGESTED_THREADS,1,1); +// cudaStream_t *streams = grid::init_grid(); +// hd_data->h_q_qd_u[0] = 0.8; +// hd_data->h_q_qd_u[1] = 0.3; +// hd_data->h_q_qd_u[2] = 1; +// hd_data->h_q_qd_u[3] = 0.2; +// hd_data->h_q_qd_u[4] = 0.7; +// hd_data->h_q_qd_u[5] = 0.6; +// hd_data->h_q_qd_u[6] = 0.4; + +// hd_data->h_q_qd_u[7] = 0; +// hd_data->h_q_qd_u[8] = 0; +// hd_data->h_q_qd_u[9] = 0; +// hd_data->h_q_qd_u[10] = 0; +// hd_data->h_q_qd_u[11] = 0; +// hd_data->h_q_qd_u[12] = 0; +// hd_data->h_q_qd_u[13] = 0; + +// hd_data->h_q_qd_u[14] = 0; +// hd_data->h_q_qd_u[15] = 0; +// hd_data->h_q_qd_u[16] = 0; +// hd_data->h_q_qd_u[17] = 0; +// hd_data->h_q_qd_u[18] = 0; +// hd_data->h_q_qd_u[19] = 0; +// hd_data->h_q_qd_u[20] = 0; + +// gpuErrchk(cudaMemcpy(hd_data->d_q_qd_u,hd_data->h_q_qd_u,3*grid::NUM_JOINTS*sizeof(float),cudaMemcpyHostToDevice)); +// gpuErrchk(cudaDeviceSynchronize()); + +// printf("q,qd,u\n"); +// printMat(hd_data->h_q_qd_u,1); +// printMat(&hd_data->h_q_qd_u[grid::NUM_JOINTS],1); +// printMat(&hd_data->h_q_qd_u[2*grid::NUM_JOINTS],1); + +// printf("aba\n"); +// grid::aba(hd_data, d_robotModel, gravity, 1, dim3(1,1,1), dimms, streams); +// printMat(hd_data->h_qdd,1); +// return 0; +// } \ No newline at end of file diff --git a/test/test_crba.cu b/test/test_crba.cu index aab45d7..a27c1c3 100644 --- a/test/test_crba.cu +++ b/test/test_crba.cu @@ -1,24 +1,183 @@ -#include "grid.cuh" +#include "grid.cuh" #include #include #include #include -#include #include -//run CodeGen to get grid.cuh -int main() { +int hyq(){ + grid::gridData *hd_data = grid::init_gridData(); + grid::robotModel *d_robotModel = grid::init_robotModel();; + //const int num_timesteps = 1; + float gravity = static_cast(9.81); + dim3 dimms(grid::SUGGESTED_THREADS,1,1); + cudaStream_t *streams = grid::init_grid(); + hd_data->h_q_qd_u[0] = 0.8; + hd_data->h_q_qd_u[1] = 0.3; + hd_data->h_q_qd_u[2] = 1; + hd_data->h_q_qd_u[3] = 0.2; + hd_data->h_q_qd_u[4] = 0.7; + hd_data->h_q_qd_u[5] = 0.6; + hd_data->h_q_qd_u[6] = 0.4; + hd_data->h_q_qd_u[7] = 0.5; + hd_data->h_q_qd_u[8] = 0.3; + hd_data->h_q_qd_u[9] = 0.8; + hd_data->h_q_qd_u[10] = 0.1; + hd_data->h_q_qd_u[11] = 0.2; - printf("buckle up and enjoy the ride \n"); + hd_data->h_q_qd_u[12] = 0.1; + hd_data->h_q_qd_u[13] = 0.1; + hd_data->h_q_qd_u[14] = 0.1; + hd_data->h_q_qd_u[15] = 0.1; + hd_data->h_q_qd_u[16] = 0.1; + hd_data->h_q_qd_u[17] = 0.1; + hd_data->h_q_qd_u[18] = 0.1; + hd_data->h_q_qd_u[19] = 0.1; + hd_data->h_q_qd_u[20] = 0.1; + hd_data->h_q_qd_u[21] = 0.1; + hd_data->h_q_qd_u[22] = 0.1; + + hd_data->h_q_qd_u[23] = 0.1; + hd_data->h_q_qd_u[24] = 0.1; + hd_data->h_q_qd_u[25] = 0.1; + hd_data->h_q_qd_u[26] = 0.1; + hd_data->h_q_qd_u[27] = 0.1; + hd_data->h_q_qd_u[28] = 0.1; + hd_data->h_q_qd_u[29] = 0.1; + hd_data->h_q_qd_u[30] = 0.1; + hd_data->h_q_qd_u[31] = 0.1; + hd_data->h_q_qd_u[32] = 0.1; + + /*hd_data->h_q_qd_u[11] = 0.2; + hd_data->h_q_qd_u[12] = 0.1; + hd_data->h_q_qd_u[13] = 0.1; + hd_data->h_q_qd_u[14] = 0.5; + hd_data->h_q_qd_u[15] = 0.1; + hd_data->h_q_qd_u[16] = 0.1; + hd_data->h_q_qd_u[17] = 0.1; + hd_data->h_q_qd_u[18] = 0.1; + hd_data->h_q_qd_u[19] = 0.1; + hd_data->h_q_qd_u[20] = 0.1; + hd_data->h_q_qd_u[21] = 0.1; + + hd_data->h_q_qd_u[22] = 0.2; + hd_data->h_q_qd_u[23] = 0.1; + hd_data->h_q_qd_u[24] = 0.1; + hd_data->h_q_qd_u[25] = 0.5; + hd_data->h_q_qd_u[26] = 0.1; + hd_data->h_q_qd_u[27] = 0.1; + hd_data->h_q_qd_u[28] = 0.1; + hd_data->h_q_qd_u[29] = 0.1; + hd_data->h_q_qd_u[30] = 0.1; + hd_data->h_q_qd_u[31] = 0.1; + hd_data->h_q_qd_u[32] = 0.1;*/ + + gpuErrchk(cudaMemcpy(hd_data->d_q_qd_u,hd_data->h_q_qd_u,3*grid::NUM_JOINTS*sizeof(float),cudaMemcpyHostToDevice)); + gpuErrchk(cudaDeviceSynchronize()); + + printf("q,qd,u\n"); + printMat(hd_data->h_q_qd_u,1); + printMat(&hd_data->h_q_qd_u[grid::NUM_JOINTS],1); + printMat(&hd_data->h_q_qd_u[2*grid::NUM_JOINTS],1); + + printf("crba\n"); + grid::crba(hd_data, d_robotModel, gravity, 1, dim3(1,1,1), dimms, streams); + printf("M = \n"); + printMat(hd_data->h_M,1); + + return 0; +} +int hyq_missing_limb(){ grid::gridData *hd_data = grid::init_gridData(); - grid::robotModel *d_robotModel = grid::init_robotModel(); - const int num_timesteps = 1; + grid::robotModel *d_robotModel = grid::init_robotModel();; + //const int num_timesteps = 1; float gravity = static_cast(9.81); - //printf("gravity = %f", gravity); dim3 dimms(grid::SUGGESTED_THREADS,1,1); cudaStream_t *streams = grid::init_grid(); + hd_data->h_q_qd_u[0] = 0.8; + hd_data->h_q_qd_u[1] = 0.3; + hd_data->h_q_qd_u[2] = 1; + hd_data->h_q_qd_u[3] = 0.2; + hd_data->h_q_qd_u[4] = 0.7; + hd_data->h_q_qd_u[5] = 0.6; + hd_data->h_q_qd_u[6] = 0.4; + hd_data->h_q_qd_u[7] = 0.4; + hd_data->h_q_qd_u[8] = 0.3; + hd_data->h_q_qd_u[9] = 0.8; + hd_data->h_q_qd_u[10] = 0.1; + + hd_data->h_q_qd_u[11] = 0; + hd_data->h_q_qd_u[12] = 0; + hd_data->h_q_qd_u[13] = 0; + hd_data->h_q_qd_u[14] = 0; + hd_data->h_q_qd_u[15] = 0; + hd_data->h_q_qd_u[16] = 0; + hd_data->h_q_qd_u[17] = 0; + hd_data->h_q_qd_u[18] = 0; + hd_data->h_q_qd_u[19] = 0; + hd_data->h_q_qd_u[20] = 0; + hd_data->h_q_qd_u[21] = 0; + + hd_data->h_q_qd_u[22] = 0; + hd_data->h_q_qd_u[23] = 0; + hd_data->h_q_qd_u[24] = 0; + hd_data->h_q_qd_u[25] = 0; + hd_data->h_q_qd_u[26] = 0; + hd_data->h_q_qd_u[27] = 0; + hd_data->h_q_qd_u[28] = 0; + hd_data->h_q_qd_u[29] = 0; + hd_data->h_q_qd_u[30] = 0; + hd_data->h_q_qd_u[31] = 0; + hd_data->h_q_qd_u[32] = 0; + /*hd_data->h_q_qd_u[11] = 0.2; + hd_data->h_q_qd_u[12] = 0.1; + hd_data->h_q_qd_u[13] = 0.1; + hd_data->h_q_qd_u[14] = 0.5; + hd_data->h_q_qd_u[15] = 0.1; + hd_data->h_q_qd_u[16] = 0.1; + hd_data->h_q_qd_u[17] = 0.1; + hd_data->h_q_qd_u[18] = 0.1; + hd_data->h_q_qd_u[19] = 0.1; + hd_data->h_q_qd_u[20] = 0.1; + hd_data->h_q_qd_u[21] = 0.1; + + hd_data->h_q_qd_u[22] = 0.2; + hd_data->h_q_qd_u[23] = 0.1; + hd_data->h_q_qd_u[24] = 0.1; + hd_data->h_q_qd_u[25] = 0.5; + hd_data->h_q_qd_u[26] = 0.1; + hd_data->h_q_qd_u[27] = 0.1; + hd_data->h_q_qd_u[28] = 0.1; + hd_data->h_q_qd_u[29] = 0.1; + hd_data->h_q_qd_u[30] = 0.1; + hd_data->h_q_qd_u[31] = 0.1; + hd_data->h_q_qd_u[32] = 0.1;*/ + + gpuErrchk(cudaMemcpy(hd_data->d_q_qd_u,hd_data->h_q_qd_u,3*grid::NUM_JOINTS*sizeof(float),cudaMemcpyHostToDevice)); + gpuErrchk(cudaDeviceSynchronize()); + + printf("q,qd,u\n"); + printMat(hd_data->h_q_qd_u,1); + printMat(&hd_data->h_q_qd_u[grid::NUM_JOINTS],1); + printMat(&hd_data->h_q_qd_u[2*grid::NUM_JOINTS],1); + + printf("crba\n"); + grid::crba(hd_data, d_robotModel, gravity, 1, dim3(1,1,1), dimms, streams); + printf("M = \n"); + printMat(hd_data->h_M,1); + + return 0; +} + +int iiwa(){ + grid::gridData *hd_data = grid::init_gridData(); + grid::robotModel *d_robotModel = grid::init_robotModel(); + //const int num_timesteps = 1; + float gravity = static_cast(9.81); + dim3 dimms(grid::SUGGESTED_THREADS,1,1); + cudaStream_t *streams = grid::init_grid(); hd_data->h_q_qd_u[0] = 1.24; hd_data->h_q_qd_u[1] = 0.13; hd_data->h_q_qd_u[2] = -0.17; @@ -44,26 +203,199 @@ int main() { hd_data->h_q_qd_u[20] = 0; gpuErrchk(cudaMemcpy(hd_data->d_q_qd_u,hd_data->h_q_qd_u,3*grid::NUM_JOINTS*sizeof(float),cudaMemcpyHostToDevice)); - gpuErrchk(cudaDeviceSynchronize()); + gpuErrchk(cudaDeviceSynchronize()); - //printf("testing...."); - //printf("q,qd,u\n"); - //printMat(hd_data->h_q_qd_u,1); - //printMat(&hd_data->h_q_qd_u[grid::NUM_JOINTS],1); - //printMat(&hd_data->h_q_qd_u[2*grid::NUM_JOINTS],1); + printf("crba\n"); + grid::crba(hd_data, d_robotModel, gravity, 1, dim3(1,1,1), dimms, streams); + printf("M = "); + printMat(hd_data->h_M,1); + return 0; +} + +int atlas(){ + grid::gridData *hd_data = grid::init_gridData(); + grid::robotModel *d_robotModel = grid::init_robotModel();; + //const int num_timesteps = 1; + float gravity = static_cast(9.81); + dim3 dimms(grid::SUGGESTED_THREADS,1,1); + cudaStream_t *streams = grid::init_grid(); + hd_data->h_q_qd_u[0] = 0.8; + hd_data->h_q_qd_u[1] = 0.3; + hd_data->h_q_qd_u[2] = 1; + hd_data->h_q_qd_u[3] = 0.2; + hd_data->h_q_qd_u[4] = 0.7; + hd_data->h_q_qd_u[5] = 0.6; + hd_data->h_q_qd_u[6] = 0.4; + hd_data->h_q_qd_u[7] = 0.4; + hd_data->h_q_qd_u[8] = 0.3; + hd_data->h_q_qd_u[9] = 0.8; + hd_data->h_q_qd_u[10] = 0.1; + hd_data->h_q_qd_u[11] = 0.2; + hd_data->h_q_qd_u[12] = 0.3; + hd_data->h_q_qd_u[13] = 0.5; + hd_data->h_q_qd_u[14] = 0.4; + hd_data->h_q_qd_u[15] = 0.37; + hd_data->h_q_qd_u[16] = 0.9; + hd_data->h_q_qd_u[17] = 0.53; + hd_data->h_q_qd_u[18] = 0.2; + hd_data->h_q_qd_u[19] = 1; + hd_data->h_q_qd_u[20] = 0.6; + hd_data->h_q_qd_u[21] = 0.8; + hd_data->h_q_qd_u[22] = 0.9; + hd_data->h_q_qd_u[23] = 0.3; + hd_data->h_q_qd_u[24] = 0.7; + hd_data->h_q_qd_u[25] = 0.44; + hd_data->h_q_qd_u[26] = 0.28; + hd_data->h_q_qd_u[27] = 0.6; + hd_data->h_q_qd_u[28] = 1; + hd_data->h_q_qd_u[29] = 0.1; + + hd_data->h_q_qd_u[60] = 0.1; + hd_data->h_q_qd_u[61] = 0.1; + hd_data->h_q_qd_u[62] = 0.1; + hd_data->h_q_qd_u[63] = 0.1; + hd_data->h_q_qd_u[64] = 0.1; + hd_data->h_q_qd_u[65] = 0.1; + hd_data->h_q_qd_u[66] = 0.1; + hd_data->h_q_qd_u[67] = 0.1; + hd_data->h_q_qd_u[68] = 0.1; + hd_data->h_q_qd_u[69] = 0.1; + hd_data->h_q_qd_u[70] = 0.1; + hd_data->h_q_qd_u[71] = 0.1; + hd_data->h_q_qd_u[72] = 0.1; + hd_data->h_q_qd_u[73] = 0.1; + hd_data->h_q_qd_u[74] = 0.1; + hd_data->h_q_qd_u[75] = 0.1; + hd_data->h_q_qd_u[76] = 0.1; + hd_data->h_q_qd_u[77] = 0.1; + hd_data->h_q_qd_u[78] = 0.1; + hd_data->h_q_qd_u[79] = 0.1; + hd_data->h_q_qd_u[80] = 0.1; + hd_data->h_q_qd_u[81] = 0.1; + hd_data->h_q_qd_u[82] = 0.1; + hd_data->h_q_qd_u[83] = 0.1; + hd_data->h_q_qd_u[84] = 0.1; + hd_data->h_q_qd_u[85] = 0.1; + hd_data->h_q_qd_u[86] = 0.1; + hd_data->h_q_qd_u[87] = 0.1; + hd_data->h_q_qd_u[88] = 0.1; + hd_data->h_q_qd_u[89] = 0.1; + + hd_data->h_q_qd_u[30] = 0.1; + hd_data->h_q_qd_u[31] = 0.1; + hd_data->h_q_qd_u[32] = 0.1; + hd_data->h_q_qd_u[33] = 0.1; + hd_data->h_q_qd_u[34] = 0.1; + hd_data->h_q_qd_u[35] = 0.1; + hd_data->h_q_qd_u[36] = 0.1; + hd_data->h_q_qd_u[37] = 0.1; + hd_data->h_q_qd_u[38] = 0.1; + hd_data->h_q_qd_u[39] = 0.1; + hd_data->h_q_qd_u[40] = 0.1; + hd_data->h_q_qd_u[41] = 0.1; + hd_data->h_q_qd_u[42] = 0.1; + hd_data->h_q_qd_u[43] = 0.1; + hd_data->h_q_qd_u[44] = 0.1; + hd_data->h_q_qd_u[45] = 0.1; + hd_data->h_q_qd_u[46] = 0.1; + hd_data->h_q_qd_u[47] = 0.1; + hd_data->h_q_qd_u[48] = 0.1; + hd_data->h_q_qd_u[49] = 0.1; + hd_data->h_q_qd_u[50] = 0.1; + hd_data->h_q_qd_u[51] = 0.1; + hd_data->h_q_qd_u[52] = 0.1; + hd_data->h_q_qd_u[53] = 0.1; + hd_data->h_q_qd_u[54] = 0.1; + hd_data->h_q_qd_u[55] = 0.1; + hd_data->h_q_qd_u[56] = 0.1; + hd_data->h_q_qd_u[57] = 0.1; + hd_data->h_q_qd_u[58] = 0.1; + hd_data->h_q_qd_u[59] = 0.1; - grid::crba(hd_data,d_robotModel,gravity,1,dim3(1,1,1),dimms,streams); - printMat(hd_data->h_H,1); - printf("byeee \n"); - // print H matrix so for loops; + // hd_data->h_q_qd_u[60] = 0; + // hd_data->h_q_qd_u[61] = 0; + // hd_data->h_q_qd_u[62] = 0; + // hd_data->h_q_qd_u[63] = 0; + // hd_data->h_q_qd_u[64] = 0; + // hd_data->h_q_qd_u[65] = 0; + // hd_data->h_q_qd_u[66] = 0; + // hd_data->h_q_qd_u[67] = 0; + // hd_data->h_q_qd_u[68] = 0; + // hd_data->h_q_qd_u[69] = 0; + // hd_data->h_q_qd_u[70] = 0; + // hd_data->h_q_qd_u[71] = 0; + // hd_data->h_q_qd_u[72] = 0; + // hd_data->h_q_qd_u[73] = 0; + // hd_data->h_q_qd_u[74] = 0; + // hd_data->h_q_qd_u[75] = 0; + // hd_data->h_q_qd_u[76] = 0; + // hd_data->h_q_qd_u[77] = 0; + // hd_data->h_q_qd_u[78] = 0; + // hd_data->h_q_qd_u[79] = 0; + // hd_data->h_q_qd_u[80] = 0; + // hd_data->h_q_qd_u[81] = 0; + // hd_data->h_q_qd_u[82] = 0; + // hd_data->h_q_qd_u[83] = 0; + // hd_data->h_q_qd_u[84] = 0; + // hd_data->h_q_qd_u[85] = 0; + // hd_data->h_q_qd_u[86] = 0; + // hd_data->h_q_qd_u[87] = 0; + // hd_data->h_q_qd_u[88] = 0; + // hd_data->h_q_qd_u[89] = 0; - return 0; + // hd_data->h_q_qd_u[30] = 0; + // hd_data->h_q_qd_u[31] = 0; + // hd_data->h_q_qd_u[32] = 0; + // hd_data->h_q_qd_u[33] = 0; + // hd_data->h_q_qd_u[34] = 0; + // hd_data->h_q_qd_u[35] = 0; + // hd_data->h_q_qd_u[36] = 0; + // hd_data->h_q_qd_u[37] = 0; + // hd_data->h_q_qd_u[38] = 0; + // hd_data->h_q_qd_u[39] = 0; + // hd_data->h_q_qd_u[40] = 0; + // hd_data->h_q_qd_u[41] = 0; + // hd_data->h_q_qd_u[42] = 0; + // hd_data->h_q_qd_u[43] = 0; + // hd_data->h_q_qd_u[44] = 0; + // hd_data->h_q_qd_u[45] = 0; + // hd_data->h_q_qd_u[46] = 0; + // hd_data->h_q_qd_u[47] = 0; + // hd_data->h_q_qd_u[48] = 0; + // hd_data->h_q_qd_u[49] = 0; + // hd_data->h_q_qd_u[50] = 0; + // hd_data->h_q_qd_u[51] = 0; + // hd_data->h_q_qd_u[52] = 0; + // hd_data->h_q_qd_u[53] = 0; + // hd_data->h_q_qd_u[54] = 0; + // hd_data->h_q_qd_u[55] = 0; + // hd_data->h_q_qd_u[56] = 0; + // hd_data->h_q_qd_u[57] = 0; + // hd_data->h_q_qd_u[58] = 0; + // hd_data->h_q_qd_u[59] = 0; -} + gpuErrchk(cudaMemcpy(hd_data->d_q_qd_u,hd_data->h_q_qd_u,3*grid::NUM_JOINTS*sizeof(float),cudaMemcpyHostToDevice)); + gpuErrchk(cudaDeviceSynchronize()); + + printf("q,qd,u\n"); + printMat(hd_data->h_q_qd_u,1); + printMat(&hd_data->h_q_qd_u[grid::NUM_JOINTS],1); + printMat(&hd_data->h_q_qd_u[2*grid::NUM_JOINTS],1); + + printf("crba\n"); + grid::crba(hd_data, d_robotModel, gravity, 1, dim3(1,1,1), dimms, streams); + printf("M = \n"); + printMat(hd_data->h_M,1); + + return 0; +} -// run it with nvcc test_crba_cu -std=c++11 --> run on ssh or old laptop bc GPU -// genCode = compute_??? arch__??? --> need to google to see what type of GPU -//nvidia-smi will spit out lots of info about GPU -//-o text.exe changes the name to test.exe \ No newline at end of file +int main() { + //atlas(); + //hyq_missing_limb(); + hyq(); + //iiwa(); + return 0; +} \ No newline at end of file