forked from masa16/narray
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathnarray_local.h
291 lines (262 loc) · 8.39 KB
/
narray_local.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
/*
narray_local.h
Numerical Array Extension for Ruby
(C) Copyright 1999-2008 by Masahiro TANAKA
This program is free software.
You can distribute/modify this program
under the same terms as Ruby itself.
NO WARRANTY.
*/
/* 32-bit signed integer type of the entries in an explicit index list
   (the idx member of struct slice points to values of this type). */
typedef int32_t na_index_t;
/*
 * Slice descriptor for one rank of an array access.
 * According to the field notes, pstep, pbeg and stride are derived values
 * that na_init_slice fills in; the remaining fields are supplied by the
 * code that builds the slice.
 */
struct slice {
char *p; /* pointer to data --- used in loop */
int n; /* n of indices of this rank */
int pstep; /* = step * stride * elmsz --- set in na_init_slice */
int pbeg; /* = beg * stride * elmsz --- set in na_init_slice */
int stride; /* = shape[0]*shape[1]*...*shape[r-1]
--- set in na_init_slice */
int step; /* step factor that pstep is derived from (see pstep) */
int beg; /* begin position that pbeg is derived from (see pbeg) */
na_index_t *idx; /* NULL if normal step */
};
/*
 * Dispatch-table types: arrays of function pointers sized by NA_NTYPES.
 * na_setfunc_t is two-dimensional ([NA_NTYPES][NA_NTYPES]); the others are
 * one-dimensional.
 * The void-returning pointer types are declared with an empty parameter
 * list "()", so the argument types are not checked by the compiler.
 * NOTE(review): "()" means "no parameters" in C23 -- confirm before
 * building with a C23 compiler.
 */
typedef void (*na_setfunc_t[NA_NTYPES][NA_NTYPES]) ();
typedef void (*na_func_t[NA_NTYPES]) ();
typedef void (*na_ufunc_t[NA_NTYPES]) ();
typedef void (*na_bifunc_t[NA_NTYPES]) ();
typedef void (*na_mathfunc_t[NA_NTYPES]) ();
/* Comparison callbacks taking two const void pointers and returning int. */
typedef int (*na_sortfunc_t[NA_NTYPES]) (const void *, const void *);
/* function arrays */
/*
 * External declarations of the function-pointer tables; the definitions
 * live in other translation units. Each table uses one of the
 * dispatch-table typedefs above, so one-dimensional tables have NA_NTYPES
 * entries and na_setfunc_t tables have NA_NTYPES x NA_NTYPES entries.
 * NOTE(review): the index is presumably the NArray element-type code --
 * confirm against the table definitions.
 */
extern na_setfunc_t SetFuncs;
extern na_ufunc_t SwpFuncs;
extern na_ufunc_t H2NFuncs;
extern na_ufunc_t H2VFuncs;
extern na_ufunc_t NegFuncs;
extern na_ufunc_t RcpFuncs;
extern na_ufunc_t AbsFuncs;
extern na_ufunc_t RealFuncs;
extern na_ufunc_t ImagFuncs;
extern na_ufunc_t AnglFuncs;
extern na_ufunc_t ImagMulFuncs;
extern na_ufunc_t ConjFuncs;
extern na_ufunc_t FloorFuncs;
extern na_ufunc_t CeilFuncs;
extern na_ufunc_t RoundFuncs;
extern na_ufunc_t ToStrFuncs;
extern na_ufunc_t InspFuncs;
extern na_ufunc_t IndGenFuncs;
extern na_ufunc_t AddUFuncs;
extern na_ufunc_t SbtUFuncs;
extern na_ufunc_t MulUFuncs;
extern na_ufunc_t DivUFuncs;
extern na_bifunc_t AddBFuncs;
extern na_bifunc_t SbtBFuncs;
extern na_bifunc_t MulBFuncs;
extern na_bifunc_t DivBFuncs;
extern na_bifunc_t MulAddFuncs;
extern na_bifunc_t MulSbtFuncs;
extern na_bifunc_t ModBFuncs;
extern na_bifunc_t BAnFuncs;
extern na_bifunc_t BOrFuncs;
extern na_bifunc_t BXoFuncs;
extern na_ufunc_t BRvFuncs;
extern na_bifunc_t ImgSetFuncs;
extern na_setfunc_t PowFuncs;
extern na_bifunc_t atan2Funcs;
extern na_bifunc_t CmpFuncs;
extern na_bifunc_t EqlFuncs;
extern na_ufunc_t AndFuncs;
extern na_ufunc_t Or_Funcs;
extern na_ufunc_t XorFuncs;
extern na_ufunc_t NotFuncs;
extern na_ufunc_t MinFuncs;
extern na_ufunc_t MaxFuncs;
extern na_sortfunc_t SortFuncs;
extern na_sortfunc_t SortIdxFuncs;
extern na_bifunc_t RefMaskFuncs;
extern na_bifunc_t SetMaskFuncs;
/*
 * OpenCL kernel tables -- compiled only when __OPENCL__ is defined.
 * The tables hold cl_kernel handles and are named after the *Funcs tables
 * declared above. The ToStr, Insp, Sort, SortIdx, RefMask and SetMask
 * entries are commented out, so no kernel table is declared for them.
 */
#ifdef __OPENCL__
/* kernel arrays */
/* One cl_kernel per NA_NTYPES entry. */
typedef cl_kernel na_opencl_kernel1_t[NA_NTYPES];
/* One cl_kernel per pair of NA_NTYPES entries. */
typedef cl_kernel na_opencl_kernel2_t[NA_NTYPES][NA_NTYPES];
extern na_opencl_kernel2_t SetKernels;
extern na_opencl_kernel1_t SwpKernels;
extern na_opencl_kernel1_t H2NKernels;
extern na_opencl_kernel1_t H2VKernels;
extern na_opencl_kernel1_t NegKernels;
extern na_opencl_kernel1_t RcpKernels;
extern na_opencl_kernel1_t AbsKernels;
extern na_opencl_kernel1_t RealKernels;
extern na_opencl_kernel1_t ImagKernels;
extern na_opencl_kernel1_t AnglKernels;
extern na_opencl_kernel1_t ImagMulKernels;
extern na_opencl_kernel1_t ConjKernels;
extern na_opencl_kernel1_t FloorKernels;
extern na_opencl_kernel1_t CeilKernels;
extern na_opencl_kernel1_t RoundKernels;
//extern na_opencl_kernel1_t ToStrKernels;
//extern na_opencl_kernel1_t InspKernels;
extern na_opencl_kernel1_t IndGenKernels;
extern na_opencl_kernel1_t AddUKernels;
extern na_opencl_kernel1_t SbtUKernels;
extern na_opencl_kernel1_t MulUKernels;
extern na_opencl_kernel1_t DivUKernels;
extern na_opencl_kernel1_t AddBKernels;
extern na_opencl_kernel1_t SbtBKernels;
extern na_opencl_kernel1_t MulBKernels;
extern na_opencl_kernel1_t DivBKernels;
extern na_opencl_kernel1_t MulAddKernels;
extern na_opencl_kernel1_t MulSbtKernels;
extern na_opencl_kernel1_t ModBKernels;
extern na_opencl_kernel1_t BAnKernels;
extern na_opencl_kernel1_t BOrKernels;
extern na_opencl_kernel1_t BXoKernels;
extern na_opencl_kernel1_t BRvKernels;
extern na_opencl_kernel1_t ImgSetKernels;
extern na_opencl_kernel2_t PowKernels;
extern na_opencl_kernel1_t atan2Kernels;
extern na_opencl_kernel1_t CmpKernels;
extern na_opencl_kernel1_t EqlKernels;
extern na_opencl_kernel1_t AndKernels;
extern na_opencl_kernel1_t Or_Kernels;
extern na_opencl_kernel1_t XorKernels;
extern na_opencl_kernel1_t NotKernels;
extern na_opencl_kernel1_t MinKernels;
extern na_opencl_kernel1_t MaxKernels;
//extern na_sortfunc_t SortKernels;
//extern na_sortfunc_t SortIdxKernels;
//extern na_opencl_kernel1_t RefMaskKernels;
//extern na_opencl_kernel1_t SetMaskKernels;
/* Kernel tables named after math functions; no matching *Funcs table is
   declared in this header. */
extern na_opencl_kernel1_t sqrtKernels;
extern na_opencl_kernel1_t sinKernels;
extern na_opencl_kernel1_t cosKernels;
extern na_opencl_kernel1_t tanKernels;
extern na_opencl_kernel1_t sinhKernels;
extern na_opencl_kernel1_t coshKernels;
extern na_opencl_kernel1_t tanhKernels;
extern na_opencl_kernel1_t expKernels;
extern na_opencl_kernel1_t logKernels;
extern na_opencl_kernel1_t log10Kernels;
extern na_opencl_kernel1_t log2Kernels;
extern na_opencl_kernel1_t asinKernels;
extern na_opencl_kernel1_t asinhKernels;
extern na_opencl_kernel1_t acosKernels;
extern na_opencl_kernel1_t acoshKernels;
extern na_opencl_kernel1_t atanKernels;
extern na_opencl_kernel1_t atanhKernels;
extern na_opencl_kernel1_t RndKernels;
/* A single kernel handle rather than a per-type table.
   NOTE(review): presumably seeds the random generator -- confirm. */
extern cl_kernel init_genrandKernel;
#endif
/* variables */
/*
 * Globals shared between the extension's source files; all are defined
 * elsewhere and only declared here.
 */
extern VALUE rb_mNMath;
/* Ruby method/symbol IDs. na_id_Complex is the one used by the
   rb_complex_new macro in this header. */
extern ID na_id_beg, na_id_end, na_id_exclude_end;
extern ID na_id_minus, na_id_abs, na_id_power;
extern ID na_id_compare, na_id_and, na_id_or;
extern ID na_id_equal;
extern ID na_id_class_dim;
extern ID na_id_add, na_id_sbt, na_id_mul, na_id_div, na_id_mod;
extern ID na_id_real, na_id_imag;
extern ID na_id_coerce_rev;
extern ID na_id_new;
extern ID na_id_Complex;
/* Read-only integer tables sized by NA_NTYPES.
   NOTE(review): presumably map element-type codes to the result type of a
   cast or operation -- confirm against the definitions. */
extern const int na_upcast[NA_NTYPES][NA_NTYPES];
extern const int na_no_cast[NA_NTYPES];
extern const int na_cast_real[NA_NTYPES];
extern const int na_cast_comp[NA_NTYPES];
extern const int na_cast_round[NA_NTYPES];
extern const int na_cast_byte[NA_NTYPES];
/* Array of C strings; its length is not specified in this declaration. */
extern const char *na_typestring[];
extern VALUE cNArrayScalar, cComplex;
/* narray.c */
/* Takes the (argc, argv, self) argument triple and returns a VALUE.
   NOTE(review): presumably registered as a variadic Ruby method -- confirm
   in narray.c. */
VALUE na_newdim_ref(int argc, VALUE *argv, VALUE self);
/* na_func.c */
/*
 * Shape, slice and loop helpers implemented in na_func.c. Only the
 * signatures are visible here; the behavioural notes below are inferred
 * from names and must be confirmed against the implementation.
 */
/* NOTE(review): presumably returns the largest of a, b and c. */
int na_max3(int a, int b, int c);
/* NOTE(review): presumably writes into max_shp the per-dimension maximum of
   shp1, shp2 and shp3 over ndim entries. */
void na_shape_max3(int ndim, int *max_shp, int *shp1, int *shp2, int *shp3);
void na_shape_copy( int ndim, int *shape, struct NARRAY *a );
/* Named in struct slice as the routine that sets pstep, pbeg and stride. */
void na_init_slice(struct slice *s, int rank, int *shape, int elmsz);
void na_set_slice_1obj(int ndim, struct slice *slc, int *shape);
int na_set_slice_3obj( int ndim,
struct slice *s1, struct slice *s2, struct slice *s3,
int *shp1, int *shp2, int *shp3, int *shape );
/* func is declared with an empty parameter list, so its arguments are not
   type-checked at the call site. */
void na_loop_general(struct NARRAY *a1, struct NARRAY *a2,
struct slice *s1, struct slice *s2, void (*func)());
void na_loop_index_ref(struct NARRAY *a1, struct NARRAY *a2,
struct slice *s1, struct slice *s2, void (*func)());
/* na_index.c */
/*
 * Indexing helpers implemented in na_index.c. Only the signatures are
 * visible here; see the implementation for the exact semantics.
 */
/* NOTE(review): presumably stores src into the part of dst selected by
   s1 -- confirm. */
void na_aset_slice(struct NARRAY *dst, struct NARRAY *src, struct slice *s1);
int na_shrink_class(int class_dim, int *shrink);
VALUE na_shrink_rank(VALUE obj, int class_dim, int *shrink);
/*
 * Calls the method identified by na_id_Complex on rb_mKernel with two
 * arguments, r and i, each converted with rb_float_new, and yields the
 * result of that rb_funcall.
 * NOTE(review): this macro is defined unconditionally; newer Ruby headers
 * may already declare a function named rb_complex_new -- confirm there is
 * no clash.
 */
#define rb_complex_new(r,i) \
rb_funcall(rb_mKernel, na_id_Complex, 2, rb_float_new(r), rb_float_new(i))
/*
 * 16-bit value viewed either as two bytes (b) or as one int16_t (s).
 * The byte view is what the swap16 macro operates on.
 * NOTE(review): u_int8_t is a non-standard type name; it must be supplied
 * by a system or project header.
 */
typedef union {
u_int8_t b[2];
int16_t s;
} na_size16_t;
/*
 * 32-bit value viewed as four bytes (b), one int32_t (i) or one float (f).
 * The byte view is what the swap32 macro operates on.
 * NOTE(review): assumes float is 4 bytes wide -- confirm for the target.
 */
typedef union {
u_int8_t b[4];
int32_t i;
float f;
} na_size32_t;
/*
 * 64-bit value viewed as eight bytes (b), a pair of floats (f) or one
 * double (d). The byte view is what the swap64 and swap64c macros
 * operate on.
 * NOTE(review): assumes 4-byte float and 8-byte double -- confirm for the
 * target.
 */
typedef union {
u_int8_t b[8];
float f[2];
double d;
} na_size64_t;
/*
 * 128-bit value viewed as sixteen bytes (b) or a pair of doubles (d).
 * The byte view is what the swap128c macro operates on.
 * NOTE(review): assumes 8-byte double -- confirm for the target.
 */
typedef union {
u_int8_t b[16];
double d[2];
} na_size128_t;
/*
 * swap16(d, s): copy the two bytes of s into d in reverse order.
 *
 * d and s are objects with a byte array member b of at least 2 elements
 * (e.g. na_size16_t). Both arguments are evaluated more than once, so they
 * must be free of side effects, and d must not be the same object as s
 * (the second assignment would read an already overwritten byte).
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and is safe after an unbraced if/else/for. Invoke it with a
 * trailing semicolon: swap16(x, y);
 */
#define swap16(d,s) \
  do { \
    (d).b[0]=(s).b[1]; \
    (d).b[1]=(s).b[0]; \
  } while (0)
/*
 * swap32(d, s): copy the four bytes of s into d in reverse order.
 *
 * d and s are objects with a byte array member b of at least 4 elements
 * (e.g. na_size32_t). Both arguments are evaluated more than once, so they
 * must be free of side effects, and d must not be the same object as s.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and is safe after an unbraced if/else/for. Invoke it with a
 * trailing semicolon: swap32(x, y);
 */
#define swap32(d,s) \
  do { \
    (d).b[0]=(s).b[3]; \
    (d).b[1]=(s).b[2]; \
    (d).b[2]=(s).b[1]; \
    (d).b[3]=(s).b[0]; \
  } while (0)
/*
 * swap64(d, s): copy the eight bytes of s into d in reverse order.
 *
 * d and s are objects with a byte array member b of at least 8 elements
 * (e.g. na_size64_t). Both arguments are evaluated more than once, so they
 * must be free of side effects, and d must not be the same object as s.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and is safe after an unbraced if/else/for. Invoke it with a
 * trailing semicolon: swap64(x, y);
 */
#define swap64(d,s) \
  do { \
    (d).b[0]=(s).b[7]; \
    (d).b[1]=(s).b[6]; \
    (d).b[2]=(s).b[5]; \
    (d).b[3]=(s).b[4]; \
    (d).b[4]=(s).b[3]; \
    (d).b[5]=(s).b[2]; \
    (d).b[6]=(s).b[1]; \
    (d).b[7]=(s).b[0]; \
  } while (0)
/*
 * swap64c(d, s): byte-reverse each 4-byte half of s separately into d.
 * Bytes 0..3 of d receive bytes 3..0 of s, and bytes 4..7 of d receive
 * bytes 7..4 of s; the two halves keep their positions.
 *
 * d and s are objects with a byte array member b of at least 8 elements
 * (e.g. na_size64_t). Both arguments are evaluated more than once, so they
 * must be free of side effects, and d must not be the same object as s.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and is safe after an unbraced if/else/for. Invoke it with a
 * trailing semicolon: swap64c(x, y);
 */
#define swap64c(d,s) \
  do { \
    (d).b[0]=(s).b[3]; \
    (d).b[1]=(s).b[2]; \
    (d).b[2]=(s).b[1]; \
    (d).b[3]=(s).b[0]; \
    (d).b[4]=(s).b[7]; \
    (d).b[5]=(s).b[6]; \
    (d).b[6]=(s).b[5]; \
    (d).b[7]=(s).b[4]; \
  } while (0)
/*
 * swap128c(d, s): byte-reverse each 8-byte half of s separately into d.
 * Bytes 0..7 of d receive bytes 7..0 of s, and bytes 8..15 of d receive
 * bytes 15..8 of s; the two halves keep their positions.
 *
 * d and s are objects with a byte array member b of at least 16 elements
 * (e.g. na_size128_t). Both arguments are evaluated more than once, so
 * they must be free of side effects, and d must not be the same object
 * as s.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and is safe after an unbraced if/else/for. Invoke it with a
 * trailing semicolon: swap128c(x, y);
 */
#define swap128c(d,s) \
  do { \
    (d).b[0]=(s).b[7]; \
    (d).b[1]=(s).b[6]; \
    (d).b[2]=(s).b[5]; \
    (d).b[3]=(s).b[4]; \
    (d).b[4]=(s).b[3]; \
    (d).b[5]=(s).b[2]; \
    (d).b[6]=(s).b[1]; \
    (d).b[7]=(s).b[0]; \
    (d).b[8]=(s).b[15]; \
    (d).b[9]=(s).b[14]; \
    (d).b[10]=(s).b[13]; \
    (d).b[11]=(s).b[12]; \
    (d).b[12]=(s).b[11]; \
    (d).b[13]=(s).b[10]; \
    (d).b[14]=(s).b[9]; \
    (d).b[15]=(s).b[8]; \
  } while (0)
/* Fallback used only when no earlier header defines RSTRING_LEN: read the
   len member through RSTRING(). */
#ifndef RSTRING_LEN
#  define RSTRING_LEN(a) (RSTRING(a)->len)
#endif
/* Fallback used only when no earlier header defines RSTRING_PTR: read the
   ptr member through RSTRING(). */
#ifndef RSTRING_PTR
#  define RSTRING_PTR(a) (RSTRING(a)->ptr)
#endif
/* Fallback used only when no earlier header defines RARRAY_LEN: read the
   len member through RARRAY(). */
#ifndef RARRAY_LEN
#  define RARRAY_LEN(a) (RARRAY(a)->len)
#endif
/* Fallback used only when no earlier header defines RARRAY_PTR: read the
   ptr member through RARRAY(). */
#ifndef RARRAY_PTR
#  define RARRAY_PTR(a) (RARRAY(a)->ptr)
#endif