From 083f3d7c99cb2ad192dee81c7bea5cf026503cb1 Mon Sep 17 00:00:00 2001 From: Sergio Pascual Date: Thu, 31 Oct 2024 17:38:12 +0100 Subject: [PATCH] Use several NpyIter ptrs (fixes #222) This is a workaround to the limitation in the number of arguments in numpy (64 or 32 depending on the version) --- src/numina/array/src/combinemodule.cc | 465 ++++++++++++++++++- src/numina/array/tests/test_combine.py | 23 +- src/numina/array/tests/test_combine_miter.py | 84 ++++ 3 files changed, 563 insertions(+), 9 deletions(-) create mode 100644 src/numina/array/tests/test_combine_miter.py diff --git a/src/numina/array/src/combinemodule.cc b/src/numina/array/src/combinemodule.cc index 077f9395..38feea5b 100644 --- a/src/numina/array/src/combinemodule.cc +++ b/src/numina/array/src/combinemodule.cc @@ -38,6 +38,18 @@ static inline int check_1d_array(PyObject* array, size_t nimages, const char* na return 1; } +static inline int advance_multi_iter(const std::vector& iter_next, const std::vector& iters) { + int val; + for(int i=0; i < iters.size(); ++i) { + val = iter_next[i](iters[i]); + //printf("iter %d, return val %d\n", i, val); + if(val == 0) + break; + } + return val; +} + + static PyObject* py_generic_combine(PyObject *self, PyObject *args, PyObject *kwargs) { /* Inputs */ @@ -335,6 +347,441 @@ static PyObject* py_generic_combine(PyObject *self, PyObject *args, PyObject *kw } + +static PyObject* py_generic_combine_miter(PyObject *self, PyObject *args, PyObject *kwargs) +{ + /* workaround the limitation in NPY_MAXARGS by having several iterators + if some day the limitation is removed, we could use the impl in "py_generic_combine" + + We can have a mask for each image (1) or no masks (2). In both cases, the first iter + will contain out1/out2/out3 as the last operands (WRITEONLY | ALLOCATE) and the + following posible iters will out1 as the last operand as READONLY. The purpose of including + out1 in all iters is to have a common shape to broadcast + + In 1) each image and its mask are inserted together, in pairs. The first iterator + will contain img/mask/img/mask/.../out1/out2/out3. The following iters will + contain img/mask/img/mask/..../out1. + + In 2), the first iterator + will contain img/img/img/.../out1/out2/out3. The following iters will + contain img/img/img/.../out1. + */ + + /* Inputs */ + PyObject *images = NULL; + PyObject *images_seq = NULL; + Py_ssize_t nimages = 0; + /* masks */ + PyObject *masks = NULL; + PyObject *masks_seq = NULL; + Py_ssize_t nmasks = 0; + PyObject** tmp_list_i = NULL; + PyObject** tmp_list_m = NULL; + + npy_bool has_masks = NPY_FALSE; + /* outputs */ + const Py_ssize_t nout = 3; + const Py_ssize_t nouti = 1; + Py_ssize_t nout_x; + PyObject *out_arr[nout] = {NULL, NULL, NULL}; + Py_ssize_t ui; + + /* MAXARGS */ + const Py_ssize_t CAP = NPY_MAXARGS; // 64 in np2, 32 in np1 + // has_masks == T <-> group_size == 2 + // has_masks == F <-> group_size == 1 + Py_ssize_t group_size = 1; + + Py_ssize_t max_ins0, max_insi, max_ins; + + /* scales, zeros and weights */ + PyObject* scales = NULL; + PyObject* zeros = NULL; + PyObject* weights = NULL; + PyObject* zeros_arr = NULL; + PyObject* scales_arr = NULL; + PyObject* weights_arr = NULL; + double* zbuffer = NULL; + double* sbuffer = NULL; + double* wbuffer = NULL; + + /* Capsule */ + PyObject* fnc = NULL; + void *capsule_func = (void*)NU_mean_function; + void *capsule_data = NULL; + + /* Iterator */ + PyArray_Descr* dtype_res = NULL; + PyArray_Descr* dtype_pix = NULL; + npy_uint32 iflags; + npy_uint32 flag_out = NPY_ITER_WRITEONLY | NPY_ITER_ALLOCATE | NPY_ITER_NBO| NPY_ITER_ALIGNED; + npy_uint32 flag_in = NPY_ITER_READONLY | NPY_ITER_NBO; + npy_uint32 flag_pix = NPY_ITER_READONLY | NPY_ITER_NBO; + std::vector op_flags; + std::vector op_dtypes; + std::vector ops; + /* base iterator */ + NpyIter *tmp_iter = NULL; + + std::vector iters; + std::vector iter_next; + std::vector dataptrs; + + std::vector iter_insert; + std::vector img_iter; + std::vector img_pos; + + // + int iter_count = 0; + int base_out = 0; + + Py_ssize_t remain_groups = 0; + Py_ssize_t inserts; + Py_ssize_t base_idx; + + /* NU_generic_combine */ + std::vector pdata; + std::vector wdata; + double results_buffer[nout]; + double* pvalues[nout]; + + int outval; + const char *arr_name[nout] = {"res", "var", "pix"}; + const char *kwlist[] = {"method", "images", "out_res", "out_var", "out_pix", + "masks", "zeros", "scales", "weights", NULL}; + + outval = PyArg_ParseTupleAndKeywords(args, kwargs, + "OO|OOOOOOO:generic_combine", const_cast(kwlist), + &fnc, /* capsule */ + &images, /* sequence of images */ + &out_arr[0], /* result */ + &out_arr[1], /* variance */ + &out_arr[2], /* npix */ + &masks, /* sequence of masks */ + &zeros, /* sequence of zeros */ + &scales,/* sequence of scales */ + &weights /* sequence of weights */ + ); + //printf("outval %d\n", outval); + + if (!outval) return NULL; + + images_seq = PySequence_Fast(images, "expected a sequence of data arrays"); + nimages = PySequence_Size(images_seq); + // printf("We have nimages=%d\n", nimages); + + if (nimages == 0) { + PyErr_SetString(PyExc_ValueError, "data sequence is empty"); + goto exit; + } + if (masks != NULL && masks != Py_None) { + masks_seq = PySequence_Fast(masks, "expected a sequence of mask arrays"); + nmasks = PySequence_Size(masks_seq); + if (nmasks == nimages) { + has_masks = NPY_TRUE; + group_size = 2; + } + else if (nmasks == 0) { + has_masks = NPY_FALSE; + group_size = 1; + } + else { + PyErr_Format(PyExc_ValueError, + "number of images (%zd) and masks (%zd) is different", nimages, nmasks); + + goto exit; + } + } + // printf("We have nmasks=%d\n", nmasks); + + for(int i = 0; i < nout; ++i) { + if (out_arr[i] == Py_None) { + out_arr[i] = NULL; + } + } + + if (!PyCapsule_IsValid(fnc, "numina.cmethod")) { + PyErr_SetString(PyExc_TypeError, "parameter is not a valid capsule"); + goto exit; + } + + capsule_func = PyCapsule_GetPointer(fnc, "numina.cmethod"); + capsule_data = PyCapsule_GetContext(fnc); + + // Checking zeros, scales and weights + if (zeros == Py_None || zeros == NULL) { + zbuffer = (double*)PyMem_Malloc(nimages * sizeof(double)); + if (zbuffer == NULL) { + PyErr_NoMemory(); + goto exit; + } + for(ui = 0; ui < nimages; ++ui) + zbuffer[ui] = 0.0; + } + else { + zeros_arr = PyArray_FROM_OTF(zeros, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY); + if (!check_1d_array(zeros, nimages, "zeros")) + goto exit; + + zbuffer = (double*)PyArray_DATA((PyArrayObject*)zeros_arr); + } + + if (scales == Py_None || scales == NULL) { + sbuffer = (double*)PyMem_Malloc(nimages * sizeof(double)); + if (sbuffer == NULL) { + PyErr_NoMemory(); + goto exit; + } + for(ui = 0; ui < nimages; ++ui) + sbuffer[ui] = 1.0; + } + else { + scales_arr = PyArray_FROM_OTF(scales, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY); + if (!check_1d_array(scales_arr, nimages, "scales")) + goto exit; + + sbuffer = (double*)PyArray_DATA((PyArrayObject*)scales_arr); + } + + if (weights == Py_None || weights == NULL) { + wbuffer = (double*)PyMem_Malloc(nimages * sizeof(double)); + if (wbuffer == NULL) { + PyErr_NoMemory(); + goto exit; + } + for(ui = 0; ui < nimages; ++ui) + wbuffer[ui] = 1.0; + } + else { + weights_arr = PyArray_FROM_OTF(weights, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY); + if (!check_1d_array(weights, nimages, "weights")) + goto exit; + + wbuffer = (double*)PyArray_DATA((PyArrayObject*)weights_arr); + } + + pdata.reserve(nimages); + wdata.reserve(nimages); + + // We need allocated memory + for (ui = 0; ui < nout; ++ui) + pvalues[ui] = &results_buffer[ui]; + + // Insertions in iter0 and iterI + max_ins0 = (CAP - nout) / group_size; + max_insi = (CAP - nouti) / group_size; + + //printf("MAX_insert0 %d\n", max_ins0); + //printf("MAX_insertI %d\n", max_insi); + //printf("MAX_cap0 %d\n", max_ins0 * group_size); + //printf("MAX_capI %d\n", max_insi * group_size); + + remain_groups = nimages; + base_idx = 0; + /* iters*/ + dtype_res = PyArray_DescrFromType(NPY_DOUBLE); + dtype_pix = PyArray_DescrFromType(NPY_INT); + + tmp_list_i = PySequence_Fast_ITEMS(images_seq); + tmp_list_m = NULL; + if (has_masks == NPY_TRUE) { + tmp_list_m = PySequence_Fast_ITEMS(masks_seq); + } + + /* insert outputs in ops, op_dtypes and op_flags*/ + while (remain_groups > 0) { + /* images stored in iter 0 */ + if (iter_count == 0) { + max_ins = max_ins0; + nout_x = nout; + } + else { + max_ins = max_insi; + nout_x = nouti; + } + + inserts = std::min(remain_groups, max_ins); + iter_insert.push_back(inserts); + + /* printf(".add iter %d\n", iter_count); + printf("...insert %d\n", inserts); + printf("...occupancy %d (%d)\n", inserts * group_size, inserts * group_size + nout_x); + */ + + for(int i=base_idx; i < base_idx + inserts; ++i) { + // Insert alternating image/mask + //printf("...a i iter=%d index=%d\n",iter_count, i); + ops.push_back(tmp_list_i[i]); + op_flags.push_back(flag_in); + op_dtypes.push_back(dtype_res); + // Counters + img_iter.push_back(iter_count); + img_pos.push_back((i - base_idx) * group_size); + if (has_masks == NPY_TRUE) { + //printf("...a m iter=%d index=%d\n",iter_count, i); + ops.push_back(tmp_list_m[i]); + op_flags.push_back(flag_pix); + op_dtypes.push_back(dtype_pix); + } + } + + // Output at the end + if(iter_count == 0) { + // First iter + for(int i=0; i < nout_x; ++i) { + ops.push_back(out_arr[i]); + op_dtypes.push_back(dtype_res); + op_flags.push_back(flag_out); + } + } else { + // We insert one copy of the output, as read-only + // to allow broadcasting + if (out_arr[0] == NULL) { + ops.push_back((PyObject*)NpyIter_GetOperandArray(iters[0])[iter_insert[0]]); + } + else { + ops.push_back(out_arr[0]); + } + op_dtypes.push_back(dtype_res); + op_flags.push_back(flag_in); + } + + remain_groups -= inserts; + base_idx += inserts; + // printf("...remaining %d, groups=%d\n", remain_groups * group_size, remain_groups); + // printf("......%d %d %d %d\n", cap0 + nout_x, ops.size(), op_flags.size(), op_dtypes.size()); + + tmp_iter = NpyIter_MultiNew(ops.size(), (PyArrayObject**)(&ops[0]), + NPY_ITER_BUFFERED, + NPY_KEEPORDER, NPY_UNSAFE_CASTING, + &op_flags[0], &op_dtypes[0]); + + /* clean up vectors */ + ops.clear(); + op_dtypes.clear(); + op_flags.clear(); + + /* if we fail in iter creation, clean up */ + if (tmp_iter == NULL) { + Py_DECREF(dtype_res); + Py_DECREF(dtype_pix); + dtype_res = dtype_pix = NULL; + goto exit; + } + + // Store iter and auxiliary functions + iters.push_back(tmp_iter); + iter_next.push_back(NpyIter_GetIterNext(tmp_iter, NULL)); + dataptrs.push_back(NpyIter_GetDataPtrArray(tmp_iter)); + iter_count++; + // Repeat while there are remaining images + } + //printf("we have %d (%d) iters\n", iter_count, iters.size()); + // printf("img indirection %d (%d)\n", img_iter.size(), img_pos.size()); + + base_out = iter_insert[0] * group_size; + + do { + double* p_val; + double converted; + + for(ui = 0; ui < nimages; ++ui) { + npy_bool m_val = NPY_FALSE; + int idx_iter; + int idx_pos; + + // If a weight is negative, we ignore the image + if(wbuffer[ui] < 0) + continue; + + // Iterator and position within for ehac image + idx_iter = img_iter[ui]; + idx_pos = img_pos[ui]; + + //printf("+++-----------------\n"); + //printf("img %d iter %d pos %d\n", ui, idx_iter, idx_pos); + // printf("img %d iter %d pos %d NOP %d\n", ui, idx_iter, idx_pos, NpyIter_GetNOp(iters[idx_iter])); + + if(has_masks == NPY_TRUE) { + /* access dataptrs[i][j + 1] */ + //m_val = (* (int*)dataptrs[idx_iter][idx_pos + 1]); + //printf("mask of img=%d, value=%d\n", ui, m_val); + m_val = (* (int*)dataptrs[idx_iter][idx_pos + 1]) > 0; + } + + if (m_val == NPY_TRUE) + continue; + + /* access dataptrs[i][j] */ + p_val = (double*)dataptrs[idx_iter][idx_pos]; + //printf("img=%d, value=%f\n", ui, *p_val); + + converted = (*p_val - zbuffer[ui]) / (sbuffer[ui]); + pdata.push_back(converted); + wdata.push_back(wbuffer[ui]); + //printf("------------------\n"); + } + + // And pass the data to the combine method + if (pdata.size() > 0) { + outval = ((CombineFunc)capsule_func)(&pdata[0], &wdata[0], pdata.size(), pvalues, capsule_data); + if(!outval) { + PyErr_SetString(PyExc_RuntimeError, "unknown error in combine method"); + goto exit; + } + } + else { + /* without points, 0 and 1 should be nan */ + *pvalues[0] = *pvalues[1] = *pvalues[2] = 0.0; + } + + //printf("results %f %f %f \n",*pvalues[0], *pvalues[1], *pvalues[2]); + + memcpy(dataptrs[0][base_out + 0], pvalues[0], sizeof(double)); + memcpy(dataptrs[0][base_out + 1], pvalues[1], sizeof(double)); + memcpy(dataptrs[0][base_out + 2], pvalues[2], sizeof(double)); + + pdata.clear(); + wdata.clear(); + + } while(advance_multi_iter(iter_next, iters)); + + // Recover outputs + for(int i=0; i < nout; ++i) { + if (out_arr[i] == NULL) { + out_arr[i] = (PyObject*)NpyIter_GetOperandArray(iters[0])[base_out + i]; + } + Py_INCREF(out_arr[i]); + } + +exit: + + for(auto the_iter: iters) { + NpyIter_Deallocate(the_iter); + } + + Py_XDECREF(images_seq); + Py_XDECREF(masks_seq); + + if (zeros == Py_None || zeros == NULL) + PyMem_Free(zbuffer); + + Py_XDECREF(zeros_arr); + + if (scales == Py_None || scales == NULL) + PyMem_Free(sbuffer); + + Py_XDECREF(scales_arr); + + if (weights == Py_None || scales == NULL) + PyMem_Free(wbuffer); + + Py_XDECREF(weights_arr); + + return PyErr_Occurred() ? NULL : Py_BuildValue("(N,N,N)",out_arr[0], out_arr[1], out_arr[2]); +} + + void NU_destructor(PyObject *cap) { void* cdata; cdata = PyCapsule_GetContext(cap); @@ -404,7 +851,7 @@ py_method_sigmaclip(PyObject *obj, PyObject *args) { double *funcdata = NULL; PyObject *cap; - if (!PyArg_ParseTuple(args, "dd", &low, &high)) + if (!PyArg_ParseTuple(args, "dd", &low, &high)) return NULL; if (low < 0) { @@ -446,7 +893,7 @@ py_method_quantileclip(PyObject *obj, PyObject *args) { double *funcdata = NULL; PyObject *cap; - if (!PyArg_ParseTuple(args, "d", &fclip)) + if (!PyArg_ParseTuple(args, "d", &fclip)) return NULL; if (fclip < 0 || fclip > 0.4) { @@ -494,7 +941,9 @@ py_method_debug(PyObject *obj, PyObject *args) { static PyMethodDef module_functions[] = { - {"generic_combine", (PyCFunction) py_generic_combine, METH_VARARGS | METH_KEYWORDS, "generic combine function"}, + {"generic_combine", (PyCFunction) py_generic_combine_miter, METH_VARARGS | METH_KEYWORDS, "generic combine function"}, + {"generic_combine_1iter", (PyCFunction) py_generic_combine, METH_VARARGS | METH_KEYWORDS, "generic combine function"}, + {"generic_combine_miter", (PyCFunction) py_generic_combine_miter, METH_VARARGS | METH_KEYWORDS, "generic combine function with multiple iters"}, {"mean_method", py_method_mean, METH_NOARGS, ""}, {"median_method", py_method_median, METH_NOARGS, ""}, {"minmax_method", py_method_minmax, METH_VARARGS, ""}, @@ -520,9 +969,9 @@ static struct PyModuleDef moduledef = { PyMODINIT_FUNC PyInit__combine(void) { - PyObject *m; - m = PyModule_Create(&moduledef); - if (m == NULL) + PyObject *mod; + mod = PyModule_Create(&moduledef); + if (mod == NULL) return NULL; import_array(); @@ -535,6 +984,6 @@ PyMODINIT_FUNC PyInit__combine(void) CombineError = PyErr_NewException("_combine.CombineError", NULL, NULL); } Py_INCREF(CombineError); - PyModule_AddObject(m, "CombineError", CombineError); - return m; + PyModule_AddObject(mod, "CombineError", CombineError); + return mod; } diff --git a/src/numina/array/tests/test_combine.py b/src/numina/array/tests/test_combine.py index d9e0f088..efcdeef0 100644 --- a/src/numina/array/tests/test_combine.py +++ b/src/numina/array/tests/test_combine.py @@ -216,10 +216,31 @@ def test_combine_median2(): @pytest.mark.xfail def test_combine_maxargs(): - """Testing silly numpy max args limit""" + """Testing numpy max args limit (with 1 iter impl)""" + # Inputs + input1 = numpy.array([[1, 2, 3, -4]]) + inputs = [input1] * 100 + + out0, out1, out2 = _c.generic_combine_1iter(_c.median_method(), inputs) + assert numpy.allclose(out0, input1) + + +def test_combine_miter_maxargs(): + """Testing numpy max args limit""" # Inputs input1 = numpy.array([[1, 2, 3, -4]]) inputs = [input1] * 100 out0, out1, out2 = _c.generic_combine(_c.median_method(), inputs) assert numpy.allclose(out0, input1) + + +def test_combine_miter_maxargs2(): + """Testing numpy max args limit with larger images""" + # Inputs + input1 = numpy.zeros((512, 512)) + inputs = [input1] * 100 + + out0, out1, out2 = _c.generic_combine(_c.median_method(), inputs) + assert numpy.allclose(out0, input1) + diff --git a/src/numina/array/tests/test_combine_miter.py b/src/numina/array/tests/test_combine_miter.py new file mode 100644 index 00000000..c2526656 --- /dev/null +++ b/src/numina/array/tests/test_combine_miter.py @@ -0,0 +1,84 @@ + +import numpy +import numpy as np + +import numina.array._combine as _c # noqa + + +def test_error2(): + """test general functionality""" + data1 = numpy.array([[-1, -3], [5, 0]]) + data2 = numpy.array([[1, 2], [8, 3]]) + data3 = numpy.array([[9, 1], [2, 4]]) + mask1 = numpy.array([[0, 0], [0, 0]]) + mask2 = numpy.array([[0, 0], [0, 1]]) + mask3 = numpy.array([[0, 0], [0, 1]]) + exp_res = numpy.array([[3, 0], [5, 0]]) + exp_var = numpy.array([[28, 7], [9, 0]]) + exp_pix = numpy.array([[3, 3], [3, 1]]) + a, b, c = _c.generic_combine_miter(_c.mean_method(), [data1, data2, data3], masks=[mask1, mask2, mask3]) + assert np.allclose(a, exp_res) + assert np.allclose(b, exp_var) + assert np.allclose(c, exp_pix) + + +def test_error3(): + """test general functionality""" + data1 = numpy.array([[-1, -3], [5, 0]]) + data2 = numpy.array([[1, 2], [8, 3]]) + data3 = numpy.array([[9, 1], [2, 4]]) + mask1 = numpy.array([[0, 0], [0, 0]]) + mask2 = numpy.array([[0, 0], [0, 0]]) + mask3 = numpy.array([[0, 0], [0, 0]]) + out_res = numpy.array([[1, 2], [1, 2]]) + out_var = numpy.array([[1, 2], [1, 2]]) + out_pix = numpy.array([[1, 2], [1, 2]]) + exp_res = numpy.array([[3, 0], [5, 2]]) + exp_var = numpy.array([[28, 7], [9, 4]]) + exp_pix = numpy.array([[3, 3], [3, 3]]) + a, b, c = _c.generic_combine_miter(_c.mean_method(), [data1, data2, data3], masks=[mask1, mask2, mask3], + out_res=out_res, out_var=out_var, out_pix=out_pix) + assert np.allclose(a, exp_res) + assert np.allclose(b, exp_var) + assert np.allclose(c, exp_pix) + + +def test_error4(): + """test general functionality""" + data1 = numpy.array([[-1, -3], [5, 0]]) + out_res = numpy.array([[1, 2], [1, 2]]) + out_var = numpy.array([[1, 2], [1, 2]]) + out_pix = numpy.array([[1, 2], [1, 2]]) + exp_res = numpy.array([[-1, -3], [5, 0]]) + exp_var = numpy.array([[0, 0], [0, 0]]) + exp_pix = 100 * numpy.array([[1, 1], [1, 1]]) + a, b, c = _c.generic_combine_miter(_c.mean_method(), [data1]*100, + out_res=out_res, out_var=out_var, out_pix=out_pix) + assert np.allclose(a, exp_res) + assert np.allclose(b, exp_var) + assert np.allclose(c, exp_pix) + + +def test_error5(): + """test general functionality""" + data1 = numpy.array([[-1, -3], [5, 0]]) + exp_res = numpy.array([[-1, -3], [5, 0]]) + exp_var = numpy.array([[0, 0], [0, 0]]) + exp_pix = 100 * numpy.array([[1, 1], [1, 1]]) + a, b, c = _c.generic_combine_miter(_c.mean_method(), [data1]*100) + assert np.allclose(a, exp_res) + assert np.allclose(b, exp_var) + assert np.allclose(c, exp_pix) + + +def test_error6(): + """test general functionality""" + data1 = numpy.array([[-1, -3], [5, 0]]) + data2 = numpy.array([[2, 0], [4, 1]]) + exp_res = numpy.array([[0.5, -1.5], [4.5, 0.5]]) + exp_var = numpy.array([[4.5, 4.5], [0.5, 0.5]]) + exp_pix = 2 * numpy.array([[1, 1], [1, 1]]) + a, b, c = _c.generic_combine_miter(_c.mean_method(), [data1, data2]) + assert np.allclose(a, exp_res) + assert np.allclose(b, exp_var) + assert np.allclose(c, exp_pix)