-
Notifications
You must be signed in to change notification settings - Fork 179
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[enhancement] add oneDAL finiteness_checker implementation to onedal (#…
…2126) * add finiteness_checker pybind11 bindings * added finiteness checker * Update finiteness_checker.cpp * Update finiteness_checker.cpp * Update finiteness_checker.cpp * Update finiteness_checker.cpp * Update finiteness_checker.cpp * Update finiteness_checker.cpp * Rename finiteness_checker.cpp to finiteness_checker.cpp * Update finiteness_checker.cpp * add next step * follow conventions * make xtable explicit * remove comment * Update validation.py * Update __init__.py * Update validation.py * Update __init__.py * Update __init__.py * Update validation.py * Update _data_conversion.py * Update _data_conversion.py * Update policy_common.cpp * Update policy_common.cpp * Update _policy.py * Update policy_common.cpp * Rename finiteness_checker.cpp to finiteness_checker.cpp * Create finiteness_checker.py * Update validation.py * Update __init__.py * attempt at fixing circular imports again * fix isort * remove __init__ changes * last move * Update policy_common.cpp * Update policy_common.cpp * Update policy_common.cpp * Update policy_common.cpp * Update validation.py * add testing * isort * attempt to fix module error * add fptype * fix typo * Update validation.py * remove sua_ifcae from to_table * isort and black * Update test_memory_usage.py * format * Update _data_conversion.py * Update _data_conversion.py * Update test_validation.py * remove unnecessary code * make reviewer changes * make dtype check change * add sparse testing * try again * try again * try again * Update onedal/utils/tests/test_validation.py Co-authored-by: Samir Nasibli <samir.nasibli@intel.com> * formatting * formatting again * add _check_sample_weight * Revert "add _check_sample_weight" This reverts commit 4efad2c. * Update test_validation.py * Update validation.py * make changes * Update test_validation.py --------- Co-authored-by: Samir Nasibli <samir.nasibli@intel.com>
- Loading branch information
1 parent
d9a25a5
commit 5fcc7fb
Showing
5 changed files
with
293 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
/******************************************************************************* | ||
* Copyright 2024 Intel Corporation | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*******************************************************************************/ | ||
|
||
// Work around missing headers: oneDAL >= 2025.2 is included via the umbrella header, older versions via compute.hpp directly | ||
#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20250200 | ||
#include "oneapi/dal/algo/finiteness_checker.hpp" | ||
#else | ||
#include "oneapi/dal/algo/finiteness_checker/compute.hpp" | ||
#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20250200 | ||
|
||
#include "onedal/common.hpp" | ||
#include "onedal/version.hpp" | ||
|
||
namespace py = pybind11; | ||
|
||
namespace oneapi::dal::python { | ||
|
||
template <typename Task, typename Ops> | ||
struct method2t { | ||
method2t(const Task& task, const Ops& ops) : ops(ops) {} | ||
|
||
template <typename Float> | ||
auto operator()(const py::dict& params) { | ||
using namespace finiteness_checker; | ||
|
||
const auto method = params["method"].cast<std::string>(); | ||
|
||
ONEDAL_PARAM_DISPATCH_VALUE(method, "dense", ops, Float, method::dense); | ||
ONEDAL_PARAM_DISPATCH_VALUE(method, "by_default", ops, Float, method::by_default); | ||
ONEDAL_PARAM_DISPATCH_THROW_INVALID_VALUE(method); | ||
} | ||
|
||
Ops ops; | ||
}; | ||
|
||
struct params2desc { | ||
template <typename Float, typename Method, typename Task> | ||
auto operator()(const pybind11::dict& params) { | ||
using namespace dal::finiteness_checker; | ||
|
||
auto desc = descriptor<Float, Method, Task>(); | ||
desc.set_allow_NaN(params["allow_nan"].cast<bool>()); | ||
return desc; | ||
} | ||
}; | ||
|
||
template <typename Policy, typename Task> | ||
void init_compute_ops(py::module_& m) { | ||
m.def("compute", | ||
[](const Policy& policy, | ||
const py::dict& params, | ||
const table& data) { | ||
using namespace finiteness_checker; | ||
using input_t = compute_input<Task>; | ||
|
||
compute_ops ops(policy, input_t{ data }, params2desc{}); | ||
return fptype2t{ method2t{ Task{}, ops } }(params); | ||
}); | ||
} | ||
|
||
template <typename Task> | ||
void init_compute_result(py::module_& m) { | ||
using namespace finiteness_checker; | ||
using result_t = compute_result<Task>; | ||
|
||
py::class_<result_t>(m, "compute_result") | ||
.def(py::init()) | ||
.DEF_ONEDAL_PY_PROPERTY(finite, result_t); | ||
} | ||
|
||
// Associates the finiteness_checker compute task type with the string "compute".
// NOTE(review): presumably used when building Python-side names for the task; confirm against the macro definition.
ONEDAL_PY_TYPE2STR(finiteness_checker::task::compute, "compute"); | ||
|
||
// Declare instantiators for the two init_* templates above; they are used by
// ONEDAL_PY_INSTANTIATE in the module init below.
ONEDAL_PY_DECLARE_INSTANTIATOR(init_compute_ops); | ||
ONEDAL_PY_DECLARE_INSTANTIATOR(init_compute_result); | ||
|
||
// Module initializer for the finiteness_checker bindings: creates the
// "finiteness_checker" submodule and, in non-SPMD builds, instantiates the
// compute function and result class for every entry of the task list.
// NOTE(review): `m` and `policy_list` are not declared in this file; they are
// presumably provided by ONEDAL_PY_INIT_MODULE and the common headers.
ONEDAL_PY_INIT_MODULE(finiteness_checker) {
    using namespace dal::detail;
    using namespace finiteness_checker;
    using namespace dal::finiteness_checker;

    using task_list = types<task::compute>;

    // The submodule is created unconditionally, even when nothing is registered on it.
    py::module_ sub = m.def_submodule("finiteness_checker");

#ifndef ONEDAL_DATA_PARALLEL_SPMD
    ONEDAL_PY_INSTANTIATE(init_compute_ops, sub, policy_list, task_list);
    ONEDAL_PY_INSTANTIATE(init_compute_result, sub, task_list);
#endif
}
|
||
} // namespace oneapi::dal::python |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
# ============================================================================== | ||
# Copyright 2024 Intel Corporation | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
|
||
import time | ||
|
||
import numpy as np | ||
import numpy.random as rand | ||
import pytest | ||
import scipy.sparse as sp | ||
|
||
from onedal.tests.utils._dataframes_support import ( | ||
_convert_to_dataframe, | ||
get_dataframes_and_queues, | ||
) | ||
from onedal.utils.validation import assert_all_finite | ||
|
||
|
||
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize(
    "shape",
    [
        [16, 2048],
        [65539],  # 2**16 + 3,
        [1000, 1000],
        [3],
    ],
)
@pytest.mark.parametrize("allow_nan", [False, True])
@pytest.mark.parametrize(
    "dataframe, queue", get_dataframes_and_queues("numpy,dpnp,dpctl")
)
def test_sum_infinite_actually_finite(dtype, shape, allow_nan, dataframe, queue):
    """An array filled with the largest finite value of ``dtype`` must be accepted.

    Every element is finite, so ``assert_all_finite`` is expected not to raise
    for either value of ``allow_nan``. Going by the test name, this presumably
    guards against a sum-based shortcut that overflows to infinity.
    """
    largest_finite = np.finfo(dtype).max
    data = np.full(shape, largest_finite, dtype=dtype)
    data = _convert_to_dataframe(data, sycl_queue=queue, target_df=dataframe)
    assert_all_finite(data, allow_nan=allow_nan)
|
||
|
||
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize(
    "shape",
    [
        [16, 2048],
        [65539],  # 2**16 + 3,
        [1000, 1000],
        [
            3,
        ],
    ],
)
@pytest.mark.parametrize("allow_nan", [False, True])
@pytest.mark.parametrize("check", ["inf", "NaN", None])
@pytest.mark.parametrize("seed", [0, int(time.time())])
@pytest.mark.parametrize(
    "dataframe, queue", get_dataframes_and_queues("numpy,dpnp,dpctl")
)
def test_assert_finite_random_location(
    dtype, shape, allow_nan, check, seed, dataframe, queue
):
    """Place one non-finite value at a random position and check the outcome.

    ``check`` selects the injected value ("inf", "NaN") or ``None`` for none.
    ``assert_all_finite`` must pass when nothing is injected, or when a NaN is
    injected and ``allow_nan`` is true; otherwise it must raise ``ValueError``
    with the "Input contains ..." message.
    """
    rand.seed(seed)
    X = rand.uniform(high=np.finfo(dtype).max, size=shape).astype(dtype)

    if check:
        # randint's upper bound is exclusive: pass X.size (not X.size - 1) so
        # that the last element can also be selected.
        loc = rand.randint(0, X.size)
        X.reshape((-1,))[loc] = float(check)

    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)

    if check is None or (allow_nan and check == "NaN"):
        assert_all_finite(X, allow_nan=allow_nan)
    else:
        msg_err = "Input contains " + ("infinity" if allow_nan else "NaN, infinity") + "."
        with pytest.raises(ValueError, match=msg_err):
            assert_all_finite(X, allow_nan=allow_nan)
|
||
|
||
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("allow_nan", [False, True])
@pytest.mark.parametrize("check", ["inf", "NaN", None])
@pytest.mark.parametrize("seed", [0, int(time.time())])
@pytest.mark.parametrize(
    "dataframe, queue", get_dataframes_and_queues("numpy,dpnp,dpctl")
)
def test_assert_finite_random_shape_and_location(
    dtype, allow_nan, check, seed, dataframe, queue
):
    """Same as the random-location test, but on a 1-D array of random length.

    The length is drawn from ``[lb, ub)``. When ``check`` is set, one element
    at a random index is overwritten with ``float(check)``. The expected
    pass/raise outcome follows the same rule as the other finiteness tests.
    """
    lb, ub = 2, 1048576  # ub is 2^20
    rand.seed(seed)
    X = rand.uniform(high=np.finfo(dtype).max, size=rand.randint(lb, ub)).astype(dtype)

    if check:
        # randint's upper bound is exclusive: pass X.size (not X.size - 1) so
        # that the last element can also be selected.
        loc = rand.randint(0, X.size)
        X[loc] = float(check)

    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)

    if check is None or (allow_nan and check == "NaN"):
        assert_all_finite(X, allow_nan=allow_nan)
    else:
        msg_err = "Input contains " + ("infinity" if allow_nan else "NaN, infinity") + "."
        with pytest.raises(ValueError, match=msg_err):
            assert_all_finite(X, allow_nan=allow_nan)
|
||
|
||
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("allow_nan", [False, True])
@pytest.mark.parametrize("check", ["inf", "NaN", None])
@pytest.mark.parametrize("seed", [0, int(time.time())])
def test_assert_finite_sparse(dtype, allow_nan, check, seed):
    """Finiteness check on a random CSR matrix with an optional bad value.

    A CSR matrix with random shape (each dimension drawn from ``[lb, ub)``) is
    generated. When ``check`` is set, one stored value in ``X.data`` is
    overwritten with ``float(check)``. ``assert_all_finite`` must pass when
    nothing is injected, or when a NaN is injected and ``allow_nan`` is true;
    otherwise it must raise ``ValueError``.
    """
    lb, ub = 2, 2056
    rand.seed(seed)
    X = sp.random(
        rand.randint(lb, ub),
        rand.randint(lb, ub),
        format="csr",
        dtype=dtype,
        random_state=rand.default_rng(seed),
    )

    if check:
        n_stored = X.data.shape[0]
        if n_stored == 0:
            # Very small random shapes can yield a matrix with no stored
            # values; there is nothing to overwrite, so the case cannot run.
            pytest.skip("random sparse matrix has no stored values to overwrite")
        # randint's upper bound is exclusive: using n_stored (not n_stored - 1)
        # covers the last stored value and avoids a ValueError when n_stored == 1.
        locx = rand.randint(0, n_stored)
        X.data[locx] = float(check)

    if check is None or (allow_nan and check == "NaN"):
        assert_all_finite(X, allow_nan=allow_nan)
    else:
        msg_err = "Input contains " + ("infinity" if allow_nan else "NaN, infinity") + "."
        with pytest.raises(ValueError, match=msg_err):
            assert_all_finite(X, allow_nan=allow_nan)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters