diff --git a/cpp/src/glm/qn/mg/qn_mg.cuh b/cpp/src/glm/qn/mg/qn_mg.cuh index 425240f87d..ef9c1db6c2 100644 --- a/cpp/src/glm/qn/mg/qn_mg.cuh +++ b/cpp/src/glm/qn/mg/qn_mg.cuh @@ -49,9 +49,12 @@ int qn_fit_mg(const raft::handle_t& handle, SimpleVec w0(w0_data, loss.n_param); // Scale the regularization strength with the number of samples. - T l1 = 0; + T l1 = pams.penalty_l1; T l2 = pams.penalty_l2; - if (pams.penalty_normalized) { l2 /= n_samples; } + if (pams.penalty_normalized) { + l1 /= n_samples; + l2 /= n_samples; + } ML::GLM::detail::Tikhonov reg(l2); ML::GLM::detail::RegularizedGLM regularizer_obj(&loss, ®); diff --git a/python/cuml/tests/dask/test_dask_logistic_regression.py b/python/cuml/tests/dask/test_dask_logistic_regression.py index f424e3ee62..2d50fdb946 100644 --- a/python/cuml/tests/dask/test_dask_logistic_regression.py +++ b/python/cuml/tests/dask/test_dask_logistic_regression.py @@ -73,7 +73,7 @@ def select_sk_solver(cuml_solver): @pytest.mark.mg -@pytest.mark.parametrize("nrows", [1e5]) +@pytest.mark.parametrize("nrows", [1e4]) @pytest.mark.parametrize("ncols", [20]) @pytest.mark.parametrize("n_parts", [2, 6]) @pytest.mark.parametrize("fit_intercept", [False, True]) @@ -273,6 +273,8 @@ def test_lbfgs( delayed, client, penalty="l2", + l1_ratio=None, + C=1.0, n_classes=2, ): tolerance = 0.005 @@ -296,20 +298,41 @@ def imp(): X_df, y_df = _prep_training_data(client, X, y, n_parts) - lr = cumlLBFGS_dask(fit_intercept=fit_intercept, penalty=penalty) + lr = cumlLBFGS_dask( + solver="qn", + fit_intercept=fit_intercept, + penalty=penalty, + l1_ratio=l1_ratio, + C=C, + verbose=True, + ) lr.fit(X_df, y_df) lr_coef = lr.coef_.to_numpy() lr_intercept = lr.intercept_.to_numpy() - sk_model = skLR(fit_intercept=fit_intercept, penalty=penalty) + if penalty == "l2" or penalty == "none": + sk_solver = "lbfgs" + elif penalty == "l1" or penalty == "elasticnet": + sk_solver = "saga" + else: + raise ValueError(f"unexpected penalty {penalty}") + + sk_model = skLR( + 
+        solver=sk_solver, +        fit_intercept=fit_intercept, +        penalty=penalty, +        l1_ratio=l1_ratio, +        C=C, +    ) sk_model.fit(X, y) sk_coef = sk_model.coef_ sk_intercept = sk_model.intercept_ - assert len(lr_coef) == len(sk_coef) - for i in range(len(lr_coef)): - assert lr_coef[i] == pytest.approx(sk_coef[i], abs=tolerance) - assert lr_intercept == pytest.approx(sk_intercept, abs=tolerance) + if sk_solver == "lbfgs": + assert len(lr_coef) == len(sk_coef) + for i in range(len(lr_coef)): + assert lr_coef[i] == pytest.approx(sk_coef[i], abs=tolerance) + assert lr_intercept == pytest.approx(sk_intercept, abs=tolerance) # test predict cu_preds = lr.predict(X_df, delayed=delayed).compute().to_numpy() @@ -329,7 +352,7 @@ def imp(): @pytest.mark.parametrize("fit_intercept", [False, True]) def test_noreg(fit_intercept, client): lr = test_lbfgs( - nrows=1e5, + nrows=1e4, ncols=20, n_parts=23, fit_intercept=fit_intercept, @@ -392,7 +415,7 @@ def assert_small(X, y, n_classes): @pytest.mark.parametrize("n_classes", [8]) def test_n_classes(n_parts, fit_intercept, n_classes, client): lr = test_lbfgs( - nrows=1e5, + nrows=1e4, ncols=20, n_parts=n_parts, fit_intercept=fit_intercept, @@ -404,3 +427,57 @@ def test_n_classes(n_parts, fit_intercept, n_classes, client): assert lr._num_classes == n_classes + + +@pytest.mark.mg +@pytest.mark.parametrize("fit_intercept", [False, True]) +@pytest.mark.parametrize("datatype", [np.float32]) +@pytest.mark.parametrize("delayed", [True]) +@pytest.mark.parametrize("n_classes", [2, 8]) +@pytest.mark.parametrize("C", [1.0, 10.0]) +def test_l1(fit_intercept, datatype, delayed, n_classes, C, client): + lr = test_lbfgs( + nrows=1e4, + ncols=20, + n_parts=2, + fit_intercept=fit_intercept, + datatype=datatype, + delayed=delayed, + client=client, + penalty="l1", + n_classes=n_classes, + C=C, + ) + + l1_strength, l2_strength = lr._get_qn_params() + assert l1_strength == 1.0 / lr.C + assert l2_strength == 0.0 + + +@pytest.mark.mg 
+@pytest.mark.parametrize("fit_intercept", [False, True]) +@pytest.mark.parametrize("datatype", [np.float32]) +@pytest.mark.parametrize("delayed", [True]) +@pytest.mark.parametrize("n_classes", [2, 8]) +@pytest.mark.parametrize("l1_ratio", [0.2, 0.8]) +def test_elasticnet( + fit_intercept, datatype, delayed, n_classes, l1_ratio, client +): + lr = test_lbfgs( + nrows=1e4, + ncols=20, + n_parts=2, + fit_intercept=fit_intercept, + datatype=datatype, + delayed=delayed, + client=client, + penalty="elasticnet", + n_classes=n_classes, + l1_ratio=l1_ratio, + ) + + l1_strength, l2_strength = lr._get_qn_params() + + strength = 1.0 / lr.C + assert l1_strength == lr.l1_ratio * strength + assert l2_strength == (1.0 - lr.l1_ratio) * strength