Skip to content

Commit

Permalink
support l1 and elasticnet
Browse files Browse the repository at this point in the history
  • Loading branch information
lijinf2 committed Sep 27, 2023
1 parent b6fe8b2 commit bef6676
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 11 deletions.
7 changes: 5 additions & 2 deletions cpp/src/glm/qn/mg/qn_mg.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,12 @@ int qn_fit_mg(const raft::handle_t& handle,
SimpleVec<T> w0(w0_data, loss.n_param);

// Scale the regularization strength with the number of samples.
T l1 = 0;
T l1 = pams.penalty_l1;
T l2 = pams.penalty_l2;
if (pams.penalty_normalized) { l2 /= n_samples; }
if (pams.penalty_normalized) {
l1 /= n_samples;
l2 /= n_samples;
}

ML::GLM::detail::Tikhonov<T> reg(l2);
ML::GLM::detail::RegularizedGLM<T, LossFunction, decltype(reg)> regularizer_obj(&loss, &reg);
Expand Down
95 changes: 86 additions & 9 deletions python/cuml/tests/dask/test_dask_logistic_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def select_sk_solver(cuml_solver):


@pytest.mark.mg
@pytest.mark.parametrize("nrows", [1e5])
@pytest.mark.parametrize("nrows", [1e4])
@pytest.mark.parametrize("ncols", [20])
@pytest.mark.parametrize("n_parts", [2, 6])
@pytest.mark.parametrize("fit_intercept", [False, True])
Expand Down Expand Up @@ -273,6 +273,8 @@ def test_lbfgs(
delayed,
client,
penalty="l2",
l1_ratio=None,
C=1.0,
n_classes=2,
):
tolerance = 0.005
Expand All @@ -296,20 +298,41 @@ def imp():

X_df, y_df = _prep_training_data(client, X, y, n_parts)

lr = cumlLBFGS_dask(fit_intercept=fit_intercept, penalty=penalty)
lr = cumlLBFGS_dask(
solver="qn",
fit_intercept=fit_intercept,
penalty=penalty,
l1_ratio=l1_ratio,
C=C,
verbose=True,
)
lr.fit(X_df, y_df)
lr_coef = lr.coef_.to_numpy()
lr_intercept = lr.intercept_.to_numpy()

sk_model = skLR(fit_intercept=fit_intercept, penalty=penalty)
if penalty == "l2" or penalty == "none":
sk_solver = "lbfgs"
elif penalty == "l1" or penalty == "elasticnet":
sk_solver = "saga"
else:
raise ValueError(f"unexpected penalty {penalty}")

sk_model = skLR(
solver=sk_solver,
fit_intercept=fit_intercept,
penalty=penalty,
l1_ratio=l1_ratio,
C=C,
)
sk_model.fit(X, y)
sk_coef = sk_model.coef_
sk_intercept = sk_model.intercept_

assert len(lr_coef) == len(sk_coef)
for i in range(len(lr_coef)):
assert lr_coef[i] == pytest.approx(sk_coef[i], abs=tolerance)
assert lr_intercept == pytest.approx(sk_intercept, abs=tolerance)
if sk_solver == "lbfgs":
assert len(lr_coef) == len(sk_coef)
for i in range(len(lr_coef)):
assert lr_coef[i] == pytest.approx(sk_coef[i], abs=tolerance)
assert lr_intercept == pytest.approx(sk_intercept, abs=tolerance)

# test predict
cu_preds = lr.predict(X_df, delayed=delayed).compute().to_numpy()
Expand All @@ -329,7 +352,7 @@ def imp():
@pytest.mark.parametrize("fit_intercept", [False, True])
def test_noreg(fit_intercept, client):
lr = test_lbfgs(
nrows=1e5,
nrows=1e4,
ncols=20,
n_parts=23,
fit_intercept=fit_intercept,
Expand Down Expand Up @@ -392,7 +415,7 @@ def assert_small(X, y, n_classes):
@pytest.mark.parametrize("n_classes", [8])
def test_n_classes(n_parts, fit_intercept, n_classes, client):
lr = test_lbfgs(
nrows=1e5,
nrows=1e4,
ncols=20,
n_parts=n_parts,
fit_intercept=fit_intercept,
Expand All @@ -404,3 +427,57 @@ def test_n_classes(n_parts, fit_intercept, n_classes, client):
)

assert lr._num_classes == n_classes


@pytest.mark.mg
@pytest.mark.parametrize("fit_intercept", [False, True])
@pytest.mark.parametrize("datatype", [np.float32])
@pytest.mark.parametrize("delayed", [True])
@pytest.mark.parametrize("n_classes", [2, 8])
@pytest.mark.parametrize("C", [1.0, 10.0])
def test_l1(fit_intercept, datatype, delayed, n_classes, C, client):
    """Run ``test_lbfgs`` with ``penalty="l1"`` and check the QN strengths.

    The fit and prediction checks are delegated to ``test_lbfgs``; this test
    then inspects the pair returned by ``_get_qn_params()`` on the model that
    ``test_lbfgs`` hands back. The L1 strength is expected to be ``1 / C`` and
    the L2 strength is expected to be zero.
    """
    lr = test_lbfgs(
        nrows=1e4,
        ncols=20,
        n_parts=2,
        fit_intercept=fit_intercept,
        datatype=datatype,
        delayed=delayed,
        client=client,
        penalty="l1",
        n_classes=n_classes,
        C=C,
    )

    l1_strength, l2_strength = lr._get_qn_params()

    # 1 / C is a computed float (e.g. 0.1 for C=10.0); compare with a
    # tolerance so the test does not depend on the model evaluating the
    # expression in exactly the same way.
    assert l1_strength == pytest.approx(1.0 / lr.C)
    # A pure L1 penalty must contribute exactly no L2 term.
    assert l2_strength == 0.0


@pytest.mark.mg
@pytest.mark.parametrize("fit_intercept", [False, True])
@pytest.mark.parametrize("datatype", [np.float32])
@pytest.mark.parametrize("delayed", [True])
@pytest.mark.parametrize("n_classes", [2, 8])
@pytest.mark.parametrize("l1_ratio", [0.2, 0.8])
def test_elasticnet(
    fit_intercept, datatype, delayed, n_classes, l1_ratio, client
):
    """Run ``test_lbfgs`` with ``penalty="elasticnet"`` and check the split.

    The fit and prediction checks are delegated to ``test_lbfgs``; ``C`` is
    not passed here, so whatever default ``test_lbfgs`` uses applies. The pair
    returned by ``_get_qn_params()`` is then compared against the overall
    strength ``1 / C`` divided between L1 and L2 according to ``l1_ratio``.
    """
    lr = test_lbfgs(
        nrows=1e4,
        ncols=20,
        n_parts=2,
        fit_intercept=fit_intercept,
        datatype=datatype,
        delayed=delayed,
        client=client,
        penalty="elasticnet",
        n_classes=n_classes,
        l1_ratio=l1_ratio,
    )

    l1_strength, l2_strength = lr._get_qn_params()

    strength = 1.0 / lr.C
    # Products such as (1 - 0.8) * strength are not exactly representable;
    # compare with a tolerance instead of bit-for-bit float equality.
    assert l1_strength == pytest.approx(lr.l1_ratio * strength)
    assert l2_strength == pytest.approx((1.0 - lr.l1_ratio) * strength)

0 comments on commit bef6676

Please sign in to comment.