Skip to content

Commit

Permalink
fix casting to float and test (#837)
Browse files Browse the repository at this point in the history
  • Loading branch information
eroell authored Dec 6, 2024
1 parent 2e8240f commit 27852fa
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 18 deletions.
3 changes: 3 additions & 0 deletions ehrapy/anndata/anndata_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,9 @@ def set_numeric_vars(

vars_idx = get_column_indices(adata, vars)

# If adata.X has an integer dtype (e.g. int64) and values is float64, the assignment below would cast the floats to int, so convert X to the dtype of values first.
adata.X = adata.X.astype(values.dtype)

adata.X[:, vars_idx] = values

return adata
Expand Down
26 changes: 13 additions & 13 deletions tests/data/dataset1.csv
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
idx,sys_bp_entry,dia_bp_entry,glucose,weight,disease,station
1,138,78,80,77,A,ICU
2,139,79,90,76,A,ICU
3,140,80,120,60,A,MICU
4,141,81,130,90,A,MICU
5,148,77,80,110,B,ICU
6,149,78,135,78,B,ICU
7,150,79,125,56,B,MICU
8,151,80,95,76,B,MICU
9,158,55,70,67,C,ICU
10,159,56,85,82,C,ICU
11,160,57,125,59,C,MICU
12,161,58,125,81,C,MICU
idx,sys_bp_entry,dia_bp_entry,glucose,weight,in_days,disease,station
1,138,78,80,77,1,A,ICU
2,139,79,90,76,2,A,ICU
3,140,80,120,60,0,A,MICU
4,141,81,130,90,1,A,MICU
5,148,77,80,110,0,B,ICU
6,149,78,135,78,1,B,ICU
7,150,79,125,56,2,B,MICU
8,151,80,95,76,3,B,MICU
9,158,55,70,67,4,C,ICU
10,159,56,85,82,1,C,ICU
11,160,57,125,59,2,C,MICU
12,161,58,125,81,1,C,MICU
94 changes: 94 additions & 0 deletions tests/preprocessing/test_normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,19 @@ def adata_mini():
)[:8]


@pytest.fixture
def adata_mini_integers_in_X():
    """Load dataset1.csv with an int32 ``X`` and ``in_days`` marked as numeric.

    Every CSV column except ``in_days`` is passed as obs-only, so ``in_days``
    is presumably the only variable left in ``X``.
    """
    obs_only_columns = ["idx", "sys_bp_entry", "dia_bp_entry", "glucose", "weight", "disease", "station"]
    adata = read_csv(f"{TEST_DATA_PATH}/dataset1.csv", columns_obs_only=obs_only_columns)
    # Reading the CSV tends to produce floats; force integers so the
    # normalization of integer-typed data is what actually gets tested.
    adata.X = adata.X.astype(np.int32)
    ep.ad.infer_feature_types(adata)
    ep.ad.replace_feature_types(adata, ["in_days"], "numeric")
    return adata


@pytest.fixture
def adata_to_norm():
obs_data = {"ID": ["Patient1", "Patient2", "Patient3"], "Age": [31, 94, 62]}
Expand Down Expand Up @@ -94,6 +107,27 @@ def test_norm_scale(array_type, adata_to_norm):
assert np.allclose(adata_norm.X[:, 5], adata_to_norm_casted.X[:, 5], equal_nan=True)


def test_norm_scale_integers(adata_mini_integers_in_X):
    """Compare ``scale_norm`` on the integer-typed fixture with precomputed reference values."""
    # One reference value per observation, reshaped into a single column.
    expected = np.array(
        [
            -0.4472136,
            0.4472136,
            -1.34164079,
            -0.4472136,
            -1.34164079,
            -0.4472136,
            0.4472136,
            1.34164079,
            2.23606798,
            -0.4472136,
            0.4472136,
            -0.4472136,
        ]
    ).reshape(-1, 1)
    normalized = ep.pp.scale_norm(adata_mini_integers_in_X, copy=True)
    assert np.allclose(normalized.X, expected)


@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_norm_scale_kwargs(array_type, adata_to_norm):
adata_to_norm_casted = adata_to_norm.copy()
Expand Down Expand Up @@ -159,6 +193,12 @@ def test_norm_minmax(array_type, adata_to_norm):
assert np.allclose(adata_norm.X[:, 5], adata_to_norm_casted.X[:, 5], equal_nan=True)


def test_norm_minmax_integers(adata_mini_integers_in_X):
    """Compare ``minmax_norm`` on the integer-typed fixture with precomputed reference values."""
    # One reference value per observation, reshaped into a single column.
    expected = np.array(
        [0.25, 0.5, 0.0, 0.25, 0.0, 0.25, 0.5, 0.75, 1.0, 0.25, 0.5, 0.25]
    ).reshape(-1, 1)
    normalized = ep.pp.minmax_norm(adata_mini_integers_in_X, copy=True)
    assert np.allclose(normalized.X, expected)


@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_norm_minmax_kwargs(array_type, adata_to_norm):
adata_to_norm_casted = adata_to_norm.copy()
Expand Down Expand Up @@ -218,6 +258,12 @@ def test_norm_maxabs(array_type, adata_to_norm):
assert np.allclose(adata_norm.X[:, 5], adata_to_norm_casted.X[:, 5], equal_nan=True)


def test_norm_maxabs_integers(adata_mini_integers_in_X):
    """Compare ``maxabs_norm`` on the integer-typed fixture with precomputed reference values."""
    # One reference value per observation, reshaped into a single column.
    expected = np.array(
        [0.25, 0.5, 0.0, 0.25, 0.0, 0.25, 0.5, 0.75, 1.0, 0.25, 0.5, 0.25]
    ).reshape(-1, 1)
    normalized = ep.pp.maxabs_norm(adata_mini_integers_in_X, copy=True)
    assert np.allclose(normalized.X, expected)


@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_norm_maxabs_group(array_type, adata_mini):
adata_mini_casted = adata_mini.copy()
Expand Down Expand Up @@ -273,6 +319,12 @@ def test_norm_robust_scale(array_type, adata_to_norm):
assert np.allclose(adata_norm.X[:, 5], adata_to_norm_casted.X[:, 5], equal_nan=True)


def test_norm_robust_scale_integers(adata_mini_integers_in_X):
    """Compare ``robust_scale_norm`` on the integer-typed fixture with precomputed reference values."""
    # One reference value per observation, reshaped into a single column.
    expected = np.array(
        [0.0, 1.0, -1.0, 0.0, -1.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 0.0]
    ).reshape(-1, 1)
    normalized = ep.pp.robust_scale_norm(adata_mini_integers_in_X, copy=True)
    assert np.allclose(normalized.X, expected)


@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_norm_robust_scale_kwargs(array_type, adata_to_norm):
adata_to_norm_casted = adata_to_norm.copy()
Expand Down Expand Up @@ -331,6 +383,27 @@ def test_norm_quantile_uniform(array_type, adata_to_norm):
assert np.allclose(adata_norm.X[:, 5], adata_to_norm_casted.X[:, 5], equal_nan=True)


def test_norm_quantile_integers(adata_mini_integers_in_X):
    """Compare ``quantile_norm`` on the integer-typed fixture with precomputed reference values."""
    # One reference value per observation, reshaped into a single column.
    expected = np.array(
        [
            0.36363636,
            0.72727273,
            0.0,
            0.36363636,
            0.0,
            0.36363636,
            0.72727273,
            0.90909091,
            1.0,
            0.36363636,
            0.72727273,
            0.36363636,
        ]
    ).reshape(-1, 1)
    normalized = ep.pp.quantile_norm(adata_mini_integers_in_X, copy=True)
    assert np.allclose(normalized.X, expected)


@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_norm_quantile_uniform_kwargs(array_type, adata_to_norm):
adata_to_norm_casted = adata_to_norm.copy()
Expand Down Expand Up @@ -392,6 +465,27 @@ def test_norm_power(array_type, adata_to_norm):
assert np.allclose(adata_norm.X[:, 5], adata_to_norm_casted.X[:, 5], equal_nan=True)


def test_norm_power_integers(adata_mini_integers_in_X):
    """Compare ``power_norm`` on the integer-typed fixture with precomputed reference values."""
    # One reference value per observation, reshaped into a single column.
    expected = np.array(
        [
            -0.31234142,
            0.58319338,
            -1.65324303,
            -0.31234142,
            -1.65324303,
            -0.31234142,
            0.58319338,
            1.27419965,
            1.8444134,
            -0.31234142,
            0.58319338,
            -0.31234142,
        ]
    ).reshape(-1, 1)
    normalized = ep.pp.power_norm(adata_mini_integers_in_X, copy=True)
    assert np.allclose(normalized.X, expected)


@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_norm_power_kwargs(array_type, adata_to_norm):
adata_to_norm_casted = adata_to_norm.copy()
Expand Down
15 changes: 10 additions & 5 deletions tests/tools/feature_ranking/test_rank_features_groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,15 +323,20 @@ def test_rank_features_groups_generates_outputs(field_to_rank):
assert "log2foldchanges" not in adata.uns["rank_features_groups"]
assert "pts" not in adata.uns["rank_features_groups"]

if field_to_rank == "layer" or field_to_rank == "obs":
if field_to_rank == "layer":
assert len(adata.uns["rank_features_groups"]["names"]) == 4
assert len(adata.uns["rank_features_groups"]["pvals"]) == 4
assert len(adata.uns["rank_features_groups"]["scores"]) == 4

elif field_to_rank == "obs":
assert len(adata.uns["rank_features_groups"]["names"]) == 3 # It only captures the length of each group
assert len(adata.uns["rank_features_groups"]["pvals"]) == 3
assert len(adata.uns["rank_features_groups"]["scores"]) == 3

elif field_to_rank == "layer_and_obs":
assert len(adata.uns["rank_features_groups"]["names"]) == 6 # It only captures the length of each group
assert len(adata.uns["rank_features_groups"]["pvals"]) == 6
assert len(adata.uns["rank_features_groups"]["scores"]) == 6
assert len(adata.uns["rank_features_groups"]["names"]) == 7 # It only captures the length of each group
assert len(adata.uns["rank_features_groups"]["pvals"]) == 7
assert len(adata.uns["rank_features_groups"]["scores"]) == 7


def test_rank_features_groups_consistent_results():
Expand Down Expand Up @@ -396,7 +401,7 @@ def test_rank_features_group_column_to_rank():
adata_copy = adata.copy()

ep.tl.rank_features_groups(adata, groupby="disease", columns_to_rank="all")
assert len(adata.uns["rank_features_groups"]["names"]) == 2
assert len(adata.uns["rank_features_groups"]["names"]) == 3

# want to check a "complete selection" works
adata = adata_copy.copy()
Expand Down

0 comments on commit 27852fa

Please sign in to comment.