Skip to content

Commit

Permalink
Fix index_unique argument for concatenating
Browse files: browse the repository at this point in the history
  • Loading branch information
ivirshup committed May 1, 2020
1 parent 1e679f3 commit 2e37bb6
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 19 deletions.
1 change: 1 addition & 0 deletions anndata/_core/anndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -1690,6 +1690,7 @@ def concatenate(
batch_categories=batch_categories,
uns_merge=uns_merge,
fill_value=fill_value,
index_unique=index_unique,
)

# Backwards compat, ordering columns:
Expand Down
29 changes: 12 additions & 17 deletions anndata/_core/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,32 +432,27 @@ def concat(
if batch_categories is None:
batch_categories = np.arange(len(adatas)).astype(str)

# Combining indexes
obs_names = pd.Index(
np.concatenate(
[
pd.Series(a.obs_names) + f"{index_unique}{batch}"
for batch, a in zip(batch_categories, adatas)
]
)
# Batch column
batch = pd.Categorical.from_codes(
np.repeat(np.arange(len(adatas)), [a.n_obs for a in adatas]),
categories=batch_categories,
)

# Combining indexes
obs_names = pd.concat([pd.Series(a.obs_names) for a in adatas], ignore_index=True)
if index_unique is not None:
obs_names = obs_names.str.cat(batch.map(str), sep=index_unique)
obs_names = pd.Index(obs_names)

var_names = resolve_index([a.var_names for a in adatas], join=join)
reindexers = [
gen_reindexer(var_names, a.var_names, fill_value=fill_value) for a in adatas
]

# Obs
# fmt: off
batch = (
pd.Series(
np.repeat(np.arange(len(adatas)), [a.n_obs for a in adatas]), dtype="category"
)
.map(dict(zip(np.arange(len(adatas)), batch_categories)))
)
# fmt: on
obs = pd.concat([a.obs for a in adatas], ignore_index=True)
obs.index = obs_names
obs[batch_key] = batch.values
obs[batch_key] = batch

# Var
var = merge_dataframes(
Expand Down
5 changes: 3 additions & 2 deletions anndata/tests/test_concatenate.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ def fix_known_differences(orig, result):
orig = orig.copy()
result = result.copy()

result.obs_names = result.obs_names.str.extract(r"^(.*)-\d+$", expand=False)
result.obs.drop(columns=["batch"], inplace=True)
result.strings_to_categoricals() # Should this be implicit in concatenation?

Expand Down Expand Up @@ -76,7 +75,9 @@ def test_concatenate_roundtrip(join_type, array_type):
subsets.append(adata[subset_idx])
remaining = remaining.difference(subset_idx)

result = subsets[0].concatenate(subsets[1:], join=join_type, uns_merge="same")
result = subsets[0].concatenate(
subsets[1:], join=join_type, uns_merge="same", index_unique=None
)

# Correcting for known differences
orig, result = fix_known_differences(adata, result)
Expand Down

0 comments on commit 2e37bb6

Please sign in to comment.