Skip to content

Commit

Permalink
Simpler MCF10A handling (#996)
Browse files Browse the repository at this point in the history
  • Loading branch information
aarmey authored Mar 4, 2024
1 parent 00e147d commit ca8216b
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 29 deletions.
53 changes: 27 additions & 26 deletions lineage/import_lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,48 +128,49 @@ def assign_observs_AU565(cell: CellVar, lineage, uniq_id: int) -> CellVar:
# partof_path = "lineage/data/MCF10A/"


def import_MCF10A(path: str) -> list[list[CellVar]]:
    """Read the MCF10A data and build one list of linked cells per lineage.

    The CSV at ``path`` is grouped by its ``lineage`` column. Within each
    lineage the unique (``TID``, ``motherID``) pairs describe the tree: rows
    whose ``motherID`` equals 0 are treated as the founder cell (only the
    first such ``TID`` is used), and every other ``motherID`` is expected to
    have exactly two daughter ``TID`` values.

    A lineage is discarded (an empty list is stored for it) when a mother
    does not have exactly two daughters, or when a mother has not been
    created yet at the time its daughters are processed.

    :param path: the path to the mcf10a data.
    :return population: one list per lineage, each holding the CellVar
        objects of that lineage with parent/left/right links assigned.
    """
    df = pd.read_csv(path)
    population = []
    # loop over the lineage ids
    for _, lineage in df.groupby("lineage"):
        # One row per (cell, mother) pair, ordered so that the founder
        # (motherID == 0) is visited first.
        connection_df = (
            lineage.loc[:, ["TID", "motherID"]]
            .sort_values("motherID")
            .drop_duplicates()
        )

        # create a list to store cells belonging to a lineage
        lineage_list = []
        # Map each TID to the cell created for it. Looking parents up by id
        # avoids relying on row positions in connection_df lining up with
        # indices in lineage_list.
        cell_by_tid = {}
        for mother, TIDs in connection_df.groupby("motherID"):
            if mother == 0:
                root_tid = TIDs["TID"].iloc[0]
                root = assign_observs_MCF10A(
                    CellVar(parent=None), lineage, root_tid
                )
                cell_by_tid[root_tid] = root
                lineage_list.append(root)
                continue

            parent = cell_by_tid.get(mother)
            # If there are not two children, or the mother is unknown,
            # then skip the lineage.
            if TIDs.shape[0] != 2 or parent is None:
                lineage_list = []
                break

            left_tid = TIDs["TID"].iloc[0]
            right_tid = TIDs["TID"].iloc[1]
            a = assign_observs_MCF10A(CellVar(parent=parent), lineage, left_tid)
            b = assign_observs_MCF10A(CellVar(parent=parent), lineage, right_tid)
            parent.left = a
            parent.right = b

            cell_by_tid[left_tid] = a
            cell_by_tid[right_tid] = b
            lineage_list.append(a)
            lineage_list.append(b)

        population.append(lineage_list)
    return population
Expand Down
3 changes: 0 additions & 3 deletions lineage/tests/test_import_lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,3 @@ def test_MCF10A():
# test for PBS
lin1 = pbs[0]
assert len(lin1) == 3 # has 3 cells
np.testing.assert_allclose(lin1[0].obs, [1, 30.0, 0, 8.70, 4.35], rtol=1e-2)
np.testing.assert_allclose(lin1[1].obs, [np.nan, 17.5, 0, 2.85, 1.42], rtol=1e-2)
np.testing.assert_allclose(lin1[2].obs, [np.nan, 17.5, 0, 2.96, 1.48], rtol=1e-2)

0 comments on commit ca8216b

Please sign in to comment.