Skip to content

Commit

Permalink
pandas dataframe operations translated into numpy; added function to …
Browse files Browse the repository at this point in the history
…compute displacements
  • Loading branch information
Paola Masuzzo committed Jun 26, 2017
1 parent b383dbb commit 65d722e
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 70 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,6 @@ ENV/

# Rope project settings
.ropeproject

# Visual studio
.vscode
118 changes: 69 additions & 49 deletions biotracks/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,81 +67,101 @@ def plotXY(df, id_, x_coord, y_coord):


def normalize(df, id_, x_coord, y_coord):
    """Normalize to the origin of the coordinate system.

    Rows containing missing values are dropped first. Then, for every
    distinct value found in the ``id_`` column, the first (x, y) position
    of that group is subtracted from all positions of the group, so that
    each trajectory starts at (0, 0).

    df -- the trajectories dataframe
    id_ -- an identifier (linkID or trackID)
    x_coord -- the x coordinate
    y_coord -- the y coordinate

    Returns a new dataframe (the input is not modified) with two extra
    columns, 'x_norm' and 'y_norm'. Rows are grouped by identifier, in
    order of first appearance of each identifier.
    """
    df = df.dropna()
    list_ = []
    for i in df[id_].unique():
        tmp = df[df[id_] == i]
        # convert coordinates columns into numpy array
        array = tmp[[x_coord, y_coord]].values
        # subtract the first x and y coordinates of this trajectory
        diff_array = array - array[0]
        list_.append(
            tmp.assign(x_norm=diff_array[:, 0], y_norm=diff_array[:, 1])
        )
    if not list_:
        # pd.concat raises ValueError on an empty list; return an empty
        # frame that still carries the expected output columns
        return df.assign(x_norm=float('nan'), y_norm=float('nan'))
    return pd.concat(list_)


def compute_cumulative_displacements(df, id_, x_coord, y_coord):
    """Compute cumulative displacements of motion in the two directions, x and y.

    Rows containing missing values are dropped first. For every distinct
    value found in the ``id_`` column, the running (cumulative) sum of the
    values in the ``x_coord`` and ``y_coord`` columns is computed along
    the rows of that group.

    df -- the trajectories dataframe
    id_ -- an identifier
    x_coord -- the x coordinate
    y_coord -- the y coordinate

    Returns a new dataframe (the input is not modified) with two extra
    columns, 'x_cum' and 'y_cum'. Rows are grouped by identifier, in
    order of first appearance of each identifier.
    """
    df = df.dropna()
    list_ = []
    for i in df[id_].unique():
        tmp = df[df[id_] == i]
        # running sum down the rows of the two coordinate columns
        sum_array = np.cumsum(tmp[[x_coord, y_coord]].values, axis=0)
        list_.append(
            tmp.assign(x_cum=sum_array[:, 0], y_cum=sum_array[:, 1])
        )
    if not list_:
        # pd.concat raises ValueError on an empty list; return an empty
        # frame that still carries the expected output columns
        return df.assign(x_cum=np.nan, y_cum=np.nan)
    return pd.concat(list_)

def compute_ta(df, id_, x_coord, y_coord):
"""Compute turning angles.
def compute_displacements(df, id_, x_coord, y_coord):
    """Compute net displacements of motion in the two directions, x and y.

    Rows containing missing values are dropped first. For every distinct
    value found in the ``id_`` column, the difference between each row
    and the previous row of that group is computed for the ``x_coord``
    and ``y_coord`` columns. The first row of every group has no
    predecessor, so its displacement is NaN.

    df -- the trajectories dataframe
    id_ -- an identifier
    x_coord -- the x coordinate
    y_coord -- the y coordinate

    Returns a new dataframe (the input is not modified) with two extra
    float columns, 'delta_x' and 'delta_y'. Rows are grouped by
    identifier, in order of first appearance of each identifier.
    """
    df = df.dropna()
    list_ = []
    for i in df[id_].unique():
        tmp = df[df[id_] == i]
        # cast to float so that the NaN placeholder of the first row is
        # representable even when the coordinates are stored as integers
        coords = tmp[[x_coord, y_coord]].values.astype(float)
        deltas = np.full(coords.shape, np.nan)
        # row k holds position[k] - position[k - 1]; row 0 stays NaN
        deltas[1:] = np.diff(coords, axis=0)
        list_.append(
            tmp.assign(delta_x=deltas[:, 0], delta_y=deltas[:, 1])
        )
    if not list_:
        # pd.concat raises ValueError on an empty list; return an empty
        # frame that still carries the expected output columns
        return df.assign(delta_x=np.nan, delta_y=np.nan)
    return pd.concat(list_)


def compute_turning_angle(df, id_):
    """Compute turning angles.

    For every row, the angle is ``atan2(delta_y, delta_x)`` in radians,
    i.e. the direction of the displacement vector measured from the
    positive x axis. Rows whose displacement is NaN get a NaN angle.

    df -- the trajectories dataframe; it must already contain the
          'delta_x' and 'delta_y' columns
    id_ -- an identifier

    Returns a new dataframe (the input is not modified) with one extra
    column, 'ta'. Rows are grouped by identifier, in order of first
    appearance of each identifier.
    """
    list_ = []
    for i in df[id_].unique():
        tmp = df[df[id_] == i]
        delta_x = tmp['delta_x'].values.astype(float)
        delta_y = tmp['delta_y'].values.astype(float)
        # argument order matters: atan2 takes (y, x)
        list_.append(tmp.assign(ta=np.arctan2(delta_y, delta_x)))
    if not list_:
        # pd.concat raises ValueError on an empty list; return an empty
        # frame that still carries the expected output column
        return df.assign(ta=np.nan)
    return pd.concat(list_)


def plot_polar(theta, N):
Expand Down
40 changes: 19 additions & 21 deletions scripts/create_dpkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,29 +134,27 @@ def main(argv):
y = track_dict.get(names.Y_COORD_NAME)
frame = track_dict.get(names.FRAME_NAME)
# basic visualizations
try:
plot.prepareforplot(objects_links_tracks, x, y, frame)
cum_df = plot.cum_displ(objects_links_tracks, link_id, x, y)
plot.plotXY(cum_df, 'TRACK_ID', x + 'cum', y + 'cum')

plot.plotXY(
cum_df[cum_df['LINK_ID'] == 0], 'TRACK_ID', x + 'cum', y + 'cum'
)
plot.plotXY(objects_links_tracks, 'TRACK_ID', x, y)
plot.plotXY(objects_links_tracks, 'LINK_ID', x, y)
logger.info(
plot.prepareforplot(objects_links_tracks, x, y, frame)
cum_df = plot.compute_cumulative_displacements(objects_links_tracks, link_id, x, y)
plot.plotXY(cum_df, 'TRACK_ID', 'x_cum', 'y_cum')

plot.plotXY(cum_df[cum_df['LINK_ID'] == 0], 'TRACK_ID', 'x_cum', 'y_cum')
plot.plotXY(objects_links_tracks, 'TRACK_ID', x, y)
plot.plotXY(objects_links_tracks, link_id, x, y)
logger.info(
'normalizing dataset to the origin of the coordinate system...'
)
norm = plot.normalize(objects_links_tracks, 'TRACK_ID', x, y)
plot.plotXY(norm, 'TRACK_ID', x + 'norm', y + 'norm')
plot.plotXY(norm, 'LINK_ID', x + 'norm', y + 'norm')
logger.info('computing turning angles...')
ta_norm = plot.compute_ta(norm, 'TRACK_ID', x, y)
theta = ta_norm.ta[~np.isnan(ta_norm.ta)]
theta = pd.DataFrame(theta)
plot.plot_polar(theta, 10)
except KeyError:
logger.error('one or more variable provided are not in the dataset')
norm = plot.normalize(objects_links_tracks, 'TRACK_ID', x, y)
plot.plotXY(norm, 'TRACK_ID', 'x_norm', 'y_norm')
plot.plotXY(norm, link_id, 'x_norm', 'y_norm')
logger.info('computing displacements in the two directions of motion...')
norm = plot.compute_displacements(norm, 'TRACK_ID', x, y)
logger.info('computing turning angles...')
norm = plot.compute_turning_angle(norm, 'TRACK_ID')

theta = pd.DataFrame(norm.ta[~np.isnan(norm.ta)])
plot.plot_polar(theta, 10)



if __name__ == "__main__":
Expand Down

0 comments on commit 65d722e

Please sign in to comment.