From 7470e4c50e19a63b24d5dbe4f2524abdf8485a96 Mon Sep 17 00:00:00 2001 From: galexsky <90701223+galexsky@users.noreply.github.com> Date: Tue, 14 Sep 2021 11:54:15 -0400 Subject: [PATCH 01/82] Update feature_detection.py Added 3D feature detect functions to module --- tobac/feature_detection.py | 657 +++++++++++++++++++++++++++++++++++++ 1 file changed, 657 insertions(+) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 3e634a96..ed827dcc 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -398,3 +398,660 @@ def filter_min_distance(features,dxy,min_distance): remove_list_distance.append(index_2) features=features[~features.index.isin(remove_list_distance)] return features + +#-------------------------------------- +#new functions for 3D feature detection +#-------------------------------------- + +def feature_detection_multithreshold_3D(field_in, + dxy, + dz, + threshold=None, + min_num=0, + target='maximum', + position_threshold='center', + sigma_threshold=0.5, + n_erosion_threshold=0, + n_min_threshold=0, + min_distance=0, + feature_number_start=1 + ): + ''' Function to perform feature detection based on contiguous regions above/below a threshold + Input: + field_in: iris.cube.Cube + 3D field to perform the tracking on (needs to have coordinate 'time' along one of its dimensions) + + thresholds: list of floats + threshold values used to select target regions to track + dxy: float + grid spacing of the input data (m) + dz: float + array, grid spacing of input data in vertical (m) + target: str ('minimum' or 'maximum') + flag to determine if tracking is targetting minima or maxima in the data + position_threshold: str('extreme', 'weighted_diff', 'weighted_abs' or 'center') + flag choosing method used for the position of the tracked feature + sigma_threshold: float + standard deviation for intial filtering step + n_erosion_threshold: int + number of pixel by which to erode the identified features + n_min_threshold: int + minimum number of identified features + min_distance: float + minimum distance between detected features (m) + Output: + features: pandas DataFrame + detected features + ''' + #from .utils import add_coordinates + #"add_coordinates_3D" defined and used here instead + + logging.debug('start feature detection based on thresholds') + + # create empty list to store features for all timesteps + list_features_timesteps=[] + + # loop over timesteps for feature identification: + data_time=field_in.slices_over('time') + + # if single threshold is put in as a single value, turn it into a list + if type(threshold) in [int,float]: + threshold=[threshold] + + for i_time,data_i in enumerate(data_time): + print("feature detection multithreshold_3D loop, i_time: ",i_time) + time_i=data_i.coord('time').units.num2date(data_i.coord('time').points[0]) + print("Calling feature_detection_multithreshold_timestep_3D") + features_thresholds=feature_detection_multithreshold_timestep_3D(data_i,i_time, + threshold=threshold, + sigma_threshold=sigma_threshold, + min_num=min_num, + target=target, + position_threshold=position_threshold, + n_erosion_threshold=n_erosion_threshold, + n_min_threshold=n_min_threshold, + min_distance=min_distance, + feature_number_start=feature_number_start + ) + #check if list of features is not empty, then merge features from different threshold values + #into one DataFrame and append to list for individual timesteps: + if not features_thresholds.empty: + #Loop over DataFrame to remove features that are closer than 
distance_min to each other: + if (min_distance > 0): + features_thresholds=filter_min_distance_3D(features_thresholds,dxy,dz,min_distance) + list_features_timesteps.append(features_thresholds) + + print(features_thresholds) + + logging.debug('Finished feature detection for ' + time_i.strftime('%Y-%m-%d_%H:%M:%S')) + print('Finished feature detection for ' + time_i.strftime('%Y-%m-%d_%H:%M:%S')) + + logging.debug('feature detection: merging DataFrames') + # Check if features are detected and then concatenate features from different timesteps into one pandas DataFrame + # If no features are detected raise error + if any([not x.empty for x in list_features_timesteps]): + features=pd.concat(list_features_timesteps, ignore_index=True) + features['feature']=features.index+feature_number_start + # features_filtered = features.drop(features[features['num'] < min_num].index) + # features_filtered.drop(columns=['idx','num','threshold_value'],inplace=True) + features=add_coordinates_3D(features,field_in) + else: + features=None + logging.info('No features detected') + logging.debug('feature detection completed') + print('feature detection completed') + return features + +def feature_detection_multithreshold_timestep_3D(data_i,i_time, + threshold=None, + min_num=0, + target='maximum', + position_threshold='center', + sigma_threshold=0.5, + n_erosion_threshold=0, + n_min_threshold=0, + min_distance=0, + feature_number_start=1 + ): + ''' + function to find features in each timestep based on iteratively finding regions above/below a set of thresholds + Input: + data_i: iris.cube.Cube + 3D field to perform the feature detection (single timestep) + i_time: int + number of the current timestep + + threshold: list of floats + threshold values used to select target regions to track + dxy: float + grid spacing of the input data (m) + target: str ('minimum' or 'maximum') + flag to determine if tracking is targetting minima or maxima in the data + position_threshold: str('extreme', 'weighted_diff', 'weighted_abs' or 'center') + flag choosing method used for the position of the tracked feature + sigma_threshold: float + standard deviation for intial filtering step + n_erosion_threshold: int + number of pixel by which to erode the identified features + n_min_threshold: int + minimum number of identified features + min_distance: float + minimum distance between detected features (m) + feature_number_start: int + feature number to start with + Output: + features_threshold: pandas DataFrame + detected features for individual timestep + ''' + from scipy.ndimage.filters import gaussian_filter + + track_data = data_i.core_data() + + track_data=gaussian_filter(track_data, sigma=sigma_threshold) #smooth data slightly to create rounded, continuous field + # create empty lists to store regions and features for individual timestep + features_thresholds=pd.DataFrame() + for i_threshold,threshold_i in enumerate(threshold): + if (i_threshold>0 and not features_thresholds.empty): + idx_start=features_thresholds['idx'].max()+1 + else: + idx_start=0 + features_threshold_i,regions_i=feature_detection_threshold_3D(track_data,i_time, + threshold=threshold_i, + sigma_threshold=sigma_threshold, + min_num=min_num, + target=target, + position_threshold=position_threshold, + n_erosion_threshold=n_erosion_threshold, + n_min_threshold=n_min_threshold, + min_distance=min_distance, + idx_start=idx_start + ) + if any([x is not None for x in features_threshold_i]): + features_thresholds=features_thresholds.append(features_threshold_i) + + # For 
multiple threshold, and features found both in the current and previous step, remove "parent" features from Dataframe + if (i_threshold>0 and not features_thresholds.empty and regions_old): + # for each threshold value: check if newly found features are surrounded by feature based on less restrictive threshold + features_thresholds=remove_parents_3D(features_thresholds,regions_i,regions_old) + regions_old=regions_i + + logging.debug('Finished feature detection for threshold '+str(i_threshold) + ' : ' + str(threshold_i) ) + return features_thresholds + + +def feature_detection_threshold_3D(data_i,i_time, + threshold=None, + min_num=0, + target='maximum', + position_threshold='center', + sigma_threshold=0.5, + n_erosion_threshold=0, + n_min_threshold=0, + min_distance=0, + idx_start=0): + ''' + function to find features based on individual threshold value: + Input: + data_i: iris.cube.Cube + 3D field to perform the feature detection (single timestep) + i_time: int + number of the current timestep + threshold: float + threshold value used to select target regions to track + target: str ('minimum' or 'maximum') + flag to determine if tracking is targetting minima or maxima in the data + position_threshold: str('extreme', 'weighted_diff', 'weighted_abs' or 'center') + flag choosing method used for the position of the tracked feature + sigma_threshold: float + standard deviation for intial filtering step + n_erosion_threshold: int + number of pixel by which to erode the identified features + n_min_threshold: int + minimum number of identified features + min_distance: float + minimum distance between detected features (m) + idx_start: int + feature id to start with + Output: + features_threshold: pandas DataFrame + detected features for individual threshold + regions: dict + dictionary containing the regions above/below threshold used for each feature (feature ids as keys) + ''' + from skimage.measure import label + from skimage.morphology import binary_erosion + import operator + # if looking for minima, set values above threshold to 0 and scale by data minimum: + if target == 'maximum': + mask=1*(data_i >= threshold) + # if looking for minima, set values above threshold to 0 and scale by data minimum: + elif target == 'minimum': + mask=1*(data_i <= threshold) + # only include values greater than threshold + # erode selected regions by n pixels + if n_erosion_threshold>0: + selem=np.ones((n_erosion_threshold,n_erosion_threshold)) + mask=binary_erosion(mask,selem).astype(np.int64) + # detect individual regions, label and count the number of pixels included: + labels, num_labels = label(mask, background=0, return_num=True) + labels_shape = labels.shape + #values, count = np.unique(labels[:,:,:].ravel(), return_counts=True) + #values_counts=dict(zip(values, count)) + + max_init_size_numba = labels.shape[0]*5 + # Filter out regions that have less pixels than n_min_threshold + #values_counts={k:v for k, v in values_counts.items() if v>n_min_threshold} + #check if not entire domain filled as one feature + #if 0 in values_counts: + #Remove background counts: + #values_counts.pop(0) + #create empty list to store individual features for this threshold + list_features_threshold=[] + #create empty dict to store regions for individual features for this threshold + regions=dict() + + vdim_indyces_dict = dict() + hdim1_indices_dict = dict() + hdim2_indeces_dict = dict() + labels = np.array(labels) + #print(type(values)) + if(num_labels>0): + [last_idx, vdim_indyces_dict,hdim1_indices_dict,hdim2_indeces_dict] 
= get_indices_of_labels(labels, np.array(list(range(1,num_labels+1))), max_init_size_numba) + #print(last_idx, num_labels) + #print(values, count) + #print(np.array(list(values_counts.keys()))) + #create emptry list of features to remove from parent threshold value + #loop over individual regions: + for cur_idx in range(1,num_labels+1): + max_cur_idx = last_idx[cur_idx][0] + if max_cur_idx<=n_min_threshold: + continue + + #print("In feature_detection_threshold_3D, cur_idx: ", cur_idx) + #region=labels[:,:,:] == cur_idx + region = np.full(labels_shape, False) + #[vdim_indyces,hdim1_indices,hdim2_indeces]= np.nonzero(region) + [vdim_indyces,hdim1_indices,hdim2_indeces]= [vdim_indyces_dict[cur_idx][:max_cur_idx],hdim1_indices_dict[cur_idx][:max_cur_idx],hdim2_indeces_dict[cur_idx][:max_cur_idx]] + #print(region.shape, np.array(list(zip(vdim_indyces,hdim1_indices,hdim2_indeces)))) + region[vdim_indyces,hdim1_indices,hdim2_indeces] = True + + #print(vdim_indyces, hdim1_indices, hdim2_indeces) + #print("Original indices shape: ", np.shape(vdim_indyces), np.shape(hdim1_indices), np.shape(hdim2_indeces)) + #region=labels[:,:,:] == cur_idx + #all_matching_indices = np.argwhere(labels==cur_idx) + #[vdim_indyces,hdim1_indices,hdim2_indeces]=all_matching_indices.T + #print(all_matching_indices) + #print("New indices shape: ", np.shape(vdim_indyces), np.shape(hdim1_indices), np.shape(hdim2_indeces)) + + #write region for individual threshold and feature to dict + region_i=list(zip(vdim_indyces,hdim1_indices,hdim2_indeces)) + regions[cur_idx+idx_start]=region_i + # Determine feature position for region by one of the following methods: + vdim_index,hdim1_index,hdim2_index=feature_position_3D(vdim_indyces,hdim1_indices,hdim2_indeces,region,data_i,threshold,position_threshold,target) + #create individual DataFrame row in tracky format for identified feature + list_features_threshold.append({'frame': int(i_time), + 'idx':cur_idx+idx_start, + 'vdim': vdim_index, + 'hdim_1': hdim1_index, + 'hdim_2':hdim2_index, + 'num':max_cur_idx, + 'threshold_value':threshold}) + features_threshold=pd.DataFrame(list_features_threshold) + #else: + # features_threshold=pd.DataFrame() + # regions=dict() + + return features_threshold, regions + + +from numba import jit +from numba.typed import Dict +import numba.core.types +import numba + +# Make array type. Type-expression is not supported in jit +# functions. 
+ +@jit(nopython=True) +def my_np_resize(a, new_size): + new = np.zeros(new_size, a.dtype) + new[:a.size] = a + return new + + +int_array = numba.core.types.int64[:] +int_array_3d = numba.core.types.int64[:,:,:] +@jit((int_array_3d, int_array, numba.core.types.int64),nopython=True) +def get_indices_of_labels(labels, indices, max_init_size): + + ''' + Returns 3 dicts of label indices + ''' + label_shape =labels.shape + z_shape = label_shape[0] + y_shape = label_shape[1] + x_shape = label_shape[2] + z_indices = Dict.empty( + key_type=numba.core.types.int64, + value_type=int_array, + ) + x_indices = Dict.empty( + key_type=numba.core.types.int64, + value_type=int_array, + ) + y_indices = Dict.empty( + key_type=numba.core.types.int64, + value_type=int_array, + ) + + curr_loc_indices = Dict.empty( + key_type=numba.core.types.int64, + value_type=int_array, + ) + i = 0 + for index in indices: + #pass + curr_loc_indices[index] = np.array([0,]) + z_indices[index] = np.empty((max_init_size,),dtype=np.int64) + x_indices[index] = np.empty((max_init_size,),dtype=np.int64) + y_indices[index] = np.empty((max_init_size,),dtype=np.int64) + + + for z in range(z_shape): + for y in range(y_shape): + for x in range(x_shape): + curr_label = labels[z,y,x] + for index in indices: + if curr_label == index: + curr_loc_ix = curr_loc_indices[index][0] + if curr_loc_ix == z_indices[index].size: + curr_arr_sz = z_indices[index].size + z_indices[index]= my_np_resize(z_indices[index], curr_arr_sz*2) + x_indices[index]= my_np_resize(x_indices[index], curr_arr_sz*2) + y_indices[index]= my_np_resize(y_indices[index], curr_arr_sz*2) + + z_indices[index][curr_loc_ix] = z + x_indices[index][curr_loc_ix] = x + y_indices[index][curr_loc_ix] = y + curr_loc_indices[index][0]+=1 + + return [curr_loc_indices, z_indices, y_indices, x_indices] + +def feature_position_3D(vdim_indyces,hdim1_indices,hdim2_indeces,region,track_data,threshold_i,position_threshold, target): + ''' + function to determine feature position + Input: + vdim_indyces: list + + hdim1_indices: list + + hdim2_indeces: list + + region: list + list of 2-element tuples + track_data: numpy.ndarray + 2D numpy array containing the data + + threshold_i: float + + position_threshold: str + + target: str + + Output: + vdim_index: float + feature position along vertical dimension + hdim1_index: float + feature position along 1st horizontal dimension + hdim2_index: float + feature position along 2nd horizontal dimension + ''' + if position_threshold=='center': + # get position as geometrical centre of identified region: + vdim_index=np.mean(vdim_indyces) + hdim1_index=np.mean(hdim1_indices) + hdim2_index=np.mean(hdim2_indeces) + + elif position_threshold=='extreme': + #get position as max/min position inside the identified region: + if target == 'maximum': + index=np.argmax(track_data[region]) + vdim_index=vdim_indyces[index] + hdim1_index=hdim1_indices[index] + hdim2_index=hdim2_indeces[index] + + if target == 'minimum': + index=np.argmin(track_data[region]) + vdim_index=vdim_indyces[index] + hdim1_index=hdim1_indices[index] + hdim2_index=hdim2_indeces[index] + + elif position_threshold=='weighted_diff': + # get position as centre of identified region, weighted by difference from the threshold: + weights=abs(track_data[region]-threshold_i) + if sum(weights)==0: + weights=None + vdim_index=np.average(vdim_indyces,weights=weights) + hdim1_index=np.average(hdim1_indices,weights=weights) + hdim2_index=np.average(hdim2_indeces,weights=weights) + + elif 
position_threshold=='weighted_abs': + # get position as centre of identified region, weighted by absolute values if the field: + weights=abs(track_data[region]) + if sum(weights)==0: + weights=None + vdim_index=np.average(vdim_indyces,weights=weights) + hdim1_index=np.average(hdim1_indices,weights=weights) + hdim2_index=np.average(hdim2_indeces,weights=weights) + else: + raise ValueError('position_threshold must be center,extreme,weighted_diff or weighted_abs') + return vdim_index,hdim1_index,hdim2_index + +def remove_parents_3D(features_thresholds,regions_i,regions_old): + ''' + function to remove features whose regions surround newly detected feature regions + Input: + features_thresholds: pandas.DataFrame + Dataframe containing detected features + regions_i: dict + dictionary containing the regions above/below threshold for the newly detected feature (feature ids as keys) + regions_old: dict + dictionary containing the regions above/below threshold from previous threshold (feature ids as keys) + Output: + features_thresholds pandas.DataFrame + Dataframe containing detected features excluding those that are superseded by newly detected ones + ''' + list_remove=[] + for idx_i,region_i in regions_i.items(): + for idx_old,region_old in regions_old.items(): + if test_overlap(regions_old[idx_old],regions_i[idx_i]): + list_remove.append(idx_old) + list_remove=list(set(list_remove)) + # remove parent regions: + if features_thresholds is not None: + features_thresholds=features_thresholds[~features_thresholds['idx'].isin(list_remove)] + + return features_thresholds + +def filter_min_distance_3D(features,dxy,dz,min_distance): + ''' Function to perform feature detection based on contiguous regions above/below a threshold + Input: + features: pandas DataFrame + features + dxy: float + horzontal grid spacing (m) + dz: float array + vertical grid spacing (m) + min_distance: float + minimum distance between detected features (m) + Output: + features: pandas DataFrame + features + ''' + from itertools import combinations + remove_list_distance=[] + #create list of tuples with all combinations of features at the timestep: + indeces=combinations(features.index.values,2) + #Loop over combinations to remove features that are closer together than min_distance and keep larger one (either higher threshold or larger area) + for index_1,index_2 in indeces: + if index_1 is not index_2: + #features.loc[index_1,'hdim_1'] + #distance=dxy*np.sqrt((features.loc[index_1,'hdim_1']-features.loc[index_2,'hdim_1'])**2+(features.loc[index_1,'hdim_2']-features.loc[index_2,'hdim_2'])**2) + distance=np.sqrt((features.loc[index_1,'projection_x_coordinate']-features.loc[index_2,'projection_x_coordinate'])**2 + (features.loc[index_1,'projection_y_coordinate']-features.loc[index_2,'projection_y_coordinate'])**2 + (features.loc[index_1,'altitude']-features.loc[index_2,'altitude'])**2) + + if distance <= min_distance: +# logging.debug('distance<= min_distance: ' + str(distance)) + if features.loc[index_1,'threshold_value']>features.loc[index_2,'threshold_value']: + remove_list_distance.append(index_2) + elif features.loc[index_1,'threshold_value']features.loc[index_2,'num']: + remove_list_distance.append(index_2) + elif features.loc[index_1,'num'] Date: Thu, 11 Nov 2021 10:26:16 -0700 Subject: [PATCH 02/82] Updated documentation I went in and updated most of the documentation, although not all of it. It now compiles locally on my machine and should be compatible with readthedocs. 
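For reference alongside the new output-variable tables, here is a minimal sketch of driving the per-timestep 3D detection from PATCH 01 on a synthetic field. Everything in it is illustrative rather than canonical: the Gaussian input, the threshold values, and the printed column selection are assumptions; the per-timestep entry point is used so no extra cube coordinates are needed; and it presumes the pandas/numba versions current when this patch was written (e.g. DataFrame.append).

```
import numpy as np
from iris.cube import Cube
from tobac.feature_detection import feature_detection_multithreshold_timestep_3D

# Synthetic (z, y, x) field with a single 3D Gaussian maximum -- purely
# illustrative input, not a tobac-provided test case.
z, y, x = np.meshgrid(np.arange(20), np.arange(60), np.arange(60), indexing="ij")
w = 10.0 * np.exp(-((z - 10) ** 2 / 18.0 + ((y - 30) ** 2 + (x - 30) ** 2) / 200.0))

# Thresholds are processed from loosest to strictest; remove_parents_3D drops
# the surrounding "parent" regions as stricter thresholds are reached.
features = feature_detection_multithreshold_timestep_3D(
    Cube(w, var_name="w"), 0,
    threshold=[3.0, 5.0, 8.0],
    target="maximum",
    position_threshold="weighted_diff",
)
print(features[["frame", "idx", "vdim", "hdim_1", "hdim_2", "num", "threshold_value"]])
```

By symmetry this should leave a single feature at threshold 8.0 positioned near (vdim, hdim_1, hdim_2) = (10, 30, 30), i.e. exactly the vdim/hdim_1/hdim_2/num/threshold_value columns documented in the CSV tables below.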
--- doc/_static/theme_overrides.css | 17 +++++++++++++++++ doc/analysis.rst | 2 +- doc/data_input.rst | 2 +- doc/examples.rst | 2 +- doc/feature_detection_3D_out_vars.csv | 4 ++++ doc/feature_detection_base_out_vars.csv | 16 ++++++++++++++++ doc/feature_detection_output.rst | 18 ++++++++++++++++++ doc/index.rst | 22 +++++++++++++++------- doc/installation.rst | 10 +++++++++- doc/plotting.rst | 2 +- 10 files changed, 83 insertions(+), 12 deletions(-) create mode 100644 doc/_static/theme_overrides.css create mode 100644 doc/feature_detection_3D_out_vars.csv create mode 100644 doc/feature_detection_base_out_vars.csv create mode 100644 doc/feature_detection_output.rst diff --git a/doc/_static/theme_overrides.css b/doc/_static/theme_overrides.css new file mode 100644 index 00000000..4f6920b2 --- /dev/null +++ b/doc/_static/theme_overrides.css @@ -0,0 +1,17 @@ +/* from https://github.com/readthedocs/sphinx_rtd_theme/issues/117#issuecomment-41506687 */ +/* with augmentations from https://github.com/readthedocs/sphinx_rtd_theme/issues/117#issuecomment-153083280 */ +/* override table width restrictions */ +@media screen and (min-width: 767px) { + + .wy-table-responsive table td { + /* !important prevents the common CSS stylesheets from + overriding this as on RTD they are loaded after this stylesheet */ + white-space: normal !important; + } + + .wy-table-responsive { + overflow: visible !important; + } + + } + \ No newline at end of file diff --git a/doc/analysis.rst b/doc/analysis.rst index 13483a2a..1ecc81b8 100644 --- a/doc/analysis.rst +++ b/doc/analysis.rst @@ -1,5 +1,5 @@ Analysis -======= +========= tobac provides several analysis functions that allow for the calculation of important quantities based on the tracking results. This includes the calculation of important properties of the tracked objects such as cloud lifetimes, cloud areas/volumes, but also allows for a convenient calculation of statistics for arbitratry fields of the same shape as as the input data used for the tracking analysis. diff --git a/doc/data_input.rst b/doc/data_input.rst index 68eb3277..cbbed02e 100644 --- a/doc/data_input.rst +++ b/doc/data_input.rst @@ -1,4 +1,4 @@ -*Data input and output +Data input and output ====================== Input data for tobac should consist of one or more fields on a common, regular grid with a time dimension and two or more spatial dimensions. The input data should also include latitude and longitude coordinates, either as 1-d or 2-d variables depending on the grid used. diff --git a/doc/examples.rst b/doc/examples.rst index 7a98fc71..05b24fbf 100644 --- a/doc/examples.rst +++ b/doc/examples.rst @@ -1,5 +1,5 @@ Example notebooks -=============== +================== tobac is provided with a set of Jupyter notebooks that show examples of the application of tobac for different types of datasets. The notebooks can be found in the **examples** folder in the the repository. The necessary input data for these examples is avaliable on zenodo: diff --git a/doc/feature_detection_3D_out_vars.csv b/doc/feature_detection_3D_out_vars.csv new file mode 100644 index 00000000..4b5d23b1 --- /dev/null +++ b/doc/feature_detection_3D_out_vars.csv @@ -0,0 +1,4 @@ +Variable Name,Description,Units,Type +vdim,vertical dimension in grid point space,Number of grid points,float64 +z,grid point z location of the feature (see vdim). 
Note that this is not necessarily an integer value depending on your selection of position_threshold,Number of grid points,float64 +altitude,z location of the feature above ground level,meters,float64 \ No newline at end of file diff --git a/doc/feature_detection_base_out_vars.csv b/doc/feature_detection_base_out_vars.csv new file mode 100644 index 00000000..ce935797 --- /dev/null +++ b/doc/feature_detection_base_out_vars.csv @@ -0,0 +1,16 @@ +Variable Name,Description,Units,Type +frame,Frame/time/file number; starts from 0 and increments by 1 to N times. ,n/a,int64 +idx,"Feature number within that frame; starts at 1, increments by 1 to the number of features for each frame, and resets to 1 when the frame increments",n/a,int64 +hdim_1,"First horizontal dimension in grid point space (typically, although not always, N/S or y space)",Number of grid points,float64 +hdim_2,"Second horizontal dimension in grid point space (typically, although not always, E/W or x space)",Number of grid points,float64 +num,Number of grid points that are within the threshold of this feature,Number of grid points,int64 +threshold_value,Maximum threshold value reached by the feature,Units of the input feature,int64(?) +feature,Unique number of the feature; starts from 1 and increments by 1 to the number of features,n/a,int64 +time,Time of the feature,Date and time,object/python datetime +timestr,String representation of the feature time,YYYY-MM-DD HH:MM:SS,object/string +y,Grid point y location of the feature (see hdim_1 and hdim_2). Note that this is not necessarily an integer value depending on your selection of position_threshold,Number of grid points,float64 +x,Grid point x location of the feature (see also y),Number of grid points,float64 +projection_y_coordinate,Y location of the feature in projection coordinates,Projection coordinates (usually m),float64 +projection_x_coordinate,X location of the feature in projection coodinates,Projection coordinates (usually m),float64 +lat,Latitude of the feature,Decimal degrees,float64 +lon,Longitude of the feature,Decimal degrees,float64 \ No newline at end of file diff --git a/doc/feature_detection_output.rst b/doc/feature_detection_output.rst new file mode 100644 index 00000000..4523800f --- /dev/null +++ b/doc/feature_detection_output.rst @@ -0,0 +1,18 @@ +Feature detection output +------------------------- + +Feature detection outputs a `pandas` dataframe with several variables. The variables, (with column names listed in the `Variable Name` column), are described below, with units. Note that while these variables come initially from the feature detection step, segmentation and tracking also share some of these variables. + +Variables that are common to all feature detection files: + +.. csv-table:: tobac Feature Detection Output Variables + :file: ./feature_detection_base_out_vars.csv + :widths: 3, 35, 3, 3 + :header-rows: 1 + +Variables that are included when using 3D feature detection in addition to those above: + +.. 
csv-table:: tobac 3D Feature Detection Output Variables + :file: ./feature_detection_3D_out_vars.csv + :widths: 3, 35, 3, 3 + :header-rows: 1 diff --git a/doc/index.rst b/doc/index.rst index 86540728..0f171120 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -1,18 +1,20 @@ tobac - Tracking and Object-Based Analysis of Clouds ------------ +------------------------------------------------------- -**tobac** is a Python package to identify, track and analyse clouds in different types of gridded datasets, such as 3D model output from cloud resolving model simulations or 2D data from satellite retrievals. +**tobac** is a Python package to identify, track and analyze clouds in different types of gridded datasets, such as 3D model output from cloud-resolving model simulations or 2D data from satellite retrievals. -The software is set up in a modular way to include different algorithms for feature identification, tracking and analyses. -In the current implementation, individual features are indentified as either maxima or minima in a two dimensional time varying field. The volume/are associated with the identified object can be determined based on a time-varying 2D or 3D field and a threshold value. In the tracking step, the identified objects are linked into consistent trajectories representing the cloud over its lifecycle. Analysis and visualisation methods provide a convenient way to use and display the tracking results. +The software is set up in a modular way to include different algorithms for feature identification, tracking, and analyses. **tobac** is also input variable agnostic and doesn't rely on specific input variables to work. -Version 1.0 of tobac and some example applications are described in a paper that is currently in discussion for the journal "Geoscientific Model Development" as: +In the current implementation, individual features are identified as either maxima or minima in a two or three-dimensional time-varying field. An associated volume can then be determined using these features with a separate (or identical) time-varying 2D or 3D field and a threshold value. The identified objects are linked into consistent trajectories representing the cloud over its lifecycle in the tracking step. Analysis and visualization methods provide a convenient way to use and display the tracking results. -Heikenfeld, M., Marinescu, P. J., Christensen, M., Watson-Parris, D., Senf, F., van den Heever, S. C., and Stier, P.: tobac v1.0: towards a flexible framework for tracking and analysis of clouds in diverse datasets, Geosci. Model Dev. Discuss., `https://doi.org/10.5194/gmd-2019-105 `_ , in review, 2019. +Version 1.2 of tobac and some example applications are described in a manuscript in Geoscientific Model Development as: -The project is currently extended by several contributors to include additional workflows and algorithms using the same structure, synthax and data formats. +Heikenfeld, M., Marinescu, P. J., Christensen, M., Watson-Parris, D., Senf, F., van den Heever, S. C., and Stier, P.: tobac 1.2: towards a flexible framework for tracking and analysis of clouds in diverse datasets, Geosci. Model Dev., 12, 4551–4570, https://doi.org/10.5194/gmd-12-4551-2019, 2019. + +The project is currently being extended by several contributors to include additional workflows and algorithms using the same structure, syntax, and data formats. .. 
toctree:: + :caption: Basic Information :maxdepth: 2 :numbered: @@ -24,3 +26,9 @@ The project is currently extended by several contributors to include additional analysis plotting examples + +.. toctree:: + :caption: Output Documentation + :maxdepth: 2 + + feature_detection_output \ No newline at end of file diff --git a/doc/installation.rst b/doc/installation.rst index f8895e4a..10488c1d 100644 --- a/doc/installation.rst +++ b/doc/installation.rst @@ -2,17 +2,25 @@ Installation ------------ tobac is now capable of working with both Python 2 and Python 3 (tested for 2.7,3.6 and 3.7) installations. -The easiest way is to install the most recent version of tobac via conda and the conda-forge channel: +The easiest way is to install the most recent version of tobac via conda or mamba and the conda-forge channel: ``` conda install -c conda-forge tobac ``` +or +``` +mamba install -c conda-forge tobac +``` This will take care of all necessary dependencies and should do the job for most users and also allows for an easy update of the installation by ``` conda update -c conda-forge tobac ``` +or +``` +mamba update -c conda-forge tobac +``` You can also install conda via pip, which is mainly interesting for development purposed or to use specific development branches for the Github repository. diff --git a/doc/plotting.rst b/doc/plotting.rst index ae11e5cf..3425a0e5 100644 --- a/doc/plotting.rst +++ b/doc/plotting.rst @@ -1,3 +1,3 @@ Plotting -------- +--------- tobac provides functions to conveniently visualise the tracking results and analyses. From 7ac7bb2a4a2bfd34028a0b23cdaf8a954291b64b Mon Sep 17 00:00:00 2001 From: galexsky <90701223+galexsky@users.noreply.github.com> Date: Thu, 11 Nov 2021 11:09:13 -0700 Subject: [PATCH 03/82] Update analysis.rst Made a few tweaks for brevity/typos --- doc/analysis.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/analysis.rst b/doc/analysis.rst index 1ecc81b8..70706aea 100644 --- a/doc/analysis.rst +++ b/doc/analysis.rst @@ -1,5 +1,5 @@ Analysis ========= -tobac provides several analysis functions that allow for the calculation of important quantities based on the tracking results. This includes the calculation of important properties of the tracked objects such as cloud lifetimes, cloud areas/volumes, but also allows for a convenient calculation of statistics for arbitratry fields of the same shape as as the input data used for the tracking analysis. +tobac provides several analysis functions that allow for the calculation of important quantities based on the tracking results. This includes the calculation of properties such as cloud lifetimes and cloud areas/volumes, but also allows for a convenient calculation of statistics for arbitrary fields of the same shape as as the input data used for the tracking analysis. 
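Since the cell lifetimes mentioned in analysis.rst derive directly from the tracking output, a short plain-pandas sketch shows the idea; this is not a tobac API call, and the helper name here is made up. It relies only on the 'cell' and 'time_cell' columns that linking_trackpy attaches ('cell' is NaN for stub trajectories, and 'time_cell' is the time since cell initiation).

```
import pandas as pd

def cell_lifetimes(tracks: pd.DataFrame) -> pd.Series:
    """Lifetime of each tracked cell as a timedelta.

    Assumes `tracks` came out of tobac's linking_trackpy, which sets
    'cell' to NaN for stubs and 'time_cell' to time since initiation.
    """
    valid = tracks.dropna(subset=["cell"])          # drop stub trajectories
    return valid.groupby("cell")["time_cell"].max() # last time since initiation

# Typical use, assuming `tracks` is a linking_trackpy result:
# lifetimes = cell_lifetimes(tracks)
# print(lifetimes.describe())
```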
From 41da11e59f8e14e9c6721e697be0777dc7613c1e Mon Sep 17 00:00:00 2001 From: galexsky <90701223+galexsky@users.noreply.github.com> Date: Thu, 11 Nov 2021 12:28:11 -0700 Subject: [PATCH 04/82] Update installation.rst Cleaned up a sentence and typo or two --- doc/installation.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/installation.rst b/doc/installation.rst index 10488c1d..7caaf31b 100644 --- a/doc/installation.rst +++ b/doc/installation.rst @@ -12,7 +12,7 @@ or mamba install -c conda-forge tobac ``` -This will take care of all necessary dependencies and should do the job for most users and also allows for an easy update of the installation by +This will take care of all necessary dependencies and should do the job for most users. It also allows for an easy update of the installation by ``` conda update -c conda-forge tobac @@ -23,7 +23,7 @@ mamba update -c conda-forge tobac ``` -You can also install conda via pip, which is mainly interesting for development purposed or to use specific development branches for the Github repository. +You can also install conda via pip, which is mainly interesting for development purposes or using specific development branches for the Github repository. The follwoing python packages are required (including dependencies of these packages): From 250b69d1687ca5d9a3d8a33f6d2b64f550f298f8 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Thu, 11 Nov 2021 12:30:08 -0700 Subject: [PATCH 05/82] Updated documentation (#2) * Updated documentation I went in and updated most of the documentation, although not all of it. It now compiles locally on my machine and should be compatible with readthedocs. * Update analysis.rst Made a few tweaks for brevity/typos * Update installation.rst Cleaned up a sentence and typo or two Co-authored-by: galexsky <90701223+galexsky@users.noreply.github.com> --- doc/_static/theme_overrides.css | 17 +++++++++++++++++ doc/analysis.rst | 4 ++-- doc/data_input.rst | 2 +- doc/examples.rst | 2 +- doc/feature_detection_3D_out_vars.csv | 4 ++++ doc/feature_detection_base_out_vars.csv | 16 ++++++++++++++++ doc/feature_detection_output.rst | 18 ++++++++++++++++++ doc/index.rst | 22 +++++++++++++++------- doc/installation.rst | 14 +++++++++++--- doc/plotting.rst | 2 +- 10 files changed, 86 insertions(+), 15 deletions(-) create mode 100644 doc/_static/theme_overrides.css create mode 100644 doc/feature_detection_3D_out_vars.csv create mode 100644 doc/feature_detection_base_out_vars.csv create mode 100644 doc/feature_detection_output.rst diff --git a/doc/_static/theme_overrides.css b/doc/_static/theme_overrides.css new file mode 100644 index 00000000..4f6920b2 --- /dev/null +++ b/doc/_static/theme_overrides.css @@ -0,0 +1,17 @@ +/* from https://github.com/readthedocs/sphinx_rtd_theme/issues/117#issuecomment-41506687 */ +/* with augmentations from https://github.com/readthedocs/sphinx_rtd_theme/issues/117#issuecomment-153083280 */ +/* override table width restrictions */ +@media screen and (min-width: 767px) { + + .wy-table-responsive table td { + /* !important prevents the common CSS stylesheets from + overriding this as on RTD they are loaded after this stylesheet */ + white-space: normal !important; + } + + .wy-table-responsive { + overflow: visible !important; + } + + } + \ No newline at end of file diff --git a/doc/analysis.rst b/doc/analysis.rst index 13483a2a..70706aea 100644 --- a/doc/analysis.rst +++ b/doc/analysis.rst @@ -1,5 +1,5 @@ Analysis -======= -tobac provides several analysis functions that allow for the 
calculation of important quantities based on the tracking results. This includes the calculation of important properties of the tracked objects such as cloud lifetimes, cloud areas/volumes, but also allows for a convenient calculation of statistics for arbitratry fields of the same shape as as the input data used for the tracking analysis. +========= +tobac provides several analysis functions that allow for the calculation of important quantities based on the tracking results. This includes the calculation of properties such as cloud lifetimes and cloud areas/volumes, but also allows for a convenient calculation of statistics for arbitrary fields of the same shape as as the input data used for the tracking analysis. diff --git a/doc/data_input.rst b/doc/data_input.rst index 68eb3277..cbbed02e 100644 --- a/doc/data_input.rst +++ b/doc/data_input.rst @@ -1,4 +1,4 @@ -*Data input and output +Data input and output ====================== Input data for tobac should consist of one or more fields on a common, regular grid with a time dimension and two or more spatial dimensions. The input data should also include latitude and longitude coordinates, either as 1-d or 2-d variables depending on the grid used. diff --git a/doc/examples.rst b/doc/examples.rst index 7a98fc71..05b24fbf 100644 --- a/doc/examples.rst +++ b/doc/examples.rst @@ -1,5 +1,5 @@ Example notebooks -=============== +================== tobac is provided with a set of Jupyter notebooks that show examples of the application of tobac for different types of datasets. The notebooks can be found in the **examples** folder in the the repository. The necessary input data for these examples is avaliable on zenodo: diff --git a/doc/feature_detection_3D_out_vars.csv b/doc/feature_detection_3D_out_vars.csv new file mode 100644 index 00000000..4b5d23b1 --- /dev/null +++ b/doc/feature_detection_3D_out_vars.csv @@ -0,0 +1,4 @@ +Variable Name,Description,Units,Type +vdim,vertical dimension in grid point space,Number of grid points,float64 +z,grid point z location of the feature (see vdim). Note that this is not necessarily an integer value depending on your selection of position_threshold,Number of grid points,float64 +altitude,z location of the feature above ground level,meters,float64 \ No newline at end of file diff --git a/doc/feature_detection_base_out_vars.csv b/doc/feature_detection_base_out_vars.csv new file mode 100644 index 00000000..ce935797 --- /dev/null +++ b/doc/feature_detection_base_out_vars.csv @@ -0,0 +1,16 @@ +Variable Name,Description,Units,Type +frame,Frame/time/file number; starts from 0 and increments by 1 to N times. ,n/a,int64 +idx,"Feature number within that frame; starts at 1, increments by 1 to the number of features for each frame, and resets to 1 when the frame increments",n/a,int64 +hdim_1,"First horizontal dimension in grid point space (typically, although not always, N/S or y space)",Number of grid points,float64 +hdim_2,"Second horizontal dimension in grid point space (typically, although not always, E/W or x space)",Number of grid points,float64 +num,Number of grid points that are within the threshold of this feature,Number of grid points,int64 +threshold_value,Maximum threshold value reached by the feature,Units of the input feature,int64(?) 
+feature,Unique number of the feature; starts from 1 and increments by 1 to the number of features,n/a,int64 +time,Time of the feature,Date and time,object/python datetime +timestr,String representation of the feature time,YYYY-MM-DD HH:MM:SS,object/string +y,Grid point y location of the feature (see hdim_1 and hdim_2). Note that this is not necessarily an integer value depending on your selection of position_threshold,Number of grid points,float64 +x,Grid point x location of the feature (see also y),Number of grid points,float64 +projection_y_coordinate,Y location of the feature in projection coordinates,Projection coordinates (usually m),float64 +projection_x_coordinate,X location of the feature in projection coodinates,Projection coordinates (usually m),float64 +lat,Latitude of the feature,Decimal degrees,float64 +lon,Longitude of the feature,Decimal degrees,float64 \ No newline at end of file diff --git a/doc/feature_detection_output.rst b/doc/feature_detection_output.rst new file mode 100644 index 00000000..4523800f --- /dev/null +++ b/doc/feature_detection_output.rst @@ -0,0 +1,18 @@ +Feature detection output +------------------------- + +Feature detection outputs a `pandas` dataframe with several variables. The variables, (with column names listed in the `Variable Name` column), are described below, with units. Note that while these variables come initially from the feature detection step, segmentation and tracking also share some of these variables. + +Variables that are common to all feature detection files: + +.. csv-table:: tobac Feature Detection Output Variables + :file: ./feature_detection_base_out_vars.csv + :widths: 3, 35, 3, 3 + :header-rows: 1 + +Variables that are included when using 3D feature detection in addition to those above: + +.. csv-table:: tobac 3D Feature Detection Output Variables + :file: ./feature_detection_3D_out_vars.csv + :widths: 3, 35, 3, 3 + :header-rows: 1 diff --git a/doc/index.rst b/doc/index.rst index 86540728..0f171120 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -1,18 +1,20 @@ tobac - Tracking and Object-Based Analysis of Clouds ------------ +------------------------------------------------------- -**tobac** is a Python package to identify, track and analyse clouds in different types of gridded datasets, such as 3D model output from cloud resolving model simulations or 2D data from satellite retrievals. +**tobac** is a Python package to identify, track and analyze clouds in different types of gridded datasets, such as 3D model output from cloud-resolving model simulations or 2D data from satellite retrievals. -The software is set up in a modular way to include different algorithms for feature identification, tracking and analyses. -In the current implementation, individual features are indentified as either maxima or minima in a two dimensional time varying field. The volume/are associated with the identified object can be determined based on a time-varying 2D or 3D field and a threshold value. In the tracking step, the identified objects are linked into consistent trajectories representing the cloud over its lifecycle. Analysis and visualisation methods provide a convenient way to use and display the tracking results. +The software is set up in a modular way to include different algorithms for feature identification, tracking, and analyses. **tobac** is also input variable agnostic and doesn't rely on specific input variables to work. 
-Version 1.0 of tobac and some example applications are described in a paper that is currently in discussion for the journal "Geoscientific Model Development" as: +In the current implementation, individual features are identified as either maxima or minima in a two or three-dimensional time-varying field. An associated volume can then be determined using these features with a separate (or identical) time-varying 2D or 3D field and a threshold value. The identified objects are linked into consistent trajectories representing the cloud over its lifecycle in the tracking step. Analysis and visualization methods provide a convenient way to use and display the tracking results. -Heikenfeld, M., Marinescu, P. J., Christensen, M., Watson-Parris, D., Senf, F., van den Heever, S. C., and Stier, P.: tobac v1.0: towards a flexible framework for tracking and analysis of clouds in diverse datasets, Geosci. Model Dev. Discuss., `https://doi.org/10.5194/gmd-2019-105 `_ , in review, 2019. +Version 1.2 of tobac and some example applications are described in a manuscript in Geoscientific Model Development as: -The project is currently extended by several contributors to include additional workflows and algorithms using the same structure, synthax and data formats. +Heikenfeld, M., Marinescu, P. J., Christensen, M., Watson-Parris, D., Senf, F., van den Heever, S. C., and Stier, P.: tobac 1.2: towards a flexible framework for tracking and analysis of clouds in diverse datasets, Geosci. Model Dev., 12, 4551–4570, https://doi.org/10.5194/gmd-12-4551-2019, 2019. + +The project is currently being extended by several contributors to include additional workflows and algorithms using the same structure, syntax, and data formats. .. toctree:: + :caption: Basic Information :maxdepth: 2 :numbered: @@ -24,3 +26,9 @@ The project is currently extended by several contributors to include additional analysis plotting examples + +.. toctree:: + :caption: Output Documentation + :maxdepth: 2 + + feature_detection_output \ No newline at end of file diff --git a/doc/installation.rst b/doc/installation.rst index f8895e4a..7caaf31b 100644 --- a/doc/installation.rst +++ b/doc/installation.rst @@ -2,20 +2,28 @@ Installation ------------ tobac is now capable of working with both Python 2 and Python 3 (tested for 2.7,3.6 and 3.7) installations. -The easiest way is to install the most recent version of tobac via conda and the conda-forge channel: +The easiest way is to install the most recent version of tobac via conda or mamba and the conda-forge channel: ``` conda install -c conda-forge tobac ``` +or +``` +mamba install -c conda-forge tobac +``` -This will take care of all necessary dependencies and should do the job for most users and also allows for an easy update of the installation by +This will take care of all necessary dependencies and should do the job for most users. It also allows for an easy update of the installation by ``` conda update -c conda-forge tobac ``` +or +``` +mamba update -c conda-forge tobac +``` -You can also install conda via pip, which is mainly interesting for development purposed or to use specific development branches for the Github repository. +You can also install conda via pip, which is mainly interesting for development purposes or using specific development branches for the Github repository. 
The follwoing python packages are required (including dependencies of these packages): diff --git a/doc/plotting.rst b/doc/plotting.rst index ae11e5cf..3425a0e5 100644 --- a/doc/plotting.rst +++ b/doc/plotting.rst @@ -1,3 +1,3 @@ Plotting -------- +--------- tobac provides functions to conveniently visualise the tracking results and analyses. From df6b6b075c247a17b6db8396ad10c011c6fbef8b Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Thu, 11 Nov 2021 13:52:35 -0700 Subject: [PATCH 06/82] Updated the tracking function to the new, faster option (#4) * Updated the tracking function to the new, faster option In addition to updating the tracking, I've added a Dockerfile in here for easier testing using pytest. * Added pycache to the gitignore * Update tracking.py Added the 3D version of the updated tracking function. Looks and works essentially the same as the original, but the function name has a '_3D' appended to it and the function's guts now include the vertical dimension 'vdim' in the arguments for the trackpy link/link_df functions. * Update tracking.py Tweaked "my_linking_trackpy_3D" to change it to "linking_trackpy_3D" for consistency with the non-3D function name * Update tracking.py Removed 3D tracking function so we can merge with consistency. Co-authored-by: galexsky <90701223+galexsky@users.noreply.github.com> --- .gitignore | 2 ++ Dockerfile | 15 +++++++++++++ tobac/tracking.py | 55 +++++++++++++++++++++++++++++++---------------- 3 files changed, 54 insertions(+), 18 deletions(-) create mode 100644 .gitignore create mode 100644 Dockerfile diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..a295864e --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.pyc +__pycache__ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..b228c287 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,15 @@ +FROM mambaorg/micromamba + +#WORKDIR . +COPY . ./ + +RUN micromamba install -y -n base -c conda-forge numpy \ + scipy scikit-image pandas pytables matplotlib iris \ + cf-units xarray cartopy trackpy numba pytest + +# Make RUN commands use the new environment: +#SHELL ["micromamba", "run", "-n", "myenv", "/bin/bash", "-c"] + +RUN pip install . + +RUN pytest diff --git a/tobac/tracking.py b/tobac/tracking.py index c864e6ef..94e27524 100644 --- a/tobac/tracking.py +++ b/tobac/tracking.py @@ -2,6 +2,9 @@ import numpy as np import pandas as pd + + + def linking_trackpy(features,field_in,dt,dxy, v_max=None,d_max=None,d_min=None,subnetwork_size=None, memory=0,stubs=1,time_cell_min=None, @@ -32,6 +35,7 @@ def linking_trackpy(features,field_in,dt,dxy, flag choosing method used for trajectory linking """ # from trackpy import link_df + # from trackpy import link_df import trackpy as tp from copy import deepcopy # from trackpy import filter_stubs @@ -53,9 +57,9 @@ def linking_trackpy(features,field_in,dt,dxy, stubs=np.floor(time_cell_min/dt)+1 - logging.debug('stubs: '+ str(stubs)) + #logging.debug('stubs: '+ str(stubs)) - logging.debug('start linking features into trajectories') + #logging.debug('start linking features into trajectories') #If subnetwork size given, set maximum subnet size @@ -98,20 +102,27 @@ def linking_trackpy(features,field_in,dt,dxy, # Reset particle numbers from the arbitray numbers at the end of the feature detection and linking to consecutive cell numbers # keep 'particle' for reference to the feature detection step. 
trajectories_unfiltered['cell']=None + particle_num_to_cell_num = dict() for i_particle,particle in enumerate(pd.Series.unique(trajectories_unfiltered['particle'])): cell=int(i_particle+cell_number_start) - trajectories_unfiltered.loc[trajectories_unfiltered['particle']==particle,'cell']=cell + particle_num_to_cell_num[particle] = int(cell) + remap_particle_to_cell_vec = np.vectorize(remap_particle_to_cell_nv) + trajectories_unfiltered['cell'] = remap_particle_to_cell_vec(particle_num_to_cell_num, trajectories_unfiltered['particle']) + trajectories_unfiltered['cell'] = trajectories_unfiltered['cell'].astype(int) trajectories_unfiltered.drop(columns=['particle'],inplace=True) trajectories_bycell=trajectories_unfiltered.groupby('cell') + stub_cell_nums = list() for cell,trajectories_cell in trajectories_bycell: - logging.debug("cell: "+str(cell)) - logging.debug("feature: "+str(trajectories_cell['feature'].values)) - logging.debug("trajectories_cell.shape[0]: "+ str(trajectories_cell.shape[0])) - + #logging.debug("cell: "+str(cell)) + #logging.debug("feature: "+str(trajectories_cell['feature'].values)) + #logging.debug("trajectories_cell.shape[0]: "+ str(trajectories_cell.shape[0])) + if trajectories_cell.shape[0] < stubs: - logging.debug("cell" + str(cell)+ " is a stub ("+str(trajectories_cell.shape[0])+ "), setting cell number to Nan..") - trajectories_unfiltered.loc[trajectories_unfiltered['cell']==cell,'cell']=np.nan + #logging.debug("cell" + str(cell)+ " is a stub ("+str(trajectories_cell.shape[0])+ "), setting cell number to Nan..") + stub_cell_nums.append(cell) + + trajectories_unfiltered.loc[trajectories_unfiltered['cell'].isin(stub_cell_nums),'cell']=np.nan trajectories_filtered=trajectories_unfiltered @@ -132,11 +143,12 @@ def linking_trackpy(features,field_in,dt,dxy, trajectories_final=add_cell_time(trajectories_filtered_filled) # add coordinate to raw features identified: - logging.debug('start adding coordinates to detected features') - logging.debug('feature linking completed') + #logging.debug('start adding coordinates to detected features') + #logging.debug('feature linking completed') return trajectories_final + def fill_gaps(t,order=1,extrapolate=0,frame_max=None,hdim_1_max=None,hdim_2_max=None): ''' add cell time as time since the initiation of each cell Input: @@ -210,14 +222,21 @@ def add_cell_time(t): trajectories with added cell time ''' - logging.debug('start adding time relative to cell initiation') + #logging.debug('start adding time relative to cell initiation') t_grouped=t.groupby('cell') - t['time_cell']=np.nan - for cell,track in t_grouped: - track_0=track.head(n=1) - for i,row in track.iterrows(): - t.loc[i,'time_cell']=row['time']-track_0.loc[track_0.index[0],'time'] - # turn series into pandas timedelta DataSeries + + t['time_cell'] = t['time']-t.groupby('cell')['time'].transform('min') t['time_cell']=pd.to_timedelta(t['time_cell']) return t +def remap_particle_to_cell_nv(particle_cell_map, input_particle): + '''Remaps the particles to new cells given an input map and the current particle. + Designed to be vectorized with np.vectorize + + Input: + t: pandas DataFrame + trajectories with added coordinates + + + ''' + return particle_cell_map[input_particle] From 7685da16816e81277196c8d4f8d4f5c48962c6a8 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Fri, 12 Nov 2021 09:02:48 -0700 Subject: [PATCH 07/82] Added conf.py for sphinx to the doc folder. 
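One note on the tracking speedup in the previous patch: its core is replacing the per-particle DataFrame .loc assignment loop with a dictionary lookup vectorized through numpy.vectorize. A self-contained sketch of that pattern follows, with toy data; the variable names are hypothetical, not the ones in tracking.py.

```
import numpy as np
import pandas as pd

# trackpy assigns arbitrary particle ids; map them to consecutive cell numbers.
df = pd.DataFrame({"particle": [7, 7, 3, 9, 3]})
mapping = {p: i + 1 for i, p in enumerate(pd.Series.unique(df["particle"]))}

# One dict lookup per row replaces one boolean-mask assignment per particle,
# which is what lets the linking step scale to many cells.
remap = np.vectorize(lambda particle: mapping[particle])
df["cell"] = remap(df["particle"]).astype(int)
print(df)  # particles 7, 3, 9 become cells 1, 2, 3
```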
--- doc/conf.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 doc/conf.py diff --git a/doc/conf.py b/doc/conf.py new file mode 100644 index 00000000..a9a400b2 --- /dev/null +++ b/doc/conf.py @@ -0,0 +1,19 @@ +import sphinx_rtd_theme +import sys, os + +sys.path.insert(0, os.path.abspath('extensions')) + +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.todo', + 'sphinx.ext.coverage', 'sphinx.ext.imgmath', 'sphinx.ext.ifconfig', + 'sphinx_rtd_theme',] + + +html_theme = "sphinx_rtd_theme" + +project = u'tobac' + + +def setup(app): + app.add_css_file("theme_overrides.css") + + From 68d6d997ba8be6f039b1d2b91c840daadd3fab53 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Fri, 12 Nov 2021 11:20:15 -0700 Subject: [PATCH 08/82] Added API docs --- doc/conf.py | 3 ++ doc/index.rst | 9 ++++- doc/modules.rst | 7 ++++ doc/tobac.rst | 85 ++++++++++++++++++++++++++++++++++++++++ tobac/centerofgravity.py | 3 +- 5 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 doc/modules.rst create mode 100644 doc/tobac.rst diff --git a/doc/conf.py b/doc/conf.py index a9a400b2..0f0db861 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -16,4 +16,7 @@ def setup(app): app.add_css_file("theme_overrides.css") +autodoc_mock_imports = ['numpy', 'scipy', 'scikit-image', 'pandas', 'pytables', 'matplotlib', 'iris', + 'cf-units', 'xarray', 'cartopy', 'trackpy'] +sys.path.insert(0, os.path.abspath("../")) \ No newline at end of file diff --git a/doc/index.rst b/doc/index.rst index 0f171120..bd087fae 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -31,4 +31,11 @@ The project is currently being extended by several contributors to include addit :caption: Output Documentation :maxdepth: 2 - feature_detection_output \ No newline at end of file + feature_detection_output + +.. toctree:: + :caption: API Reference + :maxdepth: 2 + + tobac + diff --git a/doc/modules.rst b/doc/modules.rst new file mode 100644 index 00000000..aa0e3d9f --- /dev/null +++ b/doc/modules.rst @@ -0,0 +1,7 @@ +tobac +===== + +.. toctree:: + :maxdepth: 4 + + tobac diff --git a/doc/tobac.rst b/doc/tobac.rst new file mode 100644 index 00000000..51722c77 --- /dev/null +++ b/doc/tobac.rst @@ -0,0 +1,85 @@ +tobac package +============= + +Submodules +---------- + +tobac.analysis module +--------------------- + +.. automodule:: tobac.analysis + :members: + :undoc-members: + :show-inheritance: + +tobac.centerofgravity module +---------------------------- + +.. automodule:: tobac.centerofgravity + :members: + :undoc-members: + :show-inheritance: + +tobac.feature\_detection module +------------------------------- + +.. automodule:: tobac.feature_detection + :members: + :undoc-members: + :show-inheritance: + +tobac.plotting module +--------------------- + +.. automodule:: tobac.plotting + :members: + :undoc-members: + :show-inheritance: + +tobac.segmentation module +------------------------- + +.. automodule:: tobac.segmentation + :members: + :undoc-members: + :show-inheritance: + +tobac.testing module +-------------------- + +.. automodule:: tobac.testing + :members: + :undoc-members: + :show-inheritance: + +tobac.tracking module +--------------------- + +.. automodule:: tobac.tracking + :members: + :undoc-members: + :show-inheritance: + +tobac.utils module +------------------ + +.. automodule:: tobac.utils + :members: + :undoc-members: + :show-inheritance: + +tobac.wrapper module +-------------------- + +.. 
automodule:: tobac.wrapper
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: tobac
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/tobac/centerofgravity.py b/tobac/centerofgravity.py
index 544a4ee2..6d5762b9 100644
--- a/tobac/centerofgravity.py
+++ b/tobac/centerofgravity.py
@@ -1,7 +1,7 @@
 import logging
 
 def calculate_cog(tracks,mass,mask):
-    ''' caluclate centre of gravity and mass forech individual tracked cell in the simulation
+    '''caluclate centre of gravity and mass forech individual tracked cell in the simulation
     Input:
     tracks:         pandas.DataFrame
                     DataFrame containing trajectories of cell centres
@@ -12,6 +12,7 @@ def calculate_cog(tracks,mass,mask):
     Output:
     tracks_out      pandas.DataFrame
                     Dataframe containing t,x,y,z positions of centre of gravity and total cloud mass of each tracked cell at each timestep
+
     '''
     from .utils import mask_cube_cell
     from iris import Constraint

From c79d80a6de35b2f6a42352645b2368376bb5e073 Mon Sep 17 00:00:00 2001
From: Sean Freeman
Date: Fri, 12 Nov 2021 12:26:24 -0700
Subject: [PATCH 09/82] Switched centerofgravity documentation to numpy style

---
 doc/conf.py              |  2 +-
 tobac/centerofgravity.py | 65 ++++++++++++++++++++++++----------------
 2 files changed, 40 insertions(+), 27 deletions(-)

diff --git a/doc/conf.py b/doc/conf.py
index 0f0db861..f740960e 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -5,7 +5,7 @@
 
 extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.todo',
               'sphinx.ext.coverage', 'sphinx.ext.imgmath', 'sphinx.ext.ifconfig',
-              'sphinx_rtd_theme',]
+              'sphinx_rtd_theme','sphinx.ext.napoleon']
 
 
 html_theme = "sphinx_rtd_theme"

diff --git a/tobac/centerofgravity.py b/tobac/centerofgravity.py
index 6d5762b9..f45a4d8e 100644
--- a/tobac/centerofgravity.py
+++ b/tobac/centerofgravity.py
@@ -1,16 +1,19 @@
 import logging
 
 def calculate_cog(tracks,mass,mask):
-    '''caluclate centre of gravity and mass forech individual tracked cell in the simulation
-    Input:
-    tracks:         pandas.DataFrame
+    '''Calculate centre of gravity and mass for each individual tracked cell in the simulation
+
+    Parameters
+    ----------
+    tracks : pandas.DataFrame
         DataFrame containing trajectories of cell centres
-    mass:           iris.cube.Cube
+    mass : iris.cube.Cube
         cube of quantity (need coordinates 'time', 'geopotential_height','projection_x_coordinate' and 'projection_y_coordinate')
-    mask:           iris.cube.Cube
+    mask : iris.cube.Cube
         cube containing mask (int > where belonging to cloud volume, 0 everywhere else )
-    Output:
-    tracks_out      pandas.DataFrame
+    Returns
+    -------
+    pandas.DataFrame
         Dataframe containing t,x,y,z positions of centre of gravity and total cloud mass of each tracked cell at each timestep
 
     '''
@@ -38,15 +41,18 @@ def calculate_cog(tracks,mass,mask):
     return tracks_out
 
 def calculate_cog_untracked(mass,mask):
-    ''' caluclate centre of gravity and mass for untracked parts of domain
-    Input:
-    mass:           iris.cube.Cube
+    '''Calculate centre of gravity and mass for untracked parts of domain
+
+    Parameters
+    ----------
+    mass : iris.cube.Cube
         cube of quantity (need coordinates 'time', 'geopotential_height','projection_x_coordinate' and 'projection_y_coordinate')
-    mask:           iris.cube.Cube
+    mask : iris.cube.Cube
         cube containing mask (int > where belonging to cloud volume, 0 everywhere else )
-    Output:
-    tracks_out      pandas.DataFrame
+    Returns
+    -------
+    pandas.DataFrame
         Dataframe containing t,x,y,z positions of centre of gravity and total cloud mass for untracked part of domain
     '''
     from pandas import DataFrame
@@ -75,12 +81,15 @@ def calculate_cog_untracked(mass,mask):
     return tracks_out
 
 def calculate_cog_domain(mass):
-    ''' caluclate centre of gravity and mass for entire domain
-    Input:
-    mass:           iris.cube.Cube
+    '''Calculate centre of gravity and mass for entire domain
+
+    Parameters
+    ----------
+    mass : iris.cube.Cube
         cube of quantity (need coordinates 'time', 'geopotential_height','projection_x_coordinate' and 'projection_y_coordinate')
-    Output:
-    tracks_out      pandas.DataFrame
+
+    Returns
+    -------
+    pandas.DataFrame
         Dataframe containing t,x,y,z positions of centre of gravity and total cloud mass
     '''
     from pandas import DataFrame
@@ -108,18 +117,22 @@ def calculate_cog_domain(mass):
     return tracks_out
 
 def center_of_gravity(cube_in):
-    ''' caluclate centre of gravity and sum of quantity
-    Input:
-    cube_in:        iris.cube.Cube
+    '''Calculate centre of gravity and sum of quantity
+
+    Parameters
+    ----------
+    cube_in : iris.cube.Cube
         cube (potentially masked) of quantity (need coordinates 'geopotential_height','projection_x_coordinate' and 'projection_y_coordinate')
+
+    Returns
+    -------
-    Output:
-    x:              float
+    float
         x position of centre of gravity
-    y:              float
+    float
         y position of centre of gravity
-    z:              float
+    float
         z position of centre of gravity
-    variable_sum:   float
+    float
         sum of quantity over unmasked part of the cube
     '''

From a1f6e16fe861476e641f22e19a9efda08003eede Mon Sep 17 00:00:00 2001
From: Sean Freeman
Date: Fri, 12 Nov 2021 15:39:23 -0700
Subject: [PATCH 10/82] Continued updates to the docstrings to match numpy formatting

---
 doc/conf.py                |  18 +++-
 tobac/feature_detection.py | 207 ++++++++++++++++++++-----------------
 2 files changed, 128 insertions(+), 97 deletions(-)

diff --git a/doc/conf.py b/doc/conf.py
index f740960e..cb3fbee2 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -19,4 +19,20 @@ def setup(app):
 
 autodoc_mock_imports = ['numpy', 'scipy', 'scikit-image', 'pandas', 'pytables', 'matplotlib', 'iris',
                         'cf-units', 'xarray', 'cartopy', 'trackpy']
-sys.path.insert(0, os.path.abspath("../"))
\ No newline at end of file
+sys.path.insert(0, os.path.abspath("../"))
+
+# Napoleon settings
+napoleon_google_docstring = True
+napoleon_numpy_docstring = True
+napoleon_include_init_with_doc = False
+napoleon_include_private_with_doc = False
+napoleon_include_special_with_doc = True
+napoleon_use_admonition_for_examples = False
+napoleon_use_admonition_for_notes = False
+napoleon_use_admonition_for_references = False
+napoleon_use_ivar = False
+napoleon_use_param = True
+napoleon_use_rtype = True
+napoleon_preprocess_types = False
+napoleon_type_aliases = None
+napoleon_attr_annotations = True

diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py
index 3e634a96..cc2bae8e 100644
--- a/tobac/feature_detection.py
+++ b/tobac/feature_detection.py
@@ -3,29 +3,36 @@ import pandas as pd
 
 def feature_position(hdim1_indices,hdim2_indeces,region,track_data,threshold_i,position_threshold, target):
-    '''
-    function to determine feature position
-    Input:
-    hdim1_indices:    list
+    '''Function to determine feature position
+
+    Parameters
+    ----------
+    hdim1_indices : list
+        list of indices along hdim1 (typically ```y```)
 
-    hdim2_indeces:    list
+    hdim2_indeces : list
+        List of indices of feature along hdim2 (typically ```x```)
 
-    region:    list
-        list of 2-element tuples
-    track_data:    numpy.ndarray
-        2D numpy array containing the data
+    region : list
+        List of 2-element tuples
+    track_data : array-like
+        2D array containing the data
 
-    threshold_i:    float
+    threshold_i : float
+        TODO: ??
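As an aside, the Napoleon settings added in this patch are what allow Sphinx to parse the `Parameters`/`Returns` sections being introduced throughout these diffs. A minimal sketch of the convention being adopted, on a hypothetical helper that is not part of tobac:

```python
def smooth_field(field, sigma=0.5):
    '''Smooth a 2D field with a Gaussian filter.

    Parameters
    ----------
    field : array-like
        2D array containing the data
    sigma : float
        standard deviation of the Gaussian kernel

    Returns
    -------
    numpy.ndarray
        the smoothed field
    '''
    # hypothetical example function, not tobac API
    from scipy.ndimage import gaussian_filter
    return gaussian_filter(field, sigma=sigma)
```

With `sphinx.ext.napoleon` enabled as above, `automodule` renders these sections as structured parameter lists rather than preformatted text.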
- position_threshold: str + position_threshold : str + TODO: ?? - target: str + target : str + TODO: ?? - Output: - hdim1_index: float - feature position along 1st horizontal dimension - hdim2_index: float - feature position along 2nd horizontal dimension + Returns + ------- + hdim1_index : float + feature position along 1st horizontal dimension + hdim2_index : float + feature position along 2nd horizontal dimension ''' if position_threshold=='center': # get position as geometrical centre of identified region: @@ -64,34 +71,39 @@ def feature_position(hdim1_indices,hdim2_indeces,region,track_data,threshold_i,p return hdim1_index,hdim2_index def test_overlap(region_inner,region_outer): - ''' - function to test for overlap between two regions (probably scope for further speedup here) - Input: - region_1: list - list of 2-element tuples defining the indeces of all cell in the region - region_2: list - list of 2-element tuples defining the indeces of all cell in the region + '''function to test for overlap between two regions (TODO: probably scope for further speedup here) - Output: - overlap: bool - True if there are any shared points between the two regions + Parameters + ---------- + region_1 : list + list of 2-element tuples defining the indeces of all cell in the region + region_2 : list + list of 2-element tuples defining the indeces of all cell in the region + + Returns + ------- + bool + True if there are any shared points between the two regions ''' overlap=frozenset(region_outer).isdisjoint(region_inner) return not overlap def remove_parents(features_thresholds,regions_i,regions_old): - ''' - function to remove features whose regions surround newly detected feature regions - Input: - features_thresholds: pandas.DataFrame - Dataframe containing detected features - regions_i: dict - dictionary containing the regions above/below threshold for the newly detected feature (feature ids as keys) - regions_old: dict - dictionary containing the regions above/below threshold from previous threshold (feature ids as keys) - Output: - features_thresholds pandas.DataFrame - Dataframe containing detected features excluding those that are superseded by newly detected ones + '''function to remove features whose regions surround newly detected feature regions + + Parameters + ---------- + features_thresholds : pandas.DataFrame + Dataframe containing detected features + regions_i : dict + dictionary containing the regions above/below threshold for the newly detected feature (feature ids as keys) + regions_old : dict + dictionary containing the regions above/below threshold from previous threshold (feature ids as keys) + + Returns + ------- + pandas.DataFrame + Dataframe containing detected features excluding those that are superseded by newly detected ones ''' list_remove=[] for idx_i,region_i in regions_i.items(): @@ -115,34 +127,36 @@ def feature_detection_threshold(data_i,i_time, n_min_threshold=0, min_distance=0, idx_start=0): - ''' - function to find features based on individual threshold value: - Input: - data_i: iris.cube.Cube - 2D field to perform the feature detection (single timestep) - i_time: int - number of the current timestep - threshold: float - threshold value used to select target regions to track - target: str ('minimum' or 'maximum') - flag to determine if tracking is targetting minima or maxima in the data - position_threshold: str('extreme', 'weighted_diff', 'weighted_abs' or 'center') - flag choosing method used for the position of the tracked feature - sigma_threshold: float - 
standard deviation for intial filtering step - n_erosion_threshold: int - number of pixel by which to erode the identified features - n_min_threshold: int - minimum number of identified features - min_distance: float - minimum distance between detected features (m) - idx_start: int - feature id to start with - Output: - features_threshold: pandas DataFrame - detected features for individual threshold - regions: dict - dictionary containing the regions above/below threshold used for each feature (feature ids as keys) + '''function to find features based on individual threshold value + + Parameters + ---------- + data_i : iris.cube.Cube + 2D field to perform the feature detection (single timestep) + i_time : int + number of the current timestep + threshold : float + threshold value used to select target regions to track + target : str ('minimum' or 'maximum') + flag to determine if tracking is targetting minima or maxima in the data + position_threshold : str('extreme', 'weighted_diff', 'weighted_abs' or 'center') + flag choosing method used for the position of the tracked feature + sigma_threshold : float + standard deviation for intial filtering step + n_erosion_threshold : int + number of pixel by which to erode the identified features + n_min_threshold : int + minimum number of identified features + min_distance : float + minimum distance between detected features (m) + idx_start : int + feature id to start with + Returns + ------- + pandas DataFrame + detected features for individual threshold + dict + dictionary containing the regions above/below threshold used for each feature (feature ids as keys) ''' from skimage.measure import label from skimage.morphology import binary_erosion @@ -207,35 +221,36 @@ def feature_detection_multithreshold_timestep(data_i,i_time, min_distance=0, feature_number_start=1 ): - ''' - function to find features in each timestep based on iteratively finding regions above/below a set of thresholds - Input: - data_i: iris.cube.Cube - 2D field to perform the feature detection (single timestep) - i_time: int - number of the current timestep + '''function to find features in each timestep based on iteratively finding regions above/below a set of thresholds - threshold: list of floats - threshold values used to select target regions to track - dxy: float - grid spacing of the input data (m) - target: str ('minimum' or 'maximum') - flag to determine if tracking is targetting minima or maxima in the data - position_threshold: str('extreme', 'weighted_diff', 'weighted_abs' or 'center') - flag choosing method used for the position of the tracked feature - sigma_threshold: float - standard deviation for intial filtering step - n_erosion_threshold: int - number of pixel by which to erode the identified features - n_min_threshold: int - minimum number of identified features - min_distance: float - minimum distance between detected features (m) - feature_number_start: int - feature number to start with - Output: - features_threshold: pandas DataFrame - detected features for individual timestep + Parameters + ---------- + data_i : iris.cube.Cube + 2D field to perform the feature detection (single timestep) + i_time : int + number of the current timestep + threshold : list of floats + threshold values used to select target regions to track + dxy : float + grid spacing of the input data (m) + target : str ('minimum' or 'maximum') + flag to determine if tracking is targetting minima or maxima in the data + position_threshold : str('extreme', 'weighted_diff', 'weighted_abs' or 
'center') + flag choosing method used for the position of the tracked feature + sigma_threshold : float + standard deviation for intial filtering step + n_erosion_threshold : int + number of pixel by which to erode the identified features + n_min_threshold : int + minimum number of identified features + min_distance : float + minimum distance between detected features (m) + feature_number_start : int + feature number to start with + Returns + ------- + pandas DataFrame + detected features for individual timestep ''' from scipy.ndimage.filters import gaussian_filter @@ -284,7 +299,7 @@ def feature_detection_multithreshold(field_in, min_distance=0, feature_number_start=1 ): - ''' Function to perform feature detection based on contiguous regions above/below a threshold + '''Function to perform feature detection based on contiguous regions above/below a threshold Input: field_in: iris.cube.Cube 2D field to perform the tracking on (needs to have coordinate 'time' along one of its dimensions) From 83f22818e626d01e20cbc74b057a2df7c9b3c026 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Tue, 16 Nov 2021 10:11:25 -0700 Subject: [PATCH 11/82] Updated most docstrings on the remainder of the functions --- tobac/feature_detection.py | 22 ++++-- tobac/segmentation.py | 24 ++++--- tobac/tracking.py | 45 +++++++----- tobac/utils.py | 136 +++++++++++++++++++++++++------------ tobac/wrapper.py | 11 +-- 5 files changed, 156 insertions(+), 82 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index cc2bae8e..66462922 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -300,7 +300,9 @@ def feature_detection_multithreshold(field_in, feature_number_start=1 ): '''Function to perform feature detection based on contiguous regions above/below a threshold - Input: + + Parameters + ---------- field_in: iris.cube.Cube 2D field to perform the tracking on (needs to have coordinate 'time' along one of its dimensions) @@ -320,8 +322,10 @@ def feature_detection_multithreshold(field_in, minimum number of identified features min_distance: float minimum distance between detected features (m) - Output: - features: pandas DataFrame + + Returns + ------- + pandas DataFrame detected features ''' from .utils import add_coordinates @@ -377,16 +381,20 @@ def feature_detection_multithreshold(field_in, return features def filter_min_distance(features,dxy,min_distance): - ''' Function to perform feature detection based on contiguous regions above/below a threshold - Input: + '''Function to perform feature detection based on contiguous regions above/below a threshold + + Parameters + ---------- features: pandas DataFrame features dxy: float horzontal grid spacing (m) min_distance: float minimum distance between detected features (m) - Output: - features: pandas DataFrame + + Returns + ------- + pandas DataFrame features ''' from itertools import combinations diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 3dbcda2b..a92dacf5 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -8,10 +8,10 @@ def segmentation_2D(features,field,dxy,threshold=3e-3,target='maximum',level=Non def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto'): - """ - Function performing watershedding for an individual timestep of the data + """Function performing watershedding for an individual timestep of the data - Parameters: + Parameters + ---------- features: pandas.DataFrame features 
for one specific point in time field: iris.cube.Cube @@ -27,10 +27,11 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu max_distance: float maximum distance from a marker allowed to be classified as belonging to that cell - Output: - segmentation_out: iris.cube.Cube + Returns + ------- + iris.cube.Cube cloud mask, 0 outside and integer numbers according to track inside the clouds - features_out: pandas.DataFrame + pandas.DataFrame feature dataframe including the number of cells (2D or 3D) in the segmented area/volume of the feature at the timestep """ from skimage.morphology import watershed @@ -136,10 +137,10 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu return segmentation_out,features_out def segmentation(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto'): - """ - Function using watershedding or random walker to determine cloud volumes associated with tracked updrafts + """Function using watershedding or random walker to determine cloud volumes associated with tracked updrafts - Parameters: + Parameters + ---------- features: pandas.DataFrame output from trackpy/maketrack field: iris.cube.Cube @@ -158,8 +159,9 @@ def segmentation(features,field,dxy,threshold=3e-3,target='maximum',level=None,m max_distance: float Maximum distance from a marker allowed to be classified as belonging to that cell - Output: - segmentation_out: iris.cube.Cube + Returns + ------- + iris.cube.Cube Cloud mask, 0 outside and integer numbers according to track inside the cloud """ import pandas as pd diff --git a/tobac/tracking.py b/tobac/tracking.py index 94e27524..ff2cec2d 100644 --- a/tobac/tracking.py +++ b/tobac/tracking.py @@ -13,10 +13,10 @@ def linking_trackpy(features,field_in,dt,dxy, adaptive_step=None,adaptive_stop=None, cell_number_start=1 ): - """ - Function to perform the linking of features in trajectories + """Function to perform the linking of features in trajectories - Parameters: + Parameters + ---------- features: pandas.DataFrame Detected features to be linked v_max: float @@ -33,6 +33,10 @@ def linking_trackpy(features,field_in,dt,dxy, flag choosing method used for feature detection method_linking: str('predict' or 'random') flag choosing method used for trajectory linking + Returns + ------- + pandas.dataframe + Pandas dataframe containing the linked features """ # from trackpy import link_df # from trackpy import link_df @@ -150,8 +154,10 @@ def linking_trackpy(features,field_in,dt,dxy, def fill_gaps(t,order=1,extrapolate=0,frame_max=None,hdim_1_max=None,hdim_2_max=None): - ''' add cell time as time since the initiation of each cell - Input: + '''add cell time as time since the initiation of each cell + + Parameters + ---------- t: pandas dataframe trajectories from trackpy order: int @@ -164,9 +170,10 @@ def fill_gaps(t,order=1,extrapolate=0,frame_max=None,hdim_1_max=None,hdim_2_max= size of input data along first horizontal axis hdim_2_max: int size of input data along second horizontal axis - Output: - t: pandas dataframe - trajectories from trackpy with with filled gaps and potentially extrapolated + Returns + ------- + pandas dataframe + trajectories from trackpy with with filled gaps and potentially extrapolated ''' from scipy.interpolate import InterpolatedUnivariateSpline logging.debug('start filling gaps') @@ -214,10 +221,14 @@ def fill_gaps(t,order=1,extrapolate=0,frame_max=None,hdim_1_max=None,hdim_2_max= def add_cell_time(t): ''' add cell 
time as time since the initiation of each cell - Input: + + Parameters + ---------- t: pandas DataFrame trajectories with added coordinates - Output: + + Returns + ------- t: pandas dataframe trajectories with added cell time ''' @@ -231,12 +242,14 @@ def add_cell_time(t): def remap_particle_to_cell_nv(particle_cell_map, input_particle): '''Remaps the particles to new cells given an input map and the current particle. - Designed to be vectorized with np.vectorize - - Input: - t: pandas DataFrame - trajectories with added coordinates - + Helper function that is designed to be vectorized with np.vectorize + + Parameters + ---------- + particle_cell_map: dict-like + The dictionary mapping particle number to cell number + input_particle: key for particle_cell_map + The particle number to remap ''' return particle_cell_map[input_particle] diff --git a/tobac/utils.py b/tobac/utils.py index ab9be599..dd498522 100644 --- a/tobac/utils.py +++ b/tobac/utils.py @@ -1,16 +1,18 @@ import logging def column_mask_from2D(mask_2D,cube,z_coord='model_level_number'): - ''' function to turn 2D watershedding mask into a 3D mask of selected columns - Input: + '''function to turn 2D watershedding mask into a 3D mask of selected columns + Parameters + ---------- cube: iris.cube.Cube data cube mask_2D: iris.cube.Cube 2D cube containing mask (int id for tacked volumes 0 everywhere else) z_coord: str name of the vertical coordinate in the cube - Output: - mask_2D: iris.cube.Cube + Returns + ------- + iris.cube.Cube 3D cube containing columns of 2D mask (int id for tacked volumes 0 everywhere else) ''' from copy import deepcopy @@ -26,16 +28,20 @@ def column_mask_from2D(mask_2D,cube,z_coord='model_level_number'): def mask_cube_cell(variable_cube,mask,cell,track): - ''' Mask cube for tracked volume of an individual cell - Input: + '''Mask cube for tracked volume of an individual cell + + Parameters + ---------- variable_cube: iris.cube.Cube unmasked data cube mask: iris.cube.Cube cube containing mask (int id for tacked volumes 0 everywhere else) cell: int interger id of cell to create masked cube for - Output: - variable_cube_out: iris.cube.Cube + + Returns + ------- + iris.cube.Cube Masked cube with data for respective cell ''' from copy import deepcopy @@ -46,12 +52,16 @@ def mask_cube_cell(variable_cube,mask,cell,track): def mask_cube_all(variable_cube,mask): ''' Mask cube for untracked volume - Input: + + Parameters + ---------- variable_cube: iris.cube.Cube unmasked data cube mask: iris.cube.Cube cube containing mask (int id for tacked volumes 0 everywhere else) - Output: + + Returns + ------- variable_cube_out: iris.cube.Cube Masked cube for untracked volume ''' @@ -62,13 +72,17 @@ def mask_cube_all(variable_cube,mask): return variable_cube_out def mask_cube_untracked(variable_cube,mask): - ''' Mask cube for untracked volume - Input: + '''Mask cube for untracked volume + + Parameters + ---------- variable_cube: iris.cube.Cube unmasked data cube mask: iris.cube.Cube cube containing mask (int id for tacked volumes 0 everywhere else) - Output: + + Returns + ------- variable_cube_out: iris.cube.Cube Masked cube for untracked volume ''' @@ -80,13 +94,17 @@ def mask_cube_untracked(variable_cube,mask): def mask_cube(cube_in,mask): ''' Mask cube where mask is larger than zero - Input: + + Parameters + ---------- cube_in: iris.cube.Cube unmasked data cube mask: numpy.ndarray or dask.array mask to use for masking, >0 where cube is supposed to be masked - Output: - cube_out: iris.cube.Cube + + Returns + ------- + 
iris.cube.Cube Masked cube ''' from dask.array import ma @@ -96,12 +114,16 @@ def mask_cube(cube_in,mask): return cube_out def mask_cell(mask,cell,track,masked=False): - ''' create mask for specific cell - Input: + '''create mask for specific cell + + Parameters + ---------- mask: iris.cube.Cube cube containing mask (int id for tacked volumes 0 everywhere else) - Output: - variable_cube_out: numpy.ndarray + + Returns + ------- + numpy.ndarray Masked cube for untracked volume ''' feature_ids=track.loc[track['cell']==cell,'feature'].values @@ -110,11 +132,15 @@ def mask_cell(mask,cell,track,masked=False): def mask_cell_surface(mask,cell,track,masked=False,z_coord='model_level_number'): '''Create surface projection of mask for individual cell - Input: + + Parameters + ---------- mask: iris.cube.Cube cube containing mask (int id for tacked volumes 0 everywhere else) - Output: - variable_cube_out: iris.cube.Cube + + Returns + ------- + iris.cube.Cube Masked cube for untracked volume ''' feature_ids=track.loc[track['cell']==cell,'feature'].values @@ -123,11 +149,15 @@ def mask_cell_surface(mask,cell,track,masked=False,z_coord='model_level_number') def mask_cell_columns(mask,cell,track,masked=False,z_coord='model_level_number'): '''Create mask with entire columns for individual cell - Input: + + Parameters + ---------- mask: iris.cube.Cube cube containing mask (int id for tacked volumes 0 everywhere else) - Output: - variable_cube_out: iris.cube.Cube + + Returns + ------- + iris.cube.Cube Masked cube for untracked volume ''' feature_ids=track.loc[track['cell']==cell].loc['feature'] @@ -136,15 +166,19 @@ def mask_cell_columns(mask,cell,track,masked=False,z_coord='model_level_number') def mask_cube_features(variable_cube,mask,feature_ids): ''' Mask cube for tracked volume of an individual cell - Input: + + Parameters + ---------- variable_cube: iris.cube.Cube unmasked data cube mask: iris.cube.Cube cube containing mask (int id for tacked volumes 0 everywhere else) cell: int interger id of cell to create masked cube for - Output: - variable_cube_out: iris.cube.Cube + + Returns + ------- + iris.cube.Cube Masked cube with data for respective cell ''' from dask.array import ma,isin @@ -154,12 +188,16 @@ def mask_cube_features(variable_cube,mask,feature_ids): return variable_cube_out def mask_features(mask,feature_ids,masked=False): - ''' create mask for specific features - Input: + '''create mask for specific features + + Parameters + ---------- mask: iris.cube.Cube cube containing mask (int id for tacked volumes 0 everywhere else) - Output: - variable_cube_out: numpy.ndarray + + Returns + ------- + numpy.ndarray Masked cube for untracked volume ''' from dask.array import ma,isin @@ -173,11 +211,15 @@ def mask_features(mask,feature_ids,masked=False): def mask_features_surface(mask,feature_ids,masked=False,z_coord='model_level_number'): ''' create surface mask for individual features - Input: + + Parameters + ---------- mask: iris.cube.Cube cube containing mask (int id for tacked volumes 0 everywhere else) - Output: - variable_cube_out: iris.cube.Cube + + Returns + ------- + iris.cube.Cube Masked cube for untracked volume ''' from iris.analysis import MAX @@ -194,10 +236,14 @@ def mask_features_surface(mask,feature_ids,masked=False,z_coord='model_level_num def mask_all_surface(mask,masked=False,z_coord='model_level_number'): ''' create surface mask for individual features - Input: + + Parameters + ---------- mask: iris.cube.Cube cube containing mask (int id for tacked volumes 0 everywhere else) - 
Output: + + Returns + ------- mask_i_surface: iris.cube.Cube (2D) Mask with 1 below features and 0 everywhere else ''' @@ -278,14 +324,18 @@ def mask_all_surface(mask,masked=False,z_coord='model_level_number'): def add_coordinates(t,variable_cube): import numpy as np - ''' Function adding coordinates from the tracking cube to the trajectories: time, longitude&latitude, x&y dimensions - Input: + '''Function adding coordinates from the tracking cube to the trajectories: time, longitude&latitude, x&y dimensions + + Parameters + ---------- t: pandas DataFrame trajectories/features variable_cube: iris.cube.Cube Cube containing the dimensions 'time','longitude','latitude','x_projection_coordinate','y_projection_coordinate', usually cube that the tracking is performed on - Output: - t: pandas DataFrame + + Returns + ------- + pandas DataFrame trajectories with added coordinated ''' from scipy.interpolate import interp2d, interp1d @@ -391,7 +441,7 @@ def add_coordinates(t,variable_cube): def get_bounding_box(x,buffer=1): from numpy import delete,arange,diff,nonzero,array - """ Calculates the bounding box of a ndarray + """Calculates the bounding box of a ndarray https://stackoverflow.com/questions/31400769/bounding-box-of-numpy-array """ mask = x == 0 diff --git a/tobac/wrapper.py b/tobac/wrapper.py index fbff488c..78bba6d3 100644 --- a/tobac/wrapper.py +++ b/tobac/wrapper.py @@ -79,10 +79,10 @@ def maketrack(field_in, from .feature_detection import feature_detection_multithreshold from .tracking import linking_trackpy - """ - Function identifiying features andlinking them into trajectories + """Function identifiying features and linking them into trajectories - Parameters: + Parameters + ---------- field_in: iris.cube.Cube 2D input field tracking is performed on grid_spacing: float @@ -118,8 +118,9 @@ def maketrack(field_in, return_intermediate: boolean flag to tetermine if only final tracjectories are output (False, default) or if detected features, filtered features and unfilled tracks are returned additionally (True) - - Output: + + Returns + ------- trajectories_final: pandas.DataFrame Tracked updrafts, one row per timestep and updraft, includes dimensions 'time','latitude','longitude','projection_x_variable', 'projection_y_variable' based on w cube. 'hdim_1' and 'hdim_2' are used for segementation step. From 4e4a8bd444a6bcca06c5d2e274e837004ca0fdc3 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Mon, 22 Nov 2021 08:05:40 -0700 Subject: [PATCH 12/82] Speeding up 2D feature detection using what we learned from 3D (#10) * Improved speed from implementation of new indices get functions * Updated feature detection to be faster * Updated version I updated the version number in the setup.py script so that users can update with pip. * Added workflow to automatically run tests via github actions * Switched around order of dockerfile to improve performance. * Switched label around to use binary arrays, as that seems faster. 
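A rough illustration of the speed-up described in the bullets above: instead of re-scanning the labelled array with `np.unique` for every query, a single `skimage.measure.regionprops` pass exposes per-label pixel coordinates that can be reused. The toy data below is illustrative, not the tobac implementation itself:

```python
import numpy as np
import skimage.measure

data = np.zeros((10, 10))
data[2:5, 2:5] = 3.0
data[7:9, 7:9] = 5.0

# a boolean mask rather than a 0/1 integer mask, per the last bullet
mask = data >= 3.0
labels = skimage.measure.label(mask, background=0)

# older approach: a full-array scan to count pixels per label
values, counts = np.unique(labels.ravel(), return_counts=True)

# faster approach: one regionprops pass, indexed by label number
props = {rp.label: rp for rp in skimage.measure.regionprops(labels)}
for label_num, rp in props.items():
    y_ix, x_ix = np.transpose(rp.coords)  # pixel indices of this region
    print(label_num, len(y_ix))
```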
* Speeding up binary_erosion --- .github/workflows/docker-image.yml | 18 +++++ Dockerfile | 4 +- setup.py | 8 +- tobac/feature_detection.py | 116 +++++++++++++++++++++++++---- 4 files changed, 126 insertions(+), 20 deletions(-) create mode 100644 .github/workflows/docker-image.yml diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml new file mode 100644 index 00000000..0046f75c --- /dev/null +++ b/.github/workflows/docker-image.yml @@ -0,0 +1,18 @@ +name: Docker Image CI + +on: + push: + branches: [ master, dev ] + pull_request: + branches: [ master, dev ] + +jobs: + + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Build the Docker image + run: docker build . --file Dockerfile --tag my-image-name:$(date +%s) diff --git a/Dockerfile b/Dockerfile index b228c287..42b8794f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,12 @@ FROM mambaorg/micromamba #WORKDIR . -COPY . ./ RUN micromamba install -y -n base -c conda-forge numpy \ scipy scikit-image pandas pytables matplotlib iris \ cf-units xarray cartopy trackpy numba pytest -# Make RUN commands use the new environment: -#SHELL ["micromamba", "run", "-n", "myenv", "/bin/bash", "-c"] +COPY . ./ RUN pip install . diff --git a/setup.py b/setup.py index 5f9cd927..bc73ae2a 100644 --- a/setup.py +++ b/setup.py @@ -1,12 +1,12 @@ from setuptools import setup setup(name='tobac', - version='1.2', + version='1.3.0', description='Tracking and object-based analysis of clouds', url='http://github.com/climate-processes/tobac', - author='Max Heikenfeld', - author_email='max.heikenfeld@physics.ox.ac.uk', - license='GNU', + author='Max Heikenfeld, Sean Freeman, Alex Sokolowsky', + author_email='sean.freeman@colostate.edu', + license='BSD-3', packages=['tobac'], install_requires=[], zip_safe=False) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 66462922..50fb5185 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -2,6 +2,79 @@ import numpy as np import pandas as pd +def get_label_props_in_dict(labels): + '''Function to get the label properties into a dictionary format. + + Parameters + ---------- + labels: 2D or 3D array-like + comes from the `skimage.measure.label` function + + Returns + ------- + dict + output from skimage.measure.regionprops in dictionary format, where they key is the label number + ''' + import skimage.measure + + region_properties_raw = skimage.measure.regionprops(labels) + region_properties_dict = dict() + for region_prop in region_properties_raw: + region_properties_dict[region_prop.label] = region_prop + + return region_properties_dict + + +def get_indices_of_labels_from_reg_prop_dict(region_property_dict): + '''Function to get the x, y, and z indices (as well as point count) of all labeled regions. + This function should produce similar output as new_get_indices_of_labels, but + allows for re-use of the region_property_dict. + + Parameters + ---------- + region_property_dict: dict of region_property objects + This dict should come from the get_label_props_in_dict function. 
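A usage sketch for this pair of helpers, assuming the 2D behaviour added in this patch and that this version of tobac is importable:

```python
import numpy as np
import skimage.measure
from tobac.feature_detection import (
    get_label_props_in_dict, get_indices_of_labels_from_reg_prop_dict)

# two disconnected regions -> two labels
labels = skimage.measure.label(np.array([[1, 1, 0],
                                         [0, 0, 0],
                                         [0, 0, 1]]), background=0)
label_props = get_label_props_in_dict(labels)
counts, y_ix, x_ix = get_indices_of_labels_from_reg_prop_dict(label_props)

# counts maps label -> number of points; y_ix/x_ix map label -> index arrays
assert counts[1] == len(y_ix[1]) == len(x_ix[1])
```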
+ + Returns + ------- + dict (key: label number, int) + The number of points in the label number + dict (key: label number, int) + The z indices in the label number + dict (key: label number, int) + the y indices in the label number + dict (key: label number, int) + the x indices in the label number + + Raises + ------ + ValueError + a ValueError is raised if + ''' + + import skimage.measure + + if len(region_property_dict) ==0: + raise ValueError("No regions!") + + y_indices = dict() + x_indices = dict() + curr_loc_indices = dict() + + #loop through all skimage identified regions + for region_prop_key in region_property_dict: + region_prop = region_property_dict[region_prop_key] + index = region_prop.label + curr_y_ixs, curr_x_ixs = np.transpose(region_prop.coords) + y_indices[index] = curr_y_ixs + x_indices[index] = curr_x_ixs + curr_loc_indices[index] = len(curr_x_ixs) + + #print("indices found") + return [curr_loc_indices, y_indices, x_indices] + + + def feature_position(hdim1_indices,hdim2_indeces,region,track_data,threshold_i,position_threshold, target): '''Function to determine feature position @@ -163,34 +236,51 @@ def feature_detection_threshold(data_i,i_time, # if looking for minima, set values above threshold to 0 and scale by data minimum: if target == 'maximum': - mask=1*(data_i >= threshold) + mask=(data_i >= threshold) # if looking for minima, set values above threshold to 0 and scale by data minimum: elif target == 'minimum': - mask=1*(data_i <= threshold) + mask=(data_i <= threshold) # only include values greater than threshold # erode selected regions by n pixels if n_erosion_threshold>0: selem=np.ones((n_erosion_threshold,n_erosion_threshold)) - mask=binary_erosion(mask,selem).astype(np.int64) + mask=binary_erosion(mask,selem).astype(bool) # detect individual regions, label and count the number of pixels included: labels = label(mask, background=0) - values, count = np.unique(labels[:,:].ravel(), return_counts=True) - values_counts=dict(zip(values, count)) + label_props = get_label_props_in_dict(labels) + if len(label_props)>0: + [total_indices_all, hdim1_indices_all, hdim2_indices_all] = get_indices_of_labels_from_reg_prop_dict(label_props) + + + #values, count = np.unique(labels[:,:].ravel(), return_counts=True) + #values_counts=dict(zip(values, count)) # Filter out regions that have less pixels than n_min_threshold - values_counts={k:v for k, v in values_counts.items() if v>n_min_threshold} + #values_counts={k:v for k, v in values_counts.items() if v>n_min_threshold} #check if not entire domain filled as one feature - if 0 in values_counts: - #Remove background counts: - values_counts.pop(0) + if len(label_props)>0: #create empty list to store individual features for this threshold list_features_threshold=[] #create empty dict to store regions for individual features for this threshold regions=dict() #create emptry list of features to remove from parent threshold value + + region = np.empty(mask.shape, dtype=bool) #loop over individual regions: - for cur_idx,count in values_counts.items(): - region=labels[:,:] == cur_idx - [hdim1_indices,hdim2_indeces]= np.nonzero(region) + for cur_idx in total_indices_all: + #skip this if there aren't enough points to be considered a real feature + #as defined above by n_min_threshold + curr_count = total_indices_all[cur_idx] + if curr_count <=n_min_threshold: + continue + + label_bbox = label_props[cur_idx].bbox + + hdim1_indices = hdim1_indices_all[cur_idx] + hdim2_indeces = hdim2_indices_all[cur_idx] + region.fill(False) + 
region[hdim1_indices,hdim2_indeces]=True + + #[hdim1_indices,hdim2_indeces]= np.nonzero(region) #write region for individual threshold and feature to dict region_i=list(zip(hdim1_indices,hdim2_indeces)) regions[cur_idx+idx_start]=region_i @@ -201,7 +291,7 @@ def feature_detection_threshold(data_i,i_time, 'idx':cur_idx+idx_start, 'hdim_1': hdim1_index, 'hdim_2':hdim2_index, - 'num':count, + 'num':curr_count, 'threshold_value':threshold}) features_threshold=pd.DataFrame(list_features_threshold) else: From 7ed73a1b3a6c3d777e1caf1bef6898a85b93f80a Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Tue, 30 Nov 2021 01:19:53 -0600 Subject: [PATCH 13/82] Updated to remove reference to depreciated function (#11) --- tobac/segmentation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index a92dacf5..09daff74 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -34,7 +34,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu pandas.DataFrame feature dataframe including the number of cells (2D or 3D) in the segmented area/volume of the feature at the timestep """ - from skimage.morphology import watershed + from skimage.segmentation import watershed # from skimage.segmentation import random_walker from scipy.ndimage import distance_transform_edt from copy import deepcopy From 50c339f678343609dca101322236b8151fb6d279 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Tue, 4 Jan 2022 14:56:22 -0600 Subject: [PATCH 14/82] added pip to the docker file because it is not automatically installed with micromamba anymore --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 42b8794f..96a4eff0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ FROM mambaorg/micromamba RUN micromamba install -y -n base -c conda-forge numpy \ scipy scikit-image pandas pytables matplotlib iris \ - cf-units xarray cartopy trackpy numba pytest + cf-units xarray cartopy trackpy numba pytest pip COPY . ./ From 18d5a3e7d1af3d9f339a92ef92d15817ab9fb055 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Mon, 21 Feb 2022 17:40:14 -0600 Subject: [PATCH 15/82] Combining 2D and 3D code and speedups to both (#19) * Update feature_detection.py Added new optimized functions for 3D and periodic boundary condition (PBC) feature detection. Includes treatments for no PBCs (original tobac boundary approach), single-boundary PBC for either of hdim_1 or hdim_2, and doubly periodic boundaries * Updated docstring on some of the feature detection functions * Updated docstrings Continuing to update docstrings to be more clear and to be in numpy format where needed. * Added some new tests and updated dockerfile for testing I added a test for get_label_props_in_dict as part of adding new tests for tobac. I also updated the dockerfile to improve execution order and build/test times. * Updated test assertions for number of labels * cleaned up new code and removed redundant functions * Added 2D and 3D compatibility with get_indices_of_labels_from_dict. I also added the associated tests to test_feature_detection.py. * Renamed feature_position_3D for cleanup. * Addition of 3D/PBC segmentation functions Added 2 new versions of the segmentation and segmentation_timestep functions that improve the 3D segmentation and also include a first-pass doubly periodic boundary condition (which will need improvement before being pushed out to the world at large). 
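The 5x5x5 seeding described above can be sketched as follows; the function name and signature here are illustrative rather than the tobac implementation, and the real code additionally has to respect domain edges and periodic boundaries:

```python
import numpy as np

def seed_3d_box(markers, z, y, x, feature_id, half_width=2):
    '''Seed a (2*half_width+1)^3 box of marker values around a feature.'''
    # illustrative sketch only, not the committed tobac function
    zdim, ydim, xdim = markers.shape
    markers[max(z - half_width, 0):min(z + half_width + 1, zdim),
            max(y - half_width, 0):min(y + half_width + 1, ydim),
            max(x - half_width, 0):min(x + half_width + 1, xdim)] = feature_id
    return markers

markers = np.zeros((20, 50, 50), dtype=int)
markers = seed_3d_box(markers, z=10, y=25, x=25, feature_id=1)
```

Seeding a small box instead of the whole vertical column is what avoids attaching vertically separated objects (the cirrus-over-convection case) to a single feature.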
Whereas the previous 3D segmentation approach simply seeded the entire vertical column at the integer x,y position of a feature, we now seed a 5x5x5 box around the position of the feature, which ought to reduce the chance of artificially segmenting unconnected fields (e.g. cirrus overlying a discrete convective cloud). * Update segmentation.py Fixed a call to an old test function "watershed_PBC" to just call the regular skimage.segmentation watershed function. Also updated the original segmentation function to use skimage.segmentation watershed rather than the deprecated skimage.morphology watershed * Added linking_trackpy_3D function Added linking_trackpy_3D to module, which is an updated, 3D version of the linking_trackpy function. This function is basically identical to the non-3D version, but includes vdim as an additional function argument and positional argument for the tp.link call that is really the guts of tobac's tracking code. Longer term, probably a good idea to integrate this with the 2D tracking function and use a user-set flag or something to determine whether to do 2D or 3D tracking - all this really comes down to is whether or not to include vdim in pos_columns. * Added new functions, corrected error in old one Corrected issue in 'feature_position_3D' where the coordinate re-transform at the end of the function still used the old integer PBC_flag values instead of the new string values. Also added 3D versions of feature_detection_multithreshold(_3D) and feature_detection_multithreshold_timestep(_3D) just for temporary continuity with feature_detection_threshold_3D so we can smoothly integrate 2D and 3D feature detection functions. * Fixing a docker bug and continuing to combine feature detection * Updated feature detection for 3D to call the new overarching feature_position function * Updated 3D feature detection stuff * Fixed some bugs with 3D feature detection. * At a working state, but the new combined feature_detection_threshold does not exactly match what we had earlier, so fixing that is the next step. * Fixed feature_detection_threshold to be in line with what Alex had (and ~2x faster) * Removing a 3D function to clean things up * Added 3D capability to feature_detection_multithreshold_timestep and it matches. * Removed extraneous 3D code * Continued consolidation of 2D and 3D functions. * Combined the last of the 2D/3D feature detection functions. * Fixed a bug with the 2D case label_bbox. * Ignoring the .vscode files, whoops * Deleted my personal vscode settings. * first go at improving remove_parents, switching to numpy comparisons. * trying out the v2 trunk from the tobac github * Attempt at our own remove_parents function * improving speeds * Attempt at speeding up further * fixing a bug * fixing some bugs here? * Fixing the edge case with no initial features. * Combined 2D and 3D tracking * Starting to add in more testing in anticipation of PBC tracking * Starting to add into the PBC coordinates function * Added in hdim_2 to PBC_flag both * get_pbc_coordinates now working throughout * Cleaned up make_feature_blob and added the docstring for a new generate_single_feature function * New testing utilities, building to PBC tracking * Added more tests for our new generate_single_feature function. * Updated testing code to include times and starting updates to PBC code * Tracking testing working well now. * Now have a PBC test that correctly fails. 
* Starting to clean up documentation and prep for 3D PBC tracking * Added in new distance calculation functions for trackpy * Added in PBC tracking and added scikit-learn as a requirement To add in PBC tracking, we need to use the BTree neighbor_strategy, which requires scikit-learn. * Fixed tracking to produce correct results, huzzah! * Added new tests for PBC tracking with hdim_1 boundaries and hdim_2 boundaries * Starting prep work to efficiency * Switched euclidian distance tests to use numpy arrays to match what trackpy gives it * Revision to calculate_distance_coords_pbc to be faster * Trying benchmarks with numba * Making numba optional * Cleaning up unused functions * Updated comments * Added fix for small proto-features case Previously, code would crash if there are 1 or more proto-features at a particular threshold where none exceed the minimum point threshold. Variable 'column_names' was left undefined, causing an error when attempting to create a dataframe from 'list_features_threshold' with columns 'column_names'. Added a new if-else statement to fix this. * Added some more testing utilities and more tests for the tests * Fixed PBC labeling Added 3 new elifs in feature_detection_threshold that corrected an issue where contiguous indices which crossed boundaries would not be relabeled properly if the cross-boundary label had already been overwritten. * Removed field_in argument for linking_trackpy Removed field_in as an argument in linking_trackpy. This argument did not serve any meaningful purpose in the original tracking code, if I remember correctly, and is completely unused in the present implementation. * Revert "Removed field_in argument for linking_trackpy" This reverts commit 893d09eaafb86aa3632e5dedc3dbe888ad2797e9. Co-authored-by: galexsky <90701223+galexsky@users.noreply.github.com> Co-authored-by: Sean Freeman Co-authored-by: Sean Freeman Co-authored-by: galexsky --- .gitignore | 1 + Dockerfile | 5 +- conda-requirements.txt | 1 + tobac/feature_detection.py | 1343 +++++++++++-------------- tobac/segmentation.py | 384 +++++++ tobac/testing.py | 517 +++++++++- tobac/tests/test_feature_detection.py | 83 ++ tobac/tests/test_testing.py | 323 ++++++ tobac/tests/test_tracking.py | 208 ++++ tobac/tracking.py | 165 ++- tobac/utils.py | 146 ++- 11 files changed, 2404 insertions(+), 772 deletions(-) create mode 100644 tobac/tests/test_feature_detection.py create mode 100644 tobac/tests/test_testing.py create mode 100644 tobac/tests/test_tracking.py diff --git a/.gitignore b/.gitignore index a295864e..a9a8efc1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ *.pyc __pycache__ +.vscode \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 96a4eff0..d9075c3f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,9 @@ FROM mambaorg/micromamba - -#WORKDIR . +ARG MAMBA_DOCKERFILE_ACTIVATE=1 RUN micromamba install -y -n base -c conda-forge numpy \ scipy scikit-image pandas pytables matplotlib iris \ - cf-units xarray cartopy trackpy numba pytest pip + cf-units xarray cartopy trackpy numba pytest pip scikit-learn COPY . 
./ diff --git a/conda-requirements.txt b/conda-requirements.txt index 6e0e7478..07924269 100644 --- a/conda-requirements.txt +++ b/conda-requirements.txt @@ -2,6 +2,7 @@ numpy scipy scikit-image +scikit-learn pandas pytables matplotlib diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 25e4d065..b7c7d393 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -1,6 +1,7 @@ -import logging + import numpy as np import pandas as pd +import logging def get_label_props_in_dict(labels): '''Function to get the label properties into a dictionary format. @@ -25,123 +26,315 @@ def get_label_props_in_dict(labels): return region_properties_dict + def get_indices_of_labels_from_reg_prop_dict(region_property_dict): '''Function to get the x, y, and z indices (as well as point count) of all labeled regions. - This function should produce similar output as new_get_indices_of_labels, but - allows for re-use of the region_property_dict. - + Parameters ---------- region_property_dict: dict of region_property objects This dict should come from the get_label_props_in_dict function. - + Returns ------- dict (key: label number, int) The number of points in the label number dict (key: label number, int) - The z indices in the label number + The z indices in the label number. If a 2D property dict is passed, this value is not returned dict (key: label number, int) the y indices in the label number dict (key: label number, int) the x indices in the label number - + Raises ------ ValueError - a ValueError is raised if + a ValueError is raised if there are no regions in the region property dict + ''' import skimage.measure if len(region_property_dict) ==0: raise ValueError("No regions!") - + + + z_indices = dict() y_indices = dict() x_indices = dict() curr_loc_indices = dict() + is_3D = False #loop through all skimage identified regions for region_prop_key in region_property_dict: region_prop = region_property_dict[region_prop_key] index = region_prop.label - curr_y_ixs, curr_x_ixs = np.transpose(region_prop.coords) + if len(region_prop.coords[0])>=3: + is_3D = True + curr_z_ixs, curr_y_ixs, curr_x_ixs = np.transpose(region_prop.coords) + z_indices[index] = curr_z_ixs + else: + curr_y_ixs, curr_x_ixs = np.transpose(region_prop.coords) + z_indices[index] = -1 + y_indices[index] = curr_y_ixs x_indices[index] = curr_x_ixs - curr_loc_indices[index] = len(curr_x_ixs) + curr_loc_indices[index] = len(curr_y_ixs) #print("indices found") - return [curr_loc_indices, y_indices, x_indices] + if is_3D: + return [curr_loc_indices, z_indices, y_indices, x_indices] + else: + return [curr_loc_indices, y_indices, x_indices] + + +def adjust_pbc_point(in_dim, dim_min, dim_max): + '''Function to adjust a point to the other boundary for PBCs + + Parameters + ---------- + in_dim : int + Input coordinate to adjust + dim_min : int + Minimum point for the dimension + dim_max : int + Maximum point for the dimension (inclusive) + + Returns + ------- + int + The adjusted point on the opposite boundary + + Raises + ------ + ValueError + If in_dim isn't on one of the boundary points + ''' + if in_dim == dim_min: + return dim_max + elif in_dim == dim_max: + return dim_min + else: + raise ValueError("In adjust_pbc_point, in_dim isn't on a boundary.") +def get_label_props_in_dict(labels): + '''Function to get the label properties into a dictionary format. 
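A quick behaviour sketch for `adjust_pbc_point`, defined just above, on a domain whose valid indices run from 0 to 99 (assuming this version of tobac is importable):

```python
from tobac.feature_detection import adjust_pbc_point

print(adjust_pbc_point(0, 0, 99))   # 99: the minimum wall maps to the maximum wall
print(adjust_pbc_point(99, 0, 99))  # 0: and vice versa
# adjust_pbc_point(50, 0, 99) raises ValueError: only wall points
# have a periodic image to inspect
```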
+ + Parameters + ---------- + labels: 2D or 3D array-like + comes from the `skimage.measure.label` function + + Returns + ------- + dict + output from skimage.measure.regionprops in dictionary format, where they key is the label number + ''' + import skimage.measure + + region_properties_raw = skimage.measure.regionprops(labels) + region_properties_dict = dict() + for region_prop in region_properties_raw: + region_properties_dict[region_prop.label] = region_prop + + return region_properties_dict -def feature_position(hdim1_indices,hdim2_indeces,region,track_data,threshold_i,position_threshold, target): +def feature_position(hdim1_indices, hdim2_indeces, + vdim_indyces = None, + region_small = None, region_bbox = None, + track_data = None, threshold_i = None, + position_threshold = 'center', + target = None, PBC_flag = 'none', + x_min = 0, x_max = 0, y_min = 0, y_max = 0): '''Function to determine feature position Parameters ---------- - hdim1_indices : list - list of indices along hdim1 (typically ```y```) - - hdim2_indeces : list - List of indices of feature along hdim2 (typically ```x```) - - region : list - List of 2-element tuples - track_data : array-like - 2D array containing the data - - threshold_i : float - TODO: ?? - - position_threshold : str - TODO: ?? + hdim1_indices : list + list of indices along hdim1 (typically ```y```) + + hdim2_indeces : list + List of indices of feature along hdim2 (typically ```x```) + + vdim_indyces : list, optional + List of indices of feature along optional vdim (typically ```z```) + + region_small : 2D or 3D array-like + A true/false array containing True where the threshold + is met and false where the threshold isn't met. This array should + be the the size specified by region_bbox, and can be a subset of the + overall input array (i.e., ```track_data```). + + region_bbox : list or tuple with length of 4 or 6 + The coordinates that region_small occupies within the total track_data + array. This is in the order that the coordinates come from the + ```get_label_props_in_dict``` function. For 2D data, this should be: + (hdim1 start, hdim 2 start, hdim 1 end, hdim 2 end). For 3D data, this + is: (vdim start, hdim1 start, hdim 2 start, vdim end, hdim 1 end, hdim 2 end). - target : str - TODO: ?? - + track_data : 2D or 3D array-like + 2D or 3D array containing the data + + threshold_i : float + The threshold value that we are testing against + + position_threshold : {'center', 'extreme', 'weighted_diff', 'weighted abs'} + How to select the single point position from our data. + 'center' picks the geometrical centre of the region, and is typically not recommended. + 'extreme' picks the maximum or minimum value inside the region (max/min set by ```target```) + 'weighted_diff' picks the centre of the region weighted by the distance from the threshold value + 'weighted_abs' picks the centre of the region weighted by the absolute values of the field + + target : {'maximum', 'minimum'} + Used only when position_threshold is set to 'extreme', this sets whether + it is looking for maxima or minima. + + PBC_flag : {'none', 'hdim_1', 'hdim_2', 'both'} + Sets whether to use periodic boundaries, and if so in which directions. 
+ 'none' means that we do not have periodic boundaries + 'hdim_1' means that we are periodic along hdim1 + 'hdim_2' means that we are periodic along hdim2 + 'both' means that we are periodic along both horizontal dimensions + + x_min : int + Minimum real x coordinate (for PBCs) + + x_max: int + Maximum real x coordinate (for PBCs) + + y_min : int + Minimum real y coordinate (for PBCs) + + y_max : int + Maximum real y coordinate (for PBCs) + Returns ------- - hdim1_index : float + float + (if 3D) feature position along vertical dimension + float feature position along 1st horizontal dimension - hdim2_index : float + float feature position along 2nd horizontal dimension ''' + + # First, if necessary, run PBC processing. + #processing of PBC indices + #checks to see if minimum and maximum values are present in dimensional array + #then if true, adds max value to any indices past the halfway point of their respective dimension + #are we 3D? if so, True. + is_3D = False + + if PBC_flag == 'hdim_1': + #ONLY periodic in y + hdim1_indices_2 = hdim1_indices + hdim2_indeces_2 = hdim2_indeces + + if (((np.max(hdim1_indices)) == y_max) and((np.min(hdim1_indices)== y_min))): + for y2 in range(0,len(hdim1_indices_2)): + h1_ind = hdim1_indices_2[y2] + if h1_ind < (y_max/2): + hdim1_indices_2[y2] = h1_ind + y_max + + elif PBC_flag == 'hdim_2': + #ONLY periodic in x + hdim1_indices_2 = hdim1_indices + hdim2_indeces_2 = hdim2_indeces + + if (((np.max(hdim2_indeces)) == x_max) and((np.min(hdim2_indeces)== x_min))): + for x2 in range(0,len(hdim2_indeces_2)): + h2_ind = hdim2_indeces_2[x2] + if h2_ind < (x_max/2): + hdim2_indeces_2[x2] = h2_ind + x_max + + elif PBC_flag == 'both': + #DOUBLY periodic boundaries + hdim1_indices_2 = hdim1_indices + hdim2_indeces_2 = hdim2_indeces + + if (((np.max(hdim1_indices)) == y_max) and((np.min(hdim1_indices)== y_min))): + for y2 in range(0,len(hdim1_indices_2)): + h1_ind = hdim1_indices_2[y2] + if h1_ind < (y_max/2): + hdim1_indices_2[y2] = h1_ind + y_max + + if (((np.max(hdim2_indeces)) == x_max) and((np.min(hdim2_indeces)== x_min))): + for x2 in range(0,len(hdim2_indeces_2)): + h2_ind = hdim2_indeces_2[x2] + if h2_ind < (x_max/2): + hdim2_indeces_2[x2] = h2_ind + x_max + + else: + hdim1_indices_2 = hdim1_indices + hdim2_indeces_2 = hdim2_indeces + + hdim1_indices = hdim1_indices_2 + hdim2_indeces = hdim2_indeces_2 + + if len(region_bbox) == 4: + #2D case + is_3D = False + track_data_region = track_data[region_bbox[0]:region_bbox[2], region_bbox[1]:region_bbox[3]] + elif len(region_bbox) == 6: + #3D case + is_3D = True + track_data_region = track_data[region_bbox[0]:region_bbox[3], region_bbox[1]:region_bbox[4], region_bbox[2]:region_bbox[5]] + if position_threshold=='center': # get position as geometrical centre of identified region: hdim1_index=np.mean(hdim1_indices) hdim2_index=np.mean(hdim2_indeces) + if is_3D: + vdim_index = np.mean(vdim_indyces) elif position_threshold=='extreme': #get position as max/min position inside the identified region: if target == 'maximum': - index=np.argmax(track_data[region]) - hdim1_index=hdim1_indices[index] - hdim2_index=hdim2_indeces[index] - + index=np.argmax(track_data_region[region_small]) if target == 'minimum': - index=np.argmin(track_data[region]) - hdim1_index=hdim1_indices[index] - hdim2_index=hdim2_indeces[index] + index=np.argmin(track_data_region[region_small]) + hdim1_index=hdim1_indices[index] + hdim2_index=hdim2_indeces[index] + if is_3D: + vdim_index = vdim_indyces[index] elif position_threshold=='weighted_diff': # get 
position as centre of identified region, weighted by difference from the threshold: - weights=abs(track_data[region]-threshold_i) + weights=abs(track_data_region[region_small]-threshold_i) if sum(weights)==0: weights=None hdim1_index=np.average(hdim1_indices,weights=weights) hdim2_index=np.average(hdim2_indeces,weights=weights) + if is_3D: + vdim_index = np.average(vdim_indyces,weights=weights) elif position_threshold=='weighted_abs': # get position as centre of identified region, weighted by absolute values if the field: - weights=abs(track_data[region]) + weights=abs(track_data[region_small]) if sum(weights)==0: weights=None hdim1_index=np.average(hdim1_indices,weights=weights) hdim2_index=np.average(hdim2_indeces,weights=weights) + if is_3D: + vdim_index = np.average(vdim_indyces,weights=weights) + else: raise ValueError('position_threshold must be center,extreme,weighted_diff or weighted_abs') - return hdim1_index,hdim2_index + + #re-transform of any coords beyond the boundaries - (should be) general enough to work for any variety of PBC + #as no x or y points will be beyond the boundaries if we haven't transformed them in the first place + if (PBC_flag == 'hdim_1') or (PBC_flag == 'hdim_2') or (PBC_flag == 'both'): + if hdim1_index > y_max: + hdim1_index = hdim1_index - y_max + + if hdim2_index > x_max: + hdim2_index = hdim2_index - x_max + + if is_3D: + return vdim_index, hdim1_index, hdim2_index + else: + return hdim1_index,hdim2_index def test_overlap(region_inner,region_outer): '''function to test for overlap between two regions (TODO: probably scope for further speedup here) @@ -178,18 +371,30 @@ def remove_parents(features_thresholds,regions_i,regions_old): pandas.DataFrame Dataframe containing detected features excluding those that are superseded by newly detected ones ''' - list_remove=[] - for idx_i,region_i in regions_i.items(): - for idx_old,region_old in regions_old.items(): - if test_overlap(regions_old[idx_old],regions_i[idx_i]): - list_remove.append(idx_old) - list_remove=list(set(list_remove)) + #list_remove=[] + try: + all_curr_pts = np.concatenate([vals for idx, vals in regions_i.items()]) + all_old_pts = np.concatenate([vals for idx, vals in regions_old.items()]) + except ValueError: + #the case where there are no regions + return features_thresholds + old_feat_arr = np.empty((len(all_old_pts))) + curr_loc = 0 + for idx_old in regions_old: + old_feat_arr[curr_loc:curr_loc+len(regions_old[idx_old])] = idx_old + curr_loc+=len(regions_old[idx_old]) + + common_pts, common_ix_new, common_ix_old = np.intersect1d(all_curr_pts, all_old_pts, return_indices=True) + list_remove = np.unique(old_feat_arr[common_ix_old]) + # remove parent regions: if features_thresholds is not None: features_thresholds=features_thresholds[~features_thresholds['idx'].isin(list_remove)] return features_thresholds + + def feature_detection_threshold(data_i,i_time, threshold=None, min_num=0, @@ -199,7 +404,8 @@ def feature_detection_threshold(data_i,i_time, n_erosion_threshold=0, n_min_threshold=0, min_distance=0, - idx_start=0): + idx_start=0, + PBC_flag='none'): '''function to find features based on individual threshold value Parameters @@ -224,6 +430,13 @@ def feature_detection_threshold(data_i,i_time, minimum distance between detected features (m) idx_start : int feature id to start with + PBC_flag: str('none', 'hdim_1', 'hdim_2', 'both') + flag sets how to treat boundaries (i.e., whether they are periodic or not) + 'none' - no PBCs + 'hdim_1' - periodic in hdim1 ONLY + 'hdim_2' - periodic in 
hdim2 ONLY + 'both' - DOUBLY periodic + Returns ------- pandas DataFrame @@ -233,6 +446,10 @@ def feature_detection_threshold(data_i,i_time, ''' from skimage.measure import label from skimage.morphology import binary_erosion + from copy import deepcopy + + # If we are given a 3D data array, we should do 3D feature detection. + is_3D = len(data_i.shape)==3 # if looking for minima, set values above threshold to 0 and scale by data minimum: if target == 'maximum': @@ -243,21 +460,196 @@ def feature_detection_threshold(data_i,i_time, # only include values greater than threshold # erode selected regions by n pixels if n_erosion_threshold>0: + # is this right? the documentation is unclear + #if is_3D: + # selem=np.ones((n_erosion_threshold,n_erosion_threshold, n_erosion_threshold)) + #else: selem=np.ones((n_erosion_threshold,n_erosion_threshold)) mask=binary_erosion(mask,selem).astype(bool) # detect individual regions, label and count the number of pixels included: - labels = label(mask, background=0) + labels, num_labels = label(mask, background=0, return_num = True) + if not is_3D: + # let's transpose labels to a 1,y,x array to make calculations etc easier. + labels = labels[np.newaxis, :, :] + z_min = 0 + z_max = labels.shape[0] + y_min = 0 + y_max = labels.shape[1] - 1 + x_min = 0 + x_max = labels.shape[2] - 1 + + + #deal with PBCs + # all options that involve dealing with periodic boundaries + pbc_options = ['hdim_1', 'hdim_2', 'both'] + + # we need to deal with PBCs in some way. + if PBC_flag in pbc_options: + # + #create our copy of `labels` to edit + labels_2 = deepcopy(labels) + #points we've already edited + skip_list = np.array([]) + #labels that touch the PBC walls + wall_labels = np.array([]) + + if num_labels > 0: + all_label_props = get_label_props_in_dict(labels) + [all_labels_max_size, all_label_locs_v, all_label_locs_h1, all_label_locs_h2 + ] = get_indices_of_labels_from_reg_prop_dict(all_label_props) + + #find the points along the boundaries + + #along hdim_1 or both horizontal boundaries + if PBC_flag == 'hdim_1' or PBC_flag == 'both': + #north wall + n_wall = np.unique(labels[:,y_max,:]) + wall_labels = np.append(wall_labels,n_wall) + + #south wall + s_wall = np.unique(labels[:,y_min,:]) + wall_labels = np.append(wall_labels,s_wall) + + #along hdim_2 or both horizontal boundaries + if PBC_flag == 'hdim_2' or PBC_flag == 'both': + #east wall + e_wall = np.unique(labels[:,:,x_max]) + wall_labels = np.append(wall_labels,e_wall) + + #west wall + w_wall = np.unique(labels[:,:,x_min]) + wall_labels = np.append(wall_labels,w_wall) + + + wall_labels = np.unique(wall_labels) + + for label_ind in wall_labels: + # 0 isn't a real index + if label_ind == 0: + continue + # skip this label if we have already dealt with it. + if np.any(label_ind == skip_list): + continue + + #get all locations of this label. + #TODO: harmonize x/y/z vs hdim1/hdim2/vdim. + label_locs_v = all_label_locs_v[label_ind] + label_locs_h1 = all_label_locs_h1[label_ind] + label_locs_h2 = all_label_locs_h2[label_ind] + + #loop through every point in the label + for label_z, label_y, label_x in zip( + label_locs_v, label_locs_h1, label_locs_h2): + # check if this is the special case of being a corner point. + # if it's doubly periodic AND on both x and y boundaries, it's a corner point + # and we have to look at the other corner. + # here, we will only look at the corner point and let the below deal with x/y only. 
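An editorial aside before the wall-merging loop continues: the helper adjust_pbc_point, called repeatedly below, is not shown in this hunk. A minimal sketch of what it presumably does, assuming it is only ever called for an index already known to lie on a periodic wall (the real helper in the patch may differ in details):

def adjust_pbc_point(in_dim, dim_min, dim_max):
    '''Hypothetical sketch: map an index sitting on one periodic wall to the
    matching index on the opposite wall.'''
    if in_dim == dim_min:
        return dim_max
    elif in_dim == dim_max:
        return dim_min
    else:
        raise ValueError('point must be on a boundary (dim_min or dim_max)')

With that mental model, the corner-point branch below simply looks up the label on the diagonally opposite corner and merges the two label sets.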
+ if PBC_flag == 'both' and (np.any(label_y == [y_min,y_max]) and np.any(label_x == [x_min,x_max])): + + #adjust x and y points to the other side + y_val_alt = adjust_pbc_point(label_y, y_min, y_max) + x_val_alt = adjust_pbc_point(label_x, x_min, x_max) + + label_on_corner = labels[label_z,y_val_alt,x_val_alt] + + if((label_on_corner !=0) and (~np.any(label_on_corner==skip_list))): + #alt_inds = np.where(labels==alt_label_3) + #get a list of indices where the label on the corner is so we can switch them + #in the new list. + + labels_2[all_label_locs_v[label_on_corner], + all_label_locs_h1[label_on_corner], + all_label_locs_h2[label_on_corner]] = label_ind + skip_list = np.append(skip_list,label_on_corner) + + #if it's labeled and has already been dealt with + elif((label_on_corner !=0) and (np.any(label_on_corner==skip_list))): + #find the updated label, and overwrite all of label_ind indices with updated label + labels_2_alt = labels_2[label_z,y_val_alt,x_val_alt] + labels_2[label_locs_v, + label_locs_h1, + label_locs_h2] = labels_2_alt + skip_list = np.append(skip_list,label_ind) + break + + # on the hdim1 boundary and periodic on hdim1 + if (PBC_flag == 'hdim_1' or PBC_flag == 'both') and np.any(label_y == [y_min,y_max]): + y_val_alt = adjust_pbc_point(label_y, y_min, y_max) + + #get the label value on the opposite side + label_alt = labels[label_z,y_val_alt,label_x] + + #if it's labeled and not already been dealt with + if((label_alt !=0) and (~np.any(label_alt==skip_list))): + #find the indices where it has the label value on opposite side and change their value to original side + #print(all_label_locs_v[label_alt], alt_inds[0]) + labels_2[all_label_locs_v[label_alt], + all_label_locs_h1[label_alt], + all_label_locs_h2[label_alt]] = label_ind + #we have already dealt with this label. + skip_list = np.append(skip_list,label_alt) + + #if it's labeled and has already been dealt with + elif((label_alt !=0) and (np.any(label_alt==skip_list))): + #find the updated label, and overwrite all of label_ind indices with updated label + labels_2_alt = labels_2[label_z,y_val_alt,label_x] + labels_2[label_locs_v, + label_locs_h1, + label_locs_h2] = labels_2_alt + skip_list = np.append(skip_list,label_ind) + break + + if (PBC_flag == 'hdim_2' or PBC_flag == 'both') and np.any(label_x == [x_min,x_max]): + x_val_alt = adjust_pbc_point(label_x, x_min, x_max) + + #get the label value on the opposite side + label_alt = labels[label_z,label_y,x_val_alt] + + #if it's labeled and not already been dealt with + if((label_alt !=0) and (~np.any(label_alt==skip_list))): + #find the indices where it has the label value on opposite side and change their value to original side + labels_2[all_label_locs_v[label_alt], + all_label_locs_h1[label_alt], + all_label_locs_h2[label_alt]] = label_ind + #we have already dealt with this label. + skip_list = np.append(skip_list,label_alt) + + #if it's labeled and has already been dealt with + elif((label_alt !=0) and (np.any(label_alt==skip_list))): + #find the updated label, and overwrite all of label_ind indices with updated label + labels_2_alt = labels_2[label_z,label_y,x_val_alt] + labels_2[label_locs_v, + label_locs_h1, + label_locs_h2] = labels_2_alt + skip_list = np.append(skip_list,label_ind) + break + + + + #copy over new labels after we have adjusted everything + labels = labels_2 + + elif PBC_flag == 'none': + pass + else: + #TODO: fix periodic flag to be str, then update this with the possible values. 
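Stepping back briefly: the wall-swapping above serves the same purpose as the unwrap-average-rewrap trick used in feature_position earlier in this patch (applied there only when the region touches both walls of a dimension). A standalone numpy sketch, illustrative rather than patch code, of why the unwrapping matters for a feature straddling a periodic hdim_2 boundary:

import numpy as np

# x indices 0..9, so x_min = 0 and x_max = 9; the feature wraps around in x
hdim2_indices = np.array([9, 0, 1, 2])
print(np.mean(hdim2_indices))  # 3.0 -- the naive mean lands far from the feature

# unwrap: indices below the halfway point are shifted up by x_max,
# mirroring the hdim_2/both branches of the PBC preprocessing
x_max = 9
unwrapped = np.where(hdim2_indices < x_max / 2, hdim2_indices + x_max, hdim2_indices)
center = np.mean(unwrapped)  # 9.75

# re-wrap into the valid index range, mirroring the final re-transform
if center > x_max:
    center -= x_max
print(center)  # 0.75 -- just past the boundary, as expected

The ValueError below then rejects any unrecognized PBC_flag value.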
+ raise ValueError("Options for periodic are currently: none, hdim_1, hdim_2, both") + + #num_labels = num_labels - len(skip_list) + # END PBC treatment + # we need to get label properties again after we handle PBCs. label_props = get_label_props_in_dict(labels) if len(label_props)>0: - [total_indices_all, hdim1_indices_all, hdim2_indices_all] = get_indices_of_labels_from_reg_prop_dict(label_props) - + [total_indices_all, vdim_indyces_all, hdim1_indices_all, hdim2_indices_all] = get_indices_of_labels_from_reg_prop_dict(label_props) + #values, count = np.unique(labels[:,:].ravel(), return_counts=True) #values_counts=dict(zip(values, count)) # Filter out regions that have less pixels than n_min_threshold #values_counts={k:v for k, v in values_counts.items() if v>n_min_threshold} + #check if not entire domain filled as one feature - if len(label_props)>0: + if num_labels>0: #create empty list to store individual features for this threshold list_features_threshold=[] #create empty dict to store regions for individual features for this threshold @@ -272,28 +664,77 @@ def feature_detection_threshold(data_i,i_time, curr_count = total_indices_all[cur_idx] if curr_count <=n_min_threshold: continue - - label_bbox = label_props[cur_idx].bbox - + if is_3D: + vdim_indyces = vdim_indyces_all[cur_idx] + else: + vdim_indyces = None hdim1_indices = hdim1_indices_all[cur_idx] hdim2_indeces = hdim2_indices_all[cur_idx] - region.fill(False) - region[hdim1_indices,hdim2_indeces]=True + + label_bbox = label_props[cur_idx].bbox + bbox_zstart, bbox_ystart, bbox_xstart, bbox_zend, bbox_yend, bbox_xend = label_bbox + bbox_zsize = bbox_zend - bbox_zstart + bbox_xsize = bbox_xend - bbox_xstart + bbox_ysize = bbox_yend - bbox_ystart + #build small region box + if is_3D: + region_small = np.full((bbox_zsize, bbox_ysize, bbox_xsize), False) + region_small[vdim_indyces-bbox_zstart, + hdim1_indices-bbox_ystart, hdim2_indeces-bbox_xstart] = True + + else: + region_small = np.full((bbox_ysize, bbox_xsize), False) + region_small[hdim1_indices-bbox_ystart, hdim2_indeces-bbox_xstart] = True + #we are 2D and need to remove the dummy 3D coordinate. 
+ label_bbox = (label_bbox[1], label_bbox[2], label_bbox[4], label_bbox[5]) #[hdim1_indices,hdim2_indeces]= np.nonzero(region) #write region for individual threshold and feature to dict - region_i=list(zip(hdim1_indices,hdim2_indeces)) + + if is_3D: + region_i=list(zip(hdim1_indices*x_max*z_max +hdim2_indeces* z_max + vdim_indyces)) + else: + region_i=np.array(hdim1_indices*x_max+hdim2_indeces) + regions[cur_idx+idx_start]=region_i # Determine feature position for region by one of the following methods: - hdim1_index,hdim2_index=feature_position(hdim1_indices,hdim2_indeces,region,data_i,threshold,position_threshold,target) + single_indices=feature_position( + hdim1_indices,hdim2_indeces, + vdim_indyces=vdim_indyces, + region_small = region_small, region_bbox = label_bbox, + track_data = data_i, threshold_i = threshold, + position_threshold = position_threshold, target = target, + PBC_flag = PBC_flag, + x_min = x_min, x_max = x_max, y_min = y_min, y_max = y_max) + if is_3D: + vdim_index, hdim1_index, hdim2_index = single_indices + else: + hdim1_index, hdim2_index = single_indices #create individual DataFrame row in tracky format for identified feature - list_features_threshold.append({'frame': int(i_time), + appending_dict = {'frame': int(i_time), 'idx':cur_idx+idx_start, 'hdim_1': hdim1_index, 'hdim_2':hdim2_index, 'num':curr_count, - 'threshold_value':threshold}) - features_threshold=pd.DataFrame(list_features_threshold) + 'threshold_value':threshold} + column_names = ['frame', 'idx', 'hdim_1', 'hdim_2', 'num', 'threshold_value'] + if is_3D: + appending_dict['vdim'] = vdim_index + column_names = ['frame', 'idx', 'vdim', 'hdim_1', 'hdim_2', 'num', 'threshold_value'] + list_features_threshold.append(appending_dict) + #after looping thru proto-features, check if any exceed num threshold + #if they do not, provide a blank pandas df and regions dict + if list_features_threshold == []: + #print("no features above num value at threshold: ",threshold) + features_threshold=pd.DataFrame() + regions=dict() + #if they do, provide a dataframe with features organized with 2D and 3D metadata + else: + #print("at least one feature above num value at threshold: ",threshold) + #print("column_names, after cur_idx loop: ",column_names) + features_threshold=pd.DataFrame(list_features_threshold, columns = column_names) + + #features_threshold=pd.DataFrame(list_features_threshold, columns = column_names) else: features_threshold=pd.DataFrame() regions=dict() @@ -309,7 +750,8 @@ def feature_detection_multithreshold_timestep(data_i,i_time, n_erosion_threshold=0, n_min_threshold=0, min_distance=0, - feature_number_start=1 + feature_number_start=1, + PBC_flag='none' ): '''function to find features in each timestep based on iteratively finding regions above/below a set of thresholds @@ -337,23 +779,32 @@ def feature_detection_multithreshold_timestep(data_i,i_time, minimum distance between detected features (m) feature_number_start : int feature number to start with + PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both') + Sets whether to use periodic boundaries, and if so in which directions. + 'none' means that we do not have periodic boundaries + 'hdim_1' means that we are periodic along hdim1 + 'hdim_2' means that we are periodic along hdim2 + 'both' means that we are periodic along both horizontal dimensions + Returns ------- pandas DataFrame detected features for individual timestep ''' + # consider switching to scikit image filter? 
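On the region_i bookkeeping above: each (vdim, hdim_1, hdim_2) point is collapsed into a single integer so that remove_parents can intersect point sets with np.intersect1d instead of nested loops. Two apparent rough edges in the hunk as written: zip over the already-summed array yields 1-tuples rather than plain integers (the 2D branch returns a plain array), and since x_max and y_max are defined earlier in this hunk as dimension sizes minus one, the multipliers can collide for neighboring points. np.ravel_multi_index with the full dimension sizes is a collision-free way to express the same idea; an alternative sketch, not the patch's code:

import numpy as np

# toy 3D label domain: 5 levels, 10 rows (hdim_1), 10 columns (hdim_2)
shape = (5, 10, 10)
vdim_pts = np.array([0, 1])
hdim1_pts = np.array([3, 3])
hdim2_pts = np.array([7, 8])

# one unique integer per 3D point: v*(ny*nx) + h1*nx + h2
flat_ids = np.ravel_multi_index((vdim_pts, hdim1_pts, hdim2_pts), shape)
print(flat_ids)                           # [ 37 138]

# the mapping is invertible if the 3D indices are ever needed again
print(np.unravel_index(flat_ids, shape))  # (array([0, 1]), array([3, 3]), array([7, 8]))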
from scipy.ndimage.filters import gaussian_filter + # get actual numpy array track_data = data_i.core_data() - - track_data=gaussian_filter(track_data, sigma=sigma_threshold) #smooth data slightly to create rounded, continuous field + # smooth data slightly to create rounded, continuous field + track_data=gaussian_filter(track_data, sigma=sigma_threshold) # create empty lists to store regions and features for individual timestep features_thresholds=pd.DataFrame() for i_threshold,threshold_i in enumerate(threshold): if (i_threshold>0 and not features_thresholds.empty): - idx_start=features_thresholds['idx'].max()+1 + idx_start=features_thresholds['idx'].max()+feature_number_start else: - idx_start=0 + idx_start=feature_number_start-1 features_threshold_i,regions_i=feature_detection_threshold(track_data,i_time, threshold=threshold_i, sigma_threshold=sigma_threshold, @@ -363,7 +814,8 @@ def feature_detection_multithreshold_timestep(data_i,i_time, n_erosion_threshold=n_erosion_threshold, n_min_threshold=n_min_threshold, min_distance=min_distance, - idx_start=idx_start + idx_start=idx_start, + PBC_flag = PBC_flag ) if any([x is not None for x in features_threshold_i]): features_thresholds=features_thresholds.append(features_threshold_i) @@ -378,7 +830,8 @@ def feature_detection_multithreshold_timestep(data_i,i_time, return features_thresholds def feature_detection_multithreshold(field_in, - dxy, + dxy = None, + dz = None, threshold=None, min_num=0, target='maximum', @@ -387,19 +840,29 @@ def feature_detection_multithreshold(field_in, n_erosion_threshold=0, n_min_threshold=0, min_distance=0, - feature_number_start=1 + feature_number_start=1, + PBC_flag='none' ): '''Function to perform feature detection based on contiguous regions above/below a threshold Parameters ---------- field_in: iris.cube.Cube - 2D field to perform the tracking on (needs to have coordinate 'time' along one of its dimensions) - - thresholds: list of floats + 2D or 3D field to perform the tracking on (needs to have coordinate 'time' along one of its dimensions) + threshold: list of float or ints threshold values used to select target regions to track - dxy: float - grid spacing of the input data (m) + dxy: float + Constant horzontal grid spacing (m), optional. If not specified, + this function requires that ```x_coordinate_name``` and + ```y_coordinate_name``` are available in `features`. If you specify a + value here, this function assumes that it is the x/y spacing between points + even if ```x_coordinate_name``` and ```y_coordinate_name``` are specified. + dz: float + Constant vertical grid spacing (m), optional. If not specified + and the input is 3D, this function requires that `altitude` is available + in the `features` input. If you specify a value here, this function assumes + that it is the constant z spacing between points, even if ```z_coordinate_name``` + is specified. target: str ('minimum' or 'maximum') flag to determine if tracking is targetting minima or maxima in the data position_threshold: str('extreme', 'weighted_diff', 'weighted_abs' or 'center') @@ -412,13 +875,19 @@ def feature_detection_multithreshold(field_in, minimum number of identified features min_distance: float minimum distance between detected features (m) - + PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both') + Sets whether to use periodic boundaries, and if so in which directions. 
+ 'none' means that we do not have periodic boundaries + 'hdim_1' means that we are periodic along hdim1 + 'hdim_2' means that we are periodic along hdim2 + 'both' means that we are periodic along both horizontal dimensions + Returns ------- pandas DataFrame detected features ''' - from .utils import add_coordinates + from .utils import add_coordinates, add_coordinates_3D logging.debug('start feature detection based on thresholds') @@ -443,14 +912,16 @@ def feature_detection_multithreshold(field_in, n_erosion_threshold=n_erosion_threshold, n_min_threshold=n_min_threshold, min_distance=min_distance, - feature_number_start=feature_number_start + feature_number_start=feature_number_start, + PBC_flag=PBC_flag, ) #check if list of features is not empty, then merge features from different threshold values #into one DataFrame and append to list for individual timesteps: if not features_thresholds.empty: #Loop over DataFrame to remove features that are closer than distance_min to each other: if (min_distance > 0): - features_thresholds=filter_min_distance(features_thresholds,dxy,min_distance) + features_thresholds=filter_min_distance(features_thresholds,dxy=dxy, dz=dz, + min_distance = min_distance) list_features_timesteps.append(features_thresholds) logging.debug('Finished feature detection for ' + time_i.strftime('%Y-%m-%d_%H:%M:%S')) @@ -463,24 +934,49 @@ def feature_detection_multithreshold(field_in, features['feature']=features.index+feature_number_start # features_filtered = features.drop(features[features['num'] < min_num].index) # features_filtered.drop(columns=['idx','num','threshold_value'],inplace=True) - features=add_coordinates(features,field_in) + if 'vdim' in features: + features=add_coordinates_3D(features,field_in) + else: + features=add_coordinates(features,field_in) else: features=None logging.info('No features detected') logging.debug('feature detection completed') return features -def filter_min_distance(features,dxy,min_distance): - '''Function to perform feature detection based on contiguous regions above/below a threshold +def filter_min_distance(features, dxy = None,dz = None, min_distance = None, + x_coordinate_name = "projection_x_coordinate", + y_coordinate_name = "projection_y_coordinate", + z_coordinate_name = "altitude"): + '''Function to remove features that are too close together Parameters ---------- features: pandas DataFrame features dxy: float - horzontal grid spacing (m) + Constant horzontal grid spacing (m), optional. If not specified, + this function requires that ```x_coordinate_name``` and + ```y_coordinate_name``` are available in `features`. If you specify a + value here, this function assumes that it is the x/y spacing between points + even if ```x_coordinate_name``` and ```y_coordinate_name``` are specified. + dz: float + Constant vertical grid spacing (m), optional. If not specified + and the input is 3D, this function requires that `altitude` is available + in the `features` input. If you specify a value here, this function assumes + that it is the constant z spacing between points, even if ```z_coordinate_name``` + is specified. min_distance: float - minimum distance between detected features (m) + minimum distance between detected features (m) + x_coordinate_name: str + The name of the x coordinate to calculate distance based on in meters. + This is typically `projection_x_coordinate` + y_coordinate_name: str + The name of the y coordinate to calculate distance based on in meters. 
+        This is typically `projection_y_coordinate`
+    z_coordinate_name: str
+        The name of the z coordinate to calculate distance based on in meters.
+        This is typically `altitude`

     Returns
     -------
@@ -489,530 +985,37 @@ def filter_min_distance(features,dxy,min_distance):
     '''
     from itertools import combinations
     remove_list_distance=[]
-    #create list of tuples with all combinations of features at the timestep:
-    indeces=combinations(features.index.values,2)
-    #Loop over combinations to remove features that are closer together than min_distance and keep larger one (either higher threshold or larger area)
-    for index_1,index_2 in indeces:
-        if index_1 is not index_2:
-            features.loc[index_1,'hdim_1']
-            distance=dxy*np.sqrt((features.loc[index_1,'hdim_1']-features.loc[index_2,'hdim_1'])**2+(features.loc[index_1,'hdim_2']-features.loc[index_2,'hdim_2'])**2)
-            if distance <= min_distance:
-#                logging.debug('distance<= min_distance: ' + str(distance))
-                if features.loc[index_1,'threshold_value']>features.loc[index_2,'threshold_value']:
-                    remove_list_distance.append(index_2)
-                elif features.loc[index_1,'threshold_value']<features.loc[index_2,'threshold_value']:
-                    remove_list_distance.append(index_1)
-                elif features.loc[index_1,'threshold_value']==features.loc[index_2,'threshold_value']:
-                    if features.loc[index_1,'num']>features.loc[index_2,'num']:
-                        remove_list_distance.append(index_2)
-                    elif features.loc[index_1,'num']<features.loc[index_2,'num']:
-                        remove_list_distance.append(index_1)
-                    elif features.loc[index_1,'num']==features.loc[index_2,'num']:
-                        remove_list_distance.append(index_2)
-    features=features[~features.index.isin(remove_list_distance)]
-    return features
-        if (min_distance > 0):
-            features_thresholds=filter_min_distance_3D(features_thresholds,dxy,dz,min_distance)
-        list_features_timesteps.append(features_thresholds)
-
-        print(features_thresholds)
-
-        logging.debug('Finished feature detection for ' + time_i.strftime('%Y-%m-%d_%H:%M:%S'))
-        print('Finished feature detection for ' + time_i.strftime('%Y-%m-%d_%H:%M:%S'))
+    #if we are 3D, the vertical dimension is in features. if we are 2D, there
+    #is no vertical dimension in features.
+    is_3D = 'vdim' in features

-    logging.debug('feature detection: merging DataFrames')
-    # Check if features are detected and then concatenate features from different timesteps into one pandas DataFrame
-    # If no features are detected raise error
-    if any([not x.empty for x in list_features_timesteps]):
-        features=pd.concat(list_features_timesteps, ignore_index=True)
-        features['feature']=features.index+feature_number_start
-        # features_filtered = features.drop(features[features['num'] < min_num].index)
-        # features_filtered.drop(columns=['idx','num','threshold_value'],inplace=True)
-        features=add_coordinates_3D(features,field_in)
-    else:
-        features=None
-        logging.info('No features detected')
-    logging.debug('feature detection completed')
-    print('feature detection completed')
-    return features
-
-def feature_detection_multithreshold_timestep_3D(data_i,i_time,
-                      threshold=None,
-                      min_num=0,
-                      target='maximum',
-                      position_threshold='center',
-                      sigma_threshold=0.5,
-                      n_erosion_threshold=0,
-                      n_min_threshold=0,
-                      min_distance=0,
-                      feature_number_start=1
-                      ):
-    '''
-    function to find features in each timestep based on iteratively finding regions above/below a set of thresholds
-    Input:
-    data_i:      iris.cube.Cube
-                 3D field to perform the feature detection (single timestep)
-    i_time:      int
-                 number of the current timestep
-
-    threshold:    list of floats
-                  threshold values used to select target regions to track
-    dxy:          float
-                  grid spacing of the input data (m)
-    target:       str ('minimum' or 'maximum')
-                  flag to determine if tracking is targetting minima or maxima in the data
-    position_threshold: str('extreme', 'weighted_diff', 'weighted_abs' or 'center')
-                      flag choosing method used for the position of the tracked feature
-    sigma_threshold: float
-                     standard deviation for intial filtering step
-    n_erosion_threshold: int
-                         number of pixel by which to 
erode the identified features - n_min_threshold: int - minimum number of identified features - min_distance: float - minimum distance between detected features (m) - feature_number_start: int - feature number to start with - Output: - features_threshold: pandas DataFrame - detected features for individual timestep - ''' - from scipy.ndimage.filters import gaussian_filter - - track_data = data_i.core_data() - - track_data=gaussian_filter(track_data, sigma=sigma_threshold) #smooth data slightly to create rounded, continuous field - # create empty lists to store regions and features for individual timestep - features_thresholds=pd.DataFrame() - for i_threshold,threshold_i in enumerate(threshold): - if (i_threshold>0 and not features_thresholds.empty): - idx_start=features_thresholds['idx'].max()+1 - else: - idx_start=0 - features_threshold_i,regions_i=feature_detection_threshold_3D(track_data,i_time, - threshold=threshold_i, - sigma_threshold=sigma_threshold, - min_num=min_num, - target=target, - position_threshold=position_threshold, - n_erosion_threshold=n_erosion_threshold, - n_min_threshold=n_min_threshold, - min_distance=min_distance, - idx_start=idx_start - ) - if any([x is not None for x in features_threshold_i]): - features_thresholds=features_thresholds.append(features_threshold_i) - - # For multiple threshold, and features found both in the current and previous step, remove "parent" features from Dataframe - if (i_threshold>0 and not features_thresholds.empty and regions_old): - # for each threshold value: check if newly found features are surrounded by feature based on less restrictive threshold - features_thresholds=remove_parents_3D(features_thresholds,regions_i,regions_old) - regions_old=regions_i - - logging.debug('Finished feature detection for threshold '+str(i_threshold) + ' : ' + str(threshold_i) ) - return features_thresholds - - -def feature_detection_threshold_3D(data_i,i_time, - threshold=None, - min_num=0, - target='maximum', - position_threshold='center', - sigma_threshold=0.5, - n_erosion_threshold=0, - n_min_threshold=0, - min_distance=0, - idx_start=0): - ''' - function to find features based on individual threshold value: - Input: - data_i: iris.cube.Cube - 3D field to perform the feature detection (single timestep) - i_time: int - number of the current timestep - threshold: float - threshold value used to select target regions to track - target: str ('minimum' or 'maximum') - flag to determine if tracking is targetting minima or maxima in the data - position_threshold: str('extreme', 'weighted_diff', 'weighted_abs' or 'center') - flag choosing method used for the position of the tracked feature - sigma_threshold: float - standard deviation for intial filtering step - n_erosion_threshold: int - number of pixel by which to erode the identified features - n_min_threshold: int - minimum number of identified features - min_distance: float - minimum distance between detected features (m) - idx_start: int - feature id to start with - Output: - features_threshold: pandas DataFrame - detected features for individual threshold - regions: dict - dictionary containing the regions above/below threshold used for each feature (feature ids as keys) - ''' - from skimage.measure import label - from skimage.morphology import binary_erosion - import operator - # if looking for minima, set values above threshold to 0 and scale by data minimum: - if target == 'maximum': - mask=1*(data_i >= threshold) - # if looking for minima, set values above threshold to 0 and scale by data minimum: 
- elif target == 'minimum': - mask=1*(data_i <= threshold) - # only include values greater than threshold - # erode selected regions by n pixels - if n_erosion_threshold>0: - selem=np.ones((n_erosion_threshold,n_erosion_threshold)) - mask=binary_erosion(mask,selem).astype(np.int64) - # detect individual regions, label and count the number of pixels included: - labels, num_labels = label(mask, background=0, return_num=True) - labels_shape = labels.shape - #values, count = np.unique(labels[:,:,:].ravel(), return_counts=True) - #values_counts=dict(zip(values, count)) - - max_init_size_numba = labels.shape[0]*5 - # Filter out regions that have less pixels than n_min_threshold - #values_counts={k:v for k, v in values_counts.items() if v>n_min_threshold} - #check if not entire domain filled as one feature - #if 0 in values_counts: - #Remove background counts: - #values_counts.pop(0) - #create empty list to store individual features for this threshold - list_features_threshold=[] - #create empty dict to store regions for individual features for this threshold - regions=dict() - - vdim_indyces_dict = dict() - hdim1_indices_dict = dict() - hdim2_indeces_dict = dict() - labels = np.array(labels) - #print(type(values)) - if(num_labels>0): - [last_idx, vdim_indyces_dict,hdim1_indices_dict,hdim2_indeces_dict] = get_indices_of_labels(labels, np.array(list(range(1,num_labels+1))), max_init_size_numba) - #print(last_idx, num_labels) - #print(values, count) - #print(np.array(list(values_counts.keys()))) - #create emptry list of features to remove from parent threshold value - #loop over individual regions: - for cur_idx in range(1,num_labels+1): - max_cur_idx = last_idx[cur_idx][0] - if max_cur_idx<=n_min_threshold: - continue - - #print("In feature_detection_threshold_3D, cur_idx: ", cur_idx) - #region=labels[:,:,:] == cur_idx - region = np.full(labels_shape, False) - #[vdim_indyces,hdim1_indices,hdim2_indeces]= np.nonzero(region) - [vdim_indyces,hdim1_indices,hdim2_indeces]= [vdim_indyces_dict[cur_idx][:max_cur_idx],hdim1_indices_dict[cur_idx][:max_cur_idx],hdim2_indeces_dict[cur_idx][:max_cur_idx]] - #print(region.shape, np.array(list(zip(vdim_indyces,hdim1_indices,hdim2_indeces)))) - region[vdim_indyces,hdim1_indices,hdim2_indeces] = True - - #print(vdim_indyces, hdim1_indices, hdim2_indeces) - #print("Original indices shape: ", np.shape(vdim_indyces), np.shape(hdim1_indices), np.shape(hdim2_indeces)) - #region=labels[:,:,:] == cur_idx - #all_matching_indices = np.argwhere(labels==cur_idx) - #[vdim_indyces,hdim1_indices,hdim2_indeces]=all_matching_indices.T - #print(all_matching_indices) - #print("New indices shape: ", np.shape(vdim_indyces), np.shape(hdim1_indices), np.shape(hdim2_indeces)) - - #write region for individual threshold and feature to dict - region_i=list(zip(vdim_indyces,hdim1_indices,hdim2_indeces)) - regions[cur_idx+idx_start]=region_i - # Determine feature position for region by one of the following methods: - vdim_index,hdim1_index,hdim2_index=feature_position_3D(vdim_indyces,hdim1_indices,hdim2_indeces,region,data_i,threshold,position_threshold,target) - #create individual DataFrame row in tracky format for identified feature - list_features_threshold.append({'frame': int(i_time), - 'idx':cur_idx+idx_start, - 'vdim': vdim_index, - 'hdim_1': hdim1_index, - 'hdim_2':hdim2_index, - 'num':max_cur_idx, - 'threshold_value':threshold}) - features_threshold=pd.DataFrame(list_features_threshold) - #else: - # features_threshold=pd.DataFrame() - # regions=dict() - - return features_threshold, 
regions - - -from numba import jit -from numba.typed import Dict -import numba.core.types -import numba - -# Make array type. Type-expression is not supported in jit -# functions. - -@jit(nopython=True) -def my_np_resize(a, new_size): - new = np.zeros(new_size, a.dtype) - new[:a.size] = a - return new - - -int_array = numba.core.types.int64[:] -int_array_3d = numba.core.types.int64[:,:,:] -@jit((int_array_3d, int_array, numba.core.types.int64),nopython=True) -def get_indices_of_labels(labels, indices, max_init_size): - - ''' - Returns 3 dicts of label indices - ''' - label_shape =labels.shape - z_shape = label_shape[0] - y_shape = label_shape[1] - x_shape = label_shape[2] - z_indices = Dict.empty( - key_type=numba.core.types.int64, - value_type=int_array, - ) - x_indices = Dict.empty( - key_type=numba.core.types.int64, - value_type=int_array, - ) - y_indices = Dict.empty( - key_type=numba.core.types.int64, - value_type=int_array, - ) - - curr_loc_indices = Dict.empty( - key_type=numba.core.types.int64, - value_type=int_array, - ) - i = 0 - for index in indices: - #pass - curr_loc_indices[index] = np.array([0,]) - z_indices[index] = np.empty((max_init_size,),dtype=np.int64) - x_indices[index] = np.empty((max_init_size,),dtype=np.int64) - y_indices[index] = np.empty((max_init_size,),dtype=np.int64) - - - for z in range(z_shape): - for y in range(y_shape): - for x in range(x_shape): - curr_label = labels[z,y,x] - for index in indices: - if curr_label == index: - curr_loc_ix = curr_loc_indices[index][0] - if curr_loc_ix == z_indices[index].size: - curr_arr_sz = z_indices[index].size - z_indices[index]= my_np_resize(z_indices[index], curr_arr_sz*2) - x_indices[index]= my_np_resize(x_indices[index], curr_arr_sz*2) - y_indices[index]= my_np_resize(y_indices[index], curr_arr_sz*2) - - z_indices[index][curr_loc_ix] = z - x_indices[index][curr_loc_ix] = x - y_indices[index][curr_loc_ix] = y - curr_loc_indices[index][0]+=1 - - return [curr_loc_indices, z_indices, y_indices, x_indices] - -def feature_position_3D(vdim_indyces,hdim1_indices,hdim2_indeces,region,track_data,threshold_i,position_threshold, target): - ''' - function to determine feature position - Input: - vdim_indyces: list - - hdim1_indices: list - - hdim2_indeces: list - - region: list - list of 2-element tuples - track_data: numpy.ndarray - 2D numpy array containing the data - - threshold_i: float - - position_threshold: str - - target: str - - Output: - vdim_index: float - feature position along vertical dimension - hdim1_index: float - feature position along 1st horizontal dimension - hdim2_index: float - feature position along 2nd horizontal dimension - ''' - if position_threshold=='center': - # get position as geometrical centre of identified region: - vdim_index=np.mean(vdim_indyces) - hdim1_index=np.mean(hdim1_indices) - hdim2_index=np.mean(hdim2_indeces) - - elif position_threshold=='extreme': - #get position as max/min position inside the identified region: - if target == 'maximum': - index=np.argmax(track_data[region]) - vdim_index=vdim_indyces[index] - hdim1_index=hdim1_indices[index] - hdim2_index=hdim2_indeces[index] - - if target == 'minimum': - index=np.argmin(track_data[region]) - vdim_index=vdim_indyces[index] - hdim1_index=hdim1_indices[index] - hdim2_index=hdim2_indeces[index] - - elif position_threshold=='weighted_diff': - # get position as centre of identified region, weighted by difference from the threshold: - weights=abs(track_data[region]-threshold_i) - if sum(weights)==0: - weights=None - 
vdim_index=np.average(vdim_indyces,weights=weights) - hdim1_index=np.average(hdim1_indices,weights=weights) - hdim2_index=np.average(hdim2_indeces,weights=weights) - - elif position_threshold=='weighted_abs': - # get position as centre of identified region, weighted by absolute values if the field: - weights=abs(track_data[region]) - if sum(weights)==0: - weights=None - vdim_index=np.average(vdim_indyces,weights=weights) - hdim1_index=np.average(hdim1_indices,weights=weights) - hdim2_index=np.average(hdim2_indeces,weights=weights) - else: - raise ValueError('position_threshold must be center,extreme,weighted_diff or weighted_abs') - return vdim_index,hdim1_index,hdim2_index - -def remove_parents_3D(features_thresholds,regions_i,regions_old): - ''' - function to remove features whose regions surround newly detected feature regions - Input: - features_thresholds: pandas.DataFrame - Dataframe containing detected features - regions_i: dict - dictionary containing the regions above/below threshold for the newly detected feature (feature ids as keys) - regions_old: dict - dictionary containing the regions above/below threshold from previous threshold (feature ids as keys) - Output: - features_thresholds pandas.DataFrame - Dataframe containing detected features excluding those that are superseded by newly detected ones - ''' - list_remove=[] - for idx_i,region_i in regions_i.items(): - for idx_old,region_old in regions_old.items(): - if test_overlap(regions_old[idx_old],regions_i[idx_i]): - list_remove.append(idx_old) - list_remove=list(set(list_remove)) - # remove parent regions: - if features_thresholds is not None: - features_thresholds=features_thresholds[~features_thresholds['idx'].isin(list_remove)] - - return features_thresholds - -def filter_min_distance_3D(features,dxy,dz,min_distance): - ''' Function to perform feature detection based on contiguous regions above/below a threshold - Input: - features: pandas DataFrame - features - dxy: float - horzontal grid spacing (m) - dz: float array - vertical grid spacing (m) - min_distance: float - minimum distance between detected features (m) - Output: - features: pandas DataFrame - features - ''' - from itertools import combinations - remove_list_distance=[] #create list of tuples with all combinations of features at the timestep: indeces=combinations(features.index.values,2) #Loop over combinations to remove features that are closer together than min_distance and keep larger one (either higher threshold or larger area) - for index_1,index_2 in indeces: + for index_1, index_2 in indeces: if index_1 is not index_2: #features.loc[index_1,'hdim_1'] + if dxy is not None: + xy_sqdst = ((dxy*(features.loc[index_1,'hdim_1']-features.loc[index_2,'hdim_1']))**2+ + (dxy*(features.loc[index_1,'hdim_2']-features.loc[index_2,'hdim_2']))**2) + else: + # calculate xy distance based on x/y coordinates in meters. 
+ xy_sqdst = ((features.loc[index_1, x_coordinate_name]- + features.loc[index_2, x_coordinate_name])**2 + + (features.loc[index_1, y_coordinate_name]- + features.loc[index_2, y_coordinate_name])**2) + if dz is not None: + z_sqdst = (dz * (features.loc[index_1,'vdim']-features.loc[index_2,'vdim']))**2 + else: + z_sqdst = (features.loc[index_1,z_coordinate_name]- + features.loc[index_2,z_coordinate_name])**2 + #distance=dxy*np.sqrt((features.loc[index_1,'hdim_1']-features.loc[index_2,'hdim_1'])**2+(features.loc[index_1,'hdim_2']-features.loc[index_2,'hdim_2'])**2) - distance=np.sqrt((features.loc[index_1,'projection_x_coordinate']-features.loc[index_2,'projection_x_coordinate'])**2 + (features.loc[index_1,'projection_y_coordinate']-features.loc[index_2,'projection_y_coordinate'])**2 + (features.loc[index_1,'altitude']-features.loc[index_2,'altitude'])**2) + distance=np.sqrt(xy_sqdst + z_sqdst) if distance <= min_distance: + #print(distance, min_distance, index_1, index_2, features.size) # logging.debug('distance<= min_distance: ' + str(distance)) if features.loc[index_1,'threshold_value']>features.loc[index_2,'threshold_value']: remove_list_distance.append(index_2) @@ -1028,143 +1031,5 @@ def filter_min_distance_3D(features,dxy,dz,min_distance): features=features[~features.index.isin(remove_list_distance)] return features -def add_coordinates_3D(t,variable_cube): - import numpy as np - ''' Function adding coordinates from the tracking cube to the trajectories: time, longitude&latitude, x&y&z dimensions - Input: - t: pandas DataFrame - trajectories/features - variable_cube: iris.cube.Cube - Cube containing the dimensions 'time','longitude','latitude','x_projection_coordinate','y_projection_coordinate','altitude' usually cube that the tracking is performed on - Output: - t: pandas DataFrame - trajectories with added coordinates - ''' - from scipy.interpolate import interp2d, interp1d - logging.debug('start adding coordinates from cube') - # pull time as datetime object and timestr from input data and add it to DataFrame: - t['time']=None - t['timestr']=None - - - logging.debug('adding time coordinate') - - time_in=variable_cube.coord('time') - time_in_datetime=time_in.units.num2date(time_in.points) - - t["time"]=time_in_datetime[t['frame']] - t["timestr"]=[x.strftime('%Y-%m-%d %H:%M:%S') for x in time_in_datetime[t['frame']]] - - # Get list of all coordinates in input cube except for time (already treated): - coord_names=[coord.name() for coord in variable_cube.coords()] - coord_names.remove('time') - - logging.debug('time coordinate added') - - # chose right dimension for horizontal and vertical axes based on time dimension: - ndim_time=variable_cube.coord_dims('time')[0] - if ndim_time==0: - vdim=1 - hdim_1=2 - hdim_2=3 - elif ndim_time==1: - vdim=0 - hdim_1=2 - hdim_2=3 - elif ndim_time==2: - vdim=0 - hdim_1=1 - hdim_2=3 - elif ndim_time==3: - vdim=0 - hdim_1=1 - hdim_2=2 - - # create vectors to use to interpolate from pixels to coordinates - dimvec_1=np.arange(variable_cube.shape[vdim]) - dimvec_2=np.arange(variable_cube.shape[hdim_1]) - dimvec_3=np.arange(variable_cube.shape[hdim_2]) - - # loop over coordinates in input data: - for coord in coord_names: - logging.debug('adding coord: '+ coord) - # interpolate 1D coordinates: - if variable_cube.coord(coord).ndim==1: - - if variable_cube.coord_dims(coord)==(vdim,): - f=interp1d(dimvec_1,variable_cube.coord(coord).points,fill_value="extrapolate") - coordinate_points=f(t['vdim']) - - if variable_cube.coord_dims(coord)==(hdim_1,): - 
f=interp1d(dimvec_2,variable_cube.coord(coord).points,fill_value="extrapolate") - coordinate_points=f(t['hdim_1']) - - if variable_cube.coord_dims(coord)==(hdim_2,): - f=interp1d(dimvec_3,variable_cube.coord(coord).points,fill_value="extrapolate") - coordinate_points=f(t['hdim_2']) - - # interpolate 2D coordinates: - elif variable_cube.coord(coord).ndim==2: - - if variable_cube.coord_dims(coord)==(hdim_1,hdim_2): - f=interp2d(dimvec_3,dimvec_2,variable_cube.coord(coord).points) - coordinate_points=[f(a,b) for a,b in zip(t['hdim_2'],t['hdim_1'])] - - if variable_cube.coord_dims(coord)==(hdim_2,hdim_1): - f=interp2d(dimvec_2,dimvec_3,variable_cube.coord(coord).points) - coordinate_points=[f(a,b) for a,b in zip(t['hdim_1'],t['hdim_2'])] - - # interpolate 3D coordinates: - # mainly workaround for wrf latitude and longitude (to be fixed in future) - - elif variable_cube.coord(coord).ndim==3: - - if variable_cube.coord_dims(coord)==(ndim_time,hdim_1,hdim_2): - f=interp2d(dimvec_2,dimvec_1,variable_cube[0,:,:].coord(coord).points) - coordinate_points=[f(a,b) for a,b in zip(t['hdim_2'],t['hdim_1'])] - - if variable_cube.coord_dims(coord)==(ndim_time,hdim_2,hdim_1): - f=interp2d(dimvec_1,dimvec_2,variable_cube[0,:,:].coord(coord).points) - coordinate_points=[f(a,b) for a,b in zip(t['hdim_1'],t['hdim_2'])] - - - if variable_cube.coord_dims(coord)==(hdim_1,ndim_time,hdim_2): - f=interp2d(dimvec_2,dimvec_1,variable_cube[:,0,:].coord(coord).points) - coordinate_points=[f(a,b) for a,b in zip(t['hdim_2'],t['hdim_1'])] - - if variable_cube.coord_dims(coord)==(hdim_1,hdim_2,ndim_time): - f=interp2d(dimvec_2,dimvec_1,variable_cube[:,:,0].coord(coord).points) - coordinate_points=[f(a,b) for a,b in zip(t['hdim_2'],t['hdim1'])] - - - if variable_cube.coord_dims(coord)==(hdim_2,ndim_time,hdim_1): - f=interp2d(dimvec_1,dimvec_2,variable_cube[:,0,:].coord(coord).points) - coordinate_points=[f(a,b) for a,b in zip(t['hdim_1'],t['hdim_2'])] - - if variable_cube.coord_dims(coord)==(hdim_2,hdim_1,ndim_time): - f=interp2d(dimvec_1,dimvec_2,variable_cube[:,:,0].coord(coord).points) - coordinate_points=[f(a,b) for a,b in zip(t['hdim_1'],t['hdim_2'])] - - # write resulting array or list into DataFrame: - t[coord]=coordinate_points - - logging.debug('added coord: '+ coord) - return t - -def test_overlap(region_inner,region_outer): - ''' - function to test for overlap between two regions (probably scope for further speedup here) - Input: - region_1: list - list of 3-element tuples defining the indeces of all cell in the region - region_2: list - list of 3-element tuples defining the indeces of all cell in the region - - Output: - overlap: bool - True if there are any shared points between the two regions - ''' - overlap=frozenset(region_outer).isdisjoint(region_inner) - return not overlap diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 09daff74..288cb2cd 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -196,6 +196,390 @@ def segmentation(features,field,dxy,threshold=3e-3,target='maximum',level=None,m logging.debug('Finished segmentation') return segmentation_out,features_out +def segmentation_timestep_3DPBC(field_in,features_in,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',PBC_flag=0): + """ + Function performing watershedding for an individual timestep of the data + + Parameters: + features: pandas.DataFrame + features for one specific point in time + field: iris.cube.Cube + input field to perform the watershedding on (2D or 3D 
for one specific point in time)
+    threshold:  float
+                threshold for the watershedding field to be used for the mask
+    target: string
+                switch to determine if algorithm looks starting from maxima or minima in input field (maximum: starting from maxima (default), minimum: starting from minima)
+    level: slice
+                levels at which to seed the cells for the watershedding algorithm
+    method: string
+                flag determining the algorithm to use (currently watershedding implemented)
+    max_distance: float
+                  maximum distance from a marker allowed to be classified as belonging to that cell
+    PBC_flag: integer
+              flag indicating whether to use PBC treatment (1) or not (0)
+              note to self: should be expanded to account for singly periodic boundaries also
+              rather than just doubly periodic
+
+    Output:
+    segmentation_out: iris.cube.Cube
+                      cloud mask, 0 outside and integer numbers according to track inside the clouds
+    features_out: pandas.DataFrame
+                  feature dataframe including the number of cells (2D or 3D) in the segmented area/volume of the feature at the timestep
+    """
+    #from skimage.morphology import watershed
+    import skimage.segmentation._watershed_cy
+    import skimage.segmentation
+    from skimage.segmentation import watershed
+    # from skimage.segmentation import random_walker
+    from scipy.ndimage import distance_transform_edt, label
+    from copy import deepcopy
+    import numpy as np
+
+    # copy feature dataframe for output
+    features_out=deepcopy(features_in)
+    # Create cube of the same dimensions and coordinates as input data to store mask:
+    segmentation_out=1*field_in
+    segmentation_out.rename('segmentation_mask')
+    segmentation_out.units=1
+
+    #Create dask array from input data:
+    data=field_in.core_data()
+
+    #Set level at which to create "Seed" for each feature in the case of 3D watershedding:
+    # If none, use all levels (later reduced to the ones fulfilling the threshold conditions)
+    if level==None:
+        level=slice(None)
+
+    # transform max_distance in metres to distance in pixels:
+    if max_distance is not None:
+        max_distance_pixel=np.ceil(max_distance/dxy)
+
+    # mask data outside region above/below threshold and invert data if tracking maxima:
+    if target == 'maximum':
+        unmasked=data>threshold
+        data_segmentation=-1*data
+    elif target == 'minimum':
+        unmasked=data<threshold
+        data_segmentation=data
+
+    # set markers at the positions of the features:
+    markers = np.zeros(unmasked.shape).astype(np.int32)
+    if field_in.ndim==3: #3D watershedding
+        ndim_vertical=field_in.coord_dims(vertical_coord)
+        if len(ndim_vertical)>1:
+            raise ValueError('please specify 1 dimensional vertical coordinate')
+        z_len = len(field_in.coord('z').points)
+        y_len = len(field_in.coord('y').points)
+        x_len = len(field_in.coord('x').points)
+
+        print(z_len,y_len,x_len)
+
+        for index, row in features_in.iterrows():
+            #creation of 5x5x5 point ranges for 3D marker seeding
+            #instead of seeding whole column as is done in original segmentation
+            #since this may cause erroneous seeding of unconnected fields
+            #e.g. 
cirrus overlaying a discrete convective cloud + + print("feature: ",row['feature']) + #print("z-ctr: ",row['vdim']) + #print("y-ctr: ",row['hdim_1']) + #print("x-ctr: ",row['hdim_2']) + + #proper positioning of box points in z space to avoid going beyond bounds + if(int(row['vdim']) >=2 and int(row['vdim']) <= z_len-3): + z_list = np.arange(int(row['vdim']-2),int(row['vdim']+3)) + elif(int(row['vdim']) < 2): + z_list = np.arange(0,5) + else: + z_list = np.arange(z_len-5,z_len) + + #proper positioning of box points in y space to avoid going beyond bounds + if(int(row['hdim_1']) >=2 and int(row['hdim_1']) <= y_len-3): + y_list = np.arange(int(row['hdim_1']-2),int(row['hdim_1']+3)) + elif(int(row['hdim_1']) < 2): + y_list = np.arange(0,5) + #PBC_y_chk = 1 + else: + y_list = np.arange(y_len-5,y_len) + #PBC_y_chk = 1 + + #proper positioning of box points in x space to avoid going beyond bounds + if(int(row['hdim_2']) >=2 and int(row['hdim_2']) <= x_len-3): + x_list = np.arange(int(row['hdim_2']-2),int(row['hdim_2']+3)) + elif(int(row['hdim_2']) < 2): + x_list = np.arange(0,5) + #PBC_x_chk = 1 + else: + x_list = np.arange(x_len-5,x_len) + #PBC_x_chk = 1 + + #loop thru 5x5x5 z times y times x range to seed markers + for k in range(0,5): + for j in range(0,5): + for i in range(0,5): + + if ndim_vertical[0]==0: + markers[z_list[k],y_list[j],x_list[i]]=row['feature'] + elif ndim_vertical[0]==1: + markers[y_list[j],z_list[k],x_list[i]]=row['feature'] + elif ndim_vertical[0]==2: + markers[y_list[j],x_list[i],z_list[k]]=row['feature'] + + + #print("z_list: ",z_list[:]) + #print("y_list: ",y_list[:]) + #print("x_list: ",x_list[:]) + #print(markers) + #print("unique marker labels: ",np.unique(markers)) + + else: + raise ValueError('Segmentations routine only possible with 2 or 3 spatial dimensions') + + # set markers in cells not fulfilling threshold condition to zero: + markers[~unmasked]=0 + + #rethinking this - data is padded with zeros, but we should set masked values to something different + #than zeroes as the array is initiated and padded with zeros + #and unmasked points that don't get watershedded are ALSO going to have a mask value equal to zero + + # Turn into np arrays (not necessary for markers) as dask arrays don't yet seem to work for watershedding algorithm + data_segmentation=np.array(data_segmentation) + unmasked=np.array(unmasked) + + # perform segmentation: + if method=='watershed': + segmentation_mask = watershed(np.array(data_segmentation),markers.astype(np.int32), mask=unmasked) +# elif method=='random_walker': +# segmentation_mask=random_walker(data_segmentation, markers.astype(np.int32), +# beta=130, mode='bf', tol=0.001, copy=True, multichannel=False, return_full_prob=False, spacing=None) + else: + raise ValueError('unknown method, must be watershed') + + # remove everything from the individual masks that is more than max_distance_pixel away from the markers + if max_distance is not None: + D=distance_transform_edt((markers==0).astype(int)) + segmentation_mask[np.bitwise_and(segmentation_mask>0, D>max_distance_pixel)]=0 + + #print(segmentation_mask.shape) + #print(segmentation_mask[unmasked].shape) + #print(np.where(segmentation_mask == 0 and unmasked == True)) + #z_unm,y_unm,x_unm = np.where(unmasked==True) + #print(np.where(segmentation_mask[z_unm,y_unm,x_unm] == 0)) + + #mask all segmentation_mask points below threshold as -1 + #to differentiate from those unmasked points NOT filled by watershedding + print(np.unique(segmentation_mask)) + segmentation_mask[~unmasked] = -1 + + 
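The three if/elif ladders above pin a 5-point seeding window inside each dimension. The same clamping logic can be written once and reused; a compact sketch with equivalent behavior under the stated assumptions, not the patch's code:

import numpy as np

def seed_window(center, dim_len, width=5):
    '''Illustrative helper: width consecutive indices centered on center,
    shifted so the whole window stays inside [0, dim_len). Assumes dim_len >= width.'''
    start = int(center) - width // 2
    start = max(0, min(start, dim_len - width))
    return np.arange(start, start + width)

print(seed_window(0, 20))   # [0 1 2 3 4]      (clamped at the low end)
print(seed_window(10, 20))  # [ 8  9 10 11 12] (interior, symmetric window)
print(seed_window(19, 20))  # [15 16 17 18 19] (clamped at the high end)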
#z_unf,y_unf,x_unf = np.where(segmentation_mask==0) + + #print(np.where(segmentation_mask==-1)) + #print(np.where(segmentation_mask==0)) + + #PBC treatment if-else statements + if PBC_flag == 1: + z_unf,y_unf,x_unf = np.where(segmentation_mask==0) + + seg_mask_unseeded = np.zeros(segmentation_mask.shape) + + seg_mask_unseeded[z_unf,y_unf,x_unf]=1 + + labels_unseeded,label_num = label(seg_mask_unseeded) + + print(label_num) + + markers_2 = np.zeros(unmasked.shape).astype(np.int32) + + print(segmentation_mask.shape) + + #new, shorter PBC marker seeding approach + #loop thru LB points + #then check if fillable region (labels_unseeded > 0) + #then check if point on other side of boundary is > 0 in segmentation_mask + + for z_ind in range(0,segmentation_mask.shape[0]): + #print("z_ind: ",z_ind) + for y_ind in range(0,segmentation_mask.shape[1]): + for x_ind in [0,segmentation_mask.shape[2]-1]: + + #print(z_ind,y_ind,x_ind) + #print(labels_unseeded[z_ind,y_ind,x_ind]) + + if(labels_unseeded[z_ind,y_ind,x_ind] == 0): + continue + else: + if x_ind == 0: + if (segmentation_mask[z_ind,y_ind,segmentation_mask.shape[2]-1]<=0): + continue + else: + markers_2[z_ind,y_ind,x_ind] = segmentation_mask[z_ind,y_ind,segmentation_mask.shape[2]-1] + #print(z_ind,y_ind,x_ind) + #print("seeded") + elif x_ind == segmentation_mask.shape[2]-1: + if (segmentation_mask[z_ind,y_ind,0]<=0): + continue + else: + markers_2[z_ind,y_ind,x_ind] = segmentation_mask[z_ind,y_ind,0] + #print(z_ind,y_ind,x_ind) + #print("seeded") + + + for y_ind in [0,segmentation_mask.shape[1]-1]: + for x_ind in range(0,segmentation_mask.shape[2]): + + #print(z_ind,y_ind,x_ind) + #print(labels_unseeded[z_ind,y_ind,x_ind]) + + if(labels_unseeded[z_ind,y_ind,x_ind] == 0): + continue + else: + if y_ind == 0: + if (segmentation_mask[z_ind,segmentation_mask.shape[1]-1,x_ind]<=0): + continue + else: + markers_2[z_ind,y_ind,x_ind] = segmentation_mask[z_ind,segmentation_mask.shape[1]-1,x_ind] + #print(z_ind,y_ind,x_ind) + #print("seeded") + elif y_ind == segmentation_mask.shape[1]-1: + if (segmentation_mask[z_ind,0,x_ind]<=0): + continue + else: + markers_2[z_ind,y_ind,x_ind] = segmentation_mask[z_ind,0,x_ind] + #print(z_ind,y_ind,x_ind) + #print("seeded") + + print("PBC cross-boundary markers planted") + print("Beginning PBC segmentation for secondary mask") + + markers_2[~unmasked]=0 + + if method=='watershed': + segmentation_mask_2 = watershed(np.array(data_segmentation),markers_2.astype(np.int32), mask=unmasked) + # elif method=='random_walker': + # segmentation_mask=random_walker(data_segmentation, markers.astype(np.int32), + # beta=130, mode='bf', tol=0.001, copy=True, multichannel=False, return_full_prob=False, spacing=None) + else: + raise ValueError('unknown method, must be watershed') + + # remove everything from the individual masks that is more than max_distance_pixel away from the markers + if max_distance is not None: + D=distance_transform_edt((markers==0).astype(int)) + segmentation_mask_2[np.bitwise_and(segmentation_mask_2>0, D>max_distance_pixel)]=0 + + print("Sum up original mask and secondary PBC-mask for full PBC segmentation") + + #Write resulting mask into cube for output + segmentation_out.data=segmentation_mask + segmentation_mask_2 + + else: + #Write resulting mask into cube for output + segmentation_out.data = segmentation_mask + + # count number of grid cells asoociated to each tracked cell and write that into DataFrame: + print(np.min(segmentation_out.data),np.max(segmentation_out.data)) + + values, count = 
np.unique(segmentation_out.data, return_counts=True)
+    counts=dict(zip(values, count))
+    ncells=np.zeros(len(features_out))
+    for i,(index,row) in enumerate(features_out.iterrows()):
+        if row['feature'] in counts.keys():
+            ncells[i]=counts[row['feature']]
+    features_out['ncells']=ncells
+
+    return segmentation_out,features_out
+
+def segmentation_PBC3D(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',PBC_flag=0):
+    """
+    Function using watershedding or random walker to determine cloud volumes associated with tracked updrafts
+
+    Parameters:
+    features:   pandas.DataFrame
+                output from trackpy/maketrack
+    field:      iris.cube.Cube
+                containing the field to perform the watershedding on
+    threshold:  float
+                threshold for the watershedding field to be used for the mask
+
+    target: string
+            switch to determine if algorithm looks starting from maxima or minima in input field (maximum: starting from maxima (default), minimum: starting from minima)
+
+    level: slice
+           levels at which to seed the cells for the watershedding algorithm
+    method: string
+            flag determining the algorithm to use (currently watershedding implemented)
+
+    max_distance: float
+                  maximum distance from a marker allowed to be classified as belonging to that cell
+
+    Output:
+    segmentation_out: iris.cube.Cube
+                      cloud mask, 0 outside and integer numbers according to track inside the cloud
+    """
+    import pandas as pd
+    from iris.cube import CubeList
+
+    logging.info('Start watershedding 3D')
+
+    # check input for right dimensions:
+    if not (field.ndim==3 or field.ndim==4):
+        raise ValueError('input to segmentation step must be 3D or 4D including a time dimension')
+    if 'time' not in [coord.name() for coord in field.coords()]:
+        raise ValueError("input to segmentation step must include a dimension named 'time'")
+
+    # CubeList and list to store individual segmentation masks and feature DataFrames with information about segmentation
+    segmentation_out_list=CubeList()
+    features_out_list=[]
+
+    #loop over individual input timesteps for segmentation:
+    field_time=field.slices_over('time')
+    for i,field_i in enumerate(field_time):
+        time_i=field_i.coord('time').units.num2date(field_i.coord('time').points[0])
+        features_i=features.loc[features['time']==time_i]
+        segmentation_out_i,features_out_i=segmentation_timestep_3DPBC(field_i,features_i,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,vertical_coord=vertical_coord,PBC_flag=PBC_flag)
+        segmentation_out_list.append(segmentation_out_i)
+        features_out_list.append(features_out_i)
+        logging.debug('Finished segmentation for '+time_i.strftime('%Y-%m-%d_%H:%M:%S'))
+
+    #Merge output from individual timesteps:
+    segmentation_out=segmentation_out_list.merge_cube()
+    features_out=pd.concat(features_out_list)
+
+    logging.debug('Finished segmentation')
+    return segmentation_out,features_out
+
 def watershedding_3D(track,field_in,**kwargs):
     kwargs.pop('method',None)
     return segmentation_3D(track,field_in,method='watershed',**kwargs)
diff --git a/tobac/testing.py b/tobac/testing.py
index 5299af89..5a757ca6 100644
--- a/tobac/testing.py
+++ b/tobac/testing.py
@@ -1,17 +1,25 @@
 import datetime
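Before moving on to the testing-module changes: the per-feature ncells loop at the end of segmentation_timestep_3DPBC above can also be written as one vectorized lookup. A small sketch with toy stand-ins for features_out and counts, illustrative only:

import pandas as pd

# toy stand-ins for the objects used above
features_out = pd.DataFrame({'feature': [1, 2, 3]})
counts = {1: 120, 3: 45}  # e.g. built from np.unique(..., return_counts=True)

# map each feature label to its grid-cell count; features absent from counts get 0,
# and the background/masked labels (0 and -1) are simply never looked up
features_out['ncells'] = features_out['feature'].map(counts).fillna(0).astype(int)
print(features_out)
#    feature  ncells
# 0        1     120
# 1        2       0
# 2        3      45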
 def watershedding_3D(track,field_in,**kwargs):
     kwargs.pop('method',None)
     return segmentation_3D(track,field_in,method='watershed',**kwargs)
diff --git a/tobac/testing.py b/tobac/testing.py
index 5299af89..5a757ca6 100644
--- a/tobac/testing.py
+++ b/tobac/testing.py
@@ -1,17 +1,25 @@
 import datetime
 import numpy as np
 from xarray import DataArray
+import pandas as pd
 
 def make_simple_sample_data_2D(data_type='iris'):
-    """
-    function creating a simple dataset to use in tests for tobac.
+    """function creating a simple dataset to use in tests for tobac.
     The grid has a grid spacing of 1km in both horizontal directions and 100 grid cells in x direction and 500 in y direction.
     Time resolution is 1 minute and the total length of the dataset is 100 minutes around a abritraty date (2000-01-01 12:00).
     The longitude and latitude coordinates are added as 2D aux coordinates and arbitrary, but in realisitic range.
     The data contains a single blob travelling on a linear trajectory through the dataset for part of the time.
+    Parameters
+    ----------
+    data_type: {'iris', 'xarray'}
+        The type of dataset to produce. Note that this function currently generates an iris cube
+        and if xarray is requested, it simply converts to xarray with the from_iris function in xarray.
 
-    :param data_type: 'iris' or 'xarray' to chose the type of dataset to produce
-    :return: sample dataset as an Iris.Cube.cube or xarray.DataArray
+    Returns
+    -------
+    Iris.Cube.cube or xarray.DataArray
+        The simple output
     """
     from iris.cube import Cube
@@ -59,16 +67,22 @@ def make_simple_sample_data_2D(data_type='iris'):
 def make_sample_data_2D_3blobs(data_type='iris'):
     from iris.cube import Cube
     from iris.coords import DimCoord,AuxCoord
-    """
-    function creating a simple dataset to use in tests for tobac.
+    """function creating a simple dataset to use in tests for tobac.
     The grid has a grid spacing of 1km in both horizontal directions and 100 grid cells in x direction and 200 in y direction.
     Time resolution is 1 minute and the total length of the dataset is 100 minutes around a abritraty date (2000-01-01 12:00).
     The longitude and latitude coordinates are added as 2D aux coordinates and arbitrary, but in realisitic range.
     The data contains a three individual blobs travelling on a linear trajectory through the dataset for part of the time.
 
-    :param data_type: 'iris' or 'xarray' to chose the type of dataset to produce
-    :return: sample dataset as an Iris.Cube.cube or xarray.DataArray
-
+    Parameters
+    ----------
+    data_type: {'iris', 'xarray'}
+        The type of dataset to produce. Note that this function currently generates an iris cube
+        and if xarray is requested, it simply converts to xarray with the from_iris function in xarray.
+
+    Returns
+    -------
+    Iris.Cube.cube or xarray.DataArray
+        The simple output
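+
+    Examples
+    --------
+    >>> sample_data = make_sample_data_2D_3blobs(data_type='xarray')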
     """
 
     t_0=datetime.datetime(2000,1,1,12,0,0)
@@ -140,13 +154,20 @@
 def make_sample_data_2D_3blobs_inv(data_type='iris'):
-    """
-    function creating a version of the dataset created in the function make_sample_cube_2D, but with switched coordinate order for the horizontal coordinates
+    """function creating a version of the dataset created in the function make_sample_cube_2D, but with switched coordinate order for the horizontal coordinates
     for tests to ensure that this does not affect the results
-
-    :param data_type: 'iris' or 'xarray' to chose the type of dataset to produce
-    :return: sample dataset as an Iris.Cube.cube or xarray.DataArray
+    Parameters
+    ----------
+    data_type: {'iris', 'xarray'}
+        The type of dataset to produce. Note that this function currently generates an iris cube
+        and if xarray is requested, it simply converts to xarray with the from_iris function in xarray.
+
+    Returns
+    -------
+    Iris.Cube.cube or xarray.DataArray
+        The simple output
+
     """
     from iris.cube import Cube
     from iris.coords import DimCoord,AuxCoord
@@ -223,15 +244,24 @@ def make_sample_data_2D_3blobs_inv(data_type='iris'):
 def make_sample_data_3D_3blobs(data_type='iris',invert_xy=False):
     from iris.cube import Cube
     from iris.coords import DimCoord,AuxCoord
-    """
-    function creating a simple dataset to use in tests for tobac.
+    """function creating a simple dataset to use in tests for tobac.
     The grid has a grid spacing of 1km in both horizontal directions and 100 grid cells in x direction and 200 in y direction.
     Time resolution is 1 minute and the total length of the dataset is 100 minutes around a abritraty date (2000-01-01 12:00).
     The longitude and latitude coordinates are added as 2D aux coordinates and arbitrary, but in realisitic range.
     The data contains a three individual blobs travelling on a linear trajectory through the dataset for part of the time.
 
-    :param data_type: 'iris' or 'xarray' to chose the type of dataset to produce
-    :return: sample dataset as an Iris.Cube.cube or xarray.DataArray
+    Parameters
+    ----------
+    data_type: {'iris', 'xarray'}
+        The type of dataset to produce. Note that this function currently generates an iris cube
+        and if xarray is requested, it simply converts to xarray with the from_iris function in xarray.
+    invert_xy: bool
+        True to swap the x and y dimensions; False to keep them in their original order.
+
+    Returns
+    -------
+    Iris.Cube.cube or xarray.DataArray
+        The simple output
 
     """
@@ -327,3 +357,454 @@ def make_sample_data_3D_3blobs(data_type='iris',invert_xy=False):
         sample_data=DataArray.from_iris(sample_data)
 
     return sample_data
+
+
+def make_dataset_from_arr(in_arr, data_type = 'xarray'):
+    '''Makes a dataset (xarray or iris) for feature detection/segmentation from
+    a raw numpy/dask/etc. array.
+
+    Parameters
+    ----------
+    in_arr: array-like
+        The input array to convert to iris/xarray
+    data_type: str('xarray' or 'iris')
+        Type of the dataset to return
+
+    Returns
+    -------
+    Iris or xarray dataset with everything we need for feature detection/tracking.
+
+    '''
+    import xarray as xr
+
+    output_arr = xr.DataArray(in_arr)
+
+    if data_type == 'xarray':
+        return output_arr
+    elif data_type == 'iris':
+        return output_arr.to_iris()
+    else:
+        raise ValueError("data_type must be 'xarray' or 'iris'")
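+
+# Example (editor's illustration): wrap a bare numpy array so the feature
+# detection/segmentation utilities can consume it; with the default data_type
+# this returns an xarray.DataArray.
+# blob_data = make_dataset_from_arr(np.zeros((1, 50, 50)), data_type='xarray')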
+def make_feature_blob(in_arr, h1_loc, h2_loc, v_loc = None,
+                      h1_size = 1, h2_size = 1, v_size = 1,
+                      shape = 'rectangle', amplitude=1,
+                      PBC_flag = 'none'):
+    """Function to make a defined "blob" in location (zloc, yloc, xloc) with
+    user-specified shape and amplitude. Note that this function will
+    round the size and locations to the nearest point within the array.
+
+    Parameters
+    ----------
+    in_arr: array-like
+        input array to add the "blob" to
+    h1_loc: float
+        Center hdim_1 location of the blob, required
+    h2_loc: float
+        Center hdim_2 location of the blob, required
+    v_loc: float
+        Center vdim location of the blob, optional. If this is None, we assume that the
+        dataset is 2D.
+    h1_size: float
+        Size of the bubble in array coordinates in hdim_1
+    h2_size: float
+        Size of the bubble in array coordinates in hdim_2
+    v_size: float
+        Size of the bubble in array coordinates in vdim
+    shape: str('rectangle' or 'oval')
+        The shape of the blob that is added. Currently, only 'rectangle' is implemented.
+        'oval' is planned to add an oval/spherical bubble with constant amplitude `amplitude`,
+        where the sizes specified are the diameters in each dimension.
+        'rectangle' adds a rectangular/rectangular prism bubble with constant amplitude `amplitude`.
+    amplitude: float
+        Maximum amplitude of the blob
+    PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both')
+        Sets whether to use periodic boundaries, and if so in which directions.
+        'none' means that we do not have periodic boundaries
+        'hdim_1' means that we are periodic along hdim1
+        'hdim_2' means that we are periodic along hdim2
+        'both' means that we are periodic along both horizontal dimensions
+
+    Returns
+    -------
+    array-like
+        An array with the same type as `in_arr` that has the blob added.
+    """
+
+    # Check if z location is there and set our 3D-ness based on this.
+    if v_loc is None:
+        is_3D = False
+        start_loc = 0
+        start_v = None
+        end_v = None
+
+    else:
+        is_3D = True
+        start_loc = 1
+        v_min = 0
+        # vdim is the leading axis of a 3D array
+        v_max = in_arr.shape[0]
+        start_v = round(max(v_min, v_loc - v_size/2))
+        end_v = round(min(v_max-1, v_loc + v_size/2))
+        if v_size > v_max - v_min:
+            raise ValueError("v_size larger than domain size")
+
+    # Get min/max coordinates for hdim_1 and hdim_2
+    # Min is inclusive, end is exclusive
+    h1_min = 0
+    h1_max = in_arr.shape[start_loc]
+
+    h2_min = 0
+    h2_max = in_arr.shape[start_loc+1]
+
+    if ((h1_size > h1_max - h1_min) or (h2_size > h2_max - h2_min)):
+        raise ValueError("Horizontal size larger than domain size")
+
+    # let's get start/end x/y/z
+    start_h1 = round(h1_loc - h1_size/2)
+    end_h1 = round(h1_loc + h1_size/2)
+
+    start_h2 = round(h2_loc - h2_size/2)
+    end_h2 = round(h2_loc + h2_size/2)
+
+    # get the coordinate sets
+    coords_to_fill = get_pbc_coordinates(h1_min, h1_max, h2_min, h2_max,
+                                         start_h1, end_h1, start_h2, end_h2, PBC_flag=PBC_flag)
+    if shape == 'rectangle':
+        for coord_box in coords_to_fill:
+            in_arr = set_arr_2D_3D(in_arr, amplitude, coord_box[0], coord_box[1], coord_box[2], coord_box[3],
+                                   start_v, end_v)
+        return in_arr
+    else:
+        raise NotImplementedError("Only shape='rectangle' is currently implemented")
+
+
+def set_arr_2D_3D(in_arr, value, start_h1, end_h1, start_h2, end_h2,
+                  start_v = None, end_v = None):
+    '''Function to set part of `in_arr` for either 2D or 3D points to `value`.
+    If `start_v` and `end_v` are not none, we assume that the array is 3D. If they
+    are none, we will set the array as if it is a 2D array.
+
+    Parameters
+    ----------
+    in_arr: array-like
+        Array of values to set
+    value: int, float, or array-like of size (end_v-start_v, end_h1-start_h1, end_h2-start_h2)
+        The value to assign to in_arr. This will work to assign an array, but the array
+        must have the same dimensions as the size specified in the function.
+    start_h1: int
+        Start index to set for hdim_1
+    end_h1: int
+        End index to set for hdim_1 (exclusive, so it acts like [start_h1:end_h1])
+    start_h2: int
+        Start index to set for hdim_2
+    end_h2: int
+        End index to set for hdim_2 (exclusive, like end_h1)
+    start_v: int
+        Start index to set for vdim (optional)
+    end_v: int
+        End index to set for vdim (optional)
+
+    Returns
+    -------
+    array-like
+        in_arr with the new values set.
+    '''
+    if start_v is not None and end_v is not None:
+        in_arr[start_v:end_v, start_h1:end_h1, start_h2:end_h2] = value
+    else:
+        in_arr[start_h1:end_h1, start_h2:end_h2] = value
+
+    return in_arr
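+
+# Example (editor's illustration): fill a 2x2 corner of a 2D array with 1s.
+# End indices are exclusive, exactly like normal numpy slicing:
+# arr = set_arr_2D_3D(np.zeros((5, 5)), 1, 0, 2, 0, 2)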
+def get_single_pbc_coordinate(h1_min, h1_max, h2_min, h2_max, h1_coord, h2_coord,
+                              PBC_flag = 'none'):
+    '''Function to get the PBC-adjusted coordinate for an original non-PBC adjusted
+    coordinate.
+
+    Parameters
+    ----------
+    h1_min: int
+        Minimum point in hdim_1
+    h1_max: int
+        Maximum point in hdim_1
+    h2_min: int
+        Minimum point in hdim_2
+    h2_max: int
+        Maximum point in hdim_2
+    h1_coord: int
+        hdim_1 query coordinate
+    h2_coord: int
+        hdim_2 query coordinate
+    PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both')
+        Sets whether to use periodic boundaries, and if so in which directions.
+        'none' means that we do not have periodic boundaries
+        'hdim_1' means that we are periodic along hdim1
+        'hdim_2' means that we are periodic along hdim2
+        'both' means that we are periodic along both horizontal dimensions
+
+    Returns
+    -------
+    tuple
+        Returns a tuple of (hdim_1, hdim_2).
+
+    Raises
+    ------
+    ValueError
+        Raises a ValueError if the point is invalid (e.g., h1_coord < h1_min
+        when PBC_flag = 'none')
+    '''
+    # Avoiding duplicating code here, so throwing this into a loop.
+    is_pbc = [False, False]
+    if PBC_flag in ['hdim_1', 'both']:
+        is_pbc[0] = True
+    if PBC_flag in ['hdim_2', 'both']:
+        is_pbc[1] = True
+
+    out_coords = list()
+
+    for point_query, dim_min, dim_max, dim_pbc in zip([h1_coord, h2_coord],
+                                                      [h1_min, h2_min],
+                                                      [h1_max, h2_max],
+                                                      is_pbc):
+        if point_query >= dim_min and point_query < dim_max:
+            # in bounds; no adjustment needed
+            out_coords.append(point_query)
+            continue
+        # off the domain on at least one side
+        elif point_query < dim_min:
+            if not dim_pbc:
+                raise ValueError("Point below the domain minimum, but this dimension is not periodic")
+            out_coords.append(point_query + (dim_max - dim_min))
+        elif point_query >= dim_max:
+            if not dim_pbc:
+                raise ValueError("Point above the domain maximum, but this dimension is not periodic")
+            out_coords.append(point_query - (dim_max - dim_min))
+
+    return tuple(out_coords)
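+
+# Example (editor's illustration, mirroring the unit tests in
+# tobac/tests/test_testing.py): a query point just off the low edge of a
+# periodic hdim_1 wraps around to the high side:
+# get_single_pbc_coordinate(0, 10, 0, 10, -3, 3, 'hdim_1') -> (7, 3)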
+
+
+def get_pbc_coordinates(h1_min, h1_max, h2_min, h2_max,
+                        h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord,
+                        PBC_flag = 'none'):
+    '''Function to get the *actual* coordinate boxes of interest given a set of shifted
+    coordinates with periodic boundaries.
+
+    For example, if you pass in [as h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord]
+    (-3, 5, 2, 6) with PBC_flag of 'both' or 'hdim_1', h1_max of 10, and h1_min of 0
+    this function will return: [(0,5,2,6), (7,10,2,6)].
+
+    If you pass in something outside the bounds of the array, this will truncate your
+    requested box. For example, if you pass in [as h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord]
+    (-3, 5, 2, 6) with PBC_flag of 'none' or 'hdim_2', this function will return:
+    [(0,5,2,6)], assuming h1_min is 0.
+
+    For cases where PBC_flag is 'both' and the requested box covers the full domain
+    in a dimension, the output is clamped to the domain. For example, if you pass in
+    (-6, 5, -6, 5) with bounds of (0, 10) in both dimensions, this function will
+    return [(0, 10, 0, 10)].
+
+    Parameters
+    ----------
+    h1_min: int
+        Minimum array value in hdim_1, typically 0.
+    h1_max: int
+        Maximum array value in hdim_1 (exclusive). h1_max - h1_min should be the size in h1.
+    h2_min: int
+        Minimum array value in hdim_2, typically 0.
+    h2_max: int
+        Maximum array value in hdim_2 (exclusive). h2_max - h2_min should be the size in h2.
+    h1_start_coord: int
+        Start coordinate in hdim_1. Can be < h1_min if dealing with PBCs.
+    h1_end_coord: int
+        End coordinate in hdim_1. Can be >= h1_max if dealing with PBCs.
+    h2_start_coord: int
+        Start coordinate in hdim_2. Can be < h2_min if dealing with PBCs.
+    h2_end_coord: int
+        End coordinate in hdim_2. Can be >= h2_max if dealing with PBCs.
+    PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both')
+        Sets whether to use periodic boundaries, and if so in which directions.
+        'none' means that we do not have periodic boundaries
+        'hdim_1' means that we are periodic along hdim1
+        'hdim_2' means that we are periodic along hdim2
+        'both' means that we are periodic along both horizontal dimensions
+
+    Returns
+    -------
+    list of tuples
+        A list of tuples containing (h1_start, h1_end, h2_start, h2_end) of each of the
+        boxes needed to encompass the coordinates.
+    '''
+
+    if PBC_flag not in ['none', 'hdim_1', 'hdim_2', 'both']:
+        raise ValueError("PBC_flag must be 'none', 'hdim_1', 'hdim_2', or 'both'")
+
+    h1_start_coords = list()
+    h1_end_coords = list()
+    h2_start_coords = list()
+    h2_end_coords = list()
+
+    # In both of these cases, we just need to truncate the hdim_1 points.
+    if PBC_flag in ['none', 'hdim_2']:
+        h1_start_coords.append(max(h1_min, h1_start_coord))
+        h1_end_coords.append(min(h1_max, h1_end_coord))
+
+    # In both of these cases, we only need to truncate the hdim_2 points.
+    if PBC_flag in ['none', 'hdim_1']:
+        h2_start_coords.append(max(h2_min, h2_start_coord))
+        h2_end_coords.append(min(h2_max, h2_end_coord))
+
+    # If the PBC flag is none, we can just return.
+    if PBC_flag == 'none':
+        return [(h1_start_coords[0], h1_end_coords[0], h2_start_coords[0], h2_end_coords[0])]
+
+    # We have at least one periodic boundary.
+
+    # hdim_1 boundary is periodic.
+    if PBC_flag in ['hdim_1', 'both']:
+        if (h1_end_coord - h1_start_coord) >= (h1_max - h1_min):
+            # In this case, we have selected the full h1 length of the domain,
+            # so we set the start and end coords to just that.
+            h1_start_coords.append(h1_min)
+            h1_end_coords.append(h1_max)
+
+        # We know we only have either h1_end_coord > h1_max or h1_start_coord < h1_min
+        # and not both. If both are true, the previous if statement should trigger.
+        elif h1_start_coord < h1_min:
+            # First box: the part of the request inside the domain
+            h1_start_coords.append(h1_min)
+            h1_end_coords.append(h1_end_coord)
+            # Second box: the part wrapped around to the high end of hdim_1
+            pts_from_begin = h1_min - h1_start_coord
+            h1_start_coords.append(h1_max - pts_from_begin)
+            h1_end_coords.append(h1_max)
+
+        elif h1_end_coord > h1_max:
+            h1_start_coords.append(h1_start_coord)
+            h1_end_coords.append(h1_max)
+            pts_from_end = h1_end_coord - h1_max
+            h1_start_coords.append(h1_min)
+            h1_end_coords.append(h1_min + pts_from_end)
+
+        # We have no PBC-related issues, actually
+        else:
+            h1_start_coords.append(h1_start_coord)
+            h1_end_coords.append(h1_end_coord)
+
+    if PBC_flag in ['hdim_2', 'both']:
+        if (h2_end_coord - h2_start_coord) >= (h2_max - h2_min):
+            # In this case, we have selected the full h2 length of the domain,
+            # so we set the start and end coords to just that.
+            h2_start_coords.append(h2_min)
+            h2_end_coords.append(h2_max)
+
+        # We know we only have either h2_end_coord > h2_max or h2_start_coord < h2_min
+        # and not both. If both are true, the previous if statement should trigger.
+        elif h2_start_coord < h2_min:
+            # First box: the part of the request inside the domain
+            h2_start_coords.append(h2_min)
+            h2_end_coords.append(h2_end_coord)
+            # Second box: the part wrapped around to the high end of hdim_2
+            pts_from_begin = h2_min - h2_start_coord
+            h2_start_coords.append(h2_max - pts_from_begin)
+            h2_end_coords.append(h2_max)
+
+        elif h2_end_coord > h2_max:
+            h2_start_coords.append(h2_start_coord)
+            h2_end_coords.append(h2_max)
+            pts_from_end = h2_end_coord - h2_max
+            h2_start_coords.append(h2_min)
+            h2_end_coords.append(h2_min + pts_from_end)
+
+        # We have no PBC-related issues, actually
+        else:
+            h2_start_coords.append(h2_start_coord)
+            h2_end_coords.append(h2_end_coord)
+
+    out_coords = list()
+    for h1_start_coord_single, h1_end_coord_single in zip(h1_start_coords, h1_end_coords):
+        for h2_start_coord_single, h2_end_coord_single in zip(h2_start_coords, h2_end_coords):
+            out_coords.append((h1_start_coord_single, h1_end_coord_single, h2_start_coord_single, h2_end_coord_single))
+    return out_coords
+
+
+def generate_single_feature(start_h1, start_h2, start_v = None,
+                            spd_h1 = 1, spd_h2 = 1, spd_v = 1,
+                            min_h1 = 0, max_h1 = 1000, min_h2 = 0, max_h2 = 1000,
+                            num_frames = 1, dt = datetime.timedelta(minutes=5),
+                            start_date = datetime.datetime(2022,1,1,0),
+                            PBC_flag = 'none', frame_start = 1):
+    '''Function to generate a dummy feature dataframe to test the tracking functionality
+
+    Parameters
+    ----------
+    start_h1: float
+        Starting point of the feature in hdim_1 space
+    start_h2: float
+        Starting point of the feature in hdim_2 space
+    start_v: float
+        Starting point of the feature in vdim space (if 3D). For 2D, set to None.
+    spd_h1: float
+        Speed (per frame) of the feature in hdim_1
+    spd_h2: float
+        Speed (per frame) of the feature in hdim_2
+    spd_v: float
+        Speed (per frame) of the feature in vdim
+    min_h1: int
+        Minimum value of hdim_1 allowed. If PBC_flag is not 'none', then
+        this will be used to know when to wrap around periodic boundaries.
+        If PBC_flag is 'none', features will disappear if they are above/below
+        these bounds.
+    max_h1: int
+        Similar to min_h1, but the max value of hdim_1 allowed.
+    min_h2: int
+        Similar to min_h1, but the minimum value of hdim_2 allowed.
+    max_h2: int
+        Similar to min_h1, but the maximum value of hdim_2 allowed.
+    num_frames: int
+        Number of frames to generate
+    dt: datetime.timedelta
+        Difference in time between each frame
+    start_date: datetime.datetime
+        Start datetime
+    PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both')
+        Sets whether to use periodic boundaries, and if so in which directions.
+ 'none' means that we do not have periodic boundaries + 'hdim_1' means that we are periodic along hdim1 + 'hdim_2' means that we are periodic along hdim2 + 'both' means that we are periodic along both horizontal dimensions + frame_start: int + Number to start the frame at + ''' + + out_list_of_dicts = list() + curr_h1 = start_h1 + curr_h2 = start_h2 + curr_v = start_v + curr_dt = start_date + is_3D = not (start_v is None) + for i in range(num_frames): + curr_dict = dict() + curr_h1, curr_h2 = get_single_pbc_coordinate(min_h1, max_h1, min_h2, max_h2, + curr_h1, curr_h2, PBC_flag) + curr_dict['hdim_1'] = curr_h1 + curr_dict['hdim_2'] = curr_h2 + curr_dict['frame'] = frame_start + i + if curr_v is not None: + curr_dict['vdim'] = curr_v + curr_v += spd_v + curr_dict['time'] = curr_dt + + + curr_h1 += spd_h1 + curr_h2 += spd_h2 + curr_dt += dt + out_list_of_dicts.append(curr_dict) + + + return pd.DataFrame.from_dict(out_list_of_dicts) diff --git a/tobac/tests/test_feature_detection.py b/tobac/tests/test_feature_detection.py new file mode 100644 index 00000000..ef6e4a15 --- /dev/null +++ b/tobac/tests/test_feature_detection.py @@ -0,0 +1,83 @@ +import tobac.testing +import tobac.feature_detection as feat_detect + +def test_get_label_props_in_dict(): + '''Testing ```tobac.feature_detection.get_label_props_in_dict``` for both 2D and 3D cases. + ''' + import skimage.measure as skim + test_3D_data = tobac.testing.make_sample_data_3D_3blobs(data_type='xarray') + test_2D_data = tobac.testing.make_sample_data_2D_3blobs(data_type='xarray') + + + # make sure it works for 3D data + labels_3D = skim.label(test_3D_data.values[0]) + + output_3D = feat_detect.get_label_props_in_dict(labels_3D) + + #make sure it is a dict + assert type(output_3D) is dict + #make sure we get at least one output, there should be at least one label. + assert len(output_3D) > 0 + + # make sure it works for 2D data + labels_2D = skim.label(test_2D_data.values[0]) + + output_2D = feat_detect.get_label_props_in_dict(labels_2D) + + #make sure it is a dict + assert type(output_2D) is dict + #make sure we get at least one output, there should be at least one label. + assert len(output_2D) > 0 + + +def test_get_indices_of_labels_from_reg_prop_dict(): + '''Testing ```tobac.feature_detection.get_indices_of_labels_from_reg_prop_dict``` for 2D and 3D cases. + ''' + import skimage.measure as skim + import numpy as np + test_3D_data = tobac.testing.make_sample_data_3D_3blobs(data_type='xarray') + test_2D_data = tobac.testing.make_sample_data_2D_3blobs(data_type='xarray') + + + # make sure it works for 3D data + labels_3D = skim.label(test_3D_data.values[0]) + nx_3D = test_3D_data.values[0].shape[2] + ny_3D = test_3D_data.values[0].shape[1] + nz_3D = test_3D_data.values[0].shape[0] + + labels_2D = skim.label(test_2D_data.values[0]) + nx_2D = test_2D_data.values[0].shape[1] + ny_2D = test_2D_data.values[0].shape[0] + + region_props_3D = feat_detect.get_label_props_in_dict(labels_3D) + region_props_2D = feat_detect.get_label_props_in_dict(labels_2D) + + #get_indices_of_labels_from_reg_prop_dict + + [curr_loc_indices, z_indices, y_indices, x_indices] = feat_detect.get_indices_of_labels_from_reg_prop_dict(region_props_3D) + + for index_key in curr_loc_indices: + # there should be at least one value in each. 
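+        # curr_loc_indices holds the per-label point count, so each entry
+        # must be positive; the index arrays checked below must stay in bounds.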
+        assert curr_loc_indices[index_key] > 0
+
+        assert np.all(z_indices[index_key] >= 0) and np.all(z_indices[index_key] < nz_3D)
+        assert np.all(x_indices[index_key] >= 0) and np.all(x_indices[index_key] < nx_3D)
+        assert np.all(y_indices[index_key] >= 0) and np.all(y_indices[index_key] < ny_3D)
+
+    [curr_loc_indices, y_indices, x_indices] = feat_detect.get_indices_of_labels_from_reg_prop_dict(region_props_2D)
+
+    for index_key in curr_loc_indices:
+        # there should be at least one value in each.
+        assert curr_loc_indices[index_key] > 0
+
+        assert np.all(x_indices[index_key] >= 0) and np.all(x_indices[index_key] < nx_2D)
+        assert np.all(y_indices[index_key] >= 0) and np.all(y_indices[index_key] < ny_2D)
+
+
+def test_feature_detection_multithreshold_timestep():
+    '''
+    Tests ```tobac.feature_detection.feature_detection_multithreshold_timestep```
+    '''
+    pass
\ No newline at end of file
diff --git a/tobac/tests/test_testing.py b/tobac/tests/test_testing.py
new file mode 100644
index 00000000..5f42df88
--- /dev/null
+++ b/tobac/tests/test_testing.py
@@ -0,0 +1,323 @@
+'''
+Audit of the testing functions that produce our test data.
+Who's watching the watchmen, basically.
+'''
+import pytest
+from tobac.testing import get_pbc_coordinates, generate_single_feature, get_single_pbc_coordinate
+import tobac.testing as tbtest
+from collections import Counter
+import pandas as pd
+from pandas.util.testing import assert_frame_equal
+import datetime
+
+def lists_equal_without_order(a, b):
+    """
+    This will make sure the inner lists contain the same items,
+    but doesn't account for duplicate groups.
+    from: https://stackoverflow.com/questions/31501909/assert-list-of-list-equality-without-order-in-python/31502000
+    """
+    for l1 in a:
+        check_counter = Counter(l1)
+        if not any(Counter(l2) == check_counter for l2 in b):
+            return False
+    return True
+
+def test_make_feature_blob():
+    '''Tests ```tobac.testing.make_feature_blob```
+    Currently runs the following tests:
+    Creates a blob in the right location and cuts off without PBCs
+    Blob extends off PBCs for all dimensions when appropriate
+    '''
+    import numpy as np
+
+    # Test without PBCs first, make sure that a blob is generated in the first place.
+    # 2D test
+    out_blob = tbtest.make_feature_blob(np.zeros((10,10)), h1_loc=5, h2_loc=5,
+                                        h1_size = 2, h2_size= 2, shape='rectangle', amplitude= 1, PBC_flag='none')
+    assert np.all(out_blob[4:6, 4:6] == 1)
+    # There should be exactly 4 points of value 1.
+    assert np.sum(out_blob) == 4 and np.min(out_blob) == 0
+
+    # 3D test
+    out_blob = tbtest.make_feature_blob(np.zeros((10, 10, 10)), h1_loc=5, h2_loc=5,
+                                        v_loc = 5, h1_size = 2, h2_size= 2, v_size= 2,
+                                        shape='rectangle', amplitude= 1, PBC_flag='none')
+    assert np.all(out_blob[4:6, 4:6, 4:6] == 1)
+    # There should be exactly 8 points of value 1.
+    assert np.sum(out_blob) == 8 and np.min(out_blob) == 0
+
+
+    # Test that it cuts things off along a boundary.
+    # 2D test
+    out_blob = tbtest.make_feature_blob(np.zeros((10,10)), h1_loc=5, h2_loc=9,
+                                        h1_size = 2, h2_size= 4, shape='rectangle', amplitude= 1, PBC_flag='none')
+    assert np.all(out_blob[4:6, 7:10] == 1)
+    assert np.all(out_blob[4:6, 0:1] == 0)
+    # There should be exactly 6 points of value 1 (2 rows x 3 truncated columns).
+    assert np.sum(out_blob) == 6 and np.min(out_blob) == 0
+
+    # 3D test
+    out_blob = tbtest.make_feature_blob(np.zeros((10, 10, 10)), h1_loc=5, h2_loc=9,
+                                        v_loc = 5, h1_size = 2, h2_size= 4, v_size= 2,
+                                        shape='rectangle', amplitude= 1, PBC_flag='none')
+    assert np.all(out_blob[4:6, 4:6, 7:10] == 1)
+    assert np.all(out_blob[4:6, 4:6, 0:1] == 0)
+    # There should be exactly 12 points of value 1 (2 x 2 x 3).
+    assert np.sum(out_blob) == 12 and np.min(out_blob) == 0
+
+    for PBC_condition in ['hdim_1', 'hdim_2', 'both']:
+        # Now test simple cases with PBCs
+        # 2D test
+        out_blob = tbtest.make_feature_blob(np.zeros((10,10)), h1_loc=5, h2_loc=5,
+                                            h1_size = 2, h2_size= 2, shape='rectangle', amplitude= 1, PBC_flag=PBC_condition)
+        assert np.all(out_blob[4:6, 4:6] == 1)
+        # There should be exactly 4 points of value 1.
+        assert np.sum(out_blob) == 4 and np.min(out_blob) == 0
+
+        # 3D test
+        out_blob = tbtest.make_feature_blob(np.zeros((10, 10, 10)), h1_loc=5, h2_loc=5,
+                                            v_loc = 5, h1_size = 2, h2_size= 2, v_size= 2,
+                                            shape='rectangle', amplitude= 1, PBC_flag=PBC_condition)
+        assert np.all(out_blob[4:6, 4:6, 4:6] == 1)
+        # There should be exactly 8 points of value 1.
+        assert np.sum(out_blob) == 8 and np.min(out_blob) == 0
+
+    # Test that it wraps around on the hdim_2 positive side
+    for PBC_condition in ['hdim_2', 'both']:
+        out_blob = tbtest.make_feature_blob(np.zeros((10,10)), h1_loc=5, h2_loc=9,
+                                            h1_size = 2, h2_size= 4, shape='rectangle', amplitude= 1, PBC_flag=PBC_condition)
+        assert np.all(out_blob[4:6, 7:10] == 1)
+        assert np.all(out_blob[4:6, 0:1] == 1)
+        # There should be exactly 8 points of value 1 (2 rows x 4 wrapped columns).
+        assert np.sum(out_blob) == 8 and np.min(out_blob) == 0
+
+        # 3D test
+        out_blob = tbtest.make_feature_blob(np.zeros((10, 10, 10)), h1_loc=5, h2_loc=9,
+                                            v_loc = 5, h1_size = 2, h2_size= 4, v_size= 2,
+                                            shape='rectangle', amplitude= 1, PBC_flag=PBC_condition)
+        assert np.all(out_blob[4:6, 4:6, 7:10] == 1)
+        assert np.all(out_blob[4:6, 4:6, 0:1] == 1)
+        # There should be exactly 16 points of value 1 (2 x 2 x 4).
+        assert np.sum(out_blob) == 16 and np.min(out_blob) == 0
+
+
+
+def test_get_single_pbc_coordinate():
+    '''Tests ```tobac.testing.get_single_pbc_coordinate```.
+    Currently runs the following tests:
+    Point within bounds with all PBC conditions
+    Point off bounds on each side
+    Invalid point
+    '''
+
+    # Test points that do not need to be adjusted for all PBC conditions
+    for PBC_condition in ['none', 'hdim_1', 'hdim_2', 'both']:
+        assert(get_single_pbc_coordinate(0, 10, 0, 10, 3, 3, PBC_condition) == (3,3))
+        assert(get_single_pbc_coordinate(0, 10, 0, 10, 0, 0, PBC_condition) == (0,0))
+        assert(get_single_pbc_coordinate(0, 10, 0, 10, 9, 9, PBC_condition) == (9,9))
+
+    # Test points off bounds on each side
+    # First points off min/max of hdim_1 for the two that allow it
+    for PBC_condition in ['hdim_1', 'both']:
+        assert(get_single_pbc_coordinate(0, 10, 0, 10, -3, 3, PBC_condition) == (7,3))
+        assert(get_single_pbc_coordinate(0, 10, 0, 10, 12, 3, PBC_condition) == (2,3))
+        assert(get_single_pbc_coordinate(0, 10, 0, 10, 10, 3, PBC_condition) == (0,3))
+
+    # Now test points off min/max of hdim_1 for the two that don't allow it (expect raise error)
+    for PBC_condition in ['none','hdim_2']:
+        with pytest.raises(ValueError):
+            get_single_pbc_coordinate(0, 10, 0, 10, -3, 3, PBC_condition)
+        with pytest.raises(ValueError):
+            get_single_pbc_coordinate(0, 10, 0, 10, 12, 3, PBC_condition)
+        with pytest.raises(ValueError):
+            get_single_pbc_coordinate(0, 10, 0, 10, 10, 3, PBC_condition)
+
+
+    # Now test points off min/max of hdim_2 for the two that allow it
+    for PBC_condition in ['hdim_2', 'both']:
+        assert(get_single_pbc_coordinate(0, 10, 0, 10, 3, -3, PBC_condition) == (3,7))
+        assert(get_single_pbc_coordinate(0, 10, 0, 10, 3, 12, PBC_condition) == (3,2))
+        assert(get_single_pbc_coordinate(0, 10, 0, 10, 3, 10, PBC_condition) == (3,0))
+
+    # Now test hdim_2 min/max for the two that don't allow it
+    for PBC_condition in ['none','hdim_1']:
+        with pytest.raises(ValueError):
+            get_single_pbc_coordinate(0, 10, 0, 10, 3, -3, PBC_condition)
+        with pytest.raises(ValueError):
+            get_single_pbc_coordinate(0, 10, 0, 10, 3, 12, PBC_condition)
+        with pytest.raises(ValueError):
+            get_single_pbc_coordinate(0, 10, 0, 10, 3, 10, PBC_condition)
+
+    # Now test hdim_1 and hdim_2 min/max for 'both'
+    assert(get_single_pbc_coordinate(0, 11, 0, 10, -3, -3, 'both') == (8,7))
+    assert(get_single_pbc_coordinate(0, 10, 0, 10, 12, 12, 'both') == (2,2))
+
+
+    # Now test hdim_1 and hdim_2 min/max for the three that don't allow it
+    for PBC_condition in ['none','hdim_1', 'hdim_2']:
+        with pytest.raises(ValueError):
+            get_single_pbc_coordinate(0, 11, 0, 10, -3, -3, PBC_condition)
+        with pytest.raises(ValueError):
+            get_single_pbc_coordinate(0, 10, 0, 10, 12, 12, PBC_condition)
+
+
+
+
+def test_get_pbc_coordinates():
+    '''Tests tobac.testing.get_pbc_coordinates.
+    Currently runs the following tests:
+    For an invalid PBC_flag, we raise an error
+    For PBC_flag of 'none', we truncate the box and give a valid box.
+ + ''' + + with pytest.raises(ValueError): + get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'c') + + # Test PBC_flag of none + + assert (get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'none') == [(1, 4, 1, 4),]) + assert (get_pbc_coordinates(0, 10, 0, 10, -1, 4, 1, 4, 'none') == [(0, 4, 1, 4),]) + assert (get_pbc_coordinates(0, 10, 0, 10, 1, 12, 1, 4, 'none') == [(1, 10, 1, 4),]) + assert (get_pbc_coordinates(0, 10, 0, 10, 1, 12, -1, 4, 'none') == [(1, 10, 0, 4),]) + + # Test PBC_flag with hdim_1 + # Simple case, no PBC overlapping + assert (get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'hdim_1') == [(1, 4, 1, 4),]) + # PBC going on the min side + assert (get_pbc_coordinates(0, 10, 0, 10, -1, 4, 1, 4, 'hdim_1') == [(0, 4, 1, 4), (9, 10, 1, 4)]) + # PBC going on the min side; should be truncated in hdim_2. + assert (get_pbc_coordinates(0, 10, 0, 10, -1, 4, -1, 4, 'hdim_1') == [(0, 4, 0, 4), (9, 10, 0, 4)]) + # PBC going on the max side only + assert (get_pbc_coordinates(0, 10, 0, 10, 4, 12, 1, 4, 'hdim_1') == [(4, 10, 1, 4), (0, 2, 1, 4)]) + # PBC overlapping + assert (get_pbc_coordinates(0, 10, 0, 10, -4, 12, 1, 4, 'hdim_1') == [(0, 10, 1, 4),]) + + # Test PBC_flag with hdim_2 + # Simple case, no PBC overlapping + assert (get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'hdim_2') == [(1, 4, 1, 4),]) + # PBC going on the min side + assert (get_pbc_coordinates(0, 10, 0, 10, 1, 4, -1, 4, 'hdim_2') == [(1, 4, 0, 4), (1, 4, 9, 10)]) + # PBC going on the min side with truncation in hdim_1 + assert (get_pbc_coordinates(0, 10, 0, 10, -4, 4, -1, 4, 'hdim_2') == [(0, 4, 0, 4), (0, 4, 9, 10)]) + # PBC going on the max side + assert (get_pbc_coordinates(0, 10, 0, 10, 1, 4, 4, 12, 'hdim_2') == [(1, 4, 4, 10), (1, 4, 0, 2)]) + # PBC overlapping + assert (get_pbc_coordinates(0, 10, 0, 10, 1, 4, -4, 12, 'hdim_2') == [(1, 4, 0, 10),]) + + # Test PBC_flag with both + # Simple case, no PBC overlapping + assert (get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'both') == [(1, 4, 1, 4),]) + # hdim_1 only testing + # PBC on the min side of hdim_1 only + assert (get_pbc_coordinates(0, 10, 0, 10, -1, 4, 1, 4, 'both') == [(0, 4, 1, 4), (9, 10, 1, 4)]) + # PBC on the max side of hdim_1 only + assert (get_pbc_coordinates(0, 10, 0, 10, 4, 12, 1, 4, 'both') == [(4, 10, 1, 4), (0, 2, 1, 4)]) + # PBC overlapping on max side of hdim_1 only + assert (get_pbc_coordinates(0, 10, 0, 10, -4, 12, 1, 4, 'both') == [(0, 10, 1, 4),]) + # hdim_2 only testing + # PBC on the min side of hdim_2 only + assert (get_pbc_coordinates(0, 10, 0, 10, 1, 4, -1, 4, 'both') == [(1, 4, 0, 4), (1, 4, 9, 10)]) + # PBC on the max side of hdim_2 only + assert (get_pbc_coordinates(0, 10, 0, 10, 1, 4, 4, 12, 'both') == [(1, 4, 4, 10), (1, 4, 0, 2)]) + # PBC overlapping on max side of hdim_2 only + assert (get_pbc_coordinates(0, 10, 0, 10, 1, 4, -4, 12, 'both') == [(1, 4, 0, 10),]) + # hdim_1 and hdim_2 testing simultaneous + # both larger than the actual domain + assert (get_pbc_coordinates(0, 10, 0, 10, -1, 12, -4, 14, 'both') == [(0, 10, 0, 10),]) + # min in hdim_1 and hdim_2 + assert (lists_equal_without_order(get_pbc_coordinates(0, 10, 0, 10, -3, 3, -4, 2, 'both'), [(0, 3, 0, 2), (0, 3, 6, 10), (7, 10, 6, 10), (7, 10, 0, 2)])) + # max in hdim_1, min in hdim_2 + assert (lists_equal_without_order(get_pbc_coordinates(0, 10, 0, 10, 5, 12, -4, 2, 'both'), [(5, 10, 0, 2), (5, 10, 6, 10), (0, 2, 6, 10), (0, 2, 0, 2)])) + # max in hdim_1 and hdim_2 + assert (lists_equal_without_order(get_pbc_coordinates(0, 10, 0, 10, 5, 12, 7, 15, 'both'), [(5, 10, 7, 10), (5, 
10, 0, 5), (0, 2, 0, 5), (0, 2, 7, 10)]))
+    # min in hdim_1, max in hdim_2
+    assert (lists_equal_without_order(get_pbc_coordinates(0, 10, 0, 10, -3, 3, 7, 15, 'both'), [(0, 3, 7, 10), (0, 3, 0, 5), (7, 10, 0, 5), (7, 10, 7, 10)]))
+
+def test_generate_single_feature():
+    '''Tests the `generate_single_feature` function.
+    Currently runs the following tests:
+    A single feature is generated
+
+    '''
+
+    # Testing a simple 3D case
+    expected_df = pd.DataFrame.from_dict([
+        {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1,0,0)}
+    ])
+
+    assert_frame_equal(generate_single_feature(1, 1, start_v = 1, frame_start = 0).sort_index(axis=1), expected_df.sort_index(axis=1))
+
+    # Testing a simple 2D case
+    expected_df = pd.DataFrame.from_dict([
+        {'hdim_1': 1, 'hdim_2': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1,0,0)}
+    ])
+    assert_frame_equal(generate_single_feature(1, 1, frame_start = 0).sort_index(axis=1), expected_df.sort_index(axis=1))
+
+    # Testing a simple 2D case with movement
+    expected_df = pd.DataFrame.from_dict([
+        {'hdim_1': 1, 'hdim_2': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1,0,0)},
+        {'hdim_1': 2, 'hdim_2': 2, 'frame': 1, 'time': datetime.datetime(2022, 1, 1,0,5)},
+        {'hdim_1': 3, 'hdim_2': 3, 'frame': 2, 'time': datetime.datetime(2022, 1, 1,0,10)},
+        {'hdim_1': 4, 'hdim_2': 4, 'frame': 3, 'time': datetime.datetime(2022, 1, 1,0,15)},
+    ])
+    assert_frame_equal(generate_single_feature(1, 1, frame_start = 0, num_frames=4,
+                       spd_h1 = 1, spd_h2 = 1).sort_index(axis=1), expected_df.sort_index(axis=1))
+
+    # Testing a simple 3D case with movement
+    expected_df = pd.DataFrame.from_dict([
+        {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1,0,0)},
+        {'hdim_1': 2, 'hdim_2': 2, 'vdim': 2, 'frame': 1, 'time': datetime.datetime(2022, 1, 1,0,5)},
+        {'hdim_1': 3, 'hdim_2': 3, 'vdim': 3, 'frame': 2, 'time': datetime.datetime(2022, 1, 1,0,10)},
+        {'hdim_1': 4, 'hdim_2': 4, 'vdim': 4, 'frame': 3, 'time': datetime.datetime(2022, 1, 1,0,15)},
+    ])
+    assert_frame_equal(generate_single_feature(1, 1, start_v = 1, frame_start = 0, num_frames=4,
+                       spd_h1 = 1, spd_h2 = 1, spd_v = 1).sort_index(axis=1), expected_df.sort_index(axis=1))
+
+    # Testing a simple 3D case with movement that passes the hdim_1 boundary
+    expected_df = pd.DataFrame.from_dict([
+        {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1, 0, 0)},
+        {'hdim_1': 5, 'hdim_2': 2, 'vdim': 2, 'frame': 1, 'time': datetime.datetime(2022, 1, 1, 0, 5)},
+        {'hdim_1': 9, 'hdim_2': 3, 'vdim': 3, 'frame': 2, 'time': datetime.datetime(2022, 1, 1, 0, 10)},
+        {'hdim_1': 3, 'hdim_2': 4, 'vdim': 4, 'frame': 3, 'time': datetime.datetime(2022, 1, 1, 0, 15)},
+    ])
+    assert_frame_equal(generate_single_feature(1, 1, start_v = 1,
+                       min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10,
+                       frame_start = 0, num_frames=4,
+                       spd_h1 = 4, spd_h2 = 1, spd_v = 1, PBC_flag='hdim_1').sort_index(axis=1), expected_df.sort_index(axis=1))
+
+    # Testing a simple 3D case with movement that passes the hdim_2 boundary
+    expected_df = pd.DataFrame.from_dict([
+        {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1,0,0)},
+        {'hdim_1': 2, 'hdim_2': 5, 'vdim': 2, 'frame': 1, 'time': datetime.datetime(2022, 1, 1,0,5)},
+        {'hdim_1': 3, 'hdim_2': 9, 'vdim': 3, 'frame': 2, 'time': datetime.datetime(2022, 1, 1,0,10)},
+        {'hdim_1': 4, 'hdim_2': 3, 'vdim': 4, 'frame': 3, 'time': datetime.datetime(2022, 1, 1,0,15)},
+    ])
+    assert_frame_equal(generate_single_feature(1, 1, start_v = 1,
+                       min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10,
+                       frame_start = 0, num_frames=4,
+                       spd_h1 = 1, spd_h2 = 4, spd_v = 1, PBC_flag='hdim_2').sort_index(axis=1), expected_df.sort_index(axis=1))
+
+    # Testing a simple 3D case with movement that passes the hdim_1 and hdim_2 boundaries
+    expected_df = pd.DataFrame.from_dict([
+        {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1,0,0)},
+        {'hdim_1': 6, 'hdim_2': 5, 'vdim': 2, 'frame': 1, 'time': datetime.datetime(2022, 1, 1,0,5)},
+        {'hdim_1': 1, 'hdim_2': 9, 'vdim': 3, 'frame': 2, 'time': datetime.datetime(2022, 1, 1,0,10)},
+        {'hdim_1': 6, 'hdim_2': 3, 'vdim': 4, 'frame': 3, 'time': datetime.datetime(2022, 1, 1,0,15)},
+    ])
+    assert_frame_equal(generate_single_feature(1, 1, start_v = 1,
+                       min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10,
+                       frame_start = 0, num_frames=4,
+                       spd_h1 = 5, spd_h2 = 4, spd_v = 1, PBC_flag='both').sort_index(axis=1), expected_df.sort_index(axis=1))
diff --git a/tobac/tests/test_tracking.py b/tobac/tests/test_tracking.py
new file mode 100644
index 00000000..6c6a27d7
--- /dev/null
+++ b/tobac/tests/test_tracking.py
@@ -0,0 +1,208 @@
+'''
+Tests for the trackpy tracking functions
+Who's watching the watchmen, basically.
+'''
+import pytest
+import tobac.testing
+import tobac.tracking
+import copy
+from pandas.util.testing import assert_frame_equal
+import numpy as np
+
+def test_linking_trackpy():
+    '''Function to test ```tobac.tracking.linking_trackpy```
+    Currently tests:
+    2D tracking
+    3D tracking
+    3D tracking with PBCs
+    '''
+
+    # Test 2D tracking of a simple moving feature
+    test_feature = tobac.testing.generate_single_feature(1, 1,
+                                                         min_h1 = 0, max_h1 = 100, min_h2 = 0, max_h2 = 100,
+                                                         frame_start = 0, num_frames=5,
+                                                         spd_h1 = 1, spd_h2 = 1, PBC_flag='none')
+
+    expected_out_feature = copy.deepcopy(test_feature)
+    expected_out_feature['cell'] = 1.0
+
+    actual_out_feature = tobac.tracking.linking_trackpy(
+        test_feature, None, 5, 1000,
+        v_max = 10000, method_linking='predict',
+        PBC_flag = 'none'
+    )
+    # Just want to remove the time_cell column here.
+ actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'frame', 'time', 'cell']] + + assert_frame_equal(expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1)) + + # Test 3D tracking of a simple moving feature + test_feature = tobac.testing.generate_single_feature(1, 1, start_v = 1, + min_h1 = 0, max_h1 = 100, min_h2 = 0, max_h2 = 100, + frame_start = 0, num_frames=5, + spd_h1 = 1, spd_h2 = 1, spd_v = 1, PBC_flag='none') + + expected_out_feature = copy.deepcopy(test_feature) + expected_out_feature['cell'] = 1.0 + + actual_out_feature = tobac.tracking.linking_trackpy( + test_feature, None, 5, 1000, + v_max = 10000, method_linking='predict', + PBC_flag = 'none' + ) + # Just want to remove the time_cell column here. + actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'time', 'cell']] + + assert_frame_equal(expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1)) + + + # Test 3D tracking of a simple moving feature with periodic boundaries on hdim_1 + test_feature = tobac.testing.generate_single_feature(1, 1, start_v = 1, + min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, + frame_start = 0, num_frames=8, + spd_h1 = 3, spd_h2 = 1, spd_v = 1, PBC_flag='hdim_1') + + expected_out_feature = copy.deepcopy(test_feature) + expected_out_feature['cell'] = 1.0 + + actual_out_feature = tobac.tracking.linking_trackpy( + test_feature, None, 1, 1, min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, + v_max = 4, method_linking='predict', + PBC_flag = 'hdim_1' + ) + # Just want to remove the time_cell column here. + actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'time', 'cell']] + + assert_frame_equal(expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1)) + + # Test 3D tracking of a simple moving feature with periodic boundaries on hdim_2 + test_feature = tobac.testing.generate_single_feature(1, 1, start_v = 1, + min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, + frame_start = 0, num_frames=8, + spd_h1 = 1, spd_h2 = 3, spd_v = 1, PBC_flag='hdim_2') + + expected_out_feature = copy.deepcopy(test_feature) + expected_out_feature['cell'] = 1.0 + + actual_out_feature = tobac.tracking.linking_trackpy( + test_feature, None, 1, 1, min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, + v_max = 4, method_linking='predict', + PBC_flag = 'hdim_2' + ) + # Just want to remove the time_cell column here. + actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'time', 'cell']] + + assert_frame_equal(expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1)) + + # Test 3D tracking of a simple moving feature with periodic boundaries on both hdim_1 and hdim_2 + test_feature = tobac.testing.generate_single_feature(1, 1, start_v = 1, + min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, + frame_start = 0, num_frames=8, + spd_h1 = 3, spd_h2 = 3, spd_v = 0, PBC_flag='both') + + expected_out_feature = copy.deepcopy(test_feature) + expected_out_feature['cell'] = 1.0 + + actual_out_feature = tobac.tracking.linking_trackpy( + test_feature, None, 1, 1, min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, + v_max = 5, method_linking='predict', + PBC_flag = 'both' + ) + # Just want to remove the time_cell column here. 
+    actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'time', 'cell']]
+
+    assert_frame_equal(expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1))
+
+
+def test_build_distance_function():
+    '''Tests ```tobac.tracking.build_distance_function```
+    Currently tests:
+    that this produces an object that is suitable to call from trackpy
+    '''
+
+    test_func = tobac.tracking.build_distance_function(0, 10, 0, 10, 'both')
+    assert (test_func(np.array((0,9,9)), np.array((0,0,0))) == pytest.approx(1.4142135))
+
+
+def test_calc_distance_coords_pbc():
+    '''Tests ```tobac.tracking.calc_distance_coords_pbc```
+    Currently tests:
+    two points in normal space
+    Periodicity along hdim_1, hdim_2, and corners
+    '''
+
+    # Test first two points in normal space with varying PBC conditions
+    for PBC_condition in ['none', 'hdim_1', 'hdim_2', 'both']:
+        assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(0))
+        assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,1)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(1))
+        assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0)), np.array((0,1)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(1))
+        assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,0)), np.array((3,3,1)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(4.3588989, rel=1e-3))
+
+    # Now test two points that will be closer along the hdim_1 boundary for cases with PBCs in hdim_1
+    for PBC_condition in ['hdim_1', 'both']:
+        assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(1))
+        assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,9,0)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(1))
+        assert (tobac.tracking.calc_distance_coords_pbc(np.array((8,0)), np.array((0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(2))
+        assert (tobac.tracking.calc_distance_coords_pbc(np.array((4,0,4)), np.array((3,7,3)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(3.3166247))
+
+    # Test the same points, except without PBCs in hdim_1
+    for PBC_condition in ['none', 'hdim_2']:
+        assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(9))
+        assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,9,0)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(9))
+        assert (tobac.tracking.calc_distance_coords_pbc(np.array((8,0)), np.array((0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(8))
+
+    # Now test two points that will be closer along the hdim_2 boundary for cases with PBCs in hdim_2
+    for PBC_condition in ['hdim_2', 'both']:
+        assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,9)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(1))
+        assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(1))
+        assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,8)), np.array((0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(2))
+
+    # Test the same points, except without PBCs in hdim_2
+    for PBC_condition in ['none', 'hdim_1']:
+        assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,0)),
np.array((0,0,9)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(9))
+        assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(9))
+        assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,8)), np.array((0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(8))
+
+    # Test points that will be closer with periodic boundaries in both directions
+    PBC_condition = 'both'
+    assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,9,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+            == pytest.approx(1.4142135, rel=1e-3))
+    assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition)
+            == pytest.approx(1.4142135, rel=1e-3))
+
+    # Test the corner points for no PBCs
+    PBC_condition = 'none'
+    assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,9,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+            == pytest.approx(12.727922, rel=1e-3))
+    assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition)
+            == pytest.approx(12.727922, rel=1e-3))
+
+    # Test the corner points for hdim_1 and hdim_2
+    for PBC_condition in ['hdim_1', 'hdim_2']:
+        assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,9,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(9.055385))
+        assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(9.055385))
+
+
diff --git a/tobac/tracking.py b/tobac/tracking.py
index ff2cec2d..12e7ebd7 100644
--- a/tobac/tracking.py
+++ b/tobac/tracking.py
@@ -1,8 +1,19 @@
 import logging
 import numpy as np
 import pandas as pd
+import math
 
+def njit_if_available(func, **kwargs):
+    '''Decorator to wrap a function with numba.njit if available.
+    If numba isn't available, it just returns the function.
+    '''
+    try:
+        from numba import njit
+        # pass any decorator keyword arguments through to njit
+        return njit(func, **kwargs)
+    except ModuleNotFoundError:
+        return func
+
 def linking_trackpy(features,field_in,dt,dxy,
@@ -11,7 +22,10 @@ def linking_trackpy(features,field_in,dt,dxy,
                     order=1,extrapolate=0,
                     method_linking='random',
                     adaptive_step=None,adaptive_stop=None,
-                    cell_number_start=1
+                    cell_number_start=1,
+                    min_h1 = None, max_h1 = None,
+                    min_h2 = None, max_h2 = None,
+                    PBC_flag = 'none'
                     ):
     """Function to perform the linking of features in trajectories
@@ -29,10 +43,23 @@
     number of output timesteps features allowed to vanish for to be still considered tracked
     subnetwork_size int
     maximim size of subnetwork for linking
-    method_detection: str('trackpy' or 'threshold')
-         flag choosing method used for feature detection
     method_linking: str('predict' or 'random')
          flag choosing method used for trajectory linking
+    min_h1: int
+        Minimum hdim_1 value, required when PBC_flag is 'hdim_1' or 'both'
+    max_h1: int
+        Maximum hdim_1 value, required when PBC_flag is 'hdim_1' or 'both'
+    min_h2: int
+        Minimum hdim_2 value, required when PBC_flag is 'hdim_2' or 'both'
+    max_h2: int
+        Maximum hdim_2 value, required when PBC_flag is 'hdim_2' or 'both'
+    PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both')
+        Sets whether to use periodic boundaries, and if so in which directions.
+ 'none' means that we do not have periodic boundaries + 'hdim_1' means that we are periodic along hdim1 + 'hdim_2' means that we are periodic along hdim2 + 'both' means that we are periodic along both horizontal dimensions + Returns ------- pandas.dataframe @@ -45,6 +72,14 @@ def linking_trackpy(features,field_in,dt,dxy, # from trackpy import filter_stubs # from .utils import add_coordinates + # make sure that we have min and max for h1 and h2 if we are PBC + if PBC_flag in ['hdim_1', 'both'] and (min_h1 is None or max_h1 is None): + raise ValueError("For PBC tracking, must set min and max coordinates.") + + if PBC_flag in ['hdim_2', 'both'] and (min_h2 is None or max_h2 is None): + raise ValueError("For PBC tracking, must set min and max coordinates.") + + # calculate search range based on timestep and grid spacing if v_max is not None: search_range=int(dt*v_max/dxy) @@ -71,6 +106,26 @@ def linking_trackpy(features,field_in,dt,dxy, tp.linking.Linker.MAX_SUB_NET_SIZE=subnetwork_size # deep copy to preserve features field: features_linking=deepcopy(features) + # check if we are 3D or not + + if 'vdim' in features_linking: + is_3D = True + pos_columns_tp = ['vdim','hdim_1','hdim_2'] + else: + is_3D = False + pos_columns_tp = ['hdim_1', 'hdim_2'] + + # Check if we have PBCs. + if PBC_flag in ['hdim_1', 'hdim_2', 'both']: + # Per the trackpy docs, to specify a custom distance function + # which we need for PBCs, neighbor_strategy must be 'BTree'. + # I think this shouldn't change results, but it will degrade performance. + neighbor_strategy = 'BTree' + dist_func = build_distance_function(min_h1, max_h1, min_h2, max_h2, PBC_flag) + + else: + neighbor_strategy = 'KDTree' + dist_func = None if method_linking is 'random': @@ -79,18 +134,20 @@ def linking_trackpy(features,field_in,dt,dxy, search_range=search_range, memory=memory, t_column='frame', - pos_columns=['hdim_2','hdim_1'], + pos_columns=pos_columns_tp, adaptive_step=adaptive_step,adaptive_stop=adaptive_stop, - neighbor_strategy='KDTree', link_strategy='auto' + neighbor_strategy=neighbor_strategy, link_strategy='auto', + dist_func = dist_func ) elif method_linking is 'predict': pred = tp.predict.NearestVelocityPredict(span=1) trajectories_unfiltered = pred.link_df(features_linking, search_range=search_range, memory=memory, - pos_columns=['hdim_1','hdim_2'], + pos_columns=pos_columns_tp, t_column='frame', - neighbor_strategy='KDTree', link_strategy='auto', - adaptive_step=adaptive_step,adaptive_stop=adaptive_stop + neighbor_strategy=neighbor_strategy, link_strategy='auto', + adaptive_step=adaptive_step,adaptive_stop=adaptive_stop, + dist_func = dist_func # copy_features=False, diagnostics=False, # hash_size=None, box_size=None, verify_integrity=True, # retain_index=False @@ -151,7 +208,7 @@ def linking_trackpy(features,field_in,dt,dxy, #logging.debug('feature linking completed') return trajectories_final - + def fill_gaps(t,order=1,extrapolate=0,frame_max=None,hdim_1_max=None,hdim_2_max=None): '''add cell time as time since the initiation of each cell @@ -253,3 +310,93 @@ def remap_particle_to_cell_nv(particle_cell_map, input_particle): ''' return particle_cell_map[input_particle] + +def build_distance_function(min_h1, max_h1, min_h2, max_h2, PBC_flag): + '''Function to build a partial ```calc_distance_coords_pbc``` function + suitable for use with trackpy + + Parameters + ---------- + min_h1: int + Minimum point in hdim_1 + max_h1: int + Maximum point in hdim_1 + min_h2: int + Minimum point in hdim_2 + max_h2: int + Maximum point in hdim_2 
+    PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both')
+        Sets whether to use periodic boundaries, and if so in which directions.
+        'none' means that we do not have periodic boundaries
+        'hdim_1' means that we are periodic along hdim1
+        'hdim_2' means that we are periodic along hdim2
+        'both' means that we are periodic along both horizontal dimensions
+
+    Returns
+    -------
+    function object
+        A version of calc_distance_coords_pbc suitable to be called by
+        just f(coords_1, coords_2)
+
+    '''
+    import functools
+    return functools.partial(calc_distance_coords_pbc,
+                             min_h1 = min_h1, max_h1 = max_h1, min_h2 = min_h2,
+                             max_h2 = max_h2, PBC_flag = PBC_flag)
+
+@njit_if_available
+def calc_distance_coords_pbc(coords_1, coords_2, min_h1, max_h1, min_h2, max_h2,
+                             PBC_flag):
+    '''Function to calculate the distance between cartesian
+    coordinate set 1 and coordinate set 2. Note that we assume both
+    coordinates are within their min/max already.
+
+    Parameters
+    ----------
+    coords_1: array-like
+        Set of coordinates passed in from trackpy, either (vdim, hdim_1, hdim_2)
+        coordinates for 3D data or (hdim_1, hdim_2) coordinates for 2D data.
+    coords_2: array-like
+        Similar to coords_1, but for the second pair of coordinates
+    min_h1: int
+        Minimum point in hdim_1
+    max_h1: int
+        Maximum point in hdim_1
+    min_h2: int
+        Minimum point in hdim_2
+    max_h2: int
+        Maximum point in hdim_2
+    PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both')
+        Sets whether to use periodic boundaries, and if so in which directions.
+        'none' means that we do not have periodic boundaries
+        'hdim_1' means that we are periodic along hdim1
+        'hdim_2' means that we are periodic along hdim2
+        'both' means that we are periodic along both horizontal dimensions
+
+    Returns
+    -------
+    float
+        Distance between coords_1 and coords_2 in cartesian space.
+
+    '''
+    is_3D = len(coords_1) == 3
+    size_h1 = max_h1 - min_h1
+    size_h2 = max_h2 - min_h2
+
+    if not is_3D:
+        # Let's make the accounting easier.
+        coords_1 = np.array((0, coords_1[0], coords_1[1]))
+        coords_2 = np.array((0, coords_2[0], coords_2[1]))
+
+    if PBC_flag in ['hdim_1', 'both']:
+        mod_h1 = size_h1
+    else:
+        mod_h1 = 0
+    if PBC_flag in ['hdim_2', 'both']:
+        mod_h2 = size_h2
+    else:
+        mod_h2 = 0
+    max_dims = np.array((0, mod_h1, mod_h2))
+    deltas = np.abs(coords_1 - coords_2)
+    # for periodic dimensions, a separation larger than half the domain is
+    # shorter the other way around the boundary
+    deltas = np.where(deltas > 0.5 * max_dims, deltas - max_dims, deltas)
+    return np.sqrt(np.sum(deltas**2))
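+
+# Worked example (editor's illustration, matching tobac/tests/test_tracking.py):
+# on a 10x10 domain periodic in both horizontal directions, the opposite
+# corners (9, 9) and (0, 0) are one wrapped step apart in each dimension:
+# calc_distance_coords_pbc(np.array((0, 9, 9)), np.array((0, 0, 0)),
+#                          0, 10, 0, 10, 'both')
+# returns sqrt(1**2 + 1**2) ~= 1.4142.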
Returns
     -------
     pandas DataFrame
-        trajectories with added coordinated
+        trajectories with added coordinates
     '''
     from scipy.interpolate import interp2d, interp1d
@@ -439,6 +443,142 @@ def add_coordinates(t,variable_cube):
         logging.debug('added coord: '+ coord)
     return t
 
+def add_coordinates_3D(t,variable_cube):
+    import numpy as np
+    '''Function adding coordinates from the tracking cube to the trajectories
+    for the 3D case: time, longitude&latitude, x&y dimensions, and altitude
+
+    Parameters
+    ----------
+    t: pandas DataFrame
+        trajectories/features
+    variable_cube: iris.cube.Cube
+        Cube (usually the one you are tracking on) at least containing the dimension of 'time'.
+        Typically, 'longitude','latitude','x_projection_coordinate','y_projection_coordinate',
+        and 'altitude' (if 3D) are the coordinates that we expect, although this function
+        will happily interpolate along any dimension coordinates you give.
+
+    Returns
+    -------
+    pandas DataFrame
+        trajectories with added coordinates
+    '''
+    from scipy.interpolate import interp2d, interp1d
+
+    logging.debug('start adding coordinates from cube')
+
+    # pull time as datetime object and timestr from input data and add it to DataFrame:
+    t['time']=None
+    t['timestr']=None
+
+
+    logging.debug('adding time coordinate')
+
+    time_in=variable_cube.coord('time')
+    time_in_datetime=time_in.units.num2date(time_in.points)
+
+    t["time"]=time_in_datetime[t['frame']]
+    t["timestr"]=[x.strftime('%Y-%m-%d %H:%M:%S') for x in time_in_datetime[t['frame']]]
+
+    # Get list of all coordinates in input cube except for time (already treated):
+    coord_names=[coord.name() for coord in variable_cube.coords()]
+    coord_names.remove('time')
+
+    logging.debug('time coordinate added')
+
+    # choose the right dimensions for the horizontal and vertical axes based on the time dimension:
+    ndim_time=variable_cube.coord_dims('time')[0]
+    if ndim_time==0:
+        vdim=1
+        hdim_1=2
+        hdim_2=3
+    elif ndim_time==1:
+        vdim=0
+        hdim_1=2
+        hdim_2=3
+    elif ndim_time==2:
+        vdim=0
+        hdim_1=1
+        hdim_2=3
+    elif ndim_time==3:
+        vdim=0
+        hdim_1=1
+        hdim_2=2
+
+    # create vectors to use to interpolate from pixels to coordinates
+    dimvec_1=np.arange(variable_cube.shape[vdim])
+    dimvec_2=np.arange(variable_cube.shape[hdim_1])
+    dimvec_3=np.arange(variable_cube.shape[hdim_2])
+
+    # loop over coordinates in input data:
+    for coord in coord_names:
+        logging.debug('adding coord: '+ coord)
+        # interpolate 1D coordinates:
+        if variable_cube.coord(coord).ndim==1:
+
+            if variable_cube.coord_dims(coord)==(vdim,):
+                f=interp1d(dimvec_1,variable_cube.coord(coord).points,fill_value="extrapolate")
+                coordinate_points=f(t['vdim'])
+
+            if variable_cube.coord_dims(coord)==(hdim_1,):
+                f=interp1d(dimvec_2,variable_cube.coord(coord).points,fill_value="extrapolate")
+                coordinate_points=f(t['hdim_1'])
+
+            if variable_cube.coord_dims(coord)==(hdim_2,):
+                f=interp1d(dimvec_3,variable_cube.coord(coord).points,fill_value="extrapolate")
+                coordinate_points=f(t['hdim_2'])
+
+        # interpolate 2D coordinates:
+        elif variable_cube.coord(coord).ndim==2:
+
+            if variable_cube.coord_dims(coord)==(hdim_1,hdim_2):
+                f=interp2d(dimvec_3,dimvec_2,variable_cube.coord(coord).points)
+                coordinate_points=[f(a,b) for a,b in zip(t['hdim_2'],t['hdim_1'])]
+
+            if variable_cube.coord_dims(coord)==(hdim_2,hdim_1):
+                f=interp2d(dimvec_2,dimvec_3,variable_cube.coord(coord).points)
+                coordinate_points=[f(a,b) for a,b in zip(t['hdim_1'],t['hdim_2'])]
+
+        # interpolate 3D coordinates:
+        # mainly workaround for wrf latitude and longitude (to be fixed in future)
+
+        elif 
variable_cube.coord(coord).ndim==3:
+
+            if variable_cube.coord_dims(coord)==(ndim_time,hdim_1,hdim_2):
+                f=interp2d(dimvec_3,dimvec_2,variable_cube[0,:,:].coord(coord).points)
+                coordinate_points=[f(a,b) for a,b in zip(t['hdim_2'],t['hdim_1'])]
+
+            if variable_cube.coord_dims(coord)==(ndim_time,hdim_2,hdim_1):
+                f=interp2d(dimvec_2,dimvec_3,variable_cube[0,:,:].coord(coord).points)
+                coordinate_points=[f(a,b) for a,b in zip(t['hdim_1'],t['hdim_2'])]
+
+
+            if variable_cube.coord_dims(coord)==(hdim_1,ndim_time,hdim_2):
+                f=interp2d(dimvec_3,dimvec_2,variable_cube[:,0,:].coord(coord).points)
+                coordinate_points=[f(a,b) for a,b in zip(t['hdim_2'],t['hdim_1'])]
+
+            if variable_cube.coord_dims(coord)==(hdim_1,hdim_2,ndim_time):
+                f=interp2d(dimvec_3,dimvec_2,variable_cube[:,:,0].coord(coord).points)
+                coordinate_points=[f(a,b) for a,b in zip(t['hdim_2'],t['hdim_1'])]
+
+
+            if variable_cube.coord_dims(coord)==(hdim_2,ndim_time,hdim_1):
+                f=interp2d(dimvec_2,dimvec_3,variable_cube[:,0,:].coord(coord).points)
+                coordinate_points=[f(a,b) for a,b in zip(t['hdim_1'],t['hdim_2'])]
+
+            if variable_cube.coord_dims(coord)==(hdim_2,hdim_1,ndim_time):
+                f=interp2d(dimvec_2,dimvec_3,variable_cube[:,:,0].coord(coord).points)
+                coordinate_points=[f(a,b) for a,b in zip(t['hdim_1'],t['hdim_2'])]
+
+        # write resulting array or list into DataFrame:
+        t[coord]=coordinate_points
+
+        logging.debug('added coord: '+ coord)
+    return t
+
+
+
 def get_bounding_box(x,buffer=1):
     from numpy import delete,arange,diff,nonzero,array
     """Calculates the bounding box of a ndarray

From 32b1131b181db2704f1634f870ac671502f4311f Mon Sep 17 00:00:00 2001
From: galexsky <90701223+galexsky@users.noreply.github.com>
Date: Tue, 22 Feb 2022 17:15:53 -0700
Subject: [PATCH 16/82] Added new 3D and PBC segmentation

Added a few new functions and many lines of code to enable a new 'box'
3D seeding approach and proper watershedding/adjustment of segmentation
masks across periodic boundaries. The if-else split for the older 'column'
seeding approach and the new 'box' seeding approach should work (though I
have not tested it), but right now the PBC treatment assumes we are using
the 'box' approach and will likely have to be adapted to work with the
'column' approach too. There is also a bug in the buddy box seeding
currently: because we assume a 5x5x5 seeding box, the code will break if
the size of the buddy box is less than 5 in any of the dimensions.
Generalizing the box approach to a user-set size and also adding a check
for this in the Buddy Box routine should help to fix this, but I wanted to
port it over without breaking any of the current functionality before I
started exploring that.
---
 tobac/segmentation.py | 912 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 883 insertions(+), 29 deletions(-)
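The 5x5x5 limitation called out in the message above is easy to see in isolation (a sketch, not part of the patch; the box length of 3 is hypothetical):

    import numpy as np
    length = 3                      # a buddy box narrower than the 5-point window
    np.arange(length - 5, length)   # -> array([-2, -1, 0, 1, 2]): the negative
                                    # indices silently wrap to the wrong end of the array

diff --git a/tobac/segmentation.py b/tobac/segmentation.py
index 288cb2cd..5111df06 100644
--- a/tobac/segmentation.py
+++ b/tobac/segmentation.py
@@ -1,13 +1,140 @@
 import logging
 
-def segmentation_3D(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None):
-    return segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance)
+def get_label_props_in_dict(labels):
+    '''Function to get the label properties into a dictionary format.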
+
+    Parameters
+    ----------
+    labels: 2D or 3D array-like
+        comes from the `skimage.measure.label` function
+
+    Returns
+    -------
+    dict
+        output from skimage.measure.regionprops in dictionary format, where the key is the label number
+    '''
+    import skimage.measure
+
+    region_properties_raw = skimage.measure.regionprops(labels)
+    region_properties_dict = dict()
+    for region_prop in region_properties_raw:
+        region_properties_dict[region_prop.label] = region_prop
+
+    return region_properties_dict
+
+def get_indices_of_labels_from_reg_prop_dict(region_property_dict):
+    '''Function to get the x, y, and z indices (as well as point count) of all labeled regions.
+    This function should produce similar output to new_get_indices_of_labels, but
+    allows for re-use of the region_property_dict.
+
+    Parameters
+    ----------
+    region_property_dict: dict of region_property objects
+        This dict should come from the get_label_props_in_dict function.
+
+    Returns
+    -------
+    dict (key: label number, int)
+        The number of points in the label number
+    dict (key: label number, int)
+        The z indices in the label number
+    dict (key: label number, int)
+        The y indices in the label number
+    dict (key: label number, int)
+        The x indices in the label number
+    Raises
+    ------
+    ValueError
+        a ValueError is raised if there are no regions in region_property_dict
+    '''
+
+    import skimage.measure
+
+    if len(region_property_dict) == 0:
+        raise ValueError("No regions!")
+
+    z_indices = dict()
+    y_indices = dict()
+    x_indices = dict()
+    curr_loc_indices = dict()
+
+    #loop through all skimage identified regions
+    for region_prop_key in region_property_dict:
+        region_prop = region_property_dict[region_prop_key]
+        index = region_prop.label
+        curr_z_ixs, curr_y_ixs, curr_x_ixs = np.transpose(region_prop.coords)
+        z_indices[index] = curr_z_ixs
+        y_indices[index] = curr_y_ixs
+        x_indices[index] = curr_x_ixs
+        curr_loc_indices[index] = len(curr_x_ixs)
+
+    #print("indices found")
+    return [curr_loc_indices, z_indices, y_indices, x_indices]
+
+def adjust_pbc_point(in_dim, dim_min, dim_max):
+    '''Function to adjust a point to the other boundary for PBCs
+
+    Parameters
+    ----------
+    in_dim : int
+        Input coordinate to adjust
+    dim_min : int
+        Minimum point for the dimension
+    dim_max : int
+        Maximum point for the dimension (inclusive)
+
+    Returns
+    -------
+    int
+        The adjusted point on the opposite boundary
+
+    Raises
+    ------
+    ValueError
+        If in_dim isn't on one of the boundary points
+    '''
+    if in_dim == dim_min:
+        return dim_max
+    elif in_dim == dim_max:
+        return dim_min
+    else:
+        raise ValueError("In adjust_pbc_point, in_dim isn't on a boundary.")
+
+def transfm_pbc_point(in_dim, dim_min, dim_max):
+    '''Function to transform a PBC-feature point for contiguity
+
+    Parameters
+    ----------
+    in_dim : int
+        Input coordinate to adjust
+    dim_min : int
+        Minimum point for the dimension
+    dim_max : int
+        Maximum point for the dimension (inclusive)
+
+    Returns
+    -------
+    int
+        The transformed point
+
+    Raises
+    ------
+    ValueError
+        If in_dim isn't on one of the boundary points
+    '''
+    if in_dim < ((dim_min+dim_max)/2):
+        return in_dim+dim_max+1
+    else:
+        return in_dim
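A quick sanity check of this transform (a sketch with a hypothetical 10-point dimension, indices 0 through 9): points in the lower half are shifted past the upper edge, so a feature straddling the periodic seam becomes contiguous.

    # hypothetical feature straddling the seam of a 10-point dimension (0..9)
    pts = [8, 9, 0, 1]
    [transfm_pbc_point(p, 0, 9) for p in pts]   # -> [8, 9, 10, 11], now contiguous

+def segmentation_3D(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,PBC_flag=0):
+    return 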
segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,PBC_flag=PBC_flag) +def segmentation_2D(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,PBC_flag=0): + return segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,PBC_flag=PBC_flag) -def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto'): + +def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',PBC_flag='None',seed_3D_flag='column'): """Function performing watershedding for an individual timestep of the data Parameters @@ -26,6 +153,14 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu flag determining the algorithm to use (currently watershedding implemented) max_distance: float maximum distance from a marker allowed to be classified as belonging to that cell + PBC_flag: string + options: 'none' (default), 'hdim_1', 'hdim_2', 'both' + flag indicating whether to use PBC treatment or not + note to self: should be expanded to account for singly periodic boundaries also + rather than just doubly periodic + seed_3D_flag: string + options: 'column' (default), 'box' + Seed 3D field with Returns ------- @@ -39,6 +174,16 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu from scipy.ndimage import distance_transform_edt from copy import deepcopy import numpy as np + + #saving intermediary fields for testing + #original mask, secondary seeding, final version + #so we can ascertain deltas of each + #inter_fp = '/sumatra/asokolowsky/tobac_data/segmentation/testing/' + + #if (np.all(features_in.frame.values[:] == 0)): + # print("creating output file") + # out_f = h5py.File('/sumatra/asokolowsky/tobac_data/segmentation/testing/seg_fields_progression.h5','w') + # print(out_f) # copy feature dataframe for output features_out=deepcopy(features_in) @@ -76,33 +221,118 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu markers[int(row['hdim_1']), int(row['hdim_2'])]=row['feature'] elif field_in.ndim==3: #3D watershedding - list_coord_names=[coord.name() for coord in field_in.coords()] - #determine vertical axis: - if vertical_coord=='auto': - list_vertical=['z','model_level_number','altitude','geopotential_height'] - for coord_name in list_vertical: - if coord_name in list_coord_names: - vertical_axis=coord_name - break - elif vertical_coord in list_coord_names: - vertical_axis=vertical_coord - else: - raise ValueError('Plese specify vertical coordinate') - ndim_vertical=field_in.coord_dims(vertical_axis) - if len(ndim_vertical)>1: - raise ValueError('please specify 1 dimensional vertical coordinate') - for index, row in features_in.iterrows(): - if ndim_vertical[0]==0: - markers[:,int(row['hdim_1']), int(row['hdim_2'])]=row['feature'] - elif ndim_vertical[0]==1: - markers[int(row['hdim_1']),:, int(row['hdim_2'])]=row['feature'] - elif ndim_vertical[0]==2: - markers[int(row['hdim_1']), int(row['hdim_2']),:]=row['feature'] + if (seed_3D_flag == 'column'): + list_coord_names=[coord.name() for coord in field_in.coords()] + #determine vertical axis: + if vertical_coord=='auto': + list_vertical=['z','model_level_number','altitude','geopotential_height'] + for coord_name in list_vertical: + if 
coord_name in list_coord_names:
+                        vertical_axis=coord_name
+                        break
+            elif vertical_coord in list_coord_names:
+                vertical_axis=vertical_coord
+            else:
+                raise ValueError('Please specify vertical coordinate')
+            ndim_vertical=field_in.coord_dims(vertical_axis)
+            if len(ndim_vertical)>1:
+                raise ValueError('please specify 1 dimensional vertical coordinate')
+            for index, row in features_in.iterrows():
+                if ndim_vertical[0]==0:
+                    markers[:,int(row['hdim_1']), int(row['hdim_2'])]=row['feature']
+                elif ndim_vertical[0]==1:
+                    markers[int(row['hdim_1']),:, int(row['hdim_2'])]=row['feature']
+                elif ndim_vertical[0]==2:
+                    markers[int(row['hdim_1']), int(row['hdim_2']),:]=row['feature']
+
+        elif (seed_3D_flag == 'box'):
+            list_coord_names=[coord.name() for coord in field_in.coords()]
+            #determine vertical axis:
+            #print(list_coord_names)
+            if vertical_coord=='auto':
+                list_vertical=['vdim','z','model_level_number','altitude','geopotential_height']
+                for coord_name in list_vertical:
+                    if coord_name in list_coord_names:
+                        vertical_axis=coord_name
+                        #print(vertical_axis)
+                        break
+            elif vertical_coord in list_coord_names:
+                vertical_axis=vertical_coord
+            else:
+                raise ValueError('Please specify vertical coordinate')
+            ndim_vertical=field_in.coord_dims(vertical_axis)
+            #print(ndim_vertical,ndim_vertical[0])
+
+            if len(ndim_vertical)>1:
+                raise ValueError('please specify 1 dimensional vertical coordinate')
+            z_len = len(field_in.coord('z').points)
+            y_len = len(field_in.coord('y').points)
+            x_len = len(field_in.coord('x').points)
+
+            #print(z_len,y_len,x_len)
+            #display(features_in)
+
+            for index, row in features_in.iterrows():
+                #creation of 5x5x5 point ranges for 3D marker seeding
+                #and PBC flags for cross-boundary seeding - nixing this idea for now, but described in PBC Segmentation notes
+                #PBC_y_chk = 0
+                #PBC_x_chk = 0
+
+                #print("feature: ",row['feature'])
+                #print("z-ctr: ",row['vdim'])
+                #print("y-ctr: ",row['hdim_1'])
+                #print("x-ctr: ",row['hdim_2'])
+
+                if(int(row['vdim']) >=2 and int(row['vdim']) <= z_len-3):
+                    z_list = np.arange(int(row['vdim']-2),int(row['vdim']+3))
+                elif(int(row['vdim']) < 2):
+                    z_list = np.arange(0,5)
+                else:
+                    z_list = np.arange(z_len-5,z_len)
+
+                if(int(row['hdim_1']) >=2 and int(row['hdim_1']) <= y_len-3):
+                    y_list = np.arange(int(row['hdim_1']-2),int(row['hdim_1']+3))
+                elif(int(row['hdim_1']) < 2):
+                    y_list = np.arange(0,5)
+                    #PBC_y_chk = 1
+                else:
+                    y_list = np.arange(y_len-5,y_len)
+                    #PBC_y_chk = 1
+
+                if(int(row['hdim_2']) >=2 and int(row['hdim_2']) <= x_len-3):
+                    x_list = np.arange(int(row['hdim_2']-2),int(row['hdim_2']+3))
+                elif(int(row['hdim_2']) < 2):
+                    x_list = np.arange(0,5)
+                    #PBC_x_chk = 1
+                else:
+                    x_list = np.arange(x_len-5,x_len)
+                    #PBC_x_chk = 1
+
+                #loop thru 5x5x5 z times y times x range
+                for k in range(0,5):
+                    for j in range(0,5):
+                        for i in range(0,5):
+
+                            if ndim_vertical[0]==0:
+                                markers[z_list[k],y_list[j],x_list[i]]=row['feature']
+                            elif ndim_vertical[0]==1:
+                                markers[y_list[j],z_list[k],x_list[i]]=row['feature']
+                            elif ndim_vertical[0]==2:
+                                markers[y_list[j],x_list[i],z_list[k]]=row['feature']
+
+
+        #else:
+            #error for unspec method
+
+
     else:
         raise ValueError('Segmentations routine only possible with 2 or 3 spatial dimensions')
 
     # set markers in cells not fulfilling threshold condition to zero:
     markers[~unmasked]=0
+    marker_vals = np.unique(markers)
 
     # Turn into np arrays (not necessary for markers) as dask arrays don't yet seem to work for watershedding algorithm
     data_segmentation=np.array(data_segmentation)
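Each of the three if/elif/else ladders above applies the same clamp; a compact sketch of the rule follows (the helper name and the lengths are hypothetical, not part of the patch):

    import numpy as np
    def seed_window(center, length):
        # clamp a 5-point seeding window so it stays inside [0, length)
        if 2 <= center <= length - 3:
            return np.arange(center - 2, center + 3)
        elif center < 2:
            return np.arange(0, 5)
        else:
            return np.arange(length - 5, length)

    seed_window(0, 20)    # -> [0 1 2 3 4]
    seed_window(10, 20)   # -> [8 9 10 11 12]
    seed_window(19, 20)   # -> [15 16 17 18 19]

@@ -121,9 +351,633 @@ def 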
segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
     if max_distance is not None:
         D=distance_transform_edt((markers==0).astype(int))
         segmentation_mask[np.bitwise_and(segmentation_mask>0, D>max_distance_pixel)]=0
+
+    #mask all segmentation_mask points below threshold as -1
+    #to differentiate from those unmasked points NOT filled by watershedding
+    print(np.unique(segmentation_mask))
+    segmentation_mask[~unmasked] = -1
+
+    #saves/prints below for testing
+    seg_m_data = segmentation_mask[:]  # used below to build the region dictionary
+
+    #if (np.all(features_in.frame.values[:] == 0)):
+    #    print("saving first field")
+    #    first_seg_mask = out_f.create_dataset("seg_mask_1",data=seg_m_data)
+
+    #print(seg_m_data)
+
+    #read in labeling/masks and region-finding functions
+    reg_props_dict = get_label_props_in_dict(seg_m_data)
+
+    import gc  # local import: gc is not pulled in with the imports above
+    del seg_m_data
+    gc.collect()
+
+
+    curr_reg_inds, z_reg_inds, y_reg_inds, x_reg_inds = get_indices_of_labels_from_reg_prop_dict(reg_props_dict)
+
+    print(np.unique(segmentation_mask[:]))
+    print(curr_reg_inds)
+
+    #z_unf,y_unf,x_unf = np.where(segmentation_mask==0)
+
+    #print(np.where(segmentation_mask==-1))
+    #print(np.where(segmentation_mask==0))
+
+    hdim1_min = 0
+    hdim1_max = segmentation_mask.shape[1] - 1
+    hdim2_min = 0
+    hdim2_max = segmentation_mask.shape[2] - 1
+
+    # all options that involve dealing with periodic boundaries
+    pbc_options = ['hdim_1', 'hdim_2', 'both']
+
+    if PBC_flag in pbc_options:
+
+        #Return all indices where segmentation field == 0
+        #meaning unfilled but above threshold
+        vdim_unf,hdim1_unf,hdim2_unf = np.where(segmentation_mask==0)
+
+        #vdim_unf = z_reg_inds[0.]
+        #hdim1_unf = y_reg_inds[0.]
+        #hdim2_unf = x_reg_inds[0.]
+
+        #for cur_idx in wall_labels:
+        #skip this if there aren't enough points to be considered a real feature
+        #as defined above by n_min_threshold
+        #curr_count = curr_reg_inds[cur_idx]
+        #print("Current wall feature: ",cur_idx)
+
+        seg_mask_unseeded = np.zeros(segmentation_mask.shape)
+
+        seg_mask_unseeded[vdim_unf,hdim1_unf,hdim2_unf]=1
+
+        #create labeled field of unfilled, unseeded features
+        from scipy.ndimage import label  # local import: label is not pulled in with the imports above
+        labels_unseeded,label_num = label(seg_mask_unseeded)
+
+        print(label_num)
+
+        markers_2 = np.zeros(unmasked.shape).astype(np.int32)
+
+        print(segmentation_mask.shape)
+
+        #new, shorter PBC marker seeding approach
+        #loop thru LB points
+        #then check if fillable region (labels_unseeded > 0)
+        #then check if point on other side of boundary is > 0 in segmentation_mask
+
+        if PBC_flag == 'hdim_1' or PBC_flag == 'both':
+            for vdim_ind in range(0,segmentation_mask.shape[0]):
+                for hdim1_ind in [hdim1_min,hdim1_max]:
+                    for hdim2_ind in range(hdim2_min,hdim2_max):
+
+                        #print(z_ind,y_ind,x_ind)
+                        #print(labels_unseeded[z_ind,y_ind,x_ind])
+
+                        if(labels_unseeded[vdim_ind,hdim1_ind,hdim2_ind] == 0):
+                            continue
+                        else:
+                            if hdim1_ind == 0:
+                                if (segmentation_mask[vdim_ind,hdim1_max,hdim2_ind]<=0):
+                                    continue
+                                else:
+                                    markers_2[vdim_ind,hdim1_ind,hdim2_ind] = segmentation_mask[vdim_ind,hdim1_max,hdim2_ind]
+                                    #print(z_ind,y_ind,x_ind)
+                                    #print("seeded")
+                            elif hdim1_ind == hdim1_max:
+                                if (segmentation_mask[vdim_ind,hdim1_min,hdim2_ind]<=0):
+                                    continue
+                                else:
+                                    markers_2[vdim_ind,hdim1_ind,hdim2_ind] = segmentation_mask[vdim_ind,hdim1_min,hdim2_ind]
+                                    #print(z_ind,y_ind,x_ind)
+                                    #print("seeded")
+
+        if PBC_flag == 'hdim_2' or PBC_flag == 'both':
+            for vdim_ind in range(0,segmentation_mask.shape[0]):
+                for hdim1_ind in range(hdim1_min,hdim1_max):
+                    for hdim2_ind in [hdim2_min,hdim2_max]:
+
+                        #print(z_ind,y_ind,x_ind)
+                        
#print(labels_unseeded[z_ind,y_ind,x_ind]) + + if(labels_unseeded[vdim_ind,hdim1_ind,hdim2_ind] == 0): + continue + else: + if hdim2_ind == hdim2_min: + if (segmentation_mask[vdim_ind,hdim1_ind,hdim2_max]<=0): + continue + else: + markers_2[vdim_ind,hdim1_ind,hdim2_ind] = segmentation_mask[vdim_ind,hdim1_ind,hdim2_max] + #print(z_ind,y_ind,x_ind) + #print("seeded") + elif hdim2_ind == hdim2_max: + if (segmentation_mask[vdim_ind,hdim1_ind,hdim2_min]<=0): + continue + else: + markers_2[vdim_ind,hdim1_ind,hdim2_ind] = segmentation_mask[vdim_ind,hdim1_ind,hdim2_min] + #print(z_ind,y_ind,x_ind) + #print("seeded") + + + print("PBC cross-boundary markers planted") + print("Beginning PBC segmentation for secondary mask") + + markers_2[~unmasked]=0 + + if method=='watershed': + segmentation_mask_2 = watershed(np.array(data_segmentation),markers_2.astype(np.int32), mask=unmasked) + # elif method=='random_walker': + # segmentation_mask=random_walker(data_segmentation, markers.astype(np.int32), + # beta=130, mode='bf', tol=0.001, copy=True, multichannel=False, return_full_prob=False, spacing=None) + else: + raise ValueError('unknown method, must be watershed') + + # remove everything from the individual masks that is more than max_distance_pixel away from the markers + if max_distance is not None: + D=distance_transform_edt((markers==0).astype(int)) + segmentation_mask_2[np.bitwise_and(segmentation_mask_2>0, D>max_distance_pixel)]=0 + + print("Sum up original mask and secondary PBC-mask for full PBC segmentation") + + #Write resulting mask into cube for output + segmentation_out.data=segmentation_mask + segmentation_mask_2 + + segmentation_mask_3 = segmentation_out.data + + #if (np.all(features_in.frame.values[:] == 0)): + # print("saving second field") + # second_seg_mask = out_f.create_dataset("seg_mask_2",data=segmentation_mask_3) + + + print("Secondary seeding complete, now blending periodic boundaries") + + #keep segmentation mask fields for now so we can save these all later + #for demos of changes + + #print("Test of PBC segmentation boundary blending below") + + #update mask coord regions + + reg_props_dict = get_label_props_in_dict(segmentation_out.data) + + + curr_reg_inds, z_reg_inds, y_reg_inds, x_reg_inds= get_indices_of_labels_from_reg_prop_dict(reg_props_dict) + + wall_labels = np.array([]) + #skip_list = np.array([]) + #bdry_buddies = np.array([]) + + #y_min = 0 + #y_max = test_mask2.shape[1] - 1 + #x_min = 0 + #x_max = test_mask2.shape[2] - 1 + + w_wall = np.unique(segmentation_mask_3[:,:,0]) + wall_labels = np.append(wall_labels,w_wall) + + e_wall = np.unique(segmentation_mask_3[:,:,-1]) + wall_labels = np.append(wall_labels,e_wall) + + n_wall = np.unique(segmentation_mask_3[:,-1,:]) + wall_labels = np.append(wall_labels,n_wall) + + s_wall = np.unique(segmentation_mask_3[:,0,:]) + wall_labels = np.append(wall_labels,s_wall) + + wall_labels = np.unique(wall_labels) + wall_labels = wall_labels[(wall_labels) > 0].astype(int) + #print(wall_labels) + + for cur_idx in wall_labels: + #skip this if there aren't enough points to be considered a real feature + #as defined above by n_min_threshold + curr_count = curr_reg_inds[cur_idx] + #print("Current wall feature: ",cur_idx) + #print(np.where(wall_labels==cur_idx)) + + vdim_indices = z_reg_inds[cur_idx] + hdim1_indices = y_reg_inds[cur_idx] + hdim2_indices = x_reg_inds[cur_idx] + + #start buddies array with feature of interest + buddies = np.array([cur_idx],dtype=int) + + for label_z, label_y, label_x in zip(vdim_indices, hdim1_indices, 
hdim2_indices): + + # check if this is the special case of being a corner point. + # if it's doubly periodic AND on both x and y boundaries, it's a corner point + # and we have to look at the other corner. + # here, we will only look at the corner point and let the below deal with x/y only. + if PBC_flag == 'both' and (np.any(label_y == [hdim1_min,hdim1_max]) and np.any(label_x == [hdim2_min,hdim2_max])): + + #adjust x and y points to the other side + y_val_alt = adjust_pbc_point(label_y, hdim1_min, hdim1_max) + x_val_alt = adjust_pbc_point(label_x, hdim2_min, hdim2_max) + + label_on_corner = segmentation_mask_3[label_z,y_val_alt,x_val_alt] + + if((label_on_corner > 0)): + #add opposite-corner buddy if it exists + buddies = np.append(buddies,label_on_corner) + + + # on the hdim1 boundary and periodic on hdim1 + if (PBC_flag == 'hdim_1' or PBC_flag == 'both') and np.any(label_y == [hdim1_min,hdim1_max]): + y_val_alt = adjust_pbc_point(label_y, hdim1_min, hdim1_max) - #Write resulting mask into cube for output - segmentation_out.data=segmentation_mask + #get the label value on the opposite side + label_alt = segmentation_mask_3[label_z,y_val_alt,label_x] + + #if it's labeled and not already been dealt with + if((label_alt > 0)): + #add above/below buddy if it exists + buddies = np.append(buddies,label_alt) + + if (PBC_flag == 'hdim_2' or PBC_flag == 'both') and np.any(label_x == [hdim2_min,hdim2_max]): + x_val_alt = adjust_pbc_point(label_x, hdim2_min, hdim2_max) + + #get the seg value on the opposite side + label_alt = segmentation_mask_3[label_z,label_y,x_val_alt] + + #if it's labeled and not already been dealt with + if((label_alt > 0)): + #add left/right buddy if it exists + buddies = np.append(buddies,label_alt) + + + buddies = np.unique(buddies) + + #print(buddies) + + if np.all(buddies==cur_idx): + continue + else: + inter_buddies,feat_inds,buddy_inds=np.intersect1d(features_in.feature.values[:],buddies,return_indices=True) + + buddy_features = copy.deepcopy(features_in.iloc[feat_inds]) + #display(buddy_features) + + #create arrays to contain points of all buddies + #and their transpositions/transformations + #for use in Buddy Box space + + buddy_z = np.array([],dtype=int) + buddy_y = np.array([],dtype=int) + buddy_x = np.array([],dtype=int) + buddy_z2 = np.array([],dtype=int) + buddy_y2 = np.array([],dtype=int) + buddy_x2 = np.array([],dtype=int) + + buddy_zf = np.array([],dtype=int) + buddy_yf = np.array([],dtype=int) + buddy_xf = np.array([],dtype=int) + + buddy_looper = 0 + + #loop thru buddies + for buddy in buddies: + print("Now on buddy: ",buddy) + print("points: ",len(z_reg_inds[buddy])) + + #if buddy == cur_idx: + buddy_feat = features_in[features_in['feature'] == buddy] + + #display(buddy_feat) + + yf2 = transfm_pbc_point(int(buddy_feat.hdim_1), hdim1_min, hdim1_max) + xf2 = transfm_pbc_point(int(buddy_feat.hdim_2), hdim2_min, hdim2_max) + + buddy_features.hdim_1.values[buddy_looper] = transfm_pbc_point(float(buddy_feat.hdim_1), hdim1_min, hdim1_max) + buddy_features.hdim_2.values[buddy_looper] = transfm_pbc_point(float(buddy_feat.hdim_2), hdim2_min, hdim2_max) + + #print(int(buddy_feat.vdim),yf2,xf2) + #display(buddy_features) + + buddy_zf = np.append(buddy_zf,int(buddy_feat.vdim)) + buddy_yf = np.append(buddy_yf,yf2) + buddy_xf = np.append(buddy_xf,xf2) + + buddy_looper = buddy_looper+1 + + for z,y,x in zip(z_reg_inds[buddy],y_reg_inds[buddy],x_reg_inds[buddy]): + + buddy_z = np.append(buddy_z,z) + buddy_y = np.append(buddy_y,y) + buddy_x = np.append(buddy_x,x) + + #else: 
+ + y2 = transfm_pbc_point(y, hdim1_min, hdim1_max) + x2 = transfm_pbc_point(x, hdim2_min, hdim2_max) + + buddy_z2 = np.append(buddy_z2,z) + buddy_y2 = np.append(buddy_y2,y2) + buddy_x2 = np.append(buddy_x2,x2) + + #Buddy Box! + print("Buddy Box space:") + bbox_zstart = int(np.min(buddy_z2)) + bbox_ystart = int(np.min(buddy_y2)) + bbox_xstart = int(np.min(buddy_x2)) + bbox_zend = int(np.max(buddy_z2)+1) + bbox_yend = int(np.max(buddy_y2)+1) + bbox_xend = int(np.max(buddy_x2)+1) + + bbox_zsize = bbox_zend - bbox_zstart + bbox_ysize = bbox_yend - bbox_ystart + bbox_xsize = bbox_xend - bbox_xstart + + print('vdim: ',bbox_zstart,bbox_zend) + print('hdim_1: ',bbox_ystart,bbox_yend) + print('hdim_2: ',bbox_xstart,bbox_xend) + + print(bbox_zsize,bbox_ysize,bbox_xsize) + + #Buddy Box for smooth watershedding of features at PBC boundaries + buddy_rgn = np.zeros((bbox_zsize, bbox_ysize, bbox_xsize)) + ind_ctr = 0 + + #need to loop thru ALL z,y,x inds in buddy box + #not just the ones that have nonzero seg mask values + + for z in range(bbox_zstart,bbox_zend): + for y in range(bbox_ystart,bbox_yend): + for x in range(bbox_xstart,bbox_xend): + z_a1 = z + if y > hdim1_max: + y_a1 = y - (hdim1_max + 1) + else: + y_a1 = y + + if x > hdim2_max: + x_a1 = x - (hdim2_max + 1) + else: + x_a1 = x + + buddy_rgn[z-bbox_zstart,y-bbox_ystart,x-bbox_xstart] = field_in.data[z_a1,y_a1,x_a1] + + + #buddy_rgn = np.expand_dims(buddy_rgn,0) + #print(buddy_rgn.shape) + + rgn_cube = iris.cube.Cube(data=buddy_rgn) + + #date = '2018-08-22' + #ftime = m.group(2) + #time = "2100" + + #dd + #current_time = dati.datetime(year=yyyy,month=mm,day=dd,hour=hh,minute=mins,second=0) + + #timediff = current_time - start_time + #print(timediff.days, timediff.seconds, timediff.microseconds) + #iris_time = timediff.days*86400 + timediff.seconds + #itime = iris.coords.Coord([field_in.time.point], standard_name='time', long_name='index_time', var_name='itime', units='seconds since 2018-08-21 00:00') + coord_system=None + + #h2_coord=iris.coords.DimCoord(np.arange(bbox_xstart,bbox_xend), long_name='hdim_2', units='1', bounds=None, attributes=None, coord_system=coord_system) + #h1_coord=iris.coords.DimCoord(np.arange(bbox_ystart,bbox_yend), long_name='hdim_1', units='1', bounds=None, attributes=None, coord_system=coord_system) + #v_coord=iris.coords.DimCoord(np.arange(bbox_zstart,bbox_zend), long_name='vdim', units='1', bounds=None, attributes=None, coord_system=coord_system) + h2_coord=iris.coords.DimCoord(np.arange(bbox_xsize), long_name='hdim_2', units='1', bounds=None, attributes=None, coord_system=coord_system) + h1_coord=iris.coords.DimCoord(np.arange(bbox_ysize), long_name='hdim_1', units='1', bounds=None, attributes=None, coord_system=coord_system) + v_coord=iris.coords.DimCoord(np.arange(bbox_zsize), long_name='vdim', units='1', bounds=None, attributes=None, coord_system=coord_system) + + rgn_cube.add_dim_coord(h2_coord,2) + rgn_cube.add_dim_coord(h1_coord,1) + rgn_cube.add_dim_coord(v_coord,0) + #rgn_cube.add_dim_coord(itime,0) + + rgn_cube.units = 'kg kg-1' + + print(rgn_cube) + #print(rgn_cube.vdim) + + #buddy correction to bounding box + + + for buddy_looper in range(0,len(buddy_features)): + buddy_features.vdim.values[buddy_looper] = buddy_features.vdim.values[buddy_looper] - bbox_zstart + buddy_features.hdim_1.values[buddy_looper] = buddy_features.hdim_1.values[buddy_looper] - bbox_ystart + buddy_features.hdim_2.values[buddy_looper] = buddy_features.hdim_2.values[buddy_looper] - bbox_xstart + + # Create cube of the same 
dimensions and coordinates as input data to store mask:
+            buddies_out=1*rgn_cube
+            buddies_out.rename('buddies_mask')
+            buddies_out.units=1
+
+            #Create dask array from input data:
+            #data=rgn_cube.core_data()
+            buddy_data = buddy_rgn
+
+            #Set level at which to create "Seed" for each feature in the case of 3D watershedding:
+            # If none, use all levels (later reduced to the ones fulfilling the threshold conditions)
+            if level==None:
+                level=slice(None)
+
+            # transform max_distance in metres to distance in pixels:
+            if max_distance is not None:
+                max_distance_pixel=np.ceil(max_distance/dxy)
+                #note - this doesn't consider vertical distance in pixels
+
+            # mask data outside region above/below threshold and invert data if tracking maxima:
+            if target == 'maximum':
+                unmasked_buddies=buddy_data>threshold
+                buddy_segmentation=-1*buddy_data
+            elif target == 'minimum':
+                unmasked_buddies=buddy_data<threshold
+                buddy_segmentation=buddy_data
+            else:
+                raise ValueError('unknown type of target')
+
+            # set markers at the positions of the features:
+            buddy_markers = np.zeros(unmasked_buddies.shape).astype(np.int32)
+
+            if rgn_cube.ndim==3: #3D watershedding
+                list_coord_names=[coord.name() for coord in rgn_cube.coords()]
+                #determine vertical axis:
+                if vertical_coord=='auto':
+                    list_vertical=['vdim','z','model_level_number','altitude','geopotential_height']
+                    for coord_name in list_vertical:
+                        if coord_name in list_coord_names:
+                            vertical_axis=coord_name
+                            break
+                elif vertical_coord in list_coord_names:
+                    vertical_axis=vertical_coord
+                else:
+                    raise ValueError('Please specify vertical coordinate')
+                ndim_vertical=rgn_cube.coord_dims(vertical_axis)
+                if len(ndim_vertical)>1:
+                    raise ValueError('please specify 1 dimensional vertical coordinate')
+                z_len = len(rgn_cube.coord('vdim').points)
+                y_len = len(rgn_cube.coord('hdim_1').points)
+                x_len = len(rgn_cube.coord('hdim_2').points)
+
+
+                for index, row in buddy_features.iterrows():
+                    #creation of 5x5x5 point ranges for 3D marker seeding
+                    #and PBC flags for cross-boundary seeding - nixing this idea for now, but described in PBC Segmentation notes
+                    #PBC_y_chk = 0
+                    #PBC_x_chk = 0
+
+                    #print("feature: ",row['feature'])
+                    #print("z-ctr: ",row['vdim'])
+                    #print("y-ctr: ",row['hdim_1'])
+                    #print("x-ctr: ",row['hdim_2'])
+
+
+                    if(int(row['vdim']) >=2 and int(row['vdim']) <= z_len-3):
+                        z_list = np.arange(int(row['vdim']-2),int(row['vdim']+3))
+                    elif(int(row['vdim']) < 2):
+                        z_list = np.arange(0,5)
+                    else:
+                        z_list = np.arange(z_len-5,z_len)
+
+                    if(int(row['hdim_1']) >=2 and int(row['hdim_1']) <= y_len-3):
+                        y_list = np.arange(int(row['hdim_1']-2),int(row['hdim_1']+3))
+                    elif(int(row['hdim_1']) < 2):
+                        y_list = np.arange(0,5)
+                        #PBC_y_chk = 1
+                    else:
+                        y_list = np.arange(y_len-5,y_len)
+                        #PBC_y_chk = 1
+
+                    if(int(row['hdim_2']) >=2 and int(row['hdim_2']) <= x_len-3):
+                        x_list = np.arange(int(row['hdim_2']-2),int(row['hdim_2']+3))
+                    elif(int(row['hdim_2']) < 2):
+                        x_list = np.arange(0,5)
+                        #PBC_x_chk = 1
+                    else:
+                        x_list = np.arange(x_len-5,x_len)
+                        #PBC_x_chk = 1
+
+                    #loop thru 5x5x5 z times y times x range
+                    for k in range(0,5):
+                        for j in range(0,5):
+                            for i in range(0,5):
+
+                                if ndim_vertical[0]==0:
+                                    buddy_markers[z_list[k],y_list[j],x_list[i]]=row['feature']
+                                elif ndim_vertical[0]==1:
+                                    buddy_markers[y_list[j],z_list[k],x_list[i]]=row['feature']
+                                elif ndim_vertical[0]==2:
+                                    buddy_markers[y_list[j],x_list[i],z_list[k]]=row['feature']
+
+            else:
+                raise ValueError('Segmentations routine only possible with 2 or 3 spatial dimensions')
+
+            # set markers in cells not fulfilling threshold condition to zero:
+            print(np.unique(buddy_markers))
+            buddy_markers[~unmasked_buddies]=0
+
+            marker_vals = np.unique(buddy_markers)
+            #print("vals: ",marker_vals)
+
+            #for marker in np.unique(markers):
+            #    print(marker)
+            #    if marker == 0:
+            #        continue
+            #    z_mark,y_mark,x_mark = np.where(markers==marker)
+            #    print(z_mark,y_mark,x_mark)
+            #    print(np.min(data_segmentation[z_mark,y_mark,x_mark]),np.max(data_segmentation[z_mark,y_mark,x_mark]))
+
+
+            # Turn into np arrays (not necessary for markers) as dask arrays don't yet seem to work for watershedding algorithm
+            buddy_segmentation=np.array(buddy_segmentation)
+            unmasked_buddies=np.array(unmasked_buddies)
+
+            # perform segmentation:
+            if method=='watershed':
+                segmentation_mask_4 = 
watershed(np.array(buddy_segmentation),buddy_markers.astype(np.int32), mask=unmasked_buddies)
+
+            else:
+                raise ValueError('unknown method, must be watershed')
+
+            # remove everything from the individual masks that is more than max_distance_pixel away from the markers
+            if max_distance is not None:
+                D=distance_transform_edt((buddy_markers==0).astype(int))
+                segmentation_mask_4[np.bitwise_and(segmentation_mask_4>0, D>max_distance_pixel)]=0
+
+
+            #mask all segmentation_mask points below threshold as -1
+            #to differentiate from those unmasked points NOT filled by watershedding
+            print(np.unique(segmentation_mask_4))
+            segmentation_mask_4[~unmasked_buddies] = -1
+
+
+            #transform seg_mask_4 data back to original mask
+            #print(np.unique(test_mask3.data))
+
+            #loop through buddy box inds and analogous seg mask inds
+            for z_val in range(bbox_zstart,bbox_zend):
+                z_seg = z_val - bbox_zstart
+                z_val_o = z_val
+                for y_val in range(bbox_ystart,bbox_yend):
+                    y_seg = y_val - bbox_ystart
+                    #y_val_o = y_val
+                    if y_val > hdim1_max:
+                        y_val_o = y_val - (hdim1_max+1)
+                    else:
+                        y_val_o = y_val
+                    for x_val in range(bbox_xstart,bbox_xend):
+                        x_seg = x_val - bbox_xstart
+                        #x_val_o = x_val
+                        if x_val > hdim2_max:
+                            x_val_o = x_val - (hdim2_max+1)
+                        else:
+                            x_val_o = x_val
+                        #print(z_seg,y_seg,x_seg)
+                        #print(z_val,y_val,x_val)
+
+                        #fix to
+                        #overwrite IF:
+                        #1) feature of interest
+                        #2) changing to/from feature of interest or adjacent segmented feature
+
+                        #We don't want to overwrite other features that may be in the
+                        #buddy box if not contacting the intersected seg field
+                        #print("Transformed z,y,x: ",z_val,y_val,x_val)
+                        #print("Real z,y,x: ",z_val_o,y_val_o,x_val_o)
+                        #print("Seg z,y,x: ",z_seg,y_seg,x_seg)
+                        #print("original: ",test_mask2[z_val_o,y_val_o,x_val_o])
+                        #print("new: ",test_mask3.data[z_seg,y_seg,x_seg])
+                        #print("input cube: ",rgn_cube.data[0,z_seg,y_seg,x_seg])
+                        #print("orig cube: ",hr_21_cube.data[z_val_o,y_val_o,x_val_o])
+                        #print(rgn_cube.data[0,z_seg,y_seg,x_seg] > 1.e-5)
+
+                        if (np.any(segmentation_mask_3[z_val_o,y_val_o,x_val_o]==buddies) and np.any(segmentation_mask_4[z_seg,y_seg,x_seg]==buddies)):
+                            #only do updating procedure if old and new values both in buddy set
+                            #and values are different
+                            if(segmentation_mask_3[z_val_o,y_val_o,x_val_o] != segmentation_mask_4[z_seg,y_seg,x_seg]):
+                                #print("Transformed z,y,x: ",z_val,y_val,x_val)
+                                #print("Real z,y,x: ",z_val_o,y_val_o,x_val_o)
+                                #print("Seg z,y,x: ",z_seg,y_seg,x_seg)
+                                #print(segmentation_mask_3[z_val_o,y_val_o,x_val_o], " -> ", segmentation_mask_4[z_seg,y_seg,x_seg])
+                                #print(segmentation_mask_3[z_val_o,y_val_o,x_val_o]+600845, " -> ", segmentation_mask_4[z_seg,y_seg,x_seg]+600845)
+                                #print(rgn_cube.data[z_seg,y_seg,x_seg])
+                                segmentation_mask_3[z_val_o,y_val_o,x_val_o] = segmentation_mask_4[z_seg,y_seg,x_seg]
+                                #print("updated")
+
+        segmentation_out.data = segmentation_mask_3
+
+        #if (np.all(features_in.frame.values[:] == 0)):
+        #    print("saving final field")
+        #    final_seg_mask = out_f.create_dataset("seg_mask_3",data=segmentation_mask_3)
+        #    out_f.close()
+
+
+    else:
+        #Write resulting mask into cube for output
+        segmentation_out.data = segmentation_mask
 
     # count number of grid cells asoociated to each tracked cell and write that into DataFrame:
     values, count = np.unique(segmentation_mask, return_counts=True)
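The wrap-back arithmetic used when copying the Buddy Box result into the original domain deserves a standalone illustration (a sketch with hypothetical values; hdim1_max is the inclusive maximum, so the domain size is hdim1_max + 1):

    # buddy-box rows 8..11 map back to original rows 8, 9, 0, 1
    hdim1_max = 9
    for y_val in (8, 9, 10, 11):
        y_val_o = y_val - (hdim1_max + 1) if y_val > hdim1_max else y_val

This is the inverse of transfm_pbc_point above: coordinates that were shifted past the seam to make a feature contiguous are folded back into the range [0, hdim1_max].

From bc53463f037bfcc0eff7540d99263599bdc062f3 Mon Sep 17 00:00:00 2001
From: Sean Freeman
Date: Wed, 23 Feb 2022 13:20:59 -0700
Subject: [PATCH 17/82] Fixed a bug with the 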
n_erosion_threshold in 3D --- tobac/feature_detection.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index b7c7d393..4027a5f7 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -460,11 +460,10 @@ def feature_detection_threshold(data_i,i_time, # only include values greater than threshold # erode selected regions by n pixels if n_erosion_threshold>0: - # is this right? the documentation is unclear - #if is_3D: - # selem=np.ones((n_erosion_threshold,n_erosion_threshold, n_erosion_threshold)) - #else: - selem=np.ones((n_erosion_threshold,n_erosion_threshold)) + if is_3D: + selem=np.ones((n_erosion_threshold,n_erosion_threshold, n_erosion_threshold)) + else: + selem=np.ones((n_erosion_threshold,n_erosion_threshold)) mask=binary_erosion(mask,selem).astype(bool) # detect individual regions, label and count the number of pixels included: labels, num_labels = label(mask, background=0, return_num = True) From b1c70e04a3946a408941fd64de6b4b272732ac31 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Thu, 10 Mar 2022 13:24:54 -0700 Subject: [PATCH 18/82] Moved some common functions to the utils module --- tobac/segmentation.py | 121 ++++-------------------------------------- tobac/utils.py | 100 ++++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+), 112 deletions(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 5111df06..78ab9d75 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -1,104 +1,5 @@ import logging - -def get_label_props_in_dict(labels): - '''Function to get the label properties into a dictionary format. - - Parameters - ---------- - labels: 2D or 3D array-like - comes from the `skimage.measure.label` function - - Returns - ------- - dict - output from skimage.measure.regionprops in dictionary format, where they key is the label number - ''' - import skimage.measure - - region_properties_raw = skimage.measure.regionprops(labels) - region_properties_dict = dict() - for region_prop in region_properties_raw: - region_properties_dict[region_prop.label] = region_prop - - return region_properties_dict - -def get_indices_of_labels_from_reg_prop_dict(region_property_dict): - '''Function to get the x, y, and z indices (as well as point count) of all labeled regions. - This function should produce similar output as new_get_indices_of_labels, but - allows for re-use of the region_property_dict. - - Parameters - ---------- - region_property_dict: dict of region_property objects - This dict should come from the get_label_props_in_dict function. 
- - Returns - ------- - dict (key: label number, int) - The number of points in the label number - dict (key: label number, int) - The z indices in the label number - dict (key: label number, int) - the y indices in the label number - dict (key: label number, int) - the x indices in the label number - Raises - ------ - ValueError - a ValueError is raised if - ''' - - import skimage.measure - - if len(region_property_dict) ==0: - raise ValueError("No regions!") - - z_indices = dict() - y_indices = dict() - x_indices = dict() - curr_loc_indices = dict() - - #loop through all skimage identified regions - for region_prop_key in region_property_dict: - region_prop = region_property_dict[region_prop_key] - index = region_prop.label - curr_z_ixs, curr_y_ixs, curr_x_ixs = np.transpose(region_prop.coords) - z_indices[index] = curr_z_ixs - y_indices[index] = curr_y_ixs - x_indices[index] = curr_x_ixs - curr_loc_indices[index] = len(curr_x_ixs) - - #print("indices found") - return [curr_loc_indices, z_indices, y_indices, x_indices] - -def adjust_pbc_point(in_dim, dim_min, dim_max): - '''Function to adjust a point to the other boundary for PBCs - - Parameters - ---------- - in_dim : int - Input coordinate to adjust - dim_min : int - Minimum point for the dimension - dim_max : int - Maximum point for the dimension (inclusive) - - Returns - ------- - int - The adjusted point on the opposite boundary - - Raises - ------ - ValueError - If in_dim isn't on one of the boundary points - ''' - if in_dim == dim_min: - return dim_max - elif in_dim == dim_max: - return dim_min - else: - raise ValueError("In adjust_pbc_point, in_dim isn't on a boundary.") +import utils def transfm_pbc_point(in_dim, dim_min, dim_max): '''Function to transform a PBC-feature point for contiguity @@ -117,10 +18,6 @@ def transfm_pbc_point(in_dim, dim_min, dim_max): int The transformed point - Raises - ------ - ValueError - If in_dim isn't on one of the boundary points ''' if in_dim < ((dim_min+dim_max)/2): return in_dim+dim_max+1 @@ -367,13 +264,13 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #print(seg_m_data) #read in labeling/masks and region-finding functions - reg_props_dict = get_label_props_in_dict(seg_m_data) + reg_props_dict = utils.get_label_props_in_dict(seg_m_data) del seg_m_data gc.collect() - curr_reg_inds, z_reg_inds, y_reg_inds, x_reg_inds= get_indices_of_labels_from_reg_prop_dict(reg_props_dict) + curr_reg_inds, z_reg_inds, y_reg_inds, x_reg_inds= utils.get_indices_of_labels_from_reg_prop_dict(reg_props_dict) print(np.unique(segmentation_mask[:])) print(curr_reg_inds) @@ -519,10 +416,10 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #update mask coord regions - reg_props_dict = get_label_props_in_dict(segmentation_out.data) + reg_props_dict = utils.get_label_props_in_dict(segmentation_out.data) - curr_reg_inds, z_reg_inds, y_reg_inds, x_reg_inds= get_indices_of_labels_from_reg_prop_dict(reg_props_dict) + curr_reg_inds, z_reg_inds, y_reg_inds, x_reg_inds= utils.get_indices_of_labels_from_reg_prop_dict(reg_props_dict) wall_labels = np.array([]) #skip_list = np.array([]) @@ -572,8 +469,8 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu if PBC_flag == 'both' and (np.any(label_y == [hdim1_min,hdim1_max]) and np.any(label_x == [hdim2_min,hdim2_max])): #adjust x and y points to the other side - y_val_alt = adjust_pbc_point(label_y, hdim1_min, hdim1_max) - x_val_alt = adjust_pbc_point(label_x, hdim2_min, hdim2_max) + 
y_val_alt = utils.adjust_pbc_point(label_y, hdim1_min, hdim1_max)
+                    x_val_alt = utils.adjust_pbc_point(label_x, hdim2_min, hdim2_max)
 
                     label_on_corner = segmentation_mask_3[label_z,y_val_alt,x_val_alt]
@@ -584,7 +481,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
             # on the hdim1 boundary and periodic on hdim1
             if (PBC_flag == 'hdim_1' or PBC_flag == 'both') and np.any(label_y == [hdim1_min,hdim1_max]):
-                y_val_alt = adjust_pbc_point(label_y, hdim1_min, hdim1_max)
+                y_val_alt = utils.adjust_pbc_point(label_y, hdim1_min, hdim1_max)
 
                 #get the label value on the opposite side
                 label_alt = segmentation_mask_3[label_z,y_val_alt,label_x]
@@ -595,7 +492,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
             if (PBC_flag == 'hdim_2' or PBC_flag == 'both') and np.any(label_x == [hdim2_min,hdim2_max]):
-                x_val_alt = adjust_pbc_point(label_x, hdim2_min, hdim2_max)
+                x_val_alt = utils.adjust_pbc_point(label_x, hdim2_min, hdim2_max)
 
                 #get the seg value on the opposite side
                 label_alt = segmentation_mask_3[label_z,label_y,x_val_alt]
diff --git a/tobac/utils.py b/tobac/utils.py
index bdcc7c93..0156b122 100644
--- a/tobac/utils.py
+++ b/tobac/utils.py
@@ -638,3 +638,103 @@ def get_spacings(field_in,grid_spacing=None,time_spacing=None):
     # use value of time_spacing for dt:
     dt=time_spacing
     return dxy,dt
+
+def get_label_props_in_dict(labels):
+    '''Function to get the label properties into a dictionary format.
+
+    Parameters
+    ----------
+    labels: 2D or 3D array-like
+        comes from the `skimage.measure.label` function
+
+    Returns
+    -------
+    dict
+        output from skimage.measure.regionprops in dictionary format, where the key is the label number
+    '''
+    import skimage.measure
+
+    region_properties_raw = skimage.measure.regionprops(labels)
+    region_properties_dict = dict()
+    for region_prop in region_properties_raw:
+        region_properties_dict[region_prop.label] = region_prop
+
+    return region_properties_dict
+
+def get_indices_of_labels_from_reg_prop_dict(region_property_dict):
+    '''Function to get the x, y, and z indices (as well as point count) of all labeled regions.
+    This function should produce similar output to new_get_indices_of_labels, but
+    allows for re-use of the region_property_dict.
+
+    Parameters
+    ----------
+    region_property_dict: dict of region_property objects
+        This dict should come from the get_label_props_in_dict function.
+
+    Returns
+    -------
+    dict (key: label number, int)
+        The number of points in the label number
+    dict (key: label number, int)
+        The z indices in the label number
+    dict (key: label number, int)
+        The y indices in the label number
+    dict (key: label number, int)
+        The x indices in the label number
+    Raises
+    ------
+    ValueError
+        a ValueError is raised if there are no regions in region_property_dict
+    '''
+
+    import skimage.measure
+
+    if len(region_property_dict) == 0:
+        raise ValueError("No regions!")
+
+    z_indices = dict()
+    y_indices = dict()
+    x_indices = dict()
+    curr_loc_indices = dict()
+
+    #loop through all skimage identified regions
+    for region_prop_key in region_property_dict:
+        region_prop = region_property_dict[region_prop_key]
+        index = region_prop.label
+        curr_z_ixs, curr_y_ixs, curr_x_ixs = np.transpose(region_prop.coords)
+        z_indices[index] = curr_z_ixs
+        y_indices[index] = curr_y_ixs
+        x_indices[index] = curr_x_ixs
+        curr_loc_indices[index] = len(curr_x_ixs)
+
+    #print("indices found")
+    return [curr_loc_indices, z_indices, y_indices, x_indices]
+
+def adjust_pbc_point(in_dim, dim_min, dim_max):
+    '''Function to adjust a point to the other boundary for PBCs
+
+    Parameters
+    ----------
+    in_dim : int
+        Input coordinate to adjust
+    dim_min : int
+        Minimum point for the dimension
+    dim_max : int
+        Maximum point for the dimension (inclusive)
+
+    Returns
+    -------
+    int
+        The adjusted point on the opposite boundary
+
+    Raises
+    ------
+    ValueError
+        If in_dim isn't on one of the boundary points
+    '''
+    if in_dim == dim_min:
+        return dim_max
+    elif in_dim == dim_max:
+        return dim_min
+    else:
+        raise ValueError("In adjust_pbc_point, in_dim isn't on a boundary.")

From 9183193e95cb19559f12c6a8d0c1c79837df4bf3 Mon Sep 17 00:00:00 2001
From: galexsky <90701223+galexsky@users.noreply.github.com>
Date: Fri, 11 Mar 2022 13:35:45 -0500
Subject: [PATCH 19/82] Removed deprecated code, updated args

Removed older versions of segmentation functions which were nominally
PBC/3D but had been superseded by new versions of the original functions
including these capabilities and more. 
Also added 'seed_3D_flag' argument to all relevant functions for user
choice of 3D feature seeding method
---
 tobac/segmentation.py | 426 ++++--------------------------------------------
 1 file changed, 36 insertions(+), 390 deletions(-)

diff --git a/tobac/segmentation.py b/tobac/segmentation.py
index 78ab9d75..6ef38106 100644
--- a/tobac/segmentation.py
+++ b/tobac/segmentation.py
@@ -24,14 +24,14 @@ def transfm_pbc_point(in_dim, dim_min, dim_max):
     else:
         return in_dim
 
-def segmentation_3D(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,PBC_flag=0):
-    return segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,PBC_flag=PBC_flag)
+def segmentation_3D(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,PBC_flag='none',seed_3D_flag='column'):
+    return segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,PBC_flag=PBC_flag,seed_3D_flag=seed_3D_flag)
 
-def segmentation_2D(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,PBC_flag=0):
-    return segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,PBC_flag=PBC_flag)
+def segmentation_2D(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,PBC_flag='none',seed_3D_flag='column'):
+    return segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,PBC_flag=PBC_flag,seed_3D_flag=seed_3D_flag)
 
-def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',PBC_flag='None',seed_3D_flag='column'):
+def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',PBC_flag='none',seed_3D_flag='column'):
     """Function performing watershedding for an individual timestep of the data
 
     Parameters
@@ -57,7 +57,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
     seed_3D_flag: string
         options: 'column' (default), 'box'
-        Seed 3D field with
+        Seed 3D field at feature positions with either the full column (default) or a box of user-set size
 
     Returns
     -------
@@ -887,388 +887,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
     return segmentation_out,features_out
 
-def segmentation(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto'):
-    """Function using watershedding or random walker to determine cloud volumes associated with tracked updrafts
-
-    Parameters
-    ----------
-    features: pandas.DataFrame
-        output from trackpy/maketrack
-    field: iris.cube.Cube
-        containing the field to perform the watershedding on
-    threshold: float
-        threshold for the watershedding field to be used for the mask
-
-    target: string
-        Switch to determine if algorithm looks strating from maxima or minima in input field (maximum: starting from maxima (default), minimum: starting from minima)
-
-    level slice
-        levels at which to seed the cells for the watershedding algorithm
-    method: str ('method')
-        flag determining the algorithm to use (currently watershedding implemented)
-
-    max_distance: float
-        Maximum distance from a marker allowed to be classified as belonging to that cell
-
-    Returns
-    -------
-    iris.cube.Cube
-        Cloud mask, 0 outside and integer numbers according to track inside the cloud
-    """
- import pandas as pd - from iris.cube import CubeList - - logging.info('Start watershedding 3D') - - # check input for right dimensions: - if not (field.ndim==3 or field.ndim==4): - raise ValueError('input to segmentation step must be 3D or 4D including a time dimension') - if 'time' not in [coord.name() for coord in field.coords()]: - raise ValueError("input to segmentation step must include a dimension named 'time'") - - # CubeList and list to store individual segmentation masks and feature DataFrames with information about segmentation - segmentation_out_list=CubeList() - features_out_list=[] - - #loop over individual input timesteps for segmentation: - field_time=field.slices_over('time') - for i,field_i in enumerate(field_time): - time_i=field_i.coord('time').units.num2date(field_i.coord('time').points[0]) - features_i=features.loc[features['time']==time_i] - segmentation_out_i,features_out_i=segmentation_timestep(field_i,features_i,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,vertical_coord=vertical_coord) - segmentation_out_list.append(segmentation_out_i) - features_out_list.append(features_out_i) - logging.debug('Finished segmentation for '+time_i.strftime('%Y-%m-%d_%H:%M:%S')) - - #Merge output from individual timesteps: - segmentation_out=segmentation_out_list.merge_cube() - features_out=pd.concat(features_out_list) - - logging.debug('Finished segmentation') - return segmentation_out,features_out - -def segmentation_timestep_3DPBC(field_in,features_in,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',PBC_flag=0): - """ - Function performing watershedding for an individual timestep of the data - - Parameters: - features: pandas.DataFrame - features for one specific point in time - field: iris.cube.Cube - input field to perform the watershedding on (2D or 3D for one specific point in time) - threshold: float - threshold for the watershedding field to be used for the mas - target: string - switch to determine if algorithm looks strating from maxima or minima in input field (maximum: starting from maxima (default), minimum: starting from minima) - level slice - levels at which to seed the cells for the watershedding algorithm - method: string - flag determining the algorithm to use (currently watershedding implemented) - max_distance: float - maximum distance from a marker allowed to be classified as belonging to that cell - PBC_flag: integer - flag indicating whether to use PBC treatment or not - note to self: should be expanded to account for singly periodic boundaries also - rather than just doubly periodic - - Output: - segmentation_out: iris.cube.Cube - cloud mask, 0 outside and integer numbers according to track inside the clouds - features_out: pandas.DataFrame - feature dataframe including the number of cells (2D or 3D) in the segmented area/volume of the feature at the timestep - """ - #from skimage.morphology import watershed - import skimage.segmentation._watershed_cy - import skimage.segmentation - from skimage.segmentation import watershed - # from skimage.segmentation import random_walker - from scipy.ndimage import distance_transform_edt, label - from copy import deepcopy - import numpy as np - - # copy feature dataframe for output - features_out=deepcopy(features_in) - # Create cube of the same dimensions and coordinates as input data to store mask: - segmentation_out=1*field_in - segmentation_out.rename('segmentation_mask') - segmentation_out.units=1 - - #Create dask 
array from input data:
-    data=field_in.core_data()
-
-    #Set level at which to create "Seed" for each feature in the case of 3D watershedding:
-    # If none, use all levels (later reduced to the ones fulfilling the theshold conditions)
-    if level==None:
-        level=slice(None)
-
-    # transform max_distance in metres to distance in pixels:
-    if max_distance is not None:
-        max_distance_pixel=np.ceil(max_distance/dxy)
-
-    # mask data outside region above/below threshold and invert data if tracking maxima:
-    if target == 'maximum':
-        unmasked=data>threshold
-        data_segmentation=-1*data
-    elif target == 'minimum':
-        unmasked=data<threshold
-        data_segmentation=data
-    else:
-        raise ValueError('unknown type of target')
-
-    # set markers at the positions of the features:
-    markers = np.zeros(unmasked.shape).astype(np.int32)
-
-    if field_in.ndim==3: #3D watershedding
-        list_coord_names=[coord.name() for coord in field_in.coords()]
-        #determine vertical axis:
-        if vertical_coord=='auto':
-            list_vertical=['z','model_level_number','altitude','geopotential_height']
-            for coord_name in list_vertical:
-                if coord_name in list_coord_names:
-                    vertical_axis=coord_name
-                    break
-        elif vertical_coord in list_coord_names:
-            vertical_axis=vertical_coord
-        else:
-            raise ValueError('Please specify vertical coordinate')
-        ndim_vertical=field_in.coord_dims(vertical_axis)
-        if len(ndim_vertical)>1:
-            raise ValueError('please specify 1 dimensional vertical coordinate')
-        z_len = len(field_in.coord('z').points)
-        y_len = len(field_in.coord('y').points)
-        x_len = len(field_in.coord('x').points)
-
-        print(z_len,y_len,x_len)
-
-        for index, row in features_in.iterrows():
-            #creation of 5x5x5 point ranges for 3D marker seeding
-            #instead of seeding whole column as is done in original segmentation
-            #since this may cause erroneous seeding of unconnected fields
-            #e.g. cirrus overlaying a discrete convective cloud
-
-            print("feature: ",row['feature'])
-            #print("z-ctr: ",row['vdim'])
-            #print("y-ctr: ",row['hdim_1'])
-            #print("x-ctr: ",row['hdim_2'])
-
-            #proper positioning of box points in z space to avoid going beyond bounds
-            if(int(row['vdim']) >=2 and int(row['vdim']) <= z_len-3):
-                z_list = np.arange(int(row['vdim']-2),int(row['vdim']+3))
-            elif(int(row['vdim']) < 2):
-                z_list = np.arange(0,5)
-            else:
-                z_list = np.arange(z_len-5,z_len)
-
-            #proper positioning of box points in y space to avoid going beyond bounds
-            if(int(row['hdim_1']) >=2 and int(row['hdim_1']) <= y_len-3):
-                y_list = np.arange(int(row['hdim_1']-2),int(row['hdim_1']+3))
-            elif(int(row['hdim_1']) < 2):
-                y_list = np.arange(0,5)
-                #PBC_y_chk = 1
-            else:
-                y_list = np.arange(y_len-5,y_len)
-                #PBC_y_chk = 1
-
-            #proper positioning of box points in x space to avoid going beyond bounds
-            if(int(row['hdim_2']) >=2 and int(row['hdim_2']) <= x_len-3):
-                x_list = np.arange(int(row['hdim_2']-2),int(row['hdim_2']+3))
-            elif(int(row['hdim_2']) < 2):
-                x_list = np.arange(0,5)
-                #PBC_x_chk = 1
-            else:
-                x_list = np.arange(x_len-5,x_len)
-                #PBC_x_chk = 1
-
-            #loop thru 5x5x5 z times y times x range to seed markers
-            for k in range(0,5):
-                for j in range(0,5):
-                    for i in range(0,5):
-
-                        if ndim_vertical[0]==0:
-                            markers[z_list[k],y_list[j],x_list[i]]=row['feature']
-                        elif ndim_vertical[0]==1:
-                            markers[y_list[j],z_list[k],x_list[i]]=row['feature']
-                        elif ndim_vertical[0]==2:
-                            markers[y_list[j],x_list[i],z_list[k]]=row['feature']
-
-
-        #print("z_list: ",z_list[:])
-        #print("y_list: ",y_list[:])
-        #print("x_list: ",x_list[:])
-        #print(markers)
-        #print("unique marker labels: ",np.unique(markers))
-
-    else:
-        raise ValueError('Segmentations routine only possible with 2 or 3 spatial dimensions')
-
-    # set markers in cells not fulfilling threshold condition to zero:
-    markers[~unmasked]=0
-
-    #rethinking this - data is padded with zeros, but we should set masked values to something different
-    #than zeroes as the array is initiated and padded with zeros
-    #and unmasked points that don't get watershedded are ALSO going to have a mask value equal to zero
-
-    # Turn into np arrays (not necessary for markers) as dask arrays don't yet seem to work for watershedding algorithm
-    data_segmentation=np.array(data_segmentation)
-    unmasked=np.array(unmasked)
-
-    # perform segmentation:
-    if method=='watershed':
-        segmentation_mask = 
watershed(np.array(data_segmentation),markers.astype(np.int32), mask=unmasked) -# elif method=='random_walker': -# segmentation_mask=random_walker(data_segmentation, markers.astype(np.int32), -# beta=130, mode='bf', tol=0.001, copy=True, multichannel=False, return_full_prob=False, spacing=None) - else: - raise ValueError('unknown method, must be watershed') - - # remove everything from the individual masks that is more than max_distance_pixel away from the markers - if max_distance is not None: - D=distance_transform_edt((markers==0).astype(int)) - segmentation_mask[np.bitwise_and(segmentation_mask>0, D>max_distance_pixel)]=0 - - #print(segmentation_mask.shape) - #print(segmentation_mask[unmasked].shape) - #print(np.where(segmentation_mask == 0 and unmasked == True)) - #z_unm,y_unm,x_unm = np.where(unmasked==True) - #print(np.where(segmentation_mask[z_unm,y_unm,x_unm] == 0)) - - #mask all segmentation_mask points below threshold as -1 - #to differentiate from those unmasked points NOT filled by watershedding - print(np.unique(segmentation_mask)) - segmentation_mask[~unmasked] = -1 - - #z_unf,y_unf,x_unf = np.where(segmentation_mask==0) - - #print(np.where(segmentation_mask==-1)) - #print(np.where(segmentation_mask==0)) - - #PBC treatment if-else statements - if PBC_flag == 1: - z_unf,y_unf,x_unf = np.where(segmentation_mask==0) - - seg_mask_unseeded = np.zeros(segmentation_mask.shape) - - seg_mask_unseeded[z_unf,y_unf,x_unf]=1 - - labels_unseeded,label_num = label(seg_mask_unseeded) - - print(label_num) - - markers_2 = np.zeros(unmasked.shape).astype(np.int32) - - print(segmentation_mask.shape) - - #new, shorter PBC marker seeding approach - #loop thru LB points - #then check if fillable region (labels_unseeded > 0) - #then check if point on other side of boundary is > 0 in segmentation_mask - - for z_ind in range(0,segmentation_mask.shape[0]): - #print("z_ind: ",z_ind) - for y_ind in range(0,segmentation_mask.shape[1]): - for x_ind in [0,segmentation_mask.shape[2]-1]: - - #print(z_ind,y_ind,x_ind) - #print(labels_unseeded[z_ind,y_ind,x_ind]) - - if(labels_unseeded[z_ind,y_ind,x_ind] == 0): - continue - else: - if x_ind == 0: - if (segmentation_mask[z_ind,y_ind,segmentation_mask.shape[2]-1]<=0): - continue - else: - markers_2[z_ind,y_ind,x_ind] = segmentation_mask[z_ind,y_ind,segmentation_mask.shape[2]-1] - #print(z_ind,y_ind,x_ind) - #print("seeded") - elif x_ind == segmentation_mask.shape[2]-1: - if (segmentation_mask[z_ind,y_ind,0]<=0): - continue - else: - markers_2[z_ind,y_ind,x_ind] = segmentation_mask[z_ind,y_ind,0] - #print(z_ind,y_ind,x_ind) - #print("seeded") - - - for y_ind in [0,segmentation_mask.shape[1]-1]: - for x_ind in range(0,segmentation_mask.shape[2]): - - #print(z_ind,y_ind,x_ind) - #print(labels_unseeded[z_ind,y_ind,x_ind]) - - if(labels_unseeded[z_ind,y_ind,x_ind] == 0): - continue - else: - if y_ind == 0: - if (segmentation_mask[z_ind,segmentation_mask.shape[1]-1,x_ind]<=0): - continue - else: - markers_2[z_ind,y_ind,x_ind] = segmentation_mask[z_ind,segmentation_mask.shape[1]-1,x_ind] - #print(z_ind,y_ind,x_ind) - #print("seeded") - elif y_ind == segmentation_mask.shape[1]-1: - if (segmentation_mask[z_ind,0,x_ind]<=0): - continue - else: - markers_2[z_ind,y_ind,x_ind] = segmentation_mask[z_ind,0,x_ind] - #print(z_ind,y_ind,x_ind) - #print("seeded") - - print("PBC cross-boundary markers planted") - print("Beginning PBC segmentation for secondary mask") - - markers_2[~unmasked]=0 - - if method=='watershed': - segmentation_mask_2 = 
watershed(np.array(data_segmentation),markers_2.astype(np.int32), mask=unmasked) - # elif method=='random_walker': - # segmentation_mask=random_walker(data_segmentation, markers.astype(np.int32), - # beta=130, mode='bf', tol=0.001, copy=True, multichannel=False, return_full_prob=False, spacing=None) - else: - raise ValueError('unknown method, must be watershed') - - # remove everything from the individual masks that is more than max_distance_pixel away from the markers - if max_distance is not None: - D=distance_transform_edt((markers==0).astype(int)) - segmentation_mask_2[np.bitwise_and(segmentation_mask_2>0, D>max_distance_pixel)]=0 - - print("Sum up original mask and secondary PBC-mask for full PBC segmentation") - - #Write resulting mask into cube for output - segmentation_out.data=segmentation_mask + segmentation_mask_2 - - else: - #Write resulting mask into cube for output - segmentation_out.data = segmentation_mask - - # count number of grid cells asoociated to each tracked cell and write that into DataFrame: - print(np.min(segmentation_out.data),np.max(segmentation_out.data)) - - values, count = np.unique(segmentation_out.data, return_counts=True) - counts=dict(zip(values, count)) - ncells=np.zeros(len(features_out)) - for i,(index,row) in enumerate(features_out.iterrows()): - #print(i,index,row,(index,row)) - #print("pre-if ncells ",ncells) - if row['feature'] in counts.keys(): - ncells=counts[row['feature']] - #print("in-if ncells ",ncells) - #row['ncells'] == ncells - #features_out['ncells'][i] = ncells - features_out['ncells']=ncells - #print("post-if ncells ",ncells) - - return segmentation_out,features_out - -def segmentation_PBC3D(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',PBC_flag=0): +def segmentation(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',PBC_flag='none',seed_3D_flag='column'): """ Function using watershedding or random walker to determine cloud volumes associated with tracked updrafts @@ -1290,6 +909,12 @@ def segmentation_PBC3D(features,field,dxy,threshold=3e-3,target='maximum',level= max_distance: float Maximum distance from a marker allowed to be classified as belonging to that cell + + PBC_flag: string + string flag of 'none', 'hdim_1', 'hdim_2', or 'both' indicating which lateral boundaries are periodic + + seed_3D_flag: string + string flag of 'column' (default) or 'box' which determines the method of seeding feature positions for 3D watershedding Output: segmentation_out: iris.cube.Cube @@ -1311,18 +936,39 @@ def segmentation_PBC3D(features,field,dxy,threshold=3e-3,target='maximum',level= features_out_list=[] #loop over individual input timesteps for segmentation: + #OR do segmentation on single timestep + #print(field) field_time=field.slices_over('time') + #print(field_time) + #print(enumerate(field_time)) + time_len = len(field.coord('time').points[:]) + print(time_len) + for i,field_i in enumerate(field_time): - #print("i, field i: ",i,field_i) time_i=field_i.coord('time').units.num2date(field_i.coord('time').points[0]) features_i=features.loc[features['time']==time_i] - #print("time_i, features_i:") #print(time_i) + #print(field_i) #print(features_i) - segmentation_out_i,features_out_i=segmentation_timestep_PBC3D(field_i,features_i,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,vertical_coord=vertical_coord,PBC_flag=PBC_flag) + 
segmentation_out_i,features_out_i=segmentation_timestep(field_i,features_i,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,vertical_coord=vertical_coord,PBC_flag=PBC_flag,seed_3D_flag=seed_3D_flag)
         segmentation_out_list.append(segmentation_out_i)
         features_out_list.append(features_out_i)
         logging.debug('Finished segmentation for '+time_i.strftime('%Y-%m-%d_%H:%M:%S'))
+    
+    #if time_len > 1:
+        
+        
+        
+    #else:
+    #    time_i=field.coord('time').units.num2date(field.coord('time').points[0])
+    #    features_i=features.loc[features['time']==time_i]
+    #    print(time_i)
+    #    print(field)
+    #    print(features_i)
+    #    segmentation_out_i,features_out_i=segmentation_timestep(field,features_i,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,vertical_coord=vertical_coord,PBC_flag=PBC_flag)
+    #    segmentation_out_list.append(segmentation_out_i)
+    #    features_out_list.append(features_out_i)
+    #    logging.debug('Finished segmentation for '+time_i.strftime('%Y-%m-%d_%H:%M:%S'))
 
     #Merge output from individual timesteps:
     segmentation_out=segmentation_out_list.merge_cube()

From f880ebd085b12b9884e96b73153e82f7f9967f40 Mon Sep 17 00:00:00 2001
From: galexsky <90701223+galexsky@users.noreply.github.com>
Date: Fri, 11 Mar 2022 13:36:46 -0500
Subject: [PATCH 20/82] Added missing seed_3D_flag

Added missing seed_3D_flag in segmentation function call for
segmentation_3D function, forwarding the user-supplied value rather than
hard-coding 'column'
---
 tobac/segmentation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tobac/segmentation.py b/tobac/segmentation.py
index 6ef38106..948fd631 100644
--- a/tobac/segmentation.py
+++ b/tobac/segmentation.py
@@ -25,7 +25,7 @@ def transfm_pbc_point(in_dim, dim_min, dim_max):
     return in_dim
 
 def segmentation_3D(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,PBC_flag='none',seed_3D_flag='column'):
-    return segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,PBC_flag=PBC_flag)
+    return segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,PBC_flag=PBC_flag,seed_3D_flag=seed_3D_flag)
 
 def segmentation_2D(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,PBC_flag='none',seed_3D_flag='column'):
     return segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,PBC_flag=PBC_flag,seed_3D_flag='column')
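A minimal sketch of how these thin wrappers are meant to be driven once the new
keywords are threaded through. Here `w_cube` and `features` are placeholder
names for a 3D iris cube with a 'time' coordinate and a feature-detection
DataFrame; they are not objects defined in these patches:

    import tobac.segmentation as seg

    # 3D segmentation with doubly periodic lateral boundaries,
    # seeding the full column at each detected feature position
    seg_mask, features_out = seg.segmentation_3D(
        features, w_cube, dxy=1000.0,   # 1 km horizontal grid spacing
        threshold=1.0, target='maximum',
        PBC_flag='both',
        seed_3D_flag='column',
    )

`seg_mask` is an iris cube of per-feature labels and `features_out` carries the
'ncells' count of segmented grid cells for each feature.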
From c42f401fa9d9aef4b298c6e8d9aca41135a3be21 Mon Sep 17 00:00:00 2001
From: Sean Freeman
Date: Mon, 14 Mar 2022 17:10:13 -0600
Subject: [PATCH 21/82] Starting to clean up

---
 tobac/segmentation.py | 133 ++++++++++++++----------------------
 1 file changed, 43 insertions(+), 90 deletions(-)

diff --git a/tobac/segmentation.py b/tobac/segmentation.py
index 948fd631..50c87337 100644
--- a/tobac/segmentation.py
+++ b/tobac/segmentation.py
@@ -31,7 +31,7 @@ def segmentation_2D(features,field,dxy,threshold=3e-3,target='maximum',level=Non
     return segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,PBC_flag=PBC_flag,seed_3D_flag='column')
 
 
-def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',PBC_flag='none',seed_3D_flag='column'):
+def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',PBC_flag='none',seed_3D_flag='column', seed_3D_size=5):
     """Function performing watershedding for an individual timestep of the data
 
     Parameters
@@ -58,6 +58,10 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
     seed_3D_flag: string
         options: 'column' (default), 'box'
         Seed 3D field at feature positions with either the full column (default) or a box of user-set size
+    seed_3D_size: int or tuple (dimensions equal to dimensions of `field`)
+        This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an
+        integer, the seed box is identical in all dimensions. If it's a tuple, it specifies the
+        seed area for each dimension separately.
 
     Returns
     -------
@@ -67,10 +71,12 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
         feature dataframe including the number of cells (2D or 3D) in the segmented area/volume of the feature at the timestep
     """
     from skimage.segmentation import watershed
+    import skimage.measure
     # from skimage.segmentation import random_walker
     from scipy.ndimage import distance_transform_edt
     from copy import deepcopy
     import numpy as np
+    import iris
 
     #saving intermediary fields for testing
     #original mask, secondary seeding, final version
@@ -89,7 +95,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
     segmentation_out.rename('segmentation_mask')
     segmentation_out.units=1
 
-    #Create dask array from input data:
+    # Get raw array from input data:
     data=field_in.core_data()
 
     #Set level at which to create "Seed" for each feature in the case of 3D watershedding:
@@ -168,9 +174,21 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
 
         #print(z_len,y_len,x_len)
         #display(features_in)
+
+        # Get the size of the seed box from the input parameter
+        try:
+            seed_z = seed_3D_size[0]
+            seed_y = seed_3D_size[1]
+            seed_x = seed_3D_size[2]
+        except TypeError:
+            # Not iterable, assume int.
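+            # (Illustration only, per the docstring above: the default
+            #  seed_3D_size=5 stamps a 5x5x5 marker box around each feature,
+            #  while e.g. seed_3D_size=(1,5,5) would seed roughly a single
+            #  vertical level but a 5x5 patch in the horizontal.)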
+ seed_z = seed_3D_size + seed_y = seed_3D_size + seed_x = seed_3D_size + for index, row in features_in.iterrows(): - #creation of 5x5x5 point ranges for 3D marker seeding + #creation of point ranges for 3D marker seeding #and PBC flags for cross-boundary seeding - nixing this idea for now, but described in PBC Segmentation notes #PBC_y_chk = 0 #PBC_x_chk = 0 @@ -185,32 +203,32 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu if(int(row['vdim']) >=2 and int(row['vdim']) <= z_len-3): z_list = np.arange(int(row['vdim']-2),int(row['vdim']+3)) elif(int(row['vdim']) < 2): - z_list = np.arange(0,5) + z_list = np.arange(0,seed_z) else: - z_list = np.arange(z_len-5,z_len) + z_list = np.arange(z_len-seed_z,z_len) if(int(row['hdim_1']) >=2 and int(row['hdim_1']) <= y_len-3): y_list = np.arange(int(row['hdim_1']-2),int(row['hdim_1']+3)) elif(int(row['hdim_1']) < 2): - y_list = np.arange(0,5) + y_list = np.arange(0,seed_y) #PBC_y_chk = 1 else: - y_list = np.arange(y_len-5,y_len) + y_list = np.arange(y_len-seed_y,y_len) #PBC_y_chk = 1 if(int(row['hdim_2']) >=2 and int(row['hdim_2']) <= x_len-3): x_list = np.arange(int(row['hdim_2']-2),int(row['hdim_2']+3)) elif(int(row['hdim_2']) < 2): - x_list = np.arange(0,5) + x_list = np.arange(0,seed_x) #PBC_x_chk = 1 else: - x_list = np.arange(x_len-5,x_len) + x_list = np.arange(x_len-seed_x,x_len) #PBC_x_chk = 1 #loop thru 5x5x5 z times y times x range - for k in range(0,5): - for j in range(0,5): - for i in range(0,5): + for k in range(0,seed_z): + for j in range(0,seed_y): + for i in range(0,seed_x): if ndim_vertical[0]==0: markers[z_list[k],y_list[j],x_list[i]]=row['feature'] @@ -255,26 +273,13 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu segmentation_mask[~unmasked] = -1 #saves/prints below for testing - #seg_m_data = segmentation_mask[:] - - #if (np.all(features_in.frame.values[:] == 0)): - # print("saving first field") - # first_seg_mask = out_f.create_dataset("seg_mask_1",data=seg_m_data) - - #print(seg_m_data) - + seg_m_data = segmentation_mask[:] + #read in labeling/masks and region-finding functions reg_props_dict = utils.get_label_props_in_dict(seg_m_data) - - del seg_m_data - gc.collect() - curr_reg_inds, z_reg_inds, y_reg_inds, x_reg_inds= utils.get_indices_of_labels_from_reg_prop_dict(reg_props_dict) - print(np.unique(segmentation_mask[:])) - print(curr_reg_inds) - #z_unf,y_unf,x_unf = np.where(segmentation_mask==0) #print(np.where(segmentation_mask==-1)) @@ -293,32 +298,16 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #Return all indices where segmentation field == 0 #meaning unfilled but above threshold vdim_unf,hdim1_unf,hdim2_unf = np.where(segmentation_mask==0) - - - - #vdim_unf = z_reg_inds[0.] - #hdim1_unf = y_reg_inds[0.] - #hdim2_unf = x_reg_inds[0.] 
-
-        #for cur_idx in wall_labels:
-        #skip this if there aren't enough points to be considered a real feature
-        #as defined above by n_min_threshold
-        #curr_count = curr_reg_inds[cur_idx]
-        #print("Current wall feature: ",cur_idx)
-
+
         seg_mask_unseeded = np.zeros(segmentation_mask.shape)
 
         seg_mask_unseeded[vdim_unf,hdim1_unf,hdim2_unf]=1
 
         #create labeled field of unfilled, unseeded features
-        labels_unseeded,label_num = label(seg_mask_unseeded)
-
-        print(label_num)
-
+        labels_unseeded,label_num = skimage.measure.label(seg_mask_unseeded, return_num=True)
+
         markers_2 = np.zeros(unmasked.shape).astype(np.int32)
-
-        print(segmentation_mask.shape)
-
+
         #new, shorter PBC marker seeding approach
         #loop thru LB points
         #then check if fillable region (labels_unseeded > 0)
         #then check if point on other side of boundary is > 0 in segmentation_mask
 
@@ -329,8 +318,6 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
         for hdim1_ind in [hdim1_min,hdim1_max]:
             for hdim2_ind in range(hdim2_min,hdim2_max):
 
-                #print(z_ind,y_ind,x_ind)
-                #print(labels_unseeded[z_ind,y_ind,x_ind])
 
                 if(labels_unseeded[vdim_ind,hdim1_ind,hdim2_ind] == 0):
                     continue
@@ -340,15 +327,11 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
                             continue
                         else:
                             markers_2[vdim_ind,hdim1_ind,hdim2_ind] = segmentation_mask[vdim_ind,hdim1_max,hdim2_ind]
-                            #print(z_ind,y_ind,x_ind)
-                            #print("seeded")
                     elif hdim1_ind == hdim1_max:
                         if (segmentation_mask[vdim_ind,hdim1_min,hdim2_ind]<=0):
                             continue
                         else:
                             markers_2[vdim_ind,hdim1_ind,hdim2_ind] = segmentation_mask[vdim_ind,hdim1_min,hdim2_ind]
 
     if PBC_flag == 'hdim_2' or PBC_flag == 'both':
         for vdim_ind in range(0,segmentation_mask.shape[0]):
@@ -375,11 +358,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
                             markers_2[vdim_ind,hdim1_ind,hdim2_ind] = segmentation_mask[vdim_ind,hdim1_ind,hdim2_min]
                             #print(z_ind,y_ind,x_ind)
                             #print("seeded")
-
-
-    print("PBC cross-boundary markers planted")
-    print("Beginning PBC segmentation for secondary mask")
-
+
         markers_2[~unmasked]=0
 
         if method=='watershed':
@@ -395,40 +374,25 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
             D=distance_transform_edt((markers==0).astype(int))
             segmentation_mask_2[np.bitwise_and(segmentation_mask_2>0, D>max_distance_pixel)]=0
 
-        print("Sum up original mask and secondary PBC-mask for full PBC segmentation")
-
+        # Sum up original mask and secondary PBC-mask for full PBC segmentation
         #Write resulting mask into cube for output
         segmentation_out.data=segmentation_mask + segmentation_mask_2
 
         segmentation_mask_3 = segmentation_out.data
-
-        #if (np.all(features_in.frame.values[:] == 0)):
-        #    print("saving second field")
-        #    second_seg_mask = out_f.create_dataset("seg_mask_2",data=segmentation_mask_3)
-
-
-        print("Secondary seeding complete, now blending periodic boundaries")
-
+
+        # Secondary seeding complete, now blending periodic boundaries
         #keep segmentation mask fields for now so we can save these all later
         #for demos of changes
 
-        #print("Test of PBC segmentation boundary blending below")
+        # Test of PBC segmentation boundary blending below
 
         #update mask coord regions
 
         reg_props_dict = utils.get_label_props_in_dict(segmentation_out.data)
-
         curr_reg_inds, z_reg_inds, y_reg_inds, x_reg_inds= utils.get_indices_of_labels_from_reg_prop_dict(reg_props_dict)
 
         wall_labels = np.array([])
-
-        #skip_list = np.array([])
-        #bdry_buddies = np.array([])
-
-        #y_min = 0
-        #y_max = test_mask2.shape[1] - 1
-        #x_min = 0
-        #x_max = test_mask2.shape[2] - 1
 
         w_wall = 
np.unique(segmentation_mask_3[:,:,0]) wall_labels = np.append(wall_labels,w_wall) @@ -512,7 +476,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu else: inter_buddies,feat_inds,buddy_inds=np.intersect1d(features_in.feature.values[:],buddies,return_indices=True) - buddy_features = copy.deepcopy(features_in.iloc[feat_inds]) + buddy_features = deepcopy(features_in.iloc[feat_inds]) #display(buddy_features) #create arrays to contain points of all buddies @@ -572,8 +536,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu buddy_y2 = np.append(buddy_y2,y2) buddy_x2 = np.append(buddy_x2,x2) - #Buddy Box! - print("Buddy Box space:") + # Buddy Box! bbox_zstart = int(np.min(buddy_z2)) bbox_ystart = int(np.min(buddy_y2)) bbox_xstart = int(np.min(buddy_x2)) @@ -585,10 +548,6 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu bbox_ysize = bbox_yend - bbox_ystart bbox_xsize = bbox_xend - bbox_xstart - print('vdim: ',bbox_zstart,bbox_zend) - print('hdim_1: ',bbox_ystart,bbox_yend) - print('hdim_2: ',bbox_xstart,bbox_xend) - print(bbox_zsize,bbox_ysize,bbox_xsize) #Buddy Box for smooth watershedding of features at PBC boundaries @@ -865,13 +824,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #print("updated") segmentation_out.data = segmentation_mask_3 - - if (np.all(features_in.frame.values[:] == 0)): - print("saving final field") - final_seg_mask = out_f.create_dataset("seg_mask_3",data=segmentation_mask_3) - out_f.close() - else: #Write resulting mask into cube for output segmentation_out.data = segmentation_mask From 57c9fb8976fe41d11765299d902caa4dc8e0018f Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Fri, 18 Mar 2022 11:18:53 -0600 Subject: [PATCH 22/82] continuing to clean up code, started process of adding tests --- tobac/segmentation.py | 1 - tobac/tests/test_segmentation.py | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 tobac/tests/test_segmentation.py diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 50c87337..5ea5a668 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -269,7 +269,6 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #mask all segmentation_mask points below th=reshold as -1 #to differentiate from those unmasked points NOT filled by watershedding - print(np.unique(segmentation_mask)) segmentation_mask[~unmasked] = -1 #saves/prints below for testing diff --git a/tobac/tests/test_segmentation.py b/tobac/tests/test_segmentation.py new file mode 100644 index 00000000..cca84169 --- /dev/null +++ b/tobac/tests/test_segmentation.py @@ -0,0 +1,3 @@ +import tobac.testing +import tobac.segmentation as seg + From 9000ae15628930eb32dd89c8d661ef3002864e53 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Fri, 18 Mar 2022 16:47:28 -0600 Subject: [PATCH 23/82] More cleanup of segmentation, adding more tests to segmentation and feature detection --- tobac/segmentation.py | 18 +++++------ tobac/testing.py | 16 +++++----- Dockerfile => tobac/tests/Dockerfile | 0 tobac/tests/test_feature_detection.py | 39 +++++++++++++++++++++++- tobac/tests/test_segmentation.py | 43 ++++++++++++++++++++++++++- 5 files changed, 97 insertions(+), 19 deletions(-) rename Dockerfile => tobac/tests/Dockerfile (100%) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 5ea5a668..d0991143 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -1,5 +1,5 @@ import logging 
-import utils +from . import utils as tb_utils def transfm_pbc_point(in_dim, dim_min, dim_max): '''Function to transform a PBC-feature point for contiguity @@ -275,9 +275,9 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu seg_m_data = segmentation_mask[:] #read in labeling/masks and region-finding functions - reg_props_dict = utils.get_label_props_in_dict(seg_m_data) + reg_props_dict = tb_utils.get_label_props_in_dict(seg_m_data) - curr_reg_inds, z_reg_inds, y_reg_inds, x_reg_inds= utils.get_indices_of_labels_from_reg_prop_dict(reg_props_dict) + curr_reg_inds, z_reg_inds, y_reg_inds, x_reg_inds= tb_utils.get_indices_of_labels_from_reg_prop_dict(reg_props_dict) #z_unf,y_unf,x_unf = np.where(segmentation_mask==0) @@ -387,9 +387,9 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #update mask coord regions - reg_props_dict = utils.get_label_props_in_dict(segmentation_out.data) + reg_props_dict = tb_utils.get_label_props_in_dict(segmentation_out.data) - curr_reg_inds, z_reg_inds, y_reg_inds, x_reg_inds= utils.get_indices_of_labels_from_reg_prop_dict(reg_props_dict) + curr_reg_inds, z_reg_inds, y_reg_inds, x_reg_inds= tb_utils.get_indices_of_labels_from_reg_prop_dict(reg_props_dict) wall_labels = np.array([]) @@ -432,8 +432,8 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu if PBC_flag == 'both' and (np.any(label_y == [hdim1_min,hdim1_max]) and np.any(label_x == [hdim2_min,hdim2_max])): #adjust x and y points to the other side - y_val_alt = utils.adjust_pbc_point(label_y, hdim1_min, hdim1_max) - x_val_alt = utils.adjust_pbc_point(label_x, hdim2_min, hdim2_max) + y_val_alt = tb_utils.adjust_pbc_point(label_y, hdim1_min, hdim1_max) + x_val_alt = tb_utils.adjust_pbc_point(label_x, hdim2_min, hdim2_max) label_on_corner = segmentation_mask_3[label_z,y_val_alt,x_val_alt] @@ -444,7 +444,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu # on the hdim1 boundary and periodic on hdim1 if (PBC_flag == 'hdim_1' or PBC_flag == 'both') and np.any(label_y == [hdim1_min,hdim1_max]): - y_val_alt = utils.adjust_pbc_point(label_y, hdim1_min, hdim1_max) + y_val_alt = tb_utils.adjust_pbc_point(label_y, hdim1_min, hdim1_max) #get the label value on the opposite side label_alt = segmentation_mask_3[label_z,y_val_alt,label_x] @@ -455,7 +455,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu buddies = np.append(buddies,label_alt) if (PBC_flag == 'hdim_2' or PBC_flag == 'both') and np.any(label_x == [hdim2_min,hdim2_max]): - x_val_alt = utils.adjust_pbc_point(label_x, hdim2_min, hdim2_max) + x_val_alt = tb_utils.adjust_pbc_point(label_x, hdim2_min, hdim2_max) #get the seg value on the opposite side label_alt = segmentation_mask_3[label_z,label_y,x_val_alt] diff --git a/tobac/testing.py b/tobac/testing.py index 5a757ca6..f9c6dda3 100644 --- a/tobac/testing.py +++ b/tobac/testing.py @@ -7,7 +7,7 @@ def make_simple_sample_data_2D(data_type='iris'): """function creating a simple dataset to use in tests for tobac. The grid has a grid spacing of 1km in both horizontal directions and 100 grid cells in x direction and 500 in y direction. - Time resolution is 1 minute and the total length of the dataset is 100 minutes around a abritraty date (2000-01-01 12:00). + Time resolution is 1 minute and the total length of the dataset is 100 minutes around a arbitrary date (2000-01-01 12:00). 
The longitude and latitude coordinates are added as 2D aux coordinates and arbitrary, but in realisitic range. The data contains a single blob travelling on a linear trajectory through the dataset for part of the time. Parameters @@ -445,8 +445,8 @@ def make_feature_blob(in_arr, h1_loc, h2_loc, v_loc = None, start_loc = 1 v_min = 0 v_max = in_arr.shape[start_loc] - start_v = round(max(v_min, v_loc - v_size/2)) - end_v = round(min(v_max-1, v_loc + v_size/2)) + start_v = int(np.ceil(max(v_min, v_loc - v_size / 2))) + end_v = int(np.ceil(min(v_max - 1, v_loc + v_size / 2))) if v_size > v_max - v_min: raise ValueError("v_size larger than domain size") @@ -463,12 +463,12 @@ def make_feature_blob(in_arr, h1_loc, h2_loc, v_loc = None, raise ValueError("Horizontal size larger than domain size") # let's get start/end x/y/z - start_h1 = round(h1_loc - h1_size/2) - end_h1 = round(h1_loc + h1_size/2) - - start_h2 = round(h2_loc - h2_size/2) - end_h2 = round(h2_loc + h2_size/2) + start_h1 = int(np.ceil(h1_loc - h1_size / 2)) + end_h1 = int(np.ceil(h1_loc + h1_size / 2)) + start_h2 = int(np.ceil(h2_loc - h2_size / 2)) + end_h2 = int(np.ceil(h2_loc + h2_size / 2)) + # get the coordinate sets coords_to_fill = get_pbc_coordinates(h1_min, h1_max, h2_min, h2_max, start_h1, end_h1, start_h2, end_h2, PBC_flag=PBC_flag) diff --git a/Dockerfile b/tobac/tests/Dockerfile similarity index 100% rename from Dockerfile rename to tobac/tests/Dockerfile diff --git a/tobac/tests/test_feature_detection.py b/tobac/tests/test_feature_detection.py index ef6e4a15..c723916f 100644 --- a/tobac/tests/test_feature_detection.py +++ b/tobac/tests/test_feature_detection.py @@ -1,5 +1,6 @@ import tobac.testing import tobac.feature_detection as feat_detect +import pytest def test_get_label_props_in_dict(): '''Testing ```tobac.feature_detection.get_label_props_in_dict``` for both 2D and 3D cases. 
@@ -80,4 +81,40 @@ def test_feature_detection_multithreshold_timestep(): ''' Tests ```tobac.feature_detection.feature_detection_multithreshold_timestep ''' - pass \ No newline at end of file + import numpy as np + from tobac import testing + from tobac import feature_detection + + # start by building a simple dataset with a single feature and seeing + # if we identify it + + test_dset_size = (50, 50) + test_hdim_1_pt = 20.0 + test_hdim_2_pt = 20.0 + test_hdim_1_sz = 5 + test_hdim_2_sz = 5 + test_amp = 2 + test_threshs = [ + 1.5, + ] + test_min_num = 2 + + test_data = np.zeros(test_dset_size) + test_data = testing.make_feature_blob( + test_data, + test_hdim_1_pt, + test_hdim_2_pt, + h1_size=test_hdim_1_sz, + h2_size=test_hdim_2_sz, + amplitude=test_amp, + ) + test_data_iris = testing.make_dataset_from_arr(test_data, data_type="iris") + fd_output = feature_detection.feature_detection_multithreshold_timestep( + test_data_iris, 0, threshold=test_threshs, min_num=test_min_num + ) + + # Make sure we have only one feature + assert len(fd_output.index) == 1 + # Make sure that the location of the feature is correct + assert fd_output.iloc[0]["hdim_1"] == pytest.approx(test_hdim_1_pt) + assert fd_output.iloc[0]["hdim_2"] == pytest.approx(test_hdim_2_pt) \ No newline at end of file diff --git a/tobac/tests/test_segmentation.py b/tobac/tests/test_segmentation.py index cca84169..f4d89950 100644 --- a/tobac/tests/test_segmentation.py +++ b/tobac/tests/test_segmentation.py @@ -1,3 +1,44 @@ -import tobac.testing +import tobac.testing as testing import tobac.segmentation as seg +def test_segmentation_timestep_2D_feature_2D_seg(): + ''' Tests `tobac.segmentation.segmentation_timestep` with a 2D + input feature and a 2D segmentation array + ''' + # Before we can run segmentation, we must run feature detection. 
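+    # (In this test the feature DataFrame is instead built by hand with
+    #  testing.generate_single_feature below, rather than by running the
+    #  feature detection itself.)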
+ + # start by building a simple dataset with a single feature + import numpy as np + + test_dset_size = (50, 50) + test_hdim_1_pt = 20.0 + test_hdim_2_pt = 20.0 + test_hdim_1_sz = 5 + test_hdim_2_sz = 5 + hdim_1_start_feat = int(np.ceil(test_hdim_1_pt - test_dset_size[0] / 2)) + hdim_1_end_feat = int(np.ceil(test_hdim_1_pt + test_dset_size[0] / 2)) + + hdim_2_start_feat = int(np.ceil(test_hdim_2_pt - test_dset_size[1] / 2)) + hdim_2_end_feat = int(np.ceil(test_hdim_2_pt + test_dset_size[1] / 2)) + + test_amp = 2 + + test_data = np.zeros(test_dset_size) + test_data = testing.make_feature_blob( + test_data, + test_hdim_1_pt, + test_hdim_2_pt, + h1_size=test_hdim_1_sz, + h2_size=test_hdim_2_sz, + amplitude=test_amp, + ) + test_data_iris = testing.make_dataset_from_arr(test_data, data_type="iris") + # Generate dummy feature dataset + test_feature_ds = testing.generate_single_feature(start_h1 = 20.0, start_h2 = 20.0) + + out_seg_mask, out_df = seg.segmentation_timestep(test_data_iris, test_feature_ds, + threshold = 1.5, PBC_flag='none', ) + + # Make sure that all labeled points are segmented + assert np.all(out_seg_mask[hdim_1_start_feat:hdim_1_end_feat, + hdim_2_start_feat:hdim_2_end_feat] == np.ones((test_hdim_1_sz, test_hdim_2_sz))) \ No newline at end of file From a83ef7523554f106e4a1bb8f438b4262e4a4606d Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Sat, 19 Mar 2022 23:54:17 -0600 Subject: [PATCH 24/82] Cleaning up of segmentation code, updating tests, cleaning up other code --- tobac/feature_detection.py | 67 +----------- tobac/segmentation.py | 149 ++++++++++---------------- tobac/testing.py | 50 +++++++-- tobac/tests/test_feature_detection.py | 74 ------------- tobac/tests/test_segmentation.py | 131 ++++++++++++++++++++-- tobac/tests/test_testing.py | 52 ++++----- tobac/tests/test_tracking.py | 10 +- tobac/tests/test_util.py | 76 +++++++++++++ tobac/utils.py | 34 ++++-- 9 files changed, 352 insertions(+), 291 deletions(-) create mode 100644 tobac/tests/test_util.py diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index b7c7d393..e2fa54a5 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -2,6 +2,7 @@ import numpy as np import pandas as pd import logging +from . import utils as tb_utils def get_label_props_in_dict(labels): '''Function to get the label properties into a dictionary format. @@ -26,68 +27,6 @@ def get_label_props_in_dict(labels): return region_properties_dict - -def get_indices_of_labels_from_reg_prop_dict(region_property_dict): - '''Function to get the x, y, and z indices (as well as point count) of all labeled regions. - - Parameters - ---------- - region_property_dict: dict of region_property objects - This dict should come from the get_label_props_in_dict function. - - Returns - ------- - dict (key: label number, int) - The number of points in the label number - dict (key: label number, int) - The z indices in the label number. 
If a 2D property dict is passed, this value is not returned - dict (key: label number, int) - the y indices in the label number - dict (key: label number, int) - the x indices in the label number - - Raises - ------ - ValueError - a ValueError is raised if there are no regions in the region property dict - - ''' - - import skimage.measure - - if len(region_property_dict) ==0: - raise ValueError("No regions!") - - - z_indices = dict() - y_indices = dict() - x_indices = dict() - curr_loc_indices = dict() - is_3D = False - - #loop through all skimage identified regions - for region_prop_key in region_property_dict: - region_prop = region_property_dict[region_prop_key] - index = region_prop.label - if len(region_prop.coords[0])>=3: - is_3D = True - curr_z_ixs, curr_y_ixs, curr_x_ixs = np.transpose(region_prop.coords) - z_indices[index] = curr_z_ixs - else: - curr_y_ixs, curr_x_ixs = np.transpose(region_prop.coords) - z_indices[index] = -1 - - y_indices[index] = curr_y_ixs - x_indices[index] = curr_x_ixs - curr_loc_indices[index] = len(curr_y_ixs) - - #print("indices found") - if is_3D: - return [curr_loc_indices, z_indices, y_indices, x_indices] - else: - return [curr_loc_indices, y_indices, x_indices] - - def adjust_pbc_point(in_dim, dim_min, dim_max): '''Function to adjust a point to the other boundary for PBCs @@ -496,7 +435,7 @@ def feature_detection_threshold(data_i,i_time, if num_labels > 0: all_label_props = get_label_props_in_dict(labels) [all_labels_max_size, all_label_locs_v, all_label_locs_h1, all_label_locs_h2 - ] = get_indices_of_labels_from_reg_prop_dict(all_label_props) + ] = tb_utils.get_indices_of_labels_from_reg_prop_dict(all_label_props) #find the points along the boundaries @@ -640,7 +579,7 @@ def feature_detection_threshold(data_i,i_time, # we need to get label properties again after we handle PBCs. 
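    # (get_label_props_in_dict returns a dict of skimage region properties
    # keyed by label; tb_utils.get_indices_of_labels_from_reg_prop_dict then
    # yields per-label point counts plus the index arrays of each region.)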
label_props = get_label_props_in_dict(labels)
     if len(label_props)>0:
-        [total_indices_all, vdim_indyces_all, hdim1_indices_all, hdim2_indices_all] = get_indices_of_labels_from_reg_prop_dict(label_props)
+        [total_indices_all, vdim_indyces_all, hdim1_indices_all, hdim2_indices_all] = tb_utils.get_indices_of_labels_from_reg_prop_dict(label_props)
 
         #values, count = np.unique(labels[:,:].ravel(), return_counts=True)
 
diff --git a/tobac/segmentation.py b/tobac/segmentation.py
index d0991143..212b71c6 100644
--- a/tobac/segmentation.py
+++ b/tobac/segmentation.py
@@ -72,22 +72,11 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
     """
     from skimage.segmentation import watershed
     import skimage.measure
-    # from skimage.segmentation import random_walker
     from scipy.ndimage import distance_transform_edt
     from copy import deepcopy
     import numpy as np
     import iris
 
-    #saving intermediary fields for testing
-    #original mask, secondary seeding, final version
-    #so we can ascertain deltas of each
-    #inter_fp = '/sumatra/asokolowsky/tobac_data/segmentation/testing/'
-
-    #if (np.all(features_in.frame.values[:] == 0)):
-    #    print("creating output file")
-    #    out_f = h5py.File('/sumatra/asokolowsky/tobac_data/segmentation/testing/seg_fields_progression.h5','w')
-    #    print(out_f)
-
     # copy feature dataframe for output
     features_out=deepcopy(features_in)
     # Create cube of the same dimensions and coordinates as input data to store mask:
@@ -97,6 +86,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
 
     # Get raw array from input data:
     data=field_in.core_data()
+    is_3D_seg = len(data.shape)==3
 
     #Set level at which to create "Seed" for each feature in the case of 3D watershedding:
     # If none, use all levels (later reduced to the ones fulfilling the theshold conditions)
@@ -120,61 +110,57 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
     # set markers at the positions of the features:
     markers = np.zeros(unmasked.shape).astype(np.int32)
     if field_in.ndim==2: #2D watershedding
+        hdim_1_axis = 0
+        hdim_2_axis = 1
         for index, row in features_in.iterrows():
             markers[int(row['hdim_1']), int(row['hdim_2'])]=row['feature']
     elif field_in.ndim==3: #3D watershedding
+        # Find which coordinate is the z coordinate
+        list_coord_names=[coord.name() for coord in field_in.coords()]
+        #determine vertical axis:
+        if vertical_coord=='auto':
+            list_vertical=['z','model_level_number','altitude','geopotential_height']
+            # TODO: there surely must be a better way to handle this
+            for coord_name in list_vertical:
+                if coord_name in list_coord_names:
+                    vertical_axis=coord_name
+                    break
+        elif vertical_coord in list_coord_names:
+            vertical_axis=vertical_coord
+        else:
+            raise ValueError('Please specify vertical coordinate')
+        ndim_vertical=field_in.coord_dims(vertical_axis)
+        if len(ndim_vertical)>1:
+            raise ValueError('please specify 1 dimensional vertical coordinate')
+        vertical_coord_axis = ndim_vertical[0]
+        # Once we know the vertical coordinate, we can resolve the
+        # horizontal coordinates
+        if vertical_coord_axis == 0:
+            hdim_1_axis = 1
+            hdim_2_axis = 2
+        elif vertical_coord_axis == 1:
+            hdim_1_axis = 0
+            hdim_2_axis = 2
+        elif vertical_coord_axis == 2:
+            hdim_1_axis = 0
+            hdim_2_axis = 1
+
+        # We need to generate seeds in 3D. 
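+        # ('column' stamps the feature label through every level selected by
+        #  `level` (all levels by default) at the feature position; 'box'
+        #  only marks a small neighborhood, which avoids tying together
+        #  vertically separate objects such as cirrus overlying a discrete
+        #  convective cloud.)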
if (seed_3D_flag == 'column'): - list_coord_names=[coord.name() for coord in field_in.coords()] - #determine vertical axis: - if vertical_coord=='auto': - list_vertical=['z','model_level_number','altitude','geopotential_height'] - for coord_name in list_vertical: - if coord_name in list_coord_names: - vertical_axis=coord_name - break - elif vertical_coord in list_coord_names: - vertical_axis=vertical_coord - else: - raise ValueError('Plese specify vertical coordinate') - ndim_vertical=field_in.coord_dims(vertical_axis) - if len(ndim_vertical)>1: - raise ValueError('please specify 1 dimensional vertical coordinate') for index, row in features_in.iterrows(): - if ndim_vertical[0]==0: - markers[:,int(row['hdim_1']), int(row['hdim_2'])]=row['feature'] - elif ndim_vertical[0]==1: - markers[int(row['hdim_1']),:, int(row['hdim_2'])]=row['feature'] - elif ndim_vertical[0]==2: - markers[int(row['hdim_1']), int(row['hdim_2']),:]=row['feature'] + if vertical_coord_axis==0: + markers[level,int(row['hdim_1']), int(row['hdim_2'])]=row['feature'] + elif vertical_coord_axis==1: + markers[int(row['hdim_1']),level, int(row['hdim_2'])]=row['feature'] + elif vertical_coord_axis==2: + markers[int(row['hdim_1']), int(row['hdim_2']),level]=row['feature'] elif (seed_3D_flag == 'box'): - list_coord_names=[coord.name() for coord in field_in.coords()] - #determine vertical axis: - #print(list_coord_names) - if vertical_coord=='auto': - list_vertical=['vdim','z','model_level_number','altitude','geopotential_height'] - for coord_name in list_vertical: - if coord_name in list_coord_names: - vertical_axis=coord_name - #print(vertical_axis) - break - elif vertical_coord in list_coord_names: - vertical_axis=vertical_coord - else: - raise ValueError('Please specify vertical coordinate') - ndim_vertical=field_in.coord_dims(vertical_axis) - #print(ndim_vertical,ndim_vertical[0]) - - if len(ndim_vertical)>1: - raise ValueError('please specify 1 dimensional vertical coordinate') z_len = len(field_in.coord('z').points) y_len = len(field_in.coord('y').points) x_len = len(field_in.coord('x').points) - #print(z_len,y_len,x_len) - #display(features_in) - # Get the size of the seed box from the input parameter try: seed_z = seed_3D_size[0] @@ -186,20 +172,11 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu seed_y = seed_3D_size seed_x = seed_3D_size - + # Can we use our testing function to generate 3D boxes (with PBC awareness) + # for a faster version of this? 
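+        # (Clipping rule used in the loop below: near a domain edge the seed
+        #  box is placed flush with that edge instead of centered on the
+        #  feature, so no marker ever falls outside the array.)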
for index, row in features_in.iterrows(): - #creation of point ranges for 3D marker seeding - #and PBC flags for cross-boundary seeding - nixing this idea for now, but described in PBC Segmentation notes - #PBC_y_chk = 0 - #PBC_x_chk = 0 - - #print("feature: ",row['feature']) - #print("z-ctr: ",row['vdim']) - #print("y-ctr: ",row['hdim_1']) - #print("x-ctr: ",row['hdim_2']) - - - + #creation of point ranges for 3D marker seeding + # TODO: fix this so that it's not all 0-5 if(int(row['vdim']) >=2 and int(row['vdim']) <= z_len-3): z_list = np.arange(int(row['vdim']-2),int(row['vdim']+3)) elif(int(row['vdim']) < 2): @@ -225,7 +202,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu x_list = np.arange(x_len-seed_x,x_len) #PBC_x_chk = 1 - #loop thru 5x5x5 z times y times x range + #loop thru the box points for k in range(0,seed_z): for j in range(0,seed_y): for i in range(0,seed_x): @@ -236,18 +213,14 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu markers[y_list[j],z_list[k],x_list[i]]=row['feature'] elif ndim_vertical[0]==2: markers[y_list[j],x_list[i],z_list[k]]=row['feature'] - - - #else: - #error for unspec method - + else: raise ValueError('Segmentations routine only possible with 2 or 3 spatial dimensions') # set markers in cells not fulfilling threshold condition to zero: markers[~unmasked]=0 - marker_vals = np.unique(markers) + #marker_vals = np.unique(markers) # Turn into np arrays (not necessary for markers) as dask arrays don't yet seem to work for watershedding algorithm data_segmentation=np.array(data_segmentation) @@ -256,9 +229,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu # perform segmentation: if method=='watershed': segmentation_mask = watershed(np.array(data_segmentation),markers.astype(np.int32), mask=unmasked) -# elif method=='random_walker': -# segmentation_mask=random_walker(data_segmentation, markers.astype(np.int32), -# beta=130, mode='bf', tol=0.001, copy=True, multichannel=False, return_full_prob=False, spacing=None) + else: raise ValueError('unknown method, must be watershed') @@ -267,7 +238,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu D=distance_transform_edt((markers==0).astype(int)) segmentation_mask[np.bitwise_and(segmentation_mask>0, D>max_distance_pixel)]=0 - #mask all segmentation_mask points below th=reshold as -1 + #mask all segmentation_mask points below threshold as -1 #to differentiate from those unmasked points NOT filled by watershedding segmentation_mask[~unmasked] = -1 @@ -276,26 +247,19 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #read in labeling/masks and region-finding functions reg_props_dict = tb_utils.get_label_props_in_dict(seg_m_data) - - curr_reg_inds, z_reg_inds, y_reg_inds, x_reg_inds= tb_utils.get_indices_of_labels_from_reg_prop_dict(reg_props_dict) - - #z_unf,y_unf,x_unf = np.where(segmentation_mask==0) - - #print(np.where(segmentation_mask==-1)) - #print(np.where(segmentation_mask==0)) hdim1_min = 0 - hdim1_max = segmentation_mask.shape[1] - 1 + hdim1_max = segmentation_mask.shape[hdim_1_axis] - 1 hdim2_min = 0 - hdim2_max = segmentation_mask.shape[2] - 1 + hdim2_max = segmentation_mask.shape[hdim_2_axis] - 1 # all options that involve dealing with periodic boundaries pbc_options = ['hdim_1', 'hdim_2', 'both'] - + # Only run this if we need to deal with PBCs if PBC_flag in pbc_options: - #Return all indices where segmentation field == 0 - #meaning 
unfilled but above threshold + # Return all indices where segmentation field == 0 + # meaning unfilled but above threshold vdim_unf,hdim1_unf,hdim2_unf = np.where(segmentation_mask==0) seg_mask_unseeded = np.zeros(segmentation_mask.shape) @@ -362,9 +326,6 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu if method=='watershed': segmentation_mask_2 = watershed(np.array(data_segmentation),markers_2.astype(np.int32), mask=unmasked) - # elif method=='random_walker': - # segmentation_mask=random_walker(data_segmentation, markers.astype(np.int32), - # beta=130, mode='bf', tol=0.001, copy=True, multichannel=False, return_full_prob=False, spacing=None) else: raise ValueError('unknown method, must be watershed') @@ -382,6 +343,8 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu # Secondary seeding complete, now blending periodic boundaries #keep segmentation mask fields for now so we can save these all later #for demos of changes + + # Does any of this need to be run if there aren't PBCs? # Test of PBC segmentation boundary blending below diff --git a/tobac/testing.py b/tobac/testing.py index f9c6dda3..e15255ad 100644 --- a/tobac/testing.py +++ b/tobac/testing.py @@ -359,8 +359,15 @@ def make_sample_data_3D_3blobs(data_type='iris',invert_xy=False): return sample_data -def make_dataset_from_arr(in_arr, data_type = 'xarray'): - '''Makes a dataset (xarray or iris) for feature detection/segmentation from +def make_dataset_from_arr( + in_arr, + data_type="xarray", + time_dim_num=None, + z_dim_num=None, + y_dim_num=0, + x_dim_num=1, +): + """Makes a dataset (xarray or iris) for feature detection/segmentation from a raw numpy/dask/etc. array. Parameters @@ -369,20 +376,41 @@ def make_dataset_from_arr(in_arr, data_type = 'xarray'): The input array to convert to iris/xarray data_type: str('xarray' or 'iris') Type of the dataset to return - + time_dim_num: int or None + What axis is the time dimension on, None for a single timestep + z_dim_num: int or None + What axis is the z dimension on, None for a 2D array + y_dim_num: int + What axis is the y dimension on, typically 0 for a 2D array + x_dim_num: int + What axis is the x dimension on, typically 1 for a 2D array + Returns ------- Iris or xarray dataset with everything we need for feature detection/tracking. 
- ''' + """ import xarray as xr - + import iris + + if time_dim_num is not None: + raise NotImplementedError("Time dimension not yet implemented in this function") + + is_3D = z_dim_num is not None output_arr = xr.DataArray(in_arr) + if is_3D: + z_max = in_arr.shape[z_dim_num] - if data_type == 'xarray': + if data_type == "xarray": return output_arr - elif data_type == 'iris': - return output_arr.to_iris() + elif data_type == "iris": + out_arr_iris = output_arr.to_iris() + if is_3D: + out_arr_iris.add_dim_coord( + iris.coords.DimCoord(np.arange(0, z_max), standard_name="altitude"), + z_dim_num, + ) + return out_arr_iris else: raise ValueError("data_type must be 'xarray' or 'iris'") @@ -738,7 +766,7 @@ def generate_single_feature(start_h1, start_h2, start_v = None, min_h1 = 0, max_h1 = 1000, min_h2 = 0, max_h2 = 1000, num_frames = 1, dt = datetime.timedelta(minutes=5), start_date = datetime.datetime(2022,1,1,0), - PBC_flag = 'none', frame_start = 1): + PBC_flag = 'none', frame_start = 1, feature_num=1,): '''Function to generate a dummy feature dataframe to test the tracking functionality Parameters @@ -780,6 +808,8 @@ def generate_single_feature(start_h1, start_h2, start_v = None, 'both' means that we are periodic along both horizontal dimensions frame_start: int Number to start the frame at + feature_num: int + What number to start the feature at ''' out_list_of_dicts = list() @@ -799,7 +829,7 @@ def generate_single_feature(start_h1, start_h2, start_v = None, curr_dict['vdim'] = curr_v curr_v += spd_v curr_dict['time'] = curr_dt - + curr_dict["feature"] = feature_num + i curr_h1 += spd_h1 curr_h2 += spd_h2 diff --git a/tobac/tests/test_feature_detection.py b/tobac/tests/test_feature_detection.py index c723916f..c6d00a0f 100644 --- a/tobac/tests/test_feature_detection.py +++ b/tobac/tests/test_feature_detection.py @@ -2,80 +2,6 @@ import tobac.feature_detection as feat_detect import pytest -def test_get_label_props_in_dict(): - '''Testing ```tobac.feature_detection.get_label_props_in_dict``` for both 2D and 3D cases. - ''' - import skimage.measure as skim - test_3D_data = tobac.testing.make_sample_data_3D_3blobs(data_type='xarray') - test_2D_data = tobac.testing.make_sample_data_2D_3blobs(data_type='xarray') - - - # make sure it works for 3D data - labels_3D = skim.label(test_3D_data.values[0]) - - output_3D = feat_detect.get_label_props_in_dict(labels_3D) - - #make sure it is a dict - assert type(output_3D) is dict - #make sure we get at least one output, there should be at least one label. - assert len(output_3D) > 0 - - # make sure it works for 2D data - labels_2D = skim.label(test_2D_data.values[0]) - - output_2D = feat_detect.get_label_props_in_dict(labels_2D) - - #make sure it is a dict - assert type(output_2D) is dict - #make sure we get at least one output, there should be at least one label. - assert len(output_2D) > 0 - - -def test_get_indices_of_labels_from_reg_prop_dict(): - '''Testing ```tobac.feature_detection.get_indices_of_labels_from_reg_prop_dict``` for 2D and 3D cases. 
- ''' - import skimage.measure as skim - import numpy as np - test_3D_data = tobac.testing.make_sample_data_3D_3blobs(data_type='xarray') - test_2D_data = tobac.testing.make_sample_data_2D_3blobs(data_type='xarray') - - - # make sure it works for 3D data - labels_3D = skim.label(test_3D_data.values[0]) - nx_3D = test_3D_data.values[0].shape[2] - ny_3D = test_3D_data.values[0].shape[1] - nz_3D = test_3D_data.values[0].shape[0] - - labels_2D = skim.label(test_2D_data.values[0]) - nx_2D = test_2D_data.values[0].shape[1] - ny_2D = test_2D_data.values[0].shape[0] - - region_props_3D = feat_detect.get_label_props_in_dict(labels_3D) - region_props_2D = feat_detect.get_label_props_in_dict(labels_2D) - - #get_indices_of_labels_from_reg_prop_dict - - [curr_loc_indices, z_indices, y_indices, x_indices] = feat_detect.get_indices_of_labels_from_reg_prop_dict(region_props_3D) - - for index_key in curr_loc_indices: - # there should be at least one value in each. - assert curr_loc_indices[index_key] > 0 - - assert np.all(z_indices[index_key] >= 0) and np.all(z_indices[index_key] < nz_3D) - assert np.all(x_indices[index_key] >= 0) and np.all(x_indices[index_key] < nx_3D) - assert np.all(y_indices[index_key] >= 0) and np.all(y_indices[index_key] < ny_3D) - - [curr_loc_indices, y_indices, x_indices] = feat_detect.get_indices_of_labels_from_reg_prop_dict(region_props_2D) - - for index_key in curr_loc_indices: - # there should be at least one value in each. - assert curr_loc_indices[index_key] > 0 - - assert np.all(x_indices[index_key] >= 0) and np.all(x_indices[index_key] < nx_2D) - assert np.all(y_indices[index_key] >= 0) and np.all(y_indices[index_key] < ny_2D) - - - def test_feature_detection_multithreshold_timestep(): ''' diff --git a/tobac/tests/test_segmentation.py b/tobac/tests/test_segmentation.py index f4d89950..c6ee5cd7 100644 --- a/tobac/tests/test_segmentation.py +++ b/tobac/tests/test_segmentation.py @@ -15,11 +15,11 @@ def test_segmentation_timestep_2D_feature_2D_seg(): test_hdim_2_pt = 20.0 test_hdim_1_sz = 5 test_hdim_2_sz = 5 - hdim_1_start_feat = int(np.ceil(test_hdim_1_pt - test_dset_size[0] / 2)) - hdim_1_end_feat = int(np.ceil(test_hdim_1_pt + test_dset_size[0] / 2)) - - hdim_2_start_feat = int(np.ceil(test_hdim_2_pt - test_dset_size[1] / 2)) - hdim_2_end_feat = int(np.ceil(test_hdim_2_pt + test_dset_size[1] / 2)) + test_dxy = 1000 + hdim_1_start_feat = int(np.ceil(test_hdim_1_pt - test_hdim_1_sz / 2)) + hdim_1_end_feat = int(np.ceil(test_hdim_1_pt + test_hdim_1_sz / 2)) + hdim_2_start_feat = int(np.ceil(test_hdim_2_pt - test_hdim_2_sz / 2)) + hdim_2_end_feat = int(np.ceil(test_hdim_2_pt + test_hdim_2_sz / 2)) test_amp = 2 @@ -36,9 +36,124 @@ def test_segmentation_timestep_2D_feature_2D_seg(): # Generate dummy feature dataset test_feature_ds = testing.generate_single_feature(start_h1 = 20.0, start_h2 = 20.0) - out_seg_mask, out_df = seg.segmentation_timestep(test_data_iris, test_feature_ds, + out_seg_mask, out_df = seg.segmentation_timestep(field_in = test_data_iris, + features_in = test_feature_ds, dxy = test_dxy, threshold = 1.5, PBC_flag='none', ) # Make sure that all labeled points are segmented - assert np.all(out_seg_mask[hdim_1_start_feat:hdim_1_end_feat, - hdim_2_start_feat:hdim_2_end_feat] == np.ones((test_hdim_1_sz, test_hdim_2_sz))) \ No newline at end of file + assert np.all(out_seg_mask.core_data()[hdim_1_start_feat:hdim_1_end_feat, + hdim_2_start_feat:hdim_2_end_feat] == np.ones((test_hdim_1_sz, test_hdim_2_sz))) + + +def test_segmentation_timestep_level(): + """Tests 
`tobac.segmentation.segmentation_timestep` with a 2D + input feature and a 3D segmentation array, specifying the `level` parameter. + """ + # Before we can run segmentation, we must run feature detection. + + # start by building a simple dataset with a single feature + import numpy as np + + test_dset_size = (20, 50, 50) + test_hdim_1_pt = 20.0 + test_hdim_2_pt = 20.0 + test_vdim_pt = 2 + test_hdim_1_sz = 5 + test_hdim_2_sz = 5 + test_vdim_sz = 3 + test_dxy = 1000 + + vdim_start_feat = int(np.ceil(test_vdim_pt - test_vdim_sz / 2)) + vdim_end_feat = int(np.ceil(test_vdim_pt + test_vdim_sz / 2)) + hdim_1_start_feat = int(np.ceil(test_hdim_1_pt - test_hdim_1_sz / 2)) + hdim_1_end_feat = int(np.ceil(test_hdim_1_pt + test_hdim_1_sz / 2)) + hdim_2_start_feat = int(np.ceil(test_hdim_2_pt - test_hdim_2_sz / 2)) + hdim_2_end_feat = int(np.ceil(test_hdim_2_pt + test_hdim_2_sz / 2)) + + test_amp = 2 + + test_data = np.zeros(test_dset_size) + test_data = testing.make_feature_blob( + test_data, + test_hdim_1_pt, + test_hdim_2_pt, + test_vdim_pt, + h1_size=test_hdim_1_sz, + h2_size=test_hdim_2_sz, + v_size=test_vdim_sz, + amplitude=test_amp, + ) + + # Make a second feature, above the first. + + delta_height = 8 + test_data = testing.make_feature_blob( + test_data, + test_hdim_1_pt, + test_hdim_2_pt, + test_vdim_pt + delta_height, + h1_size=test_hdim_1_sz, + h2_size=test_hdim_2_sz, + v_size=test_vdim_sz, + amplitude=test_amp, + ) + + test_data_iris = testing.make_dataset_from_arr( + test_data, data_type="iris", z_dim_num=0, y_dim_num=1, x_dim_num=2 + ) + # Generate dummy feature dataset + test_feature_ds = testing.generate_single_feature(start_h1=20.0, start_h2=20.0) + + out_seg_mask, out_df = seg.segmentation_timestep( + field_in=test_data_iris, + features_in=test_feature_ds, + dxy=test_dxy, + threshold=1.5, + seed_3D_flag= 'column' + ) + out_seg_mask_arr = out_seg_mask.core_data() + # Make sure that all labeled points are segmented, before setting specific levels + assert np.all( + out_seg_mask_arr[ + vdim_start_feat:vdim_end_feat, + hdim_1_start_feat:hdim_1_end_feat, + hdim_2_start_feat:hdim_2_end_feat, + ] + == np.ones((test_vdim_sz, test_hdim_1_sz, test_hdim_2_sz)) + ) + assert np.all( + out_seg_mask_arr[ + vdim_start_feat + delta_height : vdim_end_feat + delta_height, + hdim_1_start_feat:hdim_1_end_feat, + hdim_2_start_feat:hdim_2_end_feat, + ] + == np.ones((test_vdim_sz, test_hdim_1_sz, test_hdim_2_sz)) + ) + + # now set specific levels + out_seg_mask, out_df = seg.segmentation_timestep( + field_in=test_data_iris, + features_in=test_feature_ds, + dxy=test_dxy, + level=slice(vdim_start_feat, vdim_end_feat), + threshold=1.5, + seed_3D_flag = 'column' + ) + out_seg_mask_arr = out_seg_mask.core_data() + # Make sure that all labeled points are segmented, before setting specific levels + assert np.all( + out_seg_mask_arr[ + vdim_start_feat:vdim_end_feat, + hdim_1_start_feat:hdim_1_end_feat, + hdim_2_start_feat:hdim_2_end_feat, + ] + == np.ones((test_vdim_sz, test_hdim_1_sz, test_hdim_2_sz)) + ) + assert np.all( + out_seg_mask_arr[ + vdim_start_feat + delta_height : vdim_end_feat + delta_height, + hdim_1_start_feat:hdim_1_end_feat, + hdim_2_start_feat:hdim_2_end_feat, + ] + == np.zeros((test_vdim_sz, test_hdim_1_sz, test_hdim_2_sz)) + ) diff --git a/tobac/tests/test_testing.py b/tobac/tests/test_testing.py index 5f42df88..28e63371 100644 --- a/tobac/tests/test_testing.py +++ b/tobac/tests/test_testing.py @@ -243,43 +243,43 @@ def test_generate_single_feature(): # Testing a simple 3D case expected_df = 
pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1,0,0)} + {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)} ]) assert_frame_equal(generate_single_feature(1, 1, start_v = 1, frame_start = 0).sort_index(axis=1), expected_df.sort_index(axis=1)) # Testing a simple 2D case expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1,0,0)} + {'hdim_1': 1, 'hdim_2': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)} ]) assert_frame_equal(generate_single_feature(1, 1, frame_start = 0).sort_index(axis=1), expected_df.sort_index(axis=1)) # Testing a simple 2D case with movement expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1,0,0)}, - {'hdim_1': 2, 'hdim_2': 2, 'frame': 1, 'time': datetime.datetime(2022, 1, 1,0,5)}, - {'hdim_1': 3, 'hdim_2': 3, 'frame': 2, 'time': datetime.datetime(2022, 1, 1,0,10)}, - {'hdim_1': 4, 'hdim_2': 4, 'frame': 3, 'time': datetime.datetime(2022, 1, 1,0,15)}, + {'hdim_1': 1, 'hdim_2': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)}, + {'hdim_1': 2, 'hdim_2': 2, 'frame': 1, 'feature': 2, 'time': datetime.datetime(2022, 1, 1,0,5)}, + {'hdim_1': 3, 'hdim_2': 3, 'frame': 2, 'feature': 3, 'time': datetime.datetime(2022, 1, 1,0,10)}, + {'hdim_1': 4, 'hdim_2': 4, 'frame': 3, 'feature': 4, 'time': datetime.datetime(2022, 1, 1,0,15)}, ]) assert_frame_equal(generate_single_feature(1, 1, frame_start = 0, num_frames=4, spd_h1 = 1, spd_h2 = 1).sort_index(axis=1), expected_df.sort_index(axis=1)) # Testing a simple 3D case with movement expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1,0,0)}, - {'hdim_1': 2, 'hdim_2': 2, 'vdim': 2, 'frame': 1, 'time': datetime.datetime(2022, 1, 1,0,5)}, - {'hdim_1': 3, 'hdim_2': 3, 'vdim': 3, 'frame': 2, 'time': datetime.datetime(2022, 1, 1,0,10)}, - {'hdim_1': 4, 'hdim_2': 4, 'vdim': 4, 'frame': 3, 'time': datetime.datetime(2022, 1, 1,0,15)}, + {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)}, + {'hdim_1': 2, 'hdim_2': 2, 'vdim': 2, 'frame': 1, 'feature': 2, 'time': datetime.datetime(2022, 1, 1,0,5)}, + {'hdim_1': 3, 'hdim_2': 3, 'vdim': 3, 'frame': 2, 'feature': 3, 'time': datetime.datetime(2022, 1, 1,0,10)}, + {'hdim_1': 4, 'hdim_2': 4, 'vdim': 4, 'frame': 3, 'feature': 4, 'time': datetime.datetime(2022, 1, 1,0,15)}, ]) assert_frame_equal(generate_single_feature(1, 1, start_v = 1, frame_start = 0, num_frames=4, spd_h1 = 1, spd_h2 = 1, spd_v = 1).sort_index(axis=1), expected_df.sort_index(axis=1)) # Testing a simple 3D case with movement that passes the hdim_1 boundary expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1, 0, 0)}, - {'hdim_1': 5, 'hdim_2': 2, 'vdim': 2, 'frame': 1, 'time': datetime.datetime(2022, 1, 1, 0, 5)}, - {'hdim_1': 9, 'hdim_2': 3, 'vdim': 3, 'frame': 2, 'time': datetime.datetime(2022, 1, 1, 0, 10)}, - {'hdim_1': 3, 'hdim_2': 4, 'vdim': 4, 'frame': 3, 'time': datetime.datetime(2022, 1, 1, 0, 15)}, + {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1, 0, 0)}, + {'hdim_1': 5, 'hdim_2': 2, 'vdim': 2, 'frame': 1, 'feature': 2, 'time': datetime.datetime(2022, 1, 1, 0, 5)}, + {'hdim_1': 9, 
'hdim_2': 3, 'vdim': 3, 'frame': 2, 'feature': 3, 'time': datetime.datetime(2022, 1, 1, 0, 10)}, + {'hdim_1': 3, 'hdim_2': 4, 'vdim': 4, 'frame': 3, 'feature': 4, 'time': datetime.datetime(2022, 1, 1, 0, 15)}, ]) assert_frame_equal(generate_single_feature(1, 1, start_v = 1, min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, @@ -288,10 +288,10 @@ def test_generate_single_feature(): # Testing a simple 3D case with movement that passes the hdim_1 boundary expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1,0,0)}, - {'hdim_1': 5, 'hdim_2': 2, 'vdim': 2, 'frame': 1, 'time': datetime.datetime(2022, 1, 1,0,5)}, - {'hdim_1': 9, 'hdim_2': 3, 'vdim': 3, 'frame': 2, 'time': datetime.datetime(2022, 1, 1,0,10)}, - {'hdim_1': 3, 'hdim_2': 4, 'vdim': 4, 'frame': 3, 'time': datetime.datetime(2022, 1, 1,0,15)}, + {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)}, + {'hdim_1': 5, 'hdim_2': 2, 'vdim': 2, 'frame': 1, 'feature': 2, 'time': datetime.datetime(2022, 1, 1,0,5)}, + {'hdim_1': 9, 'hdim_2': 3, 'vdim': 3, 'frame': 2, 'feature': 3, 'time': datetime.datetime(2022, 1, 1,0,10)}, + {'hdim_1': 3, 'hdim_2': 4, 'vdim': 4, 'frame': 3, 'feature': 4, 'time': datetime.datetime(2022, 1, 1,0,15)}, ]) assert_frame_equal(generate_single_feature(1, 1, start_v = 1, min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, @@ -300,10 +300,10 @@ def test_generate_single_feature(): # Testing a simple 3D case with movement that passes the hdim_2 boundary expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1,0,0)}, - {'hdim_1': 2, 'hdim_2': 5, 'vdim': 2, 'frame': 1, 'time': datetime.datetime(2022, 1, 1,0,5)}, - {'hdim_1': 3, 'hdim_2': 9, 'vdim': 3, 'frame': 2, 'time': datetime.datetime(2022, 1, 1,0,10)}, - {'hdim_1': 4, 'hdim_2': 3, 'vdim': 4, 'frame': 3, 'time': datetime.datetime(2022, 1, 1,0,15)}, + {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)}, + {'hdim_1': 2, 'hdim_2': 5, 'vdim': 2, 'frame': 1, 'feature': 2, 'time': datetime.datetime(2022, 1, 1,0,5)}, + {'hdim_1': 3, 'hdim_2': 9, 'vdim': 3, 'frame': 2, 'feature': 3, 'time': datetime.datetime(2022, 1, 1,0,10)}, + {'hdim_1': 4, 'hdim_2': 3, 'vdim': 4, 'frame': 3, 'feature': 4, 'time': datetime.datetime(2022, 1, 1,0,15)}, ]) assert_frame_equal(generate_single_feature(1, 1, start_v = 1, min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, @@ -312,10 +312,10 @@ def test_generate_single_feature(): # Testing a simple 3D case with movement that passes the hdim_1 and hdim_2 boundaries expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1,0,0)}, - {'hdim_1': 6, 'hdim_2': 5, 'vdim': 2, 'frame': 1, 'time': datetime.datetime(2022, 1, 1,0,5)}, - {'hdim_1': 1, 'hdim_2': 9, 'vdim': 3, 'frame': 2, 'time': datetime.datetime(2022, 1, 1,0,10)}, - {'hdim_1': 6, 'hdim_2': 3, 'vdim': 4, 'frame': 3, 'time': datetime.datetime(2022, 1, 1,0,15)}, + {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)}, + {'hdim_1': 6, 'hdim_2': 5, 'vdim': 2, 'frame': 1, 'feature': 2, 'time': datetime.datetime(2022, 1, 1,0,5)}, + {'hdim_1': 1, 'hdim_2': 9, 'vdim': 3, 'frame': 2, 'feature': 3, 'time': datetime.datetime(2022, 1, 1,0,10)}, + {'hdim_1': 6, 'hdim_2': 3, 'vdim': 4, 'frame': 3, 'feature': 4, 'time': datetime.datetime(2022, 1, 
1,0,15)},
     ])
     assert_frame_equal(generate_single_feature(1, 1, start_v = 1,
         min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10,
diff --git a/tobac/tests/test_tracking.py
index 6c6a27d7..182071b7 100644
--- a/tobac/tests/test_tracking.py
+++ b/tobac/tests/test_tracking.py
@@ -32,7 +32,7 @@ def test_linking_trackpy():
         PBC_flag = 'none'
     )
     # Just want to remove the time_cell column here.
-    actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'frame', 'time', 'cell']]
+    actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'frame', 'feature', 'time', 'cell']]
 
     assert_frame_equal(expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1))
 
@@ -51,7 +51,7 @@ def test_linking_trackpy():
         PBC_flag = 'none'
     )
     # Just want to remove the time_cell column here.
-    actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'time', 'cell']]
+    actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'feature', 'time', 'cell']]
 
     assert_frame_equal(expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1))
 
@@ -71,7 +71,7 @@ def test_linking_trackpy():
         PBC_flag = 'hdim_1'
     )
     # Just want to remove the time_cell column here.
-    actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'time', 'cell']]
+    actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'feature', 'time', 'cell']]
 
     assert_frame_equal(expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1))
 
@@ -90,7 +90,7 @@ def test_linking_trackpy():
         PBC_flag = 'hdim_2'
     )
     # Just want to remove the time_cell column here.
-    actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'time', 'cell']]
+    actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'feature', 'time', 'cell']]
 
     assert_frame_equal(expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1))
 
@@ -109,7 +109,7 @@ def test_linking_trackpy():
         PBC_flag = 'both'
     )
     # Just want to remove the time_cell column here.
-    actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'time', 'cell']]
+    actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'feature', 'time', 'cell']]
 
     assert_frame_equal(expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1))
 
diff --git a/tobac/tests/test_util.py
new file mode 100644
index 00000000..168bd705
--- /dev/null
+++ b/tobac/tests/test_util.py
@@ -0,0 +1,76 @@
+import tobac.testing
+import tobac.utils as tb_utils
+
+def test_get_label_props_in_dict():
+    '''Testing ```tobac.utils.get_label_props_in_dict``` for both 2D and 3D cases.
+    '''
+    import skimage.measure as skim
+    test_3D_data = tobac.testing.make_sample_data_3D_3blobs(data_type='xarray')
+    test_2D_data = tobac.testing.make_sample_data_2D_3blobs(data_type='xarray')
+
+
+    # make sure it works for 3D data
+    labels_3D = skim.label(test_3D_data.values[0])
+
+    output_3D = tb_utils.get_label_props_in_dict(labels_3D)
+
+    #make sure it is a dict
+    assert type(output_3D) is dict
+    #make sure we get at least one output; there should be at least one label.
+    assert len(output_3D) > 0
+
+    # make sure it works for 2D data
+    labels_2D = skim.label(test_2D_data.values[0])
+
+    output_2D = tb_utils.get_label_props_in_dict(labels_2D)
+
+    #make sure it is a dict
+    assert type(output_2D) is dict
+    #make sure we get at least one output; there should be at least one label.
+    assert len(output_2D) > 0
+
+
+def test_get_indices_of_labels_from_reg_prop_dict():
+    '''Testing ```tobac.utils.get_indices_of_labels_from_reg_prop_dict``` for 2D and 3D cases.
+    '''
+    import skimage.measure as skim
+    import numpy as np
+    test_3D_data = tobac.testing.make_sample_data_3D_3blobs(data_type='xarray')
+    test_2D_data = tobac.testing.make_sample_data_2D_3blobs(data_type='xarray')
+
+
+    # make sure it works for 3D data
+    labels_3D = skim.label(test_3D_data.values[0])
+    nx_3D = test_3D_data.values[0].shape[2]
+    ny_3D = test_3D_data.values[0].shape[1]
+    nz_3D = test_3D_data.values[0].shape[0]
+
+    labels_2D = skim.label(test_2D_data.values[0])
+    nx_2D = test_2D_data.values[0].shape[1]
+    ny_2D = test_2D_data.values[0].shape[0]
+
+    region_props_3D = tb_utils.get_label_props_in_dict(labels_3D)
+    region_props_2D = tb_utils.get_label_props_in_dict(labels_2D)
+
+    #get_indices_of_labels_from_reg_prop_dict
+
+    [curr_loc_indices, z_indices, y_indices, x_indices] = tb_utils.get_indices_of_labels_from_reg_prop_dict(region_props_3D)
+
+    for index_key in curr_loc_indices:
+        # there should be at least one value in each.
+        assert curr_loc_indices[index_key] > 0
+
+        assert np.all(z_indices[index_key] >= 0) and np.all(z_indices[index_key] < nz_3D)
+        assert np.all(x_indices[index_key] >= 0) and np.all(x_indices[index_key] < nx_3D)
+        assert np.all(y_indices[index_key] >= 0) and np.all(y_indices[index_key] < ny_3D)
+
+    [curr_loc_indices, y_indices, x_indices] = tb_utils.get_indices_of_labels_from_reg_prop_dict(region_props_2D)
+
+    for index_key in curr_loc_indices:
+        # there should be at least one value in each.
+        assert curr_loc_indices[index_key] > 0
+
+        assert np.all(x_indices[index_key] >= 0) and np.all(x_indices[index_key] < nx_2D)
+        assert np.all(y_indices[index_key] >= 0) and np.all(y_indices[index_key] < ny_2D)
+
+
diff --git a/tobac/utils.py
index 0156b122..e033cfeb 100644
--- a/tobac/utils.py
+++ b/tobac/utils.py
@@ -663,52 +663,64 @@ def get_label_props_in_dict(labels):
 
 def get_indices_of_labels_from_reg_prop_dict(region_property_dict):
     '''Function to get the x, y, and z indices (as well as point count) of all labeled regions.
-    This function should produce similar output as new_get_indices_of_labels, but
-    allows for re-use of the region_property_dict.
-
+
     Parameters
     ----------
     region_property_dict: dict of region_property objects
         This dict should come from the get_label_props_in_dict function.
-
+
     Returns
     -------
     dict (key: label number, int)
         The number of points in the label number
     dict (key: label number, int)
-        The z indices in the label number
+        The z indices in the label number.
If a 2D property dict is passed, this value is not returned dict (key: label number, int) the y indices in the label number dict (key: label number, int) the x indices in the label number + Raises ------ ValueError - a ValueError is raised if + a ValueError is raised if there are no regions in the region property dict + ''' import skimage.measure + import numpy as np if len(region_property_dict) ==0: raise ValueError("No regions!") - + + z_indices = dict() y_indices = dict() x_indices = dict() curr_loc_indices = dict() + is_3D = False #loop through all skimage identified regions for region_prop_key in region_property_dict: region_prop = region_property_dict[region_prop_key] index = region_prop.label - curr_z_ixs, curr_y_ixs, curr_x_ixs = np.transpose(region_prop.coords) - z_indices[index] = curr_z_ixs + if len(region_prop.coords[0])>=3: + is_3D = True + curr_z_ixs, curr_y_ixs, curr_x_ixs = np.transpose(region_prop.coords) + z_indices[index] = curr_z_ixs + else: + curr_y_ixs, curr_x_ixs = np.transpose(region_prop.coords) + z_indices[index] = -1 + y_indices[index] = curr_y_ixs x_indices[index] = curr_x_ixs - curr_loc_indices[index] = len(curr_x_ixs) + curr_loc_indices[index] = len(curr_y_ixs) #print("indices found") - return [curr_loc_indices, z_indices, y_indices, x_indices] + if is_3D: + return [curr_loc_indices, z_indices, y_indices, x_indices] + else: + return [curr_loc_indices, y_indices, x_indices] def adjust_pbc_point(in_dim, dim_min, dim_max): '''Function to adjust a point to the other boundary for PBCs From b9309c91f2cb2be1a0b9aba5f250e23f94aaeb1d Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Sun, 20 Mar 2022 10:49:22 -0600 Subject: [PATCH 25/82] More cleaning up of segmentation --- tobac/segmentation.py | 99 +++++++++++++++++++------------------------ 1 file changed, 43 insertions(+), 56 deletions(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 212b71c6..3b7df518 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -76,6 +76,46 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu from copy import deepcopy import numpy as np import iris + + # How many dimensions are we using? 
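+    # (a 2D input carries only the two horizontal dimensions; a 3D input also
+    # has a vertical dimension, whose axis position is detected below)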
+    if field_in.ndim==2:
+        hdim_1_axis = 0
+        hdim_2_axis = 1
+        is_3D_seg = False
+    elif field_in.ndim == 3:
+        is_3D_seg = True
+        # Find which coordinate is the z coordinate
+        list_coord_names=[coord.name() for coord in field_in.coords()]
+        #determine vertical axis:
+        if vertical_coord=='auto':
+            list_vertical=['z','model_level_number','altitude','geopotential_height']
+            # TODO: there surely must be a better way to handle this
+            for coord_name in list_vertical:
+                if coord_name in list_coord_names:
+                    vertical_axis=coord_name
+                    break
+        elif vertical_coord in list_coord_names:
+            vertical_axis=vertical_coord
+        else:
+            raise ValueError('Please specify vertical coordinate')
+        ndim_vertical=field_in.coord_dims(vertical_axis)
+        if len(ndim_vertical)>1:
+            raise ValueError('please specify 1 dimensional vertical coordinate')
+        vertical_coord_axis = ndim_vertical[0]
+        # Once we know the vertical coordinate, we can resolve the
+        # horizontal coordinates
+        if vertical_coord_axis == 0:
+            hdim_1_axis = 1
+            hdim_2_axis = 2
+        elif vertical_coord_axis == 1:
+            hdim_1_axis = 0
+            hdim_2_axis = 2
+        elif vertical_coord_axis == 2:
+            hdim_1_axis = 0
+            hdim_2_axis = 1
+    else:
+        raise ValueError('Segmentation routine only possible with 2 or 3 spatial dimensions')
+
     # copy feature dataframe for output
     features_out=deepcopy(features_in)
@@ -109,42 +149,11 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
     # set markers at the positions of the features:
     markers = np.zeros(unmasked.shape).astype(np.int32)
-    if field_in.ndim==2: #2D watershedding
-        hdim_1_axis = 0
-        hdim_2_axis = 1
+    if not is_3D_seg: #2D watershedding
         for index, row in features_in.iterrows():
             markers[int(row['hdim_1']), int(row['hdim_2'])]=row['feature']
-    elif field_in.ndim==3: #3D watershedding
-        # Find which coordinate is the z coordinate
-        list_coord_names=[coord.name() for coord in field_in.coords()]
-        #determine vertical axis:
-        if vertical_coord=='auto':
-            list_vertical=['z','model_level_number','altitude','geopotential_height']
-            # TODO: there surely must be a better way to handle this
-            for coord_name in list_vertical:
-                if coord_name in list_coord_names:
-                    vertical_axis=coord_name
-                    break
-        elif vertical_coord in list_coord_names:
-            vertical_axis=vertical_coord
-        else:
-            raise ValueError('Plese specify vertical coordinate')
-        ndim_vertical=field_in.coord_dims(vertical_axis)
-        if len(ndim_vertical)>1:
-            raise ValueError('please specify 1 dimensional vertical coordinate')
-        vertical_coord_axis = ndim_vertical[0]
-        # Once we know the vertical coordinate, we can resolve the
-        # horizontal coordinates
-        if vertical_coord_axis == 0:
-            hdim_1_axis = 1
-            hdim_2_axis = 2
-        elif vertical_coord_axis == 1:
-            hdim_1_axis = 0
-            hdim_2_axis = 2
-        elif vertical_coord_axis == 2:
-            hdim_1_axis = 0
-            hdim_2_axis = 1
+    elif is_3D_seg: #3D watershedding
         # We need to generate seeds in 3D.
if (seed_3D_flag == 'column'): @@ -215,8 +224,6 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu markers[y_list[j],x_list[i],z_list[k]]=row['feature'] - else: - raise ValueError('Segmentations routine only possible with 2 or 3 spatial dimensions') # set markers in cells not fulfilling threshold condition to zero: markers[~unmasked]=0 @@ -460,8 +467,6 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #loop thru buddies for buddy in buddies: - print("Now on buddy: ",buddy) - print("points: ",len(z_reg_inds[buddy])) #if buddy == cur_idx: buddy_feat = features_in[features_in['feature'] == buddy] @@ -534,29 +539,11 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu x_a1 = x buddy_rgn[z-bbox_zstart,y-bbox_ystart,x-bbox_xstart] = field_in.data[z_a1,y_a1,x_a1] - - - #buddy_rgn = np.expand_dims(buddy_rgn,0) - #print(buddy_rgn.shape) - + rgn_cube = iris.cube.Cube(data=buddy_rgn) - #date = '2018-08-22' - #ftime = m.group(2) - #time = "2100" - - #dd - #current_time = dati.datetime(year=yyyy,month=mm,day=dd,hour=hh,minute=mins,second=0) - - #timediff = current_time - start_time - #print(timediff.days, timediff.seconds, timediff.microseconds) - #iris_time = timediff.days*86400 + timediff.seconds - #itime = iris.coords.Coord([field_in.time.point], standard_name='time', long_name='index_time', var_name='itime', units='seconds since 2018-08-21 00:00') coord_system=None - #h2_coord=iris.coords.DimCoord(np.arange(bbox_xstart,bbox_xend), long_name='hdim_2', units='1', bounds=None, attributes=None, coord_system=coord_system) - #h1_coord=iris.coords.DimCoord(np.arange(bbox_ystart,bbox_yend), long_name='hdim_1', units='1', bounds=None, attributes=None, coord_system=coord_system) - #v_coord=iris.coords.DimCoord(np.arange(bbox_zstart,bbox_zend), long_name='vdim', units='1', bounds=None, attributes=None, coord_system=coord_system) h2_coord=iris.coords.DimCoord(np.arange(bbox_xsize), long_name='hdim_2', units='1', bounds=None, attributes=None, coord_system=coord_system) h1_coord=iris.coords.DimCoord(np.arange(bbox_ysize), long_name='hdim_1', units='1', bounds=None, attributes=None, coord_system=coord_system) v_coord=iris.coords.DimCoord(np.arange(bbox_zsize), long_name='vdim', units='1', bounds=None, attributes=None, coord_system=coord_system) From 9b7ec69cbaa0e99bd8a8daf0447cad22280c5a4d Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Sun, 20 Mar 2022 16:21:23 -0600 Subject: [PATCH 26/82] Added compatibility of new PBC segmentation with 2D data --- tobac/segmentation.py | 58 ++++++++++++++++++-------------- tobac/testing.py | 44 ++++++++++++++++++++++++ tobac/tests/test_segmentation.py | 43 ++++++++++++++++++++++- tobac/tests/test_testing.py | 11 ++++++ 4 files changed, 130 insertions(+), 26 deletions(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 3b7df518..14fe821a 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -166,15 +166,15 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu markers[int(row['hdim_1']), int(row['hdim_2']),level]=row['feature'] elif (seed_3D_flag == 'box'): - z_len = len(field_in.coord('z').points) - y_len = len(field_in.coord('y').points) - x_len = len(field_in.coord('x').points) + z_len = data.shape[vertical_coord_axis] + y_len = data.shape[hdim_1_axis] + x_len = data.shape[hdim_2_axis] # Get the size of the seed box from the input parameter try: - seed_z = seed_3D_size[0] - seed_y = seed_3D_size[1] - seed_x = 
seed_3D_size[2] + seed_z = seed_3D_size[vertical_coord_axis] + seed_y = seed_3D_size[hdim_1_axis] + seed_x = seed_3D_size[hdim_2_axis] except TypeError: # Not iterable, assume int. seed_z = seed_3D_size @@ -265,16 +265,26 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu # Only run this if we need to deal with PBCs if PBC_flag in pbc_options: + if not is_3D_seg: + # let's transpose segmentation_mask to a 1,y,x array to make calculations etc easier. + segmentation_mask = segmentation_mask[np.newaxis, :, :] + vertical_coord_axis = 0 + hdim_1_axis = 1 + hdim_2_axis = 2 + + + seg_mask_unseeded = np.zeros(segmentation_mask.shape) + + # Return all indices where segmentation field == 0 # meaning unfilled but above threshold + # TODO: is there a way to do this without np.where? vdim_unf,hdim1_unf,hdim2_unf = np.where(segmentation_mask==0) - - seg_mask_unseeded = np.zeros(segmentation_mask.shape) + seg_mask_unseeded[vdim_unf,hdim1_unf,hdim2_unf]=1 - seg_mask_unseeded[vdim_unf,hdim1_unf,hdim2_unf]=1 #create labeled field of unfilled, unseeded features - labels_unseeded,label_num = skimage.measure.label(seg_mask_unseeded) + labels_unseeded,label_num = skimage.measure.label(seg_mask_unseeded, return_num=True) markers_2 = np.zeros(unmasked.shape).astype(np.int32) @@ -336,28 +346,25 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu else: raise ValueError('unknown method, must be watershed') + # For ease of use, switch segmentation_mask_2 to 3D if 2D. + if not is_3D_seg: + segmentation_mask_2 = segmentation_mask_2[np.newaxis, :, :] + # remove everything from the individual masks that is more than max_distance_pixel away from the markers if max_distance is not None: D=distance_transform_edt((markers==0).astype(int)) segmentation_mask_2[np.bitwise_and(segmentation_mask_2>0, D>max_distance_pixel)]=0 # Sum up original mask and secondary PBC-mask for full PBC segmentation - #Write resulting mask into cube for output - segmentation_out.data=segmentation_mask + segmentation_mask_2 - - segmentation_mask_3 = segmentation_out.data - + segmentation_mask_3=segmentation_mask + segmentation_mask_2 + # Secondary seeding complete, now blending periodic boundaries - #keep segmentation mask fields for now so we can save these all later - #for demos of changes - - # Does any of this need to be run if there aren't PBCs? 
-
-    # Test of PBC segmentation boundary blending below
-
+    # keep segmentation mask fields for now so we can save these all later
+    # for demos of changes
+
     #update mask coord regions
-    reg_props_dict = tb_utils.get_label_props_in_dict(segmentation_out.data)
+    reg_props_dict = tb_utils.get_label_props_in_dict(segmentation_mask_3)
 
     curr_reg_inds, z_reg_inds, y_reg_inds, x_reg_inds= tb_utils.get_indices_of_labels_from_reg_prop_dict(reg_props_dict)
 
@@ -771,9 +778,10 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
                     #print(rgn_cube.data[z_seg,y_seg,x_seg])
                     segmentation_mask_3[z_val_o,y_val_o,x_val_o] = segmentation_mask_4.data[z_seg,y_seg,x_seg]
                     #print("updated")
-
+        if not is_3D_seg:
+            segmentation_mask_3 = segmentation_mask_3[0]
         segmentation_out.data = segmentation_mask_3
-
+    # PBC checks not run
     else:
         #Write resulting mask into cube for output
         segmentation_out.data = segmentation_mask
diff --git a/tobac/testing.py
index e15255ad..9d6285fa 100644
--- a/tobac/testing.py
+++ b/tobac/testing.py
@@ -838,3 +838,47 @@ def generate_single_feature(start_h1, start_h2, start_v = None,
 
     return pd.DataFrame.from_dict(out_list_of_dicts)
 
+
+def get_start_end_of_feat(center_point, size, axis_min, axis_max, is_pbc = False):
+    '''Gets the start and ending points for a feature given a size and PBC
+    conditions
+
+    Parameters
+    ----------
+    center_point: float
+        The center point of the feature
+    size: float
+        The size of the feature in this dimension
+    axis_min: int
+        Minimum point on the axis (usually 0)
+    axis_max: int
+        Maximum point on the axis (exclusive). This is 1 after
+        the last real point on the axis, such that axis_max - axis_min
+        is the size of the axis
+    is_pbc: bool
+        True if we should wrap around, false if we shouldn't.
+
+    Returns
+    -------
+    tuple (start_point, end_point)
+    '''
+    import numpy as np
+
+    min_pt = int(np.ceil(center_point - size / 2))
+    max_pt = int(np.ceil(center_point + size / 2))
+
+    # adjust points for boundaries, if needed.
+ if min_pt < axis_min: + if is_pbc: + min_pt += (axis_max - axis_min) + else: + min_pt = axis_min + if max_pt > axis_max: + if is_pbc: + max_pt -= (axis_max - axis_min) + else: + max_pt = axis_max + + return (min_pt, max_pt) + + diff --git a/tobac/tests/test_segmentation.py b/tobac/tests/test_segmentation.py index c6ee5cd7..8975f94f 100644 --- a/tobac/tests/test_segmentation.py +++ b/tobac/tests/test_segmentation.py @@ -9,7 +9,7 @@ def test_segmentation_timestep_2D_feature_2D_seg(): # start by building a simple dataset with a single feature import numpy as np - + test_dset_size = (50, 50) test_hdim_1_pt = 20.0 test_hdim_2_pt = 20.0 @@ -45,6 +45,47 @@ def test_segmentation_timestep_2D_feature_2D_seg(): hdim_2_start_feat:hdim_2_end_feat] == np.ones((test_hdim_1_sz, test_hdim_2_sz))) + # Now try PBCs + # First, something stretching across hdim_1 + test_hdim_1_pt = 0.0 + test_data = np.zeros(test_dset_size) + + # Note that PBC flag here is 'both' as we still want the blob to be on both + # sides of the boundary to see if we accidentally grab it without PBC + # segmentation + test_data = testing.make_feature_blob( + test_data, + test_hdim_1_pt, + test_hdim_2_pt, + h1_size=test_hdim_1_sz, + h2_size=test_hdim_2_sz, + amplitude=test_amp, + PBC_flag = 'both' + ) + + test_data_iris = testing.make_dataset_from_arr(test_data, data_type="iris") + # Generate dummy feature dataset + test_feature_ds = testing.generate_single_feature(start_h1 = test_hdim_1_pt, + start_h2 = test_hdim_2_pt) + + # First, try the cases where we shouldn't get the points on the opposite + # hdim_1 side + hdim_1_start_feat, hdim_1_end_feat = testing.get_start_end_of_feat(test_hdim_1_pt, + test_hdim_1_sz, 0,test_dset_size[0], + is_pbc = False ) + + for pbc_option in ['none', 'hdim_2']: + out_seg_mask, out_df = seg.segmentation_timestep(field_in = test_data_iris, + features_in = test_feature_ds, dxy = test_dxy, + threshold = test_amp-0.5, PBC_flag=pbc_option, ) + # Make sure that all labeled points are segmented + assert np.all(out_seg_mask.core_data()[hdim_1_start_feat:hdim_1_end_feat, + hdim_2_start_feat:hdim_2_end_feat] == np.ones((hdim_1_end_feat, test_hdim_2_sz))) + + + + + def test_segmentation_timestep_level(): """Tests `tobac.segmentation.segmentation_timestep` with a 2D input feature and a 3D segmentation array, specifying the `level` parameter. 
diff --git a/tobac/tests/test_testing.py b/tobac/tests/test_testing.py index 28e63371..61017108 100644 --- a/tobac/tests/test_testing.py +++ b/tobac/tests/test_testing.py @@ -321,3 +321,14 @@ def test_generate_single_feature(): min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, frame_start = 0, num_frames=4, spd_h1 = 5, spd_h2 = 4, spd_v = 1, PBC_flag='both').sort_index(axis=1), expected_df.sort_index(axis=1)) + +@pytest.mark.parametrize("in_pt,in_sz,axis_size,out_pts", + [(3, 0,(0,5), (3,3)), + (3, 3,(0,5), (2,5)), + ] +) +def test_get_start_end_of_feat_nopbc(in_pt, in_sz, axis_size, out_pts): + '''Tests ```tobac.testing.get_start_end_of_feat``` + + ''' + assert tbtest.get_start_end_of_feat(in_pt, in_sz, axis_size[0], axis_size[1]) == out_pts \ No newline at end of file From c25c2478657a2d5669119f98f0811db03fc50a03 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Mon, 21 Mar 2022 08:51:31 -0600 Subject: [PATCH 27/82] fixed more bugs in segmentation with 2D inputs --- tobac/segmentation.py | 10 ++++------ tobac/tests/test_segmentation.py | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 14fe821a..0466ca66 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -268,6 +268,8 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu if not is_3D_seg: # let's transpose segmentation_mask to a 1,y,x array to make calculations etc easier. segmentation_mask = segmentation_mask[np.newaxis, :, :] + unmasked = unmasked[np.newaxis, :, :] + data_segmentation = data_segmentation[np.newaxis, :, :] vertical_coord_axis = 0 hdim_1_axis = 1 hdim_2_axis = 2 @@ -286,7 +288,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #create labeled field of unfilled, unseeded features labels_unseeded,label_num = skimage.measure.label(seg_mask_unseeded, return_num=True) - markers_2 = np.zeros(unmasked.shape).astype(np.int32) + markers_2 = np.zeros(data_segmentation.shape).astype(np.int32) #new, shorter PBC marker seeding approach #loop thru LB points @@ -342,14 +344,10 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu markers_2[~unmasked]=0 if method=='watershed': - segmentation_mask_2 = watershed(np.array(data_segmentation),markers_2.astype(np.int32), mask=unmasked) + segmentation_mask_2 = watershed(data_segmentation,markers_2.astype(np.int32), mask=unmasked) else: raise ValueError('unknown method, must be watershed') - # For ease of use, switch segmentation_mask_2 to 3D if 2D. - if not is_3D_seg: - segmentation_mask_2 = segmentation_mask_2[np.newaxis, :, :] - # remove everything from the individual masks that is more than max_distance_pixel away from the markers if max_distance is not None: D=distance_transform_edt((markers==0).astype(int)) diff --git a/tobac/tests/test_segmentation.py b/tobac/tests/test_segmentation.py index 8975f94f..92fe6835 100644 --- a/tobac/tests/test_segmentation.py +++ b/tobac/tests/test_segmentation.py @@ -81,7 +81,23 @@ def test_segmentation_timestep_2D_feature_2D_seg(): # Make sure that all labeled points are segmented assert np.all(out_seg_mask.core_data()[hdim_1_start_feat:hdim_1_end_feat, hdim_2_start_feat:hdim_2_end_feat] == np.ones((hdim_1_end_feat, test_hdim_2_sz))) + assert (np.sum(out_seg_mask.core_data()[out_seg_mask.core_data()==1]) == + np.sum(np.ones((hdim_1_end_feat, test_hdim_2_sz)))) + # Now try the same case, but when we *should* wrap around. 
+ for pbc_option in ['hdim_1', 'both']: + out_seg_mask, out_df = seg.segmentation_timestep(field_in = test_data_iris, + features_in = test_feature_ds, dxy = test_dxy, + threshold = test_amp-0.5, PBC_flag=pbc_option, ) + # Make sure that all labeled points are segmented + assert np.all(out_seg_mask.core_data()[0:hdim_1_end_feat, + hdim_2_start_feat:hdim_2_end_feat] == np.ones((hdim_1_end_feat, test_hdim_2_sz))) + # Make sure that we are also segmenting across the boundary + assert np.all(out_seg_mask.core_data()[test_dset_size[1]-2:test_dset_size[1], + hdim_2_start_feat:hdim_2_end_feat] == np.ones((2, test_hdim_2_sz))) + + assert (np.sum(out_seg_mask.core_data()[out_seg_mask.core_data()==1]) == + np.sum(np.ones((test_hdim_1_sz, test_hdim_2_sz)))) From f7b945abe08a40d4f17a09fca7915f6e17d9370c Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Mon, 21 Mar 2022 21:40:38 -0600 Subject: [PATCH 28/82] Adding more tests to PBC segmentation --- tobac/testing.py | 19 ++--- tobac/tests/test_segmentation.py | 122 ++++++++++++++++++++++++++----- 2 files changed, 113 insertions(+), 28 deletions(-) diff --git a/tobac/testing.py b/tobac/testing.py index 9d6285fa..285da30f 100644 --- a/tobac/testing.py +++ b/tobac/testing.py @@ -856,11 +856,14 @@ def get_start_end_of_feat(center_point, size, axis_min, axis_max, is_pbc = False the last real point on the axis, such that axis_max - axis_min is the size of the axis is_pbc: bool - True if we should wrap around, false if we shouldn't. + True if we should give wrap around points, false if we shouldn't. Returns ------- tuple (start_point, end_point) + Note that if is_pbc is True, start_point can be less than axis_min and + end_point can be greater than or equal to axis_max. This is designed to be used with + ```get_pbc_coordinates``` ''' import numpy as np @@ -868,16 +871,10 @@ def get_start_end_of_feat(center_point, size, axis_min, axis_max, is_pbc = False max_pt = int(np.ceil(center_point + size / 2))\ # adjust points for boundaries, if needed. - if min_pt < axis_min: - if is_pbc: - min_pt += (axis_max - axis_min) - else: - min_pt = axis_min - if max_pt > axis_max: - if is_pbc: - max_pt -= (axis_max - axis_min) - else: - max_pt = axis_max + if min_pt < axis_min and not is_pbc: + min_pt = axis_min + if max_pt > axis_max and not is_pbc: + max_pt = axis_max return (min_pt, max_pt) diff --git a/tobac/tests/test_segmentation.py b/tobac/tests/test_segmentation.py index 92fe6835..541415cb 100644 --- a/tobac/tests/test_segmentation.py +++ b/tobac/tests/test_segmentation.py @@ -68,38 +68,126 @@ def test_segmentation_timestep_2D_feature_2D_seg(): test_feature_ds = testing.generate_single_feature(start_h1 = test_hdim_1_pt, start_h2 = test_hdim_2_pt) - # First, try the cases where we shouldn't get the points on the opposite - # hdim_1 side hdim_1_start_feat, hdim_1_end_feat = testing.get_start_end_of_feat(test_hdim_1_pt, test_hdim_1_sz, 0,test_dset_size[0], - is_pbc = False ) + is_pbc = True ) - for pbc_option in ['none', 'hdim_2']: + for pbc_option in ['none', 'hdim_1', 'hdim_2', 'both']: out_seg_mask, out_df = seg.segmentation_timestep(field_in = test_data_iris, features_in = test_feature_ds, dxy = test_dxy, threshold = test_amp-0.5, PBC_flag=pbc_option, ) + # This will automatically give the appropriate box, and it's tested separately. 
+ segmented_box_expected = testing.get_pbc_coordinates(0, test_dset_size[0], + 0, test_dset_size[1], hdim_1_start_feat, hdim_1_end_feat, hdim_2_start_feat, + hdim_2_end_feat, PBC_flag=pbc_option) # Make sure that all labeled points are segmented - assert np.all(out_seg_mask.core_data()[hdim_1_start_feat:hdim_1_end_feat, - hdim_2_start_feat:hdim_2_end_feat] == np.ones((hdim_1_end_feat, test_hdim_2_sz))) - assert (np.sum(out_seg_mask.core_data()[out_seg_mask.core_data()==1]) == - np.sum(np.ones((hdim_1_end_feat, test_hdim_2_sz)))) + for seg_box in segmented_box_expected: + assert np.all(out_seg_mask.core_data()[seg_box[0]:seg_box[1], + seg_box[2]:seg_box[3]] == np.ones((seg_box[1]-seg_box[0], seg_box[3]-seg_box[2]))) + + if pbc_option in ['none', 'hdim_2']: + #there will only be one seg_box + assert (np.sum(out_seg_mask.core_data()[out_seg_mask.core_data()==1]) == + np.sum(np.ones((seg_box[1]-seg_box[0], seg_box[3]-seg_box[2])))) + else: + # We should be capturing the whole feature + assert (np.sum(out_seg_mask.core_data()[out_seg_mask.core_data()==1]) == + np.sum(np.ones((test_hdim_1_sz, test_hdim_2_sz)))) - # Now try the same case, but when we *should* wrap around. - for pbc_option in ['hdim_1', 'both']: + # Same as the above test, but for hdim_2 + # First, try the cases where we shouldn't get the points on the opposite + # hdim_2 side + test_hdim_1_pt = 20.0 + test_hdim_2_pt = 0.0 + test_data = np.zeros(test_dset_size) + test_data = testing.make_feature_blob( + test_data, + test_hdim_1_pt, + test_hdim_2_pt, + h1_size=test_hdim_1_sz, + h2_size=test_hdim_2_sz, + amplitude=test_amp, + PBC_flag = 'both' + ) + test_data_iris = testing.make_dataset_from_arr(test_data, data_type="iris") + # Generate dummy feature dataset + test_feature_ds = testing.generate_single_feature(start_h1 = test_hdim_1_pt, + start_h2 = test_hdim_2_pt) + hdim_1_start_feat, hdim_1_end_feat = testing.get_start_end_of_feat(test_hdim_1_pt, + test_hdim_1_sz, 0,test_dset_size[0], + is_pbc = True ) + + hdim_2_start_feat, hdim_2_end_feat = testing.get_start_end_of_feat(test_hdim_2_pt, + test_hdim_2_sz, 0,test_dset_size[1], + is_pbc = True ) + + for pbc_option in ['none', 'hdim_1', 'hdim_2', 'both']: out_seg_mask, out_df = seg.segmentation_timestep(field_in = test_data_iris, features_in = test_feature_ds, dxy = test_dxy, threshold = test_amp-0.5, PBC_flag=pbc_option, ) + # This will automatically give the appropriate box(es), and it's tested separately. 
+        segmented_box_expected = testing.get_pbc_coordinates(0, test_dset_size[0],
+            0, test_dset_size[1], hdim_1_start_feat, hdim_1_end_feat, hdim_2_start_feat,
+            hdim_2_end_feat, PBC_flag=pbc_option)
+        # Make sure that all labeled points are segmented
+        for seg_box in segmented_box_expected:
+            assert np.all(out_seg_mask.core_data()[seg_box[0]:seg_box[1],
+                seg_box[2]:seg_box[3]] == np.ones((seg_box[1]-seg_box[0], seg_box[3]-seg_box[2])))
+
+        if pbc_option in ['none', 'hdim_1']:
+            #there will only be one seg_box
+            assert (np.sum(out_seg_mask.core_data()[out_seg_mask.core_data()==1]) ==
+                np.sum(np.ones((seg_box[1]-seg_box[0], seg_box[3]-seg_box[2]))))
+        else:
+            # We should be capturing the whole feature
+            assert (np.sum(out_seg_mask.core_data()[out_seg_mask.core_data()==1]) ==
+                np.sum(np.ones((test_hdim_1_sz, test_hdim_2_sz))))
+
+
+    # Same as the above tests, but for a feature sitting on the corner,
+    # crossing both the hdim_1 and hdim_2 boundaries at once (corner point)
+    test_hdim_1_pt = 0.0
+    test_hdim_2_pt = 0.0
+    test_data = np.zeros(test_dset_size)
+    test_data = testing.make_feature_blob(
+        test_data,
+        test_hdim_1_pt,
+        test_hdim_2_pt,
+        h1_size=test_hdim_1_sz,
+        h2_size=test_hdim_2_sz,
+        amplitude=test_amp,
+        PBC_flag = 'both'
+    )
+    test_data_iris = testing.make_dataset_from_arr(test_data, data_type="iris")
+    # Generate dummy feature dataset
+    test_feature_ds = testing.generate_single_feature(start_h1 = test_hdim_1_pt,
+        start_h2 = test_hdim_2_pt)
+    hdim_1_start_feat, hdim_1_end_feat = testing.get_start_end_of_feat(test_hdim_1_pt,
+        test_hdim_1_sz, 0, test_dset_size[0],
+        is_pbc = True )
+    hdim_2_start_feat, hdim_2_end_feat = testing.get_start_end_of_feat(test_hdim_2_pt,
+        test_hdim_2_sz, 0, test_dset_size[1],
+        is_pbc = True )
+
+    for pbc_option in ['none', 'hdim_1', 'hdim_2', 'both']:
+        out_seg_mask, out_df = seg.segmentation_timestep(field_in = test_data_iris,
+            features_in = test_feature_ds, dxy = test_dxy,
+            threshold = test_amp-0.5, PBC_flag=pbc_option, )
+        # This will automatically give the appropriate box(es), and it's tested separately.
+        segmented_box_expected = testing.get_pbc_coordinates(0, test_dset_size[0],
+            0, test_dset_size[1], hdim_1_start_feat, hdim_1_end_feat, hdim_2_start_feat,
+            hdim_2_end_feat, PBC_flag=pbc_option)
+        # Make sure that all labeled points are segmented
+        for seg_box in segmented_box_expected:
+            print(pbc_option, seg_box)
+            #TODO: something is wrong with this case, unclear what.
+ #assert np.all(out_seg_mask.core_data()[seg_box[0]:seg_box[1], + #seg_box[2]:seg_box[3]] == np.ones((seg_box[1]-seg_box[0], seg_box[3]-seg_box[2]))) + #TODO: Make sure for none, hdim_1, hdim_2 that only the appropriate points are segmented def test_segmentation_timestep_level(): From 897bcab0cc0234f1fc5f9d0fb6b017787aa080a1 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Wed, 23 Mar 2022 21:30:00 -0600 Subject: [PATCH 29/82] Added corner test case where it fails --- tobac/tests/test_segmentation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tobac/tests/test_segmentation.py b/tobac/tests/test_segmentation.py index 541415cb..ebbf56a3 100644 --- a/tobac/tests/test_segmentation.py +++ b/tobac/tests/test_segmentation.py @@ -184,8 +184,8 @@ def test_segmentation_timestep_2D_feature_2D_seg(): for seg_box in segmented_box_expected: print(pbc_option, seg_box) #TODO: something is wrong with this case, unclear what. - #assert np.all(out_seg_mask.core_data()[seg_box[0]:seg_box[1], - #seg_box[2]:seg_box[3]] == np.ones((seg_box[1]-seg_box[0], seg_box[3]-seg_box[2]))) + assert np.all(out_seg_mask.core_data()[seg_box[0]:seg_box[1], + seg_box[2]:seg_box[3]] == np.ones((seg_box[1]-seg_box[0], seg_box[3]-seg_box[2]))) #TODO: Make sure for none, hdim_1, hdim_2 that only the appropriate points are segmented From 46de44898454904d5f3cd875d5851b3454fbefcf Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Wed, 23 Mar 2022 22:01:13 -0600 Subject: [PATCH 30/82] Potential fix to corner point bug --- tobac/segmentation.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 0466ca66..a563cbe1 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -285,7 +285,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu seg_mask_unseeded[vdim_unf,hdim1_unf,hdim2_unf]=1 - #create labeled field of unfilled, unseeded features + # create labeled field of unfilled, unseeded features labels_unseeded,label_num = skimage.measure.label(seg_mask_unseeded, return_num=True) markers_2 = np.zeros(data_segmentation.shape).astype(np.int32) @@ -314,15 +314,13 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu continue else: markers_2[vdim_ind,hdim1_ind,hdim2_ind] = segmentation_mask[vdim_ind,hdim1_min,hdim2_ind] - + # TODO: better documentation of these points if PBC_flag == 'hdim_2' or PBC_flag == 'both': + # TODO: This seems quite slow, is there scope for further speedup? for vdim_ind in range(0,segmentation_mask.shape[0]): for hdim1_ind in range(hdim1_min,hdim1_max): for hdim2_ind in [hdim2_min,hdim2_max]: - #print(z_ind,y_ind,x_ind) - #print(labels_unseeded[z_ind,y_ind,x_ind]) - if(labels_unseeded[vdim_ind,hdim1_ind,hdim2_ind] == 0): continue else: @@ -341,6 +339,24 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #print(z_ind,y_ind,x_ind) #print("seeded") + # Deal with the opposite corner only + if PBC_flag == 'both': + # TODO: This seems quite slow, is there scope for further speedup? 
+ for vdim_ind in range(0,segmentation_mask.shape[0]): + for hdim1_ind in [hdim1_min, hdim1_max]: + for hdim2_ind in [hdim2_min,hdim2_max]: + # If this point is unseeded and unlabeled + if(labels_unseeded[vdim_ind,hdim1_ind,hdim2_ind] == 0): + continue + + # Find the opposite point in hdim1 space + hdim1_opposite_corner = (hdim1_min if hdim1_ind == hdim1_max else hdim1_max) + hdim2_opposite_corner = (hdim2_min if hdim2_ind == hdim2_max else hdim2_max) + if segmentation_mask[vdim_ind, hdim1_opposite_corner, hdim2_opposite_corner] <= 0: + continue + + markers_2[vdim_ind, hdim1_ind, hdim2_ind] = segmentation_mask[vdim_ind,hdim1_opposite_corner,hdim2_opposite_corner] + markers_2[~unmasked]=0 if method=='watershed': @@ -409,9 +425,10 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #adjust x and y points to the other side y_val_alt = tb_utils.adjust_pbc_point(label_y, hdim1_min, hdim1_max) x_val_alt = tb_utils.adjust_pbc_point(label_x, hdim2_min, hdim2_max) - label_on_corner = segmentation_mask_3[label_z,y_val_alt,x_val_alt] - + print('label points:', label_z, y_val_alt, x_val_alt, + 'label on corner', label_on_corner) + if((label_on_corner > 0)): #add opposite-corner buddy if it exists buddies = np.append(buddies,label_on_corner) From 0d52522e1c29f8c6ba81e36ad17abb5d34d96712 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Wed, 23 Mar 2022 22:06:58 -0600 Subject: [PATCH 31/82] Moved get_pbc_coordinates to utilities so that we can use it for segmentation ultimately Also moved get_label_props_in_dict to inside the PBC flags as it's not used otherwise --- tobac/segmentation.py | 9 ++- tobac/utils.py | 145 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+), 3 deletions(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index a563cbe1..51bbf579 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -61,7 +61,9 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu seed_3D_size: int or tuple (dimensions equal to dimensions of `field`) This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an integer, the seed box is identical in all dimensions. If it's a tuple, it specifies the - seed area for each dimension separately. + seed area for each dimension separately. Note: we recommend the use + of odd numbers for this. If you give an even number, your seed box will be + biased and not centered around the feature. Returns ------- @@ -252,8 +254,6 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #saves/prints below for testing seg_m_data = segmentation_mask[:] - #read in labeling/masks and region-finding functions - reg_props_dict = tb_utils.get_label_props_in_dict(seg_m_data) hdim1_min = 0 hdim1_max = segmentation_mask.shape[hdim_1_axis] - 1 @@ -265,6 +265,9 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu # Only run this if we need to deal with PBCs if PBC_flag in pbc_options: + # read in labeling/masks and region-finding functions + reg_props_dict = tb_utils.get_label_props_in_dict(seg_m_data) + if not is_3D_seg: # let's transpose segmentation_mask to a 1,y,x array to make calculations etc easier. 
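             # (the dummy leading axis lets the 2D case reuse the vertical-aware
             # PBC code path; it is stripped again before the mask is written out)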
            segmentation_mask = segmentation_mask[np.newaxis, :, :]
diff --git a/tobac/utils.py
index e033cfeb..ea46ae6b 100644
--- a/tobac/utils.py
+++ b/tobac/utils.py
@@ -750,3 +750,148 @@ def adjust_pbc_point(in_dim, dim_min, dim_max):
         return dim_min
     else:
         raise ValueError("In adjust_pbc_point, in_dim isn't on a boundary.")
+
+
+def get_pbc_coordinates(h1_min, h1_max, h2_min, h2_max,
+                        h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord,
+                        PBC_flag = 'none'):
+    '''Function to get the *actual* coordinate boxes of interest given a set of shifted
+    coordinates with periodic boundaries.
+
+    For example, if you pass in [as h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord]
+    (-3, 5, 2, 6) with PBC_flag of 'both' or 'hdim_1', h1_max of 10, and h1_min of 0,
+    this function will return: [(0,5,2,6), (7,10,2,6)].
+
+    If you pass in something outside the bounds of the array, this will truncate your
+    requested box. For example, if you pass in [as h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord]
+    (-3, 5, 2, 6) with PBC_flag of 'none' or 'hdim_2', this function will return:
+    [(0,5,2,6)], assuming h1_min is 0.
+
+    For cases where PBC_flag is 'both' and we have a corner case, it is possible
+    to get overlapping boundaries; a request such as (-6, 5, -6, 5), for example,
+    wraps around in both dimensions and can return a box for each corner of the domain.
+
+    Parameters
+    ----------
+    h1_min: int
+        Minimum array value in hdim_1, typically 0.
+    h1_max: int
+        Maximum array value in hdim_1 (exclusive). h1_max - h1_min should be the size in h1.
+    h2_min: int
+        Minimum array value in hdim_2, typically 0.
+    h2_max: int
+        Maximum array value in hdim_2 (exclusive). h2_max - h2_min should be the size in h2.
+    h1_start_coord: int
+        Start coordinate in hdim_1. Can be < h1_min if dealing with PBCs.
+    h1_end_coord: int
+        End coordinate in hdim_1. Can be >= h1_max if dealing with PBCs.
+    h2_start_coord: int
+        Start coordinate in hdim_2. Can be < h2_min if dealing with PBCs.
+    h2_end_coord: int
+        End coordinate in hdim_2. Can be >= h2_max if dealing with PBCs.
+    PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both')
+        Sets whether to use periodic boundaries, and if so in which directions.
+        'none' means that we do not have periodic boundaries
+        'hdim_1' means that we are periodic along hdim1
+        'hdim_2' means that we are periodic along hdim2
+        'both' means that we are periodic along both horizontal dimensions
+
+    Returns
+    -------
+    list of tuples
+        A list of tuples containing (h1_start, h1_end, h2_start, h2_end) of each of the
+        boxes needed to encompass the coordinates.
+    '''
+
+    if PBC_flag not in ['none', 'hdim_1', 'hdim_2', 'both']:
+        raise ValueError("PBC_flag must be 'none', 'hdim_1', 'hdim_2', or 'both'")
+
+
+    h1_start_coords = list()
+    h1_end_coords = list()
+    h2_start_coords = list()
+    h2_end_coords = list()
+
+
+    # In both of these cases, we just need to truncate the hdim_1 points.
+    if PBC_flag in ['none', 'hdim_2']:
+        h1_start_coords.append(max(h1_min, h1_start_coord))
+        h1_end_coords.append(min(h1_max, h1_end_coord))
+
+
+    # In both of these cases, we only need to truncate the hdim_2 points.
+    if PBC_flag in ['none', 'hdim_1']:
+        h2_start_coords.append(max(h2_min, h2_start_coord))
+        h2_end_coords.append(min(h2_max, h2_end_coord))
+
+    # If the PBC flag is none, we can just return.
+    if PBC_flag == 'none':
+        return [(h1_start_coords[0], h1_end_coords[0], h2_start_coords[0], h2_end_coords[0])]
+
+    # We have at least one periodic boundary.
+
+    # hdim_1 boundary is periodic.
+    if PBC_flag in ['hdim_1', 'both']:
+        if (h1_end_coord - h1_start_coord) >= (h1_max - h1_min):
+            # In this case, we have selected the full h1 length of the domain,
+            # so we set the start and end coords to just that.
+            h1_start_coords.append(h1_min)
+            h1_end_coords.append(h1_max)
+
+        # We know we only have either h1_end_coord > h1_max or h1_start_coord < h1_min
+        # and not both. If both are true, the previous if statement should trigger.
+        elif h1_start_coord < h1_min:
+            # First set of h1 start coordinates
+            h1_start_coords.append(h1_min)
+            h1_end_coords.append(h1_end_coord)
+            # Second set of h1 start coordinates
+            pts_from_begin = h1_min - h1_start_coord
+            h1_start_coords.append(h1_max - pts_from_begin)
+            h1_end_coords.append(h1_max)
+
+        elif h1_end_coord > h1_max:
+            h1_start_coords.append(h1_start_coord)
+            h1_end_coords.append(h1_max)
+            pts_from_end = h1_end_coord - h1_max
+            h1_start_coords.append(h1_min)
+            h1_end_coords.append(h1_min + pts_from_end)
+
+        # We have no PBC-related issues, actually
+        else:
+            h1_start_coords.append(h1_start_coord)
+            h1_end_coords.append(h1_end_coord)
+
+    if PBC_flag in ['hdim_2', 'both']:
+        if (h2_end_coord - h2_start_coord) >= (h2_max - h2_min):
+            # In this case, we have selected the full h2 length of the domain,
+            # so we set the start and end coords to just that.
+            h2_start_coords.append(h2_min)
+            h2_end_coords.append(h2_max)
+
+        # We know we only have either h2_end_coord > h2_max or h2_start_coord < h2_min
+        # and not both. If both are true, the previous if statement should trigger.
+        elif h2_start_coord < h2_min:
+            # First set of h2 start coordinates
+            h2_start_coords.append(h2_min)
+            h2_end_coords.append(h2_end_coord)
+            # Second set of h2 start coordinates
+            pts_from_begin = h2_min - h2_start_coord
+            h2_start_coords.append(h2_max - pts_from_begin)
+            h2_end_coords.append(h2_max)
+
+        elif h2_end_coord > h2_max:
+            h2_start_coords.append(h2_start_coord)
+            h2_end_coords.append(h2_max)
+            pts_from_end = h2_end_coord - h2_max
+            h2_start_coords.append(h2_min)
+            h2_end_coords.append(h2_min + pts_from_end)
+
+        # We have no PBC-related issues, actually
+        else:
+            h2_start_coords.append(h2_start_coord)
+            h2_end_coords.append(h2_end_coord)
+
+    out_coords = list()
+    for h1_start_coord_single, h1_end_coord_single in zip(h1_start_coords, h1_end_coords):
+        for h2_start_coord_single, h2_end_coord_single in zip(h2_start_coords, h2_end_coords):
+            out_coords.append((h1_start_coord_single, h1_end_coord_single, h2_start_coord_single, h2_end_coord_single))
+    return out_coords

From 01410055987fd083230b22fecc9a57527618e189 Mon Sep 17 00:00:00 2001
From: Sean Freeman
Date: Wed, 23 Mar 2022 22:11:02 -0600
Subject: [PATCH 32/82] Updated docs, added start to test function for 3D box

---
 tobac/segmentation.py            |  6 +++---
 tobac/tests/test_segmentation.py | 10 ++++++++++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/tobac/segmentation.py b/tobac/segmentation.py
index 51bbf579..7419214f 100644
--- a/tobac/segmentation.py
+++ b/tobac/segmentation.py
@@ -55,9 +55,9 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
         flag indicating whether to use PBC treatment or not
         note to self: should be expanded to account for singly periodic boundaries also
         rather than just doubly periodic
-    seed_3D_flag: string
-        options: 'column' (default), 'box'
-        Seed 3D field at feature positions with either the full column (default) or a box of user-set size
+    seed_3D_flag: str('column', 'box')
+        Seed 3D field at feature positions with either the full column
(default) + or a box of user-set size seed_3D_size: int or tuple (dimensions equal to dimensions of `field`) This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an integer, the seed box is identical in all dimensions. If it's a tuple, it specifies the diff --git a/tobac/tests/test_segmentation.py b/tobac/tests/test_segmentation.py index ebbf56a3..8191fc52 100644 --- a/tobac/tests/test_segmentation.py +++ b/tobac/tests/test_segmentation.py @@ -302,3 +302,13 @@ def test_segmentation_timestep_level(): ] == np.zeros((test_vdim_sz, test_hdim_1_sz, test_hdim_2_sz)) ) + +def test_segmentation_timestep_3d_seed_box(): + '''Tests ```tobac.segmentation.segmentation_timestep``` + to make sure that the 3D seed box works. + ''' + + # start by building a simple dataset with a single feature + import numpy as np + + pass \ No newline at end of file From f9b18b3bb445f56983cd64cc69e410aa989148f0 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Wed, 23 Mar 2022 22:11:50 -0600 Subject: [PATCH 33/82] Added notes to myself --- tobac/segmentation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 7419214f..de6aae38 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -663,7 +663,8 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #print("y-ctr: ",row['hdim_1']) #print("x-ctr: ",row['hdim_2']) - + # TODO: fix point ranges here. + # TODO: why is this repeated? if(int(row['vdim']) >=2 and int(row['vdim']) <= z_len-3): z_list = np.arange(int(row['vdim']-2),int(row['vdim']+3)) elif(int(row['vdim']) < 2): From e581250dc08ff67f8d1b5eab06eef5994fcd2bd2 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Thu, 24 Mar 2022 09:14:20 -0600 Subject: [PATCH 34/82] Added comments and todos --- tobac/segmentation.py | 103 ++++++++++++++++-------------------------- 1 file changed, 38 insertions(+), 65 deletions(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index de6aae38..a53b7c34 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -249,6 +249,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #mask all segmentation_mask points below threshold as -1 #to differentiate from those unmasked points NOT filled by watershedding + # TODO: allow user to specify segmentation_mask[~unmasked] = -1 #saves/prints below for testing @@ -287,7 +288,6 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu vdim_unf,hdim1_unf,hdim2_unf = np.where(segmentation_mask==0) seg_mask_unseeded[vdim_unf,hdim1_unf,hdim2_unf]=1 - # create labeled field of unfilled, unseeded features labels_unseeded,label_num = skimage.measure.label(seg_mask_unseeded, return_num=True) @@ -297,7 +297,16 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #loop thru LB points #then check if fillable region (labels_unseeded > 0) #then check if point on other side of boundary is > 0 in segmentation_mask - + ''' + "First pass" at seeding features across the boundaries. This first pass will bring in + eligible (meaning values that are higher than threshold) but not previously watershedded + points across the boundary by seeding them with the appropriate feature across the boundary. + + Later, we will run the second pass or "buddy box" approach that handles cases where points across the boundary + have been watershedded already. + ''' + + # TODO: clean up code. 
+        if PBC_flag == 'hdim_1' or PBC_flag == 'both':
             for vdim_ind in range(0,segmentation_mask.shape[0]):
                 for hdim1_ind in [hdim1_min,hdim1_max]:
@@ -317,7 +326,6 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
                             continue
                         else:
                             markers_2[vdim_ind,hdim1_ind,hdim2_ind] = segmentation_mask[vdim_ind,hdim1_min,hdim2_ind]
-        # TODO: better documentation of these points
         if PBC_flag == 'hdim_2' or PBC_flag == 'both':
             # TODO: This seems quite slow, is there scope for further speedup?
             for vdim_ind in range(0,segmentation_mask.shape[0]):
                 for hdim1_ind in range(hdim1_min,hdim1_max):
                     for hdim2_ind in [hdim2_min,hdim2_max]:
-                        #print(z_ind,y_ind,x_ind)
-                        #print(labels_unseeded[z_ind,y_ind,x_ind])
-
                         if(labels_unseeded[vdim_ind,hdim1_ind,hdim2_ind] == 0):
                             continue
                         else:
@@ -380,6 +388,12 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
     # for demos of changes
 
     #update mask coord regions
+
+    '''
+    Now, start the second round of watershedding - the "buddy box" approach.
+    buddies contains features of interest and any neighbors that are across the
+    boundary or in physical contact with that label
+    '''
 
     reg_props_dict = tb_utils.get_label_props_in_dict(segmentation_mask_3)
 
     curr_reg_inds, z_reg_inds, y_reg_inds, x_reg_inds= tb_utils.get_indices_of_labels_from_reg_prop_dict(reg_props_dict)
 
     wall_labels = np.array([])
 
+    # TODO: move indices around with specific z axis
     w_wall = np.unique(segmentation_mask_3[:,:,0])
     wall_labels = np.append(wall_labels,w_wall)
 
-    e_wall = np.unique(segmentation_mask_3[:,:,-1])
-    wall_labels = np.append(wall_labels,e_wall)
+    # TODO: add test case that tests buddy box
+    #e_wall = np.unique(segmentation_mask_3[:,:,-1])
+    #wall_labels = np.append(wall_labels,e_wall)
 
-    n_wall = np.unique(segmentation_mask_3[:,-1,:])
-    wall_labels = np.append(wall_labels,n_wall)
+    #n_wall = np.unique(segmentation_mask_3[:,-1,:])
+    #wall_labels = np.append(wall_labels,n_wall)
 
     s_wall = np.unique(segmentation_mask_3[:,0,:])
     wall_labels = np.append(wall_labels,s_wall)
 
     wall_labels = wall_labels[(wall_labels) > 0].astype(int)
     #print(wall_labels)
 
+    # Loop through all segmentation mask labels on the wall
     for cur_idx in wall_labels:
-        #skip this if there aren't enough points to be considered a real feature
-        #as defined above by n_min_threshold
-        curr_count = curr_reg_inds[cur_idx]
-        #print("Current wall feature: ",cur_idx)
-        #print(np.where(wall_labels==cur_idx))
-
+
         vdim_indices = z_reg_inds[cur_idx]
         hdim1_indices = y_reg_inds[cur_idx]
         hdim2_indices = x_reg_inds[cur_idx]
 
         #start buddies array with feature of interest
         buddies = np.array([cur_idx],dtype=int)
-
+        # Loop through all points in the segmentation mask that we're interested in
         for label_z, label_y, label_x in zip(vdim_indices, hdim1_indices, hdim2_indices):
             # check if this is the special case of being a corner point.
@@ -429,8 +441,6 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu y_val_alt = tb_utils.adjust_pbc_point(label_y, hdim1_min, hdim1_max) x_val_alt = tb_utils.adjust_pbc_point(label_x, hdim2_min, hdim2_max) label_on_corner = segmentation_mask_3[label_z,y_val_alt,x_val_alt] - print('label points:', label_z, y_val_alt, x_val_alt, - 'label on corner', label_on_corner) if((label_on_corner > 0)): #add opposite-corner buddy if it exists @@ -460,18 +470,16 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #add left/right buddy if it exists buddies = np.append(buddies,label_alt) - + buddies = np.unique(buddies) - - #print(buddies) - + if np.all(buddies==cur_idx): continue else: inter_buddies,feat_inds,buddy_inds=np.intersect1d(features_in.feature.values[:],buddies,return_indices=True) - + + # Get features that are needed for the buddy box buddy_features = deepcopy(features_in.iloc[feat_inds]) - #display(buddy_features) #create arrays to contain points of all buddies #and their transpositions/transformations @@ -480,10 +488,12 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu buddy_z = np.array([],dtype=int) buddy_y = np.array([],dtype=int) buddy_x = np.array([],dtype=int) + # buddy box points which are in INTERNAL BUDDY BOX SPACE buddy_z2 = np.array([],dtype=int) buddy_y2 = np.array([],dtype=int) buddy_x2 = np.array([],dtype=int) - + + # These are just for feature positions and are in DOMAIN SPACE buddy_zf = np.array([],dtype=int) buddy_yf = np.array([],dtype=int) buddy_xf = np.array([],dtype=int) @@ -493,11 +503,8 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #loop thru buddies for buddy in buddies: - #if buddy == cur_idx: buddy_feat = features_in[features_in['feature'] == buddy] - - #display(buddy_feat) - + yf2 = transfm_pbc_point(int(buddy_feat.hdim_1), hdim1_min, hdim1_max) xf2 = transfm_pbc_point(int(buddy_feat.hdim_2), hdim2_min, hdim2_max) @@ -512,15 +519,13 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu buddy_xf = np.append(buddy_xf,xf2) buddy_looper = buddy_looper+1 - + # Create 1:1 map through actual domain points and buddy box points for z,y,x in zip(z_reg_inds[buddy],y_reg_inds[buddy],x_reg_inds[buddy]): buddy_z = np.append(buddy_z,z) buddy_y = np.append(buddy_y,y) buddy_x = np.append(buddy_x,x) - - #else: - + y2 = transfm_pbc_point(y, hdim1_min, hdim1_max) x2 = transfm_pbc_point(x, hdim2_min, hdim2_max) @@ -540,7 +545,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu bbox_ysize = bbox_yend - bbox_ystart bbox_xsize = bbox_xend - bbox_xstart - print(bbox_zsize,bbox_ysize,bbox_xsize) + #print(bbox_zsize,bbox_ysize,bbox_xsize) #Buddy Box for smooth watershedding of features at PBC boundaries buddy_rgn = np.zeros((bbox_zsize, bbox_ysize, bbox_xsize)) @@ -568,7 +573,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu rgn_cube = iris.cube.Cube(data=buddy_rgn) coord_system=None - + # TODO: clean this up h2_coord=iris.coords.DimCoord(np.arange(bbox_xsize), long_name='hdim_2', units='1', bounds=None, attributes=None, coord_system=coord_system) h1_coord=iris.coords.DimCoord(np.arange(bbox_ysize), long_name='hdim_1', units='1', bounds=None, attributes=None, coord_system=coord_system) v_coord=iris.coords.DimCoord(np.arange(bbox_zsize), long_name='vdim', units='1', bounds=None, attributes=None, coord_system=coord_system) @@ -655,13 +660,6 @@ def 
segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu for index, row in buddy_features.iterrows(): #creation of 5x5x5 point ranges for 3D marker seeding #and PBC flags for cross-boundary seeding - nixing this idea for now, but described in PBC Segmentation notes - #PBC_y_chk = 0 - #PBC_x_chk = 0 - - #print("feature: ",row['feature']) - #print("z-ctr: ",row['vdim']) - #print("y-ctr: ",row['hdim_1']) - #print("x-ctr: ",row['hdim_2']) # TODO: fix point ranges here. # TODO: why is this repeated? @@ -710,16 +708,6 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu buddy_markers[~unmasked_buddies]=0 marker_vals = np.unique(buddy_markers) - #print("vals: ",marker_vals) - - #for marker in np.unique(markers): - # print(marker) - # if marker == 0: - # continue - # z_mark,y_mark,x_mark = np.where(markers==marker) - # print(z_mark,y_mark,x_mark) - # print(np.min(data_segmentation[z_mark,y_mark,x_mark]),np.max(data_segmentation[z_mark,y_mark,x_mark])) - # Turn into np arrays (not necessary for markers) as dask arrays don't yet seem to work for watershedding algorithm buddy_segmentation=np.array(buddy_segmentation) @@ -775,26 +763,11 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #We don't want to overwrite other features that may be in the #buddy box if not contacting the intersected seg field - #print("Transformed z,y,x: ",z_val,y_val,x_val) - #print("Real z,y,x: ",z_val_o,y_val_o,x_val_o) - #print("Seg z,y,x: ",z_seg,y_seg,x_seg) - #print("original: ",test_mask2[z_val_o,y_val_o,x_val_o]) - #print("new: ",test_mask3.data[z_seg,y_seg,x_seg]) - #print("input cube: ",rgn_cube.data[0,z_seg,y_seg,x_seg]) - #print("orig cube: ",hr_21_cube.data[z_val_o,y_val_o,x_val_o]) - #print(rgn_cube.data[0,z_seg,y_seg,x_seg] > 1.e-5) if (np.any(segmentation_mask_3[z_val_o,y_val_o,x_val_o]==buddies) and np.any(segmentation_mask_4.data[z_seg,y_seg,x_seg]==buddies)): #only do updating procedure if old and new values both in buddy set #and values are different if(segmentation_mask_3[z_val_o,y_val_o,x_val_o] != segmentation_mask_4.data[z_seg,y_seg,x_seg]): - #print("Transformed z,y,x: ",z_val,y_val,x_val) - #print("Real z,y,x: ",z_val_o,y_val_o,x_val_o) - #print("Seg z,y,x: ",z_seg,y_seg,x_seg) - #print("transformed z,y,x: ",) - #print(segmentation_mask_3[z_val_o,y_val_o,x_val_o], " -> ", segmentation_mask_4.data[z_seg,y_seg,x_seg]) - #print(segmentation_mask_3[z_val_o,y_val_o,x_val_o]+600845, " -> ", segmentation_mask_4.data[z_seg,y_seg,x_seg]+600845) - #print(rgn_cube.data[z_seg,y_seg,x_seg]) segmentation_mask_3[z_val_o,y_val_o,x_val_o] = segmentation_mask_4.data[z_seg,y_seg,x_seg] #print("updated") if not is_3D_seg: From 22e22c21f831b3e9d194db9ce703e7db7a046d76 Mon Sep 17 00:00:00 2001 From: galexsky Date: Thu, 24 Mar 2022 11:27:14 -0400 Subject: [PATCH 35/82] Update segmentation.py added blah comment --- tobac/segmentation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index a53b7c34..eafb4677 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -1,5 +1,7 @@ import logging from . import utils as tb_utils + +#blah def transfm_pbc_point(in_dim, dim_min, dim_max): '''Function to transform a PBC-feature point for contiguity From e0c75fb25368f322dc6d004303902fc36b4475df Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Thu, 24 Mar 2022 09:27:42 -0600 Subject: [PATCH 36/82] Revert "Update segmentation.py" This reverts commit 22e22c21f831b3e9d194db9ce703e7db7a046d76. 
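For reference while reading the surrounding hunks: transfm_pbc_point is the helper that makes a boundary-straddling feature contiguous in index space, e.g. turning hdim_1 indices [1496, 1497, 0, 1] into [1496, 1497, 1498, 1499], as later comments in this series describe. Its body is not quoted in this section, so this is a sketch consistent with that behaviour (the midpoint test is an assumption):

    def transfm_pbc_point(in_dim, dim_min, dim_max):
        # Shift indices in the lower half of the domain past dim_max so a
        # feature wrapping across the boundary becomes contiguous.
        if in_dim < ((dim_min + dim_max) / 2):
            return in_dim + dim_max + 1
        return in_dim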
--- tobac/segmentation.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index eafb4677..a53b7c34 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -1,7 +1,5 @@ import logging from . import utils as tb_utils - -#blah def transfm_pbc_point(in_dim, dim_min, dim_max): '''Function to transform a PBC-feature point for contiguity From 77a80a7743737a32e86477838aa2d3ddda8c9ef5 Mon Sep 17 00:00:00 2001 From: galexsky Date: Thu, 24 Mar 2022 12:18:27 -0400 Subject: [PATCH 37/82] Added new comments explaining PBC seg approach Added numerous comments pertaining to chunks of the PBC treatment procedure. Also found some likely code redundancies or deprecated pieces, which are noted in these new comments --- tobac/segmentation.py | 64 +++++++++++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 15 deletions(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index a53b7c34..32b368cc 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -484,16 +484,29 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #create arrays to contain points of all buddies #and their transpositions/transformations #for use in Buddy Box space - + + #z,y,x points in the grid domain with no transformations + #NOTE: when I think about it, not sure if these are really needed + # as we use the y_a1/x_a1 points for the data transposition + # to the buddy box rather than these and their z2/y2/x2 counterparts buddy_z = np.array([],dtype=int) buddy_y = np.array([],dtype=int) buddy_x = np.array([],dtype=int) - # buddy box points which are in INTERNAL BUDDY BOX SPACE + + # z,y,x points from the grid domain WHICH MAY OR MAY NOT BE TRANSFORMED + # so as to be continuous/contiguous across a grid boundary for that dimension + #(e.g., instead of [1496,1497,0,1,2,3] it would be [1496,1497,1498,1499,1500,1501]) buddy_z2 = np.array([],dtype=int) buddy_y2 = np.array([],dtype=int) buddy_x2 = np.array([],dtype=int) - # These are just for feature positions and are in DOMAIN SPACE + # These are just for feature positions and are in z2/y2/x2 space + # (may or may not be within real grid domain) + # so that when the buddy box is constructed, seeding is done properly + # in the buddy box space + + #NOTE: We may not need this, as we already do this editing the buddy_features df + # and an iterrows call through this is what's used to actually seed the buddy box buddy_zf = np.array([],dtype=int) buddy_yf = np.array([],dtype=int) buddy_xf = np.array([],dtype=int) @@ -503,23 +516,30 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #loop thru buddies for buddy in buddies: + #isolate feature from set of buddies buddy_feat = features_in[features_in['feature'] == buddy] - + + #transform buddy feature position if needed for positioning in z2/y2/x2 space + #MAY be redundant with what is done just below here yf2 = transfm_pbc_point(int(buddy_feat.hdim_1), hdim1_min, hdim1_max) xf2 = transfm_pbc_point(int(buddy_feat.hdim_2), hdim2_min, hdim2_max) - + + #edit value in buddy_features dataframe buddy_features.hdim_1.values[buddy_looper] = transfm_pbc_point(float(buddy_feat.hdim_1), hdim1_min, hdim1_max) buddy_features.hdim_2.values[buddy_looper] = transfm_pbc_point(float(buddy_feat.hdim_2), hdim2_min, hdim2_max) #print(int(buddy_feat.vdim),yf2,xf2) #display(buddy_features) - + + #again, this may be redundant as I don't think we use buddy_zf/yf/xf after this + #in favor of iterrows thru the updated 
buddy_features buddy_zf = np.append(buddy_zf,int(buddy_feat.vdim)) buddy_yf = np.append(buddy_yf,yf2) buddy_xf = np.append(buddy_xf,xf2) buddy_looper = buddy_looper+1 - # Create 1:1 map through actual domain points and buddy box points + # Create 1:1 map through actual domain points and continuous/contiguous points + # used to identify buddy box dimension lengths for its construction for z,y,x in zip(z_reg_inds[buddy],y_reg_inds[buddy],x_reg_inds[buddy]): buddy_z = np.append(buddy_z,z) @@ -534,6 +554,8 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu buddy_x2 = np.append(buddy_x2,x2) # Buddy Box! + # Indentify mins and maxes of buddy box continuous points range + # so that box of correct size can be constructred bbox_zstart = int(np.min(buddy_z2)) bbox_ystart = int(np.min(buddy_y2)) bbox_xstart = int(np.min(buddy_x2)) @@ -547,13 +569,17 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #print(bbox_zsize,bbox_ysize,bbox_xsize) - #Buddy Box for smooth watershedding of features at PBC boundaries + # Creation of actual Buddy Box space for transposition + # of data in domain and re-seeding with Buddy feature markers buddy_rgn = np.zeros((bbox_zsize, bbox_ysize, bbox_xsize)) ind_ctr = 0 #need to loop thru ALL z,y,x inds in buddy box #not just the ones that have nonzero seg mask values - + + # "_a1" points are re-transformations from the continuous buddy box points + # back to original grid/domain space to ensure that the correct data are + # copied to the proper Buddy Box locations for z in range(bbox_zstart,bbox_zend): for y in range(bbox_ystart,bbox_yend): for x in range(bbox_xstart,bbox_xend): @@ -570,6 +596,9 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu buddy_rgn[z-bbox_zstart,y-bbox_ystart,x-bbox_xstart] = field_in.data[z_a1,y_a1,x_a1] + + #construction of iris cube corresponding to buddy box and its data + #for marker seeding and watershedding of buddy box rgn_cube = iris.cube.Cube(data=buddy_rgn) coord_system=None @@ -588,15 +617,14 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu print(rgn_cube) #print(rgn_cube.vdim) - #buddy correction to bounding box - - + #Update buddy_features feature positions to correspond to buddy box space + #rather than domain space or continuous/contiguous point space for buddy_looper in range(0,len(buddy_features)): buddy_features.vdim.values[buddy_looper] = buddy_features.vdim.values[buddy_looper] - bbox_zstart buddy_features.hdim_1.values[buddy_looper] = buddy_features.hdim_1.values[buddy_looper] - bbox_ystart buddy_features.hdim_2.values[buddy_looper] = buddy_features.hdim_2.values[buddy_looper] - bbox_xstart - # Create cube of the same dimensions and coordinates as input data to store mask: + # Create cube of the same dimensions and coordinates as Buddy Box to store updated mask: buddies_out=1*rgn_cube buddies_out.rename('buddies_mask') buddies_out.units=1 @@ -604,6 +632,12 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #Create dask array from input data: #data=rgn_cube.core_data() buddy_data = buddy_rgn + + #All of the below is, I think, the same overarching segmentation procedure as in the original + #segmentation approach until the line which states + # "#transform seg_mask_4 data back to original mask after PBC first-pass ("segmentation_mask_3")" + # It's just performed on the buddy box and its data rather than our full domain + #Set level at which to create "Seed" for each 
feature in the case of 3D watershedding: # If none, use all levels (later reduced to the ones fulfilling the theshold conditions) if level==None: @@ -726,13 +760,13 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu segmentation_mask_4[np.bitwise_and(segmentation_mask_4>0, D>max_distance_pixel)]=0 - #mask all segmentation_mask points below th=reshold as -1 + #mask all segmentation_mask points below threshold as -1 #to differentiate from those unmasked points NOT filled by watershedding print(np.unique(segmentation_mask_4)) segmentation_mask_4[~unmasked_buddies] = -1 - #transform seg_mask_4 data back to original mask + #transform seg_mask_4 data back to original mask after PBC first-pass ("segmentation_mask_3") #print(np.unique(test_mask3.data)) #loop through buddy box inds and analogous seg mask inds From f21b05a0ff079e52394e99547ec59b8f2496aa1e Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Thu, 24 Mar 2022 14:08:16 -0600 Subject: [PATCH 38/82] Updated non PBC seed box code Updated the seed box code using my coordinates getter --- tobac/segmentation.py | 86 +++++++++++++++--------------- tobac/tests/test_segmentation.py | 90 ++++++++++++++++++++++++++++++-- 2 files changed, 130 insertions(+), 46 deletions(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 32b368cc..18292df4 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -158,6 +158,8 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu elif is_3D_seg: #3D watershedding # We need to generate seeds in 3D. + # TODO: I think it would be easier to transpose the input to always be + # z, h1, h2. if (seed_3D_flag == 'column'): for index, row in features_in.iterrows(): if vertical_coord_axis==0: @@ -169,63 +171,61 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu elif (seed_3D_flag == 'box'): z_len = data.shape[vertical_coord_axis] - y_len = data.shape[hdim_1_axis] - x_len = data.shape[hdim_2_axis] + h1_len = data.shape[hdim_1_axis] + h2_len = data.shape[hdim_2_axis] # Get the size of the seed box from the input parameter try: seed_z = seed_3D_size[vertical_coord_axis] - seed_y = seed_3D_size[hdim_1_axis] - seed_x = seed_3D_size[hdim_2_axis] + seed_h1 = seed_3D_size[hdim_1_axis] + seed_h2 = seed_3D_size[hdim_2_axis] except TypeError: # Not iterable, assume int. seed_z = seed_3D_size - seed_y = seed_3D_size - seed_x = seed_3D_size + seed_h1 = seed_3D_size + seed_h2 = seed_3D_size # Can we use our testing function to generate 3D boxes (with PBC awareness) # for a faster version of this? 
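As a quick illustration of the int-versus-tuple handling above (values made up, and assuming the vertical coordinate is axis 0): seed_3D_size=5 gives a 5x5x5 seed cube, while a tuple sizes each axis separately:

    # seed_3D_size = 5          ->  seed_z, seed_h1, seed_h2 = 5, 5, 5
    # seed_3D_size = (1, 5, 5)  ->  seed_z, seed_h1, seed_h2 = 1, 5, 5
    #                               (one level vertically, 5x5 horizontally)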
             for index, row in features_in.iterrows():
-                #creation of point ranges for 3D marker seeding
-                # TODO: fix this so that it's not all 0-5
-                if(int(row['vdim']) >=2 and int(row['vdim']) <= z_len-3):
-                    z_list = np.arange(int(row['vdim']-2),int(row['vdim']+3))
-                elif(int(row['vdim']) < 2):
-                    z_list = np.arange(0,seed_z)
-                else:
-                    z_list = np.arange(z_len-seed_z,z_len)
-
-                if(int(row['hdim_1']) >=2 and int(row['hdim_1']) <= y_len-3):
-                    y_list = np.arange(int(row['hdim_1']-2),int(row['hdim_1']+3))
-                elif(int(row['hdim_1']) < 2):
-                    y_list = np.arange(0,seed_y)
-                    #PBC_y_chk = 1
-                else:
-                    y_list = np.arange(y_len-seed_y,y_len)
-                    #PBC_y_chk = 1
+                try:
+                    row['vdim']
+                except KeyError:
+                    raise ValueError("For Box seeding, you must have a 3D input "
+                                     "source.")
-                if(int(row['hdim_2']) >=2 and int(row['hdim_2']) <= x_len-3):
-                    x_list = np.arange(int(row['hdim_2']-2),int(row['hdim_2']+3))
-                elif(int(row['hdim_2']) < 2):
-                    x_list = np.arange(0,seed_x)
-                    #PBC_x_chk = 1
-                else:
-                    x_list = np.arange(x_len-seed_x,x_len)
-                    #PBC_x_chk = 1
+                # Because we don't support PBCs on the vertical axis,
+                # this is simple: just go in the seed_z/2 points around the
+                # vdim of the feature, up to the limits of the array.
+                z_seed_start = int(np.max([0, np.ceil(row['vdim']-seed_z/2)]))
+                z_seed_end = int(np.min([z_len, np.ceil(row['vdim']+seed_z/2)]))
-                #loop thru the box points
-                for k in range(0,seed_z):
-                    for j in range(0,seed_y):
-                        for i in range(0,seed_x):
-
-                            if ndim_vertical[0]==0:
-                                markers[z_list[k],y_list[j],x_list[i]]=row['feature']
-                            elif ndim_vertical[0]==1:
-                                markers[y_list[j],z_list[k],x_list[i]]=row['feature']
-                            elif ndim_vertical[0]==2:
-                                markers[y_list[j],x_list[i],z_list[k]]=row['feature']
+                # For the horizontal dimensions, it's more complicated if we have
+                # PBCs.
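The tb_utils.get_pbc_coordinates call below is what absorbs the PBC complexity: a requested box that spills over an edge comes back as one or more in-domain boxes of (h1_start, h1_end, h2_start, h2_end) slice bounds, which is how the loop below consumes them. The concrete values here are an expectation, not quoted output:

    # Domain 0..10 in both horizontal dims, requested hdim_1 range -2..3:
    #   PBC_flag='none'   -> [(0, 3, 4, 7)]                 (clipped at the edge)
    #   PBC_flag='hdim_1' -> [(0, 3, 4, 7), (8, 10, 4, 7)]  (wrapped remainder)
    # boxes = tb_utils.get_pbc_coordinates(-2, 3, 4, 7, 0, 10, 0, 10,
    #                                      PBC_flag='hdim_1')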
+ hdim_1_min = int(np.ceil(row['hdim_1'] - seed_h1/2)) + hdim_1_max = int(np.ceil(row['hdim_1'] + seed_h1/2)) + hdim_2_min = int(np.ceil(row['hdim_2'] - seed_h2/2)) + hdim_2_max = int(np.ceil(row['hdim_2'] + seed_h2/2)) + + all_seed_boxes = tb_utils.get_pbc_coordinates( + hdim_1_min, hdim_1_max, hdim_2_min, hdim_2_max, + 0, h1_len, 0, h2_len, PBC_flag= PBC_flag + ) + for seed_box in all_seed_boxes: + #print("seed box: ", seed_box) + if vertical_coord_axis==0: + markers[z_seed_start:z_seed_end, + seed_box[0]:seed_box[1], + seed_box[2]:seed_box[3]]=row['feature'] + elif vertical_coord_axis==1: + markers[seed_box[0]:seed_box[1], + z_seed_start:z_seed_end, + seed_box[2]:seed_box[3]]=row['feature'] + elif vertical_coord_axis==2: + markers[seed_box[0]:seed_box[1], + seed_box[2]:seed_box[3], + z_seed_start:z_seed_end]=row['feature'] - # set markers in cells not fulfilling threshold condition to zero: markers[~unmasked]=0 diff --git a/tobac/tests/test_segmentation.py b/tobac/tests/test_segmentation.py index 8191fc52..a09d3b78 100644 --- a/tobac/tests/test_segmentation.py +++ b/tobac/tests/test_segmentation.py @@ -1,3 +1,4 @@ +import pytest import tobac.testing as testing import tobac.segmentation as seg @@ -303,12 +304,95 @@ def test_segmentation_timestep_level(): == np.zeros((test_vdim_sz, test_hdim_1_sz, test_hdim_2_sz)) ) -def test_segmentation_timestep_3d_seed_box(): +@pytest.mark.parametrize("blob_size, shift_pts, seed_3D_size" + ", expected_both_segmented", + [((3,3,3), (0,0,4), 3, False), + ((3,3,3), (0,0,4), 5, False), + ((3,3,3), (0,0,4), 7, True), + ] +) +def test_segmentation_timestep_3d_seed_box_nopbcs(blob_size, shift_pts, + seed_3D_size, expected_both_segmented): '''Tests ```tobac.segmentation.segmentation_timestep``` to make sure that the 3D seed box works. + Parameters + ---------- + blob_size: tuple(int, int, int) + Size of the initial blob to add to the domain in (z, y, x) space. + We strongly recommend that these be *odd* numbers. + shift_pts: tuple(int, int, int) + Number of points *relative to the center* to shift the blob in + (z, y, x) space. + seed_3D_size: int or tuple + Seed size to pass to tobac + expected_both_segmented: bool + True if we expect both features to be segmented, false + if we don't expect them both to be segmented + ''' - # start by building a simple dataset with a single feature import numpy as np - pass \ No newline at end of file + # For now, just testing this for no PBCs. + ''' + The best way to do this I think is to create two blobs near (but not touching) + each other, varying the seed_3D_size so that they are either segmented together + or not segmented together. + ''' + test_dset_size = (20, 50, 50) + test_hdim_1_pt_1 = 20.0 + test_hdim_2_pt_1 = 20.0 + test_vdim_pt_1 = 8 + test_dxy = 1000 + test_amp = 2 + + PBC_opt = 'none' + + + test_data = np.zeros(test_dset_size) + test_data = testing.make_feature_blob( + test_data, + test_hdim_1_pt_1, + test_hdim_2_pt_1, + test_vdim_pt_1, + h1_size=blob_size[1], + h2_size=blob_size[2], + v_size=blob_size[0], + amplitude=test_amp, + ) + + # Make a second feature + test_data = testing.make_feature_blob( + test_data, + test_hdim_1_pt_1 + shift_pts[1], + test_hdim_2_pt_1 + shift_pts[2], + test_vdim_pt_1 + shift_pts[0], + h1_size=blob_size[1], + h2_size=blob_size[2], + v_size=blob_size[0], + amplitude=test_amp, + ) + + test_data_iris = testing.make_dataset_from_arr( + test_data, data_type="iris", z_dim_num=0, y_dim_num=1, x_dim_num=2 + ) + # Generate dummy feature dataset only on the first feature. 
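    # (How the parametrize cases play out, for orientation: the second blob
    # sits 4 points away along hdim_2, with a one-point gap of sub-threshold
    # values between the blobs. Only the seed_3D_size=7 box reaches the
    # above-threshold edge of the second blob, so only then does the marker
    # spill over and watershed both blobs into one label; 3 and 5 leave the
    # second blob unseeded, hence expected_both_segmented is True only for 7.)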
+ test_feature_ds = testing.generate_single_feature(start_v=test_vdim_pt_1, + start_h1=test_hdim_1_pt_1, + start_h2=test_hdim_2_pt_1) + + out_seg_mask, out_df = seg.segmentation_timestep( + field_in=test_data_iris, + features_in=test_feature_ds, + dxy=test_dxy, + threshold=1.5, + seed_3D_flag= 'box', + seed_3D_size=seed_3D_size + ) + + second_point_seg = out_seg_mask.core_data()[int(test_vdim_pt_1 + shift_pts[0]), + int(test_hdim_1_pt_1 + shift_pts[1]), + int(test_hdim_2_pt_1 + shift_pts[2])] + # We really only need to check the center point here for this test. + seg_point_overlaps = second_point_seg == 1 + assert seg_point_overlaps == expected_both_segmented From 7aa0b37429a261b7f0e7045657010b7d9bbbd07a Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Fri, 25 Mar 2022 11:00:08 -0600 Subject: [PATCH 39/82] Added code coverage files to gitignore --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index a9a8efc1..944f9afc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ *.pyc __pycache__ -.vscode \ No newline at end of file +.vscode +htmlcov +.coverage \ No newline at end of file From b4cbe357bd0f3391dbb8485c7893d4ab1bd8f1a6 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Sat, 26 Mar 2022 21:42:14 -0600 Subject: [PATCH 40/82] Segmentation now transposes to improve code readability Refactored segmentation to transpose data internally so that z is always first (and then re-transposes after we finish). Added new tests to make sure that this is happening correctly --- tobac/segmentation.py | 73 +++++++++++++++++-------------- tobac/testing.py | 5 ++- tobac/tests/test_segmentation.py | 74 ++++++++++++++++++++++++++++++++ 3 files changed, 119 insertions(+), 33 deletions(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 18292df4..0404c004 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -1,4 +1,6 @@ import logging + +from numpy import transpose from . import utils as tb_utils def transfm_pbc_point(in_dim, dim_min, dim_max): @@ -92,20 +94,25 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu if vertical_coord=='auto': list_vertical=['z','model_level_number','altitude','geopotential_height'] # TODO: there surely must be a better way to handle this + vertical_axis = None for coord_name in list_vertical: if coord_name in list_coord_names: vertical_axis=coord_name break + if vertical_axis is None: + raise ValueError('Please specify vertical coordinate') elif vertical_coord in list_coord_names: vertical_axis=vertical_coord else: - raise ValueError('Plese specify vertical coordinate') + raise ValueError('Please specify vertical coordinate') ndim_vertical=field_in.coord_dims(vertical_axis) if len(ndim_vertical)>1: raise ValueError('please specify 1 dimensional vertical coordinate') vertical_coord_axis = ndim_vertical[0] # Once we know the vertical coordinate, we can resolve the # horizontal coordinates + # To make things easier, we will transpose the axes + # so that they are consistent. if vertical_coord_axis == 0: hdim_1_axis = 1 hdim_2_axis = 2 @@ -129,6 +136,18 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu # Get raw array from input data: data=field_in.core_data() is_3D_seg = len(data.shape)==3 + # To make things easier, we will transpose the axes + # so that they are consistent: z, hdim_1, hdim_2 + # We only need to do this for 3D. 
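The transposes introduced in this patch are easiest to sanity-check with shapes; a standalone illustration (array sizes arbitrary):

    import numpy as np
    arr = np.zeros((30, 20, 40))             # vertical axis 1: (h1, z, h2)
    assert np.transpose(arr, axes=(1, 0, 2)).shape == (20, 30, 40)
    arr = np.zeros((30, 40, 20))             # vertical axis 2: (h1, h2, z)
    assert np.transpose(arr, axes=(2, 0, 1)).shape == (20, 30, 40)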
+ transposed_data = False + if is_3D_seg: + if vertical_coord_axis == 1: + data = np.transpose(data, axes=(1, 0, 2)) + transposed_data = True + elif vertical_coord_axis == 2: + data = np.transpose(data, axes=(2, 0, 1)) + transposed_data = True + #Set level at which to create "Seed" for each feature in the case of 3D watershedding: # If none, use all levels (later reduced to the ones fulfilling the theshold conditions) @@ -162,23 +181,18 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu # z, h1, h2. if (seed_3D_flag == 'column'): for index, row in features_in.iterrows(): - if vertical_coord_axis==0: - markers[level,int(row['hdim_1']), int(row['hdim_2'])]=row['feature'] - elif vertical_coord_axis==1: - markers[int(row['hdim_1']),level, int(row['hdim_2'])]=row['feature'] - elif vertical_coord_axis==2: - markers[int(row['hdim_1']), int(row['hdim_2']),level]=row['feature'] + markers[level,int(row['hdim_1']), int(row['hdim_2'])]=row['feature'] elif (seed_3D_flag == 'box'): - z_len = data.shape[vertical_coord_axis] - h1_len = data.shape[hdim_1_axis] - h2_len = data.shape[hdim_2_axis] + z_len = data.shape[0] + h1_len = data.shape[1] + h2_len = data.shape[2] # Get the size of the seed box from the input parameter try: - seed_z = seed_3D_size[vertical_coord_axis] - seed_h1 = seed_3D_size[hdim_1_axis] - seed_h2 = seed_3D_size[hdim_2_axis] + seed_z = seed_3D_size[0] + seed_h1 = seed_3D_size[1] + seed_h2 = seed_3D_size[2] except TypeError: # Not iterable, assume int. seed_z = seed_3D_size @@ -212,19 +226,9 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu 0, h1_len, 0, h2_len, PBC_flag= PBC_flag ) for seed_box in all_seed_boxes: - #print("seed box: ", seed_box) - if vertical_coord_axis==0: - markers[z_seed_start:z_seed_end, - seed_box[0]:seed_box[1], - seed_box[2]:seed_box[3]]=row['feature'] - elif vertical_coord_axis==1: - markers[seed_box[0]:seed_box[1], - z_seed_start:z_seed_end, - seed_box[2]:seed_box[3]]=row['feature'] - elif vertical_coord_axis==2: - markers[seed_box[0]:seed_box[1], - seed_box[2]:seed_box[3], - z_seed_start:z_seed_end]=row['feature'] + markers[z_seed_start:z_seed_end, + seed_box[0]:seed_box[1], + seed_box[2]:seed_box[3]]=row['feature'] # set markers in cells not fulfilling threshold condition to zero: @@ -805,12 +809,17 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu segmentation_mask_3[z_val_o,y_val_o,x_val_o] = segmentation_mask_4.data[z_seg,y_seg,x_seg] #print("updated") if not is_3D_seg: - segmentation_mask_3 = segmentation_mask_3[0] - segmentation_out.data = segmentation_mask_3 - # PBC checks not run - else: - #Write resulting mask into cube for output - segmentation_out.data = segmentation_mask + segmentation_mask_3 = segmentation_mask_3[0] + + segmentation_mask = segmentation_mask_3 + + if transposed_data: + segmentation_mask = np.transpose(segmentation_mask, axes = + [vertical_coord_axis, hdim_1_axis, hdim_2_axis]) + + # Finished PBC checks and new PBC updated segmentation now in segmentation_mask. 
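One numpy subtlety worth flagging for the transpose back (an observation, not a quoted change): reusing the same axes tuple only undoes a transpose when the permutation is its own inverse. That holds for (1, 0, 2), but the inverse of (2, 0, 1) is np.argsort((2, 0, 1)), i.e. (1, 2, 0):

    import numpy as np
    fwd = (2, 0, 1)                          # forward transpose used above
    arr = np.zeros((30, 40, 20))             # (h1, h2, z)
    zyx = np.transpose(arr, axes=fwd)        # (20, 30, 40): (z, h1, h2)
    back = np.transpose(zyx, axes=np.argsort(fwd))
    assert back.shape == arr.shape           # (30, 40, 20) restored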
+ #Write resulting mask into cube for output + segmentation_out.data = segmentation_mask # count number of grid cells asoociated to each tracked cell and write that into DataFrame: values, count = np.unique(segmentation_mask, return_counts=True) diff --git a/tobac/testing.py b/tobac/testing.py index 285da30f..4b936d79 100644 --- a/tobac/testing.py +++ b/tobac/testing.py @@ -364,6 +364,7 @@ def make_dataset_from_arr( data_type="xarray", time_dim_num=None, z_dim_num=None, + z_dim_name = 'altitude', y_dim_num=0, x_dim_num=1, ): @@ -380,6 +381,8 @@ def make_dataset_from_arr( What axis is the time dimension on, None for a single timestep z_dim_num: int or None What axis is the z dimension on, None for a 2D array + z_dim_name: str + What the z dimension name is named y_dim_num: int What axis is the y dimension on, typically 0 for a 2D array x_dim_num: int @@ -407,7 +410,7 @@ def make_dataset_from_arr( out_arr_iris = output_arr.to_iris() if is_3D: out_arr_iris.add_dim_coord( - iris.coords.DimCoord(np.arange(0, z_max), standard_name="altitude"), + iris.coords.DimCoord(np.arange(0, z_max), standard_name=z_dim_name), z_dim_num, ) return out_arr_iris diff --git a/tobac/tests/test_segmentation.py b/tobac/tests/test_segmentation.py index a09d3b78..8d60fefa 100644 --- a/tobac/tests/test_segmentation.py +++ b/tobac/tests/test_segmentation.py @@ -396,3 +396,77 @@ def test_segmentation_timestep_3d_seed_box_nopbcs(blob_size, shift_pts, # We really only need to check the center point here for this test. seg_point_overlaps = second_point_seg == 1 assert seg_point_overlaps == expected_both_segmented + + +@pytest.mark.parametrize("test_dset_size, vertical_axis_num, " + "vertical_coord_name," + " vertical_coord_opt, expected_raise", + [((20,30,40), 0, 'altitude', 'auto', False), + ((20,30,40), 1, 'altitude', 'auto', False), + ((20,30,40), 2, 'altitude', 'auto', False), + ((20,30,40), 0, 'air_pressure', 'air_pressure', False), + ((20,30,40), 0, 'air_pressure', 'auto', True), + ((20,30,40), 0, 'model_level_number', 'auto', False), + ((20,30,40), 0, 'altitude', 'auto', False), + ((20,30,40), 0, 'geopotential_height', 'auto', False) + ] +) +def test_different_z_axes(test_dset_size, vertical_axis_num, vertical_coord_name, + vertical_coord_opt, expected_raise): + '''Tests ```tobac.segmentation.segmentation_timestep``` + Tests: + The output is the same no matter what order we have axes in. + The + + ''' + import numpy as np + + # First, just check that input and output shapes are the same. + test_dxy = 1000 + test_vdim_pt_1 = 8 + test_hdim_1_pt_1 = 12 + test_hdim_2_pt_1 = 12 + test_data = np.zeros(test_dset_size) + common_dset_opts = { + 'in_arr': test_data, + 'data_type': 'iris', + 'z_dim_name': vertical_coord_name + } + if vertical_axis_num == 0: + test_data_iris = testing.make_dataset_from_arr( + z_dim_num=0, y_dim_num=1, x_dim_num=2, **common_dset_opts + ) + elif vertical_axis_num == 1: + test_data_iris = testing.make_dataset_from_arr( + z_dim_num=1, y_dim_num=0, x_dim_num=1, **common_dset_opts + ) + elif vertical_axis_num == 2: + test_data_iris = testing.make_dataset_from_arr( + z_dim_num=1, y_dim_num=0, x_dim_num=1, **common_dset_opts + ) + + # Generate dummy feature dataset only on the first feature. 
+ test_feature_ds = testing.generate_single_feature(start_v=test_vdim_pt_1, + start_h1=test_hdim_1_pt_1, + start_h2=test_hdim_2_pt_1) + if not expected_raise: + out_seg_mask, out_df = seg.segmentation_timestep( + field_in=test_data_iris, + features_in=test_feature_ds, + dxy=test_dxy, + threshold=1.5, + vertical_coord=vertical_coord_opt + ) + # Check that shapes don't change. + assert test_data.shape == out_seg_mask.core_data().shape + + else: + # Expecting a raise + with pytest.raises(ValueError): + out_seg_mask, out_df = seg.segmentation_timestep( + field_in=test_data_iris, + features_in=test_feature_ds, + dxy=test_dxy, + threshold=1.5, + ) + From 3a0d777ebcbc0abd9b8d0017d06ad9b50cb073d1 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Sat, 26 Mar 2022 22:54:09 -0600 Subject: [PATCH 41/82] Added new test to theoretically check buddy box Note that none of my cases trigger the buddy box, which I'm honestly confused by. --- tobac/segmentation.py | 2 +- tobac/testing.py | 6 +- tobac/tests/test_segmentation.py | 176 +++++++++++++++++++++++++++++-- tobac/tests/test_testing.py | 8 +- 4 files changed, 178 insertions(+), 14 deletions(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 0404c004..20dff5cc 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -398,7 +398,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu buddies contains features of interest and any neighbors that across the boundary or in physical contact with that label ''' - + # TODO: this can cause a crash if there are no segmentation regions reg_props_dict = tb_utils.get_label_props_in_dict(segmentation_mask_3) curr_reg_inds, z_reg_inds, y_reg_inds, x_reg_inds= tb_utils.get_indices_of_labels_from_reg_prop_dict(reg_props_dict) diff --git a/tobac/testing.py b/tobac/testing.py index 4b936d79..ac220a1c 100644 --- a/tobac/testing.py +++ b/tobac/testing.py @@ -766,7 +766,7 @@ def get_pbc_coordinates(h1_min, h1_max, h2_min, h2_max, def generate_single_feature(start_h1, start_h2, start_v = None, spd_h1 = 1, spd_h2 = 1, spd_v = 1, - min_h1 = 0, max_h1 = 1000, min_h2 = 0, max_h2 = 1000, + min_h1 = 0, max_h1 = None, min_h2 = 0, max_h2 = None, num_frames = 1, dt = datetime.timedelta(minutes=5), start_date = datetime.datetime(2022,1,1,0), PBC_flag = 'none', frame_start = 1, feature_num=1,): @@ -815,6 +815,10 @@ def generate_single_feature(start_h1, start_h2, start_v = None, What number to start the feature at ''' + if max_h1 is None or max_h2 is None: + raise ValueError('Max coords must be specified.') + + out_list_of_dicts = list() curr_h1 = start_h1 curr_h2 = start_h2 diff --git a/tobac/tests/test_segmentation.py b/tobac/tests/test_segmentation.py index 8d60fefa..51e23cb0 100644 --- a/tobac/tests/test_segmentation.py +++ b/tobac/tests/test_segmentation.py @@ -35,7 +35,8 @@ def test_segmentation_timestep_2D_feature_2D_seg(): ) test_data_iris = testing.make_dataset_from_arr(test_data, data_type="iris") # Generate dummy feature dataset - test_feature_ds = testing.generate_single_feature(start_h1 = 20.0, start_h2 = 20.0) + test_feature_ds = testing.generate_single_feature(start_h1 = 20.0, start_h2 = 20.0, + max_h1 = 1000, max_h2 = 1000) out_seg_mask, out_df = seg.segmentation_timestep(field_in = test_data_iris, features_in = test_feature_ds, dxy = test_dxy, @@ -67,7 +68,9 @@ def test_segmentation_timestep_2D_feature_2D_seg(): test_data_iris = testing.make_dataset_from_arr(test_data, data_type="iris") # Generate dummy feature dataset test_feature_ds = 
testing.generate_single_feature(start_h1 = test_hdim_1_pt, - start_h2 = test_hdim_2_pt) + start_h2 = test_hdim_2_pt, + max_h1 = 1000, max_h2 = 1000 + ) hdim_1_start_feat, hdim_1_end_feat = testing.get_start_end_of_feat(test_hdim_1_pt, test_hdim_1_sz, 0,test_dset_size[0], @@ -113,7 +116,8 @@ def test_segmentation_timestep_2D_feature_2D_seg(): test_data_iris = testing.make_dataset_from_arr(test_data, data_type="iris") # Generate dummy feature dataset test_feature_ds = testing.generate_single_feature(start_h1 = test_hdim_1_pt, - start_h2 = test_hdim_2_pt) + start_h2 = test_hdim_2_pt, + max_h1 = 1000, max_h2 = 1000) hdim_1_start_feat, hdim_1_end_feat = testing.get_start_end_of_feat(test_hdim_1_pt, test_hdim_1_sz, 0,test_dset_size[0], is_pbc = True ) @@ -164,7 +168,8 @@ def test_segmentation_timestep_2D_feature_2D_seg(): test_data_iris = testing.make_dataset_from_arr(test_data, data_type="iris") # Generate dummy feature dataset test_feature_ds = testing.generate_single_feature(start_h1 = test_hdim_1_pt, - start_h2 = test_hdim_2_pt) + start_h2 = test_hdim_2_pt, + max_h1 = 1000, max_h2 = 1000) hdim_1_start_feat, hdim_1_end_feat = testing.get_start_end_of_feat(test_hdim_1_pt, test_hdim_1_sz, 0,test_dset_size[0], is_pbc = True ) @@ -248,7 +253,8 @@ def test_segmentation_timestep_level(): test_data, data_type="iris", z_dim_num=0, y_dim_num=1, x_dim_num=2 ) # Generate dummy feature dataset - test_feature_ds = testing.generate_single_feature(start_h1=20.0, start_h2=20.0) + test_feature_ds = testing.generate_single_feature(start_h1=20.0, start_h2=20.0, + max_h1 = 1000, max_h2 = 1000) out_seg_mask, out_df = seg.segmentation_timestep( field_in=test_data_iris, @@ -379,7 +385,8 @@ def test_segmentation_timestep_3d_seed_box_nopbcs(blob_size, shift_pts, # Generate dummy feature dataset only on the first feature. test_feature_ds = testing.generate_single_feature(start_v=test_vdim_pt_1, start_h1=test_hdim_1_pt_1, - start_h2=test_hdim_2_pt_1) + start_h2=test_hdim_2_pt_1, + max_h1 = 1000, max_h2 = 1000) out_seg_mask, out_df = seg.segmentation_timestep( field_in=test_data_iris, @@ -416,8 +423,21 @@ def test_different_z_axes(test_dset_size, vertical_axis_num, vertical_coord_name '''Tests ```tobac.segmentation.segmentation_timestep``` Tests: The output is the same no matter what order we have axes in. - The + A ValueError is raised if an invalid vertical coordinate is + passed in + Parameters + ---------- + test_dset_size: tuple(int, int, int) + Size of the test dataset + vertical_axis_num: int (0-2, inclusive) + Which axis in test_dset_size is the vertical axis + vertical_coord_name: str + Name of the vertical coordinate. + vertical_coord_opt: str + What to pass in as the vertical coordinate option to segmentation_timestep + expected_raise: bool + True if we expect a ValueError to be raised, false otherwise ''' import numpy as np @@ -448,7 +468,8 @@ def test_different_z_axes(test_dset_size, vertical_axis_num, vertical_coord_name # Generate dummy feature dataset only on the first feature. test_feature_ds = testing.generate_single_feature(start_v=test_vdim_pt_1, start_h1=test_hdim_1_pt_1, - start_h2=test_hdim_2_pt_1) + start_h2=test_hdim_2_pt_1, + max_h1 = 1000, max_h2 = 1000) if not expected_raise: out_seg_mask, out_df = seg.segmentation_timestep( field_in=test_data_iris, @@ -470,3 +491,142 @@ def test_different_z_axes(test_dset_size, vertical_axis_num, vertical_coord_name threshold=1.5, ) +# TODO: add more tests to make sure buddy box code is run. 
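The buddy box test that follows is built on a shift-and-compare invariant: with doubly periodic boundaries, rolling the input field should roll the segmentation by exactly the same offset, so rolling the shifted result back must reproduce the unshifted result. Schematically (not the test code itself):

    # seg(np.roll(data, s)) == np.roll(seg(data), s)   for any shift s, PBC 'both'
    # so:  np.roll(seg_shifted, tuple(-x for x in s), axis=(0, 1, 2)) == seg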
+@pytest.mark.parametrize("dset_size, blob_1_loc, blob_1_size, blob_2_loc, blob_2_size," + "shift_domain, seed_3D_size", + [((20,30,40), (8,0,0), (5,5,5), (8, 3,3), (5,5,5), (0,-8,-8), None), + ((20,30,40), (8,0,0), (5,5,5), (8, 3,3), (5,5,5), (0,-8,-8), 5), + ((20,30,40), (8,0,0), (5,5,5), (8, 28,38), (5,5,5), (0,15,15), 5), + ((20,30,40), (8,0,0), (5,5,5), (8, 28,38), (5,5,5), (0,-8,-8), None), + ] +) +def test_segmentation_timestep_3d_buddy_box(dset_size,blob_1_loc, blob_1_size, blob_2_loc, blob_2_size, + shift_domain, seed_3D_size): + '''Tests ```tobac.segmentation.segmentation_timestep``` + to make sure that the "buddy box" 3D PBC implementation works. + Basic procedure: build a dataset with two features (preferrably on the corner) + and then run segmentation, shift the points, and then run segmentation again. + After shifting back, the results should be identical. + Note: only tests 'both' PBC condition. + Parameters + ---------- + dset_size: tuple(int, int, int) + Size of the domain (assumes z, hdim_1, hdim_2) + blob_1_loc: tuple(int, int, int) + Location of the first blob + blob_1_size: tuple(int, int, int) + Size of the first blob. Note: use odd numbers here. + blob_2_loc: tuple(int, int, int) + Location of the second blob + blob_2_size: tuple(int, int, int) + Size of the second blob. Note: use odd numbers here. + shift_domain: tuple(int, int, int) + How many points to shift the domain by. + seed_3D_size: None, int, or tuple + Seed size to pass to tobac. If None, passes in a column seed + ''' + + import numpy as np + import pandas as pd + + ''' + The best way to do this I think is to create two blobs near (but not touching) + each other, varying the seed_3D_size so that they are either segmented together + or not segmented together. + ''' + test_dxy = 1000 + test_amp = 2 + + test_data = np.zeros(dset_size) + test_data = testing.make_feature_blob( + test_data, + blob_1_loc[1], + blob_1_loc[2], + blob_1_loc[0], + h1_size=blob_1_size[1], + h2_size=blob_1_size[2], + v_size=blob_1_size[0], + amplitude=test_amp, + PBC_flag='both' + + ) + + # Make a second feature + test_data = testing.make_feature_blob( + test_data, + blob_2_loc[1], + blob_2_loc[2], + blob_2_loc[0], + h1_size=blob_2_size[1], + h2_size=blob_2_size[2], + v_size=blob_2_size[0], + amplitude=test_amp, + PBC_flag='both' + ) + + test_data_iris = testing.make_dataset_from_arr( + test_data, data_type="iris", z_dim_num=0, y_dim_num=1, x_dim_num=2 + ) + # Generate dummy feature dataset only on the first feature. + test_feature_ds_1 = testing.generate_single_feature(start_v=blob_1_loc[0], + start_h1=blob_1_loc[1], + start_h2=blob_1_loc[2], + max_h1 = dset_size[1], + max_h2 = dset_size[2], + PBC_flag='both') + test_feature_ds_2 = testing.generate_single_feature(start_v=blob_2_loc[0], + start_h1=blob_2_loc[1], + start_h2=blob_2_loc[2], + max_h1 = dset_size[1], + max_h2 = dset_size[2], + PBC_flag='both') + test_feature_ds = pd.concat([test_feature_ds_1, test_feature_ds_2]) + + common_seg_opts = { + 'dxy': test_dxy, + 'threshold': 1.5, + 'PBC_flag': 'both' + } + if seed_3D_size is None: + common_seg_opts['seed_3D_flag'] = 'column' + else: + common_seg_opts['seed_3D_flag'] = 'box' + common_seg_opts['seed_3D_size'] = seed_3D_size + + + out_seg_mask, out_df = seg.segmentation_timestep( + field_in=test_data_iris, + features_in=test_feature_ds, + **common_seg_opts + ) + + # Now, shift the data over and re-run segmentation. 
+ test_data_shifted = np.roll(test_data, shift_domain, axis=(0,1,2)) + test_data_iris_shifted = testing.make_dataset_from_arr( + test_data_shifted, data_type="iris", z_dim_num=0, y_dim_num=1, x_dim_num=2 + ) + test_feature_ds_1 = testing.generate_single_feature(start_v=blob_1_loc[0]+shift_domain[0], + start_h1=blob_1_loc[1]+shift_domain[1], + start_h2=blob_1_loc[2]+shift_domain[2], + max_h1 = dset_size[1], + max_h2 = dset_size[2], + PBC_flag='both') + test_feature_ds_2 = testing.generate_single_feature(start_v=blob_2_loc[0]+shift_domain[0], + start_h1=blob_2_loc[1]+shift_domain[1], + start_h2=blob_2_loc[2]+shift_domain[2], + max_h1 = dset_size[1], + max_h2 = dset_size[2], + PBC_flag='both') + test_feature_ds_shifted = pd.concat([test_feature_ds_1, test_feature_ds_2]) + out_seg_mask_shifted, out_df = seg.segmentation_timestep( + field_in=test_data_iris_shifted, + features_in=test_feature_ds_shifted, + **common_seg_opts + ) + + # Now, shift output back. + out_seg_reshifted = np.roll(out_seg_mask_shifted.core_data(), + tuple((-x for x in shift_domain)), axis=(0,1,2)) + + assert np.all(out_seg_mask.core_data() == out_seg_reshifted) + diff --git a/tobac/tests/test_testing.py b/tobac/tests/test_testing.py index 61017108..12fc7772 100644 --- a/tobac/tests/test_testing.py +++ b/tobac/tests/test_testing.py @@ -246,13 +246,13 @@ def test_generate_single_feature(): {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)} ]) - assert_frame_equal(generate_single_feature(1, 1, start_v = 1, frame_start = 0).sort_index(axis=1), expected_df.sort_index(axis=1)) + assert_frame_equal(generate_single_feature(1, 1, start_v = 1, frame_start = 0, max_h1 = 1000, max_h2 = 1000).sort_index(axis=1), expected_df.sort_index(axis=1)) # Testing a simple 2D case expected_df = pd.DataFrame.from_dict([ {'hdim_1': 1, 'hdim_2': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)} ]) - assert_frame_equal(generate_single_feature(1, 1, frame_start = 0).sort_index(axis=1), expected_df.sort_index(axis=1)) + assert_frame_equal(generate_single_feature(1, 1, frame_start = 0, max_h1 = 1000, max_h2 = 1000).sort_index(axis=1), expected_df.sort_index(axis=1)) # Testing a simple 2D case with movement expected_df = pd.DataFrame.from_dict([ @@ -262,7 +262,7 @@ def test_generate_single_feature(): {'hdim_1': 4, 'hdim_2': 4, 'frame': 3, 'feature': 4, 'time': datetime.datetime(2022, 1, 1,0,15)}, ]) assert_frame_equal(generate_single_feature(1, 1, frame_start = 0, num_frames=4, - spd_h1 = 1, spd_h2 = 1).sort_index(axis=1), expected_df.sort_index(axis=1)) + spd_h1 = 1, spd_h2 = 1, max_h1 = 1000, max_h2 = 1000).sort_index(axis=1), expected_df.sort_index(axis=1)) # Testing a simple 3D case with movement expected_df = pd.DataFrame.from_dict([ @@ -272,7 +272,7 @@ def test_generate_single_feature(): {'hdim_1': 4, 'hdim_2': 4, 'vdim': 4, 'frame': 3, 'feature': 4, 'time': datetime.datetime(2022, 1, 1,0,15)}, ]) assert_frame_equal(generate_single_feature(1, 1, start_v = 1, frame_start = 0, num_frames=4, - spd_h1 = 1, spd_h2 = 1, spd_v = 1).sort_index(axis=1), expected_df.sort_index(axis=1)) + spd_h1 = 1, spd_h2 = 1, spd_v = 1, max_h1 = 1000, max_h2 = 1000).sort_index(axis=1), expected_df.sort_index(axis=1)) # Testing a simple 3D case with movement that passes the hdim_1 boundary expected_df = pd.DataFrame.from_dict([ From e096d88a29b2317f194f95879bc006671943c724 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Sun, 27 Mar 2022 18:35:56 -0600 Subject: [PATCH 42/82] Got buddy boxes 
working for more cases --- tobac/segmentation.py | 347 +++++++++++++++++-------------- tobac/tests/test_segmentation.py | 10 +- tobac/tests/test_tracking.py | 82 -------- tobac/tests/test_util.py | 85 ++++++++ tobac/tracking.py | 71 +------ tobac/utils.py | 71 +++++++ 6 files changed, 359 insertions(+), 307 deletions(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 20dff5cc..fb8dc4c2 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -1,4 +1,5 @@ import logging +from operator import is_ from numpy import transpose from . import utils as tb_utils @@ -26,11 +27,190 @@ def transfm_pbc_point(in_dim, dim_min, dim_max): else: return in_dim +def add_markers(features, marker_arr, seed_3D_flag, seed_3D_size, level, PBC_flag): + '''Adds markers for watershedding using the `features` dataframe + to the marker_arr. + + Parameters + ---------- + features: pandas.DataFrame + Features for one point in time to add as markers. + marker_arr: 2D or 3D array-like + Array to add the markers to. Assumes a (z, y, x) configuration. + seed_3D_flag: str('column', 'box') + Seed 3D field at feature positions with either the full column + or a box of user-set size + seed_3D_size: int or tuple (dimensions equal to dimensions of `field`) + This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an + integer, the seed box is identical in all dimensions. If it's a tuple, it specifies the + seed area for each dimension separately. + Note: we recommend the use of odd numbers for this. If you give + an even number, your seed box will be biased and not centered + around the feature. + Note: if two seed boxes overlap, the feature that is seeded will be the + closer feature. + level: slice or None + If `seed_3D_flag` is 'column', the levels at which to seed the + cells for the watershedding algorithm. If None, seeds all levels. + PBC_flag : {'none', 'hdim_1', 'hdim_2', 'both'} + Sets whether to use periodic boundaries, and if so in which directions. + 'none' means that we do not have periodic boundaries + 'hdim_1' means that we are periodic along hdim1 + 'hdim_2' means that we are periodic along hdim2 + 'both' means that we are periodic along both horizontal dimensions + + Returns + ------- + 2D or 3D array like (same type as `marker_arr`) + The marker array + ''' + import numpy as np + + # What marker number is the background? Assumed 0. + bg_marker = 0 + + if level is None: + level=slice(None) + + if len(marker_arr.shape)==3: + is_3D = True + z_len = marker_arr.shape[0] + h1_len = marker_arr.shape[1] + h2_len = marker_arr.shape[2] + + else: + is_3D = False + z_len = 0 + h1_len = marker_arr.shape[0] + h2_len = marker_arr.shape[1] + # transpose to 3D array to make things easier. + marker_arr = marker_arr[np.newaxis, :, :] + + if seed_3D_flag == 'column': + for index, row in features.iterrows(): + marker_arr[level,int(row['hdim_1']), int(row['hdim_2'])]=row['feature'] + + elif seed_3D_flag == 'box': + # Get the size of the seed box from the input parameter + try: + if is_3D: + seed_z = seed_3D_size[0] + start_num = 1 + else: + start_num = 0 + seed_h1 = seed_3D_size[start_num] + seed_h2 = seed_3D_size[start_num + 1] + except TypeError: + # Not iterable, assume int. + seed_z = seed_3D_size + seed_h1 = seed_3D_size + seed_h2 = seed_3D_size + + for index, row in features.iterrows(): + if is_3D: + # If we have a 3D input and we need to do box seeding + # we need to have 3D features. 
+                try:
+                    row['vdim']
+                except KeyError:
+                    raise ValueError("For Box seeding on 3D segmentation,"
+                                     " you must have a 3D input source.")
+
+            # Because we don't support PBCs on the vertical axis,
+            # this is simple: just go in the seed_z/2 points around the
+            # vdim of the feature, up to the limits of the array.
+            if is_3D:
+                z_seed_start = int(np.max([0, np.ceil(row['vdim']-seed_z/2)]))
+                z_seed_end = int(np.min([z_len, np.ceil(row['vdim']+seed_z/2)]))
+
+            # For the horizontal dimensions, it's more complicated if we have
+            # PBCs.
+            hdim_1_min = int(np.ceil(row['hdim_1'] - seed_h1/2))
+            hdim_1_max = int(np.ceil(row['hdim_1'] + seed_h1/2))
+            hdim_2_min = int(np.ceil(row['hdim_2'] - seed_h2/2))
+            hdim_2_max = int(np.ceil(row['hdim_2'] + seed_h2/2))
+
+            all_seed_boxes = tb_utils.get_pbc_coordinates(
+                hdim_1_min, hdim_1_max, hdim_2_min, hdim_2_max,
+                0, h1_len, 0, h2_len, PBC_flag= PBC_flag
+            )
+            for seed_box in all_seed_boxes:
+                # Need to see if there are any other points seeded
+                # in this seed box first.
+                curr_box_markers = (marker_arr[z_seed_start:z_seed_end,
+                                    seed_box[0]:seed_box[1],
+                                    seed_box[2]:seed_box[3]])
+                all_feats_in_box = np.unique(curr_box_markers)
+                if np.any(curr_box_markers!=bg_marker):
+                    # If we have non-background points already seeded,
+                    # we need to find the best way to seed them.
+                    # Currently seeding with the closest point.
+                    # Loop through all points in the box
+                    # (nditer needs the 'multi_index' flag for the
+                    # it.multi_index lookup below to be available)
+                    with np.nditer(curr_box_markers, flags=['multi_index']) as it:
+                        for curr_box_pt in it:
+                            # Get its global index so that we can calculate
+                            # distance and set the array.
+                            local_index = it.multi_index
+                            global_index = (local_index[0]+z_seed_start,
+                                            local_index[1] + seed_box[0],
+                                            local_index[2] + seed_box[2])
+                            # If it's a background marker, we can just set it
+                            # with the feature we're working on.
+                            if curr_box_pt == bg_marker:
+                                marker_arr[global_index] = row['feature']
+                                continue
+                            # it has another feature in it. Calculate the distance
+                            # from its current set feature and the new feature.
+                            if is_3D:
+                                curr_coord = (row['vdim'], row['hdim_1'], row['hdim_2'])
+                            else:
+                                curr_coord = (0, row['hdim_1'], row['hdim_2'])
+
+                            dist_from_curr_pt = tb_utils.calc_distance_coords_pbc(
+                                global_index, curr_coord,
+                                min_h1 = 0, max_h1 = h1_len,
+                                min_h2 = 0, max_h2 = h2_len, PBC_flag= PBC_flag
+                            )
+
+                            # This is technically an O(N^2) operation, but
+                            # hopefully performance isn't too bad as this should
+                            # be rare.
+                            orig_row = features[features['feature'] == curr_box_pt].iloc[0]
+                            if is_3D:
+                                orig_coord = (orig_row['vdim'], orig_row['hdim_1'], orig_row['hdim_2'])
+                            else:
+                                orig_coord = (0, orig_row['hdim_1'], orig_row['hdim_2'])
+                            dist_from_orig_pt = tb_utils.calc_distance_coords_pbc(
+                                global_index, orig_coord,
+                                min_h1 = 0, max_h1 = h1_len,
+                                min_h2 = 0, max_h2 = h2_len, PBC_flag= PBC_flag
+                            )
+                            # The current point center is further away
+                            # than the original point center, so do nothing
+                            if dist_from_curr_pt > dist_from_orig_pt:
+                                continue
+                            else:
+                                # the current point center is closer.
+                                marker_arr[global_index] = row['feature']
+                # completely unseeded region so far.
+                else:
+                    marker_arr[z_seed_start:z_seed_end,
+                               seed_box[0]:seed_box[1],
+                               seed_box[2]:seed_box[3]]=row['feature']
+
+
+    # If we aren't 3D, transpose back.
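The closest-point tie-breaking above hinges on tb_utils.calc_distance_coords_pbc returning the shortest wrapped distance. The tests relocated to test_util.py at the end of this series pin that behaviour down, e.g.:

    import numpy as np
    # On a 0..10 domain with 'hdim_1' periodicity, (0, 9, 0) is 1 away from
    # (0, 0, 0), not 9:
    # calc_distance_coords_pbc(np.array((0, 9, 0)), np.array((0, 0, 0)),
    #                          0, 10, 0, 10, 'hdim_1') == pytest.approx(1)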
+ if not is_3D: + marker_arr = marker_arr[0, :, :] + + return marker_arr + + def segmentation_3D(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,PBC_flag='none',seed_3D_flag='column'): - return segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,PBC_flag=PBC_flag,seed_3D_flag='column') + return segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,PBC_flag=PBC_flag,seed_3D_flag=seed_3D_flag) def segmentation_2D(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,PBC_flag='none',seed_3D_flag='column'): - return segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,PBC_flag=PBC_flag,seed_3D_flag='column') + return segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,PBC_flag=PBC_flag,seed_3D_flag=seed_3D_flag) def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',PBC_flag='none',seed_3D_flag='column', seed_3D_size=5): @@ -52,11 +232,12 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu flag determining the algorithm to use (currently watershedding implemented) max_distance: float maximum distance from a marker allowed to be classified as belonging to that cell - PBC_flag: string - options: 'none' (default), 'hdim_1', 'hdim_2', 'both' - flag indicating whether to use PBC treatment or not - note to self: should be expanded to account for singly periodic boundaries also - rather than just doubly periodic + PBC_flag : {'none', 'hdim_1', 'hdim_2', 'both'} + Sets whether to use periodic boundaries, and if so in which directions. + 'none' means that we do not have periodic boundaries + 'hdim_1' means that we are periodic along hdim1 + 'hdim_2' means that we are periodic along hdim2 + 'both' means that we are periodic along both horizontal dimensions seed_3D_flag: str('column', 'box') Seed 3D field at feature positions with either the full column (default) or a box of user-set size @@ -170,67 +351,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu # set markers at the positions of the features: markers = np.zeros(unmasked.shape).astype(np.int32) - if not is_3D_seg: #2D watershedding - for index, row in features_in.iterrows(): - markers[int(row['hdim_1']), int(row['hdim_2'])]=row['feature'] - - elif is_3D_seg: #3D watershedding - - # We need to generate seeds in 3D. - # TODO: I think it would be easier to transpose the input to always be - # z, h1, h2. - if (seed_3D_flag == 'column'): - for index, row in features_in.iterrows(): - markers[level,int(row['hdim_1']), int(row['hdim_2'])]=row['feature'] - - elif (seed_3D_flag == 'box'): - z_len = data.shape[0] - h1_len = data.shape[1] - h2_len = data.shape[2] - - # Get the size of the seed box from the input parameter - try: - seed_z = seed_3D_size[0] - seed_h1 = seed_3D_size[1] - seed_h2 = seed_3D_size[2] - except TypeError: - # Not iterable, assume int. - seed_z = seed_3D_size - seed_h1 = seed_3D_size - seed_h2 = seed_3D_size - - # Can we use our testing function to generate 3D boxes (with PBC awareness) - # for a faster version of this? 
- for index, row in features_in.iterrows(): - try: - row['vdim'] - except KeyError: - raise ValueError("For Box seeding, you must have a 3D input" - "source.") - - # Because we don't support PBCs on the vertical axis, - # this is simple- just go in the seed_z/2 points around the - # vdim of the feature, up to the limits of the array. - z_seed_start = int(np.max([0, np.ceil(row['vdim']-seed_z/2)])) - z_seed_end = int(np.min([z_len, np.ceil(row['vdim']+seed_z/2)])) - - # For the horizontal dimensions, it's more complicated if we have - # PBCs. - hdim_1_min = int(np.ceil(row['hdim_1'] - seed_h1/2)) - hdim_1_max = int(np.ceil(row['hdim_1'] + seed_h1/2)) - hdim_2_min = int(np.ceil(row['hdim_2'] - seed_h2/2)) - hdim_2_max = int(np.ceil(row['hdim_2'] + seed_h2/2)) - - all_seed_boxes = tb_utils.get_pbc_coordinates( - hdim_1_min, hdim_1_max, hdim_2_min, hdim_2_max, - 0, h1_len, 0, h2_len, PBC_flag= PBC_flag - ) - for seed_box in all_seed_boxes: - markers[z_seed_start:z_seed_end, - seed_box[0]:seed_box[1], - seed_box[2]:seed_box[3]]=row['feature'] - - + markers = add_markers(features_in, markers, seed_3D_flag, seed_3D_size, level, PBC_flag) # set markers in cells not fulfilling threshold condition to zero: markers[~unmasked]=0 #marker_vals = np.unique(markers) @@ -405,17 +526,9 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu wall_labels = np.array([]) - # TODO: move indices around with specific z axis w_wall = np.unique(segmentation_mask_3[:,:,0]) wall_labels = np.append(wall_labels,w_wall) - # TODO: add test case that tests buddy box - #e_wall = np.unique(segmentation_mask_3[:,:,-1]) - #wall_labels = np.append(wall_labels,e_wall) - - #n_wall = np.unique(segmentation_mask_3[:,-1,:]) - #wall_labels = np.append(wall_labels,n_wall) - s_wall = np.unique(segmentation_mask_3[:,0,:]) wall_labels = np.append(wall_labels,s_wall) @@ -425,6 +538,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu # Loop through all segmentation mask labels on the wall for cur_idx in wall_labels: + print("we have buddies") vdim_indices = z_reg_inds[cur_idx] hdim1_indices = y_reg_inds[cur_idx] @@ -618,7 +732,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu rgn_cube.units = 'kg kg-1' - print(rgn_cube) + #print(rgn_cube) #print(rgn_cube.vdim) #Update buddy_features feature positions to correspond to buddy box space @@ -664,82 +778,9 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu # set markers at the positions of the features: buddy_markers = np.zeros(unmasked_buddies.shape).astype(np.int32) - - if rgn_cube.ndim==2: #2D watershedding - for index, row in buddy_features.iterrows(): - buddy_markers[int(row['hdim_1']), int(row['hdim_2'])]=row['feature'] - - elif rgn_cube.ndim==3: #3D watershedding - - - list_coord_names=[coord.name() for coord in rgn_cube.coords()] - #determine vertical axis: - print(list_coord_names) - if vertical_coord=='auto': - list_vertical=['vdim','z','model_level_number','altitude','geopotential_height'] - for coord_name in list_vertical: - if coord_name in list_coord_names: - vertical_axis=coord_name - print(vertical_axis) - break - elif vertical_coord in list_coord_names: - vertical_axis=vertical_coord - else: - raise ValueError('Please specify vertical coordinate') - ndim_vertical=rgn_cube.coord_dims(vertical_axis) - - if len(ndim_vertical)>1: - raise ValueError('please specify 1 dimensional vertical coordinate') - z_len = len(rgn_cube.coord('vdim').points) - y_len = 
len(rgn_cube.coord('hdim_1').points)
-        x_len = len(rgn_cube.coord('hdim_2').points)
-
-
-        for index, row in buddy_features.iterrows():
-            #creation of 5x5x5 point ranges for 3D marker seeding
-            #and PBC flags for cross-boundary seeding - nixing this idea for now, but described in PBC Segmentation notes
-
-            # TODO: fix point ranges here.
-            # TODO: why is this repeated?
-            if(int(row['vdim']) >=2 and int(row['vdim']) <= z_len-3):
-                z_list = np.arange(int(row['vdim']-2),int(row['vdim']+3))
-            elif(int(row['vdim']) < 2):
-                z_list = np.arange(0,5)
-            else:
-                z_list = np.arange(z_len-5,z_len)
-
-            if(int(row['hdim_1']) >=2 and int(row['hdim_1']) <= y_len-3):
-                y_list = np.arange(int(row['hdim_1']-2),int(row['hdim_1']+3))
-            elif(int(row['hdim_1']) < 2):
-                y_list = np.arange(0,5)
-                #PBC_y_chk = 1
-            else:
-                y_list = np.arange(y_len-5,y_len)
-                #PBC_y_chk = 1
-
-            if(int(row['hdim_2']) >=2 and int(row['hdim_2']) <= x_len-3):
-                x_list = np.arange(int(row['hdim_2']-2),int(row['hdim_2']+3))
-            elif(int(row['hdim_2']) < 2):
-                x_list = np.arange(0,5)
-                #PBC_x_chk = 1
-            else:
-                x_list = np.arange(x_len-5,x_len)
-                #PBC_x_chk = 1
-
-            #loop thru 5x5x5 z times y times x range
-            for k in range(0,5):
-                for j in range(0,5):
-                    for i in range(0,5):
-
-                        if ndim_vertical[0]==0:
-                            buddy_markers[z_list[k],y_list[j],x_list[i]]=row['feature']
-                        elif ndim_vertical[0]==1:
-                            buddy_markers[y_list[j],z_list[k],x_list[i]]=row['feature']
-                        elif ndim_vertical[0]==2:
-                            buddy_markers[y_list[j],x_list[i],z_list[k]]=row['feature']
-
-    else:
-        raise ValueError('Segmentations routine only possible with 2 or 3 spatial dimensions')
+    # Buddy boxes are always without PBCs
+    buddy_markers = add_markers(buddy_features, buddy_markers, seed_3D_flag,
+                                seed_3D_size, level, PBC_flag='none')

     # set markers in cells not fulfilling threshold condition to zero:
     print(np.unique(buddy_markers))

diff --git a/tobac/tests/test_segmentation.py b/tobac/tests/test_segmentation.py
index 51e23cb0..95bcd022 100644
--- a/tobac/tests/test_segmentation.py
+++ b/tobac/tests/test_segmentation.py
@@ -490,13 +490,13 @@ def test_different_z_axes(test_dset_size, vertical_axis_num, vertical_coord_name
         dxy=test_dxy,
         threshold=1.5,
     )
-    # TODO: add more tests to make sure buddy box code is run.
+# It is not yet clear why none of the parameter sets below triggers the buddy box code path.
@pytest.mark.parametrize("dset_size, blob_1_loc, blob_1_size, blob_2_loc, blob_2_size," "shift_domain, seed_3D_size", [((20,30,40), (8,0,0), (5,5,5), (8, 3,3), (5,5,5), (0,-8,-8), None), - ((20,30,40), (8,0,0), (5,5,5), (8, 3,3), (5,5,5), (0,-8,-8), 5), - ((20,30,40), (8,0,0), (5,5,5), (8, 28,38), (5,5,5), (0,15,15), 5), + ((20,30,40), (8,0,0), (5,5,5), (8, 3,3), (5,5,5), (0,-8,-8), None), + ((20,30,40), (8,1,1), (5,5,5), (8, 28,38), (5,5,5), (0,15,15), None), ((20,30,40), (8,0,0), (5,5,5), (8, 28,38), (5,5,5), (0,-8,-8), None), ] ) @@ -573,12 +573,14 @@ def test_segmentation_timestep_3d_buddy_box(dset_size,blob_1_loc, blob_1_size, b start_h2=blob_1_loc[2], max_h1 = dset_size[1], max_h2 = dset_size[2], + feature_num = 1, PBC_flag='both') test_feature_ds_2 = testing.generate_single_feature(start_v=blob_2_loc[0], start_h1=blob_2_loc[1], start_h2=blob_2_loc[2], max_h1 = dset_size[1], max_h2 = dset_size[2], + feature_num = 2, PBC_flag='both') test_feature_ds = pd.concat([test_feature_ds_1, test_feature_ds_2]) @@ -610,12 +612,14 @@ def test_segmentation_timestep_3d_buddy_box(dset_size,blob_1_loc, blob_1_size, b start_h2=blob_1_loc[2]+shift_domain[2], max_h1 = dset_size[1], max_h2 = dset_size[2], + feature_num = 1, PBC_flag='both') test_feature_ds_2 = testing.generate_single_feature(start_v=blob_2_loc[0]+shift_domain[0], start_h1=blob_2_loc[1]+shift_domain[1], start_h2=blob_2_loc[2]+shift_domain[2], max_h1 = dset_size[1], max_h2 = dset_size[2], + feature_num = 2, PBC_flag='both') test_feature_ds_shifted = pd.concat([test_feature_ds_1, test_feature_ds_2]) out_seg_mask_shifted, out_df = seg.segmentation_timestep( diff --git a/tobac/tests/test_tracking.py b/tobac/tests/test_tracking.py index 182071b7..267998fc 100644 --- a/tobac/tests/test_tracking.py +++ b/tobac/tests/test_tracking.py @@ -124,85 +124,3 @@ def test_build_distance_function(): assert (test_func(np.array((0,9,9)), np.array((0,0,0))) == pytest.approx(1.4142135)) -def test_calc_distance_coords_pbc(): - '''Tests ```tobac.tracking.calc_distance_coords_pbc``` - Currently tests: - two points in normal space - Periodicity along hdim_1, hdim_2, and corners - ''' - - # Test first two points in normal space with varying PBC conditions - for PBC_condition in ['none', 'hdim_1', 'hdim_2', 'both']: - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(0)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,1)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(1)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0)), np.array((0,1)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(1)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,0)), np.array((3,3,1)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(4.3588989, rel=1e-3)) - - # Now test two points that will be closer along the hdim_1 boundary for cases without PBCs - for PBC_condition in ['hdim_1', 'both']: - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(1)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,9,0)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(1)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((8,0)), np.array((0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(2)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((4,0,4)), np.array((3,7,3)), 0, 10, 0, 10, PBC_condition) - == 
pytest.approx(3.3166247)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((4,0,4)), np.array((3,7,3)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(3.3166247)) - - - - # Test the same points, except without PBCs - for PBC_condition in ['none', 'hdim_2']: - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(9)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,9,0)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(9)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((8,0)), np.array((0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(8)) - - # Now test two points that will be closer along the hdim_2 boundary for cases without PBCs - for PBC_condition in ['hdim_2', 'both']: - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,9)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(1)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(1)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,8)), np.array((0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(2)) - - # Test the same points, except without PBCs - for PBC_condition in ['none', 'hdim_1']: - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,9)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(9)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(9)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,8)), np.array((0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(8)) - - # Test points that will be closer for the both - PBC_condition = 'both' - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,9,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(1.4142135, rel=1e-3)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(1.4142135, rel=1e-3)) - - # Test the corner points for no PBCs - PBC_condition = 'none' - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,9,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(12.727922, rel=1e-3)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(12.727922, rel=1e-3)) - - # Test the corner points for hdim_1 and hdim_2 - for PBC_condition in ['hdim_1', 'hdim_2']: - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,9,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(9.055385)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(9.055385)) - - diff --git a/tobac/tests/test_util.py b/tobac/tests/test_util.py index 168bd705..b31c6f0c 100644 --- a/tobac/tests/test_util.py +++ b/tobac/tests/test_util.py @@ -1,3 +1,4 @@ +import pytest import tobac.testing import tobac.utils as tb_utils @@ -74,3 +75,87 @@ def test_get_indices_of_labels_from_reg_prop_dict(): assert np.all(y_indices[index_key] >= 0) and np.all(y_indices[index_key] < ny_2D) + +def test_calc_distance_coords_pbc(): + '''Tests ```tobac.utils.calc_distance_coords_pbc``` + Currently tests: + two points in normal space + Periodicity along hdim_1, hdim_2, and corners + ''' + import numpy as np + + # Test first two points 
in normal space with varying PBC conditions
+    for PBC_condition in ['none', 'hdim_1', 'hdim_2', 'both']:
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(0))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,1)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(1))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,0)), np.array((0,1)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(1))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,0)), np.array((3,3,1)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(4.3588989, rel=1e-3))
+
+    # Now test two points that are closer along the hdim_1 boundary than they
+    # would be without PBCs
+    for PBC_condition in ['hdim_1', 'both']:
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(1))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,9,0)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(1))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((8,0)), np.array((0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(2))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((4,0,4)), np.array((3,7,3)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(3.3166247))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((4,0,4)), np.array((3,7,3)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(3.3166247))
+
+
+
+    # Test the same points, but without periodicity along hdim_1
+    for PBC_condition in ['none', 'hdim_2']:
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(9))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,9,0)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(9))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((8,0)), np.array((0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(8))
+
+    # Now test two points that are closer along the hdim_2 boundary than they
+    # would be without PBCs
+    for PBC_condition in ['hdim_2', 'both']:
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,9)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(1))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(1))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,8)), np.array((0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(2))
+
+    # Test the same points, but without periodicity along hdim_2
+    for PBC_condition in ['none', 'hdim_1']:
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,9)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(9))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(9))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,8)), np.array((0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(8))
+
+    # Test points that are closer only under the 'both' condition
+    PBC_condition = 'both'
+    assert (tb_utils.calc_distance_coords_pbc(np.array((0,9,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+            == pytest.approx(1.4142135, rel=1e-3))
+    assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition)
+            == pytest.approx(1.4142135, rel=1e-3))
+
+    # Test the corner points for no PBCs
+    PBC_condition = 'none'
+    assert (tb_utils.calc_distance_coords_pbc(np.array((0,9,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+            == 
pytest.approx(12.727922, rel=1e-3)) + assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition) + == pytest.approx(12.727922, rel=1e-3)) + + # Test the corner points for hdim_1 and hdim_2 + for PBC_condition in ['hdim_1', 'hdim_2']: + assert (tb_utils.calc_distance_coords_pbc(np.array((0,9,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition) + == pytest.approx(9.055385)) + assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition) + == pytest.approx(9.055385)) + + diff --git a/tobac/tracking.py b/tobac/tracking.py index 12e7ebd7..4a699959 100644 --- a/tobac/tracking.py +++ b/tobac/tracking.py @@ -2,17 +2,7 @@ import numpy as np import pandas as pd import math - - -def njit_if_available(func, **kwargs): - '''Decorator to wrap a function with numba.njit if available. - If numba isn't available, it just returns the function. - ''' - try: - from numba import njit - return njit(func, kwargs) - except ModuleNotFoundError: - return func +from . import utils as tb_utils @@ -340,63 +330,6 @@ def build_distance_function(min_h1, max_h1, min_h2, max_h2, PBC_flag): ''' import functools - return functools.partial(calc_distance_coords_pbc, + return functools.partial(tb_utils.calc_distance_coords_pbc, min_h1 = min_h1, max_h1 = max_h1, min_h2 = min_h2, max_h2 = max_h2, PBC_flag = PBC_flag) - -@njit_if_available -def calc_distance_coords_pbc(coords_1, coords_2, min_h1, max_h1, min_h2, max_h2, - PBC_flag): - '''Function to calculate the distance between cartesian - coordinate set 1 and coordinate set 2. Note that we assume both - coordinates are within their min/max already. - - Parameters - ---------- - coords_1: 2D or 3D array-like - Set of coordinates passed in from trackpy of either (vdim, hdim_1, hdim_2) - coordinates or (hdim_1, hdim_2) coordinates. - coords_2: 2D or 3D array-like - Similar to coords_1, but for the second pair of coordinates - min_h1: int - Minimum point in hdim_1 - max_h1: int - Maximum point in hdim_1 - min_h2: int - Minimum point in hdim_2 - max_h2: int - Maximum point in hdim_2 - PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both') - Sets whether to use periodic boundaries, and if so in which directions. - 'none' means that we do not have periodic boundaries - 'hdim_1' means that we are periodic along hdim1 - 'hdim_2' means that we are periodic along hdim2 - 'both' means that we are periodic along both horizontal dimensions - - Returns - ------- - float - Distance between coords_1 and coords_2 in cartesian space. - - ''' - is_3D = len(coords_1)== 3 - size_h1 = max_h1 - min_h1 - size_h2 = max_h2 - min_h2 - - if not is_3D: - # Let's make the accounting easier. 
-        coords_1 = np.array((0, coords_1[0], coords_1[1]))
-        coords_2 = np.array((0, coords_2[0], coords_2[1]))
-
-    if PBC_flag in ['hdim_1', 'both']:
-        mod_h1 = size_h1
-    else:
-        mod_h1 = 0
-    if PBC_flag in ['hdim_2', 'both']:
-        mod_h2 = size_h2
-    else:
-        mod_h2 = 0
-    max_dims = np.array((0, mod_h1, mod_h2))
-    deltas = np.abs(coords_1 - coords_2)
-    deltas = np.where(deltas > 0.5 * max_dims, deltas - max_dims, deltas)
-    return np.sqrt(np.sum(deltas**2))
diff --git a/tobac/utils.py b/tobac/utils.py
index ea46ae6b..31aad024 100644
--- a/tobac/utils.py
+++ b/tobac/utils.py
@@ -1,4 +1,6 @@
 import logging
+import numpy as np
+

 def column_mask_from2D(mask_2D,cube,z_coord='model_level_number'):
     '''function to turn 2D watershedding mask into a 3D mask of selected columns
@@ -895,3 +897,72 @@ def get_pbc_coordinates(h1_min, h1_max, h2_min, h2_max,
         for h2_start_coord_single, h2_end_coord_single in zip(h2_start_coords, h2_end_coords):
             out_coords.append((h1_start_coord_single, h1_end_coord_single, h2_start_coord_single, h2_end_coord_single))
     return out_coords
+
+def njit_if_available(func, **kwargs):
+    '''Decorator to wrap a function with numba.njit if available.
+    If numba isn't available, it just returns the function.
+    '''
+    try:
+        from numba import njit
+        # Options must be forwarded as keyword arguments; passing the dict
+        # positionally would hand it to njit in place of a signature.
+        return njit(func, **kwargs)
+    except ModuleNotFoundError:
+        return func
+
+
+@njit_if_available
+def calc_distance_coords_pbc(coords_1, coords_2, min_h1, max_h1, min_h2, max_h2,
+                             PBC_flag):
+    '''Function to calculate the distance between cartesian
+    coordinate set 1 and coordinate set 2. Note that we assume both
+    coordinates are within their min/max already.
+
+    Parameters
+    ----------
+    coords_1: 2D or 3D array-like
+        Set of coordinates passed in from trackpy of either (vdim, hdim_1, hdim_2)
+        coordinates or (hdim_1, hdim_2) coordinates.
+    coords_2: 2D or 3D array-like
+        Similar to coords_1, but for the second pair of coordinates
+    min_h1: int
+        Minimum point in hdim_1
+    max_h1: int
+        Maximum point in hdim_1
+    min_h2: int
+        Minimum point in hdim_2
+    max_h2: int
+        Maximum point in hdim_2
+    PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both')
+        Sets whether to use periodic boundaries, and if so in which directions.
+        'none' means that we do not have periodic boundaries
+        'hdim_1' means that we are periodic along hdim1
+        'hdim_2' means that we are periodic along hdim2
+        'both' means that we are periodic along both horizontal dimensions
+
+    Returns
+    -------
+    float
+        Distance between coords_1 and coords_2 in cartesian space.
+
+    '''
+
+    is_3D = len(coords_1) == 3
+    size_h1 = max_h1 - min_h1
+    size_h2 = max_h2 - min_h2
+
+    if not is_3D:
+        # Let's make the accounting easier.
+        coords_1 = np.array((0, coords_1[0], coords_1[1]))
+        coords_2 = np.array((0, coords_2[0], coords_2[1]))
+
+    if PBC_flag in ['hdim_1', 'both']:
+        mod_h1 = size_h1
+    else:
+        mod_h1 = 0
+    if PBC_flag in ['hdim_2', 'both']:
+        mod_h2 = size_h2
+    else:
+        mod_h2 = 0
+    max_dims = np.array((0, mod_h1, mod_h2))
+    deltas = np.abs(coords_1 - coords_2)
+    deltas = np.where(deltas > 0.5 * max_dims, deltas - max_dims, deltas)
+    return np.sqrt(np.sum(deltas**2))

From 55c2f9f1e0b6c0153d264b04f577f65b9e0825ba Mon Sep 17 00:00:00 2001
From: Sean Freeman
Date: Sun, 27 Mar 2022 22:02:43 -0600
Subject: [PATCH 43/82] Added new test that tests buddy box better. Test currently failing, need to fix.
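For reference, the minimum-image convention that calc_distance_coords_pbc implements above can be exercised on its own. The following minimal standalone sketch (the helper name is hypothetical, and only the fully periodic 'both' case is handled) reproduces the corner-point value asserted in the tests:

import numpy as np

def minimum_image_distance(p1, p2, size_h1, size_h2):
    # Wrap each horizontal delta onto the nearer periodic image;
    # the leading 0 keeps the vertical axis non-periodic.
    max_dims = np.array((0, size_h1, size_h2))
    deltas = np.abs(np.asarray(p1) - np.asarray(p2))
    deltas = np.where(deltas > 0.5 * max_dims, deltas - max_dims, deltas)
    return np.sqrt(np.sum(deltas ** 2))

# (0, 9, 9) and (0, 0, 0) on a 10x10 horizontal domain: each horizontal
# delta of 9 wraps to 1, giving sqrt(2) rather than the in-plane 12.73.
print(minimum_image_distance((0, 9, 9), (0, 0, 0), 10, 10))  # ~1.4142135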
--- tobac/segmentation.py | 6 +++--- tobac/tests/test_segmentation.py | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index fb8dc4c2..adad11c5 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -146,7 +146,7 @@ def add_markers(features, marker_arr, seed_3D_flag, seed_3D_size, level, PBC_fla # we need to find the best way to seed them. # Currently seeding with the closest point. # Loop through all points in the box - with np.nditer(curr_box_markers) as it: + with np.nditer(curr_box_markers, flags=['multi_index']) as it: for curr_box_pt in it: # Get its global index so that we can calculate # distance and set the array. @@ -167,7 +167,7 @@ def add_markers(features, marker_arr, seed_3D_flag, seed_3D_size, level, PBC_fla curr_coord = (0, row['hdim_1'], row['hdim_2']) dist_from_curr_pt = tb_utils.calc_distance_coords_pbc( - global_index, curr_coord, + np.array(global_index), np.array(curr_coord), min_h1 = 0, max_h1 = h1_len, min_h2 = 0, max_h2 = h2_len, PBC_flag= PBC_flag ) @@ -181,7 +181,7 @@ def add_markers(features, marker_arr, seed_3D_flag, seed_3D_size, level, PBC_fla else: orig_coord = (0, orig_row['hdim_1'], orig_row['hdim_2']) dist_from_orig_pt = tb_utils.calc_distance_coords_pbc( - global_index, orig_coord, + np.array(global_index), np.array(orig_coord), min_h1 = 0, max_h1 = h1_len, min_h2 = 0, max_h2 = h2_len, PBC_flag= PBC_flag ) diff --git a/tobac/tests/test_segmentation.py b/tobac/tests/test_segmentation.py index 95bcd022..c3a850c7 100644 --- a/tobac/tests/test_segmentation.py +++ b/tobac/tests/test_segmentation.py @@ -498,8 +498,10 @@ def test_different_z_axes(test_dset_size, vertical_axis_num, vertical_coord_name ((20,30,40), (8,0,0), (5,5,5), (8, 3,3), (5,5,5), (0,-8,-8), None), ((20,30,40), (8,1,1), (5,5,5), (8, 28,38), (5,5,5), (0,15,15), None), ((20,30,40), (8,0,0), (5,5,5), (8, 28,38), (5,5,5), (0,-8,-8), None), + ((20,30,40), (8,0,0), (5,5,5), (8, 28,38), (5,5,5), (0,-8,-8), (5,5,5)), ] ) +# TODO: last test fails def test_segmentation_timestep_3d_buddy_box(dset_size,blob_1_loc, blob_1_size, blob_2_loc, blob_2_size, shift_domain, seed_3D_size): '''Tests ```tobac.segmentation.segmentation_timestep``` From c72bc66852f346b0aead1fcdfd05c80e42b92077 Mon Sep 17 00:00:00 2001 From: galexsky Date: Mon, 4 Apr 2022 15:12:22 -0400 Subject: [PATCH 44/82] Fixed wall_label overwrite procedure Corrected an issue with multiple overwrites for the PBC wall_labels loop. Added new list "skip_list_thisind" to accumulate labels that have been dealt with during the course of treating the wall_label we are presently on. This ensures that we are not prematurely overwriting our present wall_label and exiting the loop through points for that label. 
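The bookkeeping described in this commit message can be illustrated in isolation. Below is a deliberately simplified, runnable toy (all names and data are hypothetical, not tobac's actual label arrays): a per-label set distinguishes regions merged while treating the current wall label from regions merged under earlier labels, and only the latter redirect the current label.

# region id -> resolved label; 10 and 20 are wall labels, 2 and 3 cross a boundary
current_label = {10: 10, 20: 20, 2: 2, 3: 3}
pbc_neighbors = {10: [2, 3, 2], 20: [3]}   # region 2 touches label 10 twice

skip_list = set()                          # merged at any point so far
for label_ind, neighbors in pbc_neighbors.items():
    skip_list_thisind = set()              # reset for every wall label
    for nb in neighbors:
        if nb not in skip_list:            # first contact: absorb the neighbor
            current_label[nb] = label_ind
            skip_list.add(nb)
            skip_list_thisind.add(nb)
        elif nb in skip_list_thisind:      # absorbed by *this* label already
            continue                       # nothing to redo; do not redirect
        else:                              # absorbed while treating an earlier label
            current_label[label_ind] = current_label[nb]

print(current_label)                       # {10: 10, 20: 10, 2: 10, 3: 10}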
--- tobac/feature_detection.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index e2fa54a5..62cf0f9f 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -463,6 +463,8 @@ def feature_detection_threshold(data_i,i_time, wall_labels = np.unique(wall_labels) for label_ind in wall_labels: + #create list for skip labels for this wall label only + skip_list_thisind = [] # 0 isn't a real index if label_ind == 0: continue @@ -500,9 +502,15 @@ def feature_detection_threshold(data_i,i_time, all_label_locs_h1[label_on_corner], all_label_locs_h2[label_on_corner]] = label_ind skip_list = np.append(skip_list,label_on_corner) + skip_list_thisind = np.append(skip_list_thisind,label_on_corner) + + #if it's labeled and has already been dealt with for this label + elif((label_on_corner !=0) and (np.any(label_on_corner==skip_list)) and (np.any(label_on_corner==skip_list_thisind))): + #print("skip_list_thisind label - has already been treated this index") + continue - #if it's labeled and has already been dealt with - elif((label_on_corner !=0) and (np.any(label_on_corner==skip_list))): + #if it's labeled and has already been dealt with via a previous label + elif((label_on_corner !=0) and (np.any(label_on_corner==skip_list)) and (~np.any(label_on_corner==skip_list_thisind))): #find the updated label, and overwrite all of label_ind indices with updated label labels_2_alt = labels_2[label_z,y_val_alt,x_val_alt] labels_2[label_locs_v, @@ -527,9 +535,14 @@ def feature_detection_threshold(data_i,i_time, all_label_locs_h2[label_alt]] = label_ind #we have already dealt with this label. skip_list = np.append(skip_list,label_alt) + skip_list_thisind = np.append(skip_list_thisind,label_alt) + + #if it's labeled and has already been dealt with for this label + elif((label_alt !=0) and (np.any(label_alt==skip_list)) and (np.any(label_alt==skip_list_thisind))): + continue #if it's labeled and has already been dealt with - elif((label_alt !=0) and (np.any(label_alt==skip_list))): + elif((label_alt !=0) and (np.any(label_alt==skip_list)) and (~np.any(label_alt==skip_list_thisind))): #find the updated label, and overwrite all of label_ind indices with updated label labels_2_alt = labels_2[label_z,y_val_alt,label_x] labels_2[label_locs_v, @@ -552,9 +565,14 @@ def feature_detection_threshold(data_i,i_time, all_label_locs_h2[label_alt]] = label_ind #we have already dealt with this label. skip_list = np.append(skip_list,label_alt) + skip_list_thisind = np.append(skip_list_thisind,label_alt) + + #if it's labeled and has already been dealt with for this label + elif((label_alt !=0) and (np.any(label_alt==skip_list)) and (np.any(label_alt==skip_list_thisind))): + continue #if it's labeled and has already been dealt with - elif((label_alt !=0) and (np.any(label_alt==skip_list))): + elif((label_alt !=0) and (np.any(label_alt==skip_list)) and (~np.any(label_alt==skip_list_thisind))): #find the updated label, and overwrite all of label_ind indices with updated label labels_2_alt = labels_2[label_z,label_y,x_val_alt] labels_2[label_locs_v, From e2c9d5f181de4f27af993e7dd95d52d272fb9d0e Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Wed, 6 Apr 2022 08:20:04 -0600 Subject: [PATCH 45/82] Fixed buddy box segmentation bug. 
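Part of the fix in the diff below wraps the index tuples in np.array(...) before calling calc_distance_coords_pbc. A quick illustration of why that conversion matters: the distance helper subtracts its two inputs elementwise, which plain Python tuples do not support.

import numpy as np

p1, p2 = (0, 1, 2), (0, 0, 0)
try:
    p1 - p2                                    # tuples have no elementwise arithmetic
except TypeError as err:
    print("tuple subtraction fails:", err)

print(np.abs(np.array(p1) - np.array(p2)))     # [0 1 2], as the helper expects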
--- tobac/segmentation.py | 16 ++--- tobac/tests/test_segmentation.py | 94 +++++++++++++++++++++++++ tobac/tests/test_util.py | 115 +++++++++++++++++++++++++++++++ tobac/utils.py | 4 +- 4 files changed, 218 insertions(+), 11 deletions(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index adad11c5..9196e895 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -27,7 +27,7 @@ def transfm_pbc_point(in_dim, dim_min, dim_max): else: return in_dim -def add_markers(features, marker_arr, seed_3D_flag, seed_3D_size, level, PBC_flag): +def add_markers(features, marker_arr, seed_3D_flag, seed_3D_size = 5, level = None, PBC_flag = 'none'): '''Adds markers for watershedding using the `features` dataframe to the marker_arr. @@ -129,11 +129,13 @@ def add_markers(features, marker_arr, seed_3D_flag, seed_3D_size, level, PBC_fla hdim_1_max = int(np.ceil(row['hdim_1'] + seed_h1/2)) hdim_2_min = int(np.ceil(row['hdim_2'] - seed_h2/2)) hdim_2_max = int(np.ceil(row['hdim_2'] + seed_h2/2)) - + all_seed_boxes = tb_utils.get_pbc_coordinates( - hdim_1_min, hdim_1_max, hdim_2_min, hdim_2_max, - 0, h1_len, 0, h2_len, PBC_flag= PBC_flag - ) + h1_min = 0, h1_max = h1_len, + h2_min = 0, h2_max = h2_len, + h1_start_coord = hdim_1_min, h1_end_coord = hdim_1_max, + h2_start_coord = hdim_2_min, h2_end_coord = hdim_2_max, + PBC_flag= PBC_flag) for seed_box in all_seed_boxes: # Need to see if there are any other points seeded # in this seed box first. @@ -465,15 +467,11 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu continue else: markers_2[vdim_ind,hdim1_ind,hdim2_ind] = segmentation_mask[vdim_ind,hdim1_ind,hdim2_max] - #print(z_ind,y_ind,x_ind) - #print("seeded") elif hdim2_ind == hdim2_max: if (segmentation_mask[vdim_ind,hdim1_ind,hdim2_min]<=0): continue else: markers_2[vdim_ind,hdim1_ind,hdim2_ind] = segmentation_mask[vdim_ind,hdim1_ind,hdim2_min] - #print(z_ind,y_ind,x_ind) - #print("seeded") # Deal with the opposite corner only if PBC_flag == 'both': diff --git a/tobac/tests/test_segmentation.py b/tobac/tests/test_segmentation.py index c3a850c7..1509fb4b 100644 --- a/tobac/tests/test_segmentation.py +++ b/tobac/tests/test_segmentation.py @@ -1,3 +1,4 @@ +from tracemalloc import start import pytest import tobac.testing as testing import tobac.segmentation as seg @@ -636,3 +637,96 @@ def test_segmentation_timestep_3d_buddy_box(dset_size,blob_1_loc, blob_1_size, b assert np.all(out_seg_mask.core_data() == out_seg_reshifted) + +@pytest.mark.parametrize("dset_size, feat_1_loc, feat_2_loc," + "shift_domain, seed_3D_size", + [((20,30,40), (8,0,0), (8, 3,3), (0,-8,-8), None), + ((20,30,40), (8,0,0), (8, 3,3), (0,-8,-8), None), + ((20,30,40), (8,1,1), (8, 28,38), (0,15,15), None), + ((20,30,40), (8,0,0), (8, 28,38), (0,-8,-8), None), + ((20,30,40), (8,0,0), (8, 28,38), (0,-8,-8), (5,5,5)), + ] +) +# TODO: last test fails +def test_add_markers_pbcs(dset_size,feat_1_loc, feat_2_loc, shift_domain, seed_3D_size): + '''Tests ```tobac.segmentation.add_markers``` + to make sure that adding markers works and is consistent across PBCs + Parameters + ---------- + dset_size: tuple(int, int, int) or (int, int) + Size of the domain (assumes z, hdim_1, hdim_2) or (hdim_1, hdim_2) + feat_1_loc: tuple, same length as dset_size + Location of the first blob + feat_2_loc: tuple, same length as dset_size + Location of the second blob + shift_domain: tuple, same length as dset_size + How many points to shift the domain by. 
+ seed_3D_size: None, int, or tuple + Seed size to pass to tobac. If None, passes in a column seed + ''' + + import numpy as np + import pandas as pd + + + if len(dset_size) == 2: + is_3D = False + start_h1_ax = 0 + else: + is_3D = True + start_h1_ax = 1 + + common_feat_opts = { + 'PBC_flag': 'both', + 'max_h1': dset_size[start_h1_ax], + 'max_h2': dset_size[start_h1_ax + 1] + } + + + # Generate dummy feature dataset only on the first feature. + test_feature_ds_1 = testing.generate_single_feature(start_v=feat_1_loc[0], + start_h1=feat_1_loc[1], + start_h2=feat_1_loc[2], + feature_num = 1, + **common_feat_opts) + test_feature_ds_2 = testing.generate_single_feature(start_v=feat_2_loc[0], + start_h1=feat_2_loc[1], + start_h2=feat_2_loc[2], + feature_num = 2, + **common_feat_opts) + test_feature_ds = pd.concat([test_feature_ds_1, test_feature_ds_2]) + + common_marker_opts = dict() + common_marker_opts['PBC_flag'] = 'both' + + if seed_3D_size is None: + common_marker_opts['seed_3D_flag'] = 'column' + else: + common_marker_opts['seed_3D_flag'] = 'box' + common_marker_opts['seed_3D_size'] = seed_3D_size + + marker_arr = seg.add_markers(test_feature_ds, np.zeros(dset_size), **common_marker_opts) + + # Now, shift the data over and re-run markers. + test_feature_ds_1 = testing.generate_single_feature(start_v=feat_1_loc[0]+shift_domain[0], + start_h1=feat_1_loc[1]+shift_domain[1], + start_h2=feat_1_loc[2]+shift_domain[2], + feature_num = 1, + **common_feat_opts) + test_feature_ds_2 = testing.generate_single_feature(start_v=feat_2_loc[0]+shift_domain[0], + start_h1=feat_2_loc[1]+shift_domain[1], + start_h2=feat_2_loc[2]+shift_domain[2], + feature_num = 2, + **common_feat_opts) + test_feature_ds_shifted = pd.concat([test_feature_ds_1, test_feature_ds_2]) + + marker_arr_shifted = seg.add_markers(test_feature_ds_shifted, np.zeros(dset_size), + **common_marker_opts) + + + # Now, shift output back. + marker_arr_reshifted = np.roll(marker_arr_shifted, + tuple((-x for x in shift_domain)), axis=(0,1,2)) + + assert np.all(marker_arr == marker_arr_reshifted) + diff --git a/tobac/tests/test_util.py b/tobac/tests/test_util.py index b31c6f0c..1ffccfef 100644 --- a/tobac/tests/test_util.py +++ b/tobac/tests/test_util.py @@ -1,6 +1,21 @@ import pytest import tobac.testing import tobac.utils as tb_utils +from collections import Counter + + +def lists_equal_without_order(a, b): + """ + This will make sure the inner list contain the same, + but doesn't account for duplicate groups. + from: https://stackoverflow.com/questions/31501909/assert-list-of-list-equality-without-order-in-python/31502000 + """ + for l1 in a: + check_counter = Counter(l1) + if not any(Counter(l2) == check_counter for l2 in b): + return False + return True + def test_get_label_props_in_dict(): '''Testing ```tobac.feature_detection.get_label_props_in_dict``` for both 2D and 3D cases. 
@@ -159,3 +174,103 @@ def test_calc_distance_coords_pbc(): == pytest.approx(9.055385)) +@pytest.mark.parametrize("loc_1, loc_2, bounds, PBC_flag, expected_dist", + [((0,0,0), (0,0,9), (0, 10, 0, 10), 'both', 1), + ] +) +def test_calc_distance_coords_pbc_param(loc_1, loc_2, bounds, PBC_flag, expected_dist): + '''Tests ```tobac.utils.calc_distance_coords_pbc``` in a parameterized way + + Parameters + ---------- + loc_1: tuple + First point location, either in 2D or 3D space (assumed z, h1, h2) + loc_2: tuple + Second point location, either in 2D or 3D space (assumed z, h1, h2) + bounds: tuple + hdim_1/hdim_2 bounds as (h1_min, h1_max, h2_min, h2_max) + PBC_flag : {'none', 'hdim_1', 'hdim_2', 'both'} + Sets whether to use periodic boundaries, and if so in which directions. + 'none' means that we do not have periodic boundaries + 'hdim_1' means that we are periodic along hdim1 + 'hdim_2' means that we are periodic along hdim2 + 'both' means that we are periodic along both horizontal dimensions + expected_dist: float + Expected distance between the two points + ''' + import numpy as np + + assert (tb_utils.calc_distance_coords_pbc(np.array(loc_1), np.array(loc_2), bounds[0], bounds[1], + bounds[2], bounds[3], PBC_flag)== pytest.approx(expected_dist)) + + +def test_get_pbc_coordinates(): + '''Tests tobac.util.get_pbc_coordinates. + Currently runs the following tests: + For an invalid PBC_flag, we raise an error + For PBC_flag of 'none', we truncate the box and give a valid box. + + ''' + + with pytest.raises(ValueError): + tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'c') + + # Test PBC_flag of none + + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'none') == [(1, 4, 1, 4),]) + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -1, 4, 1, 4, 'none') == [(0, 4, 1, 4),]) + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 12, 1, 4, 'none') == [(1, 10, 1, 4),]) + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 12, -1, 4, 'none') == [(1, 10, 0, 4),]) + + # Test PBC_flag with hdim_1 + # Simple case, no PBC overlapping + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'hdim_1') == [(1, 4, 1, 4),]) + # PBC going on the min side + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -1, 4, 1, 4, 'hdim_1') == [(0, 4, 1, 4), (9, 10, 1, 4)]) + # PBC going on the min side; should be truncated in hdim_2. 
+ assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -1, 4, -1, 4, 'hdim_1') == [(0, 4, 0, 4), (9, 10, 0, 4)]) + # PBC going on the max side only + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 4, 12, 1, 4, 'hdim_1') == [(4, 10, 1, 4), (0, 2, 1, 4)]) + # PBC overlapping + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -4, 12, 1, 4, 'hdim_1') == [(0, 10, 1, 4),]) + + # Test PBC_flag with hdim_2 + # Simple case, no PBC overlapping + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'hdim_2') == [(1, 4, 1, 4),]) + # PBC going on the min side + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, -1, 4, 'hdim_2') == [(1, 4, 0, 4), (1, 4, 9, 10)]) + # PBC going on the min side with truncation in hdim_1 + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -4, 4, -1, 4, 'hdim_2') == [(0, 4, 0, 4), (0, 4, 9, 10)]) + # PBC going on the max side + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 4, 12, 'hdim_2') == [(1, 4, 4, 10), (1, 4, 0, 2)]) + # PBC overlapping + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, -4, 12, 'hdim_2') == [(1, 4, 0, 10),]) + + # Test PBC_flag with both + # Simple case, no PBC overlapping + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'both') == [(1, 4, 1, 4),]) + # hdim_1 only testing + # PBC on the min side of hdim_1 only + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -1, 4, 1, 4, 'both') == [(0, 4, 1, 4), (9, 10, 1, 4)]) + # PBC on the max side of hdim_1 only + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 4, 12, 1, 4, 'both') == [(4, 10, 1, 4), (0, 2, 1, 4)]) + # PBC overlapping on max side of hdim_1 only + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -4, 12, 1, 4, 'both') == [(0, 10, 1, 4),]) + # hdim_2 only testing + # PBC on the min side of hdim_2 only + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, -1, 4, 'both') == [(1, 4, 0, 4), (1, 4, 9, 10)]) + # PBC on the max side of hdim_2 only + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 4, 12, 'both') == [(1, 4, 4, 10), (1, 4, 0, 2)]) + # PBC overlapping on max side of hdim_2 only + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, -4, 12, 'both') == [(1, 4, 0, 10),]) + # hdim_1 and hdim_2 testing simultaneous + # both larger than the actual domain + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -1, 12, -4, 14, 'both') == [(0, 10, 0, 10),]) + # min in hdim_1 and hdim_2 + assert (lists_equal_without_order(tb_utils.get_pbc_coordinates(0, 10, 0, 10, -3, 3, -4, 2, 'both'), [(0, 3, 0, 2), (0, 3, 6, 10), (7, 10, 6, 10), (7, 10, 0, 2)])) + # max in hdim_1, min in hdim_2 + assert (lists_equal_without_order(tb_utils.get_pbc_coordinates(0, 10, 0, 10, 5, 12, -4, 2, 'both'), [(5, 10, 0, 2), (5, 10, 6, 10), (0, 2, 6, 10), (0, 2, 0, 2)])) + # max in hdim_1 and hdim_2 + assert (lists_equal_without_order(tb_utils.get_pbc_coordinates(0, 10, 0, 10, 5, 12, 7, 15, 'both'), [(5, 10, 7, 10), (5, 10, 0, 5), (0, 2, 0, 5), (0, 2, 7, 10)])) + # min in hdim_1, max in hdim_2 + assert (lists_equal_without_order(tb_utils.get_pbc_coordinates(0, 10, 0, 10, -3, 3, 7, 15, 'both'), [(0, 3, 7, 10), (0, 3, 0, 5), (7, 10, 0, 5), (7, 10, 7, 10)])) diff --git a/tobac/utils.py b/tobac/utils.py index 31aad024..3428b5ba 100644 --- a/tobac/utils.py +++ b/tobac/utils.py @@ -926,11 +926,11 @@ def calc_distance_coords_pbc(coords_1, coords_2, min_h1, max_h1, min_h2, max_h2, min_h1: int Minimum point in hdim_1 max_h1: int - Maximum point in hdim_1 + Maximum point in hdim_1, exclusive. max_h1-min_h1 should be the size. 
min_h2: int
         Minimum point in hdim_2
     max_h2: int
-        Maximum point in hdim_2
+        Maximum point in hdim_2, exclusive. max_h2-min_h2 should be the size.
     PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both')
         Sets whether to use periodic boundaries, and if so in which directions.
         'none' means that we do not have periodic boundaries
         'hdim_1' means that we are periodic along hdim1
         'hdim_2' means that we are periodic along hdim2
         'both' means that we are periodic along both horizontal dimensions

From ab42d31ba70239aea34db8f45f547308c1a929fa Mon Sep 17 00:00:00 2001
From: Sean Freeman
Date: Wed, 6 Apr 2022 08:25:59 -0600
Subject: [PATCH 46/82] fixed github actions dockerfile location

---
 .github/workflows/docker-image.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
index 0046f75c..ae29583e 100644
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -15,4 +15,4 @@ jobs:
     steps:
     - uses: actions/checkout@v2
     - name: Build the Docker image
-      run: docker build . --file Dockerfile --tag my-image-name:$(date +%s)
+      run: docker build . --file tobac/tests/Dockerfile --tag my-image-name:$(date +%s)

From e24e2f9eabbaac87423a9548dfa8161070450d8e Mon Sep 17 00:00:00 2001
From: Sean Freeman
Date: Fri, 8 Apr 2022 10:17:28 -0600
Subject: [PATCH 47/82] Added 3D segmentation with PBCs. (#21)

* Added new 3D and PBC segmentation

Added a few new functions and many lines of code to enable a new 'box' 3D seeding approach and proper watershedding/adjustment of segmentation masks across periodic boundaries. The if-else split for the older 'column' seeding approach and the new 'box' seeding approach should work (though I have not tested it), but right now the PBC treatment assumes we are using the 'box' approach and will likely have to be adapted to work with the 'column' approach too.

There is also a bug in the buddy box seeding currently: because we assume a 5x5x5 seeding box, the code will break if the size of the buddy box is less than 5 in any of the dimensions. Generalizing the box approach to a user-set size and adding a check for this in the Buddy Box routine should help to fix this, but I wanted to port it over without breaking any of the current functionality before I started exploring that.

* Moved some common functions to the utils module

* Removed deprecated code, updated args

Removed older versions of segmentation functions which were nominally PBC/3D but had been superseded by new versions of the original functions including these capabilities and more.
Also added 'seed_3D_flag' argument to all relevant functions for user choice of 3D feature seeding method * Added missing seed_3D_flag Added missing seed_3D_flag in segmentation function call for segmentation_3D function * Starting to clean up * continuing to clean up code, started process of adding tests * More cleanup of segmentation, adding more tests to segmentation and feature detection * Cleaning up of segmentation code, updating tests, cleaning up other code * More cleaning up of segmentation * Added compatibility of new PBC segmentation with 2D data * fixed more bugs in segmentation with 2D inputs * Adding more tests to PBC segmentation * Added corner test case where it fails * Potential fix to corner point bug * Moved get_pbc_coordinates to utilities so that we can use it for segmentation ultimately Also moved get_label_props_in_dict to inside the PBC flags as it's not used otherwise * Updated docs, added start to test function for 3D box * Added notes to myself * Added comments and todos * Update segmentation.py added blah comment * Revert "Update segmentation.py" This reverts commit 22e22c21f831b3e9d194db9ce703e7db7a046d76. * Added new comments explaining PBC seg approach Added numerous comments pertaining to chunks of the PBC treatment procedure. Also found some likely code redundancies or deprecated pieces, which are noted in these new comments * Updated non PBC seed box code Updated the seed box code using my coordinates getter * Added code coverage files to gitignore * Segmentation now transposes to improve code readability Refactored segmentation to transpose data internally so that z is always first (and then re-transposes after we finish). Added new tests to make sure that this is happening correctly * Added new test to theoretically check buddy box Note that none of my cases trigger the buddy box, which I'm honestly confused by. * Got buddy boxes working for more cases * Added new test that tests buddy box better. Test currently failing, need to fix. * Fixed wall_label overwrite procedure Corrected an issue with multiple overwrites for the PBC wall_labels loop. Added new list "skip_list_thisind" to accumulate labels that have been dealt with during the course of treating the wall_label we are presently on. This ensures that we are not prematurely overwriting our present wall_label and exiting the loop through points for that label. * Fixed buddy box segmentation bug. 
* fixed github actions dockerfile location Co-authored-by: galexsky <90701223+galexsky@users.noreply.github.com> Co-authored-by: galexsky --- .github/workflows/docker-image.yml | 2 +- .gitignore | 4 +- tobac/feature_detection.py | 93 +- tobac/segmentation.py | 1151 ++++++++++++++++--------- tobac/testing.py | 116 ++- Dockerfile => tobac/tests/Dockerfile | 0 tobac/tests/test_feature_detection.py | 113 +-- tobac/tests/test_segmentation.py | 732 ++++++++++++++++ tobac/tests/test_testing.py | 71 +- tobac/tests/test_tracking.py | 92 +- tobac/tests/test_util.py | 276 ++++++ tobac/tracking.py | 71 +- tobac/utils.py | 328 +++++++ 13 files changed, 2314 insertions(+), 735 deletions(-) rename Dockerfile => tobac/tests/Dockerfile (100%) create mode 100644 tobac/tests/test_segmentation.py create mode 100644 tobac/tests/test_util.py diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index 0046f75c..ae29583e 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -15,4 +15,4 @@ jobs: steps: - uses: actions/checkout@v2 - name: Build the Docker image - run: docker build . --file Dockerfile --tag my-image-name:$(date +%s) + run: docker build . --file tobac/tests/Dockerfile --tag my-image-name:$(date +%s) diff --git a/.gitignore b/.gitignore index a9a8efc1..944f9afc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ *.pyc __pycache__ -.vscode \ No newline at end of file +.vscode +htmlcov +.coverage \ No newline at end of file diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 4027a5f7..de06d701 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -2,6 +2,7 @@ import numpy as np import pandas as pd import logging +from . import utils as tb_utils def get_label_props_in_dict(labels): '''Function to get the label properties into a dictionary format. @@ -26,68 +27,6 @@ def get_label_props_in_dict(labels): return region_properties_dict - -def get_indices_of_labels_from_reg_prop_dict(region_property_dict): - '''Function to get the x, y, and z indices (as well as point count) of all labeled regions. - - Parameters - ---------- - region_property_dict: dict of region_property objects - This dict should come from the get_label_props_in_dict function. - - Returns - ------- - dict (key: label number, int) - The number of points in the label number - dict (key: label number, int) - The z indices in the label number. 
If a 2D property dict is passed, this value is not returned - dict (key: label number, int) - the y indices in the label number - dict (key: label number, int) - the x indices in the label number - - Raises - ------ - ValueError - a ValueError is raised if there are no regions in the region property dict - - ''' - - import skimage.measure - - if len(region_property_dict) ==0: - raise ValueError("No regions!") - - - z_indices = dict() - y_indices = dict() - x_indices = dict() - curr_loc_indices = dict() - is_3D = False - - #loop through all skimage identified regions - for region_prop_key in region_property_dict: - region_prop = region_property_dict[region_prop_key] - index = region_prop.label - if len(region_prop.coords[0])>=3: - is_3D = True - curr_z_ixs, curr_y_ixs, curr_x_ixs = np.transpose(region_prop.coords) - z_indices[index] = curr_z_ixs - else: - curr_y_ixs, curr_x_ixs = np.transpose(region_prop.coords) - z_indices[index] = -1 - - y_indices[index] = curr_y_ixs - x_indices[index] = curr_x_ixs - curr_loc_indices[index] = len(curr_y_ixs) - - #print("indices found") - if is_3D: - return [curr_loc_indices, z_indices, y_indices, x_indices] - else: - return [curr_loc_indices, y_indices, x_indices] - - def adjust_pbc_point(in_dim, dim_min, dim_max): '''Function to adjust a point to the other boundary for PBCs @@ -495,7 +434,7 @@ def feature_detection_threshold(data_i,i_time, if num_labels > 0: all_label_props = get_label_props_in_dict(labels) [all_labels_max_size, all_label_locs_v, all_label_locs_h1, all_label_locs_h2 - ] = get_indices_of_labels_from_reg_prop_dict(all_label_props) + ] = tb_utils.get_indices_of_labels_from_reg_prop_dict(all_label_props) #find the points along the boundaries @@ -523,6 +462,8 @@ def feature_detection_threshold(data_i,i_time, wall_labels = np.unique(wall_labels) for label_ind in wall_labels: + #create list for skip labels for this wall label only + skip_list_thisind = [] # 0 isn't a real index if label_ind == 0: continue @@ -560,9 +501,15 @@ def feature_detection_threshold(data_i,i_time, all_label_locs_h1[label_on_corner], all_label_locs_h2[label_on_corner]] = label_ind skip_list = np.append(skip_list,label_on_corner) + skip_list_thisind = np.append(skip_list_thisind,label_on_corner) + + #if it's labeled and has already been dealt with for this label + elif((label_on_corner !=0) and (np.any(label_on_corner==skip_list)) and (np.any(label_on_corner==skip_list_thisind))): + #print("skip_list_thisind label - has already been treated this index") + continue - #if it's labeled and has already been dealt with - elif((label_on_corner !=0) and (np.any(label_on_corner==skip_list))): + #if it's labeled and has already been dealt with via a previous label + elif((label_on_corner !=0) and (np.any(label_on_corner==skip_list)) and (~np.any(label_on_corner==skip_list_thisind))): #find the updated label, and overwrite all of label_ind indices with updated label labels_2_alt = labels_2[label_z,y_val_alt,x_val_alt] labels_2[label_locs_v, @@ -587,9 +534,14 @@ def feature_detection_threshold(data_i,i_time, all_label_locs_h2[label_alt]] = label_ind #we have already dealt with this label. 
skip_list = np.append(skip_list,label_alt) + skip_list_thisind = np.append(skip_list_thisind,label_alt) + + #if it's labeled and has already been dealt with for this label + elif((label_alt !=0) and (np.any(label_alt==skip_list)) and (np.any(label_alt==skip_list_thisind))): + continue #if it's labeled and has already been dealt with - elif((label_alt !=0) and (np.any(label_alt==skip_list))): + elif((label_alt !=0) and (np.any(label_alt==skip_list)) and (~np.any(label_alt==skip_list_thisind))): #find the updated label, and overwrite all of label_ind indices with updated label labels_2_alt = labels_2[label_z,y_val_alt,label_x] labels_2[label_locs_v, @@ -612,9 +564,14 @@ def feature_detection_threshold(data_i,i_time, all_label_locs_h2[label_alt]] = label_ind #we have already dealt with this label. skip_list = np.append(skip_list,label_alt) + skip_list_thisind = np.append(skip_list_thisind,label_alt) + + #if it's labeled and has already been dealt with for this label + elif((label_alt !=0) and (np.any(label_alt==skip_list)) and (np.any(label_alt==skip_list_thisind))): + continue #if it's labeled and has already been dealt with - elif((label_alt !=0) and (np.any(label_alt==skip_list))): + elif((label_alt !=0) and (np.any(label_alt==skip_list)) and (~np.any(label_alt==skip_list_thisind))): #find the updated label, and overwrite all of label_ind indices with updated label labels_2_alt = labels_2[label_z,label_y,x_val_alt] labels_2[label_locs_v, @@ -639,7 +596,7 @@ def feature_detection_threshold(data_i,i_time, # we need to get label properties again after we handle PBCs. label_props = get_label_props_in_dict(labels) if len(label_props)>0: - [total_indices_all, vdim_indyces_all, hdim1_indices_all, hdim2_indices_all] = get_indices_of_labels_from_reg_prop_dict(label_props) + [total_indices_all, vdim_indyces_all, hdim1_indices_all, hdim2_indices_all] = tb_utils.get_indices_of_labels_from_reg_prop_dict(label_props) #values, count = np.unique(labels[:,:].ravel(), return_counts=True) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 288cb2cd..9196e895 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -1,13 +1,221 @@ import logging +from operator import is_ -def segmentation_3D(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None): - return segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance) +from numpy import transpose +from . import utils as tb_utils + +def transfm_pbc_point(in_dim, dim_min, dim_max): + '''Function to transform a PBC-feature point for contiguity + + Parameters + ---------- + in_dim : int + Input coordinate to adjust + dim_min : int + Minimum point for the dimension + dim_max : int + Maximum point for the dimension (inclusive) + + Returns + ------- + int + The transformed point + + ''' + if in_dim < ((dim_min+dim_max)/2): + return in_dim+dim_max+1 + else: + return in_dim -def segmentation_2D(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None): - return segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance) +def add_markers(features, marker_arr, seed_3D_flag, seed_3D_size = 5, level = None, PBC_flag = 'none'): + '''Adds markers for watershedding using the `features` dataframe + to the marker_arr. + Parameters + ---------- + features: pandas.DataFrame + Features for one point in time to add as markers. 
+    marker_arr: 2D or 3D array-like
+        Array to add the markers to. Assumes a (z, y, x) configuration.
+    seed_3D_flag: str('column', 'box')
+        Seed 3D field at feature positions with either the full column
+        or a box of user-set size
+    seed_3D_size: int or tuple (dimensions equal to dimensions of `field`)
+        This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an
+        integer, the seed box is identical in all dimensions. If it's a tuple, it specifies the
+        seed area for each dimension separately.
+        Note: we recommend the use of odd numbers for this. If you give
+        an even number, your seed box will be biased and not centered
+        around the feature.
+        Note: if two seed boxes overlap, the feature that is seeded will be the
+        closer feature.
+    level: slice or None
+        If `seed_3D_flag` is 'column', the levels at which to seed the
+        cells for the watershedding algorithm. If None, seeds all levels.
+    PBC_flag : {'none', 'hdim_1', 'hdim_2', 'both'}
+        Sets whether to use periodic boundaries, and if so in which directions.
+        'none' means that we do not have periodic boundaries
+        'hdim_1' means that we are periodic along hdim1
+        'hdim_2' means that we are periodic along hdim2
+        'both' means that we are periodic along both horizontal dimensions
+
+    Returns
+    -------
+    2D or 3D array like (same type as `marker_arr`)
+        The marker array
+    '''
+    import numpy as np
+
+    # What marker number is the background? Assumed 0.
+    bg_marker = 0
+
+    if level is None:
+        level=slice(None)
+
+    if len(marker_arr.shape)==3:
+        is_3D = True
+        z_len = marker_arr.shape[0]
+        h1_len = marker_arr.shape[1]
+        h2_len = marker_arr.shape[2]
+
+    else:
+        is_3D = False
+        z_len = 0
+        h1_len = marker_arr.shape[0]
+        h2_len = marker_arr.shape[1]
+        # transpose to 3D array to make things easier.
+        marker_arr = marker_arr[np.newaxis, :, :]
+
+    if seed_3D_flag == 'column':
+        for index, row in features.iterrows():
+            marker_arr[level,int(row['hdim_1']), int(row['hdim_2'])]=row['feature']
+
+    elif seed_3D_flag == 'box':
+        # Get the size of the seed box from the input parameter
+        try:
+            if is_3D:
+                seed_z = seed_3D_size[0]
+                start_num = 1
+            else:
+                start_num = 0
+            seed_h1 = seed_3D_size[start_num]
+            seed_h2 = seed_3D_size[start_num + 1]
+        except TypeError:
+            # Not iterable, assume int.
+            seed_z = seed_3D_size
+            seed_h1 = seed_3D_size
+            seed_h2 = seed_3D_size
+
+        for index, row in features.iterrows():
+            if is_3D:
+                # If we have a 3D input and we need to do box seeding
+                # we need to have 3D features.
+                try:
+                    row['vdim']
+                except KeyError:
+                    raise ValueError("For Box seeding on 3D segmentation,"
+                                     " you must have a 3D input source.")
+
+            # Because we don't support PBCs on the vertical axis,
+            # this is simple- just go in the seed_z/2 points around the
+            # vdim of the feature, up to the limits of the array.
+            if is_3D:
+                z_seed_start = int(np.max([0, np.ceil(row['vdim']-seed_z/2)]))
+                z_seed_end = int(np.min([z_len, np.ceil(row['vdim']+seed_z/2)]))
+            else:
+                # 2D input was promoted to a single-level 3D array above, so
+                # seed the whole (length-1) vertical axis; without this branch
+                # z_seed_start/z_seed_end would be undefined for 2D data.
+                z_seed_start = 0
+                z_seed_end = 1
+
+            # For the horizontal dimensions, it's more complicated if we have
+            # PBCs.
+ hdim_1_min = int(np.ceil(row['hdim_1'] - seed_h1/2)) + hdim_1_max = int(np.ceil(row['hdim_1'] + seed_h1/2)) + hdim_2_min = int(np.ceil(row['hdim_2'] - seed_h2/2)) + hdim_2_max = int(np.ceil(row['hdim_2'] + seed_h2/2)) + + all_seed_boxes = tb_utils.get_pbc_coordinates( + h1_min = 0, h1_max = h1_len, + h2_min = 0, h2_max = h2_len, + h1_start_coord = hdim_1_min, h1_end_coord = hdim_1_max, + h2_start_coord = hdim_2_min, h2_end_coord = hdim_2_max, + PBC_flag= PBC_flag) + for seed_box in all_seed_boxes: + # Need to see if there are any other points seeded + # in this seed box first. + curr_box_markers = (marker_arr[z_seed_start:z_seed_end, + seed_box[0]:seed_box[1], + seed_box[2]:seed_box[3]]) + all_feats_in_box = np.unique(curr_box_markers) + if np.any(curr_box_markers!=bg_marker): + # If we have non-background points already seeded, + # we need to find the best way to seed them. + # Currently seeding with the closest point. + # Loop through all points in the box + with np.nditer(curr_box_markers, flags=['multi_index']) as it: + for curr_box_pt in it: + # Get its global index so that we can calculate + # distance and set the array. + local_index = it.multi_index + global_index = (local_index[0]+z_seed_start, + local_index[1] + seed_box[0], + local_index[2] + seed_box[2]) + # If it's a background marker, we can just set it + # with the feature we're working on. + if curr_box_pt == bg_marker: + marker_arr[global_index] = row['feature'] + continue + # it has another feature in it. Calculate the distance + # from its current set feature and the new feature. + if is_3D: + curr_coord = (row['vdim'], row['hdim_1'], row['hdim_2']) + else: + curr_coord = (0, row['hdim_1'], row['hdim_2']) + + dist_from_curr_pt = tb_utils.calc_distance_coords_pbc( + np.array(global_index), np.array(curr_coord), + min_h1 = 0, max_h1 = h1_len, + min_h2 = 0, max_h2 = h2_len, PBC_flag= PBC_flag + ) + + # This is technically an O(N^2) operation, but + # hopefully performance isn't too bad as this should + # be rare. + orig_row = features[features['feature'] == curr_box_pt].iloc[0] + if is_3D: + orig_coord = (orig_row['vdim'], orig_row['hdim_1'], orig_row['hdim_2']) + else: + orig_coord = (0, orig_row['hdim_1'], orig_row['hdim_2']) + dist_from_orig_pt = tb_utils.calc_distance_coords_pbc( + np.array(global_index), np.array(orig_coord), + min_h1 = 0, max_h1 = h1_len, + min_h2 = 0, max_h2 = h2_len, PBC_flag= PBC_flag + ) + # The current point center is further away + # than the original point center, so do nothing + if dist_from_curr_pt > dist_from_orig_pt: + continue + else: + # the current point center is closer. + marker_arr[global_index] = row['feature'] + # completely unseeded region so far. + else: + marker_arr[z_seed_start:z_seed_end, + seed_box[0]:seed_box[1], + seed_box[2]:seed_box[3]]=row['feature'] + + + # If we aren't 3D, transpose back. 
+ if not is_3D: + marker_arr = marker_arr[0, :, :] + + return marker_arr + + +def segmentation_3D(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,PBC_flag='none',seed_3D_flag='column'): + return segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,PBC_flag=PBC_flag,seed_3D_flag=seed_3D_flag) + +def segmentation_2D(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,PBC_flag='none',seed_3D_flag='column'): + return segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,PBC_flag=PBC_flag,seed_3D_flag=seed_3D_flag) + + +def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',PBC_flag='none',seed_3D_flag='column', seed_3D_size=5): """Function performing watershedding for an individual timestep of the data Parameters @@ -26,6 +234,21 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu flag determining the algorithm to use (currently watershedding implemented) max_distance: float maximum distance from a marker allowed to be classified as belonging to that cell + PBC_flag : {'none', 'hdim_1', 'hdim_2', 'both'} + Sets whether to use periodic boundaries, and if so in which directions. + 'none' means that we do not have periodic boundaries + 'hdim_1' means that we are periodic along hdim1 + 'hdim_2' means that we are periodic along hdim2 + 'both' means that we are periodic along both horizontal dimensions + seed_3D_flag: str('column', 'box') + Seed 3D field at feature positions with either the full column (default) + or a box of user-set size + seed_3D_size: int or tuple (dimensions equal to dimensions of `field`) + This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an + integer, the seed box is identical in all dimensions. If it's a tuple, it specifies the + seed area for each dimension separately. Note: we recommend the use + of odd numbers for this. If you give an even number, your seed box will be + biased and not centered around the feature. 
 Returns
 -------
@@ -35,206 +258,57 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
         feature dataframe including the number of cells (2D or 3D) in the segmented area/volume of the feature at the timestep
     """
     from skimage.segmentation import watershed
-#    from skimage.segmentation import random_walker
+    import skimage.measure
     from scipy.ndimage import distance_transform_edt
     from copy import deepcopy
     import numpy as np
-
-    # copy feature dataframe for output
-    features_out=deepcopy(features_in)
-    # Create cube of the same dimensions and coordinates as input data to store mask:
-    segmentation_out=1*field_in
-    segmentation_out.rename('segmentation_mask')
-    segmentation_out.units=1
-
-    #Create dask array from input data:
-    data=field_in.core_data()
-
-    #Set level at which to create "Seed" for each feature in the case of 3D watershedding:
-    # If none, use all levels (later reduced to the ones fulfilling the theshold conditions)
-    if level==None:
-        level=slice(None)
-
-    # transform max_distance in metres to distance in pixels:
-    if max_distance is not None:
-        max_distance_pixel=np.ceil(max_distance/dxy)
-
-    # mask data outside region above/below threshold and invert data if tracking maxima:
-    if target == 'maximum':
-        unmasked=data>threshold
-        data_segmentation=-1*data
-    elif target == 'minimum':
-        unmasked=data<threshold
-        data_segmentation=data
-    else:
-        raise ValueError('unknown type of target')
-
-    # set markers at the positions of the features:
-    markers = np.zeros_like(unmasked).astype(np.int32)
-    if field_in.ndim==2: #2D watershedding
-        for index, row in features_in.iterrows():
-            markers[int(row['hdim_1']), int(row['hdim_2'])]=row['feature']
-    elif field_in.ndim==3: #3D watershedding
-        list_coord_names=[coord.name() for coord in field_in.coords()]
-        #determine vertical axis:
-        if vertical_coord=='auto':
-            list_vertical=['z','model_level_number','altitude','geopotential_height']
-            for coord_name in list_vertical:
-                if coord_name in list_coord_names:
-                    vertical_axis=coord_name
-                    break
-        elif vertical_coord in list_coord_names:
-            vertical_axis=vertical_coord
-        else:
-            raise ValueError('Plese specify vertical coordinate')
-        ndim_vertical=field_in.coord_dims(vertical_axis)
-        if len(ndim_vertical)>1: raise ValueError('please specify 1 dimensional vertical coordinate')
-        for index, row in features_in.iterrows():
-            if ndim_vertical[0]==0:
-                markers[:,int(row['hdim_1']), int(row['hdim_2'])]=row['feature']
-            elif ndim_vertical[0]==1:
-                markers[int(row['hdim_1']),:, int(row['hdim_2'])]=row['feature']
-            elif ndim_vertical[0]==2:
-                markers[int(row['hdim_1']), int(row['hdim_2']),:]=row['feature']
+        vertical_coord_axis = ndim_vertical[0]
+        # Once we know the vertical coordinate, we can resolve the
+        # horizontal coordinates
+        # To make things easier, we will transpose the axes
+        # so that they are consistent.
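The explicit if/elif mapping that follows is easy to read; for reference, the same bookkeeping can be expressed in one line (a sketch using the patch's `vertical_coord_axis` variable):

    # the two horizontal axes are whichever of (0, 1, 2) the vertical axis is not
    hdim_1_axis, hdim_2_axis = [ax for ax in (0, 1, 2) if ax != vertical_coord_axis]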
+ if vertical_coord_axis == 0: + hdim_1_axis = 1 + hdim_2_axis = 2 + elif vertical_coord_axis == 1: + hdim_1_axis = 0 + hdim_2_axis = 2 + elif vertical_coord_axis == 2: + hdim_1_axis = 0 + hdim_2_axis = 1 else: - raise ValueError('Segmentations routine only possible with 2 or 3 spatial dimensions') - - # set markers in cells not fulfilling threshold condition to zero: - markers[~unmasked]=0 - - # Turn into np arrays (not necessary for markers) as dask arrays don't yet seem to work for watershedding algorithm - data_segmentation=np.array(data_segmentation) - unmasked=np.array(unmasked) - - # perform segmentation: - if method=='watershed': - segmentation_mask = watershed(np.array(data_segmentation),markers.astype(np.int32), mask=unmasked) -# elif method=='random_walker': -# segmentation_mask=random_walker(data_segmentation, markers.astype(np.int32), -# beta=130, mode='bf', tol=0.001, copy=True, multichannel=False, return_full_prob=False, spacing=None) - else: - raise ValueError('unknown method, must be watershed') + raise ValueError('Segmentation routine only possible with 2 or 3 spatial dimensions') - # remove everything from the individual masks that is more than max_distance_pixel away from the markers - if max_distance is not None: - D=distance_transform_edt((markers==0).astype(int)) - segmentation_mask[np.bitwise_and(segmentation_mask>0, D>max_distance_pixel)]=0 - - #Write resulting mask into cube for output - segmentation_out.data=segmentation_mask - - # count number of grid cells asoociated to each tracked cell and write that into DataFrame: - values, count = np.unique(segmentation_mask, return_counts=True) - counts=dict(zip(values, count)) - ncells=np.zeros(len(features_out)) - for i,(index,row) in enumerate(features_out.iterrows()): - if row['feature'] in counts.keys(): - ncells=counts[row['feature']] - features_out['ncells']=ncells - - return segmentation_out,features_out - -def segmentation(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto'): - """Function using watershedding or random walker to determine cloud volumes associated with tracked updrafts - - Parameters - ---------- - features: pandas.DataFrame - output from trackpy/maketrack - field: iris.cube.Cube - containing the field to perform the watershedding on - threshold: float - threshold for the watershedding field to be used for the mask - - target: string - Switch to determine if algorithm looks strating from maxima or minima in input field (maximum: starting from maxima (default), minimum: starting from minima) - - level slice - levels at which to seed the cells for the watershedding algorithm - method: str ('method') - flag determining the algorithm to use (currently watershedding implemented) - - max_distance: float - Maximum distance from a marker allowed to be classified as belonging to that cell - - Returns - ------- - iris.cube.Cube - Cloud mask, 0 outside and integer numbers according to track inside the cloud - """ - import pandas as pd - from iris.cube import CubeList - - logging.info('Start watershedding 3D') - - # check input for right dimensions: - if not (field.ndim==3 or field.ndim==4): - raise ValueError('input to segmentation step must be 3D or 4D including a time dimension') - if 'time' not in [coord.name() for coord in field.coords()]: - raise ValueError("input to segmentation step must include a dimension named 'time'") - - # CubeList and list to store individual segmentation masks and feature DataFrames with information about 
segmentation - segmentation_out_list=CubeList() - features_out_list=[] - - #loop over individual input timesteps for segmentation: - field_time=field.slices_over('time') - for i,field_i in enumerate(field_time): - time_i=field_i.coord('time').units.num2date(field_i.coord('time').points[0]) - features_i=features.loc[features['time']==time_i] - segmentation_out_i,features_out_i=segmentation_timestep(field_i,features_i,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,vertical_coord=vertical_coord) - segmentation_out_list.append(segmentation_out_i) - features_out_list.append(features_out_i) - logging.debug('Finished segmentation for '+time_i.strftime('%Y-%m-%d_%H:%M:%S')) - - #Merge output from individual timesteps: - segmentation_out=segmentation_out_list.merge_cube() - features_out=pd.concat(features_out_list) - - logging.debug('Finished segmentation') - return segmentation_out,features_out - -def segmentation_timestep_3DPBC(field_in,features_in,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',PBC_flag=0): - """ - Function performing watershedding for an individual timestep of the data - Parameters: - features: pandas.DataFrame - features for one specific point in time - field: iris.cube.Cube - input field to perform the watershedding on (2D or 3D for one specific point in time) - threshold: float - threshold for the watershedding field to be used for the mas - target: string - switch to determine if algorithm looks strating from maxima or minima in input field (maximum: starting from maxima (default), minimum: starting from minima) - level slice - levels at which to seed the cells for the watershedding algorithm - method: string - flag determining the algorithm to use (currently watershedding implemented) - max_distance: float - maximum distance from a marker allowed to be classified as belonging to that cell - PBC_flag: integer - flag indicating whether to use PBC treatment or not - note to self: should be expanded to account for singly periodic boundaries also - rather than just doubly periodic - - Output: - segmentation_out: iris.cube.Cube - cloud mask, 0 outside and integer numbers according to track inside the clouds - features_out: pandas.DataFrame - feature dataframe including the number of cells (2D or 3D) in the segmented area/volume of the feature at the timestep - """ - #from skimage.morphology import watershed - import skimage.segmentation._watershed_cy - import skimage.segmentation - from skimage.segmentation import watershed - # from skimage.segmentation import random_walker - from scipy.ndimage import distance_transform_edt, label - from copy import deepcopy - import numpy as np - # copy feature dataframe for output features_out=deepcopy(features_in) # Create cube of the same dimensions and coordinates as input data to store mask: @@ -242,8 +316,21 @@ def segmentation_timestep_3DPBC(field_in,features_in,dxy,threshold=3e-3,target=' segmentation_out.rename('segmentation_mask') segmentation_out.units=1 - #Create dask array from input data: + # Get raw array from input data: data=field_in.core_data() + is_3D_seg = len(data.shape)==3 + # To make things easier, we will transpose the axes + # so that they are consistent: z, hdim_1, hdim_2 + # We only need to do this for 3D. 
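As a reminder of the np.transpose semantics relied on below (standalone sketch): entry i of `axes` names which input axis becomes output axis i.

    import numpy as np

    arr = np.empty((30, 20, 40))                    # (hdim_1, z, hdim_2)
    print(np.transpose(arr, axes=(1, 0, 2)).shape)  # (20, 30, 40), i.e. (z, hdim_1, hdim_2)

    arr = np.empty((30, 40, 20))                    # (hdim_1, hdim_2, z)
    print(np.transpose(arr, axes=(2, 0, 1)).shape)  # (20, 30, 40), i.e. (z, hdim_1, hdim_2)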
+ transposed_data = False + if is_3D_seg: + if vertical_coord_axis == 1: + data = np.transpose(data, axes=(1, 0, 2)) + transposed_data = True + elif vertical_coord_axis == 2: + data = np.transpose(data, axes=(2, 0, 1)) + transposed_data = True + #Set level at which to create "Seed" for each feature in the case of 3D watershedding: # If none, use all levels (later reduced to the ones fulfilling the theshold conditions) @@ -266,104 +353,10 @@ def segmentation_timestep_3DPBC(field_in,features_in,dxy,threshold=3e-3,target=' # set markers at the positions of the features: markers = np.zeros(unmasked.shape).astype(np.int32) - - if field_in.ndim==2: #2D watershedding - for index, row in features_in.iterrows(): - markers[int(row['hdim_1']), int(row['hdim_2'])]=row['feature'] - - elif field_in.ndim==3: #3D watershedding - list_coord_names=[coord.name() for coord in field_in.coords()] - #determine vertical axis: - print(list_coord_names) - if vertical_coord=='auto': - list_vertical=['vdim','z','model_level_number','altitude','geopotential_height'] - for coord_name in list_vertical: - if coord_name in list_coord_names: - vertical_axis=coord_name - print(vertical_axis) - break - elif vertical_coord in list_coord_names: - vertical_axis=vertical_coord - else: - raise ValueError('Plese specify vertical coordinate') - ndim_vertical=field_in.coord_dims(vertical_axis) - #print(ndim_vertical,ndim_vertical[0]) - - if len(ndim_vertical)>1: - raise ValueError('please specify 1 dimensional vertical coordinate') - z_len = len(field_in.coord('z').points) - y_len = len(field_in.coord('y').points) - x_len = len(field_in.coord('x').points) - - print(z_len,y_len,x_len) - - for index, row in features_in.iterrows(): - #creation of 5x5x5 point ranges for 3D marker seeding - #instead of seeding whole column as is done in original segmentation - #since this may cause erroneous seeding of unconnected fields - #e.g. 
cirrus overlaying a discrete convective cloud - - print("feature: ",row['feature']) - #print("z-ctr: ",row['vdim']) - #print("y-ctr: ",row['hdim_1']) - #print("x-ctr: ",row['hdim_2']) - - #proper positioning of box points in z space to avoid going beyond bounds - if(int(row['vdim']) >=2 and int(row['vdim']) <= z_len-3): - z_list = np.arange(int(row['vdim']-2),int(row['vdim']+3)) - elif(int(row['vdim']) < 2): - z_list = np.arange(0,5) - else: - z_list = np.arange(z_len-5,z_len) - - #proper positioning of box points in y space to avoid going beyond bounds - if(int(row['hdim_1']) >=2 and int(row['hdim_1']) <= y_len-3): - y_list = np.arange(int(row['hdim_1']-2),int(row['hdim_1']+3)) - elif(int(row['hdim_1']) < 2): - y_list = np.arange(0,5) - #PBC_y_chk = 1 - else: - y_list = np.arange(y_len-5,y_len) - #PBC_y_chk = 1 - - #proper positioning of box points in x space to avoid going beyond bounds - if(int(row['hdim_2']) >=2 and int(row['hdim_2']) <= x_len-3): - x_list = np.arange(int(row['hdim_2']-2),int(row['hdim_2']+3)) - elif(int(row['hdim_2']) < 2): - x_list = np.arange(0,5) - #PBC_x_chk = 1 - else: - x_list = np.arange(x_len-5,x_len) - #PBC_x_chk = 1 - - #loop thru 5x5x5 z times y times x range to seed markers - for k in range(0,5): - for j in range(0,5): - for i in range(0,5): - - if ndim_vertical[0]==0: - markers[z_list[k],y_list[j],x_list[i]]=row['feature'] - elif ndim_vertical[0]==1: - markers[y_list[j],z_list[k],x_list[i]]=row['feature'] - elif ndim_vertical[0]==2: - markers[y_list[j],x_list[i],z_list[k]]=row['feature'] - - - #print("z_list: ",z_list[:]) - #print("y_list: ",y_list[:]) - #print("x_list: ",x_list[:]) - #print(markers) - #print("unique marker labels: ",np.unique(markers)) - - else: - raise ValueError('Segmentations routine only possible with 2 or 3 spatial dimensions') - + markers = add_markers(features_in, markers, seed_3D_flag, seed_3D_size, level, PBC_flag) # set markers in cells not fulfilling threshold condition to zero: markers[~unmasked]=0 - - #rethinking this - data is padded with zeros, but we should set masked values to something different - #than zeroes as the array is initiated and padded with zeros - #and unmasked points that don't get watershedded are ALSO going to have a mask value equal to zero + #marker_vals = np.unique(markers) # Turn into np arrays (not necessary for markers) as dask arrays don't yet seem to work for watershedding algorithm data_segmentation=np.array(data_segmentation) @@ -372,9 +365,7 @@ def segmentation_timestep_3DPBC(field_in,features_in,dxy,threshold=3e-3,target=' # perform segmentation: if method=='watershed': segmentation_mask = watershed(np.array(data_segmentation),markers.astype(np.int32), mask=unmasked) -# elif method=='random_walker': -# segmentation_mask=random_walker(data_segmentation, markers.astype(np.int32), -# beta=130, mode='bf', tol=0.001, copy=True, multichannel=False, return_full_prob=False, spacing=None) + else: raise ValueError('unknown method, must be watershed') @@ -382,105 +373,128 @@ def segmentation_timestep_3DPBC(field_in,features_in,dxy,threshold=3e-3,target=' if max_distance is not None: D=distance_transform_edt((markers==0).astype(int)) segmentation_mask[np.bitwise_and(segmentation_mask>0, D>max_distance_pixel)]=0 - - #print(segmentation_mask.shape) - #print(segmentation_mask[unmasked].shape) - #print(np.where(segmentation_mask == 0 and unmasked == True)) - #z_unm,y_unm,x_unm = np.where(unmasked==True) - #print(np.where(segmentation_mask[z_unm,y_unm,x_unm] == 0)) - + #mask all segmentation_mask points below 
threshold as -1
     #to differentiate from those unmasked points NOT filled by watershedding
-    print(np.unique(segmentation_mask))
+    # TODO: allow user to specify the fill value
     segmentation_mask[~unmasked] = -1
 
+    #saves/prints below for testing
+    seg_m_data = segmentation_mask[:]
+
-    #PBC treatment if-else statements
-    if PBC_flag == 1:
-        z_unf,y_unf,x_unf = np.where(segmentation_mask==0)
+    hdim1_min = 0
+    hdim1_max = segmentation_mask.shape[hdim_1_axis] - 1
+    hdim2_min = 0
+    hdim2_max = segmentation_mask.shape[hdim_2_axis] - 1
+    # all options that involve dealing with periodic boundaries
+    pbc_options = ['hdim_1', 'hdim_2', 'both']
+    # Only run this if we need to deal with PBCs
+    if PBC_flag in pbc_options:
+
+        # read in labeling/masks and region-finding functions
+        reg_props_dict = tb_utils.get_label_props_in_dict(seg_m_data)
+
+        if not is_3D_seg:
+            # expand segmentation_mask to a (1, y, x) array to make calculations etc. easier.
+            segmentation_mask = segmentation_mask[np.newaxis, :, :]
+            unmasked = unmasked[np.newaxis, :, :]
+            data_segmentation = data_segmentation[np.newaxis, :, :]
+            vertical_coord_axis = 0
+            hdim_1_axis = 1
+            hdim_2_axis = 2
+
+        seg_mask_unseeded = np.zeros(segmentation_mask.shape)
+
+        # Return all indices where segmentation field == 0
+        # meaning unfilled but above threshold
+        # TODO: is there a way to do this without np.where?
+        vdim_unf,hdim1_unf,hdim2_unf = np.where(segmentation_mask==0)
+        seg_mask_unseeded[vdim_unf,hdim1_unf,hdim2_unf]=1
-        seg_mask_unseeded[z_unf,y_unf,x_unf]=1
-
-        labels_unseeded,label_num = label(seg_mask_unseeded)
-
-        print(label_num)
-
-        markers_2 = np.zeros(unmasked.shape).astype(np.int32)
-
-        print(segmentation_mask.shape)
-
+        # create labeled field of unfilled, unseeded features
+        labels_unseeded,label_num = skimage.measure.label(seg_mask_unseeded, return_num=True)
+
+        markers_2 = np.zeros(data_segmentation.shape).astype(np.int32)
+
         #new, shorter PBC marker seeding approach
         #loop thru LB points
         #then check if fillable region (labels_unseeded > 0)
         #then check if point on other side of boundary is > 0 in segmentation_mask
         '''
        "First pass" at seeding features across the boundaries. This first pass brings in
        points that are eligible (above threshold) but not previously watershedded,
        seeding them with the feature label from the opposite side of the boundary.

        Later, we will run the second pass, or "buddy box" approach, which handles cases
        where points across the boundary have been watershedded already.
         '''
+        # TODO: clean up code.
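A minimal 1D illustration of this first pass along one hdim_2 row (standalone sketch; values are made up):

    import numpy as np

    seg = np.array([0, 0, -1, -1, 0, 7, 7])   # feature 7 touches the hdim_2_max edge
    lab = np.array([1, 1,  0,  0, 2, 0, 0])   # labels of unfilled, above-threshold points
    markers_2 = np.zeros_like(seg)

    # the hdim_2_min edge point is fillable and its wrap-around neighbour belongs to
    # feature 7, so the edge point is seeded with 7 for the second watershed pass:
    if lab[0] > 0 and seg[-1] > 0:
        markers_2[0] = seg[-1]
    print(markers_2)                          # [7 0 0 0 0 0 0]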
+ if PBC_flag == 'hdim_1' or PBC_flag == 'both': + for vdim_ind in range(0,segmentation_mask.shape[0]): + for hdim1_ind in [hdim1_min,hdim1_max]: + for hdim2_ind in range(hdim2_min,hdim2_max): - #print(z_ind,y_ind,x_ind) - #print(labels_unseeded[z_ind,y_ind,x_ind]) - - if(labels_unseeded[z_ind,y_ind,x_ind] == 0): - continue - else: - if x_ind == 0: - if (segmentation_mask[z_ind,y_ind,segmentation_mask.shape[2]-1]<=0): - continue - else: - markers_2[z_ind,y_ind,x_ind] = segmentation_mask[z_ind,y_ind,segmentation_mask.shape[2]-1] - #print(z_ind,y_ind,x_ind) - #print("seeded") - elif x_ind == segmentation_mask.shape[2]-1: - if (segmentation_mask[z_ind,y_ind,0]<=0): - continue - else: - markers_2[z_ind,y_ind,x_ind] = segmentation_mask[z_ind,y_ind,0] - #print(z_ind,y_ind,x_ind) - #print("seeded") - - - for y_ind in [0,segmentation_mask.shape[1]-1]: - for x_ind in range(0,segmentation_mask.shape[2]): - #print(z_ind,y_ind,x_ind) - #print(labels_unseeded[z_ind,y_ind,x_ind]) + if(labels_unseeded[vdim_ind,hdim1_ind,hdim2_ind] == 0): + continue + else: + if hdim1_ind == 0: + if (segmentation_mask[vdim_ind,hdim1_max,hdim2_ind]<=0): + continue + else: + markers_2[vdim_ind,hdim1_ind,hdim2_ind] = segmentation_mask[vdim_ind,hdim1_max,hdim2_ind] + elif hdim1_ind == hdim1_max: + if (segmentation_mask[vdim_ind,hdim1_min,hdim2_ind]<=0): + continue + else: + markers_2[vdim_ind,hdim1_ind,hdim2_ind] = segmentation_mask[vdim_ind,hdim1_min,hdim2_ind] + if PBC_flag == 'hdim_2' or PBC_flag == 'both': + # TODO: This seems quite slow, is there scope for further speedup? + for vdim_ind in range(0,segmentation_mask.shape[0]): + for hdim1_ind in range(hdim1_min,hdim1_max): + for hdim2_ind in [hdim2_min,hdim2_max]: - if(labels_unseeded[z_ind,y_ind,x_ind] == 0): - continue - else: - if y_ind == 0: - if (segmentation_mask[z_ind,segmentation_mask.shape[1]-1,x_ind]<=0): - continue - else: - markers_2[z_ind,y_ind,x_ind] = segmentation_mask[z_ind,segmentation_mask.shape[1]-1,x_ind] - #print(z_ind,y_ind,x_ind) - #print("seeded") - elif y_ind == segmentation_mask.shape[1]-1: - if (segmentation_mask[z_ind,0,x_ind]<=0): - continue - else: - markers_2[z_ind,y_ind,x_ind] = segmentation_mask[z_ind,0,x_ind] - #print(z_ind,y_ind,x_ind) - #print("seeded") - - print("PBC cross-boundary markers planted") - print("Beginning PBC segmentation for secondary mask") - + if(labels_unseeded[vdim_ind,hdim1_ind,hdim2_ind] == 0): + continue + else: + if hdim2_ind == hdim2_min: + if (segmentation_mask[vdim_ind,hdim1_ind,hdim2_max]<=0): + continue + else: + markers_2[vdim_ind,hdim1_ind,hdim2_ind] = segmentation_mask[vdim_ind,hdim1_ind,hdim2_max] + elif hdim2_ind == hdim2_max: + if (segmentation_mask[vdim_ind,hdim1_ind,hdim2_min]<=0): + continue + else: + markers_2[vdim_ind,hdim1_ind,hdim2_ind] = segmentation_mask[vdim_ind,hdim1_ind,hdim2_min] + + # Deal with the opposite corner only + if PBC_flag == 'both': + # TODO: This seems quite slow, is there scope for further speedup? 
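On the speed TODOs above: the edge passes only touch faces of the array, so they can in principle be vectorized. A rough, untested sketch for the hdim_2 edges, using the same variables as this function (the exact row ranges would need matching to the loops):

    for edge, opp in ((hdim2_min, hdim2_max), (hdim2_max, hdim2_min)):
        fillable = labels_unseeded[:, :, edge] > 0
        wrapped = segmentation_mask[:, :, opp]
        seed_here = fillable & (wrapped > 0)
        markers_2[:, :, edge][seed_here] = wrapped[seed_here]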
+        for vdim_ind in range(0,segmentation_mask.shape[0]):
+            for hdim1_ind in [hdim1_min, hdim1_max]:
+                for hdim2_ind in [hdim2_min,hdim2_max]:
+                    # If this point is unseeded and unlabeled
+                    if(labels_unseeded[vdim_ind,hdim1_ind,hdim2_ind] == 0):
+                        continue
+
+                    # Find the opposite point in hdim1 space
+                    hdim1_opposite_corner = (hdim1_min if hdim1_ind == hdim1_max else hdim1_max)
+                    hdim2_opposite_corner = (hdim2_min if hdim2_ind == hdim2_max else hdim2_max)
+                    if segmentation_mask[vdim_ind, hdim1_opposite_corner, hdim2_opposite_corner] <= 0:
+                        continue
+
+                    markers_2[vdim_ind, hdim1_ind, hdim2_ind] = segmentation_mask[vdim_ind,hdim1_opposite_corner,hdim2_opposite_corner]
+
+    markers_2[~unmasked]=0
 
     if method=='watershed':
-        segmentation_mask_2 = watershed(np.array(data_segmentation),markers_2.astype(np.int32), mask=unmasked)
+        segmentation_mask_2 = watershed(data_segmentation,markers_2.astype(np.int32), mask=unmasked)
     else:
         raise ValueError('unknown method, must be watershed')
@@ -489,35 +503,375 @@
         D=distance_transform_edt((markers==0).astype(int))
         segmentation_mask_2[np.bitwise_and(segmentation_mask_2>0, D>max_distance_pixel)]=0
 
-    print("Sum up original mask and secondary PBC-mask for full PBC segmentation")
+    # Sum up original mask and secondary PBC-mask for full PBC segmentation
+    segmentation_mask_3=segmentation_mask + segmentation_mask_2
+
+    # Secondary seeding complete, now blending periodic boundaries
+    # keep segmentation mask fields for now so we can save these all later
+    # for demos of changes
+
+    #update mask coord regions
+
+    '''
+    Now, start the second round of watershedding: the "buddy box" approach.
+    buddies contains the features of interest and any neighbors that are across
+    the boundary or in physical contact with that label
+    '''
+    # TODO: this can cause a crash if there are no segmentation regions
+    reg_props_dict = tb_utils.get_label_props_in_dict(segmentation_mask_3)
+
+    curr_reg_inds, z_reg_inds, y_reg_inds, x_reg_inds= tb_utils.get_indices_of_labels_from_reg_prop_dict(reg_props_dict)
+
+    wall_labels = np.array([])
+
+    w_wall = np.unique(segmentation_mask_3[:,:,0])
+    wall_labels = np.append(wall_labels,w_wall)
+
+    s_wall = np.unique(segmentation_mask_3[:,0,:])
+    wall_labels = np.append(wall_labels,s_wall)
+
+    wall_labels = np.unique(wall_labels)
+    wall_labels = wall_labels[(wall_labels) > 0].astype(int)
+    #print(wall_labels)
+
+    # Loop through all segmentation mask labels on the wall
+    for cur_idx in wall_labels:
+        print("we have buddies")
+
+        vdim_indices = z_reg_inds[cur_idx]
+        hdim1_indices = y_reg_inds[cur_idx]
+        hdim2_indices = x_reg_inds[cur_idx]
+
+        #start buddies array with feature of interest
+        buddies = np.array([cur_idx],dtype=int)
+        # Loop through all points in the segmentation mask that we're interested in
+        for label_z, label_y, label_x in zip(vdim_indices, hdim1_indices, hdim2_indices):
+
+            # check if this is the special case of being a corner point.
+            # if it's doubly periodic AND on both x and y boundaries, it's a corner point
+            # and we have to look at the other corner.
+            # here, we will only look at the corner point and let the below deal with x/y only.
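adjust_pbc_point and transfm_pbc_point come from tobac's utilities elsewhere in this changeset. Judging from how they are used here, minimal stand-ins would look roughly like the following (assumed behavior, not the actual implementations):

    def adjust_pbc_point(in_dim, dim_min, dim_max):
        # map a boundary index to the opposite boundary
        if in_dim == dim_min:
            return dim_max
        elif in_dim == dim_max:
            return dim_min
        raise ValueError('point not on a boundary')

    def transfm_pbc_point(in_dim, dim_min, dim_max):
        # shift points in the lower half of the axis past dim_max so that a
        # boundary-crossing region becomes contiguous in index space
        if in_dim < (dim_min + dim_max) / 2:
            return in_dim + dim_max + 1
        return in_dim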
+ if PBC_flag == 'both' and (np.any(label_y == [hdim1_min,hdim1_max]) and np.any(label_x == [hdim2_min,hdim2_max])): + + #adjust x and y points to the other side + y_val_alt = tb_utils.adjust_pbc_point(label_y, hdim1_min, hdim1_max) + x_val_alt = tb_utils.adjust_pbc_point(label_x, hdim2_min, hdim2_max) + label_on_corner = segmentation_mask_3[label_z,y_val_alt,x_val_alt] + + if((label_on_corner > 0)): + #add opposite-corner buddy if it exists + buddies = np.append(buddies,label_on_corner) + + + # on the hdim1 boundary and periodic on hdim1 + if (PBC_flag == 'hdim_1' or PBC_flag == 'both') and np.any(label_y == [hdim1_min,hdim1_max]): + y_val_alt = tb_utils.adjust_pbc_point(label_y, hdim1_min, hdim1_max) + + #get the label value on the opposite side + label_alt = segmentation_mask_3[label_z,y_val_alt,label_x] + + #if it's labeled and not already been dealt with + if((label_alt > 0)): + #add above/below buddy if it exists + buddies = np.append(buddies,label_alt) + + if (PBC_flag == 'hdim_2' or PBC_flag == 'both') and np.any(label_x == [hdim2_min,hdim2_max]): + x_val_alt = tb_utils.adjust_pbc_point(label_x, hdim2_min, hdim2_max) + + #get the seg value on the opposite side + label_alt = segmentation_mask_3[label_z,label_y,x_val_alt] + + #if it's labeled and not already been dealt with + if((label_alt > 0)): + #add left/right buddy if it exists + buddies = np.append(buddies,label_alt) + + + buddies = np.unique(buddies) + + if np.all(buddies==cur_idx): + continue + else: + inter_buddies,feat_inds,buddy_inds=np.intersect1d(features_in.feature.values[:],buddies,return_indices=True) + + # Get features that are needed for the buddy box + buddy_features = deepcopy(features_in.iloc[feat_inds]) + + #create arrays to contain points of all buddies + #and their transpositions/transformations + #for use in Buddy Box space + + #z,y,x points in the grid domain with no transformations + #NOTE: when I think about it, not sure if these are really needed + # as we use the y_a1/x_a1 points for the data transposition + # to the buddy box rather than these and their z2/y2/x2 counterparts + buddy_z = np.array([],dtype=int) + buddy_y = np.array([],dtype=int) + buddy_x = np.array([],dtype=int) + + # z,y,x points from the grid domain WHICH MAY OR MAY NOT BE TRANSFORMED + # so as to be continuous/contiguous across a grid boundary for that dimension + #(e.g., instead of [1496,1497,0,1,2,3] it would be [1496,1497,1498,1499,1500,1501]) + buddy_z2 = np.array([],dtype=int) + buddy_y2 = np.array([],dtype=int) + buddy_x2 = np.array([],dtype=int) + + # These are just for feature positions and are in z2/y2/x2 space + # (may or may not be within real grid domain) + # so that when the buddy box is constructed, seeding is done properly + # in the buddy box space + + #NOTE: We may not need this, as we already do this editing the buddy_features df + # and an iterrows call through this is what's used to actually seed the buddy box + buddy_zf = np.array([],dtype=int) + buddy_yf = np.array([],dtype=int) + buddy_xf = np.array([],dtype=int) + + buddy_looper = 0 + + #loop thru buddies + for buddy in buddies: + + #isolate feature from set of buddies + buddy_feat = features_in[features_in['feature'] == buddy] + + #transform buddy feature position if needed for positioning in z2/y2/x2 space + #MAY be redundant with what is done just below here + yf2 = transfm_pbc_point(int(buddy_feat.hdim_1), hdim1_min, hdim1_max) + xf2 = transfm_pbc_point(int(buddy_feat.hdim_2), hdim2_min, hdim2_max) + + #edit value in buddy_features dataframe + 
buddy_features.hdim_1.values[buddy_looper] = transfm_pbc_point(float(buddy_feat.hdim_1), hdim1_min, hdim1_max)
+                buddy_features.hdim_2.values[buddy_looper] = transfm_pbc_point(float(buddy_feat.hdim_2), hdim2_min, hdim2_max)
+
+                #print(int(buddy_feat.vdim),yf2,xf2)
+                #display(buddy_features)
+
+                #again, this may be redundant as I don't think we use buddy_zf/yf/xf after this
+                #in favor of iterrows thru the updated buddy_features
+                buddy_zf = np.append(buddy_zf,int(buddy_feat.vdim))
+                buddy_yf = np.append(buddy_yf,yf2)
+                buddy_xf = np.append(buddy_xf,xf2)
+
+                buddy_looper = buddy_looper+1
+            # Create 1:1 map through actual domain points and continuous/contiguous points
+            # used to identify buddy box dimension lengths for its construction
+            for z,y,x in zip(z_reg_inds[buddy],y_reg_inds[buddy],x_reg_inds[buddy]):
+
+                buddy_z = np.append(buddy_z,z)
+                buddy_y = np.append(buddy_y,y)
+                buddy_x = np.append(buddy_x,x)
+
+                y2 = transfm_pbc_point(y, hdim1_min, hdim1_max)
+                x2 = transfm_pbc_point(x, hdim2_min, hdim2_max)
+
+                buddy_z2 = np.append(buddy_z2,z)
+                buddy_y2 = np.append(buddy_y2,y2)
+                buddy_x2 = np.append(buddy_x2,x2)
+
+            # Buddy Box!
+            # Identify mins and maxes of buddy box continuous points range
+            # so that a box of the correct size can be constructed
+            bbox_zstart = int(np.min(buddy_z2))
+            bbox_ystart = int(np.min(buddy_y2))
+            bbox_xstart = int(np.min(buddy_x2))
+            bbox_zend = int(np.max(buddy_z2)+1)
+            bbox_yend = int(np.max(buddy_y2)+1)
+            bbox_xend = int(np.max(buddy_x2)+1)
+
+            bbox_zsize = bbox_zend - bbox_zstart
+            bbox_ysize = bbox_yend - bbox_ystart
+            bbox_xsize = bbox_xend - bbox_xstart
+
+            #print(bbox_zsize,bbox_ysize,bbox_xsize)
+
+            # Creation of actual Buddy Box space for transposition
+            # of data in domain and re-seeding with Buddy feature markers
+            buddy_rgn = np.zeros((bbox_zsize, bbox_ysize, bbox_xsize))
+            ind_ctr = 0
+
+            #need to loop thru ALL z,y,x inds in buddy box
+            #not just the ones that have nonzero seg mask values
+
+            # "_a1" points are re-transformations from the continuous buddy box points
+            # back to original grid/domain space to ensure that the correct data are
+            # copied to the proper Buddy Box locations
+            for z in range(bbox_zstart,bbox_zend):
+                for y in range(bbox_ystart,bbox_yend):
+                    for x in range(bbox_xstart,bbox_xend):
+                        z_a1 = z
+                        if y > hdim1_max:
+                            y_a1 = y - (hdim1_max + 1)
+                        else:
+                            y_a1 = y
+
+                        if x > hdim2_max:
+                            x_a1 = x - (hdim2_max + 1)
+                        else:
+                            x_a1 = x
+
+                        buddy_rgn[z-bbox_zstart,y-bbox_ystart,x-bbox_xstart] = field_in.data[z_a1,y_a1,x_a1]
+
+
+            #construction of iris cube corresponding to buddy box and its data
+            #for marker seeding and watershedding of buddy box
+            rgn_cube = iris.cube.Cube(data=buddy_rgn)
+
+            coord_system=None
+            # TODO: clean this up
+            h2_coord=iris.coords.DimCoord(np.arange(bbox_xsize), long_name='hdim_2', units='1', bounds=None, attributes=None, coord_system=coord_system)
+            h1_coord=iris.coords.DimCoord(np.arange(bbox_ysize), long_name='hdim_1', units='1', bounds=None, attributes=None, coord_system=coord_system)
+            v_coord=iris.coords.DimCoord(np.arange(bbox_zsize), long_name='vdim', units='1', bounds=None, attributes=None, coord_system=coord_system)
+
+            rgn_cube.add_dim_coord(h2_coord,2)
+            rgn_cube.add_dim_coord(h1_coord,1)
+            rgn_cube.add_dim_coord(v_coord,0)
+            #rgn_cube.add_dim_coord(itime,0)
+
+            rgn_cube.units = 'kg kg-1'
+
+            #print(rgn_cube)
+            #print(rgn_cube.vdim)
+
+            #Update buddy_features feature positions to correspond to buddy box space
+            #rather than domain space or continuous/contiguous point space
+            for buddy_looper in range(0,len(buddy_features)):
+                buddy_features.vdim.values[buddy_looper] = buddy_features.vdim.values[buddy_looper] - bbox_zstart
+                buddy_features.hdim_1.values[buddy_looper] = buddy_features.hdim_1.values[buddy_looper] - bbox_ystart
+                buddy_features.hdim_2.values[buddy_looper] = buddy_features.hdim_2.values[buddy_looper] - bbox_xstart
+
+            # Create cube of the same dimensions and coordinates as Buddy Box to store updated mask:
+            buddies_out=1*rgn_cube
+            buddies_out.rename('buddies_mask')
+            buddies_out.units=1
+
+            #Create dask array from input data:
+            #data=rgn_cube.core_data()
+            buddy_data = buddy_rgn
+
+            #All of the below is, I think, the same overarching segmentation procedure as in the original
+            #segmentation approach until the line which states
+            # "#transform seg_mask_4 data back to original mask after PBC first-pass ("segmentation_mask_3")"
+            # It's just performed on the buddy box and its data rather than our full domain
+
+            #Set level at which to create "Seed" for each feature in the case of 3D watershedding:
+            # If none, use all levels (later reduced to the ones fulfilling the threshold conditions)
+            if level==None:
+                level=slice(None)
+
+            # transform max_distance in metres to distance in pixels:
+            if max_distance is not None:
+                max_distance_pixel=np.ceil(max_distance/dxy)
+                #note - this doesn't consider vertical distance in pixels
+
+            # mask data outside region above/below threshold and invert data if tracking maxima:
+            if target == 'maximum':
+                unmasked_buddies=buddy_data>threshold
+                buddy_segmentation=-1*buddy_data
+            elif target == 'minimum':
+                unmasked_buddies=buddy_data<threshold
+                buddy_segmentation=buddy_data
+            else:
+                raise ValueError('unknown type of target')
+
+            # set markers at the positions of the features:
+            buddy_markers = np.zeros(unmasked_buddies.shape).astype(np.int32)
+            # buddy boxes are contiguous in index space, so no PBC treatment here
+            buddy_markers = add_markers(buddy_features, buddy_markers, seed_3D_flag,
+                                        seed_3D_size, level, PBC_flag='none')
+
+            # set markers in cells not fulfilling threshold condition to zero:
+            buddy_markers[~unmasked_buddies]=0
+
+            # Turn into np arrays (not necessary for markers) as dask arrays don't yet seem to work for watershedding algorithm
+            buddy_segmentation=np.array(buddy_segmentation)
+            unmasked_buddies=np.array(unmasked_buddies)
+
+            # perform segmentation:
+            if method=='watershed':
+                segmentation_mask_4 = watershed(np.array(buddy_segmentation),buddy_markers.astype(np.int32), mask=unmasked_buddies)
+            else:
+                raise ValueError('unknown method, must be watershed')
+
+            # remove everything from the individual masks that is more than max_distance_pixel away from the markers
+            if max_distance is not None:
+                D=distance_transform_edt((buddy_markers==0).astype(int))
+                segmentation_mask_4[np.bitwise_and(segmentation_mask_4>0, D>max_distance_pixel)]=0
+
-    else:
-        #Write resulting mask into cube for output
-        segmentation_out.data = segmentation_mask
+            #mask all segmentation_mask points below threshold as -1
+            #to differentiate from those unmasked points NOT filled by watershedding
+            print(np.unique(segmentation_mask_4))
+            segmentation_mask_4[~unmasked_buddies] = -1
+
+
+            #transform seg_mask_4 data back to original mask after PBC first-pass ("segmentation_mask_3")
+            #print(np.unique(test_mask3.data))
+
+            #loop through buddy box inds and analogous seg mask inds
+            for z_val in range(bbox_zstart,bbox_zend):
+                z_seg = z_val - bbox_zstart
+                z_val_o = z_val
+                for y_val in range(bbox_ystart,bbox_yend):
+                    y_seg = y_val - bbox_ystart
+                    #y_val_o = y_val
+                    if y_val > hdim1_max:
+                        y_val_o = y_val - (hdim1_max+1)
+                    else:
+                        y_val_o = y_val
+                    for x_val in range(bbox_xstart,bbox_xend):
+                        x_seg = x_val - bbox_xstart
+                        #x_val_o = x_val
+                        if x_val > hdim2_max:
+                            x_val_o = x_val - (hdim2_max+1)
+                        else:
+                            x_val_o = x_val
+                        #print(z_seg,y_seg,x_seg)
+                        #print(z_val,y_val,x_val)
+
+                        #fix to
+                        #overwrite IF:
+                        #1) feature of interest
+                        #2) changing to/from feature of interest or adjacent segmented feature
+
+                        #We don't want to overwrite other features that may be in the
+                        #buddy box if not contacting the intersected seg field
+
+                        if (np.any(segmentation_mask_3[z_val_o,y_val_o,x_val_o]==buddies) and np.any(segmentation_mask_4.data[z_seg,y_seg,x_seg]==buddies)):
+                            #only do updating procedure if old and new values both in buddy set
+                            #and values are different
+                            if(segmentation_mask_3[z_val_o,y_val_o,x_val_o] != segmentation_mask_4.data[z_seg,y_seg,x_seg]):
+                                segmentation_mask_3[z_val_o,y_val_o,x_val_o] = segmentation_mask_4.data[z_seg,y_seg,x_seg]
+                                #print("updated")
+    if not is_3D_seg:
+        segmentation_mask_3 = segmentation_mask_3[0]
+
+    segmentation_mask = segmentation_mask_3
+
+    if transposed_data:
+        segmentation_mask = np.transpose(segmentation_mask, axes =
[vertical_coord_axis, hdim_1_axis, hdim_2_axis]) + + # Finished PBC checks and new PBC updated segmentation now in segmentation_mask. + #Write resulting mask into cube for output + segmentation_out.data = segmentation_mask # count number of grid cells asoociated to each tracked cell and write that into DataFrame: - print(np.min(segmentation_out.data),np.max(segmentation_out.data)) - - values, count = np.unique(segmentation_out.data, return_counts=True) + values, count = np.unique(segmentation_mask, return_counts=True) counts=dict(zip(values, count)) ncells=np.zeros(len(features_out)) for i,(index,row) in enumerate(features_out.iterrows()): - #print(i,index,row,(index,row)) - #print("pre-if ncells ",ncells) if row['feature'] in counts.keys(): ncells=counts[row['feature']] - #print("in-if ncells ",ncells) - #row['ncells'] == ncells - #features_out['ncells'][i] = ncells features_out['ncells']=ncells - #print("post-if ncells ",ncells) return segmentation_out,features_out -def segmentation_PBC3D(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',PBC_flag=0): +def segmentation(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',PBC_flag='none',seed_3D_flag='column'): """ Function using watershedding or random walker to determine cloud volumes associated with tracked updrafts @@ -539,6 +893,12 @@ def segmentation_PBC3D(features,field,dxy,threshold=3e-3,target='maximum',level= max_distance: float Maximum distance from a marker allowed to be classified as belonging to that cell + + PBC_flag: string + string flag of 'none', 'hdim_1', 'hdim_2', or 'both' indicating which lateral boundaries are periodic + + seed_3D_flag: string + string flag of 'column' (default) or 'box' which determines the method of seeding feature positions for 3D watershedding Output: segmentation_out: iris.cube.Cube @@ -560,18 +920,39 @@ def segmentation_PBC3D(features,field,dxy,threshold=3e-3,target='maximum',level= features_out_list=[] #loop over individual input timesteps for segmentation: + #OR do segmentation on single timestep + #print(field) field_time=field.slices_over('time') + #print(field_time) + #print(enumerate(field_time)) + time_len = len(field.coord('time').points[:]) + print(time_len) + for i,field_i in enumerate(field_time): - #print("i, field i: ",i,field_i) time_i=field_i.coord('time').units.num2date(field_i.coord('time').points[0]) features_i=features.loc[features['time']==time_i] - #print("time_i, features_i:") #print(time_i) + #print(field_i) #print(features_i) - segmentation_out_i,features_out_i=segmentation_timestep_PBC3D(field_i,features_i,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,vertical_coord=vertical_coord,PBC_flag=PBC_flag) + segmentation_out_i,features_out_i=segmentation_timestep(field_i,features_i,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,vertical_coord=vertical_coord,PBC_flag=PBC_flag,seed_3D_flag=seed_3D_flag) segmentation_out_list.append(segmentation_out_i) features_out_list.append(features_out_i) logging.debug('Finished segmentation for '+time_i.strftime('%Y-%m-%d_%H:%M:%S')) + + #if time_len > 1: + + + + #else: + # time_i=field.coord('time').units.num2date(field.coord('time').points[0]) + # features_i=features.loc[features['time']==time_i] + # print(time_i) + # print(field) + # print(features_i) + # 
segmentation_out_i,features_out_i=segmentation_timestep(field,features_i,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,vertical_coord=vertical_coord,PBC_flag=PBC_flag) + # segmentation_out_list.append(segmentation_out_i) + # features_out_list.append(features_out_i) + # logging.debug('Finished segmentation for '+time_i.strftime('%Y-%m-%d_%H:%M:%S')) #Merge output from individual timesteps: segmentation_out=segmentation_out_list.merge_cube() diff --git a/tobac/testing.py b/tobac/testing.py index 5a757ca6..ac220a1c 100644 --- a/tobac/testing.py +++ b/tobac/testing.py @@ -7,7 +7,7 @@ def make_simple_sample_data_2D(data_type='iris'): """function creating a simple dataset to use in tests for tobac. The grid has a grid spacing of 1km in both horizontal directions and 100 grid cells in x direction and 500 in y direction. - Time resolution is 1 minute and the total length of the dataset is 100 minutes around a abritraty date (2000-01-01 12:00). + Time resolution is 1 minute and the total length of the dataset is 100 minutes around a arbitrary date (2000-01-01 12:00). The longitude and latitude coordinates are added as 2D aux coordinates and arbitrary, but in realisitic range. The data contains a single blob travelling on a linear trajectory through the dataset for part of the time. Parameters @@ -359,8 +359,16 @@ def make_sample_data_3D_3blobs(data_type='iris',invert_xy=False): return sample_data -def make_dataset_from_arr(in_arr, data_type = 'xarray'): - '''Makes a dataset (xarray or iris) for feature detection/segmentation from +def make_dataset_from_arr( + in_arr, + data_type="xarray", + time_dim_num=None, + z_dim_num=None, + z_dim_name = 'altitude', + y_dim_num=0, + x_dim_num=1, +): + """Makes a dataset (xarray or iris) for feature detection/segmentation from a raw numpy/dask/etc. array. Parameters @@ -369,20 +377,43 @@ def make_dataset_from_arr(in_arr, data_type = 'xarray'): The input array to convert to iris/xarray data_type: str('xarray' or 'iris') Type of the dataset to return - + time_dim_num: int or None + What axis is the time dimension on, None for a single timestep + z_dim_num: int or None + What axis is the z dimension on, None for a 2D array + z_dim_name: str + What the z dimension name is named + y_dim_num: int + What axis is the y dimension on, typically 0 for a 2D array + x_dim_num: int + What axis is the x dimension on, typically 1 for a 2D array + Returns ------- Iris or xarray dataset with everything we need for feature detection/tracking. 
- ''' + """ import xarray as xr - + import iris + + if time_dim_num is not None: + raise NotImplementedError("Time dimension not yet implemented in this function") + + is_3D = z_dim_num is not None output_arr = xr.DataArray(in_arr) + if is_3D: + z_max = in_arr.shape[z_dim_num] - if data_type == 'xarray': + if data_type == "xarray": return output_arr - elif data_type == 'iris': - return output_arr.to_iris() + elif data_type == "iris": + out_arr_iris = output_arr.to_iris() + if is_3D: + out_arr_iris.add_dim_coord( + iris.coords.DimCoord(np.arange(0, z_max), standard_name=z_dim_name), + z_dim_num, + ) + return out_arr_iris else: raise ValueError("data_type must be 'xarray' or 'iris'") @@ -445,8 +476,8 @@ def make_feature_blob(in_arr, h1_loc, h2_loc, v_loc = None, start_loc = 1 v_min = 0 v_max = in_arr.shape[start_loc] - start_v = round(max(v_min, v_loc - v_size/2)) - end_v = round(min(v_max-1, v_loc + v_size/2)) + start_v = int(np.ceil(max(v_min, v_loc - v_size / 2))) + end_v = int(np.ceil(min(v_max - 1, v_loc + v_size / 2))) if v_size > v_max - v_min: raise ValueError("v_size larger than domain size") @@ -463,12 +494,12 @@ def make_feature_blob(in_arr, h1_loc, h2_loc, v_loc = None, raise ValueError("Horizontal size larger than domain size") # let's get start/end x/y/z - start_h1 = round(h1_loc - h1_size/2) - end_h1 = round(h1_loc + h1_size/2) - - start_h2 = round(h2_loc - h2_size/2) - end_h2 = round(h2_loc + h2_size/2) + start_h1 = int(np.ceil(h1_loc - h1_size / 2)) + end_h1 = int(np.ceil(h1_loc + h1_size / 2)) + start_h2 = int(np.ceil(h2_loc - h2_size / 2)) + end_h2 = int(np.ceil(h2_loc + h2_size / 2)) + # get the coordinate sets coords_to_fill = get_pbc_coordinates(h1_min, h1_max, h2_min, h2_max, start_h1, end_h1, start_h2, end_h2, PBC_flag=PBC_flag) @@ -735,10 +766,10 @@ def get_pbc_coordinates(h1_min, h1_max, h2_min, h2_max, def generate_single_feature(start_h1, start_h2, start_v = None, spd_h1 = 1, spd_h2 = 1, spd_v = 1, - min_h1 = 0, max_h1 = 1000, min_h2 = 0, max_h2 = 1000, + min_h1 = 0, max_h1 = None, min_h2 = 0, max_h2 = None, num_frames = 1, dt = datetime.timedelta(minutes=5), start_date = datetime.datetime(2022,1,1,0), - PBC_flag = 'none', frame_start = 1): + PBC_flag = 'none', frame_start = 1, feature_num=1,): '''Function to generate a dummy feature dataframe to test the tracking functionality Parameters @@ -780,8 +811,14 @@ def generate_single_feature(start_h1, start_h2, start_v = None, 'both' means that we are periodic along both horizontal dimensions frame_start: int Number to start the frame at + feature_num: int + What number to start the feature at ''' + if max_h1 is None or max_h2 is None: + raise ValueError('Max coords must be specified.') + + out_list_of_dicts = list() curr_h1 = start_h1 curr_h2 = start_h2 @@ -799,7 +836,7 @@ def generate_single_feature(start_h1, start_h2, start_v = None, curr_dict['vdim'] = curr_v curr_v += spd_v curr_dict['time'] = curr_dt - + curr_dict["feature"] = feature_num + i curr_h1 += spd_h1 curr_h2 += spd_h2 @@ -808,3 +845,44 @@ def generate_single_feature(start_h1, start_h2, start_v = None, return pd.DataFrame.from_dict(out_list_of_dicts) + +def get_start_end_of_feat(center_point, size, axis_min, axis_max, is_pbc = False): + '''Gets the start and ending points for a feature given a size and PBC + conditions + + Parameters + ---------- + center_point: float + The center point of the feature + size: float + The size of the feature in this dimension + axis_min: int + Minimum point on the axis (usually 0) + axis_max: int + Maximum point on the 
axis (exclusive). This is 1 after
+        the last real point on the axis, such that axis_max - axis_min
+        is the size of the axis
+    is_pbc: bool
+        True if we should give wrap around points, false if we shouldn't.
+
+    Returns
+    -------
+    tuple (start_point, end_point)
+        Note that if is_pbc is True, start_point can be less than axis_min and
+        end_point can be greater than or equal to axis_max. This is designed to be used with
+        ```get_pbc_coordinates```
+    '''
+    import numpy as np
+
+    min_pt = int(np.ceil(center_point - size / 2))
+    max_pt = int(np.ceil(center_point + size / 2))
+
+    # adjust points for boundaries, if needed.
+    if min_pt < axis_min and not is_pbc:
+        min_pt = axis_min
+    if max_pt > axis_max and not is_pbc:
+        max_pt = axis_max
+
+    return (min_pt, max_pt)
+
+
diff --git a/Dockerfile b/tobac/tests/Dockerfile
similarity index 100%
rename from Dockerfile
rename to tobac/tests/Dockerfile
diff --git a/tobac/tests/test_feature_detection.py b/tobac/tests/test_feature_detection.py
index ef6e4a15..c6d00a0f 100644
--- a/tobac/tests/test_feature_detection.py
+++ b/tobac/tests/test_feature_detection.py
@@ -1,83 +1,46 @@
 import tobac.testing
 import tobac.feature_detection as feat_detect
-
-def test_get_label_props_in_dict():
-    '''Testing ```tobac.feature_detection.get_label_props_in_dict``` for both 2D and 3D cases.
-    '''
-    import skimage.measure as skim
-    test_3D_data = tobac.testing.make_sample_data_3D_3blobs(data_type='xarray')
-    test_2D_data = tobac.testing.make_sample_data_2D_3blobs(data_type='xarray')
-
-
-    # make sure it works for 3D data
-    labels_3D = skim.label(test_3D_data.values[0])
-
-    output_3D = feat_detect.get_label_props_in_dict(labels_3D)
-
-    #make sure it is a dict
-    assert type(output_3D) is dict
-    #make sure we get at least one output, there should be at least one label.
-    assert len(output_3D) > 0
-
-    # make sure it works for 2D data
-    labels_2D = skim.label(test_2D_data.values[0])
-
-    output_2D = feat_detect.get_label_props_in_dict(labels_2D)
-
-    #make sure it is a dict
-    assert type(output_2D) is dict
-    #make sure we get at least one output, there should be at least one label.
-    assert len(output_2D) > 0
-
-
-def test_get_indices_of_labels_from_reg_prop_dict():
-    '''Testing ```tobac.feature_detection.get_indices_of_labels_from_reg_prop_dict``` for 2D and 3D cases.
-    '''
-    import skimage.measure as skim
-    import numpy as np
-    test_3D_data = tobac.testing.make_sample_data_3D_3blobs(data_type='xarray')
-    test_2D_data = tobac.testing.make_sample_data_2D_3blobs(data_type='xarray')
-
-
-    # make sure it works for 3D data
-    labels_3D = skim.label(test_3D_data.values[0])
-    nx_3D = test_3D_data.values[0].shape[2]
-    ny_3D = test_3D_data.values[0].shape[1]
-    nz_3D = test_3D_data.values[0].shape[0]
-
-    labels_2D = skim.label(test_2D_data.values[0])
-    nx_2D = test_2D_data.values[0].shape[1]
-    ny_2D = test_2D_data.values[0].shape[0]
-
-    region_props_3D = feat_detect.get_label_props_in_dict(labels_3D)
-    region_props_2D = feat_detect.get_label_props_in_dict(labels_2D)
-
-    #get_indices_of_labels_from_reg_prop_dict
-
-    [curr_loc_indices, z_indices, y_indices, x_indices] = feat_detect.get_indices_of_labels_from_reg_prop_dict(region_props_3D)
-
-    for index_key in curr_loc_indices:
-        # there should be at least one value in each.
- assert curr_loc_indices[index_key] > 0 - - assert np.all(z_indices[index_key] >= 0) and np.all(z_indices[index_key] < nz_3D) - assert np.all(x_indices[index_key] >= 0) and np.all(x_indices[index_key] < nx_3D) - assert np.all(y_indices[index_key] >= 0) and np.all(y_indices[index_key] < ny_3D) - - [curr_loc_indices, y_indices, x_indices] = feat_detect.get_indices_of_labels_from_reg_prop_dict(region_props_2D) - - for index_key in curr_loc_indices: - # there should be at least one value in each. - assert curr_loc_indices[index_key] > 0 - - assert np.all(x_indices[index_key] >= 0) and np.all(x_indices[index_key] < nx_2D) - assert np.all(y_indices[index_key] >= 0) and np.all(y_indices[index_key] < ny_2D) - - +import pytest def test_feature_detection_multithreshold_timestep(): ''' Tests ```tobac.feature_detection.feature_detection_multithreshold_timestep ''' - pass \ No newline at end of file + import numpy as np + from tobac import testing + from tobac import feature_detection + + # start by building a simple dataset with a single feature and seeing + # if we identify it + + test_dset_size = (50, 50) + test_hdim_1_pt = 20.0 + test_hdim_2_pt = 20.0 + test_hdim_1_sz = 5 + test_hdim_2_sz = 5 + test_amp = 2 + test_threshs = [ + 1.5, + ] + test_min_num = 2 + + test_data = np.zeros(test_dset_size) + test_data = testing.make_feature_blob( + test_data, + test_hdim_1_pt, + test_hdim_2_pt, + h1_size=test_hdim_1_sz, + h2_size=test_hdim_2_sz, + amplitude=test_amp, + ) + test_data_iris = testing.make_dataset_from_arr(test_data, data_type="iris") + fd_output = feature_detection.feature_detection_multithreshold_timestep( + test_data_iris, 0, threshold=test_threshs, min_num=test_min_num + ) + + # Make sure we have only one feature + assert len(fd_output.index) == 1 + # Make sure that the location of the feature is correct + assert fd_output.iloc[0]["hdim_1"] == pytest.approx(test_hdim_1_pt) + assert fd_output.iloc[0]["hdim_2"] == pytest.approx(test_hdim_2_pt) \ No newline at end of file diff --git a/tobac/tests/test_segmentation.py b/tobac/tests/test_segmentation.py new file mode 100644 index 00000000..1509fb4b --- /dev/null +++ b/tobac/tests/test_segmentation.py @@ -0,0 +1,732 @@ +from tracemalloc import start +import pytest +import tobac.testing as testing +import tobac.segmentation as seg + +def test_segmentation_timestep_2D_feature_2D_seg(): + ''' Tests `tobac.segmentation.segmentation_timestep` with a 2D + input feature and a 2D segmentation array + ''' + # Before we can run segmentation, we must run feature detection. 
+ + # start by building a simple dataset with a single feature + import numpy as np + + test_dset_size = (50, 50) + test_hdim_1_pt = 20.0 + test_hdim_2_pt = 20.0 + test_hdim_1_sz = 5 + test_hdim_2_sz = 5 + test_dxy = 1000 + hdim_1_start_feat = int(np.ceil(test_hdim_1_pt - test_hdim_1_sz / 2)) + hdim_1_end_feat = int(np.ceil(test_hdim_1_pt + test_hdim_1_sz / 2)) + hdim_2_start_feat = int(np.ceil(test_hdim_2_pt - test_hdim_2_sz / 2)) + hdim_2_end_feat = int(np.ceil(test_hdim_2_pt + test_hdim_2_sz / 2)) + + test_amp = 2 + + test_data = np.zeros(test_dset_size) + test_data = testing.make_feature_blob( + test_data, + test_hdim_1_pt, + test_hdim_2_pt, + h1_size=test_hdim_1_sz, + h2_size=test_hdim_2_sz, + amplitude=test_amp, + ) + test_data_iris = testing.make_dataset_from_arr(test_data, data_type="iris") + # Generate dummy feature dataset + test_feature_ds = testing.generate_single_feature(start_h1 = 20.0, start_h2 = 20.0, + max_h1 = 1000, max_h2 = 1000) + + out_seg_mask, out_df = seg.segmentation_timestep(field_in = test_data_iris, + features_in = test_feature_ds, dxy = test_dxy, + threshold = 1.5, PBC_flag='none', ) + + # Make sure that all labeled points are segmented + assert np.all(out_seg_mask.core_data()[hdim_1_start_feat:hdim_1_end_feat, + hdim_2_start_feat:hdim_2_end_feat] == np.ones((test_hdim_1_sz, test_hdim_2_sz))) + + + # Now try PBCs + # First, something stretching across hdim_1 + test_hdim_1_pt = 0.0 + test_data = np.zeros(test_dset_size) + + # Note that PBC flag here is 'both' as we still want the blob to be on both + # sides of the boundary to see if we accidentally grab it without PBC + # segmentation + test_data = testing.make_feature_blob( + test_data, + test_hdim_1_pt, + test_hdim_2_pt, + h1_size=test_hdim_1_sz, + h2_size=test_hdim_2_sz, + amplitude=test_amp, + PBC_flag = 'both' + ) + + test_data_iris = testing.make_dataset_from_arr(test_data, data_type="iris") + # Generate dummy feature dataset + test_feature_ds = testing.generate_single_feature(start_h1 = test_hdim_1_pt, + start_h2 = test_hdim_2_pt, + max_h1 = 1000, max_h2 = 1000 + ) + + hdim_1_start_feat, hdim_1_end_feat = testing.get_start_end_of_feat(test_hdim_1_pt, + test_hdim_1_sz, 0,test_dset_size[0], + is_pbc = True ) + + for pbc_option in ['none', 'hdim_1', 'hdim_2', 'both']: + out_seg_mask, out_df = seg.segmentation_timestep(field_in = test_data_iris, + features_in = test_feature_ds, dxy = test_dxy, + threshold = test_amp-0.5, PBC_flag=pbc_option, ) + # This will automatically give the appropriate box, and it's tested separately. 
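For orientation, the shape of output expected from get_pbc_coordinates in this case, hand-computed for a feature spanning hdim_1 = -2..3 on a 50-point axis (illustration only; a minimal stand-in for the hdim_1 wrapping, assumed behavior):

    def wrap_h1(h1_start, h1_end, h1_len):
        # split a box that runs off the low edge into the in-domain part
        # plus the wrapped part at the high edge
        if h1_start >= 0:
            return [(h1_start, h1_end)]
        return [(0, h1_end), (h1_len + h1_start, h1_len)]

    print(wrap_h1(-2, 3, 50))   # [(0, 3), (48, 50)]

With PBC_flag='hdim_1' the expected boxes are therefore [0:3] and [48:50] in hdim_1 (each with hdim_2 18:23); with PBC_flag='none' the box is simply clipped to [0:3].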
+ segmented_box_expected = testing.get_pbc_coordinates(0, test_dset_size[0], + 0, test_dset_size[1], hdim_1_start_feat, hdim_1_end_feat, hdim_2_start_feat, + hdim_2_end_feat, PBC_flag=pbc_option) + # Make sure that all labeled points are segmented + for seg_box in segmented_box_expected: + assert np.all(out_seg_mask.core_data()[seg_box[0]:seg_box[1], + seg_box[2]:seg_box[3]] == np.ones((seg_box[1]-seg_box[0], seg_box[3]-seg_box[2]))) + + if pbc_option in ['none', 'hdim_2']: + #there will only be one seg_box + assert (np.sum(out_seg_mask.core_data()[out_seg_mask.core_data()==1]) == + np.sum(np.ones((seg_box[1]-seg_box[0], seg_box[3]-seg_box[2])))) + else: + # We should be capturing the whole feature + assert (np.sum(out_seg_mask.core_data()[out_seg_mask.core_data()==1]) == + np.sum(np.ones((test_hdim_1_sz, test_hdim_2_sz)))) + + # Same as the above test, but for hdim_2 + # First, try the cases where we shouldn't get the points on the opposite + # hdim_2 side + test_hdim_1_pt = 20.0 + test_hdim_2_pt = 0.0 + test_data = np.zeros(test_dset_size) + test_data = testing.make_feature_blob( + test_data, + test_hdim_1_pt, + test_hdim_2_pt, + h1_size=test_hdim_1_sz, + h2_size=test_hdim_2_sz, + amplitude=test_amp, + PBC_flag = 'both' + ) + test_data_iris = testing.make_dataset_from_arr(test_data, data_type="iris") + # Generate dummy feature dataset + test_feature_ds = testing.generate_single_feature(start_h1 = test_hdim_1_pt, + start_h2 = test_hdim_2_pt, + max_h1 = 1000, max_h2 = 1000) + hdim_1_start_feat, hdim_1_end_feat = testing.get_start_end_of_feat(test_hdim_1_pt, + test_hdim_1_sz, 0,test_dset_size[0], + is_pbc = True ) + + hdim_2_start_feat, hdim_2_end_feat = testing.get_start_end_of_feat(test_hdim_2_pt, + test_hdim_2_sz, 0,test_dset_size[1], + is_pbc = True ) + + for pbc_option in ['none', 'hdim_1', 'hdim_2', 'both']: + out_seg_mask, out_df = seg.segmentation_timestep(field_in = test_data_iris, + features_in = test_feature_ds, dxy = test_dxy, + threshold = test_amp-0.5, PBC_flag=pbc_option, ) + # This will automatically give the appropriate box(es), and it's tested separately. 
+ segmented_box_expected = testing.get_pbc_coordinates(0, test_dset_size[0], + 0, test_dset_size[1], hdim_1_start_feat, hdim_1_end_feat, hdim_2_start_feat, + hdim_2_end_feat, PBC_flag=pbc_option) + # Make sure that all labeled points are segmented + for seg_box in segmented_box_expected: + assert np.all(out_seg_mask.core_data()[seg_box[0]:seg_box[1], + seg_box[2]:seg_box[3]] == np.ones((seg_box[1]-seg_box[0], seg_box[3]-seg_box[2]))) + + if pbc_option in ['none', 'hdim_1']: + #there will only be one seg_box + assert (np.sum(out_seg_mask.core_data()[out_seg_mask.core_data()==1]) == + np.sum(np.ones((seg_box[1]-seg_box[0], seg_box[3]-seg_box[2])))) + else: + # We should be capturing the whole feature + assert (np.sum(out_seg_mask.core_data()[out_seg_mask.core_data()==1]) == + np.sum(np.ones((test_hdim_1_sz, test_hdim_2_sz)))) + + + + # Same as the above test, but for hdim_2 + # First, try the cases where we shouldn't get the points on the opposite + # both sides (corner point) + test_hdim_1_pt = 0.0 + test_hdim_2_pt = 0.0 + test_data = np.zeros(test_dset_size) + test_data = testing.make_feature_blob( + test_data, + test_hdim_1_pt, + test_hdim_2_pt, + h1_size=test_hdim_1_sz, + h2_size=test_hdim_2_sz, + amplitude=test_amp, + PBC_flag = 'both' + ) + test_data_iris = testing.make_dataset_from_arr(test_data, data_type="iris") + # Generate dummy feature dataset + test_feature_ds = testing.generate_single_feature(start_h1 = test_hdim_1_pt, + start_h2 = test_hdim_2_pt, + max_h1 = 1000, max_h2 = 1000) + hdim_1_start_feat, hdim_1_end_feat = testing.get_start_end_of_feat(test_hdim_1_pt, + test_hdim_1_sz, 0,test_dset_size[0], + is_pbc = True ) + + hdim_2_start_feat, hdim_2_end_feat = testing.get_start_end_of_feat(test_hdim_2_pt, + test_hdim_2_sz, 0,test_dset_size[1], + is_pbc = True ) + + for pbc_option in ['none', 'hdim_1', 'hdim_2', 'both']: + out_seg_mask, out_df = seg.segmentation_timestep(field_in = test_data_iris, + features_in = test_feature_ds, dxy = test_dxy, + threshold = test_amp-0.5, PBC_flag=pbc_option, ) + # This will automatically give the appropriate box(es), and it's tested separately. + segmented_box_expected = testing.get_pbc_coordinates(0, test_dset_size[0], + 0, test_dset_size[1], hdim_1_start_feat, hdim_1_end_feat, hdim_2_start_feat, + hdim_2_end_feat, PBC_flag=pbc_option) + # Make sure that all labeled points are segmented + for seg_box in segmented_box_expected: + print(pbc_option, seg_box) + #TODO: something is wrong with this case, unclear what. + assert np.all(out_seg_mask.core_data()[seg_box[0]:seg_box[1], + seg_box[2]:seg_box[3]] == np.ones((seg_box[1]-seg_box[0], seg_box[3]-seg_box[2]))) + + #TODO: Make sure for none, hdim_1, hdim_2 that only the appropriate points are segmented + + +def test_segmentation_timestep_level(): + """Tests `tobac.segmentation.segmentation_timestep` with a 2D + input feature and a 3D segmentation array, specifying the `level` parameter. + """ + # Before we can run segmentation, we must run feature detection. 
+
+    # start by building a simple dataset with a single feature
+    import numpy as np
+
+    test_dset_size = (20, 50, 50)
+    test_hdim_1_pt = 20.0
+    test_hdim_2_pt = 20.0
+    test_vdim_pt = 2
+    test_hdim_1_sz = 5
+    test_hdim_2_sz = 5
+    test_vdim_sz = 3
+    test_dxy = 1000
+
+    vdim_start_feat = int(np.ceil(test_vdim_pt - test_vdim_sz / 2))
+    vdim_end_feat = int(np.ceil(test_vdim_pt + test_vdim_sz / 2))
+    hdim_1_start_feat = int(np.ceil(test_hdim_1_pt - test_hdim_1_sz / 2))
+    hdim_1_end_feat = int(np.ceil(test_hdim_1_pt + test_hdim_1_sz / 2))
+    hdim_2_start_feat = int(np.ceil(test_hdim_2_pt - test_hdim_2_sz / 2))
+    hdim_2_end_feat = int(np.ceil(test_hdim_2_pt + test_hdim_2_sz / 2))
+
+    test_amp = 2
+
+    test_data = np.zeros(test_dset_size)
+    test_data = testing.make_feature_blob(
+        test_data,
+        test_hdim_1_pt,
+        test_hdim_2_pt,
+        test_vdim_pt,
+        h1_size=test_hdim_1_sz,
+        h2_size=test_hdim_2_sz,
+        v_size=test_vdim_sz,
+        amplitude=test_amp,
+    )
+
+    # Make a second feature, above the first.
+
+    delta_height = 8
+    test_data = testing.make_feature_blob(
+        test_data,
+        test_hdim_1_pt,
+        test_hdim_2_pt,
+        test_vdim_pt + delta_height,
+        h1_size=test_hdim_1_sz,
+        h2_size=test_hdim_2_sz,
+        v_size=test_vdim_sz,
+        amplitude=test_amp,
+    )
+
+    test_data_iris = testing.make_dataset_from_arr(
+        test_data, data_type="iris", z_dim_num=0, y_dim_num=1, x_dim_num=2
+    )
+    # Generate dummy feature dataset
+    test_feature_ds = testing.generate_single_feature(start_h1=20.0, start_h2=20.0,
+                                                      max_h1 = 1000, max_h2 = 1000)
+
+    out_seg_mask, out_df = seg.segmentation_timestep(
+        field_in=test_data_iris,
+        features_in=test_feature_ds,
+        dxy=test_dxy,
+        threshold=1.5,
+        seed_3D_flag= 'column'
+    )
+    out_seg_mask_arr = out_seg_mask.core_data()
+    # Make sure that all labeled points are segmented, before setting specific levels
+    assert np.all(
+        out_seg_mask_arr[
+            vdim_start_feat:vdim_end_feat,
+            hdim_1_start_feat:hdim_1_end_feat,
+            hdim_2_start_feat:hdim_2_end_feat,
+        ]
+        == np.ones((test_vdim_sz, test_hdim_1_sz, test_hdim_2_sz))
+    )
+    assert np.all(
+        out_seg_mask_arr[
+            vdim_start_feat + delta_height : vdim_end_feat + delta_height,
+            hdim_1_start_feat:hdim_1_end_feat,
+            hdim_2_start_feat:hdim_2_end_feat,
+        ]
+        == np.ones((test_vdim_sz, test_hdim_1_sz, test_hdim_2_sz))
+    )
+
+    # now set specific levels
+    out_seg_mask, out_df = seg.segmentation_timestep(
+        field_in=test_data_iris,
+        features_in=test_feature_ds,
+        dxy=test_dxy,
+        level=slice(vdim_start_feat, vdim_end_feat),
+        threshold=1.5,
+        seed_3D_flag = 'column'
+    )
+    out_seg_mask_arr = out_seg_mask.core_data()
+    # Make sure that only the points within the specified levels are segmented
+    assert np.all(
+        out_seg_mask_arr[
+            vdim_start_feat:vdim_end_feat,
+            hdim_1_start_feat:hdim_1_end_feat,
+            hdim_2_start_feat:hdim_2_end_feat,
+        ]
+        == np.ones((test_vdim_sz, test_hdim_1_sz, test_hdim_2_sz))
+    )
+    assert np.all(
+        out_seg_mask_arr[
+            vdim_start_feat + delta_height : vdim_end_feat + delta_height,
+            hdim_1_start_feat:hdim_1_end_feat,
+            hdim_2_start_feat:hdim_2_end_feat,
+        ]
+        == np.zeros((test_vdim_sz, test_hdim_1_sz, test_hdim_2_sz))
+    )
+
+@pytest.mark.parametrize("blob_size, shift_pts, seed_3D_size"
+                         ", expected_both_segmented",
+                         [((3,3,3), (0,0,4), 3, False),
+                          ((3,3,3), (0,0,4), 5, False),
+                          ((3,3,3), (0,0,4), 7, True),
+                         ]
+)
+def test_segmentation_timestep_3d_seed_box_nopbcs(blob_size, shift_pts,
+                                                  seed_3D_size, expected_both_segmented):
+    '''Tests ```tobac.segmentation.segmentation_timestep```
+    to make sure that the 3D seed box works.
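+    With seed_3D_flag='box', markers are seeded in a box around each detected
+    feature, so a seed box large enough to overlap the second (unseeded) blob
+    should cause both blobs to be segmented, while a smaller box should not.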
+ Parameters + ---------- + blob_size: tuple(int, int, int) + Size of the initial blob to add to the domain in (z, y, x) space. + We strongly recommend that these be *odd* numbers. + shift_pts: tuple(int, int, int) + Number of points *relative to the center* to shift the blob in + (z, y, x) space. + seed_3D_size: int or tuple + Seed size to pass to tobac + expected_both_segmented: bool + True if we expect both features to be segmented, false + if we don't expect them both to be segmented + + ''' + + import numpy as np + + # For now, just testing this for no PBCs. + ''' + The best way to do this I think is to create two blobs near (but not touching) + each other, varying the seed_3D_size so that they are either segmented together + or not segmented together. + ''' + test_dset_size = (20, 50, 50) + test_hdim_1_pt_1 = 20.0 + test_hdim_2_pt_1 = 20.0 + test_vdim_pt_1 = 8 + test_dxy = 1000 + test_amp = 2 + + PBC_opt = 'none' + + + test_data = np.zeros(test_dset_size) + test_data = testing.make_feature_blob( + test_data, + test_hdim_1_pt_1, + test_hdim_2_pt_1, + test_vdim_pt_1, + h1_size=blob_size[1], + h2_size=blob_size[2], + v_size=blob_size[0], + amplitude=test_amp, + ) + + # Make a second feature + test_data = testing.make_feature_blob( + test_data, + test_hdim_1_pt_1 + shift_pts[1], + test_hdim_2_pt_1 + shift_pts[2], + test_vdim_pt_1 + shift_pts[0], + h1_size=blob_size[1], + h2_size=blob_size[2], + v_size=blob_size[0], + amplitude=test_amp, + ) + + test_data_iris = testing.make_dataset_from_arr( + test_data, data_type="iris", z_dim_num=0, y_dim_num=1, x_dim_num=2 + ) + # Generate dummy feature dataset only on the first feature. + test_feature_ds = testing.generate_single_feature(start_v=test_vdim_pt_1, + start_h1=test_hdim_1_pt_1, + start_h2=test_hdim_2_pt_1, + max_h1 = 1000, max_h2 = 1000) + + out_seg_mask, out_df = seg.segmentation_timestep( + field_in=test_data_iris, + features_in=test_feature_ds, + dxy=test_dxy, + threshold=1.5, + seed_3D_flag= 'box', + seed_3D_size=seed_3D_size + ) + + second_point_seg = out_seg_mask.core_data()[int(test_vdim_pt_1 + shift_pts[0]), + int(test_hdim_1_pt_1 + shift_pts[1]), + int(test_hdim_2_pt_1 + shift_pts[2])] + # We really only need to check the center point here for this test. + seg_point_overlaps = second_point_seg == 1 + assert seg_point_overlaps == expected_both_segmented + + +@pytest.mark.parametrize("test_dset_size, vertical_axis_num, " + "vertical_coord_name," + " vertical_coord_opt, expected_raise", + [((20,30,40), 0, 'altitude', 'auto', False), + ((20,30,40), 1, 'altitude', 'auto', False), + ((20,30,40), 2, 'altitude', 'auto', False), + ((20,30,40), 0, 'air_pressure', 'air_pressure', False), + ((20,30,40), 0, 'air_pressure', 'auto', True), + ((20,30,40), 0, 'model_level_number', 'auto', False), + ((20,30,40), 0, 'altitude', 'auto', False), + ((20,30,40), 0, 'geopotential_height', 'auto', False) + ] +) +def test_different_z_axes(test_dset_size, vertical_axis_num, vertical_coord_name, + vertical_coord_opt, expected_raise): + '''Tests ```tobac.segmentation.segmentation_timestep``` + Tests: + The output is the same no matter what order we have axes in. + A ValueError is raised if an invalid vertical coordinate is + passed in + + Parameters + ---------- + test_dset_size: tuple(int, int, int) + Size of the test dataset + vertical_axis_num: int (0-2, inclusive) + Which axis in test_dset_size is the vertical axis + vertical_coord_name: str + Name of the vertical coordinate. 
+    vertical_coord_opt: str
+        What to pass in as the vertical coordinate option to segmentation_timestep
+    expected_raise: bool
+        True if we expect a ValueError to be raised, False otherwise
+    '''
+    import numpy as np
+
+    # First, just check that input and output shapes are the same.
+    test_dxy = 1000
+    test_vdim_pt_1 = 8
+    test_hdim_1_pt_1 = 12
+    test_hdim_2_pt_1 = 12
+    test_data = np.zeros(test_dset_size)
+    common_dset_opts = {
+        'in_arr': test_data,
+        'data_type': 'iris',
+        'z_dim_name': vertical_coord_name
+    }
+    if vertical_axis_num == 0:
+        test_data_iris = testing.make_dataset_from_arr(
+            z_dim_num=0, y_dim_num=1, x_dim_num=2, **common_dset_opts
+        )
+    elif vertical_axis_num == 1:
+        test_data_iris = testing.make_dataset_from_arr(
+            z_dim_num=1, y_dim_num=0, x_dim_num=2, **common_dset_opts
+        )
+    elif vertical_axis_num == 2:
+        test_data_iris = testing.make_dataset_from_arr(
+            z_dim_num=2, y_dim_num=0, x_dim_num=1, **common_dset_opts
+        )
+
+    # Generate dummy feature dataset only on the first feature.
+    test_feature_ds = testing.generate_single_feature(start_v=test_vdim_pt_1,
+                                                      start_h1=test_hdim_1_pt_1,
+                                                      start_h2=test_hdim_2_pt_1,
+                                                      max_h1 = 1000, max_h2 = 1000)
+    if not expected_raise:
+        out_seg_mask, out_df = seg.segmentation_timestep(
+            field_in=test_data_iris,
+            features_in=test_feature_ds,
+            dxy=test_dxy,
+            threshold=1.5,
+            vertical_coord=vertical_coord_opt
+        )
+        # Check that shapes don't change.
+        assert test_data.shape == out_seg_mask.core_data().shape
+
+    else:
+        # Expecting a raise
+        with pytest.raises(ValueError):
+            out_seg_mask, out_df = seg.segmentation_timestep(
+                field_in=test_data_iris,
+                features_in=test_feature_ds,
+                dxy=test_dxy,
+                threshold=1.5,
+                vertical_coord=vertical_coord_opt,
+            )
+# TODO: add more tests to make sure buddy box code is run.
+# From this list right now, I'm not sure why buddy box isn't run actually.
+@pytest.mark.parametrize("dset_size, blob_1_loc, blob_1_size, blob_2_loc, blob_2_size,"
+                         "shift_domain, seed_3D_size",
+                         [((20,30,40), (8,0,0), (5,5,5), (8, 3,3), (5,5,5), (0,-8,-8), None),
+                          ((20,30,40), (8,0,0), (5,5,5), (8, 3,3), (5,5,5), (0,-8,-8), None),
+                          ((20,30,40), (8,1,1), (5,5,5), (8, 28,38), (5,5,5), (0,15,15), None),
+                          ((20,30,40), (8,0,0), (5,5,5), (8, 28,38), (5,5,5), (0,-8,-8), None),
+                          ((20,30,40), (8,0,0), (5,5,5), (8, 28,38), (5,5,5), (0,-8,-8), (5,5,5)),
+                         ]
+)
+# TODO: last test fails
+def test_segmentation_timestep_3d_buddy_box(dset_size,blob_1_loc, blob_1_size, blob_2_loc, blob_2_size,
+                                            shift_domain, seed_3D_size):
+    '''Tests ```tobac.segmentation.segmentation_timestep```
+    to make sure that the "buddy box" 3D PBC implementation works.
+    Basic procedure: build a dataset with two features (preferably on the corner)
+    and then run segmentation, shift the points, and then run segmentation again.
+    After shifting back, the results should be identical.
+    Note: only tests 'both' PBC condition.
+    Parameters
+    ----------
+    dset_size: tuple(int, int, int)
+        Size of the domain (assumes z, hdim_1, hdim_2)
+    blob_1_loc: tuple(int, int, int)
+        Location of the first blob
+    blob_1_size: tuple(int, int, int)
+        Size of the first blob. Note: use odd numbers here.
+    blob_2_loc: tuple(int, int, int)
+        Location of the second blob
+    blob_2_size: tuple(int, int, int)
+        Size of the second blob. Note: use odd numbers here.
+    shift_domain: tuple(int, int, int)
+        How many points to shift the domain by.
+    seed_3D_size: None, int, or tuple
+        Seed size to pass to tobac.
+        If None, passes in a column seed
+    '''
+
+    import numpy as np
+    import pandas as pd
+
+    '''
+    The best way to test this is to create two blobs straddling the domain corner,
+    run segmentation on both the original and a shifted copy of the domain, and
+    then check that the two results agree once the shift is undone.
+    '''
+    test_dxy = 1000
+    test_amp = 2
+
+    test_data = np.zeros(dset_size)
+    test_data = testing.make_feature_blob(
+        test_data,
+        blob_1_loc[1],
+        blob_1_loc[2],
+        blob_1_loc[0],
+        h1_size=blob_1_size[1],
+        h2_size=blob_1_size[2],
+        v_size=blob_1_size[0],
+        amplitude=test_amp,
+        PBC_flag='both'
+
+    )
+
+    # Make a second feature
+    test_data = testing.make_feature_blob(
+        test_data,
+        blob_2_loc[1],
+        blob_2_loc[2],
+        blob_2_loc[0],
+        h1_size=blob_2_size[1],
+        h2_size=blob_2_size[2],
+        v_size=blob_2_size[0],
+        amplitude=test_amp,
+        PBC_flag='both'
+    )
+
+    test_data_iris = testing.make_dataset_from_arr(
+        test_data, data_type="iris", z_dim_num=0, y_dim_num=1, x_dim_num=2
+    )
+    # Generate a dummy feature dataset containing both features.
+    test_feature_ds_1 = testing.generate_single_feature(start_v=blob_1_loc[0],
+                                                        start_h1=blob_1_loc[1],
+                                                        start_h2=blob_1_loc[2],
+                                                        max_h1 = dset_size[1],
+                                                        max_h2 = dset_size[2],
+                                                        feature_num = 1,
+                                                        PBC_flag='both')
+    test_feature_ds_2 = testing.generate_single_feature(start_v=blob_2_loc[0],
+                                                        start_h1=blob_2_loc[1],
+                                                        start_h2=blob_2_loc[2],
+                                                        max_h1 = dset_size[1],
+                                                        max_h2 = dset_size[2],
+                                                        feature_num = 2,
+                                                        PBC_flag='both')
+    test_feature_ds = pd.concat([test_feature_ds_1, test_feature_ds_2])
+
+    common_seg_opts = {
+        'dxy': test_dxy,
+        'threshold': 1.5,
+        'PBC_flag': 'both'
+    }
+    if seed_3D_size is None:
+        common_seg_opts['seed_3D_flag'] = 'column'
+    else:
+        common_seg_opts['seed_3D_flag'] = 'box'
+        common_seg_opts['seed_3D_size'] = seed_3D_size
+
+
+    out_seg_mask, out_df = seg.segmentation_timestep(
+        field_in=test_data_iris,
+        features_in=test_feature_ds,
+        **common_seg_opts
+    )
+
+    # Now, shift the data over and re-run segmentation.
+    test_data_shifted = np.roll(test_data, shift_domain, axis=(0,1,2))
+    test_data_iris_shifted = testing.make_dataset_from_arr(
+        test_data_shifted, data_type="iris", z_dim_num=0, y_dim_num=1, x_dim_num=2
+    )
+    test_feature_ds_1 = testing.generate_single_feature(start_v=blob_1_loc[0]+shift_domain[0],
+                                                        start_h1=blob_1_loc[1]+shift_domain[1],
+                                                        start_h2=blob_1_loc[2]+shift_domain[2],
+                                                        max_h1 = dset_size[1],
+                                                        max_h2 = dset_size[2],
+                                                        feature_num = 1,
+                                                        PBC_flag='both')
+    test_feature_ds_2 = testing.generate_single_feature(start_v=blob_2_loc[0]+shift_domain[0],
+                                                        start_h1=blob_2_loc[1]+shift_domain[1],
+                                                        start_h2=blob_2_loc[2]+shift_domain[2],
+                                                        max_h1 = dset_size[1],
+                                                        max_h2 = dset_size[2],
+                                                        feature_num = 2,
+                                                        PBC_flag='both')
+    test_feature_ds_shifted = pd.concat([test_feature_ds_1, test_feature_ds_2])
+    out_seg_mask_shifted, out_df = seg.segmentation_timestep(
+        field_in=test_data_iris_shifted,
+        features_in=test_feature_ds_shifted,
+        **common_seg_opts
+    )
+
+    # Now, shift output back.
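+    # Rolling the shifted output by the negated shift undoes the earlier np.roll,
+    # so the two segmentations should agree point-for-point if the PBC handling is correct.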
+    out_seg_reshifted = np.roll(out_seg_mask_shifted.core_data(),
+                                tuple((-x for x in shift_domain)), axis=(0,1,2))
+
+    assert np.all(out_seg_mask.core_data() == out_seg_reshifted)
+
+
+@pytest.mark.parametrize("dset_size, feat_1_loc, feat_2_loc,"
+                         "shift_domain, seed_3D_size",
+                         [((20,30,40), (8,0,0), (8, 3,3), (0,-8,-8), None),
+                          ((20,30,40), (8,0,0), (8, 3,3), (0,-8,-8), None),
+                          ((20,30,40), (8,1,1), (8, 28,38), (0,15,15), None),
+                          ((20,30,40), (8,0,0), (8, 28,38), (0,-8,-8), None),
+                          ((20,30,40), (8,0,0), (8, 28,38), (0,-8,-8), (5,5,5)),
+                         ]
+)
+# TODO: last test fails
+def test_add_markers_pbcs(dset_size,feat_1_loc, feat_2_loc, shift_domain, seed_3D_size):
+    '''Tests ```tobac.segmentation.add_markers```
+    to make sure that adding markers works and is consistent across PBCs
+    Parameters
+    ----------
+    dset_size: tuple(int, int, int) or (int, int)
+        Size of the domain (assumes z, hdim_1, hdim_2) or (hdim_1, hdim_2)
+    feat_1_loc: tuple, same length as dset_size
+        Location of the first blob
+    feat_2_loc: tuple, same length as dset_size
+        Location of the second blob
+    shift_domain: tuple, same length as dset_size
+        How many points to shift the domain by.
+    seed_3D_size: None, int, or tuple
+        Seed size to pass to tobac. If None, passes in a column seed
+    '''
+
+    import numpy as np
+    import pandas as pd
+
+
+    if len(dset_size) == 2:
+        is_3D = False
+        start_h1_ax = 0
+    else:
+        is_3D = True
+        start_h1_ax = 1
+
+    common_feat_opts = {
+        'PBC_flag': 'both',
+        'max_h1': dset_size[start_h1_ax],
+        'max_h2': dset_size[start_h1_ax + 1]
+    }
+
+
+    # Generate a dummy feature dataset containing both features.
+    test_feature_ds_1 = testing.generate_single_feature(start_v=feat_1_loc[0],
+                                                        start_h1=feat_1_loc[1],
+                                                        start_h2=feat_1_loc[2],
+                                                        feature_num = 1,
+                                                        **common_feat_opts)
+    test_feature_ds_2 = testing.generate_single_feature(start_v=feat_2_loc[0],
+                                                        start_h1=feat_2_loc[1],
+                                                        start_h2=feat_2_loc[2],
+                                                        feature_num = 2,
+                                                        **common_feat_opts)
+    test_feature_ds = pd.concat([test_feature_ds_1, test_feature_ds_2])
+
+    common_marker_opts = dict()
+    common_marker_opts['PBC_flag'] = 'both'
+
+    if seed_3D_size is None:
+        common_marker_opts['seed_3D_flag'] = 'column'
+    else:
+        common_marker_opts['seed_3D_flag'] = 'box'
+        common_marker_opts['seed_3D_size'] = seed_3D_size
+
+    marker_arr = seg.add_markers(test_feature_ds, np.zeros(dset_size), **common_marker_opts)
+
+    # Now, shift the data over and re-run markers.
+    test_feature_ds_1 = testing.generate_single_feature(start_v=feat_1_loc[0]+shift_domain[0],
+                                                        start_h1=feat_1_loc[1]+shift_domain[1],
+                                                        start_h2=feat_1_loc[2]+shift_domain[2],
+                                                        feature_num = 1,
+                                                        **common_feat_opts)
+    test_feature_ds_2 = testing.generate_single_feature(start_v=feat_2_loc[0]+shift_domain[0],
+                                                        start_h1=feat_2_loc[1]+shift_domain[1],
+                                                        start_h2=feat_2_loc[2]+shift_domain[2],
+                                                        feature_num = 2,
+                                                        **common_feat_opts)
+    test_feature_ds_shifted = pd.concat([test_feature_ds_1, test_feature_ds_2])
+
+    marker_arr_shifted = seg.add_markers(test_feature_ds_shifted, np.zeros(dset_size),
+                                         **common_marker_opts)
+
+
+    # Now, shift output back.
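+    # As in the segmentation test above, roll the shifted markers back so the two
+    # marker arrays can be compared directly.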
+ marker_arr_reshifted = np.roll(marker_arr_shifted, + tuple((-x for x in shift_domain)), axis=(0,1,2)) + + assert np.all(marker_arr == marker_arr_reshifted) + diff --git a/tobac/tests/test_testing.py b/tobac/tests/test_testing.py index 5f42df88..12fc7772 100644 --- a/tobac/tests/test_testing.py +++ b/tobac/tests/test_testing.py @@ -243,43 +243,43 @@ def test_generate_single_feature(): # Testing a simple 3D case expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1,0,0)} + {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)} ]) - assert_frame_equal(generate_single_feature(1, 1, start_v = 1, frame_start = 0).sort_index(axis=1), expected_df.sort_index(axis=1)) + assert_frame_equal(generate_single_feature(1, 1, start_v = 1, frame_start = 0, max_h1 = 1000, max_h2 = 1000).sort_index(axis=1), expected_df.sort_index(axis=1)) # Testing a simple 2D case expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1,0,0)} + {'hdim_1': 1, 'hdim_2': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)} ]) - assert_frame_equal(generate_single_feature(1, 1, frame_start = 0).sort_index(axis=1), expected_df.sort_index(axis=1)) + assert_frame_equal(generate_single_feature(1, 1, frame_start = 0, max_h1 = 1000, max_h2 = 1000).sort_index(axis=1), expected_df.sort_index(axis=1)) # Testing a simple 2D case with movement expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1,0,0)}, - {'hdim_1': 2, 'hdim_2': 2, 'frame': 1, 'time': datetime.datetime(2022, 1, 1,0,5)}, - {'hdim_1': 3, 'hdim_2': 3, 'frame': 2, 'time': datetime.datetime(2022, 1, 1,0,10)}, - {'hdim_1': 4, 'hdim_2': 4, 'frame': 3, 'time': datetime.datetime(2022, 1, 1,0,15)}, + {'hdim_1': 1, 'hdim_2': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)}, + {'hdim_1': 2, 'hdim_2': 2, 'frame': 1, 'feature': 2, 'time': datetime.datetime(2022, 1, 1,0,5)}, + {'hdim_1': 3, 'hdim_2': 3, 'frame': 2, 'feature': 3, 'time': datetime.datetime(2022, 1, 1,0,10)}, + {'hdim_1': 4, 'hdim_2': 4, 'frame': 3, 'feature': 4, 'time': datetime.datetime(2022, 1, 1,0,15)}, ]) assert_frame_equal(generate_single_feature(1, 1, frame_start = 0, num_frames=4, - spd_h1 = 1, spd_h2 = 1).sort_index(axis=1), expected_df.sort_index(axis=1)) + spd_h1 = 1, spd_h2 = 1, max_h1 = 1000, max_h2 = 1000).sort_index(axis=1), expected_df.sort_index(axis=1)) # Testing a simple 3D case with movement expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1,0,0)}, - {'hdim_1': 2, 'hdim_2': 2, 'vdim': 2, 'frame': 1, 'time': datetime.datetime(2022, 1, 1,0,5)}, - {'hdim_1': 3, 'hdim_2': 3, 'vdim': 3, 'frame': 2, 'time': datetime.datetime(2022, 1, 1,0,10)}, - {'hdim_1': 4, 'hdim_2': 4, 'vdim': 4, 'frame': 3, 'time': datetime.datetime(2022, 1, 1,0,15)}, + {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)}, + {'hdim_1': 2, 'hdim_2': 2, 'vdim': 2, 'frame': 1, 'feature': 2, 'time': datetime.datetime(2022, 1, 1,0,5)}, + {'hdim_1': 3, 'hdim_2': 3, 'vdim': 3, 'frame': 2, 'feature': 3, 'time': datetime.datetime(2022, 1, 1,0,10)}, + {'hdim_1': 4, 'hdim_2': 4, 'vdim': 4, 'frame': 3, 'feature': 4, 'time': datetime.datetime(2022, 1, 1,0,15)}, ]) assert_frame_equal(generate_single_feature(1, 1, start_v = 1, frame_start = 
0, num_frames=4, - spd_h1 = 1, spd_h2 = 1, spd_v = 1).sort_index(axis=1), expected_df.sort_index(axis=1)) + spd_h1 = 1, spd_h2 = 1, spd_v = 1, max_h1 = 1000, max_h2 = 1000).sort_index(axis=1), expected_df.sort_index(axis=1)) # Testing a simple 3D case with movement that passes the hdim_1 boundary expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1, 0, 0)}, - {'hdim_1': 5, 'hdim_2': 2, 'vdim': 2, 'frame': 1, 'time': datetime.datetime(2022, 1, 1, 0, 5)}, - {'hdim_1': 9, 'hdim_2': 3, 'vdim': 3, 'frame': 2, 'time': datetime.datetime(2022, 1, 1, 0, 10)}, - {'hdim_1': 3, 'hdim_2': 4, 'vdim': 4, 'frame': 3, 'time': datetime.datetime(2022, 1, 1, 0, 15)}, + {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1, 0, 0)}, + {'hdim_1': 5, 'hdim_2': 2, 'vdim': 2, 'frame': 1, 'feature': 2, 'time': datetime.datetime(2022, 1, 1, 0, 5)}, + {'hdim_1': 9, 'hdim_2': 3, 'vdim': 3, 'frame': 2, 'feature': 3, 'time': datetime.datetime(2022, 1, 1, 0, 10)}, + {'hdim_1': 3, 'hdim_2': 4, 'vdim': 4, 'frame': 3, 'feature': 4, 'time': datetime.datetime(2022, 1, 1, 0, 15)}, ]) assert_frame_equal(generate_single_feature(1, 1, start_v = 1, min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, @@ -288,10 +288,10 @@ def test_generate_single_feature(): # Testing a simple 3D case with movement that passes the hdim_1 boundary expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1,0,0)}, - {'hdim_1': 5, 'hdim_2': 2, 'vdim': 2, 'frame': 1, 'time': datetime.datetime(2022, 1, 1,0,5)}, - {'hdim_1': 9, 'hdim_2': 3, 'vdim': 3, 'frame': 2, 'time': datetime.datetime(2022, 1, 1,0,10)}, - {'hdim_1': 3, 'hdim_2': 4, 'vdim': 4, 'frame': 3, 'time': datetime.datetime(2022, 1, 1,0,15)}, + {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)}, + {'hdim_1': 5, 'hdim_2': 2, 'vdim': 2, 'frame': 1, 'feature': 2, 'time': datetime.datetime(2022, 1, 1,0,5)}, + {'hdim_1': 9, 'hdim_2': 3, 'vdim': 3, 'frame': 2, 'feature': 3, 'time': datetime.datetime(2022, 1, 1,0,10)}, + {'hdim_1': 3, 'hdim_2': 4, 'vdim': 4, 'frame': 3, 'feature': 4, 'time': datetime.datetime(2022, 1, 1,0,15)}, ]) assert_frame_equal(generate_single_feature(1, 1, start_v = 1, min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, @@ -300,10 +300,10 @@ def test_generate_single_feature(): # Testing a simple 3D case with movement that passes the hdim_2 boundary expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1,0,0)}, - {'hdim_1': 2, 'hdim_2': 5, 'vdim': 2, 'frame': 1, 'time': datetime.datetime(2022, 1, 1,0,5)}, - {'hdim_1': 3, 'hdim_2': 9, 'vdim': 3, 'frame': 2, 'time': datetime.datetime(2022, 1, 1,0,10)}, - {'hdim_1': 4, 'hdim_2': 3, 'vdim': 4, 'frame': 3, 'time': datetime.datetime(2022, 1, 1,0,15)}, + {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)}, + {'hdim_1': 2, 'hdim_2': 5, 'vdim': 2, 'frame': 1, 'feature': 2, 'time': datetime.datetime(2022, 1, 1,0,5)}, + {'hdim_1': 3, 'hdim_2': 9, 'vdim': 3, 'frame': 2, 'feature': 3, 'time': datetime.datetime(2022, 1, 1,0,10)}, + {'hdim_1': 4, 'hdim_2': 3, 'vdim': 4, 'frame': 3, 'feature': 4, 'time': datetime.datetime(2022, 1, 1,0,15)}, ]) assert_frame_equal(generate_single_feature(1, 1, start_v = 1, min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, @@ -312,12 +312,23 @@ def 
test_generate_single_feature(): # Testing a simple 3D case with movement that passes the hdim_1 and hdim_2 boundaries expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'time': datetime.datetime(2022, 1, 1,0,0)}, - {'hdim_1': 6, 'hdim_2': 5, 'vdim': 2, 'frame': 1, 'time': datetime.datetime(2022, 1, 1,0,5)}, - {'hdim_1': 1, 'hdim_2': 9, 'vdim': 3, 'frame': 2, 'time': datetime.datetime(2022, 1, 1,0,10)}, - {'hdim_1': 6, 'hdim_2': 3, 'vdim': 4, 'frame': 3, 'time': datetime.datetime(2022, 1, 1,0,15)}, + {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)}, + {'hdim_1': 6, 'hdim_2': 5, 'vdim': 2, 'frame': 1, 'feature': 2, 'time': datetime.datetime(2022, 1, 1,0,5)}, + {'hdim_1': 1, 'hdim_2': 9, 'vdim': 3, 'frame': 2, 'feature': 3, 'time': datetime.datetime(2022, 1, 1,0,10)}, + {'hdim_1': 6, 'hdim_2': 3, 'vdim': 4, 'frame': 3, 'feature': 4, 'time': datetime.datetime(2022, 1, 1,0,15)}, ]) assert_frame_equal(generate_single_feature(1, 1, start_v = 1, min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, frame_start = 0, num_frames=4, spd_h1 = 5, spd_h2 = 4, spd_v = 1, PBC_flag='both').sort_index(axis=1), expected_df.sort_index(axis=1)) + +@pytest.mark.parametrize("in_pt,in_sz,axis_size,out_pts", + [(3, 0,(0,5), (3,3)), + (3, 3,(0,5), (2,5)), + ] +) +def test_get_start_end_of_feat_nopbc(in_pt, in_sz, axis_size, out_pts): + '''Tests ```tobac.testing.get_start_end_of_feat``` + + ''' + assert tbtest.get_start_end_of_feat(in_pt, in_sz, axis_size[0], axis_size[1]) == out_pts \ No newline at end of file diff --git a/tobac/tests/test_tracking.py b/tobac/tests/test_tracking.py index 6c6a27d7..267998fc 100644 --- a/tobac/tests/test_tracking.py +++ b/tobac/tests/test_tracking.py @@ -32,7 +32,7 @@ def test_linking_trackpy(): PBC_flag = 'none' ) # Just want to remove the time_cell column here. - actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'frame', 'time', 'cell']] + actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'frame', 'feature', 'time', 'cell']] assert_frame_equal(expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1)) @@ -51,7 +51,7 @@ def test_linking_trackpy(): PBC_flag = 'none' ) # Just want to remove the time_cell column here. - actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'time', 'cell']] + actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'feature', 'time', 'cell']] assert_frame_equal(expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1)) @@ -71,7 +71,7 @@ def test_linking_trackpy(): PBC_flag = 'hdim_1' ) # Just want to remove the time_cell column here. - actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'time', 'cell']] + actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'feature', 'time', 'cell']] assert_frame_equal(expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1)) @@ -90,7 +90,7 @@ def test_linking_trackpy(): PBC_flag = 'hdim_2' ) # Just want to remove the time_cell column here. 
- actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'time', 'cell']] + actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'feature', 'time', 'cell']] assert_frame_equal(expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1)) @@ -109,7 +109,7 @@ def test_linking_trackpy(): PBC_flag = 'both' ) # Just want to remove the time_cell column here. - actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'time', 'cell']] + actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'feature', 'time', 'cell']] assert_frame_equal(expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1)) @@ -124,85 +124,3 @@ def test_build_distance_function(): assert (test_func(np.array((0,9,9)), np.array((0,0,0))) == pytest.approx(1.4142135)) -def test_calc_distance_coords_pbc(): - '''Tests ```tobac.tracking.calc_distance_coords_pbc``` - Currently tests: - two points in normal space - Periodicity along hdim_1, hdim_2, and corners - ''' - - # Test first two points in normal space with varying PBC conditions - for PBC_condition in ['none', 'hdim_1', 'hdim_2', 'both']: - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(0)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,1)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(1)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0)), np.array((0,1)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(1)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,0)), np.array((3,3,1)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(4.3588989, rel=1e-3)) - - # Now test two points that will be closer along the hdim_1 boundary for cases without PBCs - for PBC_condition in ['hdim_1', 'both']: - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(1)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,9,0)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(1)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((8,0)), np.array((0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(2)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((4,0,4)), np.array((3,7,3)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(3.3166247)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((4,0,4)), np.array((3,7,3)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(3.3166247)) - - - - # Test the same points, except without PBCs - for PBC_condition in ['none', 'hdim_2']: - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(9)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,9,0)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(9)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((8,0)), np.array((0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(8)) - - # Now test two points that will be closer along the hdim_2 boundary for cases without PBCs - for PBC_condition in ['hdim_2', 'both']: - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,9)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(1)) - assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(1)) - 
assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,8)), np.array((0,0)), 0, 10, 0, 10, PBC_condition)
-                == pytest.approx(2))
-
-    # Test the same points, except without PBCs
-    for PBC_condition in ['none', 'hdim_1']:
-        assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,9)), 0, 10, 0, 10, PBC_condition)
-                == pytest.approx(9))
-        assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
-                == pytest.approx(9))
-        assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,8)), np.array((0,0)), 0, 10, 0, 10, PBC_condition)
-                == pytest.approx(8))
-
-    # Test points that will be closer for the both
-    PBC_condition = 'both'
-    assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,9,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
-            == pytest.approx(1.4142135, rel=1e-3))
-    assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition)
-            == pytest.approx(1.4142135, rel=1e-3))
-
-    # Test the corner points for no PBCs
-    PBC_condition = 'none'
-    assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,9,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
-            == pytest.approx(12.727922, rel=1e-3))
-    assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition)
-            == pytest.approx(12.727922, rel=1e-3))
-
-    # Test the corner points for hdim_1 and hdim_2
-    for PBC_condition in ['hdim_1', 'hdim_2']:
-        assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,9,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
-                == pytest.approx(9.055385))
-        assert (tobac.tracking.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition)
-                == pytest.approx(9.055385))
-
-
diff --git a/tobac/tests/test_util.py b/tobac/tests/test_util.py
new file mode 100644
index 00000000..1ffccfef
--- /dev/null
+++ b/tobac/tests/test_util.py
@@ -0,0 +1,276 @@
+import pytest
+import tobac.testing
+import tobac.utils as tb_utils
+from collections import Counter
+
+
+def lists_equal_without_order(a, b):
+    """
+    This will make sure the inner lists contain the same elements,
+    but doesn't account for duplicate groups.
+    from: https://stackoverflow.com/questions/31501909/assert-list-of-list-equality-without-order-in-python/31502000
+    """
+    for l1 in a:
+        check_counter = Counter(l1)
+        if not any(Counter(l2) == check_counter for l2 in b):
+            return False
+    return True
+
+
+def test_get_label_props_in_dict():
+    '''Testing ```tobac.utils.get_label_props_in_dict``` for both 2D and 3D cases.
+    '''
+    import skimage.measure as skim
+    test_3D_data = tobac.testing.make_sample_data_3D_3blobs(data_type='xarray')
+    test_2D_data = tobac.testing.make_sample_data_2D_3blobs(data_type='xarray')
+
+
+    # make sure it works for 3D data
+    labels_3D = skim.label(test_3D_data.values[0])
+
+    output_3D = tb_utils.get_label_props_in_dict(labels_3D)
+
+    #make sure it is a dict
+    assert type(output_3D) is dict
+    #make sure we get at least one output, there should be at least one label.
+    assert len(output_3D) > 0
+
+    # make sure it works for 2D data
+    labels_2D = skim.label(test_2D_data.values[0])
+
+    output_2D = tb_utils.get_label_props_in_dict(labels_2D)
+
+    #make sure it is a dict
+    assert type(output_2D) is dict
+    #make sure we get at least one output, there should be at least one label.
+    assert len(output_2D) > 0
+
+
+def test_get_indices_of_labels_from_reg_prop_dict():
+    '''Testing ```tobac.utils.get_indices_of_labels_from_reg_prop_dict``` for 2D and 3D cases.
+    '''
+    import skimage.measure as skim
+    import numpy as np
+    test_3D_data = tobac.testing.make_sample_data_3D_3blobs(data_type='xarray')
+    test_2D_data = tobac.testing.make_sample_data_2D_3blobs(data_type='xarray')
+
+
+    # make sure it works for 3D data
+    labels_3D = skim.label(test_3D_data.values[0])
+    nx_3D = test_3D_data.values[0].shape[2]
+    ny_3D = test_3D_data.values[0].shape[1]
+    nz_3D = test_3D_data.values[0].shape[0]
+
+    labels_2D = skim.label(test_2D_data.values[0])
+    nx_2D = test_2D_data.values[0].shape[1]
+    ny_2D = test_2D_data.values[0].shape[0]
+
+    region_props_3D = tb_utils.get_label_props_in_dict(labels_3D)
+    region_props_2D = tb_utils.get_label_props_in_dict(labels_2D)
+
+    #get_indices_of_labels_from_reg_prop_dict
+
+    [curr_loc_indices, z_indices, y_indices, x_indices] = tb_utils.get_indices_of_labels_from_reg_prop_dict(region_props_3D)
+
+    for index_key in curr_loc_indices:
+        # there should be at least one value in each.
+        assert curr_loc_indices[index_key] > 0
+
+        assert np.all(z_indices[index_key] >= 0) and np.all(z_indices[index_key] < nz_3D)
+        assert np.all(x_indices[index_key] >= 0) and np.all(x_indices[index_key] < nx_3D)
+        assert np.all(y_indices[index_key] >= 0) and np.all(y_indices[index_key] < ny_3D)
+
+    [curr_loc_indices, y_indices, x_indices] = tb_utils.get_indices_of_labels_from_reg_prop_dict(region_props_2D)
+
+    for index_key in curr_loc_indices:
+        # there should be at least one value in each.
+        assert curr_loc_indices[index_key] > 0
+
+        assert np.all(x_indices[index_key] >= 0) and np.all(x_indices[index_key] < nx_2D)
+        assert np.all(y_indices[index_key] >= 0) and np.all(y_indices[index_key] < ny_2D)
+
+
+
+def test_calc_distance_coords_pbc():
+    '''Tests ```tobac.utils.calc_distance_coords_pbc```
+    Currently tests:
+    two points in normal space
+    Periodicity along hdim_1, hdim_2, and corners
+    '''
+    import numpy as np
+
+    # Test first two points in normal space with varying PBC conditions
+    for PBC_condition in ['none', 'hdim_1', 'hdim_2', 'both']:
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(0))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,1)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(1))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,0)), np.array((0,1)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(1))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,0)), np.array((3,3,1)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(4.3588989, rel=1e-3))
+
+    # Now test two points that are closer across the hdim_1 boundary when it is periodic
+    for PBC_condition in ['hdim_1', 'both']:
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(1))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,9,0)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(1))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((8,0)), np.array((0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(2))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((4,0,4)), np.array((3,7,3)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(3.3166247))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((4,0,4)), np.array((3,7,3)), 0, 10, 0, 10, PBC_condition)
+                ==
pytest.approx(3.3166247))
+
+
+
+    # Test the same points, except without periodicity in hdim_1
+    for PBC_condition in ['none', 'hdim_2']:
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(9))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,9,0)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(9))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((8,0)), np.array((0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(8))
+
+    # Now test two points that are closer across the hdim_2 boundary when it is periodic
+    for PBC_condition in ['hdim_2', 'both']:
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,9)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(1))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(1))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,8)), np.array((0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(2))
+
+    # Test the same points, except without periodicity in hdim_2
+    for PBC_condition in ['none', 'hdim_1']:
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,9)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(9))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(9))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,8)), np.array((0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(8))
+
+    # Test corner points that are closer when both boundaries are periodic
+    PBC_condition = 'both'
+    assert (tb_utils.calc_distance_coords_pbc(np.array((0,9,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+            == pytest.approx(1.4142135, rel=1e-3))
+    assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition)
+            == pytest.approx(1.4142135, rel=1e-3))
+
+    # Test the corner points for no PBCs
+    PBC_condition = 'none'
+    assert (tb_utils.calc_distance_coords_pbc(np.array((0,9,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+            == pytest.approx(12.727922, rel=1e-3))
+    assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition)
+            == pytest.approx(12.727922, rel=1e-3))
+
+    # Test the corner points for hdim_1 and hdim_2
+    for PBC_condition in ['hdim_1', 'hdim_2']:
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,9,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(9.055385))
+        assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition)
+                == pytest.approx(9.055385))
+
+
+@pytest.mark.parametrize("loc_1, loc_2, bounds, PBC_flag, expected_dist",
+                         [((0,0,0), (0,0,9), (0, 10, 0, 10), 'both', 1),
+                         ]
+)
+def test_calc_distance_coords_pbc_param(loc_1, loc_2, bounds, PBC_flag, expected_dist):
+    '''Tests ```tobac.utils.calc_distance_coords_pbc``` in a parameterized way
+
+    Parameters
+    ----------
+    loc_1: tuple
+        First point location, either in 2D or 3D space (assumed z, h1, h2)
+    loc_2: tuple
+        Second point location, either in 2D or 3D space (assumed z, h1, h2)
+    bounds: tuple
+        hdim_1/hdim_2 bounds as (h1_min, h1_max, h2_min, h2_max)
+    PBC_flag : {'none', 'hdim_1', 'hdim_2', 'both'}
+        Sets whether to use periodic boundaries, and if so in which directions.
+        'none' means that we do not have periodic boundaries
+        'hdim_1' means that we are periodic along hdim1
+        'hdim_2' means that we are periodic along hdim2
+        'both' means that we are periodic along both horizontal dimensions
+    expected_dist: float
+        Expected distance between the two points
+    '''
+    import numpy as np
+
+    assert (tb_utils.calc_distance_coords_pbc(np.array(loc_1), np.array(loc_2), bounds[0], bounds[1],
+                                              bounds[2], bounds[3], PBC_flag)== pytest.approx(expected_dist))
+
+
+def test_get_pbc_coordinates():
+    '''Tests ```tobac.utils.get_pbc_coordinates```.
+    Currently runs the following tests:
+    For an invalid PBC_flag, we raise an error
+    For PBC_flag of 'none', we truncate the box and give a valid box.
+    For 'hdim_1', 'hdim_2', and 'both', boxes that cross a periodic boundary
+    are wrapped and, where necessary, split into multiple boxes.
+
+    '''
+
+    with pytest.raises(ValueError):
+        tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'c')
+
+    # Test PBC_flag of none
+
+    assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'none') == [(1, 4, 1, 4),])
+    assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -1, 4, 1, 4, 'none') == [(0, 4, 1, 4),])
+    assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 12, 1, 4, 'none') == [(1, 10, 1, 4),])
+    assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 12, -1, 4, 'none') == [(1, 10, 0, 4),])
+
+    # Test PBC_flag with hdim_1
+    # Simple case, no PBC overlapping
+    assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'hdim_1') == [(1, 4, 1, 4),])
+    # PBC going on the min side
+    assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -1, 4, 1, 4, 'hdim_1') == [(0, 4, 1, 4), (9, 10, 1, 4)])
+    # PBC going on the min side; should be truncated in hdim_2.
+    assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -1, 4, -1, 4, 'hdim_1') == [(0, 4, 0, 4), (9, 10, 0, 4)])
+    # PBC going on the max side only
+    assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 4, 12, 1, 4, 'hdim_1') == [(4, 10, 1, 4), (0, 2, 1, 4)])
+    # PBC overlapping
+    assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -4, 12, 1, 4, 'hdim_1') == [(0, 10, 1, 4),])
+
+    # Test PBC_flag with hdim_2
+    # Simple case, no PBC overlapping
+    assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'hdim_2') == [(1, 4, 1, 4),])
+    # PBC going on the min side
+    assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, -1, 4, 'hdim_2') == [(1, 4, 0, 4), (1, 4, 9, 10)])
+    # PBC going on the min side with truncation in hdim_1
+    assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -4, 4, -1, 4, 'hdim_2') == [(0, 4, 0, 4), (0, 4, 9, 10)])
+    # PBC going on the max side
+    assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 4, 12, 'hdim_2') == [(1, 4, 4, 10), (1, 4, 0, 2)])
+    # PBC overlapping
+    assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, -4, 12, 'hdim_2') == [(1, 4, 0, 10),])
+
+    # Test PBC_flag with both
+    # Simple case, no PBC overlapping
+    assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'both') == [(1, 4, 1, 4),])
+    # hdim_1 only testing
+    # PBC on the min side of hdim_1 only
+    assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -1, 4, 1, 4, 'both') == [(0, 4, 1, 4), (9, 10, 1, 4)])
+    # PBC on the max side of hdim_1 only
+    assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 4, 12, 1, 4, 'both') == [(4, 10, 1, 4), (0, 2, 1, 4)])
+    # PBC overlapping on max side of hdim_1 only
+    assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -4, 12, 1, 4, 'both') == [(0, 10, 1, 4),])
+    # hdim_2 only testing
+    # PBC on the min side of hdim_2 only
+    assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, -1, 4, 'both') == [(1, 4, 0, 4), (1, 4, 9, 10)])
+    # PBC on the max side of hdim_2 only
+    assert
(tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 4, 12, 'both') == [(1, 4, 4, 10), (1, 4, 0, 2)]) + # PBC overlapping on max side of hdim_2 only + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, -4, 12, 'both') == [(1, 4, 0, 10),]) + # hdim_1 and hdim_2 testing simultaneous + # both larger than the actual domain + assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -1, 12, -4, 14, 'both') == [(0, 10, 0, 10),]) + # min in hdim_1 and hdim_2 + assert (lists_equal_without_order(tb_utils.get_pbc_coordinates(0, 10, 0, 10, -3, 3, -4, 2, 'both'), [(0, 3, 0, 2), (0, 3, 6, 10), (7, 10, 6, 10), (7, 10, 0, 2)])) + # max in hdim_1, min in hdim_2 + assert (lists_equal_without_order(tb_utils.get_pbc_coordinates(0, 10, 0, 10, 5, 12, -4, 2, 'both'), [(5, 10, 0, 2), (5, 10, 6, 10), (0, 2, 6, 10), (0, 2, 0, 2)])) + # max in hdim_1 and hdim_2 + assert (lists_equal_without_order(tb_utils.get_pbc_coordinates(0, 10, 0, 10, 5, 12, 7, 15, 'both'), [(5, 10, 7, 10), (5, 10, 0, 5), (0, 2, 0, 5), (0, 2, 7, 10)])) + # min in hdim_1, max in hdim_2 + assert (lists_equal_without_order(tb_utils.get_pbc_coordinates(0, 10, 0, 10, -3, 3, 7, 15, 'both'), [(0, 3, 7, 10), (0, 3, 0, 5), (7, 10, 0, 5), (7, 10, 7, 10)])) diff --git a/tobac/tracking.py b/tobac/tracking.py index 12e7ebd7..4a699959 100644 --- a/tobac/tracking.py +++ b/tobac/tracking.py @@ -2,17 +2,7 @@ import numpy as np import pandas as pd import math - - -def njit_if_available(func, **kwargs): - '''Decorator to wrap a function with numba.njit if available. - If numba isn't available, it just returns the function. - ''' - try: - from numba import njit - return njit(func, kwargs) - except ModuleNotFoundError: - return func +from . import utils as tb_utils @@ -340,63 +330,6 @@ def build_distance_function(min_h1, max_h1, min_h2, max_h2, PBC_flag): ''' import functools - return functools.partial(calc_distance_coords_pbc, + return functools.partial(tb_utils.calc_distance_coords_pbc, min_h1 = min_h1, max_h1 = max_h1, min_h2 = min_h2, max_h2 = max_h2, PBC_flag = PBC_flag) - -@njit_if_available -def calc_distance_coords_pbc(coords_1, coords_2, min_h1, max_h1, min_h2, max_h2, - PBC_flag): - '''Function to calculate the distance between cartesian - coordinate set 1 and coordinate set 2. Note that we assume both - coordinates are within their min/max already. - - Parameters - ---------- - coords_1: 2D or 3D array-like - Set of coordinates passed in from trackpy of either (vdim, hdim_1, hdim_2) - coordinates or (hdim_1, hdim_2) coordinates. - coords_2: 2D or 3D array-like - Similar to coords_1, but for the second pair of coordinates - min_h1: int - Minimum point in hdim_1 - max_h1: int - Maximum point in hdim_1 - min_h2: int - Minimum point in hdim_2 - max_h2: int - Maximum point in hdim_2 - PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both') - Sets whether to use periodic boundaries, and if so in which directions. - 'none' means that we do not have periodic boundaries - 'hdim_1' means that we are periodic along hdim1 - 'hdim_2' means that we are periodic along hdim2 - 'both' means that we are periodic along both horizontal dimensions - - Returns - ------- - float - Distance between coords_1 and coords_2 in cartesian space. - - ''' - is_3D = len(coords_1)== 3 - size_h1 = max_h1 - min_h1 - size_h2 = max_h2 - min_h2 - - if not is_3D: - # Let's make the accounting easier. 
-        coords_1 = np.array((0, coords_1[0], coords_1[1]))
-        coords_2 = np.array((0, coords_2[0], coords_2[1]))
-
-    if PBC_flag in ['hdim_1', 'both']:
-        mod_h1 = size_h1
-    else:
-        mod_h1 = 0
-    if PBC_flag in ['hdim_2', 'both']:
-        mod_h2 = size_h2
-    else:
-        mod_h2 = 0
-    max_dims = np.array((0, mod_h1, mod_h2))
-    deltas = np.abs(coords_1 - coords_2)
-    deltas = np.where(deltas > 0.5 * max_dims, deltas - max_dims, deltas)
-    return np.sqrt(np.sum(deltas**2))
diff --git a/tobac/utils.py b/tobac/utils.py
index bdcc7c93..3428b5ba 100644
--- a/tobac/utils.py
+++ b/tobac/utils.py
@@ -1,4 +1,6 @@
 import logging
+import numpy as np
+
 
 def column_mask_from2D(mask_2D,cube,z_coord='model_level_number'):
     '''function to turn 2D watershedding mask into a 3D mask of selected columns
@@ -638,3 +640,329 @@ def get_spacings(field_in,grid_spacing=None,time_spacing=None):
         # use value of time_spacing for dt:
         dt=time_spacing
     return dxy,dt
+
+def get_label_props_in_dict(labels):
+    '''Function to get the label properties into a dictionary format.
+
+    Parameters
+    ----------
+    labels: 2D or 3D array-like
+        comes from the `skimage.measure.label` function
+
+    Returns
+    -------
+    dict
+        output from skimage.measure.regionprops in dictionary format, where the key is the label number
+    '''
+    import skimage.measure
+
+    region_properties_raw = skimage.measure.regionprops(labels)
+    region_properties_dict = dict()
+    for region_prop in region_properties_raw:
+        region_properties_dict[region_prop.label] = region_prop
+
+    return region_properties_dict
+
+def get_indices_of_labels_from_reg_prop_dict(region_property_dict):
+    '''Function to get the x, y, and z indices (as well as point count) of all labeled regions.
+
+    Parameters
+    ----------
+    region_property_dict: dict of region_property objects
+        This dict should come from the get_label_props_in_dict function.
+
+    Returns
+    -------
+    dict (key: label number, int)
+        The number of points in the label number
+    dict (key: label number, int)
+        The z indices in the label number.
+        If a 2D property dict is passed, this value is not returned
+    dict (key: label number, int)
+        the y indices in the label number
+    dict (key: label number, int)
+        the x indices in the label number
+
+    Raises
+    ------
+    ValueError
+        a ValueError is raised if there are no regions in the region property dict
+
+    '''
+
+    import skimage.measure
+    import numpy as np
+
+    if len(region_property_dict) ==0:
+        raise ValueError("No regions!")
+
+
+    z_indices = dict()
+    y_indices = dict()
+    x_indices = dict()
+    curr_loc_indices = dict()
+    is_3D = False
+
+    #loop through all skimage identified regions
+    for region_prop_key in region_property_dict:
+        region_prop = region_property_dict[region_prop_key]
+        index = region_prop.label
+        if len(region_prop.coords[0])>=3:
+            is_3D = True
+            curr_z_ixs, curr_y_ixs, curr_x_ixs = np.transpose(region_prop.coords)
+            z_indices[index] = curr_z_ixs
+        else:
+            curr_y_ixs, curr_x_ixs = np.transpose(region_prop.coords)
+            z_indices[index] = -1
+
+        y_indices[index] = curr_y_ixs
+        x_indices[index] = curr_x_ixs
+        curr_loc_indices[index] = len(curr_y_ixs)
+
+    #print("indices found")
+    if is_3D:
+        return [curr_loc_indices, z_indices, y_indices, x_indices]
+    else:
+        return [curr_loc_indices, y_indices, x_indices]
+
+def adjust_pbc_point(in_dim, dim_min, dim_max):
+    '''Function to adjust a point to the other boundary for PBCs
+
+    Parameters
+    ----------
+    in_dim : int
+        Input coordinate to adjust
+    dim_min : int
+        Minimum point for the dimension
+    dim_max : int
+        Maximum point for the dimension (inclusive)
+
+    Returns
+    -------
+    int
+        The adjusted point on the opposite boundary
+
+    Raises
+    ------
+    ValueError
+        If in_dim isn't on one of the boundary points
+    '''
+    if in_dim == dim_min:
+        return dim_max
+    elif in_dim == dim_max:
+        return dim_min
+    else:
+        raise ValueError("In adjust_pbc_point, in_dim isn't on a boundary.")
+
+
+def get_pbc_coordinates(h1_min, h1_max, h2_min, h2_max,
+                        h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord,
+                        PBC_flag = 'none'):
+    '''Function to get the *actual* coordinate boxes of interest given a set of shifted
+    coordinates with periodic boundaries.
+
+    For example, if you pass in [as h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord]
+    (-3, 5, 2,6) with PBC_flag of 'both' or 'hdim_1', h1_max of 10, and h1_min of 0
+    this function will return: [(0,5,2,6), (7,10,2,6)].
+
+    If you pass in something outside the bounds of the array, this will truncate your
+    requested box. For example, if you pass in [as h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord]
+    (-3, 5, 2,6) with PBC_flag of 'none' or 'hdim_2', this function will return:
+    [(0,5,2,6)], assuming h1_min is 0.
+
+    For cases where PBC_flag is 'both' and we have a corner case, the wrapped
+    pieces could overlap. For example, if you pass in (-6, 5, -6, 5) with a
+    (0, 10) domain in both dimensions, the requested span covers the whole
+    domain, so the single full-domain box [(0, 10, 0, 10)] is returned instead.
+
+    Parameters
+    ----------
+    h1_min: int
+        Minimum array value in hdim_1, typically 0.
+    h1_max: int
+        Maximum array value in hdim_1 (exclusive). h1_max - h1_min should be the size in h1.
+    h2_min: int
+        Minimum array value in hdim_2, typically 0.
+    h2_max: int
+        Maximum array value in hdim_2 (exclusive). h2_max - h2_min should be the size in h2.
+    h1_start_coord: int
+        Start coordinate in hdim_1. Can be < h1_min if dealing with PBCs.
+    h1_end_coord: int
+        End coordinate in hdim_1. Can be >= h1_max if dealing with PBCs.
+    h2_start_coord: int
+        Start coordinate in hdim_2. Can be < h2_min if dealing with PBCs.
+    h2_end_coord: int
+        End coordinate in hdim_2. Can be >= h2_max if dealing with PBCs.
+    PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both')
+        Sets whether to use periodic boundaries, and if so in which directions.
+        'none' means that we do not have periodic boundaries
+        'hdim_1' means that we are periodic along hdim1
+        'hdim_2' means that we are periodic along hdim2
+        'both' means that we are periodic along both horizontal dimensions
+
+    Returns
+    -------
+    list of tuples
+        A list of tuples containing (h1_start, h1_end, h2_start, h2_end) of each of the
+        boxes needed to encompass the coordinates.
+    '''
+
+    if PBC_flag not in ['none', 'hdim_1', 'hdim_2', 'both']:
+        raise ValueError("PBC_flag must be 'none', 'hdim_1', 'hdim_2', or 'both'")
+
+
+    h1_start_coords = list()
+    h1_end_coords = list()
+    h2_start_coords = list()
+    h2_end_coords = list()
+
+
+    # In both of these cases, we just need to truncate the hdim_1 points.
+    if PBC_flag in ['none', 'hdim_2']:
+        h1_start_coords.append(max(h1_min, h1_start_coord))
+        h1_end_coords.append(min(h1_max, h1_end_coord))
+
+
+    # In both of these cases, we only need to truncate the hdim_2 points.
+    if PBC_flag in ['none', 'hdim_1']:
+        h2_start_coords.append(max(h2_min, h2_start_coord))
+        h2_end_coords.append(min(h2_max, h2_end_coord))
+
+    # If the PBC flag is none, we can just return.
+    if PBC_flag == 'none':
+        return [(h1_start_coords[0], h1_end_coords[0], h2_start_coords[0], h2_end_coords[0])]
+
+    # We have at least one periodic boundary.
+
+    # hdim_1 boundary is periodic.
+    if PBC_flag in ['hdim_1', 'both']:
+        if (h1_end_coord - h1_start_coord) >= (h1_max - h1_min):
+            # In this case, we have selected the full h1 length of the domain,
+            # so we set the start and end coords to just that.
+            h1_start_coords.append(h1_min)
+            h1_end_coords.append(h1_max)
+
+        # We know we only have either h1_end_coord > h1_max or h1_start_coord < h1_min
+        # and not both. If both are true, the previous if statement should trigger.
+        elif h1_start_coord < h1_min:
+            # First set of h1 start coordinates
+            h1_start_coords.append(h1_min)
+            h1_end_coords.append(h1_end_coord)
+            # Second set of h1 start coordinates
+            pts_from_begin = h1_min - h1_start_coord
+            h1_start_coords.append(h1_max - pts_from_begin)
+            h1_end_coords.append(h1_max)
+
+        elif h1_end_coord > h1_max:
+            h1_start_coords.append(h1_start_coord)
+            h1_end_coords.append(h1_max)
+            pts_from_end = h1_end_coord - h1_max
+            h1_start_coords.append(h1_min)
+            h1_end_coords.append(h1_min + pts_from_end)
+
+        # We have no PBC-related issues, actually
+        else:
+            h1_start_coords.append(h1_start_coord)
+            h1_end_coords.append(h1_end_coord)
+
+    if PBC_flag in ['hdim_2', 'both']:
+        if (h2_end_coord - h2_start_coord) >= (h2_max - h2_min):
+            # In this case, we have selected the full h2 length of the domain,
+            # so we set the start and end coords to just that.
+            h2_start_coords.append(h2_min)
+            h2_end_coords.append(h2_max)
+
+        # We know we only have either h2_end_coord > h2_max or h2_start_coord < h2_min
+        # and not both. If both are true, the previous if statement should trigger.
+        elif h2_start_coord < h2_min:
+            # First set of h2 start coordinates
+            h2_start_coords.append(h2_min)
+            h2_end_coords.append(h2_end_coord)
+            # Second set of h2 start coordinates
+            pts_from_begin = h2_min - h2_start_coord
+            h2_start_coords.append(h2_max - pts_from_begin)
+            h2_end_coords.append(h2_max)
+
+        elif h2_end_coord > h2_max:
+            h2_start_coords.append(h2_start_coord)
+            h2_end_coords.append(h2_max)
+            pts_from_end = h2_end_coord - h2_max
+            h2_start_coords.append(h2_min)
+            h2_end_coords.append(h2_min + pts_from_end)
+
+        # We have no PBC-related issues, actually
+        else:
+            h2_start_coords.append(h2_start_coord)
+            h2_end_coords.append(h2_end_coord)
+
+    out_coords = list()
+    for h1_start_coord_single, h1_end_coord_single in zip(h1_start_coords, h1_end_coords):
+        for h2_start_coord_single, h2_end_coord_single in zip(h2_start_coords, h2_end_coords):
+            out_coords.append((h1_start_coord_single, h1_end_coord_single, h2_start_coord_single, h2_end_coord_single))
+    return out_coords
+
+def njit_if_available(func, **kwargs):
+    '''Decorator to wrap a function with numba.njit if available.
+    If numba isn't available, it just returns the function.
+    '''
+    try:
+        from numba import njit
+        return njit(func, **kwargs)
+    except ModuleNotFoundError:
+        return func
+
+
+@njit_if_available
+def calc_distance_coords_pbc(coords_1, coords_2, min_h1, max_h1, min_h2, max_h2,
+                             PBC_flag):
+    '''Function to calculate the distance between cartesian
+    coordinate set 1 and coordinate set 2. Note that we assume both
+    coordinates are within their min/max already.
+
+    Parameters
+    ----------
+    coords_1: 2D or 3D array-like
+        Set of coordinates passed in from trackpy of either (vdim, hdim_1, hdim_2)
+        coordinates or (hdim_1, hdim_2) coordinates.
+    coords_2: 2D or 3D array-like
+        Similar to coords_1, but for the second pair of coordinates
+    min_h1: int
+        Minimum point in hdim_1
+    max_h1: int
+        Maximum point in hdim_1, exclusive. max_h1-min_h1 should be the size.
+    min_h2: int
+        Minimum point in hdim_2
+    max_h2: int
+        Maximum point in hdim_2, exclusive. max_h2-min_h2 should be the size.
+    PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both')
+        Sets whether to use periodic boundaries, and if so in which directions.
+        'none' means that we do not have periodic boundaries
+        'hdim_1' means that we are periodic along hdim1
+        'hdim_2' means that we are periodic along hdim2
+        'both' means that we are periodic along both horizontal dimensions
+
+    Returns
+    -------
+    float
+        Distance between coords_1 and coords_2 in cartesian space.
+
+    '''
+
+    is_3D = len(coords_1)== 3
+    size_h1 = max_h1 - min_h1
+    size_h2 = max_h2 - min_h2
+
+    if not is_3D:
+        # Let's make the accounting easier.
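+        # Pad the 2D coordinates out to (0, hdim_1, hdim_2) so the same
+        # three-component arithmetic below handles both the 2D and 3D cases.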
+ coords_1 = np.array((0, coords_1[0], coords_1[1])) + coords_2 = np.array((0, coords_2[0], coords_2[1])) + + if PBC_flag in ['hdim_1', 'both']: + mod_h1 = size_h1 + else: + mod_h1 = 0 + if PBC_flag in ['hdim_2', 'both']: + mod_h2 = size_h2 + else: + mod_h2 = 0 + max_dims = np.array((0, mod_h1, mod_h2)) + deltas = np.abs(coords_1 - coords_2) + deltas = np.where(deltas > 0.5 * max_dims, deltas - max_dims, deltas) + return np.sqrt(np.sum(deltas**2)) From e5fbd3106cd41eba6acddf49e7e803d038dbc75c Mon Sep 17 00:00:00 2001 From: galexsky <90701223+galexsky@users.noreply.github.com> Date: Fri, 8 Apr 2022 18:00:14 -0400 Subject: [PATCH 48/82] Updated some comments Cleaned up a few typos in and tweaked a few comments for clarity --- tobac/segmentation.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 9196e895..87315c0a 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -3,6 +3,8 @@ from numpy import transpose from . import utils as tb_utils + +#import gc def transfm_pbc_point(in_dim, dim_min, dim_max): '''Function to transform a PBC-feature point for contiguity @@ -420,10 +422,10 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu markers_2 = np.zeros(data_segmentation.shape).astype(np.int32) - #new, shorter PBC marker seeding approach - #loop thru LB points - #then check if fillable region (labels_unseeded > 0) - #then check if point on other side of boundary is > 0 in segmentation_mask + # PBC marker seeding approach + # loop thru LB points, then check if fillable region (labels_unseeded > 0) and seed + # then check if point on other side of boundary is > 0 in segmentation_mask and + # adjust where needed ''' "First pass" at seeding features across the boundaries. This first pass will bring in eligible (meaning values that are higher than threshold) but not previously watershedded @@ -508,14 +510,18 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu # Secondary seeding complete, now blending periodic boundaries # keep segmentation mask fields for now so we can save these all later - # for demos of changes + # for demos of changes, otherwise, could add deletion for memory efficiency, e.g. + + #del segmentation_mask + #del segmentation_mask_2 + #gc.collect() #update mask coord regions ''' - Now, start the second round of watershedding- the "buddy box" approach - buddies contains features of interest and any neighbors that across the boundary or in - physical contact with that label + Now, start the second round of watershedding- the "buddy box" approach. + 'buddies' array contains features of interest and any neighbors that are across the boundary or + otherwise have lateral and/or diagonal physical contact with that label ''' # TODO: this can cause a crash if there are no segmentation regions reg_props_dict = tb_utils.get_label_props_in_dict(segmentation_mask_3) @@ -670,7 +676,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu buddy_x2 = np.append(buddy_x2,x2) # Buddy Box! 
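    # (The Buddy Box gathers the feature of interest and every label it touches
    #  across the periodic boundary into one contiguous array, so the second
    #  watershed pass can run without wraparound bookkeeping.)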
- # Indentify mins and maxes of buddy box continuous points range + # Indentify mins and maxes of Buddy Box continuous points range # so that box of correct size can be constructred bbox_zstart = int(np.min(buddy_z2)) bbox_ystart = int(np.min(buddy_y2)) @@ -745,13 +751,13 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu buddies_out.rename('buddies_mask') buddies_out.units=1 - #Create dask array from input data: + # Create dask array from input data: #data=rgn_cube.core_data() buddy_data = buddy_rgn - #All of the below is, I think, the same overarching segmentation procedure as in the original - #segmentation approach until the line which states - # "#transform seg_mask_4 data back to original mask after PBC first-pass ("segmentation_mask_3")" + # All of the below is the same overarching segmentation procedure as in the original + # segmentation approach until the line which states + # "#transform segmentation_mask_4 data back to original mask after PBC first-pass ("segmentation_mask_3")" # It's just performed on the buddy box and its data rather than our full domain #Set level at which to create "Seed" for each feature in the case of 3D watershedding: @@ -809,7 +815,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu segmentation_mask_4[~unmasked_buddies] = -1 - #transform seg_mask_4 data back to original mask after PBC first-pass ("segmentation_mask_3") + #transform segmentation_mask_4 data back to mask created after PBC first-pass ("segmentation_mask_3") #print(np.unique(test_mask3.data)) #loop through buddy box inds and analogous seg mask inds From 3ec7eeeaec199bf213b457b603f8b634bcd5925f Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Fri, 15 Apr 2022 10:31:48 -0600 Subject: [PATCH 49/82] updated docs writing --- doc/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/conf.py b/doc/conf.py index cb3fbee2..04e02516 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -17,7 +17,7 @@ def setup(app): app.add_css_file("theme_overrides.css") autodoc_mock_imports = ['numpy', 'scipy', 'scikit-image', 'pandas', 'pytables', 'matplotlib', 'iris', - 'cf-units', 'xarray', 'cartopy', 'trackpy'] + 'cf-units', 'xarray', 'cartopy', 'trackpy', 'numba'] sys.path.insert(0, os.path.abspath("../")) From 5c6430a5500464f3d042c46f4b880191d4179127 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Fri, 15 Apr 2022 10:59:13 -0600 Subject: [PATCH 50/82] fixed erosion not working --- tobac/feature_detection.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 62cf0f9f..de06d701 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -399,11 +399,10 @@ def feature_detection_threshold(data_i,i_time, # only include values greater than threshold # erode selected regions by n pixels if n_erosion_threshold>0: - # is this right? 
the documentation is unclear - #if is_3D: - # selem=np.ones((n_erosion_threshold,n_erosion_threshold, n_erosion_threshold)) - #else: - selem=np.ones((n_erosion_threshold,n_erosion_threshold)) + if is_3D: + selem=np.ones((n_erosion_threshold,n_erosion_threshold, n_erosion_threshold)) + else: + selem=np.ones((n_erosion_threshold,n_erosion_threshold)) mask=binary_erosion(mask,selem).astype(bool) # detect individual regions, label and count the number of pixels included: labels, num_labels = label(mask, background=0, return_num = True) From 0b8e3733d86d31dadeec727630943053d3dc1042 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Fri, 15 Apr 2022 21:40:36 -0600 Subject: [PATCH 51/82] Cleaning up of unnecessary code --- tobac/segmentation.py | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 596bee1b..f349b1e1 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -536,11 +536,9 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu wall_labels = np.unique(wall_labels) wall_labels = wall_labels[(wall_labels) > 0].astype(int) - #print(wall_labels) # Loop through all segmentation mask labels on the wall for cur_idx in wall_labels: - print("we have buddies") vdim_indices = z_reg_inds[cur_idx] hdim1_indices = y_reg_inds[cur_idx] @@ -647,10 +645,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #edit value in buddy_features dataframe buddy_features.hdim_1.values[buddy_looper] = transfm_pbc_point(float(buddy_feat.hdim_1), hdim1_min, hdim1_max) buddy_features.hdim_2.values[buddy_looper] = transfm_pbc_point(float(buddy_feat.hdim_2), hdim2_min, hdim2_max) - - #print(int(buddy_feat.vdim),yf2,xf2) - #display(buddy_features) - + #again, this may be redundant as I don't think we use buddy_zf/yf/xf after this #in favor of iterrows thru the updated buddy_features buddy_zf = np.append(buddy_zf,int(buddy_feat.vdim)) @@ -687,7 +682,6 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu bbox_ysize = bbox_yend - bbox_ystart bbox_xsize = bbox_xend - bbox_xstart - #print(bbox_zsize,bbox_ysize,bbox_xsize) # Creation of actual Buddy Box space for transposition # of data in domain and re-seeding with Buddy feature markers @@ -719,21 +713,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu #construction of iris cube corresponding to buddy box and its data #for marker seeding and watershedding of buddy box - rgn_cube = iris.cube.Cube(data=buddy_rgn) - - coord_system=None - # TODO: clean this up - h2_coord=iris.coords.DimCoord(np.arange(bbox_xsize), long_name='hdim_2', units='1', bounds=None, attributes=None, coord_system=coord_system) - h1_coord=iris.coords.DimCoord(np.arange(bbox_ysize), long_name='hdim_1', units='1', bounds=None, attributes=None, coord_system=coord_system) - v_coord=iris.coords.DimCoord(np.arange(bbox_zsize), long_name='vdim', units='1', bounds=None, attributes=None, coord_system=coord_system) - - rgn_cube.add_dim_coord(h2_coord,2) - rgn_cube.add_dim_coord(h1_coord,1) - rgn_cube.add_dim_coord(v_coord,0) - #rgn_cube.add_dim_coord(itime,0) - - rgn_cube.units = 'kg kg-1' - + #print(rgn_cube) #print(rgn_cube.vdim) @@ -744,11 +724,6 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu buddy_features.hdim_1.values[buddy_looper] = buddy_features.hdim_1.values[buddy_looper] - bbox_ystart buddy_features.hdim_2.values[buddy_looper] = 
buddy_features.hdim_2.values[buddy_looper] - bbox_xstart - # Create cube of the same dimensions and coordinates as Buddy Box to store updated mask: - buddies_out=1*rgn_cube - buddies_out.rename('buddies_mask') - buddies_out.units=1 - # Create dask array from input data: #data=rgn_cube.core_data() buddy_data = buddy_rgn From 9894e1cc7feb2b70bd5873871357e0a5afa17ab4 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Fri, 15 Apr 2022 21:50:00 -0600 Subject: [PATCH 52/82] Cut out a lot more unnecessary code. --- tobac/segmentation.py | 34 ++++------------------------------ 1 file changed, 4 insertions(+), 30 deletions(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index f349b1e1..bd0b82b6 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -492,7 +492,6 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu markers_2[vdim_ind, hdim1_ind, hdim2_ind] = segmentation_mask[vdim_ind,hdim1_opposite_corner,hdim2_opposite_corner] markers_2[~unmasked]=0 - if method=='watershed': segmentation_mask_2 = watershed(data_segmentation,markers_2.astype(np.int32), mask=unmasked) else: @@ -685,7 +684,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu # Creation of actual Buddy Box space for transposition # of data in domain and re-seeding with Buddy feature markers - buddy_rgn = np.zeros((bbox_zsize, bbox_ysize, bbox_xsize)) + buddy_rgn = np.empty((bbox_zsize, bbox_ysize, bbox_xsize), dtype=bool) ind_ctr = 0 #need to loop thru ALL z,y,x inds in buddy box @@ -708,15 +707,9 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu else: x_a1 = x - buddy_rgn[z-bbox_zstart,y-bbox_ystart,x-bbox_xstart] = field_in.data[z_a1,y_a1,x_a1] + buddy_rgn[z-bbox_zstart,y-bbox_ystart,x-bbox_xstart] = unmasked[z_a1,y_a1,x_a1] - - #construction of iris cube corresponding to buddy box and its data - #for marker seeding and watershedding of buddy box - #print(rgn_cube) - #print(rgn_cube.vdim) - #Update buddy_features feature positions to correspond to buddy box space #rather than domain space or continuous/contiguous point space for buddy_looper in range(0,len(buddy_features)): @@ -724,35 +717,19 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu buddy_features.hdim_1.values[buddy_looper] = buddy_features.hdim_1.values[buddy_looper] - bbox_ystart buddy_features.hdim_2.values[buddy_looper] = buddy_features.hdim_2.values[buddy_looper] - bbox_xstart - # Create dask array from input data: - #data=rgn_cube.core_data() - buddy_data = buddy_rgn # All of the below is the same overarching segmentation procedure as in the original # segmentation approach until the line which states # "#transform segmentation_mask_4 data back to original mask after PBC first-pass ("segmentation_mask_3")" # It's just performed on the buddy box and its data rather than our full domain - #Set level at which to create "Seed" for each feature in the case of 3D watershedding: - # If none, use all levels (later reduced to the ones fulfilling the theshold conditions) - if level==None: - level=slice(None) - # transform max_distance in metres to distance in pixels: if max_distance is not None: max_distance_pixel=np.ceil(max_distance/dxy) #note - this doesn't consider vertical distance in pixels # mask data outside region above/below threshold and invert data if tracking maxima: - if target == 'maximum': - unmasked_buddies=buddy_data>threshold - buddy_segmentation=-1*buddy_data - elif target == 'minimum': - 
unmasked_buddies=buddy_data<threshold
-        buddy_segmentation=buddy_data
+    unmasked_buddies = buddy_rgn

From: Sean Freeman
Date: Fri, 15 Apr 2022 22:00:27 -0600
Subject: [PATCH 53/82] Resolves #29 and clarifies the documentation

---
 tobac/feature_detection.py | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py
index de06d701..e618b955 100644
--- a/tobac/feature_detection.py
+++ b/tobac/feature_detection.py
@@ -1,7 +1,7 @@
-
 import numpy as np
 import pandas as pd
 import logging
+import warnings
 from . import utils as tb_utils
 
 def get_label_props_in_dict(labels):
@@ -930,7 +930,7 @@ def filter_min_distance(features, dxy = None,dz = None, min_distance = None,
     y_coordinate_name: str
         The name of the y coordinate to calculate distance based on in meters.
         This is typically `projection_y_coordinate`
-    z_coordinate_name: str
+    z_coordinate_name: str or None
         The name of the z coordinate to calculate distance based on in meters.
         This is typically `altitude`
 
@@ -945,6 +945,11 @@ def filter_min_distance(features, dxy = None,dz = None, min_distance = None,
     #if we are 3D, the vertical dimension is in features. if we are 2D, there
     #is no vertical dimension in features.
     is_3D = 'vdim' in features
+
+    # Check and if both dz is specified and altitude is available, warn that we will use dz.
+    if is_3D and (dz is not None and z_coordinate_name in features):
+        warnings.warn("Both "+z_coordinate_name+" and dz available to filter_min_distance; using constant dz. "
+                      "Set dz to none if you want to use altitude or set `z_coordinate_name` to None to use constant dz.")
 
     #create list of tuples with all combinations of features at the timestep:
     indeces=combinations(features.index.values,2)
@@ -961,14 +966,18 @@ def filter_min_distance(features, dxy = None,dz = None, min_distance = None,
                           features.loc[index_2, x_coordinate_name])**2 +
                          (features.loc[index_1, y_coordinate_name]-
                           features.loc[index_2, y_coordinate_name])**2)
-            if dz is not None:
-                z_sqdst = (dz * (features.loc[index_1,'vdim']-features.loc[index_2,'vdim']))**2
-            else:
-                z_sqdst = (features.loc[index_1,z_coordinate_name]-
-                           features.loc[index_2,z_coordinate_name])**2
+            if is_3D:
+                if dz is not None:
+                    z_sqdst = (dz * (features.loc[index_1,'vdim']-features.loc[index_2,'vdim']))**2
+                else:
+                    z_sqdst = (features.loc[index_1,z_coordinate_name]-
+                               features.loc[index_2,z_coordinate_name])**2
 
             #distance=dxy*np.sqrt((features.loc[index_1,'hdim_1']-features.loc[index_2,'hdim_1'])**2+(features.loc[index_1,'hdim_2']-features.loc[index_2,'hdim_2'])**2)
-            distance=np.sqrt(xy_sqdst + z_sqdst)
+            if is_3D:
+                distance=np.sqrt(xy_sqdst + z_sqdst)
+            else:
+                distance = xy_sqdst
 
             if distance <= min_distance:
                 #print(distance, min_distance, index_1, index_2, features.size)

From 282a439aa94aba00e8d7ecca7185b0707737df3d Mon Sep 17 00:00:00 2001
From: Sean Freeman
Date: Fri, 15 Apr 2022 22:03:09 -0600
Subject: [PATCH 54/82] Resolves #26

---
 tobac/segmentation.py | 44 ++++++++++++++++---------------------------
 1 file changed, 16 insertions(+), 28 deletions(-)

diff --git a/tobac/segmentation.py b/tobac/segmentation.py
index bd0b82b6..44e81053 100644
--- a/tobac/segmentation.py
+++ b/tobac/segmentation.py
@@ -824,7 +824,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
 
     return segmentation_out,features_out
 
-def segmentation(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',PBC_flag='none',seed_3D_flag='column'):
+def
segmentation(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',PBC_flag='none',seed_3D_flag='column', seed_3D_size = 5): """ Function using watershedding or random walker to determine cloud volumes associated with tracked updrafts @@ -847,11 +847,21 @@ def segmentation(features,field,dxy,threshold=3e-3,target='maximum',level=None,m max_distance: float Maximum distance from a marker allowed to be classified as belonging to that cell - PBC_flag: string - string flag of 'none', 'hdim_1', 'hdim_2', or 'both' indicating which lateral boundaries are periodic - - seed_3D_flag: string - string flag of 'column' (default) or 'box' which determines the method of seeding feature positions for 3D watershedding + PBC_flag : {'none', 'hdim_1', 'hdim_2', 'both'} + Sets whether to use periodic boundaries, and if so in which directions. + 'none' means that we do not have periodic boundaries + 'hdim_1' means that we are periodic along hdim1 + 'hdim_2' means that we are periodic along hdim2 + 'both' means that we are periodic along both horizontal dimensions + seed_3D_flag: str('column', 'box') + Seed 3D field at feature positions with either the full column (default) + or a box of user-set size + seed_3D_size: int or tuple (dimensions equal to dimensions of `field`) + This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an + integer, the seed box is identical in all dimensions. If it's a tuple, it specifies the + seed area for each dimension separately. Note: we recommend the use + of odd numbers for this. If you give an even number, your seed box will be + biased and not centered around the feature. Output: segmentation_out: iris.cube.Cube @@ -874,39 +884,17 @@ def segmentation(features,field,dxy,threshold=3e-3,target='maximum',level=None,m #loop over individual input timesteps for segmentation: #OR do segmentation on single timestep - #print(field) field_time=field.slices_over('time') - #print(field_time) - #print(enumerate(field_time)) time_len = len(field.coord('time').points[:]) - print(time_len) for i,field_i in enumerate(field_time): time_i=field_i.coord('time').units.num2date(field_i.coord('time').points[0]) features_i=features.loc[features['time']==time_i] - #print(time_i) - #print(field_i) - #print(features_i) segmentation_out_i,features_out_i=segmentation_timestep(field_i,features_i,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,vertical_coord=vertical_coord,PBC_flag=PBC_flag,seed_3D_flag=seed_3D_flag) segmentation_out_list.append(segmentation_out_i) features_out_list.append(features_out_i) logging.debug('Finished segmentation for '+time_i.strftime('%Y-%m-%d_%H:%M:%S')) - #if time_len > 1: - - - - #else: - # time_i=field.coord('time').units.num2date(field.coord('time').points[0]) - # features_i=features.loc[features['time']==time_i] - # print(time_i) - # print(field) - # print(features_i) - # segmentation_out_i,features_out_i=segmentation_timestep(field,features_i,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,vertical_coord=vertical_coord,PBC_flag=PBC_flag) - # segmentation_out_list.append(segmentation_out_i) - # features_out_list.append(features_out_i) - # logging.debug('Finished segmentation for '+time_i.strftime('%Y-%m-%d_%H:%M:%S')) - #Merge output from individual timesteps: segmentation_out=segmentation_out_list.merge_cube() features_out=pd.concat(features_out_list) From fbcd06975caeabee3a3094553ac250bc86dd1049 Mon 
Sep 17 00:00:00 2001 From: Sean Freeman Date: Sat, 16 Apr 2022 22:17:09 -0600 Subject: [PATCH 55/82] Added a new function in testing to test grid coordinates --- tobac/testing.py | 36 ++++++++++++++++++++++++++++++++++++ tobac/tests/test_testing.py | 34 +++++++++++++++++++++++++++++++++- tobac/tests/test_util.py | 9 +++++++++ tobac/utils.py | 7 +++++++ 4 files changed, 85 insertions(+), 1 deletion(-) diff --git a/tobac/testing.py b/tobac/testing.py index ac220a1c..991bae05 100644 --- a/tobac/testing.py +++ b/tobac/testing.py @@ -886,3 +886,39 @@ def get_start_end_of_feat(center_point, size, axis_min, axis_max, is_pbc = False return (min_pt, max_pt) +def generate_grid_coords(min_max_coords, lengths): + '''Generates a grid of coordinates, such as fake lat/lons for testing. + + Parameters + ---------- + min_max_coords: array-like, either length 2, length 4, or length 6. + The minimum and maximum values in each dimension as: + (min_dim1, max_dim1, min_dim2, max_dim2, min_dim3, max_dim3) to use + all 3 dimensions. You can omit any dimensions that you aren't using. + lengths: array-like, either length 1, 2, or 3. + The lengths of values in each dimension. Length must equal 1/2 the length + of min_max_coords. + + Returns + ------- + 1, 2, or 3 array-likes + array-like of grid coordinates in the number of dimensions requested + and with the number of arrays specified (meshed coordinates) + + ''' + import numpy as np + if len(min_max_coords) != len(lengths)*2: + raise ValueError("The length of min_max_coords must be exactly 2 times" + " the length of lengths.") + + if len(lengths) == 1: + return np.mgrid[min_max_coords[0]:min_max_coords[1]:complex(imag=lengths[0])] + + if len(lengths) == 2: + return np.mgrid[min_max_coords[0]:min_max_coords[1]:complex(imag=lengths[0]), + min_max_coords[2]:min_max_coords[3]:complex(imag=lengths[1])] + + if len(lengths) == 3: + return np.mgrid[min_max_coords[0]:min_max_coords[1]:complex(imag=lengths[0]), + min_max_coords[2]:min_max_coords[3]:complex(imag=lengths[1]), + min_max_coords[4]:min_max_coords[5]:complex(imag=lengths[2])] \ No newline at end of file diff --git a/tobac/tests/test_testing.py b/tobac/tests/test_testing.py index 12fc7772..4f417e6b 100644 --- a/tobac/tests/test_testing.py +++ b/tobac/tests/test_testing.py @@ -331,4 +331,36 @@ def test_get_start_end_of_feat_nopbc(in_pt, in_sz, axis_size, out_pts): '''Tests ```tobac.testing.get_start_end_of_feat``` ''' - assert tbtest.get_start_end_of_feat(in_pt, in_sz, axis_size[0], axis_size[1]) == out_pts \ No newline at end of file + assert tbtest.get_start_end_of_feat(in_pt, in_sz, axis_size[0], axis_size[1]) == out_pts + + +''' +I acknowledge that this is a little confusing for the expected outputs, especially for the 3D. +''' +@pytest.mark.parametrize("min_max_coords, lengths, expected_outs", + [((0,3), (4,),[0,1,2,3]), + ((0,3, 0,3), (4,4),[[[0,]*4, [1]*4,[2]*4,[3]*4],[[0,1,2,3]]*4,]), + ((0,1, 0,1, 0, 1), (2,2,2),[[[[0]*2]*2, [[1]*2]*2,], + [[[0,0],[1,1]],[[0,0],[1,1]]], + [[[0,1],[0,1]],[[0,1],[0,1]]]] + ), + ] +) +def test_generate_grid_coords(min_max_coords, lengths, expected_outs): + '''Tests ```tobac.testing.generate_grid_coords``` + Parameters + ---------- + min_max_coords: array-like, either length 2, length 4, or length 6. + The minimum and maximum values in each dimension as: + (min_dim1, max_dim1, min_dim2, max_dim2, min_dim3, max_dim3) to use + all 3 dimensions. You can omit any dimensions that you aren't using. + lengths: array-like, either length 1, 2, or 3. 
+ The lengths of values in each dimension. Length must equal 1/2 the length + of min_max_coords. + expected_outs: array-like, either 1D, 2D, or 3D + The expected output + ''' + import numpy as np + out_grid = tbtest.generate_grid_coords(min_max_coords, lengths) + assert np.all(np.isclose(out_grid, np.array(expected_outs))) + diff --git a/tobac/tests/test_util.py b/tobac/tests/test_util.py index 1ffccfef..e5607537 100644 --- a/tobac/tests/test_util.py +++ b/tobac/tests/test_util.py @@ -274,3 +274,12 @@ def test_get_pbc_coordinates(): assert (lists_equal_without_order(tb_utils.get_pbc_coordinates(0, 10, 0, 10, 5, 12, 7, 15, 'both'), [(5, 10, 7, 10), (5, 10, 0, 5), (0, 2, 0, 5), (0, 2, 7, 10)])) # min in hdim_1, max in hdim_2 assert (lists_equal_without_order(tb_utils.get_pbc_coordinates(0, 10, 0, 10, -3, 3, 7, 15, 'both'), [(0, 3, 7, 10), (0, 3, 0, 5), (7, 10, 0, 5), (7, 10, 7, 10)])) + + + +def test_add_coordinates_2D(): + ''' + Tests ```utils.add_coordinates``` for a 2D case with + both 1D and 2D coordinates + ''' + pass \ No newline at end of file diff --git a/tobac/utils.py b/tobac/utils.py index 3428b5ba..fd00902a 100644 --- a/tobac/utils.py +++ b/tobac/utils.py @@ -901,6 +901,13 @@ def get_pbc_coordinates(h1_min, h1_max, h2_min, h2_max, def njit_if_available(func, **kwargs): '''Decorator to wrap a function with numba.njit if available. If numba isn't available, it just returns the function. + + Parameters + ---------- + func: function object + Function to wrap with njit + kwargs: + Keyword arguments to pass to numba njit ''' try: from numba import njit From d46334b8a2396717c02aaa71a75e28e1aff1ebd5 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Sat, 16 Apr 2022 23:13:40 -0600 Subject: [PATCH 56/82] Added test for add_coordinate --- tobac/testing.py | 17 ++++++++++++- tobac/tests/test_testing.py | 1 - tobac/tests/test_util.py | 49 ++++++++++++++++++++++++++++++++++--- tobac/utils.py | 2 +- 4 files changed, 62 insertions(+), 7 deletions(-) diff --git a/tobac/testing.py b/tobac/testing.py index 991bae05..281c26af 100644 --- a/tobac/testing.py +++ b/tobac/testing.py @@ -399,20 +399,35 @@ def make_dataset_from_arr( if time_dim_num is not None: raise NotImplementedError("Time dimension not yet implemented in this function") + has_time = time_dim_num is not None + is_3D = z_dim_num is not None output_arr = xr.DataArray(in_arr) if is_3D: z_max = in_arr.shape[z_dim_num] + if has_time: + time_min = datetime.datetime(2022,1,1) + time_num = in_arr.shape[time_dim_num] + time_max = datetime.timedelta(minutes=5) * time_num + if data_type == "xarray": return output_arr elif data_type == "iris": out_arr_iris = output_arr.to_iris() + if is_3D: out_arr_iris.add_dim_coord( iris.coords.DimCoord(np.arange(0, z_max), standard_name=z_dim_name), z_dim_num, ) + if has_time: + out_arr_iris = output_arr.to_iris() + if is_3D: + out_arr_iris.add_dim_coord( + iris.coords.DimCoord(np.linspace(time_min, time_max, time_num), + standard_name='time'), + time_dim_num,) return out_arr_iris else: raise ValueError("data_type must be 'xarray' or 'iris'") @@ -769,7 +784,7 @@ def generate_single_feature(start_h1, start_h2, start_v = None, min_h1 = 0, max_h1 = None, min_h2 = 0, max_h2 = None, num_frames = 1, dt = datetime.timedelta(minutes=5), start_date = datetime.datetime(2022,1,1,0), - PBC_flag = 'none', frame_start = 1, feature_num=1,): + PBC_flag = 'none', frame_start = 0, feature_num=1,): '''Function to generate a dummy feature dataframe to test the tracking functionality Parameters diff --git 
a/tobac/tests/test_testing.py b/tobac/tests/test_testing.py index 4f417e6b..3248131d 100644 --- a/tobac/tests/test_testing.py +++ b/tobac/tests/test_testing.py @@ -363,4 +363,3 @@ def test_generate_grid_coords(min_max_coords, lengths, expected_outs): import numpy as np out_grid = tbtest.generate_grid_coords(min_max_coords, lengths) assert np.all(np.isclose(out_grid, np.array(expected_outs))) - diff --git a/tobac/tests/test_util.py b/tobac/tests/test_util.py index e5607537..d2f87db3 100644 --- a/tobac/tests/test_util.py +++ b/tobac/tests/test_util.py @@ -1,5 +1,6 @@ import pytest import tobac.testing +import tobac.testing as tbtest import tobac.utils as tb_utils from collections import Counter @@ -276,10 +277,50 @@ def test_get_pbc_coordinates(): assert (lists_equal_without_order(tb_utils.get_pbc_coordinates(0, 10, 0, 10, -3, 3, 7, 15, 'both'), [(0, 3, 7, 10), (0, 3, 0, 5), (7, 10, 0, 5), (7, 10, 7, 10)])) - -def test_add_coordinates_2D(): +@pytest.mark.parametrize("feature_loc, min_max_coords, lengths, expected_coord_interp", + [((0,0), (0,1,0,1),(2,2), (0,0)), + ((0,0), (0,1),(2,), (0,)), + ] +) +def test_add_coordinates_2D(feature_loc, min_max_coords, lengths, expected_coord_interp): ''' Tests ```utils.add_coordinates``` for a 2D case with - both 1D and 2D coordinates + both 1D and 2D coordinates ''' - pass \ No newline at end of file + import xarray as xr + import numpy as np + import datetime + + feat_interp = tbtest.generate_single_feature(feature_loc[0], feature_loc[1], + max_h1 = 9999, max_h2 = 9999) + grid_coords = tbtest.generate_grid_coords(min_max_coords, lengths) + + ndims = len(lengths) + dim_names = ['time','longitude', 'latitude'] + dim_names = dim_names[:ndims] + + # Note that this is arbitrary. + base_time = datetime.datetime(2022,1,1) + + + coord_dict = {'time': [base_time]} + if ndims == 1: + # force at least a 2D array for data + lengths = lengths*2 + dim_names = ['time', 'longitude', 'latitude'] + coord_dict['longitude'] = grid_coords + coord_dict['latitude'] = grid_coords + + elif ndims == 2: + dim_names = ['time','x', 'y'] + coord_dict['longitude'] = (('x','y'),grid_coords[0]) + coord_dict['latitude'] = (('x','y'),grid_coords[1]) + + data_xr = xr.DataArray(np.empty((1,)+lengths), + coords = coord_dict, dims = dim_names) + + feats_with_coords = tb_utils.add_coordinates(feat_interp, data_xr.to_iris()) + + assert feats_with_coords.iloc[0]['longitude'] == expected_coord_interp[0] + if ndims == 2: + assert feats_with_coords.iloc[0]['latitude'] == expected_coord_interp[1] \ No newline at end of file diff --git a/tobac/utils.py b/tobac/utils.py index fd00902a..30bff6de 100644 --- a/tobac/utils.py +++ b/tobac/utils.py @@ -410,7 +410,7 @@ def add_coordinates(t,variable_cube): # interpolate 3D coordinates: # mainly workaround for wrf latitude and longitude (to be fixed in future) - + # TODO: investigate, is this necessary? 
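+        # The 2D interpolators in this branch are built from a single slice along
+        # the time axis (e.g. variable_cube[0,:,:] when time leads), treating the
+        # horizontal coordinate field as constant in time.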
elif variable_cube.coord(coord).ndim==3: if variable_cube.coord_dims(coord)==(ndim_time,hdim_1,hdim_2): From a9aca6db4faa53e34b7dc8c844886635886d8089 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Sat, 16 Apr 2022 23:29:05 -0600 Subject: [PATCH 57/82] Added new test for add_coordinates --- tobac/tests/test_util.py | 50 ++++++++++++++++++++++++++++++++++++++++ tobac/utils.py | 18 +++++++-------- 2 files changed, 59 insertions(+), 9 deletions(-) diff --git a/tobac/tests/test_util.py b/tobac/tests/test_util.py index d2f87db3..41533368 100644 --- a/tobac/tests/test_util.py +++ b/tobac/tests/test_util.py @@ -303,6 +303,56 @@ def test_add_coordinates_2D(feature_loc, min_max_coords, lengths, expected_coord base_time = datetime.datetime(2022,1,1) + coord_dict = {'time': [base_time]} + if ndims == 1: + # force at least a 2D array for data + lengths = lengths*2 + dim_names = ['time', 'longitude', 'latitude'] + coord_dict['longitude'] = grid_coords + coord_dict['latitude'] = grid_coords + + elif ndims == 2: + dim_names = ['time','x', 'y'] + coord_dict['longitude'] = (('x','y'),grid_coords[0]) + coord_dict['latitude'] = (('x','y'),grid_coords[1]) + + data_xr = xr.DataArray(np.empty((1,)+lengths), + coords = coord_dict, dims = dim_names) + + feats_with_coords = tb_utils.add_coordinates(feat_interp, data_xr.to_iris()) + + print(feats_with_coords.iloc[0]['longitude']) + assert feats_with_coords.iloc[0]['longitude'] == expected_coord_interp[0] + if ndims == 2: + assert feats_with_coords.iloc[0]['latitude'] == expected_coord_interp[1] + +@pytest.mark.parametrize("feature_loc, min_max_coords, lengths, expected_coord_interp", + [((0,0,0), (0,1,0,1),(2,2), (0,0)), + ((0,0,0), (0,1),(2,), (0,)), + ] +) +def test_add_coordinates_3D(feature_loc, min_max_coords, lengths, expected_coord_interp): + ''' + Tests ```utils.add_coordinates_3D``` for a 3D case with + 1D, 2D, and 3D coordinates + ''' + import xarray as xr + import numpy as np + import datetime + + feat_interp = tbtest.generate_single_feature(feature_loc[1], feature_loc[2], + start_v = feature_loc[0], + max_h1 = 9999, max_h2 = 9999) + grid_coords = tbtest.generate_grid_coords(min_max_coords, lengths) + + ndims = len(lengths) + dim_names = ['time','longitude', 'latitude'] + dim_names = dim_names[:ndims] + + # Note that this is arbitrary. 
+ base_time = datetime.datetime(2022,1,1) + + coord_dict = {'time': [base_time]} if ndims == 1: # force at least a 2D array for data diff --git a/tobac/utils.py b/tobac/utils.py index 30bff6de..480f67aa 100644 --- a/tobac/utils.py +++ b/tobac/utils.py @@ -541,37 +541,37 @@ def add_coordinates_3D(t,variable_cube): f=interp2d(dimvec_2,dimvec_3,variable_cube.coord(coord).points) coordinate_points=[f(a,b) for a,b in zip(t['hdim_1'],t['hdim_2'])] - # interpolate 3D coordinates: - # mainly workaround for wrf latitude and longitude (to be fixed in future) - + # interpolate 3D coordinates: elif variable_cube.coord(coord).ndim==3: if variable_cube.coord_dims(coord)==(ndim_time,hdim_1,hdim_2): f=interp2d(dimvec_2,dimvec_1,variable_cube[0,:,:].coord(coord).points) coordinate_points=[f(a,b) for a,b in zip(t['hdim_2'],t['hdim_1'])] - if variable_cube.coord_dims(coord)==(ndim_time,hdim_2,hdim_1): + elif variable_cube.coord_dims(coord)==(ndim_time,hdim_2,hdim_1): f=interp2d(dimvec_1,dimvec_2,variable_cube[0,:,:].coord(coord).points) coordinate_points=[f(a,b) for a,b in zip(t['hdim_1'],t['hdim_2'])] - if variable_cube.coord_dims(coord)==(hdim_1,ndim_time,hdim_2): + elif variable_cube.coord_dims(coord)==(hdim_1,ndim_time,hdim_2): f=interp2d(dimvec_2,dimvec_1,variable_cube[:,0,:].coord(coord).points) coordinate_points=[f(a,b) for a,b in zip(t['hdim_2'],t['hdim_1'])] - if variable_cube.coord_dims(coord)==(hdim_1,hdim_2,ndim_time): + elif variable_cube.coord_dims(coord)==(hdim_1,hdim_2,ndim_time): f=interp2d(dimvec_2,dimvec_1,variable_cube[:,:,0].coord(coord).points) coordinate_points=[f(a,b) for a,b in zip(t['hdim_2'],t['hdim1'])] - if variable_cube.coord_dims(coord)==(hdim_2,ndim_time,hdim_1): + elif variable_cube.coord_dims(coord)==(hdim_2,ndim_time,hdim_1): f=interp2d(dimvec_1,dimvec_2,variable_cube[:,0,:].coord(coord).points) coordinate_points=[f(a,b) for a,b in zip(t['hdim_1'],t['hdim_2'])] - if variable_cube.coord_dims(coord)==(hdim_2,hdim_1,ndim_time): + elif variable_cube.coord_dims(coord)==(hdim_2,hdim_1,ndim_time): f=interp2d(dimvec_1,dimvec_2,variable_cube[:,:,0].coord(coord).points) coordinate_points=[f(a,b) for a,b in zip(t['hdim_1'],t['hdim_2'])] - + + else: + raise ValueError("Unable to interpolate 3D coordinate") # write resulting array or list into DataFrame: t[coord]=coordinate_points From 1efb5e2737e8ee535aae5cb1c867b1baeca4119b Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Tue, 19 Apr 2022 11:31:53 -0600 Subject: [PATCH 58/82] Revert "Cut out a lot more unnecessary code." This reverts commit 9894e1cc7feb2b70bd5873871357e0a5afa17ab4. 
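The hunks below restore tobac's target-dependent masking before watershedding; paraphrased from the surrounding diffs, the restored logic is:

    # Paraphrase of the restored masking step (names as in the diff below):
    if target == 'maximum':
        unmasked_buddies = buddy_data > threshold  # keep only values above threshold
        buddy_segmentation = -1 * buddy_data       # invert so watershed grows from maxima
    elif target == 'minimum':
        unmasked_buddies = buddy_data < threshold
        buddy_segmentation = buddy_data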
---
 tobac/segmentation.py | 34 ++++++++++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/tobac/segmentation.py b/tobac/segmentation.py
index 44e81053..f0488314 100644
--- a/tobac/segmentation.py
+++ b/tobac/segmentation.py
@@ -492,6 +492,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
                     markers_2[vdim_ind, hdim1_ind, hdim2_ind] = segmentation_mask[vdim_ind,hdim1_opposite_corner,hdim2_opposite_corner]
     markers_2[~unmasked]=0
+
     if method=='watershed':
         segmentation_mask_2 = watershed(data_segmentation,markers_2.astype(np.int32), mask=unmasked)
     else:
@@ -684,7 +685,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
 
     # Creation of actual Buddy Box space for transposition
     # of data in domain and re-seeding with Buddy feature markers
-    buddy_rgn = np.empty((bbox_zsize, bbox_ysize, bbox_xsize), dtype=bool)
+    buddy_rgn = np.zeros((bbox_zsize, bbox_ysize, bbox_xsize))
 
     ind_ctr = 0
 
     #need to loop thru ALL z,y,x inds in buddy box
@@ -707,9 +708,15 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
             else:
                 x_a1 = x
 
-            buddy_rgn[z-bbox_zstart,y-bbox_ystart,x-bbox_xstart] = unmasked[z_a1,y_a1,x_a1]
+            buddy_rgn[z-bbox_zstart,y-bbox_ystart,x-bbox_xstart] = field_in.data[z_a1,y_a1,x_a1]
+
+    #construction of iris cube corresponding to buddy box and its data
+    #for marker seeding and watershedding of buddy box
+    #print(rgn_cube)
+    #print(rgn_cube.vdim)
+
     #Update buddy_features feature positions to correspond to buddy box space
     #rather than domain space or continuous/contiguous point space
     for buddy_looper in range(0,len(buddy_features)):
@@ -717,19 +724,35 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
         buddy_features.hdim_1.values[buddy_looper] = buddy_features.hdim_1.values[buddy_looper] - bbox_ystart
         buddy_features.hdim_2.values[buddy_looper] = buddy_features.hdim_2.values[buddy_looper] - bbox_xstart
 
+    # Create dask array from input data:
+    #data=rgn_cube.core_data()
+    buddy_data = buddy_rgn
 
     # All of the below is the same overarching segmentation procedure as in the original
     # segmentation approach until the line which states
     # "#transform segmentation_mask_4 data back to original mask after PBC first-pass ("segmentation_mask_3")"
     # It's just performed on the buddy box and its data rather than our full domain
 
+    #Set level at which to create "Seed" for each feature in the case of 3D watershedding:
+    # If none, use all levels (later reduced to the ones fulfilling the theshold conditions)
+    if level==None:
+        level=slice(None)
+
     # transform max_distance in metres to distance in pixels:
     if max_distance is not None:
         max_distance_pixel=np.ceil(max_distance/dxy)
         #note - this doesn't consider vertical distance in pixels
 
     # mask data outside region above/below threshold and invert data if tracking maxima:
-    unmasked_buddies = buddy_rgn
+    if target == 'maximum':
+        unmasked_buddies=buddy_data>threshold
+        buddy_segmentation=-1*buddy_data
+    elif target == 'minimum':
+        unmasked_buddies=buddy_data<threshold
+        buddy_segmentation=buddy_data

From: Sean Freeman
Date: Mon, 25 Apr 2022 09:29:46 -0600
Subject: [PATCH 59/82] Several improvements to `dev` (#36)

* Updates to add_coordinates_3D

This should resolve #28 and starts the process toward solving #30.

* Added the ability to specify a vertical coordinate for feature detection.

This still doesn't *entirely* work. We still need to go into the feature detection code and resolve it there.
* Fixed a bug that crashed the segmentation with PBCs if there are no eligible regions. * Revert "Cut out a lot more unnecessary code." This reverts commit 9894e1cc7feb2b70bd5873871357e0a5afa17ab4. * Moved finding vertical axis to its own function * fixing a bug in my new code * moved segmentation code over to the new utility function * Added new tests to filter_min_distance * Fixed interp2D * fixing another coordinate bug * Removing debug statements * Add a new function to find vertical coordinate in dataframe * Added more tests * Now requiring a vertical coord in 3D tracking Added a requirement for vertical information to be covered in 3D tracking * Updated tracking to work with 3D correctly Also added tests to this effect * Added more tests to test_tracking --- tobac/feature_detection.py | 71 ++++++-- tobac/segmentation.py | 26 +-- tobac/testing.py | 13 +- tobac/tests/test_feature_detection.py | 124 +++++++++++++- tobac/tests/test_segmentation.py | 44 ++++- tobac/tests/test_tracking.py | 89 +++++++++- tobac/tests/test_util.py | 87 ++++++++-- tobac/tracking.py | 58 +++++-- tobac/utils.py | 227 +++++++++++++++++--------- 9 files changed, 592 insertions(+), 147 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index e618b955..87846994 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -797,7 +797,9 @@ def feature_detection_multithreshold(field_in, n_min_threshold=0, min_distance=0, feature_number_start=1, - PBC_flag='none' + PBC_flag='none', + vertical_coord = 'auto', + vertical_axis = None, ): '''Function to perform feature detection based on contiguous regions above/below a threshold @@ -813,11 +815,11 @@ def feature_detection_multithreshold(field_in, ```y_coordinate_name``` are available in `features`. If you specify a value here, this function assumes that it is the x/y spacing between points even if ```x_coordinate_name``` and ```y_coordinate_name``` are specified. - dz: float + dz: float or None. Constant vertical grid spacing (m), optional. If not specified - and the input is 3D, this function requires that `altitude` is available - in the `features` input. If you specify a value here, this function assumes - that it is the constant z spacing between points, even if ```z_coordinate_name``` + and the input is 3D, this function requires that vertical_coord is available + in the `field_in` input. If you specify a value here, this function assumes + that it is the constant z spacing between points, even if ```vertical_coord``` is specified. target: str ('minimum' or 'maximum') flag to determine if tracking is targetting minima or maxima in the data @@ -837,6 +839,13 @@ def feature_detection_multithreshold(field_in, 'hdim_1' means that we are periodic along hdim1 'hdim_2' means that we are periodic along hdim2 'both' means that we are periodic along both horizontal dimensions + vertical_coord: str + Name or axis number of the vertical coordinate. If 'auto', tries to auto-detect. + It looks for the coordinate or the dimension name corresponding + to the string. + vertical_axis: int or None. + The vertical axis number of the data. If None, uses vertical_coord + to determine axis. 
Returns ------- @@ -846,7 +855,8 @@ def feature_detection_multithreshold(field_in, from .utils import add_coordinates, add_coordinates_3D logging.debug('start feature detection based on thresholds') - + if vertical_coord != 1 and vertical_coord != 'auto': + raise NotImplementedError("Vertical coordinate must be first non-time coord.") # create empty list to store features for all timesteps list_features_timesteps=[] @@ -877,7 +887,10 @@ def feature_detection_multithreshold(field_in, #Loop over DataFrame to remove features that are closer than distance_min to each other: if (min_distance > 0): features_thresholds=filter_min_distance(features_thresholds,dxy=dxy, dz=dz, - min_distance = min_distance) + min_distance = min_distance, + vertical_coord = vertical_coord, + vertical_axis = vertical_axis + ) list_features_timesteps.append(features_thresholds) logging.debug('Finished feature detection for ' + time_i.strftime('%Y-%m-%d_%H:%M:%S')) @@ -891,7 +904,7 @@ def feature_detection_multithreshold(field_in, # features_filtered = features.drop(features[features['num'] < min_num].index) # features_filtered.drop(columns=['idx','num','threshold_value'],inplace=True) if 'vdim' in features: - features=add_coordinates_3D(features,field_in) + features=add_coordinates_3D(features,field_in, vertical_coord=vertical_coord) else: features=add_coordinates(features,field_in) else: @@ -901,10 +914,15 @@ def feature_detection_multithreshold(field_in, return features def filter_min_distance(features, dxy = None,dz = None, min_distance = None, - x_coordinate_name = "projection_x_coordinate", - y_coordinate_name = "projection_y_coordinate", - z_coordinate_name = "altitude"): - '''Function to remove features that are too close together + x_coordinate_name = None, + y_coordinate_name = None, + z_coordinate_name = None, + PBC_flag = 'none'): + '''Function to remove features that are too close together. + If two features are closer than `min_distance`, it keeps the + larger feature. + + TODO: does this function work with minima? Parameters ---------- @@ -932,19 +950,36 @@ def filter_min_distance(features, dxy = None,dz = None, min_distance = None, This is typically `projection_y_coordinate` z_coordinate_name: str or None The name of the z coordinate to calculate distance based on in meters. - This is typically `altitude` - + This is typically `altitude`. If `auto`, tries to auto-detect. + PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both') + Sets whether to use periodic boundaries, and if so in which directions. + 'none' means that we do not have periodic boundaries + 'hdim_1' means that we are periodic along hdim1 + 'hdim_2' means that we are periodic along hdim2 + 'both' means that we are periodic along both horizontal dimensions + Returns ------- pandas DataFrame - features + features after filtering ''' from itertools import combinations remove_list_distance=[] + if PBC_flag != 'none': + raise NotImplementedError("We haven't yet implemented PBCs into this.") + #if we are 3D, the vertical dimension is in features. if we are 2D, there #is no vertical dimension in features. is_3D = 'vdim' in features + + # Check if both dxy and their coordinate names are specified. + # If they are, warn that we will use dxy. + if dxy is not None and (x_coordinate_name in features and y_coordinate_name in features): + warnings.warn("Both "+x_coordinate_name+"/"+y_coordinate_name+" and dxy " + "set. Using constant dxy. 
Set dxy to None if you want to use the " + "interpolated coordinates, or set `x_coordinate_name` and " + "`y_coordinate_name` to None to use a constant dxy.") # Check and if both dz is specified and altitude is available, warn that we will use dz. if is_3D and (dz is not None and z_coordinate_name in features): @@ -956,7 +991,6 @@ def filter_min_distance(features, dxy = None,dz = None, min_distance = None, #Loop over combinations to remove features that are closer together than min_distance and keep larger one (either higher threshold or larger area) for index_1, index_2 in indeces: if index_1 is not index_2: - #features.loc[index_1,'hdim_1'] if dxy is not None: xy_sqdst = ((dxy*(features.loc[index_1,'hdim_1']-features.loc[index_2,'hdim_1']))**2+ (dxy*(features.loc[index_1,'hdim_2']-features.loc[index_2,'hdim_2']))**2) @@ -977,8 +1011,8 @@ def filter_min_distance(features, dxy = None,dz = None, min_distance = None, if is_3D: distance=np.sqrt(xy_sqdst + z_sqdst) else: - distance = xy_sqdst - + distance = np.sqrt(xy_sqdst) + print(distance, min_distance, distance <=min_distance) if distance <= min_distance: #print(distance, min_distance, index_1, index_2, features.size) # logging.debug('distance<= min_distance: ' + str(distance)) @@ -993,6 +1027,7 @@ def filter_min_distance(features, dxy = None,dz = None, min_distance = None, remove_list_distance.append(index_1) elif features.loc[index_1,'num']==features.loc[index_2,'num']: remove_list_distance.append(index_2) + print(remove_list_distance) features=features[~features.index.isin(remove_list_distance)] return features diff --git a/tobac/segmentation.py b/tobac/segmentation.py index f0488314..8541c1d2 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -271,23 +271,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu is_3D_seg = False elif field_in.ndim == 3: is_3D_seg = True - # Find which coordinate is the z coordinate - list_coord_names=[coord.name() for coord in field_in.coords()] - #determine vertical axis: - if vertical_coord=='auto': - list_vertical=['z','model_level_number','altitude','geopotential_height'] - # TODO: there surely must be a better way to handle this - vertical_axis = None - for coord_name in list_vertical: - if coord_name in list_coord_names: - vertical_axis=coord_name - break - if vertical_axis is None: - raise ValueError('Please specify vertical coordinate') - elif vertical_coord in list_coord_names: - vertical_axis=vertical_coord - else: - raise ValueError('Please specify vertical coordinate') + vertical_axis = tb_utils.find_vertical_axis_from_coord(field_in, vertical_coord=vertical_coord) ndim_vertical=field_in.coord_dims(vertical_axis) if len(ndim_vertical)>1: raise ValueError('please specify 1 dimensional vertical coordinate') @@ -305,6 +289,8 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu elif vertical_coord_axis == 2: hdim_1_axis = 0 hdim_2_axis = 1 + else: + raise ValueError("Segmentation routine can't find vertical coordinate.") else: raise ValueError('Segmentation routine only possible with 2 or 3 spatial dimensions') @@ -521,10 +507,10 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu 'buddies' array contains features of interest and any neighbors that are across the boundary or otherwise have lateral and/or diagonal physical contact with that label ''' - # TODO: this can cause a crash if there are no segmentation regions reg_props_dict = tb_utils.get_label_props_in_dict(segmentation_mask_3) - 
- curr_reg_inds, z_reg_inds, y_reg_inds, x_reg_inds= tb_utils.get_indices_of_labels_from_reg_prop_dict(reg_props_dict) + + if len(reg_props_dict) != 0: + curr_reg_inds, z_reg_inds, y_reg_inds, x_reg_inds= tb_utils.get_indices_of_labels_from_reg_prop_dict(reg_props_dict) wall_labels = np.array([]) diff --git a/tobac/testing.py b/tobac/testing.py index 281c26af..91402f53 100644 --- a/tobac/testing.py +++ b/tobac/testing.py @@ -784,7 +784,8 @@ def generate_single_feature(start_h1, start_h2, start_v = None, min_h1 = 0, max_h1 = None, min_h2 = 0, max_h2 = None, num_frames = 1, dt = datetime.timedelta(minutes=5), start_date = datetime.datetime(2022,1,1,0), - PBC_flag = 'none', frame_start = 0, feature_num=1,): + PBC_flag = 'none', frame_start = 0, feature_num=1, + feature_size = None, threshold_val = None): '''Function to generate a dummy feature dataframe to test the tracking functionality Parameters @@ -828,6 +829,11 @@ def generate_single_feature(start_h1, start_h2, start_v = None, Number to start the frame at feature_num: int What number to start the feature at + feature_size: int or None + 'num' column in output; feature size + If None, doesn't set this column + threshold_val: float or None + Threshold value of this feature ''' if max_h1 is None or max_h2 is None: @@ -852,7 +858,10 @@ def generate_single_feature(start_h1, start_h2, start_v = None, curr_v += spd_v curr_dict['time'] = curr_dt curr_dict["feature"] = feature_num + i - + if feature_size is not None: + curr_dict['num'] = feature_size + if threshold_val is not None: + curr_dict['threshold_value'] = threshold_val curr_h1 += spd_h1 curr_h2 += spd_h2 curr_dt += dt diff --git a/tobac/tests/test_feature_detection.py b/tobac/tests/test_feature_detection.py index c6d00a0f..52d4eb5e 100644 --- a/tobac/tests/test_feature_detection.py +++ b/tobac/tests/test_feature_detection.py @@ -1,4 +1,5 @@ import tobac.testing +import tobac.testing as tbtest import tobac.feature_detection as feat_detect import pytest @@ -43,4 +44,125 @@ def test_feature_detection_multithreshold_timestep(): assert len(fd_output.index) == 1 # Make sure that the location of the feature is correct assert fd_output.iloc[0]["hdim_1"] == pytest.approx(test_hdim_1_pt) - assert fd_output.iloc[0]["hdim_2"] == pytest.approx(test_hdim_2_pt) \ No newline at end of file + assert fd_output.iloc[0]["hdim_2"] == pytest.approx(test_hdim_2_pt) + + +@pytest.mark.parametrize( + "feature_1_loc, feature_2_loc, dxy, dz, min_distance," + " add_x_coords, add_y_coords," + "add_z_coords, PBC_flag, expect_feature_1, expect_feature_2", + [((0,0,0,4,1), (1,1,1,4,1), 1000, 100, 1, False, False, False, + 'none', True, True), + ((0,0,0,4,1), (1,1,1,3,1), 1000, 100, 5000, False, False, False, + 'none', True, False), + ((0,0,0,4,2), (1,1,1,10,1), 1000, 100, 5000, False, False, False, + 'none', True, False), + + ] +) +def test_filter_min_distance(feature_1_loc, feature_2_loc, dxy, dz, + min_distance, add_x_coords, add_y_coords, + add_z_coords, PBC_flag, expect_feature_1, expect_feature_2): + '''Tests tobac.feature_detection.filter_min_distance + Parameters + ---------- + feature_1_loc: tuple, length of 4 or 5 + Feature 1 location, num, and threshold value (assumes a 100 x 100 x 100 grid). + Assumes z, y, x, num, threshold_value for 3D where num is the size/ 'num' + column of the feature and threshold_value is the threshold_value. + If 2D, assumes y, x, num, threshold_value. 
+ feature_2_loc: tuple, length of 4 or 5 + Feature 2 location, same format and length as `feature_1_loc` + dxy: float or None + Horizontal grid spacing + dz: float or None + Vertical grid spacing (constant) + min_distance: float + Minimum distance between features (m) + add_x_coords: bool + Whether or not to add x coordinates + add_y_coords: bool + Whether or not to add y coordinates + add_z_coords: bool + Whether or not to add z coordinates + PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both') + Sets whether to use periodic boundaries, and if so in which directions. + 'none' means that we do not have periodic boundaries + 'hdim_1' means that we are periodic along hdim1 + 'hdim_2' means that we are periodic along hdim2 + 'both' means that we are periodic along both horizontal dimensions + expect_feature_1: bool + True if we expect feature 1 to remain, false if we expect it gone. + expect_feature_2: bool + True if we expect feature 2 to remain, false if we expect it gone. + ''' + import pandas as pd + import numpy as np + + h1_max = 100 + h2_max = 100 + z_max = 100 + + assumed_dxy = 100 + assumed_dz = 100 + + x_coord_name = 'projection_coord_x' + y_coord_name = 'projection_coord_y' + z_coord_name = 'projection_coord_z' + + is_3D = len(feature_1_loc) == 5 + start_size_loc = 3 if is_3D else 2 + start_h1_loc = 1 if is_3D else 0 + feat_opts_f1 = { + 'start_h1': feature_1_loc[start_h1_loc], + 'start_h2': feature_1_loc[start_h1_loc+1], + 'max_h1': h1_max, + 'max_h2': h2_max, + 'feature_size': feature_1_loc[start_size_loc], + 'threshold_val': feature_1_loc[start_size_loc+1], + 'feature_num': 1, + } + + feat_opts_f2 = { + 'start_h1': feature_2_loc[start_h1_loc], + 'start_h2': feature_2_loc[start_h1_loc+1], + 'max_h1': h1_max, + 'max_h2': h2_max, + 'feature_size': feature_2_loc[start_size_loc], + 'threshold_val': feature_2_loc[start_size_loc+1], + 'feature_num': 2, + } + if is_3D: + feat_opts_f1['start_v'] = feature_1_loc[0] + feat_opts_f2['start_v'] = feature_2_loc[0] + + + feat_1_interp = tbtest.generate_single_feature(**feat_opts_f1) + feat_2_interp = tbtest.generate_single_feature(**feat_opts_f2) + + feat_combined = pd.concat([feat_1_interp, feat_2_interp], ignore_index=True) + + filter_dist_opts = dict() + + if add_x_coords: + feat_combined[x_coord_name] = feat_combined['hdim_2'] * assumed_dxy + filter_dist_opts['x_coordinate_name'] = x_coord_name + if add_y_coords: + feat_combined[y_coord_name] = feat_combined['hdim_1'] * assumed_dxy + filter_dist_opts['y_coordinate_name'] = y_coord_name + if add_z_coords and is_3D: + feat_combined[z_coord_name] = feat_combined['vdim'] * assumed_dz + filter_dist_opts['z_coordinate_name'] = z_coord_name + + filter_dist_opts = { + 'features': feat_combined, + 'dxy': dxy, + 'dz': dz, + 'min_distance': min_distance, + 'PBC_flag': PBC_flag, + } + + out_feats = feat_detect.filter_min_distance(**filter_dist_opts) + + assert expect_feature_1 == (np.sum(out_feats['feature']==1)==1) + assert expect_feature_2 == (np.sum(out_feats['feature']==2)==1) diff --git a/tobac/tests/test_segmentation.py b/tobac/tests/test_segmentation.py index 1509fb4b..c31f4062 100644 --- a/tobac/tests/test_segmentation.py +++ b/tobac/tests/test_segmentation.py @@ -647,7 +647,6 @@ def test_segmentation_timestep_3d_buddy_box(dset_size,blob_1_loc, blob_1_size, b ((20,30,40), (8,0,0), (8, 28,38), (0,-8,-8), (5,5,5)), ] ) -# TODO: last test fails def test_add_markers_pbcs(dset_size,feat_1_loc, feat_2_loc, shift_domain, seed_3D_size): '''Tests ```tobac.segmentation.add_markers``` to make sure that 
adding markers works and is consistent across PBCs @@ -730,3 +729,46 @@ def test_add_markers_pbcs(dset_size,feat_1_loc, feat_2_loc, shift_domain, seed_3 assert np.all(marker_arr == marker_arr_reshifted) + +@pytest.mark.parametrize("PBC_flag", + [('none'), + ('hdim_1'), + ('hdim_2'), + ('both'), + ] +) +def test_empty_segmentation(PBC_flag): + '''Tests ```tobac.segmentation.segmentation_timestep``` with an + empty/zeroed out array + + ''' + import numpy as np + h1_size = 100 + h2_size = 100 + v_size = 5 + test_dxy = 1000 + test_feature = testing.generate_single_feature(start_v=1, + start_h1=1, + start_h2=1, + max_h1 = h1_size, + max_h2 = h2_size, + feature_num = 1, + PBC_flag=PBC_flag) + + seg_arr = np.zeros((v_size, h1_size, h2_size)) + seg_opts = { + 'dxy': test_dxy, + 'threshold': 1.5, + 'PBC_flag': PBC_flag, + } + test_data_iris = testing.make_dataset_from_arr( + seg_arr, data_type="iris", z_dim_num=0, y_dim_num=1, x_dim_num=2 + ) + + out_seg_mask, out_df = seg.segmentation_timestep( + field_in=test_data_iris, + features_in=test_feature, + **seg_opts + ) + + assert np.all(out_seg_mask.core_data() == -1) \ No newline at end of file diff --git a/tobac/tests/test_tracking.py b/tobac/tests/test_tracking.py index 267998fc..9e6feed2 100644 --- a/tobac/tests/test_tracking.py +++ b/tobac/tests/test_tracking.py @@ -2,6 +2,7 @@ Test for the trackpy tracking functions Who's watching the watchmen, basically. ''' +from pyexpat import features import pytest import tobac.testing import tobac.tracking @@ -46,9 +47,9 @@ def test_linking_trackpy(): expected_out_feature['cell'] = 1.0 actual_out_feature = tobac.tracking.linking_trackpy( - test_feature, None, 5, 1000, + test_feature, None, 5, 1000, dz=1000, v_max = 10000, method_linking='predict', - PBC_flag = 'none' + PBC_flag = 'none', vertical_coord=None ) # Just want to remove the time_cell column here. actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'feature', 'time', 'cell']] @@ -66,8 +67,8 @@ def test_linking_trackpy(): expected_out_feature['cell'] = 1.0 actual_out_feature = tobac.tracking.linking_trackpy( - test_feature, None, 1, 1, min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, - v_max = 4, method_linking='predict', + test_feature, None, 1, 1, dz=1, min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, + v_max = 4, method_linking='predict', vertical_coord=None, PBC_flag = 'hdim_1' ) # Just want to remove the time_cell column here. @@ -85,8 +86,8 @@ def test_linking_trackpy(): expected_out_feature['cell'] = 1.0 actual_out_feature = tobac.tracking.linking_trackpy( - test_feature, None, 1, 1, min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, - v_max = 4, method_linking='predict', + test_feature, None, 1, 1, dz=1, min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, + v_max = 4, method_linking='predict', vertical_coord=None, PBC_flag = 'hdim_2' ) # Just want to remove the time_cell column here. @@ -104,8 +105,8 @@ def test_linking_trackpy(): expected_out_feature['cell'] = 1.0 actual_out_feature = tobac.tracking.linking_trackpy( - test_feature, None, 1, 1, min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, - v_max = 5, method_linking='predict', + test_feature, None, 1, 1,dz=1, min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, + v_max = 5, method_linking='predict', vertical_coord=None, PBC_flag = 'both' ) # Just want to remove the time_cell column here. 
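For intuition on the 1.4142135 asserted in the next hunk: with PBC_flag='both' on a 10x10 domain, points (0, 9, 9) and (0, 0, 0) are one grid point apart in each horizontal dimension once wrapped, giving sqrt(2). A minimal NumPy sketch of that minimum-image distance (illustrative only, not part of any patch here; min_image_distance is a hypothetical helper name, the real logic lives in calc_distance_coords_pbc):

    import numpy as np

    def min_image_distance(p1, p2, wrap_sizes):
        # wrap_sizes: wrap length per dimension; 0 disables wrapping for that dimension
        deltas = np.abs(np.asarray(p1, dtype=float) - np.asarray(p2, dtype=float))
        wrap = np.asarray(wrap_sizes, dtype=float)
        # take the shorter way around for any periodic dimension
        deltas = np.where((wrap > 0) & (deltas > 0.5 * wrap), wrap - deltas, deltas)
        return np.sqrt(np.sum(deltas ** 2))

    print(min_image_distance((0, 9, 9), (0, 0, 0), (0, 10, 10)))  # ~1.41421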
@@ -124,3 +125,75 @@ def test_build_distance_function(): assert (test_func(np.array((0,9,9)), np.array((0,0,0))) == pytest.approx(1.4142135)) +@pytest.mark.parametrize("point_init, speed, dxy, actual_dz, v_max," + "use_dz, features_connected", + [((0,0,0), (1,0,0), 1000, 100, 200, True, True), + ((0,0,0), (1,0,0), 1000, 100, 200, False, True), + ((0,0,0), (5,0,0), 1000, 100, 200, True, False), + ((0,0,0), (5,0,0), 1000, 100, 200, False, False), + ] +) +def test_3D_tracking_min_dist_z(point_init, speed, dxy, actual_dz, v_max, + use_dz, features_connected): + '''Tests ```tobac.tracking.linking_trackpy``` with + points in z with varying distances between them. + + Parameters + ---------- + point_init: 3D array-like + Initial point (z, y, x) + speed: 3D array-like + Speed of the feature (z, y, x) + dxy: float + grid spacing for dx and dy + actual_dz: float + grid spacing for Z + use_dz: bool + True to use the passed in constant dz, False + to use the calculated vertical coordinates + features_connected: bool + Do we expect the features to be connected? + ''' + + + test_feature = tobac.testing.generate_single_feature( + start_h1 = point_init[1], start_h2 = point_init[2], + start_v = point_init[0], + min_h1 = 0, max_h1 = 100, min_h2 = 0, max_h2 = 100, + frame_start = 0, num_frames=2, + spd_h1 = speed[1], spd_h2 = speed[2], spd_v=speed[0], + PBC_flag='none') + if not use_dz: + test_feature['z'] = test_feature['vdim']*actual_dz + + expected_out_feature = copy.deepcopy(test_feature) + + if features_connected: + expected_out_feature['cell'] = 1.0 + else: + expected_out_feature['cell'] = np.nan + + common_params = { + 'features': test_feature, + 'field_in': None, + 'dt': 1, + 'time_cell_min': 1, + 'dxy': dxy, + 'v_max': v_max, + 'method_linking': 'predict', + } + if use_dz: + common_params['dz'] = actual_dz + common_params['vertical_coord'] = None + else: + common_params['vertical_coord'] = 'z' + + actual_out_feature = tobac.tracking.linking_trackpy( + **common_params + ) + # Just want to remove the time_cell column here. 
+ actual_out_feature = actual_out_feature.drop('time_cell', axis=1) + assert_frame_equal(expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1)) + + + diff --git a/tobac/tests/test_util.py b/tobac/tests/test_util.py index 41533368..41caa4f2 100644 --- a/tobac/tests/test_util.py +++ b/tobac/tests/test_util.py @@ -1,3 +1,4 @@ +from multiprocessing.sharedctypes import Value import pytest import tobac.testing import tobac.testing as tbtest @@ -326,12 +327,15 @@ def test_add_coordinates_2D(feature_loc, min_max_coords, lengths, expected_coord if ndims == 2: assert feats_with_coords.iloc[0]['latitude'] == expected_coord_interp[1] -@pytest.mark.parametrize("feature_loc, min_max_coords, lengths, expected_coord_interp", - [((0,0,0), (0,1,0,1),(2,2), (0,0)), - ((0,0,0), (0,1),(2,), (0,)), +@pytest.mark.parametrize("feature_loc, delta_feat, min_max_coords, lengths, expected_coord_interp", + [((0,0,0), None, (0,1,0,1),(2,2), (0,0)), + ((0,0,0), (1,1,1), (0,1,0,1),(2,2), (0,0)), + ((0.5,0.5,0.5), None, (0,3,3,6),(2,2), (1.5,4.5)), + ((0,0,0), None, (0,1),(2,), (0,)), + ((0,0,0), None, (0,1,0,1,0,1),(2,2,2), (0,0,0)), ] ) -def test_add_coordinates_3D(feature_loc, min_max_coords, lengths, expected_coord_interp): +def test_add_coordinates_3D(feature_loc, delta_feat, min_max_coords, lengths, expected_coord_interp): ''' Tests ```utils.add_coordinates_3D``` for a 3D case with 1D, 2D, and 3D coordinates @@ -339,10 +343,17 @@ def test_add_coordinates_3D(feature_loc, min_max_coords, lengths, expected_coord import xarray as xr import numpy as np import datetime + import pandas as pd feat_interp = tbtest.generate_single_feature(feature_loc[1], feature_loc[2], start_v = feature_loc[0], max_h1 = 9999, max_h2 = 9999) + if delta_feat is not None: + feat_interp_2 = tbtest.generate_single_feature(feature_loc[1]+delta_feat[1], feature_loc[2]+delta_feat[2], + start_v = feature_loc[0]+delta_feat[0], + max_h1 = 9999, max_h2 = 9999, feature_num=2) + feat_interp = pd.concat([feat_interp, feat_interp_2], ignore_index=True) + grid_coords = tbtest.generate_grid_coords(min_max_coords, lengths) ndims = len(lengths) @@ -355,22 +366,74 @@ def test_add_coordinates_3D(feature_loc, min_max_coords, lengths, expected_coord coord_dict = {'time': [base_time]} if ndims == 1: - # force at least a 2D array for data - lengths = lengths*2 - dim_names = ['time', 'longitude', 'latitude'] + # force at least a 3D array for data + lengths = lengths*3 + dim_names = ['time', 'longitude', 'latitude', 'z'] coord_dict['longitude'] = grid_coords + # we only test lon, so it doesn't really matter here what these are. coord_dict['latitude'] = grid_coords + coord_dict['z'] = grid_coords elif ndims == 2: - dim_names = ['time','x', 'y'] + lengths = lengths + (lengths[0],) + dim_names = ['time','x', 'y', 'z'] coord_dict['longitude'] = (('x','y'),grid_coords[0]) coord_dict['latitude'] = (('x','y'),grid_coords[1]) + # We only test lon and lat, so it doesn't matter what this is. 
+ coord_dict['z'] = np.linspace(0,1,lengths[0])
+
+ elif ndims == 3:
+ dim_names = ['time','x', 'y', 'z']
+ coord_dict['longitude'] = (('x','y', 'z'),grid_coords[0])
+ coord_dict['latitude'] = (('x','y', 'z'),grid_coords[1])
+ coord_dict['altitude'] = (('x','y', 'z'),grid_coords[2])
 data_xr = xr.DataArray(np.empty((1,)+lengths), coords = coord_dict, dims = dim_names)
- feats_with_coords = tb_utils.add_coordinates(feat_interp, data_xr.to_iris())
+ if ndims <=2:
+ feats_with_coords = tb_utils.add_coordinates_3D(feat_interp, data_xr.to_iris())
+ else:
+ feats_with_coords = tb_utils.add_coordinates_3D(feat_interp, data_xr.to_iris(), vertical_coord = 2)
+
+ assert np.isclose(feats_with_coords.iloc[0]['longitude'], expected_coord_interp[0])
+ if ndims >= 2:
+ assert np.isclose(feats_with_coords.iloc[0]['latitude'], expected_coord_interp[1])
+
+ if ndims >= 3:
+ assert np.isclose(feats_with_coords.iloc[0]['altitude'], expected_coord_interp[2])
+
+@pytest.mark.parametrize("vertical_coord_names, vertical_coord_pass_in, expect_raise",
+ [(['z'], 'auto', False),
+ (['pudding'], 'auto', True),
+ (['pudding'], 'pudding', False),
+ (['z', 'model_level_number'], 'pudding', True),
+ (['z', 'model_level_number'], 'auto', True),
+ (['z', 'model_level_number'], 'z', False),
+ ]
+)
+def test_find_dataframe_vertical_coord(vertical_coord_names, vertical_coord_pass_in,
+ expect_raise):
+ '''Tests ```tobac.utils.find_dataframe_vertical_coord```
- assert feats_with_coords.iloc[0]['longitude'] == expected_coord_interp[0]
- if ndims == 2:
- assert feats_with_coords.iloc[0]['latitude'] == expected_coord_interp[1] \ No newline at end of file
+
+ Parameters
+ ----------
+ vertical_coord_names: array-like
+ Names of vertical coordinates to add
+ vertical_coord_pass_in: str
+ Value to pass into `vertical_coord`
+ expect_raise: bool
+ True if we expect a ValueError to be raised, False otherwise
+ '''
+
+ test_feat = tbtest.generate_single_feature(0,0,max_h1=100, max_h2=100)
+ for vertical_name in vertical_coord_names:
+ test_feat[vertical_name] = 0.0
+
+ if expect_raise:
+ with pytest.raises(ValueError):
+ tb_utils.find_dataframe_vertical_coord(test_feat,
+ vertical_coord=vertical_coord_pass_in)
+ else:
+ assert tb_utils.find_dataframe_vertical_coord(test_feat,
+ vertical_coord=vertical_coord_pass_in) == vertical_coord_names[0] \ No newline at end of file diff --git a/tobac/tracking.py b/tobac/tracking.py index 4a699959..6ea192c4 100644 --- a/tobac/tracking.py +++ b/tobac/tracking.py @@ -1,4 +1,5 @@ import logging
+from operator import is_
 import numpy as np
 import pandas as pd
 import math
@@ -6,13 +7,14 @@
-def linking_trackpy(features,field_in,dt,dxy,
+def linking_trackpy(features,field_in,dt,dxy, dz = None,
 v_max=None,d_max=None,d_min=None,subnetwork_size=None,
 memory=0,stubs=1,time_cell_min=None,
 order=1,extrapolate=0,
 method_linking='random',
 adaptive_step=None,adaptive_stop=None,
 cell_number_start=1,
+ vertical_coord = 'auto',
 min_h1 = None, max_h1 = None,
 min_h2 = None, max_h2 = None,
 PBC_flag = 'none'
@@ -28,13 +30,20 @@ def linking_trackpy(features,field_in,dt,dxy,
 dt: float
 time resolution of tracked features
 dxy: float
- grid spacing of input data
+ Horizontal grid spacing of input data
+ dz: float
+ Constant vertical grid spacing of input data. If None,
+ uses `vertical_coord` to get the vertical location.
memory int
 number of output timesteps that features are allowed to vanish for and still be considered tracked
 subnetwork_size int
 maximum size of subnetwork for linking
 method_linking: str('predict' or 'random')
 flag choosing method used for trajectory linking
+ vertical_coord: str
+ Name of the vertical coordinate, which must be in meters. If 'auto', tries to auto-detect.
+ It looks for the coordinate or the dimension name corresponding
+ to the string. To use `dz`, set this to `None`.
 min_h1: int
 Minimum hdim_1 value, required when PBC_flag is 'hdim_1' or 'both'
 max_h1: int
@@ -59,8 +68,23 @@ def linking_trackpy(features,field_in,dt,dxy,
 # from trackpy import link_df
 import trackpy as tp
 from copy import deepcopy
-# from trackpy import filter_stubs
-# from .utils import add_coordinates
+
+ # Check if we are 3D.
+ if 'vdim' in features:
+ is_3D = True
+ if dz is not None and vertical_coord is not None:
+ raise ValueError("dz and vertical_coord both set, vertical"
+ " spacing is ambiguous. Set one to None.")
+ if dz is None and vertical_coord is None:
+ raise ValueError("Neither dz nor vertical_coord are set. One"
+ " must be set.")
+ if vertical_coord is not None:
+ found_vertical_coord = tb_utils.find_dataframe_vertical_coord(
+ variable_dataframe=features,
+ vertical_coord=vertical_coord
+ )
+ else:
+ is_3D = False
 # make sure that we have min and max for h1 and h2 if we are PBC
 if PBC_flag in ['hdim_1', 'both'] and (min_h1 is None or max_h1 is None):
@@ -72,15 +96,15 @@ def linking_trackpy(features,field_in,dt,dxy,
 # calculate search range based on timestep and grid spacing
 if v_max is not None:
- search_range=int(dt*v_max/dxy)
+ search_range = dt*v_max/dxy
 # calculate search range based on timestep and grid spacing
 if d_max is not None:
- search_range=int(d_max/dxy)
+ search_range=d_max/dxy
 # calculate search range based on timestep and grid spacing
 if d_min is not None:
- search_range=max(search_range,int(d_min/dxy))
+ search_range=max(search_range,d_min/dxy)
 if time_cell_min:
 stubs=np.floor(time_cell_min/dt)+1
@@ -97,12 +121,18 @@ def linking_trackpy(features,field_in,dt,dxy,
 # deep copy to preserve features field:
 features_linking=deepcopy(features)
 # check if we are 3D or not
-
- if 'vdim' in features_linking:
- is_3D = True
- pos_columns_tp = ['vdim','hdim_1','hdim_2']
+ if is_3D:
+ # If we are 3D, we need to convert the vertical
+ # coordinates so that 1 unit is equal to dxy.
+
+ if dz is not None:
+ features_linking['vdim_adj'] = features_linking['vdim']*dz/dxy
+ else:
+ vertical_coord = found_vertical_coord
+ features_linking['vdim_adj'] = (features_linking[found_vertical_coord]/dxy)
+
+ pos_columns_tp = ['vdim_adj','hdim_1','hdim_2']
 else:
- is_3D = False
 pos_columns_tp = ['hdim_1', 'hdim_2']
 # Check if we have PBCs.
@@ -150,6 +180,10 @@ def linking_trackpy(features,field_in,dt,dxy,
 # trajectories_filtered = filter_stubs(trajectories_unfiltered,threshold=stubs)
 # trajectories_filtered=trajectories_filtered.reset_index(drop=True)
+ # clean up our temporary column
+ if is_3D:
+ trajectories_unfiltered = trajectories_unfiltered.drop('vdim_adj', axis=1)
+
 # Reset particle numbers from the arbitrary numbers at the end of the feature detection and linking to consecutive cell numbers
 # keep 'particle' for reference to the feature detection step.
trajectories_unfiltered['cell']=None diff --git a/tobac/utils.py b/tobac/utils.py index 480f67aa..f6f02028 100644 --- a/tobac/utils.py +++ b/tobac/utils.py @@ -445,7 +445,7 @@ def add_coordinates(t,variable_cube): logging.debug('added coord: '+ coord) return t -def add_coordinates_3D(t,variable_cube): +def add_coordinates_3D(t,variable_cube, vertical_coord='auto', assume_coords_fixed_in_time = True): import numpy as np '''Function adding coordinates from the tracking cube to the trajectories for the 3D case: time, longitude&latitude, x&y dimensions, and altitude @@ -459,13 +459,24 @@ def add_coordinates_3D(t,variable_cube): Typically, 'longitude','latitude','x_projection_coordinate','y_projection_coordinate', and 'altitude' (if 3D) are the coordinates that we expect, although this function will happily interpolate along any dimension coordinates you give. + vertical_coord: str or int + Name or axis number of the vertical coordinate. If 'auto', tries to auto-detect. + If it is a string, it looks for the coordinate or the dimension name corresponding + to the string. If it is an int, it assumes that it is the vertical axis. + Note that if you only have a 2D or 3D coordinate for altitude, you must + pass in an int. + assume_coords_fixed_in_time: bool + If true, it assumes that the coordinates are fixed in time, even if the + coordinates say they vary in time. This is, by default, True, to preserve + legacy functionality. If False, it assumes that if a coordinate says + it varies in time, it takes the coordinate at its word. Returns ------- pandas DataFrame trajectories with added coordinates ''' - from scipy.interpolate import interp2d, interp1d + from scipy.interpolate import interp2d, interp1d, interpn logging.debug('start adding coordinates from cube') @@ -490,88 +501,87 @@ def add_coordinates_3D(t,variable_cube): # chose right dimension for horizontal and vertical axes based on time dimension: ndim_time=variable_cube.coord_dims('time')[0] - if ndim_time==0: - vdim=1 - hdim_1=2 - hdim_2=3 - elif ndim_time==1: - vdim=0 - hdim_1=2 - hdim_2=3 - elif ndim_time==2: - vdim=0 - hdim_1=1 - hdim_2=3 - elif ndim_time==3: - vdim=0 - hdim_1=1 - hdim_2=2 + # TODO: move this to a function, this is duplicated from segmentation. + if type(vertical_coord) is int: + ndim_vertical = vertical_coord + vertical_axis = None + else: + vertical_axis = find_vertical_axis_from_coord(variable_cube, vertical_coord=vertical_coord) + + if vertical_axis is not None: + ndim_vertical=variable_cube.coord_dims(vertical_axis) + if len(ndim_vertical) > 1: + raise ValueError("Vertical coordinate detected as multidimensional. Please pass in " + "axis number of vertical data.") + else: + ndim_vertical = ndim_vertical[0] + + + # We need to figure out the axis number of hdim_1 and hdim_2. 
+ ndim_hdim_1 = None + ndim_hdim_2 = None + for i in range(len(variable_cube.shape)): + if i != ndim_time and i != ndim_vertical: + if ndim_hdim_1 is None: + ndim_hdim_1 = i + else: + ndim_hdim_2 = i + + if ndim_hdim_1 is None or ndim_hdim_2 is None: + raise ValueError("Could not find hdim coordinates.") + # create vectors to use to interpolate from pixels to coordinates - dimvec_1=np.arange(variable_cube.shape[vdim]) - dimvec_2=np.arange(variable_cube.shape[hdim_1]) - dimvec_3=np.arange(variable_cube.shape[hdim_2]) + dimvec_1=np.arange(variable_cube.shape[ndim_vertical]) + dimvec_2=np.arange(variable_cube.shape[ndim_hdim_1]) + dimvec_3=np.arange(variable_cube.shape[ndim_hdim_2]) + dimvec_time = np.arange(variable_cube.shape[ndim_time]) + + coord_to_ax = {ndim_vertical: (dimvec_1, 'vdim'), + ndim_time: (dimvec_time,'time'), + ndim_hdim_1: (dimvec_2, 'hdim_1'), ndim_hdim_2: (dimvec_3, 'hdim_2')} # loop over coordinates in input data: for coord in coord_names: logging.debug('adding coord: '+ coord) # interpolate 1D coordinates: - if variable_cube.coord(coord).ndim==1: - - if variable_cube.coord_dims(coord)==(vdim,): - f=interp1d(dimvec_1,variable_cube.coord(coord).points,fill_value="extrapolate") - coordinate_points=f(t['vdim']) - - if variable_cube.coord_dims(coord)==(hdim_1,): - f=interp1d(dimvec_2,variable_cube.coord(coord).points,fill_value="extrapolate") - coordinate_points=f(t['hdim_1']) - - if variable_cube.coord_dims(coord)==(hdim_2,): - f=interp1d(dimvec_3,variable_cube.coord(coord).points,fill_value="extrapolate") - coordinate_points=f(t['hdim_2']) - - # interpolate 2D coordinates: - elif variable_cube.coord(coord).ndim==2: - - if variable_cube.coord_dims(coord)==(hdim_1,hdim_2): - f=interp2d(dimvec_3,dimvec_2,variable_cube.coord(coord).points) - coordinate_points=[f(a,b) for a,b in zip(t['hdim_2'],t['hdim_1'])] - - if variable_cube.coord_dims(coord)==(hdim_2,hdim_1): - f=interp2d(dimvec_2,dimvec_3,variable_cube.coord(coord).points) - coordinate_points=[f(a,b) for a,b in zip(t['hdim_1'],t['hdim_2'])] - - # interpolate 3D coordinates: - elif variable_cube.coord(coord).ndim==3: - - if variable_cube.coord_dims(coord)==(ndim_time,hdim_1,hdim_2): - f=interp2d(dimvec_2,dimvec_1,variable_cube[0,:,:].coord(coord).points) - coordinate_points=[f(a,b) for a,b in zip(t['hdim_2'],t['hdim_1'])] - - elif variable_cube.coord_dims(coord)==(ndim_time,hdim_2,hdim_1): - f=interp2d(dimvec_1,dimvec_2,variable_cube[0,:,:].coord(coord).points) - coordinate_points=[f(a,b) for a,b in zip(t['hdim_1'],t['hdim_2'])] - + var_coord = variable_cube.coord(coord) + if var_coord.ndim==1: + curr_dim = coord_to_ax[variable_cube.coord_dims(coord)[0]] + f=interp1d(curr_dim[0],var_coord.points,fill_value="extrapolate") + coordinate_points=f(t[curr_dim[1]]) + + # interpolate 2D coordinates + elif var_coord.ndim==2: + first_dim = coord_to_ax[variable_cube.coord_dims(coord)[1]] + second_dim = coord_to_ax[variable_cube.coord_dims(coord)[0]] + f=interp2d(first_dim[0],second_dim[0],var_coord.points) + coordinate_points=[f(a,b) for a,b in zip(t[first_dim[1]],t[second_dim[1]])] - elif variable_cube.coord_dims(coord)==(hdim_1,ndim_time,hdim_2): - f=interp2d(dimvec_2,dimvec_1,variable_cube[:,0,:].coord(coord).points) - coordinate_points=[f(a,b) for a,b in zip(t['hdim_2'],t['hdim_1'])] + # Deal with the special case where the coordinate is 3D but + # one of the dimensions is time and we assume the coordinates + # don't vary in time. 
+ elif (var_coord.ndim == 3 and ndim_time in variable_cube.coord_dims(coord)
+ and assume_coords_fixed_in_time):
+ time_pos = variable_cube.coord_dims(coord).index(ndim_time)
+ hdim1_pos = 0 if time_pos !=0 else 1
+ hdim2_pos = 1 if time_pos == 2 else 2
+ first_dim = coord_to_ax[variable_cube.coord_dims(coord)[hdim2_pos]]
+ second_dim = coord_to_ax[variable_cube.coord_dims(coord)[hdim1_pos]]
+ f=interp2d(first_dim[0],second_dim[0],var_coord.points)
+ coordinate_points=[f(a,b) for a,b in zip(t[first_dim[1]],t[second_dim[1]])]
- elif variable_cube.coord_dims(coord)==(hdim_1,hdim_2,ndim_time):
- f=interp2d(dimvec_2,dimvec_1,variable_cube[:,:,0].coord(coord).points)
- coordinate_points=[f(a,b) for a,b in zip(t['hdim_2'],t['hdim1'])]
-
- elif variable_cube.coord_dims(coord)==(hdim_2,ndim_time,hdim_1):
- f=interp2d(dimvec_1,dimvec_2,variable_cube[:,0,:].coord(coord).points)
- coordinate_points=[f(a,b) for a,b in zip(t['hdim_1'],t['hdim_2'])]
+ # interpolate 3D coordinates:
+ elif var_coord.ndim==3:
+ curr_coord_dims = variable_cube.coord_dims(coord)
+ first_dim = coord_to_ax[variable_cube.coord_dims(coord)[0]]
+ second_dim = coord_to_ax[variable_cube.coord_dims(coord)[1]]
+ third_dim = coord_to_ax[variable_cube.coord_dims(coord)[2]]
+ coordinate_points=interpn([first_dim[0],second_dim[0], third_dim[0]],var_coord.points,
+ [[a,b,c] for a,b,c in zip(t[first_dim[1]],t[second_dim[1]], t[third_dim[1]])])
+ #coordinate_points=[f(a,b) for a,b in zip(t[first_dim[1]],t[second_dim[1]])]
- elif variable_cube.coord_dims(coord)==(hdim_2,hdim_1,ndim_time):
- f=interp2d(dimvec_1,dimvec_2,variable_cube[:,:,0].coord(coord).points)
- coordinate_points=[f(a,b) for a,b in zip(t['hdim_1'],t['hdim_2'])]
-
- else:
- raise ValueError("Unable to interpolate 3D coordinate")
 # write resulting array or list into DataFrame:
 t[coord]=coordinate_points
@@ -973,3 +983,74 @@ def calc_distance_coords_pbc(coords_1, coords_2, min_h1, max_h1, min_h2, max_h2,
 deltas = np.abs(coords_1 - coords_2)
 deltas = np.where(deltas > 0.5 * max_dims, deltas - max_dims, deltas)
 return np.sqrt(np.sum(deltas**2))
+
+def find_vertical_axis_from_coord(variable_cube, vertical_coord='auto'):
+ '''Function to find the vertical coordinate in the iris cube
+
+ Parameters
+ ----------
+ variable_cube: iris.cube.Cube
+ Input variable cube, containing a vertical coordinate.
+ vertical_coord: str
+ Vertical coordinate name. If `auto`, this function tries to auto-detect.
+
+ Returns
+ -------
+ str
+ the vertical coordinate name
+
+ Raises
+ ------
+ ValueError
+ Raised if the vertical coordinate isn't found in the cube.
+ '''
+
+ list_coord_names=[coord.name() for coord in variable_cube.coords()]
+ if vertical_coord=='auto':
+ list_vertical=['z','model_level_number','altitude','geopotential_height']
+ # find the intersection
+ all_vertical_axes = list(set(list_coord_names) & set(list_vertical))
+ if len(all_vertical_axes) == 1:
+ return all_vertical_axes[0]
+ else:
+ raise ValueError('Please specify vertical coordinate')
+ elif vertical_coord in list_coord_names:
+ return vertical_coord
+ else:
+ raise ValueError('Please specify vertical coordinate')
+
+
+def find_dataframe_vertical_coord(variable_dataframe, vertical_coord='auto'):
+ '''Function to find the vertical coordinate in a pandas DataFrame
+
+ Parameters
+ ----------
+ variable_dataframe: pandas.DataFrame
+ Input variable dataframe, containing a vertical coordinate column.
+ vertical_coord: str
+ Vertical coordinate name. If `auto`, this function tries to auto-detect.
+
+ Returns
+ -------
+ str
+ the vertical coordinate name
+
+ Raises
+ ------
+ ValueError
+ Raised if the vertical coordinate isn't found in the dataframe.
+ '''
+
+ if vertical_coord == 'auto':
+ list_vertical=['z','model_level_number','altitude','geopotential_height']
+ all_vertical_axes = list(set(variable_dataframe.columns) & set(list_vertical))
+ if len(all_vertical_axes) == 1:
+ return all_vertical_axes[0]
+ else:
+ raise ValueError('Please specify vertical coordinate')
+
+ else:
+ if vertical_coord in variable_dataframe.columns:
+ return vertical_coord
+ else:
+ raise ValueError("Please specify vertical coordinate") \ No newline at end of file From acd21fa45be14b7b698f402a7cd641b6fcd81fda Mon Sep 17 00:00:00 2001 From: galexsky <90701223+galexsky@users.noreply.github.com> Date: Thu, 28 Apr 2022 16:39:57 -0400 Subject: [PATCH 60/82] Bugfix on vertical coordinate implementation

I uncovered an issue for Iris Cubes with more than one vertical coordinate (e.g. both 'altitude' and 'z' are provided). In this case, if 'auto' is passed, utils.find_vertical_axis_from_coord asks you to specify the coordinate; however, if anything other than 1 or 'auto' was passed, the NotImplementedError in feature detection would be triggered. I also noticed that the vertical_coord and vertical_axis arguments were not passed down through the various layers of feature detection calls, and have changed this.

--- tobac/feature_detection.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 87846994..342e33df 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -344,7 +344,9 @@ def feature_detection_threshold(data_i,i_time,
 n_min_threshold=0,
 min_distance=0,
 idx_start=0,
- PBC_flag='none'):
+ PBC_flag='none',
+ vertical_coord = 'auto',
+ vertical_axis = None,):
 '''function to find features based on individual threshold value
 Parameters
@@ -707,7 +709,9 @@ def feature_detection_multithreshold_timestep(data_i,i_time,
 n_min_threshold=0,
 min_distance=0,
 feature_number_start=1,
- PBC_flag='none'
+ PBC_flag='none',
+ vertical_coord = 'auto',
+ vertical_axis = None,
 ):
 '''function to find features in each timestep based on iteratively finding regions above/below a set of thresholds
@@ -771,7 +775,9 @@ def feature_detection_multithreshold_timestep(data_i,i_time,
 n_min_threshold=n_min_threshold,
 min_distance=min_distance,
 idx_start=idx_start,
- PBC_flag = PBC_flag
+ PBC_flag = PBC_flag,
+ vertical_coord = vertical_coord,
+ vertical_axis = vertical_axis,
 )
 if any([x is not None for x in features_threshold_i]):
 features_thresholds=features_thresholds.append(features_threshold_i)
@@ -855,7 +861,7 @@ def feature_detection_multithreshold(field_in,
 from .utils import add_coordinates, add_coordinates_3D
 logging.debug('start feature detection based on thresholds')
- if vertical_coord != 1 and vertical_coord != 'auto':
+ if vertical_coord != 1 and vertical_coord != 'auto' and vertical_coord != 'altitude' and vertical_coord != 'z':
 raise NotImplementedError("Vertical coordinate must be first non-time coord.")
 # create empty list to store features for all timesteps
 list_features_timesteps=[]
@@ -880,6 +886,8 @@ def feature_detection_multithreshold(field_in,
 min_distance=min_distance,
 feature_number_start=feature_number_start,
 PBC_flag=PBC_flag,
+ vertical_coord = vertical_coord,
+ vertical_axis = vertical_axis,
 )
 #check if list of features is not empty, then merge features from different threshold values
#into one DataFrame and append to list for individual timesteps: From 19b75f8c0a9376d2ea4859602e76cb6606e660e8 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Sun, 1 May 2022 14:24:15 -0600 Subject: [PATCH 61/82] Added a lot of new logic around vertical axes in feature detection --- tobac/feature_detection.py | 44 +++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 342e33df..451e6428 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -345,7 +345,6 @@ def feature_detection_threshold(data_i,i_time, min_distance=0, idx_start=0, PBC_flag='none', - vertical_coord = 'auto', vertical_axis = None,): '''function to find features based on individual threshold value @@ -377,7 +376,9 @@ def feature_detection_threshold(data_i,i_time, 'hdim_1' - periodic in hdim1 ONLY 'hdim_2' - periodic in hdim2 ONLY 'both' - DOUBLY periodic - + vertical_axis: int + The vertical axis number of the data. + Returns ------- pandas DataFrame @@ -710,7 +711,6 @@ def feature_detection_multithreshold_timestep(data_i,i_time, min_distance=0, feature_number_start=1, PBC_flag='none', - vertical_coord = 'auto', vertical_axis = None, ): '''function to find features in each timestep based on iteratively finding regions above/below a set of thresholds @@ -745,6 +745,8 @@ def feature_detection_multithreshold_timestep(data_i,i_time, 'hdim_1' means that we are periodic along hdim1 'hdim_2' means that we are periodic along hdim2 'both' means that we are periodic along both horizontal dimensions + vertical_axis: int + The vertical axis number of the data. Returns ------- @@ -776,7 +778,6 @@ def feature_detection_multithreshold_timestep(data_i,i_time, min_distance=min_distance, idx_start=idx_start, PBC_flag = PBC_flag, - vertical_coord = vertical_coord, vertical_axis = vertical_axis, ) if any([x is not None for x in features_threshold_i]): @@ -861,13 +862,42 @@ def feature_detection_multithreshold(field_in, from .utils import add_coordinates, add_coordinates_3D logging.debug('start feature detection based on thresholds') - if vertical_coord != 1 and vertical_coord != 'auto' and vertical_coord != 'altitude' and vertical_coord != 'z': + + if 'time' not in [coord.name() for coord in field_in.coords()]: + raise ValueError("input to feature detection step must include a dimension named 'time'") + + # Check whether we need to run 2D or 3D feature detection + if field_in.ndim == 3: + logging.debug("Running 2D feature detection") + is_3D = False + elif field_in.ndim == 4: + logging.debug("Running 3D feature detection") + is_3D = True + else: + raise ValueError("Feature detection only works with 2D or 3D data") + + if is_3D: + # We need to determine the time axis so that we can determine the + # vertical axis in each timestep if vertical_axis is not none. + if vertical_axis is not None: + ndim_time=field_in.coord_dims('time')[0] + # We only need to adjust the axis number if the time axis + # is a lower axis number than the specified vertical coordinate. 
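+ # For example, with data ordered (time, z, y, x), a vertical_axis
+ # of 1 on the full cube corresponds to axis 0 of each (z, y, x)
+ # slice once slices_over('time') removes the time dimension.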
+ if ndim_time < vertical_axis: + vertical_axis = vertical_axis - 1 + else: + # We need to determine vertical axis + vertical_axis = tb_utils.find_vertical_axis_from_coord(field_in, vertical_coord=vertical_coord) + + + if is_3D and vertical_axis != 1: raise NotImplementedError("Vertical coordinate must be first non-time coord.") # create empty list to store features for all timesteps list_features_timesteps=[] # loop over timesteps for feature identification: data_time=field_in.slices_over('time') + # if single threshold is put in as a single value, turn it into a list if type(threshold) in [int,float]: @@ -886,7 +916,6 @@ def feature_detection_multithreshold(field_in, min_distance=min_distance, feature_number_start=feature_number_start, PBC_flag=PBC_flag, - vertical_coord = vertical_coord, vertical_axis = vertical_axis, ) #check if list of features is not empty, then merge features from different threshold values @@ -896,8 +925,7 @@ def feature_detection_multithreshold(field_in, if (min_distance > 0): features_thresholds=filter_min_distance(features_thresholds,dxy=dxy, dz=dz, min_distance = min_distance, - vertical_coord = vertical_coord, - vertical_axis = vertical_axis + z_coordinate_name = vertical_coord, ) list_features_timesteps.append(features_thresholds) From 34129e704c7a425e7d1ed3e232790209556a72fe Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Sun, 1 May 2022 14:58:13 -0600 Subject: [PATCH 62/82] Allowed z coordinates to be in any axis --- tobac/feature_detection.py | 17 +++--- tobac/testing.py | 12 ++--- tobac/tests/test_feature_detection.py | 78 +++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 15 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 451e6428..0c79d770 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -393,6 +393,13 @@ def feature_detection_threshold(data_i,i_time, # If we are given a 3D data array, we should do 3D feature detection. 
is_3D = len(data_i.shape)==3 + # We need to transpose the input data + if is_3D: + if vertical_axis == 1: + data_i = np.transpose(data_i, axes=(1,0,2)) + elif vertical_axis == 2: + data_i = np.transpose(data_i, axes=(2,0,1)) + # if looking for minima, set values above threshold to 0 and scale by data minimum: if target == 'maximum': mask=(data_i >= threshold) @@ -887,11 +894,10 @@ def feature_detection_multithreshold(field_in, vertical_axis = vertical_axis - 1 else: # We need to determine vertical axis - vertical_axis = tb_utils.find_vertical_axis_from_coord(field_in, vertical_coord=vertical_coord) + vertical_axis = tb_utils.find_vertical_axis_from_coord(field_in, + vertical_coord=vertical_coord) - if is_3D and vertical_axis != 1: - raise NotImplementedError("Vertical coordinate must be first non-time coord.") # create empty list to store features for all timesteps list_features_timesteps=[] @@ -902,7 +908,6 @@ def feature_detection_multithreshold(field_in, # if single threshold is put in as a single value, turn it into a list if type(threshold) in [int,float]: threshold=[threshold] - for i_time,data_i in enumerate(data_time): time_i=data_i.coord('time').units.num2date(data_i.coord('time').points[0]) features_thresholds=feature_detection_multithreshold_timestep(data_i,i_time, @@ -945,7 +950,7 @@ def feature_detection_multithreshold(field_in, features=add_coordinates(features,field_in) else: features=None - logging.info('No features detected') + logging.debug('No features detected') logging.debug('feature detection completed') return features @@ -1048,7 +1053,6 @@ def filter_min_distance(features, dxy = None,dz = None, min_distance = None, distance=np.sqrt(xy_sqdst + z_sqdst) else: distance = np.sqrt(xy_sqdst) - print(distance, min_distance, distance <=min_distance) if distance <= min_distance: #print(distance, min_distance, index_1, index_2, features.size) # logging.debug('distance<= min_distance: ' + str(distance)) @@ -1063,7 +1067,6 @@ def filter_min_distance(features, dxy = None,dz = None, min_distance = None, remove_list_distance.append(index_1) elif features.loc[index_1,'num']==features.loc[index_2,'num']: remove_list_distance.append(index_2) - print(remove_list_distance) features=features[~features.index.isin(remove_list_distance)] return features diff --git a/tobac/testing.py b/tobac/testing.py index 91402f53..f5dfb622 100644 --- a/tobac/testing.py +++ b/tobac/testing.py @@ -396,9 +396,6 @@ def make_dataset_from_arr( import xarray as xr import iris - if time_dim_num is not None: - raise NotImplementedError("Time dimension not yet implemented in this function") - has_time = time_dim_num is not None is_3D = z_dim_num is not None @@ -409,7 +406,6 @@ def make_dataset_from_arr( if has_time: time_min = datetime.datetime(2022,1,1) time_num = in_arr.shape[time_dim_num] - time_max = datetime.timedelta(minutes=5) * time_num if data_type == "xarray": return output_arr @@ -422,12 +418,12 @@ def make_dataset_from_arr( z_dim_num, ) if has_time: - out_arr_iris = output_arr.to_iris() if is_3D: out_arr_iris.add_dim_coord( - iris.coords.DimCoord(np.linspace(time_min, time_max, time_num), - standard_name='time'), - time_dim_num,) + iris.coords.DimCoord(pd.date_range(start=time_min, periods=time_num).values.astype('datetime64[s]').astype(int), + standard_name='time', units='seconds since epoch'), + time_dim_num, + ) return out_arr_iris else: raise ValueError("data_type must be 'xarray' or 'iris'") diff --git a/tobac/tests/test_feature_detection.py b/tobac/tests/test_feature_detection.py index 
52d4eb5e..c0a4f9ba 100644 --- a/tobac/tests/test_feature_detection.py +++ b/tobac/tests/test_feature_detection.py @@ -166,3 +166,81 @@ def test_filter_min_distance(feature_1_loc, feature_2_loc, dxy, dz,
 assert expect_feature_1 == (np.sum(out_feats['feature']==1)==1)
 assert expect_feature_2 == (np.sum(out_feats['feature']==2)==1)
+
+@pytest.mark.parametrize("test_dset_size, vertical_axis_num, "
+ "vertical_coord_name,"
+ " vertical_coord_opt, expected_raise",
+ [((1,20,30,40), 1, 'altitude', 'auto', False),
+ ((1,20,30,40), 2, 'altitude', 'auto', False),
+ ((1,20,30,40), 3, 'altitude', 'auto', False),
+ ((1,20,30,40), 1, 'air_pressure', 'air_pressure', False),
+ ((1,20,30,40), 1, 'air_pressure', 'auto', True),
+ ((1,20,30,40), 1, 'model_level_number', 'auto', False),
+ ((1,20,30,40), 1, 'altitude', 'auto', False),
+ ((1,20,30,40), 1, 'geopotential_height', 'auto', False)
+ ]
+)
+def test_feature_detection_multiple_z_coords(test_dset_size, vertical_axis_num, vertical_coord_name,
+ vertical_coord_opt, expected_raise):
+ '''Tests ```tobac.feature_detection.feature_detection_multithreshold```
+ with different axes
+
+ Parameters
+ ----------
+ test_dset_size: tuple(int, int, int, int)
+ Size of the test dataset
+ vertical_axis_num: int (1-3, inclusive)
+ Which axis in test_dset_size is the vertical axis
+ vertical_coord_name: str
+ Name of the vertical coordinate.
+ vertical_coord_opt: str
+ What to pass in as the vertical coordinate option to feature_detection_multithreshold
+ expected_raise: bool
+ True if we expect a ValueError to be raised, False otherwise
+ '''
+ import numpy as np
+
+ # First, just check that input and output shapes are the same.
+ test_dxy = 1000
+ test_vdim_pt_1 = 8
+ test_hdim_1_pt_1 = 12
+ test_hdim_2_pt_1 = 12
+ test_data = np.zeros(test_dset_size)
+ test_data[0, 0:5, 0:5, 0:5] = 3
+ common_dset_opts = {
+ 'in_arr': test_data,
+ 'data_type': 'iris',
+ 'z_dim_name': vertical_coord_name
+ }
+ if vertical_axis_num == 1:
+ test_data_iris = tbtest.make_dataset_from_arr(
+ time_dim_num = 0, z_dim_num=1, y_dim_num=2, x_dim_num=3, **common_dset_opts
+ )
+ elif vertical_axis_num == 2:
+ test_data_iris = tbtest.make_dataset_from_arr(
+ time_dim_num = 0, z_dim_num=2, y_dim_num=1, x_dim_num=3, **common_dset_opts
+ )
+ elif vertical_axis_num == 3:
+ test_data_iris = tbtest.make_dataset_from_arr(
+ time_dim_num = 0, z_dim_num=3, y_dim_num=1, x_dim_num=2, **common_dset_opts
+ )
+
+ if not expected_raise:
+ out_df = feat_detect.feature_detection_multithreshold(
+ field_in=test_data_iris,
+ dxy=test_dxy,
+ threshold=[1.5,],
+ vertical_coord=vertical_coord_opt
+ )
+ # Check that the vertical coordinate is returned.
+ assert vertical_coord_name in out_df
+ else:
+ # Expecting a raise
+ with pytest.raises(ValueError):
+ out_df = feat_detect.feature_detection_multithreshold(
+ field_in=test_data_iris,
+ dxy=test_dxy,
+ threshold=[1.5,],
+ vertical_coord=vertical_coord_opt
+ ) From ff6f56fa26312d377fc0124a939c7e4697f8e8a6 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Sun, 1 May 2022 15:05:22 -0600 Subject: [PATCH 63/82] Cleaning up of feature detection code

Deleted several functions that have since been moved to the utilities module

--- tobac/feature_detection.py | 86 +++----------------------------------- 1 file changed, 6 insertions(+), 80 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 0c79d770..0e9bba1f 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -4,80 +4,6 @@ import warnings from .
import utils as tb_utils -def get_label_props_in_dict(labels): - '''Function to get the label properties into a dictionary format. - - Parameters - ---------- - labels: 2D or 3D array-like - comes from the `skimage.measure.label` function - - Returns - ------- - dict - output from skimage.measure.regionprops in dictionary format, where they key is the label number - ''' - import skimage.measure - - region_properties_raw = skimage.measure.regionprops(labels) - region_properties_dict = dict() - for region_prop in region_properties_raw: - region_properties_dict[region_prop.label] = region_prop - - return region_properties_dict - - -def adjust_pbc_point(in_dim, dim_min, dim_max): - '''Function to adjust a point to the other boundary for PBCs - - Parameters - ---------- - in_dim : int - Input coordinate to adjust - dim_min : int - Minimum point for the dimension - dim_max : int - Maximum point for the dimension (inclusive) - - Returns - ------- - int - The adjusted point on the opposite boundary - - Raises - ------ - ValueError - If in_dim isn't on one of the boundary points - ''' - if in_dim == dim_min: - return dim_max - elif in_dim == dim_max: - return dim_min - else: - raise ValueError("In adjust_pbc_point, in_dim isn't on a boundary.") - -def get_label_props_in_dict(labels): - '''Function to get the label properties into a dictionary format. - - Parameters - ---------- - labels: 2D or 3D array-like - comes from the `skimage.measure.label` function - - Returns - ------- - dict - output from skimage.measure.regionprops in dictionary format, where they key is the label number - ''' - import skimage.measure - - region_properties_raw = skimage.measure.regionprops(labels) - region_properties_dict = dict() - for region_prop in region_properties_raw: - region_properties_dict[region_prop.label] = region_prop - - return region_properties_dict - def feature_position(hdim1_indices, hdim2_indeces, vdim_indyces = None, @@ -442,7 +368,7 @@ def feature_detection_threshold(data_i,i_time, wall_labels = np.array([]) if num_labels > 0: - all_label_props = get_label_props_in_dict(labels) + all_label_props = tb_utils.get_label_props_in_dict(labels) [all_labels_max_size, all_label_locs_v, all_label_locs_h1, all_label_locs_h2 ] = tb_utils.get_indices_of_labels_from_reg_prop_dict(all_label_props) @@ -497,8 +423,8 @@ def feature_detection_threshold(data_i,i_time, if PBC_flag == 'both' and (np.any(label_y == [y_min,y_max]) and np.any(label_x == [x_min,x_max])): #adjust x and y points to the other side - y_val_alt = adjust_pbc_point(label_y, y_min, y_max) - x_val_alt = adjust_pbc_point(label_x, x_min, x_max) + y_val_alt = tb_utils.adjust_pbc_point(label_y, y_min, y_max) + x_val_alt = tb_utils.adjust_pbc_point(label_x, x_min, x_max) label_on_corner = labels[label_z,y_val_alt,x_val_alt] @@ -530,7 +456,7 @@ def feature_detection_threshold(data_i,i_time, # on the hdim1 boundary and periodic on hdim1 if (PBC_flag == 'hdim_1' or PBC_flag == 'both') and np.any(label_y == [y_min,y_max]): - y_val_alt = adjust_pbc_point(label_y, y_min, y_max) + y_val_alt = tb_utils.adjust_pbc_point(label_y, y_min, y_max) #get the label value on the opposite side label_alt = labels[label_z,y_val_alt,label_x] @@ -561,7 +487,7 @@ def feature_detection_threshold(data_i,i_time, break if (PBC_flag == 'hdim_2' or PBC_flag == 'both') and np.any(label_x == [x_min,x_max]): - x_val_alt = adjust_pbc_point(label_x, x_min, x_max) + x_val_alt = tb_utils.adjust_pbc_point(label_x, x_min, x_max) #get the label value on the opposite side label_alt = 
labels[label_z,label_y,x_val_alt] @@ -604,7 +530,7 @@ def feature_detection_threshold(data_i,i_time, #num_labels = num_labels - len(skip_list) # END PBC treatment # we need to get label properties again after we handle PBCs. - label_props = get_label_props_in_dict(labels) + label_props = tb_utils.get_label_props_in_dict(labels) if len(label_props)>0: [total_indices_all, vdim_indyces_all, hdim1_indices_all, hdim2_indices_all] = tb_utils.get_indices_of_labels_from_reg_prop_dict(label_props) From f7e93677a483a8699b86d918c2e184e562e65c1a Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Sun, 1 May 2022 15:20:58 -0600 Subject: [PATCH 64/82] Resolves #33 Allows filter_min_distance to work with PBCs --- tobac/feature_detection.py | 46 +++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 0e9bba1f..ab2abbb4 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -1,3 +1,4 @@ +from operator import is_ import numpy as np import pandas as pd import logging @@ -884,7 +885,8 @@ def filter_min_distance(features, dxy = None,dz = None, min_distance = None, x_coordinate_name = None, y_coordinate_name = None, z_coordinate_name = None, - PBC_flag = 'none'): + PBC_flag = 'none', + max_h1 = 0, max_h2 = 0,): '''Function to remove features that are too close together. If two features are closer than `min_distance`, it keeps the larger feature. @@ -924,6 +926,10 @@ def filter_min_distance(features, dxy = None,dz = None, min_distance = None, 'hdim_1' means that we are periodic along hdim1 'hdim_2' means that we are periodic along hdim2 'both' means that we are periodic along both horizontal dimensions + max_h1: int + Maximum coordinate in the hdim_1 dimension if PBC_flag is not 'none' + max_h2: int + Maximum coordinate in the hdim_2 dimension if PBC_flag is not 'none' Returns ------- @@ -940,6 +946,9 @@ def filter_min_distance(features, dxy = None,dz = None, min_distance = None, #is no vertical dimension in features. is_3D = 'vdim' in features + if is_3D and dz is None: + z_coordinate_name = tb_utils.find_dataframe_vertical_coord(features, z_coordinate_name) + # Check if both dxy and their coordinate names are specified. # If they are, warn that we will use dxy. if dxy is not None and (x_coordinate_name in features and y_coordinate_name in features): @@ -958,27 +967,28 @@ def filter_min_distance(features, dxy = None,dz = None, min_distance = None, #Loop over combinations to remove features that are closer together than min_distance and keep larger one (either higher threshold or larger area) for index_1, index_2 in indeces: if index_1 is not index_2: - if dxy is not None: - xy_sqdst = ((dxy*(features.loc[index_1,'hdim_1']-features.loc[index_2,'hdim_1']))**2+ - (dxy*(features.loc[index_1,'hdim_2']-features.loc[index_2,'hdim_2']))**2) - else: - # calculate xy distance based on x/y coordinates in meters. 
- xy_sqdst = ((features.loc[index_1, x_coordinate_name]-
- features.loc[index_2, x_coordinate_name])**2 +
- (features.loc[index_1, y_coordinate_name]-
- features.loc[index_2, y_coordinate_name])**2)
 if is_3D:
 if dz is not None:
- z_sqdst = (dz * (features.loc[index_1,'vdim']-features.loc[index_2,'vdim']))**2
+ z_coord_1 = dz * features.loc[index_1,'vdim']
+ z_coord_2 = dz * features.loc[index_2,'vdim']
 else:
- z_sqdst = (features.loc[index_1,z_coordinate_name]-
- features.loc[index_2,z_coordinate_name])**2
+ z_coord_1 = features.loc[index_1,z_coordinate_name]
+ z_coord_2 = features.loc[index_2,z_coordinate_name]
+
+ coord_1 = (z_coord_1, dxy*features.loc[index_1,'hdim_1'],
+ dxy*features.loc[index_1,'hdim_2'])
+ coord_2 = (z_coord_2, dxy*features.loc[index_2,'hdim_1'],
+ dxy*features.loc[index_2,'hdim_2'])
 else:
+ coord_1 = (dxy*features.loc[index_1,'hdim_1'], dxy*features.loc[index_1,'hdim_2'])
+ coord_2 = (dxy*features.loc[index_2,'hdim_1'], dxy*features.loc[index_2,'hdim_2'])
+
+ distance = tb_utils.calc_distance_coords_pbc(
+ coords_1 = np.array(coord_1),
+ coords_2 = np.array(coord_2),
+ min_h1 = 0, max_h1 = max_h1, min_h2 = 0, max_h2 = max_h2, PBC_flag=PBC_flag
+ )
+
 if distance <= min_distance:
 #print(distance, min_distance, index_1, index_2, features.size)
 # logging.debug('distance<= min_distance: ' + str(distance))
 if features.loc[index_1,'threshold_value']>features.loc[index_2,'threshold_value']:
 remove_list_distance.append(index_2)
 elif features.loc[index_1,'threshold_value']<features.loc[index_2,'threshold_value']:
 remove_list_distance.append(index_1)
 elif features.loc[index_1,'threshold_value']==features.loc[index_2,'threshold_value']:
 if features.loc[index_1,'num']>features.loc[index_2,'num']:
 remove_list_distance.append(index_2)
 elif features.loc[index_1,'num']<features.loc[index_2,'num']:
 remove_list_distance.append(index_1)
 elif features.loc[index_1,'num']==features.loc[index_2,'num']:
 remove_list_distance.append(index_2)
 features=features[~features.index.isin(remove_list_distance)]
 return features From af778fd28107b2a30887856c3581d69ac33748c3 Mon Sep 17 00:00:00 2001 From: galexsky <90701223+galexsky@users.noreply.github.com> Date: Sun, 1 May 2022 18:04:49 -0400 Subject: [PATCH 65/82] Tweak to vertical coord function and ncells fix

Slightly generalized and clarified the ValueErrors in find_vertical_axis_from_coord in the utils module. Now, if the default 'auto' coordinate is used and more than one suitable coord is in the iris cube, the first of the list is picked. If the cube lacks a suitable coord for 'auto', the error now specifies this. Additionally, if a coordinate is passed which does not exist in the cube, the error also says so. Finally, a fix to ncells in segmentation that assigns the proper value at each index is implemented.
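The ncells fix replaces a whole-column scalar assignment with a per-row count; a standalone sketch of the corrected logic (the `counts` mapping from feature id to segmented cell count is hypothetical):

import numpy as np
import pandas as pd

features_out = pd.DataFrame({"feature": [1, 2, 3]})
counts = {1: 10, 3: 7}  # e.g. feature 2 received no segmented cells

ncells = np.zeros(len(features_out))
for i, (index, row) in enumerate(features_out.iterrows()):
    if row["feature"] in counts.keys():
        ncells[i] = counts[row["feature"]]
# assign one count per row rather than broadcasting the last scalar
features_out["ncells"] = ncells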
--- tobac/segmentation.py | 4 ++-- tobac/utils.py | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 8541c1d2..348cc83d 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -831,8 +831,8 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu ncells=np.zeros(len(features_out)) for i,(index,row) in enumerate(features_out.iterrows()): if row['feature'] in counts.keys(): - ncells=counts[row['feature']] - features_out['ncells']=ncells + ncells[i]=counts[row['feature']] + features_out.ncells.values[i]=ncells[i] return segmentation_out,features_out diff --git a/tobac/utils.py b/tobac/utils.py index f6f02028..2c00c16b 100644 --- a/tobac/utils.py +++ b/tobac/utils.py @@ -1006,18 +1006,19 @@ def find_vertical_axis_from_coord(variable_cube, vertical_coord='auto'): ''' list_coord_names=[coord.name() for coord in variable_cube.coords()] + if vertical_coord=='auto': list_vertical=['z','model_level_number','altitude','geopotential_height'] # find the intersection all_vertical_axes = list(set(list_coord_names) & set(list_vertical)) - if len(all_vertical_axes) == 1: + if len(all_vertical_axes) >= 1: return all_vertical_axes[0] else: - raise ValueError('Please specify vertical coordinate') + raise ValueError('Cube lacks suitable automatic vertical coordinate (z, model_level_number, altitude, or geopotential_height)') elif vertical_coord in list_coord_names: return vertical_coord else: - raise ValueError('Please specify vertical coordinate') + raise ValueError('Please specify vertical coordinate found in cube') def find_dataframe_vertical_coord(variable_dataframe, vertical_coord='auto'): From 7017915f4bae7276917d98e31c883e1bef818ff7 Mon Sep 17 00:00:00 2001 From: galexsky <90701223+galexsky@users.noreply.github.com> Date: Sun, 1 May 2022 18:10:23 -0400 Subject: [PATCH 66/82] Revert "Tweak to vertical coord function and ncells fix" This reverts commit af778fd28107b2a30887856c3581d69ac33748c3. 
--- tobac/segmentation.py | 4 ++-- tobac/utils.py | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 348cc83d..8541c1d2 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -831,8 +831,8 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu ncells=np.zeros(len(features_out)) for i,(index,row) in enumerate(features_out.iterrows()): if row['feature'] in counts.keys(): - ncells[i]=counts[row['feature']] - features_out.ncells.values[i]=ncells[i] + ncells=counts[row['feature']] + features_out['ncells']=ncells return segmentation_out,features_out diff --git a/tobac/utils.py b/tobac/utils.py index 2c00c16b..f6f02028 100644 --- a/tobac/utils.py +++ b/tobac/utils.py @@ -1006,19 +1006,18 @@ def find_vertical_axis_from_coord(variable_cube, vertical_coord='auto'): ''' list_coord_names=[coord.name() for coord in variable_cube.coords()] - if vertical_coord=='auto': list_vertical=['z','model_level_number','altitude','geopotential_height'] # find the intersection all_vertical_axes = list(set(list_coord_names) & set(list_vertical)) - if len(all_vertical_axes) >= 1: + if len(all_vertical_axes) == 1: return all_vertical_axes[0] else: - raise ValueError('Cube lacks suitable automatic vertical coordinate (z, model_level_number, altitude, or geopotential_height)') + raise ValueError('Please specify vertical coordinate') elif vertical_coord in list_coord_names: return vertical_coord else: - raise ValueError('Please specify vertical coordinate found in cube') + raise ValueError('Please specify vertical coordinate') def find_dataframe_vertical_coord(variable_dataframe, vertical_coord='auto'): From be4c1094b43e47cbc219cee2138fd9195a7985af Mon Sep 17 00:00:00 2001 From: galexsky <90701223+galexsky@users.noreply.github.com> Date: Sun, 1 May 2022 18:13:15 -0400 Subject: [PATCH 67/82] Fix to ncells Fixed iterative calculation of ncells in segmentation --- tobac/._segmentation.py | Bin 0 -> 4096 bytes tobac/segmentation.py | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 tobac/._segmentation.py diff --git a/tobac/._segmentation.py b/tobac/._segmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..2ad0cc5aca325dfdeaecd44ff4b7a3b3306f34ba GIT binary patch literal 4096 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDJkFz{^v(m+1nBL)UWIUt(=a103v0xDsI z=wMg?WDB5a0m{L|rIPb=^%4sTa#Hnj5{pYpi&Ill5=&B*1A;+%8d&p_K(xF$g93;i z1*0J_8UmvsFd71*Aut*OqaiRF0;3@?8UmvsFd71*A%H6cK%Gqx1_QZ}jLc$%qSWI2 z(xT*4g|z&lY=z9clGMDC%>2B>oSaI9oYb@ug`}Lsylh}!7^-VXQ>gxjdqsvp?*D%P Dqr@l7 literal 0 HcmV?d00001 diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 8541c1d2..348cc83d 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -831,8 +831,8 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu ncells=np.zeros(len(features_out)) for i,(index,row) in enumerate(features_out.iterrows()): if row['feature'] in counts.keys(): - ncells=counts[row['feature']] - features_out['ncells']=ncells + ncells[i]=counts[row['feature']] + features_out.ncells.values[i]=ncells[i] return segmentation_out,features_out From acee9744ab96a7b9b84be0b048d405ae0ee460df Mon Sep 17 00:00:00 2001 From: galexsky <90701223+galexsky@users.noreply.github.com> Date: Sun, 1 May 2022 18:25:20 -0400 Subject: [PATCH 68/82] Revert "Fix to ncells" This reverts commit be4c1094b43e47cbc219cee2138fd9195a7985af. 
--- tobac/._segmentation.py | Bin 4096 -> 0 bytes tobac/segmentation.py | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) delete mode 100644 tobac/._segmentation.py diff --git a/tobac/._segmentation.py b/tobac/._segmentation.py deleted file mode 100644 index 2ad0cc5aca325dfdeaecd44ff4b7a3b3306f34ba..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4096 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDJkFz{^v(m+1nBL)UWIUt(=a103v0xDsI z=wMg?WDB5a0m{L|rIPb=^%4sTa#Hnj5{pYpi&Ill5=&B*1A;+%8d&p_K(xF$g93;i z1*0J_8UmvsFd71*Aut*OqaiRF0;3@?8UmvsFd71*A%H6cK%Gqx1_QZ}jLc$%qSWI2 z(xT*4g|z&lY=z9clGMDC%>2B>oSaI9oYb@ug`}Lsylh}!7^-VXQ>gxjdqsvp?*D%P Dqr@l7 diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 348cc83d..8541c1d2 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -831,8 +831,8 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu ncells=np.zeros(len(features_out)) for i,(index,row) in enumerate(features_out.iterrows()): if row['feature'] in counts.keys(): - ncells[i]=counts[row['feature']] - features_out.ncells.values[i]=ncells[i] + ncells=counts[row['feature']] + features_out['ncells']=ncells return segmentation_out,features_out From 133e7c4759ac1dabdb90c55a3772572e1faf8171 Mon Sep 17 00:00:00 2001 From: galexsky <90701223+galexsky@users.noreply.github.com> Date: Sun, 1 May 2022 18:27:22 -0400 Subject: [PATCH 69/82] Update to find_vertical_axis_from_coord Generalized some aspects of this function and also improved specificity of ValueErrors --- tobac/utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tobac/utils.py b/tobac/utils.py index f6f02028..5c2c4ecc 100644 --- a/tobac/utils.py +++ b/tobac/utils.py @@ -1006,18 +1006,21 @@ def find_vertical_axis_from_coord(variable_cube, vertical_coord='auto'): ''' list_coord_names=[coord.name() for coord in variable_cube.coords()] + if vertical_coord=='auto': list_vertical=['z','model_level_number','altitude','geopotential_height'] # find the intersection all_vertical_axes = list(set(list_coord_names) & set(list_vertical)) - if len(all_vertical_axes) == 1: + if len(all_vertical_axes) >= 1: return all_vertical_axes[0] else: - raise ValueError('Please specify vertical coordinate') + raise ValueError('Cube lacks suitable automatic vertical coordinate (z, model_level_number, altitude, or geopotential_height)') elif vertical_coord in list_coord_names: return vertical_coord else: - raise ValueError('Please specify vertical coordinate') + raise ValueError('Please specify vertical coordinate found in cube') + + def find_dataframe_vertical_coord(variable_dataframe, vertical_coord='auto'): From ea506e7f9b7f6989d7eeada9cad84b3af542cb41 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Sun, 1 May 2022 19:48:09 -0600 Subject: [PATCH 70/82] Added in framework for subsetting I've added in the framework here for subsetting data during feature detection, but not anything beyond that. #22 is addressed but not entirely resolved. 
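A sketch of the intended interface, per the docstring added below; the framework currently raises NotImplementedError for any non-None detect_subset, so this shows planned usage rather than working behavior (`my_cube` is a hypothetical iris cube):

from tobac.feature_detection import feature_detection_multithreshold

# For data ordered (time, z, y, x), restrict detection to z levels 10-29
# (start inclusive, end exclusive, keyed by axis number):
detect_subset = {1: (10, 30)}

features = feature_detection_multithreshold(
    field_in=my_cube,  # hypothetical input cube
    dxy=1000,
    threshold=[1.5],
    detect_subset=detect_subset,  # raises NotImplementedError for now
)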
--- tobac/feature_detection.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index ab2abbb4..417bd03c 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -741,6 +741,7 @@ def feature_detection_multithreshold(field_in, PBC_flag='none', vertical_coord = 'auto', vertical_axis = None, + detect_subset = None, ): '''Function to perform feature detection based on contiguous regions above/below a threshold @@ -787,6 +788,15 @@ def feature_detection_multithreshold(field_in, vertical_axis: int or None. The vertical axis number of the data. If None, uses vertical_coord to determine axis. + detect_subset: dict-like or None + Whether to run feature detection on only a subset of the data. + If this is not None, it will subset the grid that we run feature detection + on to the range specified for each axis specified. The format of this dict is: + {axis-number: (start, end)}, where axis-number is the number of the axis to subset, + start is inclusive, and end is exclusive. + For example, if your data are oriented as (time, z, y, x) and you want to + only detect on values between z levels 10 and 29, you would set: + {1: (10, 30)}. Returns ------- @@ -810,11 +820,20 @@ def feature_detection_multithreshold(field_in, else: raise ValueError("Feature detection only works with 2D or 3D data") + ndim_time=field_in.coord_dims('time')[0] + + if detect_subset is not None: + raise NotImplementedError("Subsetting feature detection not yet supported.") + + + if detect_subset is not None and ndim_time in detect_subset: + raise NotImplementedError("Cannot subset on time") + + if is_3D: # We need to determine the time axis so that we can determine the # vertical axis in each timestep if vertical_axis is not none. if vertical_axis is not None: - ndim_time=field_in.coord_dims('time')[0] # We only need to adjust the axis number if the time axis # is a lower axis number than the specified vertical coordinate. 
if ndim_time < vertical_axis: @@ -837,6 +856,7 @@ def feature_detection_multithreshold(field_in, threshold=[threshold] for i_time,data_i in enumerate(data_time): time_i=data_i.coord('time').units.num2date(data_i.coord('time').points[0]) + features_thresholds=feature_detection_multithreshold_timestep(data_i,i_time, threshold=threshold, sigma_threshold=sigma_threshold, From 1d715c9e136ca908f711c6b6f7c2a5337550ad35 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Mon, 2 May 2022 10:12:11 -0600 Subject: [PATCH 71/82] Black formatting --- tobac/__init__.py | 68 +- tobac/analysis.py | 759 +++++---- tobac/centerofgravity.py | 227 +-- tobac/feature_detection.py | 1227 +++++++++------ tobac/plotting.py | 2097 +++++++++++++++---------- tobac/segmentation.py | 1143 ++++++++------ tobac/testing.py | 926 ++++++----- tobac/tests/test_feature_detection.py | 224 ++- tobac/tests/test_import.py | 3 +- tobac/tests/test_sample_data.py | 278 ++-- tobac/tests/test_segmentation.py | 711 +++++---- tobac/tests/test_testing.py | 840 +++++++--- tobac/tests/test_tracking.py | 347 ++-- tobac/tests/test_util.py | 609 ++++--- tobac/tracking.py | 401 ++--- tobac/utils.py | 920 ++++++----- tobac/wrapper.py | 245 +-- 17 files changed, 6704 insertions(+), 4321 deletions(-) diff --git a/tobac/__init__.py b/tobac/__init__.py index f108e7a9..d01e2689 100644 --- a/tobac/__init__.py +++ b/tobac/__init__.py @@ -1,23 +1,63 @@ -#from .tracking import maketrack -from .segmentation import segmentation_3D, segmentation_2D,watershedding_3D,watershedding_2D -from .centerofgravity import calculate_cog,calculate_cog_untracked,calculate_cog_domain -from .plotting import plot_tracks_mask_field,plot_tracks_mask_field_loop,plot_mask_cell_track_follow,plot_mask_cell_track_static,plot_mask_cell_track_static_timeseries -from .plotting import plot_lifetime_histogram,plot_lifetime_histogram_bar,plot_histogram_cellwise,plot_histogram_featurewise -from .plotting import plot_mask_cell_track_3Dstatic,plot_mask_cell_track_2D3Dstatic -from .plotting import plot_mask_cell_individual_static,plot_mask_cell_individual_3Dstatic +# from .tracking import maketrack +from .segmentation import ( + segmentation_3D, + segmentation_2D, + watershedding_3D, + watershedding_2D, +) +from .centerofgravity import ( + calculate_cog, + calculate_cog_untracked, + calculate_cog_domain, +) +from .plotting import ( + plot_tracks_mask_field, + plot_tracks_mask_field_loop, + plot_mask_cell_track_follow, + plot_mask_cell_track_static, + plot_mask_cell_track_static_timeseries, +) +from .plotting import ( + plot_lifetime_histogram, + plot_lifetime_histogram_bar, + plot_histogram_cellwise, + plot_histogram_featurewise, +) +from .plotting import plot_mask_cell_track_3Dstatic, plot_mask_cell_track_2D3Dstatic +from .plotting import ( + plot_mask_cell_individual_static, + plot_mask_cell_individual_3Dstatic, +) from .plotting import animation_mask_field from .plotting import make_map, map_tracks -from .analysis import cell_statistics,cog_cell,lifetime_histogram,histogram_featurewise,histogram_cellwise -from .analysis import calculate_velocity,calculate_distance,calculate_area +from .analysis import ( + cell_statistics, + cog_cell, + lifetime_histogram, + histogram_featurewise, + histogram_cellwise, +) +from .analysis import calculate_velocity, calculate_distance, calculate_area from .analysis import calculate_nearestneighbordistance -from .analysis import velocity_histogram,nearestneighbordistance_histogram,area_histogram +from .analysis import ( + velocity_histogram, + 
nearestneighbordistance_histogram, + area_histogram, +) from .analysis import calculate_overlap -from .utils import mask_cell,mask_cell_surface,mask_cube_cell,mask_cube_untracked,mask_cube,column_mask_from2D,get_bounding_box -from .utils import mask_features,mask_features_surface,mask_cube_features +from .utils import ( + mask_cell, + mask_cell_surface, + mask_cube_cell, + mask_cube_untracked, + mask_cube, + column_mask_from2D, + get_bounding_box, +) +from .utils import mask_features, mask_features_surface, mask_cube_features -from .utils import add_coordinates,get_spacings +from .utils import add_coordinates, get_spacings from .feature_detection import feature_detection_multithreshold from .tracking import linking_trackpy from .wrapper import maketrack from .wrapper import tracking_wrapper - diff --git a/tobac/analysis.py b/tobac/analysis.py index b41b8370..697837fa 100644 --- a/tobac/analysis.py +++ b/tobac/analysis.py @@ -3,162 +3,241 @@ import logging import os -from .utils import mask_cell,mask_cell_surface,mask_cube_cell,get_bounding_box +from .utils import mask_cell, mask_cell_surface, mask_cube_cell, get_bounding_box -def cell_statistics_all(input_cubes,track,mask,aggregators,output_path='./',cell_selection=None,output_name='Profiles',width=10000,z_coord='model_level_number',dimensions=['x','y'],**kwargs): + +def cell_statistics_all( + input_cubes, + track, + mask, + aggregators, + output_path="./", + cell_selection=None, + output_name="Profiles", + width=10000, + z_coord="model_level_number", + dimensions=["x", "y"], + **kwargs +): if cell_selection is None: - cell_selection=np.unique(track['cell']) - for cell in cell_selection : - cell_statistics(input_cubes=input_cubes,track=track, mask=mask, - dimensions=dimensions,aggregators=aggregators,cell=cell, - output_path=output_path,output_name=output_name, - width=width,z_coord=z_coord,**kwargs) - -def cell_statistics(input_cubes,track,mask,aggregators,cell,output_path='./',output_name='Profiles',width=10000,z_coord='model_level_number',dimensions=['x','y'],**kwargs): - from iris.cube import Cube,CubeList + cell_selection = np.unique(track["cell"]) + for cell in cell_selection: + cell_statistics( + input_cubes=input_cubes, + track=track, + mask=mask, + dimensions=dimensions, + aggregators=aggregators, + cell=cell, + output_path=output_path, + output_name=output_name, + width=width, + z_coord=z_coord, + **kwargs + ) + + +def cell_statistics( + input_cubes, + track, + mask, + aggregators, + cell, + output_path="./", + output_name="Profiles", + width=10000, + z_coord="model_level_number", + dimensions=["x", "y"], + **kwargs +): + from iris.cube import Cube, CubeList from iris.coords import AuxCoord - from iris import Constraint,save - + from iris import Constraint, save + # If input is single cube, turn into cubelist if type(input_cubes) is Cube: - input_cubes=CubeList([input_cubes]) - - logging.debug('Start calculating profiles for cell '+str(cell)) - track_i=track[track['cell']==cell] - - cubes_profile={} + input_cubes = CubeList([input_cubes]) + + logging.debug("Start calculating profiles for cell " + str(cell)) + track_i = track[track["cell"] == cell] + + cubes_profile = {} for aggregator in aggregators: - cubes_profile[aggregator.name()]=CubeList() - - for time_i in track_i['time'].values: + cubes_profile[aggregator.name()] = CubeList() + + for time_i in track_i["time"].values: constraint_time = Constraint(time=time_i) - - mask_i=mask.extract(constraint_time) - mask_cell_i=mask_cell(mask_i,cell,track_i,masked=False) - 
mask_cell_surface_i=mask_cell_surface(mask_i,cell,track_i,masked=False,z_coord=z_coord) - - x_dim=mask_cell_surface_i.coord_dims('projection_x_coordinate')[0] - y_dim=mask_cell_surface_i.coord_dims('projection_y_coordinate')[0] - x_coord=mask_cell_surface_i.coord('projection_x_coordinate') - y_coord=mask_cell_surface_i.coord('projection_y_coordinate') - - if (mask_cell_surface_i.core_data()>0).any(): - box_mask_i=get_bounding_box(mask_cell_surface_i.core_data(),buffer=1) - - box_mask=[[x_coord.points[box_mask_i[x_dim][0]],x_coord.points[box_mask_i[x_dim][1]]], - [y_coord.points[box_mask_i[y_dim][0]],y_coord.points[box_mask_i[y_dim][1]]]] + + mask_i = mask.extract(constraint_time) + mask_cell_i = mask_cell(mask_i, cell, track_i, masked=False) + mask_cell_surface_i = mask_cell_surface( + mask_i, cell, track_i, masked=False, z_coord=z_coord + ) + + x_dim = mask_cell_surface_i.coord_dims("projection_x_coordinate")[0] + y_dim = mask_cell_surface_i.coord_dims("projection_y_coordinate")[0] + x_coord = mask_cell_surface_i.coord("projection_x_coordinate") + y_coord = mask_cell_surface_i.coord("projection_y_coordinate") + + if (mask_cell_surface_i.core_data() > 0).any(): + box_mask_i = get_bounding_box(mask_cell_surface_i.core_data(), buffer=1) + + box_mask = [ + [ + x_coord.points[box_mask_i[x_dim][0]], + x_coord.points[box_mask_i[x_dim][1]], + ], + [ + y_coord.points[box_mask_i[y_dim][0]], + y_coord.points[box_mask_i[y_dim][1]], + ], + ] else: - box_mask=[[np.nan,np.nan],[np.nan,np.nan]] - - x=track_i[track_i['time'].values==time_i]['projection_x_coordinate'].values[0] - y=track_i[track_i['time'].values==time_i]['projection_y_coordinate'].values[0] - - box_slice=[[x-width,x+width],[y-width,y+width]] - - x_min=np.nanmin([box_mask[0][0],box_slice[0][0]]) - x_max=np.nanmax([box_mask[0][1],box_slice[0][1]]) - y_min=np.nanmin([box_mask[1][0],box_slice[1][0]]) - y_max=np.nanmax([box_mask[1][1],box_slice[1][1]]) - - constraint_x=Constraint(projection_x_coordinate=lambda cell: int(x_min) < cell < int(x_max)) - constraint_y=Constraint(projection_y_coordinate=lambda cell: int(y_min) < cell < int(y_max)) - - constraint=constraint_time & constraint_x & constraint_y -# Mask_cell_surface_i=mask_cell_surface(Mask_w_i,cell,masked=False,z_coord='model_level_number') - mask_cell_i=mask_cell_i.extract(constraint) - mask_cell_surface_i=mask_cell_surface_i.extract(constraint) - - input_cubes_i=input_cubes.extract(constraint) + box_mask = [[np.nan, np.nan], [np.nan, np.nan]] + + x = track_i[track_i["time"].values == time_i]["projection_x_coordinate"].values[ + 0 + ] + y = track_i[track_i["time"].values == time_i]["projection_y_coordinate"].values[ + 0 + ] + + box_slice = [[x - width, x + width], [y - width, y + width]] + + x_min = np.nanmin([box_mask[0][0], box_slice[0][0]]) + x_max = np.nanmax([box_mask[0][1], box_slice[0][1]]) + y_min = np.nanmin([box_mask[1][0], box_slice[1][0]]) + y_max = np.nanmax([box_mask[1][1], box_slice[1][1]]) + + constraint_x = Constraint( + projection_x_coordinate=lambda cell: int(x_min) < cell < int(x_max) + ) + constraint_y = Constraint( + projection_y_coordinate=lambda cell: int(y_min) < cell < int(y_max) + ) + + constraint = constraint_time & constraint_x & constraint_y + # Mask_cell_surface_i=mask_cell_surface(Mask_w_i,cell,masked=False,z_coord='model_level_number') + mask_cell_i = mask_cell_i.extract(constraint) + mask_cell_surface_i = mask_cell_surface_i.extract(constraint) + + input_cubes_i = input_cubes.extract(constraint) for cube in input_cubes_i: - 
cube_masked=mask_cube_cell(cube,mask_cell_i,cell,track_i) - coords_remove=[] + cube_masked = mask_cube_cell(cube, mask_cell_i, cell, track_i) + coords_remove = [] for coordinate in cube_masked.coords(dim_coords=False): if coordinate.name() not in dimensions: for dim in dimensions: - if set(cube_masked.coord_dims(coordinate)).intersection(set(cube_masked.coord_dims(dim))): + if set(cube_masked.coord_dims(coordinate)).intersection( + set(cube_masked.coord_dims(dim)) + ): coords_remove.append(coordinate.name()) for coordinate in set(coords_remove): - cube_masked.remove_coord(coordinate) - + cube_masked.remove_coord(coordinate) + for aggregator in aggregators: - cube_collapsed=cube_masked.collapsed(dimensions,aggregator,**kwargs) - #remove all collapsed coordinates (x and y dim, scalar now) and keep only time as all these coordinates are useless + cube_collapsed = cube_masked.collapsed(dimensions, aggregator, **kwargs) + # remove all collapsed coordinates (x and y dim, scalar now) and keep only time as all these coordinates are useless for coordinate in cube_collapsed.coords(): if not cube_collapsed.coord_dims(coordinate): - if coordinate.name() is not 'time': + if coordinate.name() is not "time": cube_collapsed.remove_coord(coordinate) logging.debug(str(cube_collapsed)) cubes_profile[aggregator.name()].append(cube_collapsed) + minutes = (track_i["time_cell"] / pd.Timedelta(minutes=1)).values + latitude = track_i["latitude"].values + longitude = track_i["longitude"].values + minutes_coord = AuxCoord(minutes, long_name="cell_time", units="min") + latitude_coord = AuxCoord(latitude, long_name="latitude", units="degrees") + longitude_coord = AuxCoord(longitude, long_name="longitude", units="degrees") - minutes=(track_i['time_cell']/pd.Timedelta(minutes=1)).values - latitude=track_i['latitude'].values - longitude=track_i['longitude'].values - minutes_coord=AuxCoord(minutes,long_name='cell_time',units='min') - latitude_coord=AuxCoord(latitude,long_name='latitude',units='degrees') - longitude_coord=AuxCoord(longitude,long_name='longitude',units='degrees') - for aggregator in aggregators: - cubes_profile[aggregator.name()]=cubes_profile[aggregator.name()].merge() + cubes_profile[aggregator.name()] = cubes_profile[aggregator.name()].merge() for cube in cubes_profile[aggregator.name()]: - cube.add_aux_coord(minutes_coord,data_dims=cube.coord_dims('time')) - cube.add_aux_coord(latitude_coord,data_dims=cube.coord_dims('time')) - cube.add_aux_coord(longitude_coord,data_dims=cube.coord_dims('time')) - os.makedirs(os.path.join(output_path,output_name,aggregator.name()),exist_ok=True) - savefile=os.path.join(output_path,output_name,aggregator.name(),output_name+'_'+ aggregator.name()+'_'+str(int(cell))+'.nc') - save(cubes_profile[aggregator.name()],savefile) - - -def cog_cell(cell,Tracks=None,M_total=None,M_liquid=None, - M_frozen=None, - Mask=None, - savedir=None): - - + cube.add_aux_coord(minutes_coord, data_dims=cube.coord_dims("time")) + cube.add_aux_coord(latitude_coord, data_dims=cube.coord_dims("time")) + cube.add_aux_coord(longitude_coord, data_dims=cube.coord_dims("time")) + os.makedirs( + os.path.join(output_path, output_name, aggregator.name()), exist_ok=True + ) + savefile = os.path.join( + output_path, + output_name, + aggregator.name(), + output_name + "_" + aggregator.name() + "_" + str(int(cell)) + ".nc", + ) + save(cubes_profile[aggregator.name()], savefile) + + +def cog_cell( + cell, + Tracks=None, + M_total=None, + M_liquid=None, + M_frozen=None, + Mask=None, + savedir=None, +): + from 
iris import Constraint - logging.debug('Start calculating COG for '+str(cell)) - Track=Tracks[Tracks['cell']==cell] - constraint_time=Constraint(time=lambda cell: Track.head(1)['time'].values[0] <= cell <= Track.tail(1)['time'].values[0]) - M_total_i=M_total.extract(constraint_time) - M_liquid_i=M_liquid.extract(constraint_time) - M_frozen_i=M_frozen.extract(constraint_time) - Mask_i=Mask.extract(constraint_time) - - savedir_cell=os.path.join(savedir,'cells',str(int(cell))) - os.makedirs(savedir_cell,exist_ok=True) - savefile_COG_total_i=os.path.join(savedir_cell,'COG_total'+'_'+str(int(cell))+'.h5') - savefile_COG_liquid_i=os.path.join(savedir_cell,'COG_liquid'+'_'+str(int(cell))+'.h5') - savefile_COG_frozen_i=os.path.join(savedir_cell,'COG_frozen'+'_'+str(int(cell))+'.h5') - - Tracks_COG_total_i=calculate_cog(Track,M_total_i,Mask_i) -# Tracks_COG_total_list.append(Tracks_COG_total_i) - logging.debug('COG total loaded for ' +str(cell)) - - Tracks_COG_liquid_i=calculate_cog(Track,M_liquid_i,Mask_i) -# Tracks_COG_liquid_list.append(Tracks_COG_liquid_i) - logging.debug('COG liquid loaded for ' +str(cell)) - Tracks_COG_frozen_i=calculate_cog(Track,M_frozen_i,Mask_i) -# Tracks_COG_frozen_list.append(Tracks_COG_frozen_i) - logging.debug('COG frozen loaded for ' +str(cell)) - - Tracks_COG_total_i.to_hdf(savefile_COG_total_i,'table') - Tracks_COG_liquid_i.to_hdf(savefile_COG_liquid_i,'table') - Tracks_COG_frozen_i.to_hdf(savefile_COG_frozen_i,'table') - logging.debug('individual COG calculated and saved to '+ savedir_cell) - - -def lifetime_histogram(Track,bin_edges=np.arange(0,200,20),density=False,return_values=False): - Track_cell=Track.groupby('cell') - minutes=(Track_cell['time_cell'].max()/pd.Timedelta(minutes=1)).values - hist, bin_edges = np.histogram(minutes, bin_edges,density=density) - bin_centers=bin_edges[:-1]+0.5*np.diff(bin_edges) + + logging.debug("Start calculating COG for " + str(cell)) + Track = Tracks[Tracks["cell"] == cell] + constraint_time = Constraint( + time=lambda cell: Track.head(1)["time"].values[0] + <= cell + <= Track.tail(1)["time"].values[0] + ) + M_total_i = M_total.extract(constraint_time) + M_liquid_i = M_liquid.extract(constraint_time) + M_frozen_i = M_frozen.extract(constraint_time) + Mask_i = Mask.extract(constraint_time) + + savedir_cell = os.path.join(savedir, "cells", str(int(cell))) + os.makedirs(savedir_cell, exist_ok=True) + savefile_COG_total_i = os.path.join( + savedir_cell, "COG_total" + "_" + str(int(cell)) + ".h5" + ) + savefile_COG_liquid_i = os.path.join( + savedir_cell, "COG_liquid" + "_" + str(int(cell)) + ".h5" + ) + savefile_COG_frozen_i = os.path.join( + savedir_cell, "COG_frozen" + "_" + str(int(cell)) + ".h5" + ) + + Tracks_COG_total_i = calculate_cog(Track, M_total_i, Mask_i) + # Tracks_COG_total_list.append(Tracks_COG_total_i) + logging.debug("COG total loaded for " + str(cell)) + + Tracks_COG_liquid_i = calculate_cog(Track, M_liquid_i, Mask_i) + # Tracks_COG_liquid_list.append(Tracks_COG_liquid_i) + logging.debug("COG liquid loaded for " + str(cell)) + Tracks_COG_frozen_i = calculate_cog(Track, M_frozen_i, Mask_i) + # Tracks_COG_frozen_list.append(Tracks_COG_frozen_i) + logging.debug("COG frozen loaded for " + str(cell)) + + Tracks_COG_total_i.to_hdf(savefile_COG_total_i, "table") + Tracks_COG_liquid_i.to_hdf(savefile_COG_liquid_i, "table") + Tracks_COG_frozen_i.to_hdf(savefile_COG_frozen_i, "table") + logging.debug("individual COG calculated and saved to " + savedir_cell) + + +def lifetime_histogram( + Track, bin_edges=np.arange(0, 
200, 20), density=False, return_values=False +): + Track_cell = Track.groupby("cell") + minutes = (Track_cell["time_cell"].max() / pd.Timedelta(minutes=1)).values + hist, bin_edges = np.histogram(minutes, bin_edges, density=density) + bin_centers = bin_edges[:-1] + 0.5 * np.diff(bin_edges) if return_values: - return hist,bin_edges,bin_centers,minutes + return hist, bin_edges, bin_centers, minutes else: - return hist,bin_edges,bin_centers - -def haversine(lat1,lon1,lat2,lon2): + return hist, bin_edges, bin_centers + + +def haversine(lat1, lon1, lat2, lon2): """Computes the Haversine distance in kilometres between two points (based on implementation CIS https://github.com/cedadev/cis) :param lat1: first point or points as array, each as array of latitude in degrees :param lon1: first point or points as array, each as array of longitude in degrees @@ -171,88 +250,164 @@ def haversine(lat1,lon1,lat2,lon2): lat2 = np.radians(lat2) lon1 = np.radians(lon1) lon2 = np.radians(lon2) - #print(lat1,lat2,lon1,lon2) - arclen = 2 * np.arcsin(np.sqrt((np.sin((lat2 - lat1) / 2)) ** 2 + np.cos(lat1) * np.cos(lat2) * (np.sin((lon2 - lon1) / 2)) ** 2)) + # print(lat1,lat2,lon1,lon2) + arclen = 2 * np.arcsin( + np.sqrt( + (np.sin((lat2 - lat1) / 2)) ** 2 + + np.cos(lat1) * np.cos(lat2) * (np.sin((lon2 - lon1) / 2)) ** 2 + ) + ) return arclen * RADIUS_EARTH -def calculate_distance(feature_1,feature_2,method_distance=None): + +def calculate_distance(feature_1, feature_2, method_distance=None): """Computes distance between two features based on either lat/lon coordinates or x/y coordinates :param feature_1: first feature or points as array, each as array of latitude, longitude in degrees :param feature_2: second feature or points as array, each as array of latitude, longitude in degrees :return: distance between the two features in metres """ if method_distance is None: - if ('projection_x_coordinate' in feature_1) and ('projection_y_coordinate' in feature_1) and ('projection_x_coordinate' in feature_2) and ('projection_y_coordinate' in feature_2) : - method_distance='xy' - elif ('latitude' in feature_1) and ('longitude' in feature_1) and ('latitude' in feature_2) and ('longitude' in feature_2): - method_distance='latlon' + if ( + ("projection_x_coordinate" in feature_1) + and ("projection_y_coordinate" in feature_1) + and ("projection_x_coordinate" in feature_2) + and ("projection_y_coordinate" in feature_2) + ): + method_distance = "xy" + elif ( + ("latitude" in feature_1) + and ("longitude" in feature_1) + and ("latitude" in feature_2) + and ("longitude" in feature_2) + ): + method_distance = "latlon" else: - raise ValueError('either latitude/longitude or projection_x_coordinate/projection_y_coordinate have to be present to calculate distances') + raise ValueError( + "either latitude/longitude or projection_x_coordinate/projection_y_coordinate have to be present to calculate distances" + ) - if method_distance=='xy': - distance=np.sqrt((feature_1['projection_x_coordinate']-feature_2['projection_x_coordinate'])**2 - +(feature_1['projection_y_coordinate']-feature_2['projection_y_coordinate'])**2) - elif method_distance=='latlon': - distance=1000*haversine(feature_1['latitude'],feature_1['longitude'],feature_2['latitude'],feature_2['longitude']) + if method_distance == "xy": + distance = np.sqrt( + ( + feature_1["projection_x_coordinate"] + - feature_2["projection_x_coordinate"] + ) + ** 2 + + ( + feature_1["projection_y_coordinate"] + - feature_2["projection_y_coordinate"] + ) + ** 2 + ) + elif method_distance 
== "latlon": + distance = 1000 * haversine( + feature_1["latitude"], + feature_1["longitude"], + feature_2["latitude"], + feature_2["longitude"], + ) else: - raise ValueError('method undefined') + raise ValueError("method undefined") return distance -def calculate_velocity_individual(feature_old,feature_new,method_distance=None): - distance=calculate_distance(feature_old,feature_new,method_distance=method_distance) - diff_time=((feature_new['time']-feature_old['time']).total_seconds()) - velocity=distance/diff_time + +def calculate_velocity_individual(feature_old, feature_new, method_distance=None): + distance = calculate_distance( + feature_old, feature_new, method_distance=method_distance + ) + diff_time = (feature_new["time"] - feature_old["time"]).total_seconds() + velocity = distance / diff_time return velocity -def calculate_velocity(track,method_distance=None): - for cell_i,track_i in track.groupby('cell'): - index=track_i.index.values - for i,index_i in enumerate(index[:-1]): - velocity=calculate_velocity_individual(track_i.loc[index[i]],track_i.loc[index[i+1]],method_distance=method_distance) - track.at[index_i,'v']=velocity + +def calculate_velocity(track, method_distance=None): + for cell_i, track_i in track.groupby("cell"): + index = track_i.index.values + for i, index_i in enumerate(index[:-1]): + velocity = calculate_velocity_individual( + track_i.loc[index[i]], + track_i.loc[index[i + 1]], + method_distance=method_distance, + ) + track.at[index_i, "v"] = velocity return track -def velocity_histogram(track,bin_edges=np.arange(0,30,1),density=False,method_distance=None,return_values=False): - if 'v' not in track.columns: - logging.info('calculate velocities') - track=calculate_velocity(track) - velocities=track['v'].values - hist, bin_edges = np.histogram(velocities[~np.isnan(velocities)], bin_edges,density=density) + +def velocity_histogram( + track, + bin_edges=np.arange(0, 30, 1), + density=False, + method_distance=None, + return_values=False, +): + if "v" not in track.columns: + logging.info("calculate velocities") + track = calculate_velocity(track) + velocities = track["v"].values + hist, bin_edges = np.histogram( + velocities[~np.isnan(velocities)], bin_edges, density=density + ) if return_values: - return hist,bin_edges,velocities + return hist, bin_edges, velocities else: - return hist,bin_edges + return hist, bin_edges -def calculate_nearestneighbordistance(features,method_distance=None): + +def calculate_nearestneighbordistance(features, method_distance=None): from itertools import combinations - features['min_distance']=np.nan - for time_i,features_i in features.groupby('time'): - logging.debug(str(time_i)) - indeces=combinations(features_i.index.values,2) - #Loop over combinations to remove features that are closer together than min_distance and keep larger one (either higher threshold or larger area) - distances=[] - for index_1,index_2 in indeces: - if index_1 is not index_2: - distance=calculate_distance(features_i.loc[index_1],features_i.loc[index_2],method_distance=method_distance) - distances.append(pd.DataFrame({'index_1':index_1,'index_2':index_2,'distance': distance}, index=[0])) - if any([x is not None for x in distances]): - distances=pd.concat(distances, ignore_index=True) - for i in features_i.index: - min_distance=distances.loc[(distances['index_1']==i) | (distances['index_2']==i),'distance'].min() - features.at[i,'min_distance']=min_distance + + features["min_distance"] = np.nan + for time_i, features_i in features.groupby("time"): + 
logging.debug(str(time_i)) + indeces = combinations(features_i.index.values, 2) + # Loop over combinations to remove features that are closer together than min_distance and keep larger one (either higher threshold or larger area) + distances = [] + for index_1, index_2 in indeces: + if index_1 is not index_2: + distance = calculate_distance( + features_i.loc[index_1], + features_i.loc[index_2], + method_distance=method_distance, + ) + distances.append( + pd.DataFrame( + {"index_1": index_1, "index_2": index_2, "distance": distance}, + index=[0], + ) + ) + if any([x is not None for x in distances]): + distances = pd.concat(distances, ignore_index=True) + for i in features_i.index: + min_distance = distances.loc[ + (distances["index_1"] == i) | (distances["index_2"] == i), + "distance", + ].min() + features.at[i, "min_distance"] = min_distance return features -def nearestneighbordistance_histogram(features,bin_edges=np.arange(0,30000,500),density=False,method_distance=None,return_values=False): - if 'min_distance' not in features.columns: - logging.debug('calculate nearest neighbor distances') - features=calculate_nearestneighbordistance(features,method_distance=method_distance) - distances=features['min_distance'].values - hist, bin_edges = np.histogram(distances[~np.isnan(distances)], bin_edges,density=density) + +def nearestneighbordistance_histogram( + features, + bin_edges=np.arange(0, 30000, 500), + density=False, + method_distance=None, + return_values=False, +): + if "min_distance" not in features.columns: + logging.debug("calculate nearest neighbor distances") + features = calculate_nearestneighbordistance( + features, method_distance=method_distance + ) + distances = features["min_distance"].values + hist, bin_edges = np.histogram( + distances[~np.isnan(distances)], bin_edges, density=density + ) if return_values: - return hist,bin_edges,distances + return hist, bin_edges, distances else: - return hist,bin_edges - + return hist, bin_edges + + # Treatment of 2D lat/lon coordinates to be added: # def calculate_areas_2Dlatlon(latitude_coord,longitude_coord): # lat=latitude_coord.core_data() @@ -260,133 +415,169 @@ def nearestneighbordistance_histogram(features,bin_edges=np.arange(0,30000,500), # area=np.zeros(lat.shape) # dx=np.zeros(lat.shape) # dy=np.zeros(lat.shape) - + # return area -def calculate_area(features,mask,method_area=None): - from tobac.utils import mask_features_surface,mask_features + +def calculate_area(features, mask, method_area=None): + from tobac.utils import mask_features_surface, mask_features from iris import Constraint from iris.analysis.cartography import area_weights - - features['area']=np.nan - - mask_coords=[coord.name() for coord in mask.coords()] + + features["area"] = np.nan + + mask_coords = [coord.name() for coord in mask.coords()] if method_area is None: - if ('projection_x_coordinate' in mask_coords) and ('projection_y_coordinate' in mask_coords): - method_area='xy' - elif ('latitude' in mask_coords) and ('longitude' in mask_coords): - method_area='latlon' + if ("projection_x_coordinate" in mask_coords) and ( + "projection_y_coordinate" in mask_coords + ): + method_area = "xy" + elif ("latitude" in mask_coords) and ("longitude" in mask_coords): + method_area = "latlon" else: - raise ValueError('either latitude/longitude or projection_x_coordinate/projection_y_coordinate have to be present to calculate distances') - logging.debug('calculating area using method '+ method_area) - if method_area=='xy': - if not 
(mask.coord('projection_x_coordinate').has_bounds() and mask.coord('projection_y_coordinate').has_bounds()): - mask.coord('projection_x_coordinate').guess_bounds() - mask.coord('projection_y_coordinate').guess_bounds() - area=np.outer(np.diff(mask.coord('projection_x_coordinate').bounds,axis=1),np.diff(mask.coord('projection_y_coordinate').bounds,axis=1)) - elif method_area=='latlon': - if (mask.coord('latitude').ndim==1) and (mask.coord('latitude').ndim==1): - if not (mask.coord('latitude').has_bounds() and mask.coord('longitude').has_bounds()): - mask.coord('latitude').guess_bounds() - mask.coord('longitude').guess_bounds() - area=area_weights(mask,normalize=False) - elif mask.coord('latitude').ndim==2 and mask.coord('longitude').ndim==2: - raise ValueError('2D latitude/longitude coordinates not supported yet') + raise ValueError( + "either latitude/longitude or projection_x_coordinate/projection_y_coordinate have to be present to calculate distances" + ) + logging.debug("calculating area using method " + method_area) + if method_area == "xy": + if not ( + mask.coord("projection_x_coordinate").has_bounds() + and mask.coord("projection_y_coordinate").has_bounds() + ): + mask.coord("projection_x_coordinate").guess_bounds() + mask.coord("projection_y_coordinate").guess_bounds() + area = np.outer( + np.diff(mask.coord("projection_x_coordinate").bounds, axis=1), + np.diff(mask.coord("projection_y_coordinate").bounds, axis=1), + ) + elif method_area == "latlon": + if (mask.coord("latitude").ndim == 1) and (mask.coord("latitude").ndim == 1): + if not ( + mask.coord("latitude").has_bounds() + and mask.coord("longitude").has_bounds() + ): + mask.coord("latitude").guess_bounds() + mask.coord("longitude").guess_bounds() + area = area_weights(mask, normalize=False) + elif mask.coord("latitude").ndim == 2 and mask.coord("longitude").ndim == 2: + raise ValueError("2D latitude/longitude coordinates not supported yet") # area=calculate_areas_2Dlatlon(mask.coord('latitude'),mask.coord('longitude')) else: - raise ValueError('latitude/longitude coordinate shape not supported') + raise ValueError("latitude/longitude coordinate shape not supported") else: - raise ValueError('method undefined') - - for time_i,features_i in features.groupby('time'): - logging.debug('timestep:'+ str(time_i)) + raise ValueError("method undefined") + + for time_i, features_i in features.groupby("time"): + logging.debug("timestep:" + str(time_i)) constraint_time = Constraint(time=time_i) - mask_i=mask.extract(constraint_time) + mask_i = mask.extract(constraint_time) for i in features_i.index: - if len(mask_i.shape)==3: - mask_i_surface = mask_features_surface(mask_i, features_i.loc[i,'feature'], z_coord='model_level_number') - elif len(mask_i.shape)==2: - mask_i_surface=mask_features(mask_i,features_i.loc[i,'feature']) - area_feature=np.sum(area*(mask_i_surface.data>0)) - features.at[i,'area']=area_feature + if len(mask_i.shape) == 3: + mask_i_surface = mask_features_surface( + mask_i, features_i.loc[i, "feature"], z_coord="model_level_number" + ) + elif len(mask_i.shape) == 2: + mask_i_surface = mask_features(mask_i, features_i.loc[i, "feature"]) + area_feature = np.sum(area * (mask_i_surface.data > 0)) + features.at[i, "area"] = area_feature return features -def area_histogram(features,mask,bin_edges=np.arange(0,30000,500), - density=False,method_area=None, - return_values=False,representative_area=False): - if 'area' not in features.columns: - logging.info('calculate area') - features=calculate_area(features,method_area) - 
areas=features['area'].values + +def area_histogram( + features, + mask, + bin_edges=np.arange(0, 30000, 500), + density=False, + method_area=None, + return_values=False, + representative_area=False, +): + if "area" not in features.columns: + logging.info("calculate area") + features = calculate_area(features, method_area) + areas = features["area"].values # restrict to non NaN values: - areas=areas[~np.isnan(areas)] + areas = areas[~np.isnan(areas)] if representative_area: - weights=areas + weights = areas else: - weights=None - hist, bin_edges = np.histogram(areas, bin_edges,density=density,weights=weights) - bin_centers=bin_edges[:-1]+0.5*np.diff(bin_edges) + weights = None + hist, bin_edges = np.histogram(areas, bin_edges, density=density, weights=weights) + bin_centers = bin_edges[:-1] + 0.5 * np.diff(bin_edges) if return_values: - return hist,bin_edges,bin_centers,areas + return hist, bin_edges, bin_centers, areas else: - return hist,bin_edges,bin_centers - -def histogram_cellwise(Track,variable=None,bin_edges=None,quantity='max',density=False): - Track_cell=Track.groupby('cell') - if quantity=='max': - variable_cell=Track_cell[variable].max().values - elif quantity=='min': - variable_cell=Track_cell[variable].min().values - elif quantity=='mean': - variable_cell=Track_cell[variable].mean().values + return hist, bin_edges, bin_centers + + +def histogram_cellwise( + Track, variable=None, bin_edges=None, quantity="max", density=False +): + Track_cell = Track.groupby("cell") + if quantity == "max": + variable_cell = Track_cell[variable].max().values + elif quantity == "min": + variable_cell = Track_cell[variable].min().values + elif quantity == "mean": + variable_cell = Track_cell[variable].mean().values else: - raise ValueError('quantity unknown, must be max, min or mean') - hist, bin_edges = np.histogram(variable_cell, bin_edges,density=density) - bin_centers=bin_edges[:-1]+0.5*np.diff(bin_edges) + raise ValueError("quantity unknown, must be max, min or mean") + hist, bin_edges = np.histogram(variable_cell, bin_edges, density=density) + bin_centers = bin_edges[:-1] + 0.5 * np.diff(bin_edges) + + return hist, bin_edges, bin_centers + - return hist,bin_edges, bin_centers +def histogram_featurewise(Track, variable=None, bin_edges=None, density=False): + hist, bin_edges = np.histogram(Track[variable].values, bin_edges, density=density) + bin_centers = bin_edges[:-1] + 0.5 * np.diff(bin_edges) -def histogram_featurewise(Track,variable=None,bin_edges=None,density=False): - hist, bin_edges = np.histogram(Track[variable].values, bin_edges,density=density) - bin_centers=bin_edges[:-1]+0.5*np.diff(bin_edges) + return hist, bin_edges, bin_centers - return hist,bin_edges, bin_centers -def calculate_overlap(track_1,track_2,min_sum_inv_distance=None,min_mean_inv_distance=None): - cells_1=track_1['cell'].unique() -# n_cells_1_tot=len(cells_1) - cells_2=track_2['cell'].unique() - overlap=pd.DataFrame() - for i_cell_1,cell_1 in enumerate(cells_1): +def calculate_overlap( + track_1, track_2, min_sum_inv_distance=None, min_mean_inv_distance=None +): + cells_1 = track_1["cell"].unique() + # n_cells_1_tot=len(cells_1) + cells_2 = track_2["cell"].unique() + overlap = pd.DataFrame() + for i_cell_1, cell_1 in enumerate(cells_1): for cell_2 in cells_2: - track_1_i=track_1[track_1['cell']==cell_1] - track_2_i=track_2[track_2['cell']==cell_2] - track_1_i=track_1_i[track_1_i['time'].isin(track_2_i['time'])] - track_2_i=track_2_i[track_2_i['time'].isin(track_1_i['time'])] + track_1_i = track_1[track_1["cell"] == 
cell_1] + track_2_i = track_2[track_2["cell"] == cell_2] + track_1_i = track_1_i[track_1_i["time"].isin(track_2_i["time"])] + track_2_i = track_2_i[track_2_i["time"].isin(track_1_i["time"])] if not track_1_i.empty: - n_overlap=len(track_1_i) - distances=[] + n_overlap = len(track_1_i) + distances = [] for i in range(len(track_1_i)): - distance=calculate_distance(track_1_i.iloc[[i]],track_2_i.iloc[[i]],method_distance='xy') + distance = calculate_distance( + track_1_i.iloc[[i]], track_2_i.iloc[[i]], method_distance="xy" + ) distances.append(distance) -# mean_distance=np.mean(distances) - mean_inv_distance=np.mean(1/(1+np.array(distances)/1000)) -# mean_inv_squaredistance=np.mean(1/(1+(np.array(distances)/1000)**2)) - sum_inv_distance=np.sum(1/(1+np.array(distances)/1000)) -# sum_inv_squaredistance=np.sum(1/(1+(np.array(distances)/1000)**2)) - overlap=overlap.append({'cell_1':cell_1, - 'cell_2':cell_2, - 'n_overlap':n_overlap, -# 'mean_distance':mean_distance, - 'mean_inv_distance':mean_inv_distance, -# 'mean_inv_squaredistance':mean_inv_squaredistance, - 'sum_inv_distance':sum_inv_distance, -# 'sum_inv_squaredistance':sum_inv_squaredistance - },ignore_index=True) + # mean_distance=np.mean(distances) + mean_inv_distance = np.mean(1 / (1 + np.array(distances) / 1000)) + # mean_inv_squaredistance=np.mean(1/(1+(np.array(distances)/1000)**2)) + sum_inv_distance = np.sum(1 / (1 + np.array(distances) / 1000)) + # sum_inv_squaredistance=np.sum(1/(1+(np.array(distances)/1000)**2)) + overlap = overlap.append( + { + "cell_1": cell_1, + "cell_2": cell_2, + "n_overlap": n_overlap, + # 'mean_distance':mean_distance, + "mean_inv_distance": mean_inv_distance, + # 'mean_inv_squaredistance':mean_inv_squaredistance, + "sum_inv_distance": sum_inv_distance, + # 'sum_inv_squaredistance':sum_inv_squaredistance + }, + ignore_index=True, + ) if min_sum_inv_distance: - overlap=overlap[(overlap['sum_inv_distance']>=min_sum_inv_distance)] + overlap = overlap[(overlap["sum_inv_distance"] >= min_sum_inv_distance)] if min_mean_inv_distance: - overlap=overlap[(overlap['mean_inv_distance']>=min_mean_inv_distance)] + overlap = overlap[(overlap["mean_inv_distance"] >= min_mean_inv_distance)] - return overlap \ No newline at end of file + return overlap diff --git a/tobac/centerofgravity.py b/tobac/centerofgravity.py index f45a4d8e..eb87a985 100644 --- a/tobac/centerofgravity.py +++ b/tobac/centerofgravity.py @@ -1,161 +1,180 @@ import logging -def calculate_cog(tracks,mass,mask): - '''Caluclate centre of gravity and mass for each individual tracked cell in the simulation + +def calculate_cog(tracks, mass, mask): + """Caluclate centre of gravity and mass for each individual tracked cell in the simulation Parameters ---------- tracks : pandas.DataFrame DataFrame containing trajectories of cell centres - mass : iris.cube.Cube + mass : iris.cube.Cube cube of quantity (need coordinates 'time', 'geopotential_height','projection_x_coordinate' and 'projection_y_coordinate') - mask : iris.cube.Cube + mask : iris.cube.Cube cube containing mask (int > where belonging to cloud volume, 0 everywhere else ) Returns ------- pandas.DataFrame Dataframe containing t,x,y,z positions of centre of gravity and total cloud mass each tracked cells at each timestep - - ''' + + """ from .utils import mask_cube_cell - from iris import Constraint - - logging.info('start calculating centre of gravity for tracked cells') - - tracks_out=tracks[['time','frame','cell','time_cell']] - - for i_row,row in tracks_out.iterrows(): - cell=row['cell'] - 
constraint_time=Constraint(time=row['time']) - mass_i=mass.extract(constraint_time) - mask_i=mask.extract(constraint_time) - mass_masked_i=mask_cube_cell(mass_i,mask_i,cell) - x_M,y_M,z_M,mass_M=center_of_gravity(mass_masked_i) - tracks_out.loc[i_row,'x_M']=float(x_M) - tracks_out.loc[i_row,'y_M']=float(y_M) - tracks_out.loc[i_row,'z_M']=float(z_M) - tracks_out.loc[i_row,'mass']=float(mass_M) - - logging.info('Finished calculating centre of gravity for tracked cells') + from iris import Constraint + + logging.info("start calculating centre of gravity for tracked cells") + + tracks_out = tracks[["time", "frame", "cell", "time_cell"]] + + for i_row, row in tracks_out.iterrows(): + cell = row["cell"] + constraint_time = Constraint(time=row["time"]) + mass_i = mass.extract(constraint_time) + mask_i = mask.extract(constraint_time) + mass_masked_i = mask_cube_cell(mass_i, mask_i, cell) + x_M, y_M, z_M, mass_M = center_of_gravity(mass_masked_i) + tracks_out.loc[i_row, "x_M"] = float(x_M) + tracks_out.loc[i_row, "y_M"] = float(y_M) + tracks_out.loc[i_row, "z_M"] = float(z_M) + tracks_out.loc[i_row, "mass"] = float(mass_M) + + logging.info("Finished calculating centre of gravity for tracked cells") return tracks_out - -def calculate_cog_untracked(mass,mask): - '''caluclate centre of gravity and mass for untracked parts of domain + + +def calculate_cog_untracked(mass, mask): + """caluclate centre of gravity and mass for untracked parts of domain Parameters ---------- - mass : iris.cube.Cube + mass : iris.cube.Cube cube of quantity (need coordinates 'time', 'geopotential_height','projection_x_coordinate' and 'projection_y_coordinate') - - mask : iris.cube.Cube + + mask : iris.cube.Cube cube containing mask (int > where belonging to cloud volume, 0 everywhere else ) Returns ------- pandas.DataFrame Dataframe containing t,x,y,z positions of centre of gravity and total cloud mass for untracked part of dimain - ''' + """ from pandas import DataFrame from .utils import mask_cube_untracked from iris import Constraint - - logging.info('start calculating centre of gravity for untracked parts of the domain') - tracks_out=DataFrame() - time_coord=mass.coord('time') - tracks_out['frame']=range(len(time_coord.points)) - for i_row,row in tracks_out.iterrows(): - time_i=time_coord.units.num2date(time_coord[int(row['frame'])].points[0]) - constraint_time=Constraint(time=time_i) - mass_i=mass.extract(constraint_time) - mask_i=mask.extract(constraint_time) - mass_untracked_i=mask_cube_untracked(mass_i,mask_i) - x_M,y_M,z_M,mass_M=center_of_gravity(mass_untracked_i) - tracks_out.loc[i_row,'time']=time_i - tracks_out.loc[i_row,'x_M']=float(x_M) - tracks_out.loc[i_row,'y_M']=float(y_M) - tracks_out.loc[i_row,'z_M']=float(z_M) - tracks_out.loc[i_row,'mass']=float(mass_M) - - logging.info('Finished calculating centre of gravity for untracked parts of the domain') - + + logging.info( + "start calculating centre of gravity for untracked parts of the domain" + ) + tracks_out = DataFrame() + time_coord = mass.coord("time") + tracks_out["frame"] = range(len(time_coord.points)) + for i_row, row in tracks_out.iterrows(): + time_i = time_coord.units.num2date(time_coord[int(row["frame"])].points[0]) + constraint_time = Constraint(time=time_i) + mass_i = mass.extract(constraint_time) + mask_i = mask.extract(constraint_time) + mass_untracked_i = mask_cube_untracked(mass_i, mask_i) + x_M, y_M, z_M, mass_M = center_of_gravity(mass_untracked_i) + tracks_out.loc[i_row, "time"] = time_i + tracks_out.loc[i_row, "x_M"] = float(x_M) + 
tracks_out.loc[i_row, "y_M"] = float(y_M) + tracks_out.loc[i_row, "z_M"] = float(z_M) + tracks_out.loc[i_row, "mass"] = float(mass_M) + + logging.info( + "Finished calculating centre of gravity for untracked parts of the domain" + ) + return tracks_out + def calculate_cog_domain(mass): - '''caluclate centre of gravity and mass for entire domain + """caluclate centre of gravity and mass for entire domain Parameters ---------- - mass : iris.cube.Cube + mass : iris.cube.Cube cube of quantity (need coordinates 'time', 'geopotential_height','projection_x_coordinate' and 'projection_y_coordinate') Returns ------- pandas.DataFrame Dataframe containing t,x,y,z positions of centre of gravity and total cloud mass - ''' + """ from pandas import DataFrame from iris import Constraint - - logging.info('start calculating centre of gravity for entire domain') - - time_coord=mass.coord('time') - - tracks_out=DataFrame() - tracks_out['frame']=range(len(time_coord.points)) - for i_row,row in tracks_out.iterrows(): - time_i=time_coord.units.num2date(time_coord[int(row['frame'])].points[0]) - constraint_time=Constraint(time=time_i) - mass_i=mass.extract(constraint_time) - x_M,y_M,z_M,mass_M=center_of_gravity(mass_i) - tracks_out.loc[i_row,'time']=time_i - tracks_out.loc[i_row,'x_M']=float(x_M) - tracks_out.loc[i_row,'y_M']=float(y_M) - tracks_out.loc[i_row,'z_M']=float(z_M) - tracks_out.loc[i_row,'mass']=float(mass_M) - - logging.info('Finished calculating centre of gravity for entire domain') + + logging.info("start calculating centre of gravity for entire domain") + + time_coord = mass.coord("time") + + tracks_out = DataFrame() + tracks_out["frame"] = range(len(time_coord.points)) + for i_row, row in tracks_out.iterrows(): + time_i = time_coord.units.num2date(time_coord[int(row["frame"])].points[0]) + constraint_time = Constraint(time=time_i) + mass_i = mass.extract(constraint_time) + x_M, y_M, z_M, mass_M = center_of_gravity(mass_i) + tracks_out.loc[i_row, "time"] = time_i + tracks_out.loc[i_row, "x_M"] = float(x_M) + tracks_out.loc[i_row, "y_M"] = float(y_M) + tracks_out.loc[i_row, "z_M"] = float(z_M) + tracks_out.loc[i_row, "mass"] = float(mass_M) + + logging.info("Finished calculating centre of gravity for entire domain") return tracks_out + def center_of_gravity(cube_in): - '''caluclate centre of gravity and sum of quantity + """caluclate centre of gravity and sum of quantity Parameters ---------- - cube_in : iris.cube.Cube + cube_in : iris.cube.Cube cube (potentially masked) of quantity (need coordinates 'geopotential_height','projection_x_coordinate' and 'projection_y_coordinate') - + Returns ------- - float - x position of centre of gravity - float - y position of centre of gravity - float - z position of centre of gravity - float + float + x position of centre of gravity + float + y position of centre of gravity + float + z position of centre of gravity + float sum of quantity of over unmasked part of the cube - ''' + """ from iris.analysis import SUM import numpy as np - cube_sum=cube_in.collapsed(['bottom_top','south_north','west_east'],SUM) - z=cube_in.coord('geopotential_height') - x=cube_in.coord('projection_x_coordinate') - y=cube_in.coord('projection_y_coordinate') - dimensions_collapse=['model_level_number','x','y'] - for coord in cube_in.coords(): - if (coord.ndim>1 and (cube_in.coord_dims(dimensions_collapse[0])[0] in cube_in.coord_dims(coord) or cube_in.coord_dims(dimensions_collapse[1])[0] in cube_in.coord_dims(coord) or cube_in.coord_dims(dimensions_collapse[2])[0] in 
cube_in.coord_dims(coord))): - cube_in.remove_coord(coord.name()) + + cube_sum = cube_in.collapsed(["bottom_top", "south_north", "west_east"], SUM) + z = cube_in.coord("geopotential_height") + x = cube_in.coord("projection_x_coordinate") + y = cube_in.coord("projection_y_coordinate") + dimensions_collapse = ["model_level_number", "x", "y"] + for coord in cube_in.coords(): + if coord.ndim > 1 and ( + cube_in.coord_dims(dimensions_collapse[0])[0] in cube_in.coord_dims(coord) + or cube_in.coord_dims(dimensions_collapse[1])[0] + in cube_in.coord_dims(coord) + or cube_in.coord_dims(dimensions_collapse[2])[0] + in cube_in.coord_dims(coord) + ): + cube_in.remove_coord(coord.name()) if cube_sum.data > 0: - x=((cube_in*x).collapsed(['model_level_number','x','y'],SUM)/cube_sum).data - y=((cube_in*y).collapsed(['model_level_number','x','y'],SUM)/cube_sum).data - z=((cube_in*z.points).collapsed(['model_level_number','x','y'],SUM)/cube_sum).data + x = ( + (cube_in * x).collapsed(["model_level_number", "x", "y"], SUM) / cube_sum + ).data + y = ( + (cube_in * y).collapsed(["model_level_number", "x", "y"], SUM) / cube_sum + ).data + z = ( + (cube_in * z.points).collapsed(["model_level_number", "x", "y"], SUM) + / cube_sum + ).data else: - x=np.nan - y=np.nan - z=np.nan - variable_sum=cube_sum.data - return(x,y,z,variable_sum) - - - + x = np.nan + y = np.nan + z = np.nan + variable_sum = cube_sum.data + return (x, y, z, variable_sum) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 417bd03c..aca58634 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -6,75 +6,84 @@ from . import utils as tb_utils -def feature_position(hdim1_indices, hdim2_indeces, - vdim_indyces = None, - region_small = None, region_bbox = None, - track_data = None, threshold_i = None, - position_threshold = 'center', - target = None, PBC_flag = 'none', - x_min = 0, x_max = 0, y_min = 0, y_max = 0): - '''Function to determine feature position - +def feature_position( + hdim1_indices, + hdim2_indeces, + vdim_indyces=None, + region_small=None, + region_bbox=None, + track_data=None, + threshold_i=None, + position_threshold="center", + target=None, + PBC_flag="none", + x_min=0, + x_max=0, + y_min=0, + y_max=0, +): + """Function to determine feature position + Parameters ---------- hdim1_indices : list list of indices along hdim1 (typically ```y```) - + hdim2_indeces : list List of indices of feature along hdim2 (typically ```x```) - + vdim_indyces : list, optional List of indices of feature along optional vdim (typically ```z```) - + region_small : 2D or 3D array-like A true/false array containing True where the threshold is met and false where the threshold isn't met. This array should - be the the size specified by region_bbox, and can be a subset of the - overall input array (i.e., ```track_data```). + be the the size specified by region_bbox, and can be a subset of the + overall input array (i.e., ```track_data```). region_bbox : list or tuple with length of 4 or 6 The coordinates that region_small occupies within the total track_data - array. This is in the order that the coordinates come from the + array. This is in the order that the coordinates come from the ```get_label_props_in_dict``` function. For 2D data, this should be: (hdim1 start, hdim 2 start, hdim 1 end, hdim 2 end). For 3D data, this is: (vdim start, hdim1 start, hdim 2 start, vdim end, hdim 1 end, hdim 2 end). 
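As a quick illustration of the region_bbox convention described here (a sketch with made-up indices, not code from the patch): for a 3D region spanning vdim 2-4, hdim1 10-20 and hdim2 30-40, region_bbox would be (2, 10, 30, 5, 21, 41), and the subset is recovered exactly as feature_position does below:

import numpy as np

track_data = np.random.rand(10, 50, 50)       # (vdim, hdim1, hdim2)
region_bbox = (2, 10, 30, 5, 21, 41)          # (starts..., ends...), ends exclusive
track_data_region = track_data[region_bbox[0]:region_bbox[3],
                               region_bbox[1]:region_bbox[4],
                               region_bbox[2]:region_bbox[5]]
assert track_data_region.shape == (3, 11, 11)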
- + track_data : 2D or 3D array-like 2D or 3D array containing the data - + threshold_i : float The threshold value that we are testing against - + position_threshold : {'center', 'extreme', 'weighted_diff', 'weighted abs'} - How to select the single point position from our data. + How to select the single point position from our data. 'center' picks the geometrical centre of the region, and is typically not recommended. 'extreme' picks the maximum or minimum value inside the region (max/min set by ```target```) 'weighted_diff' picks the centre of the region weighted by the distance from the threshold value 'weighted_abs' picks the centre of the region weighted by the absolute values of the field - + target : {'maximum', 'minimum'} Used only when position_threshold is set to 'extreme', this sets whether it is looking for maxima or minima. - + PBC_flag : {'none', 'hdim_1', 'hdim_2', 'both'} Sets whether to use periodic boundaries, and if so in which directions. 'none' means that we do not have periodic boundaries 'hdim_1' means that we are periodic along hdim1 'hdim_2' means that we are periodic along hdim2 'both' means that we are periodic along both horizontal dimensions - - x_min : int + + x_min : int Minimum real x coordinate (for PBCs) - + x_max: int Maximum real x coordinate (for PBCs) - + y_min : int Minimum real y coordinate (for PBCs) - + y_max : int Maximum real y coordinate (for PBCs) - + Returns ------- float @@ -83,52 +92,52 @@ def feature_position(hdim1_indices, hdim2_indeces, feature position along 1st horizontal dimension float feature position along 2nd horizontal dimension - ''' - + """ + # First, if necessary, run PBC processing. - #processing of PBC indices - #checks to see if minimum and maximum values are present in dimensional array - #then if true, adds max value to any indices past the halfway point of their respective dimension - #are we 3D? if so, True. + # processing of PBC indices + # checks to see if minimum and maximum values are present in dimensional array + # then if true, adds max value to any indices past the halfway point of their respective dimension + # are we 3D? if so, True. 
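The comment above compresses the key periodic-boundary idea; a tiny numeric sketch (invented indices, mirroring the halfway-point shift rule used below) shows why the shift matters:

import numpy as np

y_max = 99                                       # hypothetical periodic wall index
hdim1_indices = np.array([97, 98, 99, 0, 1, 2])  # feature straddling the wall
print(hdim1_indices.mean())                      # 49.5: naive centre is mid-domain

# same rule as below: indices below the halfway point get y_max added
wrapped = np.where(hdim1_indices < y_max / 2, hdim1_indices + y_max, hdim1_indices)
print(wrapped.mean())                            # 99.0: centre sits on the wall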
is_3D = False - if PBC_flag == 'hdim_1': - #ONLY periodic in y + if PBC_flag == "hdim_1": + # ONLY periodic in y hdim1_indices_2 = hdim1_indices hdim2_indeces_2 = hdim2_indeces - if (((np.max(hdim1_indices)) == y_max) and((np.min(hdim1_indices)== y_min))): - for y2 in range(0,len(hdim1_indices_2)): + if ((np.max(hdim1_indices)) == y_max) and ((np.min(hdim1_indices) == y_min)): + for y2 in range(0, len(hdim1_indices_2)): h1_ind = hdim1_indices_2[y2] - if h1_ind < (y_max/2): + if h1_ind < (y_max / 2): hdim1_indices_2[y2] = h1_ind + y_max - elif PBC_flag == 'hdim_2': - #ONLY periodic in x + elif PBC_flag == "hdim_2": + # ONLY periodic in x hdim1_indices_2 = hdim1_indices hdim2_indeces_2 = hdim2_indeces - if (((np.max(hdim2_indeces)) == x_max) and((np.min(hdim2_indeces)== x_min))): - for x2 in range(0,len(hdim2_indeces_2)): + if ((np.max(hdim2_indeces)) == x_max) and ((np.min(hdim2_indeces) == x_min)): + for x2 in range(0, len(hdim2_indeces_2)): h2_ind = hdim2_indeces_2[x2] - if h2_ind < (x_max/2): + if h2_ind < (x_max / 2): hdim2_indeces_2[x2] = h2_ind + x_max - elif PBC_flag == 'both': - #DOUBLY periodic boundaries + elif PBC_flag == "both": + # DOUBLY periodic boundaries hdim1_indices_2 = hdim1_indices hdim2_indeces_2 = hdim2_indeces - if (((np.max(hdim1_indices)) == y_max) and((np.min(hdim1_indices)== y_min))): - for y2 in range(0,len(hdim1_indices_2)): + if ((np.max(hdim1_indices)) == y_max) and ((np.min(hdim1_indices) == y_min)): + for y2 in range(0, len(hdim1_indices_2)): h1_ind = hdim1_indices_2[y2] - if h1_ind < (y_max/2): + if h1_ind < (y_max / 2): hdim1_indices_2[y2] = h1_ind + y_max - if (((np.max(hdim2_indeces)) == x_max) and((np.min(hdim2_indeces)== x_min))): - for x2 in range(0,len(hdim2_indeces_2)): + if ((np.max(hdim2_indeces)) == x_max) and ((np.min(hdim2_indeces) == x_min)): + for x2 in range(0, len(hdim2_indeces_2)): h2_ind = hdim2_indeces_2[x2] - if h2_ind < (x_max/2): + if h2_ind < (x_max / 2): hdim2_indeces_2[x2] = h2_ind + x_max else: @@ -139,71 +148,80 @@ def feature_position(hdim1_indices, hdim2_indeces, hdim2_indeces = hdim2_indeces_2 if len(region_bbox) == 4: - #2D case + # 2D case is_3D = False - track_data_region = track_data[region_bbox[0]:region_bbox[2], region_bbox[1]:region_bbox[3]] + track_data_region = track_data[ + region_bbox[0] : region_bbox[2], region_bbox[1] : region_bbox[3] + ] elif len(region_bbox) == 6: - #3D case + # 3D case is_3D = True - track_data_region = track_data[region_bbox[0]:region_bbox[3], region_bbox[1]:region_bbox[4], region_bbox[2]:region_bbox[5]] + track_data_region = track_data[ + region_bbox[0] : region_bbox[3], + region_bbox[1] : region_bbox[4], + region_bbox[2] : region_bbox[5], + ] - if position_threshold=='center': + if position_threshold == "center": # get position as geometrical centre of identified region: - hdim1_index=np.mean(hdim1_indices) - hdim2_index=np.mean(hdim2_indeces) + hdim1_index = np.mean(hdim1_indices) + hdim2_index = np.mean(hdim2_indeces) if is_3D: vdim_index = np.mean(vdim_indyces) - elif position_threshold=='extreme': - #get position as max/min position inside the identified region: - if target == 'maximum': - index=np.argmax(track_data_region[region_small]) - if target == 'minimum': - index=np.argmin(track_data_region[region_small]) - hdim1_index=hdim1_indices[index] - hdim2_index=hdim2_indeces[index] + elif position_threshold == "extreme": + # get position as max/min position inside the identified region: + if target == "maximum": + index = np.argmax(track_data_region[region_small]) + if target == 
"minimum": + index = np.argmin(track_data_region[region_small]) + hdim1_index = hdim1_indices[index] + hdim2_index = hdim2_indeces[index] if is_3D: vdim_index = vdim_indyces[index] - elif position_threshold=='weighted_diff': + elif position_threshold == "weighted_diff": # get position as centre of identified region, weighted by difference from the threshold: - weights=abs(track_data_region[region_small]-threshold_i) - if sum(weights)==0: - weights=None - hdim1_index=np.average(hdim1_indices,weights=weights) - hdim2_index=np.average(hdim2_indeces,weights=weights) + weights = abs(track_data_region[region_small] - threshold_i) + if sum(weights) == 0: + weights = None + hdim1_index = np.average(hdim1_indices, weights=weights) + hdim2_index = np.average(hdim2_indeces, weights=weights) if is_3D: - vdim_index = np.average(vdim_indyces,weights=weights) + vdim_index = np.average(vdim_indyces, weights=weights) - elif position_threshold=='weighted_abs': + elif position_threshold == "weighted_abs": # get position as centre of identified region, weighted by absolute values if the field: - weights=abs(track_data[region_small]) - if sum(weights)==0: - weights=None - hdim1_index=np.average(hdim1_indices,weights=weights) - hdim2_index=np.average(hdim2_indeces,weights=weights) + weights = abs(track_data[region_small]) + if sum(weights) == 0: + weights = None + hdim1_index = np.average(hdim1_indices, weights=weights) + hdim2_index = np.average(hdim2_indeces, weights=weights) if is_3D: - vdim_index = np.average(vdim_indyces,weights=weights) + vdim_index = np.average(vdim_indyces, weights=weights) else: - raise ValueError('position_threshold must be center,extreme,weighted_diff or weighted_abs') - - #re-transform of any coords beyond the boundaries - (should be) general enough to work for any variety of PBC - #as no x or y points will be beyond the boundaries if we haven't transformed them in the first place - if (PBC_flag == 'hdim_1') or (PBC_flag == 'hdim_2') or (PBC_flag == 'both'): + raise ValueError( + "position_threshold must be center,extreme,weighted_diff or weighted_abs" + ) + + # re-transform of any coords beyond the boundaries - (should be) general enough to work for any variety of PBC + # as no x or y points will be beyond the boundaries if we haven't transformed them in the first place + if (PBC_flag == "hdim_1") or (PBC_flag == "hdim_2") or (PBC_flag == "both"): if hdim1_index > y_max: hdim1_index = hdim1_index - y_max if hdim2_index > x_max: - hdim2_index = hdim2_index - x_max - + hdim2_index = hdim2_index - x_max + if is_3D: return vdim_index, hdim1_index, hdim2_index else: - return hdim1_index,hdim2_index + return hdim1_index, hdim2_index -def test_overlap(region_inner,region_outer): - '''function to test for overlap between two regions (TODO: probably scope for further speedup here) + +def test_overlap(region_inner, region_outer): + """function to test for overlap between two regions (TODO: probably scope for further speedup here) Parameters ---------- @@ -216,12 +234,13 @@ def test_overlap(region_inner,region_outer): ------- bool True if there are any shared points between the two regions - ''' - overlap=frozenset(region_outer).isdisjoint(region_inner) + """ + overlap = frozenset(region_outer).isdisjoint(region_inner) return not overlap -def remove_parents(features_thresholds,regions_i,regions_old): - '''function to remove features whose regions surround newly detected feature regions + +def remove_parents(features_thresholds, regions_i, regions_old): + """function to remove features whose 
regions surround newly detected feature regions Parameters ---------- @@ -236,44 +255,50 @@ def remove_parents(features_thresholds,regions_i,regions_old): ------- pandas.DataFrame Dataframe containing detected features excluding those that are superseded by newly detected ones - ''' - #list_remove=[] + """ + # list_remove=[] try: all_curr_pts = np.concatenate([vals for idx, vals in regions_i.items()]) all_old_pts = np.concatenate([vals for idx, vals in regions_old.items()]) except ValueError: - #the case where there are no regions + # the case where there are no regions return features_thresholds old_feat_arr = np.empty((len(all_old_pts))) curr_loc = 0 for idx_old in regions_old: - old_feat_arr[curr_loc:curr_loc+len(regions_old[idx_old])] = idx_old - curr_loc+=len(regions_old[idx_old]) + old_feat_arr[curr_loc : curr_loc + len(regions_old[idx_old])] = idx_old + curr_loc += len(regions_old[idx_old]) - common_pts, common_ix_new, common_ix_old = np.intersect1d(all_curr_pts, all_old_pts, return_indices=True) + common_pts, common_ix_new, common_ix_old = np.intersect1d( + all_curr_pts, all_old_pts, return_indices=True + ) list_remove = np.unique(old_feat_arr[common_ix_old]) # remove parent regions: if features_thresholds is not None: - features_thresholds=features_thresholds[~features_thresholds['idx'].isin(list_remove)] + features_thresholds = features_thresholds[ + ~features_thresholds["idx"].isin(list_remove) + ] return features_thresholds - -def feature_detection_threshold(data_i,i_time, - threshold=None, - min_num=0, - target='maximum', - position_threshold='center', - sigma_threshold=0.5, - n_erosion_threshold=0, - n_min_threshold=0, - min_distance=0, - idx_start=0, - PBC_flag='none', - vertical_axis = None,): - '''function to find features based on individual threshold value +def feature_detection_threshold( + data_i, + i_time, + threshold=None, + min_num=0, + target="maximum", + position_threshold="center", + sigma_threshold=0.5, + n_erosion_threshold=0, + n_min_threshold=0, + min_distance=0, + idx_start=0, + PBC_flag="none", + vertical_axis=None, +): + """function to find features based on individual threshold value Parameters ---------- @@ -308,254 +333,304 @@ def feature_detection_threshold(data_i,i_time, Returns ------- - pandas DataFrame + pandas DataFrame detected features for individual threshold dict dictionary containing the regions above/below threshold used for each feature (feature ids as keys) - ''' + """ from skimage.measure import label from skimage.morphology import binary_erosion from copy import deepcopy # If we are given a 3D data array, we should do 3D feature detection. 
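remove_parents above works purely on flattened grid-point indices; the following standalone sketch (toy indices, not patch code) reproduces its intersect1d bookkeeping to show how a superseded parent feature is identified:

import numpy as np

regions_old = {1: np.array([10, 11, 12]), 2: np.array([40, 41])}  # idx -> points
regions_i = {5: np.array([12, 13, 14])}   # new region shares point 12 with idx 1

all_curr_pts = np.concatenate(list(regions_i.values()))
all_old_pts = np.concatenate(list(regions_old.values()))
# label every old point with the feature idx it belongs to
old_feat_arr = np.concatenate([np.full(len(v), k) for k, v in regions_old.items()])

_, _, common_ix_old = np.intersect1d(all_curr_pts, all_old_pts, return_indices=True)
print(np.unique(old_feat_arr[common_ix_old]))  # [1] -> idx 1 dropped as a parent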
-    is_3D = len(data_i.shape)==3
+    is_3D = len(data_i.shape) == 3

-    # We need to transpose the input data
+    # We need to transpose the input data so that the vertical axis comes first
     if is_3D:
         if vertical_axis == 1:
-            data_i = np.transpose(data_i, axes=(1,0,2))
+            data_i = np.transpose(data_i, axes=(1, 0, 2))
         elif vertical_axis == 2:
-            data_i = np.transpose(data_i, axes=(2,0,1))
+            data_i = np.transpose(data_i, axes=(2, 0, 1))

     # if looking for maxima, only include values at or above the threshold:
-    if target == 'maximum':
-        mask=(data_i >= threshold)
+    if target == "maximum":
+        mask = data_i >= threshold
     # if looking for minima, only include values at or below the threshold:
-    elif target == 'minimum':
-        mask=(data_i <= threshold)
+    elif target == "minimum":
+        mask = data_i <= threshold

-    # erode selected regions by n pixels
-    if n_erosion_threshold>0:
+    # erode selected regions by n pixels
+    if n_erosion_threshold > 0:
         if is_3D:
-            selem=np.ones((n_erosion_threshold,n_erosion_threshold, n_erosion_threshold))
+            selem = np.ones(
+                (n_erosion_threshold, n_erosion_threshold, n_erosion_threshold)
+            )
         else:
-            selem=np.ones((n_erosion_threshold,n_erosion_threshold))
-        mask=binary_erosion(mask,selem).astype(bool)
+            selem = np.ones((n_erosion_threshold, n_erosion_threshold))
+        mask = binary_erosion(mask, selem).astype(bool)

     # detect individual regions, label and count the number of pixels included:
-    labels, num_labels = label(mask, background=0, return_num = True)
+    labels, num_labels = label(mask, background=0, return_num=True)
     if not is_3D:
         # let's transpose labels to a 1,y,x array to make calculations etc easier.
         labels = labels[np.newaxis, :, :]
     z_min = 0
-    z_max = labels.shape[0]
+    z_max = labels.shape[0]
     y_min = 0
     y_max = labels.shape[1] - 1
     x_min = 0
     x_max = labels.shape[2] - 1

-
-    #deal with PBCs
+    # deal with PBCs
     # all options that involve dealing with periodic boundaries
-    pbc_options = ['hdim_1', 'hdim_2', 'both']
+    pbc_options = ["hdim_1", "hdim_2", "both"]

-    # we need to deal with PBCs in some way.
+    # we need to deal with PBCs in some way.
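The mask/erode/label sequence above is plain scikit-image; a minimal 2D sketch with a made-up field (the 1x1 erosion footprint here is a no-op and only illustrates where `selem` plugs in):

```python
import numpy as np
from skimage.measure import label
from skimage.morphology import binary_erosion

data = np.zeros((6, 6))
data[1:3, 1:3] = 10.0  # first blob
data[4:6, 4:6] = 10.0  # second blob, not touching the first

mask = data >= 5.0  # target == "maximum" with threshold 5.0
selem = np.ones((1, 1))  # n_erosion_threshold == 1
mask = binary_erosion(mask, selem).astype(bool)
labels, num_labels = label(mask, background=0, return_num=True)
print(num_labels)  # -> 2 disconnected regions
```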
if PBC_flag in pbc_options: # - #create our copy of `labels` to edit + # create our copy of `labels` to edit labels_2 = deepcopy(labels) - #points we've already edited + # points we've already edited skip_list = np.array([]) - #labels that touch the PBC walls + # labels that touch the PBC walls wall_labels = np.array([]) - + if num_labels > 0: all_label_props = tb_utils.get_label_props_in_dict(labels) - [all_labels_max_size, all_label_locs_v, all_label_locs_h1, all_label_locs_h2 - ] = tb_utils.get_indices_of_labels_from_reg_prop_dict(all_label_props) - - #find the points along the boundaries - - #along hdim_1 or both horizontal boundaries - if PBC_flag == 'hdim_1' or PBC_flag == 'both': - #north wall - n_wall = np.unique(labels[:,y_max,:]) - wall_labels = np.append(wall_labels,n_wall) - - #south wall - s_wall = np.unique(labels[:,y_min,:]) - wall_labels = np.append(wall_labels,s_wall) - - #along hdim_2 or both horizontal boundaries - if PBC_flag == 'hdim_2' or PBC_flag == 'both': - #east wall - e_wall = np.unique(labels[:,:,x_max]) - wall_labels = np.append(wall_labels,e_wall) - - #west wall - w_wall = np.unique(labels[:,:,x_min]) - wall_labels = np.append(wall_labels,w_wall) - - + [ + all_labels_max_size, + all_label_locs_v, + all_label_locs_h1, + all_label_locs_h2, + ] = tb_utils.get_indices_of_labels_from_reg_prop_dict(all_label_props) + + # find the points along the boundaries + + # along hdim_1 or both horizontal boundaries + if PBC_flag == "hdim_1" or PBC_flag == "both": + # north wall + n_wall = np.unique(labels[:, y_max, :]) + wall_labels = np.append(wall_labels, n_wall) + + # south wall + s_wall = np.unique(labels[:, y_min, :]) + wall_labels = np.append(wall_labels, s_wall) + + # along hdim_2 or both horizontal boundaries + if PBC_flag == "hdim_2" or PBC_flag == "both": + # east wall + e_wall = np.unique(labels[:, :, x_max]) + wall_labels = np.append(wall_labels, e_wall) + + # west wall + w_wall = np.unique(labels[:, :, x_min]) + wall_labels = np.append(wall_labels, w_wall) + wall_labels = np.unique(wall_labels) for label_ind in wall_labels: - #create list for skip labels for this wall label only + # create list for skip labels for this wall label only skip_list_thisind = [] # 0 isn't a real index if label_ind == 0: continue - # skip this label if we have already dealt with it. + # skip this label if we have already dealt with it. if np.any(label_ind == skip_list): continue - - #get all locations of this label. - #TODO: harmonize x/y/z vs hdim1/hdim2/vdim. + + # get all locations of this label. + # TODO: harmonize x/y/z vs hdim1/hdim2/vdim. label_locs_v = all_label_locs_v[label_ind] label_locs_h1 = all_label_locs_h1[label_ind] label_locs_h2 = all_label_locs_h2[label_ind] - - #loop through every point in the label + + # loop through every point in the label for label_z, label_y, label_x in zip( - label_locs_v, label_locs_h1, label_locs_h2): - # check if this is the special case of being a corner point. + label_locs_v, label_locs_h1, label_locs_h2 + ): + # check if this is the special case of being a corner point. # if it's doubly periodic AND on both x and y boundaries, it's a corner point - # and we have to look at the other corner. - # here, we will only look at the corner point and let the below deal with x/y only. - if PBC_flag == 'both' and (np.any(label_y == [y_min,y_max]) and np.any(label_x == [x_min,x_max])): - - #adjust x and y points to the other side + # and we have to look at the other corner. 
+ # here, we will only look at the corner point and let the below deal with x/y only. + if PBC_flag == "both" and ( + np.any(label_y == [y_min, y_max]) + and np.any(label_x == [x_min, x_max]) + ): + + # adjust x and y points to the other side y_val_alt = tb_utils.adjust_pbc_point(label_y, y_min, y_max) x_val_alt = tb_utils.adjust_pbc_point(label_x, x_min, x_max) - - label_on_corner = labels[label_z,y_val_alt,x_val_alt] - - if((label_on_corner !=0) and (~np.any(label_on_corner==skip_list))): - #alt_inds = np.where(labels==alt_label_3) - #get a list of indices where the label on the corner is so we can switch them - #in the new list. - - labels_2[all_label_locs_v[label_on_corner], - all_label_locs_h1[label_on_corner], - all_label_locs_h2[label_on_corner]] = label_ind - skip_list = np.append(skip_list,label_on_corner) - skip_list_thisind = np.append(skip_list_thisind,label_on_corner) - - #if it's labeled and has already been dealt with for this label - elif((label_on_corner !=0) and (np.any(label_on_corner==skip_list)) and (np.any(label_on_corner==skip_list_thisind))): - #print("skip_list_thisind label - has already been treated this index") + + label_on_corner = labels[label_z, y_val_alt, x_val_alt] + + if (label_on_corner != 0) and ( + ~np.any(label_on_corner == skip_list) + ): + # alt_inds = np.where(labels==alt_label_3) + # get a list of indices where the label on the corner is so we can switch them + # in the new list. + + labels_2[ + all_label_locs_v[label_on_corner], + all_label_locs_h1[label_on_corner], + all_label_locs_h2[label_on_corner], + ] = label_ind + skip_list = np.append(skip_list, label_on_corner) + skip_list_thisind = np.append( + skip_list_thisind, label_on_corner + ) + + # if it's labeled and has already been dealt with for this label + elif ( + (label_on_corner != 0) + and (np.any(label_on_corner == skip_list)) + and (np.any(label_on_corner == skip_list_thisind)) + ): + # print("skip_list_thisind label - has already been treated this index") continue - - #if it's labeled and has already been dealt with via a previous label - elif((label_on_corner !=0) and (np.any(label_on_corner==skip_list)) and (~np.any(label_on_corner==skip_list_thisind))): - #find the updated label, and overwrite all of label_ind indices with updated label - labels_2_alt = labels_2[label_z,y_val_alt,x_val_alt] - labels_2[label_locs_v, - label_locs_h1, - label_locs_h2] = labels_2_alt - skip_list = np.append(skip_list,label_ind) + + # if it's labeled and has already been dealt with via a previous label + elif ( + (label_on_corner != 0) + and (np.any(label_on_corner == skip_list)) + and (~np.any(label_on_corner == skip_list_thisind)) + ): + # find the updated label, and overwrite all of label_ind indices with updated label + labels_2_alt = labels_2[label_z, y_val_alt, x_val_alt] + labels_2[ + label_locs_v, label_locs_h1, label_locs_h2 + ] = labels_2_alt + skip_list = np.append(skip_list, label_ind) break - + # on the hdim1 boundary and periodic on hdim1 - if (PBC_flag == 'hdim_1' or PBC_flag == 'both') and np.any(label_y == [y_min,y_max]): + if (PBC_flag == "hdim_1" or PBC_flag == "both") and np.any( + label_y == [y_min, y_max] + ): y_val_alt = tb_utils.adjust_pbc_point(label_y, y_min, y_max) - #get the label value on the opposite side - label_alt = labels[label_z,y_val_alt,label_x] - - #if it's labeled and not already been dealt with - if((label_alt !=0) and (~np.any(label_alt==skip_list))): - #find the indices where it has the label value on opposite side and change their value to original side - 
#print(all_label_locs_v[label_alt], alt_inds[0]) - labels_2[all_label_locs_v[label_alt], - all_label_locs_h1[label_alt], - all_label_locs_h2[label_alt]] = label_ind - #we have already dealt with this label. - skip_list = np.append(skip_list,label_alt) - skip_list_thisind = np.append(skip_list_thisind,label_alt) - - #if it's labeled and has already been dealt with for this label - elif((label_alt !=0) and (np.any(label_alt==skip_list)) and (np.any(label_alt==skip_list_thisind))): + # get the label value on the opposite side + label_alt = labels[label_z, y_val_alt, label_x] + + # if it's labeled and not already been dealt with + if (label_alt != 0) and (~np.any(label_alt == skip_list)): + # find the indices where it has the label value on opposite side and change their value to original side + # print(all_label_locs_v[label_alt], alt_inds[0]) + labels_2[ + all_label_locs_v[label_alt], + all_label_locs_h1[label_alt], + all_label_locs_h2[label_alt], + ] = label_ind + # we have already dealt with this label. + skip_list = np.append(skip_list, label_alt) + skip_list_thisind = np.append(skip_list_thisind, label_alt) + + # if it's labeled and has already been dealt with for this label + elif ( + (label_alt != 0) + and (np.any(label_alt == skip_list)) + and (np.any(label_alt == skip_list_thisind)) + ): continue - - #if it's labeled and has already been dealt with - elif((label_alt !=0) and (np.any(label_alt==skip_list)) and (~np.any(label_alt==skip_list_thisind))): - #find the updated label, and overwrite all of label_ind indices with updated label - labels_2_alt = labels_2[label_z,y_val_alt,label_x] - labels_2[label_locs_v, - label_locs_h1, - label_locs_h2] = labels_2_alt - skip_list = np.append(skip_list,label_ind) + + # if it's labeled and has already been dealt with + elif ( + (label_alt != 0) + and (np.any(label_alt == skip_list)) + and (~np.any(label_alt == skip_list_thisind)) + ): + # find the updated label, and overwrite all of label_ind indices with updated label + labels_2_alt = labels_2[label_z, y_val_alt, label_x] + labels_2[ + label_locs_v, label_locs_h1, label_locs_h2 + ] = labels_2_alt + skip_list = np.append(skip_list, label_ind) break - - if (PBC_flag == 'hdim_2' or PBC_flag == 'both') and np.any(label_x == [x_min,x_max]): + + if (PBC_flag == "hdim_2" or PBC_flag == "both") and np.any( + label_x == [x_min, x_max] + ): x_val_alt = tb_utils.adjust_pbc_point(label_x, x_min, x_max) - #get the label value on the opposite side - label_alt = labels[label_z,label_y,x_val_alt] - - #if it's labeled and not already been dealt with - if((label_alt !=0) and (~np.any(label_alt==skip_list))): - #find the indices where it has the label value on opposite side and change their value to original side - labels_2[all_label_locs_v[label_alt], - all_label_locs_h1[label_alt], - all_label_locs_h2[label_alt]] = label_ind - #we have already dealt with this label. 
- skip_list = np.append(skip_list,label_alt) - skip_list_thisind = np.append(skip_list_thisind,label_alt) - - #if it's labeled and has already been dealt with for this label - elif((label_alt !=0) and (np.any(label_alt==skip_list)) and (np.any(label_alt==skip_list_thisind))): + # get the label value on the opposite side + label_alt = labels[label_z, label_y, x_val_alt] + + # if it's labeled and not already been dealt with + if (label_alt != 0) and (~np.any(label_alt == skip_list)): + # find the indices where it has the label value on opposite side and change their value to original side + labels_2[ + all_label_locs_v[label_alt], + all_label_locs_h1[label_alt], + all_label_locs_h2[label_alt], + ] = label_ind + # we have already dealt with this label. + skip_list = np.append(skip_list, label_alt) + skip_list_thisind = np.append(skip_list_thisind, label_alt) + + # if it's labeled and has already been dealt with for this label + elif ( + (label_alt != 0) + and (np.any(label_alt == skip_list)) + and (np.any(label_alt == skip_list_thisind)) + ): continue - - #if it's labeled and has already been dealt with - elif((label_alt !=0) and (np.any(label_alt==skip_list)) and (~np.any(label_alt==skip_list_thisind))): - #find the updated label, and overwrite all of label_ind indices with updated label - labels_2_alt = labels_2[label_z,label_y,x_val_alt] - labels_2[label_locs_v, - label_locs_h1, - label_locs_h2] = labels_2_alt - skip_list = np.append(skip_list,label_ind) - break + # if it's labeled and has already been dealt with + elif ( + (label_alt != 0) + and (np.any(label_alt == skip_list)) + and (~np.any(label_alt == skip_list_thisind)) + ): + # find the updated label, and overwrite all of label_ind indices with updated label + labels_2_alt = labels_2[label_z, label_y, x_val_alt] + labels_2[ + label_locs_v, label_locs_h1, label_locs_h2 + ] = labels_2_alt + skip_list = np.append(skip_list, label_ind) + break - - #copy over new labels after we have adjusted everything + # copy over new labels after we have adjusted everything labels = labels_2 - - elif PBC_flag == 'none': + + elif PBC_flag == "none": pass else: - #TODO: fix periodic flag to be str, then update this with the possible values. - raise ValueError("Options for periodic are currently: none, hdim_1, hdim_2, both") + # TODO: fix periodic flag to be str, then update this with the possible values. + raise ValueError( + "Options for periodic are currently: none, hdim_1, hdim_2, both" + ) - #num_labels = num_labels - len(skip_list) + # num_labels = num_labels - len(skip_list) # END PBC treatment - # we need to get label properties again after we handle PBCs. + # we need to get label properties again after we handle PBCs. 
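The end state of the wall-label bookkeeping above can be illustrated without the `tb_utils` helpers: two labels that meet across a periodic hdim_2 boundary collapse into one. A hand-rolled toy version (the real loop additionally handles corner points and already-merged labels via `skip_list`):

```python
import numpy as np
from skimage.measure import label

# one feature wrapping the hdim_2 (x) boundary of a (z, y, x) = (1, 4, 6) domain
mask = np.zeros((1, 4, 6), dtype=bool)
mask[0, 1, :2] = True   # touches the west wall
mask[0, 1, -2:] = True  # touches the east wall
labels = label(mask, background=0)
print(np.unique(labels))  # [0 1 2]: two labels before PBC treatment

# merge labels that sit opposite each other on the two walls
for z, y in zip(*np.nonzero((labels[:, :, 0] > 0) & (labels[:, :, -1] > 0))):
    labels[labels == labels[z, y, -1]] = labels[z, y, 0]
print(np.unique(labels))  # [0 1]: a single wrapped feature
```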
label_props = tb_utils.get_label_props_in_dict(labels) - if len(label_props)>0: - [total_indices_all, vdim_indyces_all, hdim1_indices_all, hdim2_indices_all] = tb_utils.get_indices_of_labels_from_reg_prop_dict(label_props) - - - #values, count = np.unique(labels[:,:].ravel(), return_counts=True) - #values_counts=dict(zip(values, count)) + if len(label_props) > 0: + [ + total_indices_all, + vdim_indyces_all, + hdim1_indices_all, + hdim2_indices_all, + ] = tb_utils.get_indices_of_labels_from_reg_prop_dict(label_props) + + # values, count = np.unique(labels[:,:].ravel(), return_counts=True) + # values_counts=dict(zip(values, count)) # Filter out regions that have less pixels than n_min_threshold - #values_counts={k:v for k, v in values_counts.items() if v>n_min_threshold} + # values_counts={k:v for k, v in values_counts.items() if v>n_min_threshold} - #check if not entire domain filled as one feature - if num_labels>0: - #create empty list to store individual features for this threshold - list_features_threshold=[] - #create empty dict to store regions for individual features for this threshold - regions=dict() - #create emptry list of features to remove from parent threshold value + # check if not entire domain filled as one feature + if num_labels > 0: + # create empty list to store individual features for this threshold + list_features_threshold = [] + # create empty dict to store regions for individual features for this threshold + regions = dict() + # create emptry list of features to remove from parent threshold value region = np.empty(mask.shape, dtype=bool) - #loop over individual regions: + # loop over individual regions: for cur_idx in total_indices_all: - #skip this if there aren't enough points to be considered a real feature - #as defined above by n_min_threshold + # skip this if there aren't enough points to be considered a real feature + # as defined above by n_min_threshold curr_count = total_indices_all[cur_idx] - if curr_count <=n_min_threshold: + if curr_count <= n_min_threshold: continue if is_3D: vdim_indyces = vdim_indyces_all[cur_idx] @@ -565,96 +640,149 @@ def feature_detection_threshold(data_i,i_time, hdim2_indeces = hdim2_indices_all[cur_idx] label_bbox = label_props[cur_idx].bbox - bbox_zstart, bbox_ystart, bbox_xstart, bbox_zend, bbox_yend, bbox_xend = label_bbox + ( + bbox_zstart, + bbox_ystart, + bbox_xstart, + bbox_zend, + bbox_yend, + bbox_xend, + ) = label_bbox bbox_zsize = bbox_zend - bbox_zstart bbox_xsize = bbox_xend - bbox_xstart bbox_ysize = bbox_yend - bbox_ystart - #build small region box + # build small region box if is_3D: region_small = np.full((bbox_zsize, bbox_ysize, bbox_xsize), False) - region_small[vdim_indyces-bbox_zstart, - hdim1_indices-bbox_ystart, hdim2_indeces-bbox_xstart] = True + region_small[ + vdim_indyces - bbox_zstart, + hdim1_indices - bbox_ystart, + hdim2_indeces - bbox_xstart, + ] = True else: region_small = np.full((bbox_ysize, bbox_xsize), False) - region_small[hdim1_indices-bbox_ystart, hdim2_indeces-bbox_xstart] = True - #we are 2D and need to remove the dummy 3D coordinate. - label_bbox = (label_bbox[1], label_bbox[2], label_bbox[4], label_bbox[5]) - - #[hdim1_indices,hdim2_indeces]= np.nonzero(region) - #write region for individual threshold and feature to dict + region_small[ + hdim1_indices - bbox_ystart, hdim2_indeces - bbox_xstart + ] = True + # we are 2D and need to remove the dummy 3D coordinate. 
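The bbox-relative construction of `region_small` above is easiest to see with hand-made indices; a 2D sketch using the scikit-image bbox convention `(ystart, xstart, yend, xend)` with the end exclusive:

```python
import numpy as np

hdim1_indices = np.array([3, 3, 4])  # y positions of the labelled points
hdim2_indeces = np.array([7, 8, 8])  # x positions (spelling follows the code above)
bbox_ystart, bbox_xstart, bbox_yend, bbox_xend = 3, 7, 5, 9

region_small = np.full((bbox_yend - bbox_ystart, bbox_xend - bbox_xstart), False)
region_small[hdim1_indices - bbox_ystart, hdim2_indeces - bbox_xstart] = True
print(region_small)
# [[ True  True]
#  [False  True]]
```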
+ label_bbox = ( + label_bbox[1], + label_bbox[2], + label_bbox[4], + label_bbox[5], + ) + + # [hdim1_indices,hdim2_indeces]= np.nonzero(region) + # write region for individual threshold and feature to dict if is_3D: - region_i=list(zip(hdim1_indices*x_max*z_max +hdim2_indeces* z_max + vdim_indyces)) + region_i = list( + zip( + hdim1_indices * x_max * z_max + + hdim2_indeces * z_max + + vdim_indyces + ) + ) else: - region_i=np.array(hdim1_indices*x_max+hdim2_indeces) + region_i = np.array(hdim1_indices * x_max + hdim2_indeces) - regions[cur_idx+idx_start]=region_i + regions[cur_idx + idx_start] = region_i # Determine feature position for region by one of the following methods: - single_indices=feature_position( - hdim1_indices,hdim2_indeces, + single_indices = feature_position( + hdim1_indices, + hdim2_indeces, vdim_indyces=vdim_indyces, - region_small = region_small, region_bbox = label_bbox, - track_data = data_i, threshold_i = threshold, - position_threshold = position_threshold, target = target, - PBC_flag = PBC_flag, - x_min = x_min, x_max = x_max, y_min = y_min, y_max = y_max) + region_small=region_small, + region_bbox=label_bbox, + track_data=data_i, + threshold_i=threshold, + position_threshold=position_threshold, + target=target, + PBC_flag=PBC_flag, + x_min=x_min, + x_max=x_max, + y_min=y_min, + y_max=y_max, + ) if is_3D: vdim_index, hdim1_index, hdim2_index = single_indices else: hdim1_index, hdim2_index = single_indices - #create individual DataFrame row in tracky format for identified feature - appending_dict = {'frame': int(i_time), - 'idx':cur_idx+idx_start, - 'hdim_1': hdim1_index, - 'hdim_2':hdim2_index, - 'num':curr_count, - 'threshold_value':threshold} - column_names = ['frame', 'idx', 'hdim_1', 'hdim_2', 'num', 'threshold_value'] + # create individual DataFrame row in tracky format for identified feature + appending_dict = { + "frame": int(i_time), + "idx": cur_idx + idx_start, + "hdim_1": hdim1_index, + "hdim_2": hdim2_index, + "num": curr_count, + "threshold_value": threshold, + } + column_names = [ + "frame", + "idx", + "hdim_1", + "hdim_2", + "num", + "threshold_value", + ] if is_3D: - appending_dict['vdim'] = vdim_index - column_names = ['frame', 'idx', 'vdim', 'hdim_1', 'hdim_2', 'num', 'threshold_value'] + appending_dict["vdim"] = vdim_index + column_names = [ + "frame", + "idx", + "vdim", + "hdim_1", + "hdim_2", + "num", + "threshold_value", + ] list_features_threshold.append(appending_dict) - #after looping thru proto-features, check if any exceed num threshold - #if they do not, provide a blank pandas df and regions dict + # after looping thru proto-features, check if any exceed num threshold + # if they do not, provide a blank pandas df and regions dict if list_features_threshold == []: - #print("no features above num value at threshold: ",threshold) - features_threshold=pd.DataFrame() - regions=dict() - #if they do, provide a dataframe with features organized with 2D and 3D metadata + # print("no features above num value at threshold: ",threshold) + features_threshold = pd.DataFrame() + regions = dict() + # if they do, provide a dataframe with features organized with 2D and 3D metadata else: - #print("at least one feature above num value at threshold: ",threshold) - #print("column_names, after cur_idx loop: ",column_names) - features_threshold=pd.DataFrame(list_features_threshold, columns = column_names) - - #features_threshold=pd.DataFrame(list_features_threshold, columns = column_names) + # print("at least one feature above num value at threshold: 
",threshold) + # print("column_names, after cur_idx loop: ",column_names) + features_threshold = pd.DataFrame( + list_features_threshold, columns=column_names + ) + + # features_threshold=pd.DataFrame(list_features_threshold, columns = column_names) else: - features_threshold=pd.DataFrame() - regions=dict() - + features_threshold = pd.DataFrame() + regions = dict() + return features_threshold, regions - -def feature_detection_multithreshold_timestep(data_i,i_time, - threshold=None, - min_num=0, - target='maximum', - position_threshold='center', - sigma_threshold=0.5, - n_erosion_threshold=0, - n_min_threshold=0, - min_distance=0, - feature_number_start=1, - PBC_flag='none', - vertical_axis = None, - ): - '''function to find features in each timestep based on iteratively finding regions above/below a set of thresholds - + + +def feature_detection_multithreshold_timestep( + data_i, + i_time, + threshold=None, + min_num=0, + target="maximum", + position_threshold="center", + sigma_threshold=0.5, + n_erosion_threshold=0, + n_min_threshold=0, + min_distance=0, + feature_number_start=1, + PBC_flag="none", + vertical_axis=None, +): + """function to find features in each timestep based on iteratively finding regions above/below a set of thresholds + Parameters ---------- data_i : iris.cube.Cube 2D field to perform the feature detection (single timestep) i_time : int - number of the current timestep + number of the current timestep threshold : list of floats threshold values used to select target regions to track dxy : float @@ -684,67 +812,78 @@ def feature_detection_multithreshold_timestep(data_i,i_time, Returns ------- - pandas DataFrame + pandas DataFrame detected features for individual timestep - ''' + """ # consider switching to scikit image filter? from scipy.ndimage.filters import gaussian_filter # get actual numpy array track_data = data_i.core_data() # smooth data slightly to create rounded, continuous field - track_data=gaussian_filter(track_data, sigma=sigma_threshold) + track_data = gaussian_filter(track_data, sigma=sigma_threshold) # create empty lists to store regions and features for individual timestep - features_thresholds=pd.DataFrame() - for i_threshold,threshold_i in enumerate(threshold): - if (i_threshold>0 and not features_thresholds.empty): - idx_start=features_thresholds['idx'].max()+feature_number_start + features_thresholds = pd.DataFrame() + for i_threshold, threshold_i in enumerate(threshold): + if i_threshold > 0 and not features_thresholds.empty: + idx_start = features_thresholds["idx"].max() + feature_number_start else: - idx_start=feature_number_start-1 - features_threshold_i,regions_i=feature_detection_threshold(track_data,i_time, - threshold=threshold_i, - sigma_threshold=sigma_threshold, - min_num=min_num, - target=target, - position_threshold=position_threshold, - n_erosion_threshold=n_erosion_threshold, - n_min_threshold=n_min_threshold, - min_distance=min_distance, - idx_start=idx_start, - PBC_flag = PBC_flag, - vertical_axis = vertical_axis, - ) + idx_start = feature_number_start - 1 + features_threshold_i, regions_i = feature_detection_threshold( + track_data, + i_time, + threshold=threshold_i, + sigma_threshold=sigma_threshold, + min_num=min_num, + target=target, + position_threshold=position_threshold, + n_erosion_threshold=n_erosion_threshold, + n_min_threshold=n_min_threshold, + min_distance=min_distance, + idx_start=idx_start, + PBC_flag=PBC_flag, + vertical_axis=vertical_axis, + ) if any([x is not None for x in features_threshold_i]): - 
features_thresholds=features_thresholds.append(features_threshold_i) + features_thresholds = features_thresholds.append(features_threshold_i) # For multiple threshold, and features found both in the current and previous step, remove "parent" features from Dataframe - if (i_threshold>0 and not features_thresholds.empty and regions_old): + if i_threshold > 0 and not features_thresholds.empty and regions_old: # for each threshold value: check if newly found features are surrounded by feature based on less restrictive threshold - features_thresholds=remove_parents(features_thresholds,regions_i,regions_old) - regions_old=regions_i - - logging.debug('Finished feature detection for threshold '+str(i_threshold) + ' : ' + str(threshold_i) ) + features_thresholds = remove_parents( + features_thresholds, regions_i, regions_old + ) + regions_old = regions_i + + logging.debug( + "Finished feature detection for threshold " + + str(i_threshold) + + " : " + + str(threshold_i) + ) return features_thresholds -def feature_detection_multithreshold(field_in, - dxy = None, - dz = None, - threshold=None, - min_num=0, - target='maximum', - position_threshold='center', - sigma_threshold=0.5, - n_erosion_threshold=0, - n_min_threshold=0, - min_distance=0, - feature_number_start=1, - PBC_flag='none', - vertical_coord = 'auto', - vertical_axis = None, - detect_subset = None, - ): - '''Function to perform feature detection based on contiguous regions above/below a threshold - + +def feature_detection_multithreshold( + field_in, + dxy=None, + dz=None, + threshold=None, + min_num=0, + target="maximum", + position_threshold="center", + sigma_threshold=0.5, + n_erosion_threshold=0, + n_min_threshold=0, + min_distance=0, + feature_number_start=1, + PBC_flag="none", + vertical_coord="auto", + vertical_axis=None, + detect_subset=None, +): + """Function to perform feature detection based on contiguous regions above/below a threshold + Parameters ---------- field_in: iris.cube.Cube @@ -752,17 +891,17 @@ def feature_detection_multithreshold(field_in, threshold: list of float or ints threshold values used to select target regions to track dxy: float - Constant horzontal grid spacing (m), optional. If not specified, + Constant horzontal grid spacing (m), optional. If not specified, this function requires that ```x_coordinate_name``` and - ```y_coordinate_name``` are available in `features`. If you specify a + ```y_coordinate_name``` are available in `features`. If you specify a value here, this function assumes that it is the x/y spacing between points - even if ```x_coordinate_name``` and ```y_coordinate_name``` are specified. + even if ```x_coordinate_name``` and ```y_coordinate_name``` are specified. dz: float or None. - Constant vertical grid spacing (m), optional. If not specified + Constant vertical grid spacing (m), optional. If not specified and the input is 3D, this function requires that vertical_coord is available in the `field_in` input. If you specify a value here, this function assumes that it is the constant z spacing between points, even if ```vertical_coord``` - is specified. + is specified. target: str ('minimum' or 'maximum') flag to determine if tracking is targetting minima or maxima in the data position_threshold: str('extreme', 'weighted_diff', 'weighted_abs' or 'center') @@ -784,10 +923,10 @@ def feature_detection_multithreshold(field_in, vertical_coord: str Name or axis number of the vertical coordinate. If 'auto', tries to auto-detect. 
It looks for the coordinate or the dimension name corresponding - to the string. + to the string. vertical_axis: int or None. The vertical axis number of the data. If None, uses vertical_coord - to determine axis. + to determine axis. detect_subset: dict-like or None Whether to run feature detection on only a subset of the data. If this is not None, it will subset the grid that we run feature detection @@ -796,19 +935,21 @@ def feature_detection_multithreshold(field_in, start is inclusive, and end is exclusive. For example, if your data are oriented as (time, z, y, x) and you want to only detect on values between z levels 10 and 29, you would set: - {1: (10, 30)}. + {1: (10, 30)}. Returns ------- - pandas DataFrame + pandas DataFrame detected features - ''' + """ from .utils import add_coordinates, add_coordinates_3D - logging.debug('start feature detection based on thresholds') + logging.debug("start feature detection based on thresholds") - if 'time' not in [coord.name() for coord in field_in.coords()]: - raise ValueError("input to feature detection step must include a dimension named 'time'") + if "time" not in [coord.name() for coord in field_in.coords()]: + raise ValueError( + "input to feature detection step must include a dimension named 'time'" + ) # Check whether we need to run 2D or 3D feature detection if field_in.ndim == 3: @@ -820,115 +961,128 @@ def feature_detection_multithreshold(field_in, else: raise ValueError("Feature detection only works with 2D or 3D data") - ndim_time=field_in.coord_dims('time')[0] + ndim_time = field_in.coord_dims("time")[0] if detect_subset is not None: raise NotImplementedError("Subsetting feature detection not yet supported.") - if detect_subset is not None and ndim_time in detect_subset: raise NotImplementedError("Cannot subset on time") - if is_3D: - # We need to determine the time axis so that we can determine the + # We need to determine the time axis so that we can determine the # vertical axis in each timestep if vertical_axis is not none. if vertical_axis is not None: - # We only need to adjust the axis number if the time axis + # We only need to adjust the axis number if the time axis # is a lower axis number than the specified vertical coordinate. 
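The renumbering below accounts for `slices_over('time')` removing the time dimension: any axis that sat above the time axis shifts down by one in each timestep slice. A small numpy illustration of the same arithmetic:

```python
import numpy as np

field = np.zeros((5, 10, 40, 40))  # (time, z, y, x), so vertical_axis == 1
ndim_time = 0

single_timestep = field[0]  # roughly what each iteration over time sees
vertical_axis = 1
if ndim_time < vertical_axis:
    vertical_axis -= 1  # z is now axis 0 of the 3D slice
print(single_timestep.shape[vertical_axis])  # -> 10
```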
if ndim_time < vertical_axis: vertical_axis = vertical_axis - 1 else: # We need to determine vertical axis - vertical_axis = tb_utils.find_vertical_axis_from_coord(field_in, - vertical_coord=vertical_coord) - + vertical_axis = tb_utils.find_vertical_axis_from_coord( + field_in, vertical_coord=vertical_coord + ) # create empty list to store features for all timesteps - list_features_timesteps=[] + list_features_timesteps = [] # loop over timesteps for feature identification: - data_time=field_in.slices_over('time') + data_time = field_in.slices_over("time") - # if single threshold is put in as a single value, turn it into a list - if type(threshold) in [int,float]: - threshold=[threshold] - for i_time,data_i in enumerate(data_time): - time_i=data_i.coord('time').units.num2date(data_i.coord('time').points[0]) - - features_thresholds=feature_detection_multithreshold_timestep(data_i,i_time, - threshold=threshold, - sigma_threshold=sigma_threshold, - min_num=min_num, - target=target, - position_threshold=position_threshold, - n_erosion_threshold=n_erosion_threshold, - n_min_threshold=n_min_threshold, - min_distance=min_distance, - feature_number_start=feature_number_start, - PBC_flag=PBC_flag, - vertical_axis = vertical_axis, - ) - #check if list of features is not empty, then merge features from different threshold values - #into one DataFrame and append to list for individual timesteps: + if type(threshold) in [int, float]: + threshold = [threshold] + for i_time, data_i in enumerate(data_time): + time_i = data_i.coord("time").units.num2date(data_i.coord("time").points[0]) + + features_thresholds = feature_detection_multithreshold_timestep( + data_i, + i_time, + threshold=threshold, + sigma_threshold=sigma_threshold, + min_num=min_num, + target=target, + position_threshold=position_threshold, + n_erosion_threshold=n_erosion_threshold, + n_min_threshold=n_min_threshold, + min_distance=min_distance, + feature_number_start=feature_number_start, + PBC_flag=PBC_flag, + vertical_axis=vertical_axis, + ) + # check if list of features is not empty, then merge features from different threshold values + # into one DataFrame and append to list for individual timesteps: if not features_thresholds.empty: - #Loop over DataFrame to remove features that are closer than distance_min to each other: - if (min_distance > 0): - features_thresholds=filter_min_distance(features_thresholds,dxy=dxy, dz=dz, - min_distance = min_distance, - z_coordinate_name = vertical_coord, - ) + # Loop over DataFrame to remove features that are closer than distance_min to each other: + if min_distance > 0: + features_thresholds = filter_min_distance( + features_thresholds, + dxy=dxy, + dz=dz, + min_distance=min_distance, + z_coordinate_name=vertical_coord, + ) list_features_timesteps.append(features_thresholds) - - logging.debug('Finished feature detection for ' + time_i.strftime('%Y-%m-%d_%H:%M:%S')) - logging.debug('feature detection: merging DataFrames') + logging.debug( + "Finished feature detection for " + time_i.strftime("%Y-%m-%d_%H:%M:%S") + ) + + logging.debug("feature detection: merging DataFrames") # Check if features are detected and then concatenate features from different timesteps into one pandas DataFrame # If no features are detected raise error if any([not x.empty for x in list_features_timesteps]): - features=pd.concat(list_features_timesteps, ignore_index=True) - features['feature']=features.index+feature_number_start - # features_filtered = features.drop(features[features['num'] < min_num].index) - # 
features_filtered.drop(columns=['idx','num','threshold_value'],inplace=True) - if 'vdim' in features: - features=add_coordinates_3D(features,field_in, vertical_coord=vertical_coord) - else: - features=add_coordinates(features,field_in) + features = pd.concat(list_features_timesteps, ignore_index=True) + features["feature"] = features.index + feature_number_start + # features_filtered = features.drop(features[features['num'] < min_num].index) + # features_filtered.drop(columns=['idx','num','threshold_value'],inplace=True) + if "vdim" in features: + features = add_coordinates_3D( + features, field_in, vertical_coord=vertical_coord + ) + else: + features = add_coordinates(features, field_in) else: - features=None - logging.debug('No features detected') - logging.debug('feature detection completed') + features = None + logging.debug("No features detected") + logging.debug("feature detection completed") return features -def filter_min_distance(features, dxy = None,dz = None, min_distance = None, - x_coordinate_name = None, - y_coordinate_name = None, - z_coordinate_name = None, - PBC_flag = 'none', - max_h1 = 0, max_h2 = 0,): - '''Function to remove features that are too close together. - If two features are closer than `min_distance`, it keeps the + +def filter_min_distance( + features, + dxy=None, + dz=None, + min_distance=None, + x_coordinate_name=None, + y_coordinate_name=None, + z_coordinate_name=None, + PBC_flag="none", + max_h1=0, + max_h2=0, +): + """Function to remove features that are too close together. + If two features are closer than `min_distance`, it keeps the larger feature. TODO: does this function work with minima? - + Parameters ---------- - features: pandas DataFrame + features: pandas DataFrame features dxy: float - Constant horzontal grid spacing (m), optional. If not specified, + Constant horzontal grid spacing (m), optional. If not specified, this function requires that ```x_coordinate_name``` and - ```y_coordinate_name``` are available in `features`. If you specify a + ```y_coordinate_name``` are available in `features`. If you specify a value here, this function assumes that it is the x/y spacing between points - even if ```x_coordinate_name``` and ```y_coordinate_name``` are specified. - dz: float - Constant vertical grid spacing (m), optional. If not specified + even if ```x_coordinate_name``` and ```y_coordinate_name``` are specified. + dz: float + Constant vertical grid spacing (m), optional. If not specified and the input is 3D, this function requires that `altitude` is available in the `features` input. If you specify a value here, this function assumes that it is the constant z spacing between points, even if ```z_coordinate_name``` - is specified. + is specified. min_distance: float minimum distance between detected features (m) x_coordinate_name: str @@ -953,78 +1107,111 @@ def filter_min_distance(features, dxy = None,dz = None, min_distance = None, Returns ------- - pandas DataFrame + pandas DataFrame features after filtering - ''' + """ from itertools import combinations - remove_list_distance=[] - if PBC_flag != 'none': + remove_list_distance = [] + + if PBC_flag != "none": raise NotImplementedError("We haven't yet implemented PBCs into this.") - #if we are 3D, the vertical dimension is in features. if we are 2D, there - #is no vertical dimension in features. - is_3D = 'vdim' in features + # if we are 3D, the vertical dimension is in features. if we are 2D, there + # is no vertical dimension in features. 
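In the simple 2D, non-periodic case the filtering described above reduces to a pairwise distance check. A sketch in which plain Euclidean distance stands in for `tb_utils.calc_distance_coords_pbc` (which additionally handles periodic boundaries) and the equal-threshold tie-break on `num` is omitted:

```python
import numpy as np
import pandas as pd
from itertools import combinations

# two features closer than min_distance; the one with the higher threshold wins
features = pd.DataFrame(
    {"hdim_1": [10, 11], "hdim_2": [10, 10], "threshold_value": [20, 15], "num": [5, 8]}
)
dxy, min_distance = 1000.0, 2000.0

remove = []
for i1, i2 in combinations(features.index.values, 2):
    p1 = dxy * features.loc[i1, ["hdim_1", "hdim_2"]].to_numpy(dtype=float)
    p2 = dxy * features.loc[i2, ["hdim_1", "hdim_2"]].to_numpy(dtype=float)
    if np.linalg.norm(p1 - p2) <= min_distance:
        weaker = (
            i2
            if features.loc[i1, "threshold_value"] > features.loc[i2, "threshold_value"]
            else i1
        )
        remove.append(weaker)
print(features[~features.index.isin(remove)])  # keeps the threshold-20 feature
```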
+ is_3D = "vdim" in features if is_3D and dz is None: - z_coordinate_name = tb_utils.find_dataframe_vertical_coord(features, z_coordinate_name) + z_coordinate_name = tb_utils.find_dataframe_vertical_coord( + features, z_coordinate_name + ) # Check if both dxy and their coordinate names are specified. # If they are, warn that we will use dxy. - if dxy is not None and (x_coordinate_name in features and y_coordinate_name in features): - warnings.warn("Both "+x_coordinate_name+"/"+y_coordinate_name+" and dxy " - "set. Using constant dxy. Set dxy to None if you want to use the " - "interpolated coordinates, or set `x_coordinate_name` and " - "`y_coordinate_name` to None to use a constant dxy.") - + if dxy is not None and ( + x_coordinate_name in features and y_coordinate_name in features + ): + warnings.warn( + "Both " + x_coordinate_name + "/" + y_coordinate_name + " and dxy " + "set. Using constant dxy. Set dxy to None if you want to use the " + "interpolated coordinates, or set `x_coordinate_name` and " + "`y_coordinate_name` to None to use a constant dxy." + ) + # Check and if both dz is specified and altitude is available, warn that we will use dz. if is_3D and (dz is not None and z_coordinate_name in features): - warnings.warn("Both "+z_coordinate_name+" and dz available to filter_min_distance; using constant dz. " - "Set dz to none if you want to use altitude or set `z_coordinate_name` to None to use constant dz.") - - #create list of tuples with all combinations of features at the timestep: - indeces=combinations(features.index.values,2) - #Loop over combinations to remove features that are closer together than min_distance and keep larger one (either higher threshold or larger area) + warnings.warn( + "Both " + + z_coordinate_name + + " and dz available to filter_min_distance; using constant dz. " + "Set dz to none if you want to use altitude or set `z_coordinate_name` to None to use constant dz." 
+    )
+
+    # create list of tuples with all combinations of features at the timestep:
+    indeces = combinations(features.index.values, 2)
+    # Loop over combinations to remove features that are closer together than min_distance and keep larger one (either higher threshold or larger area)
     for index_1, index_2 in indeces:
         if index_1 is not index_2:
             if is_3D:
                 if dz is not None:
-                    z_coord_1 = dz * features.loc[index_1,'vdim']
-                    z_coord_2 = dz * features.loc[index_2,'vdim']
+                    z_coord_1 = dz * features.loc[index_1, "vdim"]
+                    z_coord_2 = dz * features.loc[index_2, "vdim"]
                 else:
-                    z_coord_1 = features.loc[index_1,z_coordinate_name]
-                    z_coord_2 = features.loc[index_2,z_coordinate_name]
-
-                coord_1 = (z_coord_1, dxy*features.loc[index_1,'hdim_1'],
-                           dxy*features.loc[index_1,'hdim_2'])
-                coord_2 = (z_coord_2, dxy*features.loc[index_2,'hdim_1'],
-                           dxy*features.loc[index_2,'hdim_2'])
+                    z_coord_1 = features.loc[index_1, z_coordinate_name]
+                    z_coord_2 = features.loc[index_2, z_coordinate_name]
+
+                coord_1 = (
+                    z_coord_1,
+                    dxy * features.loc[index_1, "hdim_1"],
+                    dxy * features.loc[index_1, "hdim_2"],
+                )
+                coord_2 = (
+                    z_coord_2,
+                    dxy * features.loc[index_2, "hdim_1"],
+                    dxy * features.loc[index_2, "hdim_2"],
+                )
             else:
-                coord_1 = (dxy*features.loc[index_1,'hdim_1'], dxy*features.loc[index_1,'hdim_2'])
-                coord_2 = (dxy*features.loc[index_2,'hdim_1'], dxy*features.loc[index_2,'hdim_2'])
+                coord_1 = (
+                    dxy * features.loc[index_1, "hdim_1"],
+                    dxy * features.loc[index_1, "hdim_2"],
+                )
+                coord_2 = (
+                    dxy * features.loc[index_2, "hdim_1"],
+                    dxy * features.loc[index_2, "hdim_2"],
+                )

             distance = tb_utils.calc_distance_coords_pbc(
-                coords_1 = np.array(coord_1),
-                coords_2 = np.array(coord_2),
-                min_h1 = 0, max_h1 = max_h1, min_h2 = 0, max_h2 = max_h2, PBC_flag=PBC_flag
+                coords_1=np.array(coord_1),
+                coords_2=np.array(coord_2),
+                min_h1=0,
+                max_h1=max_h1,
+                min_h2=0,
+                max_h2=max_h2,
+                PBC_flag=PBC_flag,
             )
-
+
             if distance <= min_distance:
-                #print(distance, min_distance, index_1, index_2, features.size)
-#                logging.debug('distance<= min_distance: ' + str(distance))
+                # print(distance, min_distance, index_1, index_2, features.size)
+                # logging.debug('distance<= min_distance: ' + str(distance))
-                if features.loc[index_1,'threshold_value']>features.loc[index_2,'threshold_value']:
+                if (
+                    features.loc[index_1, "threshold_value"]
+                    > features.loc[index_2, "threshold_value"]
+                ):
                     remove_list_distance.append(index_2)
-                elif features.loc[index_1,'threshold_value']<features.loc[index_2,'threshold_value']:
+                elif (
+                    features.loc[index_1, "threshold_value"]
+                    < features.loc[index_2, "threshold_value"]
+                ):
                     remove_list_distance.append(index_1)
-                elif features.loc[index_1,'threshold_value']==features.loc[index_2,'threshold_value']:
-                    if features.loc[index_1,'num']>features.loc[index_2,'num']:
+                elif (
+                    features.loc[index_1, "threshold_value"]
+                    == features.loc[index_2, "threshold_value"]
+                ):
+                    if features.loc[index_1, "num"] > features.loc[index_2, "num"]:
                         remove_list_distance.append(index_2)
-                    elif features.loc[index_1,'num']<features.loc[index_2,'num']:
+                    elif features.loc[index_1, "num"] < features.loc[index_2, "num"]:
                         remove_list_distance.append(index_1)
-    features=features[~features.index.isin(remove_list_distance)]
+    features = features[~features.index.isin(remove_list_distance)]
     return features
diff --git a/tobac/plot.py b/tobac/plot.py
--- a/tobac/plot.py
+++ b/tobac/plot.py
-    track=track.loc[(track['longitude'] > axis_extent[0])
-                    & (track['longitude'] < axis_extent[1])
-                    & (track['latitude'] > axis_extent[2])
-                    & (track['latitude'] < axis_extent[3])]
-
-    #Plot tracked features by looping over rows of Dataframe
-    for i_row,row in track.iterrows():
-        feature=row['feature']
-        cell=row['cell']
+    track = track.loc[
+        (track["longitude"] > axis_extent[0])
+        & (track["longitude"] < axis_extent[1])
+        & (track["latitude"] > axis_extent[2])
+        & (track["latitude"] < axis_extent[3])
+    ]
+
+    # Plot tracked features by looping over rows of Dataframe
+    for i_row, row in track.iterrows():
+        feature = row["feature"]
+        cell = row["cell"]
         if not np.isnan(cell):
-            color=colors_mask[int(cell%len(colors_mask))]
+            color = colors_mask[int(cell % len(colors_mask))]
             if plot_number:
-                cell_string=' '+str(int(row['cell']))
-
axes.text(row['longitude'],row['latitude'],cell_string, - color=color,fontsize=6, clip_on=True) + cell_string = " " + str(int(row["cell"])) + axes.text( + row["longitude"], + row["latitude"], + cell_string, + color=color, + fontsize=6, + clip_on=True, + ) else: - color='grey' + color = "grey" if plot_outline: - mask_i=None + mask_i = None # if mask is 3D, create surface projection, if mask is 2D keep the mask - if mask.ndim==2: - mask_i=mask_features(mask,feature,masked=False) - elif mask.ndim==3: - mask_i=mask_features_surface(mask,feature,masked=False,z_coord='model_level_number') + if mask.ndim == 2: + mask_i = mask_features(mask, feature, masked=False) + elif mask.ndim == 3: + mask_i = mask_features_surface( + mask, feature, masked=False, z_coord="model_level_number" + ) else: - raise ValueError('mask has shape that cannot be understood') + raise ValueError("mask has shape that cannot be understood") # plot countour lines around the edges of the mask - iplt.contour(mask_i,coords=['longitude','latitude'], - levels=[0,feature], - colors=color,linewidths=linewidth_contour, - axes=axes) + iplt.contour( + mask_i, + coords=["longitude", "latitude"], + levels=[0, feature], + colors=color, + linewidths=linewidth_contour, + axes=axes, + ) if plot_marker: - axes.plot(row['longitude'],row['latitude'], - color=color,marker=marker_track,markersize=markersize_track) + axes.plot( + row["longitude"], + row["latitude"], + color=color, + marker=marker_track, + markersize=markersize_track, + ) axes.set_extent(axis_extent) return axes -def animation_mask_field(track,features,field,mask,interval=500,figsize=(10,10),**kwargs): + +def animation_mask_field( + track, features, field, mask, interval=500, figsize=(10, 10), **kwargs +): import cartopy.crs as ccrs import matplotlib.pyplot as plt import matplotlib.animation from iris import Constraint - fig=plt.figure(figsize=figsize) + fig = plt.figure(figsize=figsize) plt.close() def update(time_in): fig.clf() - ax=fig.add_subplot(111,projection=ccrs.PlateCarree()) + ax = fig.add_subplot(111, projection=ccrs.PlateCarree()) constraint_time = Constraint(time=time_in) - field_i=field.extract(constraint_time) - mask_i=mask.extract(constraint_time) - track_i=track[track['time']==time_in] - features_i=features[features['time']==time_in] - #fig1,ax1=plt.subplots(ncols=1, nrows=1,figsize=figsize, subplot_kw={'projection': ccrs.PlateCarree()}) - plot_tobac=plot_tracks_mask_field(track_i,field=field_i,mask=mask_i,features=features_i, - axes=ax, - **kwargs) - ax.set_title('{}'.format(time_in)) - - time=field.coord('time') - datetimes=time.units.num2date(time.points) - animation = matplotlib.animation.FuncAnimation(fig, update,init_func=None, frames=datetimes,interval=interval, blit=False) + field_i = field.extract(constraint_time) + mask_i = mask.extract(constraint_time) + track_i = track[track["time"] == time_in] + features_i = features[features["time"] == time_in] + # fig1,ax1=plt.subplots(ncols=1, nrows=1,figsize=figsize, subplot_kw={'projection': ccrs.PlateCarree()}) + plot_tobac = plot_tracks_mask_field( + track_i, field=field_i, mask=mask_i, features=features_i, axes=ax, **kwargs + ) + ax.set_title("{}".format(time_in)) + + time = field.coord("time") + datetimes = time.units.num2date(time.points) + animation = matplotlib.animation.FuncAnimation( + fig, update, init_func=None, frames=datetimes, interval=interval, blit=False + ) return animation -def plot_mask_cell_track_follow(cell,track, cog, features, mask_total, - field_contour, field_filled, - width=10000, - name= 
'test', plotdir='./', - file_format=['png'],figsize=(10/2.54, 10/2.54),dpi=300, - **kwargs): - '''Make plots for all cells centred around cell and with one background field as filling and one background field as contrours + +def plot_mask_cell_track_follow( + cell, + track, + cog, + features, + mask_total, + field_contour, + field_filled, + width=10000, + name="test", + plotdir="./", + file_format=["png"], + figsize=(10 / 2.54, 10 / 2.54), + dpi=300, + **kwargs +): + """Make plots for all cells centred around cell and with one background field as filling and one background field as contrours Input: Output: - ''' + """ from iris import Constraint from numpy import unique import os - track_cell=track[track['cell']==cell] - for i_row,row in track_cell.iterrows(): - - constraint_time = Constraint(time=row['time']) - constraint_x = Constraint(projection_x_coordinate = lambda cell: row['projection_x_coordinate']-width < cell < row['projection_x_coordinate']+width) - constraint_y = Constraint(projection_y_coordinate = lambda cell: row['projection_y_coordinate']-width < cell < row['projection_y_coordinate']+width) + track_cell = track[track["cell"] == cell] + for i_row, row in track_cell.iterrows(): + + constraint_time = Constraint(time=row["time"]) + constraint_x = Constraint( + projection_x_coordinate=lambda cell: row["projection_x_coordinate"] - width + < cell + < row["projection_x_coordinate"] + width + ) + constraint_y = Constraint( + projection_y_coordinate=lambda cell: row["projection_y_coordinate"] - width + < cell + < row["projection_y_coordinate"] + width + ) constraint = constraint_time & constraint_x & constraint_y - mask_total_i=mask_total.extract(constraint) + mask_total_i = mask_total.extract(constraint) if field_contour is None: - field_contour_i=None + field_contour_i = None else: - field_contour_i=field_contour.extract(constraint) + field_contour_i = field_contour.extract(constraint) if field_filled is None: - field_filled_i=None + field_filled_i = None else: - field_filled_i=field_filled.extract(constraint) + field_filled_i = field_filled.extract(constraint) - cells=list(unique(mask_total_i.core_data())) + cells = list(unique(mask_total_i.core_data())) if cell not in cells: cells.append(cell) if 0 in cells: cells.remove(0) - track_i=track[track['cell'].isin(cells)] - track_i=track_i[track_i['time']==row['time']] + track_i = track[track["cell"].isin(cells)] + track_i = track_i[track_i["time"] == row["time"]] if cog is None: - cog_i=None + cog_i = None else: - cog_i=cog[cog['cell'].isin(cells)] - cog_i=cog_i[cog_i['time']==row['time']] + cog_i = cog[cog["cell"].isin(cells)] + cog_i = cog_i[cog_i["time"] == row["time"]] if features is None: - features_i=None + features_i = None else: - features_i=features[features['time']==row['time']] - + features_i = features[features["time"] == row["time"]] fig1, ax1 = plt.subplots(ncols=1, nrows=1, figsize=figsize) fig1.subplots_adjust(left=0.2, bottom=0.15, right=0.85, top=0.80) - - - - datestring_stamp = row['time'].strftime('%Y-%m-%d %H:%M:%S') - celltime_stamp = "%02d:%02d:%02d" % (row['time_cell'].dt.total_seconds() // 3600,(row['time_cell'].dt.total_seconds() % 3600) // 60, row['time_cell'].dt.total_seconds() % 60 ) - title=datestring_stamp + ' , ' + celltime_stamp - datestring_file = row['time'].strftime('%Y-%m-%d_%H%M%S') - - ax1=plot_mask_cell_individual_follow(cell_i=cell,track=track_i, cog=cog_i,features=features_i, - mask_total=mask_total_i, - field_contour=field_contour_i, field_filled=field_filled_i, - width=width, - 
axes=ax1,title=title, - **kwargs) + datestring_stamp = row["time"].strftime("%Y-%m-%d %H:%M:%S") + celltime_stamp = "%02d:%02d:%02d" % ( + row["time_cell"].dt.total_seconds() // 3600, + (row["time_cell"].dt.total_seconds() % 3600) // 60, + row["time_cell"].dt.total_seconds() % 60, + ) + title = datestring_stamp + " , " + celltime_stamp + datestring_file = row["time"].strftime("%Y-%m-%d_%H%M%S") + + ax1 = plot_mask_cell_individual_follow( + cell_i=cell, + track=track_i, + cog=cog_i, + features=features_i, + mask_total=mask_total_i, + field_contour=field_contour_i, + field_filled=field_filled_i, + width=width, + axes=ax1, + title=title, + **kwargs + ) out_dir = os.path.join(plotdir, name) os.makedirs(out_dir, exist_ok=True) - if 'png' in file_format: - savepath_png = os.path.join(out_dir, name + '_' + datestring_file + '.png') + if "png" in file_format: + savepath_png = os.path.join(out_dir, name + "_" + datestring_file + ".png") fig1.savefig(savepath_png, dpi=dpi) - logging.debug('field_contour field_filled Mask plot saved to ' + savepath_png) - if 'pdf' in file_format: - savepath_pdf = os.path.join(out_dir, name + '_' + datestring_file + '.pdf') + logging.debug( + "field_contour field_filled Mask plot saved to " + savepath_png + ) + if "pdf" in file_format: + savepath_pdf = os.path.join(out_dir, name + "_" + datestring_file + ".pdf") fig1.savefig(savepath_pdf, dpi=dpi) - logging.debug('field_contour field_filled Mask plot saved to ' + savepath_pdf) + logging.debug( + "field_contour field_filled Mask plot saved to " + savepath_pdf + ) plt.close() plt.clf() -def plot_mask_cell_individual_follow(cell_i,track, cog,features, mask_total, - field_contour, field_filled, - axes=None,width=10000, - label_field_contour=None, cmap_field_contour='Blues',norm_field_contour=None, - linewidths_contour=0.8,contour_labels=False, - vmin_field_contour=0,vmax_field_contour=50,levels_field_contour=None,nlevels_field_contour=10, - label_field_filled=None,cmap_field_filled='summer',norm_field_filled=None, - vmin_field_filled=0,vmax_field_filled=100,levels_field_filled=None,nlevels_field_filled=10, - title=None - ): - '''Make individual plot for cell centred around cell and with one background field as filling and one background field as contrours +def plot_mask_cell_individual_follow( + cell_i, + track, + cog, + features, + mask_total, + field_contour, + field_filled, + axes=None, + width=10000, + label_field_contour=None, + cmap_field_contour="Blues", + norm_field_contour=None, + linewidths_contour=0.8, + contour_labels=False, + vmin_field_contour=0, + vmax_field_contour=50, + levels_field_contour=None, + nlevels_field_contour=10, + label_field_filled=None, + cmap_field_filled="summer", + norm_field_filled=None, + vmin_field_filled=0, + vmax_field_filled=100, + levels_field_filled=None, + nlevels_field_filled=10, + title=None, +): + """Make individual plot for cell centred around cell and with one background field as filling and one background field as contrours Input: Output: - ''' + """ import numpy as np - from .utils import mask_cell_surface + from .utils import mask_cell_surface from mpl_toolkits.axes_grid1 import make_axes_locatable from matplotlib.colors import Normalize - divider = make_axes_locatable(axes) - x_pos=track[track['cell']==cell_i]['projection_x_coordinate'].item() - y_pos=track[track['cell']==cell_i]['projection_y_coordinate'].item() + x_pos = track[track["cell"] == cell_i]["projection_x_coordinate"].item() + y_pos = track[track["cell"] == cell_i]["projection_y_coordinate"].item() if 
field_filled is not None: if levels_field_filled is None: - levels_field_filled=np.linspace(vmin_field_filled,vmax_field_filled, nlevels_field_filled) - plot_field_filled = axes.contourf((field_filled.coord('projection_x_coordinate').points-x_pos)/1000, - (field_filled.coord('projection_y_coordinate').points-y_pos)/1000, - field_filled.data, - cmap=cmap_field_filled,norm=norm_field_filled, - levels=levels_field_filled,vmin=vmin_field_filled, vmax=vmax_field_filled) - + levels_field_filled = np.linspace( + vmin_field_filled, vmax_field_filled, nlevels_field_filled + ) + plot_field_filled = axes.contourf( + (field_filled.coord("projection_x_coordinate").points - x_pos) / 1000, + (field_filled.coord("projection_y_coordinate").points - y_pos) / 1000, + field_filled.data, + cmap=cmap_field_filled, + norm=norm_field_filled, + levels=levels_field_filled, + vmin=vmin_field_filled, + vmax=vmax_field_filled, + ) cax_filled = divider.append_axes("right", size="5%", pad=0.1) - norm_filled= Normalize(vmin=vmin_field_filled, vmax=vmax_field_filled) - sm_filled= plt.cm.ScalarMappable(norm=norm_filled, cmap = plot_field_filled.cmap) + norm_filled = Normalize(vmin=vmin_field_filled, vmax=vmax_field_filled) + sm_filled = plt.cm.ScalarMappable(norm=norm_filled, cmap=plot_field_filled.cmap) sm_filled.set_array([]) - cbar_field_filled = plt.colorbar(sm_filled, orientation='vertical',cax=cax_filled) + cbar_field_filled = plt.colorbar( + sm_filled, orientation="vertical", cax=cax_filled + ) cbar_field_filled.ax.set_ylabel(label_field_filled) cbar_field_filled.set_clim(vmin_field_filled, vmax_field_filled) if field_contour is not None: if levels_field_contour is None: - levels_field_contour=np.linspace(vmin_field_contour, vmax_field_contour, nlevels_field_contour) + levels_field_contour = np.linspace( + vmin_field_contour, vmax_field_contour, nlevels_field_contour + ) if norm_field_contour: - vmin_field_contour=None, - vmax_field_contour=None, - - plot_field_contour = axes.contour((field_contour.coord('projection_x_coordinate').points-x_pos)/1000, - (field_contour.coord('projection_y_coordinate').points-y_pos)/1000, - field_contour.data, - cmap=cmap_field_contour,norm=norm_field_contour, - levels=levels_field_contour,vmin=vmin_field_contour, vmax=vmax_field_contour, - linewidths=linewidths_contour) + vmin_field_contour = (None,) + vmax_field_contour = (None,) + + plot_field_contour = axes.contour( + (field_contour.coord("projection_x_coordinate").points - x_pos) / 1000, + (field_contour.coord("projection_y_coordinate").points - y_pos) / 1000, + field_contour.data, + cmap=cmap_field_contour, + norm=norm_field_contour, + levels=levels_field_contour, + vmin=vmin_field_contour, + vmax=vmax_field_contour, + linewidths=linewidths_contour, + ) if contour_labels: axes.clabel(plot_field_contour, fontsize=10) cax_contour = divider.append_axes("bottom", size="5%", pad=0.1) if norm_field_contour: - vmin_field_contour=None - vmax_field_contour=None - norm_contour=norm_field_contour + vmin_field_contour = None + vmax_field_contour = None + norm_contour = norm_field_contour else: - norm_contour= Normalize(vmin=vmin_field_contour, vmax=vmax_field_contour) + norm_contour = Normalize(vmin=vmin_field_contour, vmax=vmax_field_contour) - sm_contour= plt.cm.ScalarMappable(norm=norm_contour, cmap = plot_field_contour.cmap) + sm_contour = plt.cm.ScalarMappable( + norm=norm_contour, cmap=plot_field_contour.cmap + ) sm_contour.set_array([]) - cbar_field_contour = plt.colorbar(sm_contour, 
orientation='horizontal',ticks=levels_field_contour,cax=cax_contour) + cbar_field_contour = plt.colorbar( + sm_contour, + orientation="horizontal", + ticks=levels_field_contour, + cax=cax_contour, + ) cbar_field_contour.ax.set_xlabel(label_field_contour) cbar_field_contour.set_clim(vmin_field_contour, vmax_field_contour) - for i_row, row in track.iterrows(): - cell = int(row['cell']) - if cell==cell_i: - color='darkred' + cell = int(row["cell"]) + if cell == cell_i: + color = "darkred" else: - color='darkorange' - - cell_string=' '+str(int(row['cell'])) - axes.text((row['projection_x_coordinate']-x_pos)/1000, - (row['projection_y_coordinate']-y_pos)/1000, - cell_string,color=color,fontsize=6, clip_on=True) + color = "darkorange" + + cell_string = " " + str(int(row["cell"])) + axes.text( + (row["projection_x_coordinate"] - x_pos) / 1000, + (row["projection_y_coordinate"] - y_pos) / 1000, + cell_string, + color=color, + fontsize=6, + clip_on=True, + ) # Plot marker for tracked cell centre as a cross - axes.plot((row['projection_x_coordinate']-x_pos)/1000, - (row['projection_y_coordinate']-y_pos)/1000, - 'x', color=color,markersize=4) - - - #Create surface projection of mask for the respective cell and plot it in the right color - z_coord = 'model_level_number' - if len(mask_total.shape)==3: - mask_total_i_surface = mask_cell_surface(mask_total, cell, track, masked=False, z_coord=z_coord) - elif len(mask_total.shape)==2: - mask_total_i_surface=mask_total - axes.contour((mask_total_i_surface.coord('projection_x_coordinate').points-x_pos)/1000, - (mask_total_i_surface.coord('projection_y_coordinate').points-y_pos)/1000, - mask_total_i_surface.data, - levels=[0, cell], colors=color, linestyles=':',linewidth=1) + axes.plot( + (row["projection_x_coordinate"] - x_pos) / 1000, + (row["projection_y_coordinate"] - y_pos) / 1000, + "x", + color=color, + markersize=4, + ) + + # Create surface projection of mask for the respective cell and plot it in the right color + z_coord = "model_level_number" + if len(mask_total.shape) == 3: + mask_total_i_surface = mask_cell_surface( + mask_total, cell, track, masked=False, z_coord=z_coord + ) + elif len(mask_total.shape) == 2: + mask_total_i_surface = mask_total + axes.contour( + (mask_total_i_surface.coord("projection_x_coordinate").points - x_pos) + / 1000, + (mask_total_i_surface.coord("projection_y_coordinate").points - y_pos) + / 1000, + mask_total_i_surface.data, + levels=[0, cell], + colors=color, + linestyles=":", + linewidth=1, + ) if cog is not None: for i_row, row in cog.iterrows(): - cell = row['cell'] + cell = row["cell"] - if cell==cell_i: - color='darkred' + if cell == cell_i: + color = "darkred" else: - color='darkorange' + color = "darkorange" # plot marker for centre of gravity as a circle - axes.plot((row['x_M']-x_pos)/1000, (row['y_M']-y_pos)/1000, - 'o', markeredgecolor=color, markerfacecolor='None',markersize=4) + axes.plot( + (row["x_M"] - x_pos) / 1000, + (row["y_M"] - y_pos) / 1000, + "o", + markeredgecolor=color, + markerfacecolor="None", + markersize=4, + ) if features is not None: for i_row, row in features.iterrows(): - color='purple' - axes.plot((row['projection_x_coordinate']-x_pos)/1000, - (row['projection_y_coordinate']-y_pos)/1000, - '+', color=color,markersize=3) - - axes.set_xlabel('x (km)') - axes.set_ylabel('y (km)') - axes.set_xlim([-1*width/1000, width/1000]) - axes.set_ylim([-1*width/1000, width/1000]) - axes.xaxis.set_label_position('top') - axes.xaxis.set_ticks_position('top') - 
axes.set_title(title,pad=35,fontsize=10,horizontalalignment='left',loc='left') + color = "purple" + axes.plot( + (row["projection_x_coordinate"] - x_pos) / 1000, + (row["projection_y_coordinate"] - y_pos) / 1000, + "+", + color=color, + markersize=3, + ) + + axes.set_xlabel("x (km)") + axes.set_ylabel("y (km)") + axes.set_xlim([-1 * width / 1000, width / 1000]) + axes.set_ylim([-1 * width / 1000, width / 1000]) + axes.xaxis.set_label_position("top") + axes.xaxis.set_ticks_position("top") + axes.set_title(title, pad=35, fontsize=10, horizontalalignment="left", loc="left") return axes -def plot_mask_cell_track_static(cell,track, cog, features, mask_total, - field_contour, field_filled, - width=10000,n_extend=1, - name= 'test', plotdir='./', - file_format=['png'],figsize=(10/2.54, 10/2.54),dpi=300, - **kwargs): - '''Make plots for all cells with fixed frame including entire development of the cell and with one background field as filling and one background field as contrours + +def plot_mask_cell_track_static( + cell, + track, + cog, + features, + mask_total, + field_contour, + field_filled, + width=10000, + n_extend=1, + name="test", + plotdir="./", + file_format=["png"], + figsize=(10 / 2.54, 10 / 2.54), + dpi=300, + **kwargs +): + """Make plots for all cells with fixed frame including entire development of the cell and with one background field as filling and one background field as contrours Input: Output: - ''' + """ from iris import Constraint from numpy import unique import os - track_cell=track[track['cell']==cell] - x_min=track_cell['projection_x_coordinate'].min()-width - x_max=track_cell['projection_x_coordinate'].max()+width - y_min=track_cell['projection_y_coordinate'].min()-width - y_max=track_cell['projection_y_coordinate'].max()+width - - #set up looping over time based on mask's time coordinate to allow for one timestep before and after the track - time_coord=mask_total.coord('time') - time=time_coord.units.num2date(time_coord.points) - i_start=max(0,np.where(time==track_cell['time'].values[0])[0][0]-n_extend) - i_end=min(len(time)-1,np.where(time==track_cell['time'].values[-1])[0][0]+n_extend+1) - time_cell=time[slice(i_start,i_end)] + + track_cell = track[track["cell"] == cell] + x_min = track_cell["projection_x_coordinate"].min() - width + x_max = track_cell["projection_x_coordinate"].max() + width + y_min = track_cell["projection_y_coordinate"].min() - width + y_max = track_cell["projection_y_coordinate"].max() + width + + # set up looping over time based on mask's time coordinate to allow for one timestep before and after the track + time_coord = mask_total.coord("time") + time = time_coord.units.num2date(time_coord.points) + i_start = max(0, np.where(time == track_cell["time"].values[0])[0][0] - n_extend) + i_end = min( + len(time) - 1, + np.where(time == track_cell["time"].values[-1])[0][0] + n_extend + 1, + ) + time_cell = time[slice(i_start, i_end)] for time_i in time_cell: -# for i_row,row in track_cell.iterrows(): -# time_i=row['time'] -# constraint_time = Constraint(time=row['time']) + # for i_row,row in track_cell.iterrows(): + # time_i=row['time'] + # constraint_time = Constraint(time=row['time']) constraint_time = Constraint(time=time_i) - constraint_x = Constraint(projection_x_coordinate = lambda cell: x_min < cell < x_max) - constraint_y = Constraint(projection_y_coordinate = lambda cell: y_min < cell < y_max) + constraint_x = Constraint( + projection_x_coordinate=lambda cell: x_min < cell < x_max + ) + constraint_y = Constraint( + 
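An aside on the padded time window built above: the first and last occurrences of the cell's timestamps are located in the mask's time coordinate and widened by n_extend steps on either side. A minimal sketch with synthetic data (all names are local to the sketch); note that the code clamps the exclusive slice end to len(time) - 1, which appears to drop the final timestep whenever the track reaches it, so the sketch clamps to len(time) instead:

    import numpy as np

    time = np.arange(10)               # stand-in for the mask's decoded time points
    track_times = np.array([3, 4, 5])  # timesteps at which the cell exists
    n_extend = 1

    i_start = max(0, np.where(time == track_times[0])[0][0] - n_extend)
    # clamp the exclusive end to len(time) so the final timestep stays reachable
    i_end = min(len(time), np.where(time == track_times[-1])[0][0] + n_extend + 1)
    time_cell = time[i_start:i_end]    # -> array([2, 3, 4, 5, 6])
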
projection_y_coordinate=lambda cell: y_min < cell < y_max + ) constraint = constraint_time & constraint_x & constraint_y - mask_total_i=mask_total.extract(constraint) + mask_total_i = mask_total.extract(constraint) if field_contour is None: - field_contour_i=None + field_contour_i = None else: - field_contour_i=field_contour.extract(constraint) + field_contour_i = field_contour.extract(constraint) if field_filled is None: - field_filled_i=None + field_filled_i = None else: - field_filled_i=field_filled.extract(constraint) - - - track_i=track[track['time']==time_i] - - cells_mask=list(unique(mask_total_i.core_data())) - track_cells=track_i.loc[(track_i['projection_x_coordinate'] > x_min) & (track_i['projection_x_coordinate'] < x_max) & (track_i['projection_y_coordinate'] > y_min) & (track_i['projection_y_coordinate'] < y_max)] - cells_track=list(track_cells['cell'].values) - cells=list(set( cells_mask + cells_track )) + field_filled_i = field_filled.extract(constraint) + + track_i = track[track["time"] == time_i] + + cells_mask = list(unique(mask_total_i.core_data())) + track_cells = track_i.loc[ + (track_i["projection_x_coordinate"] > x_min) + & (track_i["projection_x_coordinate"] < x_max) + & (track_i["projection_y_coordinate"] > y_min) + & (track_i["projection_y_coordinate"] < y_max) + ] + cells_track = list(track_cells["cell"].values) + cells = list(set(cells_mask + cells_track)) if cell not in cells: cells.append(cell) if 0 in cells: cells.remove(0) - track_i=track_i[track_i['cell'].isin(cells)] + track_i = track_i[track_i["cell"].isin(cells)] if cog is None: - cog_i=None + cog_i = None else: - cog_i=cog[cog['cell'].isin(cells)] - cog_i=cog_i[cog_i['time']==time_i] + cog_i = cog[cog["cell"].isin(cells)] + cog_i = cog_i[cog_i["time"] == time_i] if features is None: - features_i=None + features_i = None else: - features_i=features[features['time']==time_i] - + features_i = features[features["time"] == time_i] fig1, ax1 = plt.subplots(ncols=1, nrows=1, figsize=figsize) fig1.subplots_adjust(left=0.2, bottom=0.15, right=0.80, top=0.85) - datestring_stamp = time_i.strftime('%Y-%m-%d %H:%M:%S') - if time_i in track_cell['time'].values: - time_cell_i=track_cell[track_cell['time'].values==time_i]['time_cell'] - celltime_stamp = "%02d:%02d:%02d" % (time_cell_i.dt.total_seconds() // 3600, - (time_cell_i.dt.total_seconds() % 3600) // 60, - time_cell_i.dt.total_seconds() % 60 ) + datestring_stamp = time_i.strftime("%Y-%m-%d %H:%M:%S") + if time_i in track_cell["time"].values: + time_cell_i = track_cell[track_cell["time"].values == time_i]["time_cell"] + celltime_stamp = "%02d:%02d:%02d" % ( + time_cell_i.dt.total_seconds() // 3600, + (time_cell_i.dt.total_seconds() % 3600) // 60, + time_cell_i.dt.total_seconds() % 60, + ) else: - celltime_stamp=' - ' - title=datestring_stamp + ' , ' + celltime_stamp - datestring_file = time_i.strftime('%Y-%m-%d_%H%M%S') - - ax1=plot_mask_cell_individual_static(cell_i=cell, - track=track_i, cog=cog_i,features=features_i, - mask_total=mask_total_i, - field_contour=field_contour_i, field_filled=field_filled_i, - xlim=[x_min/1000,x_max/1000],ylim=[y_min/1000,y_max/1000], - axes=ax1,title=title,**kwargs) + celltime_stamp = " - " + title = datestring_stamp + " , " + celltime_stamp + datestring_file = time_i.strftime("%Y-%m-%d_%H%M%S") + + ax1 = plot_mask_cell_individual_static( + cell_i=cell, + track=track_i, + cog=cog_i, + features=features_i, + mask_total=mask_total_i, + field_contour=field_contour_i, + field_filled=field_filled_i, + xlim=[x_min / 1000, x_max / 1000], 
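The HH:MM:SS cell-age stamp is rebuilt from total_seconds() at every call site in these functions. A hypothetical helper (the name format_time_cell is ours, not tobac's) expressing the same arithmetic once, via divmod:

    import pandas as pd

    def format_time_cell(td):
        # render a timedelta as HH:MM:SS, mirroring the "%02d:%02d:%02d" logic
        total = int(td.total_seconds())
        hours, rem = divmod(total, 3600)
        minutes, seconds = divmod(rem, 60)
        return "%02d:%02d:%02d" % (hours, minutes, seconds)

    print(format_time_cell(pd.Timedelta(minutes=75)))  # -> 01:15:00
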
+ ylim=[y_min / 1000, y_max / 1000], + axes=ax1, + title=title, + **kwargs + ) out_dir = os.path.join(plotdir, name) os.makedirs(out_dir, exist_ok=True) - if 'png' in file_format: - savepath_png = os.path.join(out_dir, name + '_' + datestring_file + '.png') + if "png" in file_format: + savepath_png = os.path.join(out_dir, name + "_" + datestring_file + ".png") fig1.savefig(savepath_png, dpi=dpi) - logging.debug('Mask static plot saved to ' + savepath_png) - if 'pdf' in file_format: - savepath_pdf = os.path.join(out_dir, name + '_' + datestring_file + '.pdf') + logging.debug("Mask static plot saved to " + savepath_png) + if "pdf" in file_format: + savepath_pdf = os.path.join(out_dir, name + "_" + datestring_file + ".pdf") fig1.savefig(savepath_pdf, dpi=dpi) - logging.debug('Mask static plot saved to ' + savepath_pdf) + logging.debug("Mask static plot saved to " + savepath_pdf) plt.close() plt.clf() -def plot_mask_cell_individual_static(cell_i,track, cog, features, mask_total, - field_contour, field_filled, - axes=None,xlim=None,ylim=None, - label_field_contour=None, cmap_field_contour='Blues',norm_field_contour=None, - linewidths_contour=0.8,contour_labels=False, - vmin_field_contour=0,vmax_field_contour=50,levels_field_contour=None,nlevels_field_contour=10, - label_field_filled=None,cmap_field_filled='summer',norm_field_filled=None, - vmin_field_filled=0,vmax_field_filled=100,levels_field_filled=None,nlevels_field_filled=10, - title=None,feature_number=False - ): - '''Make plots for cell in fixed frame and with one background field as filling and one background field as contrours +def plot_mask_cell_individual_static( + cell_i, + track, + cog, + features, + mask_total, + field_contour, + field_filled, + axes=None, + xlim=None, + ylim=None, + label_field_contour=None, + cmap_field_contour="Blues", + norm_field_contour=None, + linewidths_contour=0.8, + contour_labels=False, + vmin_field_contour=0, + vmax_field_contour=50, + levels_field_contour=None, + nlevels_field_contour=10, + label_field_filled=None, + cmap_field_filled="summer", + norm_field_filled=None, + vmin_field_filled=0, + vmax_field_filled=100, + levels_field_filled=None, + nlevels_field_filled=10, + title=None, + feature_number=False, +): + """Make plots for cell in fixed frame and with one background field as filling and one background field as contrours Input: Output: - ''' + """ import numpy as np - from .utils import mask_features,mask_features_surface + from .utils import mask_features, mask_features_surface from mpl_toolkits.axes_grid1 import make_axes_locatable from matplotlib.colors import Normalize - divider = make_axes_locatable(axes) if field_filled is not None: if levels_field_filled is None: - levels_field_filled=np.linspace(vmin_field_filled,vmax_field_filled, 10) - plot_field_filled = axes.contourf(field_filled.coord('projection_x_coordinate').points/1000, - field_filled.coord('projection_y_coordinate').points/1000, - field_filled.data, - levels=levels_field_filled, norm=norm_field_filled, - cmap=cmap_field_filled, vmin=vmin_field_filled, vmax=vmax_field_filled) - + levels_field_filled = np.linspace(vmin_field_filled, vmax_field_filled, 10) + plot_field_filled = axes.contourf( + field_filled.coord("projection_x_coordinate").points / 1000, + field_filled.coord("projection_y_coordinate").points / 1000, + field_filled.data, + levels=levels_field_filled, + norm=norm_field_filled, + cmap=cmap_field_filled, + vmin=vmin_field_filled, + vmax=vmax_field_filled, + ) cax_filled = divider.append_axes("right", size="5%", 
pad=0.1) - norm_filled= Normalize(vmin=vmin_field_filled, vmax=vmax_field_filled) - sm1= plt.cm.ScalarMappable(norm=norm_filled, cmap = plot_field_filled.cmap) + norm_filled = Normalize(vmin=vmin_field_filled, vmax=vmax_field_filled) + sm1 = plt.cm.ScalarMappable(norm=norm_filled, cmap=plot_field_filled.cmap) sm1.set_array([]) - cbar_field_filled = plt.colorbar(sm1, orientation='vertical',cax=cax_filled) + cbar_field_filled = plt.colorbar(sm1, orientation="vertical", cax=cax_filled) cbar_field_filled.ax.set_ylabel(label_field_filled) cbar_field_filled.set_clim(vmin_field_filled, vmax_field_filled) - if field_contour is not None: if levels_field_contour is None: - levels_field_contour=np.linspace(vmin_field_contour, vmax_field_contour, 5) - plot_field_contour = axes.contour(field_contour.coord('projection_x_coordinate').points/1000, - field_contour.coord('projection_y_coordinate').points/1000, - field_contour.data, - cmap=cmap_field_contour,norm=norm_field_contour, - levels=levels_field_contour,vmin=vmin_field_contour, vmax=vmax_field_contour, - linewidths=linewidths_contour) + levels_field_contour = np.linspace( + vmin_field_contour, vmax_field_contour, 5 + ) + plot_field_contour = axes.contour( + field_contour.coord("projection_x_coordinate").points / 1000, + field_contour.coord("projection_y_coordinate").points / 1000, + field_contour.data, + cmap=cmap_field_contour, + norm=norm_field_contour, + levels=levels_field_contour, + vmin=vmin_field_contour, + vmax=vmax_field_contour, + linewidths=linewidths_contour, + ) if contour_labels: axes.clabel(plot_field_contour, fontsize=10) cax_contour = divider.append_axes("bottom", size="5%", pad=0.1) if norm_field_contour: - vmin_field_contour=None - vmax_field_contour=None - norm_contour=norm_field_contour + vmin_field_contour = None + vmax_field_contour = None + norm_contour = norm_field_contour else: - norm_contour= Normalize(vmin=vmin_field_contour, vmax=vmax_field_contour) + norm_contour = Normalize(vmin=vmin_field_contour, vmax=vmax_field_contour) - sm_contour= plt.cm.ScalarMappable(norm=norm_contour, cmap = plot_field_contour.cmap) + sm_contour = plt.cm.ScalarMappable( + norm=norm_contour, cmap=plot_field_contour.cmap + ) sm_contour.set_array([]) - cbar_field_contour = plt.colorbar(sm_contour, orientation='horizontal',ticks=levels_field_contour,cax=cax_contour) + cbar_field_contour = plt.colorbar( + sm_contour, + orientation="horizontal", + ticks=levels_field_contour, + cax=cax_contour, + ) cbar_field_contour.ax.set_xlabel(label_field_contour) cbar_field_contour.set_clim(vmin_field_contour, vmax_field_contour) for i_row, row in track.iterrows(): - cell = row['cell'] - feature = row['feature'] -# logging.debug("cell: "+ str(row['cell'])) -# logging.debug("feature: "+ str(row['feature'])) + cell = row["cell"] + feature = row["feature"] + # logging.debug("cell: "+ str(row['cell'])) + # logging.debug("feature: "+ str(row['feature'])) - if cell==cell_i: - color='darkred' + if cell == cell_i: + color = "darkred" if feature_number: - cell_string=' '+str(int(cell))+' ('+str(int(feature))+')' + cell_string = " " + str(int(cell)) + " (" + str(int(feature)) + ")" else: - cell_string=' '+str(int(cell)) + cell_string = " " + str(int(cell)) elif np.isnan(cell): - color='gray' + color = "gray" if feature_number: - cell_string=' '+'('+str(int(feature))+')' + cell_string = " " + "(" + str(int(feature)) + ")" else: - cell_string=' ' + cell_string = " " else: - color='darkorange' + color = "darkorange" if feature_number: - cell_string=' '+str(int(cell))+' 
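Both colorbars above follow the same matplotlib pattern: an explicit Normalize drives a throwaway ScalarMappable, which is handed to the colorbar on an axis carved out with make_axes_locatable. A self-contained sketch with synthetic data; note that in recent matplotlib versions the set_clim call on the returned colorbar is no longer available, and the Normalize already fixes the limits:

    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib.colors import Normalize
    from mpl_toolkits.axes_grid1 import make_axes_locatable

    fig, ax = plt.subplots()
    data = np.random.default_rng(0).random((10, 10)) * 100
    ax.contourf(data, cmap="summer", vmin=0, vmax=100)

    # colorbar driven by an explicit norm rather than by the contour set
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.1)
    sm = plt.cm.ScalarMappable(norm=Normalize(vmin=0, vmax=100), cmap="summer")
    sm.set_array([])
    fig.colorbar(sm, cax=cax, orientation="vertical")
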
('+str(int(feature))+')' + cell_string = " " + str(int(cell)) + " (" + str(int(feature)) + ")" else: - cell_string=' '+str(int(cell)) + cell_string = " " + str(int(cell)) - axes.text(row['projection_x_coordinate']/1000, - row['projection_y_coordinate']/1000, - cell_string,color=color,fontsize=6, clip_on=True) + axes.text( + row["projection_x_coordinate"] / 1000, + row["projection_y_coordinate"] / 1000, + cell_string, + color=color, + fontsize=6, + clip_on=True, + ) # Plot marker for tracked cell centre as a cross - axes.plot(row['projection_x_coordinate']/1000, - row['projection_y_coordinate']/1000, - 'x', color=color,markersize=4) - - - #Create surface projection of mask for the respective cell and plot it in the right color - z_coord = 'model_level_number' - if len(mask_total.shape)==3: - mask_total_i_surface = mask_features_surface(mask_total, feature, masked=False, z_coord=z_coord) - elif len(mask_total.shape)==2: - mask_total_i_surface=mask_features(mask_total, feature, masked=False, z_coord=z_coord) - axes.contour(mask_total_i_surface.coord('projection_x_coordinate').points/1000, - mask_total_i_surface.coord('projection_y_coordinate').points/1000, - mask_total_i_surface.data, - levels=[0, feature], colors=color, linestyles=':',linewidth=1) + axes.plot( + row["projection_x_coordinate"] / 1000, + row["projection_y_coordinate"] / 1000, + "x", + color=color, + markersize=4, + ) + + # Create surface projection of mask for the respective cell and plot it in the right color + z_coord = "model_level_number" + if len(mask_total.shape) == 3: + mask_total_i_surface = mask_features_surface( + mask_total, feature, masked=False, z_coord=z_coord + ) + elif len(mask_total.shape) == 2: + mask_total_i_surface = mask_features( + mask_total, feature, masked=False, z_coord=z_coord + ) + axes.contour( + mask_total_i_surface.coord("projection_x_coordinate").points / 1000, + mask_total_i_surface.coord("projection_y_coordinate").points / 1000, + mask_total_i_surface.data, + levels=[0, feature], + colors=color, + linestyles=":", + linewidth=1, + ) if cog is not None: for i_row, row in cog.iterrows(): - cell = row['cell'] + cell = row["cell"] - if cell==cell_i: - color='darkred' + if cell == cell_i: + color = "darkred" else: - color='darkorange' + color = "darkorange" # plot marker for centre of gravity as a circle - axes.plot(row['x_M']/1000, row['y_M']/1000, - 'o', markeredgecolor=color, markerfacecolor='None',markersize=4) + axes.plot( + row["x_M"] / 1000, + row["y_M"] / 1000, + "o", + markeredgecolor=color, + markerfacecolor="None", + markersize=4, + ) if features is not None: for i_row, row in features.iterrows(): - color='purple' - axes.plot(row['projection_x_coordinate']/1000, - row['projection_y_coordinate']/1000, - '+', color=color,markersize=3) - - axes.set_xlabel('x (km)') - axes.set_ylabel('y (km)') + color = "purple" + axes.plot( + row["projection_x_coordinate"] / 1000, + row["projection_y_coordinate"] / 1000, + "+", + color=color, + markersize=3, + ) + + axes.set_xlabel("x (km)") + axes.set_ylabel("y (km)") axes.set_xlim(xlim) axes.set_ylim(ylim) - axes.xaxis.set_label_position('top') - axes.xaxis.set_ticks_position('top') - axes.set_title(title,pad=35,fontsize=10,horizontalalignment='left',loc='left') + axes.xaxis.set_label_position("top") + axes.xaxis.set_ticks_position("top") + axes.set_title(title, pad=35, fontsize=10, horizontalalignment="left", loc="left") return axes -def plot_mask_cell_track_2D3Dstatic(cell,track, cog, features, mask_total, - field_contour, field_filled, - 
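The three-way colour and label branching above recurs in the static, 3D and follow variants. A hypothetical refactor (helper name and signature are ours, not tobac's) capturing the convention: darkred for the cell the plot centres on, gray for features with no assigned cell, darkorange for other tracked cells:

    import numpy as np

    def cell_color_and_label(cell, cell_i, feature, feature_number=False):
        if cell == cell_i:              # the cell this plot centres on
            color, label = "darkred", " %d" % int(cell)
        elif np.isnan(cell):            # feature without an assigned cell
            color = "gray"
            label = " (%d)" % int(feature) if feature_number else " "
        else:                           # any other tracked cell
            color, label = "darkorange", " %d" % int(cell)
        if feature_number and not np.isnan(cell):
            label += " (%d)" % int(feature)
        return color, label

    print(cell_color_and_label(3.0, 3, 17, feature_number=True))
    # -> ('darkred', ' 3 (17)')
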
width=10000,n_extend=1, - name= 'test', plotdir='./', - file_format=['png'],figsize=(10/2.54, 10/2.54),dpi=300, - ele=10,azim=30, - **kwargs): - '''Make plots for all cells with fixed frame including entire development of the cell and with one background field as filling and one background field as contrours + +def plot_mask_cell_track_2D3Dstatic( + cell, + track, + cog, + features, + mask_total, + field_contour, + field_filled, + width=10000, + n_extend=1, + name="test", + plotdir="./", + file_format=["png"], + figsize=(10 / 2.54, 10 / 2.54), + dpi=300, + ele=10, + azim=30, + **kwargs +): + """Make plots for all cells with fixed frame including entire development of the cell and with one background field as filling and one background field as contrours Input: Output: - ''' + """ from iris import Constraint from numpy import unique import os from mpl_toolkits.mplot3d import Axes3D import matplotlib.gridspec as gridspec - - track_cell=track[track['cell']==cell] - x_min=track_cell['projection_x_coordinate'].min()-width - x_max=track_cell['projection_x_coordinate'].max()+width - y_min=track_cell['projection_y_coordinate'].min()-width - y_max=track_cell['projection_y_coordinate'].max()+width - - #set up looping over time based on mask's time coordinate to allow for one timestep before and after the track - time_coord=mask_total.coord('time') - time=time_coord.units.num2date(time_coord.points) - i_start=max(0,np.where(time==track_cell['time'].values[0])[0][0]-n_extend) - i_end=min(len(time)-1,np.where(time==track_cell['time'].values[-1])[0][0]+n_extend+1) - time_cell=time[slice(i_start,i_end)] + track_cell = track[track["cell"] == cell] + x_min = track_cell["projection_x_coordinate"].min() - width + x_max = track_cell["projection_x_coordinate"].max() + width + y_min = track_cell["projection_y_coordinate"].min() - width + y_max = track_cell["projection_y_coordinate"].max() + width + + # set up looping over time based on mask's time coordinate to allow for one timestep before and after the track + time_coord = mask_total.coord("time") + time = time_coord.units.num2date(time_coord.points) + i_start = max(0, np.where(time == track_cell["time"].values[0])[0][0] - n_extend) + i_end = min( + len(time) - 1, + np.where(time == track_cell["time"].values[-1])[0][0] + n_extend + 1, + ) + time_cell = time[slice(i_start, i_end)] for time_i in time_cell: -# for i_row,row in track_cell.iterrows(): -# time_i=row['time'] -# constraint_time = Constraint(time=row['time']) + # for i_row,row in track_cell.iterrows(): + # time_i=row['time'] + # constraint_time = Constraint(time=row['time']) constraint_time = Constraint(time=time_i) - constraint_x = Constraint(projection_x_coordinate = lambda cell: x_min < cell < x_max) - constraint_y = Constraint(projection_y_coordinate = lambda cell: y_min < cell < y_max) + constraint_x = Constraint( + projection_x_coordinate=lambda cell: x_min < cell < x_max + ) + constraint_y = Constraint( + projection_y_coordinate=lambda cell: y_min < cell < y_max + ) constraint = constraint_time & constraint_x & constraint_y - mask_total_i=mask_total.extract(constraint) + mask_total_i = mask_total.extract(constraint) if field_contour is None: - field_contour_i=None + field_contour_i = None else: - field_contour_i=field_contour.extract(constraint) + field_contour_i = field_contour.extract(constraint) if field_filled is None: - field_filled_i=None + field_filled_i = None else: - field_filled_i=field_filled.extract(constraint) - - - track_i=track[track['time']==time_i] - - 
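The spatial subsetting above relies on iris coordinate constraints with callables; note that the lambda argument named cell shadows the cell number used throughout these functions. A sketch with the argument renamed (bbox_constraint is a hypothetical name, shown only for illustration):

    from iris import Constraint

    def bbox_constraint(time_i, x_min, x_max, y_min, y_max):
        # subset a cube to one timestep and a projected bounding box
        constraint_time = Constraint(time=time_i)
        constraint_x = Constraint(projection_x_coordinate=lambda v: x_min < v < x_max)
        constraint_y = Constraint(projection_y_coordinate=lambda v: y_min < v < y_max)
        return constraint_time & constraint_x & constraint_y

    # usage: mask_total_i = mask_total.extract(bbox_constraint(t, x0, x1, y0, y1))
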
cells_mask=list(unique(mask_total_i.core_data())) - track_cells=track_i.loc[(track_i['projection_x_coordinate'] > x_min) & (track_i['projection_x_coordinate'] < x_max) & (track_i['projection_y_coordinate'] > y_min) & (track_i['projection_y_coordinate'] < y_max)] - cells_track=list(track_cells['cell'].values) - cells=list(set( cells_mask + cells_track )) + field_filled_i = field_filled.extract(constraint) + + track_i = track[track["time"] == time_i] + + cells_mask = list(unique(mask_total_i.core_data())) + track_cells = track_i.loc[ + (track_i["projection_x_coordinate"] > x_min) + & (track_i["projection_x_coordinate"] < x_max) + & (track_i["projection_y_coordinate"] > y_min) + & (track_i["projection_y_coordinate"] < y_max) + ] + cells_track = list(track_cells["cell"].values) + cells = list(set(cells_mask + cells_track)) if cell not in cells: cells.append(cell) if 0 in cells: cells.remove(0) - track_i=track_i[track_i['cell'].isin(cells)] + track_i = track_i[track_i["cell"].isin(cells)] if cog is None: - cog_i=None + cog_i = None else: - cog_i=cog[cog['cell'].isin(cells)] - cog_i=cog_i[cog_i['time']==time_i] + cog_i = cog[cog["cell"].isin(cells)] + cog_i = cog_i[cog_i["time"] == time_i] if features is None: - features_i=None + features_i = None else: - features_i=features[features['time']==time_i] + features_i = features[features["time"] == time_i] - - fig1=plt.figure(figsize=(20 / 2.54, 10 / 2.54)) - fig1.subplots_adjust(left=0.1, bottom=0.15, right=0.9, top=0.9,wspace=0.3, hspace=0.25) + fig1 = plt.figure(figsize=(20 / 2.54, 10 / 2.54)) + fig1.subplots_adjust( + left=0.1, bottom=0.15, right=0.9, top=0.9, wspace=0.3, hspace=0.25 + ) # make two subplots for figure: - gs1 = gridspec.GridSpec(1, 2,width_ratios=[1,1.2]) + gs1 = gridspec.GridSpec(1, 2, width_ratios=[1, 1.2]) fig1.add_subplot(gs1[0]) - fig1.add_subplot(gs1[1], projection='3d') + fig1.add_subplot(gs1[1], projection="3d") ax1 = fig1.get_axes() - - datestring_stamp = time_i.strftime('%Y-%m-%d %H:%M:%S') - if time_i in track_cell['time'].values: - time_cell_i=track_cell[track_cell['time'].values==time_i]['time_cell'] - celltime_stamp = "%02d:%02d:%02d" % (time_cell_i.dt.total_seconds() // 3600, - (time_cell_i.dt.total_seconds() % 3600) // 60, - time_cell_i.dt.total_seconds() % 60 ) + datestring_stamp = time_i.strftime("%Y-%m-%d %H:%M:%S") + if time_i in track_cell["time"].values: + time_cell_i = track_cell[track_cell["time"].values == time_i]["time_cell"] + celltime_stamp = "%02d:%02d:%02d" % ( + time_cell_i.dt.total_seconds() // 3600, + (time_cell_i.dt.total_seconds() % 3600) // 60, + time_cell_i.dt.total_seconds() % 60, + ) else: - celltime_stamp=' - ' - title=datestring_stamp + ' , ' + celltime_stamp - datestring_file = time_i.strftime('%Y-%m-%d_%H%M%S') - - ax1[0]=plot_mask_cell_individual_static(cell_i=cell, - track=track_i, cog=cog_i,features=features_i, - mask_total=mask_total_i, - field_contour=field_contour_i, field_filled=field_filled_i, - xlim=[x_min/1000,x_max/1000],ylim=[y_min/1000,y_max/1000], - axes=ax1[0],title=title,**kwargs) - - ax1[1]=plot_mask_cell_individual_3Dstatic(cell_i=cell, - track=track_i, cog=cog_i,features=features_i, - mask_total=mask_total_i, - field_contour=field_contour_i, field_filled=field_filled_i, - xlim=[x_min/1000,x_max/1000],ylim=[y_min/1000,y_max/1000], - axes=ax1[1],title=title, - ele=ele,azim=azim, - **kwargs) + celltime_stamp = " - " + title = datestring_stamp + " , " + celltime_stamp + datestring_file = time_i.strftime("%Y-%m-%d_%H%M%S") + + ax1[0] = plot_mask_cell_individual_static( + 
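The two-panel layout above, a 2D map beside a 3D view, reduces to a short GridSpec recipe; fig.get_axes() then returns the axes in creation order, which is what the ax1[0]/ax1[1] indexing relies on. A minimal sketch:

    import matplotlib.pyplot as plt
    import matplotlib.gridspec as gridspec
    from mpl_toolkits.mplot3d import Axes3D  # needed on older matplotlib

    fig = plt.figure(figsize=(20 / 2.54, 10 / 2.54))
    fig.subplots_adjust(left=0.1, bottom=0.15, right=0.9, top=0.9, wspace=0.3)
    gs = gridspec.GridSpec(1, 2, width_ratios=[1, 1.2])
    ax2d = fig.add_subplot(gs[0])
    ax3d = fig.add_subplot(gs[1], projection="3d")
    assert fig.get_axes() == [ax2d, ax3d]  # creation order is preserved
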
cell_i=cell, + track=track_i, + cog=cog_i, + features=features_i, + mask_total=mask_total_i, + field_contour=field_contour_i, + field_filled=field_filled_i, + xlim=[x_min / 1000, x_max / 1000], + ylim=[y_min / 1000, y_max / 1000], + axes=ax1[0], + title=title, + **kwargs + ) + + ax1[1] = plot_mask_cell_individual_3Dstatic( + cell_i=cell, + track=track_i, + cog=cog_i, + features=features_i, + mask_total=mask_total_i, + field_contour=field_contour_i, + field_filled=field_filled_i, + xlim=[x_min / 1000, x_max / 1000], + ylim=[y_min / 1000, y_max / 1000], + axes=ax1[1], + title=title, + ele=ele, + azim=azim, + **kwargs + ) out_dir = os.path.join(plotdir, name) os.makedirs(out_dir, exist_ok=True) - if 'png' in file_format: - savepath_png = os.path.join(out_dir, name + '_' + datestring_file + '.png') + if "png" in file_format: + savepath_png = os.path.join(out_dir, name + "_" + datestring_file + ".png") fig1.savefig(savepath_png, dpi=dpi) - logging.debug('Mask static 2d/3D plot saved to ' + savepath_png) - if 'pdf' in file_format: - savepath_pdf = os.path.join(out_dir, name + '_' + datestring_file + '.pdf') + logging.debug("Mask static 2d/3D plot saved to " + savepath_png) + if "pdf" in file_format: + savepath_pdf = os.path.join(out_dir, name + "_" + datestring_file + ".pdf") fig1.savefig(savepath_pdf, dpi=dpi) - logging.debug('Mask static 2d/3D plot saved to ' + savepath_pdf) + logging.debug("Mask static 2d/3D plot saved to " + savepath_pdf) plt.close() plt.clf() - -def plot_mask_cell_track_3Dstatic(cell,track, cog, features, mask_total, - field_contour, field_filled, - width=10000,n_extend=1, - name= 'test', plotdir='./', - file_format=['png'],figsize=(10/2.54, 10/2.54),dpi=300, - **kwargs): - '''Make plots for all cells with fixed frame including entire development of the cell and with one background field as filling and one background field as contrours +def plot_mask_cell_track_3Dstatic( + cell, + track, + cog, + features, + mask_total, + field_contour, + field_filled, + width=10000, + n_extend=1, + name="test", + plotdir="./", + file_format=["png"], + figsize=(10 / 2.54, 10 / 2.54), + dpi=300, + **kwargs +): + """Make plots for all cells with fixed frame including entire development of the cell and with one background field as filling and one background field as contrours Input: Output: - ''' + """ from iris import Constraint from numpy import unique import os from mpl_toolkits.mplot3d import Axes3D - track_cell=track[track['cell']==cell] - x_min=track_cell['projection_x_coordinate'].min()-width - x_max=track_cell['projection_x_coordinate'].max()+width - y_min=track_cell['projection_y_coordinate'].min()-width - y_max=track_cell['projection_y_coordinate'].max()+width - - #set up looping over time based on mask's time coordinate to allow for one timestep before and after the track - time_coord=mask_total.coord('time') - time=time_coord.units.num2date(time_coord.points) - i_start=max(0,np.where(time==track_cell['time'].values[0])[0][0]-n_extend) - i_end=min(len(time)-1,np.where(time==track_cell['time'].values[-1])[0][0]+n_extend+1) - time_cell=time[slice(i_start,i_end)] + track_cell = track[track["cell"] == cell] + x_min = track_cell["projection_x_coordinate"].min() - width + x_max = track_cell["projection_x_coordinate"].max() + width + y_min = track_cell["projection_y_coordinate"].min() - width + y_max = track_cell["projection_y_coordinate"].max() + width + + # set up looping over time based on mask's time coordinate to allow for one timestep before and after the track + time_coord = 
mask_total.coord("time") + time = time_coord.units.num2date(time_coord.points) + i_start = max(0, np.where(time == track_cell["time"].values[0])[0][0] - n_extend) + i_end = min( + len(time) - 1, + np.where(time == track_cell["time"].values[-1])[0][0] + n_extend + 1, + ) + time_cell = time[slice(i_start, i_end)] for time_i in time_cell: -# for i_row,row in track_cell.iterrows(): -# time_i=row['time'] -# constraint_time = Constraint(time=row['time']) + # for i_row,row in track_cell.iterrows(): + # time_i=row['time'] + # constraint_time = Constraint(time=row['time']) constraint_time = Constraint(time=time_i) - constraint_x = Constraint(projection_x_coordinate = lambda cell: x_min < cell < x_max) - constraint_y = Constraint(projection_y_coordinate = lambda cell: y_min < cell < y_max) + constraint_x = Constraint( + projection_x_coordinate=lambda cell: x_min < cell < x_max + ) + constraint_y = Constraint( + projection_y_coordinate=lambda cell: y_min < cell < y_max + ) constraint = constraint_time & constraint_x & constraint_y - mask_total_i=mask_total.extract(constraint) + mask_total_i = mask_total.extract(constraint) if field_contour is None: - field_contour_i=None + field_contour_i = None else: - field_contour_i=field_contour.extract(constraint) + field_contour_i = field_contour.extract(constraint) if field_filled is None: - field_filled_i=None + field_filled_i = None else: - field_filled_i=field_filled.extract(constraint) - - - track_i=track[track['time']==time_i] - - cells_mask=list(unique(mask_total_i.core_data())) - track_cells=track_i.loc[(track_i['projection_x_coordinate'] > x_min) & (track_i['projection_x_coordinate'] < x_max) & (track_i['projection_y_coordinate'] > y_min) & (track_i['projection_y_coordinate'] < y_max)] - cells_track=list(track_cells['cell'].values) - cells=list(set( cells_mask + cells_track )) + field_filled_i = field_filled.extract(constraint) + + track_i = track[track["time"] == time_i] + + cells_mask = list(unique(mask_total_i.core_data())) + track_cells = track_i.loc[ + (track_i["projection_x_coordinate"] > x_min) + & (track_i["projection_x_coordinate"] < x_max) + & (track_i["projection_y_coordinate"] > y_min) + & (track_i["projection_y_coordinate"] < y_max) + ] + cells_track = list(track_cells["cell"].values) + cells = list(set(cells_mask + cells_track)) if cell not in cells: cells.append(cell) if 0 in cells: cells.remove(0) - track_i=track_i[track_i['cell'].isin(cells)] + track_i = track_i[track_i["cell"].isin(cells)] if cog is None: - cog_i=None + cog_i = None else: - cog_i=cog[cog['cell'].isin(cells)] - cog_i=cog_i[cog_i['time']==time_i] + cog_i = cog[cog["cell"].isin(cells)] + cog_i = cog_i[cog_i["time"] == time_i] if features is None: - features_i=None + features_i = None else: - features_i=features[features['time']==time_i] - - -# fig1, ax1 = plt.subplots(ncols=1, nrows=1, figsize=figsize) -# fig1.subplots_adjust(left=0.2, bottom=0.15, right=0.80, top=0.85) - fig1, ax1 = plt.subplots(ncols=1, nrows=1, figsize=(10/2.54, 10/2.54), subplot_kw={'projection': '3d'}) - - - datestring_stamp = time_i.strftime('%Y-%m-%d %H:%M:%S') - if time_i in track_cell['time'].values: - time_cell_i=track_cell[track_cell['time'].values==time_i]['time_cell'] - celltime_stamp = "%02d:%02d:%02d" % (time_cell_i.dt.total_seconds() // 3600, - (time_cell_i.dt.total_seconds() % 3600) // 60, - time_cell_i.dt.total_seconds() % 60 ) + features_i = features[features["time"] == time_i] + + # fig1, ax1 = plt.subplots(ncols=1, nrows=1, figsize=figsize) + # fig1.subplots_adjust(left=0.2, 
bottom=0.15, right=0.80, top=0.85) + fig1, ax1 = plt.subplots( + ncols=1, + nrows=1, + figsize=(10 / 2.54, 10 / 2.54), + subplot_kw={"projection": "3d"}, + ) + + datestring_stamp = time_i.strftime("%Y-%m-%d %H:%M:%S") + if time_i in track_cell["time"].values: + time_cell_i = track_cell[track_cell["time"].values == time_i]["time_cell"] + celltime_stamp = "%02d:%02d:%02d" % ( + time_cell_i.dt.total_seconds() // 3600, + (time_cell_i.dt.total_seconds() % 3600) // 60, + time_cell_i.dt.total_seconds() % 60, + ) else: - celltime_stamp=' - ' - title=datestring_stamp + ' , ' + celltime_stamp - datestring_file = time_i.strftime('%Y-%m-%d_%H%M%S') - - ax1=plot_mask_cell_individual_3Dstatic(cell_i=cell, - track=track_i, cog=cog_i,features=features_i, - mask_total=mask_total_i, - field_contour=field_contour_i, field_filled=field_filled_i, - xlim=[x_min/1000,x_max/1000],ylim=[y_min/1000,y_max/1000], - axes=ax1,title=title,**kwargs) + celltime_stamp = " - " + title = datestring_stamp + " , " + celltime_stamp + datestring_file = time_i.strftime("%Y-%m-%d_%H%M%S") + + ax1 = plot_mask_cell_individual_3Dstatic( + cell_i=cell, + track=track_i, + cog=cog_i, + features=features_i, + mask_total=mask_total_i, + field_contour=field_contour_i, + field_filled=field_filled_i, + xlim=[x_min / 1000, x_max / 1000], + ylim=[y_min / 1000, y_max / 1000], + axes=ax1, + title=title, + **kwargs + ) out_dir = os.path.join(plotdir, name) os.makedirs(out_dir, exist_ok=True) - if 'png' in file_format: - savepath_png = os.path.join(out_dir, name + '_' + datestring_file + '.png') + if "png" in file_format: + savepath_png = os.path.join(out_dir, name + "_" + datestring_file + ".png") fig1.savefig(savepath_png, dpi=dpi) - logging.debug('Mask static plot saved to ' + savepath_png) - if 'pdf' in file_format: - savepath_pdf = os.path.join(out_dir, name + '_' + datestring_file + '.pdf') + logging.debug("Mask static plot saved to " + savepath_png) + if "pdf" in file_format: + savepath_pdf = os.path.join(out_dir, name + "_" + datestring_file + ".pdf") fig1.savefig(savepath_pdf, dpi=dpi) - logging.debug('Mask static plot saved to ' + savepath_pdf) + logging.debug("Mask static plot saved to " + savepath_pdf) plt.close() plt.clf() -def plot_mask_cell_individual_3Dstatic(cell_i,track, cog, features, mask_total, - field_contour, field_filled, - axes=None,xlim=None,ylim=None, - label_field_contour=None, cmap_field_contour='Blues',norm_field_contour=None, - linewidths_contour=0.8,contour_labels=False, - vmin_field_contour=0,vmax_field_contour=50,levels_field_contour=None,nlevels_field_contour=10, - label_field_filled=None,cmap_field_filled='summer',norm_field_filled=None, - vmin_field_filled=0,vmax_field_filled=100,levels_field_filled=None,nlevels_field_filled=10, - title=None,feature_number=False, - ele=10.,azim=210. 
- ): - '''Make plots for cell in fixed frame and with one background field as filling and one background field as contrours +def plot_mask_cell_individual_3Dstatic( + cell_i, + track, + cog, + features, + mask_total, + field_contour, + field_filled, + axes=None, + xlim=None, + ylim=None, + label_field_contour=None, + cmap_field_contour="Blues", + norm_field_contour=None, + linewidths_contour=0.8, + contour_labels=False, + vmin_field_contour=0, + vmax_field_contour=50, + levels_field_contour=None, + nlevels_field_contour=10, + label_field_filled=None, + cmap_field_filled="summer", + norm_field_filled=None, + vmin_field_filled=0, + vmax_field_filled=100, + levels_field_filled=None, + nlevels_field_filled=10, + title=None, + feature_number=False, + ele=10.0, + azim=210.0, +): + """Make plots for cell in fixed frame and with one background field as filling and one background field as contrours Input: Output: - ''' + """ import numpy as np - from .utils import mask_features,mask_features_surface -# from mpl_toolkits.axes_grid1 import make_axes_locatable -# from matplotlib.colors import Normalize + from .utils import mask_features, mask_features_surface + + # from mpl_toolkits.axes_grid1 import make_axes_locatable + # from matplotlib.colors import Normalize from mpl_toolkits.mplot3d import Axes3D axes.view_init(elev=ele, azim=azim) @@ -936,340 +1319,436 @@ def plot_mask_cell_individual_3Dstatic(cell_i,track, cog, features, mask_total, axes.yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0)) axes.zaxis.set_pane_color((1.0, 1.0, 1.0, 0.0)) # make the grid lines transparent - axes.xaxis._axinfo["grid"]['color'] = (1,1,1,0) - axes.yaxis._axinfo["grid"]['color'] = (1,1,1,0) - axes.zaxis._axinfo["grid"]['color'] = (1,1,1,0) + axes.xaxis._axinfo["grid"]["color"] = (1, 1, 1, 0) + axes.yaxis._axinfo["grid"]["color"] = (1, 1, 1, 0) + axes.zaxis._axinfo["grid"]["color"] = (1, 1, 1, 0) if title is not None: - axes.set_title(title,horizontalalignment='left',loc='left') - -# colors_mask = ['pink','darkred', 'orange', 'darkred', 'red', 'darkorange'] - x = mask_total.coord('projection_x_coordinate').points - y = mask_total.coord('projection_y_coordinate').points - z = mask_total.coord('model_level_number').points - -# z = mask_total.coord('geopotential_height').points - zz, yy, xx = np.meshgrid(z, y, x, indexing='ij') -# z_alt = mask_total.coord('geopotential_height').points - - -# divider = make_axes_locatable(axes) - -# if field_filled is not None: -# if levels_field_filled is None: -# levels_field_filled=np.linspace(vmin_field_filled,vmax_field_filled, 10) -# plot_field_filled = axes.contourf(field_filled.coord('projection_x_coordinate').points/1000, -# field_filled.coord('projection_y_coordinate').points/1000, -# field_filled.data, -# levels=levels_field_filled, norm=norm_field_filled, -# cmap=cmap_field_filled, vmin=vmin_field_filled, vmax=vmax_field_filled) - - -# cax_filled = divider.append_axes("right", size="5%", pad=0.1) - -# norm_filled= Normalize(vmin=vmin_field_filled, vmax=vmax_field_filled) -# sm1= plt.cm.ScalarMappable(norm=norm_filled, cmap = plot_field_filled.cmap) -# sm1.set_array([]) - -# cbar_field_filled = plt.colorbar(sm1, orientation='vertical',cax=cax_filled) -# cbar_field_filled.ax.set_ylabel(label_field_filled) -# cbar_field_filled.set_clim(vmin_field_filled, vmax_field_filled) - - -# if field_contour is not None: -# if levels_field_contour is None: -# levels_field_contour=np.linspace(vmin_field_contour, vmax_field_contour, 5) -# plot_field_contour = 
axes.contour(field_contour.coord('projection_x_coordinate').points/1000, -# field_contour.coord('projection_y_coordinate').points/1000, -# field_contour.data, -# cmap=cmap_field_contour,norm=norm_field_contour, -# levels=levels_field_contour,vmin=vmin_field_contour, vmax=vmax_field_contour, -# linewidths=linewidths_contour) - -# if contour_labels: -# axes.clabel(plot_field_contour, fontsize=10) - -# cax_contour = divider.append_axes("bottom", size="5%", pad=0.1) -# if norm_field_contour: -# vmin_field_contour=None -# vmax_field_contour=None -# norm_contour=norm_field_contour -# else: -# norm_contour= Normalize(vmin=vmin_field_contour, vmax=vmax_field_contour) -# -# sm_contour= plt.cm.ScalarMappable(norm=norm_contour, cmap = plot_field_contour.cmap) -# sm_contour.set_array([]) -# -# cbar_field_contour = plt.colorbar(sm_contour, orientation='horizontal',ticks=levels_field_contour,cax=cax_contour) -# cbar_field_contour.ax.set_xlabel(label_field_contour) -# cbar_field_contour.set_clim(vmin_field_contour, vmax_field_contour) -# + axes.set_title(title, horizontalalignment="left", loc="left") + + # colors_mask = ['pink','darkred', 'orange', 'darkred', 'red', 'darkorange'] + x = mask_total.coord("projection_x_coordinate").points + y = mask_total.coord("projection_y_coordinate").points + z = mask_total.coord("model_level_number").points + + # z = mask_total.coord('geopotential_height').points + zz, yy, xx = np.meshgrid(z, y, x, indexing="ij") + # z_alt = mask_total.coord('geopotential_height').points + + # divider = make_axes_locatable(axes) + + # if field_filled is not None: + # if levels_field_filled is None: + # levels_field_filled=np.linspace(vmin_field_filled,vmax_field_filled, 10) + # plot_field_filled = axes.contourf(field_filled.coord('projection_x_coordinate').points/1000, + # field_filled.coord('projection_y_coordinate').points/1000, + # field_filled.data, + # levels=levels_field_filled, norm=norm_field_filled, + # cmap=cmap_field_filled, vmin=vmin_field_filled, vmax=vmax_field_filled) + + # cax_filled = divider.append_axes("right", size="5%", pad=0.1) + + # norm_filled= Normalize(vmin=vmin_field_filled, vmax=vmax_field_filled) + # sm1= plt.cm.ScalarMappable(norm=norm_filled, cmap = plot_field_filled.cmap) + # sm1.set_array([]) + + # cbar_field_filled = plt.colorbar(sm1, orientation='vertical',cax=cax_filled) + # cbar_field_filled.ax.set_ylabel(label_field_filled) + # cbar_field_filled.set_clim(vmin_field_filled, vmax_field_filled) + + # if field_contour is not None: + # if levels_field_contour is None: + # levels_field_contour=np.linspace(vmin_field_contour, vmax_field_contour, 5) + # plot_field_contour = axes.contour(field_contour.coord('projection_x_coordinate').points/1000, + # field_contour.coord('projection_y_coordinate').points/1000, + # field_contour.data, + # cmap=cmap_field_contour,norm=norm_field_contour, + # levels=levels_field_contour,vmin=vmin_field_contour, vmax=vmax_field_contour, + # linewidths=linewidths_contour) + + # if contour_labels: + # axes.clabel(plot_field_contour, fontsize=10) + + # cax_contour = divider.append_axes("bottom", size="5%", pad=0.1) + # if norm_field_contour: + # vmin_field_contour=None + # vmax_field_contour=None + # norm_contour=norm_field_contour + # else: + # norm_contour= Normalize(vmin=vmin_field_contour, vmax=vmax_field_contour) + # + # sm_contour= plt.cm.ScalarMappable(norm=norm_contour, cmap = plot_field_contour.cmap) + # sm_contour.set_array([]) + # + # cbar_field_contour = plt.colorbar(sm_contour, 
orientation='horizontal',ticks=levels_field_contour,cax=cax_contour) + # cbar_field_contour.ax.set_xlabel(label_field_contour) + # cbar_field_contour.set_clim(vmin_field_contour, vmax_field_contour) + # for i_row, row in track.iterrows(): - cell = row['cell'] - feature = row['feature'] -# logging.debug("cell: "+ str(row['cell'])) -# logging.debug("feature: "+ str(row['feature'])) + cell = row["cell"] + feature = row["feature"] + # logging.debug("cell: "+ str(row['cell'])) + # logging.debug("feature: "+ str(row['feature'])) - if cell==cell_i: - color='darkred' + if cell == cell_i: + color = "darkred" if feature_number: - cell_string=' '+str(int(cell))+' ('+str(int(feature))+')' + cell_string = " " + str(int(cell)) + " (" + str(int(feature)) + ")" else: - cell_string=' '+str(int(cell)) + cell_string = " " + str(int(cell)) elif np.isnan(cell): - color='gray' + color = "gray" if feature_number: - cell_string=' '+'('+str(int(feature))+')' + cell_string = " " + "(" + str(int(feature)) + ")" else: - cell_string=' ' + cell_string = " " else: - color='darkorange' + color = "darkorange" if feature_number: - cell_string=' '+str(int(cell))+' ('+str(int(feature))+')' + cell_string = " " + str(int(cell)) + " (" + str(int(feature)) + ")" else: - cell_string=' '+str(int(cell)) - -# axes.text(row['projection_x_coordinate']/1000, -# row['projection_y_coordinate']/1000, -# 0, -# cell_string,color=color,fontsize=6, clip_on=True) - -# # Plot marker for tracked cell centre as a cross -# axes.plot(row['projection_x_coordinate']/1000, -# row['projection_y_coordinate']/1000, -# 0, -# 'x', color=color,markersize=4) - - - #Create surface projection of mask for the respective cell and plot it in the right color -# z_coord = 'model_level_number' -# if len(mask_total.shape)==3: -# mask_total_i_surface = mask_features_surface(mask_total, feature, masked=False, z_coord=z_coord) -# elif len(mask_total.shape)==2: -# mask_total_i_surface=mask_features(mask_total, feature, masked=False, z_coord=z_coord) -# axes.contour(mask_total_i_surface.coord('projection_x_coordinate').points/1000, -# mask_total_i_surface.coord('projection_y_coordinate').points/1000, -# 0, -# mask_total_i_surface.data, -# levels=[0, feature], colors=color, linestyles=':',linewidth=1) + cell_string = " " + str(int(cell)) + + # axes.text(row['projection_x_coordinate']/1000, + # row['projection_y_coordinate']/1000, + # 0, + # cell_string,color=color,fontsize=6, clip_on=True) + + # # Plot marker for tracked cell centre as a cross + # axes.plot(row['projection_x_coordinate']/1000, + # row['projection_y_coordinate']/1000, + # 0, + # 'x', color=color,markersize=4) + + # Create surface projection of mask for the respective cell and plot it in the right color + # z_coord = 'model_level_number' + # if len(mask_total.shape)==3: + # mask_total_i_surface = mask_features_surface(mask_total, feature, masked=False, z_coord=z_coord) + # elif len(mask_total.shape)==2: + # mask_total_i_surface=mask_features(mask_total, feature, masked=False, z_coord=z_coord) + # axes.contour(mask_total_i_surface.coord('projection_x_coordinate').points/1000, + # mask_total_i_surface.coord('projection_y_coordinate').points/1000, + # 0, + # mask_total_i_surface.data, + # levels=[0, feature], colors=color, linestyles=':',linewidth=1) mask_feature = mask_total.data == feature axes.scatter( -# xx[mask_feature]/1000, yy[mask_feature]/1000, zz[mask_feature]/1000, - xx[mask_feature]/1000, yy[mask_feature]/1000, zz[mask_feature], - c=color, marker=',', - s=5,#60000.0 * TWC_i[Mask_particle], - 
alpha=0.3, cmap='inferno', label=cell_string,rasterized=True) + # xx[mask_feature]/1000, yy[mask_feature]/1000, zz[mask_feature]/1000, + xx[mask_feature] / 1000, + yy[mask_feature] / 1000, + zz[mask_feature], + c=color, + marker=",", + s=5, # 60000.0 * TWC_i[Mask_particle], + alpha=0.3, + cmap="inferno", + label=cell_string, + rasterized=True, + ) axes.set_xlim(xlim) axes.set_ylim(ylim) axes.set_zlim([0, 100]) -# axes.set_zlim([0, 20]) -# axes.set_zticks([0, 5,10,15, 20]) - axes.set_xlabel('x (km)') - axes.set_ylabel('y (km)') + # axes.set_zlim([0, 20]) + # axes.set_zticks([0, 5,10,15, 20]) + axes.set_xlabel("x (km)") + axes.set_ylabel("y (km)") axes.zaxis.set_rotate_label(False) # disable automatic rotation -# axes.set_zlabel('z (km)', rotation=90) - axes.set_zlabel('model level', rotation=90) + # axes.set_zlabel('z (km)', rotation=90) + axes.set_zlabel("model level", rotation=90) return axes - -def plot_mask_cell_track_static_timeseries(cell,track, cog, features, mask_total, - field_contour, field_filled, - track_variable=None,variable=None,variable_ylabel=None,variable_label=[None],variable_legend=False,variable_color=None, - width=10000,n_extend=1, - name= 'test', plotdir='./', - file_format=['png'],figsize=(20/2.54, 10/2.54),dpi=300, - **kwargs): - '''Make plots for all cells with fixed frame including entire development of the cell and with one background field as filling and one background field as contrours +def plot_mask_cell_track_static_timeseries( + cell, + track, + cog, + features, + mask_total, + field_contour, + field_filled, + track_variable=None, + variable=None, + variable_ylabel=None, + variable_label=[None], + variable_legend=False, + variable_color=None, + width=10000, + n_extend=1, + name="test", + plotdir="./", + file_format=["png"], + figsize=(20 / 2.54, 10 / 2.54), + dpi=300, + **kwargs +): + """Make plots for all cells with fixed frame including entire development of the cell and with one background field as filling and one background field as contrours Input: Output: - ''' - '''Make plots for all cells with fixed frame including entire development of the cell and with one background field as filling and one background field as contrours + """ + """Make plots for all cells with fixed frame including entire development of the cell and with one background field as filling and one background field as contrours Input: Output: - ''' + """ from iris import Constraint from numpy import unique import os import pandas as pd - track_cell=track[track['cell']==cell] - x_min=track_cell['projection_x_coordinate'].min()-width - x_max=track_cell['projection_x_coordinate'].max()+width - y_min=track_cell['projection_y_coordinate'].min()-width - y_max=track_cell['projection_y_coordinate'].max()+width - time_min=track_cell['time'].min() -# time_max=track_cell['time'].max() - - track_variable_cell=track_variable[track_variable['cell']==cell] - track_variable_cell['time_cell']=pd.to_timedelta(track_variable_cell['time_cell']) -# track_variable_cell=track_variable_cell[(track_variable_cell['time']>=time_min) & (track_variable_cell['time']<=time_max)] - - #set up looping over time based on mask's time coordinate to allow for one timestep before and after the track - time_coord=mask_total.coord('time') - time=time_coord.units.num2date(time_coord.points) - i_start=max(0,np.where(time==track_cell['time'].values[0])[0][0]-n_extend) - i_end=min(len(time)-1,np.where(time==track_cell['time'].values[-1])[0][0]+n_extend+1) - time_cell=time[slice(i_start,i_end)] + track_cell = track[track["cell"] 
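The 3D rendering above scatters the voxels of each feature after meshgridding the coordinate vectors in (z, y, x) order; the cmap='inferno' argument has no effect there, since c is a single colour. A minimal reproduction with a synthetic mask:

    import numpy as np
    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D  # needed on older matplotlib

    z, y, x = np.arange(5), np.arange(4), np.arange(6)
    mask = np.zeros((5, 4, 6), dtype=int)
    mask[1:3, 1:3, 2:4] = 17                 # voxels belonging to feature 17
    zz, yy, xx = np.meshgrid(z, y, x, indexing="ij")

    fig = plt.figure()
    ax = fig.add_subplot(projection="3d")
    picked = mask == 17
    ax.scatter(xx[picked], yy[picked], zz[picked],
               c="darkred", marker=",", s=5, alpha=0.3, rasterized=True)
    ax.set_zlabel("model level")
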
== cell] + x_min = track_cell["projection_x_coordinate"].min() - width + x_max = track_cell["projection_x_coordinate"].max() + width + y_min = track_cell["projection_y_coordinate"].min() - width + y_max = track_cell["projection_y_coordinate"].max() + width + time_min = track_cell["time"].min() + # time_max=track_cell['time'].max() + + track_variable_cell = track_variable[track_variable["cell"] == cell] + track_variable_cell["time_cell"] = pd.to_timedelta(track_variable_cell["time_cell"]) + # track_variable_cell=track_variable_cell[(track_variable_cell['time']>=time_min) & (track_variable_cell['time']<=time_max)] + + # set up looping over time based on mask's time coordinate to allow for one timestep before and after the track + time_coord = mask_total.coord("time") + time = time_coord.units.num2date(time_coord.points) + i_start = max(0, np.where(time == track_cell["time"].values[0])[0][0] - n_extend) + i_end = min( + len(time) - 1, + np.where(time == track_cell["time"].values[-1])[0][0] + n_extend + 1, + ) + time_cell = time[slice(i_start, i_end)] for time_i in time_cell: constraint_time = Constraint(time=time_i) - constraint_x = Constraint(projection_x_coordinate = lambda cell: x_min < cell < x_max) - constraint_y = Constraint(projection_y_coordinate = lambda cell: y_min < cell < y_max) + constraint_x = Constraint( + projection_x_coordinate=lambda cell: x_min < cell < x_max + ) + constraint_y = Constraint( + projection_y_coordinate=lambda cell: y_min < cell < y_max + ) constraint = constraint_time & constraint_x & constraint_y - mask_total_i=mask_total.extract(constraint) + mask_total_i = mask_total.extract(constraint) if field_contour is None: - field_contour_i=None + field_contour_i = None else: - field_contour_i=field_contour.extract(constraint) + field_contour_i = field_contour.extract(constraint) if field_filled is None: - field_filled_i=None + field_filled_i = None else: - field_filled_i=field_filled.extract(constraint) - - - track_i=track[track['time']==time_i] - cells_mask=list(unique(mask_total_i.core_data())) - track_cells=track_i.loc[(track_i['projection_x_coordinate'] > x_min) & (track_i['projection_x_coordinate'] < x_max) & (track_i['projection_y_coordinate'] > y_min) & (track_i['projection_y_coordinate'] < y_max)] - cells_track=list(track_cells['cell'].values) - cells=list(set( cells_mask + cells_track )) + field_filled_i = field_filled.extract(constraint) + + track_i = track[track["time"] == time_i] + cells_mask = list(unique(mask_total_i.core_data())) + track_cells = track_i.loc[ + (track_i["projection_x_coordinate"] > x_min) + & (track_i["projection_x_coordinate"] < x_max) + & (track_i["projection_y_coordinate"] > y_min) + & (track_i["projection_y_coordinate"] < y_max) + ] + cells_track = list(track_cells["cell"].values) + cells = list(set(cells_mask + cells_track)) if cell not in cells: cells.append(cell) if 0 in cells: cells.remove(0) - track_i=track_i[track_i['cell'].isin(cells)] + track_i = track_i[track_i["cell"].isin(cells)] if cog is None: - cog_i=None + cog_i = None else: - cog_i=cog[cog['cell'].isin(cells)] - cog_i=cog_i[cog_i['time']==time_i] + cog_i = cog[cog["cell"].isin(cells)] + cog_i = cog_i[cog_i["time"] == time_i] if features is None: - features_i=None + features_i = None else: - features_i=features[features['time']==time_i] - + features_i = features[features["time"] == time_i] fig1, ax1 = plt.subplots(ncols=2, nrows=1, figsize=figsize) - fig1.subplots_adjust(left=0.1, bottom=0.15, right=0.90, top=0.85,wspace=0.3) - - datestring_stamp = 
time_i.strftime('%Y-%m-%d %H:%M:%S') - if time_i in track_cell['time'].values: - time_cell_i=track_cell[track_cell['time'].values==time_i]['time_cell'] - celltime_stamp = "%02d:%02d:%02d" % (time_cell_i.dt.total_seconds() // 3600, - (time_cell_i.dt.total_seconds() % 3600) // 60, - time_cell_i.dt.total_seconds() % 60 ) + fig1.subplots_adjust(left=0.1, bottom=0.15, right=0.90, top=0.85, wspace=0.3) + + datestring_stamp = time_i.strftime("%Y-%m-%d %H:%M:%S") + if time_i in track_cell["time"].values: + time_cell_i = track_cell[track_cell["time"].values == time_i]["time_cell"] + celltime_stamp = "%02d:%02d:%02d" % ( + time_cell_i.dt.total_seconds() // 3600, + (time_cell_i.dt.total_seconds() % 3600) // 60, + time_cell_i.dt.total_seconds() % 60, + ) else: - celltime_stamp=' - ' - title=celltime_stamp + ' , ' + datestring_stamp - datestring_file = time_i.strftime('%Y-%m-%d_%H%M%S') + celltime_stamp = " - " + title = celltime_stamp + " , " + datestring_stamp + datestring_file = time_i.strftime("%Y-%m-%d_%H%M%S") # plot evolving timeseries of variable to second axis: - ax1[0]=plot_mask_cell_individual_static(cell_i=cell, - track=track_i, cog=cog_i,features=features_i, - mask_total=mask_total_i, - field_contour=field_contour_i, field_filled=field_filled_i, - xlim=[x_min/1000,x_max/1000],ylim=[y_min/1000,y_max/1000], - axes=ax1[0],title=title,**kwargs) - - track_variable_past=track_variable_cell[(track_variable_cell['time']>=time_min) & (track_variable_cell['time']<=time_i)] - track_variable_current=track_variable_cell[track_variable_cell['time']==time_i] + ax1[0] = plot_mask_cell_individual_static( + cell_i=cell, + track=track_i, + cog=cog_i, + features=features_i, + mask_total=mask_total_i, + field_contour=field_contour_i, + field_filled=field_filled_i, + xlim=[x_min / 1000, x_max / 1000], + ylim=[y_min / 1000, y_max / 1000], + axes=ax1[0], + title=title, + **kwargs + ) + + track_variable_past = track_variable_cell[ + (track_variable_cell["time"] >= time_min) + & (track_variable_cell["time"] <= time_i) + ] + track_variable_current = track_variable_cell[ + track_variable_cell["time"] == time_i + ] if variable_color is None: - variable_color='navy' + variable_color = "navy" if type(variable) is str: -# logging.debug('variable: '+str(variable)) + # logging.debug('variable: '+str(variable)) if type(variable_color) is str: - variable_color={variable:variable_color} - variable=[variable] - - for i_variable,variable_i in enumerate(variable): - color=variable_color[variable_i] - ax1[1].plot(track_variable_past['time_cell'].dt.total_seconds()/ 60.,track_variable_past[variable_i].values,color=color,linestyle='-',label=variable_label[i_variable]) - ax1[1].plot(track_variable_current['time_cell'].dt.total_seconds()/ 60.,track_variable_current[variable_i].values,color=color,marker='o',markersize=4,fillstyle='full') + variable_color = {variable: variable_color} + variable = [variable] + + for i_variable, variable_i in enumerate(variable): + color = variable_color[variable_i] + ax1[1].plot( + track_variable_past["time_cell"].dt.total_seconds() / 60.0, + track_variable_past[variable_i].values, + color=color, + linestyle="-", + label=variable_label[i_variable], + ) + ax1[1].plot( + track_variable_current["time_cell"].dt.total_seconds() / 60.0, + track_variable_current[variable_i].values, + color=color, + marker="o", + markersize=4, + fillstyle="full", + ) ax1[1].yaxis.tick_right() ax1[1].yaxis.set_label_position("right") - ax1[1].set_xlim([0,2*60]) - ax1[1].set_xticks(np.arange(0,120,15)) - 
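The right-hand panel above draws the variable's history up to the current frame as a line and the current value as a filled marker. A reduced sketch (w_max is a made-up variable name, and the data is synthetic):

    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt

    df = pd.DataFrame({
        "time_cell": pd.to_timedelta(np.arange(0, 50, 10), unit="m"),
        "w_max": [2.0, 5.0, 9.0, 7.0, 4.0],
    })
    now = pd.Timedelta(minutes=20)

    fig, ax = plt.subplots()
    past = df[df["time_cell"] <= now]
    current = df[df["time_cell"] == now]
    ax.plot(past["time_cell"].dt.total_seconds() / 60.0, past["w_max"],
            color="navy", linestyle="-")
    ax.plot(current["time_cell"].dt.total_seconds() / 60.0, current["w_max"],
            color="navy", marker="o", markersize=4, fillstyle="full")
    ax.set_xlabel("cell lifetime (min)")
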
ax1[1].set_ylim([0,max(10,1.25*track_variable_cell[variable].max().max())]) - ax1[1].set_xlabel('cell lifetime (min)') - if variable_ylabel==None: - variable_ylabel=variable + ax1[1].set_xlim([0, 2 * 60]) + ax1[1].set_xticks(np.arange(0, 120, 15)) + ax1[1].set_ylim([0, max(10, 1.25 * track_variable_cell[variable].max().max())]) + ax1[1].set_xlabel("cell lifetime (min)") + if variable_ylabel == None: + variable_ylabel = variable ax1[1].set_ylabel(variable_ylabel) ax1[1].set_title(title) # insert legend, if flag is True if variable_legend: - if (len(variable_label)<5): - ncol=1 + if len(variable_label) < 5: + ncol = 1 else: - ncol=2 - ax1[1].legend(loc='upper right', bbox_to_anchor=(1, 1),ncol=ncol,fontsize=8) + ncol = 2 + ax1[1].legend( + loc="upper right", bbox_to_anchor=(1, 1), ncol=ncol, fontsize=8 + ) out_dir = os.path.join(plotdir, name) os.makedirs(out_dir, exist_ok=True) - if 'png' in file_format: - savepath_png = os.path.join(out_dir, name + '_' + datestring_file + '.png') + if "png" in file_format: + savepath_png = os.path.join(out_dir, name + "_" + datestring_file + ".png") fig1.savefig(savepath_png, dpi=dpi) - logging.debug('Mask static plot saved to ' + savepath_png) - if 'pdf' in file_format: - savepath_pdf = os.path.join(out_dir, name + '_' + datestring_file + '.pdf') + logging.debug("Mask static plot saved to " + savepath_png) + if "pdf" in file_format: + savepath_pdf = os.path.join(out_dir, name + "_" + datestring_file + ".pdf") fig1.savefig(savepath_pdf, dpi=dpi) - logging.debug('Mask static plot saved to ' + savepath_pdf) + logging.debug("Mask static plot saved to " + savepath_pdf) plt.close() plt.clf() -def map_tracks(track,axis_extent=None,figsize=(10,10),axes=None): - for cell in track['cell'].dropna().unique(): - track_i=track[track['cell']==cell] - axes.plot(track_i['longitude'],track_i['latitude'],'-') + +def map_tracks(track, axis_extent=None, figsize=(10, 10), axes=None): + for cell in track["cell"].dropna().unique(): + track_i = track[track["cell"] == cell] + axes.plot(track_i["longitude"], track_i["latitude"], "-") if axis_extent: axes.set_extent(axis_extent) - axes=make_map(axes) + axes = make_map(axes) return axes + def make_map(axes): import matplotlib.ticker as mticker import cartopy.crs as ccrs from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER - gl = axes.gridlines(crs=ccrs.PlateCarree(), draw_labels=True, - linewidth=2, color='gray', alpha=0.5, linestyle='-') - axes.coastlines('10m') + gl = axes.gridlines( + crs=ccrs.PlateCarree(), + draw_labels=True, + linewidth=2, + color="gray", + alpha=0.5, + linestyle="-", + ) + axes.coastlines("10m") gl.xlabels_top = False gl.ylabels_right = False - gl.xlocator = mticker.MaxNLocator(nbins=5,min_n_ticks=3,steps=None) - gl.ylocator = mticker.MaxNLocator(nbins=5,min_n_ticks=3,steps=None) + gl.xlocator = mticker.MaxNLocator(nbins=5, min_n_ticks=3, steps=None) + gl.ylocator = mticker.MaxNLocator(nbins=5, min_n_ticks=3, steps=None) gl.xformatter = LONGITUDE_FORMATTER gl.yformatter = LATITUDE_FORMATTER - #gl.xlabel_style = {'size': 15, 'color': 'gray'} - #gl.xlabel_style = {'color': 'red', 'weight': 'bold'} + # gl.xlabel_style = {'size': 15, 'color': 'gray'} + # gl.xlabel_style = {'color': 'red', 'weight': 'bold'} return axes -def plot_lifetime_histogram(track,axes=None,bin_edges=np.arange(0,200,20),density=False,**kwargs): - hist, bin_edges,bin_centers = lifetime_histogram(track,bin_edges=bin_edges,density=density) - plot_hist=axes.plot(bin_centers, hist,**kwargs) + +def plot_lifetime_histogram( + 
track, axes=None, bin_edges=np.arange(0, 200, 20), density=False, **kwargs +): + hist, bin_edges, bin_centers = lifetime_histogram( + track, bin_edges=bin_edges, density=density + ) + plot_hist = axes.plot(bin_centers, hist, **kwargs) return plot_hist -def plot_lifetime_histogram_bar(track,axes=None,bin_edges=np.arange(0,200,20),density=False,width_bar=1,shift=0.5,**kwargs): - hist, bin_edges, bin_centers = lifetime_histogram(track,bin_edges=bin_edges,density=density) - plot_hist=axes.bar(bin_centers+shift,hist,width=width_bar,**kwargs) + +def plot_lifetime_histogram_bar( + track, + axes=None, + bin_edges=np.arange(0, 200, 20), + density=False, + width_bar=1, + shift=0.5, + **kwargs +): + hist, bin_edges, bin_centers = lifetime_histogram( + track, bin_edges=bin_edges, density=density + ) + plot_hist = axes.bar(bin_centers + shift, hist, width=width_bar, **kwargs) return plot_hist -def plot_histogram_cellwise(track,bin_edges,variable,quantity,axes=None,density=False,**kwargs): - hist, bin_edges,bin_centers = histogram_cellwise(track,bin_edges=bin_edges,variable=variable,quantity=quantity,density=density) - plot_hist=axes.plot(bin_centers, hist,**kwargs) + +def plot_histogram_cellwise( + track, bin_edges, variable, quantity, axes=None, density=False, **kwargs +): + hist, bin_edges, bin_centers = histogram_cellwise( + track, + bin_edges=bin_edges, + variable=variable, + quantity=quantity, + density=density, + ) + plot_hist = axes.plot(bin_centers, hist, **kwargs) return plot_hist -def plot_histogram_featurewise(Track,bin_edges,variable,axes=None,density=False,**kwargs): - hist, bin_edges, bin_centers = histogram_featurewise(Track,bin_edges=bin_edges,variable=variable,density=density) - plot_hist=axes.plot(bin_centers, hist,**kwargs) + +def plot_histogram_featurewise( + Track, bin_edges, variable, axes=None, density=False, **kwargs +): + hist, bin_edges, bin_centers = histogram_featurewise( + Track, bin_edges=bin_edges, variable=variable, density=density + ) + plot_hist = axes.plot(bin_centers, hist, **kwargs) return plot_hist diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 8541c1d2..7bfddb3e 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -3,10 +3,11 @@ from numpy import transpose from . import utils as tb_utils - + + def transfm_pbc_point(in_dim, dim_min, dim_max): - '''Function to transform a PBC-feature point for contiguity - + """Function to transform a PBC-feature point for contiguity + Parameters ---------- in_dim : int @@ -15,21 +16,24 @@ def transfm_pbc_point(in_dim, dim_min, dim_max): Minimum point for the dimension dim_max : int Maximum point for the dimension (inclusive) - + Returns ------- int The transformed point - - ''' - if in_dim < ((dim_min+dim_max)/2): - return in_dim+dim_max+1 + + """ + if in_dim < ((dim_min + dim_max) / 2): + return in_dim + dim_max + 1 else: return in_dim -def add_markers(features, marker_arr, seed_3D_flag, seed_3D_size = 5, level = None, PBC_flag = 'none'): - '''Adds markers for watershedding using the `features` dataframe - to the marker_arr. + +def add_markers( + features, marker_arr, seed_3D_flag, seed_3D_size=5, level=None, PBC_flag="none" +): + """Adds markers for watershedding using the `features` dataframe + to the marker_arr. Parameters ---------- @@ -38,19 +42,19 @@ def add_markers(features, marker_arr, seed_3D_flag, seed_3D_size = 5, level = No marker_arr: 2D or 3D array-like Array to add the markers to. Assumes a (z, y, x) configuration. 
seed_3D_flag: str('column', 'box') - Seed 3D field at feature positions with either the full column + Seed 3D field at feature positions with either the full column or a box of user-set size seed_3D_size: int or tuple (dimensions equal to dimensions of `field`) - This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an - integer, the seed box is identical in all dimensions. If it's a tuple, it specifies the - seed area for each dimension separately. - Note: we recommend the use of odd numbers for this. If you give - an even number, your seed box will be biased and not centered - around the feature. + This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an + integer, the seed box is identical in all dimensions. If it's a tuple, it specifies the + seed area for each dimension separately. + Note: we recommend the use of odd numbers for this. If you give + an even number, your seed box will be biased and not centered + around the feature. Note: if two seed boxes overlap, the feature that is seeded will be the closer feature. level: slice or None - If `seed_3D_flag` is 'column', the levels at which to seed the + If `seed_3D_flag` is 'column', the levels at which to seed the cells for the watershedding algorithm. If None, seeds all levels. PBC_flag : {'none', 'hdim_1', 'hdim_2', 'both'} Sets whether to use periodic boundaries, and if so in which directions. @@ -58,21 +62,21 @@ def add_markers(features, marker_arr, seed_3D_flag, seed_3D_size = 5, level = No 'hdim_1' means that we are periodic along hdim1 'hdim_2' means that we are periodic along hdim2 'both' means that we are periodic along both horizontal dimensions - + Returns ------- 2D or 3D array like (same type as `marker_arr`) The marker array - ''' + """ import numpy as np # What marker number is the background? Assumed 0. bg_marker = 0 if level is None: - level=slice(None) + level = slice(None) - if len(marker_arr.shape)==3: + if len(marker_arr.shape) == 3: is_3D = True z_len = marker_arr.shape[0] h1_len = marker_arr.shape[1] @@ -83,25 +87,25 @@ def add_markers(features, marker_arr, seed_3D_flag, seed_3D_size = 5, level = No z_len = 0 h1_len = marker_arr.shape[0] h2_len = marker_arr.shape[1] - # transpose to 3D array to make things easier. + # transpose to 3D array to make things easier. marker_arr = marker_arr[np.newaxis, :, :] - if seed_3D_flag == 'column': + if seed_3D_flag == "column": for index, row in features.iterrows(): - marker_arr[level,int(row['hdim_1']), int(row['hdim_2'])]=row['feature'] + marker_arr[level, int(row["hdim_1"]), int(row["hdim_2"])] = row["feature"] - elif seed_3D_flag == 'box': + elif seed_3D_flag == "box": # Get the size of the seed box from the input parameter try: if is_3D: seed_z = seed_3D_size[0] - start_num = 1 + start_num = 1 else: start_num = 0 seed_h1 = seed_3D_size[start_num] seed_h2 = seed_3D_size[start_num + 1] except TypeError: - # Not iterable, assume int. + # Not iterable, assume int. seed_z = seed_3D_size seed_h1 = seed_3D_size seed_h2 = seed_3D_size @@ -109,83 +113,108 @@ def add_markers(features, marker_arr, seed_3D_flag, seed_3D_size = 5, level = No for index, row in features.iterrows(): if is_3D: # If we have a 3D input and we need to do box seeding - # we need to have 3D features. - try: - row['vdim'] + # we need to have 3D features. 
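            # A worked example of the seed-box bounds computed below (the
            # numbers are hypothetical, not from this patch): a feature at
            # hdim_1 = 10 with seed_h1 = 5 gives ceil(10 - 2.5) = 8 and
            # ceil(10 + 2.5) = 13, so rows 8..12 are seeded, centered on the
            # feature; an even seed_h1 = 4 yields rows 8..11, off-center,
            # which is why the docstring above recommends odd seed sizes.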
+ try: + row["vdim"] except KeyError: - raise ValueError("For Box seeding on 3D segmentation," - " you must have a 3D input source.") - + raise ValueError( + "For Box seeding on 3D segmentation," + " you must have a 3D input source." + ) + # Because we don't support PBCs on the vertical axis, - # this is simple- just go in the seed_z/2 points around the - # vdim of the feature, up to the limits of the array. + # this is simple- just go in the seed_z/2 points around the + # vdim of the feature, up to the limits of the array. if is_3D: - z_seed_start = int(np.max([0, np.ceil(row['vdim']-seed_z/2)])) - z_seed_end = int(np.min([z_len, np.ceil(row['vdim']+seed_z/2)])) - + z_seed_start = int(np.max([0, np.ceil(row["vdim"] - seed_z / 2)])) + z_seed_end = int(np.min([z_len, np.ceil(row["vdim"] + seed_z / 2)])) + # For the horizontal dimensions, it's more complicated if we have - # PBCs. - hdim_1_min = int(np.ceil(row['hdim_1'] - seed_h1/2)) - hdim_1_max = int(np.ceil(row['hdim_1'] + seed_h1/2)) - hdim_2_min = int(np.ceil(row['hdim_2'] - seed_h2/2)) - hdim_2_max = int(np.ceil(row['hdim_2'] + seed_h2/2)) - + # PBCs. + hdim_1_min = int(np.ceil(row["hdim_1"] - seed_h1 / 2)) + hdim_1_max = int(np.ceil(row["hdim_1"] + seed_h1 / 2)) + hdim_2_min = int(np.ceil(row["hdim_2"] - seed_h2 / 2)) + hdim_2_max = int(np.ceil(row["hdim_2"] + seed_h2 / 2)) + all_seed_boxes = tb_utils.get_pbc_coordinates( - h1_min = 0, h1_max = h1_len, - h2_min = 0, h2_max = h2_len, - h1_start_coord = hdim_1_min, h1_end_coord = hdim_1_max, - h2_start_coord = hdim_2_min, h2_end_coord = hdim_2_max, - PBC_flag= PBC_flag) + h1_min=0, + h1_max=h1_len, + h2_min=0, + h2_max=h2_len, + h1_start_coord=hdim_1_min, + h1_end_coord=hdim_1_max, + h2_start_coord=hdim_2_min, + h2_end_coord=hdim_2_max, + PBC_flag=PBC_flag, + ) for seed_box in all_seed_boxes: - # Need to see if there are any other points seeded + # Need to see if there are any other points seeded # in this seed box first. - curr_box_markers = (marker_arr[z_seed_start:z_seed_end, - seed_box[0]:seed_box[1], - seed_box[2]:seed_box[3]]) + curr_box_markers = marker_arr[ + z_seed_start:z_seed_end, + seed_box[0] : seed_box[1], + seed_box[2] : seed_box[3], + ] all_feats_in_box = np.unique(curr_box_markers) - if np.any(curr_box_markers!=bg_marker): + if np.any(curr_box_markers != bg_marker): # If we have non-background points already seeded, - # we need to find the best way to seed them. - # Currently seeding with the closest point. + # we need to find the best way to seed them. + # Currently seeding with the closest point. # Loop through all points in the box - with np.nditer(curr_box_markers, flags=['multi_index']) as it: - for curr_box_pt in it: - # Get its global index so that we can calculate + with np.nditer(curr_box_markers, flags=["multi_index"]) as it: + for curr_box_pt in it: + # Get its global index so that we can calculate # distance and set the array. local_index = it.multi_index - global_index = (local_index[0]+z_seed_start, - local_index[1] + seed_box[0], - local_index[2] + seed_box[2]) + global_index = ( + local_index[0] + z_seed_start, + local_index[1] + seed_box[0], + local_index[2] + seed_box[2], + ) # If it's a background marker, we can just set it - # with the feature we're working on. + # with the feature we're working on. if curr_box_pt == bg_marker: - marker_arr[global_index] = row['feature'] + marker_arr[global_index] = row["feature"] continue # it has another feature in it. Calculate the distance # from its current set feature and the new feature. 
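                            # For intuition (hypothetical numbers): with
                            # PBC_flag='hdim_1' and max_h1 = 100, points at
                            # hdim_1 = 2 and hdim_1 = 98 count as 4 apart
                            # through the wrapped boundary rather than 96; the
                            # exact metric is delegated to
                            # tb_utils.calc_distance_coords_pbc below.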
if is_3D: - curr_coord = (row['vdim'], row['hdim_1'], row['hdim_2']) + curr_coord = (row["vdim"], row["hdim_1"], row["hdim_2"]) else: - curr_coord = (0, row['hdim_1'], row['hdim_2']) + curr_coord = (0, row["hdim_1"], row["hdim_2"]) dist_from_curr_pt = tb_utils.calc_distance_coords_pbc( - np.array(global_index), np.array(curr_coord), - min_h1 = 0, max_h1 = h1_len, - min_h2 = 0, max_h2 = h2_len, PBC_flag= PBC_flag + np.array(global_index), + np.array(curr_coord), + min_h1=0, + max_h1=h1_len, + min_h2=0, + max_h2=h2_len, + PBC_flag=PBC_flag, ) - # This is technically an O(N^2) operation, but + # This is technically an O(N^2) operation, but # hopefully performance isn't too bad as this should # be rare. - orig_row = features[features['feature'] == curr_box_pt].iloc[0] + orig_row = features[ + features["feature"] == curr_box_pt + ].iloc[0] if is_3D: - orig_coord = (orig_row['vdim'], orig_row['hdim_1'], orig_row['hdim_2']) + orig_coord = ( + orig_row["vdim"], + orig_row["hdim_1"], + orig_row["hdim_2"], + ) else: - orig_coord = (0, orig_row['hdim_1'], orig_row['hdim_2']) + orig_coord = (0, orig_row["hdim_1"], orig_row["hdim_2"]) dist_from_orig_pt = tb_utils.calc_distance_coords_pbc( - np.array(global_index), np.array(orig_coord), - min_h1 = 0, max_h1 = h1_len, - min_h2 = 0, max_h2 = h2_len, PBC_flag= PBC_flag + np.array(global_index), + np.array(orig_coord), + min_h1=0, + max_h1=h1_len, + min_h2=0, + max_h2=h2_len, + PBC_flag=PBC_flag, ) # The current point center is further away # than the original point center, so do nothing @@ -193,13 +222,14 @@ def add_markers(features, marker_arr, seed_3D_flag, seed_3D_size = 5, level = No continue else: # the current point center is closer. - marker_arr[global_index] = row['feature'] - # completely unseeded region so far. + marker_arr[global_index] = row["feature"] + # completely unseeded region so far. else: - marker_arr[z_seed_start:z_seed_end, - seed_box[0]:seed_box[1], - seed_box[2]:seed_box[3]]=row['feature'] - + marker_arr[ + z_seed_start:z_seed_end, + seed_box[0] : seed_box[1], + seed_box[2] : seed_box[3], + ] = row["feature"] # If we aren't 3D, transpose back. 
    if not is_3D:
@@ -208,23 +238,81 @@ def add_markers(features, marker_arr, seed_3D_flag, seed_3D_size = 5, level = No
     return marker_arr
 
 
-def segmentation_3D(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,PBC_flag='none',seed_3D_flag='column'):
-    return segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,PBC_flag=PBC_flag,seed_3D_flag=seed_3D_flag)
-
-def segmentation_2D(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,PBC_flag='none',seed_3D_flag='column'):
-    return segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,PBC_flag=PBC_flag,seed_3D_flag=seed_3D_flag)
-
-
-def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',PBC_flag='none',seed_3D_flag='column', seed_3D_size=5):
+def segmentation_3D(
+    features,
+    field,
+    dxy,
+    threshold=3e-3,
+    target="maximum",
+    level=None,
+    method="watershed",
+    max_distance=None,
+    PBC_flag="none",
+    seed_3D_flag="column",
+):
+    return segmentation(
+        features,
+        field,
+        dxy,
+        threshold=threshold,
+        target=target,
+        level=level,
+        method=method,
+        max_distance=max_distance,
+        PBC_flag=PBC_flag,
+        seed_3D_flag=seed_3D_flag,
+    )
+
+
+def segmentation_2D(
+    features,
+    field,
+    dxy,
+    threshold=3e-3,
+    target="maximum",
+    level=None,
+    method="watershed",
+    max_distance=None,
+    PBC_flag="none",
+    seed_3D_flag="column",
+):
+    return segmentation(
+        features,
+        field,
+        dxy,
+        threshold=threshold,
+        target=target,
+        level=level,
+        method=method,
+        max_distance=max_distance,
+        PBC_flag=PBC_flag,
+        seed_3D_flag=seed_3D_flag,
+    )
+
+
+def segmentation_timestep(
+    field_in,
+    features_in,
+    dxy,
+    threshold=3e-3,
+    target="maximum",
+    level=None,
+    method="watershed",
+    max_distance=None,
+    vertical_coord="auto",
+    PBC_flag="none",
+    seed_3D_flag="column",
+    seed_3D_size=5,
+):
     """Function performing watershedding for an individual timestep of the data
-    
+
     Parameters
     ----------
-    features:         pandas.DataFrame
+    features:         pandas.DataFrame
                        features for one specific point in time
-    field:      iris.cube.Cube
+    field:      iris.cube.Cube
                        input field to perform the watershedding on (2D or 3D for one specific point in time)
-    threshold:  float
+    threshold:  float
                        threshold for the watershedding field to be used for the mask
     target: string
                        switch to determine if algorithm starts from maxima or minima in input field (maximum: starting from maxima (default), minimum: starting from minima)
@@ -244,12 +332,12 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
         Seed 3D field at feature positions with either the full column (default)
         or a box of user-set size
     seed_3D_size: int or tuple (dimensions equal to dimensions of `field`)
-        This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an
-        integer, the seed box is identical in all dimensions. If it's a tuple, it specifies the
+        This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an
+        integer, the seed box is identical in all dimensions. If it's a tuple, it specifies the
         seed area for each dimension separately. Note: we recommend the use
-        of odd numbers for this.
If you give an even number, your seed box will be + biased and not centered around the feature. + Returns ------- iris.cube.Cube @@ -265,21 +353,23 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu import iris # How many dimensions are we using? - if field_in.ndim==2: + if field_in.ndim == 2: hdim_1_axis = 0 hdim_2_axis = 1 is_3D_seg = False elif field_in.ndim == 3: is_3D_seg = True - vertical_axis = tb_utils.find_vertical_axis_from_coord(field_in, vertical_coord=vertical_coord) - ndim_vertical=field_in.coord_dims(vertical_axis) - if len(ndim_vertical)>1: - raise ValueError('please specify 1 dimensional vertical coordinate') + vertical_axis = tb_utils.find_vertical_axis_from_coord( + field_in, vertical_coord=vertical_coord + ) + ndim_vertical = field_in.coord_dims(vertical_axis) + if len(ndim_vertical) > 1: + raise ValueError("please specify 1 dimensional vertical coordinate") vertical_coord_axis = ndim_vertical[0] - # Once we know the vertical coordinate, we can resolve the + # Once we know the vertical coordinate, we can resolve the # horizontal coordinates # To make things easier, we will transpose the axes - # so that they are consistent. + # so that they are consistent. if vertical_coord_axis == 0: hdim_1_axis = 1 hdim_2_axis = 2 @@ -292,22 +382,23 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu else: raise ValueError("Segmentation routine can't find vertical coordinate.") else: - raise ValueError('Segmentation routine only possible with 2 or 3 spatial dimensions') + raise ValueError( + "Segmentation routine only possible with 2 or 3 spatial dimensions" + ) - - # copy feature dataframe for output - features_out=deepcopy(features_in) - # Create cube of the same dimensions and coordinates as input data to store mask: - segmentation_out=1*field_in - segmentation_out.rename('segmentation_mask') - segmentation_out.units=1 + # copy feature dataframe for output + features_out = deepcopy(features_in) + # Create cube of the same dimensions and coordinates as input data to store mask: + segmentation_out = 1 * field_in + segmentation_out.rename("segmentation_mask") + segmentation_out.units = 1 # Get raw array from input data: - data=field_in.core_data() - is_3D_seg = len(data.shape)==3 + data = field_in.core_data() + is_3D_seg = len(data.shape) == 3 # To make things easier, we will transpose the axes # so that they are consistent: z, hdim_1, hdim_2 - # We only need to do this for 3D. + # We only need to do this for 3D. 
    transposed_data = False
    if is_3D_seg:
        if vertical_coord_axis == 1:
@@ -317,65 +408,69 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
             data = np.transpose(data, axes=(2, 0, 1))
             transposed_data = True
 
-
-    #Set level at which to create "Seed" for each feature in the case of 3D watershedding:
+    # Set level at which to create "Seed" for each feature in the case of 3D watershedding:
     # If none, use all levels (later reduced to the ones fulfilling the threshold conditions)
-    if level==None:
-        level=slice(None)
+    if level == None:
+        level = slice(None)
 
     # transform max_distance in metres to distance in pixels:
     if max_distance is not None:
-        max_distance_pixel=np.ceil(max_distance/dxy)
+        max_distance_pixel = np.ceil(max_distance / dxy)
 
     # mask data outside region above/below threshold and invert data if tracking maxima:
-    if target == 'maximum':
-        unmasked=data>threshold
-        data_segmentation=-1*data
-    elif target == 'minimum':
-        unmasked=data<threshold
-        data_segmentation=data
+    if target == "maximum":
+        unmasked = data > threshold
+        data_segmentation = -1 * data
+    elif target == "minimum":
+        unmasked = data < threshold
+        data_segmentation = data
     else:
-        raise ValueError('unknown type of target')
+        raise ValueError("unknown type of target")
 
     # set markers at the positions of the features:
     markers = np.zeros(unmasked.shape).astype(np.int32)
-    markers = add_markers(features_in, markers, seed_3D_flag, seed_3D_size, level, PBC_flag)
+    markers = add_markers(
+        features_in, markers, seed_3D_flag, seed_3D_size, level, PBC_flag
+    )
     # set markers in cells not fulfilling threshold condition to zero:
-    markers[~unmasked]=0
-    #marker_vals = np.unique(markers)
-
+    markers[~unmasked] = 0
+    # marker_vals = np.unique(markers)
+
     # Turn into np arrays (not necessary for markers) as dask arrays don't yet seem to work for watershedding algorithm
-    data_segmentation=np.array(data_segmentation)
-    unmasked=np.array(unmasked)
+    data_segmentation = np.array(data_segmentation)
+    unmasked = np.array(unmasked)
 
     # perform segmentation:
-    if method=='watershed':
-        segmentation_mask = watershed(np.array(data_segmentation),markers.astype(np.int32), mask=unmasked)
+    if method == "watershed":
+        segmentation_mask = watershed(
+            np.array(data_segmentation), markers.astype(np.int32), mask=unmasked
+        )
 
-    else:
-        raise ValueError('unknown method, must be watershed')
+    else:
+        raise ValueError("unknown method, must be watershed")
 
     # remove everything from the individual masks that is more than max_distance_pixel away from the markers
     if max_distance is not None:
-        D=distance_transform_edt((markers==0).astype(int))
-        segmentation_mask[np.bitwise_and(segmentation_mask>0, D>max_distance_pixel)]=0
-
-    #mask all segmentation_mask points below threshold as -1
-    #to differentiate from those unmasked points NOT filled by watershedding
+        D = distance_transform_edt((markers == 0).astype(int))
+        segmentation_mask[
+            np.bitwise_and(segmentation_mask > 0, D > max_distance_pixel)
+        ] = 0
+
+    # mask all segmentation_mask points below threshold as -1
+    # to differentiate from those unmasked points NOT filled by watershedding
     # TODO: allow user to specify
     segmentation_mask[~unmasked] = -1
-
-    #saves/prints below for testing
+
+    # saves/prints below for testing
     seg_m_data = segmentation_mask[:]
-
-
+
     hdim1_min = 0
     hdim1_max = segmentation_mask.shape[hdim_1_axis] - 1
     hdim2_min = 0
     hdim2_max = segmentation_mask.shape[hdim_2_axis] - 1
-
+
     # all options that involve dealing with periodic boundaries
-    pbc_options = ['hdim_1', 'hdim_2', 'both']
+    pbc_options = ["hdim_1", "hdim_2", "both"]
     # Only run this if we
need to deal with PBCs if PBC_flag in pbc_options: @@ -391,327 +486,395 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu hdim_1_axis = 1 hdim_2_axis = 2 - seg_mask_unseeded = np.zeros(segmentation_mask.shape) - # Return all indices where segmentation field == 0 # meaning unfilled but above threshold # TODO: is there a way to do this without np.where? - vdim_unf,hdim1_unf,hdim2_unf = np.where(segmentation_mask==0) - seg_mask_unseeded[vdim_unf,hdim1_unf,hdim2_unf]=1 - + vdim_unf, hdim1_unf, hdim2_unf = np.where(segmentation_mask == 0) + seg_mask_unseeded[vdim_unf, hdim1_unf, hdim2_unf] = 1 + # create labeled field of unfilled, unseeded features - labels_unseeded,label_num = skimage.measure.label(seg_mask_unseeded, return_num=True) - + labels_unseeded, label_num = skimage.measure.label( + seg_mask_unseeded, return_num=True + ) + markers_2 = np.zeros(data_segmentation.shape).astype(np.int32) - + # PBC marker seeding approach # loop thru LB points, then check if fillable region (labels_unseeded > 0) and seed - # then check if point on other side of boundary is > 0 in segmentation_mask and + # then check if point on other side of boundary is > 0 in segmentation_mask and # adjust where needed - ''' + """ "First pass" at seeding features across the boundaries. This first pass will bring in eligible (meaning values that are higher than threshold) but not previously watershedded points across the boundary by seeding them with the appropriate feature across the boundary. Later, we will run the second pass or "buddy box" approach that handles cases where points across the boundary have been watershedded already. - ''' - - # TODO: clean up code. - if PBC_flag == 'hdim_1' or PBC_flag == 'both': - for vdim_ind in range(0,segmentation_mask.shape[0]): - for hdim1_ind in [hdim1_min,hdim1_max]: - for hdim2_ind in range(hdim2_min,hdim2_max): - - - if(labels_unseeded[vdim_ind,hdim1_ind,hdim2_ind] == 0): + """ + + # TODO: clean up code. + if PBC_flag == "hdim_1" or PBC_flag == "both": + for vdim_ind in range(0, segmentation_mask.shape[0]): + for hdim1_ind in [hdim1_min, hdim1_max]: + for hdim2_ind in range(hdim2_min, hdim2_max): + + if labels_unseeded[vdim_ind, hdim1_ind, hdim2_ind] == 0: continue else: if hdim1_ind == 0: - if (segmentation_mask[vdim_ind,hdim1_max,hdim2_ind]<=0): + if ( + segmentation_mask[vdim_ind, hdim1_max, hdim2_ind] + <= 0 + ): continue else: - markers_2[vdim_ind,hdim1_ind,hdim2_ind] = segmentation_mask[vdim_ind,hdim1_max,hdim2_ind] + markers_2[ + vdim_ind, hdim1_ind, hdim2_ind + ] = segmentation_mask[ + vdim_ind, hdim1_max, hdim2_ind + ] elif hdim1_ind == hdim1_max: - if (segmentation_mask[vdim_ind,hdim1_min,hdim2_ind]<=0): + if ( + segmentation_mask[vdim_ind, hdim1_min, hdim2_ind] + <= 0 + ): continue else: - markers_2[vdim_ind,hdim1_ind,hdim2_ind] = segmentation_mask[vdim_ind,hdim1_min,hdim2_ind] - if PBC_flag == 'hdim_2' or PBC_flag == 'both': + markers_2[ + vdim_ind, hdim1_ind, hdim2_ind + ] = segmentation_mask[ + vdim_ind, hdim1_min, hdim2_ind + ] + if PBC_flag == "hdim_2" or PBC_flag == "both": # TODO: This seems quite slow, is there scope for further speedup? 
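        # For intuition: in the loop below (and its hdim_1 twin above), a
        # hypothetical above-threshold but unlabeled point on the hdim_2 = 0
        # edge checks segmentation_mask[vdim_ind, hdim1_ind, hdim2_max] on the
        # opposite edge; a positive label found there is copied into markers_2
        # so the second watershed pass can grow that feature across the
        # boundary.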
- for vdim_ind in range(0,segmentation_mask.shape[0]): - for hdim1_ind in range(hdim1_min,hdim1_max): - for hdim2_ind in [hdim2_min,hdim2_max]: - - if(labels_unseeded[vdim_ind,hdim1_ind,hdim2_ind] == 0): + for vdim_ind in range(0, segmentation_mask.shape[0]): + for hdim1_ind in range(hdim1_min, hdim1_max): + for hdim2_ind in [hdim2_min, hdim2_max]: + + if labels_unseeded[vdim_ind, hdim1_ind, hdim2_ind] == 0: continue else: if hdim2_ind == hdim2_min: - if (segmentation_mask[vdim_ind,hdim1_ind,hdim2_max]<=0): + if ( + segmentation_mask[vdim_ind, hdim1_ind, hdim2_max] + <= 0 + ): continue else: - markers_2[vdim_ind,hdim1_ind,hdim2_ind] = segmentation_mask[vdim_ind,hdim1_ind,hdim2_max] + markers_2[ + vdim_ind, hdim1_ind, hdim2_ind + ] = segmentation_mask[ + vdim_ind, hdim1_ind, hdim2_max + ] elif hdim2_ind == hdim2_max: - if (segmentation_mask[vdim_ind,hdim1_ind,hdim2_min]<=0): + if ( + segmentation_mask[vdim_ind, hdim1_ind, hdim2_min] + <= 0 + ): continue else: - markers_2[vdim_ind,hdim1_ind,hdim2_ind] = segmentation_mask[vdim_ind,hdim1_ind,hdim2_min] - + markers_2[ + vdim_ind, hdim1_ind, hdim2_ind + ] = segmentation_mask[ + vdim_ind, hdim1_ind, hdim2_min + ] + # Deal with the opposite corner only - if PBC_flag == 'both': + if PBC_flag == "both": # TODO: This seems quite slow, is there scope for further speedup? - for vdim_ind in range(0,segmentation_mask.shape[0]): + for vdim_ind in range(0, segmentation_mask.shape[0]): for hdim1_ind in [hdim1_min, hdim1_max]: - for hdim2_ind in [hdim2_min,hdim2_max]: + for hdim2_ind in [hdim2_min, hdim2_max]: # If this point is unseeded and unlabeled - if(labels_unseeded[vdim_ind,hdim1_ind,hdim2_ind] == 0): + if labels_unseeded[vdim_ind, hdim1_ind, hdim2_ind] == 0: continue - + # Find the opposite point in hdim1 space - hdim1_opposite_corner = (hdim1_min if hdim1_ind == hdim1_max else hdim1_max) - hdim2_opposite_corner = (hdim2_min if hdim2_ind == hdim2_max else hdim2_max) - if segmentation_mask[vdim_ind, hdim1_opposite_corner, hdim2_opposite_corner] <= 0: + hdim1_opposite_corner = ( + hdim1_min if hdim1_ind == hdim1_max else hdim1_max + ) + hdim2_opposite_corner = ( + hdim2_min if hdim2_ind == hdim2_max else hdim2_max + ) + if ( + segmentation_mask[ + vdim_ind, hdim1_opposite_corner, hdim2_opposite_corner + ] + <= 0 + ): continue - markers_2[vdim_ind, hdim1_ind, hdim2_ind] = segmentation_mask[vdim_ind,hdim1_opposite_corner,hdim2_opposite_corner] + markers_2[vdim_ind, hdim1_ind, hdim2_ind] = segmentation_mask[ + vdim_ind, hdim1_opposite_corner, hdim2_opposite_corner + ] - markers_2[~unmasked]=0 - - if method=='watershed': - segmentation_mask_2 = watershed(data_segmentation,markers_2.astype(np.int32), mask=unmasked) - else: - raise ValueError('unknown method, must be watershed') + markers_2[~unmasked] = 0 + + if method == "watershed": + segmentation_mask_2 = watershed( + data_segmentation, markers_2.astype(np.int32), mask=unmasked + ) + else: + raise ValueError("unknown method, must be watershed") # remove everything from the individual masks that is more than max_distance_pixel away from the markers if max_distance is not None: - D=distance_transform_edt((markers==0).astype(int)) - segmentation_mask_2[np.bitwise_and(segmentation_mask_2>0, D>max_distance_pixel)]=0 - + D = distance_transform_edt((markers == 0).astype(int)) + segmentation_mask_2[ + np.bitwise_and(segmentation_mask_2 > 0, D > max_distance_pixel) + ] = 0 + # Sum up original mask and secondary PBC-mask for full PBC segmentation - segmentation_mask_3=segmentation_mask + segmentation_mask_2 - + 
segmentation_mask_3 = segmentation_mask + segmentation_mask_2
+
     # Secondary seeding complete, now blending periodic boundaries
     # keep segmentation mask fields for now so we can save these all later
     # for demos of changes, otherwise, could add deletion for memory efficiency, e.g.
-
-    #del segmentation_mask
-    #del segmentation_mask_2
-    #gc.collect()
-
-    #update mask coord regions
-
-    '''
+
+    # del segmentation_mask
+    # del segmentation_mask_2
+    # gc.collect()
+
+    # update mask coord regions
+
+    """
     Now, start the second round of watershedding: the "buddy box" approach.
     'buddies' array contains features of interest and any neighbors that are across the boundary or otherwise have lateral and/or diagonal physical contact with that label
-    '''
+    """
     reg_props_dict = tb_utils.get_label_props_in_dict(segmentation_mask_3)
-
+
     if len(reg_props_dict) != 0:
-        curr_reg_inds, z_reg_inds, y_reg_inds, x_reg_inds= tb_utils.get_indices_of_labels_from_reg_prop_dict(reg_props_dict)
+        (
+            curr_reg_inds,
+            z_reg_inds,
+            y_reg_inds,
+            x_reg_inds,
+        ) = tb_utils.get_indices_of_labels_from_reg_prop_dict(reg_props_dict)
 
         wall_labels = np.array([])
 
-        w_wall = np.unique(segmentation_mask_3[:,:,0])
-        wall_labels = np.append(wall_labels,w_wall)
+        w_wall = np.unique(segmentation_mask_3[:, :, 0])
+        wall_labels = np.append(wall_labels, w_wall)
 
-        s_wall = np.unique(segmentation_mask_3[:,0,:])
-        wall_labels = np.append(wall_labels,s_wall)
+        s_wall = np.unique(segmentation_mask_3[:, 0, :])
+        wall_labels = np.append(wall_labels, s_wall)
 
         wall_labels = np.unique(wall_labels)
         wall_labels = wall_labels[(wall_labels) > 0].astype(int)
-
-        # Loop through all segmentation mask labels on the wall
+
+        # Loop through all segmentation mask labels on the wall
         for cur_idx in wall_labels:
-
+
             vdim_indices = z_reg_inds[cur_idx]
             hdim1_indices = y_reg_inds[cur_idx]
             hdim2_indices = x_reg_inds[cur_idx]
-
-            #start buddies array with feature of interest
-            buddies = np.array([cur_idx],dtype=int)
+
+            # start buddies array with feature of interest
+            buddies = np.array([cur_idx], dtype=int)
 
             # Loop through all points in the segmentation mask that we're interested in
-            for label_z, label_y, label_x in zip(vdim_indices, hdim1_indices, hdim2_indices):
-
-                # check if this is the special case of being a corner point.
+            for label_z, label_y, label_x in zip(
+                vdim_indices, hdim1_indices, hdim2_indices
+            ):
+
+                # check if this is the special case of being a corner point.
                 # if it's doubly periodic AND on both x and y boundaries, it's a corner point
-                # and we have to look at the other corner.
-                # here, we will only look at the corner point and let the below deal with x/y only.
+                # and we have to look at the other corner.
+                # here, we will only look at the corner point and let the below deal with x/y only.
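                # A worked example of the coordinate transform used below for
                # buddy box construction (bounds are hypothetical): with
                # hdim1_min = 0 and hdim1_max = 9, transfm_pbc_point(8, 0, 9)
                # returns 8 while transfm_pbc_point(2, 0, 9) returns
                # 2 + 9 + 1 = 12, so a label wrapped across the edge as rows
                # [8, 9, 0, 1, 2] becomes the contiguous run [8, 9, 10, 11, 12].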
+ if PBC_flag == "both" and ( + np.any(label_y == [hdim1_min, hdim1_max]) + and np.any(label_x == [hdim2_min, hdim2_max]) + ): + + # adjust x and y points to the other side y_val_alt = tb_utils.adjust_pbc_point(label_y, hdim1_min, hdim1_max) x_val_alt = tb_utils.adjust_pbc_point(label_x, hdim2_min, hdim2_max) - label_on_corner = segmentation_mask_3[label_z,y_val_alt,x_val_alt] + label_on_corner = segmentation_mask_3[label_z, y_val_alt, x_val_alt] + + if label_on_corner > 0: + # add opposite-corner buddy if it exists + buddies = np.append(buddies, label_on_corner) - if((label_on_corner > 0)): - #add opposite-corner buddy if it exists - buddies = np.append(buddies,label_on_corner) - - # on the hdim1 boundary and periodic on hdim1 - if (PBC_flag == 'hdim_1' or PBC_flag == 'both') and np.any(label_y == [hdim1_min,hdim1_max]): + if (PBC_flag == "hdim_1" or PBC_flag == "both") and np.any( + label_y == [hdim1_min, hdim1_max] + ): y_val_alt = tb_utils.adjust_pbc_point(label_y, hdim1_min, hdim1_max) - #get the label value on the opposite side - label_alt = segmentation_mask_3[label_z,y_val_alt,label_x] - - #if it's labeled and not already been dealt with - if((label_alt > 0)): - #add above/below buddy if it exists - buddies = np.append(buddies,label_alt) - - if (PBC_flag == 'hdim_2' or PBC_flag == 'both') and np.any(label_x == [hdim2_min,hdim2_max]): + # get the label value on the opposite side + label_alt = segmentation_mask_3[label_z, y_val_alt, label_x] + + # if it's labeled and not already been dealt with + if label_alt > 0: + # add above/below buddy if it exists + buddies = np.append(buddies, label_alt) + + if (PBC_flag == "hdim_2" or PBC_flag == "both") and np.any( + label_x == [hdim2_min, hdim2_max] + ): x_val_alt = tb_utils.adjust_pbc_point(label_x, hdim2_min, hdim2_max) - #get the seg value on the opposite side - label_alt = segmentation_mask_3[label_z,label_y,x_val_alt] - - #if it's labeled and not already been dealt with - if((label_alt > 0)): - #add left/right buddy if it exists - buddies = np.append(buddies,label_alt) - - + # get the seg value on the opposite side + label_alt = segmentation_mask_3[label_z, label_y, x_val_alt] + + # if it's labeled and not already been dealt with + if label_alt > 0: + # add left/right buddy if it exists + buddies = np.append(buddies, label_alt) + buddies = np.unique(buddies) - - if np.all(buddies==cur_idx): + + if np.all(buddies == cur_idx): continue else: - inter_buddies,feat_inds,buddy_inds=np.intersect1d(features_in.feature.values[:],buddies,return_indices=True) + inter_buddies, feat_inds, buddy_inds = np.intersect1d( + features_in.feature.values[:], buddies, return_indices=True + ) # Get features that are needed for the buddy box buddy_features = deepcopy(features_in.iloc[feat_inds]) - - #create arrays to contain points of all buddies - #and their transpositions/transformations - #for use in Buddy Box space - #z,y,x points in the grid domain with no transformations - #NOTE: when I think about it, not sure if these are really needed + # create arrays to contain points of all buddies + # and their transpositions/transformations + # for use in Buddy Box space + + # z,y,x points in the grid domain with no transformations + # NOTE: when I think about it, not sure if these are really needed # as we use the y_a1/x_a1 points for the data transposition # to the buddy box rather than these and their z2/y2/x2 counterparts - buddy_z = np.array([],dtype=int) - buddy_y = np.array([],dtype=int) - buddy_x = np.array([],dtype=int) + buddy_z = np.array([], dtype=int) 
+ buddy_y = np.array([], dtype=int) + buddy_x = np.array([], dtype=int) # z,y,x points from the grid domain WHICH MAY OR MAY NOT BE TRANSFORMED # so as to be continuous/contiguous across a grid boundary for that dimension - #(e.g., instead of [1496,1497,0,1,2,3] it would be [1496,1497,1498,1499,1500,1501]) - buddy_z2 = np.array([],dtype=int) - buddy_y2 = np.array([],dtype=int) - buddy_x2 = np.array([],dtype=int) + # (e.g., instead of [1496,1497,0,1,2,3] it would be [1496,1497,1498,1499,1500,1501]) + buddy_z2 = np.array([], dtype=int) + buddy_y2 = np.array([], dtype=int) + buddy_x2 = np.array([], dtype=int) # These are just for feature positions and are in z2/y2/x2 space # (may or may not be within real grid domain) # so that when the buddy box is constructed, seeding is done properly # in the buddy box space - #NOTE: We may not need this, as we already do this editing the buddy_features df + # NOTE: We may not need this, as we already do this editing the buddy_features df # and an iterrows call through this is what's used to actually seed the buddy box - buddy_zf = np.array([],dtype=int) - buddy_yf = np.array([],dtype=int) - buddy_xf = np.array([],dtype=int) - + buddy_zf = np.array([], dtype=int) + buddy_yf = np.array([], dtype=int) + buddy_xf = np.array([], dtype=int) + buddy_looper = 0 - - #loop thru buddies + + # loop thru buddies for buddy in buddies: - - #isolate feature from set of buddies - buddy_feat = features_in[features_in['feature'] == buddy] - #transform buddy feature position if needed for positioning in z2/y2/x2 space - #MAY be redundant with what is done just below here + # isolate feature from set of buddies + buddy_feat = features_in[features_in["feature"] == buddy] + + # transform buddy feature position if needed for positioning in z2/y2/x2 space + # MAY be redundant with what is done just below here yf2 = transfm_pbc_point(int(buddy_feat.hdim_1), hdim1_min, hdim1_max) xf2 = transfm_pbc_point(int(buddy_feat.hdim_2), hdim2_min, hdim2_max) - #edit value in buddy_features dataframe - buddy_features.hdim_1.values[buddy_looper] = transfm_pbc_point(float(buddy_feat.hdim_1), hdim1_min, hdim1_max) - buddy_features.hdim_2.values[buddy_looper] = transfm_pbc_point(float(buddy_feat.hdim_2), hdim2_min, hdim2_max) - - #again, this may be redundant as I don't think we use buddy_zf/yf/xf after this - #in favor of iterrows thru the updated buddy_features - buddy_zf = np.append(buddy_zf,int(buddy_feat.vdim)) - buddy_yf = np.append(buddy_yf,yf2) - buddy_xf = np.append(buddy_xf,xf2) - - buddy_looper = buddy_looper+1 + # edit value in buddy_features dataframe + buddy_features.hdim_1.values[buddy_looper] = transfm_pbc_point( + float(buddy_feat.hdim_1), hdim1_min, hdim1_max + ) + buddy_features.hdim_2.values[buddy_looper] = transfm_pbc_point( + float(buddy_feat.hdim_2), hdim2_min, hdim2_max + ) + + # again, this may be redundant as I don't think we use buddy_zf/yf/xf after this + # in favor of iterrows thru the updated buddy_features + buddy_zf = np.append(buddy_zf, int(buddy_feat.vdim)) + buddy_yf = np.append(buddy_yf, yf2) + buddy_xf = np.append(buddy_xf, xf2) + + buddy_looper = buddy_looper + 1 # Create 1:1 map through actual domain points and continuous/contiguous points # used to identify buddy box dimension lengths for its construction - for z,y,x in zip(z_reg_inds[buddy],y_reg_inds[buddy],x_reg_inds[buddy]): - - buddy_z = np.append(buddy_z,z) - buddy_y = np.append(buddy_y,y) - buddy_x = np.append(buddy_x,x) - + for z, y, x in zip( + z_reg_inds[buddy], y_reg_inds[buddy], 
x_reg_inds[buddy]
+                ):
+
+                    buddy_z = np.append(buddy_z, z)
+                    buddy_y = np.append(buddy_y, y)
+                    buddy_x = np.append(buddy_x, x)
+
                     y2 = transfm_pbc_point(y, hdim1_min, hdim1_max)
                     x2 = transfm_pbc_point(x, hdim2_min, hdim2_max)
-
-                    buddy_z2 = np.append(buddy_z2,z)
-                    buddy_y2 = np.append(buddy_y2,y2)
-                    buddy_x2 = np.append(buddy_x2,x2)
-
+
+                    buddy_z2 = np.append(buddy_z2, z)
+                    buddy_y2 = np.append(buddy_y2, y2)
+                    buddy_x2 = np.append(buddy_x2, x2)
+
             # Buddy Box!
             # Identify mins and maxes of Buddy Box continuous points range
             # so that box of correct size can be constructed
             bbox_zstart = int(np.min(buddy_z2))
             bbox_ystart = int(np.min(buddy_y2))
             bbox_xstart = int(np.min(buddy_x2))
-            bbox_zend = int(np.max(buddy_z2)+1)
-            bbox_yend = int(np.max(buddy_y2)+1)
-            bbox_xend = int(np.max(buddy_x2)+1)
-
+            bbox_zend = int(np.max(buddy_z2) + 1)
+            bbox_yend = int(np.max(buddy_y2) + 1)
+            bbox_xend = int(np.max(buddy_x2) + 1)
+
             bbox_zsize = bbox_zend - bbox_zstart
             bbox_ysize = bbox_yend - bbox_ystart
             bbox_xsize = bbox_xend - bbox_xstart
-
-
+
             # Creation of actual Buddy Box space for transposition
             # of data in domain and re-seeding with Buddy feature markers
             buddy_rgn = np.zeros((bbox_zsize, bbox_ysize, bbox_xsize))
             ind_ctr = 0
-
-            #need to loop thru ALL z,y,x inds in buddy box
-            #not just the ones that have nonzero seg mask values
+
+            # need to loop thru ALL z,y,x inds in buddy box
+            # not just the ones that have nonzero seg mask values
 
             # "_a1" points are re-transformations from the continuous buddy box points
             # back to original grid/domain space to ensure that the correct data are
             # copied to the proper Buddy Box locations
-            for z in range(bbox_zstart,bbox_zend):
-                for y in range(bbox_ystart,bbox_yend):
-                    for x in range(bbox_xstart,bbox_xend):
+            for z in range(bbox_zstart, bbox_zend):
+                for y in range(bbox_ystart, bbox_yend):
+                    for x in range(bbox_xstart, bbox_xend):
                         z_a1 = z
                         if y > hdim1_max:
                             y_a1 = y - (hdim1_max + 1)
                         else:
                             y_a1 = y
-
+
                         if x > hdim2_max:
                             x_a1 = x - (hdim2_max + 1)
                         else:
                             x_a1 = x
 
-                        buddy_rgn[z-bbox_zstart,y-bbox_ystart,x-bbox_xstart] = field_in.data[z_a1,y_a1,x_a1]
-
-
-            #construction of iris cube corresponding to buddy box and its data
-            #for marker seeding and watershedding of buddy box
-
-            #print(rgn_cube)
-            #print(rgn_cube.vdim)
-
-            #Update buddy_features feature positions to correspond to buddy box space
-            #rather than domain space or continuous/contiguous point space
-            for buddy_looper in range(0,len(buddy_features)):
-                buddy_features.vdim.values[buddy_looper] = buddy_features.vdim.values[buddy_looper] - bbox_zstart
-                buddy_features.hdim_1.values[buddy_looper] = buddy_features.hdim_1.values[buddy_looper] - bbox_ystart
-                buddy_features.hdim_2.values[buddy_looper] = buddy_features.hdim_2.values[buddy_looper] - bbox_xstart
-
+                        buddy_rgn[
+                            z - bbox_zstart, y - bbox_ystart, x - bbox_xstart
+                        ] = field_in.data[z_a1, y_a1, x_a1]
+
+            # construction of iris cube corresponding to buddy box and its data
+            # for marker seeding and watershedding of buddy box
+
+            # print(rgn_cube)
+            # print(rgn_cube.vdim)
+
+            # Update buddy_features feature positions to correspond to buddy box space
+            # rather than domain space or continuous/contiguous point space
+            for buddy_looper in range(0, len(buddy_features)):
+                buddy_features.vdim.values[buddy_looper] = (
+                    buddy_features.vdim.values[buddy_looper] - bbox_zstart
+                )
+                buddy_features.hdim_1.values[buddy_looper] = (
+                    buddy_features.hdim_1.values[buddy_looper] - bbox_ystart
+                )
+                buddy_features.hdim_2.values[buddy_looper] = (
+                    buddy_features.hdim_2.values[buddy_looper] - bbox_xstart
+                )
+
             # Create dask array from input data:
-            #data=rgn_cube.core_data()
+            # data=rgn_cube.core_data()
             buddy_data = buddy_rgn
 
             # All of the below is the same overarching segmentation procedure as in the original
@@ -719,135 +882,169 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
             # "#transform segmentation_mask_4 data back to original mask after PBC first-pass ("segmentation_mask_3")"
             # It's just performed on the buddy box and its data rather than our full domain
 
-            #Set level at which to create "Seed" for each feature in the case of 3D watershedding:
+            # Set level at which to create "Seed" for each feature in the case of 3D watershedding:
             # If none, use all levels (later reduced to the ones fulfilling the threshold conditions)
-            if level==None:
-                level=slice(None)
+            if level == None:
+                level = slice(None)
 
             # transform max_distance in metres to distance in pixels:
             if max_distance is not None:
-                max_distance_pixel=np.ceil(max_distance/dxy)
-                #note - this doesn't consider vertical distance in pixels
+                max_distance_pixel = np.ceil(max_distance / dxy)
+                # note - this doesn't consider vertical distance in pixels
 
             # mask data outside region above/below threshold and invert data if tracking maxima:
-            if target == 'maximum':
-                unmasked_buddies=buddy_data>threshold
-                buddy_segmentation=-1*buddy_data
-            elif target == 'minimum':
-                unmasked_buddies=buddy_data<threshold
-                buddy_segmentation=buddy_data
+            if target == "maximum":
+                unmasked_buddies = buddy_data > threshold
+                buddy_segmentation = -1 * buddy_data
+            elif target == "minimum":
+                unmasked_buddies = buddy_data < threshold
+                buddy_segmentation = buddy_data
             else:
-                raise ValueError('unknown type of target')
+                raise ValueError("unknown type of target")
 
             # set markers at the positions of the features:
             buddy_markers = np.zeros(unmasked_buddies.shape).astype(np.int32)
             # Buddy boxes are always without PBCs
-            buddy_markers = add_markers(buddy_features, buddy_markers, seed_3D_flag,
-                                        seed_3D_size, level, PBC_flag='none')
+            buddy_markers = add_markers(
+                buddy_features,
+                buddy_markers,
+                seed_3D_flag,
+                seed_3D_size,
+                level,
+                PBC_flag="none",
+            )
 
             # set markers in cells not fulfilling threshold condition to zero:
             print(np.unique(buddy_markers))
-            buddy_markers[~unmasked_buddies]=0
-
+            buddy_markers[~unmasked_buddies] = 0
+
             marker_vals = np.unique(buddy_markers)
-
+
             # Turn into np arrays (not necessary for markers) as dask arrays don't yet seem to work for watershedding algorithm
-            buddy_segmentation=np.array(buddy_segmentation)
-            unmasked_buddies=np.array(unmasked_buddies)
+            buddy_segmentation = np.array(buddy_segmentation)
+            unmasked_buddies = np.array(unmasked_buddies)
 
             # perform segmentation:
-            if method=='watershed':
-                segmentation_mask_4 = watershed(np.array(buddy_segmentation),buddy_markers.astype(np.int32), mask=unmasked_buddies)
-
-            else:
-                raise ValueError('unknown method, must be watershed')
+            if method == "watershed":
+                segmentation_mask_4 = watershed(
+                    np.array(buddy_segmentation),
+                    buddy_markers.astype(np.int32),
+                    mask=unmasked_buddies,
+                )
+
+            else:
+                raise ValueError("unknown method, must be watershed")
 
             # remove everything from the individual masks that is more than max_distance_pixel away from the markers
             if max_distance is not None:
-                D=distance_transform_edt((markers==0).astype(int))
-                segmentation_mask_4[np.bitwise_and(segmentation_mask_4>0, D>max_distance_pixel)]=0
+                D = distance_transform_edt((markers == 0).astype(int))
+                segmentation_mask_4[
+                    np.bitwise_and(segmentation_mask_4 > 0, D > max_distance_pixel)
+                ] = 0
 
-
-            #mask all segmentation_mask
points below threshold as -1 - #to differentiate from those unmasked points NOT filled by watershedding + # mask all segmentation_mask points below threshold as -1 + # to differentiate from those unmasked points NOT filled by watershedding print(np.unique(segmentation_mask_4)) segmentation_mask_4[~unmasked_buddies] = -1 - - - #transform segmentation_mask_4 data back to mask created after PBC first-pass ("segmentation_mask_3") - #print(np.unique(test_mask3.data)) - - #loop through buddy box inds and analogous seg mask inds - for z_val in range(bbox_zstart,bbox_zend): + + # transform segmentation_mask_4 data back to mask created after PBC first-pass ("segmentation_mask_3") + # print(np.unique(test_mask3.data)) + + # loop through buddy box inds and analogous seg mask inds + for z_val in range(bbox_zstart, bbox_zend): z_seg = z_val - bbox_zstart z_val_o = z_val - for y_val in range(bbox_ystart,bbox_yend): + for y_val in range(bbox_ystart, bbox_yend): y_seg = y_val - bbox_ystart - #y_val_o = y_val + # y_val_o = y_val if y_val > hdim1_max: - y_val_o = y_val - (hdim1_max+1) + y_val_o = y_val - (hdim1_max + 1) else: y_val_o = y_val - for x_val in range(bbox_xstart,bbox_xend): + for x_val in range(bbox_xstart, bbox_xend): x_seg = x_val - bbox_xstart - #x_val_o = x_val + # x_val_o = x_val if x_val > hdim2_max: - x_val_o = x_val - (hdim2_max+1) + x_val_o = x_val - (hdim2_max + 1) else: x_val_o = x_val - #print(z_seg,y_seg,x_seg) - #print(z_val,y_val,x_val) - - #fix to - #overwrite IF: - #1) feature of interest - #2) changing to/from feature of interest or adjacent segmented feature - - #We don't want to overwrite other features that may be in the - #buddy box if not contacting the intersected seg field - - if (np.any(segmentation_mask_3[z_val_o,y_val_o,x_val_o]==buddies) and np.any(segmentation_mask_4.data[z_seg,y_seg,x_seg]==buddies)): - #only do updating procedure if old and new values both in buddy set - #and values are different - if(segmentation_mask_3[z_val_o,y_val_o,x_val_o] != segmentation_mask_4.data[z_seg,y_seg,x_seg]): - segmentation_mask_3[z_val_o,y_val_o,x_val_o] = segmentation_mask_4.data[z_seg,y_seg,x_seg] - #print("updated") + # print(z_seg,y_seg,x_seg) + # print(z_val,y_val,x_val) + + # fix to + # overwrite IF: + # 1) feature of interest + # 2) changing to/from feature of interest or adjacent segmented feature + + # We don't want to overwrite other features that may be in the + # buddy box if not contacting the intersected seg field + + if np.any( + segmentation_mask_3[z_val_o, y_val_o, x_val_o] == buddies + ) and np.any( + segmentation_mask_4.data[z_seg, y_seg, x_seg] == buddies + ): + # only do updating procedure if old and new values both in buddy set + # and values are different + if ( + segmentation_mask_3[z_val_o, y_val_o, x_val_o] + != segmentation_mask_4.data[z_seg, y_seg, x_seg] + ): + segmentation_mask_3[ + z_val_o, y_val_o, x_val_o + ] = segmentation_mask_4.data[z_seg, y_seg, x_seg] + # print("updated") if not is_3D_seg: - segmentation_mask_3 = segmentation_mask_3[0] + segmentation_mask_3 = segmentation_mask_3[0] segmentation_mask = segmentation_mask_3 if transposed_data: - segmentation_mask = np.transpose(segmentation_mask, axes = - [vertical_coord_axis, hdim_1_axis, hdim_2_axis]) + segmentation_mask = np.transpose( + segmentation_mask, axes=[vertical_coord_axis, hdim_1_axis, hdim_2_axis] + ) - # Finished PBC checks and new PBC updated segmentation now in segmentation_mask. 
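    # For intuition on the counting below (hypothetical values): if the final
    # mask holds labels {-1, 0, 7}, np.unique(segmentation_mask,
    # return_counts=True) returns those labels alongside their grid-cell
    # counts, and dict(zip(values, count)) lets each feature row look up its
    # own count via its 'feature' label.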
-    #Write resulting mask into cube for output
+    # Finished PBC checks and new PBC updated segmentation now in segmentation_mask.
+    # Write resulting mask into cube for output
     segmentation_out.data = segmentation_mask
 
     # count number of grid cells associated to each tracked cell and write that into DataFrame:
     values, count = np.unique(segmentation_mask, return_counts=True)
-    counts=dict(zip(values, count))
-    ncells=np.zeros(len(features_out))
-    for i,(index,row) in enumerate(features_out.iterrows()):
-        if row['feature'] in counts.keys():
-            ncells=counts[row['feature']]
-    features_out['ncells']=ncells
-
-    return segmentation_out,features_out
-
-def segmentation(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',PBC_flag='none',seed_3D_flag='column', seed_3D_size = 5):
+    counts = dict(zip(values, count))
+    ncells = np.zeros(len(features_out))
+    for i, (index, row) in enumerate(features_out.iterrows()):
+        if row["feature"] in counts.keys():
+            ncells[i] = counts[row["feature"]]
+    features_out["ncells"] = ncells
+
+    return segmentation_out, features_out
+
+
+def segmentation(
+    features,
+    field,
+    dxy,
+    threshold=3e-3,
+    target="maximum",
+    level=None,
+    method="watershed",
+    max_distance=None,
+    vertical_coord="auto",
+    PBC_flag="none",
+    seed_3D_flag="column",
+    seed_3D_size=5,
+):
     """
     Function using watershedding or random walker to determine cloud volumes associated with tracked updrafts
-    
+
     Parameters:
-    features:         pandas.DataFrame
+    features:         pandas.DataFrame
                    output from trackpy/maketrack
-    field:      iris.cube.Cube
-                   containing the field to perform the watershedding on
-    threshold:  float
+    field:      iris.cube.Cube
+                   containing the field to perform the watershedding on
+    threshold:  float
                    threshold for the watershedding field to be used for the mask
-
+
     target: string
                    Switch to determine if algorithm starts from maxima or minima in input field (maximum: starting from maxima (default), minimum: starting from minima)
@@ -855,10 +1052,10 @@ def segmentation(features,field,dxy,threshold=3e-3,target='maximum',level=None,m
                    levels at which to seed the cells for the watershedding algorithm
     method: str ('method')
                    flag determining the algorithm to use (currently watershedding implemented)
-
+
     max_distance: float
                    Maximum distance from a marker allowed to be classified as belonging to that cell
-
+
     PBC_flag : {'none', 'hdim_1', 'hdim_2', 'both'}
        Sets whether to use periodic boundaries, and if so in which directions.
        'none' means that we do not have periodic boundaries
       'hdim_1' means that we are periodic along hdim1
       'hdim_2' means that we are periodic along hdim2
       'both' means that we are periodic along both horizontal dimensions
    seed_3D_flag: str('column', 'box')
        Seed 3D field at feature positions with either the full column (default)
         or a box of user-set size
    seed_3D_size: int or tuple (dimensions equal to dimensions of `field`)
-        This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an
-        integer, the seed box is identical in all dimensions. If it's a tuple, it specifies the
+        This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an
+        integer, the seed box is identical in all dimensions. If it's a tuple, it specifies the
         seed area for each dimension separately. Note: we recommend the use
-        of odd numbers for this. If you give an even number, your seed box will be
-        biased and not centered around the feature.
-
+        of odd numbers for this. If you give an even number, your seed box will be
+        biased and not centered around the feature.
+ Output: segmentation_out: iris.cube.Cube Cloud mask, 0 outside and integer numbers according to track inside the cloud """ import pandas as pd from iris.cube import CubeList - - logging.info('Start watershedding 3D') - # check input for right dimensions: - if not (field.ndim==3 or field.ndim==4): - raise ValueError('input to segmentation step must be 3D or 4D including a time dimension') - if 'time' not in [coord.name() for coord in field.coords()]: - raise ValueError("input to segmentation step must include a dimension named 'time'") + logging.info("Start watershedding 3D") + + # check input for right dimensions: + if not (field.ndim == 3 or field.ndim == 4): + raise ValueError( + "input to segmentation step must be 3D or 4D including a time dimension" + ) + if "time" not in [coord.name() for coord in field.coords()]: + raise ValueError( + "input to segmentation step must include a dimension named 'time'" + ) # CubeList and list to store individual segmentation masks and feature DataFrames with information about segmentation - segmentation_out_list=CubeList() - features_out_list=[] - - #loop over individual input timesteps for segmentation: - #OR do segmentation on single timestep - field_time=field.slices_over('time') - time_len = len(field.coord('time').points[:]) - - for i,field_i in enumerate(field_time): - time_i=field_i.coord('time').units.num2date(field_i.coord('time').points[0]) - features_i=features.loc[features['time']==time_i] - segmentation_out_i,features_out_i=segmentation_timestep(field_i,features_i,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,vertical_coord=vertical_coord,PBC_flag=PBC_flag,seed_3D_flag=seed_3D_flag) + segmentation_out_list = CubeList() + features_out_list = [] + + # loop over individual input timesteps for segmentation: + # OR do segmentation on single timestep + field_time = field.slices_over("time") + time_len = len(field.coord("time").points[:]) + + for i, field_i in enumerate(field_time): + time_i = field_i.coord("time").units.num2date(field_i.coord("time").points[0]) + features_i = features.loc[features["time"] == time_i] + segmentation_out_i, features_out_i = segmentation_timestep( + field_i, + features_i, + dxy, + threshold=threshold, + target=target, + level=level, + method=method, + max_distance=max_distance, + vertical_coord=vertical_coord, + PBC_flag=PBC_flag, + seed_3D_flag=seed_3D_flag, + ) segmentation_out_list.append(segmentation_out_i) features_out_list.append(features_out_i) - logging.debug('Finished segmentation for '+time_i.strftime('%Y-%m-%d_%H:%M:%S')) - - #Merge output from individual timesteps: - segmentation_out=segmentation_out_list.merge_cube() - features_out=pd.concat(features_out_list) - - logging.debug('Finished segmentation') - return segmentation_out,features_out - -def watershedding_3D(track,field_in,**kwargs): - kwargs.pop('method',None) - return segmentation_3D(track,field_in,method='watershed',**kwargs) - -def watershedding_2D(track,field_in,**kwargs): - kwargs.pop('method',None) - return segmentation_2D(track,field_in,method='watershed',**kwargs) + logging.debug( + "Finished segmentation for " + time_i.strftime("%Y-%m-%d_%H:%M:%S") + ) + + # Merge output from individual timesteps: + segmentation_out = segmentation_out_list.merge_cube() + features_out = pd.concat(features_out_list) + + logging.debug("Finished segmentation") + return segmentation_out, features_out + + +def watershedding_3D(track, field_in, **kwargs): + kwargs.pop("method", None) + return segmentation_3D(track, 
field_in, method="watershed", **kwargs) + + +def watershedding_2D(track, field_in, **kwargs): + kwargs.pop("method", None) + return segmentation_2D(track, field_in, method="watershed", **kwargs) diff --git a/tobac/testing.py b/tobac/testing.py index f5dfb622..07c4b3cf 100644 --- a/tobac/testing.py +++ b/tobac/testing.py @@ -4,18 +4,19 @@ from xarray import DataArray import pandas as pd -def make_simple_sample_data_2D(data_type='iris'): - """function creating a simple dataset to use in tests for tobac. + +def make_simple_sample_data_2D(data_type="iris"): + """function creating a simple dataset to use in tests for tobac. The grid has a grid spacing of 1km in both horizontal directions and 100 grid cells in x direction and 500 in y direction. - Time resolution is 1 minute and the total length of the dataset is 100 minutes around a arbitrary date (2000-01-01 12:00). + Time resolution is 1 minute and the total length of the dataset is 100 minutes around an arbitrary date (2000-01-01 12:00). The longitude and latitude coordinates are added as 2D aux coordinates and are arbitrary, but in a realistic range. The data contains a single blob travelling on a linear trajectory through the dataset for part of the time. Parameters ---------- data_type: {'iris', 'xarray'} - The type of dataset to produce. Note that this function currently generates an iris cube + The type of dataset to produce. Note that this function currently generates an iris cube and if xarray is requested, it simply converts to xarray with the from_iris function in xarray. - + Returns ------- Iris.Cube.cube or xarray.DataArray @@ -23,50 +24,71 @@ def make_simple_sample_data_2D(data_type='iris'): """ from iris.cube import Cube - from iris.coords import DimCoord,AuxCoord + from iris.coords import DimCoord, AuxCoord - t_0=datetime.datetime(2000,1,1,12,0,0) - - x=np.arange(0,100e3,1000) - y=np.arange(0,50e3,1000) - t=t_0+np.arange(0,100,1)*datetime.timedelta(minutes=1) - xx,yy=np.meshgrid(x,y) - + t_0 = datetime.datetime(2000, 1, 1, 12, 0, 0) - t_temp=np.arange(0,60,1) - track1_t=t_0+t_temp*datetime.timedelta(minutes=1) - x_0_1=10e3 - y_0_1=10e3 - track1_x=x_0_1+30*t_temp*60 - track1_y=y_0_1+14*t_temp*60 - track1_magnitude=10*np.ones(track1_x.shape) + x = np.arange(0, 100e3, 1000) + y = np.arange(0, 50e3, 1000) + t = t_0 + np.arange(0, 100, 1) * datetime.timedelta(minutes=1) + xx, yy = np.meshgrid(x, y) - data=np.zeros((t.shape[0],y.shape[0],x.shape[0])) - for i_t,t_i in enumerate(t): + t_temp = np.arange(0, 60, 1) + track1_t = t_0 + t_temp * datetime.timedelta(minutes=1) + x_0_1 = 10e3 + y_0_1 = 10e3 + track1_x = x_0_1 + 30 * t_temp * 60 + track1_y = y_0_1 + 14 * t_temp * 60 + track1_magnitude = 10 * np.ones(track1_x.shape) + + data = np.zeros((t.shape[0], y.shape[0], x.shape[0])) + for i_t, t_i in enumerate(t): if np.any(t_i in track1_t): - x_i=track1_x[track1_t==t_i] - y_i=track1_y[track1_t==t_i] - mag_i=track1_magnitude[track1_t==t_i] - data[i_t]=data[i_t]+mag_i*np.exp(-np.power(xx - x_i,2.) / (2 * np.power(10e3, 2.)))*np.exp(-np.power(yy - y_i, 2.) 
/ (2 * np.power(10e3, 2.))) - - t_start=datetime.datetime(1970,1,1,0,0) - t_points=(t-t_start).astype("timedelta64[ms]").astype(int) / 1000 - t_coord=DimCoord(t_points,standard_name='time',var_name='time',units='seconds since 1970-01-01 00:00') - x_coord=DimCoord(x,standard_name='projection_x_coordinate',var_name='x',units='m') - y_coord=DimCoord(y,standard_name='projection_y_coordinate',var_name='y',units='m') - lat_coord=AuxCoord(24+1e-5*xx,standard_name='latitude',var_name='latitude',units='degree') - lon_coord=AuxCoord(150+1e-5*yy,standard_name='longitude',var_name='longitude',units='degree') - sample_data=Cube(data,dim_coords_and_dims=[(t_coord, 0),(y_coord, 1),(x_coord, 2)],aux_coords_and_dims=[(lat_coord, (1,2)),(lon_coord, (1,2))],var_name='w',units='m s-1') - - if data_type=='xarray': - sample_data=DataArray.from_iris(sample_data) - + x_i = track1_x[track1_t == t_i] + y_i = track1_y[track1_t == t_i] + mag_i = track1_magnitude[track1_t == t_i] + data[i_t] = data[i_t] + mag_i * np.exp( + -np.power(xx - x_i, 2.0) / (2 * np.power(10e3, 2.0)) + ) * np.exp(-np.power(yy - y_i, 2.0) / (2 * np.power(10e3, 2.0))) + + t_start = datetime.datetime(1970, 1, 1, 0, 0) + t_points = (t - t_start).astype("timedelta64[ms]").astype(int) / 1000 + t_coord = DimCoord( + t_points, + standard_name="time", + var_name="time", + units="seconds since 1970-01-01 00:00", + ) + x_coord = DimCoord( + x, standard_name="projection_x_coordinate", var_name="x", units="m" + ) + y_coord = DimCoord( + y, standard_name="projection_y_coordinate", var_name="y", units="m" + ) + lat_coord = AuxCoord( + 24 + 1e-5 * xx, standard_name="latitude", var_name="latitude", units="degree" + ) + lon_coord = AuxCoord( + 150 + 1e-5 * yy, standard_name="longitude", var_name="longitude", units="degree" + ) + sample_data = Cube( + data, + dim_coords_and_dims=[(t_coord, 0), (y_coord, 1), (x_coord, 2)], + aux_coords_and_dims=[(lat_coord, (1, 2)), (lon_coord, (1, 2))], + var_name="w", + units="m s-1", + ) + + if data_type == "xarray": + sample_data = DataArray.from_iris(sample_data) + return sample_data -def make_sample_data_2D_3blobs(data_type='iris'): +def make_sample_data_2D_3blobs(data_type="iris"): from iris.cube import Cube - from iris.coords import DimCoord,AuxCoord + from iris.coords import DimCoord, AuxCoord + """function creating a simple dataset to use in tests for tobac. The grid has a grid spacing of 1km in both horizontal directions and 100 grid cells in x direction and 200 in y direction. Time resolution is 1 minute and the total length of the dataset is 100 minutes around an arbitrary date (2000-01-01 12:00). 
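Every sample-data builder in testing.py stamps features into the domain with the same separable Gaussian bump used in the loop above. A minimal standalone sketch of that kernel (the `gaussian_blob` helper below is illustrative only, not a function added by this patch):

```python
import numpy as np

def gaussian_blob(xx, yy, x_i, y_i, magnitude, sigma=10e3):
    # Same separable form as the data[i_t] update above:
    # mag * exp(-(x - x_i)^2 / (2 sigma^2)) * exp(-(y - y_i)^2 / (2 sigma^2))
    return (
        magnitude
        * np.exp(-np.power(xx - x_i, 2.0) / (2 * np.power(sigma, 2.0)))
        * np.exp(-np.power(yy - y_i, 2.0) / (2 * np.power(sigma, 2.0)))
    )

x = np.arange(0, 100e3, 1000)
y = np.arange(0, 50e3, 1000)
xx, yy = np.meshgrid(x, y)
frame = gaussian_blob(xx, yy, x_i=10e3, y_i=10e3, magnitude=10)
assert np.isclose(frame.max(), 10)  # the peak sits at the blob centre
```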
@@ -85,165 +107,207 @@ def make_sample_data_2D_3blobs(data_type='iris'): The simple output """ - t_0=datetime.datetime(2000,1,1,12,0,0) - - x=np.arange(0,100e3,1000) - y=np.arange(0,200e3,1000) - t=t_0+np.arange(0,100,1)*datetime.timedelta(minutes=1) - xx,yy=np.meshgrid(x,y) - - - t_temp=np.arange(0,60,1) - track1_t=t_0+t_temp*datetime.timedelta(minutes=1) - x_0_1=10e3 - y_0_1=10e3 - track1_x=x_0_1+30*t_temp*60 - track1_y=y_0_1+14*t_temp*60 - track1_magnitude=10*np.ones(track1_x.shape) - - t_temp=np.arange(0,30,1) - track2_t=t_0+(t_temp+40)*datetime.timedelta(minutes=1) - x_0_2=20e3 - y_0_2=10e3 - track2_x=x_0_2+24*(t_temp*60)**2/1000 - track2_y=y_0_2+12*t_temp*60 - track2_magnitude=20*np.ones(track2_x.shape) - - - - t_temp=np.arange(0,20,1) - track3_t=t_0+(t_temp+50)*datetime.timedelta(minutes=1) - x_0_3=70e3 - y_0_3=110e3 - track3_x=x_0_3+20*(t_temp*60)**2/1000 - track3_y=y_0_3+20*t_temp*60 - track3_magnitude=15*np.ones(track3_x.shape) - - - data=np.zeros((t.shape[0],y.shape[0],x.shape[0])) - for i_t,t_i in enumerate(t): + t_0 = datetime.datetime(2000, 1, 1, 12, 0, 0) + + x = np.arange(0, 100e3, 1000) + y = np.arange(0, 200e3, 1000) + t = t_0 + np.arange(0, 100, 1) * datetime.timedelta(minutes=1) + xx, yy = np.meshgrid(x, y) + + t_temp = np.arange(0, 60, 1) + track1_t = t_0 + t_temp * datetime.timedelta(minutes=1) + x_0_1 = 10e3 + y_0_1 = 10e3 + track1_x = x_0_1 + 30 * t_temp * 60 + track1_y = y_0_1 + 14 * t_temp * 60 + track1_magnitude = 10 * np.ones(track1_x.shape) + + t_temp = np.arange(0, 30, 1) + track2_t = t_0 + (t_temp + 40) * datetime.timedelta(minutes=1) + x_0_2 = 20e3 + y_0_2 = 10e3 + track2_x = x_0_2 + 24 * (t_temp * 60) ** 2 / 1000 + track2_y = y_0_2 + 12 * t_temp * 60 + track2_magnitude = 20 * np.ones(track2_x.shape) + + t_temp = np.arange(0, 20, 1) + track3_t = t_0 + (t_temp + 50) * datetime.timedelta(minutes=1) + x_0_3 = 70e3 + y_0_3 = 110e3 + track3_x = x_0_3 + 20 * (t_temp * 60) ** 2 / 1000 + track3_y = y_0_3 + 20 * t_temp * 60 + track3_magnitude = 15 * np.ones(track3_x.shape) + + data = np.zeros((t.shape[0], y.shape[0], x.shape[0])) + for i_t, t_i in enumerate(t): if np.any(t_i in track1_t): - x_i=track1_x[track1_t==t_i] - y_i=track1_y[track1_t==t_i] - mag_i=track1_magnitude[track1_t==t_i] - data[i_t]=data[i_t]+mag_i*np.exp(-np.power(xx - x_i,2.) / (2 * np.power(10e3, 2.)))*np.exp(-np.power(yy - y_i, 2.) / (2 * np.power(10e3, 2.))) + x_i = track1_x[track1_t == t_i] + y_i = track1_y[track1_t == t_i] + mag_i = track1_magnitude[track1_t == t_i] + data[i_t] = data[i_t] + mag_i * np.exp( + -np.power(xx - x_i, 2.0) / (2 * np.power(10e3, 2.0)) + ) * np.exp(-np.power(yy - y_i, 2.0) / (2 * np.power(10e3, 2.0))) if np.any(t_i in track2_t): - x_i=track2_x[track2_t==t_i] - y_i=track2_y[track2_t==t_i] - mag_i=track2_magnitude[track2_t==t_i] - data[i_t]=data[i_t]+mag_i*np.exp(-np.power(xx - x_i,2.) / (2 * np.power(10e3, 2.)))*np.exp(-np.power(yy - y_i, 2.) / (2 * np.power(10e3, 2.))) + x_i = track2_x[track2_t == t_i] + y_i = track2_y[track2_t == t_i] + mag_i = track2_magnitude[track2_t == t_i] + data[i_t] = data[i_t] + mag_i * np.exp( + -np.power(xx - x_i, 2.0) / (2 * np.power(10e3, 2.0)) + ) * np.exp(-np.power(yy - y_i, 2.0) / (2 * np.power(10e3, 2.0))) if np.any(t_i in track3_t): - x_i=track3_x[track3_t==t_i] - y_i=track3_y[track3_t==t_i] - mag_i=track3_magnitude[track3_t==t_i] - data[i_t]=data[i_t]+mag_i*np.exp(-np.power(xx - x_i,2.) / (2 * np.power(10e3, 2.)))*np.exp(-np.power(yy - y_i, 2.) 
/ (2 * np.power(10e3, 2.)))\ - - t_start=datetime.datetime(1970,1,1,0,0) - t_points=(t-t_start).astype("timedelta64[ms]").astype(int) / 1000 - t_coord=DimCoord(t_points,standard_name='time',var_name='time',units='seconds since 1970-01-01 00:00') - x_coord=DimCoord(x,standard_name='projection_x_coordinate',var_name='x',units='m') - y_coord=DimCoord(y,standard_name='projection_y_coordinate',var_name='y',units='m') - lat_coord=AuxCoord(24+1e-5*xx,standard_name='latitude',var_name='latitude',units='degree') - lon_coord=AuxCoord(150+1e-5*yy,standard_name='longitude',var_name='longitude',units='degree') - sample_data=Cube(data,dim_coords_and_dims=[(t_coord, 0),(y_coord, 1),(x_coord, 2)],aux_coords_and_dims=[(lat_coord, (1,2)),(lon_coord, (1,2))],var_name='w',units='m s-1') - - if data_type=='xarray': - sample_data=DataArray.from_iris(sample_data) - + x_i = track3_x[track3_t == t_i] + y_i = track3_y[track3_t == t_i] + mag_i = track3_magnitude[track3_t == t_i] + data[i_t] = data[i_t] + mag_i * np.exp( + -np.power(xx - x_i, 2.0) / (2 * np.power(10e3, 2.0)) + ) * np.exp(-np.power(yy - y_i, 2.0) / (2 * np.power(10e3, 2.0))) + t_start = datetime.datetime(1970, 1, 1, 0, 0) + t_points = (t - t_start).astype("timedelta64[ms]").astype(int) / 1000 + t_coord = DimCoord( + t_points, + standard_name="time", + var_name="time", + units="seconds since 1970-01-01 00:00", + ) + x_coord = DimCoord( + x, standard_name="projection_x_coordinate", var_name="x", units="m" + ) + y_coord = DimCoord( + y, standard_name="projection_y_coordinate", var_name="y", units="m" + ) + lat_coord = AuxCoord( + 24 + 1e-5 * xx, standard_name="latitude", var_name="latitude", units="degree" + ) + lon_coord = AuxCoord( + 150 + 1e-5 * yy, standard_name="longitude", var_name="longitude", units="degree" + ) + sample_data = Cube( + data, + dim_coords_and_dims=[(t_coord, 0), (y_coord, 1), (x_coord, 2)], + aux_coords_and_dims=[(lat_coord, (1, 2)), (lon_coord, (1, 2))], + var_name="w", + units="m s-1", + ) + + if data_type == "xarray": + sample_data = DataArray.from_iris(sample_data) + return sample_data -def make_sample_data_2D_3blobs_inv(data_type='iris'): - """function creating a version of the dataset created in the function make_sample_cube_2D, but with switched coordinate order for the horizontal coordinates +def make_sample_data_2D_3blobs_inv(data_type="iris"): + """function creating a version of the dataset created in the function make_sample_data_2D_3blobs, but with switched coordinate order for the horizontal coordinates for tests to ensure that this does not affect the results Parameters ---------- data_type: {'iris', 'xarray'} - The type of dataset to produce. Note that this function currently generates an iris cube + The type of dataset to produce. Note that this function currently generates an iris cube and if xarray is requested, it simply converts to xarray with the from_iris function in xarray. 
- + Returns ------- Iris.Cube.cube or xarray.DataArray The simple output - + """ from iris.cube import Cube - from iris.coords import DimCoord,AuxCoord - - t_0=datetime.datetime(2000,1,1,12,0,0) - x=np.arange(0,100e3,1000) - y=np.arange(0,200e3,1000) - t=t_0+np.arange(0,100,1)*datetime.timedelta(minutes=1) - yy,xx=np.meshgrid(y,x) - - - t_temp=np.arange(0,60,1) - track1_t=t_0+t_temp*datetime.timedelta(minutes=1) - x_0_1=10e3 - y_0_1=10e3 - track1_x=x_0_1+30*t_temp*60 - track1_y=y_0_1+14*t_temp*60 - track1_magnitude=10*np.ones(track1_x.shape) - - t_temp=np.arange(0,30,1) - track2_t=t_0+(t_temp+40)*datetime.timedelta(minutes=1) - x_0_2=20e3 - y_0_2=10e3 - track2_x=x_0_2+24*(t_temp*60)**2/1000 - track2_y=y_0_2+12*t_temp*60 - track2_magnitude=20*np.ones(track2_x.shape) - - - - t_temp=np.arange(0,20,1) - track3_t=t_0+(t_temp+50)*datetime.timedelta(minutes=1) - x_0_3=70e3 - y_0_3=110e3 - track3_x=x_0_3+20*(t_temp*60)**2/1000 - track3_y=y_0_3+20*t_temp*60 - track3_magnitude=15*np.ones(track3_x.shape) - - - data=np.zeros((t.shape[0],x.shape[0],y.shape[0])) - for i_t,t_i in enumerate(t): + from iris.coords import DimCoord, AuxCoord + + t_0 = datetime.datetime(2000, 1, 1, 12, 0, 0) + x = np.arange(0, 100e3, 1000) + y = np.arange(0, 200e3, 1000) + t = t_0 + np.arange(0, 100, 1) * datetime.timedelta(minutes=1) + yy, xx = np.meshgrid(y, x) + + t_temp = np.arange(0, 60, 1) + track1_t = t_0 + t_temp * datetime.timedelta(minutes=1) + x_0_1 = 10e3 + y_0_1 = 10e3 + track1_x = x_0_1 + 30 * t_temp * 60 + track1_y = y_0_1 + 14 * t_temp * 60 + track1_magnitude = 10 * np.ones(track1_x.shape) + + t_temp = np.arange(0, 30, 1) + track2_t = t_0 + (t_temp + 40) * datetime.timedelta(minutes=1) + x_0_2 = 20e3 + y_0_2 = 10e3 + track2_x = x_0_2 + 24 * (t_temp * 60) ** 2 / 1000 + track2_y = y_0_2 + 12 * t_temp * 60 + track2_magnitude = 20 * np.ones(track2_x.shape) + + t_temp = np.arange(0, 20, 1) + track3_t = t_0 + (t_temp + 50) * datetime.timedelta(minutes=1) + x_0_3 = 70e3 + y_0_3 = 110e3 + track3_x = x_0_3 + 20 * (t_temp * 60) ** 2 / 1000 + track3_y = y_0_3 + 20 * t_temp * 60 + track3_magnitude = 15 * np.ones(track3_x.shape) + + data = np.zeros((t.shape[0], x.shape[0], y.shape[0])) + for i_t, t_i in enumerate(t): if np.any(t_i in track1_t): - x_i=track1_x[track1_t==t_i] - y_i=track1_y[track1_t==t_i] - mag_i=track1_magnitude[track1_t==t_i] - data[i_t]=data[i_t]+mag_i*np.exp(-np.power(xx - x_i,2.) / (2 * np.power(10e3, 2.)))*np.exp(-np.power(yy - y_i, 2.) / (2 * np.power(10e3, 2.))) + x_i = track1_x[track1_t == t_i] + y_i = track1_y[track1_t == t_i] + mag_i = track1_magnitude[track1_t == t_i] + data[i_t] = data[i_t] + mag_i * np.exp( + -np.power(xx - x_i, 2.0) / (2 * np.power(10e3, 2.0)) + ) * np.exp(-np.power(yy - y_i, 2.0) / (2 * np.power(10e3, 2.0))) if np.any(t_i in track2_t): - x_i=track2_x[track2_t==t_i] - y_i=track2_y[track2_t==t_i] - mag_i=track2_magnitude[track2_t==t_i] - data[i_t]=data[i_t]+mag_i*np.exp(-np.power(xx - x_i,2.) / (2 * np.power(10e3, 2.)))*np.exp(-np.power(yy - y_i, 2.) / (2 * np.power(10e3, 2.))) + x_i = track2_x[track2_t == t_i] + y_i = track2_y[track2_t == t_i] + mag_i = track2_magnitude[track2_t == t_i] + data[i_t] = data[i_t] + mag_i * np.exp( + -np.power(xx - x_i, 2.0) / (2 * np.power(10e3, 2.0)) + ) * np.exp(-np.power(yy - y_i, 2.0) / (2 * np.power(10e3, 2.0))) if np.any(t_i in track3_t): - x_i=track3_x[track3_t==t_i] - y_i=track3_y[track3_t==t_i] - mag_i=track3_magnitude[track3_t==t_i] - data[i_t]=data[i_t]+mag_i*np.exp(-np.power(xx - x_i,2.) 
/ (2 * np.power(10e3, 2.)))*np.exp(-np.power(yy - y_i, 2.) / (2 * np.power(10e3, 2.))) - - t_start=datetime.datetime(1970,1,1,0,0) - t_points=(t-t_start).astype("timedelta64[ms]").astype(int) / 1000 - - t_coord=DimCoord(t_points,standard_name='time',var_name='time',units='seconds since 1970-01-01 00:00') - x_coord=DimCoord(x,standard_name='projection_x_coordinate',var_name='x',units='m') - y_coord=DimCoord(y,standard_name='projection_y_coordinate',var_name='y',units='m') - lat_coord=AuxCoord(24+1e-5*xx,standard_name='latitude',var_name='latitude',units='degree') - lon_coord=AuxCoord(150+1e-5*yy,standard_name='longitude',var_name='longitude',units='degree') + x_i = track3_x[track3_t == t_i] + y_i = track3_y[track3_t == t_i] + mag_i = track3_magnitude[track3_t == t_i] + data[i_t] = data[i_t] + mag_i * np.exp( + -np.power(xx - x_i, 2.0) / (2 * np.power(10e3, 2.0)) + ) * np.exp(-np.power(yy - y_i, 2.0) / (2 * np.power(10e3, 2.0))) + + t_start = datetime.datetime(1970, 1, 1, 0, 0) + t_points = (t - t_start).astype("timedelta64[ms]").astype(int) / 1000 + + t_coord = DimCoord( + t_points, + standard_name="time", + var_name="time", + units="seconds since 1970-01-01 00:00", + ) + x_coord = DimCoord( + x, standard_name="projection_x_coordinate", var_name="x", units="m" + ) + y_coord = DimCoord( + y, standard_name="projection_y_coordinate", var_name="y", units="m" + ) + lat_coord = AuxCoord( + 24 + 1e-5 * xx, standard_name="latitude", var_name="latitude", units="degree" + ) + lon_coord = AuxCoord( + 150 + 1e-5 * yy, standard_name="longitude", var_name="longitude", units="degree" + ) + + sample_data = Cube( + data, + dim_coords_and_dims=[(t_coord, 0), (y_coord, 2), (x_coord, 1)], + aux_coords_and_dims=[(lat_coord, (1, 2)), (lon_coord, (1, 2))], + var_name="w", + units="m s-1", + ) + if data_type == "xarray": + sample_data = DataArray.from_iris(sample_data) - sample_data=Cube(data,dim_coords_and_dims=[(t_coord, 0),(y_coord, 2),(x_coord, 1)],aux_coords_and_dims=[(lat_coord, (1,2)),(lon_coord, (1,2))],var_name='w',units='m s-1') - - if data_type=='xarray': - sample_data=DataArray.from_iris(sample_data) - return sample_data -def make_sample_data_3D_3blobs(data_type='iris',invert_xy=False): + +def make_sample_data_3D_3blobs(data_type="iris", invert_xy=False): from iris.cube import Cube - from iris.coords import DimCoord,AuxCoord + from iris.coords import DimCoord, AuxCoord + """function creating a simple dataset to use in tests for tobac. The grid has a grid spacing of 1km in both horizontal directions and 100 grid cells in x direction and 200 in y direction. Time resolution is 1 minute and the total length of the dataset is 100 minutes around an arbitrary date (2000-01-01 12:00). 
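Because make_sample_data_2D_3blobs_inv only swaps the order of the two horizontal dimensions, the two cubes should hold identical values up to a transpose. A quick consistency sketch (an editor illustration assuming both builders are importable from tobac.testing at this point in the branch, not a test from this patch):

```python
import numpy as np
from tobac.testing import make_sample_data_2D_3blobs, make_sample_data_2D_3blobs_inv

cube = make_sample_data_2D_3blobs()          # dimensions (time, y, x)
cube_inv = make_sample_data_2D_3blobs_inv()  # dimensions (time, x, y)

# Swapping the two horizontal axes of the inverted cube should recover
# the original field values exactly.
np.testing.assert_allclose(cube.data, np.swapaxes(cube_inv.data, 1, 2))
```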
@@ -265,97 +329,126 @@ def make_sample_data_3D_3blobs(data_type='iris',invert_xy=False): """ - t_0=datetime.datetime(2000,1,1,12,0,0) - - x=np.arange(0,100e3,1000) - y=np.arange(0,200e3,1000) - z=np.arange(0,20e3,1000) - - t=t_0+np.arange(0,50,2)*datetime.timedelta(minutes=1) - - t_temp=np.arange(0,60,1) - track1_t=t_0+t_temp*datetime.timedelta(minutes=1) - x_0_1=10e3 - y_0_1=10e3 - z_0_1=4e3 - track1_x=x_0_1+30*t_temp*60 - track1_y=y_0_1+14*t_temp*60 - track1_magnitude=10*np.ones(track1_x.shape) - - t_temp=np.arange(0,30,1) - track2_t=t_0+(t_temp+40)*datetime.timedelta(minutes=1) - x_0_2=20e3 - y_0_2=10e3 - z_0_2=6e3 - track2_x=x_0_2+24*(t_temp*60)**2/1000 - track2_y=y_0_2+12*t_temp*60 - track2_magnitude=20*np.ones(track2_x.shape) - - - - t_temp=np.arange(0,20,1) - track3_t=t_0+(t_temp+50)*datetime.timedelta(minutes=1) - x_0_3=70e3 - y_0_3=110e3 - z_0_3=8e3 - track3_x=x_0_3+20*(t_temp*60)**2/1000 - track3_y=y_0_3+20*t_temp*60 - track3_magnitude=15*np.ones(track3_x.shape) - - if invert_xy==False: - zz,yy,xx=np.meshgrid(z,y,x,indexing='ij') - y_dim=2 - x_dim=3 - data=np.zeros((t.shape[0],z.shape[0],y.shape[0],x.shape[0])) + t_0 = datetime.datetime(2000, 1, 1, 12, 0, 0) + + x = np.arange(0, 100e3, 1000) + y = np.arange(0, 200e3, 1000) + z = np.arange(0, 20e3, 1000) + + t = t_0 + np.arange(0, 50, 2) * datetime.timedelta(minutes=1) + + t_temp = np.arange(0, 60, 1) + track1_t = t_0 + t_temp * datetime.timedelta(minutes=1) + x_0_1 = 10e3 + y_0_1 = 10e3 + z_0_1 = 4e3 + track1_x = x_0_1 + 30 * t_temp * 60 + track1_y = y_0_1 + 14 * t_temp * 60 + track1_magnitude = 10 * np.ones(track1_x.shape) + + t_temp = np.arange(0, 30, 1) + track2_t = t_0 + (t_temp + 40) * datetime.timedelta(minutes=1) + x_0_2 = 20e3 + y_0_2 = 10e3 + z_0_2 = 6e3 + track2_x = x_0_2 + 24 * (t_temp * 60) ** 2 / 1000 + track2_y = y_0_2 + 12 * t_temp * 60 + track2_magnitude = 20 * np.ones(track2_x.shape) + + t_temp = np.arange(0, 20, 1) + track3_t = t_0 + (t_temp + 50) * datetime.timedelta(minutes=1) + x_0_3 = 70e3 + y_0_3 = 110e3 + z_0_3 = 8e3 + track3_x = x_0_3 + 20 * (t_temp * 60) ** 2 / 1000 + track3_y = y_0_3 + 20 * t_temp * 60 + track3_magnitude = 15 * np.ones(track3_x.shape) + + if invert_xy == False: + zz, yy, xx = np.meshgrid(z, y, x, indexing="ij") + y_dim = 2 + x_dim = 3 + data = np.zeros((t.shape[0], z.shape[0], y.shape[0], x.shape[0])) else: - zz,xx,yy=np.meshgrid(z,x,y,indexing='ij') - x_dim=2 - y_dim=3 - data=np.zeros((t.shape[0],z.shape[0],x.shape[0],y.shape[0])) + zz, xx, yy = np.meshgrid(z, x, y, indexing="ij") + x_dim = 2 + y_dim = 3 + data = np.zeros((t.shape[0], z.shape[0], x.shape[0], y.shape[0])) - - for i_t,t_i in enumerate(t): + for i_t, t_i in enumerate(t): if np.any(t_i in track1_t): - x_i=track1_x[track1_t==t_i] - y_i=track1_y[track1_t==t_i] - z_i=z_0_1 - mag_i=track1_magnitude[track1_t==t_i] - data[i_t]=data[i_t]+mag_i*np.exp(-np.power(xx - x_i,2.) / (2 * np.power(10e3, 2.)))\ - *np.exp(-np.power(yy - y_i, 2.) / (2 * np.power(10e3, 2.)))\ - *np.exp(-np.power(zz - z_i, 2.) 
/ (2 * np.power(5e3, 2.))) + x_i = track1_x[track1_t == t_i] + y_i = track1_y[track1_t == t_i] + z_i = z_0_1 + mag_i = track1_magnitude[track1_t == t_i] + data[i_t] = data[i_t] + mag_i * np.exp( + -np.power(xx - x_i, 2.0) / (2 * np.power(10e3, 2.0)) + ) * np.exp(-np.power(yy - y_i, 2.0) / (2 * np.power(10e3, 2.0))) * np.exp( + -np.power(zz - z_i, 2.0) / (2 * np.power(5e3, 2.0)) + ) if np.any(t_i in track2_t): - x_i=track2_x[track2_t==t_i] - y_i=track2_y[track2_t==t_i] - z_i=z_0_2 - mag_i=track2_magnitude[track2_t==t_i] - data[i_t]=data[i_t]+mag_i*np.exp(-np.power(xx - x_i,2.) / (2 * np.power(10e3, 2.)))\ - *np.exp(-np.power(yy - y_i, 2.) / (2 * np.power(10e3, 2.)))\ - *np.exp(-np.power(zz - z_i, 2.) / (2 * np.power(5e3, 2.))) + x_i = track2_x[track2_t == t_i] + y_i = track2_y[track2_t == t_i] + z_i = z_0_2 + mag_i = track2_magnitude[track2_t == t_i] + data[i_t] = data[i_t] + mag_i * np.exp( + -np.power(xx - x_i, 2.0) / (2 * np.power(10e3, 2.0)) + ) * np.exp(-np.power(yy - y_i, 2.0) / (2 * np.power(10e3, 2.0))) * np.exp( + -np.power(zz - z_i, 2.0) / (2 * np.power(5e3, 2.0)) + ) if np.any(t_i in track3_t): - x_i=track3_x[track3_t==t_i] - y_i=track3_y[track3_t==t_i] - z_i=z_0_3 - mag_i=track3_magnitude[track3_t==t_i] - data[i_t]=data[i_t]+mag_i*np.exp(-np.power(xx - x_i,2.) / (2 * np.power(10e3, 2.)))\ - *np.exp(-np.power(yy - y_i, 2.) / (2 * np.power(10e3, 2.)))\ - *np.exp(-np.power(zz - z_i, 2.) / (2 * np.power(5e3, 2.))) - - - t_start=datetime.datetime(1970,1,1,0,0) - t_points=(t-t_start).astype("timedelta64[ms]").astype(int) / 1000 - t_coord=DimCoord(t_points,standard_name='time',var_name='time',units='seconds since 1970-01-01 00:00') - z_coord=DimCoord(z,standard_name='geopotential_height',var_name='z',units='m') - y_coord=DimCoord(y,standard_name='projection_y_coordinate',var_name='y',units='m') - x_coord=DimCoord(x,standard_name='projection_x_coordinate',var_name='x',units='m') - lat_coord=AuxCoord(24+1e-5*xx[0],standard_name='latitude',var_name='latitude',units='degree') - lon_coord=AuxCoord(150+1e-5*yy[0],standard_name='longitude',var_name='longitude',units='degree') - sample_data=Cube(data,dim_coords_and_dims=[(t_coord, 0),(z_coord, 1),(y_coord, y_dim),(x_coord, x_dim)],aux_coords_and_dims=[(lat_coord, (2,3)),(lon_coord, (2,3))],var_name='w',units='m s-1') - - if data_type=='xarray': - sample_data=DataArray.from_iris(sample_data) - + x_i = track3_x[track3_t == t_i] + y_i = track3_y[track3_t == t_i] + z_i = z_0_3 + mag_i = track3_magnitude[track3_t == t_i] + data[i_t] = data[i_t] + mag_i * np.exp( + -np.power(xx - x_i, 2.0) / (2 * np.power(10e3, 2.0)) + ) * np.exp(-np.power(yy - y_i, 2.0) / (2 * np.power(10e3, 2.0))) * np.exp( + -np.power(zz - z_i, 2.0) / (2 * np.power(5e3, 2.0)) + ) + + t_start = datetime.datetime(1970, 1, 1, 0, 0) + t_points = (t - t_start).astype("timedelta64[ms]").astype(int) / 1000 + t_coord = DimCoord( + t_points, + standard_name="time", + var_name="time", + units="seconds since 1970-01-01 00:00", + ) + z_coord = DimCoord(z, standard_name="geopotential_height", var_name="z", units="m") + y_coord = DimCoord( + y, standard_name="projection_y_coordinate", var_name="y", units="m" + ) + x_coord = DimCoord( + x, standard_name="projection_x_coordinate", var_name="x", units="m" + ) + lat_coord = AuxCoord( + 24 + 1e-5 * xx[0], standard_name="latitude", var_name="latitude", units="degree" + ) + lon_coord = AuxCoord( + 150 + 1e-5 * yy[0], + standard_name="longitude", + var_name="longitude", + units="degree", + ) + sample_data = Cube( + data, + dim_coords_and_dims=[ + 
(t_coord, 0), + (z_coord, 1), + (y_coord, y_dim), + (x_coord, x_dim), + ], + aux_coords_and_dims=[(lat_coord, (2, 3)), (lon_coord, (2, 3))], + var_name="w", + units="m s-1", + ) + + if data_type == "xarray": + sample_data = DataArray.from_iris(sample_data) + return sample_data @@ -364,7 +457,7 @@ def make_dataset_from_arr( data_type="xarray", time_dim_num=None, z_dim_num=None, - z_dim_name = 'altitude', + z_dim_name="altitude", y_dim_num=0, x_dim_num=1, ): @@ -404,7 +497,7 @@ def make_dataset_from_arr( z_max = in_arr.shape[z_dim_num] if has_time: - time_min = datetime.datetime(2022,1,1) + time_min = datetime.datetime(2022, 1, 1) time_num = in_arr.shape[time_dim_num] if data_type == "xarray": @@ -420,19 +513,34 @@ def make_dataset_from_arr( if has_time: if is_3D: out_arr_iris.add_dim_coord( - iris.coords.DimCoord(pd.date_range(start=time_min, periods=time_num).values.astype('datetime64[s]').astype(int), - standard_name='time', units='seconds since epoch'), + iris.coords.DimCoord( + pd.date_range(start=time_min, periods=time_num) + .values.astype("datetime64[s]") + .astype(int), + standard_name="time", + units="seconds since epoch", + ), time_dim_num, - ) + ) return out_arr_iris else: raise ValueError("data_type must be 'xarray' or 'iris'") -def make_feature_blob(in_arr, h1_loc, h2_loc, v_loc = None, - h1_size = 1, h2_size = 1, v_size = 1, - shape = 'rectangle', amplitude=1, - PBC_flag = 'none'): + +def make_feature_blob( + in_arr, + h1_loc, + h2_loc, + v_loc=None, + h1_size=1, + h2_size=1, + v_size=1, + shape="rectangle", + amplitude=1, + PBC_flag="none", +): import xarray as xr + """Function to make a defined "blob" in location (v_loc, h1_loc, h2_loc) with user-specified shape and amplitude. Note that this function will round the size and locations to the nearest point within the array. @@ -475,7 +583,7 @@ def make_feature_blob(in_arr, h1_loc, h2_loc, v_loc = None, An array with the same type as `in_arr` that has the blob added. """ - # Check if z location is there and set our 3D-ness based on this. + # Check if z location is there and set our 3D-ness based on this. 
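A usage sketch for make_feature_blob (an editor illustration, not part of the patch; it assumes a plain numpy array input with the default shape="rectangle" and PBC_flag="none", and the fill bounds follow the rounding rule described in the docstring above):

```python
import numpy as np
from tobac.testing import make_feature_blob

field = np.zeros((50, 50))
# 2D call (v_loc omitted): fill a 4x4 rectangle of amplitude 10 centred at
# (hdim_1=20, hdim_2=30); 16 grid points are set in total.
field = make_feature_blob(
    field, h1_loc=20, h2_loc=30, h1_size=4, h2_size=4, amplitude=10
)
assert field.max() == 10 and int((field > 0).sum()) == 16
```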
if v_loc is None: is_3D = False start_loc = 0 @@ -492,16 +600,15 @@ def make_feature_blob(in_arr, h1_loc, h2_loc, v_loc = None, if v_size > v_max - v_min: raise ValueError("v_size larger than domain size") - # Get min/max coordinates for hdim_1 and hdim_2 # Min is inclusive, end is exclusive h1_min = 0 h1_max = in_arr.shape[start_loc] h2_min = 0 - h2_max = in_arr.shape[start_loc+1] + h2_max = in_arr.shape[start_loc + 1] - if ((h1_size > h1_max - h1_min) or (h2_size > h2_max - h2_min)): + if (h1_size > h1_max - h1_min) or (h2_size > h2_max - h2_min): raise ValueError("Horizontal size larger than domain size") # let's get start/end x/y/z @@ -510,24 +617,40 @@ def make_feature_blob(in_arr, h1_loc, h2_loc, v_loc = None, start_h2 = int(np.ceil(h2_loc - h2_size / 2)) end_h2 = int(np.ceil(h2_loc + h2_size / 2)) - + # get the coordinate sets - coords_to_fill = get_pbc_coordinates(h1_min, h1_max, h2_min, h2_max, - start_h1, end_h1, start_h2, end_h2, PBC_flag=PBC_flag) - if shape == 'rectangle': + coords_to_fill = get_pbc_coordinates( + h1_min, + h1_max, + h2_min, + h2_max, + start_h1, + end_h1, + start_h2, + end_h2, + PBC_flag=PBC_flag, + ) + if shape == "rectangle": for coord_box in coords_to_fill: - in_arr = set_arr_2D_3D(in_arr, amplitude, coord_box[0], coord_box[1], coord_box[2], coord_box[3], - start_v, end_v) - return in_arr - - - - -def set_arr_2D_3D(in_arr, value, start_h1, end_h1, start_h2, end_h2, - start_v = None, end_v = None): - '''Function to set part of `in_arr` for either 2D or 3D points to `value`. - If `start_v` and `end_v` are not none, we assume that the array is 3D. If they - are none, we will set the array as if it is a 2D array. + in_arr = set_arr_2D_3D( + in_arr, + amplitude, + coord_box[0], + coord_box[1], + coord_box[2], + coord_box[3], + start_v, + end_v, + ) + return in_arr + + +def set_arr_2D_3D( + in_arr, value, start_h1, end_h1, start_h2, end_h2, start_v=None, end_v=None +): + """Function to set part of `in_arr` for either 2D or 3D points to `value`. + If `start_v` and `end_v` are not none, we assume that the array is 3D. If they + are none, we will set the array as if it is a 2D array. Parameters ---------- @@ -535,25 +658,25 @@ def set_arr_2D_3D(in_arr, value, start_h1, end_h1, start_h2, end_h2, Array of values to set value: int, float, or array-like of size (end_v-start_v, end_h1-start_h1, end_h2-start_h2) The value to assign to in_arr. This will work to assign an array, but the array - must have the same dimensions as the size specified in the function. + must have the same dimensions as the size specified in the function. start_h1: int Start index to set for hdim_1 end_h1: int End index to set for hdim_1 (exclusive, so it acts like [start_h1:end_h1]) start_h2: int Start index to set for hdim_2 - end_h2: int + end_h2: int End index to set for hdim_2 start_v: int Start index to set for vdim (optional) - end_v: int + end_v: int End index to set for vdim (optional) - + Returns ------- array-like in_arr with the new values set. - ''' + """ if start_v is not None and end_v is not None: in_arr[start_v:end_v, start_h1:end_h1, start_h2:end_h2] = value else: @@ -562,11 +685,12 @@ def set_arr_2D_3D(in_arr, value, start_h1, end_h1, start_h2, end_h2, return in_arr -def get_single_pbc_coordinate(h1_min, h1_max, h2_min, h2_max, h1_coord, h2_coord, - PBC_flag = 'none'): - '''Function to get the PBC-adjusted coordinate for an original non-PBC adjusted - coordinate. 
- +def get_single_pbc_coordinate( + h1_min, h1_max, h2_min, h2_max, h1_coord, h2_coord, PBC_flag="none" +): + """Function to get the PBC-adjusted coordinate for an original non-PBC adjusted + coordinate. + Parameters ---------- h1_min: int @@ -596,23 +720,21 @@ def get_single_pbc_coordinate(h1_min, h1_max, h2_min, h2_max, h1_coord, h2_coord Raises ------ ValueError - Raises a ValueError if the point is invalid (e.g., h1_coord < h1_min - when PBC_flag = 'none') - ''' - # Avoiding duplicating code here, so throwing this into a loop. + Raises a ValueError if the point is invalid (e.g., h1_coord < h1_min + when PBC_flag = 'none') + """ + # Avoiding duplicating code here, so throwing this into a loop. is_pbc = [False, False] - if PBC_flag in ['hdim_1', 'both']: + if PBC_flag in ["hdim_1", "both"]: is_pbc[0] = True - if PBC_flag in ['hdim_2', 'both']: + if PBC_flag in ["hdim_2", "both"]: is_pbc[1] = True out_coords = list() - - for point_query, dim_min, dim_max, dim_pbc in zip([h1_coord, h2_coord], - [h1_min, h2_min], - [h1_max, h2_max], - is_pbc): + for point_query, dim_min, dim_max, dim_pbc in zip( + [h1_coord, h2_coord], [h1_min, h2_min], [h1_max, h2_max], is_pbc + ): if point_query >= dim_min and point_query < dim_max: out_coords.append(point_query) continue @@ -625,25 +747,32 @@ def get_single_pbc_coordinate(h1_min, h1_max, h2_min, h2_max, h1_coord, h2_coord if not dim_pbc: raise ValueError("Point invalid!") out_coords.append(point_query - (dim_max - dim_min)) - + return tuple(out_coords) +def get_pbc_coordinates( + h1_min, + h1_max, + h2_min, + h2_max, + h1_start_coord, + h1_end_coord, + h2_start_coord, + h2_end_coord, + PBC_flag="none", +): + """Function to get the *actual* coordinate boxes of interest given a set of shifted + coordinates with periodic boundaries. -def get_pbc_coordinates(h1_min, h1_max, h2_min, h2_max, - h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord, - PBC_flag = 'none'): - '''Function to get the *actual* coordinate boxes of interest given a set of shifted - coordinates with periodic boundaries. - - For example, if you pass in [as h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord] + For example, if you pass in [as h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord] (-3, 5, 2,6) with PBC_flag of 'both' or 'hdim_1', h1_max of 10, and h1_min of 0 this function will return: [(0,5,2,6), (7,10,2,6)]. If you pass in something outside the bounds of the array, this will truncate your - requested box. For example, if you pass in [as h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord] + requested box. For example, if you pass in [as h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord] (-3, 5, 2,6) with PBC_flag of 'none' or 'hdim_2', this function will return: - [(0,5,2,6)], assuming h1_min is 0. + [(0,5,2,6)], assuming h1_min is 0. For cases where PBC_flag is 'both' and we have a corner case, it is possible to get overlapping boundaries. For example, if you pass in (-6, 5, -6, 5) @@ -657,7 +786,7 @@ def get_pbc_coordinates(h1_min, h1_max, h2_min, h2_max, h2_min: int Minimum array value in hdim_2, typically 0. h2_max: int - Maximum array value in hdim_2 (exclusive). h2_max - h2_min should be the size in h2. + Maximum array value in hdim_2 (exclusive). h2_max - h2_min should be the size in h2. h1_start_coord: int Start coordinate in hdim_1. Can be < h1_min if dealing with PBCs. 
h1_end_coord: int @@ -678,37 +807,36 @@ def get_pbc_coordinates(h1_min, h1_max, h2_min, h2_max, list of tuples A list of tuples containing (h1_start, h1_end, h2_start, h2_end) of each of the boxes needed to encompass the coordinates. - ''' + """ - if PBC_flag not in ['none', 'hdim_1', 'hdim_2', 'both']: + if PBC_flag not in ["none", "hdim_1", "hdim_2", "both"]: raise ValueError("PBC_flag must be 'none', 'hdim_1', 'hdim_2', or 'both'") - h1_start_coords = list() h1_end_coords = list() h2_start_coords = list() h2_end_coords = list() - - # In both of these cases, we just need to truncate the hdim_1 points. - if PBC_flag in ['none', 'hdim_2']: + # In both of these cases, we just need to truncate the hdim_1 points. + if PBC_flag in ["none", "hdim_2"]: h1_start_coords.append(max(h1_min, h1_start_coord)) h1_end_coords.append(min(h1_max, h1_end_coord)) - - + # In both of these cases, we only need to truncate the hdim_2 points. - if PBC_flag in ['none', 'hdim_1']: + if PBC_flag in ["none", "hdim_1"]: h2_start_coords.append(max(h2_min, h2_start_coord)) h2_end_coords.append(min(h2_max, h2_end_coord)) # If the PBC flag is none, we can just return. - if PBC_flag == 'none': - return [(h1_start_coords[0], h1_end_coords[0], h2_start_coords[0], h2_end_coords[0])] + if PBC_flag == "none": + return [ + (h1_start_coords[0], h1_end_coords[0], h2_start_coords[0], h2_end_coords[0]) + ] - # We have at least one periodic boundary. + # We have at least one periodic boundary. - # hdim_1 boundary is periodic. - if PBC_flag in ['hdim_1', 'both']: + # hdim_1 boundary is periodic. + if PBC_flag in ["hdim_1", "both"]: if (h1_end_coord - h1_start_coord) >= (h1_max - h1_min): # In this case, we have selected the full h1 length of the domain, # so we set the start and end coords to just that. @@ -737,8 +865,8 @@ def get_pbc_coordinates(h1_min, h1_max, h2_min, h2_max, else: h1_start_coords.append(h1_start_coord) h1_end_coords.append(h1_end_coord) - - if PBC_flag in ['hdim_2', 'both']: + + if PBC_flag in ["hdim_2", "both"]: if (h2_end_coord - h2_start_coord) >= (h2_max - h2_min): # In this case, we have selected the full h2 length of the domain, # so we set the start and end coords to just that. 
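The truncation and wrapping branches above implement the behaviour promised in the docstring; as a worked sketch of the two examples given there (assuming get_pbc_coordinates is importable from tobac.testing):

```python
from tobac.testing import get_pbc_coordinates

# Periodic in hdim_1: the part of the box below h1_min wraps to the top.
boxes = get_pbc_coordinates(0, 10, 0, 10, -3, 5, 2, 6, PBC_flag="hdim_1")
assert sorted(boxes) == [(0, 5, 2, 6), (7, 10, 2, 6)]

# No periodic boundaries: the requested box is simply truncated at the edge.
assert get_pbc_coordinates(0, 10, 0, 10, -3, 5, 2, 6, PBC_flag="none") == [
    (0, 5, 2, 6)
]
```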
@@ -769,20 +897,44 @@ def get_pbc_coordinates(h1_min, h1_max, h2_min, h2_max, h2_end_coords.append(h2_end_coord) out_coords = list() - for h1_start_coord_single, h1_end_coord_single in zip(h1_start_coords, h1_end_coords): - for h2_start_coord_single, h2_end_coord_single in zip(h2_start_coords, h2_end_coords): - out_coords.append((h1_start_coord_single, h1_end_coord_single, h2_start_coord_single, h2_end_coord_single)) + for h1_start_coord_single, h1_end_coord_single in zip( + h1_start_coords, h1_end_coords + ): + for h2_start_coord_single, h2_end_coord_single in zip( + h2_start_coords, h2_end_coords + ): + out_coords.append( + ( + h1_start_coord_single, + h1_end_coord_single, + h2_start_coord_single, + h2_end_coord_single, + ) + ) return out_coords -def generate_single_feature(start_h1, start_h2, start_v = None, - spd_h1 = 1, spd_h2 = 1, spd_v = 1, - min_h1 = 0, max_h1 = None, min_h2 = 0, max_h2 = None, - num_frames = 1, dt = datetime.timedelta(minutes=5), - start_date = datetime.datetime(2022,1,1,0), - PBC_flag = 'none', frame_start = 0, feature_num=1, - feature_size = None, threshold_val = None): - '''Function to generate a dummy feature dataframe to test the tracking functionality +def generate_single_feature( + start_h1, + start_h2, + start_v=None, + spd_h1=1, + spd_h2=1, + spd_v=1, + min_h1=0, + max_h1=None, + min_h2=0, + max_h2=None, + num_frames=1, + dt=datetime.timedelta(minutes=5), + start_date=datetime.datetime(2022, 1, 1, 0), + PBC_flag="none", + frame_start=0, + feature_num=1, + feature_size=None, + threshold_val=None, +): + """Function to generate a dummy feature dataframe to test the tracking functionality Parameters ---------- @@ -798,13 +950,13 @@ def generate_single_feature(start_h1, start_h2, start_v = None, Speed (per frame) of the feature in hdim_2 spd_v: float Speed (per frame) of the feature in vdim - min_h1: int - Minimum value of hdim_1 allowed. If PBC_flag is not 'none', then - this will be used to know when to wrap around periodic boundaries. + min_h1: int + Minimum value of hdim_1 allowed. If PBC_flag is not 'none', then + this will be used to know when to wrap around periodic boundaries. If PBC_flag is 'none', features will disappear if they are above/below - these bounds. + these bounds. max_h1: int - Similar to min_h1, but the max value of hdim_1 allowed. + Similar to min_h1, but the max value of hdim_1 allowed. min_h2: int Similar to min_h1, but the minimum value of hdim_2 allowed. 
max_h2: int @@ -830,11 +982,10 @@ def generate_single_feature(start_h1, start_h2, start_v = None, If None, doesn't set this column threshold_val: float or None Threshold value of this feature - ''' + """ if max_h1 is None or max_h2 is None: - raise ValueError('Max coords must be specified.') - + raise ValueError("Max coords must be specified.") out_list_of_dicts = list() curr_h1 = start_h1 @@ -844,30 +995,31 @@ def generate_single_feature(start_h1, start_h2, start_v = None, is_3D = not (start_v is None) for i in range(num_frames): curr_dict = dict() - curr_h1, curr_h2 = get_single_pbc_coordinate(min_h1, max_h1, min_h2, max_h2, - curr_h1, curr_h2, PBC_flag) - curr_dict['hdim_1'] = curr_h1 - curr_dict['hdim_2'] = curr_h2 - curr_dict['frame'] = frame_start + i + curr_h1, curr_h2 = get_single_pbc_coordinate( + min_h1, max_h1, min_h2, max_h2, curr_h1, curr_h2, PBC_flag + ) + curr_dict["hdim_1"] = curr_h1 + curr_dict["hdim_2"] = curr_h2 + curr_dict["frame"] = frame_start + i if curr_v is not None: - curr_dict['vdim'] = curr_v + curr_dict["vdim"] = curr_v curr_v += spd_v - curr_dict['time'] = curr_dt + curr_dict["time"] = curr_dt curr_dict["feature"] = feature_num + i if feature_size is not None: - curr_dict['num'] = feature_size + curr_dict["num"] = feature_size if threshold_val is not None: - curr_dict['threshold_value'] = threshold_val + curr_dict["threshold_value"] = threshold_val curr_h1 += spd_h1 curr_h2 += spd_h2 curr_dt += dt out_list_of_dicts.append(curr_dict) - return pd.DataFrame.from_dict(out_list_of_dicts) -def get_start_end_of_feat(center_point, size, axis_min, axis_max, is_pbc = False): - '''Gets the start and ending points for a feature given a size and PBC + +def get_start_end_of_feat(center_point, size, axis_min, axis_max, is_pbc=False): + """Gets the start and ending points for a feature given a size and PBC conditions Parameters @@ -891,23 +1043,22 @@ def get_start_end_of_feat(center_point, size, axis_min, axis_max, is_pbc = False Note that if is_pbc is True, start_point can be less than axis_min and end_point can be greater than or equal to axis_max. This is designed to be used with ```get_pbc_coordinates``` - ''' + """ import numpy as np min_pt = int(np.ceil(center_point - size / 2)) - max_pt = int(np.ceil(center_point + size / 2))\ - + max_pt = int(np.ceil(center_point + size / 2)) # adjust points for boundaries, if needed. if min_pt < axis_min and not is_pbc: min_pt = axis_min if max_pt > axis_max and not is_pbc: max_pt = axis_max - + return (min_pt, max_pt) def generate_grid_coords(min_max_coords, lengths): - '''Generates a grid of coordinates, such as fake lat/lons for testing. + """Generates a grid of coordinates, such as fake lat/lons for testing. Parameters ---------- @@ -924,21 +1075,30 @@ def generate_grid_coords(min_max_coords, lengths): 1, 2, or 3 array-likes array-like of grid coordinates in the number of dimensions requested and with the number of arrays specified (meshed coordinates) - - ''' + + """ import numpy as np - if len(min_max_coords) != len(lengths)*2: - raise ValueError("The length of min_max_coords must be exactly 2 times" - " the length of lengths.") + + if len(min_max_coords) != len(lengths) * 2: + raise ValueError( + "The length of min_max_coords must be exactly 2 times" + " the length of lengths." 
+ ) if len(lengths) == 1: - return np.mgrid[min_max_coords[0]:min_max_coords[1]:complex(imag=lengths[0])] + return np.mgrid[ + min_max_coords[0] : min_max_coords[1] : complex(imag=lengths[0]) + ] if len(lengths) == 2: - return np.mgrid[min_max_coords[0]:min_max_coords[1]:complex(imag=lengths[0]), - min_max_coords[2]:min_max_coords[3]:complex(imag=lengths[1])] - + return np.mgrid[ + min_max_coords[0] : min_max_coords[1] : complex(imag=lengths[0]), + min_max_coords[2] : min_max_coords[3] : complex(imag=lengths[1]), + ] + if len(lengths) == 3: - return np.mgrid[min_max_coords[0]:min_max_coords[1]:complex(imag=lengths[0]), - min_max_coords[2]:min_max_coords[3]:complex(imag=lengths[1]), - min_max_coords[4]:min_max_coords[5]:complex(imag=lengths[2])] \ No newline at end of file + return np.mgrid[ + min_max_coords[0] : min_max_coords[1] : complex(imag=lengths[0]), + min_max_coords[2] : min_max_coords[3] : complex(imag=lengths[1]), + min_max_coords[4] : min_max_coords[5] : complex(imag=lengths[2]), + ] diff --git a/tobac/tests/test_feature_detection.py b/tobac/tests/test_feature_detection.py index c0a4f9ba..d5838fe6 100644 --- a/tobac/tests/test_feature_detection.py +++ b/tobac/tests/test_feature_detection.py @@ -5,9 +5,9 @@ def test_feature_detection_multithreshold_timestep(): - ''' + """ Tests ```tobac.feature_detection.feature_detection_multithreshold_timestep - ''' + """ import numpy as np from tobac import testing from tobac import feature_detection @@ -50,26 +50,69 @@ def test_feature_detection_multithreshold_timestep(): @pytest.mark.parametrize( "feature_1_loc, feature_2_loc, dxy, dz, min_distance," " add_x_coords, add_y_coords," - "add_z_coords, PBC_flag, expect_feature_1, expect_feature_2", - [((0,0,0,4,1), (1,1,1,4,1), 1000, 100, 1, False, False, False, - 'none', True, True), - ((0,0,0,4,1), (1,1,1,3,1), 1000, 100, 5000, False, False, False, - 'none', True, False), - ((0,0,0,4,2), (1,1,1,10,1), 1000, 100, 5000, False, False, False, - 'none', True, False), - - ] + "add_z_coords, PBC_flag, expect_feature_1, expect_feature_2", + [ + ( + (0, 0, 0, 4, 1), + (1, 1, 1, 4, 1), + 1000, + 100, + 1, + False, + False, + False, + "none", + True, + True, + ), + ( + (0, 0, 0, 4, 1), + (1, 1, 1, 3, 1), + 1000, + 100, + 5000, + False, + False, + False, + "none", + True, + False, + ), + ( + (0, 0, 0, 4, 2), + (1, 1, 1, 10, 1), + 1000, + 100, + 5000, + False, + False, + False, + "none", + True, + False, + ), + ], ) -def test_filter_min_distance(feature_1_loc, feature_2_loc, dxy, dz, - min_distance, add_x_coords, add_y_coords, - add_z_coords, PBC_flag, expect_feature_1, expect_feature_2): - '''Tests tobac.feature_detection.filter_min_distance +def test_filter_min_distance( + feature_1_loc, + feature_2_loc, + dxy, + dz, + min_distance, + add_x_coords, + add_y_coords, + add_z_coords, + PBC_flag, + expect_feature_1, + expect_feature_2, +): + """Tests tobac.feature_detection.filter_min_distance Parameters ---------- feature_1_loc: tuple, length of 4 or 5 - Feature 1 location, num, and threshold value (assumes a 100 x 100 x 100 grid). - Assumes z, y, x, num, threshold_value for 3D where num is the size/ 'num' - column of the feature and threshold_value is the threshold_value. + Feature 1 location, num, and threshold value (assumes a 100 x 100 x 100 grid). + Assumes z, y, x, num, threshold_value for 3D where num is the size/ 'num' + column of the feature and threshold_value is the threshold_value. If 2D, assumes y, x, num, threshold_value. 
feature_2_loc: tuple, length of 4 or 5 Feature 2 location, same format and length as `feature_1_loc` @@ -81,7 +124,7 @@ def test_filter_min_distance(feature_1_loc, feature_2_loc, dxy, dz, Minimum distance between features (m) add_x_coords: bool Whether or not to add x coordinates - add_y_coords: bool + add_y_coords: bool Whether or not to add y coordinates add_z_coords: bool Whether or not to add z coordinates @@ -95,7 +138,7 @@ def test_filter_min_distance(feature_1_loc, feature_2_loc, dxy, dz, True if we expect feature 1 to remain, false if we expect it gone. expect_feature_2: bool True if we expect feature 2 to remain, false if we expect it gone. - ''' + """ import pandas as pd import numpy as np @@ -106,36 +149,35 @@ def test_filter_min_distance(feature_1_loc, feature_2_loc, dxy, dz, assumed_dxy = 100 assumed_dz = 100 - x_coord_name = 'projection_coord_x' - y_coord_name = 'projection_coord_y' - z_coord_name = 'projection_coord_z' + x_coord_name = "projection_coord_x" + y_coord_name = "projection_coord_y" + z_coord_name = "projection_coord_z" is_3D = len(feature_1_loc) == 5 start_size_loc = 3 if is_3D else 2 start_h1_loc = 1 if is_3D else 0 feat_opts_f1 = { - 'start_h1': feature_1_loc[start_h1_loc], - 'start_h2': feature_1_loc[start_h1_loc+1], - 'max_h1': h1_max, - 'max_h2': h2_max, - 'feature_size': feature_1_loc[start_size_loc], - 'threshold_val': feature_1_loc[start_size_loc+1], - 'feature_num': 1, + "start_h1": feature_1_loc[start_h1_loc], + "start_h2": feature_1_loc[start_h1_loc + 1], + "max_h1": h1_max, + "max_h2": h2_max, + "feature_size": feature_1_loc[start_size_loc], + "threshold_val": feature_1_loc[start_size_loc + 1], + "feature_num": 1, } feat_opts_f2 = { - 'start_h1': feature_2_loc[start_h1_loc], - 'start_h2': feature_2_loc[start_h1_loc+1], - 'max_h1': h1_max, - 'max_h2': h2_max, - 'feature_size': feature_2_loc[start_size_loc], - 'threshold_val': feature_2_loc[start_size_loc+1], - 'feature_num': 2, + "start_h1": feature_2_loc[start_h1_loc], + "start_h2": feature_2_loc[start_h1_loc + 1], + "max_h1": h1_max, + "max_h2": h2_max, + "feature_size": feature_2_loc[start_size_loc], + "threshold_val": feature_2_loc[start_size_loc + 1], + "feature_num": 2, } if is_3D: - feat_opts_f1['start_v'] = feature_1_loc[0] - feat_opts_f2['start_v'] = feature_2_loc[0] - + feat_opts_f1["start_v"] = feature_1_loc[0] + feat_opts_f2["start_v"] = feature_2_loc[0] feat_1_interp = tbtest.generate_single_feature(**feat_opts_f1) feat_2_interp = tbtest.generate_single_feature(**feat_opts_f2) @@ -145,44 +187,52 @@ def test_filter_min_distance(feature_1_loc, feature_2_loc, dxy, dz, filter_dist_opts = dict() if add_x_coords: - feat_combined[x_coord_name] = feat_combined['hdim_2'] * assumed_dxy - filter_dist_opts['x_coordinate_name'] = x_coord_name + feat_combined[x_coord_name] = feat_combined["hdim_2"] * assumed_dxy + filter_dist_opts["x_coordinate_name"] = x_coord_name if add_y_coords: - feat_combined[y_coord_name] = feat_combined['hdim_1'] * assumed_dxy - filter_dist_opts['y_coordinate_name'] = y_coord_name + feat_combined[y_coord_name] = feat_combined["hdim_1"] * assumed_dxy + filter_dist_opts["y_coordinate_name"] = y_coord_name if add_z_coords and is_3D: - feat_combined[z_coord_name] = feat_combined['vdim'] * assumed_dz - filter_dist_opts['z_coordinate_name'] = z_coord_name + feat_combined[z_coord_name] = feat_combined["vdim"] * assumed_dz + filter_dist_opts["z_coordinate_name"] = z_coord_name filter_dist_opts = { - 'features': feat_combined, - 'dxy': dxy, - 'dz': dz, - 'min_distance': min_distance, - 
'PBC_flag': PBC_flag, + "features": feat_combined, + "dxy": dxy, + "dz": dz, + "min_distance": min_distance, + "PBC_flag": PBC_flag, } out_feats = feat_detect.filter_min_distance(**filter_dist_opts) - assert expect_feature_1 == (np.sum(out_feats['feature']==1)==1) - assert expect_feature_2 == (np.sum(out_feats['feature']==2)==1) - -@pytest.mark.parametrize("test_dset_size, vertical_axis_num, " - "vertical_coord_name," - " vertical_coord_opt, expected_raise", - [((1,20,30,40), 1, 'altitude', 'auto', False), - ((1,20,30,40), 2, 'altitude', 'auto', False), - ((1,20,30,40), 3, 'altitude', 'auto', False), - ((1,20,30,40), 1, 'air_pressure', 'air_pressure', False), - ((1,20,30,40), 1, 'air_pressure', 'auto', True), - ((1,20,30,40), 1, 'model_level_number', 'auto', False), - ((1,20,30,40), 1, 'altitude', 'auto', False), - ((1,20,30,40), 1, 'geopotential_height', 'auto', False) - ] + assert expect_feature_1 == (np.sum(out_feats["feature"] == 1) == 1) + assert expect_feature_2 == (np.sum(out_feats["feature"] == 2) == 1) + + +@pytest.mark.parametrize( + "test_dset_size, vertical_axis_num, " + "vertical_coord_name," + " vertical_coord_opt, expected_raise", + [ + ((1, 20, 30, 40), 1, "altitude", "auto", False), + ((1, 20, 30, 40), 2, "altitude", "auto", False), + ((1, 20, 30, 40), 3, "altitude", "auto", False), + ((1, 20, 30, 40), 1, "air_pressure", "air_pressure", False), + ((1, 20, 30, 40), 1, "air_pressure", "auto", True), + ((1, 20, 30, 40), 1, "model_level_number", "auto", False), + ((1, 20, 30, 40), 1, "altitude", "auto", False), + ((1, 20, 30, 40), 1, "geopotential_height", "auto", False), + ], ) -def test_feature_detection_multiple_z_coords(test_dset_size, vertical_axis_num, vertical_coord_name, - vertical_coord_opt, expected_raise): - '''Tests ```tobac.feature_detection.feature_detection_multithreshold``` +def test_feature_detection_multiple_z_coords( + test_dset_size, + vertical_axis_num, + vertical_coord_name, + vertical_coord_opt, + expected_raise, +): + """Tests ```tobac.feature_detection.feature_detection_multithreshold``` with different axes Parameters @@ -192,47 +242,49 @@ def test_feature_detection_multiple_z_coords(test_dset_size, vertical_axis_num, vertical_axis_num: int (0-2, inclusive) Which axis in test_dset_size is the vertical axis vertical_coord_name: str - Name of the vertical coordinate. + Name of the vertical coordinate. vertical_coord_opt: str What to pass in as the vertical coordinate option to segmentation_timestep expected_raise: bool True if we expect a ValueError to be raised, false otherwise - ''' + """ import numpy as np - # First, just check that input and output shapes are the same. - test_dxy = 1000 - test_vdim_pt_1 = 8 + # First, just check that input and output shapes are the same. 
+ test_dxy = 1000 + test_vdim_pt_1 = 8 test_hdim_1_pt_1 = 12 test_hdim_2_pt_1 = 12 test_data = np.zeros(test_dset_size) test_data[0, 0:5, 0:5, 0:5] = 3 common_dset_opts = { - 'in_arr': test_data, - 'data_type': 'iris', - 'z_dim_name': vertical_coord_name + "in_arr": test_data, + "data_type": "iris", + "z_dim_name": vertical_coord_name, } if vertical_axis_num == 1: test_data_iris = tbtest.make_dataset_from_arr( - time_dim_num = 0, z_dim_num=1, y_dim_num=2, x_dim_num=3, **common_dset_opts + time_dim_num=0, z_dim_num=1, y_dim_num=2, x_dim_num=3, **common_dset_opts ) elif vertical_axis_num == 2: test_data_iris = tbtest.make_dataset_from_arr( - time_dim_num = 0, z_dim_num=2, y_dim_num=1, x_dim_num=3, **common_dset_opts + time_dim_num=0, z_dim_num=2, y_dim_num=1, x_dim_num=3, **common_dset_opts ) elif vertical_axis_num == 3: test_data_iris = tbtest.make_dataset_from_arr( - time_dim_num = 0, z_dim_num=3, y_dim_num=1, x_dim_num=2, **common_dset_opts + time_dim_num=0, z_dim_num=3, y_dim_num=1, x_dim_num=2, **common_dset_opts ) if not expected_raise: out_df = feat_detect.feature_detection_multithreshold( field_in=test_data_iris, dxy=test_dxy, - threshold=[1.5,], - vertical_coord=vertical_coord_opt + threshold=[ + 1.5, + ], + vertical_coord=vertical_coord_opt, ) - # Check that the vertical coordinate is returned. + # Check that the vertical coordinate is returned. print(out_df.columns) assert vertical_coord_name in out_df else: @@ -241,6 +293,8 @@ def test_feature_detection_multiple_z_coords(test_dset_size, vertical_axis_num, out_seg_mask, out_df = feat_detect.feature_detection_multithreshold( field_in=test_data_iris, dxy=test_dxy, - threshold=[1.5,], - vertical_coord=vertical_coord_opt + threshold=[ + 1.5, + ], + vertical_coord=vertical_coord_opt, ) diff --git a/tobac/tests/test_import.py b/tobac/tests/test_import.py index 92e44ec2..5d5b7c68 100644 --- a/tobac/tests/test_import.py +++ b/tobac/tests/test_import.py @@ -1,5 +1,6 @@ import pytest import tobac + def test_dummy_function(): - assert 1==1 + assert 1 == 1 diff --git a/tobac/tests/test_sample_data.py b/tobac/tests/test_sample_data.py index e43f3225..a94e7c8a 100644 --- a/tobac/tests/test_sample_data.py +++ b/tobac/tests/test_sample_data.py @@ -1,63 +1,89 @@ """ Tests for tobac based on simple sample datasets with moving blobs. These tests should be adapted to be more modular in the future. 
""" -from tobac.testing import make_sample_data_2D_3blobs, make_sample_data_2D_3blobs_inv, make_sample_data_3D_3blobs -from tobac import feature_detection_multithreshold,linking_trackpy,get_spacings,segmentation_2D, segmentation_3D -from iris.analysis import MEAN,MAX,MIN +from tobac.testing import ( + make_sample_data_2D_3blobs, + make_sample_data_2D_3blobs_inv, + make_sample_data_3D_3blobs, +) +from tobac import ( + feature_detection_multithreshold, + linking_trackpy, + get_spacings, + segmentation_2D, + segmentation_3D, +) +from iris.analysis import MEAN, MAX, MIN from pandas.testing import assert_frame_equal from numpy.testing import assert_allclose import pandas as pd + def test_sample_data(): """ Test to make sure that sample datasets in the following tests are set up the right way """ - sample_data=make_sample_data_2D_3blobs() - sample_data_inv=make_sample_data_2D_3blobs_inv() - - assert sample_data.coord('projection_x_coordinate')==sample_data_inv.coord('projection_x_coordinate') - assert sample_data.coord('projection_y_coordinate')==sample_data_inv.coord('projection_y_coordinate') - assert sample_data.coord('time')==sample_data_inv.coord('time') - minimum=sample_data.collapsed(('time','projection_x_coordinate','projection_y_coordinate'),MIN).data - minimum_inv=sample_data_inv.collapsed(('time','projection_x_coordinate','projection_y_coordinate'),MIN).data - assert_allclose(minimum,minimum_inv) - mean=sample_data.collapsed(('time','projection_x_coordinate','projection_y_coordinate'),MEAN).data - mean_inv=sample_data_inv.collapsed(('time','projection_x_coordinate','projection_y_coordinate'),MEAN).data - assert_allclose(mean,mean_inv) + sample_data = make_sample_data_2D_3blobs() + sample_data_inv = make_sample_data_2D_3blobs_inv() + + assert sample_data.coord("projection_x_coordinate") == sample_data_inv.coord( + "projection_x_coordinate" + ) + assert sample_data.coord("projection_y_coordinate") == sample_data_inv.coord( + "projection_y_coordinate" + ) + assert sample_data.coord("time") == sample_data_inv.coord("time") + minimum = sample_data.collapsed( + ("time", "projection_x_coordinate", "projection_y_coordinate"), MIN + ).data + minimum_inv = sample_data_inv.collapsed( + ("time", "projection_x_coordinate", "projection_y_coordinate"), MIN + ).data + assert_allclose(minimum, minimum_inv) + mean = sample_data.collapsed( + ("time", "projection_x_coordinate", "projection_y_coordinate"), MEAN + ).data + mean_inv = sample_data_inv.collapsed( + ("time", "projection_x_coordinate", "projection_y_coordinate"), MEAN + ).data + assert_allclose(mean, mean_inv) + def test_tracking_coord_order(): """ Test a tracking applications to make sure that coordinate order does not lead to different results """ - sample_data=make_sample_data_2D_3blobs() - sample_data_inv=make_sample_data_2D_3blobs_inv() + sample_data = make_sample_data_2D_3blobs() + sample_data_inv = make_sample_data_2D_3blobs_inv() # Keyword arguments for feature detection step: - parameters_features={} - parameters_features['position_threshold']='weighted_diff' - parameters_features['sigma_threshold']=0.5 - parameters_features['min_num']=3 - parameters_features['min_distance']=0 - parameters_features['sigma_threshold']=1 - parameters_features['threshold']=[3,5,10] #m/s - parameters_features['n_erosion_threshold']=0 - parameters_features['n_min_threshold']=3 - - #calculate dxy,dt - dxy,dt=get_spacings(sample_data) - dxy_inv,dt_inv=get_spacings(sample_data_inv) - - #Test that dt and dxy are the same for different order of coordinates - 
-    assert_allclose(dxy,dxy_inv)
-    assert_allclose(dt,dt_inv)
-
-    #Test that dt and dxy are as expected
-    assert_allclose(dt,60)
-    assert_allclose(dxy,1000)
-
-    #Find features
-    Features=feature_detection_multithreshold(sample_data,dxy,**parameters_features)
-    Features_inv=feature_detection_multithreshold(sample_data_inv,dxy_inv,**parameters_features)
-
+    parameters_features = {}
+    parameters_features["position_threshold"] = "weighted_diff"
+    parameters_features["sigma_threshold"] = 0.5
+    parameters_features["min_num"] = 3
+    parameters_features["min_distance"] = 0
+    parameters_features["sigma_threshold"] = 1
+    parameters_features["threshold"] = [3, 5, 10]  # m/s
+    parameters_features["n_erosion_threshold"] = 0
+    parameters_features["n_min_threshold"] = 3
+
+    # calculate dxy,dt
+    dxy, dt = get_spacings(sample_data)
+    dxy_inv, dt_inv = get_spacings(sample_data_inv)
+
+    # Test that dt and dxy are the same for different order of coordinates
+    assert_allclose(dxy, dxy_inv)
+    assert_allclose(dt, dt_inv)
+
+    # Test that dt and dxy are as expected
+    assert_allclose(dt, 60)
+    assert_allclose(dxy, 1000)
+
+    # Find features
+    Features = feature_detection_multithreshold(sample_data, dxy, **parameters_features)
+    Features_inv = feature_detection_multithreshold(
+        sample_data_inv, dxy_inv, **parameters_features
+    )

     # Assert that output of feature detection not empty:
     assert type(Features) == pd.core.frame.DataFrame
     assert type(Features_inv) == pd.core.frame.DataFrame
@@ -65,93 +91,109 @@ def test_tracking_coord_order():
     assert not Features_inv.empty

     # perform watershedding segmentation
-    parameters_segmentation={}
-    parameters_segmentation['target']='maximum'
-    parameters_segmentation['method']='watershed'
-
-
-    segmentation_mask,features_segmentation=segmentation_2D(Features,sample_data,dxy=dxy,**parameters_segmentation)
-    segmentation_mask_inv,features_segmentation=segmentation_2D(Features_inv,sample_data_inv,dxy=dxy_inv,**parameters_segmentation)
-
+    parameters_segmentation = {}
+    parameters_segmentation["target"] = "maximum"
+    parameters_segmentation["method"] = "watershed"
+
+    segmentation_mask, features_segmentation = segmentation_2D(
+        Features, sample_data, dxy=dxy, **parameters_segmentation
+    )
+    segmentation_mask_inv, features_segmentation = segmentation_2D(
+        Features_inv, sample_data_inv, dxy=dxy_inv, **parameters_segmentation
+    )

     # perform trajectory linking
-    parameters_linking={}
-    parameters_linking['method_linking']='predict'
-    parameters_linking['adaptive_stop']=0.2
-    parameters_linking['adaptive_step']=0.95
-    parameters_linking['extrapolate']=0
-    parameters_linking['order']=1
-    parameters_linking['subnetwork_size']=100
-    parameters_linking['memory']=0
-    parameters_linking['time_cell_min']=5*60
-    parameters_linking['method_linking']='predict'
-    parameters_linking['v_max']=100
-    parameters_linking['d_min']=2000
-
-    Track=linking_trackpy(Features,sample_data,dt=dt,dxy=dxy,**parameters_linking)
-    Track_inv=linking_trackpy(Features_inv,sample_data_inv,dt=dt_inv,dxy=dxy_inv,**parameters_linking)
-
+    parameters_linking = {}
+    parameters_linking["method_linking"] = "predict"
+    parameters_linking["adaptive_stop"] = 0.2
+    parameters_linking["adaptive_step"] = 0.95
+    parameters_linking["extrapolate"] = 0
+    parameters_linking["order"] = 1
+    parameters_linking["subnetwork_size"] = 100
+    parameters_linking["memory"] = 0
+    parameters_linking["time_cell_min"] = 5 * 60
+    parameters_linking["method_linking"] = "predict"
+    parameters_linking["v_max"] = 100
+    parameters_linking["d_min"] = 2000
+
+    Track = linking_trackpy(Features, sample_data, dt=dt, dxy=dxy, **parameters_linking)
+    Track_inv = linking_trackpy(
+        Features_inv, sample_data_inv, dt=dt_inv, dxy=dxy_inv, **parameters_linking
+    )
+
+
 def test_tracking_3D():
     """
     Test a tracking application to make sure that coordinate order does not lead to different results
     """
-    sample_data=make_sample_data_3D_3blobs()
-    sample_data_inv=make_sample_data_3D_3blobs(invert_xy=True)
+    sample_data = make_sample_data_3D_3blobs()
+    sample_data_inv = make_sample_data_3D_3blobs(invert_xy=True)
     # Keyword arguments for feature detection step:
-    parameters_features={}
-    parameters_features['position_threshold']='weighted_diff'
-    parameters_features['sigma_threshold']=0.5
-    parameters_features['min_num']=3
-    parameters_features['min_distance']=0
-    parameters_features['sigma_threshold']=1
-    parameters_features['threshold']=[3,5,10] #m/s
-    parameters_features['n_erosion_threshold']=0
-    parameters_features['n_min_threshold']=3
-
-    sample_data_max=sample_data.collapsed('geopotential_height',MAX)
-    sample_data_max_inv=sample_data.collapsed('geopotential_height',MAX)
-
-    #calculate dxy,dt
-    dxy,dt=get_spacings(sample_data_max)
-    dxy_inv,dt_inv=get_spacings(sample_data_max_inv)
-
-    #Test that dt and dxy are the same for different order of coordinates
-    assert_allclose(dxy,dxy_inv)
-    assert_allclose(dt,dt_inv)
-
-    #Test that dt and dxy are as expected
-    assert_allclose(dt,120)
-    assert_allclose(dxy,1000)
-
-    #Find features
-    Features=feature_detection_multithreshold(sample_data_max,dxy,**parameters_features)
-    Features_inv=feature_detection_multithreshold(sample_data_max_inv,dxy_inv,**parameters_features)
+    parameters_features = {}
+    parameters_features["position_threshold"] = "weighted_diff"
+    parameters_features["sigma_threshold"] = 0.5
+    parameters_features["min_num"] = 3
+    parameters_features["min_distance"] = 0
+    parameters_features["sigma_threshold"] = 1
+    parameters_features["threshold"] = [3, 5, 10]  # m/s
+    parameters_features["n_erosion_threshold"] = 0
+    parameters_features["n_min_threshold"] = 3
+
+    sample_data_max = sample_data.collapsed("geopotential_height", MAX)
+    sample_data_max_inv = sample_data_inv.collapsed("geopotential_height", MAX)
+
+    # calculate dxy,dt
+    dxy, dt = get_spacings(sample_data_max)
+    dxy_inv, dt_inv = get_spacings(sample_data_max_inv)
+
+    # Test that dt and dxy are the same for different order of coordinates
+    assert_allclose(dxy, dxy_inv)
+    assert_allclose(dt, dt_inv)
+
+    # Test that dt and dxy are as expected
+    assert_allclose(dt, 120)
+    assert_allclose(dxy, 1000)
+
+    # Find features
+    Features = feature_detection_multithreshold(
+        sample_data_max, dxy, **parameters_features
+    )
+    Features_inv = feature_detection_multithreshold(
+        sample_data_max_inv, dxy_inv, **parameters_features
+    )

     # perform watershedding segmentation
-    parameters_segmentation={}
-    parameters_segmentation['target']='maximum'
-    parameters_segmentation['method']='watershed'
+    parameters_segmentation = {}
+    parameters_segmentation["target"] = "maximum"
+    parameters_segmentation["method"] = "watershed"
+
+    segmentation_mask, features_segmentation = segmentation_3D(
+        Features, sample_data_max, dxy=dxy, **parameters_segmentation
+    )
+    segmentation_mask_inv, features_segmentation = segmentation_3D(
+        Features_inv, sample_data_max_inv, dxy=dxy_inv, **parameters_segmentation
+    )

-    segmentation_mask,features_segmentation=segmentation_3D(Features,sample_data_max,dxy=dxy,**parameters_segmentation)
-
segmentation_mask_inv,features_segmentation=segmentation_3D(Features_inv,sample_data_max_inv,dxy=dxy_inv,**parameters_segmentation) - # perform trajectory linking - parameters_linking={} - parameters_linking['method_linking']='predict' - parameters_linking['adaptive_stop']=0.2 - parameters_linking['adaptive_step']=0.95 - parameters_linking['extrapolate']=0 - parameters_linking['order']=1 - parameters_linking['subnetwork_size']=100 - parameters_linking['memory']=0 - parameters_linking['time_cell_min']=5*60 - parameters_linking['method_linking']='predict' - parameters_linking['v_max']=100 - parameters_linking['d_min']=2000 - - Track=linking_trackpy(Features,sample_data,dt=dt,dxy=dxy,**parameters_linking) - Track_inv=linking_trackpy(Features_inv,sample_data_inv,dt=dt_inv,dxy=dxy_inv,**parameters_linking) + parameters_linking = {} + parameters_linking["method_linking"] = "predict" + parameters_linking["adaptive_stop"] = 0.2 + parameters_linking["adaptive_step"] = 0.95 + parameters_linking["extrapolate"] = 0 + parameters_linking["order"] = 1 + parameters_linking["subnetwork_size"] = 100 + parameters_linking["memory"] = 0 + parameters_linking["time_cell_min"] = 5 * 60 + parameters_linking["method_linking"] = "predict" + parameters_linking["v_max"] = 100 + parameters_linking["d_min"] = 2000 + + Track = linking_trackpy(Features, sample_data, dt=dt, dxy=dxy, **parameters_linking) + Track_inv = linking_trackpy( + Features_inv, sample_data_inv, dt=dt_inv, dxy=dxy_inv, **parameters_linking + ) # Assert that output of feature detection not empty: assert not Track.empty diff --git a/tobac/tests/test_segmentation.py b/tobac/tests/test_segmentation.py index c31f4062..36cbf0fa 100644 --- a/tobac/tests/test_segmentation.py +++ b/tobac/tests/test_segmentation.py @@ -3,13 +3,14 @@ import tobac.testing as testing import tobac.segmentation as seg + def test_segmentation_timestep_2D_feature_2D_seg(): - ''' Tests `tobac.segmentation.segmentation_timestep` with a 2D + """Tests `tobac.segmentation.segmentation_timestep` with a 2D input feature and a 2D segmentation array - ''' - # Before we can run segmentation, we must run feature detection. + """ + # Before we can run segmentation, we must run feature detection. 
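+    # (Sketch of the hand-off being exercised here, assuming the public API
+    # used elsewhere in this patch: in a full pipeline the feature DataFrame
+    # would come from feature detection, roughly
+    #     feats = feature_detection_multithreshold(field, dxy, threshold=[1.5])
+    #     mask, df = segmentation_2D(feats, field, dxy=dxy, threshold=1.5)
+    # whereas these tests construct the DataFrame directly with
+    # testing.generate_single_feature to keep the inputs controlled.)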
- # start by building a simple dataset with a single feature + # start by building a simple dataset with a single feature import numpy as np test_dset_size = (50, 50) @@ -36,17 +37,25 @@ def test_segmentation_timestep_2D_feature_2D_seg(): ) test_data_iris = testing.make_dataset_from_arr(test_data, data_type="iris") # Generate dummy feature dataset - test_feature_ds = testing.generate_single_feature(start_h1 = 20.0, start_h2 = 20.0, - max_h1 = 1000, max_h2 = 1000) - - out_seg_mask, out_df = seg.segmentation_timestep(field_in = test_data_iris, - features_in = test_feature_ds, dxy = test_dxy, - threshold = 1.5, PBC_flag='none', ) - - # Make sure that all labeled points are segmented - assert np.all(out_seg_mask.core_data()[hdim_1_start_feat:hdim_1_end_feat, - hdim_2_start_feat:hdim_2_end_feat] == np.ones((test_hdim_1_sz, test_hdim_2_sz))) + test_feature_ds = testing.generate_single_feature( + start_h1=20.0, start_h2=20.0, max_h1=1000, max_h2=1000 + ) + + out_seg_mask, out_df = seg.segmentation_timestep( + field_in=test_data_iris, + features_in=test_feature_ds, + dxy=test_dxy, + threshold=1.5, + PBC_flag="none", + ) + # Make sure that all labeled points are segmented + assert np.all( + out_seg_mask.core_data()[ + hdim_1_start_feat:hdim_1_end_feat, hdim_2_start_feat:hdim_2_end_feat + ] + == np.ones((test_hdim_1_sz, test_hdim_2_sz)) + ) # Now try PBCs # First, something stretching across hdim_1 @@ -54,7 +63,7 @@ def test_segmentation_timestep_2D_feature_2D_seg(): test_data = np.zeros(test_dset_size) # Note that PBC flag here is 'both' as we still want the blob to be on both - # sides of the boundary to see if we accidentally grab it without PBC + # sides of the boundary to see if we accidentally grab it without PBC # segmentation test_data = testing.make_feature_blob( test_data, @@ -63,41 +72,58 @@ def test_segmentation_timestep_2D_feature_2D_seg(): h1_size=test_hdim_1_sz, h2_size=test_hdim_2_sz, amplitude=test_amp, - PBC_flag = 'both' + PBC_flag="both", ) test_data_iris = testing.make_dataset_from_arr(test_data, data_type="iris") # Generate dummy feature dataset - test_feature_ds = testing.generate_single_feature(start_h1 = test_hdim_1_pt, - start_h2 = test_hdim_2_pt, - max_h1 = 1000, max_h2 = 1000 - ) - - hdim_1_start_feat, hdim_1_end_feat = testing.get_start_end_of_feat(test_hdim_1_pt, - test_hdim_1_sz, 0,test_dset_size[0], - is_pbc = True ) - - for pbc_option in ['none', 'hdim_1', 'hdim_2', 'both']: - out_seg_mask, out_df = seg.segmentation_timestep(field_in = test_data_iris, - features_in = test_feature_ds, dxy = test_dxy, - threshold = test_amp-0.5, PBC_flag=pbc_option, ) + test_feature_ds = testing.generate_single_feature( + start_h1=test_hdim_1_pt, start_h2=test_hdim_2_pt, max_h1=1000, max_h2=1000 + ) + + hdim_1_start_feat, hdim_1_end_feat = testing.get_start_end_of_feat( + test_hdim_1_pt, test_hdim_1_sz, 0, test_dset_size[0], is_pbc=True + ) + + for pbc_option in ["none", "hdim_1", "hdim_2", "both"]: + out_seg_mask, out_df = seg.segmentation_timestep( + field_in=test_data_iris, + features_in=test_feature_ds, + dxy=test_dxy, + threshold=test_amp - 0.5, + PBC_flag=pbc_option, + ) # This will automatically give the appropriate box, and it's tested separately. 
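+        # (Illustration, grounded in test_get_pbc_coordinates below:
+        # get_pbc_coordinates splits a box that crosses a periodic edge into
+        # the sub-boxes it maps onto inside the domain, e.g. on a
+        # [0, 10) x [0, 10) domain
+        #     get_pbc_coordinates(0, 10, 0, 10, -1, 4, 1, 4, "hdim_1")
+        #     == [(0, 4, 1, 4), (9, 10, 1, 4)]
+        # and the single-point analogue wraps one coordinate, e.g.
+        #     get_single_pbc_coordinate(0, 10, 0, 10, -3, 3, "hdim_1") == (7, 3)
+        # so each returned box can be checked against the mask independently.)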
- segmented_box_expected = testing.get_pbc_coordinates(0, test_dset_size[0], - 0, test_dset_size[1], hdim_1_start_feat, hdim_1_end_feat, hdim_2_start_feat, - hdim_2_end_feat, PBC_flag=pbc_option) + segmented_box_expected = testing.get_pbc_coordinates( + 0, + test_dset_size[0], + 0, + test_dset_size[1], + hdim_1_start_feat, + hdim_1_end_feat, + hdim_2_start_feat, + hdim_2_end_feat, + PBC_flag=pbc_option, + ) # Make sure that all labeled points are segmented for seg_box in segmented_box_expected: - assert np.all(out_seg_mask.core_data()[seg_box[0]:seg_box[1], - seg_box[2]:seg_box[3]] == np.ones((seg_box[1]-seg_box[0], seg_box[3]-seg_box[2]))) - - if pbc_option in ['none', 'hdim_2']: - #there will only be one seg_box - assert (np.sum(out_seg_mask.core_data()[out_seg_mask.core_data()==1]) == - np.sum(np.ones((seg_box[1]-seg_box[0], seg_box[3]-seg_box[2])))) + assert np.all( + out_seg_mask.core_data()[ + seg_box[0] : seg_box[1], seg_box[2] : seg_box[3] + ] + == np.ones((seg_box[1] - seg_box[0], seg_box[3] - seg_box[2])) + ) + + if pbc_option in ["none", "hdim_2"]: + # there will only be one seg_box + assert np.sum( + out_seg_mask.core_data()[out_seg_mask.core_data() == 1] + ) == np.sum(np.ones((seg_box[1] - seg_box[0], seg_box[3] - seg_box[2]))) else: # We should be capturing the whole feature - assert (np.sum(out_seg_mask.core_data()[out_seg_mask.core_data()==1]) == - np.sum(np.ones((test_hdim_1_sz, test_hdim_2_sz)))) + assert np.sum( + out_seg_mask.core_data()[out_seg_mask.core_data() == 1] + ) == np.sum(np.ones((test_hdim_1_sz, test_hdim_2_sz))) # Same as the above test, but for hdim_2 # First, try the cases where we shouldn't get the points on the opposite @@ -112,44 +138,60 @@ def test_segmentation_timestep_2D_feature_2D_seg(): h1_size=test_hdim_1_sz, h2_size=test_hdim_2_sz, amplitude=test_amp, - PBC_flag = 'both' + PBC_flag="both", ) test_data_iris = testing.make_dataset_from_arr(test_data, data_type="iris") # Generate dummy feature dataset - test_feature_ds = testing.generate_single_feature(start_h1 = test_hdim_1_pt, - start_h2 = test_hdim_2_pt, - max_h1 = 1000, max_h2 = 1000) - hdim_1_start_feat, hdim_1_end_feat = testing.get_start_end_of_feat(test_hdim_1_pt, - test_hdim_1_sz, 0,test_dset_size[0], - is_pbc = True ) - - hdim_2_start_feat, hdim_2_end_feat = testing.get_start_end_of_feat(test_hdim_2_pt, - test_hdim_2_sz, 0,test_dset_size[1], - is_pbc = True ) - - for pbc_option in ['none', 'hdim_1', 'hdim_2', 'both']: - out_seg_mask, out_df = seg.segmentation_timestep(field_in = test_data_iris, - features_in = test_feature_ds, dxy = test_dxy, - threshold = test_amp-0.5, PBC_flag=pbc_option, ) + test_feature_ds = testing.generate_single_feature( + start_h1=test_hdim_1_pt, start_h2=test_hdim_2_pt, max_h1=1000, max_h2=1000 + ) + hdim_1_start_feat, hdim_1_end_feat = testing.get_start_end_of_feat( + test_hdim_1_pt, test_hdim_1_sz, 0, test_dset_size[0], is_pbc=True + ) + + hdim_2_start_feat, hdim_2_end_feat = testing.get_start_end_of_feat( + test_hdim_2_pt, test_hdim_2_sz, 0, test_dset_size[1], is_pbc=True + ) + + for pbc_option in ["none", "hdim_1", "hdim_2", "both"]: + out_seg_mask, out_df = seg.segmentation_timestep( + field_in=test_data_iris, + features_in=test_feature_ds, + dxy=test_dxy, + threshold=test_amp - 0.5, + PBC_flag=pbc_option, + ) # This will automatically give the appropriate box(es), and it's tested separately. 
- segmented_box_expected = testing.get_pbc_coordinates(0, test_dset_size[0], - 0, test_dset_size[1], hdim_1_start_feat, hdim_1_end_feat, hdim_2_start_feat, - hdim_2_end_feat, PBC_flag=pbc_option) + segmented_box_expected = testing.get_pbc_coordinates( + 0, + test_dset_size[0], + 0, + test_dset_size[1], + hdim_1_start_feat, + hdim_1_end_feat, + hdim_2_start_feat, + hdim_2_end_feat, + PBC_flag=pbc_option, + ) # Make sure that all labeled points are segmented for seg_box in segmented_box_expected: - assert np.all(out_seg_mask.core_data()[seg_box[0]:seg_box[1], - seg_box[2]:seg_box[3]] == np.ones((seg_box[1]-seg_box[0], seg_box[3]-seg_box[2]))) - - if pbc_option in ['none', 'hdim_1']: - #there will only be one seg_box - assert (np.sum(out_seg_mask.core_data()[out_seg_mask.core_data()==1]) == - np.sum(np.ones((seg_box[1]-seg_box[0], seg_box[3]-seg_box[2])))) + assert np.all( + out_seg_mask.core_data()[ + seg_box[0] : seg_box[1], seg_box[2] : seg_box[3] + ] + == np.ones((seg_box[1] - seg_box[0], seg_box[3] - seg_box[2])) + ) + + if pbc_option in ["none", "hdim_1"]: + # there will only be one seg_box + assert np.sum( + out_seg_mask.core_data()[out_seg_mask.core_data() == 1] + ) == np.sum(np.ones((seg_box[1] - seg_box[0], seg_box[3] - seg_box[2]))) else: # We should be capturing the whole feature - assert (np.sum(out_seg_mask.core_data()[out_seg_mask.core_data()==1]) == - np.sum(np.ones((test_hdim_1_sz, test_hdim_2_sz)))) - - + assert np.sum( + out_seg_mask.core_data()[out_seg_mask.core_data() == 1] + ) == np.sum(np.ones((test_hdim_1_sz, test_hdim_2_sz))) # Same as the above test, but for hdim_2 # First, try the cases where we shouldn't get the points on the opposite @@ -164,37 +206,53 @@ def test_segmentation_timestep_2D_feature_2D_seg(): h1_size=test_hdim_1_sz, h2_size=test_hdim_2_sz, amplitude=test_amp, - PBC_flag = 'both' + PBC_flag="both", ) test_data_iris = testing.make_dataset_from_arr(test_data, data_type="iris") # Generate dummy feature dataset - test_feature_ds = testing.generate_single_feature(start_h1 = test_hdim_1_pt, - start_h2 = test_hdim_2_pt, - max_h1 = 1000, max_h2 = 1000) - hdim_1_start_feat, hdim_1_end_feat = testing.get_start_end_of_feat(test_hdim_1_pt, - test_hdim_1_sz, 0,test_dset_size[0], - is_pbc = True ) - - hdim_2_start_feat, hdim_2_end_feat = testing.get_start_end_of_feat(test_hdim_2_pt, - test_hdim_2_sz, 0,test_dset_size[1], - is_pbc = True ) - - for pbc_option in ['none', 'hdim_1', 'hdim_2', 'both']: - out_seg_mask, out_df = seg.segmentation_timestep(field_in = test_data_iris, - features_in = test_feature_ds, dxy = test_dxy, - threshold = test_amp-0.5, PBC_flag=pbc_option, ) + test_feature_ds = testing.generate_single_feature( + start_h1=test_hdim_1_pt, start_h2=test_hdim_2_pt, max_h1=1000, max_h2=1000 + ) + hdim_1_start_feat, hdim_1_end_feat = testing.get_start_end_of_feat( + test_hdim_1_pt, test_hdim_1_sz, 0, test_dset_size[0], is_pbc=True + ) + + hdim_2_start_feat, hdim_2_end_feat = testing.get_start_end_of_feat( + test_hdim_2_pt, test_hdim_2_sz, 0, test_dset_size[1], is_pbc=True + ) + + for pbc_option in ["none", "hdim_1", "hdim_2", "both"]: + out_seg_mask, out_df = seg.segmentation_timestep( + field_in=test_data_iris, + features_in=test_feature_ds, + dxy=test_dxy, + threshold=test_amp - 0.5, + PBC_flag=pbc_option, + ) # This will automatically give the appropriate box(es), and it's tested separately. 
- segmented_box_expected = testing.get_pbc_coordinates(0, test_dset_size[0], - 0, test_dset_size[1], hdim_1_start_feat, hdim_1_end_feat, hdim_2_start_feat, - hdim_2_end_feat, PBC_flag=pbc_option) + segmented_box_expected = testing.get_pbc_coordinates( + 0, + test_dset_size[0], + 0, + test_dset_size[1], + hdim_1_start_feat, + hdim_1_end_feat, + hdim_2_start_feat, + hdim_2_end_feat, + PBC_flag=pbc_option, + ) # Make sure that all labeled points are segmented for seg_box in segmented_box_expected: print(pbc_option, seg_box) - #TODO: something is wrong with this case, unclear what. - assert np.all(out_seg_mask.core_data()[seg_box[0]:seg_box[1], - seg_box[2]:seg_box[3]] == np.ones((seg_box[1]-seg_box[0], seg_box[3]-seg_box[2]))) + # TODO: something is wrong with this case, unclear what. + assert np.all( + out_seg_mask.core_data()[ + seg_box[0] : seg_box[1], seg_box[2] : seg_box[3] + ] + == np.ones((seg_box[1] - seg_box[0], seg_box[3] - seg_box[2])) + ) - #TODO: Make sure for none, hdim_1, hdim_2 that only the appropriate points are segmented + # TODO: Make sure for none, hdim_1, hdim_2 that only the appropriate points are segmented def test_segmentation_timestep_level(): @@ -254,15 +312,16 @@ def test_segmentation_timestep_level(): test_data, data_type="iris", z_dim_num=0, y_dim_num=1, x_dim_num=2 ) # Generate dummy feature dataset - test_feature_ds = testing.generate_single_feature(start_h1=20.0, start_h2=20.0, - max_h1 = 1000, max_h2 = 1000) + test_feature_ds = testing.generate_single_feature( + start_h1=20.0, start_h2=20.0, max_h1=1000, max_h2=1000 + ) out_seg_mask, out_df = seg.segmentation_timestep( field_in=test_data_iris, features_in=test_feature_ds, dxy=test_dxy, threshold=1.5, - seed_3D_flag= 'column' + seed_3D_flag="column", ) out_seg_mask_arr = out_seg_mask.core_data() # Make sure that all labeled points are segmented, before setting specific levels @@ -290,7 +349,7 @@ def test_segmentation_timestep_level(): dxy=test_dxy, level=slice(vdim_start_feat, vdim_end_feat), threshold=1.5, - seed_3D_flag = 'column' + seed_3D_flag="column", ) out_seg_mask_arr = out_seg_mask.core_data() # Make sure that all labeled points are segmented, before setting specific levels @@ -311,41 +370,44 @@ def test_segmentation_timestep_level(): == np.zeros((test_vdim_sz, test_hdim_1_sz, test_hdim_2_sz)) ) -@pytest.mark.parametrize("blob_size, shift_pts, seed_3D_size" - ", expected_both_segmented", - [((3,3,3), (0,0,4), 3, False), - ((3,3,3), (0,0,4), 5, False), - ((3,3,3), (0,0,4), 7, True), - ] + +@pytest.mark.parametrize( + "blob_size, shift_pts, seed_3D_size" ", expected_both_segmented", + [ + ((3, 3, 3), (0, 0, 4), 3, False), + ((3, 3, 3), (0, 0, 4), 5, False), + ((3, 3, 3), (0, 0, 4), 7, True), + ], ) -def test_segmentation_timestep_3d_seed_box_nopbcs(blob_size, shift_pts, - seed_3D_size, expected_both_segmented): - '''Tests ```tobac.segmentation.segmentation_timestep``` - to make sure that the 3D seed box works. +def test_segmentation_timestep_3d_seed_box_nopbcs( + blob_size, shift_pts, seed_3D_size, expected_both_segmented +): + """Tests ```tobac.segmentation.segmentation_timestep``` + to make sure that the 3D seed box works. Parameters ---------- blob_size: tuple(int, int, int) - Size of the initial blob to add to the domain in (z, y, x) space. - We strongly recommend that these be *odd* numbers. + Size of the initial blob to add to the domain in (z, y, x) space. + We strongly recommend that these be *odd* numbers. 
shift_pts: tuple(int, int, int) Number of points *relative to the center* to shift the blob in (z, y, x) space. seed_3D_size: int or tuple Seed size to pass to tobac expected_both_segmented: bool - True if we expect both features to be segmented, false + True if we expect both features to be segmented, false if we don't expect them both to be segmented - ''' + """ import numpy as np # For now, just testing this for no PBCs. - ''' + """ The best way to do this I think is to create two blobs near (but not touching) each other, varying the seed_3D_size so that they are either segmented together or not segmented together. - ''' + """ test_dset_size = (20, 50, 50) test_hdim_1_pt_1 = 20.0 test_hdim_2_pt_1 = 20.0 @@ -353,8 +415,7 @@ def test_segmentation_timestep_3d_seed_box_nopbcs(blob_size, shift_pts, test_dxy = 1000 test_amp = 2 - PBC_opt = 'none' - + PBC_opt = "none" test_data = np.zeros(test_dset_size) test_data = testing.make_feature_blob( @@ -384,47 +445,59 @@ def test_segmentation_timestep_3d_seed_box_nopbcs(blob_size, shift_pts, test_data, data_type="iris", z_dim_num=0, y_dim_num=1, x_dim_num=2 ) # Generate dummy feature dataset only on the first feature. - test_feature_ds = testing.generate_single_feature(start_v=test_vdim_pt_1, - start_h1=test_hdim_1_pt_1, - start_h2=test_hdim_2_pt_1, - max_h1 = 1000, max_h2 = 1000) + test_feature_ds = testing.generate_single_feature( + start_v=test_vdim_pt_1, + start_h1=test_hdim_1_pt_1, + start_h2=test_hdim_2_pt_1, + max_h1=1000, + max_h2=1000, + ) out_seg_mask, out_df = seg.segmentation_timestep( field_in=test_data_iris, features_in=test_feature_ds, dxy=test_dxy, threshold=1.5, - seed_3D_flag= 'box', - seed_3D_size=seed_3D_size + seed_3D_flag="box", + seed_3D_size=seed_3D_size, ) - second_point_seg = out_seg_mask.core_data()[int(test_vdim_pt_1 + shift_pts[0]), - int(test_hdim_1_pt_1 + shift_pts[1]), - int(test_hdim_2_pt_1 + shift_pts[2])] - # We really only need to check the center point here for this test. + second_point_seg = out_seg_mask.core_data()[ + int(test_vdim_pt_1 + shift_pts[0]), + int(test_hdim_1_pt_1 + shift_pts[1]), + int(test_hdim_2_pt_1 + shift_pts[2]), + ] + # We really only need to check the center point here for this test. 
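+    # (Rough cross-check of the parametrization above, a sketch assuming the
+    # 'box' seed extends seed_3D_size // 2 grid points from the feature
+    # center: the second blob should only be seeded, and hence segmented,
+    # when that reach spans the gap between the two blobs' edges.)
+    seed_reach = seed_3D_size // 2
+    gap_between_blobs = max(abs(pt) for pt in shift_pts) - max(blob_size) // 2
+    assert (seed_reach >= gap_between_blobs) == expected_both_segmented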
seg_point_overlaps = second_point_seg == 1 assert seg_point_overlaps == expected_both_segmented -@pytest.mark.parametrize("test_dset_size, vertical_axis_num, " - "vertical_coord_name," - " vertical_coord_opt, expected_raise", - [((20,30,40), 0, 'altitude', 'auto', False), - ((20,30,40), 1, 'altitude', 'auto', False), - ((20,30,40), 2, 'altitude', 'auto', False), - ((20,30,40), 0, 'air_pressure', 'air_pressure', False), - ((20,30,40), 0, 'air_pressure', 'auto', True), - ((20,30,40), 0, 'model_level_number', 'auto', False), - ((20,30,40), 0, 'altitude', 'auto', False), - ((20,30,40), 0, 'geopotential_height', 'auto', False) - ] +@pytest.mark.parametrize( + "test_dset_size, vertical_axis_num, " + "vertical_coord_name," + " vertical_coord_opt, expected_raise", + [ + ((20, 30, 40), 0, "altitude", "auto", False), + ((20, 30, 40), 1, "altitude", "auto", False), + ((20, 30, 40), 2, "altitude", "auto", False), + ((20, 30, 40), 0, "air_pressure", "air_pressure", False), + ((20, 30, 40), 0, "air_pressure", "auto", True), + ((20, 30, 40), 0, "model_level_number", "auto", False), + ((20, 30, 40), 0, "altitude", "auto", False), + ((20, 30, 40), 0, "geopotential_height", "auto", False), + ], ) -def test_different_z_axes(test_dset_size, vertical_axis_num, vertical_coord_name, - vertical_coord_opt, expected_raise): - '''Tests ```tobac.segmentation.segmentation_timestep``` +def test_different_z_axes( + test_dset_size, + vertical_axis_num, + vertical_coord_name, + vertical_coord_opt, + expected_raise, +): + """Tests ```tobac.segmentation.segmentation_timestep``` Tests: The output is the same no matter what order we have axes in. - A ValueError is raised if an invalid vertical coordinate is + A ValueError is raised if an invalid vertical coordinate is passed in Parameters @@ -434,24 +507,24 @@ def test_different_z_axes(test_dset_size, vertical_axis_num, vertical_coord_name vertical_axis_num: int (0-2, inclusive) Which axis in test_dset_size is the vertical axis vertical_coord_name: str - Name of the vertical coordinate. + Name of the vertical coordinate. vertical_coord_opt: str What to pass in as the vertical coordinate option to segmentation_timestep expected_raise: bool True if we expect a ValueError to be raised, false otherwise - ''' + """ import numpy as np - # First, just check that input and output shapes are the same. - test_dxy = 1000 - test_vdim_pt_1 = 8 + # First, just check that input and output shapes are the same. + test_dxy = 1000 + test_vdim_pt_1 = 8 test_hdim_1_pt_1 = 12 test_hdim_2_pt_1 = 12 test_data = np.zeros(test_dset_size) common_dset_opts = { - 'in_arr': test_data, - 'data_type': 'iris', - 'z_dim_name': vertical_coord_name + "in_arr": test_data, + "data_type": "iris", + "z_dim_name": vertical_coord_name, } if vertical_axis_num == 0: test_data_iris = testing.make_dataset_from_arr( @@ -467,19 +540,22 @@ def test_different_z_axes(test_dset_size, vertical_axis_num, vertical_coord_name ) # Generate dummy feature dataset only on the first feature. 
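+    # (For reference, per test_generate_single_feature in test_testing.py:
+    # generate_single_feature returns a one-row-per-frame DataFrame with
+    # 'hdim_1', 'hdim_2', 'vdim' (when start_v is given), 'frame', 'feature'
+    # and 'time' columns, so the segmentation call below receives a minimal
+    # but fully-formed feature table.)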
-    test_feature_ds = testing.generate_single_feature(start_v=test_vdim_pt_1,
-                                            start_h1=test_hdim_1_pt_1,
-                                            start_h2=test_hdim_2_pt_1,
-                                            max_h1 = 1000, max_h2 = 1000)
+    test_feature_ds = testing.generate_single_feature(
+        start_v=test_vdim_pt_1,
+        start_h1=test_hdim_1_pt_1,
+        start_h2=test_hdim_2_pt_1,
+        max_h1=1000,
+        max_h2=1000,
+    )
     if not expected_raise:
         out_seg_mask, out_df = seg.segmentation_timestep(
             field_in=test_data_iris,
             features_in=test_feature_ds,
             dxy=test_dxy,
             threshold=1.5,
-            vertical_coord=vertical_coord_opt
+            vertical_coord=vertical_coord_opt,
         )
-        # Check that shapes don't change.
+        # Check that shapes don't change.
         assert test_data.shape == out_seg_mask.core_data().shape
     else:
@@ -491,25 +567,44 @@ def test_different_z_axes(test_dset_size, vertical_axis_num, vertical_coord_name
             dxy=test_dxy,
             threshold=1.5,
         )
-# TODO: add more tests to make sure buddy box code is run.
-# From this list right now, I'm not sure why buddy box isn't run actually.
-@pytest.mark.parametrize("dset_size, blob_1_loc, blob_1_size, blob_2_loc, blob_2_size,"
-                         "shift_domain, seed_3D_size",
-                         [((20,30,40), (8,0,0), (5,5,5), (8, 3,3), (5,5,5), (0,-8,-8), None),
-                          ((20,30,40), (8,0,0), (5,5,5), (8, 3,3), (5,5,5), (0,-8,-8), None),
-                          ((20,30,40), (8,1,1), (5,5,5), (8, 28,38), (5,5,5), (0,15,15), None),
-                          ((20,30,40), (8,0,0), (5,5,5), (8, 28,38), (5,5,5), (0,-8,-8), None),
-                          ((20,30,40), (8,0,0), (5,5,5), (8, 28,38), (5,5,5), (0,-8,-8), (5,5,5)),
-                         ]
+
+
+# TODO: add more tests to make sure buddy box code is run.
+# From this list right now, I'm not sure why buddy box isn't run actually.
+@pytest.mark.parametrize(
+    "dset_size, blob_1_loc, blob_1_size, blob_2_loc, blob_2_size,"
+    "shift_domain, seed_3D_size",
+    [
+        ((20, 30, 40), (8, 0, 0), (5, 5, 5), (8, 3, 3), (5, 5, 5), (0, -8, -8), None),
+        ((20, 30, 40), (8, 0, 0), (5, 5, 5), (8, 3, 3), (5, 5, 5), (0, -8, -8), None),
+        ((20, 30, 40), (8, 1, 1), (5, 5, 5), (8, 28, 38), (5, 5, 5), (0, 15, 15), None),
+        ((20, 30, 40), (8, 0, 0), (5, 5, 5), (8, 28, 38), (5, 5, 5), (0, -8, -8), None),
+        (
+            (20, 30, 40),
+            (8, 0, 0),
+            (5, 5, 5),
+            (8, 28, 38),
+            (5, 5, 5),
+            (0, -8, -8),
+            (5, 5, 5),
+        ),
+    ],
 )
 # TODO: last test fails
-def test_segmentation_timestep_3d_buddy_box(dset_size,blob_1_loc, blob_1_size, blob_2_loc, blob_2_size,
-                                            shift_domain, seed_3D_size):
-    '''Tests ```tobac.segmentation.segmentation_timestep```
-    to make sure that the "buddy box" 3D PBC implementation works.
+def test_segmentation_timestep_3d_buddy_box(
+    dset_size,
+    blob_1_loc,
+    blob_1_size,
+    blob_2_loc,
+    blob_2_size,
+    shift_domain,
+    seed_3D_size,
+):
+    """Tests ```tobac.segmentation.segmentation_timestep```
+    to make sure that the "buddy box" 3D PBC implementation works.
    Basic procedure: build a dataset with two features (preferably on the corner) and then
    run segmentation, shift the points, and then run segmentation again.
-    After shifting back, the results should be identical.
+    After shifting back, the results should be identical.
    Note: only tests 'both' PBC condition.
    Parameters
    ----------
    dset_size: tuple(int, int, int)
        Size of the domain
    blob_1_loc: tuple(int, int, int)
        Location of the first blob
    blob_1_size: tuple(int, int, int)
-        Size of the first blob. Note: use odd numbers here.
+        Size of the first blob. Note: use odd numbers here.
    blob_2_loc: tuple(int, int, int)
        Location of the second blob
    blob_2_size: tuple(int, int, int)
        Size of the second blob. Note: use odd numbers here.
shift_domain: tuple(int, int, int) - How many points to shift the domain by. + How many points to shift the domain by. seed_3D_size: None, int, or tuple Seed size to pass to tobac. If None, passes in a column seed - ''' + """ import numpy as np import pandas as pd - ''' + """ The best way to do this I think is to create two blobs near (but not touching) each other, varying the seed_3D_size so that they are either segmented together or not segmented together. - ''' + """ test_dxy = 1000 test_amp = 2 @@ -550,8 +645,7 @@ def test_segmentation_timestep_3d_buddy_box(dset_size,blob_1_loc, blob_1_size, b h2_size=blob_1_size[2], v_size=blob_1_size[0], amplitude=test_amp, - PBC_flag='both' - + PBC_flag="both", ) # Make a second feature @@ -564,66 +658,67 @@ def test_segmentation_timestep_3d_buddy_box(dset_size,blob_1_loc, blob_1_size, b h2_size=blob_2_size[2], v_size=blob_2_size[0], amplitude=test_amp, - PBC_flag='both' + PBC_flag="both", ) test_data_iris = testing.make_dataset_from_arr( test_data, data_type="iris", z_dim_num=0, y_dim_num=1, x_dim_num=2 ) # Generate dummy feature dataset only on the first feature. - test_feature_ds_1 = testing.generate_single_feature(start_v=blob_1_loc[0], - start_h1=blob_1_loc[1], - start_h2=blob_1_loc[2], - max_h1 = dset_size[1], - max_h2 = dset_size[2], - feature_num = 1, - PBC_flag='both') - test_feature_ds_2 = testing.generate_single_feature(start_v=blob_2_loc[0], - start_h1=blob_2_loc[1], - start_h2=blob_2_loc[2], - max_h1 = dset_size[1], - max_h2 = dset_size[2], - feature_num = 2, - PBC_flag='both') + test_feature_ds_1 = testing.generate_single_feature( + start_v=blob_1_loc[0], + start_h1=blob_1_loc[1], + start_h2=blob_1_loc[2], + max_h1=dset_size[1], + max_h2=dset_size[2], + feature_num=1, + PBC_flag="both", + ) + test_feature_ds_2 = testing.generate_single_feature( + start_v=blob_2_loc[0], + start_h1=blob_2_loc[1], + start_h2=blob_2_loc[2], + max_h1=dset_size[1], + max_h2=dset_size[2], + feature_num=2, + PBC_flag="both", + ) test_feature_ds = pd.concat([test_feature_ds_1, test_feature_ds_2]) - common_seg_opts = { - 'dxy': test_dxy, - 'threshold': 1.5, - 'PBC_flag': 'both' - } + common_seg_opts = {"dxy": test_dxy, "threshold": 1.5, "PBC_flag": "both"} if seed_3D_size is None: - common_seg_opts['seed_3D_flag'] = 'column' + common_seg_opts["seed_3D_flag"] = "column" else: - common_seg_opts['seed_3D_flag'] = 'box' - common_seg_opts['seed_3D_size'] = seed_3D_size - + common_seg_opts["seed_3D_flag"] = "box" + common_seg_opts["seed_3D_size"] = seed_3D_size out_seg_mask, out_df = seg.segmentation_timestep( - field_in=test_data_iris, - features_in=test_feature_ds, - **common_seg_opts + field_in=test_data_iris, features_in=test_feature_ds, **common_seg_opts ) - # Now, shift the data over and re-run segmentation. - test_data_shifted = np.roll(test_data, shift_domain, axis=(0,1,2)) + # Now, shift the data over and re-run segmentation. 
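+    # (Why shifting is a valid test, a sketch: on a fully periodic domain
+    # np.roll is an exact symmetry of the data, and rolling by the negated
+    # shift inverts it, i.e.
+    #     shifted = np.roll(test_data, shift_domain, axis=(0, 1, 2))
+    #     np.roll(shifted, tuple(-x for x in shift_domain), axis=(0, 1, 2))
+    # recovers test_data exactly, so any difference between the two
+    # segmentations can only come from the PBC handling itself.)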
+ test_data_shifted = np.roll(test_data, shift_domain, axis=(0, 1, 2)) test_data_iris_shifted = testing.make_dataset_from_arr( test_data_shifted, data_type="iris", z_dim_num=0, y_dim_num=1, x_dim_num=2 ) - test_feature_ds_1 = testing.generate_single_feature(start_v=blob_1_loc[0]+shift_domain[0], - start_h1=blob_1_loc[1]+shift_domain[1], - start_h2=blob_1_loc[2]+shift_domain[2], - max_h1 = dset_size[1], - max_h2 = dset_size[2], - feature_num = 1, - PBC_flag='both') - test_feature_ds_2 = testing.generate_single_feature(start_v=blob_2_loc[0]+shift_domain[0], - start_h1=blob_2_loc[1]+shift_domain[1], - start_h2=blob_2_loc[2]+shift_domain[2], - max_h1 = dset_size[1], - max_h2 = dset_size[2], - feature_num = 2, - PBC_flag='both') + test_feature_ds_1 = testing.generate_single_feature( + start_v=blob_1_loc[0] + shift_domain[0], + start_h1=blob_1_loc[1] + shift_domain[1], + start_h2=blob_1_loc[2] + shift_domain[2], + max_h1=dset_size[1], + max_h2=dset_size[2], + feature_num=1, + PBC_flag="both", + ) + test_feature_ds_2 = testing.generate_single_feature( + start_v=blob_2_loc[0] + shift_domain[0], + start_h1=blob_2_loc[1] + shift_domain[1], + start_h2=blob_2_loc[2] + shift_domain[2], + max_h1=dset_size[1], + max_h2=dset_size[2], + feature_num=2, + PBC_flag="both", + ) test_feature_ds_shifted = pd.concat([test_feature_ds_1, test_feature_ds_2]) out_seg_mask_shifted, out_df = seg.segmentation_timestep( field_in=test_data_iris_shifted, @@ -631,24 +726,30 @@ def test_segmentation_timestep_3d_buddy_box(dset_size,blob_1_loc, blob_1_size, b **common_seg_opts ) - # Now, shift output back. - out_seg_reshifted = np.roll(out_seg_mask_shifted.core_data(), - tuple((-x for x in shift_domain)), axis=(0,1,2)) + # Now, shift output back. + out_seg_reshifted = np.roll( + out_seg_mask_shifted.core_data(), + tuple((-x for x in shift_domain)), + axis=(0, 1, 2), + ) assert np.all(out_seg_mask.core_data() == out_seg_reshifted) -@pytest.mark.parametrize("dset_size, feat_1_loc, feat_2_loc," - "shift_domain, seed_3D_size", - [((20,30,40), (8,0,0), (8, 3,3), (0,-8,-8), None), - ((20,30,40), (8,0,0), (8, 3,3), (0,-8,-8), None), - ((20,30,40), (8,1,1), (8, 28,38), (0,15,15), None), - ((20,30,40), (8,0,0), (8, 28,38), (0,-8,-8), None), - ((20,30,40), (8,0,0), (8, 28,38), (0,-8,-8), (5,5,5)), - ] +@pytest.mark.parametrize( + "dset_size, feat_1_loc, feat_2_loc," "shift_domain, seed_3D_size", + [ + ((20, 30, 40), (8, 0, 0), (8, 3, 3), (0, -8, -8), None), + ((20, 30, 40), (8, 0, 0), (8, 3, 3), (0, -8, -8), None), + ((20, 30, 40), (8, 1, 1), (8, 28, 38), (0, 15, 15), None), + ((20, 30, 40), (8, 0, 0), (8, 28, 38), (0, -8, -8), None), + ((20, 30, 40), (8, 0, 0), (8, 28, 38), (0, -8, -8), (5, 5, 5)), + ], ) -def test_add_markers_pbcs(dset_size,feat_1_loc, feat_2_loc, shift_domain, seed_3D_size): - '''Tests ```tobac.segmentation.add_markers``` +def test_add_markers_pbcs( + dset_size, feat_1_loc, feat_2_loc, shift_domain, seed_3D_size +): + """Tests ```tobac.segmentation.add_markers``` to make sure that adding markers works and is consistent across PBCs Parameters ---------- @@ -659,116 +760,128 @@ def test_add_markers_pbcs(dset_size,feat_1_loc, feat_2_loc, shift_domain, seed_3 feat_2_loc: tuple, same length as dset_size Location of the second blob shift_domain: tuple, same length as dset_size - How many points to shift the domain by. + How many points to shift the domain by. seed_3D_size: None, int, or tuple Seed size to pass to tobac. 
If None, passes in a column seed - ''' + """ import numpy as np import pandas as pd - if len(dset_size) == 2: is_3D = False start_h1_ax = 0 else: is_3D = True start_h1_ax = 1 - + common_feat_opts = { - 'PBC_flag': 'both', - 'max_h1': dset_size[start_h1_ax], - 'max_h2': dset_size[start_h1_ax + 1] + "PBC_flag": "both", + "max_h1": dset_size[start_h1_ax], + "max_h2": dset_size[start_h1_ax + 1], } - # Generate dummy feature dataset only on the first feature. - test_feature_ds_1 = testing.generate_single_feature(start_v=feat_1_loc[0], - start_h1=feat_1_loc[1], - start_h2=feat_1_loc[2], - feature_num = 1, - **common_feat_opts) - test_feature_ds_2 = testing.generate_single_feature(start_v=feat_2_loc[0], - start_h1=feat_2_loc[1], - start_h2=feat_2_loc[2], - feature_num = 2, - **common_feat_opts) + test_feature_ds_1 = testing.generate_single_feature( + start_v=feat_1_loc[0], + start_h1=feat_1_loc[1], + start_h2=feat_1_loc[2], + feature_num=1, + **common_feat_opts + ) + test_feature_ds_2 = testing.generate_single_feature( + start_v=feat_2_loc[0], + start_h1=feat_2_loc[1], + start_h2=feat_2_loc[2], + feature_num=2, + **common_feat_opts + ) test_feature_ds = pd.concat([test_feature_ds_1, test_feature_ds_2]) common_marker_opts = dict() - common_marker_opts['PBC_flag'] = 'both' + common_marker_opts["PBC_flag"] = "both" if seed_3D_size is None: - common_marker_opts['seed_3D_flag'] = 'column' + common_marker_opts["seed_3D_flag"] = "column" else: - common_marker_opts['seed_3D_flag'] = 'box' - common_marker_opts['seed_3D_size'] = seed_3D_size - - marker_arr = seg.add_markers(test_feature_ds, np.zeros(dset_size), **common_marker_opts) - - # Now, shift the data over and re-run markers. - test_feature_ds_1 = testing.generate_single_feature(start_v=feat_1_loc[0]+shift_domain[0], - start_h1=feat_1_loc[1]+shift_domain[1], - start_h2=feat_1_loc[2]+shift_domain[2], - feature_num = 1, - **common_feat_opts) - test_feature_ds_2 = testing.generate_single_feature(start_v=feat_2_loc[0]+shift_domain[0], - start_h1=feat_2_loc[1]+shift_domain[1], - start_h2=feat_2_loc[2]+shift_domain[2], - feature_num = 2, - **common_feat_opts) + common_marker_opts["seed_3D_flag"] = "box" + common_marker_opts["seed_3D_size"] = seed_3D_size + + marker_arr = seg.add_markers( + test_feature_ds, np.zeros(dset_size), **common_marker_opts + ) + + # Now, shift the data over and re-run markers. + test_feature_ds_1 = testing.generate_single_feature( + start_v=feat_1_loc[0] + shift_domain[0], + start_h1=feat_1_loc[1] + shift_domain[1], + start_h2=feat_1_loc[2] + shift_domain[2], + feature_num=1, + **common_feat_opts + ) + test_feature_ds_2 = testing.generate_single_feature( + start_v=feat_2_loc[0] + shift_domain[0], + start_h1=feat_2_loc[1] + shift_domain[1], + start_h2=feat_2_loc[2] + shift_domain[2], + feature_num=2, + **common_feat_opts + ) test_feature_ds_shifted = pd.concat([test_feature_ds_1, test_feature_ds_2]) - - marker_arr_shifted = seg.add_markers(test_feature_ds_shifted, np.zeros(dset_size), - **common_marker_opts) + marker_arr_shifted = seg.add_markers( + test_feature_ds_shifted, np.zeros(dset_size), **common_marker_opts + ) - # Now, shift output back. - marker_arr_reshifted = np.roll(marker_arr_shifted, - tuple((-x for x in shift_domain)), axis=(0,1,2)) + # Now, shift output back. 
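+    # (Context for the final check, a sketch assuming add_markers acts as the
+    # watershed seeding step: it writes each feature's number into the zero
+    # array at that feature's seed location -- a single column for
+    # seed_3D_flag='column', a seed_3D_size box for 'box' -- so the two
+    # marker arrays should agree exactly once the shifted one is rolled back.)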
+ marker_arr_reshifted = np.roll( + marker_arr_shifted, tuple((-x for x in shift_domain)), axis=(0, 1, 2) + ) assert np.all(marker_arr == marker_arr_reshifted) -@pytest.mark.parametrize("PBC_flag", - [('none'), - ('hdim_1'), - ('hdim_2'), - ('both'), - ] +@pytest.mark.parametrize( + "PBC_flag", + [ + ("none"), + ("hdim_1"), + ("hdim_2"), + ("both"), + ], ) def test_empty_segmentation(PBC_flag): - '''Tests ```tobac.segmentation.segmentation_timestep``` with an + """Tests ```tobac.segmentation.segmentation_timestep``` with an empty/zeroed out array - - ''' + + """ import numpy as np + h1_size = 100 h2_size = 100 v_size = 5 test_dxy = 1000 - test_feature = testing.generate_single_feature(start_v=1, - start_h1=1, - start_h2=1, - max_h1 = h1_size, - max_h2 = h2_size, - feature_num = 1, - PBC_flag=PBC_flag) + test_feature = testing.generate_single_feature( + start_v=1, + start_h1=1, + start_h2=1, + max_h1=h1_size, + max_h2=h2_size, + feature_num=1, + PBC_flag=PBC_flag, + ) seg_arr = np.zeros((v_size, h1_size, h2_size)) seg_opts = { - 'dxy': test_dxy, - 'threshold': 1.5, - 'PBC_flag': PBC_flag, + "dxy": test_dxy, + "threshold": 1.5, + "PBC_flag": PBC_flag, } test_data_iris = testing.make_dataset_from_arr( seg_arr, data_type="iris", z_dim_num=0, y_dim_num=1, x_dim_num=2 ) out_seg_mask, out_df = seg.segmentation_timestep( - field_in=test_data_iris, - features_in=test_feature, - **seg_opts + field_in=test_data_iris, features_in=test_feature, **seg_opts ) - assert np.all(out_seg_mask.core_data() == -1) \ No newline at end of file + assert np.all(out_seg_mask.core_data() == -1) diff --git a/tobac/tests/test_testing.py b/tobac/tests/test_testing.py index 3248131d..395a0d59 100644 --- a/tobac/tests/test_testing.py +++ b/tobac/tests/test_testing.py @@ -1,18 +1,23 @@ -''' +""" Audit of the testing functions that produce our test data. Who's watching the watchmen, basically. -''' +""" import pytest -from tobac.testing import get_pbc_coordinates, generate_single_feature, get_single_pbc_coordinate +from tobac.testing import ( + get_pbc_coordinates, + generate_single_feature, + get_single_pbc_coordinate, +) import tobac.testing as tbtest from collections import Counter import pandas as pd from pandas.util.testing import assert_frame_equal import datetime + def lists_equal_without_order(a, b): """ - This will make sure the inner list contain the same, + This will make sure the inner list contain the same, but doesn't account for duplicate groups. from: https://stackoverflow.com/questions/31501909/assert-list-of-list-equality-without-order-in-python/31502000 """ @@ -22,109 +27,176 @@ def lists_equal_without_order(a, b): return False return True + def test_make_feature_blob(): - '''Tests ```tobac.testing.make_feature_blob``` + """Tests ```tobac.testing.make_feature_blob``` Currently runs the following tests: Creates a blob in the right location and cuts off without PBCs Blob extends off PBCs for all dimensions when appropriate - ''' + """ import numpy as np # Test without PBCs first, make sure that a blob is generated in the first place. # 2D test - out_blob = tbtest.make_feature_blob(np.zeros((10,10)), h1_loc=5, h2_loc=5, - h1_size = 2, h2_size= 2, shape='rectangle', amplitude= 1, PBC_flag='none') + out_blob = tbtest.make_feature_blob( + np.zeros((10, 10)), + h1_loc=5, + h2_loc=5, + h1_size=2, + h2_size=2, + shape="rectangle", + amplitude=1, + PBC_flag="none", + ) assert np.all(out_blob[4:6, 4:6] == 1) - # There should be exactly 4 points of value 1. + # There should be exactly 4 points of value 1. 
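+    # (Arithmetic behind the sum checks, for reference: a rectangle blob of
+    # h1_size x h2_size points with amplitude 1 contributes
+    # h1_size * h2_size * 1 to the array total, here 2 * 2 = 4; the 3D case
+    # below adds a v_size factor, giving 2 * 2 * 2 = 8.)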
assert np.sum(out_blob) == 4 and np.min(out_blob) == 0 # 3D test - out_blob = tbtest.make_feature_blob(np.zeros((10, 10, 10)), h1_loc=5, h2_loc=5, - v_loc = 5, h1_size = 2, h2_size= 2, v_size= 2, - shape='rectangle', amplitude= 1, PBC_flag='none') + out_blob = tbtest.make_feature_blob( + np.zeros((10, 10, 10)), + h1_loc=5, + h2_loc=5, + v_loc=5, + h1_size=2, + h2_size=2, + v_size=2, + shape="rectangle", + amplitude=1, + PBC_flag="none", + ) assert np.all(out_blob[4:6, 4:6, 4:6] == 1) - # There should be exactly 8 points of value 1. + # There should be exactly 8 points of value 1. assert np.sum(out_blob) == 8 and np.min(out_blob) == 0 - # Test that it cuts things off along a boundary. - # 2D test - out_blob = tbtest.make_feature_blob(np.zeros((10,10)), h1_loc=5, h2_loc=9, - h1_size = 2, h2_size= 4, shape='rectangle', amplitude= 1, PBC_flag='none') + # 2D test + out_blob = tbtest.make_feature_blob( + np.zeros((10, 10)), + h1_loc=5, + h2_loc=9, + h1_size=2, + h2_size=4, + shape="rectangle", + amplitude=1, + PBC_flag="none", + ) assert np.all(out_blob[4:6, 7:10] == 1) assert np.all(out_blob[4:6, 0:1] == 0) - # There should be exactly 4 points of value 1. + # There should be exactly 4 points of value 1. assert np.sum(out_blob) == 6 and np.min(out_blob) == 0 - # 3D test - out_blob = tbtest.make_feature_blob(np.zeros((10, 10, 10)), h1_loc=5, h2_loc=9, - v_loc = 5, h1_size = 2, h2_size= 4, v_size= 2, - shape='rectangle', amplitude= 1, PBC_flag='none') + # 3D test + out_blob = tbtest.make_feature_blob( + np.zeros((10, 10, 10)), + h1_loc=5, + h2_loc=9, + v_loc=5, + h1_size=2, + h2_size=4, + v_size=2, + shape="rectangle", + amplitude=1, + PBC_flag="none", + ) assert np.all(out_blob[4:6, 4:6, 7:10] == 1) assert np.all(out_blob[4:6, 4:6, 0:1] == 0) - # There should be exactly 4 points of value 1. + # There should be exactly 4 points of value 1. assert np.sum(out_blob) == 12 and np.min(out_blob) == 0 - for PBC_condition in ['hdim_1', 'hdim_2', 'both']: - # Now test simple cases with PBCs + for PBC_condition in ["hdim_1", "hdim_2", "both"]: + # Now test simple cases with PBCs # 2D test - out_blob = tbtest.make_feature_blob(np.zeros((10,10)), h1_loc=5, h2_loc=5, - h1_size = 2, h2_size= 2, shape='rectangle', amplitude= 1, PBC_flag=PBC_condition) + out_blob = tbtest.make_feature_blob( + np.zeros((10, 10)), + h1_loc=5, + h2_loc=5, + h1_size=2, + h2_size=2, + shape="rectangle", + amplitude=1, + PBC_flag=PBC_condition, + ) assert np.all(out_blob[4:6, 4:6] == 1) - # There should be exactly 4 points of value 1. + # There should be exactly 4 points of value 1. assert np.sum(out_blob) == 4 and np.min(out_blob) == 0 # 3D test - out_blob = tbtest.make_feature_blob(np.zeros((10, 10, 10)), h1_loc=5, h2_loc=5, - v_loc = 5, h1_size = 2, h2_size= 2, v_size= 2, - shape='rectangle', amplitude= 1, PBC_flag=PBC_condition) + out_blob = tbtest.make_feature_blob( + np.zeros((10, 10, 10)), + h1_loc=5, + h2_loc=5, + v_loc=5, + h1_size=2, + h2_size=2, + v_size=2, + shape="rectangle", + amplitude=1, + PBC_flag=PBC_condition, + ) assert np.all(out_blob[4:6, 4:6, 4:6] == 1) - # There should be exactly 8 points of value 1. + # There should be exactly 8 points of value 1. 
assert np.sum(out_blob) == 8 and np.min(out_blob) == 0 - + # Test that it wraps around on the hdim_1 positive side - for PBC_condition in ['hdim_2', 'both']: - out_blob = tbtest.make_feature_blob(np.zeros((10,10)), h1_loc=5, h2_loc=9, - h1_size = 2, h2_size= 4, shape='rectangle', amplitude= 1, PBC_flag=PBC_condition) + for PBC_condition in ["hdim_2", "both"]: + out_blob = tbtest.make_feature_blob( + np.zeros((10, 10)), + h1_loc=5, + h2_loc=9, + h1_size=2, + h2_size=4, + shape="rectangle", + amplitude=1, + PBC_flag=PBC_condition, + ) assert np.all(out_blob[4:6, 7:10] == 1) assert np.all(out_blob[4:6, 0:1] == 1) - # There should be exactly 4 points of value 1. + # There should be exactly 4 points of value 1. assert np.sum(out_blob) == 8 and np.min(out_blob) == 0 - # 3D test - out_blob = tbtest.make_feature_blob(np.zeros((10, 10, 10)), h1_loc=5, h2_loc=9, - v_loc = 5, h1_size = 2, h2_size= 4, v_size= 2, - shape='rectangle', amplitude= 1, PBC_flag=PBC_condition) + # 3D test + out_blob = tbtest.make_feature_blob( + np.zeros((10, 10, 10)), + h1_loc=5, + h2_loc=9, + v_loc=5, + h1_size=2, + h2_size=4, + v_size=2, + shape="rectangle", + amplitude=1, + PBC_flag=PBC_condition, + ) assert np.all(out_blob[4:6, 4:6, 7:10] == 1) assert np.all(out_blob[4:6, 4:6, 0:1] == 1) - # There should be exactly 4 points of value 1. + # There should be exactly 4 points of value 1. assert np.sum(out_blob) == 16 and np.min(out_blob) == 0 - def test_get_single_pbc_coordinate(): - '''Tests ```tobac.testing.get_single_pbc_coordinate```. + """Tests ```tobac.testing.get_single_pbc_coordinate```. Currently runs the following tests: Point within bounds with all PBC conditions Point off bounds on each side Invalid point - ''' + """ # Test points that do not need to be adjusted for all PBC conditions - for PBC_condition in ['none', 'hdim_1', 'hdim_2', 'both']: - assert(get_single_pbc_coordinate(0, 10, 0, 10, 3, 3, PBC_condition) == (3,3)) - assert(get_single_pbc_coordinate(0, 10, 0, 10, 0, 0, PBC_condition) == (0,0)) - assert(get_single_pbc_coordinate(0, 10, 0, 10, 9, 9, PBC_condition) == (9,9)) + for PBC_condition in ["none", "hdim_1", "hdim_2", "both"]: + assert get_single_pbc_coordinate(0, 10, 0, 10, 3, 3, PBC_condition) == (3, 3) + assert get_single_pbc_coordinate(0, 10, 0, 10, 0, 0, PBC_condition) == (0, 0) + assert get_single_pbc_coordinate(0, 10, 0, 10, 9, 9, PBC_condition) == (9, 9) # Test points off bounds on each side # First points off min/max of hdim_1 for the two that allow it - for PBC_condition in ['hdim_1', 'both']: - assert(get_single_pbc_coordinate(0, 10, 0, 10, -3, 3, PBC_condition) == (7,3)) - assert(get_single_pbc_coordinate(0, 10, 0, 10, 12, 3, PBC_condition) == (2,3)) - assert(get_single_pbc_coordinate(0, 10, 0, 10, 10, 3, PBC_condition) == (0,3)) + for PBC_condition in ["hdim_1", "both"]: + assert get_single_pbc_coordinate(0, 10, 0, 10, -3, 3, PBC_condition) == (7, 3) + assert get_single_pbc_coordinate(0, 10, 0, 10, 12, 3, PBC_condition) == (2, 3) + assert get_single_pbc_coordinate(0, 10, 0, 10, 10, 3, PBC_condition) == (0, 3) # Now test points off min/max of hdim_1 for the two that don't allow it (expect raise error) - for PBC_condition in ['none','hdim_2']: + for PBC_condition in ["none", "hdim_2"]: with pytest.raises(ValueError): get_single_pbc_coordinate(0, 10, 0, 10, -3, 3, PBC_condition) with pytest.raises(ValueError): @@ -132,15 +204,14 @@ def test_get_single_pbc_coordinate(): with pytest.raises(ValueError): get_single_pbc_coordinate(0, 10, 0, 10, 10, 3, PBC_condition) - - # Now test points off 
min/max of hdim_2 for the two that allow it - for PBC_condition in ['hdim_2', 'both']: - assert(get_single_pbc_coordinate(0, 10, 0, 10, 3, -3, PBC_condition) == (3,7)) - assert(get_single_pbc_coordinate(0, 10, 0, 10, 3, 12, PBC_condition) == (3,2)) - assert(get_single_pbc_coordinate(0, 10, 0, 10, 3, 10, PBC_condition) == (3,0)) + # Now test points off min/max of hdim_2 for the two that allow it + for PBC_condition in ["hdim_2", "both"]: + assert get_single_pbc_coordinate(0, 10, 0, 10, 3, -3, PBC_condition) == (3, 7) + assert get_single_pbc_coordinate(0, 10, 0, 10, 3, 12, PBC_condition) == (3, 2) + assert get_single_pbc_coordinate(0, 10, 0, 10, 3, 10, PBC_condition) == (3, 0) # Now test hdim_2 min/max for the two that don't allow it - for PBC_condition in ['none','hdim_1']: + for PBC_condition in ["none", "hdim_1"]: with pytest.raises(ValueError): get_single_pbc_coordinate(0, 10, 0, 10, 3, -3, PBC_condition) with pytest.raises(ValueError): @@ -149,205 +220,583 @@ def test_get_single_pbc_coordinate(): get_single_pbc_coordinate(0, 10, 0, 10, 3, 10, PBC_condition) # Now test hdim_1 and hdim_2 min/max for 'both' - assert(get_single_pbc_coordinate(0, 11, 0, 10, -3, -3, 'both') == (8,7)) - assert(get_single_pbc_coordinate(0, 10, 0, 10, 12, 12, 'both') == (2,2)) - + assert get_single_pbc_coordinate(0, 11, 0, 10, -3, -3, "both") == (8, 7) + assert get_single_pbc_coordinate(0, 10, 0, 10, 12, 12, "both") == (2, 2) # Now test hdim_1 and hdim/2 min/max for the three that don't allow it - for PBC_condition in ['none','hdim_1', 'hdim_2']: + for PBC_condition in ["none", "hdim_1", "hdim_2"]: with pytest.raises(ValueError): get_single_pbc_coordinate(0, 11, 0, 10, -3, -3, PBC_condition) with pytest.raises(ValueError): get_single_pbc_coordinate(0, 10, 0, 10, 12, 12, PBC_condition) - - def test_get_pbc_coordinates(): - '''Tests tobac.testing.get_pbc_coordinates. + """Tests tobac.testing.get_pbc_coordinates. Currently runs the following tests: For an invalid PBC_flag, we raise an error - For PBC_flag of 'none', we truncate the box and give a valid box. + For PBC_flag of 'none', we truncate the box and give a valid box. 
- ''' + """ with pytest.raises(ValueError): - get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'c') + get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, "c") # Test PBC_flag of none - assert (get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'none') == [(1, 4, 1, 4),]) - assert (get_pbc_coordinates(0, 10, 0, 10, -1, 4, 1, 4, 'none') == [(0, 4, 1, 4),]) - assert (get_pbc_coordinates(0, 10, 0, 10, 1, 12, 1, 4, 'none') == [(1, 10, 1, 4),]) - assert (get_pbc_coordinates(0, 10, 0, 10, 1, 12, -1, 4, 'none') == [(1, 10, 0, 4),]) - - # Test PBC_flag with hdim_1 + assert get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, "none") == [ + (1, 4, 1, 4), + ] + assert get_pbc_coordinates(0, 10, 0, 10, -1, 4, 1, 4, "none") == [ + (0, 4, 1, 4), + ] + assert get_pbc_coordinates(0, 10, 0, 10, 1, 12, 1, 4, "none") == [ + (1, 10, 1, 4), + ] + assert get_pbc_coordinates(0, 10, 0, 10, 1, 12, -1, 4, "none") == [ + (1, 10, 0, 4), + ] + + # Test PBC_flag with hdim_1 # Simple case, no PBC overlapping - assert (get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'hdim_1') == [(1, 4, 1, 4),]) + assert get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, "hdim_1") == [ + (1, 4, 1, 4), + ] # PBC going on the min side - assert (get_pbc_coordinates(0, 10, 0, 10, -1, 4, 1, 4, 'hdim_1') == [(0, 4, 1, 4), (9, 10, 1, 4)]) + assert get_pbc_coordinates(0, 10, 0, 10, -1, 4, 1, 4, "hdim_1") == [ + (0, 4, 1, 4), + (9, 10, 1, 4), + ] # PBC going on the min side; should be truncated in hdim_2. - assert (get_pbc_coordinates(0, 10, 0, 10, -1, 4, -1, 4, 'hdim_1') == [(0, 4, 0, 4), (9, 10, 0, 4)]) + assert get_pbc_coordinates(0, 10, 0, 10, -1, 4, -1, 4, "hdim_1") == [ + (0, 4, 0, 4), + (9, 10, 0, 4), + ] # PBC going on the max side only - assert (get_pbc_coordinates(0, 10, 0, 10, 4, 12, 1, 4, 'hdim_1') == [(4, 10, 1, 4), (0, 2, 1, 4)]) + assert get_pbc_coordinates(0, 10, 0, 10, 4, 12, 1, 4, "hdim_1") == [ + (4, 10, 1, 4), + (0, 2, 1, 4), + ] # PBC overlapping - assert (get_pbc_coordinates(0, 10, 0, 10, -4, 12, 1, 4, 'hdim_1') == [(0, 10, 1, 4),]) + assert get_pbc_coordinates(0, 10, 0, 10, -4, 12, 1, 4, "hdim_1") == [ + (0, 10, 1, 4), + ] # Test PBC_flag with hdim_2 # Simple case, no PBC overlapping - assert (get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'hdim_2') == [(1, 4, 1, 4),]) + assert get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, "hdim_2") == [ + (1, 4, 1, 4), + ] # PBC going on the min side - assert (get_pbc_coordinates(0, 10, 0, 10, 1, 4, -1, 4, 'hdim_2') == [(1, 4, 0, 4), (1, 4, 9, 10)]) + assert get_pbc_coordinates(0, 10, 0, 10, 1, 4, -1, 4, "hdim_2") == [ + (1, 4, 0, 4), + (1, 4, 9, 10), + ] # PBC going on the min side with truncation in hdim_1 - assert (get_pbc_coordinates(0, 10, 0, 10, -4, 4, -1, 4, 'hdim_2') == [(0, 4, 0, 4), (0, 4, 9, 10)]) - # PBC going on the max side - assert (get_pbc_coordinates(0, 10, 0, 10, 1, 4, 4, 12, 'hdim_2') == [(1, 4, 4, 10), (1, 4, 0, 2)]) + assert get_pbc_coordinates(0, 10, 0, 10, -4, 4, -1, 4, "hdim_2") == [ + (0, 4, 0, 4), + (0, 4, 9, 10), + ] + # PBC going on the max side + assert get_pbc_coordinates(0, 10, 0, 10, 1, 4, 4, 12, "hdim_2") == [ + (1, 4, 4, 10), + (1, 4, 0, 2), + ] # PBC overlapping - assert (get_pbc_coordinates(0, 10, 0, 10, 1, 4, -4, 12, 'hdim_2') == [(1, 4, 0, 10),]) + assert get_pbc_coordinates(0, 10, 0, 10, 1, 4, -4, 12, "hdim_2") == [ + (1, 4, 0, 10), + ] # Test PBC_flag with both # Simple case, no PBC overlapping - assert (get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'both') == [(1, 4, 1, 4),]) + assert get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, "both") == [ + (1, 4, 1, 4), + ] # hdim_1 only 
testing # PBC on the min side of hdim_1 only - assert (get_pbc_coordinates(0, 10, 0, 10, -1, 4, 1, 4, 'both') == [(0, 4, 1, 4), (9, 10, 1, 4)]) + assert get_pbc_coordinates(0, 10, 0, 10, -1, 4, 1, 4, "both") == [ + (0, 4, 1, 4), + (9, 10, 1, 4), + ] # PBC on the max side of hdim_1 only - assert (get_pbc_coordinates(0, 10, 0, 10, 4, 12, 1, 4, 'both') == [(4, 10, 1, 4), (0, 2, 1, 4)]) + assert get_pbc_coordinates(0, 10, 0, 10, 4, 12, 1, 4, "both") == [ + (4, 10, 1, 4), + (0, 2, 1, 4), + ] # PBC overlapping on max side of hdim_1 only - assert (get_pbc_coordinates(0, 10, 0, 10, -4, 12, 1, 4, 'both') == [(0, 10, 1, 4),]) + assert get_pbc_coordinates(0, 10, 0, 10, -4, 12, 1, 4, "both") == [ + (0, 10, 1, 4), + ] # hdim_2 only testing # PBC on the min side of hdim_2 only - assert (get_pbc_coordinates(0, 10, 0, 10, 1, 4, -1, 4, 'both') == [(1, 4, 0, 4), (1, 4, 9, 10)]) + assert get_pbc_coordinates(0, 10, 0, 10, 1, 4, -1, 4, "both") == [ + (1, 4, 0, 4), + (1, 4, 9, 10), + ] # PBC on the max side of hdim_2 only - assert (get_pbc_coordinates(0, 10, 0, 10, 1, 4, 4, 12, 'both') == [(1, 4, 4, 10), (1, 4, 0, 2)]) + assert get_pbc_coordinates(0, 10, 0, 10, 1, 4, 4, 12, "both") == [ + (1, 4, 4, 10), + (1, 4, 0, 2), + ] # PBC overlapping on max side of hdim_2 only - assert (get_pbc_coordinates(0, 10, 0, 10, 1, 4, -4, 12, 'both') == [(1, 4, 0, 10),]) + assert get_pbc_coordinates(0, 10, 0, 10, 1, 4, -4, 12, "both") == [ + (1, 4, 0, 10), + ] # hdim_1 and hdim_2 testing simultaneous - # both larger than the actual domain - assert (get_pbc_coordinates(0, 10, 0, 10, -1, 12, -4, 14, 'both') == [(0, 10, 0, 10),]) - # min in hdim_1 and hdim_2 - assert (lists_equal_without_order(get_pbc_coordinates(0, 10, 0, 10, -3, 3, -4, 2, 'both'), [(0, 3, 0, 2), (0, 3, 6, 10), (7, 10, 6, 10), (7, 10, 0, 2)])) + # both larger than the actual domain + assert get_pbc_coordinates(0, 10, 0, 10, -1, 12, -4, 14, "both") == [ + (0, 10, 0, 10), + ] + # min in hdim_1 and hdim_2 + assert lists_equal_without_order( + get_pbc_coordinates(0, 10, 0, 10, -3, 3, -4, 2, "both"), + [(0, 3, 0, 2), (0, 3, 6, 10), (7, 10, 6, 10), (7, 10, 0, 2)], + ) # max in hdim_1, min in hdim_2 - assert (lists_equal_without_order(get_pbc_coordinates(0, 10, 0, 10, 5, 12, -4, 2, 'both'), [(5, 10, 0, 2), (5, 10, 6, 10), (0, 2, 6, 10), (0, 2, 0, 2)])) + assert lists_equal_without_order( + get_pbc_coordinates(0, 10, 0, 10, 5, 12, -4, 2, "both"), + [(5, 10, 0, 2), (5, 10, 6, 10), (0, 2, 6, 10), (0, 2, 0, 2)], + ) # max in hdim_1 and hdim_2 - assert (lists_equal_without_order(get_pbc_coordinates(0, 10, 0, 10, 5, 12, 7, 15, 'both'), [(5, 10, 7, 10), (5, 10, 0, 5), (0, 2, 0, 5), (0, 2, 7, 10)])) + assert lists_equal_without_order( + get_pbc_coordinates(0, 10, 0, 10, 5, 12, 7, 15, "both"), + [(5, 10, 7, 10), (5, 10, 0, 5), (0, 2, 0, 5), (0, 2, 7, 10)], + ) # min in hdim_1, max in hdim_2 - assert (lists_equal_without_order(get_pbc_coordinates(0, 10, 0, 10, -3, 3, 7, 15, 'both'), [(0, 3, 7, 10), (0, 3, 0, 5), (7, 10, 0, 5), (7, 10, 7, 10)])) + assert lists_equal_without_order( + get_pbc_coordinates(0, 10, 0, 10, -3, 3, 7, 15, "both"), + [(0, 3, 7, 10), (0, 3, 0, 5), (7, 10, 0, 5), (7, 10, 7, 10)], + ) + def test_generate_single_feature(): - '''Tests the `generate_single_feature` function. 
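# The asserts above pin down, by example, how the testing helpers treat a box
# that crosses a periodic edge. The sketch below is an assumed one-axis
# restatement of that rule (not the tobac source): a box hanging off one edge
# of the domain comes back as two boxes, one clipped to the domain and one
# wrapped to the opposite side. The fully-overlapping case, which the tests
# show collapsing to the whole domain, is omitted here for brevity.
def split_periodic_interval(lo, hi, axis_min, axis_max):
    """Split [lo, hi) on a periodic axis [axis_min, axis_max) into in-domain pieces."""
    length = axis_max - axis_min
    if lo < axis_min:  # overhang below the min edge wraps to the top of the axis
        return [(axis_min, hi), (lo + length, axis_max)]
    if hi > axis_max:  # overhang above the max edge wraps to the bottom
        return [(lo, axis_max), (axis_min, hi - length)]
    return [(lo, hi)]

# Mirrors the hdim_1 cases above: (-1, 4) -> [(0, 4), (9, 10)], (4, 12) -> [(4, 10), (0, 2)].
assert split_periodic_interval(-1, 4, 0, 10) == [(0, 4), (9, 10)]
assert split_periodic_interval(4, 12, 0, 10) == [(4, 10), (0, 2)]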
- Currently runs the following tests: - A single feature is generated - - ''' - - # Testing a simple 3D case - expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)} - ]) + """Tests the `generate_single_feature` function. + Currently runs the following tests: + A single feature is generated - assert_frame_equal(generate_single_feature(1, 1, start_v = 1, frame_start = 0, max_h1 = 1000, max_h2 = 1000).sort_index(axis=1), expected_df.sort_index(axis=1)) + """ + + # Testing a simple 3D case + expected_df = pd.DataFrame.from_dict( + [ + { + "hdim_1": 1, + "hdim_2": 1, + "vdim": 1, + "frame": 0, + "feature": 1, + "time": datetime.datetime(2022, 1, 1, 0, 0), + } + ] + ) + + assert_frame_equal( + generate_single_feature( + 1, 1, start_v=1, frame_start=0, max_h1=1000, max_h2=1000 + ).sort_index(axis=1), + expected_df.sort_index(axis=1), + ) # Testing a simple 2D case - expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)} - ]) - assert_frame_equal(generate_single_feature(1, 1, frame_start = 0, max_h1 = 1000, max_h2 = 1000).sort_index(axis=1), expected_df.sort_index(axis=1)) + expected_df = pd.DataFrame.from_dict( + [ + { + "hdim_1": 1, + "hdim_2": 1, + "frame": 0, + "feature": 1, + "time": datetime.datetime(2022, 1, 1, 0, 0), + } + ] + ) + assert_frame_equal( + generate_single_feature( + 1, 1, frame_start=0, max_h1=1000, max_h2=1000 + ).sort_index(axis=1), + expected_df.sort_index(axis=1), + ) # Testing a simple 2D case with movement - expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)}, - {'hdim_1': 2, 'hdim_2': 2, 'frame': 1, 'feature': 2, 'time': datetime.datetime(2022, 1, 1,0,5)}, - {'hdim_1': 3, 'hdim_2': 3, 'frame': 2, 'feature': 3, 'time': datetime.datetime(2022, 1, 1,0,10)}, - {'hdim_1': 4, 'hdim_2': 4, 'frame': 3, 'feature': 4, 'time': datetime.datetime(2022, 1, 1,0,15)}, - ]) - assert_frame_equal(generate_single_feature(1, 1, frame_start = 0, num_frames=4, - spd_h1 = 1, spd_h2 = 1, max_h1 = 1000, max_h2 = 1000).sort_index(axis=1), expected_df.sort_index(axis=1)) - + expected_df = pd.DataFrame.from_dict( + [ + { + "hdim_1": 1, + "hdim_2": 1, + "frame": 0, + "feature": 1, + "time": datetime.datetime(2022, 1, 1, 0, 0), + }, + { + "hdim_1": 2, + "hdim_2": 2, + "frame": 1, + "feature": 2, + "time": datetime.datetime(2022, 1, 1, 0, 5), + }, + { + "hdim_1": 3, + "hdim_2": 3, + "frame": 2, + "feature": 3, + "time": datetime.datetime(2022, 1, 1, 0, 10), + }, + { + "hdim_1": 4, + "hdim_2": 4, + "frame": 3, + "feature": 4, + "time": datetime.datetime(2022, 1, 1, 0, 15), + }, + ] + ) + assert_frame_equal( + generate_single_feature( + 1, + 1, + frame_start=0, + num_frames=4, + spd_h1=1, + spd_h2=1, + max_h1=1000, + max_h2=1000, + ).sort_index(axis=1), + expected_df.sort_index(axis=1), + ) + # Testing a simple 3D case with movement - expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)}, - {'hdim_1': 2, 'hdim_2': 2, 'vdim': 2, 'frame': 1, 'feature': 2, 'time': datetime.datetime(2022, 1, 1,0,5)}, - {'hdim_1': 3, 'hdim_2': 3, 'vdim': 3, 'frame': 2, 'feature': 3, 'time': datetime.datetime(2022, 1, 1,0,10)}, - {'hdim_1': 4, 'hdim_2': 4, 'vdim': 4, 'frame': 3, 'feature': 4, 'time': datetime.datetime(2022, 1, 1,0,15)}, - ]) - 
assert_frame_equal(generate_single_feature(1, 1, start_v = 1, frame_start = 0, num_frames=4, - spd_h1 = 1, spd_h2 = 1, spd_v = 1, max_h1 = 1000, max_h2 = 1000).sort_index(axis=1), expected_df.sort_index(axis=1)) + expected_df = pd.DataFrame.from_dict( + [ + { + "hdim_1": 1, + "hdim_2": 1, + "vdim": 1, + "frame": 0, + "feature": 1, + "time": datetime.datetime(2022, 1, 1, 0, 0), + }, + { + "hdim_1": 2, + "hdim_2": 2, + "vdim": 2, + "frame": 1, + "feature": 2, + "time": datetime.datetime(2022, 1, 1, 0, 5), + }, + { + "hdim_1": 3, + "hdim_2": 3, + "vdim": 3, + "frame": 2, + "feature": 3, + "time": datetime.datetime(2022, 1, 1, 0, 10), + }, + { + "hdim_1": 4, + "hdim_2": 4, + "vdim": 4, + "frame": 3, + "feature": 4, + "time": datetime.datetime(2022, 1, 1, 0, 15), + }, + ] + ) + assert_frame_equal( + generate_single_feature( + 1, + 1, + start_v=1, + frame_start=0, + num_frames=4, + spd_h1=1, + spd_h2=1, + spd_v=1, + max_h1=1000, + max_h2=1000, + ).sort_index(axis=1), + expected_df.sort_index(axis=1), + ) # Testing a simple 3D case with movement that passes the hdim_1 boundary - expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1, 0, 0)}, - {'hdim_1': 5, 'hdim_2': 2, 'vdim': 2, 'frame': 1, 'feature': 2, 'time': datetime.datetime(2022, 1, 1, 0, 5)}, - {'hdim_1': 9, 'hdim_2': 3, 'vdim': 3, 'frame': 2, 'feature': 3, 'time': datetime.datetime(2022, 1, 1, 0, 10)}, - {'hdim_1': 3, 'hdim_2': 4, 'vdim': 4, 'frame': 3, 'feature': 4, 'time': datetime.datetime(2022, 1, 1, 0, 15)}, - ]) - assert_frame_equal(generate_single_feature(1, 1, start_v = 1, - min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, - frame_start = 0, num_frames=4, - spd_h1 = 4, spd_h2 = 1, spd_v = 1, PBC_flag='hdim_1').sort_index(axis=1), expected_df.sort_index(axis=1)) + expected_df = pd.DataFrame.from_dict( + [ + { + "hdim_1": 1, + "hdim_2": 1, + "vdim": 1, + "frame": 0, + "feature": 1, + "time": datetime.datetime(2022, 1, 1, 0, 0), + }, + { + "hdim_1": 5, + "hdim_2": 2, + "vdim": 2, + "frame": 1, + "feature": 2, + "time": datetime.datetime(2022, 1, 1, 0, 5), + }, + { + "hdim_1": 9, + "hdim_2": 3, + "vdim": 3, + "frame": 2, + "feature": 3, + "time": datetime.datetime(2022, 1, 1, 0, 10), + }, + { + "hdim_1": 3, + "hdim_2": 4, + "vdim": 4, + "frame": 3, + "feature": 4, + "time": datetime.datetime(2022, 1, 1, 0, 15), + }, + ] + ) + assert_frame_equal( + generate_single_feature( + 1, + 1, + start_v=1, + min_h1=0, + max_h1=10, + min_h2=0, + max_h2=10, + frame_start=0, + num_frames=4, + spd_h1=4, + spd_h2=1, + spd_v=1, + PBC_flag="hdim_1", + ).sort_index(axis=1), + expected_df.sort_index(axis=1), + ) # Testing a simple 3D case with movement that passes the hdim_1 boundary - expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)}, - {'hdim_1': 5, 'hdim_2': 2, 'vdim': 2, 'frame': 1, 'feature': 2, 'time': datetime.datetime(2022, 1, 1,0,5)}, - {'hdim_1': 9, 'hdim_2': 3, 'vdim': 3, 'frame': 2, 'feature': 3, 'time': datetime.datetime(2022, 1, 1,0,10)}, - {'hdim_1': 3, 'hdim_2': 4, 'vdim': 4, 'frame': 3, 'feature': 4, 'time': datetime.datetime(2022, 1, 1,0,15)}, - ]) - assert_frame_equal(generate_single_feature(1, 1, start_v = 1, - min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, - frame_start = 0, num_frames=4, - spd_h1 = 4, spd_h2 = 1, spd_v = 1, PBC_flag='hdim_1').sort_index(axis=1), expected_df.sort_index(axis=1)) + expected_df = pd.DataFrame.from_dict( + [ + { + 
"hdim_1": 1, + "hdim_2": 1, + "vdim": 1, + "frame": 0, + "feature": 1, + "time": datetime.datetime(2022, 1, 1, 0, 0), + }, + { + "hdim_1": 5, + "hdim_2": 2, + "vdim": 2, + "frame": 1, + "feature": 2, + "time": datetime.datetime(2022, 1, 1, 0, 5), + }, + { + "hdim_1": 9, + "hdim_2": 3, + "vdim": 3, + "frame": 2, + "feature": 3, + "time": datetime.datetime(2022, 1, 1, 0, 10), + }, + { + "hdim_1": 3, + "hdim_2": 4, + "vdim": 4, + "frame": 3, + "feature": 4, + "time": datetime.datetime(2022, 1, 1, 0, 15), + }, + ] + ) + assert_frame_equal( + generate_single_feature( + 1, + 1, + start_v=1, + min_h1=0, + max_h1=10, + min_h2=0, + max_h2=10, + frame_start=0, + num_frames=4, + spd_h1=4, + spd_h2=1, + spd_v=1, + PBC_flag="hdim_1", + ).sort_index(axis=1), + expected_df.sort_index(axis=1), + ) # Testing a simple 3D case with movement that passes the hdim_2 boundary - expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)}, - {'hdim_1': 2, 'hdim_2': 5, 'vdim': 2, 'frame': 1, 'feature': 2, 'time': datetime.datetime(2022, 1, 1,0,5)}, - {'hdim_1': 3, 'hdim_2': 9, 'vdim': 3, 'frame': 2, 'feature': 3, 'time': datetime.datetime(2022, 1, 1,0,10)}, - {'hdim_1': 4, 'hdim_2': 3, 'vdim': 4, 'frame': 3, 'feature': 4, 'time': datetime.datetime(2022, 1, 1,0,15)}, - ]) - assert_frame_equal(generate_single_feature(1, 1, start_v = 1, - min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, - frame_start = 0, num_frames=4, - spd_h1 = 1, spd_h2 = 4, spd_v = 1, PBC_flag='hdim_2').sort_index(axis=1), expected_df.sort_index(axis=1)) + expected_df = pd.DataFrame.from_dict( + [ + { + "hdim_1": 1, + "hdim_2": 1, + "vdim": 1, + "frame": 0, + "feature": 1, + "time": datetime.datetime(2022, 1, 1, 0, 0), + }, + { + "hdim_1": 2, + "hdim_2": 5, + "vdim": 2, + "frame": 1, + "feature": 2, + "time": datetime.datetime(2022, 1, 1, 0, 5), + }, + { + "hdim_1": 3, + "hdim_2": 9, + "vdim": 3, + "frame": 2, + "feature": 3, + "time": datetime.datetime(2022, 1, 1, 0, 10), + }, + { + "hdim_1": 4, + "hdim_2": 3, + "vdim": 4, + "frame": 3, + "feature": 4, + "time": datetime.datetime(2022, 1, 1, 0, 15), + }, + ] + ) + assert_frame_equal( + generate_single_feature( + 1, + 1, + start_v=1, + min_h1=0, + max_h1=10, + min_h2=0, + max_h2=10, + frame_start=0, + num_frames=4, + spd_h1=1, + spd_h2=4, + spd_v=1, + PBC_flag="hdim_2", + ).sort_index(axis=1), + expected_df.sort_index(axis=1), + ) # Testing a simple 3D case with movement that passes the hdim_1 and hdim_2 boundaries - expected_df = pd.DataFrame.from_dict([ - {'hdim_1': 1, 'hdim_2': 1, 'vdim': 1, 'frame': 0, 'feature': 1, 'time': datetime.datetime(2022, 1, 1,0,0)}, - {'hdim_1': 6, 'hdim_2': 5, 'vdim': 2, 'frame': 1, 'feature': 2, 'time': datetime.datetime(2022, 1, 1,0,5)}, - {'hdim_1': 1, 'hdim_2': 9, 'vdim': 3, 'frame': 2, 'feature': 3, 'time': datetime.datetime(2022, 1, 1,0,10)}, - {'hdim_1': 6, 'hdim_2': 3, 'vdim': 4, 'frame': 3, 'feature': 4, 'time': datetime.datetime(2022, 1, 1,0,15)}, - ]) - assert_frame_equal(generate_single_feature(1, 1, start_v = 1, - min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, - frame_start = 0, num_frames=4, - spd_h1 = 5, spd_h2 = 4, spd_v = 1, PBC_flag='both').sort_index(axis=1), expected_df.sort_index(axis=1)) - -@pytest.mark.parametrize("in_pt,in_sz,axis_size,out_pts", - [(3, 0,(0,5), (3,3)), - (3, 3,(0,5), (2,5)), - ] + expected_df = pd.DataFrame.from_dict( + [ + { + "hdim_1": 1, + "hdim_2": 1, + "vdim": 1, + "frame": 0, + "feature": 1, + "time": datetime.datetime(2022, 1, 
1, 0, 0), + }, + { + "hdim_1": 6, + "hdim_2": 5, + "vdim": 2, + "frame": 1, + "feature": 2, + "time": datetime.datetime(2022, 1, 1, 0, 5), + }, + { + "hdim_1": 1, + "hdim_2": 9, + "vdim": 3, + "frame": 2, + "feature": 3, + "time": datetime.datetime(2022, 1, 1, 0, 10), + }, + { + "hdim_1": 6, + "hdim_2": 3, + "vdim": 4, + "frame": 3, + "feature": 4, + "time": datetime.datetime(2022, 1, 1, 0, 15), + }, + ] + ) + assert_frame_equal( + generate_single_feature( + 1, + 1, + start_v=1, + min_h1=0, + max_h1=10, + min_h2=0, + max_h2=10, + frame_start=0, + num_frames=4, + spd_h1=5, + spd_h2=4, + spd_v=1, + PBC_flag="both", + ).sort_index(axis=1), + expected_df.sort_index(axis=1), + ) + + +@pytest.mark.parametrize( + "in_pt,in_sz,axis_size,out_pts", + [ + (3, 0, (0, 5), (3, 3)), + (3, 3, (0, 5), (2, 5)), + ], ) def test_get_start_end_of_feat_nopbc(in_pt, in_sz, axis_size, out_pts): - '''Tests ```tobac.testing.get_start_end_of_feat``` + """Tests ```tobac.testing.get_start_end_of_feat```""" + assert ( + tbtest.get_start_end_of_feat(in_pt, in_sz, axis_size[0], axis_size[1]) + == out_pts + ) - ''' - assert tbtest.get_start_end_of_feat(in_pt, in_sz, axis_size[0], axis_size[1]) == out_pts - -''' +""" I acknowledge that this is a little confusing for the expected outputs, especially for the 3D. -''' -@pytest.mark.parametrize("min_max_coords, lengths, expected_outs", - [((0,3), (4,),[0,1,2,3]), - ((0,3, 0,3), (4,4),[[[0,]*4, [1]*4,[2]*4,[3]*4],[[0,1,2,3]]*4,]), - ((0,1, 0,1, 0, 1), (2,2,2),[[[[0]*2]*2, [[1]*2]*2,], - [[[0,0],[1,1]],[[0,0],[1,1]]], - [[[0,1],[0,1]],[[0,1],[0,1]]]] - ), - ] +""" + + +@pytest.mark.parametrize( + "min_max_coords, lengths, expected_outs", + [ + ((0, 3), (4,), [0, 1, 2, 3]), + ( + (0, 3, 0, 3), + (4, 4), + [ + [ + [ + 0, + ] + * 4, + [1] * 4, + [2] * 4, + [3] * 4, + ], + [[0, 1, 2, 3]] * 4, + ], + ), + ( + (0, 1, 0, 1, 0, 1), + (2, 2, 2), + [ + [ + [[0] * 2] * 2, + [[1] * 2] * 2, + ], + [[[0, 0], [1, 1]], [[0, 0], [1, 1]]], + [[[0, 1], [0, 1]], [[0, 1], [0, 1]]], + ], + ), + ], ) def test_generate_grid_coords(min_max_coords, lengths, expected_outs): - '''Tests ```tobac.testing.generate_grid_coords``` + """Tests ```tobac.testing.generate_grid_coords``` Parameters ---------- min_max_coords: array-like, either length 2, length 4, or length 6. @@ -359,7 +808,8 @@ def test_generate_grid_coords(min_max_coords, lengths, expected_outs): of min_max_coords. expected_outs: array-like, either 1D, 2D, or 3D The expected output - ''' + """ import numpy as np + out_grid = tbtest.generate_grid_coords(min_max_coords, lengths) assert np.all(np.isclose(out_grid, np.array(expected_outs))) diff --git a/tobac/tests/test_tracking.py b/tobac/tests/test_tracking.py index 9e6feed2..41d676d6 100644 --- a/tobac/tests/test_tracking.py +++ b/tobac/tests/test_tracking.py @@ -1,7 +1,7 @@ -''' +""" Test for the trackpy tracking functions Who's watching the watchmen, basically. 
-''' +""" from pyexpat import features import pytest import tobac.testing @@ -10,137 +10,254 @@ from pandas.util.testing import assert_frame_equal import numpy as np + def test_linking_trackpy(): - '''Function to test ```tobac.tracking.linking_trackpy``` + """Function to test ```tobac.tracking.linking_trackpy``` Currently tests: - 2D tracking + 2D tracking 3D tracking 3D tracking with PBCs - ''' + """ # Test 2D tracking of a simple moving feature - test_feature = tobac.testing.generate_single_feature(1, 1, - min_h1 = 0, max_h1 = 100, min_h2 = 0, max_h2 = 100, - frame_start = 0, num_frames=5, - spd_h1 = 1, spd_h2 = 1, PBC_flag='none') - + test_feature = tobac.testing.generate_single_feature( + 1, + 1, + min_h1=0, + max_h1=100, + min_h2=0, + max_h2=100, + frame_start=0, + num_frames=5, + spd_h1=1, + spd_h2=1, + PBC_flag="none", + ) + expected_out_feature = copy.deepcopy(test_feature) - expected_out_feature['cell'] = 1.0 + expected_out_feature["cell"] = 1.0 actual_out_feature = tobac.tracking.linking_trackpy( - test_feature, None, 5, 1000, - v_max = 10000, method_linking='predict', - PBC_flag = 'none' + test_feature, + None, + 5, + 1000, + v_max=10000, + method_linking="predict", + PBC_flag="none", ) - # Just want to remove the time_cell column here. - actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'frame', 'feature', 'time', 'cell']] + # Just want to remove the time_cell column here. + actual_out_feature = actual_out_feature[ + ["hdim_1", "hdim_2", "frame", "feature", "time", "cell"] + ] - assert_frame_equal(expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1)) + assert_frame_equal( + expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1) + ) # Test 3D tracking of a simple moving feature - test_feature = tobac.testing.generate_single_feature(1, 1, start_v = 1, - min_h1 = 0, max_h1 = 100, min_h2 = 0, max_h2 = 100, - frame_start = 0, num_frames=5, - spd_h1 = 1, spd_h2 = 1, spd_v = 1, PBC_flag='none') - + test_feature = tobac.testing.generate_single_feature( + 1, + 1, + start_v=1, + min_h1=0, + max_h1=100, + min_h2=0, + max_h2=100, + frame_start=0, + num_frames=5, + spd_h1=1, + spd_h2=1, + spd_v=1, + PBC_flag="none", + ) + expected_out_feature = copy.deepcopy(test_feature) - expected_out_feature['cell'] = 1.0 + expected_out_feature["cell"] = 1.0 actual_out_feature = tobac.tracking.linking_trackpy( - test_feature, None, 5, 1000, dz=1000, - v_max = 10000, method_linking='predict', - PBC_flag = 'none', vertical_coord=None + test_feature, + None, + 5, + 1000, + dz=1000, + v_max=10000, + method_linking="predict", + PBC_flag="none", + vertical_coord=None, ) - # Just want to remove the time_cell column here. - actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'feature', 'time', 'cell']] - - assert_frame_equal(expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1)) + # Just want to remove the time_cell column here. 
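# A note on the pattern used throughout these tracking tests: both frames are
# passed through .sort_index(axis=1) because assert_frame_equal compares
# columns in order by default. A minimal standalone illustration with
# hypothetical frames (imported here from pandas.testing, the maintained path):
import pandas as pd
from pandas.testing import assert_frame_equal as _assert_frame_equal

_left = pd.DataFrame({"hdim_1": [1], "hdim_2": [2]})
_right = pd.DataFrame({"hdim_2": [2], "hdim_1": [1]})
# Same content, different column order: sorting the columns first makes the
# comparison order-insensitive, which is all the tests rely on.
_assert_frame_equal(_left.sort_index(axis=1), _right.sort_index(axis=1))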
+ actual_out_feature = actual_out_feature[ + ["hdim_1", "hdim_2", "vdim", "frame", "feature", "time", "cell"] + ] + assert_frame_equal( + expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1) + ) # Test 3D tracking of a simple moving feature with periodic boundaries on hdim_1 - test_feature = tobac.testing.generate_single_feature(1, 1, start_v = 1, - min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, - frame_start = 0, num_frames=8, - spd_h1 = 3, spd_h2 = 1, spd_v = 1, PBC_flag='hdim_1') - + test_feature = tobac.testing.generate_single_feature( + 1, + 1, + start_v=1, + min_h1=0, + max_h1=10, + min_h2=0, + max_h2=10, + frame_start=0, + num_frames=8, + spd_h1=3, + spd_h2=1, + spd_v=1, + PBC_flag="hdim_1", + ) + expected_out_feature = copy.deepcopy(test_feature) - expected_out_feature['cell'] = 1.0 + expected_out_feature["cell"] = 1.0 actual_out_feature = tobac.tracking.linking_trackpy( - test_feature, None, 1, 1, dz=1, min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, - v_max = 4, method_linking='predict', vertical_coord=None, - PBC_flag = 'hdim_1' + test_feature, + None, + 1, + 1, + dz=1, + min_h1=0, + max_h1=10, + min_h2=0, + max_h2=10, + v_max=4, + method_linking="predict", + vertical_coord=None, + PBC_flag="hdim_1", ) - # Just want to remove the time_cell column here. - actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'feature', 'time', 'cell']] + # Just want to remove the time_cell column here. + actual_out_feature = actual_out_feature[ + ["hdim_1", "hdim_2", "vdim", "frame", "feature", "time", "cell"] + ] - assert_frame_equal(expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1)) + assert_frame_equal( + expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1) + ) # Test 3D tracking of a simple moving feature with periodic boundaries on hdim_2 - test_feature = tobac.testing.generate_single_feature(1, 1, start_v = 1, - min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, - frame_start = 0, num_frames=8, - spd_h1 = 1, spd_h2 = 3, spd_v = 1, PBC_flag='hdim_2') - + test_feature = tobac.testing.generate_single_feature( + 1, + 1, + start_v=1, + min_h1=0, + max_h1=10, + min_h2=0, + max_h2=10, + frame_start=0, + num_frames=8, + spd_h1=1, + spd_h2=3, + spd_v=1, + PBC_flag="hdim_2", + ) + expected_out_feature = copy.deepcopy(test_feature) - expected_out_feature['cell'] = 1.0 + expected_out_feature["cell"] = 1.0 actual_out_feature = tobac.tracking.linking_trackpy( - test_feature, None, 1, 1, dz=1, min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, - v_max = 4, method_linking='predict', vertical_coord=None, - PBC_flag = 'hdim_2' + test_feature, + None, + 1, + 1, + dz=1, + min_h1=0, + max_h1=10, + min_h2=0, + max_h2=10, + v_max=4, + method_linking="predict", + vertical_coord=None, + PBC_flag="hdim_2", ) - # Just want to remove the time_cell column here. - actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'feature', 'time', 'cell']] + # Just want to remove the time_cell column here. 
+ actual_out_feature = actual_out_feature[ + ["hdim_1", "hdim_2", "vdim", "frame", "feature", "time", "cell"] + ] - assert_frame_equal(expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1)) + assert_frame_equal( + expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1) + ) # Test 3D tracking of a simple moving feature with periodic boundaries on both hdim_1 and hdim_2 - test_feature = tobac.testing.generate_single_feature(1, 1, start_v = 1, - min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, - frame_start = 0, num_frames=8, - spd_h1 = 3, spd_h2 = 3, spd_v = 0, PBC_flag='both') - + test_feature = tobac.testing.generate_single_feature( + 1, + 1, + start_v=1, + min_h1=0, + max_h1=10, + min_h2=0, + max_h2=10, + frame_start=0, + num_frames=8, + spd_h1=3, + spd_h2=3, + spd_v=0, + PBC_flag="both", + ) + expected_out_feature = copy.deepcopy(test_feature) - expected_out_feature['cell'] = 1.0 + expected_out_feature["cell"] = 1.0 actual_out_feature = tobac.tracking.linking_trackpy( - test_feature, None, 1, 1,dz=1, min_h1 = 0, max_h1 = 10, min_h2 = 0, max_h2 = 10, - v_max = 5, method_linking='predict', vertical_coord=None, - PBC_flag = 'both' + test_feature, + None, + 1, + 1, + dz=1, + min_h1=0, + max_h1=10, + min_h2=0, + max_h2=10, + v_max=5, + method_linking="predict", + vertical_coord=None, + PBC_flag="both", ) - # Just want to remove the time_cell column here. - actual_out_feature = actual_out_feature[['hdim_1', 'hdim_2', 'vdim', 'frame', 'feature', 'time', 'cell']] + # Just want to remove the time_cell column here. + actual_out_feature = actual_out_feature[ + ["hdim_1", "hdim_2", "vdim", "frame", "feature", "time", "cell"] + ] - assert_frame_equal(expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1)) + assert_frame_equal( + expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1) + ) def test_build_distance_function(): - '''Tests ```tobac.tracking.build_distance_function``` + """Tests ```tobac.tracking.build_distance_function``` Currently tests: that this produces an object that is suitable to call from trackpy - ''' + """ - test_func = tobac.tracking.build_distance_function(0, 10, 0, 10, 'both') - assert (test_func(np.array((0,9,9)), np.array((0,0,0))) == pytest.approx(1.4142135)) + test_func = tobac.tracking.build_distance_function(0, 10, 0, 10, "both") + assert test_func(np.array((0, 9, 9)), np.array((0, 0, 0))) == pytest.approx( + 1.4142135 + ) -@pytest.mark.parametrize("point_init, speed, dxy, actual_dz, v_max," - "use_dz, features_connected", - [((0,0,0), (1,0,0), 1000, 100, 200, True, True), - ((0,0,0), (1,0,0), 1000, 100, 200, False, True), - ((0,0,0), (5,0,0), 1000, 100, 200, True, False), - ((0,0,0), (5,0,0), 1000, 100, 200, False, False), - ] +@pytest.mark.parametrize( + "point_init, speed, dxy, actual_dz, v_max," "use_dz, features_connected", + [ + ((0, 0, 0), (1, 0, 0), 1000, 100, 200, True, True), + ((0, 0, 0), (1, 0, 0), 1000, 100, 200, False, True), + ((0, 0, 0), (5, 0, 0), 1000, 100, 200, True, False), + ((0, 0, 0), (5, 0, 0), 1000, 100, 200, False, False), + ], ) -def test_3D_tracking_min_dist_z(point_init, speed, dxy, actual_dz, v_max, - use_dz, features_connected): - '''Tests ```tobac.tracking.linking_trackpy``` with +def test_3D_tracking_min_dist_z( + point_init, speed, dxy, actual_dz, v_max, use_dz, features_connected +): + """Tests ```tobac.tracking.linking_trackpy``` with points in z with varying distances between them. 
- + Parameters ---------- - point_init: 3D array-like + point_init: 3D array-like Initial point (z, y, x) speed: 3D array-like Speed of the feature (z, y, x) @@ -153,47 +270,51 @@ def test_3D_tracking_min_dist_z(point_init, speed, dxy, actual_dz, v_max, to use the calculated vertical coordinates features_connected: bool Do we expect the features to be connected? - ''' - + """ test_feature = tobac.testing.generate_single_feature( - start_h1 = point_init[1], start_h2 = point_init[2], - start_v = point_init[0], - min_h1 = 0, max_h1 = 100, min_h2 = 0, max_h2 = 100, - frame_start = 0, num_frames=2, - spd_h1 = speed[1], spd_h2 = speed[2], spd_v=speed[0], - PBC_flag='none') + start_h1=point_init[1], + start_h2=point_init[2], + start_v=point_init[0], + min_h1=0, + max_h1=100, + min_h2=0, + max_h2=100, + frame_start=0, + num_frames=2, + spd_h1=speed[1], + spd_h2=speed[2], + spd_v=speed[0], + PBC_flag="none", + ) if not use_dz: - test_feature['z'] = test_feature['vdim']*actual_dz - + test_feature["z"] = test_feature["vdim"] * actual_dz + expected_out_feature = copy.deepcopy(test_feature) if features_connected: - expected_out_feature['cell'] = 1.0 + expected_out_feature["cell"] = 1.0 else: - expected_out_feature['cell'] = np.nan + expected_out_feature["cell"] = np.nan common_params = { - 'features': test_feature, - 'field_in': None, - 'dt': 1, - 'time_cell_min': 1, - 'dxy': dxy, - 'v_max': v_max, - 'method_linking': 'predict', + "features": test_feature, + "field_in": None, + "dt": 1, + "time_cell_min": 1, + "dxy": dxy, + "v_max": v_max, + "method_linking": "predict", } - if use_dz: - common_params['dz'] = actual_dz - common_params['vertical_coord'] = None + if use_dz: + common_params["dz"] = actual_dz + common_params["vertical_coord"] = None else: - common_params['vertical_coord'] = 'z' + common_params["vertical_coord"] = "z" - actual_out_feature = tobac.tracking.linking_trackpy( - **common_params + actual_out_feature = tobac.tracking.linking_trackpy(**common_params) + # Just want to remove the time_cell column here. + actual_out_feature = actual_out_feature.drop("time_cell", axis=1) + assert_frame_equal( + expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1) ) - # Just want to remove the time_cell column here. - actual_out_feature = actual_out_feature.drop('time_cell', axis=1) - assert_frame_equal(expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1)) - - - diff --git a/tobac/tests/test_util.py b/tobac/tests/test_util.py index 41caa4f2..893a5a42 100644 --- a/tobac/tests/test_util.py +++ b/tobac/tests/test_util.py @@ -8,7 +8,7 @@ def lists_equal_without_order(a, b): """ - This will make sure the inner list contain the same, + This will make sure the inner list contain the same, but doesn't account for duplicate groups. from: https://stackoverflow.com/questions/31501909/assert-list-of-list-equality-without-order-in-python/31502000 """ @@ -20,42 +20,40 @@ def lists_equal_without_order(a, b): def test_get_label_props_in_dict(): - '''Testing ```tobac.feature_detection.get_label_props_in_dict``` for both 2D and 3D cases. 
- ''' + """Testing ```tobac.feature_detection.get_label_props_in_dict``` for both 2D and 3D cases.""" import skimage.measure as skim - test_3D_data = tobac.testing.make_sample_data_3D_3blobs(data_type='xarray') - test_2D_data = tobac.testing.make_sample_data_2D_3blobs(data_type='xarray') + test_3D_data = tobac.testing.make_sample_data_3D_3blobs(data_type="xarray") + test_2D_data = tobac.testing.make_sample_data_2D_3blobs(data_type="xarray") # make sure it works for 3D data labels_3D = skim.label(test_3D_data.values[0]) - + output_3D = tb_utils.get_label_props_in_dict(labels_3D) - - #make sure it is a dict + + # make sure it is a dict assert type(output_3D) is dict - #make sure we get at least one output, there should be at least one label. + # make sure we get at least one output, there should be at least one label. assert len(output_3D) > 0 # make sure it works for 2D data labels_2D = skim.label(test_2D_data.values[0]) - + output_2D = tb_utils.get_label_props_in_dict(labels_2D) - - #make sure it is a dict + + # make sure it is a dict assert type(output_2D) is dict - #make sure we get at least one output, there should be at least one label. + # make sure we get at least one output, there should be at least one label. assert len(output_2D) > 0 def test_get_indices_of_labels_from_reg_prop_dict(): - '''Testing ```tobac.feature_detection.get_indices_of_labels_from_reg_prop_dict``` for 2D and 3D cases. - ''' + """Testing ```tobac.feature_detection.get_indices_of_labels_from_reg_prop_dict``` for 2D and 3D cases.""" import skimage.measure as skim import numpy as np - test_3D_data = tobac.testing.make_sample_data_3D_3blobs(data_type='xarray') - test_2D_data = tobac.testing.make_sample_data_2D_3blobs(data_type='xarray') + test_3D_data = tobac.testing.make_sample_data_3D_3blobs(data_type="xarray") + test_2D_data = tobac.testing.make_sample_data_2D_3blobs(data_type="xarray") # make sure it works for 3D data labels_3D = skim.label(test_3D_data.values[0]) @@ -66,123 +64,165 @@ def test_get_indices_of_labels_from_reg_prop_dict(): labels_2D = skim.label(test_2D_data.values[0]) nx_2D = test_2D_data.values[0].shape[1] ny_2D = test_2D_data.values[0].shape[0] - + region_props_3D = tb_utils.get_label_props_in_dict(labels_3D) region_props_2D = tb_utils.get_label_props_in_dict(labels_2D) - #get_indices_of_labels_from_reg_prop_dict - - [curr_loc_indices, z_indices, y_indices, x_indices] = tb_utils.get_indices_of_labels_from_reg_prop_dict(region_props_3D) + # get_indices_of_labels_from_reg_prop_dict + + [ + curr_loc_indices, + z_indices, + y_indices, + x_indices, + ] = tb_utils.get_indices_of_labels_from_reg_prop_dict(region_props_3D) for index_key in curr_loc_indices: # there should be at least one value in each. 
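# For context on the skim.label calls in these tests, a tiny self-contained
# sketch with a toy array (not tobac data): skimage.measure.label assigns an
# integer label, starting at 1, to each connected region of nonzero values,
# with 0 treated as background. Label images like this are what
# get_label_props_in_dict and get_indices_of_labels_from_reg_prop_dict consume.
import numpy as np
import skimage.measure as skim

_toy = np.array([[1, 1, 0, 0],
                 [0, 0, 0, 1]])
_labels = skim.label(_toy)
assert _labels.max() == 2  # two disconnected blobs -> labels 1 and 2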
assert curr_loc_indices[index_key] > 0 - - assert np.all(z_indices[index_key] >= 0) and np.all(z_indices[index_key] < nz_3D) - assert np.all(x_indices[index_key] >= 0) and np.all(x_indices[index_key] < nx_3D) - assert np.all(y_indices[index_key] >= 0) and np.all(y_indices[index_key] < ny_3D) - - [curr_loc_indices, y_indices, x_indices] = tb_utils.get_indices_of_labels_from_reg_prop_dict(region_props_2D) + + assert np.all(z_indices[index_key] >= 0) and np.all( + z_indices[index_key] < nz_3D + ) + assert np.all(x_indices[index_key] >= 0) and np.all( + x_indices[index_key] < nx_3D + ) + assert np.all(y_indices[index_key] >= 0) and np.all( + y_indices[index_key] < ny_3D + ) + + [ + curr_loc_indices, + y_indices, + x_indices, + ] = tb_utils.get_indices_of_labels_from_reg_prop_dict(region_props_2D) for index_key in curr_loc_indices: # there should be at least one value in each. assert curr_loc_indices[index_key] > 0 - - assert np.all(x_indices[index_key] >= 0) and np.all(x_indices[index_key] < nx_2D) - assert np.all(y_indices[index_key] >= 0) and np.all(y_indices[index_key] < ny_2D) - + + assert np.all(x_indices[index_key] >= 0) and np.all( + x_indices[index_key] < nx_2D + ) + assert np.all(y_indices[index_key] >= 0) and np.all( + y_indices[index_key] < ny_2D + ) def test_calc_distance_coords_pbc(): - '''Tests ```tobac.utils.calc_distance_coords_pbc``` + """Tests ```tobac.utils.calc_distance_coords_pbc``` Currently tests: - two points in normal space + two points in normal space Periodicity along hdim_1, hdim_2, and corners - ''' + """ import numpy as np # Test first two points in normal space with varying PBC conditions - for PBC_condition in ['none', 'hdim_1', 'hdim_2', 'both']: - assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(0)) - assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,1)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(1)) - assert (tb_utils.calc_distance_coords_pbc(np.array((0,0)), np.array((0,1)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(1)) - assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,0)), np.array((3,3,1)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(4.3588989, rel=1e-3)) + for PBC_condition in ["none", "hdim_1", "hdim_2", "both"]: + assert tb_utils.calc_distance_coords_pbc( + np.array((0, 0, 0)), np.array((0, 0, 0)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(0) + assert tb_utils.calc_distance_coords_pbc( + np.array((0, 0, 0)), np.array((0, 0, 1)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(1) + assert tb_utils.calc_distance_coords_pbc( + np.array((0, 0)), np.array((0, 1)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(1) + assert tb_utils.calc_distance_coords_pbc( + np.array((0, 0, 0)), np.array((3, 3, 1)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(4.3588989, rel=1e-3) # Now test two points that will be closer along the hdim_1 boundary for cases without PBCs - for PBC_condition in ['hdim_1', 'both']: - assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(1)) - assert (tb_utils.calc_distance_coords_pbc(np.array((0,9,0)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(1)) - assert (tb_utils.calc_distance_coords_pbc(np.array((8,0)), np.array((0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(2)) - assert (tb_utils.calc_distance_coords_pbc(np.array((4,0,4)), np.array((3,7,3)), 0, 10, 0, 10, PBC_condition) - == 
pytest.approx(3.3166247)) - assert (tb_utils.calc_distance_coords_pbc(np.array((4,0,4)), np.array((3,7,3)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(3.3166247)) - - + for PBC_condition in ["hdim_1", "both"]: + assert tb_utils.calc_distance_coords_pbc( + np.array((0, 0, 0)), np.array((0, 9, 0)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(1) + assert tb_utils.calc_distance_coords_pbc( + np.array((0, 9, 0)), np.array((0, 0, 0)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(1) + assert tb_utils.calc_distance_coords_pbc( + np.array((8, 0)), np.array((0, 0)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(2) + assert tb_utils.calc_distance_coords_pbc( + np.array((4, 0, 4)), np.array((3, 7, 3)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(3.3166247) + assert tb_utils.calc_distance_coords_pbc( + np.array((4, 0, 4)), np.array((3, 7, 3)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(3.3166247) # Test the same points, except without PBCs - for PBC_condition in ['none', 'hdim_2']: - assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(9)) - assert (tb_utils.calc_distance_coords_pbc(np.array((0,9,0)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(9)) - assert (tb_utils.calc_distance_coords_pbc(np.array((8,0)), np.array((0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(8)) + for PBC_condition in ["none", "hdim_2"]: + assert tb_utils.calc_distance_coords_pbc( + np.array((0, 0, 0)), np.array((0, 9, 0)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(9) + assert tb_utils.calc_distance_coords_pbc( + np.array((0, 9, 0)), np.array((0, 0, 0)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(9) + assert tb_utils.calc_distance_coords_pbc( + np.array((8, 0)), np.array((0, 0)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(8) # Now test two points that will be closer along the hdim_2 boundary for cases without PBCs - for PBC_condition in ['hdim_2', 'both']: - assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,9)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(1)) - assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(1)) - assert (tb_utils.calc_distance_coords_pbc(np.array((0,8)), np.array((0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(2)) + for PBC_condition in ["hdim_2", "both"]: + assert tb_utils.calc_distance_coords_pbc( + np.array((0, 0, 0)), np.array((0, 0, 9)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(1) + assert tb_utils.calc_distance_coords_pbc( + np.array((0, 0, 9)), np.array((0, 0, 0)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(1) + assert tb_utils.calc_distance_coords_pbc( + np.array((0, 8)), np.array((0, 0)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(2) # Test the same points, except without PBCs - for PBC_condition in ['none', 'hdim_1']: - assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,0)), np.array((0,0,9)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(9)) - assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(9)) - assert (tb_utils.calc_distance_coords_pbc(np.array((0,8)), np.array((0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(8)) + for PBC_condition in ["none", "hdim_1"]: + assert tb_utils.calc_distance_coords_pbc( + np.array((0, 0, 0)), np.array((0, 0, 9)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(9) + assert 
tb_utils.calc_distance_coords_pbc( + np.array((0, 0, 9)), np.array((0, 0, 0)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(9) + assert tb_utils.calc_distance_coords_pbc( + np.array((0, 8)), np.array((0, 0)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(8) # Test points that will be closer for the both - PBC_condition = 'both' - assert (tb_utils.calc_distance_coords_pbc(np.array((0,9,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(1.4142135, rel=1e-3)) - assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(1.4142135, rel=1e-3)) + PBC_condition = "both" + assert tb_utils.calc_distance_coords_pbc( + np.array((0, 9, 9)), np.array((0, 0, 0)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(1.4142135, rel=1e-3) + assert tb_utils.calc_distance_coords_pbc( + np.array((0, 0, 9)), np.array((0, 9, 0)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(1.4142135, rel=1e-3) # Test the corner points for no PBCs - PBC_condition = 'none' - assert (tb_utils.calc_distance_coords_pbc(np.array((0,9,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(12.727922, rel=1e-3)) - assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(12.727922, rel=1e-3)) - - # Test the corner points for hdim_1 and hdim_2 - for PBC_condition in ['hdim_1', 'hdim_2']: - assert (tb_utils.calc_distance_coords_pbc(np.array((0,9,9)), np.array((0,0,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(9.055385)) - assert (tb_utils.calc_distance_coords_pbc(np.array((0,0,9)), np.array((0,9,0)), 0, 10, 0, 10, PBC_condition) - == pytest.approx(9.055385)) + PBC_condition = "none" + assert tb_utils.calc_distance_coords_pbc( + np.array((0, 9, 9)), np.array((0, 0, 0)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(12.727922, rel=1e-3) + assert tb_utils.calc_distance_coords_pbc( + np.array((0, 0, 9)), np.array((0, 9, 0)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(12.727922, rel=1e-3) - -@pytest.mark.parametrize("loc_1, loc_2, bounds, PBC_flag, expected_dist", - [((0,0,0), (0,0,9), (0, 10, 0, 10), 'both', 1), - ] + # Test the corner points for hdim_1 and hdim_2 + for PBC_condition in ["hdim_1", "hdim_2"]: + assert tb_utils.calc_distance_coords_pbc( + np.array((0, 9, 9)), np.array((0, 0, 0)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(9.055385) + assert tb_utils.calc_distance_coords_pbc( + np.array((0, 0, 9)), np.array((0, 9, 0)), 0, 10, 0, 10, PBC_condition + ) == pytest.approx(9.055385) + + +@pytest.mark.parametrize( + "loc_1, loc_2, bounds, PBC_flag, expected_dist", + [ + ((0, 0, 0), (0, 0, 9), (0, 10, 0, 10), "both", 1), + ], ) def test_calc_distance_coords_pbc_param(loc_1, loc_2, bounds, PBC_flag, expected_dist): - '''Tests ```tobac.utils.calc_distance_coords_pbc``` in a parameterized way - + """Tests ```tobac.utils.calc_distance_coords_pbc``` in a parameterized way + Parameters ---------- loc_1: tuple @@ -199,241 +239,340 @@ def test_calc_distance_coords_pbc_param(loc_1, loc_2, bounds, PBC_flag, expected 'both' means that we are periodic along both horizontal dimensions expected_dist: float Expected distance between the two points - ''' + """ import numpy as np - assert (tb_utils.calc_distance_coords_pbc(np.array(loc_1), np.array(loc_2), bounds[0], bounds[1], - bounds[2], bounds[3], PBC_flag)== pytest.approx(expected_dist)) + assert tb_utils.calc_distance_coords_pbc( + np.array(loc_1), + np.array(loc_2), + bounds[0], + bounds[1], + 
bounds[2], + bounds[3], + PBC_flag, + ) == pytest.approx(expected_dist) def test_get_pbc_coordinates(): - '''Tests tobac.util.get_pbc_coordinates. + """Tests tobac.util.get_pbc_coordinates. Currently runs the following tests: For an invalid PBC_flag, we raise an error - For PBC_flag of 'none', we truncate the box and give a valid box. + For PBC_flag of 'none', we truncate the box and give a valid box. - ''' + """ with pytest.raises(ValueError): - tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'c') + tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, "c") # Test PBC_flag of none - assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'none') == [(1, 4, 1, 4),]) - assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -1, 4, 1, 4, 'none') == [(0, 4, 1, 4),]) - assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 12, 1, 4, 'none') == [(1, 10, 1, 4),]) - assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 12, -1, 4, 'none') == [(1, 10, 0, 4),]) - - # Test PBC_flag with hdim_1 + assert tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, "none") == [ + (1, 4, 1, 4), + ] + assert tb_utils.get_pbc_coordinates(0, 10, 0, 10, -1, 4, 1, 4, "none") == [ + (0, 4, 1, 4), + ] + assert tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 12, 1, 4, "none") == [ + (1, 10, 1, 4), + ] + assert tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 12, -1, 4, "none") == [ + (1, 10, 0, 4), + ] + + # Test PBC_flag with hdim_1 # Simple case, no PBC overlapping - assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'hdim_1') == [(1, 4, 1, 4),]) + assert tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, "hdim_1") == [ + (1, 4, 1, 4), + ] # PBC going on the min side - assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -1, 4, 1, 4, 'hdim_1') == [(0, 4, 1, 4), (9, 10, 1, 4)]) + assert tb_utils.get_pbc_coordinates(0, 10, 0, 10, -1, 4, 1, 4, "hdim_1") == [ + (0, 4, 1, 4), + (9, 10, 1, 4), + ] # PBC going on the min side; should be truncated in hdim_2. 
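# The magic numbers in the calc_distance_coords_pbc asserts a few hunks above
# follow from the minimum-image rule; the sketch below is an assumed per-axis
# restatement of it, not the tobac implementation: along a periodic axis of
# length L, the separation is min(|d|, L - |d|), so indices 0 and 9 on the
# domain [0, 10) are 1 apart rather than 9.
import numpy as np

def pbc_axis_distance(a, b, length):
    """Minimum-image separation along one periodic axis of the given length."""
    d = abs(a - b)
    return min(d, length - d)

# (0, 9, 9) vs (0, 0, 0) with both horizontal axes periodic: sqrt(1 + 1)
assert np.isclose(np.hypot(pbc_axis_distance(9, 0, 10), pbc_axis_distance(9, 0, 10)), 1.4142135)
# The same corner pair with only one periodic axis: sqrt(1 + 81) = 9.055385
assert np.isclose(np.sqrt(pbc_axis_distance(9, 0, 10) ** 2 + 9.0 ** 2), 9.055385)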
- assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -1, 4, -1, 4, 'hdim_1') == [(0, 4, 0, 4), (9, 10, 0, 4)]) + assert tb_utils.get_pbc_coordinates(0, 10, 0, 10, -1, 4, -1, 4, "hdim_1") == [ + (0, 4, 0, 4), + (9, 10, 0, 4), + ] # PBC going on the max side only - assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 4, 12, 1, 4, 'hdim_1') == [(4, 10, 1, 4), (0, 2, 1, 4)]) + assert tb_utils.get_pbc_coordinates(0, 10, 0, 10, 4, 12, 1, 4, "hdim_1") == [ + (4, 10, 1, 4), + (0, 2, 1, 4), + ] # PBC overlapping - assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -4, 12, 1, 4, 'hdim_1') == [(0, 10, 1, 4),]) + assert tb_utils.get_pbc_coordinates(0, 10, 0, 10, -4, 12, 1, 4, "hdim_1") == [ + (0, 10, 1, 4), + ] # Test PBC_flag with hdim_2 # Simple case, no PBC overlapping - assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'hdim_2') == [(1, 4, 1, 4),]) + assert tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, "hdim_2") == [ + (1, 4, 1, 4), + ] # PBC going on the min side - assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, -1, 4, 'hdim_2') == [(1, 4, 0, 4), (1, 4, 9, 10)]) + assert tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, -1, 4, "hdim_2") == [ + (1, 4, 0, 4), + (1, 4, 9, 10), + ] # PBC going on the min side with truncation in hdim_1 - assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -4, 4, -1, 4, 'hdim_2') == [(0, 4, 0, 4), (0, 4, 9, 10)]) - # PBC going on the max side - assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 4, 12, 'hdim_2') == [(1, 4, 4, 10), (1, 4, 0, 2)]) + assert tb_utils.get_pbc_coordinates(0, 10, 0, 10, -4, 4, -1, 4, "hdim_2") == [ + (0, 4, 0, 4), + (0, 4, 9, 10), + ] + # PBC going on the max side + assert tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 4, 12, "hdim_2") == [ + (1, 4, 4, 10), + (1, 4, 0, 2), + ] # PBC overlapping - assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, -4, 12, 'hdim_2') == [(1, 4, 0, 10),]) + assert tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, -4, 12, "hdim_2") == [ + (1, 4, 0, 10), + ] # Test PBC_flag with both # Simple case, no PBC overlapping - assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, 'both') == [(1, 4, 1, 4),]) + assert tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 1, 4, "both") == [ + (1, 4, 1, 4), + ] # hdim_1 only testing # PBC on the min side of hdim_1 only - assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -1, 4, 1, 4, 'both') == [(0, 4, 1, 4), (9, 10, 1, 4)]) + assert tb_utils.get_pbc_coordinates(0, 10, 0, 10, -1, 4, 1, 4, "both") == [ + (0, 4, 1, 4), + (9, 10, 1, 4), + ] # PBC on the max side of hdim_1 only - assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 4, 12, 1, 4, 'both') == [(4, 10, 1, 4), (0, 2, 1, 4)]) + assert tb_utils.get_pbc_coordinates(0, 10, 0, 10, 4, 12, 1, 4, "both") == [ + (4, 10, 1, 4), + (0, 2, 1, 4), + ] # PBC overlapping on max side of hdim_1 only - assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -4, 12, 1, 4, 'both') == [(0, 10, 1, 4),]) + assert tb_utils.get_pbc_coordinates(0, 10, 0, 10, -4, 12, 1, 4, "both") == [ + (0, 10, 1, 4), + ] # hdim_2 only testing # PBC on the min side of hdim_2 only - assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, -1, 4, 'both') == [(1, 4, 0, 4), (1, 4, 9, 10)]) + assert tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, -1, 4, "both") == [ + (1, 4, 0, 4), + (1, 4, 9, 10), + ] # PBC on the max side of hdim_2 only - assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 4, 12, 'both') == [(1, 4, 4, 10), (1, 4, 0, 2)]) + assert tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, 4, 12, "both") == [ + (1, 4, 
4, 10), + (1, 4, 0, 2), + ] # PBC overlapping on max side of hdim_2 only - assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, -4, 12, 'both') == [(1, 4, 0, 10),]) + assert tb_utils.get_pbc_coordinates(0, 10, 0, 10, 1, 4, -4, 12, "both") == [ + (1, 4, 0, 10), + ] # hdim_1 and hdim_2 testing simultaneous - # both larger than the actual domain - assert (tb_utils.get_pbc_coordinates(0, 10, 0, 10, -1, 12, -4, 14, 'both') == [(0, 10, 0, 10),]) - # min in hdim_1 and hdim_2 - assert (lists_equal_without_order(tb_utils.get_pbc_coordinates(0, 10, 0, 10, -3, 3, -4, 2, 'both'), [(0, 3, 0, 2), (0, 3, 6, 10), (7, 10, 6, 10), (7, 10, 0, 2)])) + # both larger than the actual domain + assert tb_utils.get_pbc_coordinates(0, 10, 0, 10, -1, 12, -4, 14, "both") == [ + (0, 10, 0, 10), + ] + # min in hdim_1 and hdim_2 + assert lists_equal_without_order( + tb_utils.get_pbc_coordinates(0, 10, 0, 10, -3, 3, -4, 2, "both"), + [(0, 3, 0, 2), (0, 3, 6, 10), (7, 10, 6, 10), (7, 10, 0, 2)], + ) # max in hdim_1, min in hdim_2 - assert (lists_equal_without_order(tb_utils.get_pbc_coordinates(0, 10, 0, 10, 5, 12, -4, 2, 'both'), [(5, 10, 0, 2), (5, 10, 6, 10), (0, 2, 6, 10), (0, 2, 0, 2)])) + assert lists_equal_without_order( + tb_utils.get_pbc_coordinates(0, 10, 0, 10, 5, 12, -4, 2, "both"), + [(5, 10, 0, 2), (5, 10, 6, 10), (0, 2, 6, 10), (0, 2, 0, 2)], + ) # max in hdim_1 and hdim_2 - assert (lists_equal_without_order(tb_utils.get_pbc_coordinates(0, 10, 0, 10, 5, 12, 7, 15, 'both'), [(5, 10, 7, 10), (5, 10, 0, 5), (0, 2, 0, 5), (0, 2, 7, 10)])) + assert lists_equal_without_order( + tb_utils.get_pbc_coordinates(0, 10, 0, 10, 5, 12, 7, 15, "both"), + [(5, 10, 7, 10), (5, 10, 0, 5), (0, 2, 0, 5), (0, 2, 7, 10)], + ) # min in hdim_1, max in hdim_2 - assert (lists_equal_without_order(tb_utils.get_pbc_coordinates(0, 10, 0, 10, -3, 3, 7, 15, 'both'), [(0, 3, 7, 10), (0, 3, 0, 5), (7, 10, 0, 5), (7, 10, 7, 10)])) - - -@pytest.mark.parametrize("feature_loc, min_max_coords, lengths, expected_coord_interp", - [((0,0), (0,1,0,1),(2,2), (0,0)), - ((0,0), (0,1),(2,), (0,)), - ] + assert lists_equal_without_order( + tb_utils.get_pbc_coordinates(0, 10, 0, 10, -3, 3, 7, 15, "both"), + [(0, 3, 7, 10), (0, 3, 0, 5), (7, 10, 0, 5), (7, 10, 7, 10)], + ) + + +@pytest.mark.parametrize( + "feature_loc, min_max_coords, lengths, expected_coord_interp", + [ + ((0, 0), (0, 1, 0, 1), (2, 2), (0, 0)), + ((0, 0), (0, 1), (2,), (0,)), + ], ) -def test_add_coordinates_2D(feature_loc, min_max_coords, lengths, expected_coord_interp): - ''' +def test_add_coordinates_2D( + feature_loc, min_max_coords, lengths, expected_coord_interp +): + """ Tests ```utils.add_coordinates``` for a 2D case with - both 1D and 2D coordinates - ''' + both 1D and 2D coordinates + """ import xarray as xr import numpy as np import datetime - feat_interp = tbtest.generate_single_feature(feature_loc[0], feature_loc[1], - max_h1 = 9999, max_h2 = 9999) + feat_interp = tbtest.generate_single_feature( + feature_loc[0], feature_loc[1], max_h1=9999, max_h2=9999 + ) grid_coords = tbtest.generate_grid_coords(min_max_coords, lengths) ndims = len(lengths) - dim_names = ['time','longitude', 'latitude'] + dim_names = ["time", "longitude", "latitude"] dim_names = dim_names[:ndims] - # Note that this is arbitrary. - base_time = datetime.datetime(2022,1,1) + # Note that this is arbitrary. 
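# How the interpolated expectations in these add_coordinates tests arise, as a
# standalone sketch (assumed semantics, mirroring the parametrized cases
# rather than the tobac source): a fractional feature index is mapped
# linearly onto the coordinate axis, so index 0.5 on a 2-point longitude axis
# spanning [0, 3] gives 1.5, and on a latitude axis spanning [3, 6] gives 4.5.
import numpy as np

_lon_axis = np.linspace(0, 3, 2)  # [0., 3.]
_lat_axis = np.linspace(3, 6, 2)  # [3., 6.]
_index = 0.5
assert np.isclose(np.interp(_index, np.arange(_lon_axis.size), _lon_axis), 1.5)
assert np.isclose(np.interp(_index, np.arange(_lat_axis.size), _lat_axis), 4.5)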
+ base_time = datetime.datetime(2022, 1, 1) - - coord_dict = {'time': [base_time]} + coord_dict = {"time": [base_time]} if ndims == 1: # force at least a 2D array for data - lengths = lengths*2 - dim_names = ['time', 'longitude', 'latitude'] - coord_dict['longitude'] = grid_coords - coord_dict['latitude'] = grid_coords + lengths = lengths * 2 + dim_names = ["time", "longitude", "latitude"] + coord_dict["longitude"] = grid_coords + coord_dict["latitude"] = grid_coords elif ndims == 2: - dim_names = ['time','x', 'y'] - coord_dict['longitude'] = (('x','y'),grid_coords[0]) - coord_dict['latitude'] = (('x','y'),grid_coords[1]) + dim_names = ["time", "x", "y"] + coord_dict["longitude"] = (("x", "y"), grid_coords[0]) + coord_dict["latitude"] = (("x", "y"), grid_coords[1]) + + data_xr = xr.DataArray(np.empty((1,) + lengths), coords=coord_dict, dims=dim_names) - data_xr = xr.DataArray(np.empty((1,)+lengths), - coords = coord_dict, dims = dim_names) - feats_with_coords = tb_utils.add_coordinates(feat_interp, data_xr.to_iris()) - print(feats_with_coords.iloc[0]['longitude']) - assert feats_with_coords.iloc[0]['longitude'] == expected_coord_interp[0] + print(feats_with_coords.iloc[0]["longitude"]) + assert feats_with_coords.iloc[0]["longitude"] == expected_coord_interp[0] if ndims == 2: - assert feats_with_coords.iloc[0]['latitude'] == expected_coord_interp[1] - -@pytest.mark.parametrize("feature_loc, delta_feat, min_max_coords, lengths, expected_coord_interp", - [((0,0,0), None, (0,1,0,1),(2,2), (0,0)), - ((0,0,0), (1,1,1), (0,1,0,1),(2,2), (0,0)), - ((0.5,0.5,0.5), None, (0,3,3,6),(2,2), (1.5,4.5)), - ((0,0,0), None, (0,1),(2,), (0,)), - ((0,0,0), None, (0,1,0,1,0,1),(2,2,2), (0,0,0)), - ] + assert feats_with_coords.iloc[0]["latitude"] == expected_coord_interp[1] + + +@pytest.mark.parametrize( + "feature_loc, delta_feat, min_max_coords, lengths, expected_coord_interp", + [ + ((0, 0, 0), None, (0, 1, 0, 1), (2, 2), (0, 0)), + ((0, 0, 0), (1, 1, 1), (0, 1, 0, 1), (2, 2), (0, 0)), + ((0.5, 0.5, 0.5), None, (0, 3, 3, 6), (2, 2), (1.5, 4.5)), + ((0, 0, 0), None, (0, 1), (2,), (0,)), + ((0, 0, 0), None, (0, 1, 0, 1, 0, 1), (2, 2, 2), (0, 0, 0)), + ], ) -def test_add_coordinates_3D(feature_loc, delta_feat, min_max_coords, lengths, expected_coord_interp): - ''' +def test_add_coordinates_3D( + feature_loc, delta_feat, min_max_coords, lengths, expected_coord_interp +): + """ Tests ```utils.add_coordinates_3D``` for a 3D case with - 1D, 2D, and 3D coordinates - ''' + 1D, 2D, and 3D coordinates + """ import xarray as xr import numpy as np import datetime import pandas as pd - feat_interp = tbtest.generate_single_feature(feature_loc[1], feature_loc[2], - start_v = feature_loc[0], - max_h1 = 9999, max_h2 = 9999) + feat_interp = tbtest.generate_single_feature( + feature_loc[1], feature_loc[2], start_v=feature_loc[0], max_h1=9999, max_h2=9999 + ) if delta_feat is not None: - feat_interp_2 = tbtest.generate_single_feature(feature_loc[1]+delta_feat[1], feature_loc[2]+delta_feat[2], - start_v = feature_loc[0]+delta_feat[0], - max_h1 = 9999, max_h2 = 9999, feature_num=2) + feat_interp_2 = tbtest.generate_single_feature( + feature_loc[1] + delta_feat[1], + feature_loc[2] + delta_feat[2], + start_v=feature_loc[0] + delta_feat[0], + max_h1=9999, + max_h2=9999, + feature_num=2, + ) feat_interp = pd.concat([feat_interp, feat_interp_2], ignore_index=True) grid_coords = tbtest.generate_grid_coords(min_max_coords, lengths) ndims = len(lengths) - dim_names = ['time','longitude', 'latitude'] + dim_names = ["time", "longitude", 
"latitude"] dim_names = dim_names[:ndims] - # Note that this is arbitrary. - base_time = datetime.datetime(2022,1,1) + # Note that this is arbitrary. + base_time = datetime.datetime(2022, 1, 1) - - coord_dict = {'time': [base_time]} + coord_dict = {"time": [base_time]} if ndims == 1: # force at least a 3D array for data - lengths = lengths*3 - dim_names = ['time', 'longitude', 'latitude', 'z'] - coord_dict['longitude'] = grid_coords + lengths = lengths * 3 + dim_names = ["time", "longitude", "latitude", "z"] + coord_dict["longitude"] = grid_coords # we only test lon, so it doesn't really matter here what these are. - coord_dict['latitude'] = grid_coords - coord_dict['z'] = grid_coords + coord_dict["latitude"] = grid_coords + coord_dict["z"] = grid_coords elif ndims == 2: lengths = lengths + (lengths[0],) - dim_names = ['time','x', 'y', 'z'] - coord_dict['longitude'] = (('x','y'),grid_coords[0]) - coord_dict['latitude'] = (('x','y'),grid_coords[1]) + dim_names = ["time", "x", "y", "z"] + coord_dict["longitude"] = (("x", "y"), grid_coords[0]) + coord_dict["latitude"] = (("x", "y"), grid_coords[1]) # We only test lon and lat, so it doesn't matter what this is. - coord_dict['z'] = np.linspace(0,1,lengths[0]) + coord_dict["z"] = np.linspace(0, 1, lengths[0]) elif ndims == 3: - dim_names = ['time','x', 'y', 'z'] - coord_dict['longitude'] = (('x','y', 'z'),grid_coords[0]) - coord_dict['latitude'] = (('x','y', 'z'),grid_coords[1]) - coord_dict['altitude'] = (('x','y', 'z'),grid_coords[2]) - - data_xr = xr.DataArray(np.empty((1,)+lengths), - coords = coord_dict, dims = dim_names) - - if ndims <=2: + dim_names = ["time", "x", "y", "z"] + coord_dict["longitude"] = (("x", "y", "z"), grid_coords[0]) + coord_dict["latitude"] = (("x", "y", "z"), grid_coords[1]) + coord_dict["altitude"] = (("x", "y", "z"), grid_coords[2]) + + data_xr = xr.DataArray(np.empty((1,) + lengths), coords=coord_dict, dims=dim_names) + + if ndims <= 2: feats_with_coords = tb_utils.add_coordinates_3D(feat_interp, data_xr.to_iris()) else: - feats_with_coords = tb_utils.add_coordinates_3D(feat_interp, data_xr.to_iris(), vertical_coord = 2) - - assert np.isclose(feats_with_coords.iloc[0]['longitude'], expected_coord_interp[0]) + feats_with_coords = tb_utils.add_coordinates_3D( + feat_interp, data_xr.to_iris(), vertical_coord=2 + ) + + assert np.isclose(feats_with_coords.iloc[0]["longitude"], expected_coord_interp[0]) if ndims >= 2: - assert np.isclose(feats_with_coords.iloc[0]['latitude'], expected_coord_interp[1]) - + assert np.isclose( + feats_with_coords.iloc[0]["latitude"], expected_coord_interp[1] + ) + if ndims >= 3: - assert np.isclose(feats_with_coords.iloc[0]['altitude'], expected_coord_interp[2]) - -@pytest.mark.parametrize("vertical_coord_names, vertical_coord_pass_in, expect_raise", - [(['z'], 'auto', False), - (['pudding'], 'auto', True), - (['pudding'], 'pudding', False), - (['z', 'model_level_number'], 'pudding', True), - (['z', 'model_level_number'], 'auto', True), - (['z', 'model_level_number'], 'z', False), - ] + assert np.isclose( + feats_with_coords.iloc[0]["altitude"], expected_coord_interp[2] + ) + + +@pytest.mark.parametrize( + "vertical_coord_names, vertical_coord_pass_in, expect_raise", + [ + (["z"], "auto", False), + (["pudding"], "auto", True), + (["pudding"], "pudding", False), + (["z", "model_level_number"], "pudding", True), + (["z", "model_level_number"], "auto", True), + (["z", "model_level_number"], "z", False), + ], ) -def test_find_dataframe_vertical_coord(vertical_coord_names, 
vertical_coord_pass_in, - expect_raise): - '''Tests ```tobac.utils.find_dataframe_vertical_coord``` +def test_find_dataframe_vertical_coord( + vertical_coord_names, vertical_coord_pass_in, expect_raise +): + """Tests ```tobac.utils.find_dataframe_vertical_coord``` Parameters ---------- vertical_coord_names: array-like Names of vertical coordinates to add vertical_coord_pass_in: str - Value to pass into `vertical_coord` + Value to pass into `vertical_coord` expect_raise: bool True if we expect a ValueError to be raised, False otherwise - ''' + """ - test_feat = tbtest.generate_single_feature(0,0,max_h1=100, max_h2=100) + test_feat = tbtest.generate_single_feature(0, 0, max_h1=100, max_h2=100) for vertical_name in vertical_coord_names: test_feat[vertical_name] = 0.0 - + if expect_raise: with pytest.raises(ValueError): - tb_utils.find_dataframe_vertical_coord(test_feat, - vertical_coord=vertical_coord_pass_in) + tb_utils.find_dataframe_vertical_coord( + test_feat, vertical_coord=vertical_coord_pass_in + ) else: - assert tb_utils.find_dataframe_vertical_coord(test_feat, - vertical_coord=vertical_coord_pass_in) == vertical_coord_names[0] \ No newline at end of file + assert ( + tb_utils.find_dataframe_vertical_coord( + test_feat, vertical_coord=vertical_coord_pass_in + ) + == vertical_coord_names[0] + ) diff --git a/tobac/tracking.py b/tobac/tracking.py index 6ea192c4..176f8b97 100644 --- a/tobac/tracking.py +++ b/tobac/tracking.py @@ -6,25 +6,38 @@ from . import utils as tb_utils - -def linking_trackpy(features,field_in,dt,dxy, dz = None, - v_max=None,d_max=None,d_min=None,subnetwork_size=None, - memory=0,stubs=1,time_cell_min=None, - order=1,extrapolate=0, - method_linking='random', - adaptive_step=None,adaptive_stop=None, - cell_number_start=1, - vertical_coord = 'auto', - min_h1 = None, max_h1 = None, - min_h2 = None, max_h2 = None, - PBC_flag = 'none' - ): +def linking_trackpy( + features, + field_in, + dt, + dxy, + dz=None, + v_max=None, + d_max=None, + d_min=None, + subnetwork_size=None, + memory=0, + stubs=1, + time_cell_min=None, + order=1, + extrapolate=0, + method_linking="random", + adaptive_step=None, + adaptive_stop=None, + cell_number_start=1, + vertical_coord="auto", + min_h1=None, + max_h1=None, + min_h2=None, + max_h2=None, + PBC_flag="none", +): """Function to perform the linking of features in trajectories - + Parameters ---------- - features: pandas.DataFrame - Detected features to be linked + features: pandas.DataFrame + Detected features to be linked v_max: float speed at which features are allowed to move dt: float @@ -33,17 +46,17 @@ def linking_trackpy(features,field_in,dt,dxy, dz = None, Horizontal grid spacing of input data dz: float Constant vertical grid spacing of input data. If None, - uses `vertical_dim` to get vertical location. + uses `vertical_dim` to get vertical location. memory int number of output timesteps features allowed to vanish for to be still considered tracked subnetwork_size int - maximim size of subnetwork for linking + maximim size of subnetwork for linking method_linking: str('predict' or 'random') flag choosing method used for trajectory linking vertical_coord: str Name of the vertical coordinate in meters. If 'auto', tries to auto-detect. It looks for the coordinate or the dimension name corresponding - to the string. To use `dz`, set this to `None`. + to the string. To use `dz`, set this to `None`. 
min_h1: int Minimum hdim_1 value, required when PBC_flag is 'hdim_1' or 'both' max_h1: int @@ -69,177 +82,188 @@ def linking_trackpy(features,field_in,dt,dxy, dz = None, import trackpy as tp from copy import deepcopy - # Check if we are 3D. - if 'vdim' in features: + # Check if we are 3D. + if "vdim" in features: is_3D = True if dz is not None and vertical_coord is not None: - raise ValueError("dz and vertical_coord both set, vertical" - " spacing is ambiguous. Set one to None.") + raise ValueError( + "dz and vertical_coord both set, vertical" + " spacing is ambiguous. Set one to None." + ) if dz is None and vertical_coord is None: - raise ValueError("Neither dz nor vertical_coord are set. One" - " must be set.") + raise ValueError( + "Neither dz nor vertical_coord are set. One" " must be set." + ) if vertical_coord is not None: found_vertical_coord = tb_utils.find_dataframe_vertical_coord( - variable_dataframe=features, - vertical_coord=vertical_coord + variable_dataframe=features, vertical_coord=vertical_coord ) else: is_3D = False # make sure that we have min and max for h1 and h2 if we are PBC - if PBC_flag in ['hdim_1', 'both'] and (min_h1 is None or max_h1 is None): - raise ValueError("For PBC tracking, must set min and max coordinates.") - - if PBC_flag in ['hdim_2', 'both'] and (min_h2 is None or max_h2 is None): + if PBC_flag in ["hdim_1", "both"] and (min_h1 is None or max_h1 is None): raise ValueError("For PBC tracking, must set min and max coordinates.") + if PBC_flag in ["hdim_2", "both"] and (min_h2 is None or max_h2 is None): + raise ValueError("For PBC tracking, must set min and max coordinates.") # calculate search range based on timestep and grid spacing if v_max is not None: - search_range = dt*v_max/dxy - + search_range = dt * v_max / dxy + # calculate search range based on timestep and grid spacing if d_max is not None: - search_range=d_max/dxy - + search_range = d_max / dxy + # calculate search range based on timestep and grid spacing if d_min is not None: - search_range=max(search_range,d_min/dxy) + search_range = max(search_range, d_min / dxy) if time_cell_min: - stubs=np.floor(time_cell_min/dt)+1 - - - #logging.debug('stubs: '+ str(stubs)) - - #logging.debug('start linking features into trajectories') - - - #If subnetwork size given, set maximum subnet size + stubs = np.floor(time_cell_min / dt) + 1 + + # logging.debug('stubs: '+ str(stubs)) + + # logging.debug('start linking features into trajectories') + + # If subnetwork size given, set maximum subnet size if subnetwork_size is not None: - tp.linking.Linker.MAX_SUB_NET_SIZE=subnetwork_size + tp.linking.Linker.MAX_SUB_NET_SIZE = subnetwork_size # deep copy to preserve features field: - features_linking=deepcopy(features) + features_linking = deepcopy(features) # check if we are 3D or not if is_3D: - # If we are 3D, we need to convert the vertical + # If we are 3D, we need to convert the vertical # coordinates so that 1 unit is equal to dxy. if dz is not None: - features_linking['vdim_adj'] = features_linking['vdim']*dz/dxy + features_linking["vdim_adj"] = features_linking["vdim"] * dz / dxy else: vertical_coord = found_vertical_coord - features_linking['vdim_adj'] = (features_linking[found_vertical_coord]/dxy) + features_linking["vdim_adj"] = features_linking[found_vertical_coord] / dxy - pos_columns_tp = ['vdim_adj','hdim_1','hdim_2'] + pos_columns_tp = ["vdim_adj", "hdim_1", "hdim_2"] else: - pos_columns_tp = ['hdim_1', 'hdim_2'] - - # Check if we have PBCs. 
-    if PBC_flag in ['hdim_1', 'hdim_2', 'both']:
-        # Per the trackpy docs, to specify a custom distance function
-        # which we need for PBCs, neighbor_strategy must be 'BTree'.
+        pos_columns_tp = ["hdim_1", "hdim_2"]
+
+    # Check if we have PBCs.
+    if PBC_flag in ["hdim_1", "hdim_2", "both"]:
+        # Per the trackpy docs, to specify a custom distance function
+        # which we need for PBCs, neighbor_strategy must be 'BTree'.
         # I think this shouldn't change results, but it will degrade performance.
-        neighbor_strategy = 'BTree'
+        neighbor_strategy = "BTree"
         dist_func = build_distance_function(min_h1, max_h1, min_h2, max_h2, PBC_flag)
     else:
-        neighbor_strategy = 'KDTree'
+        neighbor_strategy = "KDTree"
         dist_func = None
-
-
-    if method_linking is 'random':
-#     link features into trajectories:
-        trajectories_unfiltered = tp.link(features_linking,
-                                          search_range=search_range,
-                                          memory=memory,
-                                          t_column='frame',
-                                          pos_columns=pos_columns_tp,
-                                          adaptive_step=adaptive_step,adaptive_stop=adaptive_stop,
-                                          neighbor_strategy=neighbor_strategy, link_strategy='auto',
-                                          dist_func = dist_func
-                                          )
-    elif method_linking is 'predict':
+
+    if method_linking == "random":
+        # link features into trajectories:
+        trajectories_unfiltered = tp.link(
+            features_linking,
+            search_range=search_range,
+            memory=memory,
+            t_column="frame",
+            pos_columns=pos_columns_tp,
+            adaptive_step=adaptive_step,
+            adaptive_stop=adaptive_stop,
+            neighbor_strategy=neighbor_strategy,
+            link_strategy="auto",
+            dist_func=dist_func,
+        )
+    elif method_linking == "predict":
         pred = tp.predict.NearestVelocityPredict(span=1)
-        trajectories_unfiltered = pred.link_df(features_linking, search_range=search_range, memory=memory,
-                                               pos_columns=pos_columns_tp,
-                                               t_column='frame',
-                                               neighbor_strategy=neighbor_strategy, link_strategy='auto',
-                                               adaptive_step=adaptive_step,adaptive_stop=adaptive_stop,
-                                               dist_func = dist_func
-#                                               copy_features=False, diagnostics=False,
-#                                               hash_size=None, box_size=None, verify_integrity=True,
-#                                               retain_index=False
-                                               )
+        trajectories_unfiltered = pred.link_df(
+            features_linking,
+            search_range=search_range,
+            memory=memory,
+            pos_columns=pos_columns_tp,
+            t_column="frame",
+            neighbor_strategy=neighbor_strategy,
+            link_strategy="auto",
+            adaptive_step=adaptive_step,
+            adaptive_stop=adaptive_stop,
+            dist_func=dist_func
+            # copy_features=False, diagnostics=False,
+            # hash_size=None, box_size=None, verify_integrity=True,
+            # retain_index=False
+        )
     else:
-        raise ValueError('method_linking unknown')
-
-
+        raise ValueError("method_linking unknown")
+
     # Filter trajectories to exclude short trajectories that are likely to be spurious
-#    trajectories_filtered = filter_stubs(trajectories_unfiltered,threshold=stubs)
-#    trajectories_filtered=trajectories_filtered.reset_index(drop=True)
+    # trajectories_filtered = filter_stubs(trajectories_unfiltered,threshold=stubs)
+    # trajectories_filtered=trajectories_filtered.reset_index(drop=True)

     # clean up our temporary filters
     if is_3D:
-        trajectories_unfiltered = trajectories_unfiltered.drop('vdim_adj', axis=1)
+        trajectories_unfiltered = trajectories_unfiltered.drop("vdim_adj", axis=1)

     # Reset particle numbers from the arbitrary numbers at the end of the feature detection and linking to consecutive cell numbers
     # keep 'particle' for reference to the feature detection step.
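     # (Illustrative aside, with made-up values, not part of the patch: the
     # vectorized remap below is a per-row dict lookup, e.g.
     #     particle_num_to_cell_num = {4: 1, 9: 2}
     #     remap_particle_to_cell_nv(particle_num_to_cell_num, 9)  # -> 2
     # so each arbitrary trackpy 'particle' id becomes a consecutive cell number.)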
-    trajectories_unfiltered['cell']=None
+    trajectories_unfiltered["cell"] = None
     particle_num_to_cell_num = dict()
-    for i_particle,particle in enumerate(pd.Series.unique(trajectories_unfiltered['particle'])):
-        cell=int(i_particle+cell_number_start)
+    for i_particle, particle in enumerate(
+        pd.Series.unique(trajectories_unfiltered["particle"])
+    ):
+        cell = int(i_particle + cell_number_start)
         particle_num_to_cell_num[particle] = int(cell)
     remap_particle_to_cell_vec = np.vectorize(remap_particle_to_cell_nv)
-    trajectories_unfiltered['cell'] = remap_particle_to_cell_vec(particle_num_to_cell_num, trajectories_unfiltered['particle'])
-    trajectories_unfiltered['cell'] = trajectories_unfiltered['cell'].astype(int)
-    trajectories_unfiltered.drop(columns=['particle'],inplace=True)
+    trajectories_unfiltered["cell"] = remap_particle_to_cell_vec(
+        particle_num_to_cell_num, trajectories_unfiltered["particle"]
+    )
+    trajectories_unfiltered["cell"] = trajectories_unfiltered["cell"].astype(int)
+    trajectories_unfiltered.drop(columns=["particle"], inplace=True)

-    trajectories_bycell=trajectories_unfiltered.groupby('cell')
+    trajectories_bycell = trajectories_unfiltered.groupby("cell")
     stub_cell_nums = list()
-    for cell,trajectories_cell in trajectories_bycell:
-        #logging.debug("cell: "+str(cell))
-        #logging.debug("feature: "+str(trajectories_cell['feature'].values))
-        #logging.debug("trajectories_cell.shape[0]: "+ str(trajectories_cell.shape[0]))
-
+    for cell, trajectories_cell in trajectories_bycell:
+        # logging.debug("cell: "+str(cell))
+        # logging.debug("feature: "+str(trajectories_cell['feature'].values))
+        # logging.debug("trajectories_cell.shape[0]: "+ str(trajectories_cell.shape[0]))
+
         if trajectories_cell.shape[0] < stubs:
-            #logging.debug("cell" + str(cell)+ "  is a stub ("+str(trajectories_cell.shape[0])+ "), setting cell number to Nan..")
+            # logging.debug("cell" + str(cell)+ "  is a stub ("+str(trajectories_cell.shape[0])+ "), setting cell number to Nan..")
             stub_cell_nums.append(cell)
-
-    trajectories_unfiltered.loc[trajectories_unfiltered['cell'].isin(stub_cell_nums),'cell']=np.nan
-    trajectories_filtered=trajectories_unfiltered
+    trajectories_unfiltered.loc[
+        trajectories_unfiltered["cell"].isin(stub_cell_nums), "cell"
+    ] = np.nan
+    trajectories_filtered = trajectories_unfiltered

-    #Interpolate to fill the gaps in the trajectories (left from allowing memory in the linking)
-    trajectories_filtered_unfilled=deepcopy(trajectories_filtered)
+    # Interpolate to fill the gaps in the trajectories (left from allowing memory in the linking)
+    trajectories_filtered_unfilled = deepcopy(trajectories_filtered)

-
-#    trajectories_filtered_filled=fill_gaps(trajectories_filtered_unfilled,order=order,
-#                extrapolate=extrapolate,frame_max=field_in.shape[0]-1,
-#                hdim_1_max=field_in.shape[1],hdim_2_max=field_in.shape[2])
-#    add coorinates from input fields to output trajectories (time,dimensions)
-#    logging.debug('start adding coordinates to trajectories')
-#    trajectories_filtered_filled=add_coordinates(trajectories_filtered_filled,field_in)
-#    add time coordinate relative to cell initiation:
-#    logging.debug('start adding cell time to trajectories')
-    trajectories_filtered_filled=trajectories_filtered_unfilled
-    trajectories_final=add_cell_time(trajectories_filtered_filled)
+    # trajectories_filtered_filled=fill_gaps(trajectories_filtered_unfilled,order=order,
+    #            extrapolate=extrapolate,frame_max=field_in.shape[0]-1,
+    #            hdim_1_max=field_in.shape[1],hdim_2_max=field_in.shape[2])
+    # add coordinates from input fields to output trajectories (time,dimensions)
+    # logging.debug('start adding coordinates to trajectories')
+    # trajectories_filtered_filled=add_coordinates(trajectories_filtered_filled,field_in)
+    # add time coordinate relative to cell initiation:
+    # logging.debug('start adding cell time to trajectories')
+    trajectories_filtered_filled = trajectories_filtered_unfilled
+    trajectories_final = add_cell_time(trajectories_filtered_filled)
     # add coordinate to raw features identified:
-    #logging.debug('start adding coordinates to detected features')
-    #logging.debug('feature linking completed')
+    # logging.debug('start adding coordinates to detected features')
+    # logging.debug('feature linking completed')

     return trajectories_final

-
-def fill_gaps(t,order=1,extrapolate=0,frame_max=None,hdim_1_max=None,hdim_2_max=None):
-    '''add cell time as time since the initiation of each cell
-
+
+def fill_gaps(
+    t, order=1, extrapolate=0, frame_max=None, hdim_1_max=None, hdim_2_max=None
+):
+    """Fill gaps in the trajectories by interpolation (and optional extrapolation)
+
     Parameters
     ----------
-    t:             pandas dataframe
+    t:             pandas dataframe
                    trajectories from trackpy
     order:         int
                    Order of polynomial used to extrapolate trajectory into gaps and beyond start and end point
@@ -253,76 +277,84 @@ def fill_gaps(t,order=1,extrapolate=0,frame_max=None,hdim_1_max=None,hdim_2_max=
                    size of input data along second horizontal axis
     Returns
     -------
-    pandas dataframe
+    pandas dataframe
                    trajectories from trackpy with filled gaps and potentially extrapolated
-    '''
+    """
     from scipy.interpolate import InterpolatedUnivariateSpline
-    logging.debug('start filling gaps')
-
-    t_list=[]    # empty list to store interpolated DataFrames
-
+
+    logging.debug("start filling gaps")
+
+    t_list = []  # empty list to store interpolated DataFrames
+
     # group by cell number and perform process for each cell individually:
-    t_grouped=t.groupby('cell')
-    for cell,track in t_grouped:
-
-        # Setup interpolator from existing points (of order given as keyword)
-        frame_in=track['frame'].values
-        hdim_1_in=track['hdim_1'].values
-        hdim_2_in=track['hdim_2'].values
+    t_grouped = t.groupby("cell")
+    for cell, track in t_grouped:
+
+        # Setup interpolator from existing points (of order given as keyword)
+        frame_in = track["frame"].values
+        hdim_1_in = track["hdim_1"].values
+        hdim_2_in = track["hdim_2"].values
         s_x = InterpolatedUnivariateSpline(frame_in, hdim_1_in, k=order)
-        s_y = InterpolatedUnivariateSpline(frame_in, hdim_2_in, k=order)
-
+        s_y = InterpolatedUnivariateSpline(frame_in, hdim_2_in, k=order)
+
         # Create new index filling in gaps and possibly extrapolating:
-        index_min=min(frame_in)-extrapolate
-        index_min=max(index_min,0)
-        index_max=max(frame_in)+extrapolate
-        index_max=min(index_max,frame_max)
-        new_index=range(index_min,index_max+1)  # +1 here to include last value
-        track=track.reindex(new_index)
-
+        index_min = min(frame_in) - extrapolate
+        index_min = max(index_min, 0)
+        index_max = max(frame_in) + extrapolate
+        index_max = min(index_max, frame_max)
+        new_index = range(index_min, index_max + 1)  # +1 here to include last value
+        track = track.reindex(new_index)
+
         # Interpolate to extended index:
-        frame_out=new_index
-        hdim_1_out=s_x(frame_out)
-        hdim_2_out=s_y(frame_out)
-
-        # Replace fields in data frame with
-        track['frame']=new_index
-        track['hdim_1']=hdim_1_out
-        track['hdim_2']=hdim_2_out
-        track['cell']=cell
-
+        frame_out = new_index
+        hdim_1_out = s_x(frame_out)
+        hdim_2_out = s_y(frame_out)
+
+        # Replace fields in data frame with interpolated values:
+        track["frame"] = 
new_index + track["hdim_1"] = hdim_1_out + track["hdim_2"] = hdim_2_out + track["cell"] = cell + # Append DataFrame to list of DataFrames - t_list.append(track) - # Concatenate interpolated trajectories into one DataFrame: - t_out=pd.concat(t_list) + t_list.append(track) + # Concatenate interpolated trajectories into one DataFrame: + t_out = pd.concat(t_list) # Restrict output trajectories to input data in time and space: - t_out=t_out.loc[(t_out['hdim_1']0) & (t_out['hdim_2']>0)] - t_out=t_out.reset_index(drop=True) + t_out = t_out.loc[ + (t_out["hdim_1"] < hdim_1_max) + & (t_out["hdim_2"] < hdim_2_max) + & (t_out["hdim_1"] > 0) + & (t_out["hdim_2"] > 0) + ] + t_out = t_out.reset_index(drop=True) return t_out + def add_cell_time(t): - ''' add cell time as time since the initiation of each cell - + """add cell time as time since the initiation of each cell + Parameters ---------- t: pandas DataFrame trajectories with added coordinates - + Returns ------- - t: pandas dataframe + t: pandas dataframe trajectories with added cell time - ''' + """ + + # logging.debug('start adding time relative to cell initiation') + t_grouped = t.groupby("cell") - #logging.debug('start adding time relative to cell initiation') - t_grouped=t.groupby('cell') - - t['time_cell'] = t['time']-t.groupby('cell')['time'].transform('min') - t['time_cell']=pd.to_timedelta(t['time_cell']) + t["time_cell"] = t["time"] - t.groupby("cell")["time"].transform("min") + t["time_cell"] = pd.to_timedelta(t["time_cell"]) return t + def remap_particle_to_cell_nv(particle_cell_map, input_particle): - '''Remaps the particles to new cells given an input map and the current particle. + """Remaps the particles to new cells given an input map and the current particle. Helper function that is designed to be vectorized with np.vectorize Parameters @@ -331,12 +363,13 @@ def remap_particle_to_cell_nv(particle_cell_map, input_particle): The dictionary mapping particle number to cell number input_particle: key for particle_cell_map The particle number to remap - - ''' + + """ return particle_cell_map[input_particle] + def build_distance_function(min_h1, max_h1, min_h2, max_h2, PBC_flag): - '''Function to build a partial ```calc_distance_coords_pbc``` function + """Function to build a partial ```calc_distance_coords_pbc``` function suitable for use with trackpy Parameters @@ -355,15 +388,21 @@ def build_distance_function(min_h1, max_h1, min_h2, max_h2, PBC_flag): 'hdim_1' means that we are periodic along hdim1 'hdim_2' means that we are periodic along hdim2 'both' means that we are periodic along both horizontal dimensions - + Returns ------- function object A version of calc_distance_coords_pbc suitable to be called by just f(coords_1, coords_2) - ''' + """ import functools - return functools.partial(tb_utils.calc_distance_coords_pbc, - min_h1 = min_h1, max_h1 = max_h1, min_h2 = min_h2, - max_h2 = max_h2, PBC_flag = PBC_flag) + + return functools.partial( + tb_utils.calc_distance_coords_pbc, + min_h1=min_h1, + max_h1=max_h1, + min_h2=min_h2, + max_h2=max_h2, + PBC_flag=PBC_flag, + ) diff --git a/tobac/utils.py b/tobac/utils.py index 5c2c4ecc..55a42565 100644 --- a/tobac/utils.py +++ b/tobac/utils.py @@ -2,278 +2,307 @@ import numpy as np -def column_mask_from2D(mask_2D,cube,z_coord='model_level_number'): - '''function to turn 2D watershedding mask into a 3D mask of selected columns +def column_mask_from2D(mask_2D, cube, z_coord="model_level_number"): + """function to turn 2D watershedding mask into a 3D mask of selected columns Parameters 
---------- - cube: iris.cube.Cube + cube: iris.cube.Cube data cube - mask_2D: iris.cube.Cube + mask_2D: iris.cube.Cube 2D cube containing mask (int id for tacked volumes 0 everywhere else) z_coord: str name of the vertical coordinate in the cube Returns ------- - iris.cube.Cube + iris.cube.Cube 3D cube containing columns of 2D mask (int id for tacked volumes 0 everywhere else) - ''' + """ from copy import deepcopy - mask_3D=deepcopy(cube) - mask_3D.rename('segmentation_mask') - dim=mask_3D.coord_dims(z_coord)[0] + + mask_3D = deepcopy(cube) + mask_3D.rename("segmentation_mask") + dim = mask_3D.coord_dims(z_coord)[0] for i in range(len(mask_3D.coord(z_coord).points)): slc = [slice(None)] * len(mask_3D.shape) - slc[dim] = slice(i,i+1) - mask_out=mask_3D[slc] - mask_3D.data[slc]=mask_2D.core_data() + slc[dim] = slice(i, i + 1) + mask_out = mask_3D[slc] + mask_3D.data[slc] = mask_2D.core_data() return mask_3D -def mask_cube_cell(variable_cube,mask,cell,track): - '''Mask cube for tracked volume of an individual cell - +def mask_cube_cell(variable_cube, mask, cell, track): + """Mask cube for tracked volume of an individual cell + Parameters ---------- - variable_cube: iris.cube.Cube + variable_cube: iris.cube.Cube unmasked data cube - mask: iris.cube.Cube + mask: iris.cube.Cube cube containing mask (int id for tacked volumes 0 everywhere else) cell: int interger id of cell to create masked cube for - + Returns ------- - iris.cube.Cube + iris.cube.Cube Masked cube with data for respective cell - ''' + """ from copy import deepcopy - variable_cube_out=deepcopy(variable_cube) - feature_ids=track.loc[track['cell']==cell,'feature'].values - variable_cube_out=mask_cube_features(variable_cube,mask,feature_ids) + + variable_cube_out = deepcopy(variable_cube) + feature_ids = track.loc[track["cell"] == cell, "feature"].values + variable_cube_out = mask_cube_features(variable_cube, mask, feature_ids) return variable_cube_out -def mask_cube_all(variable_cube,mask): - ''' Mask cube for untracked volume - + +def mask_cube_all(variable_cube, mask): + """Mask cube for untracked volume + Parameters ---------- - variable_cube: iris.cube.Cube + variable_cube: iris.cube.Cube unmasked data cube - mask: iris.cube.Cube + mask: iris.cube.Cube cube containing mask (int id for tacked volumes 0 everywhere else) - + Returns ------- - variable_cube_out: iris.cube.Cube + variable_cube_out: iris.cube.Cube Masked cube for untracked volume - ''' + """ from dask.array import ma from copy import deepcopy - variable_cube_out=deepcopy(variable_cube) - variable_cube_out.data=ma.masked_where(mask.core_data()==0,variable_cube_out.core_data()) + + variable_cube_out = deepcopy(variable_cube) + variable_cube_out.data = ma.masked_where( + mask.core_data() == 0, variable_cube_out.core_data() + ) return variable_cube_out -def mask_cube_untracked(variable_cube,mask): - '''Mask cube for untracked volume - + +def mask_cube_untracked(variable_cube, mask): + """Mask cube for untracked volume + Parameters ---------- - variable_cube: iris.cube.Cube + variable_cube: iris.cube.Cube unmasked data cube - mask: iris.cube.Cube + mask: iris.cube.Cube cube containing mask (int id for tacked volumes 0 everywhere else) - + Returns ------- - variable_cube_out: iris.cube.Cube + variable_cube_out: iris.cube.Cube Masked cube for untracked volume - ''' + """ from dask.array import ma from copy import deepcopy - variable_cube_out=deepcopy(variable_cube) - variable_cube_out.data=ma.masked_where(mask.core_data()>0,variable_cube_out.core_data()) + + 
variable_cube_out = deepcopy(variable_cube) + variable_cube_out.data = ma.masked_where( + mask.core_data() > 0, variable_cube_out.core_data() + ) return variable_cube_out -def mask_cube(cube_in,mask): - ''' Mask cube where mask is larger than zero - + +def mask_cube(cube_in, mask): + """Mask cube where mask is larger than zero + Parameters ---------- - cube_in: iris.cube.Cube + cube_in: iris.cube.Cube unmasked data cube - mask: numpy.ndarray or dask.array + mask: numpy.ndarray or dask.array mask to use for masking, >0 where cube is supposed to be masked - + Returns ------- - iris.cube.Cube + iris.cube.Cube Masked cube - ''' + """ from dask.array import ma from copy import deepcopy - cube_out=deepcopy(cube_in) - cube_out.data=ma.masked_where(mask!=0,cube_in.core_data()) + + cube_out = deepcopy(cube_in) + cube_out.data = ma.masked_where(mask != 0, cube_in.core_data()) return cube_out -def mask_cell(mask,cell,track,masked=False): - '''create mask for specific cell - + +def mask_cell(mask, cell, track, masked=False): + """create mask for specific cell + Parameters ---------- - mask: iris.cube.Cube + mask: iris.cube.Cube cube containing mask (int id for tacked volumes 0 everywhere else) - + Returns ------- - numpy.ndarray + numpy.ndarray Masked cube for untracked volume - ''' - feature_ids=track.loc[track['cell']==cell,'feature'].values - mask_i=mask_features(mask,feature_ids,masked=masked) - return mask_i + """ + feature_ids = track.loc[track["cell"] == cell, "feature"].values + mask_i = mask_features(mask, feature_ids, masked=masked) + return mask_i + + +def mask_cell_surface(mask, cell, track, masked=False, z_coord="model_level_number"): + """Create surface projection of mask for individual cell -def mask_cell_surface(mask,cell,track,masked=False,z_coord='model_level_number'): - '''Create surface projection of mask for individual cell - Parameters ---------- - mask: iris.cube.Cube + mask: iris.cube.Cube cube containing mask (int id for tacked volumes 0 everywhere else) - + Returns ------- - iris.cube.Cube + iris.cube.Cube Masked cube for untracked volume - ''' - feature_ids=track.loc[track['cell']==cell,'feature'].values - mask_i_surface=mask_features_surface(mask,feature_ids,masked=masked,z_coord=z_coord) + """ + feature_ids = track.loc[track["cell"] == cell, "feature"].values + mask_i_surface = mask_features_surface( + mask, feature_ids, masked=masked, z_coord=z_coord + ) return mask_i_surface -def mask_cell_columns(mask,cell,track,masked=False,z_coord='model_level_number'): - '''Create mask with entire columns for individual cell - + +def mask_cell_columns(mask, cell, track, masked=False, z_coord="model_level_number"): + """Create mask with entire columns for individual cell + Parameters ---------- - mask: iris.cube.Cube + mask: iris.cube.Cube cube containing mask (int id for tacked volumes 0 everywhere else) - + Returns ------- - iris.cube.Cube + iris.cube.Cube Masked cube for untracked volume - ''' - feature_ids=track.loc[track['cell']==cell].loc['feature'] - mask_i=mask_features_columns(mask,feature_ids,masked=masked,z_coord=z_coord) + """ + feature_ids = track.loc[track["cell"] == cell].loc["feature"] + mask_i = mask_features_columns(mask, feature_ids, masked=masked, z_coord=z_coord) return mask_i -def mask_cube_features(variable_cube,mask,feature_ids): - ''' Mask cube for tracked volume of an individual cell - + +def mask_cube_features(variable_cube, mask, feature_ids): + """Mask cube for tracked volume of an individual cell + Parameters ---------- - variable_cube: iris.cube.Cube + 
variable_cube: iris.cube.Cube unmasked data cube - mask: iris.cube.Cube + mask: iris.cube.Cube cube containing mask (int id for tacked volumes 0 everywhere else) cell: int interger id of cell to create masked cube for - + Returns ------- - iris.cube.Cube + iris.cube.Cube Masked cube with data for respective cell - ''' - from dask.array import ma,isin + """ + from dask.array import ma, isin from copy import deepcopy - variable_cube_out=deepcopy(variable_cube) - variable_cube_out.data=ma.masked_where(~isin(mask.core_data(),feature_ids),variable_cube_out.core_data()) + + variable_cube_out = deepcopy(variable_cube) + variable_cube_out.data = ma.masked_where( + ~isin(mask.core_data(), feature_ids), variable_cube_out.core_data() + ) return variable_cube_out -def mask_features(mask,feature_ids,masked=False): - '''create mask for specific features - + +def mask_features(mask, feature_ids, masked=False): + """create mask for specific features + Parameters ---------- - mask: iris.cube.Cube + mask: iris.cube.Cube cube containing mask (int id for tacked volumes 0 everywhere else) - + Returns ------- - numpy.ndarray + numpy.ndarray Masked cube for untracked volume - ''' - from dask.array import ma,isin + """ + from dask.array import ma, isin from copy import deepcopy - mask_i=deepcopy(mask) - mask_i_data=mask_i.core_data() - mask_i_data[~isin(mask_i.core_data(),feature_ids)]=0 - if masked: - mask_i.data=ma.masked_equal(mask_i.core_data(),0) - return mask_i - -def mask_features_surface(mask,feature_ids,masked=False,z_coord='model_level_number'): - ''' create surface mask for individual features - + + mask_i = deepcopy(mask) + mask_i_data = mask_i.core_data() + mask_i_data[~isin(mask_i.core_data(), feature_ids)] = 0 + if masked: + mask_i.data = ma.masked_equal(mask_i.core_data(), 0) + return mask_i + + +def mask_features_surface( + mask, feature_ids, masked=False, z_coord="model_level_number" +): + """create surface mask for individual features + Parameters ---------- - mask: iris.cube.Cube + mask: iris.cube.Cube cube containing mask (int id for tacked volumes 0 everywhere else) - + Returns ------- - iris.cube.Cube + iris.cube.Cube Masked cube for untracked volume - ''' + """ from iris.analysis import MAX - from dask.array import ma,isin + from dask.array import ma, isin from copy import deepcopy - mask_i=deepcopy(mask) -# mask_i.data=[~isin(mask_i.data,feature_ids)]=0 - mask_i_data=mask_i.core_data() - mask_i_data[~isin(mask_i.core_data(),feature_ids)]=0 - mask_i_surface=mask_i.collapsed(z_coord,MAX) + + mask_i = deepcopy(mask) + # mask_i.data=[~isin(mask_i.data,feature_ids)]=0 + mask_i_data = mask_i.core_data() + mask_i_data[~isin(mask_i.core_data(), feature_ids)] = 0 + mask_i_surface = mask_i.collapsed(z_coord, MAX) if masked: - mask_i_surface.data=ma.masked_equal(mask_i_surface.core_data(),0) - return mask_i_surface + mask_i_surface.data = ma.masked_equal(mask_i_surface.core_data(), 0) + return mask_i_surface + + +def mask_all_surface(mask, masked=False, z_coord="model_level_number"): + """create surface mask for individual features -def mask_all_surface(mask,masked=False,z_coord='model_level_number'): - ''' create surface mask for individual features - Parameters ---------- - mask: iris.cube.Cube + mask: iris.cube.Cube cube containing mask (int id for tacked volumes 0 everywhere else) - + Returns ------- mask_i_surface: iris.cube.Cube (2D) Mask with 1 below features and 0 everywhere else - ''' + """ from iris.analysis import MAX - from dask.array import ma,isin + from dask.array import ma, isin from 
copy import deepcopy - mask_i=deepcopy(mask) - mask_i_surface=mask_i.collapsed(z_coord,MAX) - mask_i_surface_data=mask_i_surface.core_data() - mask_i_surface[mask_i_surface_data>0]=1 + + mask_i = deepcopy(mask) + mask_i_surface = mask_i.collapsed(z_coord, MAX) + mask_i_surface_data = mask_i_surface.core_data() + mask_i_surface[mask_i_surface_data > 0] = 1 if masked: - mask_i_surface.data=ma.masked_equal(mask_i_surface.core_data(),0) - return mask_i_surface + mask_i_surface.data = ma.masked_equal(mask_i_surface.core_data(), 0) + return mask_i_surface # def mask_features_columns(mask,feature_ids,masked=False,z_coord='model_level_number'): -# ''' Mask cube for untracked volume +# ''' Mask cube for untracked volume # Input: -# variable_cube: iris.cube.Cube +# variable_cube: iris.cube.Cube # unmasked data cube -# mask: iris.cube.Cube +# mask: iris.cube.Cube # cube containing mask (int id for tacked volumes 0 everywhere else) # Output: -# variable_cube_out: iris.cube.Cube +# variable_cube_out: iris.cube.Cube # Masked cube for untracked volume # ''' # from iris.analysis import MAX -# import numpy as np +# import numpy as np # from copy import deepcopy # mask_i=deepcopy(mask) # mask_i.data[~np.isin(mask_i.data,feature_ids)]=0 @@ -285,27 +314,25 @@ def mask_all_surface(mask,masked=False,z_coord='model_level_number'): # return mask_i - - -#def constraint_cell(track,mask_cell,width=None,x=None,): +# def constraint_cell(track,mask_cell,width=None,x=None,): # from iris import Constraint # import numpy as np -# +# # time_coord=mask_cell.coord('time') # time_units=time_coord.units -# +# # def time_condition(cell): # return time_units.num2date(track.head(n=1)['time']) <= cell <= time_units.num2date(track.tail(n=1)['time']) # # constraint_time=Constraint(time=time_condition) ## mask_cell_i=mask_cell.extract(constraint_time) # mask_cell_surface_i=mask_cell_surface.extract(constraint_time) -# +# # x_dim=mask_cell_surface_i.coord_dims('projection_x_coordinate')[0] # y_dim=mask_cell_surface_i.coord_dims('projection_y_coordinate')[0] # x_coord=mask_cell_surface_i.coord('projection_x_coordinate') # y_coord=mask_cell_surface_i.coord('projection_y_coordinate') -# +# # if (mask_cell_surface_i.core_data()>0).any(): # box_mask_i=get_bounding_box(mask_cell_surface_i.core_data(),buffer=1) # @@ -323,10 +350,12 @@ def mask_all_surface(mask,masked=False,z_coord='model_level_number'): # # constraint=constraint_time & constraint_x & constraint_y # return constraint - -def add_coordinates(t,variable_cube): + + +def add_coordinates(t, variable_cube): import numpy as np - '''Function adding coordinates from the tracking cube to the trajectories + + """Function adding coordinates from the tracking cube to the trajectories for the 2D case: time, longitude&latitude, x&y dimensions Parameters @@ -343,111 +372,134 @@ def add_coordinates(t,variable_cube): ------- pandas DataFrame trajectories with added coordinates - ''' + """ from scipy.interpolate import interp2d, interp1d - logging.debug('start adding coordinates from cube') + logging.debug("start adding coordinates from cube") - # pull time as datetime object and timestr from input data and add it to DataFrame: - t['time']=None - t['timestr']=None - - - logging.debug('adding time coordinate') + # pull time as datetime object and timestr from input data and add it to DataFrame: + t["time"] = None + t["timestr"] = None - time_in=variable_cube.coord('time') - time_in_datetime=time_in.units.num2date(time_in.points) - - t["time"]=time_in_datetime[t['frame']] - 
t["timestr"]=[x.strftime('%Y-%m-%d %H:%M:%S') for x in time_in_datetime[t['frame']]] + logging.debug("adding time coordinate") + + time_in = variable_cube.coord("time") + time_in_datetime = time_in.units.num2date(time_in.points) + + t["time"] = time_in_datetime[t["frame"]] + t["timestr"] = [ + x.strftime("%Y-%m-%d %H:%M:%S") for x in time_in_datetime[t["frame"]] + ] # Get list of all coordinates in input cube except for time (already treated): - coord_names=[coord.name() for coord in variable_cube.coords()] - coord_names.remove('time') - - logging.debug('time coordinate added') - - # chose right dimension for horizontal axis based on time dimension: - ndim_time=variable_cube.coord_dims('time')[0] - if ndim_time==0: - hdim_1=1 - hdim_2=2 - elif ndim_time==1: - hdim_1=0 - hdim_2=2 - elif ndim_time==2: - hdim_1=0 - hdim_2=1 - + coord_names = [coord.name() for coord in variable_cube.coords()] + coord_names.remove("time") + + logging.debug("time coordinate added") + + # chose right dimension for horizontal axis based on time dimension: + ndim_time = variable_cube.coord_dims("time")[0] + if ndim_time == 0: + hdim_1 = 1 + hdim_2 = 2 + elif ndim_time == 1: + hdim_1 = 0 + hdim_2 = 2 + elif ndim_time == 2: + hdim_1 = 0 + hdim_2 = 1 + # create vectors to use to interpolate from pixels to coordinates - dimvec_1=np.arange(variable_cube.shape[hdim_1]) - dimvec_2=np.arange(variable_cube.shape[hdim_2]) + dimvec_1 = np.arange(variable_cube.shape[hdim_1]) + dimvec_2 = np.arange(variable_cube.shape[hdim_2]) # loop over coordinates in input data: for coord in coord_names: - logging.debug('adding coord: '+ coord) + logging.debug("adding coord: " + coord) # interpolate 2D coordinates: - if variable_cube.coord(coord).ndim==1: - - if variable_cube.coord_dims(coord)==(hdim_1,): - f=interp1d(dimvec_1,variable_cube.coord(coord).points,fill_value="extrapolate") - coordinate_points=f(t['hdim_1']) - - if variable_cube.coord_dims(coord)==(hdim_2,): - f=interp1d(dimvec_2,variable_cube.coord(coord).points,fill_value="extrapolate") - coordinate_points=f(t['hdim_2']) + if variable_cube.coord(coord).ndim == 1: + + if variable_cube.coord_dims(coord) == (hdim_1,): + f = interp1d( + dimvec_1, + variable_cube.coord(coord).points, + fill_value="extrapolate", + ) + coordinate_points = f(t["hdim_1"]) + + if variable_cube.coord_dims(coord) == (hdim_2,): + f = interp1d( + dimvec_2, + variable_cube.coord(coord).points, + fill_value="extrapolate", + ) + coordinate_points = f(t["hdim_2"]) # interpolate 2D coordinates: - elif variable_cube.coord(coord).ndim==2: + elif variable_cube.coord(coord).ndim == 2: - if variable_cube.coord_dims(coord)==(hdim_1,hdim_2): - f=interp2d(dimvec_2,dimvec_1,variable_cube.coord(coord).points) - coordinate_points=[f(a,b) for a,b in zip(t['hdim_2'],t['hdim_1'])] + if variable_cube.coord_dims(coord) == (hdim_1, hdim_2): + f = interp2d(dimvec_2, dimvec_1, variable_cube.coord(coord).points) + coordinate_points = [f(a, b) for a, b in zip(t["hdim_2"], t["hdim_1"])] - if variable_cube.coord_dims(coord)==(hdim_2,hdim_1): - f=interp2d(dimvec_1,dimvec_2,variable_cube.coord(coord).points) - coordinate_points=[f(a,b) for a,b in zip(t['hdim_1'],t['hdim_2'])] + if variable_cube.coord_dims(coord) == (hdim_2, hdim_1): + f = interp2d(dimvec_1, dimvec_2, variable_cube.coord(coord).points) + coordinate_points = [f(a, b) for a, b in zip(t["hdim_1"], t["hdim_2"])] - # interpolate 3D coordinates: + # interpolate 3D coordinates: # mainly workaround for wrf latitude and longitude (to be fixed in future) # TODO: investigate, is 
this necessary?
-    elif variable_cube.coord(coord).ndim==3:
-
-        if variable_cube.coord_dims(coord)==(ndim_time,hdim_1,hdim_2):
-            f=interp2d(dimvec_2,dimvec_1,variable_cube[0,:,:].coord(coord).points)
-            coordinate_points=[f(a,b) for a,b in zip(t['hdim_2'],t['hdim_1'])]
-
-        if variable_cube.coord_dims(coord)==(ndim_time,hdim_2,hdim_1):
-            f=interp2d(dimvec_1,dimvec_2,variable_cube[0,:,:].coord(coord).points)
-            coordinate_points=[f(a,b) for a,b in zip(t['hdim_1'],t['hdim_2'])]
-
-
-        if variable_cube.coord_dims(coord)==(hdim_1,ndim_time,hdim_2):
-            f=interp2d(dimvec_2,dimvec_1,variable_cube[:,0,:].coord(coord).points)
-            coordinate_points=[f(a,b) for a,b in zip(t['hdim_2'],t['hdim_1'])]
-
-        if variable_cube.coord_dims(coord)==(hdim_1,hdim_2,ndim_time):
-            f=interp2d(dimvec_2,dimvec_1,variable_cube[:,:,0].coord(coord).points)
-            coordinate_points=[f(a,b) for a,b in zip(t['hdim_2'],t['hdim1'])]
-
-
-        if variable_cube.coord_dims(coord)==(hdim_2,ndim_time,hdim_1):
-            f=interp2d(dimvec_1,dimvec_2,variable_cube[:,0,:].coord(coord).points)
-            coordinate_points=[f(a,b) for a,b in zip(t['hdim_1'],t['hdim_2'])]
-
-        if variable_cube.coord_dims(coord)==(hdim_2,hdim_1,ndim_time):
-            f=interp2d(dimvec_1,dimvec_2,variable_cube[:,:,0].coord(coord).points)
-            coordinate_points=[f(a,b) for a,b in zip(t['hdim_1'],t['hdim_2'])]
+    elif variable_cube.coord(coord).ndim == 3:
+
+        if variable_cube.coord_dims(coord) == (ndim_time, hdim_1, hdim_2):
+            f = interp2d(
+                dimvec_2, dimvec_1, variable_cube[0, :, :].coord(coord).points
+            )
+            coordinate_points = [f(a, b) for a, b in zip(t["hdim_2"], t["hdim_1"])]
+
+        if variable_cube.coord_dims(coord) == (ndim_time, hdim_2, hdim_1):
+            f = interp2d(
+                dimvec_1, dimvec_2, variable_cube[0, :, :].coord(coord).points
+            )
+            coordinate_points = [f(a, b) for a, b in zip(t["hdim_1"], t["hdim_2"])]
+
+        if variable_cube.coord_dims(coord) == (hdim_1, ndim_time, hdim_2):
+            f = interp2d(
+                dimvec_2, dimvec_1, variable_cube[:, 0, :].coord(coord).points
+            )
+            coordinate_points = [f(a, b) for a, b in zip(t["hdim_2"], t["hdim_1"])]
+
+        if variable_cube.coord_dims(coord) == (hdim_1, hdim_2, ndim_time):
+            f = interp2d(
+                dimvec_2, dimvec_1, variable_cube[:, :, 0].coord(coord).points
+            )
+            coordinate_points = [f(a, b) for a, b in zip(t["hdim_2"], t["hdim_1"])]
+
+        if variable_cube.coord_dims(coord) == (hdim_2, ndim_time, hdim_1):
+            f = interp2d(
+                dimvec_1, dimvec_2, variable_cube[:, 0, :].coord(coord).points
+            )
+            coordinate_points = [f(a, b) for a, b in zip(t["hdim_1"], t["hdim_2"])]
+
+        if variable_cube.coord_dims(coord) == (hdim_2, hdim_1, ndim_time):
+            f = interp2d(
+                dimvec_1, dimvec_2, variable_cube[:, :, 0].coord(coord).points
+            )
+            coordinate_points = [f(a, b) for a, b in zip(t["hdim_1"], t["hdim_2"])]

         # write resulting array or list into DataFrame:
-        t[coord]=coordinate_points
+        t[coord] = coordinate_points

-        logging.debug('added coord: '+ coord)
+        logging.debug("added coord: " + coord)
     return t

-def add_coordinates_3D(t,variable_cube, vertical_coord='auto', assume_coords_fixed_in_time = True):
+
+def add_coordinates_3D(
+    t, variable_cube, vertical_coord="auto", assume_coords_fixed_in_time=True
+):
     import numpy as np
-    '''Function adding coordinates from the tracking cube to the trajectories
+
+    """Function adding coordinates from the tracking cube to the trajectories
     for the 3D case: time, longitude&latitude, x&y dimensions, and altitude

     Parameters
@@ -475,49 +527,53 @@ def add_coordinates_3D(t,variable_cube, vertical_coord='auto', assume_coords_fix
     -------
     pandas DataFrame
                    trajectories with added 
coordinates - ''' + """ from scipy.interpolate import interp2d, interp1d, interpn - logging.debug('start adding coordinates from cube') + logging.debug("start adding coordinates from cube") - # pull time as datetime object and timestr from input data and add it to DataFrame: - t['time']=None - t['timestr']=None - - - logging.debug('adding time coordinate') + # pull time as datetime object and timestr from input data and add it to DataFrame: + t["time"] = None + t["timestr"] = None - time_in=variable_cube.coord('time') - time_in_datetime=time_in.units.num2date(time_in.points) - - t["time"]=time_in_datetime[t['frame']] - t["timestr"]=[x.strftime('%Y-%m-%d %H:%M:%S') for x in time_in_datetime[t['frame']]] + logging.debug("adding time coordinate") + + time_in = variable_cube.coord("time") + time_in_datetime = time_in.units.num2date(time_in.points) + + t["time"] = time_in_datetime[t["frame"]] + t["timestr"] = [ + x.strftime("%Y-%m-%d %H:%M:%S") for x in time_in_datetime[t["frame"]] + ] # Get list of all coordinates in input cube except for time (already treated): - coord_names=[coord.name() for coord in variable_cube.coords()] - coord_names.remove('time') - - logging.debug('time coordinate added') + coord_names = [coord.name() for coord in variable_cube.coords()] + coord_names.remove("time") + + logging.debug("time coordinate added") + + # chose right dimension for horizontal and vertical axes based on time dimension: + ndim_time = variable_cube.coord_dims("time")[0] - # chose right dimension for horizontal and vertical axes based on time dimension: - ndim_time=variable_cube.coord_dims('time')[0] - # TODO: move this to a function, this is duplicated from segmentation. if type(vertical_coord) is int: ndim_vertical = vertical_coord vertical_axis = None else: - vertical_axis = find_vertical_axis_from_coord(variable_cube, vertical_coord=vertical_coord) - + vertical_axis = find_vertical_axis_from_coord( + variable_cube, vertical_coord=vertical_coord + ) + if vertical_axis is not None: - ndim_vertical=variable_cube.coord_dims(vertical_axis) + ndim_vertical = variable_cube.coord_dims(vertical_axis) if len(ndim_vertical) > 1: - raise ValueError("Vertical coordinate detected as multidimensional. Please pass in " - "axis number of vertical data.") + raise ValueError( + "Vertical coordinate detected as multidimensional. Please pass in " + "axis number of vertical data." + ) else: ndim_vertical = ndim_vertical[0] - # We need to figure out the axis number of hdim_1 and hdim_2. 
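    # (Illustrative aside, assuming a common dimension order: for a cube laid
    # out as (time, z, y, x), ndim_time == 0 and ndim_vertical == 1, so the
    # search below yields ndim_hdim_1 == 2 and ndim_hdim_2 == 3.)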
ndim_hdim_1 = None ndim_hdim_2 = None @@ -530,69 +586,85 @@ def add_coordinates_3D(t,variable_cube, vertical_coord='auto', assume_coords_fix if ndim_hdim_1 is None or ndim_hdim_2 is None: raise ValueError("Could not find hdim coordinates.") - + # create vectors to use to interpolate from pixels to coordinates - dimvec_1=np.arange(variable_cube.shape[ndim_vertical]) - dimvec_2=np.arange(variable_cube.shape[ndim_hdim_1]) - dimvec_3=np.arange(variable_cube.shape[ndim_hdim_2]) + dimvec_1 = np.arange(variable_cube.shape[ndim_vertical]) + dimvec_2 = np.arange(variable_cube.shape[ndim_hdim_1]) + dimvec_3 = np.arange(variable_cube.shape[ndim_hdim_2]) dimvec_time = np.arange(variable_cube.shape[ndim_time]) - - coord_to_ax = {ndim_vertical: (dimvec_1, 'vdim'), - ndim_time: (dimvec_time,'time'), - ndim_hdim_1: (dimvec_2, 'hdim_1'), ndim_hdim_2: (dimvec_3, 'hdim_2')} + + coord_to_ax = { + ndim_vertical: (dimvec_1, "vdim"), + ndim_time: (dimvec_time, "time"), + ndim_hdim_1: (dimvec_2, "hdim_1"), + ndim_hdim_2: (dimvec_3, "hdim_2"), + } # loop over coordinates in input data: for coord in coord_names: - logging.debug('adding coord: '+ coord) + logging.debug("adding coord: " + coord) # interpolate 1D coordinates: var_coord = variable_cube.coord(coord) - if var_coord.ndim==1: + if var_coord.ndim == 1: curr_dim = coord_to_ax[variable_cube.coord_dims(coord)[0]] - f=interp1d(curr_dim[0],var_coord.points,fill_value="extrapolate") - coordinate_points=f(t[curr_dim[1]]) + f = interp1d(curr_dim[0], var_coord.points, fill_value="extrapolate") + coordinate_points = f(t[curr_dim[1]]) - # interpolate 2D coordinates - elif var_coord.ndim==2: + # interpolate 2D coordinates + elif var_coord.ndim == 2: first_dim = coord_to_ax[variable_cube.coord_dims(coord)[1]] second_dim = coord_to_ax[variable_cube.coord_dims(coord)[0]] - f=interp2d(first_dim[0],second_dim[0],var_coord.points) - coordinate_points=[f(a,b) for a,b in zip(t[first_dim[1]],t[second_dim[1]])] - + f = interp2d(first_dim[0], second_dim[0], var_coord.points) + coordinate_points = [ + f(a, b) for a, b in zip(t[first_dim[1]], t[second_dim[1]]) + ] + # Deal with the special case where the coordinate is 3D but # one of the dimensions is time and we assume the coordinates # don't vary in time. 
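        # (A typical instance of this case: WRF-style latitude/longitude stored
        # as (time, y, x); as in the add_coordinates workaround above, a single
        # timestep is then enough to interpolate from.)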
- elif (var_coord.ndim == 3 and ndim_time in variable_cube.coord_dims(coord) - and assume_coords_fixed_in_time): + elif ( + var_coord.ndim == 3 + and ndim_time in variable_cube.coord_dims(coord) + and assume_coords_fixed_in_time + ): time_pos = variable_cube.coord_dims(coord).index(ndim_time) - hdim1_pos = 0 if time_pos !=0 else 1 + hdim1_pos = 0 if time_pos != 0 else 1 hdim2_pos = 1 if time_pos == 2 else 2 first_dim = coord_to_ax[variable_cube.coord_dims(coord)[hdim2_pos]] second_dim = coord_to_ax[variable_cube.coord_dims(coord)[hdim1_pos]] - f=interp2d(first_dim[0],second_dim[0],var_coord.points) - coordinate_points=[f(a,b) for a,b in zip(t[first_dim[1]],t[second_dim[1]])] - + f = interp2d(first_dim[0], second_dim[0], var_coord.points) + coordinate_points = [ + f(a, b) for a, b in zip(t[first_dim[1]], t[second_dim[1]]) + ] - # interpolate 3D coordinates: - elif var_coord.ndim==3: + # interpolate 3D coordinates: + elif var_coord.ndim == 3: curr_coord_dims = variable_cube.coord_dims(coord) first_dim = coord_to_ax[variable_cube.coord_dims(coord)[0]] second_dim = coord_to_ax[variable_cube.coord_dims(coord)[1]] third_dim = coord_to_ax[variable_cube.coord_dims(coord)[2]] - coordinate_points=interpn([first_dim[0],second_dim[0], third_dim[0]],var_coord.points, - [[a,b,c] for a,b,c in zip(t[first_dim[1]],t[second_dim[1]], t[third_dim[1]])]) - #coordinate_points=[f(a,b) for a,b in zip(t[first_dim[1]],t[second_dim[1]])] + coordinate_points = interpn( + [first_dim[0], second_dim[0], third_dim[0]], + var_coord.points, + [ + [a, b, c] + for a, b, c in zip( + t[first_dim[1]], t[second_dim[1]], t[third_dim[1]] + ) + ], + ) + # coordinate_points=[f(a,b) for a,b in zip(t[first_dim[1]],t[second_dim[1]])] # write resulting array or list into DataFrame: - t[coord]=coordinate_points + t[coord] = coordinate_points - logging.debug('added coord: '+ coord) + logging.debug("added coord: " + coord) return t +def get_bounding_box(x, buffer=1): + from numpy import delete, arange, diff, nonzero, array - -def get_bounding_box(x,buffer=1): - from numpy import delete,arange,diff,nonzero,array """Calculates the bounding box of a ndarray https://stackoverflow.com/questions/31400769/bounding-box-of-numpy-array """ @@ -600,7 +672,7 @@ def get_bounding_box(x,buffer=1): bbox = [] all_axis = arange(x.ndim) - #loop over dimensions + # loop over dimensions for kdim in all_axis: nk_dim = delete(all_axis, kdim) mask_i = mask.all(axis=tuple(nk_dim)) @@ -608,74 +680,86 @@ def get_bounding_box(x,buffer=1): idx_i = nonzero(dmask_i)[0] # for case where there is no value in idx_i if len(idx_i) == 0: - idx_i=array([0,x.shape[kdim]-1]) + idx_i = array([0, x.shape[kdim] - 1]) # for case where there is only one value in idx_i elif len(idx_i) == 1: - idx_i=array([idx_i,idx_i]) + idx_i = array([idx_i, idx_i]) # make sure there is two values in idx_i elif len(idx_i) > 2: - idx_i=array([idx_i[0],idx_i[-1]]) + idx_i = array([idx_i[0], idx_i[-1]]) # caluclate min and max values for idx_i and append them to list - idx_min=max(0,idx_i[0]+1-buffer) - idx_max=min(x.shape[kdim]-1,idx_i[1]+1+buffer) + idx_min = max(0, idx_i[0] + 1 - buffer) + idx_max = min(x.shape[kdim] - 1, idx_i[1] + 1 + buffer) bbox.append([idx_min, idx_max]) return bbox -def get_spacings(field_in,grid_spacing=None,time_spacing=None): + +def get_spacings(field_in, grid_spacing=None, time_spacing=None): import numpy as np from copy import deepcopy + # set horizontal grid spacing of input data # If cartesian x and y corrdinates are present, use these to determine dxy (vertical grid 
spacing used to transfer pixel distances to real distances):
-    coord_names=[coord.name() for coord in field_in.coords()]
-
-    if (('projection_x_coordinate' in coord_names and 'projection_y_coordinate' in coord_names) and (grid_spacing is None)):
-        x_coord=deepcopy(field_in.coord('projection_x_coordinate'))
-        x_coord.convert_units('metre')
-        dx=np.diff(field_in.coord('projection_y_coordinate')[0:2].points)[0]
-        y_coord=deepcopy(field_in.coord('projection_y_coordinate'))
-        y_coord.convert_units('metre')
-        dy=np.diff(field_in.coord('projection_y_coordinate')[0:2].points)[0]
-        dxy=0.5*(dx+dy)
+    coord_names = [coord.name() for coord in field_in.coords()]
+
+    if (
+        "projection_x_coordinate" in coord_names
+        and "projection_y_coordinate" in coord_names
+    ) and (grid_spacing is None):
+        x_coord = deepcopy(field_in.coord("projection_x_coordinate"))
+        x_coord.convert_units("metre")
+        dx = np.diff(field_in.coord("projection_x_coordinate")[0:2].points)[0]
+        y_coord = deepcopy(field_in.coord("projection_y_coordinate"))
+        y_coord.convert_units("metre")
+        dy = np.diff(field_in.coord("projection_y_coordinate")[0:2].points)[0]
+        dxy = 0.5 * (dx + dy)
     elif grid_spacing is not None:
-        dxy=grid_spacing
+        dxy = grid_spacing
     else:
-        ValueError('no information about grid spacing, need either input cube with projection_x_coord and projection_y_coord or keyword argument grid_spacing')
-
+        raise ValueError(
+            "no information about grid spacing, need either input cube with projection_x_coord and projection_y_coord or keyword argument grid_spacing"
+        )
+
     # set time spacing of input data
-    if (time_spacing is None):
+    if time_spacing is None:
        # get time resolution of input data from first two steps of input cube:
-        time_coord=field_in.coord('time')
-        dt=(time_coord.units.num2date(time_coord.points[1])-time_coord.units.num2date(time_coord.points[0])).seconds
-    elif (time_spacing is not None):
+        time_coord = field_in.coord("time")
+        dt = (
+            time_coord.units.num2date(time_coord.points[1])
+            - time_coord.units.num2date(time_coord.points[0])
+        ).seconds
+    elif time_spacing is not None:
         # use value of time_spacing for dt:
-        dt=time_spacing
-    return dxy,dt
+        dt = time_spacing
+    return dxy, dt
+
 def get_label_props_in_dict(labels):
-    '''Function to get the label properties into a dictionary format.
-
+    """Function to get the label properties into a dictionary format.
+
     Parameters
     ----------
    labels:    2D or 3D array-like
        comes from the `skimage.measure.label` function
-
+
     Returns
     -------
    dict
        output from skimage.measure.regionprops in dictionary format, where the key is the label number
-    '''
+    """
     import skimage.measure
-
+
     region_properties_raw = skimage.measure.regionprops(labels)
     region_properties_dict = dict()
     for region_prop in region_properties_raw:
         region_properties_dict[region_prop.label] = region_prop
-
+
     return region_properties_dict
+
 def get_indices_of_labels_from_reg_prop_dict(region_property_dict):
-    '''Function to get the x, y, and z indices (as well as point count) of all labeled regions.
-
+    """Function to get the x, y, and z indices (as well as point count) of all labeled regions.
+ Parameters ---------- region_property_dict: dict of region_property objects @@ -691,32 +775,31 @@ def get_indices_of_labels_from_reg_prop_dict(region_property_dict): the y indices in the label number dict (key: label number, int) the x indices in the label number - + Raises ------ ValueError a ValueError is raised if there are no regions in the region property dict - ''' - + """ + import skimage.measure import numpy as np - if len(region_property_dict) ==0: + if len(region_property_dict) == 0: raise ValueError("No regions!") - z_indices = dict() y_indices = dict() x_indices = dict() curr_loc_indices = dict() is_3D = False - - #loop through all skimage identified regions + + # loop through all skimage identified regions for region_prop_key in region_property_dict: region_prop = region_property_dict[region_prop_key] index = region_prop.label - if len(region_prop.coords[0])>=3: + if len(region_prop.coords[0]) >= 3: is_3D = True curr_z_ixs, curr_y_ixs, curr_x_ixs = np.transpose(region_prop.coords) z_indices[index] = curr_z_ixs @@ -727,16 +810,17 @@ def get_indices_of_labels_from_reg_prop_dict(region_property_dict): y_indices[index] = curr_y_ixs x_indices[index] = curr_x_ixs curr_loc_indices[index] = len(curr_y_ixs) - - #print("indices found") + + # print("indices found") if is_3D: return [curr_loc_indices, z_indices, y_indices, x_indices] - else: + else: return [curr_loc_indices, y_indices, x_indices] + def adjust_pbc_point(in_dim, dim_min, dim_max): - '''Function to adjust a point to the other boundary for PBCs - + """Function to adjust a point to the other boundary for PBCs + Parameters ---------- in_dim : int @@ -745,17 +829,17 @@ def adjust_pbc_point(in_dim, dim_min, dim_max): Minimum point for the dimension dim_max : int Maximum point for the dimension (inclusive) - + Returns ------- int The adjusted point on the opposite boundary - + Raises ------ ValueError If in_dim isn't on one of the boundary points - ''' + """ if in_dim == dim_min: return dim_max elif in_dim == dim_max: @@ -764,20 +848,28 @@ def adjust_pbc_point(in_dim, dim_min, dim_max): raise ValueError("In adjust_pbc_point, in_dim isn't on a boundary.") -def get_pbc_coordinates(h1_min, h1_max, h2_min, h2_max, - h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord, - PBC_flag = 'none'): - '''Function to get the *actual* coordinate boxes of interest given a set of shifted - coordinates with periodic boundaries. - - For example, if you pass in [as h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord] +def get_pbc_coordinates( + h1_min, + h1_max, + h2_min, + h2_max, + h1_start_coord, + h1_end_coord, + h2_start_coord, + h2_end_coord, + PBC_flag="none", +): + """Function to get the *actual* coordinate boxes of interest given a set of shifted + coordinates with periodic boundaries. + + For example, if you pass in [as h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord] (-3, 5, 2,6) with PBC_flag of 'both' or 'hdim_1', h1_max of 10, and h1_min of 0 this function will return: [(0,5,2,6), (7,10,2,6)]. If you pass in something outside the bounds of the array, this will truncate your - requested box. For example, if you pass in [as h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord] + requested box. For example, if you pass in [as h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord] (-3, 5, 2,6) with PBC_flag of 'none' or 'hdim_2', this function will return: - [(0,5,2,6)], assuming h1_min is 0. + [(0,5,2,6)], assuming h1_min is 0. 

-def get_pbc_coordinates(h1_min, h1_max, h2_min, h2_max,
-                        h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord,
-                        PBC_flag = 'none'):
-    '''Function to get the *actual* coordinate boxes of interest given a set of shifted
-    coordinates with periodic boundaries.
-
-    For example, if you pass in [as h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord]
+def get_pbc_coordinates(
+    h1_min,
+    h1_max,
+    h2_min,
+    h2_max,
+    h1_start_coord,
+    h1_end_coord,
+    h2_start_coord,
+    h2_end_coord,
+    PBC_flag="none",
+):
+    """Function to get the *actual* coordinate boxes of interest given a set of shifted
+    coordinates with periodic boundaries.
+
+    For example, if you pass in [as h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord]
     (-3, 5, 2,6) with PBC_flag of 'both' or 'hdim_1', h1_max of 10, and h1_min of 0
     this function will return: [(0,5,2,6), (7,10,2,6)].

     If you pass in something outside the bounds of the array, this will truncate your
-    requested box. For example, if you pass in [as h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord]
+    requested box. For example, if you pass in [as h1_start_coord, h1_end_coord, h2_start_coord, h2_end_coord]
     (-3, 5, 2,6) with PBC_flag of 'none' or 'hdim_2', this function will return:
-    [(0,5,2,6)], assuming h1_min is 0.
+    [(0,5,2,6)], assuming h1_min is 0.

     For cases where PBC_flag is 'both' and we have a corner case, it is possible to get
     overlapping boundaries. For example, if you pass in (-6, 5, -6, 5)
@@ -791,7 +883,7 @@ def get_pbc_coordinates(h1_min, h1_max, h2_min, h2_max,
     h2_min: int
         Minimum array value in hdim_2, typically 0.
     h2_max: int
-        Maximum array value in hdim_2 (exclusive). h2_max - h2_min should be the size in h2.
+        Maximum array value in hdim_2 (exclusive). h2_max - h2_min should be the size in h2.
     h1_start_coord: int
         Start coordinate in hdim_1. Can be < h1_min if dealing with PBCs.
     h1_end_coord: int
@@ -812,37 +904,36 @@ def get_pbc_coordinates(h1_min, h1_max, h2_min, h2_max,
     list of tuples
         A list of tuples containing (h1_start, h1_end, h2_start, h2_end) of each of the boxes
         needed to encompass the coordinates.
-    '''
+    """

-    if PBC_flag not in ['none', 'hdim_1', 'hdim_2', 'both']:
+    if PBC_flag not in ["none", "hdim_1", "hdim_2", "both"]:
         raise ValueError("PBC_flag must be 'none', 'hdim_1', 'hdim_2', or 'both'")
-
     h1_start_coords = list()
     h1_end_coords = list()
     h2_start_coords = list()
     h2_end_coords = list()
-
-    # In both of these cases, we just need to truncate the hdim_1 points.
-    if PBC_flag in ['none', 'hdim_2']:
+    # In both of these cases, we just need to truncate the hdim_1 points.
+    if PBC_flag in ["none", "hdim_2"]:
         h1_start_coords.append(max(h1_min, h1_start_coord))
         h1_end_coords.append(min(h1_max, h1_end_coord))
-
-
+
     # In both of these cases, we only need to truncate the hdim_2 points.
-    if PBC_flag in ['none', 'hdim_1']:
+    if PBC_flag in ["none", "hdim_1"]:
         h2_start_coords.append(max(h2_min, h2_start_coord))
         h2_end_coords.append(min(h2_max, h2_end_coord))

     # If the PBC flag is none, we can just return.
-    if PBC_flag == 'none':
-        return [(h1_start_coords[0], h1_end_coords[0], h2_start_coords[0], h2_end_coords[0])]
+    if PBC_flag == "none":
+        return [
+            (h1_start_coords[0], h1_end_coords[0], h2_start_coords[0], h2_end_coords[0])
+        ]

-    # We have at least one periodic boundary.
+    # We have at least one periodic boundary.

-    # hdim_1 boundary is periodic.
-    if PBC_flag in ['hdim_1', 'both']:
+    # hdim_1 boundary is periodic.
+    if PBC_flag in ["hdim_1", "both"]:
         if (h1_end_coord - h1_start_coord) >= (h1_max - h1_min):
             # In this case, we have selected the full h1 length of the domain,
             # so we set the start and end coords to just that.
@@ -871,8 +962,8 @@ def get_pbc_coordinates(h1_min, h1_max, h2_min, h2_max,
         else:
             h1_start_coords.append(h1_start_coord)
             h1_end_coords.append(h1_end_coord)
-
-    if PBC_flag in ['hdim_2', 'both']:
+
+    if PBC_flag in ["hdim_2", "both"]:
         if (h2_end_coord - h2_start_coord) >= (h2_max - h2_min):
             # In this case, we have selected the full h2 length of the domain,
             # so we set the start and end coords to just that.
@@ -903,14 +994,26 @@ def get_pbc_coordinates(h1_min, h1_max, h2_min, h2_max,
             h2_end_coords.append(h2_end_coord)

     out_coords = list()
-    for h1_start_coord_single, h1_end_coord_single in zip(h1_start_coords, h1_end_coords):
-        for h2_start_coord_single, h2_end_coord_single in zip(h2_start_coords, h2_end_coords):
-            out_coords.append((h1_start_coord_single, h1_end_coord_single, h2_start_coord_single, h2_end_coord_single))
+    for h1_start_coord_single, h1_end_coord_single in zip(
+        h1_start_coords, h1_end_coords
+    ):
+        for h2_start_coord_single, h2_end_coord_single in zip(
+            h2_start_coords, h2_end_coords
+        ):
+            out_coords.append(
+                (
+                    h1_start_coord_single,
+                    h1_end_coord_single,
+                    h2_start_coord_single,
+                    h2_end_coord_single,
+                )
+            )
     return out_coords
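[A quick sketch of the docstring example above, shown as a call; illustrative only,
not part of the patch.]

    # The requested box extends past the lower hdim_1 boundary (-3 < h1_min = 0),
    # so with a periodic hdim_1 the box is split into two pieces.
    boxes = get_pbc_coordinates(
        h1_min=0, h1_max=10, h2_min=0, h2_max=10,
        h1_start_coord=-3, h1_end_coord=5,
        h2_start_coord=2, h2_end_coord=6,
        PBC_flag="hdim_1",
    )
    # boxes == [(0, 5, 2, 6), (7, 10, 2, 6)]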
+

 def njit_if_available(func, **kwargs):
-    '''Decorator to wrap a function with numba.njit if available.
-    If numba isn't available, it just returns the function.
+    """Decorator to wrap a function with numba.njit if available.
+    If numba isn't available, it just returns the function.

     Parameters
     ----------
@@ -918,20 +1021,22 @@ def njit_if_available(func, **kwargs):
         Function to wrap with njit
     kwargs:
         Keyword arguments to pass to numba njit
-    '''
+    """
     try:
         from numba import njit
+
         return njit(func, kwargs)
     except ModuleNotFoundError:
         return func
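[To illustrate the decorator above; a minimal sketch assuming it is applied without
extra keyword arguments, since numba is an optional dependency here.]

    @njit_if_available
    def add_one(x):
        # Compiled with numba.njit when numba is installed; plain Python otherwise.
        return x + 1

    print(add_one(41))  # 42 either way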

 @njit_if_available
-def calc_distance_coords_pbc(coords_1, coords_2, min_h1, max_h1, min_h2, max_h2,
-                             PBC_flag):
-    '''Function to calculate the distance between cartesian
+def calc_distance_coords_pbc(
+    coords_1, coords_2, min_h1, max_h1, min_h2, max_h2, PBC_flag
+):
+    """Function to calculate the distance between cartesian
     coordinate set 1 and coordinate set 2. Note that we assume both
-    coordinates are within their min/max already.
+    coordinates are within their min/max already.

     Parameters
     ----------
@@ -943,26 +1048,26 @@ def calc_distance_coords_pbc(coords_1, coords_2, min_h1, max_h1, min_h2, max_h2,
     min_h1: int
         Minimum point in hdim_1
     max_h1: int
-        Maximum point in hdim_1, exclusive. max_h1-min_h1 should be the size.
+        Maximum point in hdim_1, exclusive. max_h1-min_h1 should be the size.
     min_h2: int
         Minimum point in hdim_2
     max_h2: int
-        Maximum point in hdim_2, exclusive. max_h2-min_h2 should be the size.
+        Maximum point in hdim_2, exclusive. max_h2-min_h2 should be the size.
     PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both')
         Sets whether to use periodic boundaries, and if so in which directions.
         'none' means that we do not have periodic boundaries
         'hdim_1' means that we are periodic along hdim1
         'hdim_2' means that we are periodic along hdim2
         'both' means that we are periodic along both horizontal dimensions
-
+
     Returns
     -------
     float
         Distance between coords_1 and coords_2 in cartesian space.
-    '''
-
-    is_3D = len(coords_1)== 3
+    """
+
+    is_3D = len(coords_1) == 3
     size_h1 = max_h1 - min_h1
     size_h2 = max_h2 - min_h2

@@ -971,11 +1076,11 @@ def calc_distance_coords_pbc(coords_1, coords_2, min_h1, max_h1, min_h2, max_h2,
         coords_1 = np.array((0, coords_1[0], coords_1[1]))
         coords_2 = np.array((0, coords_2[0], coords_2[1]))

-    if PBC_flag in ['hdim_1', 'both']:
+    if PBC_flag in ["hdim_1", "both"]:
         mod_h1 = size_h1
     else:
         mod_h1 = 0
-    if PBC_flag in ['hdim_2', 'both']:
+    if PBC_flag in ["hdim_2", "both"]:
         mod_h2 = size_h2
     else:
         mod_h2 = 0
@@ -984,13 +1089,14 @@ def calc_distance_coords_pbc(coords_1, coords_2, min_h1, max_h1, min_h2, max_h2,
     max_dims = np.array((0, mod_h1, mod_h2))
     deltas = np.abs(coords_1 - coords_2)
     deltas = np.where(deltas > 0.5 * max_dims, deltas - max_dims, deltas)
     return np.sqrt(np.sum(deltas**2))
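[A worked example of the minimum-image logic above; values chosen for illustration.]

    import numpy as np

    # Points one cell from opposite edges of a 10-wide periodic hdim_1:
    # the naive separation is 8, but across the boundary it is only 2.
    d = calc_distance_coords_pbc(
        np.array((1, 4)), np.array((9, 4)), 0, 10, 0, 10, "hdim_1"
    )
    # d == 2.0; with PBC_flag="none" the same call returns 8.0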
+ """ - if vertical_coord == 'auto': - list_vertical=['z','model_level_number','altitude','geopotential_height'] + if vertical_coord == "auto": + list_vertical = ["z", "model_level_number", "altitude", "geopotential_height"] all_vertical_axes = list(set(variable_dataframe.columns) & set(list_vertical)) if len(all_vertical_axes) == 1: return all_vertical_axes[0] else: - raise ValueError('Please specify vertical coordinate') + raise ValueError("Please specify vertical coordinate") else: if vertical_coord in variable_dataframe.columns: return vertical_coord else: - raise ValueError("Please specify vertical coordinate") \ No newline at end of file + raise ValueError("Please specify vertical coordinate") diff --git a/tobac/wrapper.py b/tobac/wrapper.py index 78bba6d3..87d8be0a 100644 --- a/tobac/wrapper.py +++ b/tobac/wrapper.py @@ -3,79 +3,92 @@ def tracking_wrapper( - field_in_features, - field_in_segmentation, - time_spacing=None, - grid_spacing=None, - parameters_features=None, - parameters_tracking=None, - parameters_segmentation=None, - ): - + field_in_features, + field_in_segmentation, + time_spacing=None, + grid_spacing=None, + parameters_features=None, + parameters_tracking=None, + parameters_segmentation=None, +): + from .feature_detection import feature_detection_multithreshold from .tracking import linking_trackpy from .segmentation import segmentation_3D, segmentation_2D from .utils import get_spacings - logger = logging.getLogger('trackpy') + logger = logging.getLogger("trackpy") logger.propagate = False logger.setLevel(logging.WARNING) - + ### Prepare Tracking - dxy,dt=get_spacings(field_in_features,grid_spacing=grid_spacing,time_spacing=time_spacing) - + dxy, dt = get_spacings( + field_in_features, grid_spacing=grid_spacing, time_spacing=time_spacing + ) + ### Start Tracking # Feature detection: - - method_detection=parameters_features.pop('method_detection',None) - if method_detection in ["threshold","threshold_multi"]: - features=feature_detection_multithreshold(field_in_features,**parameters_features) - else: - raise ValueError('method_detection unknown, has to be either threshold_multi or threshold') - - method_segmentation=parameters_features.pop('method_segmentation',None) - if method_segmentation == 'watershedding': - if field_in_segmentation.ndim==4: - segmentation_mask,features_segmentation=segmentation_3D(features,field_in_segmentation,**parameters_segmentation) - if field_in_segmentation.ndim==3: - segmentation_mask,features_segmentation=segmentation_2D(features,field_in_segmentation,**parameters_segmentation) - + method_detection = parameters_features.pop("method_detection", None) + if method_detection in ["threshold", "threshold_multi"]: + features = feature_detection_multithreshold( + field_in_features, **parameters_features + ) + else: + raise ValueError( + "method_detection unknown, has to be either threshold_multi or threshold" + ) + + method_segmentation = parameters_features.pop("method_segmentation", None) + + if method_segmentation == "watershedding": + if field_in_segmentation.ndim == 4: + segmentation_mask, features_segmentation = segmentation_3D( + features, field_in_segmentation, **parameters_segmentation + ) + if field_in_segmentation.ndim == 3: + segmentation_mask, features_segmentation = segmentation_2D( + features, field_in_segmentation, **parameters_segmentation + ) # Link the features in the individual frames to trajectories: - method_linking=parameters_features.pop('method_linking',None) + method_linking = parameters_features.pop("method_linking", 
diff --git a/tobac/wrapper.py b/tobac/wrapper.py
index 78bba6d3..87d8be0a 100644
--- a/tobac/wrapper.py
+++ b/tobac/wrapper.py
@@ -3,79 +3,92 @@


 def tracking_wrapper(
-    field_in_features,
-    field_in_segmentation,
-    time_spacing=None,
-    grid_spacing=None,
-    parameters_features=None,
-    parameters_tracking=None,
-    parameters_segmentation=None,
-    ):
-
+    field_in_features,
+    field_in_segmentation,
+    time_spacing=None,
+    grid_spacing=None,
+    parameters_features=None,
+    parameters_tracking=None,
+    parameters_segmentation=None,
+):
+
     from .feature_detection import feature_detection_multithreshold
     from .tracking import linking_trackpy
     from .segmentation import segmentation_3D, segmentation_2D
     from .utils import get_spacings

-    logger = logging.getLogger('trackpy')
+    logger = logging.getLogger("trackpy")
     logger.propagate = False
     logger.setLevel(logging.WARNING)
-
+
     ### Prepare Tracking
-    dxy,dt=get_spacings(field_in_features,grid_spacing=grid_spacing,time_spacing=time_spacing)
-
+    dxy, dt = get_spacings(
+        field_in_features, grid_spacing=grid_spacing, time_spacing=time_spacing
+    )
+
     ### Start Tracking
     # Feature detection:
-
-    method_detection=parameters_features.pop('method_detection',None)
-    if method_detection in ["threshold","threshold_multi"]:
-        features=feature_detection_multithreshold(field_in_features,**parameters_features)
-    else:
-        raise ValueError('method_detection unknown, has to be either threshold_multi or threshold')
-
-    method_segmentation=parameters_features.pop('method_segmentation',None)
-    if method_segmentation == 'watershedding':
-        if field_in_segmentation.ndim==4:
-            segmentation_mask,features_segmentation=segmentation_3D(features,field_in_segmentation,**parameters_segmentation)
-        if field_in_segmentation.ndim==3:
-            segmentation_mask,features_segmentation=segmentation_2D(features,field_in_segmentation,**parameters_segmentation)
-
+    method_detection = parameters_features.pop("method_detection", None)
+    if method_detection in ["threshold", "threshold_multi"]:
+        features = feature_detection_multithreshold(
+            field_in_features, **parameters_features
+        )
+    else:
+        raise ValueError(
+            "method_detection unknown, has to be either threshold_multi or threshold"
+        )
+
+    method_segmentation = parameters_features.pop("method_segmentation", None)
+
+    if method_segmentation == "watershedding":
+        if field_in_segmentation.ndim == 4:
+            segmentation_mask, features_segmentation = segmentation_3D(
+                features, field_in_segmentation, **parameters_segmentation
+            )
+        if field_in_segmentation.ndim == 3:
+            segmentation_mask, features_segmentation = segmentation_2D(
+                features, field_in_segmentation, **parameters_segmentation
+            )

     # Link the features in the individual frames to trajectories:
-    method_linking=parameters_features.pop('method_linking',None)
+    method_linking = parameters_features.pop("method_linking", None)

-    if method_linking == 'trackpy':
-        trajectories=linking_trackpy(features,**parameters_tracking)
-        logging.debug('Finished tracking')
+    if method_linking == "trackpy":
+        trajectories = linking_trackpy(features, **parameters_tracking)
+        logging.debug("Finished tracking")
     else:
-        raise ValueError('method_linking unknown, has to be trackpy')
-
-    return features,segmentation_mask,trajectories
-
-
-
-
-def maketrack(field_in,
-              grid_spacing=None,time_spacing=None,
-              target='maximum',
-              v_max=None,d_max=None,
-              memory=0,stubs=5,
-              order=1,extrapolate=0,
-              method_detection="threshold",
-              position_threshold='center',
-              sigma_threshold=0.5,
-              n_erosion_threshold=0,
-              threshold=1, min_num=0,
-              min_distance=0,
-              method_linking="random",
-              cell_number_start=1,
-              subnetwork_size=None,
-              adaptive_stop=None,
-              adaptive_step=None,
-              return_intermediate=False,
-              ):
+        raise ValueError("method_linking unknown, has to be trackpy")
+
+    return features, segmentation_mask, trajectories
+
+
+def maketrack(
+    field_in,
+    grid_spacing=None,
+    time_spacing=None,
+    target="maximum",
+    v_max=None,
+    d_max=None,
+    memory=0,
+    stubs=5,
+    order=1,
+    extrapolate=0,
+    method_detection="threshold",
+    position_threshold="center",
+    sigma_threshold=0.5,
+    n_erosion_threshold=0,
+    threshold=1,
+    min_num=0,
+    min_distance=0,
+    method_linking="random",
+    cell_number_start=1,
+    subnetwork_size=None,
+    adaptive_stop=None,
+    adaptive_step=None,
+    return_intermediate=False,
+):

     from .feature_detection import feature_detection_multithreshold
     from .tracking import linking_trackpy
@@ -134,74 +147,86 @@ def maketrack(field_in,
     """

     from copy import deepcopy
-
-    logger = logging.getLogger('trackpy')
+
+    logger = logging.getLogger("trackpy")
     logger.propagate = False
     logger.setLevel(logging.WARNING)
-
+
     ### Prepare Tracking

     # set horizontal grid spacing of input data
     # If cartesian x and y corrdinates are present, use these to determine dxy (vertical grid spacing used to transfer pixel distances to real distances):
-    coord_names=[coord.name() for coord in field_in.coords()]
-
-    if (('projection_x_coordinate' in coord_names and 'projection_y_coordinate' in coord_names) and (grid_spacing is None)):
-        x_coord=deepcopy(field_in.coord('projection_x_coordinate'))
-        x_coord.convert_units('metre')
-        dx=np.diff(field_in.coord('projection_y_coordinate')[0:2].points)[0]
-        y_coord=deepcopy(field_in.coord('projection_y_coordinate'))
-        y_coord.convert_units('metre')
-        dy=np.diff(field_in.coord('projection_y_coordinate')[0:2].points)[0]
-        dxy=0.5*(dx+dy)
+    coord_names = [coord.name() for coord in field_in.coords()]
+
+    if (
+        "projection_x_coordinate" in coord_names
+        and "projection_y_coordinate" in coord_names
+    ) and (grid_spacing is None):
+        x_coord = deepcopy(field_in.coord("projection_x_coordinate"))
+        x_coord.convert_units("metre")
+        dx = np.diff(field_in.coord("projection_y_coordinate")[0:2].points)[0]
+        y_coord = deepcopy(field_in.coord("projection_y_coordinate"))
+        y_coord.convert_units("metre")
+        dy = np.diff(field_in.coord("projection_y_coordinate")[0:2].points)[0]
+        dxy = 0.5 * (dx + dy)
     elif grid_spacing is not None:
-        dxy=grid_spacing
+        dxy = grid_spacing
     else:
-        ValueError('no information about grid spacing, need either input cube with projection_x_coord and projection_y_coord or keyword argument grid_spacing')
-
+        ValueError(
+            "no information about grid spacing, need either input cube with projection_x_coord and projection_y_coord or keyword argument grid_spacing"
+        )
+
     # set horizontal grid spacing of input data
-    if (time_spacing is None):
+    if time_spacing is None:
         # get time resolution of input data from first to steps of input cube:
-        time_coord=field_in.coord('time')
-        dt=(time_coord.units.num2date(time_coord.points[1])-time_coord.units.num2date(time_coord.points[0])).seconds
-    elif (time_spacing is not None):
+        time_coord = field_in.coord("time")
+        dt = (
+            time_coord.units.num2date(time_coord.points[1])
+            - time_coord.units.num2date(time_coord.points[0])
+        ).seconds
+    elif time_spacing is not None:
         # use value of time_spacing for dt:
-        dt=time_spacing
+        dt = time_spacing

     ### Start Tracking
     # Feature detection:
-    if method_detection in ["threshold","threshold_multi"]:
-        features=feature_detection_multithreshold(field_in=field_in,
-                                                  threshold=threshold,
-                                                  dxy=dxy,
-                                                  target=target,
-                                                  position_threshold=position_threshold,
-                                                  sigma_threshold=sigma_threshold,
-                                                  n_erosion_threshold=n_erosion_threshold)
-        features_filtered = features.drop(features[features['num'] < min_num].index)
+    if method_detection in ["threshold", "threshold_multi"]:
+        features = feature_detection_multithreshold(
+            field_in=field_in,
+            threshold=threshold,
+            dxy=dxy,
+            target=target,
+            position_threshold=position_threshold,
+            sigma_threshold=sigma_threshold,
+            n_erosion_threshold=n_erosion_threshold,
+        )
+        features_filtered = features.drop(features[features["num"] < min_num].index)
     else:
-        raise ValueError('method_detection unknown, has to be either threshold_multi or threshold')
-
-    # Link the features in the individual frames to trajectories:
-
-    trajectories=linking_trackpy(features=features_filtered,
-                                 field_in=field_in,
-                                 dxy=dxy,
-                                 dt=dt,
-                                 memory=memory,
-                                 subnetwork_size=subnetwork_size,
-                                 adaptive_stop=adaptive_stop,
-                                 adaptive_step=adaptive_step,
-                                 v_max=v_max,
-                                 d_max=d_max,
-                                 stubs=stubs,
-                                 order=order,extrapolate=extrapolate,
-                                 method_linking=method_linking,
-                                 cell_number_start=1
-                                 )
-
-    logging.debug('Finished tracking')
-
-    return trajectories,features
+        raise ValueError(
+            "method_detection unknown, has to be either threshold_multi or threshold"
+        )
+    # Link the features in the individual frames to trajectories:
+    trajectories = linking_trackpy(
+        features=features_filtered,
+        field_in=field_in,
+        dxy=dxy,
+        dt=dt,
+        memory=memory,
+        subnetwork_size=subnetwork_size,
+        adaptive_stop=adaptive_stop,
+        adaptive_step=adaptive_step,
+        v_max=v_max,
+        d_max=d_max,
+        stubs=stubs,
+        order=order,
+        extrapolate=extrapolate,
+        method_linking=method_linking,
+        cell_number_start=1,
+    )
+
+    logging.debug("Finished tracking")
+
+    return trajectories, features
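[A rough sketch of driving the wrapper above; the file name, variable name, and
parameter values are placeholders, not part of the patch.]

    import iris

    # field: 2D + time iris cube, e.g. vertical velocity (path is a placeholder)
    field = iris.load_cube("input.nc", "w")
    trajectories, features = maketrack(
        field, grid_spacing=1000.0, threshold=3.0, v_max=10.0
    )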
From 2dedc587f990ae65a747e940e272a86e9f4560e2 Mon Sep 17 00:00:00 2001
From: Sean Freeman
Date: Tue, 3 May 2022 10:26:02 -0600
Subject: [PATCH 72/82] Reformatted files

---
 tobac/segmentation.py        | 1 +
 tobac/tests/test_tracking.py | 2 +-
 tobac/tracking.py            | 6 +++---
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/tobac/segmentation.py b/tobac/segmentation.py
index c5c16019..28c3218a 100644
--- a/tobac/segmentation.py
+++ b/tobac/segmentation.py
@@ -24,6 +24,7 @@ def segmentation_3D(
         max_distance=max_distance,
     )

+
 def segmentation_2D(
     features,
     field,
diff --git a/tobac/tests/test_tracking.py b/tobac/tests/test_tracking.py
index c14c9efb..e5c12088 100644
--- a/tobac/tests/test_tracking.py
+++ b/tobac/tests/test_tracking.py
@@ -305,7 +305,7 @@ def test_3D_tracking_min_dist_z(
         "dxy": dxy,
         "v_max": v_max,
         "method_linking": "predict",
-        "cell_number_unassigned": -1
+        "cell_number_unassigned": -1,
     }
     if use_dz:
         common_params["dz"] = actual_dz
diff --git a/tobac/tracking.py b/tobac/tracking.py
index 120d7def..facfc7e8 100644
--- a/tobac/tracking.py
+++ b/tobac/tracking.py
@@ -140,10 +140,9 @@ def linking_trackpy(
     if PBC_flag in ["hdim_2", "both"] and (min_h2 is None or max_h2 is None):
         raise ValueError("For PBC tracking, must set min and max coordinates.")

-
     if time_cell_min:
         stubs = np.floor(time_cell_min / dt) + 1
-
+
     logging.debug("stubs: " + str(stubs))

     logging.debug("start linking features into trajectories")
@@ -253,7 +252,8 @@ def linking_trackpy(
                 + " is a stub ("
                 + str(trajectories_cell.shape[0])
                 + "), setting cell number to "
-                + str(cell_number_unassigned))
+                + str(cell_number_unassigned)
+            )
             stub_cell_nums.append(cell)

     trajectories_unfiltered.loc[
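[As a quick worked example of the stub cutoff computed above; the numbers are
invented for illustration.]

    import numpy as np

    dt, time_cell_min = 300.0, 1800.0          # illustrative values only
    stubs = np.floor(time_cell_min / dt) + 1   # -> 7.0: cells spanning fewer
                                               #    than 7 frames are stubs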
From f77d6fe3813a2c2ed80d8b2807dfefe1869cabfa Mon Sep 17 00:00:00 2001
From: JuliaKukulies <44163060+JuliaKukulies@users.noreply.github.com>
Date: Wed, 8 Jun 2022 17:07:36 +0200
Subject: [PATCH 73/82] Rename test_util.py to test_utils.py

fixed filename so that it is exactly the same as the module to be tested

---
 tobac/tests/{test_util.py => test_utils.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tobac/tests/{test_util.py => test_utils.py} (100%)

diff --git a/tobac/tests/test_util.py b/tobac/tests/test_utils.py
similarity index 100%
rename from tobac/tests/test_util.py
rename to tobac/tests/test_utils.py

From 254ae0e291f538b95ba32d4d7dadd16e4075e547 Mon Sep 17 00:00:00 2001
From: Sean Freeman
Date: Fri, 1 Jul 2022 10:16:44 -0600
Subject: [PATCH 74/82] Reverting changes not relevant to 3D/PBC

After discussion in #127, decided to revert any changes that aren't
relevant to 3D and PBCs.

---
 doc/_static/theme_overrides.css         | 17 -----
 doc/analysis.rst                        |  4 +-
 doc/conf.py                             | 38 -----------
 doc/data_input.rst                      |  2 +-
 doc/examples.rst                        |  2 +-
 doc/feature_detection_3D_out_vars.csv   |  4 --
 doc/feature_detection_base_out_vars.csv | 16 -----
 doc/index.rst                           | 29 ++-------
 doc/installation.rst                    | 14 +---
 doc/modules.rst                         |  7 --
 doc/plotting.rst                        |  2 +-
 doc/tobac.rst                           | 85 -------------------------
 tobac/centerofgravity.py                | 15 ++---
 tobac/wrapper.py                        | 11 ++--
 14 files changed, 26 insertions(+), 220 deletions(-)
 delete mode 100644 doc/_static/theme_overrides.css
 delete mode 100644 doc/conf.py
 delete mode 100644 doc/feature_detection_3D_out_vars.csv
 delete mode 100644 doc/feature_detection_base_out_vars.csv
 delete mode 100644 doc/modules.rst
 delete mode 100644 doc/tobac.rst

diff --git a/doc/_static/theme_overrides.css b/doc/_static/theme_overrides.css
deleted file mode 100644
index 4f6920b2..00000000
--- a/doc/_static/theme_overrides.css
+++ /dev/null
@@ -1,17 +0,0 @@
-/* from https://github.com/readthedocs/sphinx_rtd_theme/issues/117#issuecomment-41506687 */
-/* with augmentations from https://github.com/readthedocs/sphinx_rtd_theme/issues/117#issuecomment-153083280 */
-/* override table width restrictions */
-@media screen and (min-width: 767px) {
-
-    .wy-table-responsive table td {
-        /* !important prevents the common CSS stylesheets from
-           overriding this as on RTD they are loaded after this stylesheet */
-        white-space: normal !important;
-    }
-
-    .wy-table-responsive {
-        overflow: visible !important;
-    }
-
-    }
-
\ No newline at end of file
diff --git a/doc/analysis.rst b/doc/analysis.rst
index 70706aea..13483a2a 100644
--- a/doc/analysis.rst
+++ b/doc/analysis.rst
@@ -1,5 +1,5 @@
 Analysis
-=========
-tobac provides several analysis functions that allow for the calculation of important quantities based on the tracking results. This includes the calculation of properties such as cloud lifetimes and cloud areas/volumes, but also allows for a convenient calculation of statistics for arbitrary fields of the same shape as as the input data used for the tracking analysis.
+=======
+tobac provides several analysis functions that allow for the calculation of important quantities based on the tracking results. This includes the calculation of important properties of the tracked objects such as cloud lifetimes, cloud areas/volumes, but also allows for a convenient calculation of statistics for arbitratry fields of the same shape as as the input data used for the tracking analysis.

diff --git a/doc/conf.py b/doc/conf.py
deleted file mode 100644
index 04e02516..00000000
--- a/doc/conf.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import sphinx_rtd_theme
-import sys, os
-
-sys.path.insert(0, os.path.abspath('extensions'))
-
-extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.todo',
-              'sphinx.ext.coverage', 'sphinx.ext.imgmath', 'sphinx.ext.ifconfig',
-              'sphinx_rtd_theme','sphinx.ext.napoleon']
-
-
-html_theme = "sphinx_rtd_theme"
-
-project = u'tobac'
-
-
-def setup(app):
-    app.add_css_file("theme_overrides.css")
-
-autodoc_mock_imports = ['numpy', 'scipy', 'scikit-image', 'pandas', 'pytables', 'matplotlib', 'iris',
-                        'cf-units', 'xarray', 'cartopy', 'trackpy', 'numba']
-
-sys.path.insert(0, os.path.abspath("../"))
-
-# Napoleon settings
-napoleon_google_docstring = True
-napoleon_numpy_docstring = True
-napoleon_include_init_with_doc = False
-napoleon_include_private_with_doc = False
-napoleon_include_special_with_doc = True
-napoleon_use_admonition_for_examples = False
-napoleon_use_admonition_for_notes = False
-napoleon_use_admonition_for_references = False
-napoleon_use_ivar = False
-napoleon_use_param = True
-napoleon_use_rtype = True
-napoleon_preprocess_types = False
-napoleon_type_aliases = None
-napoleon_attr_annotations = True
diff --git a/doc/data_input.rst b/doc/data_input.rst
index cbbed02e..68eb3277 100644
--- a/doc/data_input.rst
+++ b/doc/data_input.rst
@@ -1,4 +1,4 @@
-Data input and output
+*Data input and output
 ======================

 Input data for tobac should consist of one or more fields on a common, regular grid with a time dimension and two or more spatial dimensions. The input data should also include latitude and longitude coordinates, either as 1-d or 2-d variables depending on the grid used.
diff --git a/doc/examples.rst b/doc/examples.rst
index 05b24fbf..7a98fc71 100644
--- a/doc/examples.rst
+++ b/doc/examples.rst
@@ -1,5 +1,5 @@
 Example notebooks
-==================
+===============
 tobac is provided with a set of Jupyter notebooks that show examples of the application of tobac for different types of datasets.

 The notebooks can be found in the **examples** folder in the the repository. The necessary input data for these examples is avaliable on zenodo:
diff --git a/doc/feature_detection_3D_out_vars.csv b/doc/feature_detection_3D_out_vars.csv
deleted file mode 100644
index 4b5d23b1..00000000
--- a/doc/feature_detection_3D_out_vars.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-Variable Name,Description,Units,Type
-vdim,vertical dimension in grid point space,Number of grid points,float64
-z,grid point z location of the feature (see vdim). Note that this is not necessarily an integer value depending on your selection of position_threshold,Number of grid points,float64
-altitude,z location of the feature above ground level,meters,float64
\ No newline at end of file
diff --git a/doc/feature_detection_base_out_vars.csv b/doc/feature_detection_base_out_vars.csv
deleted file mode 100644
index ce935797..00000000
--- a/doc/feature_detection_base_out_vars.csv
+++ /dev/null
@@ -1,16 +0,0 @@
-Variable Name,Description,Units,Type
-frame,Frame/time/file number; starts from 0 and increments by 1 to N times. ,n/a,int64
-idx,"Feature number within that frame; starts at 1, increments by 1 to the number of features for each frame, and resets to 1 when the frame increments",n/a,int64
-hdim_1,"First horizontal dimension in grid point space (typically, although not always, N/S or y space)",Number of grid points,float64
-hdim_2,"Second horizontal dimension in grid point space (typically, although not always, E/W or x space)",Number of grid points,float64
-num,Number of grid points that are within the threshold of this feature,Number of grid points,int64
-threshold_value,Maximum threshold value reached by the feature,Units of the input feature,int64(?)
-feature,Unique number of the feature; starts from 1 and increments by 1 to the number of features,n/a,int64
-time,Time of the feature,Date and time,object/python datetime
-timestr,String representation of the feature time,YYYY-MM-DD HH:MM:SS,object/string
-y,Grid point y location of the feature (see hdim_1 and hdim_2). Note that this is not necessarily an integer value depending on your selection of position_threshold,Number of grid points,float64
-x,Grid point x location of the feature (see also y),Number of grid points,float64
-projection_y_coordinate,Y location of the feature in projection coordinates,Projection coordinates (usually m),float64
-projection_x_coordinate,X location of the feature in projection coodinates,Projection coordinates (usually m),float64
-lat,Latitude of the feature,Decimal degrees,float64
-lon,Longitude of the feature,Decimal degrees,float64
\ No newline at end of file
diff --git a/doc/index.rst b/doc/index.rst
index bd087fae..86540728 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -1,20 +1,18 @@
 tobac - Tracking and Object-Based Analysis of Clouds
--------------------------------------------------------
+-----------

-**tobac** is a Python package to identify, track and analyze clouds in different types of gridded datasets, such as 3D model output from cloud-resolving model simulations or 2D data from satellite retrievals.
+**tobac** is a Python package to identify, track and analyse clouds in different types of gridded datasets, such as 3D model output from cloud resolving model simulations or 2D data from satellite retrievals.

-The software is set up in a modular way to include different algorithms for feature identification, tracking, and analyses. **tobac** is also input variable agnostic and doesn't rely on specific input variables to work.
+The software is set up in a modular way to include different algorithms for feature identification, tracking and analyses.
+In the current implementation, individual features are indentified as either maxima or minima in a two dimensional time varying field. The volume/are associated with the identified object can be determined based on a time-varying 2D or 3D field and a threshold value. In the tracking step, the identified objects are linked into consistent trajectories representing the cloud over its lifecycle. Analysis and visualisation methods provide a convenient way to use and display the tracking results.

-In the current implementation, individual features are identified as either maxima or minima in a two or three-dimensional time-varying field. An associated volume can then be determined using these features with a separate (or identical) time-varying 2D or 3D field and a threshold value. The identified objects are linked into consistent trajectories representing the cloud over its lifecycle in the tracking step. Analysis and visualization methods provide a convenient way to use and display the tracking results.
+Version 1.0 of tobac and some example applications are described in a paper that is currently in discussion for the journal "Geoscientific Model Development" as:

-Version 1.2 of tobac and some example applications are described in a manuscript in Geoscientific Model Development as:
+Heikenfeld, M., Marinescu, P. J., Christensen, M., Watson-Parris, D., Senf, F., van den Heever, S. C., and Stier, P.: tobac v1.0: towards a flexible framework for tracking and analysis of clouds in diverse datasets, Geosci. Model Dev. Discuss., `https://doi.org/10.5194/gmd-2019-105 `_ , in review, 2019.

-Heikenfeld, M., Marinescu, P. J., Christensen, M., Watson-Parris, D., Senf, F., van den Heever, S. C., and Stier, P.: tobac 1.2: towards a flexible framework for tracking and analysis of clouds in diverse datasets, Geosci. Model Dev., 12, 4551–4570, https://doi.org/10.5194/gmd-12-4551-2019, 2019.
-
-The project is currently being extended by several contributors to include additional workflows and algorithms using the same structure, syntax, and data formats.
+The project is currently extended by several contributors to include additional workflows and algorithms using the same structure, synthax and data formats.

 .. toctree::
-   :caption: Basic Information
    :maxdepth: 2
    :numbered:

@@ -26,16 +24,3 @@ The project is currently being extended by several contributors to include addit
    analysis
    plotting
    examples
-
-.. toctree::
-   :caption: Output Documentation
-   :maxdepth: 2
-
-   feature_detection_output
-
-.. toctree::
-   :caption: API Reference
-   :maxdepth: 2
-
-   tobac
-
diff --git a/doc/installation.rst b/doc/installation.rst
index 7caaf31b..f8895e4a 100644
--- a/doc/installation.rst
+++ b/doc/installation.rst
@@ -2,28 +2,20 @@ Installation
 ------------
 tobac is now capable of working with both Python 2 and Python 3 (tested for 2.7,3.6 and 3.7) installations.

-The easiest way is to install the most recent version of tobac via conda or mamba and the conda-forge channel:
+The easiest way is to install the most recent version of tobac via conda and the conda-forge channel:

 ```
 conda install -c conda-forge tobac
 ```
-or
-```
-mamba install -c conda-forge tobac
-```

-This will take care of all necessary dependencies and should do the job for most users. It also allows for an easy update of the installation by
+This will take care of all necessary dependencies and should do the job for most users and also allows for an easy update of the installation by

 ```
 conda update -c conda-forge tobac
 ```
-or
-```
-mamba update -c conda-forge tobac
-```

-You can also install conda via pip, which is mainly interesting for development purposes or using specific development branches for the Github repository.
+You can also install conda via pip, which is mainly interesting for development purposed or to use specific development branches for the Github repository.

 The follwoing python packages are required (including dependencies of these packages):

diff --git a/doc/modules.rst b/doc/modules.rst
deleted file mode 100644
index aa0e3d9f..00000000
--- a/doc/modules.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-tobac
-=====
-
-.. toctree::
-   :maxdepth: 4
-
-   tobac
diff --git a/doc/plotting.rst b/doc/plotting.rst
index 3425a0e5..ae11e5cf 100644
--- a/doc/plotting.rst
+++ b/doc/plotting.rst
@@ -1,3 +1,3 @@
 Plotting
---------
+-------
 tobac provides functions to conveniently visualise the tracking results and analyses.
diff --git a/doc/tobac.rst b/doc/tobac.rst
deleted file mode 100644
index 51722c77..00000000
--- a/doc/tobac.rst
+++ /dev/null
@@ -1,85 +0,0 @@
-tobac package
-=============
-
-Submodules
-----------
-
-tobac.analysis module
----------------------
-
-.. automodule:: tobac.analysis
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-tobac.centerofgravity module
-----------------------------
-
-.. automodule:: tobac.centerofgravity
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-tobac.feature\_detection module
--------------------------------
-
-.. automodule:: tobac.feature_detection
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-tobac.plotting module
----------------------
-
-.. automodule:: tobac.plotting
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-tobac.segmentation module
--------------------------
-
-.. automodule:: tobac.segmentation
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-tobac.testing module
---------------------
-
-.. automodule:: tobac.testing
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-tobac.tracking module
----------------------
-
-.. automodule:: tobac.tracking
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-tobac.utils module
-------------------
-
-.. automodule:: tobac.utils
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-tobac.wrapper module
---------------------
-
-.. automodule:: tobac.wrapper
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-Module contents
----------------
-
-.. automodule:: tobac
-   :members:
-   :undoc-members:
-   :show-inheritance:
diff --git a/tobac/centerofgravity.py b/tobac/centerofgravity.py
index 8fff4c9e..d7d5675d 100644
--- a/tobac/centerofgravity.py
+++ b/tobac/centerofgravity.py
@@ -10,9 +10,8 @@ def calculate_cog(tracks, mass, mask):
         cube of quantity (need coordinates 'time', 'geopotential_height','projection_x_coordinate' and 'projection_y_coordinate')
     mask: iris.cube.Cube
         cube containing mask (int > where belonging to cloud volume, 0 everywhere else )
-    Returns
-    -------
-    pandas.DataFrame
+    Output:
+    tracks_out pandas.DataFrame
         Dataframe containing t,x,y,z positions of centre of gravity and total cloud mass each tracked cells at each timestep
     """
     from .utils import mask_cube_cell
@@ -47,9 +46,8 @@ def calculate_cog_untracked(mass, mask):
     mask: iris.cube.Cube
         cube containing mask (int > where belonging to cloud volume, 0 everywhere else )

-    Returns
-    -------
-    pandas.DataFrame
+    Output:
+    tracks_out pandas.DataFrame
         Dataframe containing t,x,y,z positions of centre of gravity and total cloud mass for untracked part of dimain
     """
     from pandas import DataFrame
@@ -87,9 +85,8 @@ def calculate_cog_domain(mass):
     Input:
     mass: iris.cube.Cube
         cube of quantity (need coordinates 'time', 'geopotential_height','projection_x_coordinate' and 'projection_y_coordinate')
-    Returns
-    -------
-    pandas.DataFrame
+    Output:
+    tracks_out pandas.DataFrame
         Dataframe containing t,x,y,z positions of centre of gravity and total cloud mass
     """
     from pandas import DataFrame
diff --git a/tobac/wrapper.py b/tobac/wrapper.py
index 87d8be0a..0d2e64a8 100644
--- a/tobac/wrapper.py
+++ b/tobac/wrapper.py
@@ -92,10 +92,10 @@ def maketrack(
     from .feature_detection import feature_detection_multithreshold
     from .tracking import linking_trackpy

-    """Function identifiying features and linking them into trajectories
+    """
+    Function identifiying features andlinking them into trajectories

-    Parameters
-    ----------
+    Parameters:
     field_in:     iris.cube.Cube
                   2D input field tracking is performed on
     grid_spacing: float
@@ -131,9 +131,8 @@ def maketrack(
     return_intermediate: boolean
                   flag to tetermine if only final tracjectories are output (False, default) or if detected features, filtered features and unfilled tracks are returned additionally (True)
-
-    Returns
-    -------
+
+    Output:
     trajectories_final: pandas.DataFrame
                   Tracked updrafts, one row per timestep and updraft, includes dimensions 'time','latitude','longitude','projection_x_variable', 'projection_y_variable' based on w cube. 'hdim_1' and 'hdim_2' are used for segementation step.
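[For orientation, a sketch of how the centre-of-gravity helper touched above is
typically called; the cube and DataFrame names are illustrative, not from the
patch.]

    from tobac.centerofgravity import calculate_cog

    # tracks: pandas.DataFrame from tracking; mass, mask: iris cubes on the same grid
    cog_tracks = calculate_cog(tracks, mass, mask)
    # one row per cell and timestep, with t, x, y, z of the centre of gravity
    # and the total cloud mass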
From da4b06a99b913819cb446631d1ba02ad6aaee19b Mon Sep 17 00:00:00 2001
From: Sean Freeman
Date: Fri, 1 Jul 2022 10:36:30 -0600
Subject: [PATCH 75/82] Updates from changes requested by @JuliaKukulies

---
 tobac/feature_detection.py | 87 ++++++++++++++++++--------------------
 tobac/segmentation.py      | 51 +---------------------
 tobac/tests/test_utils.py  | 21 +++++----
 tobac/tracking.py          |  4 +-
 tobac/utils.py             |  6 +--
 5 files changed, 59 insertions(+), 110 deletions(-)

diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py
index 7ef0b089..9cf894bc 100644
--- a/tobac/feature_detection.py
+++ b/tobac/feature_detection.py
@@ -1,4 +1,3 @@
-from operator import is_
 import numpy as np
 import pandas as pd
 import logging
@@ -8,8 +7,8 @@

 def feature_position(
     hdim1_indices,
-    hdim2_indeces,
-    vdim_indyces=None,
+    hdim2_indices,
+    vdim_indices=None,
     region_small=None,
     region_bbox=None,
     track_data=None,
@@ -29,10 +28,10 @@ def feature_position(
     hdim1_indices : list
        list of indices along hdim1 (typically ```y```)

-    hdim2_indeces : list
+    hdim2_indices : list
        List of indices of feature along hdim2 (typically ```x```)

-    vdim_indyces : list, optional
+    vdim_indices : list, optional
        List of indices of feature along optional vdim (typically ```z```)

     region_small : 2D or 3D array-like
@@ -101,10 +100,13 @@ def feature_position(

     # are we 3D? if so, True.
     is_3D = False
+    pbc_options = ["hdim_1", "hdim_2", "both"]
+
+    hdim1_indices_2 = hdim1_indices
+    hdim2_indices_2 = hdim2_indices
+
     if PBC_flag == "hdim_1":
         # ONLY periodic in y
-        hdim1_indices_2 = hdim1_indices
-        hdim2_indeces_2 = hdim2_indeces

         if ((np.max(hdim1_indices)) == y_max) and ((np.min(hdim1_indices) == y_min)):
             for y2 in range(0, len(hdim1_indices_2)):
@@ -114,19 +116,15 @@ def feature_position(

     elif PBC_flag == "hdim_2":
         # ONLY periodic in x
-        hdim1_indices_2 = hdim1_indices
-        hdim2_indeces_2 = hdim2_indeces

-        if ((np.max(hdim2_indeces)) == x_max) and ((np.min(hdim2_indeces) == x_min)):
-            for x2 in range(0, len(hdim2_indeces_2)):
-                h2_ind = hdim2_indeces_2[x2]
+        if ((np.max(hdim2_indices)) == x_max) and ((np.min(hdim2_indices) == x_min)):
+            for x2 in range(0, len(hdim2_indices_2)):
+                h2_ind = hdim2_indices_2[x2]
                 if h2_ind < (x_max / 2):
-                    hdim2_indeces_2[x2] = h2_ind + x_max
+                    hdim2_indices_2[x2] = h2_ind + x_max

     elif PBC_flag == "both":
         # DOUBLY periodic boundaries
-        hdim1_indices_2 = hdim1_indices
-        hdim2_indeces_2 = hdim2_indeces

         if ((np.max(hdim1_indices)) == y_max) and ((np.min(hdim1_indices) == y_min)):
             for y2 in range(0, len(hdim1_indices_2)):
@@ -134,18 +132,14 @@ def feature_position(
                 if h1_ind < (y_max / 2):
                     hdim1_indices_2[y2] = h1_ind + y_max

-        if ((np.max(hdim2_indeces)) == x_max) and ((np.min(hdim2_indeces) == x_min)):
-            for x2 in range(0, len(hdim2_indeces_2)):
-                h2_ind = hdim2_indeces_2[x2]
+        if ((np.max(hdim2_indices)) == x_max) and ((np.min(hdim2_indices) == x_min)):
+            for x2 in range(0, len(hdim2_indices_2)):
+                h2_ind = hdim2_indices_2[x2]
                 if h2_ind < (x_max / 2):
-                    hdim2_indeces_2[x2] = h2_ind + x_max
-
-    else:
-        hdim1_indices_2 = hdim1_indices
-        hdim2_indeces_2 = hdim2_indeces
+                    hdim2_indices_2[x2] = h2_ind + x_max

     hdim1_indices = hdim1_indices_2
-    hdim2_indeces = hdim2_indeces_2
+    hdim2_indices = hdim2_indices_2

     if len(region_bbox) == 4:
         # 2D case
@@ -165,9 +159,9 @@ def feature_position(
     if position_threshold == "center":
         # get position as geometrical centre of identified region:
         hdim1_index = np.mean(hdim1_indices)
-        hdim2_index = np.mean(hdim2_indeces)
+        hdim2_index = np.mean(hdim2_indices)
         if is_3D:
-            vdim_index = np.mean(vdim_indyces)
+            vdim_index = np.mean(vdim_indices)

     elif position_threshold == "extreme":
         # get position as max/min position inside the identified region:
@@ -176,9 +170,9 @@ def feature_position(
         if target == "minimum":
             index = np.argmin(track_data_region[region_small])
             hdim1_index = hdim1_indices[index]
-            hdim2_index = hdim2_indeces[index]
+            hdim2_index = hdim2_indices[index]
             if is_3D:
-                vdim_index = vdim_indyces[index]
+                vdim_index = vdim_indices[index]

     elif position_threshold == "weighted_diff":
         # get position as centre of identified region, weighted by difference from the threshold:
@@ -186,9 +180,9 @@ def feature_position(
         if sum(weights) == 0:
             weights = None
         hdim1_index = np.average(hdim1_indices, weights=weights)
-        hdim2_index = np.average(hdim2_indeces, weights=weights)
+        hdim2_index = np.average(hdim2_indices, weights=weights)
         if is_3D:
-            vdim_index = np.average(vdim_indyces, weights=weights)
+            vdim_index = np.average(vdim_indices, weights=weights)

     elif position_threshold == "weighted_abs":
         # get position as centre of identified region, weighted by absolute values if the field:
@@ -196,9 +190,9 @@ def feature_position(
         if sum(weights) == 0:
             weights = None
         hdim1_index = np.average(hdim1_indices, weights=weights)
-        hdim2_index = np.average(hdim2_indeces, weights=weights)
+        hdim2_index = np.average(hdim2_indices, weights=weights)
         if is_3D:
-            vdim_index = np.average(vdim_indyces, weights=weights)
+            vdim_index = np.average(vdim_indices, weights=weights)

     else:
         raise ValueError(
@@ -600,9 +594,8 @@ def feature_detection_threshold(
     elif PBC_flag == "none":
         pass
     else:
-        # TODO: fix periodic flag to be str, then update this with the possible values.
         raise ValueError(
-            "Options for periodic are currently: none, hdim_1, hdim_2, both"
+            "Options for periodic are currently: none, " + ", ".join(pbc_options)
         )

     # num_labels = num_labels - len(skip_list)
@@ -612,7 +605,7 @@ def feature_detection_threshold(
     if len(label_props) > 0:
         [
             total_indices_all,
-            vdim_indyces_all,
+            vdim_indices_all,
             hdim1_indices_all,
             hdim2_indices_all,
         ] = tb_utils.get_indices_of_labels_from_reg_prop_dict(label_props)
@@ -639,11 +632,11 @@ def feature_detection_threshold(
             if curr_count <= n_min_threshold:
                 continue
             if is_3D:
-                vdim_indyces = vdim_indyces_all[cur_idx]
+                vdim_indices = vdim_indices_all[cur_idx]
             else:
-                vdim_indyces = None
+                vdim_indices = None
             hdim1_indices = hdim1_indices_all[cur_idx]
-            hdim2_indeces = hdim2_indices_all[cur_idx]
+            hdim2_indices = hdim2_indices_all[cur_idx]

             label_bbox = label_props[cur_idx].bbox
             (
@@ -661,15 +654,15 @@ def feature_detection_threshold(
             if is_3D:
                 region_small = np.full((bbox_zsize, bbox_ysize, bbox_xsize), False)
                 region_small[
-                    vdim_indyces - bbox_zstart,
+                    vdim_indices - bbox_zstart,
                     hdim1_indices - bbox_ystart,
-                    hdim2_indeces - bbox_xstart,
+                    hdim2_indices - bbox_xstart,
                 ] = True

             else:
                 region_small = np.full((bbox_ysize, bbox_xsize), False)
                 region_small[
-                    hdim1_indices - bbox_ystart, hdim2_indeces - bbox_xstart
+                    hdim1_indices - bbox_ystart, hdim2_indices - bbox_xstart
                 ] = True
                 # we are 2D and need to remove the dummy 3D coordinate.
                 label_bbox = (
@@ -679,26 +672,26 @@ def feature_detection_threshold(
                     label_bbox[5],
                 )

-            # [hdim1_indices,hdim2_indeces]= np.nonzero(region)
+            # [hdim1_indices,hdim2_indices]= np.nonzero(region)
             # write region for individual threshold and feature to dict

             if is_3D:
                 region_i = list(
                     zip(
                         hdim1_indices * x_max * z_max
-                        + hdim2_indeces * z_max
-                        + vdim_indyces
+                        + hdim2_indices * z_max
+                        + vdim_indices
                     )
                 )
             else:
-                region_i = np.array(hdim1_indices * x_max + hdim2_indeces)
+                region_i = np.array(hdim1_indices * x_max + hdim2_indices)

             regions[cur_idx + idx_start] = region_i

             # Determine feature position for region by one of the following methods:
             single_indices = feature_position(
                 hdim1_indices,
-                hdim2_indeces,
-                vdim_indyces=vdim_indyces,
+                hdim2_indices,
+                vdim_indices=vdim_indices,
                 region_small=region_small,
                 region_bbox=label_bbox,
                 track_data=data_i,
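[The flattened region indices computed above map (y, x) grid points to single
integers; a tiny sketch of the 2D formula with made-up sizes.]

    import numpy as np

    x_max = 10                                  # illustrative domain width
    hdim1_indices = np.array([2, 2, 3])         # y coordinates of region points
    hdim2_indices = np.array([4, 5, 4])         # x coordinates of region points
    region_i = hdim1_indices * x_max + hdim2_indices   # -> [24, 25, 34]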
np.array(loc_2), + bounds[0], + bounds[1], + bounds[2], + bounds[3], + PBC_flag, + ) + == pytest.approx(expected_dist) + ) def test_get_pbc_coordinates(): diff --git a/tobac/tracking.py b/tobac/tracking.py index facfc7e8..f2dece29 100644 --- a/tobac/tracking.py +++ b/tobac/tracking.py @@ -28,9 +28,9 @@ def linking_trackpy( cell_number_start=1, cell_number_unassigned=-1, vertical_coord="auto", - min_h1=None, + min_h1=0, max_h1=None, - min_h2=None, + min_h2=0, max_h2=None, PBC_flag="none", ): diff --git a/tobac/utils.py b/tobac/utils.py index b4e33f7a..03161cc2 100644 --- a/tobac/utils.py +++ b/tobac/utils.py @@ -1068,8 +1068,6 @@ def calc_distance_coords_pbc( """ is_3D = len(coords_1) == 3 - size_h1 = max_h1 - min_h1 - size_h2 = max_h2 - min_h2 if not is_3D: # Let's make the accounting easier. @@ -1077,17 +1075,19 @@ def calc_distance_coords_pbc( coords_2 = np.array((0, coords_2[0], coords_2[1])) if PBC_flag in ["hdim_1", "both"]: + size_h1 = max_h1 - min_h1 mod_h1 = size_h1 else: mod_h1 = 0 if PBC_flag in ["hdim_2", "both"]: + size_h2 = max_h2 - min_h2 mod_h2 = size_h2 else: mod_h2 = 0 max_dims = np.array((0, mod_h1, mod_h2)) deltas = np.abs(coords_1 - coords_2) deltas = np.where(deltas > 0.5 * max_dims, deltas - max_dims, deltas) - return np.sqrt(np.sum(deltas**2)) + return np.sqrt(np.sum(deltas ** 2)) def find_vertical_axis_from_coord(variable_cube, vertical_coord="auto"): From b4a4f744b8fa2808e625a98584e5f61b7dda84bc Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Fri, 1 Jul 2022 11:02:45 -0600 Subject: [PATCH 76/82] fixed black formatting --- tobac/tests/test_utils.py | 21 +++++++++------------ tobac/utils.py | 2 +- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/tobac/tests/test_utils.py b/tobac/tests/test_utils.py index dd734244..893a5a42 100644 --- a/tobac/tests/test_utils.py +++ b/tobac/tests/test_utils.py @@ -242,18 +242,15 @@ def test_calc_distance_coords_pbc_param(loc_1, loc_2, bounds, PBC_flag, expected """ import numpy as np - assert ( - tb_utils.calc_distance_coords_pbc( - np.array(loc_1), - np.array(loc_2), - bounds[0], - bounds[1], - bounds[2], - bounds[3], - PBC_flag, - ) - == pytest.approx(expected_dist) - ) + assert tb_utils.calc_distance_coords_pbc( + np.array(loc_1), + np.array(loc_2), + bounds[0], + bounds[1], + bounds[2], + bounds[3], + PBC_flag, + ) == pytest.approx(expected_dist) def test_get_pbc_coordinates(): diff --git a/tobac/utils.py b/tobac/utils.py index 03161cc2..d3ea24a7 100644 --- a/tobac/utils.py +++ b/tobac/utils.py @@ -1087,7 +1087,7 @@ def calc_distance_coords_pbc( max_dims = np.array((0, mod_h1, mod_h2)) deltas = np.abs(coords_1 - coords_2) deltas = np.where(deltas > 0.5 * max_dims, deltas - max_dims, deltas) - return np.sqrt(np.sum(deltas ** 2)) + return np.sqrt(np.sum(deltas**2)) def find_vertical_axis_from_coord(variable_cube, vertical_coord="auto"): From 9a4b0f5f3fdea7cc478951201d14d2470355a2e9 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Mon, 11 Jul 2022 08:50:52 -0600 Subject: [PATCH 77/82] Black formatting --- tobac/tests/test_feature_detection.py | 4 +++- tobac/tests/test_utils.py | 21 ++++++++++++--------- tobac/utils.py | 2 +- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/tobac/tests/test_feature_detection.py b/tobac/tests/test_feature_detection.py index 368aa539..aedf51fe 100644 --- a/tobac/tests/test_feature_detection.py +++ b/tobac/tests/test_feature_detection.py @@ -46,7 +46,7 @@ def test_feature_detection_multithreshold_timestep(): @pytest.mark.parametrize( - "position_threshold", [("center"), 
("extreme"), ("weighted_diff"), ("weighted_abs")] + "position_threshold", [("center"), ("extreme"), ("weighted_diff"), ("weighted_abs")] ) def test_feature_detection_position(position_threshold): """ @@ -75,6 +75,8 @@ def test_feature_detection_position(position_threshold): ) pass + + @pytest.mark.parametrize( "feature_1_loc, feature_2_loc, dxy, dz, min_distance," " add_x_coords, add_y_coords," diff --git a/tobac/tests/test_utils.py b/tobac/tests/test_utils.py index 893a5a42..dd734244 100644 --- a/tobac/tests/test_utils.py +++ b/tobac/tests/test_utils.py @@ -242,15 +242,18 @@ def test_calc_distance_coords_pbc_param(loc_1, loc_2, bounds, PBC_flag, expected """ import numpy as np - assert tb_utils.calc_distance_coords_pbc( - np.array(loc_1), - np.array(loc_2), - bounds[0], - bounds[1], - bounds[2], - bounds[3], - PBC_flag, - ) == pytest.approx(expected_dist) + assert ( + tb_utils.calc_distance_coords_pbc( + np.array(loc_1), + np.array(loc_2), + bounds[0], + bounds[1], + bounds[2], + bounds[3], + PBC_flag, + ) + == pytest.approx(expected_dist) + ) def test_get_pbc_coordinates(): diff --git a/tobac/utils.py b/tobac/utils.py index d3ea24a7..03161cc2 100644 --- a/tobac/utils.py +++ b/tobac/utils.py @@ -1087,7 +1087,7 @@ def calc_distance_coords_pbc( max_dims = np.array((0, mod_h1, mod_h2)) deltas = np.abs(coords_1 - coords_2) deltas = np.where(deltas > 0.5 * max_dims, deltas - max_dims, deltas) - return np.sqrt(np.sum(deltas**2)) + return np.sqrt(np.sum(deltas ** 2)) def find_vertical_axis_from_coord(variable_cube, vertical_coord="auto"): From fe1948778d1b5c569b520ef7a6a600a9a8080f76 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Mon, 11 Jul 2022 08:56:32 -0600 Subject: [PATCH 78/82] Black formatting (again) --- tobac/tests/test_utils.py | 21 +++++++++------------ tobac/utils.py | 2 +- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/tobac/tests/test_utils.py b/tobac/tests/test_utils.py index dd734244..893a5a42 100644 --- a/tobac/tests/test_utils.py +++ b/tobac/tests/test_utils.py @@ -242,18 +242,15 @@ def test_calc_distance_coords_pbc_param(loc_1, loc_2, bounds, PBC_flag, expected """ import numpy as np - assert ( - tb_utils.calc_distance_coords_pbc( - np.array(loc_1), - np.array(loc_2), - bounds[0], - bounds[1], - bounds[2], - bounds[3], - PBC_flag, - ) - == pytest.approx(expected_dist) - ) + assert tb_utils.calc_distance_coords_pbc( + np.array(loc_1), + np.array(loc_2), + bounds[0], + bounds[1], + bounds[2], + bounds[3], + PBC_flag, + ) == pytest.approx(expected_dist) def test_get_pbc_coordinates(): diff --git a/tobac/utils.py b/tobac/utils.py index 03161cc2..d3ea24a7 100644 --- a/tobac/utils.py +++ b/tobac/utils.py @@ -1087,7 +1087,7 @@ def calc_distance_coords_pbc( max_dims = np.array((0, mod_h1, mod_h2)) deltas = np.abs(coords_1 - coords_2) deltas = np.where(deltas > 0.5 * max_dims, deltas - max_dims, deltas) - return np.sqrt(np.sum(deltas ** 2)) + return np.sqrt(np.sum(deltas**2)) def find_vertical_axis_from_coord(variable_cube, vertical_coord="auto"): From ea9922f5830a0cb172742292b69844bb550c7ddd Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Wed, 9 Nov 2022 16:45:24 -0600 Subject: [PATCH 79/82] Updated code for RC_V1.5.0 changes. Switched 3D tracking to random only. 
--- tobac/segmentation.py | 3 ++ tobac/tests/test_tracking.py | 70 ++++++++++++++++++++++++++++++------ tobac/tracking.py | 40 +++++++++++++++++---- 3 files changed, 96 insertions(+), 17 deletions(-) diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 45cbe77c..2984c39b 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -31,6 +31,9 @@ """ import logging + +import skimage + from . import utils as tb_utils diff --git a/tobac/tests/test_tracking.py b/tobac/tests/test_tracking.py index ca97a9c7..8f7ff7f7 100644 --- a/tobac/tests/test_tracking.py +++ b/tobac/tests/test_tracking.py @@ -13,6 +13,37 @@ import trackpy as tp +def convert_cell_dtype_if_appropriate(output, expected_output): + """Helper function to convert datatype of output if + necessary. Fixes a bug in testing on some OS/Python versions that cause + default int types to be different + + Parameters + ---------- + output: pd.DataFrame + the pandas dataframe to base cell datatype off of + expected_output: pd.DataFrame + the pandas dataframe to change the cell datatype + + Returns + ------- + expected_output: pd.DataFrame + an adjusted dataframe with a matching int dtype + """ + + # if they are already the same datatype, can return. + if output["cell"].dtype == expected_output["cell"].dtype: + return expected_output + + if output["cell"].dtype == np.int32: + expected_output["cell"] = expected_output["cell"].astype(np.int32) + + if output["cell"].dtype == np.int64: + expected_output["cell"] = expected_output["cell"].astype(np.int64) + + return expected_output + + def test_linking_trackpy(): """Function to test ```tobac.tracking.linking_trackpy``` Currently tests: @@ -53,6 +84,9 @@ def test_linking_trackpy(): ["hdim_1", "hdim_2", "frame", "feature", "time", "cell"] ] + expected_out_feature = convert_cell_dtype_if_appropriate( + actual_out_feature, expected_out_feature + ) assert_frame_equal( expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1) ) @@ -84,7 +118,7 @@ def test_linking_trackpy(): 1000, dz=1000, v_max=10000, - method_linking="predict", + method_linking="random", PBC_flag="none", vertical_coord=None, ) @@ -93,6 +127,10 @@ def test_linking_trackpy(): ["hdim_1", "hdim_2", "vdim", "frame", "feature", "time", "cell"] ] + expected_out_feature = convert_cell_dtype_if_appropriate( + actual_out_feature, expected_out_feature + ) + assert_frame_equal( expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1) ) @@ -128,7 +166,7 @@ def test_linking_trackpy(): min_h2=0, max_h2=10, v_max=4, - method_linking="predict", + method_linking="random", vertical_coord=None, PBC_flag="hdim_1", ) @@ -136,6 +174,9 @@ def test_linking_trackpy(): actual_out_feature = actual_out_feature[ ["hdim_1", "hdim_2", "vdim", "frame", "feature", "time", "cell"] ] + expected_out_feature = convert_cell_dtype_if_appropriate( + actual_out_feature, expected_out_feature + ) assert_frame_equal( expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1) @@ -172,7 +213,7 @@ def test_linking_trackpy(): min_h2=0, max_h2=10, v_max=4, - method_linking="predict", + method_linking="random", vertical_coord=None, PBC_flag="hdim_2", ) @@ -180,6 +221,9 @@ def test_linking_trackpy(): actual_out_feature = actual_out_feature[ ["hdim_1", "hdim_2", "vdim", "frame", "feature", "time", "cell"] ] + expected_out_feature = convert_cell_dtype_if_appropriate( + actual_out_feature, expected_out_feature + ) assert_frame_equal( expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1) 
@@ -216,7 +260,7 @@ def test_linking_trackpy():
         min_h2=0,
         max_h2=10,
         v_max=5,
-        method_linking="predict",
+        method_linking="random",
         vertical_coord=None,
         PBC_flag="both",
     )
@@ -224,7 +268,9 @@ def test_linking_trackpy():
     actual_out_feature = actual_out_feature[
         ["hdim_1", "hdim_2", "vdim", "frame", "feature", "time", "cell"]
     ]
-
+    expected_out_feature = convert_cell_dtype_if_appropriate(
+        actual_out_feature, expected_out_feature
+    )
     assert_frame_equal(
         expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1)
     )
@@ -306,7 +352,7 @@ def test_3D_tracking_min_dist_z(
         "time_cell_min": 1,
         "dxy": dxy,
         "v_max": v_max,
-        "method_linking": "predict",
+        "method_linking": "random",
         "cell_number_unassigned": -1,
     }
     if use_dz:
@@ -318,6 +364,9 @@ def test_3D_tracking_min_dist_z(
     actual_out_feature = tobac.tracking.linking_trackpy(**common_params)
     # Just want to remove the time_cell column here.
     actual_out_feature = actual_out_feature.drop("time_cell", axis=1)
+    expected_out_feature = convert_cell_dtype_if_appropriate(
+        actual_out_feature, expected_out_feature
+    )
     assert_frame_equal(
         expected_out_feature.sort_index(axis=1), actual_out_feature.sort_index(axis=1)
     )
@@ -368,9 +417,9 @@ def test_trackpy_predict():
         1,
         1,
         min_h1=0,
-        max_h1=100,
+        max_h1=101,
         min_h2=0,
-        max_h2=100,
+        max_h2=101,
         frame_start=0,
         num_frames=5,
         spd_h1=20,
@@ -384,9 +433,9 @@ def test_trackpy_predict():
         1,
         100,
         min_h1=0,
-        max_h1=100,
+        max_h1=101,
         min_h2=0,
-        max_h2=100,
+        max_h2=101,
         frame_start=0,
         num_frames=5,
         spd_h1=20,
@@ -412,6 +461,7 @@ def test_trackpy_predict():
 
     # sorting and dropping indices for comparison with the expected output
     output = output[["hdim_1", "hdim_2", "frame", "time", "feature", "cell"]]
+    expected_output = convert_cell_dtype_if_appropriate(output, expected_output)
 
     assert_frame_equal(expected_output.sort_index(), output.sort_index())
diff --git a/tobac/tracking.py b/tobac/tracking.py
index 5c82871a..d62fd9e4 100644
--- a/tobac/tracking.py
+++ b/tobac/tracking.py
@@ -27,6 +27,8 @@
 import math
 from . import utils as tb_utils
 
+from packaging import version as pkgvsn
+
 
 def linking_trackpy(
     features,
@@ -306,6 +308,7 @@
         features_linking["vdim_adj"] = features_linking[found_vertical_coord] / dxy
         pos_columns_tp = ["vdim_adj", "hdim_1", "hdim_2"]
+
     else:
         pos_columns_tp = ["hdim_1", "hdim_2"]
@@ -336,13 +339,24 @@ def linking_trackpy(
             dist_func=dist_func,
         )
     elif method_linking == "predict":
+        if is_3D and pkgvsn.parse(tp.__version__) < pkgvsn.parse("0.6.0"):
+            raise ValueError(
+                "3D Predictive Tracking Only Supported with trackpy versions newer than 0.6.0."
+            )
         # avoid setting pos_columns by renaming to default values to avoid trackpy bug
-        features.rename(columns={"hdim_1": "y", "hdim_2": "x"}, inplace=True)
+        if not is_3D:
+            features_linking.rename(
+                columns={"hdim_1": "y", "hdim_2": "x"}, inplace=True
+            )
+        else:
+            features_linking.rename(
+                columns={"hdim_1": "y", "hdim_2": "x", "vdim_adj": "z"}, inplace=True
+            )
 
         # generate list of features as input for df_link_iter to avoid bug in df_link
         features_linking_list = [
-            frame for i, frame in features.groupby("frame", sort=True)
+            frame for i, frame in features_linking.groupby("frame", sort=True)
         ]
 
         pred = tp.predict.NearestVelocityPredict(span=1)
@@ -356,19 +370,31 @@
             link_strategy="auto",
             adaptive_step=adaptive_step,
             adaptive_stop=adaptive_stop,
-            dist_func=dist_func
+            # dist_func=dist_func
             # copy_features=False, diagnostics=False,
             # hash_size=None, box_size=None, verify_integrity=True,
             # retain_index=False
         )
         # recreate a single dataframe from the list
+
         trajectories_unfiltered = pd.concat(trajectories_unfiltered)
 
         # change to column names back
-        trajectories_unfiltered.rename(
-            columns={"y": "hdim_1", "x": "hdim_2"}, inplace=True
-        )
-        features.rename(columns={"y": "hdim_1", "x": "hdim_2"}, inplace=True)
+        if not is_3D:
+            trajectories_unfiltered.rename(
+                columns={"y": "hdim_1", "x": "hdim_2"}, inplace=True
+            )
+            features_linking.rename(
+                columns={"y": "hdim_1", "x": "hdim_2"}, inplace=True
+            )
+        else:
+            trajectories_unfiltered.rename(
+                columns={"y": "hdim_1", "x": "hdim_2", "z": "vdim_adj"}, inplace=True
+            )
+            features_linking.rename(
+                columns={"y": "hdim_1", "x": "hdim_2", "z": "vdim_adj"}, inplace=True
+            )
+
     else:
         raise ValueError("method_linking unknown")
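
The convert_cell_dtype_if_appropriate helper added in this patch guards
against platform-dependent integer defaults. A self-contained sketch of the
failure mode it works around; the column values here are made up for
illustration:

    import numpy as np
    import pandas as pd

    # trackpy/pandas can produce int32 cell ids on some platforms and int64
    # on others, so a hand-built expected frame may fail assert_frame_equal
    # purely on dtype rather than on values.
    output = pd.DataFrame({"cell": np.array([1, 2], dtype=np.int32)})
    expected = pd.DataFrame({"cell": [1, 2]})  # platform-default int

    # Align the expected dtype to the observed one before comparing.
    if output["cell"].dtype != expected["cell"].dtype:
        expected["cell"] = expected["cell"].astype(output["cell"].dtype)

    pd.testing.assert_frame_equal(output, expected)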
From dd8c78c9ee905a79889cd1e2dfe22d6df668e251 Mon Sep 17 00:00:00 2001
From: Sean Freeman
Date: Wed, 30 Nov 2022 10:30:39 -0600
Subject: [PATCH 80/82] updates from comments
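
The feature_detection change below makes z_max an inclusive maximum index
(shape - 1), so the strides used to flatten a 3D point must be (max + 1),
i.e. the axis length. A small worked check of that bookkeeping; the array
shape and point are arbitrary:

    import numpy as np

    labels = np.zeros((4, 5, 6))  # toy (z, y, x) label array
    z_max, y_max, x_max = (s - 1 for s in labels.shape)

    hdim1, hdim2, vdim = 2, 3, 1  # (y, x, z) indices of one point
    flat = hdim1 * (x_max + 1) * (z_max + 1) + hdim2 * (z_max + 1) + vdim

    # identical to numpy's own flattening over a (y, x, z) ordering
    assert flat == np.ravel_multi_index(
        (hdim1, hdim2, vdim), (y_max + 1, x_max + 1, z_max + 1)
    )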
---
 tobac/feature_detection.py   |  8 +++++---
 tobac/segmentation.py        |  3 +++
 tobac/tests/test_tracking.py | 12 ++++++------
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py
index 0419b22c..dc47e455 100644
--- a/tobac/feature_detection.py
+++ b/tobac/feature_detection.py
@@ -437,8 +437,10 @@ def feature_detection_threshold(
     if not is_3D:
         # let's transpose labels to a 1,y,x array to make calculations etc easier.
         labels = labels[np.newaxis, :, :]
+    # these are [min, max], meaning that the max value is inclusive and a valid
+    # value.
     z_min = 0
-    z_max = labels.shape[0]
+    z_max = labels.shape[0] - 1
     y_min = 0
     y_max = labels.shape[1] - 1
     x_min = 0
@@ -742,8 +744,8 @@ def feature_detection_threshold(
         if is_3D:
             region_i = list(
                 zip(
-                    hdim1_indices * x_max * z_max
-                    + hdim2_indices * z_max
+                    hdim1_indices * (x_max + 1) * (z_max + 1)
+                    + hdim2_indices * (z_max + 1)
                     + vdim_indices
                 )
             )
diff --git a/tobac/segmentation.py b/tobac/segmentation.py
index 45cbe77c..2984c39b 100644
--- a/tobac/segmentation.py
+++ b/tobac/segmentation.py
@@ -31,6 +31,9 @@
 """
 import logging
+
+import skimage
+
 from . import utils as tb_utils
diff --git a/tobac/tests/test_tracking.py b/tobac/tests/test_tracking.py
index ca97a9c7..48f9b32e 100644
--- a/tobac/tests/test_tracking.py
+++ b/tobac/tests/test_tracking.py
@@ -368,9 +368,9 @@ def test_trackpy_predict():
         1,
         1,
         min_h1=0,
-        max_h1=100,
+        max_h1=101,
         min_h2=0,
-        max_h2=100,
+        max_h2=101,
         frame_start=0,
         num_frames=5,
         spd_h1=20,
@@ -384,9 +384,9 @@ def test_trackpy_predict():
         1,
         100,
         min_h1=0,
-        max_h1=100,
+        max_h1=101,
         min_h2=0,
-        max_h2=100,
+        max_h2=101,
         frame_start=0,
         num_frames=5,
         spd_h1=20,
     )
@@ -394,7 +394,7 @@ def test_trackpy_predict():
     cell_2_expected = copy.deepcopy(cell_2)
-    cell_2_expected["cell"] = 2
+    cell_2_expected["cell"] = np.int32(2)
 
     features = pd.concat([cell_1, cell_2])
     expected_output = pd.concat([cell_1_expected, cell_2_expected])
@@ -412,7 +412,7 @@ def test_trackpy_predict():
 
     # sorting and dropping indices for comparison with the expected output
     output = output[["hdim_1", "hdim_2", "frame", "time", "feature", "cell"]]
-
+    expected_output["cell"] = expected_output["cell"].astype(np.int32)
     assert_frame_equal(expected_output.sort_index(), output.sort_index())

From 3b3855deacf84375d086dca4e34af78246143720 Mon Sep 17 00:00:00 2001
From: Sean Freeman
Date: Wed, 30 Nov 2022 11:42:36 -0600
Subject: [PATCH 81/82] black formatting

---
 .gitignore                         |  3 ++-
 tobac/feature_detection.py         |  1 -
 tobac/segmentation.py              | 16 ++++++++++++----
 tobac/tests/test_convert.py        | 12 +++++++++---
 tobac/tests/test_pbc_utils.py      |  1 -
 tobac/tests/test_utils.py          | 20 +++++++++++++-------
 tobac/utils/general.py             | 12 ++++++------
 tobac/utils/internal.py            |  2 ++
 tobac/utils/periodic_boundaries.py |  1 +
 9 files changed, 45 insertions(+), 23 deletions(-)

diff --git a/.gitignore b/.gitignore
index 6bb7df80..bf579958 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,5 @@ __pycache__
 .vscode
 htmlcov
 .coverage
-build
\ No newline at end of file
+build
+.idea
\ No newline at end of file
diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py
index 14636ce4..2006bc5c 100644
--- a/tobac/feature_detection.py
+++ b/tobac/feature_detection.py
@@ -998,7 +998,6 @@ def feature_detection_multithreshold(
     detect_subset=None,
     wavelength_filtering=None,
     dz=None,
-
 ):
     """Perform feature detection based on contiguous regions.
diff --git a/tobac/segmentation.py b/tobac/segmentation.py
index 8cc7394c..54e9c4c5 100644
--- a/tobac/segmentation.py
+++ b/tobac/segmentation.py
@@ -770,8 +770,12 @@ def segmentation_timestep(
                 ):
                     # adjust x and y points to the other side
-                    y_val_alt = pbc_utils.adjust_pbc_point(label_y, hdim1_min, hdim1_max)
-                    x_val_alt = pbc_utils.adjust_pbc_point(label_x, hdim2_min, hdim2_max)
+                    y_val_alt = pbc_utils.adjust_pbc_point(
+                        label_y, hdim1_min, hdim1_max
+                    )
+                    x_val_alt = pbc_utils.adjust_pbc_point(
+                        label_x, hdim2_min, hdim2_max
+                    )
 
                     label_on_corner = segmentation_mask_3[label_z, y_val_alt, x_val_alt]
                     if label_on_corner > 0:
@@ -782,7 +786,9 @@ def segmentation_timestep(
                 if (PBC_flag == "hdim_1" or PBC_flag == "both") and np.any(
                     label_y == [hdim1_min, hdim1_max]
                 ):
-                    y_val_alt = pbc_utils.adjust_pbc_point(label_y, hdim1_min, hdim1_max)
+                    y_val_alt = pbc_utils.adjust_pbc_point(
+                        label_y, hdim1_min, hdim1_max
+                    )
 
                     # get the label value on the opposite side
                     label_alt = segmentation_mask_3[label_z, y_val_alt, label_x]
@@ -795,7 +801,9 @@ def segmentation_timestep(
                 if (PBC_flag == "hdim_2" or PBC_flag == "both") and np.any(
                     label_x == [hdim2_min, hdim2_max]
                 ):
-                    x_val_alt = pbc_utils.adjust_pbc_point(label_x, hdim2_min, hdim2_max)
+                    x_val_alt = pbc_utils.adjust_pbc_point(
+                        label_x, hdim2_min, hdim2_max
+                    )
 
                     # get the seg value on the opposite side
                     label_alt = segmentation_mask_3[label_z, label_y, x_val_alt]
diff --git a/tobac/tests/test_convert.py b/tobac/tests/test_convert.py
index 79207aca..15c31310 100644
--- a/tobac/tests/test_convert.py
+++ b/tobac/tests/test_convert.py
@@ -182,7 +182,9 @@ def test_function_tuple_output(test_input, kwarg=None):
     elif input_types[0] == Cube:
         data = tobac.testing.make_simple_sample_data_2D()
     elif input_types[0] == xarray.Dataset:
-        data = tobac.testing.generate_single_feature(1, 1, max_h1=100, max_h2=100).to_xarray()
+        data = tobac.testing.generate_single_feature(
+            1, 1, max_h1=100, max_h2=100
+        ).to_xarray()
     elif input_types[0] == pd.DataFrame:
         data = tobac.testing.generate_single_feature(1, 1, max_h1=100, max_h2=100)
 
@@ -191,7 +193,9 @@ def test_function_tuple_output(test_input, kwarg=None):
     elif input_types[1] == Cube:
         kwarg = tobac.testing.make_simple_sample_data_2D()
     elif input_types[1] == xarray.Dataset:
-        kwarg = tobac.testing.generate_single_feature(1, 1, max_h1=100, max_h2=100).to_xarray()
+        kwarg = tobac.testing.generate_single_feature(
+            1, 1, max_h1=100, max_h2=100
+        ).to_xarray()
     elif input_types[1] == pd.DataFrame:
         kwarg = tobac.testing.generate_single_feature(1, 1, max_h1=100, max_h2=100)
 
@@ -234,7 +238,9 @@ def test_xarray_workflow():
     feature_detection_xarray = xarray_to_iris(
         tobac.feature_detection.feature_detection_multithreshold
     )
-    features = tobac.feature_detection.feature_detection_multithreshold(data, dxy, threshold=1.0)
+    features = tobac.feature_detection.feature_detection_multithreshold(
+        data, dxy, threshold=1.0
+    )
     features_xarray = feature_detection_xarray(data_xarray, dxy_xarray, threshold=1.0)
 
     assert_frame_equal(features, features_xarray)
diff --git a/tobac/tests/test_pbc_utils.py b/tobac/tests/test_pbc_utils.py
index 518080e1..6804fe41 100644
--- a/tobac/tests/test_pbc_utils.py
+++ b/tobac/tests/test_pbc_utils.py
@@ -4,7 +4,6 @@
 from collections import Counter
 
 
-
 def lists_equal_without_order(a, b):
     """
     This will make sure the inner list contain the same,
diff --git a/tobac/tests/test_utils.py b/tobac/tests/test_utils.py
index 8a69ddbf..9264fc1e 100644
--- a/tobac/tests/test_utils.py
+++ b/tobac/tests/test_utils.py
@@ -119,8 +119,6 @@ def test_get_indices_of_labels_from_reg_prop_dict():
     )
 
 
-
-
 @pytest.mark.parametrize(
     "feature_loc, min_max_coords, lengths, expected_coord_interp",
     [
@@ -371,18 +369,26 @@ def test_combine_tobac_feats():
     """
     single_feat_1 = tb_test.generate_single_feature(
-        0, 0, start_date=datetime.datetime(2022, 1, 1, 0, 0), frame_start=0,
-        max_h1 = 100, max_h2 = 100
+        0,
+        0,
+        start_date=datetime.datetime(2022, 1, 1, 0, 0),
+        frame_start=0,
+        max_h1=100,
+        max_h2=100,
     )
     single_feat_2 = tb_test.generate_single_feature(
-        1, 1, start_date=datetime.datetime(2022, 1, 1, 0, 5), frame_start=0,
-        max_h1 = 100, max_h2 = 100
+        1,
+        1,
+        start_date=datetime.datetime(2022, 1, 1, 0, 5),
+        frame_start=0,
+        max_h1=100,
+        max_h2=100,
     )
 
     combined_feat = tb_utils.combine_tobac_feats([single_feat_1, single_feat_2])
 
     tot_feat = tb_test.generate_single_feature(
-        0, 0, spd_h1=1, spd_h2=1, num_frames=2, frame_start=0, max_h1 = 100, max_h2 = 100
+        0, 0, spd_h1=1, spd_h2=1, num_frames=2, frame_start=0, max_h1=100, max_h2=100
     )
 
     pd_test.assert_frame_equal(combined_feat, tot_feat)
diff --git a/tobac/utils/general.py b/tobac/utils/general.py
index ce3cd269..041defda 100644
--- a/tobac/utils/general.py
+++ b/tobac/utils/general.py
@@ -149,7 +149,7 @@ def add_coordinates(t, variable_cube):
 
 
 def add_coordinates_3D(
-        t, variable_cube, vertical_coord="auto", assume_coords_fixed_in_time=True
+    t, variable_cube, vertical_coord="auto", assume_coords_fixed_in_time=True
 ):
     import numpy as np
 
@@ -277,9 +277,9 @@ def add_coordinates_3D(
         # one of the dimensions is time and we assume the coordinates
         # don't vary in time.
         elif (
-                var_coord.ndim == 3
-                and ndim_time in variable_cube.coord_dims(coord)
-                and assume_coords_fixed_in_time
+            var_coord.ndim == 3
+            and ndim_time in variable_cube.coord_dims(coord)
+            and assume_coords_fixed_in_time
         ):
             time_pos = variable_cube.coord_dims(coord).index(ndim_time)
             hdim1_pos = 0 if time_pos != 0 else 1
@@ -303,8 +303,8 @@ def add_coordinates_3D(
                 [
                     [a, b, c]
                     for a, b, c in zip(
-                    t[first_dim[1]], t[second_dim[1]], t[third_dim[1]]
-                )
+                        t[first_dim[1]], t[second_dim[1]], t[third_dim[1]]
+                    )
                 ],
             )
             # coordinate_points=[f(a,b) for a,b in zip(t[first_dim[1]],t[second_dim[1]])]
diff --git a/tobac/utils/internal.py b/tobac/utils/internal.py
index 58a5c475..a67deec9 100644
--- a/tobac/utils/internal.py
+++ b/tobac/utils/internal.py
@@ -415,6 +415,7 @@ def wrapper(*args, **kwargs):
 
     return wrapper
 
+
 def njit_if_available(func, **kwargs):
     """Decorator to wrap a function with numba.njit if available.
     If numba isn't available, it just returns the function.
@@ -433,6 +434,7 @@ def njit_if_available(func, **kwargs):
     except ModuleNotFoundError:
         return func
 
+
 def find_vertical_axis_from_coord(variable_cube, vertical_coord="auto"):
     """Function to find the vertical coordinate in the iris cube
diff --git a/tobac/utils/periodic_boundaries.py b/tobac/utils/periodic_boundaries.py
index 9e088681..6cb9e8ed 100644
--- a/tobac/utils/periodic_boundaries.py
+++ b/tobac/utils/periodic_boundaries.py
@@ -1,6 +1,7 @@
 import numpy as np
 from .internal import njit_if_available
 
+
 def adjust_pbc_point(in_dim, dim_min, dim_max):
     """Function to adjust a point to the other boundary for PBCs

From 23ceda3b7c31aa63475cec7efdc9a843f249abe3 Mon Sep 17 00:00:00 2001
From: Sean Freeman
Date: Wed, 30 Nov 2022 11:51:13 -0600
Subject: [PATCH 82/82] code cleanup
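
The feature_position change below collapses a chain of equality checks into
one membership test. A minimal sketch of the idiom; pbc_options is assumed to
already hold the valid flags in that scope, for example:

    pbc_options = ["hdim_1", "hdim_2", "both"]  # assumed set of valid flags

    PBC_flag = "both"
    # before: (PBC_flag == "hdim_1") or (PBC_flag == "hdim_2") or (PBC_flag == "both")
    # after:
    assert PBC_flag in pbc_options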
---
 tobac/feature_detection.py   | 2 +-
 tobac/tests/test_tracking.py | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py
index 2006bc5c..1bef595c 100644
--- a/tobac/feature_detection.py
+++ b/tobac/feature_detection.py
@@ -231,7 +231,7 @@ def feature_position(
     # re-transform of any coords beyond the boundaries - (should be) general enough to work for any variety of PBC
     # as no x or y points will be beyond the boundaries if we haven't transformed them in the first place
-    if (PBC_flag == "hdim_1") or (PBC_flag == "hdim_2") or (PBC_flag == "both"):
+    if PBC_flag in pbc_options:
         if hdim1_index > y_max:
             hdim1_index = hdim1_index - y_max
 
diff --git a/tobac/tests/test_tracking.py b/tobac/tests/test_tracking.py
index 5480dfe1..3535ee05 100644
--- a/tobac/tests/test_tracking.py
+++ b/tobac/tests/test_tracking.py
@@ -1,8 +1,6 @@
 """
 Test for the trackpy tracking functions
-Who's watching the watchmen, basically.
 """
-from pyexpat import features
 import pytest
 import tobac.testing
 import tobac.tracking