diff --git a/package/MDAnalysis/visualization/__init__.py b/package/MDAnalysis/visualization/__init__.py index 72e488329a..e113eb73d7 100644 --- a/package/MDAnalysis/visualization/__init__.py +++ b/package/MDAnalysis/visualization/__init__.py @@ -24,4 +24,4 @@ from . import streamlines from . import streamlines_3D -__all__ = ['streamlines', 'streamlines_3D'] +__all__ = ["streamlines", "streamlines_3D"] diff --git a/package/MDAnalysis/visualization/streamlines.py b/package/MDAnalysis/visualization/streamlines.py index 965074a43d..ea9a2bb634 100644 --- a/package/MDAnalysis/visualization/streamlines.py +++ b/package/MDAnalysis/visualization/streamlines.py @@ -52,16 +52,15 @@ import matplotlib.path except ImportError: raise ImportError( - '2d streamplot module requires: matplotlib.path for its ' - 'path.Path.contains_points method. The installation ' - 'instructions for the matplotlib module can be found here: ' - 'http://matplotlib.org/faq/installing_faq.html?highlight=install' - ) from None + "2d streamplot module requires: matplotlib.path for its " + "path.Path.contains_points method. The installation " + "instructions for the matplotlib module can be found here: " + "http://matplotlib.org/faq/installing_faq.html?highlight=install" + ) from None import MDAnalysis - def produce_grid(tuple_of_limits, grid_spacing): """Produce a 2D grid for the simulation system. @@ -120,12 +119,16 @@ def split_grid(grid, num_cores): # produce an array containing the cartesian coordinates of all vertices in the grid: x_array, y_array = grid grid_vertex_cartesian_array = np.dstack((x_array, y_array)) - #the grid_vertex_cartesian_array has N_rows, with each row corresponding to a column of coordinates in the grid ( + # the grid_vertex_cartesian_array has N_rows, with each row corresponding to a column of coordinates in the grid ( # so a given row has shape N_rows, 2); overall shape (N_columns_in_grid, N_rows_in_a_column, 2) - #although I'll eventually want a pure numpy/scipy/vector-based solution, for now I'll allow loops to simplify the + # although I'll eventually want a pure numpy/scipy/vector-based solution, for now I'll allow loops to simplify the # division of the cartesian coordinates into a list of the squares in the grid - list_all_squares_in_grid = [] # should eventually be a nested list of all the square vertices in the grid/system - list_parent_index_values = [] # want an ordered list of assignment indices for reconstructing the grid positions + list_all_squares_in_grid = ( + [] + ) # should eventually be a nested list of all the square vertices in the grid/system + list_parent_index_values = ( + [] + ) # want an ordered list of assignment indices for reconstructing the grid positions # in the parent process current_column = 0 while current_column < grid_vertex_cartesian_array.shape[0] - 1: @@ -134,100 +137,182 @@ def split_grid(grid, num_cores): current_row = 0 while current_row < grid_vertex_cartesian_array.shape[1] - 1: # all rows except the top row, which doesn't have a row above it for forming squares - bottom_left_vertex_current_square = grid_vertex_cartesian_array[current_column, current_row] - bottom_right_vertex_current_square = grid_vertex_cartesian_array[current_column + 1, current_row] - top_right_vertex_current_square = grid_vertex_cartesian_array[current_column + 1, current_row + 1] - top_left_vertex_current_square = grid_vertex_cartesian_array[current_column, current_row + 1] - #append the vertices of this square to the overall list of square vertices: + 
bottom_left_vertex_current_square = grid_vertex_cartesian_array[ + current_column, current_row + ] + bottom_right_vertex_current_square = grid_vertex_cartesian_array[ + current_column + 1, current_row + ] + top_right_vertex_current_square = grid_vertex_cartesian_array[ + current_column + 1, current_row + 1 + ] + top_left_vertex_current_square = grid_vertex_cartesian_array[ + current_column, current_row + 1 + ] + # append the vertices of this square to the overall list of square vertices: list_all_squares_in_grid.append( - [bottom_left_vertex_current_square, bottom_right_vertex_current_square, top_right_vertex_current_square, - top_left_vertex_current_square]) + [ + bottom_left_vertex_current_square, + bottom_right_vertex_current_square, + top_right_vertex_current_square, + top_left_vertex_current_square, + ] + ) list_parent_index_values.append([current_row, current_column]) current_row += 1 current_column += 1 - #split the list of square vertices [[v1,v2,v3,v4],[v1,v2,v3,v4],...,...] into roughly equally-sized sublists to + # split the list of square vertices [[v1,v2,v3,v4],[v1,v2,v3,v4],...,...] into roughly equally-sized sublists to # be distributed over the available cores on the system: - list_square_vertex_arrays_per_core = np.array_split(list_all_squares_in_grid, num_cores) - list_parent_index_values = np.array_split(list_parent_index_values, num_cores) - return [list_square_vertex_arrays_per_core, list_parent_index_values, current_row, current_column] - - -def per_core_work(topology_file_path, trajectory_file_path, list_square_vertex_arrays_this_core, MDA_selection, - start_frame, end_frame, reconstruction_index_list, maximum_delta_magnitude): + list_square_vertex_arrays_per_core = np.array_split( + list_all_squares_in_grid, num_cores + ) + list_parent_index_values = np.array_split( + list_parent_index_values, num_cores + ) + return [ + list_square_vertex_arrays_per_core, + list_parent_index_values, + current_row, + current_column, + ] + + +def per_core_work( + topology_file_path, + trajectory_file_path, + list_square_vertex_arrays_this_core, + MDA_selection, + start_frame, + end_frame, + reconstruction_index_list, + maximum_delta_magnitude, +): """Run the analysis on one core. The code to perform on a given core given the list of square vertices assigned to it. 
""" # obtain the relevant coordinates for particles of interest - universe_object = MDAnalysis.Universe(topology_file_path, trajectory_file_path) + universe_object = MDAnalysis.Universe( + topology_file_path, trajectory_file_path + ) list_previous_frame_centroids = [] list_previous_frame_indices = [] - #define some utility functions for trajectory iteration: + # define some utility functions for trajectory iteration: def produce_list_indices_point_in_polygon_this_frame(vertex_coord_list): list_indices_point_in_polygon = [] for square_vertices in vertex_coord_list: path_object = matplotlib.path.Path(square_vertices) - index_list_in_polygon = np.where(path_object.contains_points(relevant_particle_coordinate_array_xy)) + index_list_in_polygon = np.where( + path_object.contains_points( + relevant_particle_coordinate_array_xy + ) + ) list_indices_point_in_polygon.append(index_list_in_polygon) return list_indices_point_in_polygon def produce_list_centroids_this_frame(list_indices_in_polygon): list_centroids_this_frame = [] for indices in list_indices_in_polygon: - if not indices[0].size > 0: # if there are no particles of interest in this particular square + if ( + not indices[0].size > 0 + ): # if there are no particles of interest in this particular square list_centroids_this_frame.append(None) else: - current_coordinate_array_in_square = relevant_particle_coordinate_array_xy[indices] - current_square_indices_centroid = np.average(current_coordinate_array_in_square, axis=0) - list_centroids_this_frame.append(current_square_indices_centroid) + current_coordinate_array_in_square = ( + relevant_particle_coordinate_array_xy[indices] + ) + current_square_indices_centroid = np.average( + current_coordinate_array_in_square, axis=0 + ) + list_centroids_this_frame.append( + current_square_indices_centroid + ) return list_centroids_this_frame # a list of numpy xy centroid arrays for this frame for ts in universe_object.trajectory: if ts.frame < start_frame: # don't start until first specified frame continue - relevant_particle_coordinate_array_xy = universe_object.select_atoms(MDA_selection).positions[..., :-1] + relevant_particle_coordinate_array_xy = universe_object.select_atoms( + MDA_selection + ).positions[..., :-1] # only 2D / xy coords for now - #I will need a list of indices for relevant particles falling within each square in THIS frame: - list_indices_in_squares_this_frame = produce_list_indices_point_in_polygon_this_frame( - list_square_vertex_arrays_this_core) - #likewise, I will need a list of centroids of particles in each square (same order as above list): - list_centroids_in_squares_this_frame = produce_list_centroids_this_frame(list_indices_in_squares_this_frame) - if list_previous_frame_indices: # if the previous frame had indices in at least one square I will need to use + # I will need a list of indices for relevant particles falling within each square in THIS frame: + list_indices_in_squares_this_frame = ( + produce_list_indices_point_in_polygon_this_frame( + list_square_vertex_arrays_this_core + ) + ) + # likewise, I will need a list of centroids of particles in each square (same order as above list): + list_centroids_in_squares_this_frame = ( + produce_list_centroids_this_frame( + list_indices_in_squares_this_frame + ) + ) + if ( + list_previous_frame_indices + ): # if the previous frame had indices in at least one square I will need to use # those indices to generate the updates to the corresponding centroids in this frame: - 
list_centroids_this_frame_using_indices_from_last_frame = produce_list_centroids_this_frame( - list_previous_frame_indices) - #I need to write a velocity of zero if there are any 'empty' squares in either frame: + list_centroids_this_frame_using_indices_from_last_frame = ( + produce_list_centroids_this_frame(list_previous_frame_indices) + ) + # I need to write a velocity of zero if there are any 'empty' squares in either frame: xy_deltas_to_write = [] - for square_1_centroid, square_2_centroid in zip(list_centroids_this_frame_using_indices_from_last_frame, - list_previous_frame_centroids): + for square_1_centroid, square_2_centroid in zip( + list_centroids_this_frame_using_indices_from_last_frame, + list_previous_frame_centroids, + ): if square_1_centroid is None or square_2_centroid is None: xy_deltas_to_write.append([0, 0]) else: - xy_deltas_to_write.append(np.subtract(square_1_centroid, square_2_centroid).tolist()) + xy_deltas_to_write.append( + np.subtract( + square_1_centroid, square_2_centroid + ).tolist() + ) - #xy_deltas_to_write = np.subtract(np.array( + # xy_deltas_to_write = np.subtract(np.array( # list_centroids_this_frame_using_indices_from_last_frame),np.array(list_previous_frame_centroids)) xy_deltas_to_write = np.array(xy_deltas_to_write) - #now filter the array to only contain distances in the range [-8,8] as a placeholder for dealing with PBC + # now filter the array to only contain distances in the range [-8,8] as a placeholder for dealing with PBC # issues (Matthieu seemed to use a limit of 8 as well); - xy_deltas_to_write = np.clip(xy_deltas_to_write, -maximum_delta_magnitude, maximum_delta_magnitude) + xy_deltas_to_write = np.clip( + xy_deltas_to_write, + -maximum_delta_magnitude, + maximum_delta_magnitude, + ) - #with the xy and dx,dy values calculated I need to set the values from this frame to previous frame + # with the xy and dx,dy values calculated I need to set the values from this frame to previous frame # values in anticipation of the next frame: - list_previous_frame_centroids = list_centroids_in_squares_this_frame[:] + list_previous_frame_centroids = ( + list_centroids_in_squares_this_frame[:] + ) list_previous_frame_indices = list_indices_in_squares_this_frame[:] else: # either no points in squares or after the first frame I'll just reset the 'previous' values so they # can be used when consecutive frames have proper values - list_previous_frame_centroids = list_centroids_in_squares_this_frame[:] + list_previous_frame_centroids = ( + list_centroids_in_squares_this_frame[:] + ) list_previous_frame_indices = list_indices_in_squares_this_frame[:] if ts.frame > end_frame: break # stop here return list(zip(reconstruction_index_list, xy_deltas_to_write.tolist())) -def generate_streamlines(topology_file_path, trajectory_file_path, grid_spacing, MDA_selection, start_frame, - end_frame, xmin, xmax, ymin, ymax, maximum_delta_magnitude, num_cores='maximum'): +def generate_streamlines( + topology_file_path, + trajectory_file_path, + grid_spacing, + MDA_selection, + start_frame, + end_frame, + xmin, + xmax, + ymin, + ymax, + maximum_delta_magnitude, + num_cores="maximum", +): r"""Produce the x and y components of a 2D streamplot data set. 
Parameters @@ -311,35 +396,58 @@ def generate_streamlines(topology_file_path, trajectory_file_path, grid_spacing, """ # work out the number of cores to use: - if num_cores == 'maximum': + if num_cores == "maximum": num_cores = multiprocessing.cpu_count() # use all available cores else: num_cores = num_cores # use the value specified by the user - #assert isinstance(num_cores,(int,long)), "The number of specified cores must (of course) be an integer." - np.seterr(all='warn', over='raise') + # assert isinstance(num_cores,(int,long)), "The number of specified cores must (of course) be an integer." + np.seterr(all="warn", over="raise") parent_list_deltas = [] # collect all data from child processes here def log_result_to_parent(delta_array): parent_list_deltas.extend(delta_array) tuple_of_limits = (xmin, xmax, ymin, ymax) - grid = produce_grid(tuple_of_limits=tuple_of_limits, grid_spacing=grid_spacing) - list_square_vertex_arrays_per_core, list_parent_index_values, total_rows, total_columns = \ - split_grid(grid=grid, - num_cores=num_cores) + grid = produce_grid( + tuple_of_limits=tuple_of_limits, grid_spacing=grid_spacing + ) + ( + list_square_vertex_arrays_per_core, + list_parent_index_values, + total_rows, + total_columns, + ) = split_grid(grid=grid, num_cores=num_cores) pool = multiprocessing.Pool(num_cores) - for vertex_sublist, index_sublist in zip(list_square_vertex_arrays_per_core, list_parent_index_values): - pool.apply_async(per_core_work, args=( - topology_file_path, trajectory_file_path, vertex_sublist, MDA_selection, start_frame, end_frame, - index_sublist, maximum_delta_magnitude), callback=log_result_to_parent) + for vertex_sublist, index_sublist in zip( + list_square_vertex_arrays_per_core, list_parent_index_values + ): + pool.apply_async( + per_core_work, + args=( + topology_file_path, + trajectory_file_path, + vertex_sublist, + MDA_selection, + start_frame, + end_frame, + index_sublist, + maximum_delta_magnitude, + ), + callback=log_result_to_parent, + ) pool.close() pool.join() dx_array = np.zeros((total_rows, total_columns)) dy_array = np.zeros((total_rows, total_columns)) - #the parent_list_deltas is shaped like this: [ ([row_index,column_index],[dx,dy]), ... (...),...,] - for index_array, delta_array in parent_list_deltas: # go through the list in the parent process and assign to the + # the parent_list_deltas is shaped like this: [ ([row_index,column_index],[dx,dy]), ... 
(...),...,] + for ( + index_array, + delta_array, + ) in ( + parent_list_deltas + ): # go through the list in the parent process and assign to the # appropriate positions in the dx and dy matrices: - #build in a filter to replace all values at the cap (currently between -8,8) with 0 to match Matthieu's code + # build in a filter to replace all values at the cap (currently between -8,8) with 0 to match Matthieu's code # (I think eventually we'll reduce the cap to a narrower boundary though) index_1 = index_array.tolist()[0] index_2 = index_array.tolist()[1] @@ -352,9 +460,14 @@ def log_result_to_parent(delta_array): else: dy_array[index_1, index_2] = delta_array[1] - #at Matthieu's request, we now want to calculate the average and standard deviation of the displacement values: - displacement_array = np.sqrt(dx_array ** 2 + dy_array ** 2) + # at Matthieu's request, we now want to calculate the average and standard deviation of the displacement values: + displacement_array = np.sqrt(dx_array**2 + dy_array**2) average_displacement = np.average(displacement_array) standard_deviation_of_displacement = np.std(displacement_array) - return (dx_array, dy_array, average_displacement, standard_deviation_of_displacement) + return ( + dx_array, + dy_array, + average_displacement, + standard_deviation_of_displacement, + ) diff --git a/package/MDAnalysis/visualization/streamlines_3D.py b/package/MDAnalysis/visualization/streamlines_3D.py index 1f85851c16..5c9a03c4e1 100644 --- a/package/MDAnalysis/visualization/streamlines_3D.py +++ b/package/MDAnalysis/visualization/streamlines_3D.py @@ -56,8 +56,9 @@ import MDAnalysis -def determine_container_limits(topology_file_path, trajectory_file_path, - buffer_value): +def determine_container_limits( + topology_file_path, trajectory_file_path, buffer_value +): """Calculate the extent of the atom coordinates + buffer. 
A function for the parent process which should take the input trajectory @@ -73,19 +74,29 @@ def determine_container_limits(topology_file_path, trajectory_file_path, buffer_value : float buffer value (padding) in +/- {x, y, z} """ - universe_object = MDAnalysis.Universe(topology_file_path, trajectory_file_path) - all_atom_selection = universe_object.select_atoms('all') # select all particles + universe_object = MDAnalysis.Universe( + topology_file_path, trajectory_file_path + ) + all_atom_selection = universe_object.select_atoms( + "all" + ) # select all particles all_atom_coordinate_array = all_atom_selection.positions x_min, x_max, y_min, y_max, z_min, z_max = [ all_atom_coordinate_array[..., 0].min(), - all_atom_coordinate_array[..., 0].max(), all_atom_coordinate_array[..., 1].min(), - all_atom_coordinate_array[..., 1].max(), all_atom_coordinate_array[..., 2].min(), - all_atom_coordinate_array[..., 2].max()] - tuple_of_limits = \ - ( - x_min - buffer_value, - x_max + buffer_value, y_min - buffer_value, y_max + buffer_value, z_min - buffer_value, - z_max + buffer_value) # using buffer_value to catch particles near edges + all_atom_coordinate_array[..., 0].max(), + all_atom_coordinate_array[..., 1].min(), + all_atom_coordinate_array[..., 1].max(), + all_atom_coordinate_array[..., 2].min(), + all_atom_coordinate_array[..., 2].max(), + ] + tuple_of_limits = ( + x_min - buffer_value, + x_max + buffer_value, + y_min - buffer_value, + y_max + buffer_value, + z_min - buffer_value, + z_max + buffer_value, + ) # using buffer_value to catch particles near edges return tuple_of_limits @@ -109,7 +120,11 @@ def produce_grid(tuple_of_limits, grid_spacing): """ x_min, x_max, y_min, y_max, z_min, z_max = tuple_of_limits - grid = np.mgrid[x_min:x_max:grid_spacing, y_min:y_max:grid_spacing, z_min:z_max:grid_spacing] + grid = np.mgrid[ + x_min:x_max:grid_spacing, + y_min:y_max:grid_spacing, + z_min:z_max:grid_spacing, + ] return grid @@ -139,78 +154,124 @@ def split_grid(grid, num_cores): num_z_values = z.shape[-1] num_sheets = z.shape[0] delta_array_shape = tuple( - [n - 1 for n in x.shape]) # the final target shape for return delta arrays is n-1 in each dimension + [n - 1 for n in x.shape] + ) # the final target shape for return delta arrays is n-1 in each dimension ordered_list_per_sheet_x_values = [] - for x_sheet in x: # each x_sheet should have shape (25,23) and the same x value in each element + for ( + x_sheet + ) in ( + x + ): # each x_sheet should have shape (25,23) and the same x value in each element array_all_x_values_current_sheet = x_sheet.flatten() - ordered_list_per_sheet_x_values.append(array_all_x_values_current_sheet) + ordered_list_per_sheet_x_values.append( + array_all_x_values_current_sheet + ) ordered_list_per_sheet_y_values = [] for y_columns in y: array_all_y_values_current_sheet = y_columns.flatten() - ordered_list_per_sheet_y_values.append(array_all_y_values_current_sheet) + ordered_list_per_sheet_y_values.append( + array_all_y_values_current_sheet + ) ordered_list_per_sheet_z_values = [] for z_slices in z: array_all_z_values_current_sheet = z_slices.flatten() - ordered_list_per_sheet_z_values.append(array_all_z_values_current_sheet) + ordered_list_per_sheet_z_values.append( + array_all_z_values_current_sheet + ) ordered_list_cartesian_coordinates_per_sheet = [] - for x_sheet_coords, y_sheet_coords, z_sheet_coords in zip(ordered_list_per_sheet_x_values, - ordered_list_per_sheet_y_values, - ordered_list_per_sheet_z_values): - 
ordered_list_cartesian_coordinates_per_sheet.append(list(zip(x_sheet_coords, y_sheet_coords, z_sheet_coords))) - array_ordered_cartesian_coords_per_sheet = np.array(ordered_list_cartesian_coordinates_per_sheet) - #now I'm going to want to build cubes in an ordered fashion, and in such a way that I can track the index / + for x_sheet_coords, y_sheet_coords, z_sheet_coords in zip( + ordered_list_per_sheet_x_values, + ordered_list_per_sheet_y_values, + ordered_list_per_sheet_z_values, + ): + ordered_list_cartesian_coordinates_per_sheet.append( + list(zip(x_sheet_coords, y_sheet_coords, z_sheet_coords)) + ) + array_ordered_cartesian_coords_per_sheet = np.array( + ordered_list_cartesian_coordinates_per_sheet + ) + # now I'm going to want to build cubes in an ordered fashion, and in such a way that I can track the index / # centroid of each cube for domain decomposition / reconstruction and mayavi mlab.flow() input - #cubes will be formed from N - 1 base sheets combined with subsequent sheets + # cubes will be formed from N - 1 base sheets combined with subsequent sheets current_base_sheet = 0 dictionary_cubes_centroids_indices = {} cube_counter = 0 while current_base_sheet < num_sheets - 1: - current_base_sheet_array = array_ordered_cartesian_coords_per_sheet[current_base_sheet] + current_base_sheet_array = array_ordered_cartesian_coords_per_sheet[ + current_base_sheet + ] current_top_sheet_array = array_ordered_cartesian_coords_per_sheet[ - current_base_sheet + 1] # the points of the sheet 'to the right' in the grid + current_base_sheet + 1 + ] # the points of the sheet 'to the right' in the grid current_index = 0 while current_index < current_base_sheet_array.shape[0] - num_z_values: # iterate through all the indices in each of the sheet arrays (careful to avoid extra # points not needed for cubes) - column_z_level = 0 # start at the bottom of a given 4-point column and work up + column_z_level = ( + 0 # start at the bottom of a given 4-point column and work up + ) while column_z_level < num_z_values - 1: current_list_cube_vertices = [] - first_two_vertices_base_sheet = current_base_sheet_array[current_index:current_index + 2, ...].tolist() - first_two_vertices_top_sheet = current_top_sheet_array[current_index:current_index + 2, ...].tolist() - next_two_vertices_base_sheet = current_base_sheet_array[current_index + - num_z_values: 2 + - num_z_values + current_index, ...].tolist() - next_two_vertices_top_sheet = current_top_sheet_array[current_index + - num_z_values: 2 + - num_z_values + current_index, ...].tolist() + first_two_vertices_base_sheet = current_base_sheet_array[ + current_index : current_index + 2, ... + ].tolist() + first_two_vertices_top_sheet = current_top_sheet_array[ + current_index : current_index + 2, ... 
+ ].tolist() + next_two_vertices_base_sheet = current_base_sheet_array[ + current_index + + num_z_values : 2 + + num_z_values + + current_index, + ..., + ].tolist() + next_two_vertices_top_sheet = current_top_sheet_array[ + current_index + + num_z_values : 2 + + num_z_values + + current_index, + ..., + ].tolist() for vertex_set in [ - first_two_vertices_base_sheet, first_two_vertices_top_sheet, - next_two_vertices_base_sheet, next_two_vertices_top_sheet + first_two_vertices_base_sheet, + first_two_vertices_top_sheet, + next_two_vertices_base_sheet, + next_two_vertices_top_sheet, ]: current_list_cube_vertices.extend(vertex_set) vertex_array = np.array(current_list_cube_vertices) - assert vertex_array.shape == (8, 3), "vertex_array has incorrect shape" - cube_centroid = np.average(np.array(current_list_cube_vertices), axis=0) + assert vertex_array.shape == ( + 8, + 3, + ), "vertex_array has incorrect shape" + cube_centroid = np.average( + np.array(current_list_cube_vertices), axis=0 + ) dictionary_cubes_centroids_indices[cube_counter] = { - 'centroid': cube_centroid, - 'vertex_list': current_list_cube_vertices} + "centroid": cube_centroid, + "vertex_list": current_list_cube_vertices, + } cube_counter += 1 current_index += 1 column_z_level += 1 - if column_z_level == num_z_values - 1: # the loop will break but I should also increment the + if ( + column_z_level == num_z_values - 1 + ): # the loop will break but I should also increment the # current_index current_index += 1 current_base_sheet += 1 total_cubes = len(dictionary_cubes_centroids_indices) - #produce an array of pseudo cube indices (actually the dictionary keys which are cube numbers in string format): + # produce an array of pseudo cube indices (actually the dictionary keys which are cube numbers in string format): pseudo_cube_indices = np.arange(0, total_cubes) - sublist_of_cube_indices_per_core = np.array_split(pseudo_cube_indices, num_cores) - #now, the split of pseudoindices seems to work well, and the above sublist_of_cube_indices_per_core is a list of + sublist_of_cube_indices_per_core = np.array_split( + pseudo_cube_indices, num_cores + ) + # now, the split of pseudoindices seems to work well, and the above sublist_of_cube_indices_per_core is a list of # arrays of cube numbers / keys in the original dictionary - #now I think I'll try to produce a list of dictionaries that each contain their assigned cubes based on the above + # now I think I'll try to produce a list of dictionaries that each contain their assigned cubes based on the above # per core split list_dictionaries_for_cores = [] subdictionary_counter = 0 @@ -224,11 +285,22 @@ def split_grid(grid, num_cores): items_popped += 1 list_dictionaries_for_cores.append(current_core_dictionary) subdictionary_counter += 1 - return list_dictionaries_for_cores, total_cubes, num_sheets, delta_array_shape - - -def per_core_work(start_frame_coord_array, end_frame_coord_array, dictionary_cube_data_this_core, MDA_selection, - start_frame, end_frame): + return ( + list_dictionaries_for_cores, + total_cubes, + num_sheets, + delta_array_shape, + ) + + +def per_core_work( + start_frame_coord_array, + end_frame_coord_array, + dictionary_cube_data_this_core, + MDA_selection, + start_frame, + end_frame, +): """Run the analysis on one core. The code to perform on a given core given the dictionary of cube data. 
@@ -237,82 +309,137 @@ def per_core_work(start_frame_coord_array, end_frame_coord_array, dictionary_cub list_previous_frame_indices = [] # define some utility functions for trajectory iteration: - def point_in_cube(array_point_coordinates, list_cube_vertices, cube_centroid): + def point_in_cube( + array_point_coordinates, list_cube_vertices, cube_centroid + ): """Determine if an array of coordinates are within a cube.""" - #the simulation particle point can't be more than half the cube side length away from the cube centroid in + # the simulation particle point can't be more than half the cube side length away from the cube centroid in # any given dimension: array_cube_vertices = np.array(list_cube_vertices) - cube_half_side_length = scipy.spatial.distance.pdist(array_cube_vertices, 'euclidean').min() / 2.0 - array_cube_vertex_distances_from_centroid = scipy.spatial.distance.cdist(array_cube_vertices, - cube_centroid[np.newaxis, :]) - np.testing.assert_allclose(array_cube_vertex_distances_from_centroid.min(), - array_cube_vertex_distances_from_centroid.max(), rtol=0, atol=1.5e-4, - err_msg="not all cube vertex to centroid distances are the same, " - "so not a true cube") - absolute_delta_coords = np.absolute(np.subtract(array_point_coordinates, cube_centroid)) + cube_half_side_length = ( + scipy.spatial.distance.pdist( + array_cube_vertices, "euclidean" + ).min() + / 2.0 + ) + array_cube_vertex_distances_from_centroid = ( + scipy.spatial.distance.cdist( + array_cube_vertices, cube_centroid[np.newaxis, :] + ) + ) + np.testing.assert_allclose( + array_cube_vertex_distances_from_centroid.min(), + array_cube_vertex_distances_from_centroid.max(), + rtol=0, + atol=1.5e-4, + err_msg="not all cube vertex to centroid distances are the same, " + "so not a true cube", + ) + absolute_delta_coords = np.absolute( + np.subtract(array_point_coordinates, cube_centroid) + ) absolute_delta_x_coords = absolute_delta_coords[..., 0] - indices_delta_x_acceptable = np.where(absolute_delta_x_coords <= cube_half_side_length) + indices_delta_x_acceptable = np.where( + absolute_delta_x_coords <= cube_half_side_length + ) absolute_delta_y_coords = absolute_delta_coords[..., 1] - indices_delta_y_acceptable = np.where(absolute_delta_y_coords <= cube_half_side_length) + indices_delta_y_acceptable = np.where( + absolute_delta_y_coords <= cube_half_side_length + ) absolute_delta_z_coords = absolute_delta_coords[..., 2] - indices_delta_z_acceptable = np.where(absolute_delta_z_coords <= cube_half_side_length) - intersection_xy_acceptable_arrays = np.intersect1d(indices_delta_x_acceptable[0], - indices_delta_y_acceptable[0]) - overall_indices_points_in_current_cube = np.intersect1d(intersection_xy_acceptable_arrays, - indices_delta_z_acceptable[0]) + indices_delta_z_acceptable = np.where( + absolute_delta_z_coords <= cube_half_side_length + ) + intersection_xy_acceptable_arrays = np.intersect1d( + indices_delta_x_acceptable[0], indices_delta_y_acceptable[0] + ) + overall_indices_points_in_current_cube = np.intersect1d( + intersection_xy_acceptable_arrays, indices_delta_z_acceptable[0] + ) return overall_indices_points_in_current_cube - def update_dictionary_point_in_cube_start_frame(array_simulation_particle_coordinates, - dictionary_cube_data_this_core): + def update_dictionary_point_in_cube_start_frame( + array_simulation_particle_coordinates, dictionary_cube_data_this_core + ): """Basically update the cube dictionary objects assigned to this core to contain a new key/value pair corresponding to the indices of the relevant 
particles that fall within a given cube. Also, for a given cube, - store a key/value pair for the centroid of the particles that fall within the cube.""" + store a key/value pair for the centroid of the particles that fall within the cube. + """ cube_counter = 0 for key, cube in dictionary_cube_data_this_core.items(): - index_list_in_cube = point_in_cube(array_simulation_particle_coordinates, cube['vertex_list'], - cube['centroid']) - cube['start_frame_index_list_in_cube'] = index_list_in_cube - if len(index_list_in_cube) > 0: # if there's at least one particle in this cube - centroid_particles_in_cube = np.average(array_simulation_particle_coordinates[index_list_in_cube], - axis=0) - cube['centroid_of_particles_first_frame'] = centroid_particles_in_cube + index_list_in_cube = point_in_cube( + array_simulation_particle_coordinates, + cube["vertex_list"], + cube["centroid"], + ) + cube["start_frame_index_list_in_cube"] = index_list_in_cube + if ( + len(index_list_in_cube) > 0 + ): # if there's at least one particle in this cube + centroid_particles_in_cube = np.average( + array_simulation_particle_coordinates[index_list_in_cube], + axis=0, + ) + cube["centroid_of_particles_first_frame"] = ( + centroid_particles_in_cube + ) else: # empty cube - cube['centroid_of_particles_first_frame'] = None + cube["centroid_of_particles_first_frame"] = None cube_counter += 1 - def update_dictionary_end_frame(array_simulation_particle_coordinates, dictionary_cube_data_this_core): + def update_dictionary_end_frame( + array_simulation_particle_coordinates, dictionary_cube_data_this_core + ): """Update the cube dictionary objects again as appropriate for the second and final frame.""" cube_counter = 0 for key, cube in dictionary_cube_data_this_core.items(): # if there were no particles in the cube in the first frame, then set dx,dy,dz each to 0 - if cube['centroid_of_particles_first_frame'] is None: - cube['dx'] = 0 - cube['dy'] = 0 - cube['dz'] = 0 + if cube["centroid_of_particles_first_frame"] is None: + cube["dx"] = 0 + cube["dy"] = 0 + cube["dz"] = 0 else: # there was at least one particle in the starting cube so we can get dx,dy,dz centroid values - new_coordinate_array_for_particles_starting_in_this_cube = array_simulation_particle_coordinates[ - cube['start_frame_index_list_in_cube']] + new_coordinate_array_for_particles_starting_in_this_cube = ( + array_simulation_particle_coordinates[ + cube["start_frame_index_list_in_cube"] + ] + ) new_centroid_for_particles_starting_in_this_cube = np.average( - new_coordinate_array_for_particles_starting_in_this_cube, axis=0) - cube['centroid_of_paticles_final_frame'] = new_centroid_for_particles_starting_in_this_cube - delta_centroid_array_this_cube = new_centroid_for_particles_starting_in_this_cube - cube[ - 'centroid_of_particles_first_frame'] - cube['dx'] = delta_centroid_array_this_cube[0] - cube['dy'] = delta_centroid_array_this_cube[1] - cube['dz'] = delta_centroid_array_this_cube[2] + new_coordinate_array_for_particles_starting_in_this_cube, + axis=0, + ) + cube["centroid_of_paticles_final_frame"] = ( + new_centroid_for_particles_starting_in_this_cube + ) + delta_centroid_array_this_cube = ( + new_centroid_for_particles_starting_in_this_cube + - cube["centroid_of_particles_first_frame"] + ) + cube["dx"] = delta_centroid_array_this_cube[0] + cube["dy"] = delta_centroid_array_this_cube[1] + cube["dz"] = delta_centroid_array_this_cube[2] cube_counter += 1 - #now that the parent process is dealing with the universe object & grabbing required coordinates, each 
child + # now that the parent process is dealing with the universe object & grabbing required coordinates, each child # process only needs to take the coordinate arrays & perform the operations with its assigned cubes (no more file # opening and trajectory iteration on each core--which I'm hoping will substantially reduce the physical memory # footprint of my 3D streamplot code) - update_dictionary_point_in_cube_start_frame(start_frame_coord_array, dictionary_cube_data_this_core) - update_dictionary_end_frame(end_frame_coord_array, dictionary_cube_data_this_core) + update_dictionary_point_in_cube_start_frame( + start_frame_coord_array, dictionary_cube_data_this_core + ) + update_dictionary_end_frame( + end_frame_coord_array, dictionary_cube_data_this_core + ) return dictionary_cube_data_this_core -def produce_coordinate_arrays_single_process(topology_file_path, trajectory_file_path, MDA_selection, start_frame, - end_frame): +def produce_coordinate_arrays_single_process( + topology_file_path, + trajectory_file_path, + MDA_selection, + start_frame, + end_frame, +): """Generate coordinate arrays. To reduce memory footprint produce only a single MDA selection and get @@ -321,24 +448,46 @@ def produce_coordinate_arrays_single_process(topology_file_path, trajectory_file waste memory. """ - universe_object = MDAnalysis.Universe(topology_file_path, trajectory_file_path) + universe_object = MDAnalysis.Universe( + topology_file_path, trajectory_file_path + ) relevant_particles = universe_object.select_atoms(MDA_selection) # pull out coordinate arrays from desired frames: for ts in universe_object.trajectory: if ts.frame > end_frame: break # stop here if ts.frame == start_frame: - start_frame_relevant_particle_coordinate_array_xyz = relevant_particles.positions + start_frame_relevant_particle_coordinate_array_xyz = ( + relevant_particles.positions + ) elif ts.frame == end_frame: - end_frame_relevant_particle_coordinate_array_xyz = relevant_particles.positions + end_frame_relevant_particle_coordinate_array_xyz = ( + relevant_particles.positions + ) else: continue - return (start_frame_relevant_particle_coordinate_array_xyz, end_frame_relevant_particle_coordinate_array_xyz) - - -def generate_streamlines_3d(topology_file_path, trajectory_file_path, grid_spacing, MDA_selection, start_frame, - end_frame, xmin, xmax, ymin, ymax, zmin, zmax, maximum_delta_magnitude=2.0, - num_cores='maximum'): + return ( + start_frame_relevant_particle_coordinate_array_xyz, + end_frame_relevant_particle_coordinate_array_xyz, + ) + + +def generate_streamlines_3d( + topology_file_path, + trajectory_file_path, + grid_spacing, + MDA_selection, + start_frame, + end_frame, + xmin, + xmax, + ymin, + ymax, + zmin, + zmax, + maximum_delta_magnitude=2.0, + num_cores="maximum", +): r"""Produce the x, y and z components of a 3D streamplot data set. Parameters @@ -439,68 +588,91 @@ def generate_streamlines_3d(topology_file_path, trajectory_file_path, grid_spaci .. _mayavi: http://docs.enthought.com/mayavi/mayavi/ """ # work out the number of cores to use: - if num_cores == 'maximum': + if num_cores == "maximum": num_cores = multiprocessing.cpu_count() # use all available cores else: num_cores = num_cores # use the value specified by the user # assert isinstance(num_cores,(int,long)), "The number of specified cores must (of course) be an integer." 
- np.seterr(all='warn', over='raise') + np.seterr(all="warn", over="raise") parent_cube_dictionary = {} # collect all data from child processes here def log_result_to_parent(process_dict): parent_cube_dictionary.update(process_dict) - #step 1: produce tuple of cartesian coordinate limits for the first frame - #tuple_of_limits = determine_container_limits(topology_file_path = topology_file_path,trajectory_file_path = + # step 1: produce tuple of cartesian coordinate limits for the first frame + # tuple_of_limits = determine_container_limits(topology_file_path = topology_file_path,trajectory_file_path = # trajectory_file_path,buffer_value=buffer_value) tuple_of_limits = (xmin, xmax, ymin, ymax, zmin, zmax) - #step 2: produce a suitable grid (will assume that grid size / container size does not vary during simulation--or + # step 2: produce a suitable grid (will assume that grid size / container size does not vary during simulation--or # at least not beyond the buffer limit, such that this grid can be used for all subsequent frames) - grid = produce_grid(tuple_of_limits=tuple_of_limits, grid_spacing=grid_spacing) - #step 3: split the grid into a dictionary of cube information that can be sent to each core for processing: - list_dictionaries_for_cores, total_cubes, num_sheets, delta_array_shape = split_grid(grid=grid, num_cores=num_cores) - #step 3b: produce required coordinate arrays on a single core to avoid making a universe object on each core: - start_frame_coord_array, end_frame_coord_array = produce_coordinate_arrays_single_process(topology_file_path, - trajectory_file_path, - MDA_selection, - start_frame, end_frame) - #step 4: per process work using the above grid data split + grid = produce_grid( + tuple_of_limits=tuple_of_limits, grid_spacing=grid_spacing + ) + # step 3: split the grid into a dictionary of cube information that can be sent to each core for processing: + list_dictionaries_for_cores, total_cubes, num_sheets, delta_array_shape = ( + split_grid(grid=grid, num_cores=num_cores) + ) + # step 3b: produce required coordinate arrays on a single core to avoid making a universe object on each core: + start_frame_coord_array, end_frame_coord_array = ( + produce_coordinate_arrays_single_process( + topology_file_path, + trajectory_file_path, + MDA_selection, + start_frame, + end_frame, + ) + ) + # step 4: per process work using the above grid data split pool = multiprocessing.Pool(num_cores) for sub_dictionary_of_cube_data in list_dictionaries_for_cores: - pool.apply_async(per_core_work, args=( - start_frame_coord_array, end_frame_coord_array, sub_dictionary_of_cube_data, MDA_selection, start_frame, - end_frame), callback=log_result_to_parent) + pool.apply_async( + per_core_work, + args=( + start_frame_coord_array, + end_frame_coord_array, + sub_dictionary_of_cube_data, + MDA_selection, + start_frame, + end_frame, + ), + callback=log_result_to_parent, + ) pool.close() pool.join() - #so, at this stage the parent process now has a single dictionary with all the cube objects updated from all + # so, at this stage the parent process now has a single dictionary with all the cube objects updated from all # available cores - #the 3D streamplot (i.e, mayavi flow() function) will require separate 3D np arrays for dx,dy,dz - #the shape of each 3D array will unfortunately have to match the mgrid data structure (bit of a pain): ( + # the 3D streamplot (i.e, mayavi flow() function) will require separate 3D np arrays for dx,dy,dz + # the shape of each 3D array will unfortunately have to match 
the mgrid data structure (bit of a pain): ( # num_sheets - 1, num_sheets - 1, cubes_per_column) cubes_per_sheet = int(float(total_cubes) / float(num_sheets - 1)) - #produce dummy zero arrays for dx,dy,dz of the appropriate shape: + # produce dummy zero arrays for dx,dy,dz of the appropriate shape: dx_array = np.zeros(delta_array_shape) dy_array = np.zeros(delta_array_shape) dz_array = np.zeros(delta_array_shape) - #now use the parent cube dictionary to correctly substitute in dx,dy,dz values + # now use the parent cube dictionary to correctly substitute in dx,dy,dz values current_sheet = 0 # which is also the current row y_index_current_sheet = 0 # sub row z_index_current_column = 0 # column total_cubes_current_sheet = 0 for cube_number in range(0, total_cubes): - dx_array[current_sheet, y_index_current_sheet, z_index_current_column] = parent_cube_dictionary[cube_number][ - 'dx'] - dy_array[current_sheet, y_index_current_sheet, z_index_current_column] = parent_cube_dictionary[cube_number][ - 'dy'] - dz_array[current_sheet, y_index_current_sheet, z_index_current_column] = parent_cube_dictionary[cube_number][ - 'dz'] + dx_array[ + current_sheet, y_index_current_sheet, z_index_current_column + ] = parent_cube_dictionary[cube_number]["dx"] + dy_array[ + current_sheet, y_index_current_sheet, z_index_current_column + ] = parent_cube_dictionary[cube_number]["dy"] + dz_array[ + current_sheet, y_index_current_sheet, z_index_current_column + ] = parent_cube_dictionary[cube_number]["dz"] z_index_current_column += 1 total_cubes_current_sheet += 1 if z_index_current_column == delta_array_shape[2]: # done building current y-column so iterate y value and reset z z_index_current_column = 0 y_index_current_sheet += 1 - if y_index_current_sheet == delta_array_shape[1]: # current sheet is complete + if ( + y_index_current_sheet == delta_array_shape[1] + ): # current sheet is complete current_sheet += 1 y_index_current_sheet = 0 # restart for new sheet z_index_current_column = 0 diff --git a/package/pyproject.toml b/package/pyproject.toml index cd7ec3a780..3516903fe2 100644 --- a/package/pyproject.toml +++ b/package/pyproject.toml @@ -130,6 +130,9 @@ include = ''' ( tables\.py | due\.py +| setup\.py +| visualization/.*\.py ) ''' +extend-exclude = '__pycache__' required-version = '24' diff --git a/package/setup.py b/package/setup.py index 0fc4bef4e2..412087cdaf 100755 --- a/package/setup.py +++ b/package/setup.py @@ -60,7 +60,7 @@ # NOTE: keep in sync with MDAnalysis.__version__ in version.py RELEASE = "2.8.0-dev0" -is_release = 'dev' not in RELEASE +is_release = "dev" not in RELEASE # Handle cython modules try: @@ -68,18 +68,22 @@ # minimum cython version now set to 0.28 to match pyproject.toml import Cython from Cython.Build import cythonize + cython_found = True required_version = "0.28" if not Version(Cython.__version__) >= Version(required_version): # We don't necessarily die here. Maybe we already have # the cythonized '.c' files. 
- print("Cython version {0} was found but won't be used: version {1} " - "or greater is required because it offers a handy " - "parallelization module".format( - Cython.__version__, required_version)) + print( + "Cython version {0} was found but won't be used: version {1} " + "or greater is required because it offers a handy " + "parallelization module".format( + Cython.__version__, required_version + ) + ) cython_found = False - cython_linetrace = bool(os.environ.get('CYTHON_TRACE_NOGIL', False)) + cython_linetrace = bool(os.environ.get("CYTHON_TRACE_NOGIL", False)) except ImportError: cython_found = False if not is_release: @@ -88,9 +92,10 @@ sys.exit(1) cython_linetrace = False + def abspath(file): - return os.path.join(os.path.dirname(os.path.abspath(__file__)), - file) + return os.path.join(os.path.dirname(os.path.abspath(__file__)), file) + class Config(object): """Config wrapper class to get build options @@ -109,31 +114,31 @@ class Config(object): """ - def __init__(self, fname='setup.cfg'): + def __init__(self, fname="setup.cfg"): fname = abspath(fname) if os.path.exists(fname): self.config = configparser.ConfigParser() self.config.read(fname) def get(self, option_name, default=None): - environ_name = 'MDA_' + option_name.upper() + environ_name = "MDA_" + option_name.upper() if environ_name in os.environ: val = os.environ[environ_name] - if val.upper() in ('1', 'TRUE'): + if val.upper() in ("1", "TRUE"): return True - elif val.upper() in ('0', 'FALSE'): + elif val.upper() in ("0", "FALSE"): return False return val try: - option = self.config.get('options', option_name) + option = self.config.get("options", option_name) return option except configparser.NoOptionError: return default class MDAExtension(Extension, object): - """Derived class to cleanly handle setup-time (numpy) dependencies. - """ + """Derived class to cleanly handle setup-time (numpy) dependencies.""" + # The only setup-time numpy dependency comes when setting up its # include dir. # The actual numpy import and call can be delayed until after pip @@ -151,7 +156,7 @@ def include_dirs(self): if not self._mda_include_dirs: for item in self._mda_include_dir_args: try: - self._mda_include_dirs.append(item()) #The numpy callable + self._mda_include_dirs.append(item()) # The numpy callable except TypeError: item = abspath(item) self._mda_include_dirs.append((item)) @@ -174,9 +179,13 @@ def get_numpy_include(): import numpy as np except ImportError: print('*** package "numpy" not found ***') - print('MDAnalysis requires a version of NumPy (>=1.21.0), even for setup.') - print('Please get it from http://numpy.scipy.org/ or install it through ' - 'your package manager.') + print( + "MDAnalysis requires a version of NumPy (>=1.21.0), even for setup." + ) + print( + "Please get it from http://numpy.scipy.org/ or install it through " + "your package manager." 
+ ) sys.exit(-1) return np.get_include() @@ -184,26 +193,27 @@ def get_numpy_include(): def hasfunction(cc, funcname, include=None, extra_postargs=None): # From http://stackoverflow.com/questions/ # 7018879/disabling-output-when-compiling-with-distutils - tmpdir = tempfile.mkdtemp(prefix='hasfunction-') + tmpdir = tempfile.mkdtemp(prefix="hasfunction-") devnull = oldstderr = None try: try: - fname = os.path.join(tmpdir, 'funcname.c') - with open(fname, 'w') as f: + fname = os.path.join(tmpdir, "funcname.c") + with open(fname, "w") as f: if include is not None: - f.write('#include {0!s}\n'.format(include)) - f.write('int main(void) {\n') - f.write(' {0!s};\n'.format(funcname)) - f.write('}\n') + f.write("#include {0!s}\n".format(include)) + f.write("int main(void) {\n") + f.write(" {0!s};\n".format(funcname)) + f.write("}\n") # Redirect stderr to /dev/null to hide any error messages # from the compiler. # This will have to be changed if we ever have to check # for a function on Windows. - devnull = open('/dev/null', 'w') + devnull = open("/dev/null", "w") oldstderr = os.dup(sys.stderr.fileno()) os.dup2(devnull.fileno(), sys.stderr.fileno()) - objects = cc.compile([fname], output_dir=tmpdir, - extra_postargs=extra_postargs) + objects = cc.compile( + [fname], output_dir=tmpdir, extra_postargs=extra_postargs + ) cc.link_executable(objects, os.path.join(tmpdir, "a.out")) except Exception: return False @@ -221,11 +231,15 @@ def detect_openmp(): print("Attempting to autodetect OpenMP support... ", end="") compiler = new_compiler() customize_compiler(compiler) - compiler.add_library('gomp') - include = '' - extra_postargs = ['-fopenmp'] - hasopenmp = hasfunction(compiler, 'omp_get_num_threads()', include=include, - extra_postargs=extra_postargs) + compiler.add_library("gomp") + include = "" + extra_postargs = ["-fopenmp"] + hasopenmp = hasfunction( + compiler, + "omp_get_num_threads()", + include=include, + extra_postargs=extra_postargs, + ) if hasopenmp: print("Compiler supports OpenMP") else: @@ -238,12 +252,12 @@ def using_clang(): compiler = new_compiler() customize_compiler(compiler) compiler_ver = getoutput("{0} -v".format(compiler.compiler[0])) - if 'Spack GCC' in compiler_ver: + if "Spack GCC" in compiler_ver: # when gcc toolchain is built from source with spack # using clang, the 'clang' string may be present in # the compiler metadata, but it is not clang is_clang = False - elif 'clang' in compiler_ver: + elif "clang" in compiler_ver: # by default, Apple will typically alias gcc to # clang, with some mention of 'clang' in the # metadata @@ -255,196 +269,252 @@ def using_clang(): def extensions(config): # usually (except coming from release tarball) cython files must be generated - use_cython = config.get('use_cython', default=cython_found) - use_openmp = config.get('use_openmp', default=True) - annotate_cython = config.get('annotate_cython', default=False) - - extra_compile_args = ['-std=c11', '-O3', '-funroll-loops', - '-fsigned-zeros'] # see #2722 + use_cython = config.get("use_cython", default=cython_found) + use_openmp = config.get("use_openmp", default=True) + annotate_cython = config.get("annotate_cython", default=False) + + extra_compile_args = [ + "-std=c11", + "-O3", + "-funroll-loops", + "-fsigned-zeros", + ] # see #2722 define_macros = [] - if config.get('debug_cflags', default=False): - extra_compile_args.extend(['-Wall', '-pedantic']) - define_macros.extend([('DEBUG', '1')]) + if config.get("debug_cflags", default=False): + extra_compile_args.extend(["-Wall", "-pedantic"]) + 
define_macros.extend([("DEBUG", "1")]) # encore is sensitive to floating point accuracy, especially on non-x86 # to avoid reducing optimisations on everything, we make a set of compile # args specific to encore see #2997 for an example of this. - encore_compile_args = [a for a in extra_compile_args if 'O3' not in a] - if platform.machine() == 'aarch64' or platform.machine() == 'ppc64le': - encore_compile_args.append('-O1') + encore_compile_args = [a for a in extra_compile_args if "O3" not in a] + if platform.machine() == "aarch64" or platform.machine() == "ppc64le": + encore_compile_args.append("-O1") else: - encore_compile_args.append('-O3') + encore_compile_args.append("-O3") # allow using custom c/c++ flags and architecture specific instructions. # This allows people to build optimized versions of MDAnalysis. # Do here so not included in encore - extra_cflags = config.get('extra_cflags', default=False) + extra_cflags = config.get("extra_cflags", default=False) if extra_cflags: flags = extra_cflags.split() extra_compile_args.extend(flags) - cpp_extra_compile_args = [a for a in extra_compile_args if 'std' not in a] - cpp_extra_compile_args.append('-std=c++11') - cpp_extra_link_args=[] + cpp_extra_compile_args = [a for a in extra_compile_args if "std" not in a] + cpp_extra_compile_args.append("-std=c++11") + cpp_extra_link_args = [] # needed to specify c++ runtime library on OSX - if platform.system() == 'Darwin' and using_clang(): - cpp_extra_compile_args.append('-stdlib=libc++') - cpp_extra_compile_args.append('-mmacosx-version-min=10.9') - cpp_extra_link_args.append('-stdlib=libc++') - cpp_extra_link_args.append('-mmacosx-version-min=10.9') + if platform.system() == "Darwin" and using_clang(): + cpp_extra_compile_args.append("-stdlib=libc++") + cpp_extra_compile_args.append("-mmacosx-version-min=10.9") + cpp_extra_link_args.append("-stdlib=libc++") + cpp_extra_link_args.append("-mmacosx-version-min=10.9") # Needed for large-file seeking under 32bit systems (for xtc/trr indexing # and access). largefile_macros = [ - ('_LARGEFILE_SOURCE', None), - ('_LARGEFILE64_SOURCE', None), - ('_FILE_OFFSET_BITS', '64') + ("_LARGEFILE_SOURCE", None), + ("_LARGEFILE64_SOURCE", None), + ("_FILE_OFFSET_BITS", "64"), ] has_openmp = detect_openmp() if use_openmp and not has_openmp: - print('No openmp compatible compiler found default to serial build.') + print("No openmp compatible compiler found default to serial build.") - parallel_args = ['-fopenmp'] if has_openmp and use_openmp else [] - parallel_libraries = ['gomp'] if has_openmp and use_openmp else [] - parallel_macros = [('PARALLEL', None)] if has_openmp and use_openmp else [] + parallel_args = ["-fopenmp"] if has_openmp and use_openmp else [] + parallel_libraries = ["gomp"] if has_openmp and use_openmp else [] + parallel_macros = [("PARALLEL", None)] if has_openmp and use_openmp else [] if use_cython: - print('Will attempt to use Cython.') + print("Will attempt to use Cython.") if not cython_found: - print("Couldn't find a Cython installation. " - "Not recompiling cython extensions.") + print( + "Couldn't find a Cython installation. " + "Not recompiling cython extensions." 
+ ) use_cython = False else: - print('Will not attempt to use Cython.') + print("Will not attempt to use Cython.") - source_suffix = '.pyx' if use_cython else '.c' - cpp_source_suffix = '.pyx' if use_cython else '.cpp' + source_suffix = ".pyx" if use_cython else ".c" + cpp_source_suffix = ".pyx" if use_cython else ".cpp" # The callable is passed so that it is only evaluated at install time. include_dirs = [get_numpy_include] # Windows automatically handles math library linking # and will not build MDAnalysis if we try to specify one - if os.name == 'nt': + if os.name == "nt": mathlib = [] else: - mathlib = ['m'] + mathlib = ["m"] if cython_linetrace: extra_compile_args.append("-DCYTHON_TRACE_NOGIL") cpp_extra_compile_args.append("-DCYTHON_TRACE_NOGIL") - libdcd = MDAExtension('MDAnalysis.lib.formats.libdcd', - ['MDAnalysis/lib/formats/libdcd' + source_suffix], - include_dirs=include_dirs + ['MDAnalysis/lib/formats/include'], - define_macros=define_macros, - extra_compile_args=extra_compile_args) - distances = MDAExtension('MDAnalysis.lib.c_distances', - ['MDAnalysis/lib/c_distances' + source_suffix], - include_dirs=include_dirs + ['MDAnalysis/lib/include'], - libraries=mathlib, - define_macros=define_macros, - extra_compile_args=extra_compile_args) - distances_omp = MDAExtension('MDAnalysis.lib.c_distances_openmp', - ['MDAnalysis/lib/c_distances_openmp' + source_suffix], - include_dirs=include_dirs + ['MDAnalysis/lib/include'], - libraries=mathlib + parallel_libraries, - define_macros=define_macros + parallel_macros, - extra_compile_args=parallel_args + extra_compile_args, - extra_link_args=parallel_args) - qcprot = MDAExtension('MDAnalysis.lib.qcprot', - ['MDAnalysis/lib/qcprot' + source_suffix], - include_dirs=include_dirs, - define_macros=define_macros, - extra_compile_args=extra_compile_args) - transformation = MDAExtension('MDAnalysis.lib._transformations', - ['MDAnalysis/lib/src/transformations/transformations.c'], - libraries=mathlib, - define_macros=define_macros, - include_dirs=include_dirs, - extra_compile_args=extra_compile_args) - libmdaxdr = MDAExtension('MDAnalysis.lib.formats.libmdaxdr', - sources=['MDAnalysis/lib/formats/libmdaxdr' + source_suffix, - 'MDAnalysis/lib/formats/src/xdrfile.c', - 'MDAnalysis/lib/formats/src/xdrfile_xtc.c', - 'MDAnalysis/lib/formats/src/xdrfile_trr.c', - 'MDAnalysis/lib/formats/src/trr_seek.c', - 'MDAnalysis/lib/formats/src/xtc_seek.c', - ], - include_dirs=include_dirs + ['MDAnalysis/lib/formats/include', - 'MDAnalysis/lib/formats'], - define_macros=largefile_macros + define_macros, - extra_compile_args=extra_compile_args) - util = MDAExtension('MDAnalysis.lib.formats.cython_util', - sources=['MDAnalysis/lib/formats/cython_util' + source_suffix], - include_dirs=include_dirs, - define_macros=define_macros, - extra_compile_args=extra_compile_args) - cutil = MDAExtension('MDAnalysis.lib._cutil', - sources=['MDAnalysis/lib/_cutil' + cpp_source_suffix], - language='c++', - libraries=mathlib, - include_dirs=include_dirs + ['MDAnalysis/lib/include'], - define_macros=define_macros, - extra_compile_args=cpp_extra_compile_args, - extra_link_args= cpp_extra_link_args) - augment = MDAExtension('MDAnalysis.lib._augment', - sources=['MDAnalysis/lib/_augment' + cpp_source_suffix], - language='c++', - include_dirs=include_dirs, - define_macros=define_macros, - extra_compile_args=cpp_extra_compile_args, - extra_link_args= cpp_extra_link_args) - timestep = MDAExtension('MDAnalysis.coordinates.timestep', - sources=['MDAnalysis/coordinates/timestep' + 
cpp_source_suffix], - language='c++', - include_dirs=include_dirs, - define_macros=define_macros, - extra_compile_args=cpp_extra_compile_args, - extra_link_args= cpp_extra_link_args) - - - encore_utils = MDAExtension('MDAnalysis.analysis.encore.cutils', - sources=['MDAnalysis/analysis/encore/cutils' + source_suffix], - include_dirs=include_dirs, - define_macros=define_macros, - extra_compile_args=encore_compile_args) - ap_clustering = MDAExtension('MDAnalysis.analysis.encore.clustering.affinityprop', - sources=['MDAnalysis/analysis/encore/clustering/affinityprop' + source_suffix, - 'MDAnalysis/analysis/encore/clustering/src/ap.c'], - include_dirs=include_dirs+['MDAnalysis/analysis/encore/clustering/include'], - libraries=mathlib, - define_macros=define_macros, - extra_compile_args=encore_compile_args) - spe_dimred = MDAExtension('MDAnalysis.analysis.encore.dimensionality_reduction.stochasticproxembed', - sources=['MDAnalysis/analysis/encore/dimensionality_reduction/stochasticproxembed' + source_suffix, - 'MDAnalysis/analysis/encore/dimensionality_reduction/src/spe.c'], - include_dirs=include_dirs+['MDAnalysis/analysis/encore/dimensionality_reduction/include'], - libraries=mathlib, - define_macros=define_macros, - extra_compile_args=encore_compile_args) - nsgrid = MDAExtension('MDAnalysis.lib.nsgrid', - ['MDAnalysis/lib/nsgrid' + cpp_source_suffix], - include_dirs=include_dirs + ['MDAnalysis/lib/include'], - language='c++', - define_macros=define_macros, - extra_compile_args=cpp_extra_compile_args, - extra_link_args= cpp_extra_link_args) - pre_exts = [libdcd, distances, distances_omp, qcprot, - transformation, libmdaxdr, util, encore_utils, - ap_clustering, spe_dimred, cutil, augment, nsgrid, timestep] + libdcd = MDAExtension( + "MDAnalysis.lib.formats.libdcd", + ["MDAnalysis/lib/formats/libdcd" + source_suffix], + include_dirs=include_dirs + ["MDAnalysis/lib/formats/include"], + define_macros=define_macros, + extra_compile_args=extra_compile_args, + ) + distances = MDAExtension( + "MDAnalysis.lib.c_distances", + ["MDAnalysis/lib/c_distances" + source_suffix], + include_dirs=include_dirs + ["MDAnalysis/lib/include"], + libraries=mathlib, + define_macros=define_macros, + extra_compile_args=extra_compile_args, + ) + distances_omp = MDAExtension( + "MDAnalysis.lib.c_distances_openmp", + ["MDAnalysis/lib/c_distances_openmp" + source_suffix], + include_dirs=include_dirs + ["MDAnalysis/lib/include"], + libraries=mathlib + parallel_libraries, + define_macros=define_macros + parallel_macros, + extra_compile_args=parallel_args + extra_compile_args, + extra_link_args=parallel_args, + ) + qcprot = MDAExtension( + "MDAnalysis.lib.qcprot", + ["MDAnalysis/lib/qcprot" + source_suffix], + include_dirs=include_dirs, + define_macros=define_macros, + extra_compile_args=extra_compile_args, + ) + transformation = MDAExtension( + "MDAnalysis.lib._transformations", + ["MDAnalysis/lib/src/transformations/transformations.c"], + libraries=mathlib, + define_macros=define_macros, + include_dirs=include_dirs, + extra_compile_args=extra_compile_args, + ) + libmdaxdr = MDAExtension( + "MDAnalysis.lib.formats.libmdaxdr", + sources=[ + "MDAnalysis/lib/formats/libmdaxdr" + source_suffix, + "MDAnalysis/lib/formats/src/xdrfile.c", + "MDAnalysis/lib/formats/src/xdrfile_xtc.c", + "MDAnalysis/lib/formats/src/xdrfile_trr.c", + "MDAnalysis/lib/formats/src/trr_seek.c", + "MDAnalysis/lib/formats/src/xtc_seek.c", + ], + include_dirs=include_dirs + + ["MDAnalysis/lib/formats/include", "MDAnalysis/lib/formats"], + 
define_macros=largefile_macros + define_macros, + extra_compile_args=extra_compile_args, + ) + util = MDAExtension( + "MDAnalysis.lib.formats.cython_util", + sources=["MDAnalysis/lib/formats/cython_util" + source_suffix], + include_dirs=include_dirs, + define_macros=define_macros, + extra_compile_args=extra_compile_args, + ) + cutil = MDAExtension( + "MDAnalysis.lib._cutil", + sources=["MDAnalysis/lib/_cutil" + cpp_source_suffix], + language="c++", + libraries=mathlib, + include_dirs=include_dirs + ["MDAnalysis/lib/include"], + define_macros=define_macros, + extra_compile_args=cpp_extra_compile_args, + extra_link_args=cpp_extra_link_args, + ) + augment = MDAExtension( + "MDAnalysis.lib._augment", + sources=["MDAnalysis/lib/_augment" + cpp_source_suffix], + language="c++", + include_dirs=include_dirs, + define_macros=define_macros, + extra_compile_args=cpp_extra_compile_args, + extra_link_args=cpp_extra_link_args, + ) + timestep = MDAExtension( + "MDAnalysis.coordinates.timestep", + sources=["MDAnalysis/coordinates/timestep" + cpp_source_suffix], + language="c++", + include_dirs=include_dirs, + define_macros=define_macros, + extra_compile_args=cpp_extra_compile_args, + extra_link_args=cpp_extra_link_args, + ) + encore_utils = MDAExtension( + "MDAnalysis.analysis.encore.cutils", + sources=["MDAnalysis/analysis/encore/cutils" + source_suffix], + include_dirs=include_dirs, + define_macros=define_macros, + extra_compile_args=encore_compile_args, + ) + ap_clustering = MDAExtension( + "MDAnalysis.analysis.encore.clustering.affinityprop", + sources=[ + "MDAnalysis/analysis/encore/clustering/affinityprop" + + source_suffix, + "MDAnalysis/analysis/encore/clustering/src/ap.c", + ], + include_dirs=include_dirs + + ["MDAnalysis/analysis/encore/clustering/include"], + libraries=mathlib, + define_macros=define_macros, + extra_compile_args=encore_compile_args, + ) + spe_dimred = MDAExtension( + "MDAnalysis.analysis.encore.dimensionality_reduction.stochasticproxembed", + sources=[ + "MDAnalysis/analysis/encore/dimensionality_reduction/stochasticproxembed" + + source_suffix, + "MDAnalysis/analysis/encore/dimensionality_reduction/src/spe.c", + ], + include_dirs=include_dirs + + ["MDAnalysis/analysis/encore/dimensionality_reduction/include"], + libraries=mathlib, + define_macros=define_macros, + extra_compile_args=encore_compile_args, + ) + nsgrid = MDAExtension( + "MDAnalysis.lib.nsgrid", + ["MDAnalysis/lib/nsgrid" + cpp_source_suffix], + include_dirs=include_dirs + ["MDAnalysis/lib/include"], + language="c++", + define_macros=define_macros, + extra_compile_args=cpp_extra_compile_args, + extra_link_args=cpp_extra_link_args, + ) + pre_exts = [ + libdcd, + distances, + distances_omp, + qcprot, + transformation, + libmdaxdr, + util, + encore_utils, + ap_clustering, + spe_dimred, + cutil, + augment, + nsgrid, + timestep, + ] cython_generated = [] if use_cython: extensions = cythonize( pre_exts, annotate=annotate_cython, - compiler_directives={'linetrace': cython_linetrace, - 'embedsignature': False, - 'language_level': '3'}, + compiler_directives={ + "linetrace": cython_linetrace, + "embedsignature": False, + "language_level": "3", + }, ) if cython_linetrace: print("Cython coverage will be enabled") @@ -453,15 +523,16 @@ def extensions(config): if source not in pre_ext.sources: cython_generated.append(source) else: - #Let's check early for missing .c files + # Let's check early for missing .c files extensions = pre_exts for ext in extensions: for source in ext.sources: - if not (os.path.isfile(source) and - 
os.access(source, os.R_OK)): - raise IOError("Source file '{}' not found. This might be " - "caused by a missing Cython install, or a " - "failed/disabled Cython build.".format(source)) + if not (os.path.isfile(source) and os.access(source, os.R_OK)): + raise IOError( + "Source file '{}' not found. This might be " + "caused by a missing Cython install, or a " + "failed/disabled Cython build.".format(source) + ) return extensions, cython_generated @@ -477,7 +548,7 @@ def dynamic_author_list(): "Chronological list of authors" title. """ authors = [] - with codecs.open(abspath('AUTHORS'), encoding='utf-8') as infile: + with codecs.open(abspath("AUTHORS"), encoding="utf-8") as infile: # An author is a bullet point under the title "Chronological list of # authors". We first want move the cursor down to the title of # interest. @@ -486,21 +557,23 @@ def dynamic_author_list(): break else: # If we did not break, it means we did not find the authors. - raise IOError('EOF before the list of authors') + raise IOError("EOF before the list of authors") # Skip the next line as it is the title underlining line = next(infile) line_no += 1 - if line[:4] != '----': - raise IOError('Unexpected content on line {0}, ' - 'should be a string of "-".'.format(line_no)) + if line[:4] != "----": + raise IOError( + "Unexpected content on line {0}, " + 'should be a string of "-".'.format(line_no) + ) # Add each bullet point as an author until the next title underlining for line in infile: - if line[:4] in ('----', '====', '~~~~'): + if line[:4] in ("----", "====", "~~~~"): # The previous line was a title, hopefully it did not start as # a bullet point so it got ignored. Since we hit a title, we # are done reading the list of authors. break - elif line.strip()[:2] == '- ': + elif line.strip()[:2] == "- ": # This is a bullet point, so it should be an author name. name = line.strip()[2:].strip() authors.append(name) @@ -509,28 +582,32 @@ def dynamic_author_list(): # sorted alphabetically of the last name. authors.sort(key=lambda name: name.split()[-1]) # Move Naveen and Elizabeth first, and Oliver last. - authors.remove('Naveen Michaud-Agrawal') - authors.remove('Elizabeth J. Denning') - authors.remove('Oliver Beckstein') - authors = (['Naveen Michaud-Agrawal', 'Elizabeth J. Denning'] - + authors + ['Oliver Beckstein']) + authors.remove("Naveen Michaud-Agrawal") + authors.remove("Elizabeth J. Denning") + authors.remove("Oliver Beckstein") + authors = ( + ["Naveen Michaud-Agrawal", "Elizabeth J. Denning"] + + authors + + ["Oliver Beckstein"] + ) # Write the authors.py file. - out_path = abspath('MDAnalysis/authors.py') - with codecs.open(out_path, 'w', encoding='utf-8') as outfile: + out_path = abspath("MDAnalysis/authors.py") + with codecs.open(out_path, "w", encoding="utf-8") as outfile: # Write the header - header = '''\ + header = """\ #-*- coding:utf-8 -*- # This file is generated from the AUTHORS file during the installation process. # Do not edit it as your changes will be overwritten. 
-'''
+"""
     print(header, file=outfile)
 
     # Write the list of authors as a python list
-    template = u'__authors__ = [\n{}\n]'
-    author_string = u',\n'.join(u'    u"{}"'.format(name)
-                                for name in authors)
+    template = "__authors__ = [\n{}\n]"
+    author_string = ",\n".join(
+        '    u"{}"'.format(name) for name in authors
+    )
     print(template.format(author_string), file=outfile)
 
 
@@ -540,17 +617,18 @@ def long_description(readme):
     with open(abspath(readme)) as summary:
         buffer = summary.read()
     # remove top heading that messes up pypi display
-    m = re.search('====*\n[^\n]*README[^\n]*\n=====*\n', buffer,
-                  flags=re.DOTALL)
+    m = re.search(
+        "====*\n[^\n]*README[^\n]*\n=====*\n", buffer, flags=re.DOTALL
+    )
     assert m, "README.rst does not contain a level-1 heading"
-    return buffer[m.end():]
+    return buffer[m.end() :]
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     try:
         dynamic_author_list()
     except (OSError, IOError):
-        warnings.warn('Cannot write the list of authors.')
+        warnings.warn("Cannot write the list of authors.")
 
     try:
         # when building from repository for creating the distribution
@@ -563,24 +641,30 @@ def long_description(readme):
     config = Config()
     exts, cythonfiles = extensions(config)
 
-    setup(name='MDAnalysis',
-          version=RELEASE,
-          long_description=LONG_DESCRIPTION,
-          long_description_content_type='text/x-rst',
-          # currently unused & may become obsolte see setuptools #1569
-          provides=['MDAnalysis'],
-          ext_modules=exts,
-          test_suite="MDAnalysisTests",
-          tests_require=[
-              'MDAnalysisTests=={0!s}'.format(RELEASE),  # same as this release!
-          ],
+    setup(
+        name="MDAnalysis",
+        version=RELEASE,
+        long_description=LONG_DESCRIPTION,
+        long_description_content_type="text/x-rst",
+        # currently unused & may become obsolete; see setuptools #1569
+        provides=["MDAnalysis"],
+        ext_modules=exts,
+        test_suite="MDAnalysisTests",
+        tests_require=[
+            "MDAnalysisTests=={0!s}".format(RELEASE),  # same as this release!
+        ],
     )
 
     # Releases keep their cythonized stuff for shipping.
-    if not config.get('keep_cythonized', default=is_release) and not cython_linetrace:
+    if (
+        not config.get("keep_cythonized", default=is_release)
+        and not cython_linetrace
+    ):
         for cythonized in cythonfiles:
             try:
                 os.unlink(cythonized)
             except OSError as err:
-                print("Warning: failed to delete cythonized file {0}: {1}. "
-                      "Moving on.".format(cythonized, err.strerror))
+                print(
+                    "Warning: failed to delete cythonized file {0}: {1}. "
+                    "Moving on.".format(cythonized, err.strerror)
+                )
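
For context on the `include_dirs = [get_numpy_include]` line kept in this patch: the adjacent comment ("The callable is passed so that it is only evaluated at install time") refers to postponing `import numpy` until the build backend actually compiles the extensions, so merely parsing setup.py does not require NumPy to be installed yet. A minimal sketch of that kind of lazy lookup follows; the class name `LazyIncludeExtension`, the example module and paths, and the exact resolution logic are illustrative assumptions, not MDAnalysis's actual `MDAExtension` implementation.

    # Illustrative sketch (assumed names, not the project's MDAExtension):
    # defer the NumPy include-directory lookup behind a callable so that
    # "import numpy" happens at build time rather than when setup.py is parsed.
    from setuptools import Extension


    def get_numpy_include():
        # Deferred import: numpy may be absent when setup.py is first read,
        # but it is installed by the time the extensions are compiled.
        import numpy

        return numpy.get_include()


    class LazyIncludeExtension(Extension):
        """Extension that resolves callables in include_dirs when accessed."""

        def __init__(self, name, sources, *args, **kwargs):
            lazy_dirs = kwargs.pop("include_dirs", [])
            super().__init__(name, sources, *args, **kwargs)
            # Stash the raw entries (strings or callables) after the base
            # class has finished initialising its own attributes.
            self._lazy_include_dirs = lazy_dirs

        @property
        def include_dirs(self):
            # build_ext reads this attribute at build time; callables are
            # evaluated only here, so the numpy import is delayed until then.
            return [d() if callable(d) else d for d in self._lazy_include_dirs]

        @include_dirs.setter
        def include_dirs(self, value):
            self._lazy_include_dirs = value


    # Hypothetical usage mirroring the pattern in the diff:
    example_ext = LazyIncludeExtension(
        "example._speedups",
        ["example/_speedups.c"],
        include_dirs=[get_numpy_include, "example/include"],
    )

The design point is simply that nothing calls `numpy.get_include()` until `include_dirs` is read during `build_ext`, which is what the in-line comment in the diff is getting at.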