diff --git a/examples/LennardJones/LJ.json b/examples/LennardJones/LJ.json new file mode 100644 index 000000000..a79c5f41d --- /dev/null +++ b/examples/LennardJones/LJ.json @@ -0,0 +1,72 @@ +{ + "Verbosity": { + "level": 2 + }, + "Dataset": { + "name": "LJdataset", + "format": "XYZ", + "node_features": { + "name": ["atom_type"], + "dim": [1], + "column_index": [0] + }, + "graph_features":{ + "name": ["total_energy"], + "dim": [1], + "column_index": [0] + } + }, + "NeuralNetwork": { + "Architecture": { + "periodic_boundary_conditions": true, + "model_type": "DimeNet", + "radius": 5.0, + "max_neighbours": 5, + "int_emb_size": 32, + "out_emb_size": 16, + "basis_emb_size": 8, + "num_before_skip": 1, + "num_after_skip": 1, + "envelope_exponent": 5, + "num_radial": 5, + "num_spherical": 2, + "hidden_dim": 20, + "num_conv_layers": 4, + "output_heads": { + "node": { + "num_headlayers": 2, + "dim_headlayers": [60,20], + "type": "mlp" + } + }, + "task_weights": [1] + }, + "Variables_of_interest": { + "input_node_features": [0], + "output_index": [ + 0 + ], + "type": [ + "node" + ], + "output_dim": [1], + "output_names": ["graph_energy"] + }, + "Training": { + "num_epoch": 15, + "batch_size": 64, + "patience": 20, + "early_stopping": true, + "Optimizer": { + "type": "Adam", + "learning_rate": 0.005 + }, + "conv_checkpointing": false + } + }, + "Visualization": { + "plot_init_solution": true, + "plot_hist_solution": true, + "create_plots": true + } +} diff --git a/examples/LennardJones/LJ_data.py b/examples/LennardJones/LJ_data.py new file mode 100644 index 000000000..6226ff6f8 --- /dev/null +++ b/examples/LennardJones/LJ_data.py @@ -0,0 +1,504 @@ +############################################################################## +# Copyright (c) 2024, Oak Ridge National Laboratory # +# All rights reserved. # +# # +# This file is part of HydraGNN and is distributed under a BSD 3-clause # +# license. For the licensing terms see the LICENSE file in the top-level # +# directory. 
#
+#                                                                            #
+# SPDX-License-Identifier: BSD-3-Clause                                      #
+##############################################################################
+
+# General
+import os
+import logging
+import numpy
+
+numpy.set_printoptions(threshold=numpy.inf)
+numpy.set_printoptions(linewidth=numpy.inf)
+
+# Torch
+import torch
+from torch_geometric.data import Data
+
+# torch.set_default_tensor_type(torch.DoubleTensor)
+# torch.set_default_dtype(torch.float64)
+
+# Distributed
+import mpi4py
+from mpi4py import MPI
+
+mpi4py.rc.thread_level = "serialized"
+mpi4py.rc.threads = False
+
+# HydraGNN
+from hydragnn.utils.abstractrawdataset import AbstractBaseDataset
+from hydragnn.utils import nsplit
+from hydragnn.preprocess.utils import get_radius_graph_pbc
+
+# Angstrom unit
+primitive_bravais_lattice_constant_x = 3.8
+primitive_bravais_lattice_constant_y = 3.8
+primitive_bravais_lattice_constant_z = 3.8
+
+
+##################################################################################################################
+
+
+"""High-Level Function"""
+
+
+def create_dataset(path, config):
+    radius_cutoff = config["NeuralNetwork"]["Architecture"]["radius"]
+    number_configurations = (
+        config["Dataset"]["number_configurations"]
+        if "number_configurations" in config["Dataset"]
+        else 300
+    )
+    atom_types = [1]
+    formula = LJpotential(1.0, 3.4)
+    atomic_structure_handler = AtomicStructureHandler(
+        atom_types,
+        [
+            primitive_bravais_lattice_constant_x,
+            primitive_bravais_lattice_constant_y,
+            primitive_bravais_lattice_constant_z,
+        ],
+        radius_cutoff,
+        formula,
+    )
+    deterministic_graph_data(
+        path,
+        atom_types,
+        atomic_structure_handler=atomic_structure_handler,
+        radius_cutoff=radius_cutoff,
+        relative_maximum_atomic_displacement=1e-1,
+        number_configurations=number_configurations,
+    )
+
+
+"""Reading/Transforming Data"""
+
+
+class LJDataset(AbstractBaseDataset):
+    """LJDataset dataset class"""
+
+    def __init__(self, dirpath, config, dist=False, sampling=None):
+        super().__init__()
+
+        self.dist = dist
+        # Default to a single process (rank 0) when not running distributed
+        self.world_size = 1
+        self.rank = 0
+        if self.dist:
+            assert torch.distributed.is_initialized()
+            self.world_size = torch.distributed.get_world_size()
+            self.rank = torch.distributed.get_rank()
+
+        self.radius = config["NeuralNetwork"]["Architecture"]["radius"]
+        self.max_neighbours = config["NeuralNetwork"]["Architecture"]["max_neighbours"]
+
+        dirfiles = sorted(os.listdir(dirpath))
+
+        rx = list(nsplit((dirfiles), self.world_size))[self.rank]
+
+        for file in rx:
+            filepath = os.path.join(dirpath, file)
+            self.dataset.append(self.transform_input_to_data_object_base(filepath))
+
+    def transform_input_to_data_object_base(self, filepath):
+
+        # Read the structure file line by line
+        file = open(filepath, "r")
+
+        torch_data = torch.empty((0, 8), dtype=torch.float32)
+        torch_supercell = torch.zeros((0, 3), dtype=torch.float32)
+
+        count = 0
+
+        while True:
+            count += 1
+
+            # Get next line from file
+            line = file.readline()
+
+            # if line is empty
+            # end of file is reached
+            if not line:
+                break
+
+            if count == 1:
+                total_energy = float(line)
+            elif count == 2:
+                energy_per_atom = float(line)
+            elif 2 < count < 6:
+                array_line = numpy.fromstring(line, dtype=float, sep="\t")
+                torch_supercell = torch.cat(
+                    [torch_supercell, torch.from_numpy(array_line).unsqueeze(0)], axis=0
+                )
+            elif count > 5:
+                array_line = numpy.fromstring(line, dtype=float, sep="\t")
+                torch_data = torch.cat(
+                    [torch_data, torch.from_numpy(array_line).unsqueeze(0)], axis=0
+                )
+            # print("Line{}: {}".format(count, line.strip()))
+
+        file.close()
+
+        num_nodes = torch_data.shape[0]
+
+        energy_pre_translation_factor = 0.0
+        energy_pre_scaling_factor = 1.0 / num_nodes
+        energy_per_atom_pretransformed = (
+            energy_per_atom - energy_pre_translation_factor
+        ) * energy_pre_scaling_factor
+        grad_energy_post_scaling_factor = (
+            1.0 / energy_pre_scaling_factor * torch.ones(num_nodes, 1)
+        )
+        forces = torch_data[:, [5, 6, 7]]
+        forces_pre_scaling_factor = 1.0
+        forces_pre_scaled = forces * forces_pre_scaling_factor
+
+        data = Data(
+            supercell_size=torch_supercell.to(torch.float32),
+            num_nodes=num_nodes,
+            grad_energy_post_scaling_factor=grad_energy_post_scaling_factor,
+            forces_pre_scaling_factor=torch.tensor(forces_pre_scaling_factor).to(
+                torch.float32
+            ),
+            forces=forces,
+            forces_pre_scaled=forces_pre_scaled,
+            pos=torch_data[:, [1, 2, 3]].to(torch.float32),
+            x=torch.cat([torch_data[:, [0, 4]]], axis=1).to(torch.float32),
+            y=torch.tensor(total_energy).unsqueeze(0).to(torch.float32),
+            energy_per_atom=torch.tensor(energy_per_atom_pretransformed)
+            .unsqueeze(0)
+            .to(torch.float32),
+            energy=torch.tensor(total_energy).unsqueeze(0).to(torch.float32),
+        )
+
+        # Create pbc edges and lengths
+        edge_creation = get_radius_graph_pbc(self.radius, self.max_neighbours)
+        data = edge_creation(data)
+
+        return data
+
+    def len(self):
+        return len(self.dataset)
+
+    def get(self, idx):
+        return self.dataset[idx]
+
+
+"""Create Data"""
+
+
+def deterministic_graph_data(
+    path: str,
+    atom_types: list,
+    atomic_structure_handler,
+    radius_cutoff=float("inf"),
+    max_num_neighbors=float("inf"),
+    number_configurations: int = 500,
+    configuration_start: int = 0,
+    unit_cell_x_range: list = [3, 4],
+    unit_cell_y_range: list = [3, 4],
+    unit_cell_z_range: list = [3, 4],
+    relative_maximum_atomic_displacement: float = 1e-1,
+):
+
+    comm = MPI.COMM_WORLD
+    comm_size = comm.Get_size()
+    comm_rank = comm.Get_rank()
+    torch.manual_seed(comm_rank)
+
+    if comm_rank == 0:
+        os.makedirs(path, exist_ok=False)
+    comm.Barrier()
+
+    # We assume that the unit cell is simple cubic (SC)
+    unit_cell_x = torch.randint(
+        unit_cell_x_range[0],
+        unit_cell_x_range[1],
+        (number_configurations,),
+    )
+    unit_cell_y = torch.randint(
+        unit_cell_y_range[0],
+        unit_cell_y_range[1],
+        (number_configurations,),
+    )
+    unit_cell_z = torch.randint(
+        unit_cell_z_range[0],
+        unit_cell_z_range[1],
+        (number_configurations,),
+    )
+
+    configurations_list = range(number_configurations)
+    rx = list(nsplit(configurations_list, comm_size))[comm_rank]
+
+    for configuration in configurations_list[rx.start : rx.stop]:
+        uc_x = unit_cell_x[configuration]
+        uc_y = unit_cell_y[configuration]
+        uc_z = unit_cell_z[configuration]
+        create_configuration(
+            path,
+            atomic_structure_handler,
+            configuration,
+            configuration_start,
+            uc_x,
+            uc_y,
+            uc_z,
+            atom_types,
+            radius_cutoff,
+            max_num_neighbors,
+            relative_maximum_atomic_displacement,
+        )
+
+
+def create_configuration(
+    path,
+    atomic_structure_handler,
+    configuration,
+    configuration_start,
+    uc_x,
+    uc_y,
+    uc_z,
+    types,
+    radius_cutoff,
+    max_num_neighbors,
+    relative_maximum_atomic_displacement,
+):
+    ###############################################################################################
+    ###################################   STRUCTURE OF THE DATA  #################################
+    ###############################################################################################
+
+    #   GLOBAL_OUTPUT1
+    #   GLOBAL_OUTPUT2
+    #   NODE1_FEATURE NODE1_INDEX NODE1_COORDINATE_X NODE1_COORDINATE_Y NODE1_COORDINATE_Z NODAL_OUTPUT1 NODAL_OUTPUT2 NODAL_OUTPUT3
+    #   NODE2_FEATURE NODE2_INDEX NODE2_COORDINATE_X NODE2_COORDINATE_Y NODE2_COORDINATE_Z NODAL_OUTPUT1 NODAL_OUTPUT2 NODAL_OUTPUT3
+    #   ...
+    #   NODEn_FEATURE NODEn_INDEX NODEn_COORDINATE_X NODEn_COORDINATE_Y NODEn_COORDINATE_Z NODAL_OUTPUT1 NODAL_OUTPUT2 NODAL_OUTPUT3
+
+    ###############################################################################################
+    #################################   FORMULAS FOR NODAL FEATURE   #############################
+    ###############################################################################################
+
+    #   NODAL_FEATURE = ATOM SPECIES
+
+    ###############################################################################################
+    ##########################   FORMULAS FOR GLOBAL AND NODAL OUTPUTS   #########################
+    ###############################################################################################
+
+    #   GLOBAL_OUTPUT1 = TOTAL ENERGY
+    #   GLOBAL_OUTPUT2 = TOTAL ENERGY / NUMBER OF NODES
+    #   NODAL_OUTPUT1(X) = FORCE ACTING ON ATOM IN X DIRECTION
+    #   NODAL_OUTPUT2(X) = FORCE ACTING ON ATOM IN Y DIRECTION
+    #   NODAL_OUTPUT3(X) = FORCE ACTING ON ATOM IN Z DIRECTION
+
+    ###############################################################################################
+    count_pos = 0
+    number_nodes = uc_x * uc_y * uc_z
+    positions = torch.zeros(number_nodes, 3)
+    for x in range(uc_x):
+        for y in range(uc_y):
+            for z in range(uc_z):
+                positions[count_pos][0] = (
+                    x
+                    + relative_maximum_atomic_displacement
+                    * ((torch.rand(1, 1).item()) - 0.5)
+                ) * primitive_bravais_lattice_constant_x
+                positions[count_pos][1] = (
+                    y
+                    + relative_maximum_atomic_displacement
+                    * ((torch.rand(1, 1).item()) - 0.5)
+                ) * primitive_bravais_lattice_constant_y
+                positions[count_pos][2] = (
+                    z
+                    + relative_maximum_atomic_displacement
+                    * ((torch.rand(1, 1).item()) - 0.5)
+                ) * primitive_bravais_lattice_constant_z
+
+                count_pos = count_pos + 1
+
+    atom_types = torch.randint(min(types), max(types) + 1, (number_nodes, 1))
+
+    data = Data()
+
+    data.pos = positions
+    supercell_size_x = primitive_bravais_lattice_constant_x * uc_x
+    supercell_size_y = primitive_bravais_lattice_constant_y * uc_y
+    supercell_size_z = primitive_bravais_lattice_constant_z * uc_z
+    data.supercell_size = torch.diag(
+        torch.tensor([supercell_size_x, supercell_size_y, supercell_size_z])
+    )
+
+    create_graph_connectivity_pbc = get_radius_graph_pbc(
+        radius_cutoff, max_num_neighbors
+    )
+    data = create_graph_connectivity_pbc(data)
+
+    atomic_descriptors = torch.cat(
+        (
+            atom_types,
+            positions,
+        ),
+        1,
+    )
+
+    data.x = atomic_descriptors
+
+    data = atomic_structure_handler.compute(data)
+
+    total_energy = torch.sum(data.x[:, 4])
+    energy_per_atom = total_energy / number_nodes
+
+    total_energy_str = numpy.array2string(total_energy.detach().numpy())
+    energy_per_atom_str = numpy.array2string(energy_per_atom.detach().numpy())
+    filetxt = total_energy_str + "\n" + energy_per_atom_str
+
+    for index in range(0, 3):
+        numpy_row = data.supercell_size[index, :].detach().numpy()
+        numpy_string_row = numpy.array2string(numpy_row, precision=64, separator="\t")
+        filetxt += "\n" + numpy_string_row.lstrip("[").rstrip("]")
+
+    for index in range(0, number_nodes):
+        numpy_row = data.x[index, :].detach().numpy()
+        numpy_string_row = numpy.array2string(numpy_row, precision=64, separator="\t")
+        filetxt += "\n" + numpy_string_row.lstrip("[").rstrip("]")
+
+    filename = os.path.join(
+        path, "output" + str(configuration + configuration_start) + ".txt"
+    )
+    with open(filename, "w") as f:
+        f.write(filetxt)
+
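For reference, each file written by create_configuration is plain text: two scalar lines (total energy, energy per atom), three tab-separated rows for the diagonal supercell matrix, and one eight-column row per atom (atom type, x, y, z, potential, fx, fy, fz). A minimal sanity check of one generated file, as a sketch (the output0.txt path is hypothetical):

import numpy

def check_configuration_file(filepath="dataset/data/output0.txt"):
    # Parse one file produced by create_configuration above
    with open(filepath) as f:
        lines = f.read().splitlines()
    total_energy = float(lines[0])           # GLOBAL_OUTPUT1
    energy_per_atom = float(lines[1])        # GLOBAL_OUTPUT2
    supercell = numpy.loadtxt(lines[2:5])    # 3x3 diagonal supercell matrix
    atoms = numpy.loadtxt(lines[5:])         # atom_type, x, y, z, potential, fx, fy, fz
    assert atoms.shape[1] == 8
    assert abs(total_energy - energy_per_atom * len(atoms)) < 1e-6 * abs(total_energy) + 1e-8
    return supercell, atoms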
+
+"""Function Calculation"""
+
+
+class AtomicStructureHandler:
+    def __init__(
+        self, list_atom_types, bravais_lattice_constants, radius_cutoff, formula
+    ):
+
+        self.bravais_lattice_constants = bravais_lattice_constants
+        self.radius_cutoff = radius_cutoff
+        self.formula = formula
+
+    def compute(self, data):
+
+        assert data.pos.shape[0] == data.x.shape[0]
+
+        interatomic_potential = torch.zeros([data.pos.shape[0], 1])
+        interatomic_forces = torch.zeros([data.pos.shape[0], 3])
+
+        for node_id in range(data.pos.shape[0]):
+
+            neighbor_list_indices = torch.where(data.edge_index[0, :] == node_id)[
+                0
+            ].tolist()
+            neighbor_list = data.edge_index[1, neighbor_list_indices]
+
+            for neighbor_id, edge_id in zip(neighbor_list, neighbor_list_indices):
+
+                # Clone the neighbor position so that the PBC adjustment below
+                # does not modify data.pos in place through a tensor view
+                neighbor_pos = data.pos[neighbor_id, :].clone()
+                distance_vector = data.pos[neighbor_id, :] - data.pos[node_id, :]
+
+                # Adjust the neighbor position based on periodic boundary conditions (PBC)
+                ## If the distance between the atoms is larger than the cutoff radius, the edge exists only because of PBC
+                if torch.norm(distance_vector) > self.radius_cutoff:
+                    ## At this point, we know that the edge is due to PBC, so we need to adjust the neighbor position.
+                    ## We also know that this connection MUST be the closest connection possible, as a result of the
+                    ## asserted radius_cutoff < supercell_size earlier in the code. Because of this, we can simply
+                    ## adjust the neighbor position coordinate-wise: if distance_vector[index] is larger than half the
+                    ## supercell size, there is a closer periodic image at +- supercell_size[index], and we shift to
+                    ## that image for each coordinate
+                    if abs(distance_vector[0]) > data.supercell_size[0, 0] / 2:
+                        if distance_vector[0] > 0:
+                            neighbor_pos[0] -= data.supercell_size[0, 0]
+                        else:
+                            neighbor_pos[0] += data.supercell_size[0, 0]
+
+                    if abs(distance_vector[1]) > data.supercell_size[1, 1] / 2:
+                        if distance_vector[1] > 0:
+                            neighbor_pos[1] -= data.supercell_size[1, 1]
+                        else:
+                            neighbor_pos[1] += data.supercell_size[1, 1]
+
+                    if abs(distance_vector[2]) > data.supercell_size[2, 2] / 2:
+                        if distance_vector[2] > 0:
+                            neighbor_pos[2] -= data.supercell_size[2, 2]
+                        else:
+                            neighbor_pos[2] += data.supercell_size[2, 2]
+
+                # The distance vector may need to be updated after applying PBCs
+                distance_vector = data.pos[node_id, :] - neighbor_pos
+
+                # pair_distance = data.edge_attr[edge_id].item()
+                interatomic_potential[node_id] += self.formula.potential_energy(
+                    distance_vector
+                )
+
+                derivative_x = self.formula.derivative_x(distance_vector)
+                derivative_y = self.formula.derivative_y(distance_vector)
+                derivative_z = self.formula.derivative_z(distance_vector)
+
+                interatomic_forces_contribution_x = -derivative_x
+                interatomic_forces_contribution_y = -derivative_y
+                interatomic_forces_contribution_z = -derivative_z
+
+                interatomic_forces[node_id, 0] += interatomic_forces_contribution_x
+                interatomic_forces[node_id, 1] += interatomic_forces_contribution_y
+                interatomic_forces[node_id, 2] += interatomic_forces_contribution_z
+
+        data.x = torch.cat(
+            (data.x, interatomic_potential, interatomic_forces),
+            1,
+        )
+
+        return data
+
+
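The coordinate-wise branches in compute above implement the minimum-image convention for an orthorhombic cell: whenever a component of the distance vector exceeds half the box length, the nearer periodic image is used instead. A vectorized equivalent, as a sketch:

import torch

def minimum_image_displacement(pos_i, pos_j, cell_diag):
    # Shortest displacement from atom j to atom i under orthorhombic PBC;
    # pos_i, pos_j are (3,) positions, cell_diag holds the supercell edge lengths.
    d = pos_i - pos_j
    # Shift each component by a whole number of box lengths so it lands in [-L/2, L/2]
    d = d - cell_diag * torch.round(d / cell_diag)
    return d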
+class LJpotential:
+    def __init__(self, epsilon, sigma):
+        self.epsilon = epsilon
+        self.sigma = sigma
+
+    def potential_energy(self, distance_vector):
+        pair_distance = torch.norm(distance_vector)
+        return (
+            4
+            * self.epsilon
+            * ((self.sigma / pair_distance) ** 12 - (self.sigma / pair_distance) ** 6)
+        )
+
+    def radial_derivative(self, distance_vector):
+        pair_distance = torch.norm(distance_vector)
+        return (
+            4
+            * self.epsilon
+            * (
+                -12 * (self.sigma / pair_distance) ** 12 * 1 / pair_distance
+                + 6 * (self.sigma / pair_distance) ** 6 * 1 / pair_distance
+            )
+        )
+
+    def derivative_x(self, distance_vector):
+        pair_distance = torch.norm(distance_vector)
+        radial_derivative = self.radial_derivative(distance_vector)
+        return radial_derivative * (distance_vector[0].item()) / pair_distance
+
+    def derivative_y(self, distance_vector):
+        pair_distance = torch.norm(distance_vector)
+        radial_derivative = self.radial_derivative(distance_vector)
+        return radial_derivative * (distance_vector[1].item()) / pair_distance
+
+    def derivative_z(self, distance_vector):
+        pair_distance = torch.norm(distance_vector)
+        radial_derivative = self.radial_derivative(distance_vector)
+        return radial_derivative * (distance_vector[2].item()) / pair_distance
+
+
+"""Etc"""
+
+
+def info(*args, logtype="info", sep=" "):
+    getattr(logging, logtype)(sep.join(map(str, args)))
diff --git a/examples/LennardJones/LJ_inference_plots.py b/examples/LennardJones/LJ_inference_plots.py
new file mode 100644
index 000000000..324da425f
--- /dev/null
+++ b/examples/LennardJones/LJ_inference_plots.py
@@ -0,0 +1,241 @@
+##############################################################################
+# Copyright (c) 2024, Oak Ridge National Laboratory                          #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HydraGNN and is distributed under a BSD 3-clause      #
+# license. For the licensing terms see the LICENSE file in the top-level     #
+# directory.                                                                 #
+#                                                                            #
+# SPDX-License-Identifier: BSD-3-Clause                                      #
+##############################################################################
+
+import json, os
+import sys
+import logging
+import pickle
+from tqdm import tqdm
+from mpi4py import MPI
+import argparse
+
+import torch
+import torch_scatter
+import numpy as np
+
+import hydragnn
+from hydragnn.utils.time_utils import Timer
+from hydragnn.utils.distributed import get_device
+from hydragnn.utils.model import load_existing_model
+from hydragnn.utils.pickledataset import SimplePickleDataset
+from hydragnn.utils.config_utils import (
+    update_config,
+)
+from hydragnn.models.create import create_model_config
+from hydragnn.preprocess import create_dataloaders
+
+from scipy.interpolate import griddata
+
+try:
+    from hydragnn.utils.adiosdataset import AdiosWriter, AdiosDataset
+except ImportError:
+    pass
+
+from LJ_data import info
+
+import matplotlib.pyplot as plt
+
+plt.rcParams.update({"font.size": 16})
+
+
+def get_log_name_config(config):
+    return (
+        config["NeuralNetwork"]["Architecture"]["model_type"]
+        + "-r-"
+        + str(config["NeuralNetwork"]["Architecture"]["radius"])
+        + "-ncl-"
+        + str(config["NeuralNetwork"]["Architecture"]["num_conv_layers"])
+        + "-hd-"
+        + str(config["NeuralNetwork"]["Architecture"]["hidden_dim"])
+        + "-ne-"
+        + str(config["NeuralNetwork"]["Training"]["num_epoch"])
+        + "-lr-"
+        + str(config["NeuralNetwork"]["Training"]["Optimizer"]["learning_rate"])
+        + "-bs-"
+        + str(config["NeuralNetwork"]["Training"]["batch_size"])
+        + "-node_ft-"
+        + "".join(
+            str(x)
+            for x in config["NeuralNetwork"]["Variables_of_interest"][
+                "input_node_features"
+            ]
+        )
+        + "-task_weights-"
+        + "".join(
+            str(weigh) + "-"
+            for weigh in config["NeuralNetwork"]["Architecture"]["task_weights"]
+        )
+    )
+
+
+def getcolordensity(xdata, ydata):
+    ###############################
+    nbin = 20
+    hist2d, xbins_edge, ybins_edge = np.histogram2d(
+        x=xdata, y=ydata, bins=[nbin, nbin]
+    )
+    xbin_cen = 0.5 * (xbins_edge[0:-1] + xbins_edge[1:])
+    ybin_cen = 0.5 * (ybins_edge[0:-1] + ybins_edge[1:])
+    BCTY, BCTX = np.meshgrid(ybin_cen, xbin_cen)
+    hist2d = hist2d / np.amax(hist2d)
+    print(np.amax(hist2d))
+
+    bctx1d = np.reshape(BCTX, len(xbin_cen) * nbin)
+    bcty1d = np.reshape(BCTY, len(xbin_cen) * nbin)
+    loc_pts = np.zeros((len(xbin_cen) * nbin, 2))
+    loc_pts[:, 0] = bctx1d
+    loc_pts[:, 1] = bcty1d
+    hist2d_norm = griddata(
+        loc_pts,
+        hist2d.reshape(len(xbin_cen) * nbin),
+        (xdata, ydata),
+        method="linear",
+        fill_value=0,
+    )  # np.nan)
+    return hist2d_norm
+
+
+if __name__ == "__main__":
+
+    modelname = "LJ"
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--inputfile", help="input file", type=str, default="./logs/LJ/config.json"
+    )
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument(
+        "--adios",
+        help="Adios dataset",
+        action="store_const",
+        dest="format",
+        const="adios",
+    )
+    group.add_argument(
+        "--pickle",
+        help="Pickle dataset",
+        action="store_const",
+        dest="format",
+        const="pickle",
+    )
+    parser.set_defaults(format="pickle")
+
+    args = parser.parse_args()
+
+    dirpwd = os.path.dirname(os.path.abspath(__file__))
+    input_filename = os.path.join(dirpwd, args.inputfile)
+    with open(input_filename, "r") as f:
+        config = json.load(f)
+    hydragnn.utils.setup_log(get_log_name_config(config))
+    ##################################################################################################################
+    # Always initialize for multi-rank training.
+    comm_size, rank = hydragnn.utils.setup_ddp()
+    ##################################################################################################################
+    comm = MPI.COMM_WORLD
+
+    datasetname = "LJ"
+
+    comm.Barrier()
+
+    timer = Timer("load_data")
+    timer.start()
+    if args.format == "pickle":
+        info("Pickle load")
+        basedir = os.path.join(
+            os.path.dirname(__file__), "dataset", "%s.pickle" % modelname
+        )
+        trainset = SimplePickleDataset(
+            basedir=basedir,
+            label="trainset",
+            var_config=config["NeuralNetwork"]["Variables_of_interest"],
+        )
+        valset = SimplePickleDataset(
+            basedir=basedir,
+            label="valset",
+            var_config=config["NeuralNetwork"]["Variables_of_interest"],
+        )
+        testset = SimplePickleDataset(
+            basedir=basedir,
+            label="testset",
+            var_config=config["NeuralNetwork"]["Variables_of_interest"],
+        )
+        pna_deg = trainset.pna_deg
+    else:
+        raise NotImplementedError("Unsupported format: %s" % (args.format))
+
+    model = create_model_config(
+        config=config["NeuralNetwork"],
+        verbosity=config["Verbosity"]["level"],
+    )
+
+    model = torch.nn.parallel.DistributedDataParallel(model)
+
+    load_existing_model(model, modelname, path="./logs/")
+    model.eval()
+
+    variable_index = 0
+    # for output_name, output_type, output_dim in zip(config["NeuralNetwork"]["Variables_of_interest"]["output_names"], config["NeuralNetwork"]["Variables_of_interest"]["type"], config["NeuralNetwork"]["Variables_of_interest"]["output_dim"]):
+
+    test_MAE = 0.0
+
+    num_samples = len(testset)
+    energy_true_list = []
+    energy_pred_list = []
+    forces_true_list = []
+    forces_pred_list = []
+
+    for data_id, data in enumerate(tqdm(testset)):
+        data.pos.requires_grad = True
+        node_energy_pred = model(data.to(get_device()))[
+            0
+        ]  # Note that this is sensitive to energy and forces prediction being single-task (current requirement)
+        energy_pred = torch.sum(node_energy_pred, dim=0).float()
+        test_MAE += torch.norm(energy_pred - data.energy, p=1).item() / len(testset)
+        # predicted.backward(retain_graph=True)
+        # gradients = data.pos.grad
+        grads_energy = torch.autograd.grad(
+            outputs=energy_pred,
+            inputs=data.pos,
+            grad_outputs=torch.ones_like(energy_pred),
+            retain_graph=False,
+            create_graph=True,
+        )[0]
+        energy_pred_list.extend(energy_pred.tolist())
+        energy_true_list.extend(data.energy.tolist())
+        forces_pred_list.extend((-grads_energy).flatten().tolist())
+        forces_true_list.extend(data.forces.flatten().tolist())
+
+    hist2d_norm = getcolordensity(energy_true_list, energy_pred_list)
+
+    fig, ax = plt.subplots()
+    plt.scatter(energy_true_list, energy_pred_list, s=8, c=hist2d_norm, vmin=0, vmax=1)
+    plt.clim(0, 1)
+    ax.plot(ax.get_xlim(), ax.get_xlim(), ls="--", color="red")
+    plt.colorbar()
+    plt.xlabel("True values")
+    plt.ylabel("Predicted values")
+    plt.title("Energy")
+    plt.draw()
+    plt.tight_layout()
+    plt.savefig("./energy_Scatterplot.png", dpi=400)
+
+    print("Test MAE energy: ", test_MAE)
+
+    hist2d_norm = getcolordensity(forces_pred_list, forces_true_list)
+    fig, ax = plt.subplots()
+    plt.scatter(forces_pred_list, forces_true_list, s=8, c=hist2d_norm, vmin=0, vmax=1)
+    plt.clim(0, 1)
+    ax.plot(ax.get_xlim(), ax.get_xlim(), ls="--", color="red")
+    plt.colorbar()
+    plt.xlabel("Predicted values")
+    plt.ylabel("True values")
+    plt.title("Forces")
+    plt.draw()
+    plt.tight_layout()
+    plt.savefig("./Forces_Scatterplot.png", dpi=400)
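The inference loop above recovers forces as the negative gradient of the predicted total energy with respect to atomic positions. Stripped of the plotting machinery, the pattern is the following sketch, where energy_model stands in for any callable mapping positions to a scalar energy (e.g., the summed nodal predictions of the trained GNN):

import torch

def predict_forces(energy_model, pos):
    # Forces as F = -dE/d(pos), obtained with autograd
    pos = pos.detach().requires_grad_(True)
    energy = energy_model(pos)
    (grad,) = torch.autograd.grad(energy, pos)
    return -grad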
diff --git a/examples/LennardJones/LennardJones.py b/examples/LennardJones/LennardJones.py
new file mode 100644
index 000000000..045b1d251
--- /dev/null
+++ b/examples/LennardJones/LennardJones.py
@@ -0,0 +1,321 @@
+##############################################################################
+# Copyright (c) 2024, Oak Ridge National Laboratory                          #
+# All rights reserved.                                                       #
+#                                                                            #
+# This file is part of HydraGNN and is distributed under a BSD 3-clause      #
+# license. For the licensing terms see the LICENSE file in the top-level     #
+# directory.                                                                 #
+#                                                                            #
+# SPDX-License-Identifier: BSD-3-Clause                                      #
+##############################################################################
+
+# General
+import os, json
+import logging
+import sys
+import argparse
+
+# Torch
+import torch
+
+# torch.set_default_tensor_type(torch.DoubleTensor)
+# torch.set_default_dtype(torch.float64)
+
+# Distributed
+import mpi4py
+from mpi4py import MPI
+
+mpi4py.rc.thread_level = "serialized"
+mpi4py.rc.threads = False
+
+# HydraGNN
+import hydragnn
+from hydragnn.utils.print_utils import log
+from hydragnn.utils.time_utils import Timer
+import hydragnn.utils.tracer as tr
+from hydragnn.preprocess.load_data import split_dataset
+from hydragnn.utils.distdataset import DistDataset
+from hydragnn.utils.pickledataset import SimplePickleWriter, SimplePickleDataset
+from hydragnn.preprocess.utils import gather_deg
+
+try:
+    from hydragnn.utils.adiosdataset import AdiosWriter, AdiosDataset
+except ImportError:
+    pass
+
+# Lennard Jones
+from LJ_data import create_dataset, LJDataset, info
+
+
+##################################################################################################################
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument("--sampling", type=float, help="sampling ratio", default=None)
+    parser.add_argument(
+        "--preonly",
+        action="store_true",
+        help="preprocess only (no training)",
+    )
+    parser.add_argument("--inputfile", help="input file", type=str, default="LJ.json")
+    parser.add_argument("--mae", action="store_true", help="do mae calculation")
+    parser.add_argument("--ddstore", action="store_true", help="ddstore dataset")
+    parser.add_argument("--ddstore_width", type=int, help="ddstore width", default=None)
+    parser.add_argument("--shmem", action="store_true", help="shmem")
+    parser.add_argument("--log", help="log name")
+    parser.add_argument("--batch_size", type=int, help="batch_size", default=None)
+    parser.add_argument("--everyone", action="store_true", help="gptimer")
+
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument(
+        "--adios",
+        help="Adios dataset",
+        action="store_const",
+        dest="format",
+        const="adios",
+    )
+    group.add_argument(
+        "--pickle",
+        help="Pickle dataset",
+        action="store_const",
+        dest="format",
+        const="pickle",
+    )
+    parser.set_defaults(format="pickle")
+    args = parser.parse_args()
+
+    graph_feature_names = ["total_energy"]
+    graph_feature_dims = [1]
+    node_feature_names = ["atomic_number", "potential", "forces"]
+    node_feature_dims = [1, 1, 3]
+    dirpwd = os.path.dirname(os.path.abspath(__file__))
+    ##################################################################################################################
+    input_filename = os.path.join(dirpwd, args.inputfile)
+    ##################################################################################################################
+    # Configurable run choices (JSON file that accompanies this example script).
+    with open(input_filename, "r") as f:
+        config = json.load(f)
+    verbosity = config["Verbosity"]["level"]
+    config["NeuralNetwork"]["Variables_of_interest"][
+        "graph_feature_names"
+    ] = graph_feature_names
+    config["NeuralNetwork"]["Variables_of_interest"][
+        "graph_feature_dims"
+    ] = graph_feature_dims
+    config["NeuralNetwork"]["Variables_of_interest"][
+        "node_feature_names"
+    ] = node_feature_names
+    config["NeuralNetwork"]["Variables_of_interest"][
+        "node_feature_dims"
+    ] = node_feature_dims
+
+    if args.batch_size is not None:
+        config["NeuralNetwork"]["Training"]["batch_size"] = args.batch_size
+
+    ##################################################################################################################
+    # Always initialize for multi-rank training.
+    comm_size, rank = hydragnn.utils.setup_ddp()
+    ##################################################################################################################
+
+    comm = MPI.COMM_WORLD
+
+    ## Set up logging
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%%(levelname)s (rank %d): %%(message)s" % (rank),
+        datefmt="%H:%M:%S",
+    )
+
+    log_name = "LJ" if args.log is None else args.log
+    hydragnn.utils.setup_log(log_name)
+    writer = hydragnn.utils.get_summary_writer(log_name)
+
+    log("Command: {0}\n".format(" ".join([x for x in sys.argv])), rank=0)
+
+    modelname = "LJ"
+    # Check for dataset for each format
+    if args.format == "pickle":
+        basedir = os.path.join(dirpwd, "dataset", "%s.pickle" % modelname)
+        dataset_exists = os.path.exists(os.path.join(dirpwd, "dataset/LJ.pickle"))
+    if args.format == "adios":
+        fname = os.path.join(dirpwd, "./dataset/%s.bp" % modelname)
+        dataset_exists = os.path.exists(
+            os.path.join(dirpwd, "dataset", "%s.bp" % modelname)
+        )
+
+    # Create the dataset if --preonly was specified or the dataset does not exist yet
+    if args.preonly or not dataset_exists:
+
+        ## local data
+        create_dataset(os.path.join(dirpwd, "dataset/data"), config)
+        total = LJDataset(
+            os.path.join(dirpwd, "dataset/data"),
+            config,
+            dist=True,
+        )
+        ## This is a local split
+        trainset, valset, testset = split_dataset(
+            dataset=total,
+            perc_train=0.9,
+            stratify_splitting=False,
+        )
+        print("Local splitting: ", len(total), len(trainset), len(valset), len(testset))
+
+        deg = gather_deg(trainset)
+        config["pna_deg"] = deg.tolist()
+
+        setnames = ["trainset", "valset", "testset"]
+
+        if args.format == "pickle":
+
+            ## pickle
+            attrs = dict()
+            attrs["pna_deg"] = deg
+            SimplePickleWriter(
+                trainset,
+                basedir,
+                "trainset",
+                # minmax_node_feature=total.minmax_node_feature,
+                # minmax_graph_feature=total.minmax_graph_feature,
+                use_subdir=True,
+                attrs=attrs,
+            )
+            SimplePickleWriter(
+                valset,
+                basedir,
+                "valset",
+                # minmax_node_feature=total.minmax_node_feature,
+                # minmax_graph_feature=total.minmax_graph_feature,
+                use_subdir=True,
+            )
+            SimplePickleWriter(
+                testset,
+                basedir,
+                "testset",
+                # minmax_node_feature=total.minmax_node_feature,
+                # minmax_graph_feature=total.minmax_graph_feature,
+                use_subdir=True,
+            )
+
+        if args.format == "adios":
+            ## adios
+            adwriter = AdiosWriter(fname, comm)
+            adwriter.add("trainset", trainset)
+            adwriter.add("valset", valset)
+            adwriter.add("testset", testset)
+            # adwriter.add_global("minmax_node_feature", total.minmax_node_feature)
+            # adwriter.add_global("minmax_graph_feature", total.minmax_graph_feature)
+            adwriter.add_global("pna_deg", deg)
+            adwriter.save()
+
+        # Honor --preonly: stop after preprocessing, before any training is set up
+        if args.preonly:
+            sys.exit(0)
+
+    tr.initialize()
+    tr.disable()
+    timer = Timer("load_data")
+    timer.start()
+    if args.format == "adios":
+        info("Adios load")
+        assert not
(args.shmem and args.ddstore), "Cannot use both ddstore and shmem" + opt = { + "preload": False, + "shmem": args.shmem, + "ddstore": args.ddstore, + "ddstore_width": args.ddstore_width, + } + trainset = AdiosDataset(fname, "trainset", comm, **opt) + valset = AdiosDataset(fname, "valset", comm, **opt) + testset = AdiosDataset(fname, "testset", comm, **opt) + elif args.format == "pickle": + info("Pickle load") + var_config = config["NeuralNetwork"]["Variables_of_interest"] + trainset = SimplePickleDataset( + basedir=basedir, label="trainset", preload=True, var_config=var_config + ) + valset = SimplePickleDataset( + basedir=basedir, label="valset", var_config=var_config + ) + testset = SimplePickleDataset( + basedir=basedir, label="testset", var_config=var_config + ) + # minmax_node_feature = trainset.minmax_node_feature + # minmax_graph_feature = trainset.minmax_graph_feature + pna_deg = trainset.pna_deg + if args.ddstore: + opt = {"ddstore_width": args.ddstore_width} + trainset = DistDataset(trainset, "trainset", comm, **opt) + valset = DistDataset(valset, "valset", comm, **opt) + testset = DistDataset(testset, "testset", comm, **opt) + # trainset.minmax_node_feature = minmax_node_feature + # trainset.minmax_graph_feature = minmax_graph_feature + trainset.pna_deg = pna_deg + else: + raise NotImplementedError("No supported format: %s" % (args.format)) + + info( + "trainset,valset,testset size: %d %d %d" + % (len(trainset), len(valset), len(testset)) + ) + + if args.ddstore: + os.environ["HYDRAGNN_AGGR_BACKEND"] = "mpi" + os.environ["HYDRAGNN_USE_ddstore"] = "1" + + (train_loader, val_loader, test_loader,) = hydragnn.preprocess.create_dataloaders( + trainset, valset, testset, config["NeuralNetwork"]["Training"]["batch_size"] + ) + + config = hydragnn.utils.update_config(config, train_loader, val_loader, test_loader) + ## Good to sync with everyone right after DDStore setup + comm.Barrier() + + hydragnn.utils.save_config(config, log_name) + + timer.stop() + + model = hydragnn.models.create_model_config( + config=config["NeuralNetwork"], + verbosity=verbosity, + ) + model = hydragnn.utils.get_distributed_model(model, verbosity) + + learning_rate = config["NeuralNetwork"]["Training"]["Optimizer"]["learning_rate"] + optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate) + scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( + optimizer, mode="min", factor=0.5, patience=5, min_lr=0.00001 + ) + + hydragnn.utils.load_existing_model_config( + model, config["NeuralNetwork"]["Training"], optimizer=optimizer + ) + + ################################################################################################################## + + hydragnn.train.train_validate_test( + model, + optimizer, + train_loader, + val_loader, + test_loader, + writer, + scheduler, + config["NeuralNetwork"], + log_name, + verbosity, + create_plots=False, + compute_grad_energy=True, + ) + + hydragnn.utils.save_model(model, optimizer, log_name) + hydragnn.utils.print_timers(verbosity) + + if tr.has("GPTLTracer"): + import gptl4py as gp + + eligible = rank if args.everyone else 0 + if rank == eligible: + gp.pr_file(os.path.join("logs", log_name, "gp_timing.p%d" % rank)) + gp.pr_summary_file(os.path.join("logs", log_name, "gp_timing.summary")) + gp.finalize() + sys.exit(0) diff --git a/hydragnn/models/Base.py b/hydragnn/models/Base.py index 254461b76..2bcb791ba 100644 --- a/hydragnn/models/Base.py +++ b/hydragnn/models/Base.py @@ -15,6 +15,7 @@ from torch_geometric.nn import global_mean_pool, BatchNorm from torch.nn 
import GaussianNLLLoss
 from torch.utils.checkpoint import checkpoint
+import torch_scatter
 from hydragnn.utils.model import activation_function_selection, loss_function_selection
 import sys
 from hydragnn.utils.distributed import get_device
@@ -355,6 +356,60 @@ def loss(self, pred, value, head_index):
         elif self.ilossweights_hyperp == 1:
             return self.loss_hpweighted(pred, value, head_index, var=var)
 
+    def energy_force_loss(self, pred, data):
+        # Asserts
+        assert (
+            data.pos is not None and data.energy is not None and data.forces is not None
+        ), "data.pos, data.energy, data.forces must be provided for energy-force loss. Check your dataset creation and naming."
+        assert (
+            data.pos.requires_grad
+        ), "data.pos does not have grad, so force predictions cannot be computed. Check that data.pos has grad set to true before prediction."
+        assert (
+            self.num_heads == 1 and self.head_type[0] == "node"
+        ), "Force predictions are only supported for models with one head that predicts nodal energy. Check your num_heads and head_types."
+        # Initialize loss
+        tot_loss = 0
+        tasks_loss = []
+        # Energies
+        node_energy_pred = pred[0]
+        graph_energy_pred = torch_scatter.scatter_add(
+            node_energy_pred, data.batch, dim=0
+        ).float()
+        graph_energy_true = data.energy
+        energy_loss_weight = self.loss_weights[
+            0
+        ]  # There should only be one loss weight for energy
+        tot_loss += (
+            self.loss_function(graph_energy_pred, graph_energy_true)
+            * energy_loss_weight
+        )
+        tasks_loss.append(self.loss_function(graph_energy_pred, graph_energy_true))
+        # Forces
+        forces_true = data.forces.float()
+        forces_pred = torch.autograd.grad(
+            graph_energy_pred,
+            data.pos,
+            grad_outputs=torch.ones_like(graph_energy_pred),
+            retain_graph=graph_energy_pred.requires_grad,  # Retain graph only if needed (it will be needed during training, but not during validation/testing)
+            create_graph=True,
+        )[0].float()
+        assert (
+            forces_pred is not None
+        ), "No gradients were found for data.pos. Does your model use positions for prediction?"
+        forces_pred = -forces_pred
+        force_loss_weight = (
+            energy_loss_weight
+            * torch.mean(torch.abs(graph_energy_true))
+            / (torch.mean(torch.abs(forces_true)) + 1e-8)
+        )  # Scale the force term so it carries a weight comparable to the energy term
+        tot_loss += self.loss_function(forces_pred, forces_true) * force_loss_weight
+        ## FixMe: current loss functions require the number of heads to equal the number of quantities being predicted,
+        ## so the force contribution is computed manually here instead of through the standard loss functions.
+ + return tot_loss, tasks_loss + def loss_nll(self, pred, value, head_index, var=None): # negative log likelihood loss # uncertainty to weigh losses in https://openaccess.thecvf.com/content_cvpr_2018/papers/Kendall_Multi-Task_Learning_Using_CVPR_2018_paper.pdf diff --git a/hydragnn/run_training.py b/hydragnn/run_training.py index c702074f9..035693380 100644 --- a/hydragnn/run_training.py +++ b/hydragnn/run_training.py @@ -175,6 +175,7 @@ def _(config: dict, use_deepspeed=False): plot_hist_solution, create_plots, use_deepspeed=use_deepspeed, + compute_grad_energy=config["NeuralNetwork"]["Training"]["compute_grad_energy"], ) save_model(model, optimizer, log_name, use_deepspeed=use_deepspeed) diff --git a/hydragnn/train/train_validate_test.py b/hydragnn/train/train_validate_test.py index fee6e7ea2..a4fc9c635 100644 --- a/hydragnn/train/train_validate_test.py +++ b/hydragnn/train/train_validate_test.py @@ -66,6 +66,7 @@ def train_validate_test( plot_hist_solution=False, create_plots=False, use_deepspeed=False, + compute_grad_energy=False, ): num_epoch = config["Training"]["num_epoch"] EarlyStop = ( @@ -162,6 +163,7 @@ def train_validate_test( verbosity, profiler=prof, use_deepspeed=use_deepspeed, + compute_grad_energy=compute_grad_energy, ) tr.stop("train") tr.disable() @@ -172,7 +174,11 @@ def train_validate_test( continue val_loss, val_taskserr = validate( - val_loader, model, verbosity, reduce_ranks=True + val_loader, + model, + verbosity, + reduce_ranks=True, + compute_grad_energy=compute_grad_energy, ) test_loss, test_taskserr, true_values, predicted_values = test( test_loader, @@ -180,6 +186,7 @@ def train_validate_test( verbosity, reduce_ranks=True, return_samples=plot_hist_solution, + compute_grad_energy=compute_grad_energy, ) scheduler.step(val_loss) if writer is not None: @@ -434,7 +441,15 @@ def gather_tensor_ranks(head_values): return head_values -def train(loader, model, opt, verbosity, profiler=None, use_deepspeed=False): +def train( + loader, + model, + opt, + verbosity, + profiler=None, + use_deepspeed=False, + compute_grad_energy=False, +): if profiler is None: profiler = Profiler() @@ -492,8 +507,13 @@ def train(loader, model, opt, verbosity, profiler=None, use_deepspeed=False): data = data.to(get_device()) if trace_level > 0: tr.stop("h2d", **syncopt) - pred = model(data) - loss, tasks_loss = model.module.loss(pred, data.y, head_index) + if compute_grad_energy: # for force and energy prediction + data.pos.requires_grad = True + pred = model(data) + loss, tasks_loss = model.module.energy_force_loss(pred, data) + else: + pred = model(data) + loss, tasks_loss = model.module.loss(pred, data.y, head_index) if trace_level > 0: tr.start("forward_sync", **syncopt) MPI.COMM_WORLD.Barrier() @@ -541,7 +561,7 @@ def train(loader, model, opt, verbosity, profiler=None, use_deepspeed=False): @torch.no_grad() -def validate(loader, model, verbosity, reduce_ranks=True): +def validate(loader, model, verbosity, reduce_ranks=True, compute_grad_energy=False): total_error = torch.tensor(0.0, device=get_device()) tasks_error = torch.zeros(model.module.num_heads, device=get_device()) @@ -565,8 +585,14 @@ def validate(loader, model, verbosity, reduce_ranks=True): loader.dataset.ddstore.epoch_end() head_index = get_head_indices(model, data) data = data.to(get_device()) - pred = model(data) - error, tasks_loss = model.module.loss(pred, data.y, head_index) + if compute_grad_energy: # for force and energy prediction + with torch.enable_grad(): + data.pos.requires_grad = True + pred = model(data) + 
error, tasks_loss = model.module.energy_force_loss(pred, data) + else: + pred = model(data) + error, tasks_loss = model.module.loss(pred, data.y, head_index) total_error += error * data.num_graphs num_samples_local += data.num_graphs for itask in range(len(tasks_loss)): @@ -585,7 +611,14 @@ def validate(loader, model, verbosity, reduce_ranks=True): @torch.no_grad() -def test(loader, model, verbosity, reduce_ranks=True, return_samples=True): +def test( + loader, + model, + verbosity, + reduce_ranks=True, + return_samples=True, + compute_grad_energy=False, +): total_error = torch.tensor(0.0, device=get_device()) tasks_error = torch.zeros(model.module.num_heads, device=get_device()) @@ -612,8 +645,14 @@ def test(loader, model, verbosity, reduce_ranks=True, return_samples=True): loader.dataset.ddstore.epoch_end() head_index = get_head_indices(model, data) data = data.to(get_device()) - pred = model(data) - error, tasks_loss = model.module.loss(pred, data.y, head_index) + if compute_grad_energy: # for force and energy prediction + with torch.enable_grad(): + data.pos.requires_grad = True + pred = model(data) + error, tasks_loss = model.module.energy_force_loss(pred, data) + else: + pred = model(data) + error, tasks_loss = model.module.loss(pred, data.y, head_index) ## FIXME: temporary if int(os.getenv("HYDRAGNN_DUMP_TESTDATA", "0")) == 1: if model.module.var_output: diff --git a/hydragnn/utils/config_utils.py b/hydragnn/utils/config_utils.py index 3331952c8..7b71ab6b2 100644 --- a/hydragnn/utils/config_utils.py +++ b/hydragnn/utils/config_utils.py @@ -106,6 +106,9 @@ def update_config(config, train_loader, val_loader, test_loader): if "conv_checkpointing" not in config["NeuralNetwork"]["Training"]: config["NeuralNetwork"]["Training"]["conv_checkpointing"] = False + + if "compute_grad_energy" not in config["NeuralNetwork"]["Training"]: + config["NeuralNetwork"]["Training"]["compute_grad_energy"] = False return config @@ -260,9 +263,11 @@ def get_log_name_config(config): + str(config["NeuralNetwork"]["Training"]["batch_size"]) + "-data-" + config["Dataset"]["name"][ - : config["Dataset"]["name"].rfind("_") - if config["Dataset"]["name"].rfind("_") > 0 - else None + : ( + config["Dataset"]["name"].rfind("_") + if config["Dataset"]["name"].rfind("_") > 0 + else None + ) ] + "-node_ft-" + "".join( diff --git a/tests/test_examples.py b/tests/test_examples.py index 7ca1e4cd0..8a82fa18d 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -15,7 +15,7 @@ import subprocess -@pytest.mark.parametrize("example", ["qm9", "md17"]) +@pytest.mark.parametrize("example", ["qm9", "md17", "LennardJones"]) @pytest.mark.mpi_skip() def pytest_examples(example): path = os.path.join(os.path.dirname(__file__), "..", "examples", example)
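Taken together, the training-side changes hinge on one switch: update_config defaults compute_grad_energy to False, and when it is enabled the train/validate/test loops turn on gradients for data.pos and route the loss through energy_force_loss. A minimal sketch of the resulting training step, assuming a DDP-wrapped HydraGNN model and a PyG batch carrying pos, energy, and forces:

import torch

def energy_force_training_step(model, data, optimizer):
    # Positions must carry grad so forces can be derived from the predicted energy
    data.pos.requires_grad = True
    pred = model(data)  # single node head predicting per-atom energies
    loss, tasks_loss = model.module.energy_force_loss(pred, data)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss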