From c650cf3dc7d58b6d62d0ffab3756603c04b9d280 Mon Sep 17 00:00:00 2001 From: Aron Date: Tue, 9 Jan 2024 12:35:52 +0100 Subject: [PATCH] Always return a single NNs model for all replicas, adjust weight getting and setting accordingly --- .../n3fit/backends/keras_backend/MetaModel.py | 59 +++++++++++++------ n3fit/src/n3fit/model_gen.py | 31 +++++----- n3fit/src/n3fit/model_trainer.py | 2 +- 3 files changed, 58 insertions(+), 34 deletions(-) diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py index 9ffa71ce81..b73c58fc68 100644 --- a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py +++ b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py @@ -46,7 +46,8 @@ } NN_PREFIX = "NN" -PREPROCESSING_PREFIX = "preprocessing_factor" +NN_LAYER = "NNs" +PREPROESSING_LAYER = "preprocessing_factor" # Some keys need to work for everyone for k, v in optimizers.items(): @@ -353,14 +354,12 @@ def get_replica_weights(self, i_replica): dict dictionary with the weights of the replica """ - NN_weights = [ - tf.Variable(w, name=w.name) for w in self.get_layer(f"{NN_PREFIX}_{i_replica}").weights - ] - prepro_weights = [ - tf.Variable(w, name=w.name) - for w in get_layer_replica_weights(self.get_layer(PREPROCESSING_PREFIX), i_replica) - ] - weights = {NN_PREFIX: NN_weights, PREPROCESSING_PREFIX: prepro_weights} + weights = {} + for layer_type in [NN_LAYER, PREPROESSING_LAYER]: + weights[layer_type] = [ + tf.Variable(w, name=w.name) + for w in get_layer_replica_weights(self.get_layer(layer_type), i_replica) + ] return weights @@ -378,12 +377,10 @@ def set_replica_weights(self, weights, i_replica=0): i_replica: int the replica number to set, defaulting to 0 """ - self.get_layer(f"{NN_PREFIX}_{i_replica}").set_weights(weights[NN_PREFIX]) - set_layer_replica_weights( - layer=self.get_layer(PREPROCESSING_PREFIX), - weights=weights[PREPROCESSING_PREFIX], - i_replica=i_replica, - ) + for layer_type in [NN_LAYER, PREPROESSING_LAYER]: + set_layer_replica_weights( + layer=self.get_layer(layer_type), weights=weights[layer_type], i_replica=i_replica + ) def split_replicas(self): """ @@ -427,6 +424,25 @@ def load_identical_replicas(self, model_file): self.set_replica_weights(weights, i_replica) +def stacked_single_replicas(layer): + """ + Check if the layer consists of stacked single replicas (Only happens for NN layers) + + Parameters + ---------- + layer: MetaLayer + the layer to check + + Returns + ------- + bool + True if the layer consists of stacked single replicas + """ + if not isinstance(layer, MetaModel): + return False + return f"{NN_PREFIX}_0" in [sublayer.name for sublayer in layer.layers] + + def get_layer_replica_weights(layer, i_replica: int): """ Get the weights for the given single replica `i_replica`, @@ -444,13 +460,18 @@ def get_layer_replica_weights(layer, i_replica: int): weights: list list of weights for the replica """ - return [tf.Variable(w[i_replica : i_replica + 1], name=w.name) for w in layer.weights] + if stacked_single_replicas(layer): + weights = layer.get_layer(f"{NN_PREFIX}_{i_replica}").weights + else: + weights = [tf.Variable(w[i_replica : i_replica + 1], name=w.name) for w in layer.weights] + + return weights def set_layer_replica_weights(layer, weights, i_replica: int): """ Set the weights for the given single replica `i_replica`, - from a `layer` that has weights for all replicas. + for a `layer` that has weights for all replicas. Parameters ---------- @@ -461,6 +482,10 @@ def set_layer_replica_weights(layer, weights, i_replica: int): i_replica: int the replica number """ + if stacked_single_replicas(layer): + layer.get_layer(f"{NN_PREFIX}_{i_replica}").set_weights(weights) + return + full_weights = [w.numpy() for w in layer.weights] for w_old, w_new in zip(full_weights, weights): w_old[i_replica : i_replica + 1] = w_new diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index 2f9e47edf3..6555456965 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -596,15 +596,11 @@ def pdfNN_layer_generator( # Apply NN layers for all replicas to a given input grid def neural_network_replicas(x, postfix=""): - NNs_x = Lambda(lambda nns: op.stack(nns, axis=1), name=f"NNs{postfix}")( - [nn(x) for nn in nn_replicas] - ) + NNs_x = nn_replicas(x) if subtract_one: x_eq_1_processed = process_input(layer_x_eq_1) - NNs_x_1 = Lambda(lambda nns: op.stack(nns, axis=1), name=f"NNs{postfix}_x_1")( - [nn(x_eq_1_processed) for nn in nn_replicas] - ) + NNs_x_1 = nn_replicas(x_eq_1_processed) NNs_x = subtract_one_layer([NNs_x, NNs_x_1]) return NNs_x @@ -660,11 +656,10 @@ def compute_unnormalized_pdf(x, postfix=""): if photons: PDFs = layer_photon(PDFs) - if replica_axis: - pdf_model = MetaModel(model_input, PDFs, name=f"PDFs", scaler=scaler) - else: - pdf_model = MetaModel(model_input, PDFs[:, 0], name=f"PDFs", scaler=scaler) + if not replica_axis: + PDFs = Lambda(lambda pdfs: pdfs[:, 0], name="remove_replica_axis")(PDFs) + pdf_model = MetaModel(model_input, PDFs, name=f"PDFs", scaler=scaler) return pdf_model @@ -709,8 +704,8 @@ def generate_nn( Returns ------- - nn_replicas: List[MetaModel] - List of MetaModel objects, one for each replica. + nn_replicas: MetaModel + Single model containing all replicas. """ nodes_list = list(nodes) # so we can modify it x_input = Input(shape=(None, nodes_in), batch_size=1, name='xgrids_processed') @@ -734,7 +729,7 @@ def initializer_generator(seed, i_layer): ] return initializers - elif layer_type == "dense": + else: # "dense" reg = regularizer_selector(regularizer, **regularizer_args) custom_args['regularizer'] = reg @@ -772,6 +767,7 @@ def initializer_generator(seed, i_layer): # Apply all layers to the input to create the models pdfs = [layer(x_input) for layer in list_of_pdf_layers[0]] + for layers in list_of_pdf_layers[1:]: # Since some layers (dropout) are shared, we have to treat them separately if type(layers) is list: @@ -779,9 +775,12 @@ def initializer_generator(seed, i_layer): else: pdfs = [layers(x) for x in pdfs] - models = [ - MetaModel({'NN_input': x_input}, pdf, name=f"NN_{i_replica}") + # Wrap the pdfs in a MetaModel to enable getting/setting of weights later + pdfs = [ + MetaModel({'NN_input': x_input}, pdf, name=f"NN_{i_replica}")(x_input) for i_replica, pdf in enumerate(pdfs) ] + pdfs = Lambda(lambda nns: op.stack(nns, axis=1), name=f"stack_replicas")(pdfs) + model = MetaModel({'NN_input': x_input}, pdfs, name=f"NNs") - return models + return model diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index 81af8337ab..7d78bfb23f 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -454,7 +454,7 @@ def _model_generation(self, xinput, pdf_model, partition, partition_idx): training.summary() pdf_model = training.get_layer("PDFs") pdf_model.summary() - nn_model = pdf_model.get_layer("NN_0") + nn_model = pdf_model.get_layer("NNs") nn_model.summary() # We may have fits without sumrules imposed try: