From 8b3d9cc4a82a3c7f61ec7479f6ea90647b223bff Mon Sep 17 00:00:00 2001
From: Mihai Maruseac
Date: Tue, 24 Oct 2023 05:08:58 -0700
Subject: [PATCH] Fix linting on SLSA for models

Signed-off-by: Mihai Maruseac
---
 slsa_for_models/main.py               |  36 ++--
 slsa_for_models/pytorch_cifar10.py    | 248 +++++++++++++-------------
 slsa_for_models/tensorflow_cifar10.py | 206 ++++++++++-----------
 3 files changed, 246 insertions(+), 244 deletions(-)

diff --git a/slsa_for_models/main.py b/slsa_for_models/main.py
index 6005cec9..d8a1dc08 100644
--- a/slsa_for_models/main.py
+++ b/slsa_for_models/main.py
@@ -19,29 +19,29 @@ def readOptions():
-  parser = argparse.ArgumentParser('Train CIFAR10 models with TF/PT')
-  model_formats = list(tf.supported_models().keys())
-  model_formats += list(pt.supported_models().keys())
-  parser.add_argument('model', choices=model_formats,
-                      help='Model to generate (name implies framework)')
-  return parser.parse_args()
+    parser = argparse.ArgumentParser('Train CIFAR10 models with TF/PT')
+    model_formats = list(tf.supported_models().keys())
+    model_formats += list(pt.supported_models().keys())
+    parser.add_argument('model', choices=model_formats,
+                        help='Model to generate (name implies framework)')
+    return parser.parse_args()


 def main(args):
-  model_formats = list(tf.supported_models().keys())
-  for model_format in model_formats:
-    if args.model == model_format:
-      return tf.model_pipeline(args.model)
+    model_formats = list(tf.supported_models().keys())
+    for model_format in model_formats:
+        if args.model == model_format:
+            return tf.model_pipeline(args.model)

-  model_formats = list(pt.supported_models().keys())
-  for model_format in model_formats:
-    if args.model == model_format:
-      return pt.model_pipeline(args.model)
+    model_formats = list(pt.supported_models().keys())
+    for model_format in model_formats:
+        if args.model == model_format:
+            return pt.model_pipeline(args.model)

-  # we should not reach this case in the normal flow, but cover all corners
-  raise ValueError("Model format not supported")
+    # We should not reach this in the normal flow, but cover all corner cases.
+    raise ValueError("Model format not supported")


 if __name__ == '__main__':
-  args = readOptions()
-  main(args)
+    args = readOptions()
+    main(args)

diff --git a/slsa_for_models/pytorch_cifar10.py b/slsa_for_models/pytorch_cifar10.py
index 87325081..cb7112a7 100644
--- a/slsa_for_models/pytorch_cifar10.py
+++ b/slsa_for_models/pytorch_cifar10.py
@@ -25,159 +25,159 @@ def pretraining():
-  """Perform setup required before training.
-
-  Does the lazy loading of TensorFlow too, to prevent compatibility issues with
-  mixing TensorFlow and PyTorch imports.
-  """
-  global torch
-  global nn
-  global F
-  global optim
-  global torchvision
-  global transforms
-  import torch
-  import torch.nn as nn
-  import torch.nn.functional as F
-  import torch.optim as optim
-  import torchvision
-  import torchvision.transforms as transforms
+    """Perform setup required before training.
+
+    Does the lazy loading of PyTorch too, to prevent compatibility issues
+    with mixing TensorFlow and PyTorch imports.
+    """
+    global torch
+    global nn
+    global F
+    global optim
+    global torchvision
+    global transforms
+    import torch
+    import torch.nn as nn
+    import torch.nn.functional as F
+    import torch.optim as optim
+    import torchvision
+    import torchvision.transforms as transforms


 def load_data():
-  """Load the CIFAR10 data.
+    """Load the CIFAR10 data.

-  Obtains both the train and the test splits. According to
-  https://www.cs.toronto.edu/~kriz/cifar.html, there should be 50000 training
-  images and 10000 test ones. Each image is 32x32 RGB.
+    Obtains both the train and the test splits. According to
+    https://www.cs.toronto.edu/~kriz/cifar.html, there should be 50000 training
+    images and 10000 test ones. Each image is 32x32 RGB.

-  Data is normalized to be in range [-1, 1].
+    Data is normalized to be in range [-1, 1].

-  Returns iterators to train and test sets.
-  """
-  transform = transforms.Compose([
-    transforms.ToTensor(),
-    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
-  ])
+    Returns iterators to train and test sets.
+    """
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
+    ])

-  batch_size = 4
-  num_workers = 2
+    batch_size = 4
+    num_workers = 2

-  trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
-                                          download=True, transform=transform)
-  trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
-                                            shuffle=True,
-                                            num_workers=num_workers)
-  testset = torchvision.datasets.CIFAR10(root='./data', train=False,
-                                         download=True, transform=transform)
-  testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
-                                           shuffle=True,
-                                           num_workers=num_workers)
+    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
+                                            download=True, transform=transform)
+    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
+                                              shuffle=True,
+                                              num_workers=num_workers)
+    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
+                                           download=True, transform=transform)
+    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
+                                             shuffle=True,
+                                             num_workers=num_workers)

-  return trainloader, testloader
+    return trainloader, testloader


 def create_model():
-  """Create a Torch NN model.
+    """Create a Torch NN model.

-  The model is taken from the tutorial at
-  https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html.
+    The model is taken from the tutorial at
+    https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html.

-  Returns the model.
-  """
-  # Train a model based on tutorial from
-  # https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html.
-  # We inline the class to be able to use lazy loading of PyTorch modules.
-  class MyModel(nn.Module):
-    def __init__(self):
-      super().__init__()
-      self.conv1 = nn.Conv2d(3, 6, 5)
-      self.pool = nn.MaxPool2d(2, 2)
-      self.conv2 = nn.Conv2d(6, 16, 5)
-      self.fc1 = nn.Linear(16 * 5 * 5, 120)
-      self.fc2 = nn.Linear(120, 84)
-      self.fc3 = nn.Linear(84, 10)
-
-
-    def forward(self, x):
-      x = self.pool(F.relu(self.conv1(x)))
-      x = self.pool(F.relu(self.conv2(x)))
-      x = torch.flatten(x, 1)
-      x = F.relu(self.fc1(x))
-      x = F.relu(self.fc2(x))
-      x = self.fc3(x)
-      return x
-
-  return MyModel()
+    Returns the model.
+    """
+    # Train a model based on tutorial from
+    # https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html.
+    # We inline the class to be able to use lazy loading of PyTorch modules.
+    class MyModel(nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.conv1 = nn.Conv2d(3, 6, 5)
+            self.pool = nn.MaxPool2d(2, 2)
+            self.conv2 = nn.Conv2d(6, 16, 5)
+            self.fc1 = nn.Linear(16 * 5 * 5, 120)
+            self.fc2 = nn.Linear(120, 84)
+            self.fc3 = nn.Linear(84, 10)
+
+        def forward(self, x):
+            x = self.pool(F.relu(self.conv1(x)))
+            x = self.pool(F.relu(self.conv2(x)))
+            x = torch.flatten(x, 1)
+            x = F.relu(self.fc1(x))
+            x = F.relu(self.fc2(x))
+            x = self.fc3(x)
+            return x
+
+    return MyModel()


 def prepare_model(model):
-  """Prepare model for training with loss and optimizer."""
-  # We only need to return loss and optimizer
-  loss = nn.CrossEntropyLoss()
-  optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
-  return loss, optimizer
+    """Prepare model for training with loss and optimizer."""
+    # We only need to return loss and optimizer
+    loss = nn.CrossEntropyLoss()
+    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
+    return loss, optimizer


 def train_model(model, loss, optimizer, train):
-  """Train a model on the training set."""
-  num_epochs = 2
-  batch_size = 2000
-  for epoch in range(num_epochs):
-    running_loss = 0.0
-    for i, data in enumerate(train, 1):
-      x, y = data
-      optimizer.zero_grad()
-      outputs = model(x)
-      loss_score = loss(outputs, y)
-      loss_score.backward()
-      optimizer.step()
-      running_loss += loss_score.item()
-      if i % batch_size == 0:
-        print(f'[{epoch}, {i:5d}], loss: {running_loss / batch_size :.3f}')
-        running_loss = 0.0
+    """Train a model on the training set."""
+    num_epochs = 2
+    batch_size = 2000  # printing interval in mini-batches, not a batch size
+    for epoch in range(num_epochs):
+        running_loss = 0.0
+        for i, data in enumerate(train, 1):
+            x, y = data
+            optimizer.zero_grad()
+            outputs = model(x)
+            loss_score = loss(outputs, y)
+            loss_score.backward()
+            optimizer.step()
+            running_loss += loss_score.item()
+            if i % batch_size == 0:
+                print(f'[{epoch}, {i:5d}], '
+                      f'loss: {running_loss / batch_size :.3f}')
+                running_loss = 0.0


 def score_model(model, test):
-  """Score a trained model on the test set."""
-  correct = 0
-  total = 0
-  with torch.no_grad():
-    for data in test:
-      x, y = data
-      outputs = model(x)
-      _, predicted = torch.max(outputs.data, 1)
-      total += y.size(0)
-      correct += (predicted == y).sum().item()
-  print(f'Test accuracy: {correct / total}')
+    """Score a trained model on the test set."""
+    correct = 0
+    total = 0
+    with torch.no_grad():
+        for data in test:
+            x, y = data
+            outputs = model(x)
+            _, predicted = torch.max(outputs.data, 1)
+            total += y.size(0)
+            correct += (predicted == y).sum().item()
+    print(f'Test accuracy: {correct / total}')


 def supported_models():
-  """Returns supported model types paired with method to save them."""
-  return {
-    'pytorch_model.pth': lambda m, p: torch.save(m.state_dict(), p),
-    'pytorch_full_model.pth': lambda m, p: torch.save(m, p),
-    'pytorch_jitted_model.pt': lambda m, p: torch.jit.script(m).save(p),
-  }
+    """Returns supported model types paired with method to save them."""
+    return {
+        'pytorch_model.pth': lambda m, p: torch.save(m.state_dict(), p),
+        'pytorch_full_model.pth': lambda m, p: torch.save(m, p),
+        'pytorch_jitted_model.pt': lambda m, p: torch.jit.script(m).save(p),
+    }


 def save_model(model, model_format):
-  """Save the model after training to be transferred to production.
+    """Save the model after training to be transferred to production.

-  Saves in the requested format, if supported by PyTorch.
- """ - saver = supported_models().get(model_format, None) - if not saver: - raise ValueError('Requested a model format not supported by PyTorch') - saver(model, './' + model_format) + Saves in the requested format, if supported by PyTorch. + """ + saver = supported_models().get(model_format, None) + if not saver: + raise ValueError('Requested a model format not supported by PyTorch') + saver(model, './' + model_format) def model_pipeline(model_format): - """Train a model and save it in the requested format.""" - pretraining() - data = load_data() - model = create_model() - loss, optimizer = prepare_model(model) - train_model(model, loss, optimizer, data[0]) - score_model(model, data[1]) - save_model(model, model_format) + """Train a model and save it in the requested format.""" + pretraining() + data = load_data() + model = create_model() + loss, optimizer = prepare_model(model) + train_model(model, loss, optimizer, data[0]) + score_model(model, data[1]) + save_model(model, model_format) diff --git a/slsa_for_models/tensorflow_cifar10.py b/slsa_for_models/tensorflow_cifar10.py index e86c343e..ce5dc15b 100644 --- a/slsa_for_models/tensorflow_cifar10.py +++ b/slsa_for_models/tensorflow_cifar10.py @@ -21,137 +21,139 @@ def pretraining(): - """Perform setup required before training. + """Perform setup required before training. - Does the lazy loading of TensorFlow too, to prevent compatibility issues with - mixing TensorFlow and PyTorch imports. - """ - global tf - global tfds - import tensorflow as tf - import tensorflow_datasets as tfds - # Also compile model using XLA for ~20% performance gain - tf.config.optimizer.set_jit(True) + Does the lazy loading of TensorFlow too, to prevent compatibility issues + with mixing TensorFlow and PyTorch imports. + """ + global tf + global tfds + import tensorflow as tf + import tensorflow_datasets as tfds + # Also compile model using XLA for ~20% performance gain + tf.config.optimizer.set_jit(True) def load_data(): - """Load the CIFAR10 data. + """Load the CIFAR10 data. - Obtains both the train and the test splits. According to - https://www.cs.toronto.edu/~kriz/cifar.html, there should be 50000 training - images and 10000 test ones. Each image is 32x32 RGB. + Obtains both the train and the test splits. According to + https://www.cs.toronto.edu/~kriz/cifar.html, there should be 50000 training + images and 10000 test ones. Each image is 32x32 RGB. - Data is normalized to be in [0, 1). Labels are one-hot encoded. + Data is normalized to be in [0, 1). Labels are one-hot encoded. - Returns train and test pairs. Each pair consists of features and labels - vectors of similar size. - """ - result = tfds.load('cifar10', batch_size = -1) - x_train = result['train']['image'] - y_train = result['train']['label'] - x_test = result['test']['image'] - y_test = result['test']['label'] + Returns train and test pairs. Each pair consists of features and labels + vectors of similar size. 
+ """ + result = tfds.load('cifar10', batch_size=-1) + x_train = result['train']['image'] + y_train = result['train']['label'] + x_test = result['test']['image'] + y_test = result['test']['label'] - # transform input - x_train = x_train.numpy().astype('float32') / 256 - x_test = x_test.numpy().astype('float32') / 256 - y_train = tf.keras.utils.to_categorical(y_train, num_classes=10) - y_test = tf.keras.utils.to_categorical(y_test, num_classes=10) + # transform input + x_train = x_train.numpy().astype('float32') / 256 + x_test = x_test.numpy().astype('float32') / 256 + y_train = tf.keras.utils.to_categorical(y_train, num_classes=10) + y_test = tf.keras.utils.to_categorical(y_test, num_classes=10) - return (x_train, y_train), (x_test, y_test) + return (x_train, y_train), (x_test, y_test) def create_model(in_shape): - """Create a TensorFlow NN model. - - The model is taken from the tutorial at - https://www.tensorflow.org/xla/tutorials/autoclustering_xla. - - We need to pass as argument the expected input shape. - - Returns the model. - """ - x, _, c = in_shape - return tf.keras.models.Sequential([ - tf.keras.layers.Conv2D(x, (c, c), padding='same', input_shape=in_shape), - tf.keras.layers.Activation('relu'), - tf.keras.layers.Conv2D(x, (c, c)), - tf.keras.layers.Activation('relu'), - tf.keras.layers.MaxPooling2D(pool_size=(2,2)), - tf.keras.layers.Dropout(0.25), - tf.keras.layers.Conv2D(2*x, (c, c), padding='same'), - tf.keras.layers.Activation('relu'), - tf.keras.layers.Conv2D(2*x, (c, c)), - tf.keras.layers.Activation('relu'), - tf.keras.layers.MaxPooling2D(pool_size=(2,2)), - tf.keras.layers.Dropout(0.25), - tf.keras.layers.Flatten(), - tf.keras.layers.Dense(512), - tf.keras.layers.Activation('relu'), - tf.keras.layers.Dropout(0.5), - tf.keras.layers.Dense(10), - tf.keras.layers.Activation('softmax'), - ]) + """Create a TensorFlow NN model. + + The model is taken from the tutorial at + https://www.tensorflow.org/xla/tutorials/autoclustering_xla. + + We need to pass as argument the expected input shape. + + Returns the model. + """ + x, _, c = in_shape + return tf.keras.models.Sequential([ + tf.keras.layers.Conv2D(x, (c, c), padding='same', + input_shape=in_shape), + tf.keras.layers.Activation('relu'), + tf.keras.layers.Conv2D(x, (c, c)), + tf.keras.layers.Activation('relu'), + tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + tf.keras.layers.Dropout(0.25), + tf.keras.layers.Conv2D(2*x, (c, c), padding='same'), + tf.keras.layers.Activation('relu'), + tf.keras.layers.Conv2D(2*x, (c, c)), + tf.keras.layers.Activation('relu'), + tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + tf.keras.layers.Dropout(0.25), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(512), + tf.keras.layers.Activation('relu'), + tf.keras.layers.Dropout(0.5), + tf.keras.layers.Dense(10), + tf.keras.layers.Activation('softmax'), + ]) def prepare_model(model): - """Prepare model for training with loss and optimizer.""" - opt = tf.keras.optimizers.RMSprop(learning_rate=0.0001) - model.compile(loss='categorical_crossentropy', - optimizer=opt, - metrics=['accuracy']) - return model + """Prepare model for training with loss and optimizer.""" + opt = tf.keras.optimizers.RMSprop(learning_rate=0.0001) + model.compile(loss='categorical_crossentropy', + optimizer=opt, + metrics=['accuracy']) + return model def train_model(model, train, test): - """Train a model on the training set. + """Train a model on the training set. - The test set is used for cross validation. 
-  """
-  x, y = train
-  model.fit(x, y, batch_size=256, epochs=16,
-            validation_data=test, shuffle=True)
+    The test set is used for validation during training.
+    """
+    x, y = train
+    model.fit(x, y, batch_size=256, epochs=16,
+              validation_data=test, shuffle=True)


 def score_model(model, test):
-  """Score a trained model on the test set."""
-  x, y = test
-  scores = model.evaluate(x, y, verbose=1)
-  print(f'Test loss: {scores[0]}')
-  print(f'Test accuracy: {scores[1]}')
+    """Score a trained model on the test set."""
+    x, y = test
+    scores = model.evaluate(x, y, verbose=1)
+    print(f'Test loss: {scores[0]}')
+    print(f'Test accuracy: {scores[1]}')


 def supported_models():
-  """Returns supported model types paired with method to save them."""
-  return {
-    # New Keras format
-    'tensorflow_model.keras': lambda m, p: m.save(p, save_format='keras'),
-    # TF SavedModel formats, full model and weights only
-    'tensorflow_saved_model': lambda m, p: m.save(p, save_format='tf'),
-    'tensorflow_exported_model': lambda m, p: m.export(p),
-    # Legacy HDFS format, full model and weights only
-    'tensorflow_hdf5_model.h5': lambda m, p: m.save(p, save_format='h5'),
-    'tensorflow_hdf5.weights.h5': lambda m, p: m.save_weights(p),
-  }
+    """Returns supported model types paired with method to save them."""
+    return {
+        # New Keras format
+        'tensorflow_model.keras': lambda m, p: m.save(p, save_format='keras'),
+        # TF SavedModel formats, full model and inference-only export
+        'tensorflow_saved_model': lambda m, p: m.save(p, save_format='tf'),
+        'tensorflow_exported_model': lambda m, p: m.export(p),
+        # Legacy HDF5 format, full model and weights only
+        'tensorflow_hdf5_model.h5': lambda m, p: m.save(p, save_format='h5'),
+        'tensorflow_hdf5.weights.h5': lambda m, p: m.save_weights(p),
+    }


 def save_model(model, model_format):
-  """Save the model after training to be transferred to production.
+    """Save the model after training to be transferred to production.

-  Saves in the requested format, if supported by TensorFlow.
-  """
-  saver = supported_models().get(model_format, None)
-  if not saver:
-    raise ValueError('Requested a model format not supported by TensorFlow')
-  saver(model, './' + model_format)
+    Saves in the requested format, if supported by TensorFlow.
+    """
+    saver = supported_models().get(model_format, None)
+    if not saver:
+        raise ValueError(
+            'Requested a model format not supported by TensorFlow')
+    saver(model, './' + model_format)


 def model_pipeline(model_format):
-  """Train a model and save it in the requested format."""
-  pretraining()
-  data = load_data()
-  model = create_model(data[0][0].shape[1:])
-  model = prepare_model(model)
-  train_model(model, data[0], data[1])
-  score_model(model, data[1])
-  save_model(model, model_format)
+    """Train a model and save it in the requested format."""
+    pretraining()
+    data = load_data()
+    model = create_model(data[0][0].shape[1:])
+    model = prepare_model(model)
+    train_model(model, data[0], data[1])
+    score_model(model, data[1])
+    save_model(model, model_format)
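
Note for reviewers (not part of the patch): since the change is intended to
be formatting-only, the quickest behavior check is to run each pipeline end
to end after applying it. Below is a minimal smoke-test sketch; the file
name smoke_test.py is hypothetical, and it assumes it is run from the
repository root with PyTorch, torchvision, TensorFlow and
tensorflow-datasets installed, and that main.py resolves its tf/pt module
imports when invoked from there.

    # smoke_test.py -- hypothetical reviewer aid; not part of the patch.
    # Run each framework in its own process, mirroring the CLI in main.py,
    # because both modules lazy-load their framework precisely to avoid
    # mixing TensorFlow and PyTorch imports in a single process.
    import subprocess
    import sys

    # One format per framework; the names are keys of supported_models()
    # and exactly what main.py accepts as its positional `model` argument.
    for model_name in ('pytorch_model.pth', 'tensorflow_model.keras'):
        subprocess.run(
            [sys.executable, 'slsa_for_models/main.py', model_name],
            check=True)

Each run downloads CIFAR10, trains (2 epochs for the PyTorch model, 16 for
the TensorFlow one), prints the test metrics, and writes the serialized
model into the current directory, so expect a slow functional check rather
than an instant one.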