From 8b3d9cc4a82a3c7f61ec7479f6ea90647b223bff Mon Sep 17 00:00:00 2001
From: Mihai Maruseac
Date: Tue, 24 Oct 2023 05:08:58 -0700
Subject: [PATCH] Fix linting on SLSA for models

Signed-off-by: Mihai Maruseac
---
 slsa_for_models/main.py               |  36 ++--
 slsa_for_models/pytorch_cifar10.py    | 248 +++++++++++++-------------
 slsa_for_models/tensorflow_cifar10.py | 206 ++++++++++-----------
 3 files changed, 246 insertions(+), 244 deletions(-)

diff --git a/slsa_for_models/main.py b/slsa_for_models/main.py
index 6005cec9..d8a1dc08 100644
--- a/slsa_for_models/main.py
+++ b/slsa_for_models/main.py
@@ -19,29 +19,29 @@ def readOptions():
-  parser = argparse.ArgumentParser('Train CIFAR10 models with TF/PT')
-  model_formats = list(tf.supported_models().keys())
-  model_formats += list(pt.supported_models().keys())
-  parser.add_argument('model', choices=model_formats,
-                      help='Model to generate (name implies framework)')
-  return parser.parse_args()
+    parser = argparse.ArgumentParser('Train CIFAR10 models with TF/PT')
+    model_formats = list(tf.supported_models().keys())
+    model_formats += list(pt.supported_models().keys())
+    parser.add_argument('model', choices=model_formats,
+                        help='Model to generate (name implies framework)')
+    return parser.parse_args()


 def main(args):
-  model_formats = list(tf.supported_models().keys())
-  for model_format in model_formats:
-    if args.model == model_format:
-      return tf.model_pipeline(args.model)
+    model_formats = list(tf.supported_models().keys())
+    for model_format in model_formats:
+        if args.model == model_format:
+            return tf.model_pipeline(args.model)

-  model_formats = list(pt.supported_models().keys())
-  for model_format in model_formats:
-    if args.model == model_format:
-      return pt.model_pipeline(args.model)
+    model_formats = list(pt.supported_models().keys())
+    for model_format in model_formats:
+        if args.model == model_format:
+            return pt.model_pipeline(args.model)

-  # we should not reach this case in the normal flow, but cover all corners
-  raise ValueError("Model format not supported")
+    # We should not reach this in the normal flow, but cover all corner cases.
+    raise ValueError("Model format not supported")


 if __name__ == '__main__':
-  args = readOptions()
-  main(args)
+    args = readOptions()
+    main(args)

diff --git a/slsa_for_models/pytorch_cifar10.py b/slsa_for_models/pytorch_cifar10.py
index 87325081..cb7112a7 100644
--- a/slsa_for_models/pytorch_cifar10.py
+++ b/slsa_for_models/pytorch_cifar10.py
@@ -25,159 +25,159 @@ def pretraining():
-  """Perform setup required before training.
-
-  Does the lazy loading of TensorFlow too, to prevent compatibility issues with
-  mixing TensorFlow and PyTorch imports.
-  """
-  global torch
-  global nn
-  global F
-  global optim
-  global torchvision
-  global transforms
-  import torch
-  import torch.nn as nn
-  import torch.nn.functional as F
-  import torch.optim as optim
-  import torchvision
-  import torchvision.transforms as transforms
+    """Perform setup required before training.
+
+    Does the lazy loading of PyTorch too, to prevent compatibility issues
+    with mixing TensorFlow and PyTorch imports.
+    """
+    global torch
+    global nn
+    global F
+    global optim
+    global torchvision
+    global transforms
+    import torch
+    import torch.nn as nn
+    import torch.nn.functional as F
+    import torch.optim as optim
+    import torchvision
+    import torchvision.transforms as transforms


 def load_data():
-  """Load the CIFAR10 data.
+    """Load the CIFAR10 data.

-  Obtains both the train and the test splits. According to
-  https://www.cs.toronto.edu/~kriz/cifar.html, there should be 50000 training
-  images and 10000 test ones. Each image is 32x32 RGB.
+    Obtains both the train and the test splits. According to
+    https://www.cs.toronto.edu/~kriz/cifar.html, there should be 50000 training
+    images and 10000 test ones. Each image is 32x32 RGB.

-  Data is normalized to be in range [-1, 1].
+    Data is normalized to be in range [-1, 1].

-  Returns iterators to train and test sets.
-  """
-  transform = transforms.Compose([
-    transforms.ToTensor(),
-    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
-  ])
+    Returns iterators to train and test sets.
+    """
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
+    ])

-  batch_size = 4
-  num_workers = 2
+    batch_size = 4
+    num_workers = 2

-  trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
-                                          download=True, transform=transform)
-  trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
-                                            shuffle=True,
-                                            num_workers=num_workers)
-  testset = torchvision.datasets.CIFAR10(root='./data', train=False,
-                                         download=True, transform=transform)
-  testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
-                                           shuffle=True,
-                                           num_workers=num_workers)
+    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
+                                            download=True, transform=transform)
+    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
+                                              shuffle=True,
+                                              num_workers=num_workers)
+    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
+                                           download=True, transform=transform)
+    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
+                                             shuffle=True,
+                                             num_workers=num_workers)

-  return trainloader, testloader
+    return trainloader, testloader


 def create_model():
-  """Create a Torch NN model.
+    """Create a Torch NN model.

-  The model is taken from the tutorial at
-  https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html.
+    The model is taken from the tutorial at
+    https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html.

-  Returns the model.
-  """
-  # Train a model based on tutorial from
-  # https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html.
-  # We inline the class to be able to use lazy loading of PyTorch modules.
-  class MyModel(nn.Module):
-    def __init__(self):
-      super().__init__()
-      self.conv1 = nn.Conv2d(3, 6, 5)
-      self.pool = nn.MaxPool2d(2, 2)
-      self.conv2 = nn.Conv2d(6, 16, 5)
-      self.fc1 = nn.Linear(16 * 5 * 5, 120)
-      self.fc2 = nn.Linear(120, 84)
-      self.fc3 = nn.Linear(84, 10)
-
-
-    def forward(self, x):
-      x = self.pool(F.relu(self.conv1(x)))
-      x = self.pool(F.relu(self.conv2(x)))
-      x = torch.flatten(x, 1)
-      x = F.relu(self.fc1(x))
-      x = F.relu(self.fc2(x))
-      x = self.fc3(x)
-      return x
-
-  return MyModel()
+    Returns the model.
+    """
+    # Train a model based on tutorial from
+    # https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html.
+    # We inline the class to be able to use lazy loading of PyTorch modules.
+    class MyModel(nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.conv1 = nn.Conv2d(3, 6, 5)
+            self.pool = nn.MaxPool2d(2, 2)
+            self.conv2 = nn.Conv2d(6, 16, 5)
+            self.fc1 = nn.Linear(16 * 5 * 5, 120)
+            self.fc2 = nn.Linear(120, 84)
+            self.fc3 = nn.Linear(84, 10)
+
+        def forward(self, x):
+            x = self.pool(F.relu(self.conv1(x)))
+            x = self.pool(F.relu(self.conv2(x)))
+            x = torch.flatten(x, 1)
+            x = F.relu(self.fc1(x))
+            x = F.relu(self.fc2(x))
+            x = self.fc3(x)
+            return x
+
+    return MyModel()


 def prepare_model(model):
-  """Prepare model for training with loss and optimizer."""
-  # We only need to return loss and optimizer
-  loss = nn.CrossEntropyLoss()
-  optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
-  return loss, optimizer
+    """Prepare model for training with loss and optimizer."""
+    # We only need to return loss and optimizer
+    loss = nn.CrossEntropyLoss()
+    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
+    return loss, optimizer


 def train_model(model, loss, optimizer, train):
-  """Train a model on the training set."""
-  num_epochs = 2
-  batch_size = 2000
-  for epoch in range(num_epochs):
-    running_loss = 0.0
-    for i, data in enumerate(train, 1):
-      x, y = data
-      optimizer.zero_grad()
-      outputs = model(x)
-      loss_score = loss(outputs, y)
-      loss_score.backward()
-      optimizer.step()
-      running_loss += loss_score.item()
-      if i % batch_size == 0:
-        print(f'[{epoch}, {i:5d}], loss: {running_loss / batch_size :.3f}')
-        running_loss = 0.0
+    """Train a model on the training set."""
+    num_epochs = 2
+    batch_size = 2000  # printing interval in mini-batches, not a batch size
+    for epoch in range(num_epochs):
+        running_loss = 0.0
+        for i, data in enumerate(train, 1):
+            x, y = data
+            optimizer.zero_grad()
+            outputs = model(x)
+            loss_score = loss(outputs, y)
+            loss_score.backward()
+            optimizer.step()
+            running_loss += loss_score.item()
+            if i % batch_size == 0:
+                print(f'[{epoch}, {i:5d}], '
+                      f'loss: {running_loss / batch_size :.3f}')
+                running_loss = 0.0


 def score_model(model, test):
-  """Score a trained model on the test set."""
-  correct = 0
-  total = 0
-  with torch.no_grad():
-    for data in test:
-      x, y = data
-      outputs = model(x)
-      _, predicted = torch.max(outputs.data, 1)
-      total += y.size(0)
-      correct += (predicted == y).sum().item()
-  print(f'Test accuracy: {correct / total}')
+    """Score a trained model on the test set."""
+    correct = 0
+    total = 0
+    with torch.no_grad():
+        for data in test:
+            x, y = data
+            outputs = model(x)
+            _, predicted = torch.max(outputs.data, 1)
+            total += y.size(0)
+            correct += (predicted == y).sum().item()
+    print(f'Test accuracy: {correct / total}')


 def supported_models():
-  """Returns supported model types paired with method to save them."""
-  return {
-    'pytorch_model.pth': lambda m, p: torch.save(m.state_dict(), p),
-    'pytorch_full_model.pth': lambda m, p: torch.save(m, p),
-    'pytorch_jitted_model.pt': lambda m, p: torch.jit.script(m).save(p),
-  }
+    """Returns supported model types paired with method to save them."""
+    return {
+        'pytorch_model.pth': lambda m, p: torch.save(m.state_dict(), p),
+        'pytorch_full_model.pth': lambda m, p: torch.save(m, p),
+        'pytorch_jitted_model.pt': lambda m, p: torch.jit.script(m).save(p),
+    }


 def save_model(model, model_format):
-  """Save the model after training to be transferred to production.
+    """Save the model after training to be transferred to production.

-  Saves in the requested format, if supported by PyTorch.
- """ - saver = supported_models().get(model_format, None) - if not saver: - raise ValueError('Requested a model format not supported by PyTorch') - saver(model, './' + model_format) + Saves in the requested format, if supported by PyTorch. + """ + saver = supported_models().get(model_format, None) + if not saver: + raise ValueError('Requested a model format not supported by PyTorch') + saver(model, './' + model_format) def model_pipeline(model_format): - """Train a model and save it in the requested format.""" - pretraining() - data = load_data() - model = create_model() - loss, optimizer = prepare_model(model) - train_model(model, loss, optimizer, data[0]) - score_model(model, data[1]) - save_model(model, model_format) + """Train a model and save it in the requested format.""" + pretraining() + data = load_data() + model = create_model() + loss, optimizer = prepare_model(model) + train_model(model, loss, optimizer, data[0]) + score_model(model, data[1]) + save_model(model, model_format) diff --git a/slsa_for_models/tensorflow_cifar10.py b/slsa_for_models/tensorflow_cifar10.py index e86c343e..ce5dc15b 100644 --- a/slsa_for_models/tensorflow_cifar10.py +++ b/slsa_for_models/tensorflow_cifar10.py @@ -21,137 +21,139 @@ def pretraining(): - """Perform setup required before training. + """Perform setup required before training. - Does the lazy loading of TensorFlow too, to prevent compatibility issues with - mixing TensorFlow and PyTorch imports. - """ - global tf - global tfds - import tensorflow as tf - import tensorflow_datasets as tfds - # Also compile model using XLA for ~20% performance gain - tf.config.optimizer.set_jit(True) + Does the lazy loading of TensorFlow too, to prevent compatibility issues + with mixing TensorFlow and PyTorch imports. + """ + global tf + global tfds + import tensorflow as tf + import tensorflow_datasets as tfds + # Also compile model using XLA for ~20% performance gain + tf.config.optimizer.set_jit(True) def load_data(): - """Load the CIFAR10 data. + """Load the CIFAR10 data. - Obtains both the train and the test splits. According to - https://www.cs.toronto.edu/~kriz/cifar.html, there should be 50000 training - images and 10000 test ones. Each image is 32x32 RGB. + Obtains both the train and the test splits. According to + https://www.cs.toronto.edu/~kriz/cifar.html, there should be 50000 training + images and 10000 test ones. Each image is 32x32 RGB. - Data is normalized to be in [0, 1). Labels are one-hot encoded. + Data is normalized to be in [0, 1). Labels are one-hot encoded. - Returns train and test pairs. Each pair consists of features and labels - vectors of similar size. - """ - result = tfds.load('cifar10', batch_size = -1) - x_train = result['train']['image'] - y_train = result['train']['label'] - x_test = result['test']['image'] - y_test = result['test']['label'] + Returns train and test pairs. Each pair consists of features and labels + vectors of similar size. 
+ """ + result = tfds.load('cifar10', batch_size=-1) + x_train = result['train']['image'] + y_train = result['train']['label'] + x_test = result['test']['image'] + y_test = result['test']['label'] - # transform input - x_train = x_train.numpy().astype('float32') / 256 - x_test = x_test.numpy().astype('float32') / 256 - y_train = tf.keras.utils.to_categorical(y_train, num_classes=10) - y_test = tf.keras.utils.to_categorical(y_test, num_classes=10) + # transform input + x_train = x_train.numpy().astype('float32') / 256 + x_test = x_test.numpy().astype('float32') / 256 + y_train = tf.keras.utils.to_categorical(y_train, num_classes=10) + y_test = tf.keras.utils.to_categorical(y_test, num_classes=10) - return (x_train, y_train), (x_test, y_test) + return (x_train, y_train), (x_test, y_test) def create_model(in_shape): - """Create a TensorFlow NN model. - - The model is taken from the tutorial at - https://www.tensorflow.org/xla/tutorials/autoclustering_xla. - - We need to pass as argument the expected input shape. - - Returns the model. - """ - x, _, c = in_shape - return tf.keras.models.Sequential([ - tf.keras.layers.Conv2D(x, (c, c), padding='same', input_shape=in_shape), - tf.keras.layers.Activation('relu'), - tf.keras.layers.Conv2D(x, (c, c)), - tf.keras.layers.Activation('relu'), - tf.keras.layers.MaxPooling2D(pool_size=(2,2)), - tf.keras.layers.Dropout(0.25), - tf.keras.layers.Conv2D(2*x, (c, c), padding='same'), - tf.keras.layers.Activation('relu'), - tf.keras.layers.Conv2D(2*x, (c, c)), - tf.keras.layers.Activation('relu'), - tf.keras.layers.MaxPooling2D(pool_size=(2,2)), - tf.keras.layers.Dropout(0.25), - tf.keras.layers.Flatten(), - tf.keras.layers.Dense(512), - tf.keras.layers.Activation('relu'), - tf.keras.layers.Dropout(0.5), - tf.keras.layers.Dense(10), - tf.keras.layers.Activation('softmax'), - ]) + """Create a TensorFlow NN model. + + The model is taken from the tutorial at + https://www.tensorflow.org/xla/tutorials/autoclustering_xla. + + We need to pass as argument the expected input shape. + + Returns the model. + """ + x, _, c = in_shape + return tf.keras.models.Sequential([ + tf.keras.layers.Conv2D(x, (c, c), padding='same', + input_shape=in_shape), + tf.keras.layers.Activation('relu'), + tf.keras.layers.Conv2D(x, (c, c)), + tf.keras.layers.Activation('relu'), + tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + tf.keras.layers.Dropout(0.25), + tf.keras.layers.Conv2D(2*x, (c, c), padding='same'), + tf.keras.layers.Activation('relu'), + tf.keras.layers.Conv2D(2*x, (c, c)), + tf.keras.layers.Activation('relu'), + tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + tf.keras.layers.Dropout(0.25), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(512), + tf.keras.layers.Activation('relu'), + tf.keras.layers.Dropout(0.5), + tf.keras.layers.Dense(10), + tf.keras.layers.Activation('softmax'), + ]) def prepare_model(model): - """Prepare model for training with loss and optimizer.""" - opt = tf.keras.optimizers.RMSprop(learning_rate=0.0001) - model.compile(loss='categorical_crossentropy', - optimizer=opt, - metrics=['accuracy']) - return model + """Prepare model for training with loss and optimizer.""" + opt = tf.keras.optimizers.RMSprop(learning_rate=0.0001) + model.compile(loss='categorical_crossentropy', + optimizer=opt, + metrics=['accuracy']) + return model def train_model(model, train, test): - """Train a model on the training set. + """Train a model on the training set. - The test set is used for cross validation. 
-  """
-  x, y = train
-  model.fit(x, y, batch_size=256, epochs=16,
-            validation_data=test, shuffle=True)
+    The test set is used for validation during training.
+    """
+    x, y = train
+    model.fit(x, y, batch_size=256, epochs=16,
+              validation_data=test, shuffle=True)


 def score_model(model, test):
-  """Score a trained model on the test set."""
-  x, y = test
-  scores = model.evaluate(x, y, verbose=1)
-  print(f'Test loss: {scores[0]}')
-  print(f'Test accuracy: {scores[1]}')
+    """Score a trained model on the test set."""
+    x, y = test
+    scores = model.evaluate(x, y, verbose=1)
+    print(f'Test loss: {scores[0]}')
+    print(f'Test accuracy: {scores[1]}')


 def supported_models():
-  """Returns supported model types paired with method to save them."""
-  return {
-    # New Keras format
-    'tensorflow_model.keras': lambda m, p: m.save(p, save_format='keras'),
-    # TF SavedModel formats, full model and weights only
-    'tensorflow_saved_model': lambda m, p: m.save(p, save_format='tf'),
-    'tensorflow_exported_model': lambda m, p: m.export(p),
-    # Legacy HDFS format, full model and weights only
-    'tensorflow_hdf5_model.h5': lambda m, p: m.save(p, save_format='h5'),
-    'tensorflow_hdf5.weights.h5': lambda m, p: m.save_weights(p),
-  }
+    """Returns supported model types paired with method to save them."""
+    return {
+        # New Keras format
+        'tensorflow_model.keras': lambda m, p: m.save(p, save_format='keras'),
+        # TF SavedModel formats, full model and inference-only export
+        'tensorflow_saved_model': lambda m, p: m.save(p, save_format='tf'),
+        'tensorflow_exported_model': lambda m, p: m.export(p),
+        # Legacy HDF5 format, full model and weights only
+        'tensorflow_hdf5_model.h5': lambda m, p: m.save(p, save_format='h5'),
+        'tensorflow_hdf5.weights.h5': lambda m, p: m.save_weights(p),
+    }


 def save_model(model, model_format):
-  """Save the model after training to be transferred to production.
+    """Save the model after training to be transferred to production.

-  Saves in the requested format, if supported by TensorFlow.
-  """
-  saver = supported_models().get(model_format, None)
-  if not saver:
-    raise ValueError('Requested a model format not supported by TensorFlow')
-  saver(model, './' + model_format)
+    Saves in the requested format, if supported by TensorFlow.
+    """
+    saver = supported_models().get(model_format, None)
+    if not saver:
+        raise ValueError(
+            'Requested a model format not supported by TensorFlow')
+    saver(model, './' + model_format)


 def model_pipeline(model_format):
-  """Train a model and save it in the requested format."""
-  pretraining()
-  data = load_data()
-  model = create_model(data[0][0].shape[1:])
-  model = prepare_model(model)
-  train_model(model, data[0], data[1])
-  score_model(model, data[1])
-  save_model(model, model_format)
+    """Train a model and save it in the requested format."""
+    pretraining()
+    data = load_data()
+    model = create_model(data[0][0].shape[1:])
+    model = prepare_model(model)
+    train_model(model, data[0], data[1])
+    score_model(model, data[1])
+    save_model(model, model_format)
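
Note for reviewers (not part of the patch): since the change is intended to
be formatting-only, the quickest behavior check is to run each pipeline end
to end after applying it. Below is a minimal smoke-test sketch; the file
name smoke_test.py is hypothetical, and it assumes it is run from the
repository root with PyTorch, torchvision, TensorFlow and
tensorflow-datasets installed, and that main.py resolves its tf/pt module
imports when invoked from there.

    # smoke_test.py -- hypothetical reviewer aid; not part of the patch.
    # Run each framework in its own process, mirroring the CLI in main.py,
    # because both modules lazy-load their framework precisely to avoid
    # mixing TensorFlow and PyTorch imports in a single process.
    import subprocess
    import sys

    # One format per framework; the names are keys of supported_models()
    # and exactly what main.py accepts as its positional `model` argument.
    for model_name in ('pytorch_model.pth', 'tensorflow_model.keras'):
        subprocess.run(
            [sys.executable, 'slsa_for_models/main.py', model_name],
            check=True)

Each run downloads CIFAR10, trains (2 epochs for the PyTorch model, 16 for
the TensorFlow one), prints the test metrics, and writes the serialized
model into the current directory, so expect a slow functional check rather
than an instant one.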