10 changes: 1 addition & 9 deletions beginner_source/quickstart/build_model_tutorial.py
@@ -114,15 +114,7 @@ def forward(self, x):
#
# From the docs:
#
-# torch.nn.Linear(in_features: int, out_features: int, bias: bool = True)
-#
-# in_features – size of each input sample
-#
-# out_features – size of each output sample
-#
-# bias – If set to False, the layer will not learn an additive bias. Default: True
-#
-# Lets take a look at the resulting data example with the flatten layer and linear layer added:
+# `torch.nn.Linear(in_features: int, out_features: int, bias: bool = True)`
#

input = training_data[0][0]
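Below this hunk, a minimal sketch of what the flatten and linear layers do to one sample (assuming 28x28 grayscale images as in FashionMNIST; the names here are illustrative, not the tutorial's own):

import torch
from torch import nn

flatten = nn.Flatten()
linear = nn.Linear(in_features=28 * 28, out_features=512)

sample = torch.rand(1, 28, 28)   # one image: channels x height x width
flat = flatten(sample)           # shape (1, 784): all pixels in one row
out = linear(flat)               # shape (1, 512): one value per out_feature
print(flat.shape, out.shape)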
4 changes: 2 additions & 2 deletions beginner_source/quickstart/data_quickstart_tutorial.py
@@ -11,8 +11,8 @@
#

###############################################################
-# .. figure:: /_static/img/quickstart/typesofdata.png
-#    :alt: typesofdata
+# .. figure:: /_static/img/quickstart/typesdata.png
+#    :alt: typesdata
#

############################################################
61 changes: 28 additions & 33 deletions beginner_source/quickstart/optimization_tutorial.py
@@ -38,25 +38,29 @@
#
# The optimization loop is comprised of three main subloops in PyTorch.
#
-# .. figure:: /_static/img/quickstart/optimization_loops.png

+############################################################
+# .. figure:: /_static/img/quickstart/optimizationloops.png
+#    :alt:
+#
+#

#############################################################
# 1. The Train Loop - Core loop iterates over all the epochs
# 2. The Validation Loop - Validates the loss after each weight update; it can be used to gauge hyperparameter performance and update hyperparameters for the next batch.
# 3. The Test Loop - Evaluates the model's performance after each epoch on traditional metrics, showing how well the model generalizes from the train and validation datasets to a test dataset it has never seen before.
#

for epoch in range(num_epochs):
    # Optimization Loop
    # Train loop over batches
    model.train() # set model to train
    # Model Update Code
    model.eval() # after exiting the batch loop, set model to eval to speed up evaluation and not track gradients (explained below)
    # Validation Loop
    # - Put sample validation metric logging and hyperparameter update code here
    # After exiting the train loop, set model to eval to speed up evaluation and not track gradients (explained below)
    # Test Loop
    # - Put sample test metric logging and hyperparameter update code here

######################################################
# Loss
@@ -67,40 +67,31 @@

preds = model(inputs)
loss = cost_function(preds, labels)

######################################################
# AutoGrad and Optimizer (We might want to split this when we go more in-depth on autograd)
# -----------------
#
# By default, each tensor maintains a graph of every operation applied to it, unless gradient tracking is disabled with the torch.no_grad() context manager.
#
# `Autograd graph <https://discuss.pytorch.org/uploads/default/original/1X/c7e0a44b7bcebfb41315b56f8418ce37f0adbfeb.png>`_
#
# PyTorch uses this graph to automatically compute gradients of our model's loss with respect to its parameters during training. This is done with one line: loss.backward(). Once we have our gradients, the optimizer uses them to update all the parameters in our model.

-optimizer.zero_grad() # make sure previous gradients are cleared
-loss.backward() # calculates gradients with respect to loss
+# Make sure previous gradients are cleared
+optimizer.zero_grad()
+# Calculates gradients with respect to loss
+loss.backward()
optimizer.step()
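As a minimal, self-contained sketch of one full update step (the model, batch, and learning rate below are illustrative stand-ins, not the tutorial's own):

import torch
from torch import nn

model = nn.Linear(4, 2)                 # stand-in model
inputs = torch.randn(8, 4)              # stand-in batch of 8 samples
labels = torch.randint(0, 2, (8,))      # stand-in class labels

cost_function = nn.CrossEntropyLoss()   # an assumed cost function
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)

preds = model(inputs)                   # forward pass builds the autograd graph
loss = cost_function(preds, labels)

optimizer.zero_grad()                   # clear gradients left over from the previous step
loss.backward()                         # backpropagate: populate .grad on each parameter
optimizer.step()                        # update the parameters using those gradients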

######################################################
-# The standard method for optimization is called Stochastic Gradient Descent, to learn more check out this awesome video by `3blue1brown <https://www.youtube.com/playlist?list=PLZHQObOWTQDNU6R1_67000Dx_ZCJB-3pi>`_. There are many different optimizers and variations of this method in PyTorch such as ADAM and RMSProp that work better for different kinds of models, they are out side the scope of this Blitz, but can check out the full list of optimizers[here](https://pytorch.org/docs/stable/optim.html)
+# The standard method for optimization is called Stochastic Gradient Descent; to learn more, check out this awesome video by `3blue1brown <https://www.youtube.com/playlist?list=PLZHQObOWTQDNU6R1_67000Dx_ZCJB-3pi>`_. There are many different optimizers and variations of this method in PyTorch, such as Adam and RMSprop, that work better for different kinds of models. They are outside the scope of this Blitz, but you can check out the full list of optimizers `here <https://pytorch.org/docs/stable/optim.html>`_.
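For instance (a sketch; ``model`` as in the snippet above), switching optimizers is a one-line change:

import torch

optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
# optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)     # a common adaptive variant
# optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-3)  # another adaptive variant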

######################################################
# Putting it all together, let's look at a basic optimization loop (a runnable sketch follows this outline)
# -----------------
#
# Initialize optimizer and example cost function
#
# For loop to iterate over epochs
# - Train loop over batches
#    - Set model to train mode
#    - Calculate loss using the cost function
#    - Clear optimizer gradient
#    - loss.backward()
#    - Optimizer step
# - Set model to evaluate mode and start validation loop
#    - Calculate validation loss and update optimizer hyperparameters
# - Set model to evaluate mode and run the test loop
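A minimal, runnable sketch of that outline (the model, cost function, and loaders below are illustrative stand-ins so the snippet runs on its own; in the tutorial they come from the earlier steps):

import torch
from torch import nn

model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
cost_function = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
num_epochs = 2

def fake_loader(n_batches=4, batch_size=8):
    # Stand-in for a DataLoader: yields (images, labels) batches
    for _ in range(n_batches):
        yield torch.rand(batch_size, 1, 28, 28), torch.randint(0, 10, (batch_size,))

for epoch in range(num_epochs):              # Optimization Loop
    model.train()                            # set model to train mode
    for inputs, labels in fake_loader():     # Train loop over batches
        preds = model(inputs)
        loss = cost_function(preds, labels)  # calculate loss using the cost function
        optimizer.zero_grad()                # clear optimizer gradient
        loss.backward()                      # calculate gradients
        optimizer.step()                     # optimizer step

    model.eval()                             # evaluate mode: no gradient tracking needed
    with torch.no_grad():                    # Validation loop (the test loop looks the same)
        for inputs, labels in fake_loader():
            val_loss = cost_function(model(inputs), labels)
    print(f"epoch {epoch}: train loss {loss.item():.4f}, val loss {val_loss.item():.4f}")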


##################################################################
1 change: 1 addition & 0 deletions beginner_source/quickstart/save_load_run_tutorial.py
@@ -93,6 +93,7 @@
##################################################################
# More help with the PyTorch Quickstart
# ----------------------------------------
#
# | `Tensors <tensor_tutorial.html>`_
# | `DataSets and DataLoaders <data_quickstart_tutorial.html>`_
# | `Transformations <transforms_tutorial.html>`_
4 changes: 2 additions & 2 deletions beginner_source/quickstart/tensor_tutorial.py
@@ -25,7 +25,7 @@


######################################################################
-# ..note: When using CPU for computations, tensors converted from arrays
+# .. note:: When using CPU for computations, tensors converted from arrays
# share the same memory for data. Thus, changing the underlying array will
# also affect the tensor.
#
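A quick sketch of that sharing behavior (standard torch/numpy API):

import numpy as np
import torch

a = np.ones(3)
t = torch.from_numpy(a)   # on CPU, t shares a's memory
a[0] = 5.0
print(t)                  # tensor([5., 1., 1.], dtype=torch.float64): the tensor sees the change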
@@ -187,7 +187,7 @@


######################################################################
-# ..note: ``view`` is similar to ``reshape`` operation in NumPy. There
+# .. note:: ``view`` is similar to the ``reshape`` operation in NumPy. There
# is also a ``reshape`` method available in PyTorch, and it is more
# powerful than ``view``, because it can also reshape non-contiguous
# arrays by copying them to the new shape. However, in the vast majority of
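The hunk is truncated here, but a quick sketch of the ``view`` vs ``reshape`` distinction (standard PyTorch API):

import torch

x = torch.arange(6)
print(x.view(2, 3))   # works: x is contiguous
y = x.view(2, 3).t()  # transposing makes the tensor non-contiguous
# y.view(6) would raise a RuntimeError; reshape copies when it must
print(y.reshape(6))   # tensor([0, 3, 1, 4, 2, 5])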
2 changes: 1 addition & 1 deletion beginner_source/quickstart/transforms_tutorial.py
@@ -61,7 +61,7 @@
# For the feature transforms we have an array of transforms to process our image data for training. The first transform in the array is `transforms.ToTensor()`, which comes from the class `torchvision.transforms.ToTensor <https://pytorch.org/docs/stable/torchvision/transforms.html#torchvision.transforms.ToTensor>`_. We need to take our images and turn them into tensors. (To learn more about tensors check out [this]() resource.) The ToTensor() transformation does more than convert our image into a tensor; it also normalizes our data for us by scaling the images to be between 0 and 1.
#
#
-# ..note: ToTensor only normalized image data that is in PIL mode of (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1) or if the numpy.ndarray has dtype = np.uint8. In the other cases, tensors are returned without scaling.
+# .. note:: ToTensor only normalizes image data that is in a PIL mode of (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1) or if the numpy.ndarray has dtype = np.uint8. In the other cases, tensors are returned without scaling.
#
#
# Check out the other `TorchVision Transforms <https://pytorch.org/docs/stable/torchvision/transforms.html>`_
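A small sketch of that scaling behavior (standard torchvision/PIL API; the image is synthetic):

import numpy as np
from PIL import Image
from torchvision import transforms

img = Image.fromarray(np.full((28, 28), 255, dtype=np.uint8))  # mode "L" image, all white
t = transforms.ToTensor()(img)
print(t.shape, t.min().item(), t.max().item())  # torch.Size([1, 28, 28]) 1.0 1.0 -- uint8 0-255 scaled to [0, 1]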