Commit af41b88

Merge branch 'main' into fix-batch-size-optimization-tutorial

2 parents 9f6ba07 + 63f987d

File tree

10 files changed, +139 -397 lines changed

.jenkins/validate_tutorials_built.py

Lines changed: 0 additions & 1 deletion
@@ -37,7 +37,6 @@
     "prototype_source/nestedtensor",
     "recipes_source/recipes/saving_and_loading_models_for_inference",
     "recipes_source/recipes/saving_multiple_models_in_one_file",
-    "recipes_source/recipes/loading_data_recipe",
     "recipes_source/recipes/tensorboard_with_pytorch",
     "recipes_source/recipes/what_is_state_dict",
     "recipes_source/recipes/profiler_recipe",

beginner_source/dist_overview.rst

Lines changed: 59 additions & 186 deletions
Large diffs are not rendered by default.

en-wordlist.txt

Lines changed: 6 additions & 0 deletions
@@ -335,6 +335,7 @@ dataset’s
 deallocation
 decompositions
 decorrelated
+devicemesh
 deserialize
 deserialized
 desynchronization
@@ -346,6 +347,7 @@ distractor
 downsample
 downsamples
 dropdown
+dtensor
 duration
 elementwise
 embeddings
@@ -482,6 +484,7 @@ prespecified
 pretrained
 prewritten
 primals
+processgroup
 profiler
 profilers
 protobuf
@@ -503,6 +506,7 @@ relu
 reproducibility
 rescale
 rescaling
+reshard
 resnet
 restride
 rewinded
@@ -515,6 +519,8 @@ runtime
 runtime
 runtimes
 scalable
+sharded
+Sharding
 softmax
 sparsified
 sparsifier

intermediate_source/torch_compile_tutorial.py

Lines changed: 1 addition & 1 deletion
@@ -135,7 +135,7 @@ def init_model():
 ######################################################################
 # First, let's compare inference.
 #
-# Note that in the call to ``torch.compile``, we have have the additional
+# Note that in the call to ``torch.compile``, we have the additional
 # ``mode`` argument, which we will discuss below.

 model = init_model()
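The ``mode`` argument that the corrected comment refers to can be sketched as follows. This is a minimal illustration assuming PyTorch 2.x; the function ``fn`` is a stand-in for the tutorial's model, not code from this commit:

```python
import torch

def fn(x):
    return torch.sin(x) + torch.cos(x)

# Default mode balances compile time and runtime performance.
compiled_default = torch.compile(fn)

# "reduce-overhead" targets cases where kernel-launch overhead dominates;
# "max-autotune" searches longer for faster kernels at higher compile cost.
compiled_fast = torch.compile(fn, mode="reduce-overhead")

# Compilation is lazy: it only happens on the first call with real inputs,
# so constructing the compiled callables is cheap.
assert callable(compiled_default) and callable(compiled_fast)
```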
Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
+Loading data in PyTorch
+=======================
+
+The content is deprecated. See `Datasets & DataLoaders <https://pytorch.org/tutorials/beginner/basics/data_tutorial.html>`__ instead.
+
+.. raw:: html
+
+   <meta http-equiv="Refresh" content="1; url='https://pytorch.org/tutorials/beginner/basics/data_tutorial.html'" />

recipes_source/recipes/README.txt

Lines changed: 14 additions & 18 deletions
@@ -1,62 +1,58 @@
 PyTorch Recipes
 ---------------------------------------------
-1. loading_data_recipe.py
-   Loading Data in PyTorch
-   https://pytorch.org/tutorials/recipes/recipes/loading_data_recipe.html
-
-2. defining_a_neural_network.py
+1. defining_a_neural_network.py
    Defining a Neural Network in PyTorch
    https://pytorch.org/tutorials/recipes/recipes/defining_a_neural_network.html

-3. what_is_state_dict.py
+2. what_is_state_dict.py
    What is a state_dict in PyTorch
    https://pytorch.org/tutorials/recipes/recipes/what_is_state_dict.html

-4. saving_and_loading_models_for_inference.py
+3. saving_and_loading_models_for_inference.py
    Saving and loading models for inference in PyTorch
    https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_models_for_inference.html

-5. custom_dataset_transforms_loader.py
+4. custom_dataset_transforms_loader.py
    Developing Custom PyTorch Dataloaders
    https://pytorch.org/tutorials/recipes/recipes/custom_dataset_transforms_loader.html


-6. Captum_Recipe.py
+5. Captum_Recipe.py
    Model Interpretability using Captum
    https://pytorch.org/tutorials/recipes/recipes/Captum_Recipe.html

-7. dynamic_quantization.py
+6. dynamic_quantization.py
    Dynamic Quantization
    https://pytorch.org/tutorials/recipes/recipes/dynamic_quantization.html

-8. save_load_across_devices.py
+7. save_load_across_devices.py
    Saving and loading models across devices in PyTorch
    https://pytorch.org/tutorials/recipes/recipes/save_load_across_devices.html

-9. saving_and_loading_a_general_checkpoint.py
+8. saving_and_loading_a_general_checkpoint.py
    Saving and loading a general checkpoint in PyTorch
    https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_a_general_checkpoint.html

-10. saving_and_loading_models_for_inference.py
+9. saving_and_loading_models_for_inference.py
    Saving and loading models for inference in PyTorch
    https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_models_for_inference.html

-11. saving_multiple_models_in_one_file.py
+10. saving_multiple_models_in_one_file.py
    Saving and loading multiple models in one file using PyTorch
    https://pytorch.org/tutorials/recipes/recipes/saving_multiple_models_in_one_file.html

-12. warmstarting_model_using_parameters_from_a_different_model.py
+11. warmstarting_model_using_parameters_from_a_different_model.py
    Warmstarting models using parameters from different model
    https://pytorch.org/tutorials/recipes/recipes/warmstarting_model_using_parameters_from_a_different_model.html

-13. zeroing_out_gradients.py
+12. zeroing_out_gradients.py
    Zeroing out gradients
    https://pytorch.org/tutorials/recipes/recipes/zeroing_out_gradients.html

-14. mobile_perf.py
+13. mobile_perf.py
    PyTorch Mobile Performance Recipes
    https://pytorch.org/tutorials/recipes/mobile_perf.html

-15. amp_recipe.py
+14. amp_recipe.py
    Automatic Mixed Precision
    https://pytorch.org/tutorials/recipes/amp_recipe.html

recipes_source/recipes/loading_data_recipe.py

Lines changed: 0 additions & 163 deletions
This file was deleted.

recipes_source/recipes/tuning_guide.py

Lines changed: 32 additions & 0 deletions
@@ -213,6 +213,7 @@ def gelu(x):

 ###############################################################################
 # Typically, the following environment variables are used to set for CPU affinity with GNU OpenMP implementation. ``OMP_PROC_BIND`` specifies whether threads may be moved between processors. Setting it to CLOSE keeps OpenMP threads close to the primary thread in contiguous place partitions. ``OMP_SCHEDULE`` determines how OpenMP threads are scheduled. ``GOMP_CPU_AFFINITY`` binds threads to specific CPUs.
+# An important tuning parameter is core pinning, which prevents threads from migrating between CPUs, improving data locality and minimizing inter-core communication.
 #
 # .. code-block:: sh
 #
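The affinity variables described in the hunk above can be combined in a launch sketch like the following. The thread count and core range are illustrative assumptions, not values from the commit:

```shell
# Pin OpenMP threads for a 4-core run; adjust to your CPU topology.
export OMP_NUM_THREADS=4        # number of OpenMP threads (assumed value)
export OMP_PROC_BIND=CLOSE      # keep threads near the primary thread
export OMP_SCHEDULE=STATIC      # static loop scheduling
export GOMP_CPU_AFFINITY="0-3"  # bind threads to cores 0-3 (assumed range)
echo "pinned: $GOMP_CPU_AFFINITY"
```

With GNU OpenMP, ``GOMP_CPU_AFFINITY`` takes precedence for the actual core binding, while ``OMP_PROC_BIND=CLOSE`` keeps threads in place partitions near the primary thread.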
@@ -318,6 +319,37 @@ def gelu(x):
 # GPU specific optimizations
 # --------------------------

+###############################################################################
+# Enable Tensor cores
+# ~~~~~~~~~~~~~~~~~~~~~~~
+# Tensor cores are specialized hardware designed to compute matrix-matrix multiplication
+# operations, primarily utilized in deep learning and AI workloads. Tensor cores have
+# specific precision requirements which can be adjusted manually or via the Automatic
+# Mixed Precision API.
+#
+# In particular, tensor operations take advantage of lower-precision workloads,
+# which can be controlled via ``torch.set_float32_matmul_precision``.
+# The default setting is 'highest', which performs float32 matrix multiplications
+# at full precision. PyTorch also offers the alternative settings 'high' and
+# 'medium', which prioritize computational speed over numerical precision.
+
+###############################################################################
+# Use CUDA Graphs
+# ~~~~~~~~~~~~~~~~~~~~~~~
+# When using a GPU, work first must be launched from the CPU, and in some
+# cases the context switch between CPU and GPU can lead to poor resource
+# utilization. CUDA graphs are a way to keep computation within the GPU without
+# paying the extra cost of kernel launches and host synchronization.
+
+# They can be enabled using
+torch.compile(m, mode="reduce-overhead")
+# or
+torch.compile(m, mode="max-autotune")
+
+###############################################################################
+# Support for CUDA graphs is in development; using them can increase
+# device memory consumption, and some models might not compile.
+
 ###############################################################################
 # Enable cuDNN auto-tuner
 # ~~~~~~~~~~~~~~~~~~~~~~~
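The precision knob introduced in the "Enable Tensor cores" section above can be exercised with a short sketch. This assumes PyTorch 1.12 or later, where ``torch.set_float32_matmul_precision`` is available; the matrix sizes are illustrative:

```python
import torch

# "highest" (the default) keeps full float32 precision for matmuls;
# "high" and "medium" allow lower-precision formats (e.g. TF32) on
# Tensor-core GPUs, trading accuracy for speed.
torch.set_float32_matmul_precision("high")
print(torch.get_float32_matmul_precision())  # "high"

a = torch.randn(256, 256)
b = torch.randn(256, 256)
# On CUDA devices with Tensor cores this matmul may now use TF32;
# on CPU the setting is recorded but this matmul runs in plain float32.
c = a @ b
print(c.shape)  # torch.Size([256, 256])
```

The setting is process-global, so it is typically placed once near the start of a training script rather than toggled per operation.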
