diff --git a/docs/_tutorials/advanced-install.md b/docs/_tutorials/advanced-install.md index b1c54325c3a7..a1493a9e9e24 100755 --- a/docs/_tutorials/advanced-install.md +++ b/docs/_tutorials/advanced-install.md @@ -11,10 +11,6 @@ just-in-time (JIT) using [torch's JIT C++ extension loader that relies on ninja](https://pytorch.org/docs/stable/cpp_extension.html) to build and dynamically link them at runtime. -**Note:** [PyTorch](https://pytorch.org/) must be installed _before_ installing -DeepSpeed. -{: .notice--info} - ```bash pip install deepspeed ``` @@ -30,6 +26,9 @@ ds_report ## Pre-install DeepSpeed Ops +**Note:** [PyTorch](https://pytorch.org/) must be installed _before_ pre-compiling any DeepSpeed C++/CUDA ops. However, this is not required if using the default mode of JIT compilation of ops. +{: .notice--info} + Sometimes we have found it useful to pre-install either some or all DeepSpeed C++/CUDA ops instead of using the JIT compiled path. In order to support pre-installation we introduce build environment flags to turn on/off building diff --git a/op_builder/builder.py b/op_builder/builder.py index 97ae936c68bf..21547f896473 100644 --- a/op_builder/builder.py +++ b/op_builder/builder.py @@ -4,7 +4,6 @@ import os import sys import time -import torch import importlib from pathlib import Path import subprocess @@ -17,6 +16,13 @@ DEFAULT_TORCH_EXTENSION_PATH = "/tmp/torch_extensions" DEFAULT_COMPUTE_CAPABILITIES = "6.0;6.1;7.0" +try: + import torch +except ImportError: + print( + f"{WARNING} unable to import torch, please install it if you want to pre-compile any deepspeed ops." 
+ ) + def installed_cuda_version(): import torch.utils.cpp_extension diff --git a/op_builder/cpu_adam.py b/op_builder/cpu_adam.py index b93f27b81709..129ddeea3a29 100644 --- a/op_builder/cpu_adam.py +++ b/op_builder/cpu_adam.py @@ -3,7 +3,6 @@ """ import os import sys -import torch import subprocess from .builder import CUDAOpBuilder @@ -26,6 +25,7 @@ def sources(self): return ['csrc/adam/cpu_adam.cpp', 'csrc/adam/custom_cuda_kernel.cu'] def include_paths(self): + import torch CUDA_INCLUDE = os.path.join(torch.utils.cpp_extension.CUDA_HOME, "include") return ['csrc/includes', CUDA_INCLUDE] @@ -47,6 +47,7 @@ def simd_width(self): return '-D__SCALAR__' def cxx_args(self): + import torch CUDA_LIB64 = os.path.join(torch.utils.cpp_extension.CUDA_HOME, "lib64") SIMD_WIDTH = self.simd_width() diff --git a/op_builder/fused_adam.py b/op_builder/fused_adam.py index 0340ed02a8fb..c9a0d4436d01 100644 --- a/op_builder/fused_adam.py +++ b/op_builder/fused_adam.py @@ -1,7 +1,6 @@ """ Copyright 2020 The Microsoft DeepSpeed Team """ -import torch from .builder import CUDAOpBuilder diff --git a/op_builder/fused_lamb.py b/op_builder/fused_lamb.py index 4c73ecb404e3..169654809d06 100644 --- a/op_builder/fused_lamb.py +++ b/op_builder/fused_lamb.py @@ -1,7 +1,6 @@ """ Copyright 2020 The Microsoft DeepSpeed Team """ -import torch from .builder import CUDAOpBuilder diff --git a/op_builder/quantizer.py b/op_builder/quantizer.py index b3d6b2fd4509..43bc5778ea20 100644 --- a/op_builder/quantizer.py +++ b/op_builder/quantizer.py @@ -1,4 +1,3 @@ -import torch from .builder import CUDAOpBuilder diff --git a/op_builder/sparse_attn.py b/op_builder/sparse_attn.py index 58b8ca4f0e6b..6f30cc84da23 100644 --- a/op_builder/sparse_attn.py +++ b/op_builder/sparse_attn.py @@ -1,7 +1,6 @@ """ Copyright 2020 The Microsoft DeepSpeed Team """ -import torch import warnings from .builder import OpBuilder @@ -28,6 +27,12 @@ def is_compatible(self): #command_status = list(map(self.command_exists, 
required_commands)) #deps_compatible = all(command_status) + try: + import torch + except ImportError: + self.warning(f"unable to import torch, please install it first") + return False + # torch-cpu will not have a cuda version if torch.version.cuda is None: cuda_compatible = False diff --git a/op_builder/stochastic_transformer.py b/op_builder/stochastic_transformer.py index b7e2f3845117..aa47c13c49e4 100644 --- a/op_builder/stochastic_transformer.py +++ b/op_builder/stochastic_transformer.py @@ -1,7 +1,6 @@ """ Copyright 2020 The Microsoft DeepSpeed Team """ -import torch from .transformer import TransformerBuilder diff --git a/op_builder/transformer.py b/op_builder/transformer.py index 82ab26f7f25c..2d48e2421b82 100644 --- a/op_builder/transformer.py +++ b/op_builder/transformer.py @@ -1,7 +1,6 @@ """ Copyright 2020 The Microsoft DeepSpeed Team """ -import torch from .builder import CUDAOpBuilder diff --git a/op_builder/transformer_inference.py b/op_builder/transformer_inference.py index cf5e870c906c..94db63711914 100755 --- a/op_builder/transformer_inference.py +++ b/op_builder/transformer_inference.py @@ -1,4 +1,3 @@ -import torch from .builder import CUDAOpBuilder diff --git a/setup.py b/setup.py index 654b983eec81..2424fdc6c46a 100755 --- a/setup.py +++ b/setup.py @@ -22,12 +22,14 @@ from setuptools import setup, find_packages import time +torch_available = True try: import torch from torch.utils.cpp_extension import BuildExtension except ImportError: - raise ImportError('Unable to import torch, please visit https://pytorch.org/ ' - 'to see how to properly install torch on your system.') + torch_available = False + print('[WARNING] Unable to import torch, pre-compiling ops will be disabled. 
' \ + 'Please visit https://pytorch.org/ to see how to properly install torch on your system.') from op_builder import ALL_OPS, get_default_compute_capatabilities @@ -45,7 +47,7 @@ def fetch_requirements(path): } # If MPI is available add 1bit-adam requirements -if torch.cuda.is_available(): +if torch_available and torch.cuda.is_available(): if shutil.which('ompi_info') or shutil.which('mpiname'): cupy = f"cupy-cuda{torch.version.cuda.replace('.','')[:3]}" extras_require['1bit_adam'].append(cupy) @@ -60,12 +62,17 @@ def fetch_requirements(path): cmdclass = {} # For any pre-installed ops force disable ninja -cmdclass['build_ext'] = BuildExtension.with_options(use_ninja=False) +if torch_available: + cmdclass['build_ext'] = BuildExtension.with_options(use_ninja=False) -TORCH_MAJOR = torch.__version__.split('.')[0] -TORCH_MINOR = torch.__version__.split('.')[1] +if torch_available: + TORCH_MAJOR = torch.__version__.split('.')[0] + TORCH_MINOR = torch.__version__.split('.')[1] +else: + TORCH_MAJOR = "0" + TORCH_MINOR = "0" -if not torch.cuda.is_available(): +if torch_available and not torch.cuda.is_available(): # Fix to allow docker builds, similar to https://github.com/NVIDIA/apex/issues/486 print( "[WARNING] Torch did not find cuda available, if cross-compiling or running with cpu only " @@ -81,6 +88,9 @@ def fetch_requirements(path): BUILD_OP_DEFAULT = int(os.environ.get('DS_BUILD_OPS', BUILD_OP_PLATFORM)) print(f"DS_BUILD_OPS={BUILD_OP_DEFAULT}") +if BUILD_OP_DEFAULT: + assert torch_available, "Unable to pre-compile ops without torch installed. Please install torch before attempting to pre-compile ops." 
+ def command_exists(cmd): if sys.platform == "win32": @@ -109,6 +119,7 @@ def op_enabled(op_name): # If op install enabled, add builder to extensions if op_enabled(op_name) and op_compatible: + assert torch_available, f"Unable to pre-compile {op_name}, please first install torch" install_ops[op_name] = op_enabled(op_name) ext_modules.append(builder.builder()) @@ -170,7 +181,7 @@ def create_dir_symlink(src, dest): torch_version = ".".join([TORCH_MAJOR, TORCH_MINOR]) # Set cuda_version to 0.0 if cpu-only cuda_version = "0.0" -if torch.version.cuda is not None: +if torch_available and torch.version.cuda is not None: cuda_version = ".".join(torch.version.cuda.split('.')[:2]) torch_info = {"version": torch_version, "cuda_version": cuda_version}