Skip to content

Commit a4652d5

Browse files
committed
Initial commit to enable XLA on TensorFlow 1.8+.
The implementation is ported from TensorFlow 1.3.
1 parent 1e632e2 commit a4652d5

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

43 files changed

+1899
-210
lines changed

configure.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1493,7 +1493,7 @@ def main():
14931493
set_build_var(environ_cp, 'TF_NEED_KAFKA', 'Apache Kafka Platform',
14941494
'with_kafka_support', False, 'kafka')
14951495
set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support',
1496-
False, 'xla')
1496+
True, 'xla')
14971497
set_build_var(environ_cp, 'TF_NEED_GDR', 'GDR', 'with_gdr_support',
14981498
False, 'gdr')
14991499
set_build_var(environ_cp, 'TF_NEED_VERBS', 'VERBS', 'with_verbs_support',

tensorflow/compiler/jit/BUILD

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ load("//tensorflow:tensorflow.bzl", "tf_kernel_library")
2525
load("//tensorflow:tensorflow.bzl", "tf_cc_test")
2626
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
2727
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured")
28+
load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm")
29+
load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm_is_configured")
2830

2931
# Target that bundles up the XLA CPU and GPU JIT devices.
3032
cc_library(
@@ -40,6 +42,9 @@ cc_library(
4042
] + if_cuda_is_configured([
4143
":xla_gpu_device",
4244
":xla_gpu_jit",
45+
]) + if_rocm_is_configured([
46+
":xla_gpu_device",
47+
":xla_gpu_jit",
4348
]),
4449
alwayslink = 1,
4550
)
@@ -59,12 +64,17 @@ cc_library(
5964
cc_library(
6065
name = "xla_gpu_jit",
6166
visibility = ["//visibility:public"],
62-
deps = if_cuda([
67+
deps = if_cuda_is_configured(if_cuda([
6368
":jit_compilation_passes",
6469
"//tensorflow/compiler/jit/kernels:xla_launch_op",
6570
"//tensorflow/compiler/tf2xla/kernels:xla_ops",
6671
"//tensorflow/compiler/xla/service:gpu_plugin",
67-
]),
72+
])) + if_rocm_is_configured(if_rocm([
73+
":jit_compilation_passes",
74+
"//tensorflow/compiler/jit/kernels:xla_launch_op",
75+
"//tensorflow/compiler/tf2xla/kernels:xla_ops",
76+
"//tensorflow/compiler/xla/service:gpu_plugin",
77+
])),
6878
alwayslink = 1,
6979
)
7080

tensorflow/compiler/jit/kernels/xla_launch_op.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,9 @@ XlaLocalLaunchOp::XlaLocalLaunchOp(OpKernelConstruction* ctx)
5151
if (device_type_ == DeviceType(DEVICE_CPU)) {
5252
platform_id_ = se::host::kHostPlatformId;
5353
} else if (device_type_ == DeviceType(DEVICE_GPU)) {
54-
platform_id_ = se::cuda::kCudaPlatformId;
54+
// XXX FIXME devise a way to cope with multiple platforms
55+
//platform_id_ = se::cuda::kCudaPlatformId;
56+
platform_id_ = se::rocm::kROCmPlatformId;
5557
} else {
5658
platform_id_ = nullptr;
5759
}

tensorflow/compiler/jit/xla_gpu_device.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ limitations under the License.
1414
==============================================================================*/
1515

1616
// Registers the XLA_GPU device, which is an XlaDevice instantiation that runs
17-
// operators using XLA via the XLA "CUDA" (GPU) backend.
17+
// operators using XLA via the XLA "CUDA" or "ROCM" (GPU) backend.
1818

1919
#include "tensorflow/compiler/jit/kernels/xla_launch_op.h"
2020
#include "tensorflow/compiler/jit/xla_device.h"
@@ -46,6 +46,8 @@ Status XlaGpuDeviceFactory::CreateDevices(const SessionOptions& options,
4646

4747
std::unique_ptr<XlaDevice> device;
4848
Status status =
49+
// XXX FIXME devise a way to cope with multiple platforms
50+
//XlaDevice::Create("CUDA", DEVICE_XLA_GPU, 0, DEVICE_GPU_XLA_JIT, options,
4951
XlaDevice::Create("CUDA", DEVICE_XLA_GPU, 0, DEVICE_GPU_XLA_JIT, options,
5052
name_prefix, registration,
5153
/*transfer_as_literal=*/false, &device);

tensorflow/compiler/tf2xla/BUILD

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ package(
2323
)
2424

2525
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured")
26+
load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm_is_configured")
2627
load("//tensorflow/compiler/xla:xla.bzl", "xla_proto_library")
2728

2829
cc_library(
@@ -141,6 +142,8 @@ cc_library(
141142
"xla_cpu_backend.cc",
142143
] + if_cuda_is_configured([
143144
"xla_gpu_backend.cc",
145+
]) + if_rocm_is_configured([
146+
"xla_gpu_backend.cc",
144147
]),
145148
hdrs = [
146149
"const_analysis.h",

tensorflow/compiler/xla/service/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -711,6 +711,7 @@ cc_library(
711711
"//tensorflow/compiler/xla/service/gpu:gpu_transfer_manager",
712712
"//tensorflow/core:stream_executor_no_cuda",
713713
"//tensorflow/core/platform/default/build_config:stream_executor_cuda",
714+
"//tensorflow/core/platform/default/build_config:stream_executor_rocm",
714715
],
715716
)
716717

tensorflow/compiler/xla/service/computation_placer.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,8 @@ static bool InitModule() {
147147
stream_executor::host::kHostPlatformId, &CreateComputationPlacer);
148148
xla::ComputationPlacer::RegisterComputationPlacer(
149149
stream_executor::cuda::kCudaPlatformId, &CreateComputationPlacer);
150+
xla::ComputationPlacer::RegisterComputationPlacer(
151+
stream_executor::rocm::kROCmPlatformId, &CreateComputationPlacer);
150152
return true;
151153
}
152154
static bool module_initialized = InitModule();

tensorflow/compiler/xla/service/gpu/BUILD

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ filegroup(
2222
)
2323

2424
load("//tensorflow:tensorflow.bzl", "tf_cc_test")
25+
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
26+
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured")
27+
load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm")
28+
load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm_is_configured")
2529

2630
cc_library(
2731
name = "gpu_constants",
@@ -190,6 +194,7 @@ cc_library(
190194
srcs = ["elemental_ir_emitter.cc"],
191195
hdrs = ["elemental_ir_emitter.h"],
192196
deps = [
197+
":ir_emission_utils",
193198
"//tensorflow/compiler/xla:literal_util",
194199
"//tensorflow/compiler/xla:shape_util",
195200
"//tensorflow/compiler/xla:status_macros",
@@ -246,7 +251,8 @@ cc_library(
246251
"thunk_schedule.cc",
247252
"tuple_thunk.cc",
248253
"while_thunk.cc",
249-
],
254+
] + if_cuda_is_configured(if_cuda(["nvptx_executable.cc"])) +
255+
if_rocm_is_configured(if_rocm(["amdgpu_executable.cc"])),
250256
hdrs = [
251257
"conditional_thunk.h",
252258
"convolution_thunk.h",
@@ -264,7 +270,8 @@ cc_library(
264270
"thunk_schedule.h",
265271
"tuple_thunk.h",
266272
"while_thunk.h",
267-
],
273+
] + if_cuda_is_configured(if_cuda(["nvptx_executable.h"])) +
274+
if_rocm_is_configured(if_rocm(["amdgpu_executable.h"])),
268275
deps = [
269276
":buffer_allocations",
270277
":cudnn_convolution_runner",
@@ -296,6 +303,7 @@ cc_library(
296303
"//tensorflow/core/platform/default/build_config:cudnn_plugin",
297304
"//tensorflow/core/platform/default/build_config:cufft_plugin",
298305
"//tensorflow/core/platform/default/build_config:stream_executor_cuda", # build_cleaner: keep
306+
"//tensorflow/core/platform/default/build_config:stream_executor_rocm",
299307
"//tensorflow/stream_executor",
300308
],
301309
)
@@ -490,8 +498,10 @@ cc_library(
490498

491499
cc_library(
492500
name = "gpu_compiler",
493-
srcs = ["gpu_compiler.cc"],
494-
hdrs = ["gpu_compiler.h"],
501+
srcs = if_cuda_is_configured(if_cuda(["nvptx_compiler.cc"])) +
502+
if_rocm_is_configured(if_rocm(["amdgpu_compiler.cc"])),
503+
hdrs = if_cuda_is_configured(if_cuda(["nvptx_compiler.h"])) +
504+
if_rocm_is_configured(if_rocm(["amdgpu_compiler.h"])),
495505
deps = [
496506
":cudnn_convolution_algorithm_picker",
497507
":cudnn_convolution_rewriter",
@@ -545,6 +555,7 @@ cc_library(
545555
"//tensorflow/compiler/xla/service/gpu/llvm_gpu_backend",
546556
"//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
547557
"//tensorflow/core:cuda_libdevice_path",
558+
"//tensorflow/core:rocm_rocdl_path",
548559
"//tensorflow/core:lib",
549560
"//tensorflow/core:lib_internal",
550561
"//tensorflow/core:regexp_internal",

0 commit comments

Comments (0)