Skip to content

Commit

Permalink
Reverts 58dc89e
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 689884643
  • Loading branch information
klucke authored and tensorflower-gardener committed Oct 25, 2024
1 parent 7f37924 commit 2749345
Show file tree
Hide file tree
Showing 18 changed files with 68 additions and 185 deletions.
10 changes: 0 additions & 10 deletions third_party/xla/xla/client/executable_build_options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,6 @@ se::DeviceMemoryAllocator* ExecutableBuildOptions::device_allocator() const {
return device_allocator_;
}

// Records `stream` in compute_stream_. The pointer is stored as-is; nothing
// here takes ownership of it or validates it.
// Returns *this so that setter calls can be chained.
ExecutableBuildOptions& ExecutableBuildOptions::set_compute_stream(
se::Stream* stream) {
compute_stream_ = stream;
return *this;
}

// Returns the stream pointer currently held in compute_stream_ (whatever was
// last passed to set_compute_stream()). Callers should be prepared for a null
// result when no stream has been set.
se::Stream* ExecutableBuildOptions::compute_stream() const {
return compute_stream_;
}

ExecutableBuildOptions& ExecutableBuildOptions::set_device_ordinal(
int device_ordinal) {
CHECK_GE(device_ordinal, 0);
Expand Down
6 changes: 0 additions & 6 deletions third_party/xla/xla/client/executable_build_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ namespace stream_executor {

// Forward-declared to avoid StreamExecutor dependency.
class DeviceMemoryAllocator;
class Stream;

} // namespace stream_executor

Expand Down Expand Up @@ -92,10 +91,6 @@ class ExecutableBuildOptions {
se::DeviceMemoryAllocator* allocator);
se::DeviceMemoryAllocator* device_allocator() const;

// If set, this specifies a stream that can be used for autotuning.
ExecutableBuildOptions& set_compute_stream(se::Stream* stream);
se::Stream* compute_stream() const;

// The number of replicas of this computation that are to be executed.
// Defaults to 1.
int num_replicas() const { return num_replicas_; }
Expand Down Expand Up @@ -292,7 +287,6 @@ class ExecutableBuildOptions {
std::optional<CompilationEnvironments> comp_envs_;
std::optional<DebugOptions> debug_options_;
se::DeviceMemoryAllocator* device_allocator_ = nullptr;
se::Stream* compute_stream_ = nullptr;
int num_replicas_ = 1;
int num_partitions_ = 1;
bool use_spmd_partitioning_ = false;
Expand Down
2 changes: 0 additions & 2 deletions third_party/xla/xla/pjrt/pjrt_stream_executor_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3482,8 +3482,6 @@ PjRtStreamExecutorClient::GetExecutableExtras(CompileOptions* options) {
build_options.set_device_ordinal(
addressable_devices.front()->local_hardware_id().value());
}
build_options.set_compute_stream(
device_state(build_options.device_ordinal()).compute_stream());
}
return extras;
}
Expand Down
1 change: 0 additions & 1 deletion third_party/xla/xla/service/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -1457,7 +1457,6 @@ cc_library(
"//xla/hlo/ir:hlo_module_group",
"//xla/pjrt/distributed:key_value_store_interface",
"//xla/stream_executor:dnn",
"//xla/stream_executor:stream",
"//xla/stream_executor:stream_executor_h",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/status:statusor",
Expand Down
5 changes: 0 additions & 5 deletions third_party/xla/xla/service/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ limitations under the License.
#include "xla/service/executable.h"
#include "xla/service/hlo_module_config.h"
#include "xla/service/metrics_hook_interface.h"
#include "xla/stream_executor/stream.h"
#include "xla/stream_executor/stream_executor.h"
#include "tsl/platform/protobuf.h"
#include "tsl/platform/threadpool.h"
Expand Down Expand Up @@ -159,10 +158,6 @@ class Compiler {
std::optional<TargetConfig> target_config;

MultiProcessKeyValueStore key_value_store;

// If compute_stream is set, this is the stream used for all autotuning
// during compilation.
se::Stream* compute_stream = nullptr;
};

virtual ~Compiler() = default;
Expand Down
7 changes: 1 addition & 6 deletions third_party/xla/xla/service/gpu/autotuning/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,6 @@ xla_test(
"//xla/stream_executor:device_description",
"//xla/stream_executor:device_description_proto_cc",
"//xla/stream_executor:semantic_version",
"//xla/stream_executor:stream",
"//xla/stream_executor:stream_executor_h",
"//xla/tests:filecheck",
"//xla/tests:hlo_test_base",
Expand Down Expand Up @@ -303,8 +302,8 @@ xla_test(
"//xla/hlo/ir:hlo",
"//xla/service:platform_util",
"//xla/stream_executor:platform",
"//xla/stream_executor:stream",
"//xla/tests:hlo_test_base",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:string_view",
"@com_google_googletest//:gtest_main",
"@local_tsl//tsl/platform:statusor",
Expand Down Expand Up @@ -332,7 +331,6 @@ xla_test(
"//xla/stream_executor:device_description",
"//xla/stream_executor:platform",
"//xla/stream_executor:semantic_version",
"//xla/stream_executor:stream",
"//xla/tests:hlo_test_base",
"//xla/tsl/lib/core:status_test_util",
"//xla/tsl/protobuf:dnn_proto_cc",
Expand Down Expand Up @@ -439,7 +437,6 @@ xla_test(
"//xla/stream_executor:device_description",
"//xla/stream_executor:dnn",
"//xla/stream_executor:platform",
"//xla/stream_executor:stream",
"//xla/tests:hlo_test_base",
"//xla/tsl/lib/core:status_test_util",
"@com_google_absl//absl/strings:string_view",
Expand Down Expand Up @@ -500,12 +497,10 @@ xla_test(
"//xla:xla_proto_cc",
"//xla/hlo/ir:hlo",
"//xla/hlo/pass:hlo_pass_pipeline",
"//xla/stream_executor:stream",
"//xla/tests:hlo_test_base",
"//xla/tests:xla_internal_test_main", # fixdeps: keep
"@com_google_googletest//:gtest",
"@local_tsl//tsl/platform:path",
"@local_tsl//tsl/platform:statusor",
"@local_tsl//tsl/platform:test",
],
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ limitations under the License.

#include "xla/service/gpu/autotuning/autotuner_compile_util.h"

#include <memory>
#include <vector>

#include <gtest/gtest.h>
Expand All @@ -24,7 +23,6 @@ limitations under the License.
#include "xla/service/gpu/autotuning/autotuner_util.h"
#include "xla/service/platform_util.h"
#include "xla/stream_executor/platform.h"
#include "xla/stream_executor/stream.h"
#include "xla/tests/hlo_test_base.h"
#include "tsl/platform/statusor.h"

Expand All @@ -49,12 +47,9 @@ ENTRY main {
se::Platform* platform = PlatformUtil::GetDefaultPlatform().value();
TF_ASSERT_OK_AND_ASSIGN(std::vector<se::StreamExecutor*> executors,
PlatformUtil::GetStreamExecutors(platform));
TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<se::Stream> stream,
executors.at(0)->CreateStream());

AutotuneConfig autotune_config{
DeviceConfig{executors.at(0), nullptr, stream.get()},
GetDebugOptionsForTest()};
AutotuneConfig autotune_config{DeviceConfig{executors.at(0), nullptr},
GetDebugOptionsForTest()};

auto& root = *module->entry_computation()->root_instruction();

Expand Down Expand Up @@ -106,11 +101,8 @@ ENTRY main {
TF_ASSERT_OK_AND_ASSIGN(std::vector<se::StreamExecutor*> executors,
PlatformUtil::GetStreamExecutors(platform));

TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<se::Stream> stream,
executors.at(0)->CreateStream());
AutotuneConfig autotune_config{
DeviceConfig{executors.at(0), nullptr, stream.get()},
GetDebugOptionsForTest()};
AutotuneConfig autotune_config{DeviceConfig{executors.at(0), nullptr},
GetDebugOptionsForTest()};

auto& root = *module->entry_computation()->root_instruction();

Expand Down Expand Up @@ -162,11 +154,8 @@ ENTRY main {
TF_ASSERT_OK_AND_ASSIGN(std::vector<se::StreamExecutor*> executors,
PlatformUtil::GetStreamExecutors(platform));

TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<se::Stream> stream,
executors.at(0)->CreateStream());
AutotuneConfig autotune_config{
DeviceConfig{executors.at(0), nullptr, stream.get()},
GetDebugOptionsForTest()};
AutotuneConfig autotune_config{DeviceConfig{executors.at(0), nullptr},
GetDebugOptionsForTest()};

auto& root = *module->entry_computation()->root_instruction();

Expand Down
5 changes: 1 addition & 4 deletions third_party/xla/xla/service/gpu/autotuning/autotuner_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,6 @@ struct DeviceConfig {
// memory while timing the various convolution algorithms. If it's null,
// we'll use the default allocator on the StreamExecutor.
se::DeviceMemoryAllocator* allocator = nullptr; // may be null

se::Stream* compute_stream = nullptr;
};

struct DevicelessConfig {
Expand Down Expand Up @@ -179,8 +177,7 @@ class AutotuneConfig {

// Returns a stream for a device-backed configuration. CHECK-fails if config_
// does not currently hold a DeviceConfig.
// NOTE(review): this listing is a diff view rendered without +/- markers, so
// two alternative bodies appear back to back and the second `return` is
// unreachable as written. The hunk header (-179,8 +177,7) suggests the two
// compute_stream lines are the removed side and the GetAllocator() line is
// the added side -- confirm against the actual file before relying on this.
absl::StatusOr<se::Stream*> GetStream() const {
CHECK(std::holds_alternative<DeviceConfig>(config_));
// Variant 1: hand back the stream stored directly in the DeviceConfig.
se::Stream* stream = std::get<DeviceConfig>(config_).compute_stream;
return stream;
// Variant 2: ask the allocator for the stream associated with the
// executor's device ordinal.
return GetAllocator()->GetStream(GetExecutor()->device_ordinal());
}

const se::GpuComputeCapability& GetGpuComputeCapability() const {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ limitations under the License.
#include "xla/service/gpu/autotuning/conv_algorithm_picker.h"

#include <cstdint>
#include <memory>
#include <variant>
#include <vector>

Expand All @@ -36,7 +35,6 @@ limitations under the License.
#include "xla/stream_executor/device_description.h"
#include "xla/stream_executor/dnn.h"
#include "xla/stream_executor/platform.h"
#include "xla/stream_executor/stream.h"
#include "xla/tests/hlo_test_base.h"
#include "xla/tsl/lib/core/status_test_util.h"
#include "xla/xla.pb.h"
Expand Down Expand Up @@ -80,8 +78,6 @@ ENTRY main {
PlatformUtil::GetStreamExecutors(platform));
ASSERT_GT(executors.size(), 0);
se::StreamExecutor* stream_exec = executors[0];
TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<se::Stream> stream,
stream_exec->CreateStream());

const se::GpuComputeCapability& cc = backend()
.default_stream_executor()
Expand All @@ -92,7 +88,7 @@ ENTRY main {
changed = false;
DebugOptions opts = DefaultDebugOptionsIgnoringFlags();

AutotuneConfig cfg{DeviceConfig{stream_exec, nullptr, stream.get()}, opts};
AutotuneConfig cfg{DeviceConfig{stream_exec, nullptr}, opts};
TF_ASSERT_OK_AND_ASSIGN(changed,
RunHloPass(GpuConvAlgorithmPicker(cfg), m.get()));
ASSERT_TRUE(changed);
Expand Down Expand Up @@ -204,9 +200,7 @@ ENTRY main {
ASSERT_TRUE(changed);

DebugOptions opts = DefaultDebugOptionsIgnoringFlags();
TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<se::Stream> stream,
stream_exec->CreateStream());
AutotuneConfig cfg{DeviceConfig{stream_exec, nullptr, stream.get()}, opts};
AutotuneConfig cfg{DeviceConfig{stream_exec, nullptr}, opts};
TF_ASSERT_OK_AND_ASSIGN(changed,
RunHloPass(GpuConvAlgorithmPicker(cfg), m.get()));
ASSERT_TRUE(changed);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,8 @@ limitations under the License.
#include "xla/hlo/ir/hlo_module.h"
#include "xla/hlo/pass/hlo_pass_pipeline.h"
#include "xla/service/gpu/autotuning/autotuner_util.h"
#include "xla/stream_executor/stream.h"
#include "xla/tests/hlo_test_base.h"
#include "xla/xla.pb.h"
#include "tsl/platform/statusor.h"
#include "tsl/platform/test.h"

namespace xla {
Expand Down Expand Up @@ -68,12 +66,9 @@ TEST_F(CustomKernelFusionAutotunerTest, DontRunOnNonCustomFusions) {

HloPassPipeline pipeline("custom_kernel_fusion_autotuner");
DebugOptions debug_options;
TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<se::Stream> stream,
backend().default_stream_executor()->CreateStream());

AutotuneConfig autotune_config =
AutotuneConfig{DeviceConfig{backend().default_stream_executor(),
backend().memory_allocator(), stream.get()},
backend().memory_allocator()},
debug_options};
pipeline.AddPass<CustomKernelFusionAutotuner>(autotune_config);

Expand Down Expand Up @@ -105,11 +100,9 @@ TEST_F(CustomKernelFusionAutotunerTest,

HloPassPipeline pipeline("custom_kernel_fusion_autotuner");
DebugOptions debug_options;
TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<se::Stream> stream,
backend().default_stream_executor()->CreateStream());
AutotuneConfig autotune_config =
AutotuneConfig{DeviceConfig{backend().default_stream_executor(),
backend().memory_allocator(), stream.get()},
backend().memory_allocator()},
debug_options};
pipeline.AddPass<CustomKernelFusionAutotuner>(autotune_config);
ASSERT_TRUE(pipeline.Run(hlo_module.get()).ok());
Expand Down Expand Up @@ -138,11 +131,9 @@ TEST_F(CustomKernelFusionAutotunerTest,

HloPassPipeline pipeline("custom_kernel_fusion_autotuner");
DebugOptions debug_options;
TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<se::Stream> stream,
backend().default_stream_executor()->CreateStream());
AutotuneConfig autotune_config =
AutotuneConfig{DeviceConfig{backend().default_stream_executor(),
backend().memory_allocator(), stream.get()},
backend().memory_allocator()},
debug_options};
pipeline.AddPass<CustomKernelFusionAutotuner>(autotune_config);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ limitations under the License.

#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <variant>

Expand All @@ -34,7 +33,6 @@ limitations under the License.
#include "xla/stream_executor/device_description.h"
#include "xla/stream_executor/platform.h"
#include "xla/stream_executor/semantic_version.h"
#include "xla/stream_executor/stream.h"
#include "xla/tests/hlo_test_base.h"
#include "xla/tsl/lib/core/status_test_util.h"
#include "xla/tsl/protobuf/dnn.pb.h"
Expand Down Expand Up @@ -137,10 +135,7 @@ ENTRY main {
/*toolkit_version=*/stream_executor::SemanticVersion{12, 4, 0}),
module.get()));

TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<se::Stream> stream,
stream_exec()->CreateStream());
AutotuneConfig cfg{DeviceConfig{stream_exec(), nullptr, stream.get()},
debug_opts};
AutotuneConfig cfg{DeviceConfig{stream_exec(), nullptr}, debug_opts};
GemmAlgorithmPicker gpicker(cfg);
// Note that we do not care whether the algorithm index has changed:
// what matters is the number of algorithms left after sorting out.
Expand Down Expand Up @@ -180,10 +175,7 @@ ENTRY main {
/*toolkit_version=*/stream_executor::SemanticVersion{12, 4, 0}),
module.get()));

TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<se::Stream> stream,
stream_exec()->CreateStream());
AutotuneConfig cfg{DeviceConfig{stream_exec(), nullptr, stream.get()},
debug_opts};
AutotuneConfig cfg{DeviceConfig{stream_exec(), nullptr}, debug_opts};
GemmAlgorithmPicker gpicker(cfg);
TF_ASSERT_OK_AND_ASSIGN(changed, RunHloPass(gpicker, module.get()));
num_left2 = gpicker.num_algorithms_left();
Expand Down Expand Up @@ -216,9 +208,7 @@ ENTRY main {
m.get()));
changed = false;
DebugOptions opts;
TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<se::Stream> stream,
stream_exec()->CreateStream());
AutotuneConfig cfg{DeviceConfig{stream_exec(), nullptr, stream.get()}, opts};
AutotuneConfig cfg{DeviceConfig{stream_exec(), nullptr}, opts};
TF_ASSERT_OK_AND_ASSIGN(changed,
RunHloPass(GemmAlgorithmPicker(cfg), m.get()));
ASSERT_TRUE(changed);
Expand Down Expand Up @@ -283,9 +273,7 @@ ENTRY main {
changed = false;

DebugOptions opts;
TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<se::Stream> stream,
stream_exec()->CreateStream());
AutotuneConfig cfg{DeviceConfig{stream_exec(), nullptr, stream.get()}, opts};
AutotuneConfig cfg{DeviceConfig{stream_exec(), nullptr}, opts};

TF_ASSERT_OK_AND_ASSIGN(changed,
RunHloPass(GemmAlgorithmPicker(cfg), m.get()));
Expand Down
Loading

0 comments on commit 2749345

Please sign in to comment.