
Commit 9fc24be

Merge pull request #62 from ROCmSoftwarePlatform/develop-upstream-sync-180709
Merging updates from Tensorflow upstream master - 07/09/2018
2 parents: b820d65 + 717e25a

722 files changed: +42,500 −12,012 lines


RELEASE.md
Lines changed: 1 addition & 1 deletion

@@ -6,7 +6,7 @@
 * Update `tf.keras` to the Keras 2.1.6 API.
 * Added [`tf.keras.layers.CuDNNGRU`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/keras/layers/CuDNNGRU) and [`tf.keras.layers.CuDNNLSTM`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/keras/layers/CuDNNLSTM) layers. [Try it](https://colab.sandbox.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb?linkId=53292082).
 * Adding support of core [feature columns](https://www.tensorflow.org/get_started/feature_columns) and [losses](https://www.tensorflow.org/api_docs/python/tf/losses) to [gradient boosted trees estimators](https://github.com/tensorflow/models/tree/master/official/boosted_trees).
-* The [python interface](https://tensorflow-dot-devsite.googleplex.com/versions/r1.9/api_docs/python/tf/contrib/lite)
+* The [python interface](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/contrib/lite)
   for the [TFLite Optimizing Converter](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/toco/README.md)
   has been expanded, and the command line interface (AKA: `toco`, `tflite_convert`) is once again
   included in the standard `pip` installation.

configure.py
Lines changed: 2 additions & 0 deletions

@@ -835,6 +835,8 @@ def set_tf_cuda_version(environ_cp):
         '[Default is %s]: ') % (tf_cuda_version, default_cuda_path)
     cuda_toolkit_path = get_from_env_or_user_or_default(
         environ_cp, 'CUDA_TOOLKIT_PATH', ask_cuda_path, default_cuda_path)
+    if is_windows() or is_cygwin():
+      cuda_toolkit_path = cygpath(cuda_toolkit_path)
 
     if is_windows():
       cuda_rt_lib_path = 'lib/x64/cudart.lib'

tensorflow/BUILD
Lines changed: 16 additions & 16 deletions

@@ -445,6 +445,22 @@ filegroup(
     data = glob(["docs_src/**/*.md"]),
 )
 
+cc_library(
+    name = "grpc",
+    deps = select({
+        ":linux_s390x": ["@grpc//:grpc_unsecure"],
+        "//conditions:default": ["@grpc"],
+    }),
+)
+
+cc_library(
+    name = "grpc++",
+    deps = select({
+        ":linux_s390x": ["@grpc//:grpc++_unsecure"],
+        "//conditions:default": ["@grpc//:grpc++"],
+    }),
+)
+
 # A shared object which includes registration mechanisms for ops and
 # kernels. Does not include the implementations of any ops or kernels. Instead,
 # the library which loads libtensorflow_framework.so
@@ -594,19 +610,3 @@ py_library(
     visibility = ["//visibility:public"],
     deps = ["//tensorflow/python:no_contrib"],
 )
-
-cc_library(
-    name = "grpc",
-    deps = select({
-        ":linux_s390x": ["@grpc//:grpc_unsecure"],
-        "//conditions:default": ["@grpc"],
-    }),
-)
-
-cc_library(
-    name = "grpc++",
-    deps = select({
-        ":linux_s390x": ["@grpc//:grpc++_unsecure"],
-        "//conditions:default": ["@grpc//:grpc++"],
-    }),
-)

tensorflow/c/c_api_experimental.cc
Lines changed: 27 additions & 0 deletions

@@ -57,6 +57,33 @@ void TF_EnableXLACompilation(TF_SessionOptions* options, unsigned char enable) {
   }
 }
 
+TF_Buffer* TF_CreateConfig(unsigned char enable_xla_compilation,
+                           unsigned char gpu_memory_allow_growth) {
+  tensorflow::ConfigProto config;
+  auto* optimizer_options =
+      config.mutable_graph_options()->mutable_optimizer_options();
+  if (enable_xla_compilation) {
+    optimizer_options->set_global_jit_level(tensorflow::OptimizerOptions::ON_1);
+
+    // These XLA flags are needed to trigger XLA properly from C (more generally
+    // non-Python) clients. If this API is called again with `enable` set to
+    // false, it is safe to keep these flag values as is.
+    tensorflow::legacy_flags::MarkForCompilationPassFlags* flags =
+        tensorflow::legacy_flags::GetMarkForCompilationPassFlags();
+    flags->tf_xla_cpu_global_jit = true;
+    flags->tf_xla_min_cluster_size = 1;
+  } else {
+    optimizer_options->set_global_jit_level(tensorflow::OptimizerOptions::OFF);
+  }
+
+  auto* gpu_options = config.mutable_gpu_options();
+  gpu_options->set_allow_growth(gpu_memory_allow_growth);
+
+  TF_Buffer* ret = TF_NewBuffer();
+  TF_CHECK_OK(MessageToBuffer(config, ret));
+  return ret;
+}
+
 const char* TF_GraphDebugString(TF_Graph* graph, size_t* len) {
   tensorflow::mutex_lock c(graph->mu);
   const auto& debug_str = graph->graph.ToGraphDefDebug().DebugString();

tensorflow/c/c_api_experimental.h
Lines changed: 12 additions & 2 deletions

@@ -55,11 +55,21 @@ extern "C" {
 // set XLA flag values to prepare for XLA compilation. Otherwise set
 // global_jit_level to OFF.
 //
-// This API is syntax sugar over TF_SetConfig(), and is used by clients that
-// cannot read/write the tensorflow.ConfigProto proto.
+// This and the next API are syntax sugar over TF_SetConfig(), and is used by
+// clients that cannot read/write the tensorflow.ConfigProto proto.
+// TODO: Migrate to TF_CreateConfig() below.
 TF_CAPI_EXPORT extern void TF_EnableXLACompilation(TF_SessionOptions* options,
                                                    unsigned char enable);
 
+// Create a serialized tensorflow.ConfigProto proto, where:
+//
+// a) ConfigProto.optimizer_options.global_jit_level is set to to ON_1 if
+// `enable_xla_compilation` is non-zero, and OFF otherwise.
+// b) ConfigProto.gpu_options.allow_growth is set to `gpu_memory_allow_growth`.
+TF_CAPI_EXPORT extern TF_Buffer* TF_CreateConfig(
+    unsigned char enable_xla_compilation,
+    unsigned char gpu_memory_allow_growth);
+
 // Returns the graph content in a human-readable format, with length set in
 // `len`. The format is subject to change in the future.
 // The returned string is heap-allocated, and caller should call free() on it.
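
For context, here is a minimal usage sketch of the new `TF_CreateConfig()` entry point from a C client. It is illustrative only, not code from this commit, and it assumes the standard C API helpers declared in `tensorflow/c/c_api.h` (`TF_NewSessionOptions`, `TF_SetConfig`, `TF_NewStatus`, and the matching delete functions).

#include <stdio.h>

#include "tensorflow/c/c_api.h"
#include "tensorflow/c/c_api_experimental.h"

int main(void) {
  TF_SessionOptions* opts = TF_NewSessionOptions();
  TF_Status* status = TF_NewStatus();

  // Build a serialized ConfigProto: XLA JIT enabled (global_jit_level = ON_1)
  // and GPU memory allow_growth enabled.
  TF_Buffer* config = TF_CreateConfig(/*enable_xla_compilation=*/1,
                                      /*gpu_memory_allow_growth=*/1);

  // Apply the serialized proto to the session options.
  TF_SetConfig(opts, config->data, config->length, status);
  if (TF_GetCode(status) != TF_OK) {
    fprintf(stderr, "TF_SetConfig failed: %s\n", TF_Message(status));
  }

  // ... create a graph and a TF_Session using `opts`, run it, etc. ...

  TF_DeleteBuffer(config);
  TF_DeleteStatus(status);
  TF_DeleteSessionOptions(opts);
  return 0;
}

Note that `TF_CreateConfig()` only builds the serialized proto; per the implementation above it returns a fresh `TF_Buffer`, so the caller owns it and releases it with `TF_DeleteBuffer()` after calling `TF_SetConfig()`.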

tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
Lines changed: 4 additions & 1 deletion

@@ -1136,7 +1136,10 @@ Status Encapsulator::Subgraph::AddShapeInferenceInfo(
         GraphToFunctionDef(*inference_graph, inference_graph_name, &fdef));
     host_compute->AddAttr("shape_inference_graph", inference_graph_name);
     host_compute->AddAttr("shapes", std::vector<TensorShapeProto>());
-    TF_RETURN_IF_ERROR(library->AddFunctionDef(fdef));
+    // TODO(sibyl-Aix6ihai): Understand why there are multiple calls to Encapsulator.
+    if (library->Find(inference_graph_name) == nullptr) {
+      TF_RETURN_IF_ERROR(library->AddFunctionDef(fdef));
+    }
   }
   return Status::OK();
 }

tensorflow/compiler/jit/kernels/xla_launch_op.cc
Lines changed: 3 additions & 2 deletions

@@ -117,6 +117,7 @@ void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) {
   const XlaDevice::Metadata* metadata = nullptr;
   Status s = XlaDevice::GetMetadata(ctx, &metadata);
   bool allocate_xla_tensors = s.ok();
+  bool use_multiple_streams = s.ok() && metadata->UseMultipleStreams();
 
   // Get the platform_id_ for XLA_* devices.
   if (platform_id_ == nullptr) {
@@ -182,8 +183,8 @@ void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) {
 
   VLOG(1) << "Executing XLA Computation...";
 
-  XlaComputationLaunchContext launch_context(client, xla_allocator,
-                                             allocate_xla_tensors);
+  XlaComputationLaunchContext launch_context(
+      client, xla_allocator, allocate_xla_tensors, use_multiple_streams);
   launch_context.PopulateInputs(ctx, kernel, variables);
 
   // Execute the computation.

tensorflow/compiler/jit/xla_compilation_cache.cc
Lines changed: 1 addition & 17 deletions

@@ -40,23 +40,7 @@ namespace tensorflow {
 XlaCompilationCache::XlaCompilationCache(xla::LocalClient* client,
                                          DeviceType device_type)
     : client_(client), device_type_(std::move(device_type)) {}
-XlaCompilationCache::~XlaCompilationCache() {
-  // Ensure any use of our programs have completed by waiting for all stream
-  // executors to complete.
-  for (auto* executor : client_->backend().stream_executors()) {
-    bool ok = executor->SynchronizeAllActivity();
-    if (!ok) {
-      LOG(ERROR) << "Error synchronizing activity while waiting for all "
-                    "programs to complete";
-    }
-  }
-  // TODO(b/110813685): Think about the program ownership model. Programs are
-  // currently owned by the compilation cache which means we must wait for
-  // program completion in the destructor. There are multiple compilation caches
-  // around, which complicates things a little. Perhaps having programs be
-  // shared_ptrs (an invasive change) would make the model easier to reason
-  // about?
-}
+XlaCompilationCache::~XlaCompilationCache() = default;
 
 string XlaCompilationCache::DebugString() {
   return "XLA JIT compilation cache";

tensorflow/compiler/jit/xla_compile_on_demand_op.cc
Lines changed: 3 additions & 1 deletion

@@ -53,7 +53,9 @@ Status XlaCompileOnDemandOp::Run(OpKernelContext* ctx,
 
   // Builds an XLA allocator for the device.
   XlaComputationLaunchContext launch_context(
-      client, client->backend().memory_allocator(), true);
+      client, client->backend().memory_allocator(),
+      /*allocate_xla_tensors=*/true,
+      /*use_multiple_streams=*/metadata.UseMultipleStreams());
 
   launch_context.PopulateInputs(ctx, result, variables);
 
tensorflow/compiler/jit/xla_cpu_device.cc
Lines changed: 1 addition & 0 deletions

@@ -54,6 +54,7 @@ Status XlaCpuDeviceFactory::CreateDevices(const SessionOptions& options,
                                        DEVICE_CPU_XLA_JIT, options, name_prefix,
                                        registration,
                                        /*transfer_as_literal=*/false,
+                                       /*use_multiple_streams=*/false,
                                        /*shape_representation_fn=*/{},
                                        /*padded_shape_fn=*/{}, &device));
   devices->push_back(device.release());
