diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td
index 09ce2981d38268..34a6f5d74b1395 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td
@@ -227,12 +227,14 @@ def Bufferization_MaterializeInDestinationOp
   let summary = "copy a tensor";
 
   let description = [{
-    This op indicates that the data of the `source` tensor should materialize
-    in `dest`, which can be a tensor or a memref. In case of a tensor, `source`
-    should materialize in the future buffer of `dest` and a the updated
-    destination tensor is returned. In case of a memref, `source` should
-    materialize in `dest`, which is already a buffer. The op has no results in
-    that case.
+    This op indicates that the data of the `source` tensor is guaranteed to
+    materialize in `dest`, which can be a tensor or a memref. In case of a
+    tensor, `source` materializes in the future buffer of `dest` and a the
+    updated destination tensor is returned. If this is not possible, e.g.,
+    because the destination tensor is read-only or because its original
+    contents are still read later, the input IR fails to bufferize. In case of a
+    memref, `source` materializes in `dest`, which is already a buffer. The op
+    has no results in that case.
 
     `source`, `dest` and `result` (if present) must have the same shape and
     element type. If the op has a result, the types of `result` and `dest` must
@@ -252,7 +254,8 @@ def Bufferization_MaterializeInDestinationOp
     indicates that this op is the only way for the tensor IR to access `dest`
     (or an alias thereof). E.g., there must be no other `to_tensor` ops with
     `dest` or with an alias of `dest`. Such IR is not supported by
-    One-Shot Bufferize.
+    One-Shot Bufferize. Ops that have incorrect usage of `restrict` may
+    bufferize incorrectly.
 
     Note: `restrict` and `writable` could be removed from this op because they
     must always be set for memref destinations. This op has these attributes to
@@ -262,7 +265,9 @@ def Bufferization_MaterializeInDestinationOp
     Note: If `dest` is a tensor, `tensor.insert_slice` could be used for the
     same purpose, but since tensor dialect ops only indicate *what* should be
     computed but not *where*, it could fold away, causing the computation to
-    materialize in a different buffer.
+    materialize in a different buffer. It is also possible that the
+    `tensor.insert_slice` destination bufferizes out-of-place, which would also
+    cause the computation to materialize in a buffer different buffer.
   }];
 
   let arguments = (ins AnyTensor:$source, AnyShaped:$dest,
@@ -282,6 +287,9 @@ def Bufferization_MaterializeInDestinationOp
     bool bufferizesToElementwiseAccess(const AnalysisState &state,
                                        ArrayRef<OpOperand *> opOperands);
 
+    bool mustBufferizeInPlace(OpOperand &opOperand,
+                              const AnalysisState &state);
+
     AliasingValueList getAliasingValues(
         OpOperand &opOperand, const AnalysisState &state);
 
@@ -408,6 +416,7 @@ def Bufferization_ToTensorOp : Bufferization_Op<"to_tensor", [
     Note: Only `to_tensor` ops with the `restrict` unit attribute are supported
     by One-Shot Bufferize. Other IR is rejected. (To support `to_tensor`
     without `restrict`, One-Shot Bufferize would have to analyze memref IR.)
+    Ops that have incorrect usage of `restrict` may bufferize incorrectly.
 
     Example:
 
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
index 1c33f444d15850..738c8374d7add0 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
@@ -549,6 +549,14 @@ bool MaterializeInDestinationOp::bufferizesToMemoryWrite(
   return false;
 }
 
+bool MaterializeInDestinationOp::mustBufferizeInPlace(
+    OpOperand &opOperand, const AnalysisState &state) {
+  // The source is only read and not written, so it always bufferizes in-place
+  // by default. The destination is written and is forced to bufferize in-place
+  // (if it is a tensor).
+  return true;
+}
+
 AliasingValueList
 MaterializeInDestinationOp::getAliasingValues(OpOperand &opOperand,
                                               const AnalysisState &state) {
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp b/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp
index 4c5789306ad758..2ddc51357448a7 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp
@@ -12,6 +12,7 @@
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
 #include "mlir/Dialect/Bufferization/IR/SubsetInsertionOpInterface.h"
 #include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
+#include "mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h"
 #include "mlir/Dialect/Bufferization/Transforms/Transforms.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/IR/Dominance.h"
@@ -184,12 +185,25 @@ struct EmptyTensorElimination
 
 void EmptyTensorElimination::runOnOperation() {
   Operation *op = getOperation();
+  auto moduleOp = dyn_cast<ModuleOp>(op);
   OneShotBufferizationOptions options;
   options.allowReturnAllocsFromLoops = true;
+  if (moduleOp)
+    options.bufferizeFunctionBoundaries = true;
   OneShotAnalysisState state(op, options);
-  if (failed(analyzeOp(op, state))) {
-    signalPassFailure();
-    return;
+  if (moduleOp) {
+    // Module analysis takes into account function boundaries.
+    if (failed(analyzeModuleOp(moduleOp, state))) {
+      signalPassFailure();
+      return;
+    }
+  } else {
+    // Regular One-Shot Bufferize ignores func.func block arguments, func.call,
+    // func.return.
+    if (failed(analyzeOp(op, state))) {
+      signalPassFailure();
+      return;
+    }
   }
 
   IRRewriter rewriter(op->getContext());
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp
index 1c85dbb5688be4..59c8c7efb73c0a 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp
@@ -40,8 +40,8 @@
 
 #include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
 
-#include <random>
 #include <optional>
+#include <random>
 
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
@@ -1182,8 +1182,8 @@ checkPreBufferizationAssumptions(Operation *op, const DominanceInfo &domInfo,
     // not handled in the analysis.
     if (auto toTensorOp = dyn_cast<ToTensorOp>(op.getOperation())) {
       if (!toTensorOp.getRestrict() && !toTensorOp->getUses().empty()) {
-        op->emitError("to_tensor ops without `restrict` are not supported by "
-                      "One-Shot Analysis");
+        op->emitOpError("to_tensor ops without `restrict` are not supported by "
+                        "One-Shot Analysis");
         return WalkResult::interrupt();
       }
     }
@@ -1195,8 +1195,19 @@ checkPreBufferizationAssumptions(Operation *op, const DominanceInfo &domInfo,
                 /*checkConsistencyOnly=*/true)) {
           // This error can happen if certain "mustBufferizeInPlace" interface
           // methods are implemented incorrectly, such that the IR already has
-          // a RaW conflict before making any bufferization decisions.
-          op->emitError("input IR has RaW conflict");
+          // a RaW conflict before making any bufferization decisions. It can
+          // also happen if the bufferization.materialize_in_destination is used
+          // in such a way that a RaW conflict is not avoidable.
+          op->emitOpError("not bufferizable under the given constraints: "
+                          "cannot avoid RaW conflict");
+          return WalkResult::interrupt();
+        }
+
+        if (state.isInPlace(opOperand) &&
+            wouldCreateWriteToNonWritableBuffer(
+                opOperand, state, /*checkConsistencyOnly=*/true)) {
+          op->emitOpError("not bufferizable under the given constraints: would "
+                          "write to read-only buffer");
           return WalkResult::interrupt();
         }
       }
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir
index 272423de5564b0..611b67e198c000 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir
@@ -1,12 +1,12 @@
-// RUN: mlir-opt %s -one-shot-bufferize="allow-unknown-ops" -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="allow-unknown-ops" -verify-diagnostics -split-input-file | FileCheck %s
 
 // Run fuzzer with different seeds.
-// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=23" -verify-diagnostics -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=59" -verify-diagnostics -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=91" -verify-diagnostics -split-input-file -o /dev/null
 
 // Run with top-down analysis.
-// RUN: mlir-opt %s -one-shot-bufferize="allow-unknown-ops analysis-heuristic=top-down" -split-input-file | FileCheck %s --check-prefix=CHECK-TOP-DOWN-ANALYSIS
+// RUN: mlir-opt %s -one-shot-bufferize="allow-unknown-ops analysis-heuristic=top-down" -verify-diagnostics -split-input-file | FileCheck %s --check-prefix=CHECK-TOP-DOWN-ANALYSIS
 
 // Test without analysis: Insert a copy on every buffer write.
 // RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-unknown-ops copy-before-write" -split-input-file | FileCheck %s --check-prefix=CHECK-COPY-BEFORE-WRITE
@@ -235,3 +235,37 @@ func.func @materialize_in_destination_buffer(%t: tensor<5xf32>, %m: memref<5xf32
   return
 }
 
+// -----
+
+func.func @materialize_in_func_bbarg(%t: tensor<?xf32>, %dest: tensor<?xf32>)
+    -> tensor<?xf32> {
+  // This op is not bufferizable because function block arguments are
+  // read-only in regular One-Shot Bufferize. (Run One-Shot Module
+  // Bufferization instead.)
+  // expected-error @below{{not bufferizable under the given constraints: would write to read-only buffer}}
+  %0 = bufferization.materialize_in_destination %t in %dest
+      : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
+  return %0 : tensor<?xf32>
+}
+
+// -----
+
+func.func @materialize_in_dest_raw(%f: f32, %f2: f32, %idx: index) -> (tensor<5xf32>, f32) {
+  %dest = bufferization.alloc_tensor() : tensor<5xf32>
+  // Note: The location of the RaW conflict may not be accurate (such as in this
+  // example). This is because the analysis operates on "alias sets" and not
+  // single SSA values. The location may point to any SSA value in the alias set
+  // that participates in the conflict.
+  // expected-error @below{{not bufferizable under the given constraints: cannot avoid RaW conflict}}
+  %dest_filled = linalg.fill ins(%f : f32) outs(%dest : tensor<5xf32>) -> tensor<5xf32>
+  %src = bufferization.alloc_tensor() : tensor<5xf32>
+  %src_filled = linalg.fill ins(%f2 : f32) outs(%src : tensor<5xf32>) -> tensor<5xf32>
+
+  %0 = bufferization.materialize_in_destination %src_filled in %dest_filled
+      : (tensor<5xf32>, tensor<5xf32>) -> tensor<5xf32>
+  // Read from %dest_filled, which makes it impossible to bufferize the
+  // materialize_in_destination op in-place.
+  %r = tensor.extract %dest_filled[%idx] : tensor<5xf32>
+
+  return %0, %r : tensor<5xf32>, f32
+}
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir
index a25b57991baca7..fd74ae0b60dbbb 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir
@@ -153,7 +153,7 @@ func.func @yield_alloc_dominance_test_2(%cst : f32, %idx : index,
 func.func @copy_of_unranked_tensor(%t: tensor<*xf32>) -> tensor<*xf32> {
   // Unranked tensor OpOperands always bufferize in-place. With this limitation,
   // there is no way to bufferize this IR correctly.
-  // expected-error @+1 {{input IR has RaW conflict}}
+  // expected-error @+1 {{not bufferizable under the given constraints: cannot avoid RaW conflict}}
   func.call @maybe_writing_func(%t) : (tensor<*xf32>) -> ()
   return %t : tensor<*xf32>
 }