diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 193ba2837b9..f3a29be5e43 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -5284,12 +5284,9 @@ GArrowCastOptions * garrow_cast_options_new_raw(const arrow::compute::CastOptions *arrow_options) { GArrowDataType *to_data_type = NULL; - if (arrow_options->to_type) { - auto arrow_copied_options = arrow_options->Copy(); - auto arrow_copied_cast_options = - static_cast(arrow_copied_options.get()); - to_data_type = - garrow_data_type_new_raw(&(arrow_copied_cast_options->to_type)); + if (arrow_options->to_type.type) { + auto arrow_to_data_type = arrow_options->to_type.GetSharedPtr(); + to_data_type = garrow_data_type_new_raw(&arrow_to_data_type); } auto options = g_object_new(GARROW_TYPE_CAST_OPTIONS, diff --git a/c_glib/arrow-glib/scalar.cpp b/c_glib/arrow-glib/scalar.cpp index cef11578e1c..f8699f34eea 100644 --- a/c_glib/arrow-glib/scalar.cpp +++ b/c_glib/arrow-glib/scalar.cpp @@ -2401,9 +2401,31 @@ garrow_sparse_union_scalar_new(GArrowSparseUnionDataType *data_type, gint8 type_code, GArrowScalar *value) { - return GARROW_SPARSE_UNION_SCALAR( - garrow_union_scalar_new( - GARROW_DATA_TYPE(data_type), type_code, value)); + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + const auto &arrow_type_codes = + std::dynamic_pointer_cast( + arrow_data_type)->type_codes(); + auto arrow_value = garrow_scalar_get_raw(value); + arrow::SparseUnionScalar::ValueType arrow_field_values; + for (int i = 0; i < arrow_data_type->num_fields(); ++i) { + if (arrow_type_codes[i] == type_code) { + arrow_field_values.emplace_back(arrow_value); + } else { + arrow_field_values.emplace_back( + arrow::MakeNullScalar(arrow_data_type->field(i)->type())); + } + } + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(arrow_field_values, + type_code, + arrow_data_type)); + auto scalar = garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "data-type", data_type, + "value", value, + NULL); + return GARROW_SPARSE_UNION_SCALAR(scalar); } @@ -2436,9 +2458,19 @@ garrow_dense_union_scalar_new(GArrowDenseUnionDataType *data_type, gint8 type_code, GArrowScalar *value) { - return GARROW_DENSE_UNION_SCALAR( - garrow_union_scalar_new( - GARROW_DATA_TYPE(data_type), type_code, value)); + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_value = garrow_scalar_get_raw(value); + auto arrow_scalar = + std::static_pointer_cast( + std::make_shared(arrow_value, + type_code, + arrow_data_type)); + auto scalar = garrow_scalar_new_raw(&arrow_scalar, + "scalar", &arrow_scalar, + "data-type", data_type, + "value", value, + NULL); + return GARROW_DENSE_UNION_SCALAR(scalar); } diff --git a/c_glib/test/test-large-binary-scalar.rb b/c_glib/test/test-large-binary-scalar.rb index a6bc4addb10..d716e13f3ea 100644 --- a/c_glib/test/test-large-binary-scalar.rb +++ b/c_glib/test/test-large-binary-scalar.rb @@ -38,7 +38,11 @@ def test_equal end def test_to_s - assert_equal("...", @scalar.to_s) + assert_equal(<<-BINARY.strip, @scalar.to_s) +[ + 030102 +] + BINARY end def test_value diff --git a/c_glib/test/test-large-string-scalar.rb b/c_glib/test/test-large-string-scalar.rb index 13e28f647ac..42e24a601b4 100644 --- a/c_glib/test/test-large-string-scalar.rb +++ b/c_glib/test/test-large-string-scalar.rb @@ -38,7 +38,11 @@ def test_equal end def test_to_s - assert_equal("...", @scalar.to_s) + assert_equal(<<-STRING.strip, @scalar.to_s) +[ + "Hello" +] + STRING 
end def test_value diff --git a/c_glib/test/test-list-scalar.rb b/c_glib/test/test-list-scalar.rb index 3fda3f25bbb..0ddbf60bc05 100644 --- a/c_glib/test/test-list-scalar.rb +++ b/c_glib/test/test-list-scalar.rb @@ -41,7 +41,17 @@ def test_equal end def test_to_s - assert_equal("...", @scalar.to_s) + assert_equal(<<-LIST.strip, @scalar.to_s) +[ + [ + [ + 1, + 2, + 3 + ] + ] +] + LIST end def test_value diff --git a/c_glib/test/test-map-scalar.rb b/c_glib/test/test-map-scalar.rb index 9c6eb69e0a8..1e004569ef3 100644 --- a/c_glib/test/test-map-scalar.rb +++ b/c_glib/test/test-map-scalar.rb @@ -56,7 +56,20 @@ def test_equal end def test_to_s - assert_equal("...", @scalar.to_s) + assert_equal(<<-MAP.strip, @scalar.to_s) +[ + keys: + [ + "hello", + "world" + ] + values: + [ + 1, + 2 + ] +] + MAP end def test_value diff --git a/cpp/examples/arrow/compute_register_example.cc b/cpp/examples/arrow/compute_register_example.cc index 13d80b29631..113dfd0faf3 100644 --- a/cpp/examples/arrow/compute_register_example.cc +++ b/cpp/examples/arrow/compute_register_example.cc @@ -127,8 +127,7 @@ const cp::FunctionDoc func_doc{ int main(int argc, char** argv) { const std::string name = "compute_register_example"; auto func = std::make_shared(name, cp::Arity::Unary(), func_doc); - cp::ScalarKernel kernel({cp::InputType::Array(arrow::int64())}, arrow::int64(), - ExampleFunctionImpl); + cp::ScalarKernel kernel({arrow::int64()}, arrow::int64(), ExampleFunctionImpl); kernel.mem_allocation = cp::MemAllocation::NO_PREALLOCATE; ABORT_ON_FAILURE(func->AddKernel(std::move(kernel))); diff --git a/cpp/examples/arrow/udf_example.cc b/cpp/examples/arrow/udf_example.cc index 47c45411477..ccd804339a2 100644 --- a/cpp/examples/arrow/udf_example.cc +++ b/cpp/examples/arrow/udf_example.cc @@ -75,10 +75,8 @@ arrow::Status SampleFunction(cp::KernelContext* ctx, const cp::ExecSpan& batch, arrow::Status Execute() { const std::string name = "add_three"; auto func = std::make_shared(name, cp::Arity::Ternary(), func_doc); - cp::ScalarKernel kernel( - {cp::InputType::Array(arrow::int64()), cp::InputType::Array(arrow::int64()), - cp::InputType::Array(arrow::int64())}, - arrow::int64(), SampleFunction); + cp::ScalarKernel kernel({arrow::int64(), arrow::int64(), arrow::int64()}, + arrow::int64(), SampleFunction); kernel.mem_allocation = cp::MemAllocation::PREALLOCATE; kernel.null_handling = cp::NullHandling::INTERSECTION; diff --git a/cpp/gdb_arrow.py b/cpp/gdb_arrow.py index cd687ec8b2e..af3dad9c087 100644 --- a/cpp/gdb_arrow.py +++ b/cpp/gdb_arrow.py @@ -1406,13 +1406,12 @@ class FixedSizeBinaryScalarPrinter(BaseBinaryScalarPrinter): def to_string(self): size = self.type['byte_width_'] - if not self.is_valid: - return f"{self._format_type()} of size {size}, null value" bufptr = BufferPtr(SharedPtr(self.val['value']).get()) if bufptr.data is None: return f"{self._format_type()} of size {size}, " + nullness = '' if self.is_valid else 'null with ' return (f"{self._format_type()} of size {size}, " - f"value {self._format_buf(bufptr)}") + f"{nullness}value {self._format_buf(bufptr)}") class DictionaryScalarPrinter(ScalarPrinter): @@ -1450,6 +1449,8 @@ def display_hint(self): return 'map' def children(self): + if not self.is_valid: + return None eval_fields = StdVector(self.type['children_']) eval_values = StdVector(self.val['value']) for field, value in zip(eval_fields, eval_values): @@ -1463,7 +1464,24 @@ def to_string(self): return f"{self._format_type()}" -class UnionScalarPrinter(ScalarPrinter): +class 
SparseUnionScalarPrinter(ScalarPrinter): + """ + Pretty-printer for arrow::UnionScalar and subclasses. + """ + + def to_string(self): + type_code = self.val['type_code'].cast(gdb.lookup_type('int')) + if not self.is_valid: + return (f"{self._format_type()} of type {self.type}, " + f"type code {type_code}, null value") + eval_values = StdVector(self.val['value']) + child_id = self.val['child_id'].cast(gdb.lookup_type('int')) + return (f"{self._format_type()} of type code {type_code}, " + f"value {deref(eval_values[child_id])}") + + + +class DenseUnionScalarPrinter(ScalarPrinter): """ Pretty-printer for arrow::UnionScalar and subclasses. """ @@ -1968,10 +1986,16 @@ class StructTypeClass(DataTypeClass): scalar_printer = StructScalarPrinter -class UnionTypeClass(DataTypeClass): +class DenseUnionTypeClass(DataTypeClass): + is_parametric = True + type_printer = UnionTypePrinter + scalar_printer = DenseUnionScalarPrinter + + +class SparseUnionTypeClass(DataTypeClass): is_parametric = True type_printer = UnionTypePrinter - scalar_printer = UnionScalarPrinter + scalar_printer = SparseUnionScalarPrinter class DictionaryTypeClass(DataTypeClass): @@ -2037,8 +2061,8 @@ class ExtensionTypeClass(DataTypeClass): Type.MAP: DataTypeTraits(MapTypeClass, 'MapType'), Type.STRUCT: DataTypeTraits(StructTypeClass, 'StructType'), - Type.SPARSE_UNION: DataTypeTraits(UnionTypeClass, 'SparseUnionType'), - Type.DENSE_UNION: DataTypeTraits(UnionTypeClass, 'DenseUnionType'), + Type.SPARSE_UNION: DataTypeTraits(SparseUnionTypeClass, 'SparseUnionType'), + Type.DENSE_UNION: DataTypeTraits(DenseUnionTypeClass, 'DenseUnionType'), Type.DICTIONARY: DataTypeTraits(DictionaryTypeClass, 'DictionaryType'), Type.EXTENSION: DataTypeTraits(ExtensionTypeClass, 'ExtensionType'), diff --git a/cpp/src/arrow/array/array_base.cc b/cpp/src/arrow/array/array_base.cc index b36fb0fb94a..5d27b2aedfb 100644 --- a/cpp/src/arrow/array/array_base.cc +++ b/cpp/src/arrow/array/array_base.cc @@ -104,16 +104,15 @@ struct ScalarFromArraySlotImpl { } Status Visit(const SparseUnionArray& a) { - const auto type_code = a.type_code(index_); - // child array which stores the actual value - const auto arr = a.field(a.child_id(index_)); - // no need to adjust the index - ARROW_ASSIGN_OR_RAISE(auto value, arr->GetScalar(index_)); - if (value->is_valid) { - out_ = std::shared_ptr(new SparseUnionScalar(value, type_code, a.type())); - } else { - out_ = std::shared_ptr(new SparseUnionScalar(type_code, a.type())); + int8_t type_code = a.type_code(index_); + + ScalarVector children; + for (int i = 0; i < a.type()->num_fields(); ++i) { + children.emplace_back(); + ARROW_ASSIGN_OR_RAISE(children.back(), a.field(i)->GetScalar(index_)); } + + out_ = std::make_shared(std::move(children), type_code, a.type()); return Status::OK(); } @@ -124,11 +123,7 @@ struct ScalarFromArraySlotImpl { // need to look up the value based on offsets auto offset = a.value_offset(index_); ARROW_ASSIGN_OR_RAISE(auto value, arr->GetScalar(offset)); - if (value->is_valid) { - out_ = std::shared_ptr(new DenseUnionScalar(value, type_code, a.type())); - } else { - out_ = std::shared_ptr(new DenseUnionScalar(type_code, a.type())); - } + out_ = std::make_shared(value, type_code, a.type()); return Status::OK(); } diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index 0d9afba6ece..d438557a330 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -561,16 +561,16 @@ static ScalarVector GetScalars() { }, struct_({field("min", int32()), 
field("max", int32())})), // Same values, different union type codes - std::make_shared(std::make_shared(100), 6, - sparse_union_ty), - std::make_shared(std::make_shared(100), 42, - sparse_union_ty), - std::make_shared(42, sparse_union_ty), + SparseUnionScalar::FromValue(std::make_shared(100), 1, + sparse_union_ty), + SparseUnionScalar::FromValue(std::make_shared(100), 2, + sparse_union_ty), + SparseUnionScalar::FromValue(MakeNullScalar(int32()), 2, sparse_union_ty), std::make_shared(std::make_shared(101), 6, dense_union_ty), std::make_shared(std::make_shared(101), 42, dense_union_ty), - std::make_shared(42, dense_union_ty), + std::make_shared(MakeNullScalar(int32()), 42, dense_union_ty), DictionaryScalar::Make(ScalarFromJSON(int8(), "1"), ArrayFromJSON(utf8(), R"(["foo", "bar"])")), DictionaryScalar::Make(ScalarFromJSON(uint8(), "1"), diff --git a/cpp/src/arrow/array/builder_base.cc b/cpp/src/arrow/array/builder_base.cc index 49abd8e0234..ff37cee5ba1 100644 --- a/cpp/src/arrow/array/builder_base.cc +++ b/cpp/src/arrow/array/builder_base.cc @@ -34,6 +34,8 @@ namespace arrow { +using internal::checked_cast; + Status ArrayBuilder::CheckArrayType(const std::shared_ptr& expected_type, const Array& array, const char* message) { if (!expected_type->Equals(*array.type())) { @@ -105,14 +107,13 @@ struct AppendScalarImpl { is_fixed_size_binary_type::value, Status> Visit(const T&) { - auto builder = internal::checked_cast::BuilderType*>(builder_); + auto builder = checked_cast::BuilderType*>(builder_); RETURN_NOT_OK(builder->Reserve(n_repeats_ * (scalars_end_ - scalars_begin_))); for (int64_t i = 0; i < n_repeats_; i++) { for (const std::shared_ptr* raw = scalars_begin_; raw != scalars_end_; raw++) { - auto scalar = - internal::checked_cast::ScalarType*>(raw->get()); + auto scalar = checked_cast::ScalarType*>(raw->get()); if (scalar->is_valid) { builder->UnsafeAppend(scalar->value); } else { @@ -128,22 +129,20 @@ struct AppendScalarImpl { int64_t data_size = 0; for (const std::shared_ptr* raw = scalars_begin_; raw != scalars_end_; raw++) { - auto scalar = - internal::checked_cast::ScalarType*>(raw->get()); + auto scalar = checked_cast::ScalarType*>(raw->get()); if (scalar->is_valid) { data_size += scalar->value->size(); } } - auto builder = internal::checked_cast::BuilderType*>(builder_); + auto builder = checked_cast::BuilderType*>(builder_); RETURN_NOT_OK(builder->Reserve(n_repeats_ * (scalars_end_ - scalars_begin_))); RETURN_NOT_OK(builder->ReserveData(n_repeats_ * data_size)); for (int64_t i = 0; i < n_repeats_; i++) { for (const std::shared_ptr* raw = scalars_begin_; raw != scalars_end_; raw++) { - auto scalar = - internal::checked_cast::ScalarType*>(raw->get()); + auto scalar = checked_cast::ScalarType*>(raw->get()); if (scalar->is_valid) { builder->UnsafeAppend(util::string_view{*scalar->value}); } else { @@ -156,13 +155,12 @@ struct AppendScalarImpl { template enable_if_list_like Visit(const T&) { - auto builder = internal::checked_cast::BuilderType*>(builder_); + auto builder = checked_cast::BuilderType*>(builder_); int64_t num_children = 0; for (const std::shared_ptr* scalar = scalars_begin_; scalar != scalars_end_; scalar++) { if (!(*scalar)->is_valid) continue; - num_children += - internal::checked_cast(**scalar).value->length(); + num_children += checked_cast(**scalar).value->length(); } RETURN_NOT_OK(builder->value_builder()->Reserve(num_children * n_repeats_)); @@ -171,8 +169,7 @@ struct AppendScalarImpl { scalar++) { if ((*scalar)->is_valid) { RETURN_NOT_OK(builder->Append()); - 
const Array& list = - *internal::checked_cast(**scalar).value; + const Array& list = *checked_cast(**scalar).value; for (int64_t i = 0; i < list.length(); i++) { ARROW_ASSIGN_OR_RAISE(auto scalar, list.GetScalar(i)); RETURN_NOT_OK(builder->value_builder()->AppendScalar(*scalar)); @@ -186,7 +183,7 @@ struct AppendScalarImpl { } Status Visit(const StructType& type) { - auto* builder = internal::checked_cast(builder_); + auto* builder = checked_cast(builder_); auto count = n_repeats_ * (scalars_end_ - scalars_begin_); RETURN_NOT_OK(builder->Reserve(count)); for (int field_index = 0; field_index < type.num_fields(); ++field_index) { @@ -194,7 +191,7 @@ struct AppendScalarImpl { } for (int64_t i = 0; i < n_repeats_; i++) { for (const std::shared_ptr* s = scalars_begin_; s != scalars_end_; s++) { - const auto& scalar = internal::checked_cast(**s); + const auto& scalar = checked_cast(**s); for (int field_index = 0; field_index < type.num_fields(); ++field_index) { if (!scalar.is_valid || !scalar.value[field_index]) { RETURN_NOT_OK(builder->field_builder(field_index)->AppendNull()); @@ -213,12 +210,54 @@ struct AppendScalarImpl { Status Visit(const DenseUnionType& type) { return MakeUnionArray(type); } + Status AppendUnionScalar(const DenseUnionType& type, const Scalar& s, + DenseUnionBuilder* builder) { + const auto& scalar = checked_cast(s); + const auto scalar_field_index = type.child_ids()[scalar.type_code]; + RETURN_NOT_OK(builder->Append(scalar.type_code)); + + for (int field_index = 0; field_index < type.num_fields(); ++field_index) { + auto* child_builder = builder->child_builder(field_index).get(); + if (field_index == scalar_field_index) { + if (scalar.is_valid) { + RETURN_NOT_OK(child_builder->AppendScalar(*scalar.value)); + } else { + RETURN_NOT_OK(child_builder->AppendNull()); + } + } + } + return Status::OK(); + } + + Status AppendUnionScalar(const SparseUnionType& type, const Scalar& s, + SparseUnionBuilder* builder) { + // For each scalar, + // 1. append the type code, + // 2. append the value to the corresponding child, + // 3. append null to the other children. + const auto& scalar = checked_cast(s); + RETURN_NOT_OK(builder->Append(scalar.type_code)); + + for (int field_index = 0; field_index < type.num_fields(); ++field_index) { + auto* child_builder = builder->child_builder(field_index).get(); + if (field_index == scalar.child_id) { + if (scalar.is_valid) { + RETURN_NOT_OK(child_builder->AppendScalar(*scalar.value[field_index])); + } else { + RETURN_NOT_OK(child_builder->AppendNull()); + } + } else { + RETURN_NOT_OK(child_builder->AppendNull()); + } + } + return Status::OK(); + } + template Status MakeUnionArray(const T& type) { using BuilderType = typename TypeTraits::BuilderType; - constexpr bool is_dense = std::is_same::value; - auto* builder = internal::checked_cast(builder_); + auto* builder = checked_cast(builder_); const auto count = n_repeats_ * (scalars_end_ - scalars_begin_); RETURN_NOT_OK(builder->Reserve(count)); @@ -230,26 +269,7 @@ struct AppendScalarImpl { for (int64_t i = 0; i < n_repeats_; i++) { for (const std::shared_ptr* s = scalars_begin_; s != scalars_end_; s++) { - // For each scalar, - // 1. append the type code, - // 2. append the value to the corresponding child, - // 3. if the union is sparse, append null to the other children. 
- const auto& scalar = internal::checked_cast(**s); - const auto scalar_field_index = type.child_ids()[scalar.type_code]; - RETURN_NOT_OK(builder->Append(scalar.type_code)); - - for (int field_index = 0; field_index < type.num_fields(); ++field_index) { - auto* child_builder = builder->child_builder(field_index).get(); - if (field_index == scalar_field_index) { - if (scalar.is_valid) { - RETURN_NOT_OK(child_builder->AppendScalar(*scalar.value)); - } else { - RETURN_NOT_OK(child_builder->AppendNull()); - } - } else if (!is_dense) { - RETURN_NOT_OK(child_builder->AppendNull()); - } - } + RETURN_NOT_OK(AppendUnionScalar(type, **s, builder)); } } return Status::OK(); diff --git a/cpp/src/arrow/array/builder_nested.h b/cpp/src/arrow/array/builder_nested.h index 3d36cb5f65e..306d861b09f 100644 --- a/cpp/src/arrow/array/builder_nested.h +++ b/cpp/src/arrow/array/builder_nested.h @@ -304,10 +304,12 @@ class ARROW_EXPORT MapBuilder : public ArrayBuilder { if (!validity || bit_util::GetBit(validity, array.offset + row)) { ARROW_RETURN_NOT_OK(Append()); const int64_t slot_length = offsets[row + 1] - offsets[row]; + // Add together the inner StructArray offset to the Map/List offset + int64_t key_value_offset = array.child_data[0].offset + offsets[row]; ARROW_RETURN_NOT_OK(key_builder_->AppendArraySlice( - array.child_data[0].child_data[0], offsets[row], slot_length)); + array.child_data[0].child_data[0], key_value_offset, slot_length)); ARROW_RETURN_NOT_OK(item_builder_->AppendArraySlice( - array.child_data[0].child_data[1], offsets[row], slot_length)); + array.child_data[0].child_data[1], key_value_offset, slot_length)); } else { ARROW_RETURN_NOT_OK(AppendNull()); } diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index 37db8ccb775..c1a597fea62 100644 --- a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -38,6 +38,7 @@ namespace arrow { +using internal::checked_cast; using internal::CountSetBits; static inline void AdjustNonNullable(Type::type type_id, int64_t length, @@ -147,7 +148,7 @@ void ArraySpan::SetMembers(const ArrayData& data) { if (buffer) { SetBuffer(i, buffer); } else { - ClearBuffer(i); + this->buffers[i] = {}; } } @@ -160,7 +161,7 @@ void ArraySpan::SetMembers(const ArrayData& data) { // Makes sure any other buffers are seen as null / non-existent for (int i = static_cast(data.buffers.size()); i < 3; ++i) { - ClearBuffer(i); + this->buffers[i] = {}; } if (this->type->id() == Type::DICTIONARY) { @@ -174,27 +175,203 @@ void ArraySpan::SetMembers(const ArrayData& data) { } } +namespace { + +template +void SetOffsetsForScalar(ArraySpan* span, offset_type* buffer, int64_t value_size, + int buffer_index = 1) { + buffer[0] = 0; + buffer[1] = static_cast(value_size); + span->buffers[buffer_index].data = reinterpret_cast(buffer); + span->buffers[buffer_index].size = 2 * sizeof(offset_type); +} + +int GetNumBuffers(const DataType& type) { + switch (type.id()) { + case Type::NA: + case Type::STRUCT: + case Type::FIXED_SIZE_LIST: + return 1; + case Type::BINARY: + case Type::LARGE_BINARY: + case Type::STRING: + case Type::LARGE_STRING: + case Type::DENSE_UNION: + return 3; + case Type::EXTENSION: + // The number of buffers depends on the storage type + return GetNumBuffers( + *internal::checked_cast(type).storage_type()); + default: + // Everything else has 2 buffers + return 2; + } +} + +} // namespace + +namespace internal { + +void FillZeroLengthArray(const DataType* type, ArraySpan* span) { + memset(span->scratch_space, 0x00, 
sizeof(span->scratch_space)); + + span->type = type; + span->length = 0; + int num_buffers = GetNumBuffers(*type); + for (int i = 0; i < num_buffers; ++i) { + span->buffers[i].data = span->scratch_space; + span->buffers[i].size = 0; + } + + for (int i = num_buffers; i < 3; ++i) { + span->buffers[i] = {}; + } + + // Fill children + span->child_data.resize(type->num_fields()); + for (int i = 0; i < type->num_fields(); ++i) { + FillZeroLengthArray(type->field(i)->type().get(), &span->child_data[i]); + } +} + +} // namespace internal + void ArraySpan::FillFromScalar(const Scalar& value) { - static const uint8_t kValidByte = 0x01; - static const uint8_t kNullByte = 0x00; + static uint8_t kTrueBit = 0x01; + static uint8_t kFalseBit = 0x00; this->type = value.type.get(); this->length = 1; - // Populate null count and validity bitmap + Type::type type_id = value.type->id(); + + // Populate null count and validity bitmap (only for non-union/null types) this->null_count = value.is_valid ? 0 : 1; - this->buffers[0].data = const_cast(value.is_valid ? &kValidByte : &kNullByte); - this->buffers[0].size = 1; + if (!is_union(type_id) && type_id != Type::NA) { + this->buffers[0].data = value.is_valid ? &kTrueBit : &kFalseBit; + this->buffers[0].size = 1; + } - if (is_primitive(value.type->id())) { - const auto& scalar = - internal::checked_cast(value); + if (type_id == Type::BOOL) { + const auto& scalar = checked_cast(value); + this->buffers[1].data = scalar.value ? &kTrueBit : &kFalseBit; + this->buffers[1].size = 1; + } else if (is_primitive(type_id) || is_decimal(type_id) || + type_id == Type::DICTIONARY) { + const auto& scalar = checked_cast(value); const uint8_t* scalar_data = reinterpret_cast(scalar.view().data()); this->buffers[1].data = const_cast(scalar_data); this->buffers[1].size = scalar.type->byte_width(); + if (type_id == Type::DICTIONARY) { + // Populate dictionary data + const auto& dict_scalar = checked_cast(value); + this->child_data.resize(1); + this->child_data[0].SetMembers(*dict_scalar.value.dictionary->data()); + } + } else if (is_base_binary_like(type_id)) { + const auto& scalar = checked_cast(value); + this->buffers[1].data = this->scratch_space; + const uint8_t* data_buffer = nullptr; + int64_t data_size = 0; + if (scalar.is_valid) { + data_buffer = scalar.value->data(); + data_size = scalar.value->size(); + } + if (is_binary_like(type_id)) { + SetOffsetsForScalar(this, reinterpret_cast(this->scratch_space), + data_size); + } else { + // is_large_binary_like + SetOffsetsForScalar(this, reinterpret_cast(this->scratch_space), + data_size); + } + this->buffers[2].data = const_cast(data_buffer); + this->buffers[2].size = data_size; + } else if (type_id == Type::FIXED_SIZE_BINARY) { + const auto& scalar = checked_cast(value); + this->buffers[1].data = const_cast(scalar.value->data()); + this->buffers[1].size = scalar.value->size(); + } else if (is_list_like(type_id)) { + const auto& scalar = checked_cast(value); + + int64_t value_length = 0; + this->child_data.resize(1); + if (scalar.value != nullptr) { + // When the scalar is null, scalar.value can also be null + this->child_data[0].SetMembers(*scalar.value->data()); + value_length = scalar.value->length(); + } else { + // Even when the value is null, we still must populate the + // child_data to yield a valid array. 
Tedious + internal::FillZeroLengthArray(this->type->field(0)->type().get(), + &this->child_data[0]); + } + + if (type_id == Type::LIST || type_id == Type::MAP) { + SetOffsetsForScalar(this, reinterpret_cast(this->scratch_space), + value_length); + } else if (type_id == Type::LARGE_LIST) { + SetOffsetsForScalar(this, reinterpret_cast(this->scratch_space), + value_length); + } else { + // FIXED_SIZE_LIST: does not have a second buffer + this->buffers[1] = {}; + } + } else if (type_id == Type::STRUCT) { + const auto& scalar = checked_cast(value); + this->child_data.resize(this->type->num_fields()); + DCHECK_EQ(this->type->num_fields(), static_cast(scalar.value.size())); + for (size_t i = 0; i < scalar.value.size(); ++i) { + this->child_data[i].FillFromScalar(*scalar.value[i]); + } + } else if (is_union(type_id)) { + // First buffer is kept null since unions have no validity vector + this->buffers[0] = {}; + + this->buffers[1].data = this->scratch_space; + this->buffers[1].size = 1; + int8_t* type_codes = reinterpret_cast(this->scratch_space); + type_codes[0] = checked_cast(value).type_code; + + this->child_data.resize(this->type->num_fields()); + if (type_id == Type::DENSE_UNION) { + const auto& scalar = checked_cast(value); + // Has offset; start 4 bytes in so it's aligned to a 32-bit boundaries + SetOffsetsForScalar(this, + reinterpret_cast(this->scratch_space) + 1, 1, + /*buffer_index=*/2); + // We can't "see" the other arrays in the union, but we put the "active" + // union array in the right place and fill zero-length arrays for the + // others + const std::vector& child_ids = + checked_cast(this->type)->child_ids(); + DCHECK_GE(scalar.type_code, 0); + DCHECK_LT(scalar.type_code, static_cast(child_ids.size())); + for (int i = 0; i < static_cast(this->child_data.size()); ++i) { + if (i == child_ids[scalar.type_code]) { + this->child_data[i].FillFromScalar(*scalar.value); + } else { + internal::FillZeroLengthArray(this->type->field(i)->type().get(), + &this->child_data[i]); + } + } + } else { + const auto& scalar = checked_cast(value); + // Sparse union scalars have a full complement of child values even + // though only one of them is relevant, so we just fill them in here + for (int i = 0; i < static_cast(this->child_data.size()); ++i) { + this->child_data[i].FillFromScalar(*scalar.value[i]); + } + } + } else if (type_id == Type::EXTENSION) { + // Pass through storage + const auto& scalar = checked_cast(value); + FillFromScalar(*scalar.value); + + // Restore the extension type + this->type = value.type.get(); } else { - // TODO(wesm): implement for other types - DCHECK(false) << "need to implement for other types"; + DCHECK_EQ(Type::NA, type_id) << "should be unreachable: " << *value.type; } } @@ -212,40 +389,14 @@ int64_t ArraySpan::GetNullCount() const { return precomputed; } -int GetNumBuffers(const DataType& type) { - switch (type.id()) { - case Type::NA: - case Type::STRUCT: - case Type::FIXED_SIZE_LIST: - return 1; - case Type::BINARY: - case Type::LARGE_BINARY: - case Type::STRING: - case Type::LARGE_STRING: - case Type::DENSE_UNION: - return 3; - case Type::EXTENSION: - // The number of buffers depends on the storage type - return GetNumBuffers( - *internal::checked_cast(type).storage_type()); - default: - // Everything else has 2 buffers - return 2; - } -} - int ArraySpan::num_buffers() const { return GetNumBuffers(*this->type); } std::shared_ptr ArraySpan::ToArrayData() const { - auto result = std::make_shared(this->type->Copy(), this->length, + auto result = 
std::make_shared(this->type->GetSharedPtr(), this->length, this->null_count, this->offset); for (int i = 0; i < this->num_buffers(); ++i) { - if (this->buffers[i].owner) { - result->buffers.emplace_back(this->GetBuffer(i)); - } else { - result->buffers.push_back(nullptr); - } + result->buffers.emplace_back(this->GetBuffer(i)); } if (this->type->id() == Type::NA) { diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h index df547aedfaf..fddc60293d8 100644 --- a/cpp/src/arrow/array/data.h +++ b/cpp/src/arrow/array/data.h @@ -266,6 +266,11 @@ struct ARROW_EXPORT ArraySpan { int64_t offset = 0; BufferSpan buffers[3]; + // 16 bytes of scratch space to enable this ArraySpan to be a view onto + // scalar values including binary scalars (where we need to create a buffer + // that looks like two 32-bit or 64-bit offsets) + alignas(64) uint8_t scratch_space[16]; + ArraySpan() = default; explicit ArraySpan(const DataType* type, int64_t length) : type(type), length(length) {} @@ -273,9 +278,7 @@ struct ARROW_EXPORT ArraySpan { ArraySpan(const ArrayData& data) { // NOLINT implicit conversion SetMembers(data); } - ArraySpan(const Scalar& data) { // NOLINT implicit converstion - FillFromScalar(data); - } + explicit ArraySpan(const Scalar& data) { FillFromScalar(data); } /// If dictionary-encoded, put dictionary in the first entry std::vector child_data; @@ -292,12 +295,6 @@ struct ARROW_EXPORT ArraySpan { this->buffers[index].owner = &buffer; } - void ClearBuffer(int index) { - this->buffers[index].data = NULLPTR; - this->buffers[index].size = 0; - this->buffers[index].owner = NULLPTR; - } - const ArraySpan& dictionary() const { return child_data[0]; } /// \brief Return the number of buffers (out of 3) that are used to @@ -343,10 +340,14 @@ struct ARROW_EXPORT ArraySpan { std::shared_ptr ToArray() const; std::shared_ptr GetBuffer(int index) const { - if (this->buffers[index].owner == NULLPTR) { - return NULLPTR; + const BufferSpan& buf = this->buffers[index]; + if (buf.owner) { + return *buf.owner; + } else if (buf.data != NULLPTR) { + // Buffer points to some memory without an owning buffer + return std::make_shared(buf.data, buf.size); } else { - return *this->buffers[index].owner; + return NULLPTR; } } @@ -372,6 +373,8 @@ struct ARROW_EXPORT ArraySpan { namespace internal { +void FillZeroLengthArray(const DataType* type, ArraySpan* span); + /// Construct a zero-copy view of this ArrayData with the given type. /// /// This method checks if the types are layout-compatible. diff --git a/cpp/src/arrow/array/util.cc b/cpp/src/arrow/array/util.cc index e5b4ab39493..c0cdcab730c 100644 --- a/cpp/src/arrow/array/util.cc +++ b/cpp/src/arrow/array/util.cc @@ -664,22 +664,20 @@ class RepeatedArrayFactory { } Status Visit(const SparseUnionType& type) { - const auto& union_scalar = checked_cast(scalar_); - const auto& union_type = checked_cast(*scalar_.type); + const auto& union_scalar = checked_cast(scalar_); const auto scalar_type_code = union_scalar.type_code; - const auto scalar_child_id = union_type.child_ids()[scalar_type_code]; // Create child arrays: most of them are all-null, except for the child array // for the given type code (if the scalar is valid). 
ArrayVector fields; for (int i = 0; i < type.num_fields(); ++i) { fields.emplace_back(); - if (i == scalar_child_id && scalar_.is_valid) { - ARROW_ASSIGN_OR_RAISE(fields.back(), - MakeArrayFromScalar(*union_scalar.value, length_, pool_)); - } else { + if (i == union_scalar.child_id && scalar_.is_valid) { ARROW_ASSIGN_OR_RAISE( - fields.back(), MakeArrayOfNull(union_type.field(i)->type(), length_, pool_)); + fields.back(), MakeArrayFromScalar(*union_scalar.value[i], length_, pool_)); + } else { + ARROW_ASSIGN_OR_RAISE(fields.back(), + MakeArrayOfNull(type.field(i)->type(), length_, pool_)); } } @@ -691,7 +689,7 @@ class RepeatedArrayFactory { } Status Visit(const DenseUnionType& type) { - const auto& union_scalar = checked_cast(scalar_); + const auto& union_scalar = checked_cast(scalar_); const auto& union_type = checked_cast(*scalar_.type); const auto scalar_type_code = union_scalar.type_code; const auto scalar_child_id = union_type.child_ids()[scalar_type_code]; diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc index 8af319ed9ea..c5406ee583f 100644 --- a/cpp/src/arrow/compare.cc +++ b/cpp/src/arrow/compare.cc @@ -796,12 +796,19 @@ class ScalarEqualsVisitor { return Status::OK(); } - Status Visit(const UnionScalar& left) { - const auto& right = checked_cast(right_); + Status Visit(const DenseUnionScalar& left) { + const auto& right = checked_cast(right_); result_ = ScalarEquals(*left.value, *right.value, options_, floating_approximate_); return Status::OK(); } + Status Visit(const SparseUnionScalar& left) { + const auto& right = checked_cast(right_); + result_ = ScalarEquals(*left.value[left.child_id], *right.value[right.child_id], + options_, floating_approximate_); + return Status::OK(); + } + Status Visit(const DictionaryScalar& left) { const auto& right = checked_cast(right_); result_ = ScalarEquals(*left.value.index, *right.value.index, options_, diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc index 4ebdecf5e78..ff1d6619905 100644 --- a/cpp/src/arrow/compute/api_vector.cc +++ b/cpp/src/arrow/compute/api_vector.cc @@ -347,11 +347,11 @@ Result Filter(const Datum& values, const Datum& filter, return CallFunction("filter", {values, filter}, &options, ctx); } -Result Take(const Datum& values, const Datum& filter, const TakeOptions& options, +Result Take(const Datum& values, const Datum& indices, const TakeOptions& options, ExecContext* ctx) { // Invoke metafunction which deals with Datum kinds other than just Array, // ChunkedArray. - return CallFunction("take", {values, filter}, &options, ctx); + return CallFunction("take", {values, indices}, &options, ctx); } Result> Take(const Array& values, const Array& indices, diff --git a/cpp/src/arrow/compute/cast.cc b/cpp/src/arrow/compute/cast.cc index bd49041b4f3..52aecf3e45a 100644 --- a/cpp/src/arrow/compute/cast.cc +++ b/cpp/src/arrow/compute/cast.cc @@ -66,25 +66,6 @@ void InitCastTable() { void EnsureInitCastTable() { std::call_once(cast_table_initialized, InitCastTable); } -// Private version of GetCastFunction with better error reporting -// if the input type is known. 
-Result> GetCastFunctionInternal( - const std::shared_ptr& to_type, const DataType* from_type = nullptr) { - internal::EnsureInitCastTable(); - auto it = internal::g_cast_table.find(static_cast(to_type->id())); - if (it == internal::g_cast_table.end()) { - if (from_type != nullptr) { - return Status::NotImplemented("Unsupported cast from ", *from_type, " to ", - *to_type, - " (no available cast function for target type)"); - } else { - return Status::NotImplemented("Unsupported cast to ", *to_type, - " (no available cast function for target type)"); - } - } - return it->second; -} - const FunctionDoc cast_doc{"Cast values to another data type", ("Behavior when values wouldn't fit in the target type\n" "can be controlled through CastOptions."), @@ -116,10 +97,13 @@ class CastMetaFunction : public MetaFunction { if (args[0].type()->Equals(*cast_options->to_type)) { return args[0]; } - ARROW_ASSIGN_OR_RAISE( - std::shared_ptr cast_func, - GetCastFunctionInternal(cast_options->to_type, args[0].type().get())); - return cast_func->Execute(args, options, ctx); + Result> result = + GetCastFunction(*cast_options->to_type); + if (!result.ok()) { + Status s = result.status(); + return s.WithMessage(s.message(), " from ", *args[0].type()); + } + return (*result)->Execute(args, options, ctx); } }; @@ -139,18 +123,6 @@ void RegisterScalarCast(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunction(std::make_shared())); DCHECK_OK(registry->AddFunctionOptionsType(kCastOptionsType)); } -} // namespace internal - -CastOptions::CastOptions(bool safe) - : FunctionOptions(internal::kCastOptionsType), - allow_int_overflow(!safe), - allow_time_truncate(!safe), - allow_time_overflow(!safe), - allow_decimal_truncate(!safe), - allow_float_truncate(!safe), - allow_invalid_utf8(!safe) {} - -constexpr char CastOptions::kTypeName[]; CastFunction::CastFunction(std::string name, Type::type out_type_id) : ScalarFunction(std::move(name), Arity::Unary(), FunctionDoc::Empty()), @@ -177,18 +149,18 @@ Status CastFunction::AddKernel(Type::type in_type_id, std::vector in_ } Result CastFunction::DispatchExact( - const std::vector& values) const { - RETURN_NOT_OK(CheckArity(values)); + const std::vector& types) const { + RETURN_NOT_OK(CheckArity(types.size())); std::vector candidate_kernels; for (const auto& kernel : kernels_) { - if (kernel.signature->MatchesInputs(values)) { + if (kernel.signature->MatchesInputs(types)) { candidate_kernels.push_back(&kernel); } } if (candidate_kernels.size() == 0) { - return Status::NotImplemented("Unsupported cast from ", values[0].type->ToString(), + return Status::NotImplemented("Unsupported cast from ", types[0].type->ToString(), " to ", ToTypeName(out_type_id_), " using function ", this->name()); } @@ -213,28 +185,45 @@ Result CastFunction::DispatchExact( return candidate_kernels[0]; } +Result> GetCastFunction(const DataType& to_type) { + internal::EnsureInitCastTable(); + auto it = internal::g_cast_table.find(static_cast(to_type.id())); + if (it == internal::g_cast_table.end()) { + return Status::NotImplemented("Unsupported cast to ", to_type); + } + return it->second; +} + +} // namespace internal + +CastOptions::CastOptions(bool safe) + : FunctionOptions(internal::kCastOptionsType), + allow_int_overflow(!safe), + allow_time_truncate(!safe), + allow_time_overflow(!safe), + allow_decimal_truncate(!safe), + allow_float_truncate(!safe), + allow_invalid_utf8(!safe) {} + +constexpr char CastOptions::kTypeName[]; + Result Cast(const Datum& value, const CastOptions& options, ExecContext* 
ctx) { return CallFunction("cast", {value}, &options, ctx); } -Result Cast(const Datum& value, std::shared_ptr to_type, +Result Cast(const Datum& value, const TypeHolder& to_type, const CastOptions& options, ExecContext* ctx) { CastOptions options_with_to_type = options; options_with_to_type.to_type = to_type; return Cast(value, options_with_to_type, ctx); } -Result> Cast(const Array& value, std::shared_ptr to_type, +Result> Cast(const Array& value, const TypeHolder& to_type, const CastOptions& options, ExecContext* ctx) { ARROW_ASSIGN_OR_RAISE(Datum result, Cast(Datum(value), to_type, options, ctx)); return result.make_array(); } -Result> GetCastFunction( - const std::shared_ptr& to_type) { - return internal::GetCastFunctionInternal(to_type); -} - bool CanCast(const DataType& from_type, const DataType& to_type) { internal::EnsureInitCastTable(); auto it = internal::g_cast_table.find(static_cast(to_type.id())); @@ -242,7 +231,7 @@ bool CanCast(const DataType& from_type, const DataType& to_type) { return false; } - const CastFunction* function = it->second.get(); + const internal::CastFunction* function = it->second.get(); DCHECK_EQ(function->out_type_id(), to_type.id()); for (auto from_id : function->in_type_ids()) { @@ -253,21 +242,5 @@ bool CanCast(const DataType& from_type, const DataType& to_type) { return false; } -Result> Cast(std::vector datums, std::vector descrs, - ExecContext* ctx) { - for (size_t i = 0; i != datums.size(); ++i) { - if (descrs[i] != datums[i].descr()) { - if (descrs[i].shape != datums[i].shape()) { - return Status::NotImplemented("casting between Datum shapes"); - } - - ARROW_ASSIGN_OR_RAISE(datums[i], - Cast(datums[i], CastOptions::Safe(descrs[i].type), ctx)); - } - } - - return datums; -} - } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/cast.h b/cpp/src/arrow/compute/cast.h index e9c3cf55da9..7432933a124 100644 --- a/cpp/src/arrow/compute/cast.h +++ b/cpp/src/arrow/compute/cast.h @@ -22,8 +22,7 @@ #include #include "arrow/compute/function.h" -#include "arrow/compute/kernel.h" -#include "arrow/datum.h" +#include "arrow/compute/type_fwd.h" #include "arrow/result.h" #include "arrow/status.h" #include "arrow/type.h" @@ -46,13 +45,13 @@ class ARROW_EXPORT CastOptions : public FunctionOptions { explicit CastOptions(bool safe = true); static constexpr char const kTypeName[] = "CastOptions"; - static CastOptions Safe(std::shared_ptr to_type = NULLPTR) { + static CastOptions Safe(TypeHolder to_type = {}) { CastOptions safe(true); safe.to_type = std::move(to_type); return safe; } - static CastOptions Unsafe(std::shared_ptr to_type = NULLPTR) { + static CastOptions Unsafe(TypeHolder to_type = {}) { CastOptions unsafe(false); unsafe.to_type = std::move(to_type); return unsafe; @@ -60,7 +59,7 @@ class ARROW_EXPORT CastOptions : public FunctionOptions { // Type being casted to. 
May be passed separate to eager function // compute::Cast - std::shared_ptr to_type; + TypeHolder to_type; bool allow_int_overflow; bool allow_time_truncate; @@ -74,36 +73,6 @@ class ARROW_EXPORT CastOptions : public FunctionOptions { /// @} -// Cast functions are _not_ registered in the FunctionRegistry, though they use -// the same execution machinery -class CastFunction : public ScalarFunction { - public: - CastFunction(std::string name, Type::type out_type_id); - - Type::type out_type_id() const { return out_type_id_; } - const std::vector& in_type_ids() const { return in_type_ids_; } - - Status AddKernel(Type::type in_type_id, std::vector in_types, - OutputType out_type, ArrayKernelExec exec, - NullHandling::type = NullHandling::INTERSECTION, - MemAllocation::type = MemAllocation::PREALLOCATE); - - // Note, this function toggles off memory allocation and sets the init - // function to CastInit - Status AddKernel(Type::type in_type_id, ScalarKernel kernel); - - Result DispatchExact( - const std::vector& values) const override; - - private: - std::vector in_type_ids_; - const Type::type out_type_id_; -}; - -ARROW_EXPORT -Result> GetCastFunction( - const std::shared_ptr& to_type); - /// \brief Return true if a cast function is defined ARROW_EXPORT bool CanCast(const DataType& from_type, const DataType& to_type); @@ -121,7 +90,7 @@ bool CanCast(const DataType& from_type, const DataType& to_type); /// \since 1.0.0 /// \note API not yet finalized ARROW_EXPORT -Result> Cast(const Array& value, std::shared_ptr to_type, +Result> Cast(const Array& value, const TypeHolder& to_type, const CastOptions& options = CastOptions::Safe(), ExecContext* ctx = NULLPTR); @@ -147,21 +116,9 @@ Result Cast(const Datum& value, const CastOptions& options, /// \since 1.0.0 /// \note API not yet finalized ARROW_EXPORT -Result Cast(const Datum& value, std::shared_ptr to_type, +Result Cast(const Datum& value, const TypeHolder& to_type, const CastOptions& options = CastOptions::Safe(), ExecContext* ctx = NULLPTR); -/// \brief Cast several values simultaneously. Safe cast options are used. 
-/// \param[in] values datums to cast -/// \param[in] descrs ValueDescrs to cast to -/// \param[in] ctx the function execution context, optional -/// \return the resulting datums -/// -/// \since 4.0.0 -/// \note API not yet finalized -ARROW_EXPORT -Result> Cast(std::vector values, std::vector descrs, - ExecContext* ctx = NULLPTR); - } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/cast_internal.h b/cpp/src/arrow/compute/cast_internal.h index 0105d08a573..f00a6cdbf4d 100644 --- a/cpp/src/arrow/compute/cast_internal.h +++ b/cpp/src/arrow/compute/cast_internal.h @@ -30,6 +30,32 @@ namespace internal { using CastState = OptionsWrapper; +// Cast functions are _not_ registered in the FunctionRegistry, though they use +// the same execution machinery +class CastFunction : public ScalarFunction { + public: + CastFunction(std::string name, Type::type out_type_id); + + Type::type out_type_id() const { return out_type_id_; } + const std::vector& in_type_ids() const { return in_type_ids_; } + + Status AddKernel(Type::type in_type_id, std::vector in_types, + OutputType out_type, ArrayKernelExec exec, + NullHandling::type = NullHandling::INTERSECTION, + MemAllocation::type = MemAllocation::PREALLOCATE); + + // Note, this function toggles off memory allocation and sets the init + // function to CastInit + Status AddKernel(Type::type in_type_id, ScalarKernel kernel); + + Result DispatchExact( + const std::vector& types) const override; + + private: + std::vector in_type_ids_; + const Type::type out_type_id_; +}; + // See kernels/scalar_cast_*.cc for these std::vector> GetBooleanCasts(); std::vector> GetNumericCasts(); @@ -38,6 +64,9 @@ std::vector> GetBinaryLikeCasts(); std::vector> GetNestedCasts(); std::vector> GetDictionaryCasts(); +ARROW_EXPORT +Result> GetCastFunction(const DataType& to_type); + } // namespace internal } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc index a612a83e7a8..e5e256ea6dd 100644 --- a/cpp/src/arrow/compute/exec.cc +++ b/cpp/src/arrow/compute/exec.cc @@ -219,16 +219,6 @@ void ComputeDataPreallocate(const DataType& type, namespace detail { -Status CheckAllValues(const std::vector& values) { - for (const auto& value : values) { - if (!value.is_value()) { - return Status::Invalid("Tried executing function with non-value type: ", - value.ToString()); - } - } - return Status::OK(); -} - ExecBatchIterator::ExecBatchIterator(std::vector args, int64_t length, int64_t max_chunksize) : args_(std::move(args)), @@ -249,9 +239,7 @@ Result> ExecBatchIterator::Make( } } - // If the arguments are all scalars, then the length is 1 - int64_t length = 1; - + int64_t length = -1; bool length_set = false; for (auto& arg : args) { if (arg.is_scalar()) { @@ -267,6 +255,11 @@ Result> ExecBatchIterator::Make( } } + if (!length_set) { + // All scalar case, to be removed soon + length = 1; + } + max_chunksize = std::min(length, max_chunksize); return std::unique_ptr( @@ -328,8 +321,34 @@ bool ExecBatchIterator::Next(ExecBatch* batch) { // ---------------------------------------------------------------------- // ExecSpanIterator; to eventually replace ExecBatchIterator -Status ExecSpanIterator::Init(const ExecBatch& batch, ValueDescr::Shape output_shape, - int64_t max_chunksize) { +namespace { + +void PromoteExecSpanScalars(ExecSpan* span) { + // In the "all scalar" case, we "promote" the scalars to ArraySpans of + // length 1, since the kernel implementations do not handle the all + // scalar case + for 
(int i = 0; i < span->num_values(); ++i) { + ExecValue* value = &span->values[i]; + if (value->is_scalar()) { + value->array.FillFromScalar(*value->scalar); + value->scalar = nullptr; + } + } +} + +bool CheckIfAllScalar(const ExecBatch& batch) { + for (const Datum& value : batch.values) { + if (!value.is_scalar()) { + DCHECK(value.is_arraylike()); + return false; + } + } + return batch.num_values() > 0; +} + +} // namespace + +Status ExecSpanIterator::Init(const ExecBatch& batch, int64_t max_chunksize) { if (batch.num_values() > 0) { // Validate arguments bool all_args_same_length = false; @@ -343,8 +362,9 @@ Status ExecSpanIterator::Init(const ExecBatch& batch, ValueDescr::Shape output_s } args_ = &batch.values; initialized_ = have_chunked_arrays_ = false; + have_all_scalars_ = CheckIfAllScalar(batch); position_ = 0; - length_ = output_shape == ValueDescr::SCALAR ? 1 : batch.length; + length_ = batch.length; chunk_indexes_.clear(); chunk_indexes_.resize(args_->size(), 0); value_positions_.clear(); @@ -358,8 +378,7 @@ Status ExecSpanIterator::Init(const ExecBatch& batch, ValueDescr::Shape output_s int64_t ExecSpanIterator::GetNextChunkSpan(int64_t iteration_size, ExecSpan* span) { for (size_t i = 0; i < args_->size() && iteration_size > 0; ++i) { // If the argument is not a chunked array, it's either a Scalar or Array, - // in which case it doesn't influence the size of this span. Note that if - // the args are all scalars the span length is 1 + // in which case it doesn't influence the size of this span if (!args_->at(i).is_chunked_array()) { continue; } @@ -386,12 +405,6 @@ int64_t ExecSpanIterator::GetNextChunkSpan(int64_t iteration_size, ExecSpan* spa } bool ExecSpanIterator::Next(ExecSpan* span) { - if (position_ == length_) { - // This also protects from degenerate cases like ChunkedArrays - // without any chunks - return false; - } - if (!initialized_) { span->length = 0; @@ -402,25 +415,37 @@ bool ExecSpanIterator::Next(ExecSpan* span) { // iteration span->values.resize(args_->size()); for (size_t i = 0; i < args_->size(); ++i) { - if (args_->at(i).is_scalar()) { - span->values[i].SetScalar(args_->at(i).scalar().get()); - } else if (args_->at(i).is_array()) { - const ArrayData& arr = *args_->at(i).array(); + const Datum& arg = (*args_)[i]; + if (arg.is_scalar()) { + span->values[i].SetScalar(arg.scalar().get()); + } else if (arg.is_array()) { + const ArrayData& arr = *arg.array(); span->values[i].SetArray(arr); value_offsets_[i] = arr.offset; } else { // Populate members from the first chunk - const Array* first_chunk = args_->at(i).chunked_array()->chunk(0).get(); - const ArrayData& arr = *first_chunk->data(); - span->values[i].SetArray(arr); - value_offsets_[i] = arr.offset; + const ChunkedArray& carr = *arg.chunked_array(); + if (carr.num_chunks() > 0) { + const ArrayData& arr = *carr.chunk(0)->data(); + span->values[i].SetArray(arr); + value_offsets_[i] = arr.offset; + } else { + // Fill as zero-length array + ::arrow::internal::FillZeroLengthArray(carr.type().get(), + &span->values[i].array); + span->values[i].scalar = nullptr; + } have_chunked_arrays_ = true; } } - initialized_ = true; - } - if (position_ == length_) { + if (have_all_scalars_) { + PromoteExecSpanScalars(span); + } + + initialized_ = true; + } else if (position_ == length_) { + // We've emitted at least one span and we're at the end so we are done return false; } @@ -441,6 +466,7 @@ bool ExecSpanIterator::Next(ExecSpan* span) { value_positions_[i] += iteration_size; } } + position_ += iteration_size; 
DCHECK_LE(position_, length_); return true; @@ -662,7 +688,7 @@ class NullPropagator { }; std::shared_ptr ToChunkedArray(const std::vector& values, - const std::shared_ptr& type) { + const TypeHolder& type) { std::vector> arrays; arrays.reserve(values.size()); for (const Datum& val : values) { @@ -672,7 +698,7 @@ std::shared_ptr ToChunkedArray(const std::vector& values, } arrays.emplace_back(val.make_array()); } - return std::make_shared(std::move(arrays), type); + return std::make_shared(std::move(arrays), type.GetSharedPtr()); } bool HaveChunkedArray(const std::vector& values) { @@ -691,9 +717,9 @@ class KernelExecutorImpl : public KernelExecutor { kernel_ctx_ = kernel_ctx; kernel_ = static_cast(args.kernel); - // Resolve the output descriptor for this kernel + // Resolve the output type for this kernel ARROW_ASSIGN_OR_RAISE( - output_descr_, kernel_->signature->out_type().Resolve(kernel_ctx_, args.inputs)); + output_type_, kernel_->signature->out_type().Resolve(kernel_ctx_, args.inputs)); return Status::OK(); } @@ -703,7 +729,7 @@ class KernelExecutorImpl : public KernelExecutor { // Kernel::mem_allocation is not MemAllocation::PREALLOCATE, then no // data buffers will be set Result> PrepareOutput(int64_t length) { - auto out = std::make_shared(output_descr_.type, length); + auto out = std::make_shared(output_type_.GetSharedPtr(), length); out->buffers.resize(output_num_buffers_); if (validity_preallocated_) { @@ -726,10 +752,10 @@ class KernelExecutorImpl : public KernelExecutor { Status CheckResultType(const Datum& out, const char* function_name) override { const auto& type = out.type(); - if (type != nullptr && !type->Equals(output_descr_.type)) { + if (type != nullptr && !type->Equals(*output_type_.type)) { return Status::TypeError( "kernel type result mismatch for function '", function_name, "': declared as ", - output_descr_.type->ToString(), ", actual is ", type->ToString()); + output_type_.type->ToString(), ", actual is ", type->ToString()); } return Status::OK(); } @@ -741,7 +767,7 @@ class KernelExecutorImpl : public KernelExecutor { KernelContext* kernel_ctx_; const KernelType* kernel_; - ValueDescr output_descr_; + TypeHolder output_type_; int output_num_buffers_; @@ -757,18 +783,23 @@ class KernelExecutorImpl : public KernelExecutor { class ScalarExecutor : public KernelExecutorImpl { public: Status Execute(const ExecBatch& batch, ExecListener* listener) override { - RETURN_NOT_OK(span_iterator_.Init(batch, output_descr_.shape, - exec_context()->exec_chunksize())); + RETURN_NOT_OK(span_iterator_.Init(batch, exec_context()->exec_chunksize())); - // TODO(wesm): remove if with ARROW-16757 - if (output_descr_.shape != ValueDescr::SCALAR) { - // If the executor is configured to produce a single large Array output for - // kernels supporting preallocation, then we do so up front and then - // iterate over slices of that large array. 
Otherwise, we preallocate prior - // to processing each span emitted from the ExecSpanIterator - RETURN_NOT_OK(SetupPreallocation(span_iterator_.length(), batch.values)); + if (batch.length == 0) { + // For zero-length batches, we do nothing except return a zero-length + // array of the correct output type + ARROW_ASSIGN_OR_RAISE(std::shared_ptr result, + MakeArrayOfNull(output_type_.GetSharedPtr(), /*length=*/0, + exec_context()->memory_pool())); + return EmitResult(result->data(), listener); } + // If the executor is configured to produce a single large Array output for + // kernels supporting preallocation, then we do so up front and then + // iterate over slices of that large array. Otherwise, we preallocate prior + // to processing each span emitted from the ExecSpanIterator + RETURN_NOT_OK(SetupPreallocation(span_iterator_.length(), batch.values)); + // ARROW-16756: Here we have to accommodate the distinct cases // // * Fully-preallocated contiguous output @@ -784,30 +815,28 @@ class ScalarExecutor : public KernelExecutorImpl { Datum WrapResults(const std::vector& inputs, const std::vector& outputs) override { - if (output_descr_.shape == ValueDescr::SCALAR) { - // TODO(wesm): to remove, see ARROW-16757 - DCHECK_EQ(outputs.size(), 1); - // Return as SCALAR - return outputs[0]; + // If execution yielded multiple chunks (because large arrays were split + // based on the ExecContext parameters, then the result is a ChunkedArray + if (HaveChunkedArray(inputs) || outputs.size() > 1) { + return ToChunkedArray(outputs, output_type_); } else { - // If execution yielded multiple chunks (because large arrays were split - // based on the ExecContext parameters, then the result is a ChunkedArray - if (HaveChunkedArray(inputs) || outputs.size() > 1) { - return ToChunkedArray(outputs, output_descr_.type); - } else if (outputs.size() == 1) { - // Outputs have just one element - return outputs[0]; - } else { - // XXX: In the case where no outputs are omitted, is returning a 0-length - // array always the correct move? - return MakeArrayOfNull(output_descr_.type, /*length=*/0, - exec_context()->memory_pool()) - .ValueOrDie(); - } + // Outputs have just one element + return outputs[0]; } } protected: + Status EmitResult(std::shared_ptr out, ExecListener* listener) { + if (span_iterator_.have_all_scalars()) { + // ARROW-16757 We boxed scalar inputs as ArraySpan, so now we have to + // unbox the output as a scalar + ARROW_ASSIGN_OR_RAISE(std::shared_ptr scalar, MakeArray(out)->GetScalar(0)); + return listener->OnResult(std::move(scalar)); + } else { + return listener->OnResult(std::move(out)); + } + } + Status ExecuteSpans(ExecListener* listener) { // We put the preallocation in an ArraySpan to be passed to the // kernel which is expecting to receive that. 
More @@ -817,6 +846,7 @@ class ScalarExecutor : public KernelExecutorImpl { ExecSpan input; ExecResult output; ArraySpan* output_span = output.array_span(); + if (preallocate_contiguous_) { // Make one big output allocation ARROW_ASSIGN_OR_RAISE(preallocation, PrepareOutput(span_iterator_.length())); @@ -832,7 +862,7 @@ class ScalarExecutor : public KernelExecutorImpl { } // Kernel execution is complete; emit result - RETURN_NOT_OK(listener->OnResult(std::move(preallocation))); + return EmitResult(std::move(preallocation), listener); } else { // Fully preallocating, but not contiguously // We preallocate (maybe) only for the output of processing the current @@ -842,15 +872,15 @@ class ScalarExecutor : public KernelExecutorImpl { output_span->SetMembers(*preallocation); RETURN_NOT_OK(ExecuteSingleSpan(input, &output)); // Emit the result for this chunk - RETURN_NOT_OK(listener->OnResult(std::move(preallocation))); + RETURN_NOT_OK(EmitResult(std::move(preallocation), listener)); } + return Status::OK(); } - return Status::OK(); } Status ExecuteSingleSpan(const ExecSpan& input, ExecResult* out) { ArraySpan* result_span = out->array_span(); - if (output_descr_.type->id() == Type::NA) { + if (output_type_.type->id() == Type::NA) { result_span->null_count = result_span->length; } else if (kernel_->null_handling == NullHandling::INTERSECTION) { if (!elide_validity_bitmap_) { @@ -859,7 +889,10 @@ class ScalarExecutor : public KernelExecutorImpl { } else if (kernel_->null_handling == NullHandling::OUTPUT_NOT_NULL) { result_span->null_count = 0; } - return kernel_->exec(kernel_ctx_, input, out); + RETURN_NOT_OK(kernel_->exec(kernel_ctx_, input, out)); + // Output type didn't change + DCHECK(out->is_array_span()); + return Status::OK(); } Status ExecuteNonSpans(ExecListener* listener) { @@ -873,60 +906,32 @@ class ScalarExecutor : public KernelExecutorImpl { ExecSpan input; ExecResult output; while (span_iterator_.Next(&input)) { - if (output_descr_.shape == ValueDescr::ARRAY) { - ARROW_ASSIGN_OR_RAISE(output.value, PrepareOutput(input.length)); - DCHECK(output.is_array_data()); - } else { - // For scalar outputs, we set a null scalar of the correct type to - // communicate the output type to the kernel if needed - // - // XXX: Is there some way to avoid this step? 
- // TODO: Remove this path in ARROW-16757 - output.value = MakeNullScalar(output_descr_.type); - } + ARROW_ASSIGN_OR_RAISE(output.value, PrepareOutput(input.length)); + DCHECK(output.is_array_data()); - if (output_descr_.shape == ValueDescr::ARRAY) { - ArrayData* out_arr = output.array_data().get(); - if (output_descr_.type->id() == Type::NA) { - out_arr->null_count = out_arr->length; - } else if (kernel_->null_handling == NullHandling::INTERSECTION) { - RETURN_NOT_OK(PropagateNulls(kernel_ctx_, input, out_arr)); - } else if (kernel_->null_handling == NullHandling::OUTPUT_NOT_NULL) { - out_arr->null_count = 0; - } - } else { - // TODO(wesm): to remove, see ARROW-16757 - if (kernel_->null_handling == NullHandling::INTERSECTION) { - // set scalar validity - output.scalar()->is_valid = - std::all_of(input.values.begin(), input.values.end(), - [](const ExecValue& input) { return input.scalar->is_valid; }); - } else if (kernel_->null_handling == NullHandling::OUTPUT_NOT_NULL) { - output.scalar()->is_valid = true; - } + ArrayData* out_arr = output.array_data().get(); + if (output_type_.type->id() == Type::NA) { + out_arr->null_count = out_arr->length; + } else if (kernel_->null_handling == NullHandling::INTERSECTION) { + RETURN_NOT_OK(PropagateNulls(kernel_ctx_, input, out_arr)); + } else if (kernel_->null_handling == NullHandling::OUTPUT_NOT_NULL) { + out_arr->null_count = 0; } RETURN_NOT_OK(kernel_->exec(kernel_ctx_, input, &output)); - // Assert that the kernel did not alter the shape of the output - // type. After ARROW-16577 delete this since ValueDescr::SCALAR will not - // exist anymore - DCHECK(((output_descr_.shape == ValueDescr::ARRAY) && output.is_array_data()) || - ((output_descr_.shape == ValueDescr::SCALAR) && output.is_scalar())); + // Output type didn't change + DCHECK(output.is_array_data()); // Emit a result for each chunk - if (output_descr_.shape == ValueDescr::ARRAY) { - RETURN_NOT_OK(listener->OnResult(output.array_data())); - } else { - RETURN_NOT_OK(listener->OnResult(output.scalar())); - } + RETURN_NOT_OK(EmitResult(std::move(output.array_data()), listener)); } return Status::OK(); } Status SetupPreallocation(int64_t total_length, const std::vector& args) { - output_num_buffers_ = static_cast(output_descr_.type->layout().buffers.size()); - auto out_type_id = output_descr_.type->id(); + output_num_buffers_ = static_cast(output_type_.type->layout().buffers.size()); + auto out_type_id = output_type_.type->id(); // Default to no validity pre-allocation for following cases: // - Output Array is NullArray // - kernel_->null_handling is COMPUTED_NO_PREALLOCATE or OUTPUT_NOT_NULL @@ -950,7 +955,7 @@ class ScalarExecutor : public KernelExecutorImpl { } } if (kernel_->mem_allocation == MemAllocation::PREALLOCATE) { - ComputeDataPreallocate(*output_descr_.type, &data_preallocated_); + ComputeDataPreallocate(*output_type_.type, &data_preallocated_); } // Validity bitmap either preallocated or elided, and all data @@ -995,14 +1000,28 @@ class ScalarExecutor : public KernelExecutorImpl { ExecSpanIterator span_iterator_; }; +namespace { + +Status CheckCanExecuteChunked(const VectorKernel* kernel) { + if (kernel->exec_chunked == nullptr) { + return Status::Invalid( + "Vector kernel cannot execute chunkwise and no " + "chunked exec function was defined"); + } + + if (kernel->null_handling == NullHandling::INTERSECTION) { + return Status::Invalid( + "Null pre-propagation is unsupported for ChunkedArray " + "execution in vector kernels"); + } + return Status::OK(); +} + +} // namespace + 
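
// Illustrative sketch, not taken from the patch: CheckCanExecuteChunked()
// above captures the two requirements for whole-ChunkedArray execution: the
// VectorKernel must provide exec_chunked, and it must not rely on
// NullHandling::INTERSECTION pre-propagation. A hypothetical chunked exec
// matching the Status(KernelContext*, const ExecBatch&, Datum*) call made
// from VectorExecutor::ExecChunked() below:
arrow::Status PassThroughChunked(arrow::compute::KernelContext* /*ctx*/,
                                 const arrow::compute::ExecBatch& batch,
                                 arrow::Datum* out) {
  // Forward the first argument unchanged; it may be an Array or a
  // ChunkedArray, and ExecChunked() emits either kind via EmitResult().
  *out = batch[0];
  return arrow::Status::OK();
}
// A kernel registering this would also set, for example:
//   kernel.exec_chunked = PassThroughChunked;
//   kernel.null_handling = NullHandling::OUTPUT_NOT_NULL;
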
class VectorExecutor : public KernelExecutorImpl { public: Status Execute(const ExecBatch& batch, ExecListener* listener) override { - // TODO(wesm): remove in ARROW-16577 - if (output_descr_.shape == ValueDescr::SCALAR) { - return Status::Invalid("VectorExecutor only supports array output types"); - } - // Some vector kernels have a separate code path for handling // chunked arrays (VectorKernel::exec_chunked) so we check if we // have any chunked arrays. If we do and an exec_chunked function @@ -1012,19 +1031,18 @@ class VectorExecutor : public KernelExecutorImpl { if (arg.is_chunked_array()) have_chunked_arrays = true; } - output_num_buffers_ = static_cast(output_descr_.type->layout().buffers.size()); + output_num_buffers_ = static_cast(output_type_.type->layout().buffers.size()); // Decide if we need to preallocate memory for this kernel validity_preallocated_ = (kernel_->null_handling != NullHandling::COMPUTED_NO_PREALLOCATE && kernel_->null_handling != NullHandling::OUTPUT_NOT_NULL); if (kernel_->mem_allocation == MemAllocation::PREALLOCATE) { - ComputeDataPreallocate(*output_descr_.type, &data_preallocated_); + ComputeDataPreallocate(*output_type_.type, &data_preallocated_); } if (kernel_->can_execute_chunkwise) { - RETURN_NOT_OK(span_iterator_.Init(batch, output_descr_.shape, - exec_context()->exec_chunksize())); + RETURN_NOT_OK(span_iterator_.Init(batch, exec_context()->exec_chunksize())); ExecSpan span; while (span_iterator_.Next(&span)) { RETURN_NOT_OK(Exec(span, listener)); @@ -1038,7 +1056,11 @@ class VectorExecutor : public KernelExecutorImpl { } else { // No chunked arrays. We pack the args into an ExecSpan and // call the regular exec code path - RETURN_NOT_OK(Exec(ExecSpan(batch), listener)); + ExecSpan span(batch); + if (CheckIfAllScalar(batch)) { + PromoteExecSpanScalars(&span); + } + RETURN_NOT_OK(Exec(span, listener)); } } @@ -1058,63 +1080,46 @@ class VectorExecutor : public KernelExecutorImpl { // If execution yielded multiple chunks (because large arrays were split // based on the ExecContext parameters, then the result is a ChunkedArray if (kernel_->output_chunked && (HaveChunkedArray(inputs) || outputs.size() > 1)) { - return ToChunkedArray(outputs, output_descr_.type); - } else if (outputs.size() == 1) { + return ToChunkedArray(outputs, output_type_.GetSharedPtr()); + } else { // Outputs have just one element return outputs[0]; - } else { - // XXX: In the case where no outputs are omitted, is returning a 0-length - // array always the correct move? - return MakeArrayOfNull(output_descr_.type, /*length=*/0).ValueOrDie(); } } protected: - Status Exec(const ExecSpan& span, ExecListener* listener) { - ExecResult out; - - // We preallocate (maybe) only for the output of processing the current - // batch, but create an output ArrayData instance regardless - ARROW_ASSIGN_OR_RAISE(out.value, PrepareOutput(span.length)); - - if (kernel_->null_handling == NullHandling::INTERSECTION) { - RETURN_NOT_OK(PropagateNulls(kernel_ctx_, span, out.array_data().get())); - } - RETURN_NOT_OK(kernel_->exec(kernel_ctx_, span, &out)); + Status EmitResult(Datum result, ExecListener* listener) { if (!kernel_->finalize) { // If there is no result finalizer (e.g. 
for hash-based functions, we can // emit the processed batch right away rather than waiting - RETURN_NOT_OK(listener->OnResult(out.array_data())); + RETURN_NOT_OK(listener->OnResult(std::move(result))); } else { - results_.emplace_back(out.array_data()); + results_.emplace_back(std::move(result)); } return Status::OK(); } - Status ExecChunked(const ExecBatch& batch, ExecListener* listener) { - if (kernel_->exec_chunked == nullptr) { - return Status::Invalid( - "Vector kernel cannot execute chunkwise and no " - "chunked exec function was defined"); - } - + Status Exec(const ExecSpan& span, ExecListener* listener) { + ExecResult out; + ARROW_ASSIGN_OR_RAISE(out.value, PrepareOutput(span.length)); if (kernel_->null_handling == NullHandling::INTERSECTION) { - return Status::Invalid( - "Null pre-propagation is unsupported for ChunkedArray " - "execution in vector kernels"); + RETURN_NOT_OK(PropagateNulls(kernel_ctx_, span, out.array_data().get())); } + RETURN_NOT_OK(kernel_->exec(kernel_ctx_, span, &out)); + return EmitResult(std::move(out.array_data()), listener); + } + Status ExecChunked(const ExecBatch& batch, ExecListener* listener) { + RETURN_NOT_OK(CheckCanExecuteChunked(kernel_)); Datum out; ARROW_ASSIGN_OR_RAISE(out.value, PrepareOutput(batch.length)); RETURN_NOT_OK(kernel_->exec_chunked(kernel_ctx_, batch, &out)); - if (!kernel_->finalize) { - // If there is no result finalizer (e.g. for hash-based functions, we can - // emit the processed batch right away rather than waiting - RETURN_NOT_OK(listener->OnResult(std::move(out))); + if (out.is_array()) { + return EmitResult(std::move(out.array()), listener); } else { - results_.emplace_back(std::move(out)); + DCHECK(out.is_chunked_array()); + return EmitResult(std::move(out.chunked_array()), listener); } - return Status::OK(); } ExecSpanIterator span_iterator_; @@ -1124,7 +1129,7 @@ class VectorExecutor : public KernelExecutorImpl { class ScalarAggExecutor : public KernelExecutorImpl { public: Status Init(KernelContext* ctx, KernelInitArgs args) override { - input_descrs_ = &args.inputs; + input_types_ = &args.inputs; options_ = args.options; return KernelExecutorImpl::Init(ctx, args); } @@ -1160,9 +1165,8 @@ class ScalarAggExecutor : public KernelExecutorImpl { private: Status Consume(const ExecBatch& batch) { // FIXME(ARROW-11840) don't merge *any* aggegates for every batch - ARROW_ASSIGN_OR_RAISE( - auto batch_state, - kernel_->init(kernel_ctx_, {kernel_, *input_descrs_, options_})); + ARROW_ASSIGN_OR_RAISE(auto batch_state, + kernel_->init(kernel_ctx_, {kernel_, *input_types_, options_})); if (batch_state == nullptr) { return Status::Invalid("ScalarAggregation requires non-null kernel state"); @@ -1177,7 +1181,7 @@ class ScalarAggExecutor : public KernelExecutorImpl { } std::unique_ptr batch_iterator_; - const std::vector* input_descrs_; + const std::vector* input_types_; const FunctionOptions* options_; }; @@ -1358,8 +1362,7 @@ Result> SelectionVector::FromMask( Result CallFunction(const std::string& func_name, const std::vector& args, const FunctionOptions* options, ExecContext* ctx) { if (ctx == nullptr) { - ExecContext default_ctx; - return CallFunction(func_name, args, options, &default_ctx); + ctx = default_exec_context(); } ARROW_ASSIGN_OR_RAISE(std::shared_ptr func, ctx->func_registry()->GetFunction(func_name)); @@ -1374,8 +1377,7 @@ Result CallFunction(const std::string& func_name, const std::vector CallFunction(const std::string& func_name, const ExecBatch& batch, const FunctionOptions* options, ExecContext* ctx) { if (ctx 
== nullptr) { - ExecContext default_ctx; - return CallFunction(func_name, batch, options, &default_ctx); + ctx = default_exec_context(); } ARROW_ASSIGN_OR_RAISE(std::shared_ptr func, ctx->func_registry()->GetFunction(func_name)); diff --git a/cpp/src/arrow/compute/exec.h b/cpp/src/arrow/compute/exec.h index 8fd938ce299..f0b951dccb8 100644 --- a/cpp/src/arrow/compute/exec.h +++ b/cpp/src/arrow/compute/exec.h @@ -235,12 +235,11 @@ struct ARROW_EXPORT ExecBatch { ExecBatch Slice(int64_t offset, int64_t length) const; - /// \brief A convenience for returning the ValueDescr objects (types and - /// shapes) from the batch. - std::vector GetDescriptors() const { - std::vector result; + /// \brief A convenience for returning the types from the batch. + std::vector GetTypes() const { + std::vector result; for (const auto& value : this->values) { - result.emplace_back(value.descr()); + result.emplace_back(value.type()); } return result; } @@ -254,19 +253,16 @@ inline bool operator==(const ExecBatch& l, const ExecBatch& r) { return l.Equals inline bool operator!=(const ExecBatch& l, const ExecBatch& r) { return !l.Equals(r); } struct ExecValue { - enum Kind { ARRAY, SCALAR }; - Kind kind = ARRAY; ArraySpan array; - const Scalar* scalar; + const Scalar* scalar = NULLPTR; ExecValue(Scalar* scalar) // NOLINT implicit conversion - : kind(SCALAR), scalar(scalar) {} + : scalar(scalar) {} ExecValue(ArraySpan array) // NOLINT implicit conversion - : kind(ARRAY), array(std::move(array)) {} + : array(std::move(array)) {} - ExecValue(const ArrayData& array) // NOLINT implicit conversion - : kind(ARRAY) { + ExecValue(const ArrayData& array) { // NOLINT implicit conversion this->array.SetMembers(array); } @@ -278,31 +274,21 @@ struct ExecValue { int64_t length() const { return this->is_array() ? this->array.length : 1; } - bool is_array() const { return this->kind == ARRAY; } - bool is_scalar() const { return this->kind == SCALAR; } + bool is_array() const { return this->scalar == NULLPTR; } + bool is_scalar() const { return !this->is_array(); } void SetArray(const ArrayData& array) { - this->kind = ARRAY; this->array.SetMembers(array); + this->scalar = NULLPTR; } - void SetScalar(const Scalar* scalar) { - this->kind = SCALAR; - this->scalar = scalar; - } + void SetScalar(const Scalar* scalar) { this->scalar = scalar; } template const ExactType& scalar_as() const { return ::arrow::internal::checked_cast(*this->scalar); } - /// XXX: here only temporarily until type resolution can be cleaned - /// up to not use ValueDescr - ValueDescr descr() const { - ValueDescr::Shape shape = this->is_array() ? ValueDescr::ARRAY : ValueDescr::SCALAR; - return ValueDescr(const_cast(this->type())->shared_from_this(), shape); - } - /// XXX: here temporarily for compatibility with datum, see /// e.g. 
MakeStructExec in scalar_nested.cc int64_t null_count() const { @@ -314,7 +300,7 @@ struct ExecValue { } const DataType* type() const { - if (this->kind == ARRAY) { + if (this->is_array()) { return array.type; } else { return scalar->type.get(); @@ -324,29 +310,21 @@ struct ExecValue { struct ARROW_EXPORT ExecResult { // The default value of the variant is ArraySpan - // TODO(wesm): remove Scalar output modality in ARROW-16577 - util::Variant, std::shared_ptr> value; + util::Variant> value; int64_t length() const { if (this->is_array_span()) { return this->array_span()->length; - } else if (this->is_array_data()) { - return this->array_data()->length; } else { - // Should not reach here - return 1; + return this->array_data()->length; } } const DataType* type() const { - switch (this->value.index()) { - case 0: - return this->array_span()->type; - case 1: - return this->array_data()->type.get(); - default: - // scalar - return this->scalar()->type.get(); + if (this->is_array_span()) { + return this->array_span()->type; + } else { + return this->array_data()->type.get(); } } @@ -360,12 +338,6 @@ struct ARROW_EXPORT ExecResult { } bool is_array_data() const { return this->value.index() == 1; } - - const std::shared_ptr& scalar() const { - return util::get>(this->value); - } - - bool is_scalar() const { return this->value.index() == 2; } }; /// \brief A "lightweight" column batch object which contains no @@ -395,15 +367,6 @@ struct ARROW_EXPORT ExecSpan { } } - bool is_all_scalar() const { - for (const ExecValue& value : this->values) { - if (value.is_array()) { - return false; - } - } - return true; - } - /// \brief Return the value at the i-th index template inline const ExecValue& operator[](index_type i) const { @@ -412,7 +375,7 @@ struct ARROW_EXPORT ExecSpan { void AddOffset(int64_t offset) { for (ExecValue& value : values) { - if (value.kind == ExecValue::ARRAY) { + if (value.is_array()) { value.array.AddOffset(offset); } } @@ -420,7 +383,7 @@ struct ARROW_EXPORT ExecSpan { void SetOffset(int64_t offset) { for (ExecValue& value : values) { - if (value.kind == ExecValue::ARRAY) { + if (value.is_array()) { value.array.SetOffset(offset); } } @@ -429,12 +392,10 @@ struct ARROW_EXPORT ExecSpan { /// \brief A convenience for the number of values / arguments. 
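
// Illustrative sketch, not taken from the patch: with the Kind enum removed,
// an ExecValue is a scalar exactly when its `scalar` pointer is non-null, and
// kernels branch only on is_array()/is_scalar(). A hypothetical unary int32
// negation written against the span API declared here:
arrow::Status NegateInt32Exec(arrow::compute::KernelContext* /*ctx*/,
                              const arrow::compute::ExecSpan& batch,
                              arrow::compute::ExecResult* out) {
  const arrow::compute::ExecValue& arg = batch[0];
  int32_t* out_values = out->array_span()->GetValues<int32_t>(1);
  if (arg.is_array()) {
    const int32_t* values = arg.array.GetValues<int32_t>(1);
    for (int64_t i = 0; i < batch.length; ++i) {
      out_values[i] = -values[i];
    }
  } else {
    // Reachable only for kernels whose scalar arguments are not promoted to
    // length-1 spans by the executor
    out_values[0] = -arg.scalar_as<arrow::Int32Scalar>().value;
  }
  return arrow::Status::OK();
}
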
int num_values() const { return static_cast(values.size()); } - // XXX: eliminate the need for ValueDescr; copied temporarily from - // ExecBatch - std::vector GetDescriptors() const { - std::vector result; + std::vector GetTypes() const { + std::vector result; for (const auto& value : this->values) { - result.emplace_back(value.descr()); + result.emplace_back(value.type()); } return result; } diff --git a/cpp/src/arrow/compute/exec/aggregate.cc b/cpp/src/arrow/compute/exec/aggregate.cc index 41b5bb75b66..5cb9a9c5633 100644 --- a/cpp/src/arrow/compute/exec/aggregate.cc +++ b/cpp/src/arrow/compute/exec/aggregate.cc @@ -31,20 +31,19 @@ namespace internal { Result> GetKernels( ExecContext* ctx, const std::vector& aggregates, - const std::vector& in_descrs) { - if (aggregates.size() != in_descrs.size()) { + const std::vector& in_types) { + if (aggregates.size() != in_types.size()) { return Status::Invalid(aggregates.size(), " aggregate functions were specified but ", - in_descrs.size(), " arguments were provided."); + in_types.size(), " arguments were provided."); } - std::vector kernels(in_descrs.size()); + std::vector kernels(in_types.size()); for (size_t i = 0; i < aggregates.size(); ++i) { ARROW_ASSIGN_OR_RAISE(auto function, ctx->func_registry()->GetFunction(aggregates[i].function)); - ARROW_ASSIGN_OR_RAISE( - const Kernel* kernel, - function->DispatchExact({in_descrs[i], ValueDescr::Array(uint32())})); + ARROW_ASSIGN_OR_RAISE(const Kernel* kernel, + function->DispatchExact({in_types[i], uint32()})); kernels[i] = static_cast(kernel); } return kernels; @@ -52,7 +51,7 @@ Result> GetKernels( Result>> InitKernels( const std::vector& kernels, ExecContext* ctx, - const std::vector& aggregates, const std::vector& in_descrs) { + const std::vector& aggregates, const std::vector& in_types) { std::vector> states(kernels.size()); for (size_t i = 0; i < aggregates.size(); ++i) { @@ -69,14 +68,13 @@ Result>> InitKernels( } KernelContext kernel_ctx{ctx}; - ARROW_ASSIGN_OR_RAISE( - states[i], - kernels[i]->init(&kernel_ctx, KernelInitArgs{kernels[i], - { - in_descrs[i], - ValueDescr::Array(uint32()), - }, - options})); + ARROW_ASSIGN_OR_RAISE(states[i], + kernels[i]->init(&kernel_ctx, KernelInitArgs{kernels[i], + { + in_types[i], + uint32(), + }, + options})); } return std::move(states); @@ -86,19 +84,16 @@ Result ResolveKernels( const std::vector& aggregates, const std::vector& kernels, const std::vector>& states, ExecContext* ctx, - const std::vector& descrs) { - FieldVector fields(descrs.size()); + const std::vector& types) { + FieldVector fields(types.size()); for (size_t i = 0; i < kernels.size(); ++i) { KernelContext kernel_ctx{ctx}; kernel_ctx.SetState(states[i].get()); - ARROW_ASSIGN_OR_RAISE(auto descr, kernels[i]->signature->out_type().Resolve( - &kernel_ctx, { - descrs[i], - ValueDescr::Array(uint32()), - })); - fields[i] = field(aggregates[i].function, std::move(descr.type)); + ARROW_ASSIGN_OR_RAISE(auto type, kernels[i]->signature->out_type().Resolve( + &kernel_ctx, {types[i], uint32()})); + fields[i] = field(aggregates[i].function, type.GetSharedPtr()); } return fields; } @@ -122,18 +117,17 @@ Result GroupBy(const std::vector& arguments, const std::vectorparallelism()); for (auto& state : states) { - ARROW_ASSIGN_OR_RAISE(state, - InitKernels(kernels, ctx, aggregates, argument_descrs)); + ARROW_ASSIGN_OR_RAISE(state, InitKernels(kernels, ctx, aggregates, argument_types)); } ARROW_ASSIGN_OR_RAISE( - out_fields, ResolveKernels(aggregates, kernels, states[0], ctx, argument_descrs)); + 
out_fields, ResolveKernels(aggregates, kernels, states[0], ctx, argument_types)); ARROW_ASSIGN_OR_RAISE( argument_batch_iterator, @@ -142,19 +136,19 @@ Result GroupBy(const std::vector& arguments, const std::vector> groupers(task_group->parallelism()); for (auto& grouper : groupers) { - ARROW_ASSIGN_OR_RAISE(grouper, Grouper::Make(key_descrs, ctx)); + ARROW_ASSIGN_OR_RAISE(grouper, Grouper::Make(key_types, ctx)); } std::mutex mutex; std::unordered_map thread_ids; int i = 0; - for (ValueDescr& key_descr : key_descrs) { - out_fields.push_back(field("key_" + std::to_string(i++), std::move(key_descr.type))); + for (const TypeHolder& key_type : key_types) { + out_fields.push_back(field("key_" + std::to_string(i++), key_type.GetSharedPtr())); } ARROW_ASSIGN_OR_RAISE( diff --git a/cpp/src/arrow/compute/exec/aggregate.h b/cpp/src/arrow/compute/exec/aggregate.h index 753b0a8c47e..72990f3b6e7 100644 --- a/cpp/src/arrow/compute/exec/aggregate.h +++ b/cpp/src/arrow/compute/exec/aggregate.h @@ -42,17 +42,17 @@ Result GroupBy(const std::vector& arguments, const std::vector> GetKernels( ExecContext* ctx, const std::vector& aggregates, - const std::vector& in_descrs); + const std::vector& in_types); Result>> InitKernels( const std::vector& kernels, ExecContext* ctx, - const std::vector& aggregates, const std::vector& in_descrs); + const std::vector& aggregates, const std::vector& in_types); Result ResolveKernels( const std::vector& aggregates, const std::vector& kernels, const std::vector>& states, ExecContext* ctx, - const std::vector& descrs); + const std::vector& in_types); } // namespace internal } // namespace compute diff --git a/cpp/src/arrow/compute/exec/aggregate_node.cc b/cpp/src/arrow/compute/exec/aggregate_node.cc index 8c7899c41ec..0131319be3b 100644 --- a/cpp/src/arrow/compute/exec/aggregate_node.cc +++ b/cpp/src/arrow/compute/exec/aggregate_node.cc @@ -104,8 +104,7 @@ class ScalarAggregateNode : public ExecNode { aggregates[i].function); } - auto in_type = ValueDescr::Array(input_schema.field(target_field_ids[i])->type()); - + TypeHolder in_type(input_schema.field(target_field_ids[i])->type().get()); ARROW_ASSIGN_OR_RAISE(const Kernel* kernel, function->DispatchExact({in_type})); kernels[i] = static_cast(kernel); @@ -125,10 +124,10 @@ class ScalarAggregateNode : public ExecNode { // pick one to resolve the kernel signature kernel_ctx.SetState(states[i][0].get()); - ARROW_ASSIGN_OR_RAISE( - auto descr, kernels[i]->signature->out_type().Resolve(&kernel_ctx, {in_type})); + ARROW_ASSIGN_OR_RAISE(auto out_type, kernels[i]->signature->out_type().Resolve( + &kernel_ctx, {in_type})); - fields[i] = field(aggregate_options.aggregates[i].name, std::move(descr.type)); + fields[i] = field(aggregate_options.aggregates[i].name, out_type.GetSharedPtr()); } return plan->EmplaceNode( @@ -313,25 +312,24 @@ class GroupByNode : public ExecNode { } // Build vector of aggregate source field data types - std::vector agg_src_descrs(aggs.size()); + std::vector agg_src_types(aggs.size()); for (size_t i = 0; i < aggs.size(); ++i) { auto agg_src_field_id = agg_src_field_ids[i]; - agg_src_descrs[i] = - ValueDescr(input_schema->field(agg_src_field_id)->type(), ValueDescr::ARRAY); + agg_src_types[i] = input_schema->field(agg_src_field_id)->type().get(); } auto ctx = input->plan()->exec_context(); // Construct aggregates ARROW_ASSIGN_OR_RAISE(auto agg_kernels, - internal::GetKernels(ctx, aggs, agg_src_descrs)); + internal::GetKernels(ctx, aggs, agg_src_types)); ARROW_ASSIGN_OR_RAISE(auto agg_states, - 
internal::InitKernels(agg_kernels, ctx, aggs, agg_src_descrs)); + internal::InitKernels(agg_kernels, ctx, aggs, agg_src_types)); ARROW_ASSIGN_OR_RAISE( FieldVector agg_result_fields, - internal::ResolveKernels(aggs, agg_kernels, agg_states, ctx, agg_src_descrs)); + internal::ResolveKernels(aggs, agg_kernels, agg_states, ctx, agg_src_types)); // Build field vector for output schema FieldVector output_fields{keys.size() + aggs.size()}; @@ -621,26 +619,24 @@ class GroupByNode : public ExecNode { if (state->grouper != nullptr) return Status::OK(); // Build vector of key field data types - std::vector key_descrs(key_field_ids_.size()); + std::vector key_types(key_field_ids_.size()); for (size_t i = 0; i < key_field_ids_.size(); ++i) { auto key_field_id = key_field_ids_[i]; - key_descrs[i] = ValueDescr(input_schema->field(key_field_id)->type()); + key_types[i] = input_schema->field(key_field_id)->type().get(); } // Construct grouper - ARROW_ASSIGN_OR_RAISE(state->grouper, Grouper::Make(key_descrs, ctx_)); + ARROW_ASSIGN_OR_RAISE(state->grouper, Grouper::Make(key_types, ctx_)); // Build vector of aggregate source field data types - std::vector agg_src_descrs(agg_kernels_.size()); + std::vector agg_src_types(agg_kernels_.size()); for (size_t i = 0; i < agg_kernels_.size(); ++i) { auto agg_src_field_id = agg_src_field_ids_[i]; - agg_src_descrs[i] = - ValueDescr(input_schema->field(agg_src_field_id)->type(), ValueDescr::ARRAY); + agg_src_types[i] = input_schema->field(agg_src_field_id)->type().get(); } - ARROW_ASSIGN_OR_RAISE( - state->agg_states, - internal::InitKernels(agg_kernels_, ctx_, aggs_, agg_src_descrs)); + ARROW_ASSIGN_OR_RAISE(state->agg_states, internal::InitKernels(agg_kernels_, ctx_, + aggs_, agg_src_types)); return Status::OK(); } diff --git a/cpp/src/arrow/compute/exec/expression.cc b/cpp/src/arrow/compute/exec/expression.cc index b796f5cda3b..c890b3c5935 100644 --- a/cpp/src/arrow/compute/exec/expression.cc +++ b/cpp/src/arrow/compute/exec/expression.cc @@ -64,7 +64,7 @@ Expression::Expression(Parameter parameter) Expression literal(Datum lit) { return Expression(std::move(lit)); } Expression field_ref(FieldRef ref) { - return Expression(Expression::Parameter{std::move(ref), ValueDescr{}, {-1}}); + return Expression(Expression::Parameter{std::move(ref), TypeHolder{}, {-1}}); } Expression call(std::string function, std::vector arguments, @@ -93,36 +93,18 @@ const Expression::Call* Expression::call() const { return util::get_if(impl_.get()); } -ValueDescr Expression::descr() const { - if (impl_ == nullptr) return {}; +const DataType* Expression::type() const { + if (impl_ == nullptr) return nullptr; - if (auto lit = literal()) { - return lit->descr(); - } - - if (auto parameter = this->parameter()) { - return parameter->descr; - } - - return CallNotNull(*this)->descr; -} - -// This is a module-global singleton to avoid synchronization costs of a -// function-static singleton. 
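
// Illustrative sketch, not taken from the patch: TypeHolder is the
// non-owning replacement for ValueDescr in these signatures. It wraps a
// const DataType* and only materializes shared ownership when asked for it.
// Hypothetical function name; uses only conversions exercised in this diff
// and assumes the enclosing namespace arrow::compute.
void TypeHolderSketch(const Schema& schema) {
  std::vector<TypeHolder> key_types;
  for (const std::shared_ptr<Field>& f : schema.fields()) {
    // Implicit conversion from shared_ptr<DataType>; assigning from
    // f->type().get() (a raw pointer) would be the non-owning variant
    key_types.emplace_back(f->type());
  }
  for (const TypeHolder& th : key_types) {
    // th.type is a const DataType*; GetSharedPtr() recovers a shared_ptr
    std::shared_ptr<DataType> owned = th.GetSharedPtr();
    DCHECK(owned->Equals(*th.type));
  }
}
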
-static const std::shared_ptr kNoType; - -const std::shared_ptr& Expression::type() const { - if (impl_ == nullptr) return kNoType; - - if (auto lit = literal()) { - return lit->type(); + if (const Datum* lit = literal()) { + return lit->type().get(); } - if (auto parameter = this->parameter()) { - return parameter->descr.type; + if (const Parameter* parameter = this->parameter()) { + return parameter->type.type; } - return CallNotNull(*this)->descr.type; + return CallNotNull(*this)->type.type; } namespace { @@ -276,7 +258,7 @@ size_t Expression::hash() const { bool Expression::IsBound() const { if (type() == nullptr) return false; - if (auto call = this->call()) { + if (const Call* call = this->call()) { if (call->kernel == nullptr) return false; for (const Expression& arg : call->arguments) { @@ -338,7 +320,7 @@ util::optional GetNullHandling( } // namespace bool Expression::IsSatisfiable() const { - if (!type()) return true; + if (type() == nullptr) return true; if (type()->id() != Type::BOOL) return true; if (auto lit = literal()) { @@ -382,25 +364,20 @@ Result BindNonRecursive(Expression::Call call, bool insert_implicit_ DCHECK(std::all_of(call.arguments.begin(), call.arguments.end(), [](const Expression& argument) { return argument.IsBound(); })); - auto descrs = GetDescriptors(call.arguments); + std::vector types = GetTypes(call.arguments); ARROW_ASSIGN_OR_RAISE(call.function, GetFunction(call, exec_context)); if (!insert_implicit_casts) { - ARROW_ASSIGN_OR_RAISE(call.kernel, call.function->DispatchExact(descrs)); + ARROW_ASSIGN_OR_RAISE(call.kernel, call.function->DispatchExact(types)); } else { - ARROW_ASSIGN_OR_RAISE(call.kernel, call.function->DispatchBest(&descrs)); + ARROW_ASSIGN_OR_RAISE(call.kernel, call.function->DispatchBest(&types)); - for (size_t i = 0; i < descrs.size(); ++i) { - if (descrs[i] == call.arguments[i].descr()) continue; + for (size_t i = 0; i < types.size(); ++i) { + if (types[i] == call.arguments[i].type()) continue; - if (descrs[i].shape != call.arguments[i].descr().shape) { - return Status::NotImplemented( - "Automatic broadcasting of scalars arguments to arrays in ", - Expression(std::move(call)).ToString()); - } - - if (auto lit = call.arguments[i].literal()) { - ARROW_ASSIGN_OR_RAISE(Datum new_lit, compute::Cast(*lit, descrs[i].type)); + if (const Datum* lit = call.arguments[i].literal()) { + ARROW_ASSIGN_OR_RAISE(Datum new_lit, + compute::Cast(*lit, types[i].GetSharedPtr())); call.arguments[i] = literal(std::move(new_lit)); continue; } @@ -409,8 +386,10 @@ Result BindNonRecursive(Expression::Call call, bool insert_implicit_ Expression::Call implicit_cast; implicit_cast.function_name = "cast"; implicit_cast.arguments = {std::move(call.arguments[i])}; + + // TODO(wesm): Use TypeHolder in options implicit_cast.options = std::make_shared( - compute::CastOptions::Safe(descrs[i].type)); + compute::CastOptions::Safe(types[i].GetSharedPtr())); ARROW_ASSIGN_OR_RAISE( call.arguments[i], @@ -425,43 +404,41 @@ Result BindNonRecursive(Expression::Call call, bool insert_implicit_ call.options ? 
call.options.get() : call.function->default_options(); ARROW_ASSIGN_OR_RAISE( call.kernel_state, - call.kernel->init(&kernel_context, {call.kernel, descrs, options})); + call.kernel->init(&kernel_context, {call.kernel, types, options})); kernel_context.SetState(call.kernel_state.get()); } ARROW_ASSIGN_OR_RAISE( - call.descr, call.kernel->signature->out_type().Resolve(&kernel_context, descrs)); + call.type, call.kernel->signature->out_type().Resolve(&kernel_context, types)); return Expression(std::move(call)); } template Result BindImpl(Expression expr, const TypeOrSchema& in, - ValueDescr::Shape shape, compute::ExecContext* exec_context) { + compute::ExecContext* exec_context) { if (exec_context == nullptr) { compute::ExecContext exec_context; - return BindImpl(std::move(expr), in, shape, &exec_context); + return BindImpl(std::move(expr), in, &exec_context); } if (expr.literal()) return expr; - if (auto ref = expr.field_ref()) { - ARROW_ASSIGN_OR_RAISE(auto path, ref->FindOne(in)); + if (const FieldRef* ref = expr.field_ref()) { + ARROW_ASSIGN_OR_RAISE(FieldPath path, ref->FindOne(in)); - auto bound = *expr.parameter(); - bound.indices.resize(path.indices().size()); - std::copy(path.indices().begin(), path.indices().end(), bound.indices.begin()); + Expression::Parameter param = *expr.parameter(); + param.indices.resize(path.indices().size()); + std::copy(path.indices().begin(), path.indices().end(), param.indices.begin()); ARROW_ASSIGN_OR_RAISE(auto field, path.Get(in)); - bound.descr.type = field->type(); - bound.descr.shape = shape; - return Expression{std::move(bound)}; + param.type = field->type(); + return Expression{std::move(param)}; } auto call = *CallNotNull(expr); for (auto& argument : call.arguments) { - ARROW_ASSIGN_OR_RAISE(argument, - BindImpl(std::move(argument), in, shape, exec_context)); + ARROW_ASSIGN_OR_RAISE(argument, BindImpl(std::move(argument), in, exec_context)); } return BindNonRecursive(std::move(call), /*insert_implicit_casts=*/true, exec_context); @@ -469,14 +446,14 @@ Result BindImpl(Expression expr, const TypeOrSchema& in, } // namespace -Result Expression::Bind(const ValueDescr& in, +Result Expression::Bind(const TypeHolder& in, compute::ExecContext* exec_context) const { - return BindImpl(*this, *in.type, in.shape, exec_context); + return BindImpl(*this, *in.type, exec_context); } Result Expression::Bind(const Schema& in_schema, compute::ExecContext* exec_context) const { - return BindImpl(*this, in_schema, ValueDescr::ARRAY, exec_context); + return BindImpl(*this, in_schema, exec_context); } Result MakeExecBatch(const Schema& full_schema, const Datum& partial) { @@ -558,7 +535,7 @@ Result ExecuteScalarExpression(const Expression& expr, const ExecBatch& i if (auto lit = expr.literal()) return *lit; if (auto param = expr.parameter()) { - if (param->descr.type->id() == Type::NA) { + if (param->type.id() == Type::NA) { return MakeNullScalar(null()); } @@ -569,10 +546,10 @@ Result ExecuteScalarExpression(const Expression& expr, const ExecBatch& i ARROW_ASSIGN_OR_RAISE( field, compute::CallFunction("struct_field", {std::move(field)}, &options)); } - if (!field.type()->Equals(param->descr.type)) { + if (!field.type()->Equals(*param->type.type)) { return Status::Invalid("Referenced field ", expr.ToString(), " was ", field.type()->ToString(), " but should have been ", - param->descr.type->ToString()); + param->type.ToString()); } return field; @@ -596,10 +573,10 @@ Result ExecuteScalarExpression(const Expression& expr, const ExecBatch& i compute::KernelContext 
kernel_context(exec_context, call->kernel); kernel_context.SetState(call->kernel_state.get()); - auto kernel = call->kernel; - auto descrs = GetDescriptors(arguments); + const Kernel* kernel = call->kernel; + std::vector types = GetTypes(arguments); auto options = call->options.get(); - RETURN_NOT_OK(executor->Init(&kernel_context, {kernel, descrs, options})); + RETURN_NOT_OK(executor->Init(&kernel_context, {kernel, types, options})); compute::detail::DatumAccumulator listener; RETURN_NOT_OK(executor->Execute( @@ -683,16 +660,16 @@ Result FoldConstants(Expression expr) { if (GetNullHandling(*call) == compute::NullHandling::INTERSECTION) { // kernels which always produce intersected validity can be resolved // to null *now* if any of their inputs is a null literal - if (!call->descr.type) { + if (!call->type.type) { return Status::Invalid("Cannot fold constants for unbound expression ", expr.ToString()); } - for (const auto& argument : call->arguments) { + for (const Expression& argument : call->arguments) { if (argument.IsNullLiteral()) { - if (argument.type()->Equals(*call->descr.type)) { + if (argument.type()->Equals(*call->type.type)) { return argument; } else { - return literal(MakeNullScalar(call->descr.type)); + return literal(MakeNullScalar(call->type.GetSharedPtr())); } } } @@ -815,7 +792,7 @@ Result ReplaceFieldsWithKnownValues(const KnownFieldValues& known_va auto it = known_values.map.find(*ref); if (it != known_values.map.end()) { Datum lit = it->second; - if (lit.descr() == expr.descr()) return literal(std::move(lit)); + if (lit.type()->Equals(*expr.type())) return literal(std::move(lit)); // type mismatch, try casting the known value to the correct type if (expr.type()->id() == Type::DICTIONARY && @@ -836,7 +813,7 @@ Result ReplaceFieldsWithKnownValues(const KnownFieldValues& known_va } } - ARROW_ASSIGN_OR_RAISE(lit, compute::Cast(lit, expr.type())); + ARROW_ASSIGN_OR_RAISE(lit, compute::Cast(lit, expr.type()->GetSharedPtr())); return literal(std::move(lit)); } } diff --git a/cpp/src/arrow/compute/exec/expression.h b/cpp/src/arrow/compute/exec/expression.h index a1765d0fcca..e9026961aa9 100644 --- a/cpp/src/arrow/compute/exec/expression.h +++ b/cpp/src/arrow/compute/exec/expression.h @@ -55,7 +55,7 @@ class ARROW_EXPORT Expression { std::shared_ptr function; const Kernel* kernel = NULLPTR; std::shared_ptr kernel_state; - ValueDescr descr; + TypeHolder type; void ComputeHash(); }; @@ -70,7 +70,7 @@ class ARROW_EXPORT Expression { /// Bind this expression to the given input type, looking up Kernels and field types. /// Some expression simplification may be performed and implicit casts will be inserted. /// Any state necessary for execution will be initialized and returned. - Result Bind(const ValueDescr& in, ExecContext* = NULLPTR) const; + Result Bind(const TypeHolder& in, ExecContext* = NULLPTR) const; Result Bind(const Schema& in_schema, ExecContext* = NULLPTR) const; // XXX someday @@ -82,8 +82,8 @@ class ARROW_EXPORT Expression { // Result CloneState() const; // Status SetState(ExpressionState); - /// Return true if all an expression's field references have explicit ValueDescr and all - /// of its functions' kernels are looked up. + /// Return true if all an expression's field references have explicit types + /// and all of its functions' kernels are looked up. 
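
// Illustrative sketch, not taken from the patch: an expression's post-bind
// metadata is now just a type. Bind() accepts a Schema or a TypeHolder, and
// type() returns a non-owning const DataType* (nullptr while unbound).
// Hypothetical function name; assumes the enclosing namespace arrow::compute
// and the expression.h declarations above.
Status BindTypeSketch() {
  Expression expr = call("add", {field_ref("a"), literal(3)});
  DCHECK_EQ(expr.type(), nullptr);  // unbound: no output type yet
  ARROW_ASSIGN_OR_RAISE(expr, expr.Bind(Schema({field("a", int32())})));
  DCHECK(expr.IsBound());
  DCHECK(expr.type()->Equals(*int32()));  // a bare type, no ARRAY/SCALAR shape
  return Status::OK();
}
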
bool IsBound() const; /// Return true if this expression is composed only of Scalar literals, field @@ -107,9 +107,8 @@ class ARROW_EXPORT Expression { /// Access a FieldRef or return nullptr if this expression is not a field_ref const FieldRef* field_ref() const; - /// The type and shape to which this expression will evaluate - ValueDescr descr() const; - const std::shared_ptr& type() const; + /// The type to which this expression will evaluate + const DataType* type() const; // XXX someday // NullGeneralization::type nullable() const; @@ -117,7 +116,7 @@ class ARROW_EXPORT Expression { FieldRef ref; // post-bind properties - ValueDescr descr; + TypeHolder type; ::arrow::internal::SmallVector indices; }; const Parameter* parameter() const; diff --git a/cpp/src/arrow/compute/exec/expression_internal.h b/cpp/src/arrow/compute/exec/expression_internal.h index f8c686d2c81..027c954c6d0 100644 --- a/cpp/src/arrow/compute/exec/expression_internal.h +++ b/cpp/src/arrow/compute/exec/expression_internal.h @@ -23,6 +23,7 @@ #include "arrow/compute/api_scalar.h" #include "arrow/compute/cast.h" +#include "arrow/compute/cast_internal.h" #include "arrow/compute/registry.h" #include "arrow/record_batch.h" #include "arrow/table.h" @@ -31,6 +32,8 @@ namespace arrow { namespace compute { +using internal::GetCastFunction; + struct KnownFieldValues { std::unordered_map map; }; @@ -41,21 +44,21 @@ inline const Expression::Call* CallNotNull(const Expression& expr) { return call; } -inline std::vector GetDescriptors(const std::vector& exprs) { - std::vector descrs(exprs.size()); +inline std::vector GetTypes(const std::vector& exprs) { + std::vector types(exprs.size()); for (size_t i = 0; i < exprs.size(); ++i) { DCHECK(exprs[i].IsBound()); - descrs[i] = exprs[i].descr(); + types[i] = exprs[i].type(); } - return descrs; + return types; } -inline std::vector GetDescriptors(const std::vector& values) { - std::vector descrs(values.size()); +inline std::vector GetTypes(const std::vector& values) { + std::vector types(values.size()); for (size_t i = 0; i < values.size(); ++i) { - descrs[i] = values[i].descr(); + types[i] = values[i].type(); } - return descrs; + return types; } struct Comparison { @@ -279,9 +282,9 @@ inline Result> GetFunction( return exec_context->func_registry()->GetFunction(call.function_name); } // XXX this special case is strange; why not make "cast" a ScalarFunction? - const auto& to_type = + const TypeHolder& to_type = ::arrow::internal::checked_cast(*call.options).to_type; - return compute::GetCastFunction(to_type); + return GetCastFunction(*to_type); } /// Modify an Expression with pre-order and post-order visitation. 
diff --git a/cpp/src/arrow/compute/exec/expression_test.cc b/cpp/src/arrow/compute/exec/expression_test.cc index 95adb1652eb..b4466d827eb 100644 --- a/cpp/src/arrow/compute/exec/expression_test.cc +++ b/cpp/src/arrow/compute/exec/expression_test.cc @@ -493,8 +493,8 @@ TEST(Expression, BindLiteral) { Datum(ArrayFromJSON(int32(), "[1,2,3]")), }) { // literals are always considered bound - auto expr = literal(dat); - EXPECT_EQ(expr.descr(), dat.descr()); + Expression expr = literal(dat); + EXPECT_TRUE(dat.type()->Equals(*expr.type())); EXPECT_TRUE(expr.IsBound()); } } @@ -518,13 +518,13 @@ void ExpectBindsTo(Expression expr, util::optional expected, } TEST(Expression, BindFieldRef) { - // an unbound field_ref does not have the output ValueDescr set + // an unbound field_ref does not have the output type set auto expr = field_ref("alpha"); - EXPECT_EQ(expr.descr(), ValueDescr{}); + EXPECT_EQ(expr.type(), nullptr); EXPECT_FALSE(expr.IsBound()); ExpectBindsTo(field_ref("i32"), no_change, &expr); - EXPECT_EQ(expr.descr(), ValueDescr::Array(int32())); + EXPECT_TRUE(expr.type()->Equals(*int32())); // if the field is not found, an error will be raised ASSERT_RAISES(Invalid, field_ref("no such field").Bind(*kBoringSchema)); @@ -541,11 +541,11 @@ TEST(Expression, BindNestedFieldRef) { ExpectBindsTo(field_ref(FieldRef("a", "b")), no_change, &expr, schema); EXPECT_TRUE(expr.IsBound()); - EXPECT_EQ(expr.descr(), ValueDescr::Array(int32())); + EXPECT_TRUE(expr.type()->Equals(*int32())); ExpectBindsTo(field_ref(FieldRef(FieldPath({0, 0}))), no_change, &expr, schema); EXPECT_TRUE(expr.IsBound()); - EXPECT_EQ(expr.descr(), ValueDescr::Array(int32())); + EXPECT_TRUE(expr.type()->Equals(*int32())); ASSERT_RAISES(Invalid, field_ref(FieldPath({0, 1})).Bind(schema)); ASSERT_RAISES(Invalid, field_ref(FieldRef("a", "b")) @@ -558,7 +558,7 @@ TEST(Expression, BindCall) { EXPECT_FALSE(expr.IsBound()); ExpectBindsTo(expr, no_change, &expr); - EXPECT_EQ(expr.descr(), ValueDescr::Array(int32())); + EXPECT_TRUE(expr.type()->Equals(*int32())); ExpectBindsTo(call("add", {field_ref("f32"), literal(3)}), call("add", {field_ref("f32"), literal(3.0F)})); @@ -607,7 +607,7 @@ TEST(Expression, BindNestedCall) { ASSERT_OK_AND_ASSIGN(expr, expr.Bind(Schema({field("a", int32()), field("b", int32()), field("c", int32()), field("d", int32())}))); - EXPECT_EQ(expr.descr(), ValueDescr::Array(int32())); + EXPECT_TRUE(expr.type()->Equals(*int32())); EXPECT_TRUE(expr.IsBound()); } @@ -615,7 +615,7 @@ TEST(Expression, ExecuteFieldRef) { auto ExpectRefIs = [](FieldRef ref, Datum in, Datum expected) { auto expr = field_ref(ref); - ASSERT_OK_AND_ASSIGN(expr, expr.Bind(in.descr())); + ASSERT_OK_AND_ASSIGN(expr, expr.Bind(in.type())); ASSERT_OK_AND_ASSIGN(Datum actual, ExecuteScalarExpression(expr, Schema(in.type()->fields()), in)); @@ -716,8 +716,8 @@ Result NaiveExecuteScalarExpression(const Expression& expr, const Datum& compute::ExecContext exec_context; ARROW_ASSIGN_OR_RAISE(auto function, GetFunction(*call, &exec_context)); - auto descrs = GetDescriptors(call->arguments); - ARROW_ASSIGN_OR_RAISE(auto expected_kernel, function->DispatchExact(descrs)); + std::vector types = GetTypes(call->arguments); + ARROW_ASSIGN_OR_RAISE(auto expected_kernel, function->DispatchExact(types)); EXPECT_EQ(call->kernel, expected_kernel); return function->Execute(arguments, call->options.get(), &exec_context); @@ -726,7 +726,7 @@ Result NaiveExecuteScalarExpression(const Expression& expr, const Datum& void ExpectExecute(Expression expr, Datum in, Datum* 
actual_out = NULLPTR) { std::shared_ptr schm; if (in.is_value()) { - ASSERT_OK_AND_ASSIGN(expr, expr.Bind(in.descr())); + ASSERT_OK_AND_ASSIGN(expr, expr.Bind(in.type())); schm = schema(in.type()->fields()); } else { ASSERT_OK_AND_ASSIGN(expr, expr.Bind(*in.schema())); diff --git a/cpp/src/arrow/compute/exec/hash_join.cc b/cpp/src/arrow/compute/exec/hash_join.cc index a145863e597..a376fb5f57b 100644 --- a/cpp/src/arrow/compute/exec/hash_join.cc +++ b/cpp/src/arrow/compute/exec/hash_join.cc @@ -84,13 +84,11 @@ class HashJoinBasicImpl : public HashJoinImpl { private: void InitEncoder(int side, HashJoinProjection projection_handle, RowEncoder* encoder) { - std::vector data_types; + std::vector data_types; int num_cols = schema_mgr_->proj_maps[side].num_cols(projection_handle); data_types.resize(num_cols); for (int icol = 0; icol < num_cols; ++icol) { - data_types[icol] = - ValueDescr(schema_mgr_->proj_maps[side].data_type(projection_handle, icol), - ValueDescr::ARRAY); + data_types[icol] = schema_mgr_->proj_maps[side].data_type(projection_handle, icol); } encoder->Init(data_types, ctx_); encoder->Clear(); diff --git a/cpp/src/arrow/compute/exec/hash_join_dict.cc b/cpp/src/arrow/compute/exec/hash_join_dict.cc index 731a5662d7d..560b0ea8d4d 100644 --- a/cpp/src/arrow/compute/exec/hash_join_dict.cc +++ b/cpp/src/arrow/compute/exec/hash_join_dict.cc @@ -224,8 +224,8 @@ Status HashJoinDictBuild::Init(ExecContext* ctx, std::shared_ptr dictiona // Initialize encoder internal::RowEncoder encoder; - std::vector encoder_types; - encoder_types.emplace_back(value_type_, ValueDescr::ARRAY); + std::vector encoder_types; + encoder_types.emplace_back(value_type_); encoder.Init(encoder_types, ctx); // Encode all dictionary values @@ -285,8 +285,7 @@ Result> HashJoinDictBuild::RemapInputValues( // Initialize encoder // internal::RowEncoder encoder; - std::vector encoder_types; - encoder_types.emplace_back(value_type_, ValueDescr::ARRAY); + std::vector encoder_types = {value_type_}; encoder.Init(encoder_types, ctx); // Encode all @@ -422,8 +421,7 @@ Result> HashJoinDictProbe::RemapInput( remapped_ids_, opt_build_side->RemapInputValues(ctx, Datum(dict->data()), dict->length())); } else { - std::vector encoder_types; - encoder_types.emplace_back(dict_type.value_type(), ValueDescr::ARRAY); + std::vector encoder_types = {dict_type.value_type()}; encoder_.Init(encoder_types, ctx); RETURN_NOT_OK( encoder_.EncodeAndAppend(ExecSpan({*dict->data()}, dict->length()))); @@ -516,14 +514,14 @@ void HashJoinDictBuildMulti::InitEncoder( const SchemaProjectionMaps& proj_map, RowEncoder* encoder, ExecContext* ctx) { int num_cols = proj_map.num_cols(HashJoinProjection::KEY); - std::vector data_types(num_cols); + std::vector data_types(num_cols); for (int icol = 0; icol < num_cols; ++icol) { std::shared_ptr data_type = proj_map.data_type(HashJoinProjection::KEY, icol); if (HashJoinDictBuild::KeyNeedsProcessing(data_type)) { data_type = HashJoinDictBuild::DataTypeAfterRemapping(); } - data_types[icol] = ValueDescr(data_type, ValueDescr::ARRAY); + data_types[icol] = data_type; } encoder->Init(data_types, ctx); } @@ -610,7 +608,7 @@ void HashJoinDictProbeMulti::InitEncoder( const SchemaProjectionMaps& proj_map_build, RowEncoder* encoder, ExecContext* ctx) { int num_cols = proj_map_probe.num_cols(HashJoinProjection::KEY); - std::vector data_types(num_cols); + std::vector data_types(num_cols); for (int icol = 0; icol < num_cols; ++icol) { std::shared_ptr data_type = proj_map_probe.data_type(HashJoinProjection::KEY, icol); @@ -619,7 
+617,7 @@ void HashJoinDictProbeMulti::InitEncoder( if (HashJoinDictProbe::KeyNeedsProcessing(data_type, build_data_type)) { data_type = HashJoinDictProbe::DataTypeAfterRemapping(build_data_type); } - data_types[icol] = ValueDescr(data_type, ValueDescr::ARRAY); + data_types[icol] = data_type; } encoder->Init(data_types, ctx); } diff --git a/cpp/src/arrow/compute/exec/hash_join_node_test.cc b/cpp/src/arrow/compute/exec/hash_join_node_test.cc index 46600a96da3..9a3c7342788 100644 --- a/cpp/src/arrow/compute/exec/hash_join_node_test.cc +++ b/cpp/src/arrow/compute/exec/hash_join_node_test.cc @@ -44,13 +44,13 @@ BatchesWithSchema GenerateBatchesFromString( const std::vector& json_strings, int multiplicity = 1) { BatchesWithSchema out_batches{{}, schema}; - std::vector descrs; + std::vector types; for (auto&& field : schema->fields()) { - descrs.emplace_back(field->type()); + types.emplace_back(field->type()); } for (auto&& s : json_strings) { - out_batches.batches.push_back(ExecBatchFromJSON(descrs, s)); + out_batches.batches.push_back(ExecBatchFromJSON(types, s)); } size_t batch_count = out_batches.batches.size(); @@ -473,7 +473,7 @@ void TakeUsingVector(ExecContext* ctx, const std::vector> } } -// Generate random arrays given list of data type descriptions and null probabilities. +// Generate random arrays given list of data types and null probabilities. // Make sure that all generated records are unique. // The actual number of generated records may be lower than desired because duplicates // will be removed without replacement. @@ -485,12 +485,12 @@ std::vector> GenRandomUniqueRecords( GenRandomRecords(rng, data_types.data_types, num_desired); ExecContext* ctx = default_exec_context(); - std::vector val_descrs; + std::vector val_types; for (size_t i = 0; i < result.size(); ++i) { - val_descrs.push_back(ValueDescr(result[i]->type(), ValueDescr::ARRAY)); + val_types.push_back(result[i]->type()); } internal::RowEncoder encoder; - encoder.Init(val_descrs, ctx); + encoder.Init(val_types, ctx); ExecBatch batch({}, num_desired); batch.values.resize(result.size()); for (size_t i = 0; i < result.size(); ++i) { diff --git a/cpp/src/arrow/compute/exec/plan_test.cc b/cpp/src/arrow/compute/exec/plan_test.cc index 9efa6623e5a..f67d541e1ea 100644 --- a/cpp/src/arrow/compute/exec/plan_test.cc +++ b/cpp/src/arrow/compute/exec/plan_test.cc @@ -1133,12 +1133,11 @@ TEST(ExecPlanExecution, SourceScalarAggSink) { }) .AddToPlan(plan.get())); - ASSERT_THAT( - StartAndCollect(plan.get(), sink_gen), - Finishes(ResultWith(UnorderedElementsAreArray({ - ExecBatchFromJSON({ValueDescr::Scalar(int64()), ValueDescr::Scalar(boolean())}, - "[[22, true]]"), - })))); + ASSERT_THAT(StartAndCollect(plan.get(), sink_gen), + Finishes(ResultWith(UnorderedElementsAreArray({ + ExecBatchFromJSON({int64(), boolean()}, + {ArgShape::SCALAR, ArgShape::SCALAR}, "[[22, true]]"), + })))); } TEST(ExecPlanExecution, AggregationPreservesOptions) { @@ -1168,7 +1167,7 @@ TEST(ExecPlanExecution, AggregationPreservesOptions) { ASSERT_THAT(StartAndCollect(plan.get(), sink_gen), Finishes(ResultWith(UnorderedElementsAreArray({ - ExecBatchFromJSON({ValueDescr::Array(float64())}, "[[5.5]]"), + ExecBatchFromJSON({float64()}, "[[5.5]]"), })))); } { @@ -1209,7 +1208,7 @@ TEST(ExecPlanExecution, ScalarSourceScalarAggSink) { BatchesWithSchema scalar_data; scalar_data.batches = { - ExecBatchFromJSON({ValueDescr::Scalar(int32()), ValueDescr::Scalar(boolean())}, + ExecBatchFromJSON({int32(), boolean()}, {ArgShape::SCALAR, ArgShape::SCALAR}, "[[5, false], [5, 
false], [5, false]]"), ExecBatchFromJSON({int32(), boolean()}, "[[5, true], [6, false], [7, true]]")}; scalar_data.schema = schema({field("a", int32()), field("b", boolean())}); @@ -1239,11 +1238,11 @@ TEST(ExecPlanExecution, ScalarSourceScalarAggSink) { StartAndCollect(plan.get(), sink_gen), Finishes(ResultWith(UnorderedElementsAreArray({ ExecBatchFromJSON( - {ValueDescr::Scalar(boolean()), ValueDescr::Scalar(boolean()), - ValueDescr::Scalar(int64()), ValueDescr::Scalar(float64()), - ValueDescr::Scalar(int64()), ValueDescr::Scalar(float64()), - ValueDescr::Scalar(int64()), ValueDescr::Array(float64()), - ValueDescr::Scalar(float64())}, + {boolean(), boolean(), int64(), float64(), int64(), float64(), int64(), + float64(), float64()}, + {ArgShape::SCALAR, ArgShape::SCALAR, ArgShape::SCALAR, ArgShape::SCALAR, + ArgShape::SCALAR, ArgShape::SCALAR, ArgShape::SCALAR, ArgShape::ARRAY, + ArgShape::SCALAR}, R"([[false, true, 6, 5.5, 26250, 0.7637626158259734, 33, 5.0, 0.5833333333333334]])"), })))); } @@ -1255,9 +1254,9 @@ TEST(ExecPlanExecution, ScalarSourceGroupedSum) { BatchesWithSchema scalar_data; scalar_data.batches = { - ExecBatchFromJSON({int32(), ValueDescr::Scalar(boolean())}, + ExecBatchFromJSON({int32(), boolean()}, {ArgShape::ARRAY, ArgShape::SCALAR}, "[[5, false], [6, false], [7, false]]"), - ExecBatchFromJSON({int32(), ValueDescr::Scalar(boolean())}, + ExecBatchFromJSON({int32(), boolean()}, {ArgShape::ARRAY, ArgShape::SCALAR}, "[[1, true], [2, true], [3, true]]"), }; scalar_data.schema = schema({field("a", int32()), field("b", boolean())}); diff --git a/cpp/src/arrow/compute/exec/project_node.cc b/cpp/src/arrow/compute/exec/project_node.cc index cad8d7c45ae..76925eb6139 100644 --- a/cpp/src/arrow/compute/exec/project_node.cc +++ b/cpp/src/arrow/compute/exec/project_node.cc @@ -67,7 +67,7 @@ class ProjectNode : public MapNode { ARROW_ASSIGN_OR_RAISE( expr, expr.Bind(*inputs[0]->output_schema(), plan->exec_context())); } - fields[i] = field(std::move(names[i]), expr.type()); + fields[i] = field(std::move(names[i]), expr.type()->GetSharedPtr()); ++i; } return plan->EmplaceNode(plan, std::move(inputs), @@ -82,7 +82,7 @@ class ProjectNode : public MapNode { for (size_t i = 0; i < exprs_.size(); ++i) { util::tracing::Span span; START_COMPUTE_SPAN(span, "Project", - {{"project.descr", exprs_[i].descr().ToString()}, + {{"project.type", exprs_[i].type()->ToString()}, {"project.length", target.length}, {"project.expression", exprs_[i].ToString()}}); ARROW_ASSIGN_OR_RAISE(Expression simplified_expr, diff --git a/cpp/src/arrow/compute/exec/test_util.cc b/cpp/src/arrow/compute/exec/test_util.cc index 1e09cb742fa..330ee471126 100644 --- a/cpp/src/arrow/compute/exec/test_util.cc +++ b/cpp/src/arrow/compute/exec/test_util.cc @@ -143,16 +143,25 @@ ExecNode* MakeDummyNode(ExecPlan* plan, std::string label, std::vector& descrs, +ExecBatch ExecBatchFromJSON(const std::vector& types, util::string_view json) { auto fields = ::arrow::internal::MapVector( - [](const ValueDescr& descr) { return field("", descr.type); }, descrs); + [](const TypeHolder& th) { return field("", th.GetSharedPtr()); }, types); ExecBatch batch{*RecordBatchFromJSON(schema(std::move(fields)), json)}; + return batch; +} + +ExecBatch ExecBatchFromJSON(const std::vector& types, + const std::vector& shapes, util::string_view json) { + DCHECK_EQ(types.size(), shapes.size()); + + ExecBatch batch = ExecBatchFromJSON(types, json); + auto value_it = batch.values.begin(); - for (const auto& descr : descrs) { - if (descr.shape == 
ValueDescr::SCALAR) { + for (ArgShape shape : shapes) { + if (shape == ArgShape::SCALAR) { if (batch.length == 0) { *value_it = MakeNullScalar(value_it->type()); } else { @@ -232,13 +241,13 @@ BatchesWithSchema MakeBatchesFromString( const std::vector& json_strings, int multiplicity) { BatchesWithSchema out_batches{{}, schema}; - std::vector descrs; + std::vector types; for (auto&& field : schema->fields()) { - descrs.emplace_back(field->type()); + types.emplace_back(field->type()); } for (auto&& s : json_strings) { - out_batches.batches.push_back(ExecBatchFromJSON(descrs, s)); + out_batches.batches.push_back(ExecBatchFromJSON(types, s)); } size_t batch_count = out_batches.batches.size(); diff --git a/cpp/src/arrow/compute/exec/test_util.h b/cpp/src/arrow/compute/exec/test_util.h index ba7e4bb3411..ddbded64d42 100644 --- a/cpp/src/arrow/compute/exec/test_util.h +++ b/cpp/src/arrow/compute/exec/test_util.h @@ -27,6 +27,7 @@ #include "arrow/compute/exec.h" #include "arrow/compute/exec/exec_plan.h" +#include "arrow/compute/kernel.h" #include "arrow/testing/visibility.h" #include "arrow/util/async_generator.h" #include "arrow/util/pcg_random.h" @@ -44,8 +45,16 @@ ExecNode* MakeDummyNode(ExecPlan* plan, std::string label, std::vector& descrs, - util::string_view json); +ExecBatch ExecBatchFromJSON(const std::vector& types, util::string_view json); + +/// \brief Shape qualifier for value types. In certain instances +/// (e.g. "map_lookup" kernel), an argument may only be a scalar, where in +/// other kernels arguments can be arrays or scalars +enum class ArgShape { ANY, ARRAY, SCALAR }; + +ARROW_TESTING_EXPORT +ExecBatch ExecBatchFromJSON(const std::vector& types, + const std::vector& shapes, util::string_view json); struct BatchesWithSchema { std::vector batches; diff --git a/cpp/src/arrow/compute/exec_internal.h b/cpp/src/arrow/compute/exec_internal.h index c475a61c1ba..afca289c20e 100644 --- a/cpp/src/arrow/compute/exec_internal.h +++ b/cpp/src/arrow/compute/exec_internal.h @@ -84,8 +84,7 @@ class ARROW_EXPORT ExecSpanIterator { /// \param[in] batch the input ExecBatch /// \param[in] max_chunksize the maximum length of each ExecSpan. Depending /// on the chunk layout of ChunkedArray. - Status Init(const ExecBatch& batch, ValueDescr::Shape output_shape = ValueDescr::ARRAY, - int64_t max_chunksize = kDefaultMaxChunksize); + Status Init(const ExecBatch& batch, int64_t max_chunksize = kDefaultMaxChunksize); /// \brief Compute the next span by updating the state of the /// previous span object. 
You must keep passing in the previous @@ -101,6 +100,8 @@ class ARROW_EXPORT ExecSpanIterator { int64_t length() const { return length_; } int64_t position() const { return position_; } + bool have_all_scalars() const { return have_all_scalars_; } + private: ExecSpanIterator(const std::vector& args, int64_t length, int64_t max_chunksize); @@ -108,6 +109,7 @@ class ARROW_EXPORT ExecSpanIterator { bool initialized_ = false; bool have_chunked_arrays_ = false; + bool have_all_scalars_ = false; const std::vector* args_; std::vector chunk_indexes_; std::vector value_positions_; @@ -117,8 +119,8 @@ class ARROW_EXPORT ExecSpanIterator { // from the relative position within each chunk (which is in // value_positions_) std::vector value_offsets_; - int64_t position_; - int64_t length_; + int64_t position_ = 0; + int64_t length_ = 0; int64_t max_chunksize_; }; @@ -147,11 +149,6 @@ class DatumAccumulator : public ExecListener { std::vector values_; }; -/// \brief Check that each Datum is of a "value" type, which means either -/// SCALAR, ARRAY, or CHUNKED_ARRAY. If there are chunked inputs, then these -/// inputs will be split into non-chunked ExecBatch values for execution -Status CheckAllValues(const std::vector& values); - class ARROW_EXPORT KernelExecutor { public: virtual ~KernelExecutor() = default; diff --git a/cpp/src/arrow/compute/exec_test.cc b/cpp/src/arrow/compute/exec_test.cc index bd344fb2297..573f4aee4a0 100644 --- a/cpp/src/arrow/compute/exec_test.cc +++ b/cpp/src/arrow/compute/exec_test.cc @@ -728,10 +728,10 @@ TEST_F(TestExecBatchIterator, Basics) { ASSERT_EQ(3, batch.num_values()); ASSERT_EQ(length, batch.length); - std::vector descrs = batch.GetDescriptors(); - ASSERT_EQ(ValueDescr::Array(int32()), descrs[0]); - ASSERT_EQ(ValueDescr::Array(float64()), descrs[1]); - ASSERT_EQ(ValueDescr::Scalar(int32()), descrs[2]); + std::vector types = batch.GetTypes(); + ASSERT_EQ(types[0], int32()); + ASSERT_EQ(types[1], float64()); + ASSERT_EQ(types[2], int32()); AssertArraysEqual(*args[0].make_array(), *batch[0].make_array()); AssertArraysEqual(*args[1].make_array(), *batch[1].make_array()); @@ -795,13 +795,12 @@ TEST_F(TestExecBatchIterator, ZeroLengthInputs) { class TestExecSpanIterator : public TestComputeInternals { public: void SetupIterator(const ExecBatch& batch, - ValueDescr::Shape output_shape = ValueDescr::ARRAY, int64_t max_chunksize = kDefaultMaxChunksize) { - ASSERT_OK(iterator_.Init(batch, output_shape, max_chunksize)); + ASSERT_OK(iterator_.Init(batch, max_chunksize)); } void CheckIteration(const ExecBatch& input, int chunksize, const std::vector& ex_batch_sizes) { - SetupIterator(input, ValueDescr::ARRAY, chunksize); + SetupIterator(input, chunksize); ExecSpan batch; int64_t position = 0; for (size_t i = 0; i < ex_batch_sizes.size(); ++i) { @@ -902,8 +901,10 @@ TEST_F(TestExecSpanIterator, ZeroLengthInputs) { auto CheckArgs = [&](const ExecBatch& batch) { ExecSpanIterator iterator; - ASSERT_OK(iterator.Init(batch, ValueDescr::ARRAY)); + ASSERT_OK(iterator.Init(batch)); ExecSpan iter_span; + ASSERT_TRUE(iterator.Next(&iter_span)); + ASSERT_EQ(0, iter_span.length); ASSERT_FALSE(iterator.Next(&iter_span)); }; @@ -1045,11 +1046,13 @@ Status ExecStateful(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) return Status::OK(); } -// TODO: remove this / refactor it in ARROW-16577 Status ExecAddInt32(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { - const Int32Scalar& arg0 = batch[0].scalar_as(); - const Int32Scalar& arg1 = batch[1].scalar_as(); - out->value = 
std::make_shared(arg0.value + arg1.value); + const int32_t* left_data = batch[0].array.GetValues(1); + const int32_t* right_data = batch[1].array.GetValues(1); + int32_t* out_data = out->array_span()->GetValues(1); + for (int64_t i = 0; i < batch.length; ++i) { + *out_data++ = *left_data++ + *right_data++; + } return Status::OK(); } @@ -1078,16 +1081,15 @@ class TestCallScalarFunction : public TestComputeInternals { /*doc=*/FunctionDoc::Empty()); // Add a few kernels. Our implementation only accepts arrays - ASSERT_OK(func->AddKernel({InputType::Array(uint8())}, uint8(), ExecCopyArraySpan)); - ASSERT_OK(func->AddKernel({InputType::Array(int32())}, int32(), ExecCopyArraySpan)); - ASSERT_OK( - func->AddKernel({InputType::Array(float64())}, float64(), ExecCopyArraySpan)); + ASSERT_OK(func->AddKernel({uint8()}, uint8(), ExecCopyArraySpan)); + ASSERT_OK(func->AddKernel({int32()}, int32(), ExecCopyArraySpan)); + ASSERT_OK(func->AddKernel({float64()}, float64(), ExecCopyArraySpan)); ASSERT_OK(registry->AddFunction(func)); // A version which doesn't want the executor to call PropagateNulls auto func2 = std::make_shared( "test_copy_computed_bitmap", Arity::Unary(), /*doc=*/FunctionDoc::Empty()); - ScalarKernel kernel({InputType::Array(uint8())}, uint8(), ExecComputedBitmap); + ScalarKernel kernel({uint8()}, uint8(), ExecComputedBitmap); kernel.null_handling = NullHandling::COMPUTED_PREALLOCATE; ASSERT_OK(func2->AddKernel(kernel)); ASSERT_OK(registry->AddFunction(func2)); @@ -1103,7 +1105,7 @@ class TestCallScalarFunction : public TestComputeInternals { auto f2 = std::make_shared( "test_nopre_validity_or_data", Arity::Unary(), /*doc=*/FunctionDoc::Empty()); - ScalarKernel kernel({InputType::Array(uint8())}, uint8(), ExecNoPreallocatedData); + ScalarKernel kernel({uint8()}, uint8(), ExecNoPreallocatedData); kernel.mem_allocation = MemAllocation::NO_PREALLOCATE; ASSERT_OK(f1->AddKernel(kernel)); @@ -1123,7 +1125,7 @@ class TestCallScalarFunction : public TestComputeInternals { auto func = std::make_shared("test_stateful", Arity::Unary(), /*doc=*/FunctionDoc::Empty()); - ScalarKernel kernel({InputType::Array(int32())}, int32(), ExecStateful, InitStateful); + ScalarKernel kernel({int32()}, int32(), ExecStateful, InitStateful); ASSERT_OK(func->AddKernel(kernel)); ASSERT_OK(registry->AddFunction(func)); } @@ -1133,8 +1135,7 @@ class TestCallScalarFunction : public TestComputeInternals { auto func = std::make_shared("test_scalar_add_int32", Arity::Binary(), /*doc=*/FunctionDoc::Empty()); - ASSERT_OK(func->AddKernel({InputType::Scalar(int32()), InputType::Scalar(int32())}, - int32(), ExecAddInt32)); + ASSERT_OK(func->AddKernel({int32(), int32()}, int32(), ExecAddInt32)); ASSERT_OK(registry->AddFunction(func)); } }; @@ -1154,8 +1155,9 @@ TEST_F(TestCallScalarFunction, ArgumentValidation) { ASSERT_RAISES(Invalid, CallFunction("test_copy", args)); // Cannot do scalar - args = {Datum(std::make_shared(5))}; - ASSERT_RAISES(NotImplemented, CallFunction("test_copy", args)); + Datum d1_scalar(std::make_shared(5)); + ASSERT_OK_AND_ASSIGN(auto result, CallFunction("test_copy", {d1})); + ASSERT_OK_AND_ASSIGN(result, CallFunction("test_copy", {d1_scalar})); } TEST_F(TestCallScalarFunction, PreallocationCases) { diff --git a/cpp/src/arrow/compute/function.cc b/cpp/src/arrow/compute/function.cc index b5ebc67d180..12d80a8c9ae 100644 --- a/cpp/src/arrow/compute/function.cc +++ b/cpp/src/arrow/compute/function.cc @@ -79,51 +79,35 @@ static const FunctionDoc kEmptyFunctionDoc{}; const FunctionDoc& FunctionDoc::Empty() { 
return kEmptyFunctionDoc; } -static Status CheckArityImpl(const Function& function, int passed_num_args, - const char* passed_num_args_label) { - if (function.arity().is_varargs && passed_num_args < function.arity().num_args) { - return Status::Invalid("VarArgs function '", function.name(), "' needs at least ", - function.arity().num_args, " arguments but ", - passed_num_args_label, " only ", passed_num_args); +static Status CheckArityImpl(const Function& func, int num_args) { + if (func.arity().is_varargs && num_args < func.arity().num_args) { + return Status::Invalid("VarArgs function '", func.name(), "' needs at least ", + func.arity().num_args, " arguments but only ", num_args, + " passed"); } - if (!function.arity().is_varargs && passed_num_args != function.arity().num_args) { - return Status::Invalid("Function '", function.name(), "' accepts ", - function.arity().num_args, " arguments but ", - passed_num_args_label, " ", passed_num_args); + if (!func.arity().is_varargs && num_args != func.arity().num_args) { + return Status::Invalid("Function '", func.name(), "' accepts ", func.arity().num_args, + " arguments but ", num_args, " passed"); } - return Status::OK(); } -Status Function::CheckArity(const std::vector& in_types) const { - return CheckArityImpl(*this, static_cast(in_types.size()), "kernel accepts"); -} - -Status Function::CheckArity(const std::vector& descrs) const { - return CheckArityImpl(*this, static_cast(descrs.size()), - "attempted to look up kernel(s) with"); -} - -static Status CheckOptions(const Function& function, const FunctionOptions* options) { - if (options == nullptr && function.doc().options_required) { - return Status::Invalid("Function '", function.name(), - "' cannot be called without options"); - } - return Status::OK(); +Status Function::CheckArity(size_t num_args) const { + return CheckArityImpl(*this, static_cast(num_args)); } namespace detail { -Status NoMatchingKernel(const Function* func, const std::vector& descrs) { +Status NoMatchingKernel(const Function* func, const std::vector& types) { return Status::NotImplemented("Function '", func->name(), "' has no kernel matching input types ", - ValueDescr::ToString(descrs)); + TypeHolder::ToString(types)); } template const KernelType* DispatchExactImpl(const std::vector& kernels, - const std::vector& values) { + const std::vector& values) { const KernelType* kernel_matches[SimdLevel::MAX] = {nullptr}; // Validate arity @@ -159,7 +143,7 @@ const KernelType* DispatchExactImpl(const std::vector& kernels, } const Kernel* DispatchExactImpl(const Function* func, - const std::vector& values) { + const std::vector& values) { if (func->kind() == Function::SCALAR) { return DispatchExactImpl(checked_cast(func)->kernels(), values); @@ -186,11 +170,11 @@ const Kernel* DispatchExactImpl(const Function* func, } // namespace detail Result Function::DispatchExact( - const std::vector& values) const { + const std::vector& values) const { if (kind_ == Function::META) { return Status::NotImplemented("Dispatch for a MetaFunction's Kernels"); } - RETURN_NOT_OK(CheckArity(values)); + RETURN_NOT_OK(CheckArity(values.size())); if (auto kernel = detail::DispatchExactImpl(this, values)) { return kernel; @@ -198,75 +182,92 @@ Result Function::DispatchExact( return detail::NoMatchingKernel(this, values); } -Result Function::DispatchBest(std::vector* values) const { +Result Function::DispatchBest(std::vector* values) const { // TODO(ARROW-11508) permit generic conversions here return DispatchExact(*values); } -Result 
Function::Execute(const std::vector& args, - const FunctionOptions* options, ExecContext* ctx) const { - return ExecuteInternal(args, /*passed_length=*/-1, options, ctx); +namespace { + +Status CheckAllArrayOrScalar(const std::vector& values) { + for (const auto& value : values) { + if (!value.is_value()) { + return Status::Invalid("Tried executing function with non-value type: ", + value.ToString()); + } + } + return Status::OK(); } -Result Function::Execute(const ExecBatch& batch, const FunctionOptions* options, - ExecContext* ctx) const { - return ExecuteInternal(batch.values, batch.length, options, ctx); +Status CheckOptions(const Function& function, const FunctionOptions* options) { + if (options == nullptr && function.doc().options_required) { + return Status::Invalid("Function '", function.name(), + "' cannot be called without options"); + } + return Status::OK(); } -Result Function::ExecuteInternal(const std::vector& args, - int64_t passed_length, - const FunctionOptions* options, - ExecContext* ctx) const { +Result ExecuteInternal(const Function& func, std::vector args, + int64_t passed_length, const FunctionOptions* options, + ExecContext* ctx) { + std::unique_ptr default_ctx; if (options == nullptr) { - RETURN_NOT_OK(CheckOptions(*this, options)); - options = default_options(); + RETURN_NOT_OK(CheckOptions(func, options)); + options = func.default_options(); } if (ctx == nullptr) { - ExecContext default_ctx; - return ExecuteInternal(args, passed_length, options, &default_ctx); + default_ctx.reset(new ExecContext()); + ctx = default_ctx.get(); } util::tracing::Span span; - START_COMPUTE_SPAN(span, name(), - {{"function.name", name()}, + START_COMPUTE_SPAN(span, func.name(), + {{"function.name", func.name()}, {"function.options", options ? options->ToString() : ""}, - {"function.kind", kind()}}); + {"function.kind", func.kind()}}); // type-check Datum arguments here. 
Really we'd like to avoid this as much as // possible - RETURN_NOT_OK(detail::CheckAllValues(args)); - std::vector inputs(args.size()); + RETURN_NOT_OK(CheckAllArrayOrScalar(args)); + std::vector in_types(args.size()); for (size_t i = 0; i != args.size(); ++i) { - inputs[i] = args[i].descr(); + in_types[i] = args[i].type().get(); } std::unique_ptr executor; - if (kind() == Function::SCALAR) { + if (func.kind() == Function::SCALAR) { executor = detail::KernelExecutor::MakeScalar(); - } else if (kind() == Function::VECTOR) { + } else if (func.kind() == Function::VECTOR) { executor = detail::KernelExecutor::MakeVector(); - } else if (kind() == Function::SCALAR_AGGREGATE) { + } else if (func.kind() == Function::SCALAR_AGGREGATE) { executor = detail::KernelExecutor::MakeScalarAggregate(); } else { return Status::NotImplemented("Direct execution of HASH_AGGREGATE functions"); } - ARROW_ASSIGN_OR_RAISE(const Kernel* kernel, DispatchBest(&inputs)); - ARROW_ASSIGN_OR_RAISE(std::vector args_with_casts, Cast(args, inputs, ctx)); + ARROW_ASSIGN_OR_RAISE(const Kernel* kernel, func.DispatchBest(&in_types)); + + // Cast arguments if necessary + for (size_t i = 0; i != args.size(); ++i) { + if (in_types[i] != args[i].type()) { + ARROW_ASSIGN_OR_RAISE(args[i], Cast(args[i], CastOptions::Safe(in_types[i]), ctx)); + } + } - std::unique_ptr state; KernelContext kernel_ctx{ctx, kernel}; + + std::unique_ptr state; if (kernel->init) { - ARROW_ASSIGN_OR_RAISE(state, kernel->init(&kernel_ctx, {kernel, inputs, options})); + ARROW_ASSIGN_OR_RAISE(state, kernel->init(&kernel_ctx, {kernel, in_types, options})); kernel_ctx.SetState(state.get()); } - RETURN_NOT_OK(executor->Init(&kernel_ctx, {kernel, inputs, options})); + RETURN_NOT_OK(executor->Init(&kernel_ctx, {kernel, in_types, options})); detail::DatumAccumulator listener; - ExecBatch input(std::move(args_with_casts), /*length=*/0); + ExecBatch input(std::move(args), /*length=*/0); if (input.num_values() == 0) { if (passed_length != -1) { input.length = passed_length; @@ -275,9 +276,13 @@ Result Function::ExecuteInternal(const std::vector& args, bool all_same_length = false; int64_t inferred_length = detail::InferBatchLength(input.values, &all_same_length); input.length = inferred_length; - if (kind() == Function::SCALAR) { - DCHECK(passed_length == -1 || passed_length == inferred_length); - } else if (kind() == Function::VECTOR) { + if (func.kind() == Function::SCALAR) { + if (passed_length != -1 && passed_length != inferred_length) { + return Status::Invalid( + "Passed batch length for execution did not match actual" + " length of values for scalar function execution"); + } + } else if (func.kind() == Function::VECTOR) { auto vkernel = static_cast(kernel); if (!(all_same_length || !vkernel->can_execute_chunkwise)) { return Status::Invalid("Vector kernel arguments must all be the same length"); @@ -287,12 +292,25 @@ Result Function::ExecuteInternal(const std::vector& args, RETURN_NOT_OK(executor->Execute(input, &listener)); const auto out = executor->WrapResults(input.values, listener.values()); #ifndef NDEBUG - DCHECK_OK(executor->CheckResultType(out, name_.c_str())); + DCHECK_OK(executor->CheckResultType(out, func.name().c_str())); #endif return out; } +} // namespace + +Result Function::Execute(const std::vector& args, + const FunctionOptions* options, ExecContext* ctx) const { + return ExecuteInternal(*this, args, /*passed_length=*/-1, options, ctx); +} + +Result Function::Execute(const ExecBatch& batch, const FunctionOptions* options, + ExecContext* ctx) 
const { + return ExecuteInternal(*this, batch.values, batch.length, options, ctx); +} + namespace { + Status ValidateFunctionSummary(const std::string& s) { if (s.find('\n') != s.npos) { return Status::Invalid("summary contains a newline"); @@ -347,7 +365,7 @@ Status Function::Validate() const { Status ScalarFunction::AddKernel(std::vector in_types, OutputType out_type, ArrayKernelExec exec, KernelInit init) { - RETURN_NOT_OK(CheckArity(in_types)); + RETURN_NOT_OK(CheckArity(in_types.size())); if (arity_.is_varargs && in_types.size() != 1) { return Status::Invalid("VarArgs signatures must have exactly one input type"); @@ -359,7 +377,7 @@ Status ScalarFunction::AddKernel(std::vector in_types, OutputType out } Status ScalarFunction::AddKernel(ScalarKernel kernel) { - RETURN_NOT_OK(CheckArity(kernel.signature->in_types())); + RETURN_NOT_OK(CheckArity(kernel.signature->in_types().size())); if (arity_.is_varargs && !kernel.signature->is_varargs()) { return Status::Invalid("Function accepts varargs but kernel signature does not"); } @@ -369,7 +387,7 @@ Status ScalarFunction::AddKernel(ScalarKernel kernel) { Status VectorFunction::AddKernel(std::vector in_types, OutputType out_type, ArrayKernelExec exec, KernelInit init) { - RETURN_NOT_OK(CheckArity(in_types)); + RETURN_NOT_OK(CheckArity(in_types.size())); if (arity_.is_varargs && in_types.size() != 1) { return Status::Invalid("VarArgs signatures must have exactly one input type"); @@ -381,7 +399,7 @@ Status VectorFunction::AddKernel(std::vector in_types, OutputType out } Status VectorFunction::AddKernel(VectorKernel kernel) { - RETURN_NOT_OK(CheckArity(kernel.signature->in_types())); + RETURN_NOT_OK(CheckArity(kernel.signature->in_types().size())); if (arity_.is_varargs && !kernel.signature->is_varargs()) { return Status::Invalid("Function accepts varargs but kernel signature does not"); } @@ -390,7 +408,7 @@ Status VectorFunction::AddKernel(VectorKernel kernel) { } Status ScalarAggregateFunction::AddKernel(ScalarAggregateKernel kernel) { - RETURN_NOT_OK(CheckArity(kernel.signature->in_types())); + RETURN_NOT_OK(CheckArity(kernel.signature->in_types().size())); if (arity_.is_varargs && !kernel.signature->is_varargs()) { return Status::Invalid("Function accepts varargs but kernel signature does not"); } @@ -399,7 +417,7 @@ Status ScalarAggregateFunction::AddKernel(ScalarAggregateKernel kernel) { } Status HashAggregateFunction::AddKernel(HashAggregateKernel kernel) { - RETURN_NOT_OK(CheckArity(kernel.signature->in_types())); + RETURN_NOT_OK(CheckArity(kernel.signature->in_types().size())); if (arity_.is_varargs && !kernel.signature->is_varargs()) { return Status::Invalid("Function accepts varargs but kernel signature does not"); } @@ -410,8 +428,7 @@ Status HashAggregateFunction::AddKernel(HashAggregateKernel kernel) { Result MetaFunction::Execute(const std::vector& args, const FunctionOptions* options, ExecContext* ctx) const { - RETURN_NOT_OK( - CheckArityImpl(*this, static_cast(args.size()), "attempted to Execute with")); + RETURN_NOT_OK(CheckArityImpl(*this, static_cast(args.size()))); RETURN_NOT_OK(CheckOptions(*this, options)); if (options == nullptr) { diff --git a/cpp/src/arrow/compute/function.h b/cpp/src/arrow/compute/function.h index c32c8766a91..7f2fba68caf 100644 --- a/cpp/src/arrow/compute/function.h +++ b/cpp/src/arrow/compute/function.h @@ -211,19 +211,19 @@ class ARROW_EXPORT Function { virtual int num_kernels() const = 0; /// \brief Return a kernel that can execute the function given the exact - /// argument types (without 
implicit type casts or scalar->array promotions). + /// argument types (without implicit type casts). /// /// NB: This function is overridden in CastFunction. - virtual Result<const Kernel*> DispatchExact( - const std::vector<ValueDescr>& values) const; + virtual Result<const Kernel*> DispatchExact(const std::vector<TypeHolder>& types) const; /// \brief Return a best-match kernel that can execute the function given the argument /// types, after implicit casts are applied. /// - /// \param[in,out] values Argument types. An element may be modified to indicate that - /// the returned kernel only approximately matches the input value descriptors; callers - /// are responsible for casting inputs to the type and shape required by the kernel. - virtual Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const; + /// \param[in,out] values Argument types. An element may be modified to + /// indicate that the returned kernel only approximately matches the input + /// value descriptors; callers are responsible for casting inputs to the type + /// required by the kernel. + virtual Result<const Kernel*> DispatchBest(std::vector<TypeHolder>* values) const; /// \brief Execute the function eagerly with the passed input arguments with /// kernel dispatch, batch iteration, and memory allocation details taken @@ -255,11 +255,7 @@ class ARROW_EXPORT Function { doc_(std::move(doc)), default_options_(default_options) {} - Result<Datum> ExecuteInternal(const std::vector<Datum>& args, int64_t passed_length, - const FunctionOptions* options, ExecContext* ctx) const; - - Status CheckArity(const std::vector<InputType>&) const; - Status CheckArity(const std::vector<ValueDescr>&) const; + Status CheckArity(size_t num_args) const; std::string name_; Function::Kind kind_; @@ -294,11 +290,11 @@ class FunctionImpl : public Function { /// \brief Look up a kernel in a function. If no Kernel is found, nullptr is returned. ARROW_EXPORT -const Kernel* DispatchExactImpl(const Function* func, const std::vector<ValueDescr>&); +const Kernel* DispatchExactImpl(const Function* func, const std::vector<TypeHolder>&); /// \brief Return an error message if no Kernel is found.
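With DispatchExact and DispatchExactImpl now keyed on TypeHolder rather than ValueDescr, kernel lookup needs only the argument types. A minimal sketch of how a caller might exercise the new entry point; the registered "add" function and the int64 argument types are illustrative choices, not taken from this patch:

    #include <iostream>
    #include <memory>
    #include <vector>

    #include "arrow/compute/function.h"
    #include "arrow/compute/registry.h"
    #include "arrow/result.h"
    #include "arrow/type.h"

    namespace cp = arrow::compute;

    arrow::Status DispatchExample() {
      // Look up a registered scalar function, then ask for the kernel matching
      // the exact argument types; array vs. scalar shape no longer participates.
      ARROW_ASSIGN_OR_RAISE(std::shared_ptr<cp::Function> func,
                            cp::GetFunctionRegistry()->GetFunction("add"));
      std::vector<arrow::TypeHolder> types = {arrow::int64(), arrow::int64()};
      ARROW_ASSIGN_OR_RAISE(const cp::Kernel* kernel, func->DispatchExact(types));
      std::cout << kernel->signature->ToString() << std::endl;
      return arrow::Status::OK();
    }

    int main() {
      arrow::Status st = DispatchExample();
      if (!st.ok()) {
        std::cerr << st.ToString() << std::endl;
        return 1;
      }
      return 0;
    }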
ARROW_EXPORT -Status NoMatchingKernel(const Function* func, const std::vector&); +Status NoMatchingKernel(const Function* func, const std::vector&); } // namespace detail diff --git a/cpp/src/arrow/compute/function_benchmark.cc b/cpp/src/arrow/compute/function_benchmark.cc index b508ad047fb..791052358e7 100644 --- a/cpp/src/arrow/compute/function_benchmark.cc +++ b/cpp/src/arrow/compute/function_benchmark.cc @@ -19,6 +19,7 @@ #include "arrow/array/array_base.h" #include "arrow/compute/api.h" +#include "arrow/compute/cast_internal.h" #include "arrow/compute/exec_internal.h" #include "arrow/memory_pool.h" #include "arrow/scalar.h" @@ -67,14 +68,13 @@ void BM_CastDispatchBaseline(benchmark::State& state) { // Repeatedly invoke a trivial Cast with all dispatch outside the hot loop random::RandomArrayGenerator rag(kSeed); - auto int_scalars = ToScalars(rag.Int64(kScalarCount, 0, 1 << 20)); - + auto int_array = rag.Int64(1, 0, 1 << 20); auto double_type = float64(); CastOptions cast_options; cast_options.to_type = double_type; - ASSERT_OK_AND_ASSIGN(auto cast_function, GetCastFunction(double_type)); + ASSERT_OK_AND_ASSIGN(auto cast_function, internal::GetCastFunction(*double_type)); ASSERT_OK_AND_ASSIGN(auto cast_kernel, - cast_function->DispatchExact({int_scalars[0]->type})); + cast_function->DispatchExact({int_array->type()})); const auto& exec = static_cast(cast_kernel)->exec; ExecContext exec_context; @@ -85,15 +85,13 @@ void BM_CastDispatchBaseline(benchmark::State& state) { .ValueOrDie(); kernel_context.SetState(cast_state.get()); - ExecSpan input; - input.length = 1; + ExecSpan input({ExecValue(*int_array->data())}, 1); + ExecResult result; + ASSERT_OK_AND_ASSIGN(std::shared_ptr result_space, + MakeArrayOfNull(double_type, 1)); + result.array_span()->SetMembers(*result_space->data()); for (auto _ : state) { - ExecResult result; - result.value = MakeNullScalar(double_type); - for (const std::shared_ptr& int_scalar : int_scalars) { - input.values = {ExecValue(int_scalar.get())}; - ABORT_NOT_OK(exec(&kernel_context, input, &result)); - } + ABORT_NOT_OK(exec(&kernel_context, input, &result)); } state.SetItemsProcessed(state.iterations() * kScalarCount); @@ -153,31 +151,26 @@ void BM_ExecuteScalarFunctionOnScalar(benchmark::State& state) { void BM_ExecuteScalarKernelOnScalar(benchmark::State& state) { // Execute a trivial function, with argument dispatch outside the hot path - const int64_t N = 10000; - auto function = *GetFunctionRegistry()->GetFunction("is_valid"); - auto kernel = *function->DispatchExact({ValueDescr::Scalar(int64())}); + auto kernel = *function->DispatchExact({int64()}); const auto& exec = static_cast(*kernel).exec; - const auto scalars = MakeScalarsForIsValid(N); - ExecContext exec_context; KernelContext kernel_context(&exec_context); - ExecSpan input; - input.length = 1; + ASSERT_OK_AND_ASSIGN(std::shared_ptr input_arr, MakeArrayOfNull(int64(), 1)); + ExecSpan input({*input_arr->data()}, 1); + + ExecResult output; + ASSERT_OK_AND_ASSIGN(std::shared_ptr output_arr, MakeArrayOfNull(int64(), 1)); + output.array_span()->SetMembers(*output_arr->data()); + + const int64_t N = 10000; for (auto _ : state) { - int64_t total = 0; - for (const std::shared_ptr& scalar : scalars) { - ExecResult result; - result.value = MakeNullScalar(int64()); - input.values = {scalar.get()}; - ABORT_NOT_OK(exec(&kernel_context, input, &result)); - total += result.scalar()->is_valid; + for (int i = 0; i < N; ++i) { + ABORT_NOT_OK(exec(&kernel_context, input, &output)); } - 
benchmark::DoNotOptimize(total); } - state.SetItemsProcessed(state.iterations() * N); } diff --git a/cpp/src/arrow/compute/function_internal.h b/cpp/src/arrow/compute/function_internal.h index f2303b87d90..17261332619 100644 --- a/cpp/src/arrow/compute/function_internal.h +++ b/cpp/src/arrow/compute/function_internal.h @@ -345,6 +345,10 @@ static inline Result> GenericToScalar( return MakeNullScalar(value); } +static inline Result> GenericToScalar(const TypeHolder& value) { + return GenericToScalar(value.GetSharedPtr()); +} + static inline Result> GenericToScalar( const std::shared_ptr& value) { return value; @@ -430,6 +434,12 @@ static inline enable_if_same_result> GenericFromSca return value->type; } +template +static inline enable_if_same_result GenericFromScalar( + const std::shared_ptr& value) { + return value->type; +} + template static inline enable_if_same_result> GenericFromScalar( const std::shared_ptr& value) { diff --git a/cpp/src/arrow/compute/function_test.cc b/cpp/src/arrow/compute/function_test.cc index f06f225f5b9..94daa6baa96 100644 --- a/cpp/src/arrow/compute/function_test.cc +++ b/cpp/src/arrow/compute/function_test.cc @@ -230,9 +230,9 @@ void CheckAddDispatch(FunctionType* func, ExecType exec) { // Duplicate sig is okay ASSERT_OK(func->AddKernel(in_types1, out_type1, exec)); - // Add given a descr - KernelType descr({float64(), float64()}, float64(), exec); - ASSERT_OK(func->AddKernel(descr)); + // Add a kernel + KernelType kernel({float64(), float64()}, float64(), exec); + ASSERT_OK(func->AddKernel(kernel)); ASSERT_EQ(4, func->num_kernels()); ASSERT_EQ(4, func->kernels().size()); @@ -249,9 +249,9 @@ void CheckAddDispatch(FunctionType* func, ExecType exec) { KernelType invalid_kernel({boolean()}, boolean(), exec); ASSERT_RAISES(Invalid, func->AddKernel(invalid_kernel)); - ASSERT_OK_AND_ASSIGN(const Kernel* kernel, func->DispatchExact({int32(), int32()})); + ASSERT_OK_AND_ASSIGN(const Kernel* dispatched, func->DispatchExact({int32(), int32()})); KernelSignature expected_sig(in_types1, out_type1); - ASSERT_TRUE(kernel->signature->Equals(expected_sig)); + ASSERT_TRUE(dispatched->signature->Equals(expected_sig)); // No kernel available ASSERT_RAISES(NotImplemented, func->DispatchExact({utf8(), utf8()})); @@ -288,7 +288,7 @@ TEST(ArrayFunction, VarArgs) { ScalarKernel non_va_kernel(std::make_shared(va_args, int8()), ExecNYI); ASSERT_RAISES(Invalid, va_func.AddKernel(non_va_kernel)); - std::vector args = {ValueDescr::Scalar(int8()), int8(), int8()}; + std::vector args = {int8(), int8(), int8()}; ASSERT_OK_AND_ASSIGN(const Kernel* kernel, va_func.DispatchExact(args)); ASSERT_TRUE(kernel->signature->MatchesInputs(args)); @@ -319,7 +319,7 @@ Status NoopFinalize(KernelContext*, Datum*) { return Status::OK(); } TEST(ScalarAggregateFunction, DispatchExact) { ScalarAggregateFunction func("agg_test", Arity::Unary(), FunctionDoc::Empty()); - std::vector in_args = {ValueDescr::Array(int8())}; + std::vector in_args = {int8()}; ScalarAggregateKernel kernel(std::move(in_args), int64(), NoopInit, NoopConsume, NoopMerge, NoopFinalize); ASSERT_OK(func.AddKernel(kernel)); @@ -341,18 +341,14 @@ TEST(ScalarAggregateFunction, DispatchExact) { kernel.signature = std::make_shared(in_args, float64()); ASSERT_RAISES(Invalid, func.AddKernel(kernel)); - std::vector dispatch_args = {ValueDescr::Array(int8())}; + std::vector dispatch_args = {int8()}; ASSERT_OK_AND_ASSIGN(const Kernel* selected_kernel, func.DispatchExact(dispatch_args)); ASSERT_EQ(func.kernels()[0], selected_kernel); 
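The shape-based restriction removed in the next few lines ("only arrays are accepted") is the caller-visible side of this change: a scalar Datum is now executed through the same kernels as an array. A rough end-to-end illustration using the public "add" function, which is outside the files touched here:

    #include <iostream>
    #include <memory>

    #include "arrow/api.h"
    #include "arrow/compute/api.h"

    namespace cp = arrow::compute;

    arrow::Status MixedShapeExample() {
      // Build a small int64 array argument.
      arrow::Int64Builder builder;
      ARROW_RETURN_NOT_OK(builder.AppendValues({1, 2, 3}));
      ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> values, builder.Finish());

      // The scalar argument goes through the same array-oriented kernel; there
      // is no separate scalar-shaped kernel to dispatch to anymore.
      arrow::Datum increment(std::make_shared<arrow::Int64Scalar>(10));
      ARROW_ASSIGN_OR_RAISE(
          arrow::Datum result,
          cp::CallFunction("add", {arrow::Datum(values), increment}));
      std::cout << result.make_array()->ToString() << std::endl;  // 11, 12, 13
      return arrow::Status::OK();
    }

    int main() { return MixedShapeExample().ok() ? 0 : 1; }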
ASSERT_TRUE(selected_kernel->signature->MatchesInputs(dispatch_args)); - // We declared that only arrays are accepted - dispatch_args[0] = {ValueDescr::Scalar(int8())}; - ASSERT_RAISES(NotImplemented, func.DispatchExact(dispatch_args)); - // Didn't qualify the float64() kernel so this actually dispatches (even // though that may not be what you want) - dispatch_args[0] = {ValueDescr::Scalar(float64())}; + dispatch_args[0] = {float64()}; ASSERT_OK_AND_ASSIGN(selected_kernel, func.DispatchExact(dispatch_args)); ASSERT_TRUE(selected_kernel->signature->MatchesInputs(dispatch_args)); } diff --git a/cpp/src/arrow/compute/kernel.cc b/cpp/src/arrow/compute/kernel.cc index 909c2399c8e..9a0e9c986a2 100644 --- a/cpp/src/arrow/compute/kernel.cc +++ b/cpp/src/arrow/compute/kernel.cc @@ -282,7 +282,6 @@ std::shared_ptr FixedSizeBinaryLike() { size_t InputType::Hash() const { size_t result = kHashSeed; - hash_combine(result, static_cast(shape_)); hash_combine(result, static_cast(kind_)); switch (kind_) { case InputType::EXACT_TYPE: @@ -296,21 +295,6 @@ size_t InputType::Hash() const { std::string InputType::ToString() const { std::stringstream ss; - switch (shape_) { - case ValueDescr::ANY: - ss << "any"; - break; - case ValueDescr::ARRAY: - ss << "array"; - break; - case ValueDescr::SCALAR: - ss << "scalar"; - break; - default: - DCHECK(false); - break; - } - ss << "["; switch (kind_) { case InputType::ANY_TYPE: ss << "any"; @@ -325,7 +309,6 @@ std::string InputType::ToString() const { DCHECK(false); break; } - ss << "]"; return ss.str(); } @@ -333,7 +316,7 @@ bool InputType::Equals(const InputType& other) const { if (this == &other) { return true; } - if (kind_ != other.kind_ || shape_ != other.shape_) { + if (kind_ != other.kind_) { return false; } switch (kind_) { @@ -348,22 +331,30 @@ bool InputType::Equals(const InputType& other) const { } } -bool InputType::Matches(const ValueDescr& descr) const { - if (shape_ != ValueDescr::ANY && descr.shape != shape_) { - return false; - } +bool InputType::Matches(const DataType& type) const { switch (kind_) { case InputType::EXACT_TYPE: - return type_->Equals(*descr.type); + return type_->Equals(type); case InputType::USE_TYPE_MATCHER: - return type_matcher_->Matches(*descr.type); + return type_matcher_->Matches(type); default: // ANY_TYPE return true; } } -bool InputType::Matches(const Datum& value) const { return Matches(value.descr()); } +bool InputType::Matches(const Datum& value) const { + switch (value.kind()) { + case Datum::ARRAY: + case Datum::CHUNKED_ARRAY: + case Datum::SCALAR: + break; + default: + DCHECK(false); + return false; + } + return Matches(*value.type()); +} const std::shared_ptr& InputType::type() const { DCHECK_EQ(InputType::EXACT_TYPE, kind_); @@ -378,21 +369,12 @@ const TypeMatcher& InputType::type_matcher() const { // ---------------------------------------------------------------------- // OutputType -OutputType::OutputType(ValueDescr descr) : OutputType(descr.type) { - shape_ = descr.shape; -} - -Result OutputType::Resolve(KernelContext* ctx, - const std::vector& args) const { - ValueDescr::Shape broadcasted_shape = GetBroadcastShape(args); +Result OutputType::Resolve(KernelContext* ctx, + const std::vector& types) const { if (kind_ == OutputType::FIXED) { - return ValueDescr(type_, shape_ == ValueDescr::ANY ? 
broadcasted_shape : shape_); + return type_.get(); } else { - ARROW_ASSIGN_OR_RAISE(ValueDescr resolved_descr, resolver_(ctx, args)); - if (resolved_descr.shape == ValueDescr::ANY) { - resolved_descr.shape = broadcasted_shape; - } - return resolved_descr; + return resolver_(ctx, types); } } @@ -448,19 +430,19 @@ bool KernelSignature::Equals(const KernelSignature& other) const { return true; } -bool KernelSignature::MatchesInputs(const std::vector& args) const { +bool KernelSignature::MatchesInputs(const std::vector& types) const { if (is_varargs_) { - for (size_t i = 0; i < args.size(); ++i) { - if (!in_types_[std::min(i, in_types_.size() - 1)].Matches(args[i])) { + for (size_t i = 0; i < types.size(); ++i) { + if (!in_types_[std::min(i, in_types_.size() - 1)].Matches(*types[i])) { return false; } } } else { - if (args.size() != in_types_.size()) { + if (types.size() != in_types_.size()) { return false; } for (size_t i = 0; i < in_types_.size(); ++i) { - if (!in_types_[i].Matches(args[i])) { + if (!in_types_[i].Matches(*types[i])) { return false; } } @@ -495,7 +477,7 @@ std::string KernelSignature::ToString() const { ss << in_types_[i].ToString(); } if (is_varargs_) { - ss << "]"; + ss << "*]"; } else { ss << ")"; } diff --git a/cpp/src/arrow/compute/kernel.h b/cpp/src/arrow/compute/kernel.h index 93a1c605a99..5463a2de579 100644 --- a/cpp/src/arrow/compute/kernel.h +++ b/cpp/src/arrow/compute/kernel.h @@ -143,10 +143,9 @@ ARROW_EXPORT std::shared_ptr Primitive(); } // namespace match -/// \brief An object used for type- and shape-checking arguments to be passed -/// to a kernel and stored in a KernelSignature. Distinguishes between ARRAY -/// and SCALAR arguments using ValueDescr::Shape. The type-checking rule can be -/// supplied either with an exact DataType instance or a custom TypeMatcher. +/// \brief An object used for type-checking arguments to be passed to a kernel +/// and stored in a KernelSignature. The type-checking rule can be supplied +/// either with an exact DataType instance or a custom TypeMatcher. class ARROW_EXPORT InputType { public: /// \brief The kind of type-checking rule that the InputType contains. @@ -163,29 +162,21 @@ class ARROW_EXPORT InputType { USE_TYPE_MATCHER }; - /// \brief Accept any value type but with a specific shape (e.g. any Array or - /// any Scalar). - InputType(ValueDescr::Shape shape = ValueDescr::ANY) // NOLINT implicit construction - : kind_(ANY_TYPE), shape_(shape) {} + /// \brief Accept any value type + InputType() : kind_(ANY_TYPE) {} /// \brief Accept an exact value type. - InputType(std::shared_ptr type, // NOLINT implicit construction - ValueDescr::Shape shape = ValueDescr::ANY) - : kind_(EXACT_TYPE), shape_(shape), type_(std::move(type)) {} - - /// \brief Accept an exact value type and shape provided by a ValueDescr. - InputType(const ValueDescr& descr) // NOLINT implicit construction - : InputType(descr.type, descr.shape) {} + InputType(std::shared_ptr type) // NOLINT implicit construction + : kind_(EXACT_TYPE), type_(std::move(type)) {} /// \brief Use the passed TypeMatcher to type check. - InputType(std::shared_ptr type_matcher, // NOLINT implicit construction - ValueDescr::Shape shape = ValueDescr::ANY) - : kind_(USE_TYPE_MATCHER), shape_(shape), type_matcher_(std::move(type_matcher)) {} + InputType(std::shared_ptr type_matcher) // NOLINT implicit construction + : kind_(USE_TYPE_MATCHER), type_matcher_(std::move(type_matcher)) {} /// \brief Match any type with the given Type::type. Uses a TypeMatcher for /// its implementation. 
- explicit InputType(Type::type type_id, ValueDescr::Shape shape = ValueDescr::ANY) - : InputType(match::SameTypeId(type_id), shape) {} + InputType(Type::type type_id) // NOLINT implicit construction + : InputType(match::SameTypeId(type_id)) {} InputType(const InputType& other) { CopyInto(other); } @@ -195,23 +186,8 @@ class ARROW_EXPORT InputType { void operator=(InputType&& other) { MoveInto(std::forward(other)); } - // \brief Match an array with the given exact type. Convenience constructor. - static InputType Array(std::shared_ptr type) { - return InputType(std::move(type), ValueDescr::ARRAY); - } - - // \brief Match a scalar with the given exact type. Convenience constructor. - static InputType Scalar(std::shared_ptr type) { - return InputType(std::move(type), ValueDescr::SCALAR); - } - - // \brief Match an array with the given Type::type id. Convenience - // constructor. - static InputType Array(Type::type id) { return InputType(id, ValueDescr::ARRAY); } - - // \brief Match a scalar with the given Type::type id. Convenience - // constructor. - static InputType Scalar(Type::type id) { return InputType(id, ValueDescr::SCALAR); } + // \brief Match any input (array, scalar of any type) + static InputType Any() { return InputType(); } /// \brief Return true if this input type matches the same type cases as the /// other. @@ -227,21 +203,16 @@ class ARROW_EXPORT InputType { /// \brief Render a human-readable string representation. std::string ToString() const; - /// \brief Return true if the value matches this argument kind in type - /// and shape. + /// \brief Return true if the Datum matches this argument kind in + /// type (and only allows scalar or array-like Datums). bool Matches(const Datum& value) const; - /// \brief Return true if the value descriptor matches this argument kind in - /// type and shape. - bool Matches(const ValueDescr& value) const; + /// \brief Return true if the type matches this InputType + bool Matches(const DataType& type) const; /// \brief The type matching rule that this InputType uses. Kind kind() const { return kind_; } - /// \brief Indicates whether this InputType matches Array (ValueDescr::ARRAY), - /// Scalar (ValueDescr::SCALAR) values, or both (ValueDescr::ANY). - ValueDescr::Shape shape() const { return shape_; } - /// \brief For InputType::EXACT_TYPE kind, the exact type that this InputType /// must match. Otherwise this function should not be used and will assert in /// debug builds. @@ -255,22 +226,18 @@ class ARROW_EXPORT InputType { private: void CopyInto(const InputType& other) { this->kind_ = other.kind_; - this->shape_ = other.shape_; this->type_ = other.type_; this->type_matcher_ = other.type_matcher_; } void MoveInto(InputType&& other) { this->kind_ = other.kind_; - this->shape_ = other.shape_; this->type_ = std::move(other.type_); this->type_matcher_ = std::move(other.type_matcher_); } Kind kind_; - ValueDescr::Shape shape_ = ValueDescr::ANY; - // For EXACT_TYPE Kind std::shared_ptr type_; @@ -279,43 +246,30 @@ class ARROW_EXPORT InputType { }; /// \brief Container to capture both exact and input-dependent output types. 
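Because the shape-broadcasting rules described in the comment being removed below no longer apply, a computed OutputType is simply a function from input TypeHolders to an output TypeHolder. A minimal sketch of the new Resolver style; the resolver (which merely echoes the first input type) is illustrative:

    #include <iostream>
    #include <vector>

    #include "arrow/compute/kernel.h"
    #include "arrow/result.h"
    #include "arrow/type.h"

    namespace cp = arrow::compute;

    // A resolver in the new style: capture-less, so it is compatible with the
    // function-pointer Resolver typedef. The output type mirrors the first input.
    arrow::Result<arrow::TypeHolder> FirstInputType(
        cp::KernelContext*, const std::vector<arrow::TypeHolder>& types) {
      if (types.empty()) {
        return arrow::Status::Invalid("Need at least one argument");
      }
      return types[0];
    }

    int main() {
      cp::OutputType out_type(FirstInputType);
      arrow::Result<arrow::TypeHolder> resolved =
          out_type.Resolve(/*ctx=*/nullptr, {arrow::utf8()});
      if (resolved.ok()) {
        std::cout << resolved->GetSharedPtr()->ToString() << std::endl;  // string
      }
      return 0;
    }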
-/// -/// The value shape returned by Resolve will be determined by broadcasting the -/// shapes of the input arguments, otherwise this is handled by the -/// user-defined resolver function: -/// -/// * Any ARRAY shape -> output shape is ARRAY -/// * All SCALAR shapes -> output shape is SCALAR class ARROW_EXPORT OutputType { public: /// \brief An enum indicating whether the value type is an invariant fixed /// value or one that's computed by a kernel-defined resolver function. enum ResolveKind { FIXED, COMPUTED }; - /// Type resolution function. Given input types and shapes, return output - /// type and shape. This function MAY may use the kernel state to decide - /// the output type based on the functionoptions. + /// Type resolution function. Given input types, return output type. This + /// function MAY may use the kernel state to decide the output type based on + /// the FunctionOptions. /// /// This function SHOULD _not_ be used to check for arity, that is to be /// performed one or more layers above. - using Resolver = - std::function(KernelContext*, const std::vector&)>; + typedef Result (*Resolver)(KernelContext*, const std::vector&); - /// \brief Output an exact type, but with shape determined by promoting the - /// shapes of the inputs (any ARRAY argument yields ARRAY). + /// \brief Output an exact type OutputType(std::shared_ptr type) // NOLINT implicit construction : kind_(FIXED), type_(std::move(type)) {} - /// \brief Output the exact type and shape provided by a ValueDescr - OutputType(ValueDescr descr); // NOLINT implicit construction - /// \brief Output a computed type depending on actual input types OutputType(Resolver resolver) // NOLINT implicit construction : kind_(COMPUTED), resolver_(std::move(resolver)) {} OutputType(const OutputType& other) { this->kind_ = other.kind_; - this->shape_ = other.shape_; this->type_ = other.type_; this->resolver_ = other.resolver_; } @@ -323,19 +277,17 @@ class ARROW_EXPORT OutputType { OutputType(OutputType&& other) { this->kind_ = other.kind_; this->type_ = std::move(other.type_); - this->shape_ = other.shape_; this->resolver_ = other.resolver_; } OutputType& operator=(const OutputType&) = default; OutputType& operator=(OutputType&&) = default; - /// \brief Return the shape and type of the expected output value of the - /// kernel given the value descriptors (shapes and types) of the input - /// arguments. The resolver may make use of state information kept in the - /// KernelContext. - Result Resolve(KernelContext* ctx, - const std::vector& args) const; + /// \brief Return the type of the expected output value of the kernel given + /// the input argument types. The resolver may make use of state information + /// kept in the KernelContext. + Result Resolve(KernelContext* ctx, + const std::vector& args) const; /// \brief The exact output value type for the FIXED kind. const std::shared_ptr& type() const; @@ -352,22 +304,14 @@ class ARROW_EXPORT OutputType { /// fixed/invariant or computed by a resolver. ResolveKind kind() const { return kind_; } - /// \brief If the shape is ANY, then Resolve will compute the shape based on - /// the input arguments. - ValueDescr::Shape shape() const { return shape_; } - private: ResolveKind kind_; // For FIXED resolution std::shared_ptr type_; - /// \brief The shape of the output type to return when using Resolve. If ANY - /// will promote the input shapes. 
- ValueDescr::Shape shape_ = ValueDescr::ANY; - // For COMPUTED resolution - Resolver resolver_; + Resolver resolver_ = NULLPTR; }; /// \brief Holds the input types and output type of the kernel. @@ -388,7 +332,7 @@ class ARROW_EXPORT KernelSignature { /// \brief Return true if the signature if compatible with the list of input /// value descriptors. - bool MatchesInputs(const std::vector& descriptors) const; + bool MatchesInputs(const std::vector& types) const; /// \brief Returns true if the input types of each signature are /// equal. Well-formed functions should have a deterministic output type @@ -408,9 +352,10 @@ class ARROW_EXPORT KernelSignature { /// function arguments. const std::vector& in_types() const { return in_types_; } - /// \brief The output type for the kernel. Use Resolve to return the exact - /// output given input argument ValueDescrs, since many kernels' output types - /// depend on their input types (or their type metadata). + /// \brief The output type for the kernel. Use Resolve to return the + /// exact output given input argument types, since many kernels' + /// output types depend on their input types (or their type + /// metadata). const OutputType& out_type() const { return out_type_; } /// \brief Render a human-readable string representation @@ -493,12 +438,9 @@ struct KernelInitArgs { /// depend on the kernel's KernelSignature or other data contained there. const Kernel* kernel; - /// \brief The types and shapes of the input arguments that the kernel is + /// \brief The types of the input arguments that the kernel is /// about to be executed against. - /// - /// TODO: should this be const std::vector*? const-ref is being - /// used to avoid the cost of copying the struct into the args struct. - const std::vector& inputs; + const std::vector& inputs; /// \brief Opaque options specific to this kernel. May be nullptr for functions /// that do not require options. @@ -523,7 +465,7 @@ struct Kernel { std::move(init)) {} /// \brief The "signature" of the kernel containing the InputType input - /// argument validators and OutputType output type and shape resolver. + /// argument validators and OutputType output type resolver. std::shared_ptr signature; /// \brief Create a new KernelState for invocations of this kernel, e.g. to @@ -546,6 +488,9 @@ struct Kernel { /// contain multiple kernels with the same signature but different levels of SIMD, /// so that the most optimized kernel supported on a host's processor can be chosen. SimdLevel::type simd_level = SimdLevel::NONE; + + // Additional kernel-specific data + std::shared_ptr data; }; /// \brief The scalar kernel execution API that must be implemented for SCALAR @@ -555,8 +500,7 @@ struct Kernel { /// endeavor to write into pre-allocated memory if they are able, though for /// some kernels (e.g. in cases when a builder like StringBuilder) must be /// employed this may not be possible. -using ArrayKernelExec = - std::function; +typedef Status (*ArrayKernelExec)(KernelContext*, const ExecSpan&, ExecResult*); /// \brief Kernel data structure for implementations of ScalarFunction. 
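Combining the new function-pointer ArrayKernelExec with DataType-only input lists, defining and registering a scalar kernel now looks roughly like the sketch below; the function name "double_int32" and its exec body are illustrative and not part of this patch:

    #include <cstdint>
    #include <memory>

    #include "arrow/compute/exec.h"
    #include "arrow/compute/function.h"
    #include "arrow/compute/kernel.h"
    #include "arrow/compute/registry.h"
    #include "arrow/status.h"
    #include "arrow/type.h"

    namespace cp = arrow::compute;

    // Exec functions always see array-shaped inputs; scalar arguments are
    // handled by the executor, so the kernel just reads the ExecSpan and
    // writes into the preallocated output ArraySpan.
    arrow::Status DoubleInt32(cp::KernelContext*, const cp::ExecSpan& batch,
                              cp::ExecResult* out) {
      const int32_t* in = batch[0].array.GetValues<int32_t>(1);
      int32_t* out_values = out->array_span()->GetValues<int32_t>(1);
      for (int64_t i = 0; i < batch.length; ++i) {
        out_values[i] = in[i] * 2;
      }
      return arrow::Status::OK();
    }

    arrow::Status RegisterDoubleInt32() {
      auto func = std::make_shared<cp::ScalarFunction>(
          "double_int32", cp::Arity::Unary(), cp::FunctionDoc::Empty());
      // A bare DataType is now enough; InputType::Array(int32()) is gone.
      cp::ScalarKernel kernel({arrow::int32()}, arrow::int32(), DoubleInt32);
      ARROW_RETURN_NOT_OK(func->AddKernel(std::move(kernel)));
      return cp::GetFunctionRegistry()->AddFunction(std::move(func));
    }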
In /// addition to the members found in Kernel, contains the null handling @@ -566,12 +510,11 @@ struct ScalarKernel : public Kernel { ScalarKernel(std::shared_ptr sig, ArrayKernelExec exec, KernelInit init = NULLPTR) - : Kernel(std::move(sig), init), exec(std::move(exec)) {} + : Kernel(std::move(sig), init), exec(exec) {} ScalarKernel(std::vector in_types, OutputType out_type, ArrayKernelExec exec, KernelInit init = NULLPTR) - : Kernel(std::move(in_types), std::move(out_type), std::move(init)), - exec(std::move(exec)) {} + : Kernel(std::move(in_types), std::move(out_type), std::move(init)), exec(exec) {} /// \brief Perform a single invocation of this kernel. Depending on the /// implementation, it may only write into preallocated memory, while in some @@ -590,9 +533,6 @@ struct ScalarKernel : public Kernel { // bitmaps is a reasonable default NullHandling::type null_handling = NullHandling::INTERSECTION; MemAllocation::type mem_allocation = MemAllocation::PREALLOCATE; - - // Additional kernel-specific data - std::shared_ptr data; }; // ---------------------------------------------------------------------- @@ -615,13 +555,13 @@ struct VectorKernel : public Kernel { VectorKernel(std::vector in_types, OutputType out_type, ArrayKernelExec exec, KernelInit init = NULLPTR, FinalizeFunc finalize = NULLPTR) : Kernel(std::move(in_types), std::move(out_type), std::move(init)), - exec(std::move(exec)), + exec(exec), finalize(std::move(finalize)) {} VectorKernel(std::shared_ptr sig, ArrayKernelExec exec, KernelInit init = NULLPTR, FinalizeFunc finalize = NULLPTR) : Kernel(std::move(sig), std::move(init)), - exec(std::move(exec)), + exec(exec), finalize(std::move(finalize)) {} /// \brief Perform a single invocation of this kernel. Any required state is diff --git a/cpp/src/arrow/compute/kernel_test.cc b/cpp/src/arrow/compute/kernel_test.cc index 2d427374426..2676e93c3d7 100644 --- a/cpp/src/arrow/compute/kernel_test.cc +++ b/cpp/src/arrow/compute/kernel_test.cc @@ -21,6 +21,7 @@ #include +#include "arrow/array/util.h" #include "arrow/compute/kernel.h" #include "arrow/status.h" #include "arrow/testing/gtest_util.h" @@ -75,44 +76,24 @@ TEST(InputType, AnyTypeConstructor) { // Check the ANY_TYPE ctors InputType ty; ASSERT_EQ(InputType::ANY_TYPE, ty.kind()); - ASSERT_EQ(ValueDescr::ANY, ty.shape()); - - ty = InputType(ValueDescr::SCALAR); - ASSERT_EQ(ValueDescr::SCALAR, ty.shape()); - - ty = InputType(ValueDescr::ARRAY); - ASSERT_EQ(ValueDescr::ARRAY, ty.shape()); } TEST(InputType, Constructors) { // Exact type constructor InputType ty1(int8()); ASSERT_EQ(InputType::EXACT_TYPE, ty1.kind()); - ASSERT_EQ(ValueDescr::ANY, ty1.shape()); AssertTypeEqual(*int8(), *ty1.type()); InputType ty1_implicit = int8(); ASSERT_TRUE(ty1.Equals(ty1_implicit)); - InputType ty1_array(int8(), ValueDescr::ARRAY); - ASSERT_EQ(ValueDescr::ARRAY, ty1_array.shape()); - - InputType ty1_scalar(int8(), ValueDescr::SCALAR); - ASSERT_EQ(ValueDescr::SCALAR, ty1_scalar.shape()); - // Same type id constructor InputType ty2(Type::DECIMAL); ASSERT_EQ(InputType::USE_TYPE_MATCHER, ty2.kind()); - ASSERT_EQ("any[Type::DECIMAL128]", ty2.ToString()); + ASSERT_EQ("Type::DECIMAL128", ty2.ToString()); ASSERT_TRUE(ty2.type_matcher().Matches(*decimal(12, 2))); ASSERT_FALSE(ty2.type_matcher().Matches(*int16())); - InputType ty2_array(Type::DECIMAL, ValueDescr::ARRAY); - ASSERT_EQ(ValueDescr::ARRAY, ty2_array.shape()); - - InputType ty2_scalar(Type::DECIMAL, ValueDescr::SCALAR); - ASSERT_EQ(ValueDescr::SCALAR, ty2_scalar.shape()); - // Implicit 
construction in a vector std::vector types = {int8(), InputType(Type::DECIMAL)}; ASSERT_TRUE(types[0].Equals(ty1)); @@ -131,69 +112,33 @@ TEST(InputType, Constructors) { ASSERT_TRUE(ty6.Equals(ty2)); // ToString - ASSERT_EQ("any[int8]", ty1.ToString()); - ASSERT_EQ("array[int8]", ty1_array.ToString()); - ASSERT_EQ("scalar[int8]", ty1_scalar.ToString()); - - ASSERT_EQ("any[Type::DECIMAL128]", ty2.ToString()); - ASSERT_EQ("array[Type::DECIMAL128]", ty2_array.ToString()); - ASSERT_EQ("scalar[Type::DECIMAL128]", ty2_scalar.ToString()); + ASSERT_EQ("int8", ty1.ToString()); + ASSERT_EQ("Type::DECIMAL128", ty2.ToString()); InputType ty7(match::TimestampTypeUnit(TimeUnit::MICRO)); - ASSERT_EQ("any[timestamp(us)]", ty7.ToString()); + ASSERT_EQ("timestamp(us)", ty7.ToString()); InputType ty8; - InputType ty9(ValueDescr::ANY); - InputType ty10(ValueDescr::ARRAY); - InputType ty11(ValueDescr::SCALAR); - ASSERT_EQ("any[any]", ty8.ToString()); - ASSERT_EQ("any[any]", ty9.ToString()); - ASSERT_EQ("array[any]", ty10.ToString()); - ASSERT_EQ("scalar[any]", ty11.ToString()); + ASSERT_EQ("any", ty8.ToString()); } TEST(InputType, Equals) { InputType t1 = int8(); InputType t2 = int8(); - InputType t3(int8(), ValueDescr::ARRAY); - InputType t3_i32(int32(), ValueDescr::ARRAY); - InputType t3_scalar(int8(), ValueDescr::SCALAR); - InputType t4(int8(), ValueDescr::ARRAY); - InputType t4_i32(int32(), ValueDescr::ARRAY); + InputType t3 = int32(); InputType t5(Type::DECIMAL); InputType t6(Type::DECIMAL); - InputType t7(Type::DECIMAL, ValueDescr::SCALAR); - InputType t7_i32(Type::INT32, ValueDescr::SCALAR); - InputType t8(Type::DECIMAL, ValueDescr::SCALAR); - InputType t8_i32(Type::INT32, ValueDescr::SCALAR); ASSERT_TRUE(t1.Equals(t2)); ASSERT_EQ(t1, t2); - - // ANY vs SCALAR ASSERT_NE(t1, t3); - ASSERT_EQ(t3, t4); - - // both ARRAY, but different type - ASSERT_NE(t3, t3_i32); - - // ARRAY vs SCALAR - ASSERT_NE(t3, t3_scalar); - - ASSERT_EQ(t3_i32, t4_i32); - ASSERT_FALSE(t1.Equals(t5)); ASSERT_NE(t1, t5); ASSERT_EQ(t5, t5); ASSERT_EQ(t5, t6); - ASSERT_NE(t5, t7); - ASSERT_EQ(t7, t8); - ASSERT_EQ(t7, t8); - ASSERT_NE(t7, t7_i32); - ASSERT_EQ(t7_i32, t8_i32); // NOTE: For the time being, we treat int32() and Type::INT32 as being // different. 
This could obviously be fixed later to make these equivalent @@ -208,9 +153,6 @@ TEST(InputType, Equals) { TEST(InputType, Hash) { InputType t0; - InputType t0_scalar(ValueDescr::SCALAR); - InputType t0_array(ValueDescr::ARRAY); - InputType t1 = int8(); InputType t2(Type::DECIMAL); @@ -218,36 +160,32 @@ TEST(InputType, Hash) { // same value, and whether the elements of the type are all incorporated into // the Hash ASSERT_EQ(t0.Hash(), t0.Hash()); - ASSERT_NE(t0.Hash(), t0_scalar.Hash()); - ASSERT_NE(t0.Hash(), t0_array.Hash()); - ASSERT_NE(t0_scalar.Hash(), t0_array.Hash()); - ASSERT_EQ(t1.Hash(), t1.Hash()); ASSERT_EQ(t2.Hash(), t2.Hash()); - ASSERT_NE(t0.Hash(), t1.Hash()); ASSERT_NE(t0.Hash(), t2.Hash()); ASSERT_NE(t1.Hash(), t2.Hash()); } TEST(InputType, Matches) { - InputType ty1 = int8(); - - ASSERT_TRUE(ty1.Matches(ValueDescr::Scalar(int8()))); - ASSERT_TRUE(ty1.Matches(ValueDescr::Array(int8()))); - ASSERT_TRUE(ty1.Matches(ValueDescr::Any(int8()))); - ASSERT_FALSE(ty1.Matches(ValueDescr::Any(int16()))); - - InputType ty2(Type::DECIMAL); - ASSERT_TRUE(ty2.Matches(ValueDescr::Scalar(decimal(12, 2)))); - ASSERT_TRUE(ty2.Matches(ValueDescr::Array(decimal(12, 2)))); - ASSERT_FALSE(ty2.Matches(ValueDescr::Any(float64()))); - - InputType ty3(int64(), ValueDescr::SCALAR); - ASSERT_FALSE(ty3.Matches(ValueDescr::Array(int64()))); - ASSERT_TRUE(ty3.Matches(ValueDescr::Scalar(int64()))); - ASSERT_FALSE(ty3.Matches(ValueDescr::Scalar(int32()))); - ASSERT_FALSE(ty3.Matches(ValueDescr::Any(int64()))); + InputType input1 = int8(); + + ASSERT_TRUE(input1.Matches(*int8())); + ASSERT_TRUE(input1.Matches(*int8())); + ASSERT_FALSE(input1.Matches(*int16())); + + InputType input2(Type::DECIMAL); + ASSERT_TRUE(input2.Matches(*decimal(12, 2))); + + auto ty2 = decimal(12, 2); + auto ty3 = float64(); + ASSERT_OK_AND_ASSIGN(std::shared_ptr arr2, MakeArrayOfNull(ty2, 1)); + ASSERT_OK_AND_ASSIGN(std::shared_ptr arr3, MakeArrayOfNull(ty3, 1)); + ASSERT_OK_AND_ASSIGN(std::shared_ptr scalar2, arr2->GetScalar(0)); + ASSERT_TRUE(input2.Matches(Datum(arr2))); + ASSERT_TRUE(input2.Matches(Datum(scalar2))); + ASSERT_FALSE(input2.Matches(*ty3)); + ASSERT_FALSE(input2.Matches(arr3)); } // ---------------------------------------------------------------------- @@ -259,14 +197,14 @@ TEST(OutputType, Constructors) { AssertTypeEqual(*int8(), *ty1.type()); auto DummyResolver = [](KernelContext*, - const std::vector& args) -> Result { - return ValueDescr(int32(), GetBroadcastShape(args)); + const std::vector& args) -> Result { + return int32(); }; OutputType ty2(DummyResolver); ASSERT_EQ(OutputType::COMPUTED, ty2.kind()); - ASSERT_OK_AND_ASSIGN(ValueDescr out_descr2, ty2.Resolve(nullptr, {})); - ASSERT_EQ(ValueDescr::Array(int32()), out_descr2); + ASSERT_OK_AND_ASSIGN(TypeHolder out_type2, ty2.Resolve(nullptr, {})); + ASSERT_EQ(out_type2, int32()); // Copy constructor OutputType ty3 = ty1; @@ -275,8 +213,8 @@ TEST(OutputType, Constructors) { OutputType ty4 = ty2; ASSERT_EQ(OutputType::COMPUTED, ty4.kind()); - ASSERT_OK_AND_ASSIGN(ValueDescr out_descr4, ty4.Resolve(nullptr, {})); - ASSERT_EQ(ValueDescr::Array(int32()), out_descr4); + ASSERT_OK_AND_ASSIGN(TypeHolder out_type4, ty4.Resolve(nullptr, {})); + ASSERT_EQ(out_type4, int32()); // Move constructor OutputType ty5 = std::move(ty1); @@ -285,8 +223,8 @@ TEST(OutputType, Constructors) { OutputType ty6 = std::move(ty4); ASSERT_EQ(OutputType::COMPUTED, ty6.kind()); - ASSERT_OK_AND_ASSIGN(ValueDescr out_descr6, ty6.Resolve(nullptr, {})); - ASSERT_EQ(ValueDescr::Array(int32()), 
out_descr6); + ASSERT_OK_AND_ASSIGN(TypeHolder out_type6, ty6.Resolve(nullptr, {})); + ASSERT_EQ(out_type6, int32()); // ToString @@ -296,89 +234,63 @@ TEST(OutputType, Constructors) { } TEST(OutputType, Resolve) { - // Check shape promotion rules for FIXED kind OutputType ty1(int32()); - ASSERT_OK_AND_ASSIGN(ValueDescr descr, ty1.Resolve(nullptr, {})); - ASSERT_EQ(ValueDescr::Array(int32()), descr); + ASSERT_OK_AND_ASSIGN(TypeHolder result, ty1.Resolve(nullptr, {})); + ASSERT_EQ(result, int32()); - ASSERT_OK_AND_ASSIGN(descr, - ty1.Resolve(nullptr, {ValueDescr(int8(), ValueDescr::SCALAR)})); - ASSERT_EQ(ValueDescr::Scalar(int32()), descr); + ASSERT_OK_AND_ASSIGN(result, ty1.Resolve(nullptr, {int8()})); + ASSERT_EQ(result, int32()); - ASSERT_OK_AND_ASSIGN(descr, - ty1.Resolve(nullptr, {ValueDescr(int8(), ValueDescr::SCALAR), - ValueDescr(int8(), ValueDescr::ARRAY)})); - ASSERT_EQ(ValueDescr::Array(int32()), descr); + ASSERT_OK_AND_ASSIGN(result, ty1.Resolve(nullptr, {int8(), int8()})); + ASSERT_EQ(result, int32()); - OutputType ty2([](KernelContext*, const std::vector& args) { - return ValueDescr(args[0].type, GetBroadcastShape(args)); - }); + auto resolver = [](KernelContext*, + const std::vector& args) -> Result { + return args[0]; + }; + OutputType ty2(resolver); - ASSERT_OK_AND_ASSIGN(descr, ty2.Resolve(nullptr, {ValueDescr::Array(utf8())})); - ASSERT_EQ(ValueDescr::Array(utf8()), descr); + ASSERT_OK_AND_ASSIGN(result, ty2.Resolve(nullptr, {utf8()})); + ASSERT_EQ(result, utf8()); // Type resolver that returns an error OutputType ty3( - [](KernelContext* ctx, const std::vector& args) -> Result { + [](KernelContext* ctx, const std::vector& types) -> Result { // NB: checking the value types versus the function arity should be // validated elsewhere, so this is just for illustration purposes - if (args.size() == 0) { + if (types.size() == 0) { return Status::Invalid("Need at least one argument"); } - return ValueDescr(args[0]); + return types[0]; }); ASSERT_RAISES(Invalid, ty3.Resolve(nullptr, {})); - // Type resolver that returns ValueDescr::ANY and needs type promotion + // Type resolver that returns a fixed value OutputType ty4( - [](KernelContext* ctx, const std::vector& args) -> Result { + [](KernelContext* ctx, const std::vector& types) -> Result { return int32(); }); - ASSERT_OK_AND_ASSIGN(descr, ty4.Resolve(nullptr, {ValueDescr::Array(int8())})); - ASSERT_EQ(ValueDescr::Array(int32()), descr); - ASSERT_OK_AND_ASSIGN(descr, ty4.Resolve(nullptr, {ValueDescr::Scalar(int8())})); - ASSERT_EQ(ValueDescr::Scalar(int32()), descr); -} - -TEST(OutputType, ResolveDescr) { - ValueDescr d1 = ValueDescr::Scalar(int32()); - ValueDescr d2 = ValueDescr::Array(int32()); - - OutputType ty1(d1); - OutputType ty2(d2); - - ASSERT_EQ(ValueDescr::SCALAR, ty1.shape()); - ASSERT_EQ(ValueDescr::ARRAY, ty2.shape()); - - { - ASSERT_OK_AND_ASSIGN(ValueDescr descr, ty1.Resolve(nullptr, {})); - ASSERT_EQ(d1, descr); - } - - { - ASSERT_OK_AND_ASSIGN(ValueDescr descr, ty2.Resolve(nullptr, {})); - ASSERT_EQ(d2, descr); - } + ASSERT_OK_AND_ASSIGN(result, ty4.Resolve(nullptr, {int8()})); + ASSERT_EQ(result, int32()); + ASSERT_OK_AND_ASSIGN(result, ty4.Resolve(nullptr, {int8()})); + ASSERT_EQ(result, int32()); } // ---------------------------------------------------------------------- // KernelSignature TEST(KernelSignature, Basics) { - // (any[int8], scalar[decimal]) -> utf8 - std::vector in_types({int8(), InputType(Type::DECIMAL, ValueDescr::SCALAR)}); + // (int8, decimal) -> utf8 + std::vector in_types({int8(), 
InputType(Type::DECIMAL)}); OutputType out_type(utf8()); KernelSignature sig(in_types, out_type); ASSERT_EQ(2, sig.in_types().size()); ASSERT_TRUE(sig.in_types()[0].type()->Equals(*int8())); - ASSERT_TRUE(sig.in_types()[0].Matches(ValueDescr::Scalar(int8()))); - ASSERT_TRUE(sig.in_types()[0].Matches(ValueDescr::Array(int8()))); - - ASSERT_TRUE(sig.in_types()[1].Matches(ValueDescr::Scalar(decimal(12, 2)))); - ASSERT_FALSE(sig.in_types()[1].Matches(ValueDescr::Array(decimal(12, 2)))); + ASSERT_TRUE(sig.in_types()[0].Matches(*int8())); + ASSERT_TRUE(sig.in_types()[1].Matches(*decimal(12, 2))); } TEST(KernelSignature, Equals) { @@ -393,10 +305,6 @@ TEST(KernelSignature, Equals) { KernelSignature sig4_copy({int8(), int16()}, utf8()); KernelSignature sig5({int8(), int16(), int32()}, utf8()); - // Differ in shape - KernelSignature sig6({ValueDescr::Scalar(int8())}, utf8()); - KernelSignature sig7({ValueDescr::Array(int8())}, utf8()); - ASSERT_EQ(sig1, sig1); ASSERT_EQ(sig2, sig3); @@ -408,8 +316,6 @@ TEST(KernelSignature, Equals) { // Match first 2 args, but not third ASSERT_NE(sig4, sig5); - - ASSERT_NE(sig6, sig7); } TEST(KernelSignature, VarArgsEquals) { @@ -441,40 +347,32 @@ TEST(KernelSignature, MatchesInputs) { ASSERT_TRUE(sig1.MatchesInputs({})); ASSERT_FALSE(sig1.MatchesInputs({int8()})); - // (any[int8], any[decimal]) -> boolean + // (int8, decimal) -> boolean KernelSignature sig2({int8(), InputType(Type::DECIMAL)}, boolean()); ASSERT_FALSE(sig2.MatchesInputs({})); ASSERT_FALSE(sig2.MatchesInputs({int8()})); ASSERT_TRUE(sig2.MatchesInputs({int8(), decimal(12, 2)})); - ASSERT_TRUE(sig2.MatchesInputs( - {ValueDescr::Scalar(int8()), ValueDescr::Scalar(decimal(12, 2))})); - ASSERT_TRUE( - sig2.MatchesInputs({ValueDescr::Array(int8()), ValueDescr::Array(decimal(12, 2))})); - // (scalar[int8], array[int32]) -> boolean - KernelSignature sig3({ValueDescr::Scalar(int8()), ValueDescr::Array(int32())}, - boolean()); + // (int8, int32) -> boolean + KernelSignature sig3({int8(), int32()}, boolean()); ASSERT_FALSE(sig3.MatchesInputs({})); // Unqualified, these are ANY type and do not match because the kernel // requires a scalar and an array - ASSERT_FALSE(sig3.MatchesInputs({int8(), int32()})); - ASSERT_TRUE( - sig3.MatchesInputs({ValueDescr::Scalar(int8()), ValueDescr::Array(int32())})); - ASSERT_FALSE( - sig3.MatchesInputs({ValueDescr::Array(int8()), ValueDescr::Array(int32())})); + ASSERT_TRUE(sig3.MatchesInputs({int8(), int32()})); + ASSERT_FALSE(sig3.MatchesInputs({int8(), int16()})); } TEST(KernelSignature, VarArgsMatchesInputs) { { KernelSignature sig({int8()}, utf8(), /*is_varargs=*/true); - std::vector args = {int8()}; + std::vector args = {int8()}; ASSERT_TRUE(sig.MatchesInputs(args)); - args.push_back(ValueDescr::Scalar(int8())); - args.push_back(ValueDescr::Array(int8())); + args.push_back(int8()); + args.push_back(int8()); ASSERT_TRUE(sig.MatchesInputs(args)); args.push_back(int32()); ASSERT_FALSE(sig.MatchesInputs(args)); @@ -482,10 +380,10 @@ TEST(KernelSignature, VarArgsMatchesInputs) { { KernelSignature sig({int8(), utf8()}, utf8(), /*is_varargs=*/true); - std::vector args = {int8()}; + std::vector args = {int8()}; ASSERT_TRUE(sig.MatchesInputs(args)); - args.push_back(ValueDescr::Scalar(utf8())); - args.push_back(ValueDescr::Array(utf8())); + args.push_back(utf8()); + args.push_back(utf8()); ASSERT_TRUE(sig.MatchesInputs(args)); args.push_back(int32()); ASSERT_FALSE(sig.MatchesInputs(args)); @@ -493,23 +391,25 @@ TEST(KernelSignature, VarArgsMatchesInputs) { } TEST(KernelSignature, 
@@ -493,23 +391,25 @@ TEST(KernelSignature, ToString) {
-  std::vector<InputType> in_types = {InputType(int8(), ValueDescr::SCALAR),
-                                     InputType(Type::DECIMAL, ValueDescr::ARRAY),
+  std::vector<InputType> in_types = {InputType(int8()), InputType(Type::DECIMAL),
                                      InputType(utf8())};
   KernelSignature sig(in_types, utf8());
-  ASSERT_EQ("(scalar[int8], array[Type::DECIMAL128], any[string]) -> string",
-            sig.ToString());
-
-  OutputType out_type([](KernelContext*, const std::vector<ValueDescr>& args) {
-    return Status::Invalid("NYI");
-  });
-  KernelSignature sig2({int8(), InputType(Type::DECIMAL)}, out_type);
-  ASSERT_EQ("(any[int8], any[Type::DECIMAL128]) -> computed", sig2.ToString());
+  ASSERT_EQ("(int8, Type::DECIMAL128, string) -> string", sig.ToString());
+
+  OutputType out_type(
+      [](KernelContext*, const std::vector<TypeHolder>& args) -> Result<TypeHolder> {
+        return Status::Invalid("NYI");
+      });
+  KernelSignature sig2({int8(), Type::DECIMAL}, out_type);
+  ASSERT_EQ("(int8, Type::DECIMAL128) -> computed", sig2.ToString());
 }
 
 TEST(KernelSignature, VarArgsToString) {
   KernelSignature sig({int8()}, utf8(), /*is_varargs=*/true);
-  ASSERT_EQ("varargs[any[int8]] -> string", sig.ToString());
+  ASSERT_EQ("varargs[int8*] -> string", sig.ToString());
+
+  KernelSignature sig2({utf8(), int8()}, utf8(), /*is_varargs=*/true);
+  ASSERT_EQ("varargs[string, int8*] -> string", sig2.ToString());
 }
 
 }  // namespace compute
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
index 661b6a4edb1..57cee87f00d 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
@@ -195,7 +195,7 @@ Result<std::unique_ptr<KernelState>> CountDistinctInit(KernelContext* ctx,
 
 template
 void AddCountDistinctKernel(InputType type, ScalarAggregateFunction* func) {
-  AddAggKernel(KernelSignature::Make({type}, ValueDescr::Scalar(int64())),
+  AddAggKernel(KernelSignature::Make({type}, int64()),
                CountDistinctInit, func);
 }
@@ -252,7 +252,7 @@ struct MeanImplDefault : public MeanImpl {
 Result<std::unique_ptr<KernelState>> SumInit(KernelContext* ctx,
                                              const KernelInitArgs& args) {
   SumLikeInit visitor(
-      ctx, args.inputs[0].type,
+      ctx, args.inputs[0].GetSharedPtr(),
       static_cast<const ScalarAggregateOptions&>(*args.options));
   return visitor.Create();
 }
@@ -260,7 +260,7 @@ Result<std::unique_ptr<KernelState>> SumInit(KernelContext* ctx,
 Result<std::unique_ptr<KernelState>> MeanInit(KernelContext* ctx,
                                               const KernelInitArgs& args) {
   MeanKernelInit visitor(
-      ctx, args.inputs[0].type,
+      ctx, args.inputs[0].GetSharedPtr(),
       static_cast<const ScalarAggregateOptions&>(*args.options));
   return visitor.Create();
 }
@@ -277,7 +277,7 @@ struct ProductImpl : public ScalarAggregator {
   using ProductType = typename TypeTraits::CType;
   using OutputType = typename TypeTraits::ScalarType;
 
-  explicit ProductImpl(const std::shared_ptr<DataType>& out_type,
+  explicit ProductImpl(std::shared_ptr<DataType> out_type,
                        const ScalarAggregateOptions& options)
       : out_type(out_type), options(options),
@@ -356,10 +356,10 @@ struct NullProductImpl : public NullImpl {
 struct ProductInit {
   std::unique_ptr<KernelState> state;
   KernelContext* ctx;
-  const std::shared_ptr<DataType>& type;
+  std::shared_ptr<DataType> type;
   const ScalarAggregateOptions& options;
 
-  ProductInit(KernelContext* ctx, const std::shared_ptr<DataType>& type,
+  ProductInit(KernelContext* ctx, std::shared_ptr<DataType> type,
               const ScalarAggregateOptions& options)
       : ctx(ctx), type(type), options(options) {}
@@ -402,7 +402,7 @@ struct ProductInit {
   static Result<std::unique_ptr<KernelState>> Init(KernelContext* ctx,
                                                    const KernelInitArgs& args) {
-    ProductInit visitor(ctx, args.inputs[0].type,
+    ProductInit visitor(ctx, args.inputs[0].GetSharedPtr(),
                         static_cast<const ScalarAggregateOptions&>(*args.options));
     return visitor.Create();
   }
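Not part of the patch: a sketch of a computed OutputType written against the TypeHolder-based resolver signature exercised by the ToString test above; the names ResolveToFirstInput and MakeExampleSignature are illustrative only.

#include <arrow/compute/kernel.h>
#include <arrow/result.h>
#include <arrow/type.h>

#include <memory>
#include <vector>

namespace cp = arrow::compute;

// Output type resolvers now take and return TypeHolder instead of ValueDescr.
arrow::Result<arrow::TypeHolder> ResolveToFirstInput(
    cp::KernelContext*, const std::vector<arrow::TypeHolder>& in_types) {
  return in_types.front();
}

std::shared_ptr<cp::KernelSignature> MakeExampleSignature() {
  // Renders as "(int8, string) -> computed" via ToString().
  return cp::KernelSignature::Make({arrow::int8(), arrow::utf8()},
                                   cp::OutputType(ResolveToFirstInput));
}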
@@ -413,10 +413,10 @@ struct ProductInit {
 
 Result<std::unique_ptr<KernelState>> MinMaxInit(KernelContext* ctx,
                                                 const KernelInitArgs& args) {
-  ARROW_ASSIGN_OR_RAISE(auto out_type,
+  ARROW_ASSIGN_OR_RAISE(TypeHolder out_type,
                         args.kernel->signature->out_type().Resolve(ctx, args.inputs));
   MinMaxInitState visitor(
-      ctx, *args.inputs[0].type, std::move(out_type.type),
+      ctx, *args.inputs[0], out_type.GetSharedPtr(),
       static_cast<const ScalarAggregateOptions&>(*args.options));
   return visitor.Create();
 }
@@ -425,14 +425,7 @@ Result<std::unique_ptr<KernelState>> MinMaxInit(KernelContext* ctx,
 
 template
 void AddMinOrMaxAggKernel(ScalarAggregateFunction* func,
                           ScalarAggregateFunction* min_max_func) {
-  auto sig = KernelSignature::Make(
-      {InputType(ValueDescr::ANY)},
-      OutputType([](KernelContext*,
-                    const std::vector<ValueDescr>& descrs) -> Result<ValueDescr> {
-        // any[T] -> scalar[T]
-        return ValueDescr::Scalar(descrs.front().type);
-      }));
-
+  auto sig = KernelSignature::Make({InputType::Any()}, FirstType);
   auto init = [min_max_func](
                   KernelContext* ctx,
                   const KernelInitArgs& args) -> Result<std::unique_ptr<KernelState>> {
@@ -775,8 +768,7 @@ void AddBasicAggKernels(KernelInit init,
                        SimdLevel::type simd_level) {
   for (const auto& ty : types) {
     // array[InT] -> scalar[OutT]
-    auto sig =
-        KernelSignature::Make({InputType::Array(ty->id())}, ValueDescr::Scalar(out_ty));
+    auto sig = KernelSignature::Make({ty->id()}, out_ty);
     AddAggKernel(std::move(sig), init, func, simd_level);
   }
 }
@@ -786,9 +778,7 @@ void AddScalarAggKernels(KernelInit init,
                         std::shared_ptr<DataType> out_ty,
                         ScalarAggregateFunction* func) {
   for (const auto& ty : types) {
-    // scalar[InT] -> scalar[OutT]
-    auto sig =
-        KernelSignature::Make({InputType::Scalar(ty->id())}, ValueDescr::Scalar(out_ty));
+    auto sig = KernelSignature::Make({ty->id()}, out_ty);
     AddAggKernel(std::move(sig), init, func, SimdLevel::NONE);
   }
 }
@@ -804,17 +794,17 @@ void AddArrayScalarAggKernels(KernelInit init,
 
 namespace {
 
-Result<ValueDescr> MinMaxType(KernelContext*, const std::vector<ValueDescr>& descrs) {
-  // any[T] -> scalar[struct<min: T, max: T>]
-  auto ty = descrs.front().type;
-  return ValueDescr::Scalar(struct_({field("min", ty), field("max", ty)}));
+Result<TypeHolder> MinMaxType(KernelContext*, const std::vector<TypeHolder>& types) {
+  // T -> struct<min: T, max: T>
+  auto ty = types.front().GetSharedPtr();
+  return struct_({field("min", ty), field("max", ty)});
 }
 
 }  // namespace
 
 void AddMinMaxKernel(KernelInit init, internal::detail::GetTypeId get_id,
                      ScalarAggregateFunction* func, SimdLevel::type simd_level) {
-  auto sig = KernelSignature::Make({InputType(get_id.id)}, OutputType(MinMaxType));
+  auto sig = KernelSignature::Make({InputType(get_id.id)}, MinMaxType);
   AddAggKernel(std::move(sig), init, func, simd_level);
 }
@@ -828,13 +818,6 @@ void AddMinMaxKernels(KernelInit init,
 
 namespace {
 
-Result<ValueDescr> ScalarFirstType(KernelContext*,
-                                   const std::vector<ValueDescr>& descrs) {
-  ValueDescr result = descrs.front();
-  result.shape = ValueDescr::SCALAR;
-  return result;
-}
-
 const FunctionDoc count_doc{"Count the number of null / non-null values",
                             ("By default, only non-null values are counted.\n"
                              "This can be changed through CountOptions."),
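Not part of the patch: a sketch of the "output type equals first input type" resolution that the decimal kernels below now rely on, under the assumption that a resolver which ignores its KernelContext may be resolved with a null context; the name FirstInputType is illustrative and distinct from the FirstType helper used by the patch.

#include <arrow/compute/kernel.h>
#include <arrow/result.h>
#include <arrow/type.h>

#include <cassert>
#include <vector>

namespace cp = arrow::compute;

// Same behavior the decimal sum/mean/product signatures need: the output
// type is simply the first input type.
arrow::Result<arrow::TypeHolder> FirstInputType(
    cp::KernelContext*, const std::vector<arrow::TypeHolder>& in_types) {
  return in_types.front();
}

int main() {
  auto sig = cp::KernelSignature::Make({cp::InputType(arrow::Type::DECIMAL128)},
                                       cp::OutputType(FirstInputType));
  // Resolving against a concrete decimal type yields that same type back.
  auto resolved = sig->out_type().Resolve(/*ctx=*/nullptr, {arrow::decimal128(38, 10)});
  assert(resolved.ok());
  assert(resolved->type->id() == arrow::Type::DECIMAL128);
  return 0;
}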
@@ -922,8 +905,7 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
 
   // Takes any input, outputs int64 scalar
   InputType any_input;
-  AddAggKernel(KernelSignature::Make({any_input}, ValueDescr::Scalar(int64())), CountInit,
-               func.get());
+  AddAggKernel(KernelSignature::Make({any_input}, int64()), CountInit, func.get());
   DCHECK_OK(registry->AddFunction(std::move(func)));
 
   func = std::make_shared<ScalarAggregateFunction>(
@@ -935,12 +917,10 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
 
   func = std::make_shared<ScalarAggregateFunction>("sum", Arity::Unary(), sum_doc,
                                                    &default_scalar_aggregate_options);
   AddArrayScalarAggKernels(SumInit, {boolean()}, uint64(), func.get());
-  AddAggKernel(
-      KernelSignature::Make({InputType(Type::DECIMAL128)}, OutputType(ScalarFirstType)),
-      SumInit, func.get(), SimdLevel::NONE);
-  AddAggKernel(
-      KernelSignature::Make({InputType(Type::DECIMAL256)}, OutputType(ScalarFirstType)),
-      SumInit, func.get(), SimdLevel::NONE);
+  AddAggKernel(KernelSignature::Make({Type::DECIMAL128}, FirstType), SumInit, func.get(),
+               SimdLevel::NONE);
+  AddAggKernel(KernelSignature::Make({Type::DECIMAL256}, FirstType), SumInit, func.get(),
+               SimdLevel::NONE);
   AddArrayScalarAggKernels(SumInit, SignedIntTypes(), int64(), func.get());
   AddArrayScalarAggKernels(SumInit, UnsignedIntTypes(), uint64(), func.get());
   AddArrayScalarAggKernels(SumInit, FloatingPointTypes(), float64(), func.get());
@@ -965,12 +945,10 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
                                                    &default_scalar_aggregate_options);
   AddArrayScalarAggKernels(MeanInit, {boolean()}, float64(), func.get());
   AddArrayScalarAggKernels(MeanInit, NumericTypes(), float64(), func.get());
-  AddAggKernel(
-      KernelSignature::Make({InputType(Type::DECIMAL128)}, OutputType(ScalarFirstType)),
-      MeanInit, func.get(), SimdLevel::NONE);
-  AddAggKernel(
-      KernelSignature::Make({InputType(Type::DECIMAL256)}, OutputType(ScalarFirstType)),
-      MeanInit, func.get(), SimdLevel::NONE);
+  AddAggKernel(KernelSignature::Make({Type::DECIMAL128}, FirstType), MeanInit, func.get(),
+               SimdLevel::NONE);
+  AddAggKernel(KernelSignature::Make({Type::DECIMAL256}, FirstType), MeanInit, func.get(),
+               SimdLevel::NONE);
   AddArrayScalarAggKernels(MeanInit, {null()}, float64(), func.get());
   // Add the SIMD variants for mean
 #if defined(ARROW_HAVE_RUNTIME_AVX2)
@@ -1028,12 +1006,10 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
   AddArrayScalarAggKernels(ProductInit::Init, UnsignedIntTypes(), uint64(), func.get());
   AddArrayScalarAggKernels(ProductInit::Init, FloatingPointTypes(), float64(),
                            func.get());
-  AddAggKernel(
-      KernelSignature::Make({InputType(Type::DECIMAL128)}, OutputType(ScalarFirstType)),
-      ProductInit::Init, func.get(), SimdLevel::NONE);
-  AddAggKernel(
-      KernelSignature::Make({InputType(Type::DECIMAL256)}, OutputType(ScalarFirstType)),
-      ProductInit::Init, func.get(), SimdLevel::NONE);
+  AddAggKernel(KernelSignature::Make({Type::DECIMAL128}, FirstType), ProductInit::Init,
+               func.get(), SimdLevel::NONE);
+  AddAggKernel(KernelSignature::Make({Type::DECIMAL256}, FirstType), ProductInit::Init,
+               func.get(), SimdLevel::NONE);
   AddArrayScalarAggKernels(ProductInit::Init, {null()}, int64(), func.get());
 
   DCHECK_OK(registry->AddFunction(std::move(func)));
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc b/cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc
index 00e3e2e5fd4..03b45107eec 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc
@@ -37,7 +37,7 @@ struct MeanImplAvx2 : public MeanImpl {
 
 Result<std::unique_ptr<KernelState>> SumInitAvx2(KernelContext* ctx,
                                                  const KernelInitArgs& args) {
   SumLikeInit visitor(
-      ctx, args.inputs[0].type,
+      ctx, args.inputs[0].GetSharedPtr(),
       static_cast<const ScalarAggregateOptions&>(*args.options));
   return visitor.Create();
 }
@@ -45,7 +45,7 @@ Result<std::unique_ptr<KernelState>> SumInitAvx2(KernelContext* ctx,
 
 Result<std::unique_ptr<KernelState>> MeanInitAvx2(KernelContext* ctx,
                                                   const KernelInitArgs& args) {
   SumLikeInit visitor(
-      ctx, args.inputs[0].type,
+      ctx, args.inputs[0].GetSharedPtr(),
       static_cast<const ScalarAggregateOptions&>(*args.options));
   return visitor.Create();
 }
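Not part of the patch: a sketch of the KernelInit shape that the SumInit/MeanInit changes above are being moved to, where KernelInitArgs::inputs holds TypeHolder values. ExampleState and ExampleInit are hypothetical names, not anything defined by this patch.

#include <arrow/compute/kernel.h>
#include <arrow/result.h>
#include <arrow/type.h>

#include <memory>
#include <utility>

namespace cp = arrow::compute;

// Hypothetical kernel state; only the init signature and the TypeHolder
// accessors mirror the patch.
struct ExampleState : public cp::KernelState {
  explicit ExampleState(std::shared_ptr<arrow::DataType> ty) : out_type(std::move(ty)) {}
  std::shared_ptr<arrow::DataType> out_type;
};

arrow::Result<std::unique_ptr<cp::KernelState>> ExampleInit(
    cp::KernelContext*, const cp::KernelInitArgs& args) {
  // args.inputs now holds TypeHolder values; GetSharedPtr() recovers an
  // owning std::shared_ptr<DataType> when the state needs to keep the type.
  return std::make_unique<ExampleState>(args.inputs[0].GetSharedPtr());
}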
@@ -55,10 +55,10 @@ Result<std::unique_ptr<KernelState>> MeanInitAvx2(KernelContext* ctx,
 
 Result<std::unique_ptr<KernelState>> MinMaxInitAvx2(KernelContext* ctx,
                                                     const KernelInitArgs& args) {
-  ARROW_ASSIGN_OR_RAISE(auto out_type,
+  ARROW_ASSIGN_OR_RAISE(TypeHolder out_type,
                         args.kernel->signature->out_type().Resolve(ctx, args.inputs));
   MinMaxInitState visitor(
-      ctx, *args.inputs[0].type, std::move(out_type.type),
+      ctx, *args.inputs[0], out_type.GetSharedPtr(),
       static_cast<const ScalarAggregateOptions&>(*args.options));
   return visitor.Create();
 }
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc b/cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc
index 8c10eb19b07..0d66ed2ec3e 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc
@@ -37,7 +37,7 @@ struct MeanImplAvx512 : public MeanImpl {
 
 Result<std::unique_ptr<KernelState>> SumInitAvx512(KernelContext* ctx,
                                                    const KernelInitArgs& args) {
   SumLikeInit visitor(
-      ctx, args.inputs[0].type,
+      ctx, args.inputs[0].GetSharedPtr(),
       static_cast<const ScalarAggregateOptions&>(*args.options));
   return visitor.Create();
 }
@@ -45,7 +45,7 @@ Result<std::unique_ptr<KernelState>> SumInitAvx512(KernelContext* ctx,
 
 Result<std::unique_ptr<KernelState>> MeanInitAvx512(KernelContext* ctx,
                                                     const KernelInitArgs& args) {
   SumLikeInit visitor(
-      ctx, args.inputs[0].type,
+      ctx, args.inputs[0].GetSharedPtr(),
       static_cast<const ScalarAggregateOptions&>(*args.options));
   return visitor.Create();
 }
@@ -55,10 +55,10 @@ Result<std::unique_ptr<KernelState>> MeanInitAvx512(KernelContext* ctx,
 
 Result<std::unique_ptr<KernelState>> MinMaxInitAvx512(KernelContext* ctx,
                                                       const KernelInitArgs& args) {
-  ARROW_ASSIGN_OR_RAISE(auto out_type,
+  ARROW_ASSIGN_OR_RAISE(TypeHolder out_type,
                         args.kernel->signature->out_type().Resolve(ctx, args.inputs));
   MinMaxInitState visitor(
-      ctx, *args.inputs[0].type, std::move(out_type.type),
+      ctx, *args.inputs[0], out_type.GetSharedPtr(),
       static_cast<const ScalarAggregateOptions&>(*args.options));
   return visitor.Create();
 }
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
index a5b473793a9..6645e1a76bc 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
@@ -65,8 +65,7 @@ struct SumImpl : public ScalarAggregator {
   using SumCType = typename TypeTraits::CType;
   using OutputType = typename TypeTraits::ScalarType;
 
-  SumImpl(const std::shared_ptr<DataType>& out_type,
-          const ScalarAggregateOptions& options_)
+  SumImpl(std::shared_ptr<DataType> out_type, const ScalarAggregateOptions& options_)
       : out_type(out_type), options(options_) {}
 
   Status Consume(KernelContext*, const ExecBatch& batch) override {
@@ -216,10 +215,10 @@ template