Rust-GPU · Firestar99 · Jun 30, 2025 · Jun 27, 2025 · Jun 28, 2025 · Jun 28, 2025
@@ -2033,6 +2033,31 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> {
         if val.ty == dest_ty {
             val
         } else {
+            // If casting a constant, directly create a constant of the target type.
+            // This avoids creating intermediate types that might require additional
+            // capabilities. Note that only 32-bit and 64-bit source constants are
+            // folded below; a constant of any other source width (e.g. f16) still
+            // falls through to the regular conversion.
+            if let Some(const_val) = self.builder.lookup_const_scalar(val) {
+                if let (SpirvType::Float(src_width), SpirvType::Float(dst_width)) =
+                    (self.lookup_type(val.ty), self.lookup_type(dest_ty))
+                {
+                    if src_width < dst_width {
+                        // Convert the bit representation to the actual float value
+                        let float_val = match src_width {
+                            32 => Some(f32::from_bits(const_val as u32) as f64),
+                            64 => Some(f64::from_bits(const_val as u64)),
+                            _ => None,
+                        };
+
+                        if let Some(val) = float_val {
+                            return self.constant_float(dest_ty, val);
+                        }
+                    }
+                }
+            }
+
+            // Regular conversion
             self.emit()
                 .f_convert(dest_ty, None, val.def(self))
                 .unwrap()
@@ -2198,6 +2223,46 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> {
             // I guess?
             return val;
         }
+
+        // If casting a constant, directly create a constant of the target type. This
+        // avoids creating intermediate types that might require additional
+        // capabilities. For example, casting a u8 constant to u32 will directly create
+        // a u32 constant, avoiding the need for Int8 capability if it is not used
+        // elsewhere.
+        if let Some(const_val) = self.builder.lookup_const_scalar(val) {
+            let src_ty = self.lookup_type(val.ty);
+            let dst_ty_spv = self.lookup_type(dest_ty);
+
+            // Try to optimize the constant cast
+            let optimized_result = match (src_ty, dst_ty_spv) {
+                // Integer to integer cast
+                (SpirvType::Integer(src_width, _), SpirvType::Integer(dst_width, _)) => {
+                    // Only optimize if we're widening. This avoids creating the source
+                    // type when it's safe to do so. For narrowing casts (e.g., u32 as
+                    // u8), we need the proper truncation behavior that the regular cast
+                    // provides.
+                    if src_width < dst_width {
+                        Some(self.constant_int(dest_ty, const_val))
+                    } else {
+                        None
+                    }
+                }
+                // Bool to integer cast - const_val will be 0 or 1
+                (SpirvType::Bool, SpirvType::Integer(_, _)) => {
+                    Some(self.constant_int(dest_ty, const_val))
+                }
+                // Integer to bool cast - compare with zero
+                (SpirvType::Integer(_, _), SpirvType::Bool) => {
+                    Some(self.constant_bool(self.span(), const_val != 0))
+                }
+                _ => None,
+            };
+
+            if let Some(result) = optimized_result {
+                return result;
+            }
+        }
+
         match (self.lookup_type(val.ty), self.lookup_type(dest_ty)) {
             // sign change
             (
@@ -3128,6 +3193,8 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> {
             .and_then(|def_id| self.buffer_store_intrinsics.borrow().get(&def_id).copied());
         let is_panic_entry_point = instance_def_id
             .is_some_and(|def_id| self.panic_entry_points.borrow().contains(&def_id));
+        let from_trait_impl =
+            instance_def_id.and_then(|def_id| self.from_trait_impls.borrow().get(&def_id).copied());
 
         if let Some(libm_intrinsic) = libm_intrinsic {
             let result = self.call_libm_intrinsic(libm_intrinsic, result_type, args);
@@ -3139,8 +3206,10 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> {
                     self.debug_type(result.ty),
                 );
             }
-            result
-        } else if is_panic_entry_point {
+            return result;
+        }
+
+        if is_panic_entry_point {
             // HACK(eddyb) Rust 2021 `panic!` always uses `format_args!`, even
             // in the simple case that used to pass a `&str` constant, which
             // would not remain reachable in the SPIR-V - but `format_args!` is
@@ -3613,24 +3682,59 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> {
             // HACK(eddyb) redirect any possible panic call to an abort, to avoid
             // needing to materialize `&core::panic::Location` or `format_args!`.
             self.abort_with_kind_and_message_debug_printf("panic", message, debug_printf_args);
-            self.undef(result_type)
-        } else if let Some(mode) = buffer_load_intrinsic {
-            self.codegen_buffer_load_intrinsic(result_type, args, mode)
-        } else if let Some(mode) = buffer_store_intrinsic {
+            return self.undef(result_type);
+        }
+
+        if let Some(mode) = buffer_load_intrinsic {
+            return self.codegen_buffer_load_intrinsic(result_type, args, mode);
+        }
+
+        if let Some(mode) = buffer_store_intrinsic {
             self.codegen_buffer_store_intrinsic(args, mode);
 
             let void_ty = SpirvType::Void.def(rustc_span::DUMMY_SP, self);
-            SpirvValue {
+            return SpirvValue {
                 kind: SpirvValueKind::IllegalTypeUsed(void_ty),
                 ty: void_ty,
+            };
+        }
+
+        if let Some((source_ty, target_ty)) = from_trait_impl {
+            // Optimize From::from calls with constant arguments to avoid creating intermediate types.
+            // Since From is only implemented for safe conversions (widening conversions that preserve
+            // the numeric value), we can directly create a constant of the target type for primitive
+            // numeric types.
+            if let [arg] = args {
+                if let Some(const_val) = self.builder.lookup_const_scalar(*arg) {
+                    use rustc_middle::ty::FloatTy;
+                    let optimized_result = match (source_ty.kind(), target_ty.kind()) {
+                        // Integer widening conversions
+                        (ty::Uint(_), ty::Uint(_)) | (ty::Int(_), ty::Int(_)) => {
+                            Some(self.constant_int(result_type, const_val))
+                        }
+                        // Float widening conversions
+                        // TODO(@LegNeato): Handle more float types
+                        (ty::Float(FloatTy::F32), ty::Float(FloatTy::F64)) => {
+                            let float_val = f32::from_bits(const_val as u32) as f64;
+                            Some(self.constant_float(result_type, float_val))
+                        }
+                        // No optimization for narrowing conversions or unsupported types
+                        _ => None,
+                    };
+
+                    if let Some(result) = optimized_result {
+                        return result;
+                    }
+                }
             }
-        } else {
-            let args = args.iter().map(|arg| arg.def(self)).collect::<Vec<_>>();
-            self.emit()
-                .function_call(result_type, None, callee_val, args)
-                .unwrap()
-                .with_type(result_type)
         }
+
+        // Default: emit a regular function call
+        let args = args.iter().map(|arg| arg.def(self)).collect::<Vec<_>>();
+        self.emit()
+            .function_call(result_type, None, callee_val, args)
+            .unwrap()
+            .with_type(result_type)
     }
 
     fn zext(&mut self, val: Self::Value, dest_ty: Self::Type) -> Self::Value {

@@ -172,6 +172,30 @@ impl<'tcx> CodegenCx<'tcx> {
             }
         }
 
+        // Check if this is a From trait implementation
+        if let Some(impl_def_id) = self.tcx.impl_of_method(def_id) {
+            if let Some(trait_ref) = self.tcx.impl_trait_ref(impl_def_id) {
+                let trait_def_id = trait_ref.skip_binder().def_id;
+
+                // Check if this is the From trait.
+                let trait_path = self.tcx.def_path_str(trait_def_id);
+                if matches!(
+                    trait_path.as_str(),
+                    "core::convert::From" | "std::convert::From"
+                ) {
+                    // Extract the source and target types from the trait substitutions
+                    let trait_args = trait_ref.skip_binder().args;
+                    if let (Some(target_ty), Some(source_ty)) =
+                        (trait_args.types().nth(0), trait_args.types().nth(1))
+                    {
+                        self.from_trait_impls
+                            .borrow_mut()
+                            .insert(def_id, (source_ty, target_ty));
+                    }
+                }
+            }
+        }
+
         if [
             self.tcx.lang_items().panic_fn(),
             self.tcx.lang_items().panic_fmt(),

@@ -84,6 +84,10 @@ pub struct CodegenCx<'tcx> {
     /// Intrinsic for storing a `<T>` into a `&[u32]`. The `PassMode` is the mode of the `<T>`.
     pub buffer_store_intrinsics: RefCell<FxHashMap<DefId, &'tcx PassMode>>,
 
+    /// Maps `DefId`s of `From::from` method implementations to their source and target types.
+    /// Used to optimize constant conversions like `u32::from(42u8)` to avoid creating the source type.
+    pub from_trait_impls: RefCell<FxHashMap<DefId, (Ty<'tcx>, Ty<'tcx>)>>,
+
     /// Some runtimes (e.g. intel-compute-runtime) disallow atomics on i8 and i16, even though it's allowed by the spec.
     /// This enables/disables them.
     pub i8_i16_atomics_allowed: bool,
@@ -203,6 +207,7 @@ impl<'tcx> CodegenCx<'tcx> {
             fmt_rt_arg_new_fn_ids_to_ty_and_spec: Default::default(),
             buffer_load_intrinsics: Default::default(),
             buffer_store_intrinsics: Default::default(),
+            from_trait_impls: Default::default(),
             i8_i16_atomics_allowed: false,
             codegen_args,
         }

@@ -477,6 +477,16 @@ pub fn link(
         simple_passes::remove_non_uniform_decorations(sess, &mut output)?;
     }
 
+    {
+        let _timer = sess.timer("link_remove_unused_type_capabilities");
+        simple_passes::remove_unused_type_capabilities(&mut output);
+    }
+
+    {
+        let _timer = sess.timer("link_type_capability_check");
+        simple_passes::check_type_capabilities(sess, &output)?;
+    }
+
     // NOTE(eddyb) SPIR-T pipeline is entirely limited to this block.
     {
         let (spv_words, module_or_err, lower_from_spv_timer) =

@@ -7,6 +7,25 @@ use rustc_session::Session;
 use std::iter::once;
 use std::mem::take;
 
+/// Maps an integer bit width to the SPIR-V capability that width requires.
+///
+/// Yields `None` for every width other than 8, 16 and 64.
+fn capability_for_int_width(width: u32) -> Option<rspirv::spirv::Capability> {
+    use rspirv::spirv::Capability;
+
+    let cap = match width {
+        8 => Capability::Int8,
+        16 => Capability::Int16,
+        64 => Capability::Int64,
+        _ => return None,
+    };
+    Some(cap)
+}
+
+/// Maps a float bit width to the SPIR-V capability that width requires.
+///
+/// Yields `None` for every width other than 16 and 64.
+fn capability_for_float_width(width: u32) -> Option<rspirv::spirv::Capability> {
+    use rspirv::spirv::Capability;
+
+    let cap = match width {
+        16 => Capability::Float16,
+        64 => Capability::Float64,
+        _ => return None,
+    };
+    Some(cap)
+}
+
 pub fn shift_ids(module: &mut Module, add: u32) {
     module.all_inst_iter_mut().for_each(|inst| {
         if let Some(ref mut result_id) = &mut inst.result_id {
@@ -266,6 +285,111 @@ pub fn check_fragment_insts(sess: &Session, module: &Module) -> Result<()> {
     }
 }
 
+/// Verify that every capability-gated scalar type in `module` has its capability declared.
+///
+/// Walks `module.types_global_values` looking at `OpTypeInt` and `OpTypeFloat`
+/// instructions. Whenever the width of such a type maps to a capability (see
+/// `capability_for_int_width` / `capability_for_float_width`) that is absent from
+/// `module.capabilities`, a note describing the offending type is recorded.
+///
+/// # Errors
+///
+/// If at least one note was recorded, a single "Missing required capabilities for
+/// types" error carrying all the notes (in module order) is emitted through `sess`
+/// and returned as `Err`.
+pub fn check_type_capabilities(sess: &Session, module: &Module) -> Result<()> {
+    use rspirv::spirv::Capability;
+
+    // Everything the module declares via `OpCapability`.
+    let declared: FxHashSet<Capability> = module
+        .capabilities
+        .iter()
+        .map(|cap_inst| cap_inst.operands[0].unwrap_capability())
+        .collect();
+
+    // One note per type whose required capability is missing; other instructions
+    // in the section contribute nothing.
+    let notes: Vec<String> = module
+        .types_global_values
+        .iter()
+        .filter_map(|inst| match inst.class.opcode {
+            Op::TypeInt => {
+                let width = inst.operands[0].unwrap_literal_bit32();
+                // The second operand is the signedness flag: non-zero means signed.
+                let type_name = if inst.operands[1].unwrap_literal_bit32() != 0 {
+                    "i"
+                } else {
+                    "u"
+                };
+                let required_cap =
+                    capability_for_int_width(width).filter(|cap| !declared.contains(cap))?;
+                Some(format!(
+                    "`{type_name}{width}` type used without `OpCapability {required_cap:?}`"
+                ))
+            }
+            Op::TypeFloat => {
+                let width = inst.operands[0].unwrap_literal_bit32();
+                let required_cap =
+                    capability_for_float_width(width).filter(|cap| !declared.contains(cap))?;
+                Some(format!(
+                    "`f{width}` type used without `OpCapability {required_cap:?}`"
+                ))
+            }
+            _ => None,
+        })
+        .collect();
+
+    if notes.is_empty() {
+        return Ok(());
+    }
+
+    // Attach every note to one diagnostic so the user sees a single error.
+    let diag = notes.into_iter().fold(
+        sess.dcx()
+            .struct_err("Missing required capabilities for types"),
+        |diag, note| diag.with_note(note),
+    );
+    Err(diag.emit())
+}
+
+/// Drop scalar-type capabilities that no type declaration in the module calls for.
+///
+/// Only `Int8`, `Int16`, `Int64`, `Float16` and `Float64` are candidates for
+/// removal: each one is kept only if some `OpTypeInt` / `OpTypeFloat` in
+/// `module.types_global_values` has a width that maps to it. Every other
+/// capability is left in place. This is part of the fix for issue #300, where
+/// constant casts were creating unnecessary types.
+pub fn remove_unused_type_capabilities(module: &mut Module) {
+    use rspirv::spirv::Capability;
+
+    // Capabilities implied by the int/float types actually present in the module.
+    let required: FxHashSet<Capability> = module
+        .types_global_values
+        .iter()
+        .filter_map(|inst| match inst.class.opcode {
+            Op::TypeInt => capability_for_int_width(inst.operands[0].unwrap_literal_bit32()),
+            Op::TypeFloat => capability_for_float_width(inst.operands[0].unwrap_literal_bit32()),
+            _ => None,
+        })
+        .collect();
+
+    module.capabilities.retain(|inst| {
+        let cap = inst.operands[0].unwrap_capability();
+        let is_type_capability = matches!(
+            cap,
+            Capability::Int8
+                | Capability::Int16
+                | Capability::Int64
+                | Capability::Float16
+                | Capability::Float64
+        );
+        // Capabilities unrelated to scalar widths are always retained.
+        !is_type_capability || required.contains(&cap)
+    });
+}
+
 /// Remove all [`Decoration::NonUniform`] if this module does *not* have [`Capability::ShaderNonUniform`].
 /// This allows image asm to always declare `NonUniform` and not worry about conditional compilation.
 pub fn remove_non_uniform_decorations(_sess: &Session, module: &mut Module) -> Result<()> {