From c711221d821d422ba7c850c5b5639bf5a757a77a Mon Sep 17 00:00:00 2001
From: "Brian J. Tarricone"
Date: Mon, 15 Feb 2021 23:32:16 -0800
Subject: [PATCH] Teach rust core about Xtensa VaListImpl and add a custom
 lowering of vaarg for xtensa.

LLVM does not include an implementation of the va_arg instruction for
Xtensa. From what I understand, this is a conscious decision and instead
language frontends are encouraged to implement it themselves. The rationale
seems to be that loading values correctly requires language and ABI-specific
knowledge that LLVM lacks.

This is true of most architectures, and rustc already provides
implementations for a number of them. This commit extends the support to
include Xtensa.

See https://lists.llvm.org/pipermail/llvm-dev/2017-August/116337.html
for some discussion on the topic.

Unfortunately there does not seem to be a reference document for the
semantics of the va_list and va_arg on Xtensa. The most reliable source is
the GCC implementation, which this commit tries to follow. Clang also
provides its own compatible implementation.

This was tested for all the types that rustc allows in variadics.

Co-authored-by: Brian Tarricone
Co-authored-by: Jonathan Bastien-Filiatrault
Co-authored-by: Paul Lietar
---
 compiler/rustc_codegen_llvm/src/va_arg.rs | 100 +++++++++++++++++++++-
 library/core/Cargo.toml                   |   1 +
 library/core/src/ffi/va_list.rs           |  19 ++++
 3 files changed, 118 insertions(+), 2 deletions(-)

diff --git a/compiler/rustc_codegen_llvm/src/va_arg.rs b/compiler/rustc_codegen_llvm/src/va_arg.rs
index f12b94d5887ea..9266b6dd91474 100644
--- a/compiler/rustc_codegen_llvm/src/va_arg.rs
+++ b/compiler/rustc_codegen_llvm/src/va_arg.rs
@@ -10,6 +10,15 @@ use crate::type_::Type;
 use crate::type_of::LayoutLlvmExt;
 use crate::value::Value;
 
+fn round_up_to_alignment<'ll>(
+    bx: &mut Builder<'_, 'll, '_>,
+    mut value: &'ll Value,
+    align: Align,
+) -> &'ll Value {
+    value = bx.add(value, bx.cx().const_i32(align.bytes() as i32 - 1));
+    return bx.and(value, bx.cx().const_i32(-(align.bytes() as i32)));
+}
+
 fn round_pointer_up_to_alignment<'ll>(
     bx: &mut Builder<'_, 'll, '_>,
     addr: &'ll Value,
@@ -17,8 +26,7 @@ fn round_pointer_up_to_alignment<'ll>(
     ptr_ty: &'ll Type,
 ) -> &'ll Value {
     let mut ptr_as_int = bx.ptrtoint(addr, bx.cx().type_isize());
-    ptr_as_int = bx.add(ptr_as_int, bx.cx().const_i32(align.bytes() as i32 - 1));
-    ptr_as_int = bx.and(ptr_as_int, bx.cx().const_i32(-(align.bytes() as i32)));
+    ptr_as_int = round_up_to_alignment(bx, ptr_as_int, align);
     bx.inttoptr(ptr_as_int, ptr_ty)
 }
 
@@ -270,6 +278,93 @@ fn emit_s390x_va_arg<'ll, 'tcx>(
     bx.load(val_type, val_addr, layout.align.abi)
 }
 
+fn emit_xtensa_va_arg<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    list: OperandRef<'tcx, &'ll Value>,
+    target_ty: Ty<'tcx>,
+) -> &'ll Value {
+    // Implementation of va_arg for Xtensa. There doesn't seem to be an authoritative source for
+    // this, other than "what GCC does".
+    //
+    // The va_list type has three fields:
+    // struct __va_list_tag {
+    //     int32_t *va_stk; // Arguments passed on the stack
+    //     int32_t *va_reg; // Arguments passed in registers, saved to memory by the prologue.
+    //     int32_t va_ndx; // Offset into the arguments, in bytes
+    // };
+    //
+    // The first 24 bytes (equivalent to 6 registers) come from va_reg, the rest from va_stk.
+    // Thus if va_ndx is less than 24, the next va_arg *may* read from va_reg,
+    // otherwise it must come from va_stk.
+    //
+    // Primitive arguments are never split between registers and the stack. For example, if loading an 8 byte
+    // primitive value and va_ndx = 20, we instead bump the offset and read everything from va_stk.
+    let va_list_addr = list.immediate();
+    let layout = bx.cx.layout_of(target_ty);
+    let from_stack = bx.append_sibling_block("va_arg.from_stack");
+    let from_regsave = bx.append_sibling_block("va_arg.from_regsave");
+    let end = bx.append_sibling_block("va_arg.end");
+
+    // The following code is equivalent to `(*va).va_ndx`
+    let va_reg_offset = 4;
+    let va_ndx_offset = va_reg_offset + 4;
+    let offset_ptr =
+        bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(va_ndx_offset)]);
+
+    let offset = bx.load(bx.type_i32(), offset_ptr, bx.tcx().data_layout.i32_align.abi);
+    let offset = round_up_to_alignment(bx, offset, layout.align.abi);
+
+    let slot_size = layout.size.align_to(Align::from_bytes(4).unwrap()).bytes() as i32;
+
+    // Update the offset in va_list, by adding the slot's size.
+    let offset_next = bx.add(offset, bx.const_i32(slot_size));
+
+    // Figure out where to look for our value. We do that by checking the end of our slot (offset_next).
+    // If that is within the regsave area, then load from there. Otherwise load from the stack area.
+    let regsave_size = bx.const_i32(24);
+    let use_regsave = bx.icmp(IntPredicate::IntULE, offset_next, regsave_size);
+    bx.cond_br(use_regsave, from_regsave, from_stack);
+
+    bx.switch_to_block(from_regsave);
+    // update va_ndx
+    bx.store(offset_next, offset_ptr, bx.tcx().data_layout.pointer_align.abi);
+    // The following code is equivalent to `(*va).va_reg`
+    let regsave_area_ptr =
+        bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(va_reg_offset)]);
+    let regsave_area =
+        bx.load(bx.type_ptr(), regsave_area_ptr, bx.tcx().data_layout.pointer_align.abi);
+    let regsave_value_ptr = bx.inbounds_gep(bx.type_i8(), regsave_area, &[offset]);
+    bx.br(end);
+
+    bx.switch_to_block(from_stack);
+
+    // The first time we switch from regsave to stack we need to adjust our offsets a bit.
+    // va_stk is set up such that the first stack argument is always at va_stk + 32.
+    // The corrected offset is written back into the va_list struct.
+    let needs_correction = bx.icmp(IntPredicate::IntULE, offset, regsave_size);
+    let offset_corrected = bx.select(needs_correction, bx.const_i32(32), offset);
+    let offset_next_corrected =
+        bx.select(needs_correction, bx.const_i32(32 + slot_size), offset_next);
+    // update va_ndx
+    bx.store(offset_next_corrected, offset_ptr, bx.tcx().data_layout.pointer_align.abi);
+
+    // The following code is equivalent to `(*va).va_stk`
+    let stack_area_ptr = bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(0)]);
+    let stack_area = bx.load(bx.type_ptr(), stack_area_ptr, bx.tcx().data_layout.pointer_align.abi);
+    let stack_value_ptr = bx.inbounds_gep(bx.type_i8(), stack_area, &[offset_corrected]);
+    bx.br(end);
+
+    bx.switch_to_block(end);
+
+    // On big-endian, for values smaller than the slot size we'd have to align the read to the end
+    // of the slot rather than the start. While the ISA and GCC support big-endian, all the Xtensa
+    // targets supported by rustc are little-endian so don't worry about it.
+    assert!(bx.tcx().sess.target.endian == Endian::Little);
+    let value_ptr =
+        bx.phi(bx.type_ptr(), &[regsave_value_ptr, stack_value_ptr], &[from_regsave, from_stack]);
+    return bx.load(layout.llvm_type(bx), value_ptr, layout.align.abi);
+}
+
 pub(super) fn emit_va_arg<'ll, 'tcx>(
     bx: &mut Builder<'_, 'll, 'tcx>,
     addr: OperandRef<'tcx, &'ll Value>,
@@ -302,6 +397,7 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
             let indirect: bool = target_ty_size > 8 || !target_ty_size.is_power_of_two();
             emit_ptr_va_arg(bx, addr, target_ty, indirect, Align::from_bytes(8).unwrap(), false)
         }
+        "xtensa" => emit_xtensa_va_arg(bx, addr, target_ty),
         // For all other architecture/OS combinations fall back to using
         // the LLVM va_arg instruction.
         // https://llvm.org/docs/LangRef.html#va-arg-instruction
diff --git a/library/core/Cargo.toml b/library/core/Cargo.toml
index 94f343d06705e..be2310f6f6fe1 100644
--- a/library/core/Cargo.toml
+++ b/library/core/Cargo.toml
@@ -45,6 +45,7 @@ check-cfg = [
     'cfg(stdarch_intel_sde)',
     # #[cfg(bootstrap)] rtems
     'cfg(target_os, values("rtems"))',
+    'cfg(target_arch, values("xtensa"))',
     # Can be removed once https://github.com/rust-lang/rust/pull/132265 lands
     # core use #[path] imports to portable-simd `core_simd` crate
     # and to stdarch `core_arch` crate which messes-up with Cargo list
     # of declared features, we therefor expect any feature cfg
diff --git a/library/core/src/ffi/va_list.rs b/library/core/src/ffi/va_list.rs
index 3a224e4d8fe5f..f67c592d8d8f7 100644
--- a/library/core/src/ffi/va_list.rs
+++ b/library/core/src/ffi/va_list.rs
@@ -15,6 +15,7 @@ use crate::ops::{Deref, DerefMut};
         not(target_arch = "aarch64"),
         not(target_arch = "powerpc"),
         not(target_arch = "s390x"),
+        not(target_arch = "xtensa"),
         not(target_arch = "x86_64")
     ),
     all(target_arch = "aarch64", target_vendor = "apple"),
@@ -37,6 +38,7 @@ pub struct VaListImpl<'f> {
         not(target_arch = "aarch64"),
         not(target_arch = "powerpc"),
         not(target_arch = "s390x"),
+        not(target_arch = "xtensa"),
         not(target_arch = "x86_64")
     ),
     all(target_arch = "aarch64", target_vendor = "apple"),
@@ -113,6 +115,18 @@ pub struct VaListImpl<'f> {
     _marker: PhantomData<&'f mut &'f c_void>,
 }
 
+/// Xtensa ABI implementation of a `va_list`.
+#[cfg(target_arch = "xtensa")]
+#[repr(C)]
+#[derive(Debug)]
+#[lang = "va_list"]
+pub struct VaListImpl<'f> {
+    stk: *mut i32,
+    reg: *mut i32,
+    ndx: i32,
+    _marker: PhantomData<&'f mut &'f c_void>,
+}
+
 /// A wrapper for a `va_list`
 #[repr(transparent)]
 #[derive(Debug)]
@@ -124,6 +138,7 @@ pub struct VaList<'a, 'f: 'a> {
             not(target_arch = "s390x"),
             not(target_arch = "x86_64")
         ),
+        target_arch = "xtensa",
         all(target_arch = "aarch64", target_vendor = "apple"),
         target_family = "wasm",
         target_os = "uefi",
@@ -138,6 +153,7 @@ pub struct VaList<'a, 'f: 'a> {
             target_arch = "s390x",
             target_arch = "x86_64"
         ),
+        not(target_arch = "xtensa"),
         any(not(target_arch = "aarch64"), not(target_vendor = "apple")),
         not(target_family = "wasm"),
         not(target_os = "uefi"),
@@ -155,6 +171,7 @@ pub struct VaList<'a, 'f: 'a> {
         not(target_arch = "s390x"),
         not(target_arch = "x86_64")
     ),
+    target_arch = "xtensa",
     all(target_arch = "aarch64", target_vendor = "apple"),
     target_family = "wasm",
     target_os = "uefi",
@@ -173,8 +190,10 @@ impl<'f> VaListImpl<'f> {
         target_arch = "aarch64",
         target_arch = "powerpc",
         target_arch = "s390x",
+        target_arch = "xtensa",
         target_arch = "x86_64"
     ),
+    not(target_arch = "xtensa"),
     any(not(target_arch = "aarch64"), not(target_vendor = "apple")),
     not(target_family = "wasm"),
     not(target_os = "uefi"),
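
Note (not part of the patch): below is a minimal, self-contained sketch of the va_ndx bookkeeping
that emit_xtensa_va_arg emits, written as plain host Rust so the register/stack split can be
sanity-checked. The names Area and next_arg are illustrative only and do not exist in rustc; the
constants (24-byte register-save area, first stack argument at va_stk + 32, 4-byte slots) come
from the comments and code in the patch above.

// Where one lowered va_arg reads its value from (illustrative model only).
#[derive(Debug, PartialEq)]
enum Area {
    Regsave(i32), // byte offset into the area pointed to by va_reg
    Stack(i32),   // byte offset into the area pointed to by va_stk
}

// One va_arg step: given the current va_ndx and the requested type's size and
// alignment in bytes, return where the value is loaded from and the new va_ndx.
fn next_arg(va_ndx: i32, size: i32, align: i32) -> (Area, i32) {
    // Round the running offset up to the argument's alignment, then pad the
    // slot to a whole number of 4-byte registers.
    let offset = (va_ndx + align - 1) & -align;
    let slot_size = (size + 3) & !3;
    let offset_next = offset + slot_size;

    if offset_next <= 24 {
        // The whole slot fits in the 24-byte register-save area.
        (Area::Regsave(offset), offset_next)
    } else if offset <= 24 {
        // First spill to the stack: the first stack argument lives at
        // va_stk + 32, and the corrected offset is written back to va_ndx.
        (Area::Stack(32), 32 + slot_size)
    } else {
        // Already past the register-save area: keep reading from va_stk.
        (Area::Stack(offset), offset_next)
    }
}

fn main() {
    // The case called out in the comments above: an 8-byte primitive at
    // va_ndx = 20 is never split, so it is read entirely from va_stk.
    assert_eq!(next_arg(20, 8, 4), (Area::Stack(32), 40));
    // A 4-byte argument at va_ndx = 20 still comes from the register-save area.
    assert_eq!(next_arg(20, 4, 4), (Area::Regsave(20), 24));
}

And, assuming a nightly toolchain and an Xtensa target (e.g. the ESP32 family), a short sketch of
the user-facing side this enables: a c_variadic function whose VaListImpl is the three-field
struct added to library/core above, with each arg() call lowered through emit_xtensa_va_arg. The
function name sum_ints is hypothetical.

#![feature(c_variadic)]

// Sum `count` C `int` arguments. Each `args.arg::<i32>()` call is lowered by
// rustc_codegen_llvm via the custom Xtensa va_arg implementation.
pub unsafe extern "C" fn sum_ints(count: i32, mut args: ...) -> i32 {
    let mut total = 0;
    for _ in 0..count {
        total += unsafe { args.arg::<i32>() };
    }
    total
}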