Skip to content

Commit

Permalink
s390x: Implement tls_value
Browse files Browse the repository at this point in the history
Implement the tls_value for s390 in the ELF general-dynamic mode.

Notable differences to the x86_64 implementation are:
- We use a __tls_get_offset libcall instead of __tls_get_addr.
- The current thread pointer (stored in a pair of access registers)
  needs to be added to the result of __tls_get_offset.
- __tls_get_offset has a variant ABI that requires that the address of
  the GOT (global offset table) be passed in %r12.

This means we need new libcall entries for __tls_get_offset and
the _GLOBAL_OFFSET_TABLE_ symbol.  The latter is a bit weird in
that it is not actually a function, so "libcall" seems a bit
inappropriate.  But there currently is no way, apart from the
libcall mechanism, to refer to a well-known global symbol by name.

We also need to emit a relocation on a symbol placed in a
constant pool, as well as an extra relocation on the call
to __tls_get_offset required for TLS linker optimization.

Needed by the cg_clif frontend.
  • Loading branch information
uweigand committed Aug 4, 2022
1 parent b4d7ab3 commit 392b909
Show file tree
Hide file tree
Showing 13 changed files with 224 additions and 30 deletions.
7 changes: 7 additions & 0 deletions cranelift/codegen/src/binemit/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ pub enum Reloc {
/// Set the add immediate field to the low 12 bits of the final address. Does not check for overflow.
/// This is equivalent to `R_AARCH64_TLSGD_ADD_LO12_NC` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#relocations-for-thread-local-storage)
Aarch64TlsGdAddLo12Nc,

/// s390x TLS GD64 - 64-bit offset of tls_index for GD symbol in GOT
S390xTlsGd64,
/// s390x TLS GDCall - marker to enable optimization of TLS calls
S390xTlsGdCall,
}

impl fmt::Display for Reloc {
Expand All @@ -79,6 +84,8 @@ impl fmt::Display for Reloc {
Self::MachOX86_64Tlv => write!(f, "MachOX86_64Tlv"),
Self::Aarch64TlsGdAdrPage21 => write!(f, "Aarch64TlsGdAdrPage21"),
Self::Aarch64TlsGdAddLo12Nc => write!(f, "Aarch64TlsGdAddLo12Nc"),
Self::S390xTlsGd64 => write!(f, "TlsGd64"),
Self::S390xTlsGdCall => write!(f, "TlsGdCall"),
}
}
}
Expand Down
12 changes: 11 additions & 1 deletion cranelift/codegen/src/ir/libcall.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ pub enum LibCall {

/// Elf __tls_get_addr
ElfTlsGetAddr,
/// Elf __tls_get_offset
ElfTlsGetOffset,
/// Elf _GLOBAL_OFFSET_TABLE_ - N.B. this is a data symbol, not a function
ElfGlobalOffsetTable,
// When adding a new variant make sure to add it to `all_libcalls` too.
}

Expand Down Expand Up @@ -104,6 +108,8 @@ impl FromStr for LibCall {
"Memcmp" => Ok(Self::Memcmp),

"ElfTlsGetAddr" => Ok(Self::ElfTlsGetAddr),
"ElfTlsGetOffset" => Ok(Self::ElfTlsGetOffset),
"ElfGlobalOffsetTable" => Ok(Self::ElfGlobalOffsetTable),
_ => Err(()),
}
}
Expand Down Expand Up @@ -173,6 +179,8 @@ impl LibCall {
Memmove,
Memcmp,
ElfTlsGetAddr,
ElfTlsGetOffset,
ElfGlobalOffsetTable,
]
}

Expand Down Expand Up @@ -214,7 +222,9 @@ impl LibCall {
| LibCall::Memset
| LibCall::Memmove
| LibCall::Memcmp
| LibCall::ElfTlsGetAddr => unimplemented!(),
| LibCall::ElfTlsGetAddr
| LibCall::ElfTlsGetOffset => unimplemented!(),
LibCall::ElfGlobalOffsetTable => unreachable!(),
}

sig
Expand Down
64 changes: 55 additions & 9 deletions cranelift/codegen/src/isa/s390x/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,17 @@
(rd WritableReg)
(imm UImm32Shifted))

;; Load 32-bit access register into GPR.
(LoadAR
(rd WritableReg)
(ar u8))

;; Insert 32-bit access register into low half of a GPR.
;; (Identical operation to LoadAR, but considers rd to be use/def.)
(InsertAR
(rd WritableReg)
(ar u8))

;; A sign- or zero-extend operation.
(Extend
(rd WritableReg)
Expand Down Expand Up @@ -857,11 +868,10 @@
(ridx Reg)
(targets VecMachLabel))

;; Load an inline symbol reference with RelocDistance::Far.
(LoadExtNameFar
;; Load an inline symbol reference with relocation.
(LoadSymbolReloc
(rd WritableReg)
(name BoxExternalName)
(offset i64))
(symbol_reloc BoxSymbolReloc))

;; Load address referenced by `mem` into `rd`.
(LoadAddr
Expand Down Expand Up @@ -903,6 +913,23 @@
(type BoxJTSequenceInfo (primitive BoxJTSequenceInfo))
(type VecMachLabel extern (enum))

;; A symbol reference carrying relocation information.
(type SymbolReloc
(enum
;; Absolute symbol reference (with optional offset).
(Absolute
(name ExternalName)
(offset i64))
;; Reference to a TLS symbol in general-dynamic mode.
(TlsGd
(name ExternalName))))

;; Boxed version of SymbolReloc to save space.
(type BoxSymbolReloc (primitive BoxSymbolReloc))
(decl box_symbol_reloc (SymbolReloc) BoxSymbolReloc)
(extern constructor box_symbol_reloc box_symbol_reloc)
(convert SymbolReloc BoxSymbolReloc box_symbol_reloc)

;; An ALU operation.
(type ALUOp
(enum
Expand Down Expand Up @@ -1613,6 +1640,9 @@
(decl memarg_symbol (ExternalName i32 MemFlags) MemArg)
(extern constructor memarg_symbol memarg_symbol)

(decl memarg_got () MemArg)
(extern constructor memarg_got memarg_got)

;; Create a MemArg referring to a stack address formed by
;; adding a base (relative to SP) and an offset.
(decl memarg_stack_off (i64 i64) MemArg)
Expand Down Expand Up @@ -2120,6 +2150,20 @@
(rule (mvc dst src len_minus_one)
(SideEffectNoResult.Inst (MInst.Mvc dst src len_minus_one)))

;; Helper for emitting `MInst.LoadAR` instructions.
;;
;; Takes the number `ar` of an access register, allocates a fresh $I64
;; temporary, emits a `LoadAR` that writes the 32-bit access register into
;; that temporary, and returns the temporary.
(decl load_ar (u8) Reg)
(rule (load_ar ar)
(let ((dst WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.LoadAR dst ar))))
dst))

;; Helper for emitting `MInst.InsertAR` instructions.
;;
;; Takes a source register `src` and the number `ar` of an access register.
;; The result starts out as a copy of `src` (via `copy_writable_reg`); the
;; `InsertAR` then inserts the 32-bit access register into the low half of
;; that register, which is why the destination is treated as use/def rather
;; than a pure def.
(decl insert_ar (Reg u8) Reg)
(rule (insert_ar src ar)
(let ((dst WritableReg (copy_writable_reg $I64 src))
(_ Unit (emit (MInst.InsertAR dst ar))))
dst))

;; Helper for emitting `MInst.FpuRR` instructions.
(decl fpu_rr (Type FPUOp1 Reg) Reg)
(rule (fpu_rr ty op src)
Expand Down Expand Up @@ -2393,12 +2437,11 @@
(_ Unit (emit (MInst.VecReplicateLane size dst src lane_imm))))
dst))

;; Helper for emitting `MInst.LoadExtNameFar` instructions.
(decl load_ext_name_far (ExternalName i64) Reg)
(rule (load_ext_name_far name offset)
;; Helper for emitting `MInst.LoadSymbolReloc` instructions.
(decl load_symbol_reloc (SymbolReloc) Reg)
(rule (load_symbol_reloc symbol_reloc)
(let ((dst WritableReg (temp_writable_reg $I64))
(boxed_name BoxExternalName (box_external_name name))
(_ Unit (emit (MInst.LoadExtNameFar dst boxed_name offset))))
(_ Unit (emit (MInst.LoadSymbolReloc dst symbol_reloc))))
dst))

;; Helper for emitting `MInst.LoadAddr` instructions.
Expand Down Expand Up @@ -3405,6 +3448,9 @@
(decl lib_call_info_memcpy () LibCallInfo)
(extern constructor lib_call_info_memcpy lib_call_info_memcpy)

(decl lib_call_info_tls_get_offset (SymbolReloc) LibCallInfo)
(extern constructor lib_call_info_tls_get_offset lib_call_info_tls_get_offset)

(decl lib_call_info (LibCallInfo) BoxCallInfo)
(extern constructor lib_call_info lib_call_info)

Expand Down
25 changes: 21 additions & 4 deletions cranelift/codegen/src/isa/s390x/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2258,17 +2258,25 @@ impl MachInstEmit for Inst {
};
put(sink, &enc_ril_a(opcode, rd.to_reg(), imm.bits));
}
&Inst::LoadExtNameFar {
&Inst::LoadAR { rd, ar } | &Inst::InsertAR { rd, ar } => {
let rd = allocs.next_writable(rd);
let opcode = 0xb24f; // EAR
put(sink, &enc_rre(opcode, rd.to_reg(), gpr(ar)));
}
&Inst::LoadSymbolReloc {
rd,
ref name,
offset,
ref symbol_reloc,
} => {
let rd = allocs.next_writable(rd);

let opcode = 0xa75; // BRAS
let reg = writable_spilltmp_reg().to_reg();
put(sink, &enc_ri_b(opcode, reg, 12));
sink.add_reloc(Reloc::Abs8, name, offset);
let (reloc, name, offset) = match &**symbol_reloc {
SymbolReloc::Absolute { name, offset } => (Reloc::Abs8, name, *offset),
SymbolReloc::TlsGd { name } => (Reloc::S390xTlsGd64, name, 0),
};
sink.add_reloc(reloc, name, offset);
sink.put8(0);
let inst = Inst::Load64 {
rd,
Expand Down Expand Up @@ -3198,6 +3206,15 @@ impl MachInstEmit for Inst {
&Inst::Call { link, ref info } => {
let link = allocs.next_writable(link);

// Add relocation for TLS libcalls to enable linker optimizations.
match &info.tls_symbol {
None => {}
Some(SymbolReloc::TlsGd { name }) => {
sink.add_reloc(Reloc::S390xTlsGdCall, name, 0)
}
_ => unreachable!(),
}

let opcode = 0xc05; // BRASL
let reloc = Reloc::S390xPCRel32Dbl;
if let Some(s) = state.take_stack_map() {
Expand Down
1 change: 1 addition & 0 deletions cranelift/codegen/src/isa/s390x/inst/emit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6828,6 +6828,7 @@ fn test_s390x_binemit() {
opcode: Opcode::Call,
caller_callconv: CallConv::SystemV,
callee_callconv: CallConv::SystemV,
tls_symbol: None,
}),
},
"C0E500000000",
Expand Down
40 changes: 29 additions & 11 deletions cranelift/codegen/src/isa/s390x/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ mod emit_tests;

pub use crate::isa::s390x::lower::isle::generated_code::{
ALUOp, CmpOp, FPUOp1, FPUOp2, FPUOp3, FpuRoundMode, FpuRoundOp, MInst as Inst, RxSBGOp,
ShiftOp, UnaryOp, VecBinaryOp, VecFloatCmpOp, VecIntCmpOp, VecShiftOp, VecUnaryOp,
ShiftOp, SymbolReloc, UnaryOp, VecBinaryOp, VecFloatCmpOp, VecIntCmpOp, VecShiftOp, VecUnaryOp,
};

/// Additional information for (direct) Call instructions, left out of line to lower the size of
Expand All @@ -43,6 +43,7 @@ pub struct CallInfo {
pub opcode: Opcode,
pub caller_callconv: CallConv,
pub callee_callconv: CallConv,
pub tls_symbol: Option<SymbolReloc>,
}

/// Additional information for CallInd instructions, left out of line to lower the size of the Inst
Expand Down Expand Up @@ -154,6 +155,8 @@ impl Inst {
| Inst::Mov64UImm32Shifted { .. }
| Inst::Insert64UImm16Shifted { .. }
| Inst::Insert64UImm32Shifted { .. }
| Inst::LoadAR { .. }
| Inst::InsertAR { .. }
| Inst::Extend { .. }
| Inst::CMov32 { .. }
| Inst::CMov64 { .. }
Expand Down Expand Up @@ -212,7 +215,7 @@ impl Inst {
| Inst::Debugtrap
| Inst::Trap { .. }
| Inst::JTSequence { .. }
| Inst::LoadExtNameFar { .. }
| Inst::LoadSymbolReloc { .. }
| Inst::LoadAddr { .. }
| Inst::Loop { .. }
| Inst::CondBreak { .. }
Expand Down Expand Up @@ -662,6 +665,12 @@ fn s390x_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandC
&Inst::Insert64UImm16Shifted { rd, .. } | &Inst::Insert64UImm32Shifted { rd, .. } => {
collector.reg_mod(rd);
}
&Inst::LoadAR { rd, .. } => {
collector.reg_def(rd);
}
&Inst::InsertAR { rd, .. } => {
collector.reg_mod(rd);
}
&Inst::FpuMove32 { rd, rn } | &Inst::FpuMove64 { rd, rn } => {
collector.reg_def(rd);
collector.reg_use(rn);
Expand Down Expand Up @@ -881,7 +890,7 @@ fn s390x_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandC
collector.reg_use(ridx);
collector.reg_early_def(writable_gpr(1));
}
&Inst::LoadExtNameFar { rd, .. } => {
&Inst::LoadSymbolReloc { rd, .. } => {
collector.reg_def(rd);
collector.reg_def(writable_gpr(1));
}
Expand Down Expand Up @@ -1887,6 +1896,10 @@ impl Inst {
};
format!("{} {}, {}", op, rd, imm.bits)
}
&Inst::LoadAR { rd, ar } | &Inst::InsertAR { rd, ar } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
format!("ear {}, %a{}", rd, ar)
}
&Inst::CMov32 { rd, cond, rm } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rm = pretty_print_reg(rm, allocs);
Expand Down Expand Up @@ -2830,7 +2843,12 @@ impl Inst {
}
&Inst::Call { link, ref info, .. } => {
let link = pretty_print_reg(link.to_reg(), allocs);
format!("brasl {}, {}", link, info.dest)
let tls_symbol = match &info.tls_symbol {
None => "".to_string(),
Some(SymbolReloc::TlsGd { name }) => format!(":tls_gdcall:{}", name),
_ => unreachable!(),
};
format!("brasl {}, {}{}", link, info.dest, tls_symbol)
}
&Inst::CallInd { link, ref info, .. } => {
let link = pretty_print_reg(link.to_reg(), allocs);
Expand Down Expand Up @@ -2891,17 +2909,17 @@ impl Inst {
rtmp, rtmp, rtmp, ridx, rtmp, jt_entries,
)
}
&Inst::LoadExtNameFar {
&Inst::LoadSymbolReloc {
rd,
ref name,
offset,
ref symbol_reloc,
} => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let tmp = pretty_print_reg(writable_spilltmp_reg().to_reg(), &mut empty_allocs);
format!(
"bras {}, 12 ; data {} + {} ; lg {}, 0({})",
tmp, name, offset, rd, tmp
)
let symbol = match &**symbol_reloc {
SymbolReloc::Absolute { name, offset } => format!("{} + {}", name, offset),
SymbolReloc::TlsGd { name } => format!("{}@tlsgd", name),
};
format!("bras {}, 12 ; data {} ; lg {}, 0({})", tmp, symbol, rd, tmp)
}
&Inst::LoadAddr { rd, ref mem } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
Expand Down
33 changes: 30 additions & 3 deletions cranelift/codegen/src/isa/s390x/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2353,7 +2353,7 @@

;; Load the address of a function, general case.
(rule (lower (func_addr (func_ref_data _ name _)))
(load_ext_name_far name 0))
(load_symbol_reloc (SymbolReloc.Absolute name 0)))


;;;; Rules for `symbol_value` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
Expand All @@ -2366,7 +2366,34 @@

;; Load the address of a symbol, general case.
(rule (lower (symbol_value (symbol_value_data name _ offset)))
(load_ext_name_far name offset))
(load_symbol_reloc (SymbolReloc.Absolute name offset)))


;;;; Rules for `tls_value` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Load the address of a TLS symbol (ELF general-dynamic model).
;;
;; This rule only matches a `tls_value` whose symbol offset is 0, and only
;; when `tls_model_is_elf_gd` holds.  The emitted sequence is:
;;   1. load the address of the GOT (`memarg_got`);
;;   2. load the value of a `SymbolReloc.TlsGd` reference to the symbol
;;      (bound to `got_offset`);
;;   3. call `__tls_get_offset` with both values, passing the same symbol
;;      reference along with the call;
;;   4. add the current thread pointer to the offset returned by the call.
(rule (lower (tls_value (symbol_value_data name _ 0)))
(if (tls_model_is_elf_gd))
(let ((symbol SymbolReloc (SymbolReloc.TlsGd name))
(got Reg (load_addr (memarg_got)))
(got_offset Reg (load_symbol_reloc symbol))
(tls_offset Reg (lib_call_tls_get_offset got got_offset symbol)))
(add_reg $I64 tls_offset (thread_pointer))))

;; Helper to perform a call to the __tls_get_offset library routine.
;;
;; Arguments are the GOT address `got`, the value `got_offset` to pass to the
;; routine, and the TLS symbol reference `symbol` used to build the call info.
;; The helper works on fixed registers: `got` is moved into GPR 12 and
;; `got_offset` into GPR 2 before the call is emitted, and the result is
;; copied out of GPR 2 afterwards.
;; NOTE(review): `lib_call_info_tls_get_offset` is an extern constructor;
;; presumably it records `symbol` in the call info so the call can carry the
;; TLS relocation -- confirm against its Rust implementation.
(decl lib_call_tls_get_offset (Reg Reg SymbolReloc) Reg)
(rule (lib_call_tls_get_offset got got_offset symbol)
(let ((libcall LibCallInfo (lib_call_info_tls_get_offset symbol))
(_ Unit (lib_accumulate_outgoing_args_size libcall))
(_ Unit (emit_mov $I64 (writable_gpr 12) got))
(_ Unit (emit_mov $I64 (writable_gpr 2) got_offset))
(_ Unit (emit_side_effect (lib_call libcall))))
(copy_reg $I64 (writable_gpr 2))))

;; Helper to extract the current thread pointer from %a0/%a1.
;;
;; The value is assembled from two 32-bit access registers: access register 0
;; is loaded into a GPR and passed through `lshl_imm` with a shift amount of
;; 32, then access register 1 is inserted into the low half of that result.
(decl thread_pointer () Reg)
(rule (thread_pointer)
(insert_ar (lshl_imm $I64 (load_ar 0) 32) 1))


;;;; Rules for `load` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
Expand Down Expand Up @@ -3776,7 +3803,7 @@
(let ((abi ABISig (abi_sig sig_ref))
(_ Unit (abi_accumulate_outgoing_args_size abi))
(_ InstOutput (lower_call_args abi (range 0 (abi_num_args abi)) args))
(target Reg (load_ext_name_far name 0))
(target Reg (load_symbol_reloc (SymbolReloc.Absolute name 0)))
(_ InstOutput (side_effect (abi_call_ind abi target (Opcode.Call)))))
(lower_call_rets abi (range 0 (abi_num_rets abi)) (output_builder_new))))

Expand Down
Loading

0 comments on commit 392b909

Please sign in to comment.