Upgrade rust version #16

Merged
merged 9 commits on Mar 11, 2022
10 changes: 5 additions & 5 deletions Cargo.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion aligned-cmov/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "aligned-cmov"
-version = "2.0.0"
+version = "2.1.0"
 description = "Fast constant-time conditional moves of aligned bytes"
 authors = ["MobileCoin"]
 license = "GPL-3.0"
222 changes: 104 additions & 118 deletions aligned-cmov/src/cmov_impl_asm.rs
@@ -1,4 +1,4 @@
-// Copyright (c) 2018-2021 The MobileCoin Foundation
+// Copyright (c) 2018-2022 The MobileCoin Foundation
 
 //! Implementation of cmov on x86-64 using inline assembly.
 //!
@@ -19,45 +19,41 @@
 //! For now it seems simplest to use inline assembly for all of it.
 
 use super::{A64Bytes, A8Bytes, ArrayLength};
+use core::arch::asm;
 
 // CMov for u32 values
 #[inline]
 pub fn cmov_u32(condition: bool, src: &u32, dest: &mut u32) {
-    // Create a temporary to be assigned to a register for cmov
-    let mut temp: u32 = *dest;
-    // Test condition ($1)
-    // cmovnz from src into temp ($2 to $0)
     unsafe {
-        llvm_asm!("test $1, $1
-                   cmovnz $0, $2"
-                  : "+&r"(temp)
-                  : "r"(condition), "rm"(*src)
-                  : "cc"
-                  : "volatile", "intel");
+        asm!(
+            // Set ZF if cond == 0
+            "test {0}, {0}",
+            // Conditionally move src into dest (based on ZF).
+            // `:e` formats to the 32-bit register.
+            "cmovnz {2:e}, {1:e}",
+            in(reg_byte) condition as u8,
+            in(reg) *src,
+            // inout since we might not write, so we need the existing value.
+            inout(reg) *dest,
+        );
     }
-    // "cc" is because we are setting flags in test
-    // "volatile" is meant to discourage the optimizer from inspecting this
-    *dest = temp;
 }
 
 // CMov for u64 values
 #[inline]
 pub fn cmov_u64(condition: bool, src: &u64, dest: &mut u64) {
-    // Create a temporary to be assigned to a register for cmov
-    let mut temp: u64 = *dest;
-    // Test condition ($1)
-    // cmovnz from src into temp ($2 to $0)
     unsafe {
-        llvm_asm!("test $1, $1
-                   cmovnz $0, $2"
-                  : "+&r"(temp)
-                  : "r"(condition), "rm"(*src)
-                  : "cc"
-                  : "volatile", "intel");
+        asm!(
+            // Set ZF if cond == 0
+            "test {0}, {0}",
+            // Conditionally move src into dest (based on ZF)
+            "cmovnz {2}, {1}",
+            in(reg_byte) condition as u8,
+            in(reg) *src,
+            // inout since we might not write, so we need the existing value.
+            inout(reg) *dest,
+        );
     }
-    // "cc" is because we are setting flags in test
-    // "volatile" is meant to discourage the optimizer from inspecting this
-    *dest = temp;
 }
 
 // CMov for i32 values
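
For intuition, the new scalar cmov is semantically `if condition { *dest = *src }`, written so that no branch on `condition` is ever emitted. Below is a minimal mask-based sketch of the same semantics (hypothetical reference code, not part of this PR); the crate uses inline asm instead because the compiler could lower a masking expression like this back into a branch:

```rust
// Hypothetical reference model of cmov_u64, not the crate's code.
// wrapping_neg maps 1 -> all-ones and 0 -> all-zeros, mirroring `neg`.
fn cmov_u64_model(condition: bool, src: &u64, dest: &mut u64) {
    let mask = (condition as u64).wrapping_neg();
    // Take src where mask is all-ones, keep dest where mask is all-zeros.
    *dest = (*src & mask) | (*dest & !mask);
}
```
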
@@ -124,7 +120,6 @@ pub fn cmov_a64_bytes<N: ArrayLength<u8>>(
 // If we have avx2 then we can use cmov_byte_slice_a64 implementation
 #[cfg(target_feature = "avx2")]
 #[inline]
-#[allow(unused)]
 pub fn cmov_a64_bytes<N: ArrayLength<u8>>(
     condition: bool,
     src: &A64Bytes<N>,
@@ -151,13 +146,8 @@ pub fn cmov_a64_bytes<N: ArrayLength<u8>>(
 // if condition { memcpy(dest, src, count * 8) }
 // for pointers aligned to 8 byte boundary. Assumes count > 0
 #[inline]
-#[allow(unused)]
-unsafe fn cmov_byte_slice_a8(condition: bool, src: *const u64, dest: *mut u64, mut count: usize) {
-    // Create a temporary to be assigned to a register for cmov
-    // We also use it to pass the condition in, which is only needed in a register
-    // before entering the loop.
-    let mut temp: u64 = condition as u64;
-
+unsafe fn cmov_byte_slice_a8(condition: bool, src: *const u64, dest: *mut u64, count: usize) {
+    debug_assert!(count > 0, "count cannot be 0");
     // The idea here is to test once before we enter the loop and reuse the test
     // result for every cmov.
     // The loop is a dec, jnz loop.
@@ -167,109 +157,105 @@ unsafe fn cmov_byte_slice_a8(condition: bool, src: *const u64, dest: *mut u64, mut count: usize) {
     // indexing. Because dec will clobber ZF, we can't use cmovnz. We store
     // the result of the outer test in CF, which is not clobbered by dec.
     // cmovc is contingent on CF
-    // negb is used to set CF to 1 iff the condition value was 1.
+    // neg is used to set CF to 1 iff the condition value was 1.
     // Pity, we cannot cmov directly to memory
     //
-    // temp is a "register output" because this is the way to obtain a scratch
-    // register when we don't care what actual register is used and we want the
-    // compiler to pick.
-    //
-    // count is modified by the assembly -- for this reason it has to be labelled
-    // as an input and an output, otherwise its illegal to modify it.
-    //
-    // The condition is passed in through temp, then neg moves the value to CF.
-    // After that we don't need condition in a register, so temp register can be
+    // The condition is passed in through {0}, then neg moves the value to CF.
+    // After that we don't need the condition in a register, so {0} can be
     // reused.
-    llvm_asm!("neg $0
-               loop_body_${:uid}:
-               mov $0, [$3 + 8*$1 - 8]
-               cmovc $0, [$2 + 8*$1 - 8]
-               mov [$3 + 8*$1 - 8], $0
-               dec $1
-               jnz loop_body_${:uid}"
-              : "+&r"(temp), "+&r"(count)
-              : "r"(src), "r"(dest)
-              : "cc", "memory"
-              : "volatile", "intel");
-    // cc is because we are setting flags in test
-    // memory is because we are dereferencing a bunch of pointers in asm
-    // volatile is because the output variable "temp" is not the true output
-    // of this block, the memory side-effects are.
+    asm!(
+        // Sets CF = 0 iff cond == 0, and CF = 1 otherwise.
+        "neg {0}",
+        // Must use numeric local labels, since this block may be inlined multiple times.
+        // https://doc.rust-lang.org/nightly/rust-by-example/unsafe/asm.html#labels
+        "42:",
+        // Copy into temp register from dest (the no-op default).
+        "mov {0}, [{3} + 8*{1} - 8]",
+        // Conditionally copy into temp register from src (based on CF).
+        "cmovc {0}, [{2} + 8*{1} - 8]",
+        // Copy from temp register into dest.
+        "mov [{3} + 8*{1} - 8], {0}",
+        // Decrement count. Sets ZF = 1 when we reach 0.
+        "dec {1}",
+        // If ZF is not set, loop.
+        "jnz 42b",
+        // Discard outputs; the memory side effects are what's desired.
+        inout(reg) (condition as u64) => _,
+        inout(reg) count => _,
+        in(reg) src,
+        in(reg) dest,
+    );
 }
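
As a reading aid, here is a plain-Rust model of what this loop computes (a hedged sketch, not the crate's code; the real function stays in asm so that the condition is tested exactly once and no data-dependent branch appears):

```rust
// Hypothetical model of cmov_byte_slice_a8: for each aligned 8-byte word,
// conditionally overwrite dest with src, branch-free. Not the crate's code.
unsafe fn cmov_byte_slice_a8_model(condition: bool, src: *const u64, dest: *mut u64, count: usize) {
    debug_assert!(count > 0, "count cannot be 0");
    // Like `neg` above: 1 -> all-ones, 0 -> all-zeros.
    let mask = (condition as u64).wrapping_neg();
    for i in 0..count {
        let s = core::ptr::read(src.add(i));
        let d = core::ptr::read(dest.add(i));
        core::ptr::write(dest.add(i), (s & mask) | (d & !mask));
    }
}
```
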

 // Should be a constant time function equivalent to:
 // if condition { memcpy(dest, src, num_bytes) }
 // for pointers aligned to *64* byte boundary. Will fault if that is not the
-// case. Assumes num_bytes > 0, and num_bytes divisible by 64!
+// case. Requires num_bytes > 0, and num_bytes divisible by 64.
 // This version uses AVX2 256-bit moves
 #[cfg(target_feature = "avx2")]
 #[inline]
-#[allow(unused)]
 unsafe fn cmov_byte_slice_a64(condition: bool, src: *const u64, dest: *mut u64, num_bytes: usize) {
-    debug_assert!(
-        num_bytes > 0,
-        "num_bytes cannot be 0, caller must handle that"
-    );
-    debug_assert!(
-        num_bytes % 64 == 0,
-        "num_bytes must be divisible by 64, caller must handle that"
-    );
-    // Similarly as before, we want to test once and use the test result for the
-    // whole loop.
-    //
-    // Before we enter the loop, we want to set ymm1 to all 0s or all 1s, depending
-    // on condition. We use neg to make it all 0s or all 1s 64bit, then vmovq to
-    // move that to xmm2, then vbroadcastsd to fill ymm1 with 1s or zeros.
-    //
-    // This time the cmov mechanism is:
-    // - VMOVDQA to load the source into a ymm register,
-    // - VPMASKMOVQ to move that to memory, after masking it with ymm1.
-    // An alternative approach which I didn't test is, MASKMOV to another ymm
-    // register, then move that register to memory.
-    //
+    debug_assert!(num_bytes > 0, "num_bytes cannot be 0");
+    debug_assert!(num_bytes % 64 == 0, "num_bytes must be divisible by 64");
 
     // Notes:
-    // temp = $0 is the scratch register
-    // We aren't allowed to modify condition or num_bytes = $3 unless
-    // we annotate them appropriately as outputs.
-    // So, num_bytes is an in/out register, and we pass condition in via
-    // the scratch register, instead of a dedicated register.
+    // temp = {0} is the scratch register
     //
-    // Once we have the mask in ymm1 we don't need the condition in a register
-    // anymore. Then, $0 is the loop variable, counting down from num_bytes in
-    // steps of 64
-    //
-    // The values of $0 in the loop are num_bytes, num_bytes - 64, ... 64,
+    // The values of {0} in the loop are num_bytes, num_bytes - 64, ... 64,
     // rather than num_bytes - 64 ... 0. This is because the semantics of the loop
     // are subtract 64, then test for 0, rather than test for 0, then subtract.
-    // So when we index using $0, we subtract 64 to compensate.
-    //
-    // We unroll the loop once because we can assume 64 byte alignment, but ymm
-    // register holds 32 bytes. So one round of vmovdqa, vpmaskmovq moves 32 bytes.
-    // So we do this twice in one pass through the loop.
-    //
-    // TODO: In AVX512 zmm registers we could move 64 bytes at once...
-    let mut temp: u64 = condition as u64;
-
-    llvm_asm!("neg $0
-               vmovq xmm2, $0
-               vbroadcastsd ymm1, xmm2
-               mov $0, $3
-               loop_body2_${:uid}:
-               vmovdqa ymm2, [$1 + $0 - 64]
-               vpmaskmovq [$2 + $0 - 64], ymm1, ymm2
-               vmovdqa ymm3, [$1 + $0 - 32]
-               vpmaskmovq [$2 + $0 - 32], ymm1, ymm3
-               sub $0, 64
-               jnz loop_body2_${:uid}"
-              : "+&r"(temp)
-              : "r"(src), "r"(dest), "rmi"(num_bytes)
-              : "cc", "memory", "ymm1", "ymm2", "ymm3"
-              : "volatile", "intel");
-    // cc is because we are setting flags
-    // memory is because we are dereferencing a bunch of pointers in asm
-    // volatile is because the output variable "temp" is not the true output
-    // of this block, the memory side-effects are.
+    // So when we index using {0}, we subtract 64 to compensate.
+    // TODO: Does unrolling the loop more help?
+    asm!(
+        // Similarly to cmov_byte_slice_a8, we want to test once and use the
+        // result for the whole loop.
+        // Before we enter the loop, we want to set ymm1 to all 0s or all 1s,
+        // depending on condition. We use neg to make all 64 bits 0s or all 1s
+        // (since neg(0) = 0 and neg(1) = -1 = all ones in two's complement),
+        // then vmovq to move that to xmm2, then vbroadcastsd to fill ymm1
+        // with ones or zeros.
+        "neg {0}",
+        "vmovq xmm2, {0}",
+        "vbroadcastsd ymm1, xmm2",
+        // Once we have the mask in ymm1 we don't need the condition in
+        // a register anymore. Then, {0} is the loop variable, counting down
+        // from num_bytes in steps of 64.
+        "mov {0}, {3}",
+        // Must use numeric local labels, since this block may be inlined multiple times.
+        // https://doc.rust-lang.org/nightly/rust-by-example/unsafe/asm.html#labels
+        "42:",
+        // This time the cmov mechanism is:
+        // - VMOVDQA to load the source into a ymm register,
+        // - VPMASKMOVQ to move that to memory, after masking it with ymm1.
+        // An alternative approach which I didn't test is, MASKMOV to
+        // another ymm register, then move that register to memory.
+        "vmovdqa ymm2, [{1} + {0} - 64]",
+        "vpmaskmovq [{2} + {0} - 64], ymm1, ymm2",
+        // We unroll the loop once because we can assume 64 byte alignment,
+        // but a ymm register holds 32 bytes. So one round of
+        // vmovdqa+vpmaskmovq moves 32 bytes, and we do it twice per pass
+        // through the loop.
+        //
+        // TODO: With AVX512 zmm registers we could move 64 bytes at once...
+        "vmovdqa ymm3, [{1} + {0} - 32]",
+        "vpmaskmovq [{2} + {0} - 32], ymm1, ymm3",
+        // Decrement num_bytes. Sets ZF = 1 when we reach 0.
+        "sub {0}, 64",
+        // If ZF is not set, loop.
+        "jnz 42b",
+        // Discard output; the memory side effects are what's desired.
+        inout(reg) condition as u64 => _,
+        in(reg) src,
+        in(reg) dest,
+        in(reg) num_bytes,
+        // Scratch/temp registers.
+        out("ymm1") _,
+        out("ymm2") _,
+        out("ymm3") _,
+    );
 }

 // TODO: In avx512 there is vmovdqa which takes a mask register, and kmovq to
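
For comparison, the same AVX2 scheme can be expressed with `core::arch::x86_64` intrinsics. This is a sketch under the assumption that exact instruction scheduling is not required; the PR keeps inline asm, which pins the emitted sequence, whereas with intrinsics the compiler may restructure the loop:

```rust
// Hypothetical intrinsics rendering of cmov_byte_slice_a64, not the crate's code.
#[cfg(target_feature = "avx2")]
unsafe fn cmov_byte_slice_a64_model(condition: bool, src: *const u64, dest: *mut u64, num_bytes: usize) {
    use core::arch::x86_64::{__m256i, _mm256_load_si256, _mm256_maskstore_epi64, _mm256_set1_epi64x};
    debug_assert!(num_bytes > 0 && num_bytes % 64 == 0);
    // The `neg` trick: broadcast all-ones when condition is true, else all-zeros.
    let mask = _mm256_set1_epi64x(-(condition as i64));
    let mut i = 0;
    while i < num_bytes / 8 {
        // 32-byte aligned load (vmovdqa), then masked store (vpmaskmovq).
        let v = _mm256_load_si256(src.add(i) as *const __m256i);
        _mm256_maskstore_epi64(dest.add(i) as *mut i64, mask, v);
        i += 4;
    }
}
```
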
1 change: 0 additions & 1 deletion aligned-cmov/src/lib.rs
@@ -1,7 +1,6 @@
 // Copyright (c) 2018-2021 The MobileCoin Foundation
 
 #![no_std]
-#![feature(llvm_asm)]
 
 pub use aligned_array::{subtle, Aligned, AsAlignedChunks, AsNeSlice, A64, A8};
 pub use generic_array::{arr, typenum, ArrayLength, GenericArray};
4 changes: 2 additions & 2 deletions balanced-tree-index/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "balanced-tree-index"
-version = "2.0.0"
+version = "2.1.0"
 description = "Utilities for constant-time manipulation of a complete binary tree with a flat in-memory representation."
 authors = ["MobileCoin"]
 license = "GPL-3.0"
@@ -11,7 +11,7 @@ keywords = ["binary-tree", "constant-time", "utilities", "no_std"]
 categories = ["cryptography", "data-structures"]
 
 [dependencies]
-aligned-cmov = { path = "../aligned-cmov", version = "2" }
+aligned-cmov = { path = "../aligned-cmov", version = "2.1" }
 
 rand_core = { version = "0.6", default-features = false }

4 changes: 2 additions & 2 deletions mc-oblivious-map/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "mc-oblivious-map"
-version = "2.0.0"
+version = "2.1.0"
 description = "Implementation of Oblivious Hash Map data structures on top of Oblivious RAM"
 authors = ["MobileCoin"]
 license = "GPL-3.0"
@@ -15,7 +15,7 @@ no_asm_insecure = ["aligned-cmov/no_asm_insecure"]
 
 [dependencies]
 aligned-array = { version = "1", features = ["subtle"] }
-aligned-cmov = { path = "../aligned-cmov", version = "2" }
+aligned-cmov = { path = "../aligned-cmov", version = "2.1" }
 mc-oblivious-traits = { path = "../mc-oblivious-traits", version = "2" }
 
 generic-array = { version = "0.14", default-features = false }
4 changes: 2 additions & 2 deletions mc-oblivious-ram/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "mc-oblivious-ram"
-version = "2.0.0"
+version = "2.1.0"
 description = "Implementations of Oblivious RAM data structures"
 authors = ["MobileCoin"]
 license = "GPL-3.0"
@@ -14,7 +14,7 @@ categories = ["cryptography", "data-structures", "no-std"]
 no_asm_insecure = ["aligned-cmov/no_asm_insecure"]
 
 [dependencies]
-aligned-cmov = { path = "../aligned-cmov", version = "2" }
+aligned-cmov = { path = "../aligned-cmov", version = "2.1" }
 balanced-tree-index = { path = "../balanced-tree-index", version = "2" }
 mc-oblivious-traits = { path = "../mc-oblivious-traits", version = "2" }