Updating rustfmt.toml to set wrap_comments=true #5

Merged
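The rustfmt.toml change itself is not rendered in the file diffs below. Going by the PR title, it presumably amounts to something like the following (an assumption, since the file's contents are not shown here):

# rustfmt.toml -- hypothetical sketch based on the PR title only
wrap_comments = true

With wrap_comments enabled, rustfmt reflows comment text to fit within comment_width (80 columns by default), which is the rewrapping visible throughout the diffs below.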
51 changes: 28 additions & 23 deletions aligned-cmov/src/cmov_impl_asm.rs
@@ -13,9 +13,9 @@
//! The u32, u64, and A8Bytes versions all use some form of CMOV instruction,
//! and the 64-byte alignment version uses AVX2 VPMASKMOV instruction.
//!
//! We could possibly do the AVX2 stuff using intrinsics instead of inline
//! assembly, but AFAIK we cannot get the CMOV instruction without inline
//! assembly, because there are no intrinsics for that.
//! For now it seems simplest to use inline assembly for all of it.

use super::{A64Bytes, A8Bytes, ArrayLength};
@@ -140,26 +140,28 @@ unsafe fn cmov_byte_slice_a8(condition: bool, src: *const u64, dest: *mut u64, m
// before entering the loop.
let mut temp: u64 = condition as u64;

// The idea here is to test once before we enter the loop and reuse the test
// result for every cmov.
// The loop is a dec, jnz loop.
// Because the semantics of x86 loops are decrement, then test for 0 (and
// not test for 0, then decrement), the values of the loop variable are
// 1..count and not 0..count - 1. We adjust for this by subtracting 8 when
// indexing. Because dec will clobber ZF, we can't use cmovnz. We store
// the result of the outer test in CF, which is not clobbered by dec.
// cmovc is contingent on CF.
// negb is used to set CF to 1 iff the condition value was 1.
// It's a pity that we cannot cmov directly to memory.
//
// temp is a "register output" because this is the way to obtain a scratch register
// when we don't care what actual register is used and we want the compiler to pick.
// temp is a "register output" because this is the way to obtain a scratch
// register when we don't care what actual register is used and we want the
// compiler to pick.
//
// count is modified by the assembly -- for this reason it has to be labelled
// as an input and an output, otherwise it's illegal to modify it.
//
// The condition is passed in through temp, then neg moves the value to CF.
// After that we don't need the condition in a register, so the temp register
// can be reused.
llvm_asm!("neg $0
loop_body_${:uid}:
mov $0, [$3 + 8*$1 - 8]
@@ -179,8 +181,8 @@
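For readers who don't speak x86, the loop above is doing, in effect, the following, sketched here in plain Rust with an arithmetic mask standing in for the cmovc. This is an illustration only (a hypothetical helper, not part of this diff), and an optimizing compiler is free to reintroduce a branch, which is exactly why the crate uses inline assembly:

// Illustrative sketch, not the crate's code: conditionally copy `count` u64
// words from `src` to `dest` without a data-dependent branch.
unsafe fn cmov_u64_words_sketch(condition: bool, src: *const u64, dest: *mut u64, count: usize) {
    // All zeros or all ones, derived from the condition once, playing the
    // role of the neg/CF trick in the assembly.
    let mask = (condition as u64).wrapping_neg();
    for i in 0..count {
        let s = *src.add(i);
        let d = *dest.add(i);
        // dest[i] = if condition { src[i] } else { dest[i] }
        *dest.add(i) = (s & mask) | (d & !mask);
    }
}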

// Should be a constant time function equivalent to:
// if condition { memcpy(dest, src, num_bytes) }
// for pointers aligned to a *64*-byte boundary. Will fault if that is not the
// case. Assumes num_bytes > 0, and num_bytes divisible by 64!
// This version uses AVX2 256-bit moves
#[cfg(target_feature = "avx2")]
#[inline]
@@ -194,17 +196,18 @@ unsafe fn cmov_byte_slice_a64(condition: bool, src: *const u64, dest: *mut u64,
num_bytes % 64 == 0,
"num_bytes must be divisible by 64, caller must handle that"
);
// As before, we want to test once and use the test result for the whole
// loop.
//
// Before we enter the loop, we want to set ymm1 to all 0s or all 1s, depending
// on the condition. We use neg to make an all-0s or all-1s 64-bit value, then
// vmovq to move that to xmm2, then vbroadcastsd to fill ymm1 with 1s or 0s.
//
// This time the cmov mechanism is:
// - VMOVDQA to load the source into a ymm register,
// - VPMASKMOVQ to move that to memory, after masking it with ymm1.
// An alternative approach, which I didn't test, is MASKMOV to another ymm
// register, then move that register to memory.
//
// Notes:
// temp = $0 is the scratch register
@@ -213,8 +216,9 @@
// So, num_bytes is an in/out register, and we pass condition in via
// the scratch register, instead of a dedicated register.
//
// Once we have the mask in ymm1 we don't need the condition in a register
// anymore. Then, $0 is the loop variable, counting down from num_bytes in
// steps of 64.
//
// The values of $0 in the loop are num_bytes, num_bytes - 64, ... 64,
// rather than num_bytes - 64 ... 0. This is because the semantics of the loop
@@ -250,5 +254,6 @@ unsafe fn cmov_byte_slice_a64(condition: bool, src: *const u64, dest: *mut u64,
// of this block, the memory side-effects are.
}
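As the module comment notes, the AVX2 half could plausibly be written with intrinsics rather than inline assembly. A minimal sketch of that idea for a single 32-byte (4 x u64) aligned chunk, assuming AVX2 and the standard _mm256_maskstore_epi64 intrinsic; this is not the crate's implementation, just the same mask-then-store trick expressed differently:

// Illustrative sketch only, not part of this diff.
#[cfg(target_feature = "avx2")]
unsafe fn cmov_a32_chunk_sketch(condition: bool, src: *const i64, dest: *mut i64) {
    use core::arch::x86_64::{
        __m256i, _mm256_load_si256, _mm256_maskstore_epi64, _mm256_set1_epi64x,
    };
    // All-ones in every 64-bit lane when the condition is true, all-zeros
    // otherwise, mirroring the neg + vbroadcastsd sequence in the assembly.
    let mask = _mm256_set1_epi64x((condition as i64).wrapping_neg());
    let data = _mm256_load_si256(src as *const __m256i);
    // Lanes whose mask high bit is set are written; the rest stay untouched.
    _mm256_maskstore_epi64(dest, mask, data);
}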

// TODO: In AVX-512 there is vmovdqa which takes a mask register, and kmovq to
// move a register to a mask register, which seems like a good candidate to
// speed this up for 64-byte aligned chunks.
3 changes: 2 additions & 1 deletion aligned-cmov/src/cmov_impl_no_asm.rs
@@ -1,7 +1,8 @@
// Copyright (c) 2018-2021 The MobileCoin Foundation

//! Naive implementation of cmov using a branch.
//! This is not secure, and is meant for testing the *correctness* of large
//! ORAMs quickly.

use super::{A64Bytes, A8Bytes, ArrayLength};
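For orientation, the branching idea is roughly the following; a sketch only, since the actual function bodies are truncated out of this diff:

// Sketch of the naive approach: correct, but the branch leaks the condition
// through timing and memory access patterns, hence "not secure".
fn cmov_slice_naive_sketch(condition: bool, src: &[u64], dest: &mut [u64]) {
    if condition {
        dest.copy_from_slice(src);
    }
}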

39 changes: 23 additions & 16 deletions balanced-tree-index/src/lib.rs
@@ -4,15 +4,16 @@
#![deny(missing_docs)]
#![deny(unsafe_code)]

//! Defines an interface for a type that represents an index into a
//! memory-mapped complete balanced binary tree.
//!
//! The operations that we define mostly help with finding parents or common
//! ancestors in the tree structure.
//!
//! This type is usually u32 or u64, and these operations are usually performed
//! using bit-twiddling tricks. Coding against this API means that people
//! reading ORAM code don't necessarily have to understand all the bit-twiddling
//! tricks.
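Two typical examples of the bit-twiddling this API hides, assuming the usual 1-rooted layout where node n has children 2n and 2n+1 (illustrative guesses, not necessarily the crate's exact definitions):

// Illustrative only: the parent is a right shift, and the depth below the
// root is the position of the highest set bit.
fn parent_sketch(index: u64) -> u64 {
    index >> 1
}

fn depth_from_root_sketch(index: u64) -> u32 {
    debug_assert!(index != 0, "0 is the NONE sentinel, not a tree node");
    63 - index.leading_zeros()
}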

use aligned_cmov::{
subtle::{ConstantTimeEq, ConstantTimeLess},
@@ -27,7 +28,8 @@ use rand_core::RngCore;
/// All operations here should be constant time, leaking nothing about the input
/// and &self, unless otherwise stated.
pub trait TreeIndex: Copy + Eq + PartialEq + CMov {
/// The Zero index that is unused and does not actually refer to a node in
/// the tree.
const NONE: Self;

/// The index of the root of the tree, logically 1.
@@ -44,9 +46,10 @@ pub trait TreeIndex: Copy + Eq + PartialEq + CMov {

/// For two nodes promised to be "peers", i.e. at the same height,
/// compute the distance from either one to their common ancestor in the tree.
/// This is the number of times you have to compute "parent" before they are
/// equal. It is illegal to call this if the heights of the two arguments
/// are not the same. Should not reveal anything else about the arguments.
fn common_ancestor_distance_of_peers(&self, other: &Self) -> u32;
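The conformance tests further down compare this method against a naive version. A naive (not constant-time) reference for two peers at equal height might look like the following; a sketch under the same 1-rooted-layout assumption, not the crate's actual test code:

// Illustrative test-oracle sketch: walk both nodes up until they meet,
// counting the steps. Branches on its inputs, so it is only fit for tests.
fn naive_common_ancestor_distance_of_peers(mut a: u64, mut b: u64) -> u32 {
    let mut distance = 0;
    while a != b {
        a >>= 1; // parent
        b >>= 1; // parent
        distance += 1;
    }
    distance
}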

/// Compute the height of the common ancestor of any two nodes.
@@ -77,7 +80,8 @@ pub trait TreeIndex: Copy + Eq + PartialEq + CMov {
/// Random child at a given height.
/// This height must be the same or less than the height of the given node,
/// otherwise the call is illegal.
/// It is legal to call this on the NONE value; it will be as if ROOT was
/// passed.
fn random_child_at_height<R: RngCore>(&self, height: u32, rng: &mut R) -> Self;

/// Iterate over the parents of this node, including self.
@@ -149,9 +153,10 @@ macro_rules! implement_tree_index_for_primitive {
debug_assert!(height >= myself.height());
let num_bits_needed = height.wrapping_sub(myself.height());

// Note: Would be nice to use mc_util_from_random here instead of (next_u64 as
// $uint). Here we are taking the u64, casting it to $uint, then masking it
// with a bit mask that keeps a number of low-order bits equal to the number
// of random bits needed.
let randomness =
(rng.next_u64() as $uint) & (((1 as $uint) << num_bits_needed) - 1);
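Putting the masked randomness to use: under the same 1-rooted-layout assumption, appending num_bits_needed random low-order bits to the current index yields a uniformly random descendant that many levels further down. A hedged sketch of that step, not the macro's literal body, which is truncated here:

// Illustrative sketch (assumes num_bits_needed < 64): shift the index up by
// the number of levels to descend and fill the freed low bits with randomness.
fn random_descendant_sketch<R: rand_core::RngCore>(index: u64, num_bits_needed: u32, rng: &mut R) -> u64 {
    let randomness = rng.next_u64() & ((1u64 << num_bits_needed) - 1);
    (index << num_bits_needed) | randomness
}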

@@ -429,7 +434,8 @@ mod testing {
counter
}

// Test that common_ancestor_distance_of_peers agrees with the naive
// implementation
#[test]
fn common_ancestor_distance_conformance_u64() {
test_helper::run_with_several_seeds(|mut rng| {
@@ -457,7 +463,8 @@
})
}

// Test that common_ancestor_distance_of_peers agrees with the naive
// implementation
#[test]
fn common_ancestor_distance_conformance_u32() {
test_helper::run_with_several_seeds(|mut rng| {