Skip to content

Commit

Permalink
Optimized matrix multiplication; data type for quantization.
Browse files Browse the repository at this point in the history
  • Loading branch information
mkskeller committed Feb 14, 2019
1 parent 216fbdf commit b6a1867
Show file tree
Hide file tree
Showing 108 changed files with 2,671 additions and 994 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ Programs/Public-Input/*
*.a
*.static
*.d
local/

# Packages #
############
Expand Down
2 changes: 2 additions & 0 deletions Auth/MAC_Check.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -466,3 +466,5 @@ template class MAC_Check_Base<ShamirShare<gfp>>;
template class MAC_Check_Base<ShamirShare<gf2n>>;
template class MAC_Check_Base<MaliciousShamirShare<gfp>>;
template class MAC_Check_Base<MaliciousShamirShare<gf2n>>;
template class MAC_Check_Base<Share<gfp>>;
template class MAC_Check_Base<Share<gf2n>>;
5 changes: 3 additions & 2 deletions Auth/MaliciousRepMC.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,15 @@ void CommMaliciousRepMC<T>::POpen_Begin(vector<typename T::clear>& values,
for (auto& x : S)
for (int i = 0; i < 2; i++)
x[i].pack(os[1 - i]);
P.send_relative(os);
P.pass_around(os[0], 1);
P.pass_around(os[1], 2);
}

template<class T>
void CommMaliciousRepMC<T>::POpen_End(vector<typename T::clear>& values,
const vector<T>& S, const Player& P)
{
P.receive_relative(os);
(void) P;
if (os[0] != os[1])
throw mac_fail();
values.clear();
Expand Down
2 changes: 1 addition & 1 deletion Auth/MaliciousShamirMC.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
*/

#include "MaliciousShamirMC.h"
#include "Processor/ShamirMachine.h"
#include "Machines/ShamirMachine.h"

template<class T>
MaliciousShamirMC<T>::MaliciousShamirMC()
Expand Down
2 changes: 1 addition & 1 deletion Auth/ShamirMC.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

#include "MAC_Check.h"
#include "Math/ShamirShare.h"
#include "Processor/ShamirMachine.h"
#include "Machines/ShamirMachine.h"

template<class T>
class ShamirMC : public MAC_Check_Base<T>
Expand Down
5 changes: 5 additions & 0 deletions BMR/AuthValue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,12 @@ void AuthValue::assign(const word& value, const int128& mac_key, bool not_first_
share = 0;
else
share = value;
#ifdef __PCLMUL__
mac = _mm_clmulepi64_si128(_mm_cvtsi64_si128(mac_key.get_lower()), _mm_cvtsi64_si128(value), 0);
#else
(void) mac_key;
throw runtime_error("need to compile with PCLMUL support");
#endif
}

ostream& operator<<(ostream& o, const AuthValue& auth_value)
Expand Down
11 changes: 9 additions & 2 deletions BMR/Key.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <string.h>

#include "Tools/FlexBuffer.h"
#include "Math/gf2nlong.h"

using namespace std;

Expand Down Expand Up @@ -54,8 +55,7 @@ ostream& operator<<(ostream& o, const __m128i& x);


inline bool Key::operator==(const Key& other) {
__m128i neq = _mm_xor_si128(r, other.r);
return _mm_test_all_zeros(neq,neq);
return int128(r) == other.r;
}

inline Key& Key::operator-=(const Key& other) {
Expand Down Expand Up @@ -88,7 +88,14 @@ inline void Key::set_signal(bool signal)

/*
 * Return a key whose two 64-bit halves are each shifted left by i bits.
 * The halves are shifted independently: no bits move from the low half
 * into the high half.
 *
 * With AVX2 this is one variable-count shift (_mm_sllv_epi64). That
 * instruction treats the count as an unsigned 64-bit value and produces
 * zero for any count above 63. The portable fallback reproduces exactly
 * that result instead of relying on an out-of-range C++ shift, and it
 * shifts unsigned values because left-shifting a negative signed integer
 * is undefined before C++20.
 */
inline Key Key::doubling(int i) const
{
#ifdef __AVX2__
    return _mm_sllv_epi64(r, _mm_set_epi64x(i, i));
#else
    // Same view of the count as the AVX2 path: a negative i becomes a huge
    // unsigned count and therefore yields zero.
    const uint64_t count = static_cast<uint64_t>(i);
    uint64_t halfs[2] = {0, 0};
    if (count < 64)
    {
        // halfs[1] is the upper 64 bits of r, halfs[0] the lower 64 bits.
        halfs[1] = static_cast<uint64_t>(_mm_cvtsi128_si64(_mm_unpackhi_epi64(r, r))) << count;
        halfs[0] = static_cast<uint64_t>(_mm_cvtsi128_si64(r)) << count;
    }
    return _mm_loadu_si128((__m128i*)halfs);
#endif
}


Expand Down
4 changes: 2 additions & 2 deletions BMR/Party.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

#include <mutex>
#include <boost/atomic.hpp>
#include <sys/sysinfo.h>
#include <boost/thread.hpp>

#include "Register.h"
#include "GarbledGate.h"
Expand All @@ -31,7 +31,7 @@ class BooleanCircuit;
#ifndef N_EVAL_THREADS
// Default Intel desktop processor has 8 half cores.
// This is beneficial if only one AES available per full core.
#define N_EVAL_THREADS (get_nprocs())
#define N_EVAL_THREADS (thread::hardware_concurrency())
#endif


Expand Down
2 changes: 2 additions & 0 deletions BMR/Program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#include "GC/Instruction.hpp"
#include "GC/Program.hpp"

#include "Processor/Instruction.hpp"

namespace GC
{

Expand Down
200 changes: 6 additions & 194 deletions BMR/aes.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "aes.h"
#include <stdexcept>

#ifdef _WIN32
#include "StdAfx.h"
Expand All @@ -10,6 +11,7 @@ void AES_128_Key_Expansion(const unsigned char *userkey, AES_KEY *aesKey)
//block *kp = (block *)&aesKey;
aesKey->rd_key[0] = x0 = _mm_loadu_si128((block*)userkey);
x2 = _mm_setzero_si128();
#ifdef __AES__
EXPAND_ASSIST(x0, x1, x2, x0, 255, 1); aesKey->rd_key[1] = x0;
EXPAND_ASSIST(x0, x1, x2, x0, 255, 2); aesKey->rd_key[2] = x0;
EXPAND_ASSIST(x0, x1, x2, x0, 255, 4); aesKey->rd_key[3] = x0;
Expand All @@ -20,198 +22,8 @@ void AES_128_Key_Expansion(const unsigned char *userkey, AES_KEY *aesKey)
EXPAND_ASSIST(x0, x1, x2, x0, 255, 128); aesKey->rd_key[8] = x0;
EXPAND_ASSIST(x0, x1, x2, x0, 255, 27); aesKey->rd_key[9] = x0;
EXPAND_ASSIST(x0, x1, x2, x0, 255, 54); aesKey->rd_key[10] = x0;
}



void AES_192_Key_Expansion(const unsigned char *userkey, AES_KEY *aesKey)
{
__m128i x0,x1,x2,x3,tmp,*kp = (block *)&aesKey;
kp[0] = x0 = _mm_loadu_si128((block*)userkey);
tmp = x3 = _mm_loadu_si128((block*)(userkey+16));
x2 = _mm_setzero_si128();
EXPAND192_STEP(1,1);
EXPAND192_STEP(4,4);
EXPAND192_STEP(7,16);
EXPAND192_STEP(10,64);
}

/*
 * Expand a 256-bit user key into aesKey->rd_key[0..14].
 *
 * userkey: 32 bytes of key material, read with two unaligned 16-byte loads.
 * aesKey:  receives the 15 schedule entries; aesKey->rounds is NOT set here.
 *
 * The two halves of the key are kept in x0 and x3. Each EXPAND_ASSIST call
 * updates its first argument using _mm_aeskeygenassist_si128 of its fourth
 * argument, so the calls alternate between updating x0 (from x3) and x3
 * (from x0). x1 and x2 are scratch registers for the macro. The last macro
 * argument is the round constant, which must be a compile-time immediate;
 * it doubles after every pair of calls (1, 2, 4, ..., 64).
 */
void AES_256_Key_Expansion(const unsigned char *userkey, AES_KEY *aesKey)
{
__m128i x0, x1, x2, x3;/* , *kp = (block *)&aesKey;*/
// The first two schedule entries are the raw key halves.
aesKey->rd_key[0] = x0 = _mm_loadu_si128((block*)userkey);
aesKey->rd_key[1] = x3 = _mm_loadu_si128((block*)(userkey + 16));
x2 = _mm_setzero_si128();
// Even entries come from x0 (shuffle constant 255), odd ones from x3 (170).
EXPAND_ASSIST(x0, x1, x2, x3, 255, 1); aesKey->rd_key[2] = x0;
EXPAND_ASSIST(x3, x1, x2, x0, 170, 1); aesKey->rd_key[3] = x3;
EXPAND_ASSIST(x0, x1, x2, x3, 255, 2); aesKey->rd_key[4] = x0;
EXPAND_ASSIST(x3, x1, x2, x0, 170, 2); aesKey->rd_key[5] = x3;
EXPAND_ASSIST(x0, x1, x2, x3, 255, 4); aesKey->rd_key[6] = x0;
EXPAND_ASSIST(x3, x1, x2, x0, 170, 4); aesKey->rd_key[7] = x3;
EXPAND_ASSIST(x0, x1, x2, x3, 255, 8); aesKey->rd_key[8] = x0;
EXPAND_ASSIST(x3, x1, x2, x0, 170, 8); aesKey->rd_key[9] = x3;
EXPAND_ASSIST(x0, x1, x2, x3, 255, 16); aesKey->rd_key[10] = x0;
EXPAND_ASSIST(x3, x1, x2, x0, 170, 16); aesKey->rd_key[11] = x3;
EXPAND_ASSIST(x0, x1, x2, x3, 255, 32); aesKey->rd_key[12] = x0;
EXPAND_ASSIST(x3, x1, x2, x0, 170, 32); aesKey->rd_key[13] = x3;
// The final entry needs only the x0 half.
EXPAND_ASSIST(x0, x1, x2, x3, 255, 64); aesKey->rd_key[14] = x0;
}

/*
 * Build the encryption key schedule for a key of the given size.
 *
 * userKey: raw key bytes, passed on to the size-specific expansion routine.
 * bits:    key size; must be 128, 192 or 256.
 * aesKey:  receives the expanded schedule and the round count
 *          (6 + bits / 32, i.e. 10, 12 or 14).
 *
 * Throws std::invalid_argument for any other key size. Without this check
 * such a call would leave the schedule unexpanded while still storing a
 * round count derived from the bogus size.
 */
void AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *aesKey)
{
    switch (bits)
    {
    case 128:
        AES_128_Key_Expansion(userKey, aesKey);
        break;
    case 192:
        AES_192_Key_Expansion(userKey, aesKey);
        break;
    case 256:
        AES_256_Key_Expansion(userKey, aesKey);
        break;
    default:
        throw std::invalid_argument("AES key size must be 128, 192 or 256 bits");
    }

    aesKey->rounds = 6 + bits / 32;
}

/*
 * Encrypt one block from `in` into `out` with the schedule in aesKey.
 * Both pointers are accessed with aligned 128-bit load/store intrinsics.
 */
void AES_encryptC(block *in, block *out, AES_KEY *aesKey)
{
    const int n_rounds = ROUNDS(aesKey);
    const __m128i *round_keys = (__m128i *)(aesKey->rd_key);

    // Whitening with the first schedule entry.
    __m128i state = _mm_xor_si128(_mm_load_si128((__m128i*)in), round_keys[0]);

    // Middle rounds; the counter is kept afterwards to select the final key.
    int round = 1;
    while (round < n_rounds)
    {
        state = _mm_aesenc_si128(state, round_keys[round]);
        round++;
    }

    _mm_store_si128((__m128i*)out, _mm_aesenclast_si128(state, round_keys[round]));
}


/*
 * Encrypt a single block in place with the schedule in aesKey.
 */
void AES_ecb_encrypt(block *blk, AES_KEY *aesKey) {
    const unsigned n_rounds = ROUNDS(aesKey);
    const block *round_keys = ((block *)(aesKey->rd_key));

    // Work directly on the caller's block so every step is stored through blk.
    block &state = *blk;

    state = _mm_xor_si128(state, round_keys[0]);

    unsigned round = 1;
    while (round < n_rounds)
    {
        state = _mm_aesenc_si128(state, round_keys[round]);
        ++round;
    }
    // `round` now indexes the schedule entry used for the last round.
    state = _mm_aesenclast_si128(state, round_keys[round]);
}

/*
 * Encrypt nblks blocks in place. Processing is round-major: every block
 * goes through a given round before any block starts the next one.
 */
void AES_ecb_encrypt_blks(block *blks, unsigned nblks, AES_KEY *aesKey) {
    const unsigned n_rounds = ROUNDS(aesKey);
    const block *round_keys = ((block *)(aesKey->rd_key));
    block *const blks_end = blks + nblks;
    block *cur;

    // Whitening with the first schedule entry.
    for (cur = blks; cur != blks_end; ++cur)
        *cur = _mm_xor_si128(*cur, round_keys[0]);

    // Middle rounds; the counter is kept afterwards to select the final key.
    unsigned round = 1;
    while (round < n_rounds)
    {
        for (cur = blks; cur != blks_end; ++cur)
            *cur = _mm_aesenc_si128(*cur, round_keys[round]);
        ++round;
    }

    for (cur = blks; cur != blks_end; ++cur)
        *cur = _mm_aesenclast_si128(*cur, round_keys[round]);
}

/*
 * Encrypt exactly four consecutive blocks in place, round by round
 * (blocks 0..3 are processed in index order within each round).
 */
void AES_ecb_encrypt_blks_4(block *blks, AES_KEY *aesKey) {
    const unsigned lanes = 4;
    const unsigned n_rounds = ROUNDS(aesKey);
    const block *round_keys = ((block *)(aesKey->rd_key));
    unsigned lane;

    // Whitening with the first schedule entry.
    for (lane = 0; lane < lanes; ++lane)
        blks[lane] = _mm_xor_si128(blks[lane], round_keys[0]);

    // Middle rounds; the counter is kept afterwards to select the final key.
    unsigned round = 1;
    while (round < n_rounds)
    {
        for (lane = 0; lane < lanes; ++lane)
            blks[lane] = _mm_aesenc_si128(blks[lane], round_keys[round]);
        ++round;
    }

    for (lane = 0; lane < lanes; ++lane)
        blks[lane] = _mm_aesenclast_si128(blks[lane], round_keys[round]);
}


/*
 * Encrypt two blocks read from `in` and write the results to `out`.
 * `in` is only read during the initial whitening; all later rounds
 * operate on `out`.
 */
void AES_ecb_encrypt_blks_2_in_out(block *in, block *out, AES_KEY *aesKey) {
    const unsigned lanes = 2;
    const unsigned n_rounds = ROUNDS(aesKey);
    const block *round_keys = ((block *)(aesKey->rd_key));
    unsigned lane;

    // Whitening with the first schedule entry.
    for (lane = 0; lane < lanes; ++lane)
        out[lane] = _mm_xor_si128(in[lane], round_keys[0]);

    // Middle rounds; the counter is kept afterwards to select the final key.
    unsigned round = 1;
    while (round < n_rounds)
    {
        for (lane = 0; lane < lanes; ++lane)
            out[lane] = _mm_aesenc_si128(out[lane], round_keys[round]);
        ++round;
    }

    for (lane = 0; lane < lanes; ++lane)
        out[lane] = _mm_aesenclast_si128(out[lane], round_keys[round]);
}

/*
 * Encrypt four blocks read from `in` and write the results to `out`.
 * `in` is only read during the initial whitening; all later rounds
 * operate on `out`.
 */
void AES_ecb_encrypt_blks_4_in_out(block *in, block *out, AES_KEY *aesKey) {
    const unsigned lanes = 4;
    const unsigned n_rounds = ROUNDS(aesKey);
    const block *round_keys = ((block *)(aesKey->rd_key));
    unsigned lane;

    // Whitening with the first schedule entry.
    for (lane = 0; lane < lanes; ++lane)
        out[lane] = _mm_xor_si128(in[lane], round_keys[0]);

    // Middle rounds; the counter is kept afterwards to select the final key.
    unsigned round = 1;
    while (round < n_rounds)
    {
        for (lane = 0; lane < lanes; ++lane)
            out[lane] = _mm_aesenc_si128(out[lane], round_keys[round]);
        ++round;
    }

    for (lane = 0; lane < lanes; ++lane)
        out[lane] = _mm_aesenclast_si128(out[lane], round_keys[round]);
}

void AES_ecb_encrypt_chunk_in_out(block *in, block *out, unsigned nblks, AES_KEY *aesKey) {

int numberOfLoops = nblks / 8;
int blocksPipeLined = numberOfLoops * 8;
int remainingEncrypts = nblks - blocksPipeLined;

unsigned j, rnds = ROUNDS(aesKey);
const block *sched = ((block *)(aesKey->rd_key));

for (int i = 0; i < numberOfLoops; i++){

out[0 + i * 8] = _mm_xor_si128(in[0 + i * 8], sched[0]);
out[1 + i * 8] = _mm_xor_si128(in[1 + i * 8], sched[0]);
out[2 + i * 8] = _mm_xor_si128(in[2 + i * 8], sched[0]);
out[3 + i * 8] = _mm_xor_si128(in[3 + i * 8], sched[0]);
out[4 + i * 8] = _mm_xor_si128(in[4 + i * 8], sched[0]);
out[5 + i * 8] = _mm_xor_si128(in[5 + i * 8], sched[0]);
out[6 + i * 8] = _mm_xor_si128(in[6 + i * 8], sched[0]);
out[7 + i * 8] = _mm_xor_si128(in[7 + i * 8], sched[0]);

for (j = 1; j < rnds; ++j){
out[0 + i * 8] = _mm_aesenc_si128(out[0 + i * 8], sched[j]);
out[1 + i * 8] = _mm_aesenc_si128(out[1 + i * 8], sched[j]);
out[2 + i * 8] = _mm_aesenc_si128(out[2 + i * 8], sched[j]);
out[3 + i * 8] = _mm_aesenc_si128(out[3 + i * 8], sched[j]);
out[4 + i * 8] = _mm_aesenc_si128(out[4 + i * 8], sched[j]);
out[5 + i * 8] = _mm_aesenc_si128(out[5 + i * 8], sched[j]);
out[6 + i * 8] = _mm_aesenc_si128(out[6 + i * 8], sched[j]);
out[7 + i * 8] = _mm_aesenc_si128(out[7 + i * 8], sched[j]);
}
out[0 + i * 8] = _mm_aesenclast_si128(out[0 + i * 8], sched[j]);
out[1 + i * 8] = _mm_aesenclast_si128(out[1 + i * 8], sched[j]);
out[2 + i * 8] = _mm_aesenclast_si128(out[2 + i * 8], sched[j]);
out[3 + i * 8] = _mm_aesenclast_si128(out[3 + i * 8], sched[j]);
out[4 + i * 8] = _mm_aesenclast_si128(out[4 + i * 8], sched[j]);
out[5 + i * 8] = _mm_aesenclast_si128(out[5 + i * 8], sched[j]);
out[6 + i * 8] = _mm_aesenclast_si128(out[6 + i * 8], sched[j]);
out[7 + i * 8] = _mm_aesenclast_si128(out[7 + i * 8], sched[j]);
}

for (int i = blocksPipeLined; i < blocksPipeLined + remainingEncrypts; ++i){
out[i] = _mm_xor_si128(in[i], sched[0]);
for (j = 1; j < rnds; ++j)
{
out[i] = _mm_aesenc_si128(out[i], sched[j]);
}
out[i] = _mm_aesenclast_si128(out[i], sched[j]);
}

#else
(void) x1, (void) x2;
throw std::runtime_error("need to compile with AES-NI support");
#endif
}
31 changes: 0 additions & 31 deletions BMR/aes.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@


typedef struct { block rd_key[15]; int rounds; } AES_KEY;
#define ROUNDS(ctx) ((ctx)->rounds)

#define EXPAND_ASSIST(v1,v2,v3,v4,shuff_const,aes_const) \
v2 = _mm_aeskeygenassist_si128(v4,aes_const); \
Expand All @@ -37,36 +36,6 @@ typedef struct { block rd_key[15]; int rounds; } AES_KEY;
v2 = _mm_shuffle_epi32(v2,shuff_const); \
v1 = _mm_xor_si128(v1,v2)

#define EXPAND192_STEP(idx,aes_const) \
EXPAND_ASSIST(x0,x1,x2,x3,85,aes_const); \
x3 = _mm_xor_si128(x3,_mm_slli_si128 (x3, 4)); \
x3 = _mm_xor_si128(x3,_mm_shuffle_epi32(x0, 255)); \
kp[idx] = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(tmp), \
_mm_castsi128_ps(x0), 68)); \
kp[idx+1] = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(x0), \
_mm_castsi128_ps(x3), 78)); \
EXPAND_ASSIST(x0,x1,x2,x3,85,(aes_const*2)); \
x3 = _mm_xor_si128(x3,_mm_slli_si128 (x3, 4)); \
x3 = _mm_xor_si128(x3,_mm_shuffle_epi32(x0, 255)); \
kp[idx+2] = x0; tmp = x3





void AES_128_Key_Expansion(const unsigned char *userkey, AES_KEY* aesKey);
void AES_192_Key_Expansion(const unsigned char *userkey, AES_KEY* aesKey);
void AES_256_Key_Expansion(const unsigned char *userkey, AES_KEY* aesKey);
void AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *aesKey);

void AES_encryptC(block *in, block *out, AES_KEY *aesKey);
void AES_ecb_encrypt(block *blk, AES_KEY *aesKey);

void AES_ecb_encrypt_blks(block *blks, unsigned nblks, AES_KEY *aesKey);
void AES_ecb_encrypt_blks_4(block *blk, AES_KEY *aesKey);
void AES_ecb_encrypt_blks_4_in_out(block *in, block *out, AES_KEY *aesKey);
void AES_ecb_encrypt_blks_2_in_out(block *in, block *out, AES_KEY *aesKey);
void AES_ecb_encrypt_chunk_in_out(block *in, block *out, unsigned nblks, AES_KEY *aesKey);


#endif /* PROTOCOL_INC_AES_H_ */
2 changes: 1 addition & 1 deletion BMR/network/Server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ Server::Server(int port, int expected_clients, ServerUpdatable* updatable, unsig
_servaddr.sin_addr.s_addr = htonl(INADDR_ANY);
_servaddr.sin_port = htons(_port);

if( 0 != bind(_servfd, (struct sockaddr *) &_servaddr, sizeof(_servaddr)) )
if( 0 != ::bind(_servfd, (struct sockaddr *) &_servaddr, sizeof(_servaddr)) )
printf("Server:: Error binding to %d: \n%s\n", _port, strerror(errno));

if(0 != listen(_servfd, _expected_clients))
Expand Down
Loading

0 comments on commit b6a1867

Please sign in to comment.