Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added display of operations per seconds in speed-curve25519-donna* #29

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,8 @@ speed-curve25519-donna: speed-curve25519.c curve25519-donna.a
# Single-threaded speed benchmark linked against the 64-bit C archive.
speed-curve25519-donna-c64: speed-curve25519.c curve25519-donna-c64.a
gcc -o speed-curve25519-donna-c64 speed-curve25519.c curve25519-donna-c64.a $(CFLAGS)

# Multi-threaded speed benchmark; same archive as above, plus the pthread
# library for the worker threads.
speed-curve25519-donna-c64-threaded: speed-curve25519-threaded.c curve25519-donna-c64.a
gcc -o speed-curve25519-donna-c64-threaded speed-curve25519-threaded.c curve25519-donna-c64.a $(CFLAGS) -lpthread

# Test program built from test-sc-curve25519.c together with the assembly
# file test-sc-curve25519.s and the 64-bit C archive.
test-sc-curve25519-donna-c64: test-sc-curve25519.c curve25519-donna-c64.a
gcc -o test-sc-curve25519-donna-c64 -O test-sc-curve25519.c curve25519-donna-c64.a test-sc-curve25519.s $(CFLAGS)
65 changes: 21 additions & 44 deletions README
Original file line number Diff line number Diff line change
@@ -1,44 +1,21 @@
See http://code.google.com/p/curve25519-donna/ for details.

BUILDING:

If you run `make`, two .a archives will be built, similar to djb's curve25519
code. Alternatively, read on:

The C implementation is contained within curve25519-donna.c. It has no external
dependencies and is BSD licensed. You can copy/include/link it directly in with
your program. Recommended C flags: -O2

The x86-64 bit implementation is contained within curve25519-donna-x86-64.c and
curve25519-donna-x86-64.s. Build like this:

% cpp curve25519-donna-x86-64.s > curve25519-donna-x86-64.s.pp
% as -o curve25519-donna-x86-64.s.o curve25519-donna-x86-64.s.pp
% gcc -O2 -c curve25519-donna-x86-64.c

Then the two .o files can be linked in

USAGE:

The usage is exactly the same as djb's code (as described at
http://cr.yp.to/ecdh.html) except that the function is called curve25519_donna.

In short,

To generate a private key, generate 32 random bytes and:

mysecret[0] &= 248;
mysecret[31] &= 127;
mysecret[31] |= 64;

To generate the public key, just do

static const uint8_t basepoint[32] = {9};
curve25519_donna(mypublic, mysecret, basepoint);

To generate an agreed key do:
uint8_t shared_key[32];
curve25519_donna(shared_key, mysecret, theirpublic);

And hash the shared_key with a cryptographic hash function before using.

This project provides a method to test the performance of Curve25519 on the entire CPU, as opposed to a single thread, approximating the performance of a fully loaded server.

To run:
make speed-curve25519-donna-c64-threaded && ./speed-curve25519-donna-c64-threaded

Output:
Will use 5 threads
Waiting in thread 2 ...
Waiting in thread 5 ...
Waiting in thread 3 ...
Waiting in thread 4 ...
Waiting in thread 1 ...
thread 1: 11706.7 op/sec in 8.54 sec
thread 2: 11674 op/sec in 8.57 sec
thread 3: 11584.5 op/sec in 8.63 sec
thread 4: 12096.5 op/sec in 8.27 sec
thread 5: 11876.3 op/sec in 8.42 sec
Total for CPU: 58938.1 op/sec


Derived from https://github.com/agl/curve25519-donna
136 changes: 136 additions & 0 deletions speed-curve25519-threaded.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>

/* Byte type used in the curve25519_donna() prototype. */
typedef uint8_t u8;

/*
 * Per-thread benchmark state. main() allocates one of these per worker
 * thread and passes its address to run() as the thread argument.
 */
typedef struct thr_state {
/* Zero-based index of this thread; run() prints it as i+1. */
unsigned i;
/* Handle filled in by pthread_create(). */
pthread_t thread_id;
/* Secret key passed to curve25519_donna(); main() points every thread at
 * the same 32-byte buffer. */
unsigned char *secret;
/* NOTE(review): not read or written anywhere in this file - confirm whether
 * it is still needed. */
struct timespec waittime;
/* Points at the start flag owned by main(); run() spins until it is
 * non-zero. */
volatile unsigned *all_start;
/* Result: operations per second measured by run(). */
float op_sec;
/* Result: wall-clock seconds spent in run()'s timed loop. */
float total_sec;
/* Initialised to 0 by main(); set to 1 by run() just before it returns. */
unsigned exited;

} thr_state;

/*
 * Operation being benchmarked. Defined outside this file (the Makefile links
 * curve25519-donna-c64.a). Per the README usage, it writes the 32-byte result
 * for `secret` and the point `bp` into `output`.
 */
extern void curve25519_donna(u8 *output, const u8 *secret, const u8 *bp);

/*
 * Return the current wall-clock time from gettimeofday() as a single
 * microsecond count (seconds * 1,000,000 + microseconds).
 */
static uint64_t
time_now() {
  struct timeval now;
  uint64_t micros;

  gettimeofday(&now, NULL);

  /* Widen the seconds to 64 bits before scaling so the product cannot be
   * truncated to the width of tv_sec. */
  micros = (uint64_t)now.tv_sec * 1000000;
  micros += now.tv_usec;
  return micros;
}

/* Core count assumed by main() when sysconf() cannot report the number of
 * online processors. */
#define DEFAULT_THREAD_COUNT 4

/* Point passed as the third argument of curve25519_donna() in every
 * benchmark call: first byte 9, remaining 31 bytes zero. */
static const unsigned char basepoint[32] = {9};

/*
 * Thread entry point for one benchmark worker.
 *
 * `state` must point to this thread's thr_state. The thread announces itself,
 * busy-waits until *all_start becomes non-zero, performs an untimed warm-up,
 * then times a fixed number of curve25519_donna() calls. The measured rate
 * and duration are stored in op_sec / total_sec and exited is set to 1.
 * Always returns NULL.
 */
static void *run( void *state ) {
  thr_state *self = (thr_state*)state;
  const unsigned warmup_rounds = 1000;
  const unsigned iterations = 100000;
  unsigned char mypublic[32];
  uint64_t t0, t1, elapsed_us;
  unsigned k;

  printf("Waiting in thread %d ...\n", self->i+1);

  /* Spin until the start flag is raised so all workers begin together. */
  while( *self->all_start==0 ) {
  }

  /* Untimed warm-up so the caches are loaded before measuring. */
  k = 0;
  while (k < warmup_rounds) {
    curve25519_donna(mypublic, self->secret, basepoint);
    ++k;
  }

  /* Timed section. */
  t0 = time_now();
  for (k = iterations; k != 0; --k) {
    curve25519_donna(mypublic, self->secret, basepoint);
  }
  t1 = time_now();

  /* time_now() is in microseconds; convert to ops/sec and seconds. */
  elapsed_us = t1 - t0;
  self->op_sec = iterations*1000000. / elapsed_us;
  self->total_sec = elapsed_us/1000000.;
  self->exited = 1;
  return NULL;
}

/*
 * Multi-threaded Curve25519 benchmark driver.
 *
 * Starts (online cores + 1) worker threads running run(), releases them all
 * at once through a shared start flag, waits for every worker to finish and
 * prints each thread's throughput followed by the sum over all threads.
 *
 * Returns 0 on success, 1 if memory allocation, thread creation or thread
 * join fails.
 */
int
main(void) {
  unsigned char mysecret[32];
  /* Start flag the workers spin on; kept volatile to match thr_state. */
  volatile unsigned all_start = 0;
  thr_state *st;
  unsigned threads_count;
  unsigned i;
  float total_op_sec = 0;
  long ncores;

  /* sysconf() returns a long and -1 on failure; keep it signed so the error
   * check is explicit, and treat a nonsensical 0 the same way. */
  ncores = sysconf( _SC_NPROCESSORS_ONLN );
  if( ncores < 1 ) {
    ncores = DEFAULT_THREAD_COUNT;
    printf("Cannot determine the number of cores; assume %ld\n", ncores);
  }
  threads_count = (unsigned)ncores + 1;

  st = malloc( sizeof *st * threads_count );
  if( st == NULL ) {
    printf("Out of memory\n");
    return 1;
  }

  printf("Will use %u threads\n", threads_count);

  /* Fixed test secret, clamped as described in the README. */
  memset(mysecret, 42, 32);
  mysecret[0] &= 248;
  mysecret[31] &= 127;
  mysecret[31] |= 64;

  /* start the threads */
  for( i=0; i<threads_count; i++ ) {
    st[i].i = i;
    st[i].secret = mysecret;
    st[i].all_start = &all_start;
    st[i].exited = 0;

    if(pthread_create( &st[i].thread_id, NULL, run, st+i )) {
      fprintf(stderr, "Error creating thread %u\n", i);
      /* Already-started workers are still spinning on st[]; do not free it,
       * process exit tears everything down. */
      return 1;
    }
  }

  /* Give every worker time to reach its spin loop, then release them. */
  sleep(1);
  all_start = 1; /* go! */

  /* Wait for all the threads to finish. Joining (rather than polling a
   * flag) also guarantees the workers' results are visible here. */
  for( i=0; i<threads_count; i++ ) {
    if(pthread_join( st[i].thread_id, NULL )) {
      fprintf(stderr, "Error joining thread %u\n", i);
      return 1;
    }
  }

  for( i=0; i<threads_count; i++ ) {
    printf("thread %u: %g op/sec in %.02f sec\n", i+1, st[i].op_sec, st[i].total_sec);
    total_op_sec += st[i].op_sec;
  }
  printf("Total for CPU: %g op/sec\n", total_op_sec);

  free(st);
  return 0;
}
24 changes: 22 additions & 2 deletions speed-curve25519.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,27 @@ time_now() {
return ret;
}

/*
 * Return the current CPU tick count.
 *
 * On x86 / x86-64 builds with a GNU-compatible compiler this executes the
 * RDTSC instruction and combines its two 32-bit halves into one 64-bit value.
 * On every other compiler or architecture it returns 0, which shows up as an
 * obviously bogus "0 cycles/op" in the output instead of breaking the build.
 */
static uint64_t
cycles_now(void) {
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
  uint32_t lo, hi;
  /* RDTSC leaves the low half in EAX and the high half in EDX. */
  __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
  return ((uint64_t)lo | ((uint64_t)hi << 32));
#else
  return 0; /* Undefined for now; should be obvious in the output */
#endif
}


int
main() {
static const unsigned char basepoint[32] = {9};
unsigned char mysecret[32], mypublic[32];
unsigned i;
uint64_t start, end;
const unsigned iterations = 100000;
uint64_t start_c, end_c;

memset(mysecret, 42, 32);
mysecret[0] &= 248;
Expand All @@ -39,12 +54,17 @@ main() {
}

start = time_now();
for (i = 0; i < 30000; ++i) {
start_c = cycles_now();
for (i = 0; i < iterations; ++i) {
curve25519_donna(mypublic, mysecret, basepoint);
}
end = time_now();
end_c = cycles_now();

printf("%luus\n", (unsigned long) ((end - start) / 30000));
printf("%lu us, %g op/s, %lu cycles/op\n",
(unsigned long) ((end - start) / iterations),
iterations*1000000. / (end - start),
(unsigned long)((end_c-start_c)/iterations) );

return 0;
}