Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimization: Removed unnecessary divides #306

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions src/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,8 @@ uint32_t index_alpha(const argon2_instance_t *instance,
* Other lanes : (SYNC_POINTS - 1) last segments
*/
uint32_t reference_area_size;
uint64_t relative_position;
uint32_t start_position, absolute_position;
uint64_t relative_position, absolute_position;
uint32_t start_position;

if (0 == position->pass) {
/* First pass */
Expand Down Expand Up @@ -251,9 +251,10 @@ uint32_t index_alpha(const argon2_instance_t *instance,
}

/* 1.2.6. Computing absolute position */
absolute_position = (start_position + relative_position) %
instance->lane_length; /* absolute position */
return absolute_position;
absolute_position = start_position + relative_position -
instance->lane_length;
absolute_position += instance->lane_length & (absolute_position >> 32);
return (uint32_t) absolute_position;
}

/* Single-threaded version for p=1 case */
Expand Down
24 changes: 16 additions & 8 deletions src/opt.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,8 @@ void fill_segment(const argon2_instance_t *instance,
uint64_t pseudo_rand, ref_index, ref_lane;
uint32_t prev_offset, curr_offset;
uint32_t starting_index, i;
uint32_t lanes_reciprocal = 0;
uint32_t lanes = instance->lanes;
#if defined(__AVX512F__)
__m512i state[ARGON2_512BIT_WORDS_IN_BLOCK];
#elif defined(__AVX2__)
Expand Down Expand Up @@ -222,23 +224,23 @@ void fill_segment(const argon2_instance_t *instance,
curr_offset = position.lane * instance->lane_length +
position.slice * instance->segment_length + starting_index;

if (0 == curr_offset % instance->lane_length) {
if ((0 == position.slice) && (0 == starting_index)) {
/* Last block in this lane */
prev_offset = curr_offset + instance->lane_length - 1;
} else {
/* Previous block */
prev_offset = curr_offset - 1;
}

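/* Fixed-point reciprocal of lanes (2^32 / lanes, rounded down), used to
 * replace the division by lanes with a multiply and shift. It stays 0 when
 * lanes is a power of two, in which case a bit mask is used instead. */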
if ((lanes & (lanes - 1)) != 0) {
lanes_reciprocal = (uint32_t) (UINT64_C(0x100000000) / lanes);
}

memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE);

for (i = starting_index; i < instance->segment_length;
++i, ++curr_offset, ++prev_offset) {
/* 1.1 Rotating prev_offset if needed */
if (curr_offset % instance->lane_length == 1) {
prev_offset = curr_offset - 1;
}

++i, ++curr_offset) {
/* 1.2 Computing the index of the reference block */
/* 1.2.1 Taking pseudo-random value from the previous block */
if (data_independent_addressing) {
Expand All @@ -248,10 +250,16 @@ void fill_segment(const argon2_instance_t *instance,
pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
} else {
pseudo_rand = instance->memory[prev_offset].v[0];
prev_offset = curr_offset;
}

/* 1.2.2 Computing the lane of the reference block */
ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
if (lanes_reciprocal == 0) {
ref_lane = (pseudo_rand >> 32) & (lanes - 1);
} else {
ref_lane = (pseudo_rand >> 32) - (((pseudo_rand >> 32) * lanes_reciprocal) >> 32) * lanes - lanes;
ref_lane += lanes & (ref_lane >> 32);
}

if ((position.pass == 0) && (position.slice == 0)) {
/* Cannot reference other lanes yet */
Expand Down