Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Talk file #34

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open

Talk file #34

wants to merge 1 commit into from

Conversation

newhoggy
Copy link
Member

No description provided.

@newhoggy
Copy link
Member Author

newhoggy commented Mar 15, 2019

State-machine-based lookup tables

// Lookup table with 256 32-bit entries (32 rows of 8).
// NOTE(review): presumably indexed by input byte value, with each entry packing
// one byte per state-machine state -- confirm against the code that reads it.
// Reading the index as a byte value, the non-zero entries are:
//   0x00000007 at 0x22 (double quote), 0x2B ('+'), 0x2D ('-'), 0x2E ('.'),
//              0x30-0x39 ('0'-'9'), 0x41-0x5A ('A'-'Z'), 0x61-0x7A ('a'-'z')
//   0x06000006 at 0x5B ('[') and 0x7B ('{')
//   0x01000001 at 0x5D (']') and 0x7D ('}')
// Every other entry, including all of 0x80-0xFF, is zero.
uint32_t hw_json_simd_phi_table_32[] =
{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
, 0x00000000, 0x00000000, 0x00000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
, 0x00000000, 0x00000000, 0x00000000, 0x00000007, 0x00000000, 0x00000007, 0x00000007, 0x00000000
, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007
, 0x00000007, 0x00000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
, 0x00000000, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007
, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007
, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007
, 0x00000007, 0x00000007, 0x00000007, 0x06000006, 0x00000000, 0x01000001, 0x00000000, 0x00000000
, 0x00000000, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007
, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007
, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007, 0x00000007
, 0x00000007, 0x00000007, 0x00000007, 0x06000006, 0x00000000, 0x01000001, 0x00000000, 0x00000000
, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
};

// Lookup table with 256 32-bit entries (32 rows of 8).
// NOTE(review): presumably indexed by input byte value, with each entry packing
// the next state for each of four current states, one byte per state --
// confirm against the code that reads it.
// The default entry is 0x00010100. Reading the index as a byte value, the
// entries that differ from the default are:
//   0x00010001 at 0x22 (double quote)
//   0x00010200 at 0x5C (backslash)
//   0x03010103 at 0x2B ('+'), 0x2D ('-'), 0x2E ('.'), 0x30-0x39 ('0'-'9'),
//              0x41-0x5A ('A'-'Z'), 0x61-0x7A ('a'-'z')
// Brackets and braces (0x5B, 0x5D, 0x7B, 0x7D) carry the default value here.
uint32_t hw_json_simd_transition_table_32[] =
{ 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x00010100, 0x00010001, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x00010100, 0x00010100, 0x03010103, 0x00010100, 0x03010103, 0x03010103, 0x00010100
, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103
, 0x03010103, 0x03010103, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103
, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103
, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103
, 0x03010103, 0x03010103, 0x03010103, 0x00010100, 0x00010200, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103
, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103
, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103, 0x03010103
, 0x03010103, 0x03010103, 0x03010103, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100, 0x00010100
};

@newhoggy
Copy link
Member Author

newhoggy commented Mar 15, 2019

Comparison with pure C code

hw-json-simd/cbits/main.c

Lines 81 to 163 in fe19db8

// Excerpt of the streaming driver: reads `in` in chunks of W8_BUFFER_SIZE
// bytes, runs the state-machine stages on each chunk, and writes the results
// to `ib_out` and `bp_out`. The enclosing function header is not part of this
// excerpt.
uint8_t buffer[W8_BUFFER_SIZE];
uint32_t phi_buffer[W8_BUFFER_SIZE];
uint8_t ibs_buffer[W8_BUFFER_SIZE];
uint8_t ops_buffer[W8_BUFFER_SIZE];
uint8_t cls_buffer[W8_BUFFER_SIZE];
// uint32_t result_ib[W8_BUFFER_SIZE];
// uint32_t result_a [W8_BUFFER_SIZE];
// uint32_t result_z [W8_BUFFER_SIZE];
// uint64_t accum = 0;
// Bits carried over between calls to hw_json_simd_sm_write_bp_chunk.
uint64_t remaining_bp_bits = 0;
size_t remaining_bp_bits_len = 0;
uint64_t out_bp_buffer[W32_BUFFER_SIZE * 2];
size_t total_bytes_read = 0;
// NOTE(review): 0x03020100 looks like an identity mapping packed one byte per
// state (bytes 0,1,2,3) -- confirm against hw_json_simd_sm_process_chunk.
uint32_t state = 0x03020100;
while (1) {
size_t bytes_read = fread(buffer, 1, W8_BUFFER_SIZE, in);
total_bytes_read += bytes_read;
// A short read is either an I/O error, end of file, or the final partial chunk.
if (bytes_read < W8_BUFFER_SIZE) {
if (ferror(in)) {
fprintf(stderr, "Error reading file\n");
exit(1);
}
if (bytes_read == 0) {
if (feof(in)) {
break;
}
}
// Zero-pad the partial chunk up to the next multiple of 64 bytes and process
// the padded length.
size_t next_alignment = ((bytes_read + 63) / 64) * 64;
memset(buffer + bytes_read, 0, next_alignment - bytes_read);
bytes_read = next_alignment;
}
// Keep the state as it was before this chunk: process_chunk is handed &state
// (presumably updating it), while make_ib_op_cl_chunks takes the earlier value.
uint32_t chunk_state = state;
hw_json_simd_sm_process_chunk(buffer, bytes_read,
&state,
phi_buffer);
hw_json_simd_sm_make_ib_op_cl_chunks(chunk_state, phi_buffer, bytes_read,
ibs_buffer,
ops_buffer,
cls_buffer);
// bytes_read / 8 rounded up -- presumably one output bit per input byte.
size_t idx_bytes = (bytes_read + 7) / 8;
fwrite(ibs_buffer, 1, idx_bytes, ib_out);
size_t out_bp_bytes = hw_json_simd_sm_write_bp_chunk(
ops_buffer,
cls_buffer,
idx_bytes,
&remaining_bp_bits,
&remaining_bp_bits_len,
out_bp_buffer);
// NOTE(review): fwrite takes (ptr, size, count, stream); here out_bp_bytes is
// passed as size and sizeof(uint64_t) as count. The total written is their
// product either way -- confirm whether out_bp_bytes counts bytes or words.
fwrite(out_bp_buffer, out_bp_bytes, sizeof(uint64_t), bp_out);
fflush(ib_out);
fflush(bp_out);
}
// Emit whatever bits are still pending after the last chunk.
hw_json_simd_sm_write_bp_chunk_final(remaining_bp_bits, remaining_bp_bits_len, out_bp_buffer);
fprintf(stderr, "Final state %u\n", state);
// Writes 2 * sizeof(uint64_t) bytes from out_bp_buffer.
fwrite(out_bp_buffer, 2, sizeof(uint64_t), bp_out);
fclose(in);
fclose(ib_out);
// NOTE(review): bp_out is not closed in this excerpt before returning.
return 0;

@newhoggy
Copy link
Member Author

Dealing with incompatible architectures

Allow the code to query how the package was compiled

-- | 'True' when 'F.enabled_avx_2' reports a non-zero value.
--
-- The query is run through 'U.unsafePerformIO'; the @NOINLINE@ pragma keeps
-- GHC from inlining that call into use sites.
{-# NOINLINE avx_2 #-}
avx_2 :: Bool
avx_2 = 0 /= U.unsafePerformIO F.enabled_avx_2
-- | 'True' when 'F.enabled_sse_4_2' reports a non-zero value.
--
-- The query is run through 'U.unsafePerformIO'; the @NOINLINE@ pragma keeps
-- GHC from inlining that call into use sites.
{-# NOINLINE sse_4_2 #-}
sse_4_2 :: Bool
sse_4_2 = 0 /= U.unsafePerformIO F.enabled_sse_4_2
-- | 'True' when 'F.enabled_bmi_2' reports a non-zero value.
--
-- The query is run through 'U.unsafePerformIO'; the @NOINLINE@ pragma keeps
-- GHC from inlining that call into use sites.
{-# NOINLINE bmi_2 #-}
bmi_2 :: Bool
bmi_2 = 0 /= U.unsafePerformIO F.enabled_bmi_2

-- | Calls the C function @hw_json_simd_avx2_enabled@ (bound through a c2hs
-- call hook) and converts its result to 'Int'.
{-# NOINLINE enabled_avx_2 #-}
enabled_avx_2 :: IO Int
enabled_avx_2 =
  fmap fromIntegral {#call unsafe hw_json_simd_avx2_enabled as c_hw_json_simd_avx2_enabled#}
-- | Calls the C function @hw_json_simd_sse4_2_enabled@ (bound through a c2hs
-- call hook) and converts its result to 'Int'.
{-# NOINLINE enabled_sse_4_2 #-}
enabled_sse_4_2 :: IO Int
enabled_sse_4_2 =
  fmap fromIntegral {#call unsafe hw_json_simd_sse4_2_enabled as c_hw_json_simd_sse4_2_enabled#}
-- | Calls the C function @hw_json_simd_bmi2_enabled@ (bound through a c2hs
-- call hook) and converts its result to 'Int'.
{-# NOINLINE enabled_bmi_2 #-}
enabled_bmi_2 :: IO Int
enabled_bmi_2 =
  fmap fromIntegral {#call unsafe hw_json_simd_bmi2_enabled as c_hw_json_simd_bmi2_enabled#}

/* Reports whether this translation unit was compiled with __AVX2__ defined.
 * Returns 1 if the macro is defined, 0 otherwise. */
int hw_json_simd_avx2_enabled() {
#if defined(__AVX2__)
  const int enabled = 1;
#else
  const int enabled = 0;
#endif
  return enabled;
}
/* Reports whether this translation unit was compiled with __BMI2__ defined.
 * Returns 1 if the macro is defined, 0 otherwise. */
int hw_json_simd_bmi2_enabled() {
#if defined(__BMI2__)
  const int enabled = 1;
#else
  const int enabled = 0;
#endif
  return enabled;
}
/* Reports whether this translation unit was compiled with SSE4.2 enabled.
 * Returns 1 if the compiler defines __SSE4_2__, 0 otherwise.
 *
 * The check must test __SSE4_2__, not __BMI2__: the two instruction-set
 * extensions are enabled independently, so testing the BMI2 macro would
 * misreport SSE4.2 availability whenever only one of them is switched on. */
int hw_json_simd_sse4_2_enabled() {
#ifdef __SSE4_2__
  return 1;
#else
  return 0;
#endif
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

1 participant