Skip to content

Commit

Permalink
Search disk index fix (#20)
Browse files Browse the repository at this point in the history
* Change the default caching behavior in search_disk_index; adjust the scale factor used in the float-to-int8 conversion

* Add the int8_to_float_scale utility (int8_to_float_scale.cpp)
  • Loading branch information
harsha-simhadri authored Sep 19, 2021
1 parent 3e7d511 commit 10c1b3a
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 4 deletions.
6 changes: 3 additions & 3 deletions tests/search_disk_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,9 @@ int search_disk_index(int argc, char** argv) {
std::vector<uint32_t> node_list;
diskann::cout << "Caching " << num_nodes_to_cache
<< " BFS nodes around medoid(s)" << std::endl;
_pFlashIndex->cache_bfs_levels(num_nodes_to_cache, node_list);
// _pFlashIndex->generate_cache_list_from_sample_queries(
// warmup_query_file, 15, 6, num_nodes_to_cache, num_threads, node_list);
//_pFlashIndex->cache_bfs_levels(num_nodes_to_cache, node_list);
_pFlashIndex->generate_cache_list_from_sample_queries(
warmup_query_file, 15, 6, num_nodes_to_cache, num_threads, node_list);
_pFlashIndex->load_cache_list(node_list);
node_list.clear();
node_list.shrink_to_fit();
Expand Down
9 changes: 9 additions & 0 deletions tests/utils/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,15 @@ else()
target_link_libraries(int8_to_float ${PROJECT_NAME})
endif()

# Build the int8_to_float_scale utility from int8_to_float_scale.cpp.
add_executable(int8_to_float_scale int8_to_float_scale.cpp)
if(MSVC)
# MSVC: target x64 and link diskann_dll.lib from the debug or release library
# output directory, selected by the `debug` / `optimized` keywords.
target_link_options(int8_to_float_scale PRIVATE /MACHINE:x64)
target_link_libraries(int8_to_float_scale debug ${CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG}/diskann_dll.lib)
target_link_libraries(int8_to_float_scale optimized ${CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE}/diskann_dll.lib)
else()
# Other toolchains: link against ${PROJECT_NAME}.
target_link_libraries(int8_to_float_scale ${PROJECT_NAME})
endif()

add_executable(uint8_to_float uint8_to_float.cpp)
if(MSVC)
target_link_options(uint8_to_float PRIVATE /MACHINE:x64)
Expand Down
2 changes: 1 addition & 1 deletion tests/utils/float_bin_to_int8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ void block_convert(std::ofstream& writer, int8_t* write_buf,
for (_u64 i = 0; i < npts; i++) {
for (_u64 d = 0; d < ndims; d++) {
write_buf[d + i * ndims] =
(int8_t)((read_buf[d + i * ndims] - bias) * (256.0 / scale));
(int8_t)((read_buf[d + i * ndims] - bias) * (254.0 / scale));
}
}
writer.write((char*) write_buf, npts * ndims);
Expand Down
62 changes: 62 additions & 0 deletions tests/utils/int8_to_float_scale.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <vector>
#include "utils.h"

void block_convert(std::ofstream& writer, float* write_buf,
std::ifstream& reader, int8_t* read_buf, _u64 npts,
_u64 ndims, float bias, float scale) {
reader.read((char*) read_buf, npts * ndims * sizeof(int8_t));

for (_u64 i = 0; i < npts; i++) {
for (_u64 d = 0; d < ndims; d++) {
write_buf[d + i * ndims] =
(((float)read_buf[d + i * ndims] - bias) * scale);
}
}
writer.write((char*) write_buf, npts * ndims * sizeof(float));
}

int main(int argc, char** argv) {
if (argc != 5) {
std::cout << "Usage: " << argv[0] << " input-int8.bin output-float.bin bias scale"
<< std::endl;
exit(-1);
}

std::ifstream reader(argv[1], std::ios::binary);
_u32 npts_u32;
_u32 ndims_u32;
reader.read((char*) &npts_u32, sizeof(_s32));
reader.read((char*) &ndims_u32, sizeof(_s32));
size_t npts = npts_u32;
size_t ndims = ndims_u32;
std::cout << "Dataset: #pts = " << npts << ", # dims = " << ndims
<< std::endl;

_u64 blk_size = 131072;
_u64 nblks = ROUND_UP(npts, blk_size) / blk_size;

std::ofstream writer(argv[2], std::ios::binary);
auto read_buf = new int8_t[blk_size * ndims];
auto write_buf = new float[blk_size * ndims];
float bias = atof(argv[3]);
float scale = atof(argv[4]);

writer.write((char*) (&npts_u32), sizeof(_u32));
writer.write((char*) (&ndims_u32), sizeof(_u32));

for (_u64 i = 0; i < nblks; i++) {
_u64 cblk_size = std::min(npts - i * blk_size, blk_size);
block_convert(writer, write_buf, reader, read_buf, cblk_size, ndims, bias,
scale);
std::cout << "Block #" << i << " written" << std::endl;
}

delete[] read_buf;
delete[] write_buf;

writer.close();
reader.close();
}

0 comments on commit 10c1b3a

Please sign in to comment.