Skip to content

Commit

Permalink
storage: Enable simsimd for usearch (#224)
Browse files Browse the repository at this point in the history
Signed-off-by: Wish <breezewish@outlook.com>
Co-authored-by: JaySon <tshent@qq.com>
  • Loading branch information
breezewish and JaySon-Huang committed Aug 6, 2024
1 parent 3e20625 commit ec8a3d0
Show file tree
Hide file tree
Showing 10 changed files with 219 additions and 12 deletions.
5 changes: 3 additions & 2 deletions cmake/cpu_features.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ if (ARCH_AARCH64)
option (TIFLASH_ENABLE_ASIMD_SUPPORT "Enable Advanced SIMD support." ON)
option (TIFLASH_ENABLE_SVE_SUPPORT "Enable Scalable Vector Extension support." OFF)
# TODO: default ON, to be changed after CI is updated
option (NO_ARMV81_OR_HIGHER "Disable ARMv8.1 or higher on Aarch64 for maximum compatibility with older/embedded hardware." ON)
# Note: explicitly set to OFF for cse-branch
option (NO_ARMV81_OR_HIGHER "Disable ARMv8.1 or higher on Aarch64 for maximum compatibility with older/embedded hardware." OFF)

if (NO_ARMV81_OR_HIGHER)
# crc32 is optional in v8.0 and mandatory in v8.1. Enable it as __crc32()* is used in lots of places and even very old ARM CPUs
Expand Down Expand Up @@ -95,7 +96,7 @@ elseif (ARCH_AMD64)
# so we do not set the flags to avoid core dump in old machines
option (TIFLASH_ENABLE_AVX_SUPPORT "Use AVX/AVX2 instructions on x86_64" ON)
option (TIFLASH_ENABLE_AVX512_SUPPORT "Use AVX512 instructions on x86_64" ON)

# `haswell` was released in 2013 with the CPU features avx2 and bmi2. It's a practical target arch for the optimizer
option (TIFLASH_ENABLE_ARCH_HASWELL_SUPPORT "Use instructions based on architecture `haswell` on x86_64" ON)

Expand Down
13 changes: 13 additions & 0 deletions dbms/src/Common/TiFlashBuildInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

#include <Common/config.h>
#include <Common/config_version.h>
#include <Storages/DeltaMerge/Index/VectorIndexHNSW/SIMDFeatures.h>
#include <TiDB/Decode/Vector.h>
#include <common/config_common.h>
#include <fmt/core.h>
#include <fmt/format.h>
Expand Down Expand Up @@ -128,6 +130,17 @@ std::string getEnabledFeatures()
"fdo",
#endif
};
{
auto f = DB::DM::VectorIndexHNSWSIMDFeatures::get();
for (const auto & feature : f)
features.push_back(feature);
}
{
auto f = DB::VectorDistanceSIMDFeatures::get();
for (const auto & feature : f)
features.push_back(feature);
}

return fmt::format("{}", fmt::join(features.begin(), features.end(), " "));
}
// clang-format on
Expand Down
2 changes: 0 additions & 2 deletions dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@

#include <algorithm>
#include <ext/scope_guard.h>
#include <usearch/index.hpp>
#include <usearch/index_plugins.hpp>

namespace DB::ErrorCodes
{
Expand Down
3 changes: 1 addition & 2 deletions dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/Index.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@

#include <Storages/DeltaMerge/File/dtpb/dmfile.pb.h>
#include <Storages/DeltaMerge/Index/VectorIndex.h>

#include <usearch/index_dense.hpp>
#include <Storages/DeltaMerge/Index/VectorIndexHNSW/USearch.h>

namespace DB::DM
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <Common/Logger.h>
#include <Storages/DeltaMerge/Index/VectorIndexHNSW/SIMDFeatures.h>
#include <Storages/DeltaMerge/Index/VectorIndexHNSW/USearch.h>
#include <common/logger_useful.h>

namespace DB::DM
{

std::vector<std::string> VectorIndexHNSWSIMDFeatures::get()
{
auto m_l2 = unum::usearch::metric_punned_t(3, unum::usearch::metric_kind_t::l2sq_k);
auto m_cos = unum::usearch::metric_punned_t(3, unum::usearch::metric_kind_t::cos_k);
return {
fmt::format("hnsw.l2={}", m_l2.isa_name()),
fmt::format("hnsw.cosine={}", m_cos.isa_name()),
};
}

} // namespace DB::DM
28 changes: 28 additions & 0 deletions dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/SIMDFeatures.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include <vector>

namespace DB::DM
{

/// Exposes the SIMD implementation selected for the HNSW vector index so that
/// it can be listed alongside the other build/runtime feature labels.
class VectorIndexHNSWSIMDFeatures
{
public:
    /// Returns one label per HNSW distance metric, formatted as
    /// "hnsw.l2=<isa>" and "hnsw.cosine=<isa>".
    static std::vector<std::string> get();
};

} // namespace DB::DM
35 changes: 35 additions & 0 deletions dbms/src/Storages/DeltaMerge/Index/VectorIndexHNSW/USearch.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

// USearch is header only, so the configuration macros below must be defined
// before the usearch headers included at the end of this file. We don't use
// cmake to make these defines to avoid polluting all compile units.

#define USEARCH_USE_SIMSIMD 1
#define SIMSIMD_NATIVE_F16 0
#define SIMSIMD_NATIVE_BF16 0

// Explicitly select the SIMD targets. Only NEON and Haswell are switched on;
// every other target is set to 0 (reasons are noted per line where recorded).
#define SIMSIMD_TARGET_NEON 1
#define SIMSIMD_TARGET_SVE 0 // Clang13's header does not support enabling SVE for a region
#define SIMSIMD_TARGET_HASWELL 1
#define SIMSIMD_TARGET_SKYLAKE 0 // Clang13 does not support AVX512
#define SIMSIMD_TARGET_ICE 0
#define SIMSIMD_TARGET_GENOA 0
#define SIMSIMD_TARGET_SAPPHIRE 0

#include <usearch/index.hpp>
#include <usearch/index_dense.hpp>
#include <usearch/index_plugins.hpp>
101 changes: 96 additions & 5 deletions dbms/src/TiDB/Decode/Vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,28 @@
#include <IO/WriteHelpers.h>
#include <TiDB/Decode/Vector.h>

#include <compare>

// SIMSIMD is header only, so the configuration macros below must be defined
// before the simsimd include that follows them. We don't use cmake to make
// these defines to avoid polluting all compile units.

// Note: Be careful that usearch also includes simsimd with a customized config.
// Don't include simsimd and usearch at the same time. Otherwise, the effective
// config depends on the include order.
#define SIMSIMD_NATIVE_F16 0
#define SIMSIMD_NATIVE_BF16 0
#define SIMSIMD_DYNAMIC_DISPATCH 0

// Explicitly select the SIMD targets. Only NEON and Haswell are switched on;
// every other target is set to 0 (reasons are noted per line where recorded).
// We will do our own dynamic dispatch.
#define SIMSIMD_TARGET_NEON 1
#define SIMSIMD_TARGET_SVE 0 // Clang13's header does not support enabling SVE for a region
#define SIMSIMD_TARGET_HASWELL 1
#define SIMSIMD_TARGET_SKYLAKE 0 // Clang13 does not support AVX512
#define SIMSIMD_TARGET_ICE 0
#define SIMSIMD_TARGET_GENOA 0
#define SIMSIMD_TARGET_SAPPHIRE 0
#include <simsimd/simsimd.h>

#include <compare>

namespace DB
{

Expand All @@ -31,6 +47,36 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
} // namespace ErrorCodes

namespace simsimd_details
{

/// Returns the capability mask reported by
/// `simsimd_capabilities_implementation()`, computed once and cached.
///
/// The cache is a const function-local static initialized at its declaration,
/// so the first call is safe even when several threads race to make it. The
/// previous form assigned to a mutable static after an unsynchronized check,
/// which is a data race when called concurrently.
simsimd_capability_t simd_capabilities()
{
    static const simsimd_capability_t static_capabilities = simsimd_capabilities_implementation();
    return static_capabilities;
}

} // namespace simsimd_details

std::vector<std::string> VectorDistanceSIMDFeatures::get()
{
simsimd_capability_t caps = simsimd_details::simd_capabilities();
std::vector<std::string> ret{};
if (caps & simsimd_cap_neon_k)
ret.push_back("vec_distance=neon");
if (caps & simsimd_cap_sve_k)
ret.push_back("vec_distance=sve");
if (caps & simsimd_cap_sve2_k)
ret.push_back("vec_distance=sve2");
if (caps & simsimd_cap_haswell_k)
ret.push_back("vec_distance=haswell");
if (caps & simsimd_cap_skylake_k)
ret.push_back("vec_distance=skylake");
return ret;
}

VectorFloat32Ref::VectorFloat32Ref(const Float32 * elements, size_t n)
: elements(elements)
, elements_n(n)
Expand All @@ -56,8 +102,23 @@ Float64 VectorFloat32Ref::l2SquaredDistance(VectorFloat32Ref b) const
{
checkDims(b);

static simsimd_metric_punned_t metric = nullptr;
if (metric == nullptr)
{
simsimd_capability_t used_capability;
simsimd_find_metric_punned(
simsimd_metric_l2sq_k,
simsimd_datatype_f32_k,
simsimd_details::simd_capabilities(),
simsimd_cap_any_k,
&metric,
&used_capability);
if (!metric)
return std::numeric_limits<double>::quiet_NaN();
}

simsimd_distance_t distance;
simsimd_l2sq_f32(elements, b.elements, elements_n, &distance);
metric(elements, b.elements, elements_n, &distance);

return distance;
}
Expand All @@ -66,8 +127,23 @@ Float64 VectorFloat32Ref::innerProduct(VectorFloat32Ref b) const
{
checkDims(b);

static simsimd_metric_punned_t metric = nullptr;
if (metric == nullptr)
{
simsimd_capability_t used_capability;
simsimd_find_metric_punned(
simsimd_metric_dot_k,
simsimd_datatype_f32_k,
simsimd_details::simd_capabilities(),
simsimd_cap_any_k,
&metric,
&used_capability);
if (!metric)
return std::numeric_limits<double>::quiet_NaN();
}

simsimd_distance_t distance;
simsimd_dot_f32(elements, b.elements, elements_n, &distance);
metric(elements, b.elements, elements_n, &distance);

return distance;
}
Expand All @@ -76,8 +152,23 @@ Float64 VectorFloat32Ref::cosineDistance(VectorFloat32Ref b) const
{
checkDims(b);

static simsimd_metric_punned_t metric = nullptr;
if (metric == nullptr)
{
simsimd_capability_t used_capability;
simsimd_find_metric_punned(
simsimd_metric_cos_k,
simsimd_datatype_f32_k,
simsimd_details::simd_capabilities(),
simsimd_cap_any_k,
&metric,
&used_capability);
if (!metric)
return std::numeric_limits<double>::quiet_NaN();
}

simsimd_distance_t distance;
simsimd_cos_f32(elements, b.elements, elements_n, &distance);
metric(elements, b.elements, elements_n, &distance);

return distance;
}
Expand Down
6 changes: 6 additions & 0 deletions dbms/src/TiDB/Decode/Vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@
namespace DB
{

/// Exposes which SIMD targets are available to the vector distance functions,
/// as a list of printable feature labels.
class VectorDistanceSIMDFeatures
{
public:
/// Returns labels of the form "vec_distance=<target>", one per available
/// SIMD target; the list may be empty.
static std::vector<std::string> get();
};

class VectorFloat32Ref
{
public:
Expand Down
5 changes: 4 additions & 1 deletion libs/libdaemon/src/BaseDaemon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1179,7 +1179,10 @@ void BaseDaemon::logRevision() const
LOG_INFO(log, "Starting daemon with revision " + Poco::NumberFormatter::format(ClickHouseRevision::get()));
std::stringstream ss;
TiFlashBuildInfo::outputDetail(ss);
LOG_INFO(log, "TiFlash build info: {}", ss.str());

std::string line;
while (std::getline(ss, line, '\n'))
LOG_INFO(log, "{}", line);
}

/// Used for exitOnTaskError()
Expand Down

0 comments on commit ec8a3d0

Please sign in to comment.