HNSW#
HNSW is a graph-based nearest neighbors implementation for the CPU. This implementation provides the ability to serialize a CAGRA graph and read it as a base-layer-only hnswlib graph.
#include <raft/neighbors/hnsw.hpp>
namespace raft::neighbors::hnsw
-
template<typename T, typename IdxT>
std::unique_ptr<index<T>> from_cagra(raft::resources const &res, raft::neighbors::cagra::index<T, IdxT> cagra_index)# Construct an hnswlib base-layer-only index from a CAGRA index NOTE: 1. This method uses the filesystem to write the CAGRA index in
/tmp/cagra_index.bin
before reading it as an hnswlib index, then deleting the temporary file. 2. This function is only offered as a compiled symbol in
libraft.so
Usage example:
// Build a CAGRA index
using namespace raft::neighbors;
// use default index parameters
cagra::index_params index_params;
// create and fill the index from a [N, D] dataset
auto index = cagra::build(res, index_params, dataset);
// Load CAGRA index as base-layer-only hnswlib index
auto hnsw_index = hnsw::from_cagra(res, index);
- Template Parameters:
T – data element type
IdxT – type of the indices
- Parameters:
res – [in] raft resources
cagra_index – [in] cagra index
-
template<>
std::unique_ptr<index<float>> from_cagra(raft::resources const &res, raft::neighbors::cagra::index<float, uint32_t> cagra_index)#
-
template<>
std::unique_ptr<index<int8_t>> from_cagra(raft::resources const &res, raft::neighbors::cagra::index<int8_t, uint32_t> cagra_index)#
-
template<>
std::unique_ptr<index<uint8_t>> from_cagra(raft::resources const &res, raft::neighbors::cagra::index<uint8_t, uint32_t> cagra_index)#
-
template<typename T>
void search(raft::resources const &res, const search_params &params, const index<T> &idx, raft::host_matrix_view<const T, int64_t, row_major> queries, raft::host_matrix_view<uint64_t, int64_t, row_major> neighbors, raft::host_matrix_view<float, int64_t, row_major> distances)# Search hnswlib base-layer-only index constructed from a CAGRA index.
Usage example:
// Build a CAGRA index
using namespace raft::neighbors;
// use default index parameters
cagra::index_params index_params;
// create and fill the index from a [N, D] dataset
auto index = cagra::build(res, index_params, dataset);
// Save CAGRA index as base layer only hnswlib index
hnsw::serialize(res, "my_index.bin", index);
// Load CAGRA index as base layer only hnswlib index
// (deserialize returns a std::unique_ptr to the hnsw index)
auto hnsw_index = hnsw::deserialize(res, "my_index.bin", D, raft::distance::L2Expanded);
// Search K nearest neighbors as an hnswlib index
// using host threads for concurrency
hnsw::search_params search_params;
search_params.ef = 50;  // ef >= K
search_params.num_threads = 10;
auto neighbors = raft::make_host_matrix<uint64_t>(res, n_queries, k);
auto distances = raft::make_host_matrix<float>(res, n_queries, k);
hnsw::search(res, search_params, *hnsw_index, queries, neighbors, distances);
// hnsw_index is owned by a std::unique_ptr and is de-allocated automatically
// when it goes out of scope; no explicit delete is needed
- Template Parameters:
T – data element type
IdxT – type of the indices
- Parameters:
res – [in] raft resources
params – [in] configure the search
idx – [in] hnswlib base-layer-only index constructed from a CAGRA index
queries – [in] a host matrix view to a row-major matrix [n_queries, index->dim()]
neighbors – [out] a host matrix view to the indices of the neighbors in the source dataset [n_queries, k]
distances – [out] a host matrix view to the distances to the selected neighbors [n_queries, k]
-
struct search_params : public raft::neighbors::ann::search_params#
- #include <hnsw_types.hpp>
-
template<typename T>
struct index : public raft::neighbors::ann::index# - #include <hnsw_types.hpp>
Public Functions
-
inline index(int dim, raft::distance::DistanceType metric)#
Load a base-layer-only hnswlib index originally saved from a built CAGRA index. This is an abstract class and it cannot be used directly. To create an index, use the factory function
raft::neighbors::hnsw::from_cagra
from the header raft/neighbors/hnsw.hpp
- Parameters:
dim – [in] dimensions of the training dataset
metric – [in] distance metric to search. Supported metrics (“L2Expanded”, “InnerProduct”)
-
virtual auto get_index() const -> void const* = 0#
Get underlying index.
-
inline index(int dim, raft::distance::DistanceType metric)#
Serializer Methods#
#include <raft/neighbors/hnsw_serialize.cuh>
namespace raft::neighbors::hnsw
-
template<typename T>
std::unique_ptr<index<T>> deserialize(raft::resources const &handle, const std::string &filename, int dim, raft::distance::DistanceType metric)# Load an hnswlib index which was serialized from a CAGRA index
Experimental, both the API and the serialization format are subject to change.
#include <raft/core/resources.hpp>
raft::resources handle;
// create a string with a filepath
std::string filename("/path/to/index");
// dimensionality and distance metric of the serialized index
int dim = 10;
raft::distance::DistanceType metric = raft::distance::L2Expanded;
auto index = raft::neighbors::hnsw::deserialize<float>(handle, filename, dim, metric);
- Template Parameters:
T – data element type
- Parameters:
handle – [in] the raft handle
filename – [in] the name of the file from which the index is loaded
dim – [in] dimensionality of the index
metric – [in] metric used to build the index
- Returns:
std::unique_ptr<index<T>>