Brute-Force#
#include <raft/neighbors/brute_force.cuh>
namespace raft::neighbors::brute_force
-
template<typename value_t, typename idx_t>
inline void knn_merge_parts(raft::resources const &handle, raft::device_matrix_view<const value_t, idx_t, row_major> in_keys, raft::device_matrix_view<const idx_t, idx_t, row_major> in_values, raft::device_matrix_view<value_t, idx_t, row_major> out_keys, raft::device_matrix_view<idx_t, idx_t, row_major> out_values, size_t n_samples, std::optional<raft::device_vector_view<idx_t, idx_t>> translations = std::nullopt)# Performs a k-select across several (contiguous) row-partitioned index/distance matrices formatted like the following:
The example above shows what an aggregated index/distance matrix would look like with two partitions when n_samples=3 and k=4.part1row1: k0, k1, k2, k3 part1row2: k0, k1, k2, k3 part1row3: k0, k1, k2, k3 part2row1: k0, k1, k2, k3 part2row2: k0, k1, k2, k3 part2row3: k0, k1, k2, k3 etc...
When working with extremely large data sets that have been broken over multiple indexes, such as when computing over multiple GPUs, the ids will often start at 0 for each local knn index but the global ids need to be used when merging them together. An optional translations vector can be supplied to map the starting id of each partition to its global id so that the final merged knn is based on the global ids.
Usage example:
#include <raft/core/resources.hpp> #include <raft/neighbors/brute_force.cuh> using namespace raft::neighbors; raft::resources handle; ... compute multiple knn graphs and aggregate row-wise (see detailed description above) ... brute_force::knn_merge_parts(handle, in_keys, in_values, out_keys, out_values, n_samples);
- Template Parameters:
idx_t –
value_t –
- Parameters:
handle – [in]
in_keys – [in] matrix of input keys (size n_samples * n_parts * k)
in_values – [in] matrix of input values (size n_samples * n_parts * k)
out_keys – [out] matrix of output keys (size n_samples * k)
out_values – [out] matrix of output values (size n_samples * k)
n_samples – [in] number of rows in each partition
translations – [in] optional vector of starting global id mappings for each local partition
-
template<typename idx_t, typename value_t, typename matrix_idx, typename index_layout, typename search_layout, typename epilogue_op = raft::identity_op>
void knn(raft::resources const &handle, std::vector<raft::device_matrix_view<const value_t, matrix_idx, index_layout>> index, raft::device_matrix_view<const value_t, matrix_idx, search_layout> search, raft::device_matrix_view<idx_t, matrix_idx, row_major> indices, raft::device_matrix_view<value_t, matrix_idx, row_major> distances, distance::DistanceType metric = distance::DistanceType::L2Unexpanded, std::optional<float> metric_arg = std::make_optional<float>(2.0f), std::optional<idx_t> global_id_offset = std::nullopt, epilogue_op distance_epilogue = raft::identity_op())# Flat C++ API function to perform a brute force knn on a series of input arrays and combine the results into a single output array for indexes and distances. Inputs can be either row- or column-major but the output matrices will always be in row-major format.
Usage example:
#include <raft/core/resources.hpp> #include <raft/neighbors/brute_force.cuh> #include <raft/distance/distance_types.hpp> using namespace raft::neighbors; raft::resources handle; ... auto metric = raft::distance::DistanceType::L2SqrtExpanded; brute_force::knn(handle, index, search, indices, distances, metric);
- Parameters:
handle – [in] the cuml handle to use
index – [in] vector of device matrices (each size m_i*d) to be used as the knn index
search – [in] matrix (size n*d) to be used for searching the index
indices – [out] matrix (size n*k) to store output knn indices
distances – [out] matrix (size n*k) to store the output knn distance
metric – [in] distance metric to use. Euclidean (L2) is used by default
metric_arg – [in] the value of
p
for Minkowski (l-p) distances. This is ignored if the metric_type is not Minkowski.global_id_offset – [in] optional starting global id mapping for the local partition (assumes the index contains contiguous ids in the global id space)
distance_epilogue – [in] optional epilogue function to run after computing distances. This function takes a triple of the (value, rowid, colid) for each element in the pairwise distances and returns a transformed value back.
-
template<typename value_t, typename idx_t, typename idx_layout, typename query_layout>
void fused_l2_knn(raft::resources const &handle, raft::device_matrix_view<const value_t, idx_t, idx_layout> index, raft::device_matrix_view<const value_t, idx_t, query_layout> query, raft::device_matrix_view<idx_t, idx_t, row_major> out_inds, raft::device_matrix_view<value_t, idx_t, row_major> out_dists, raft::distance::DistanceType metric)# Compute the k-nearest neighbors using L2 expanded/unexpanded distance.
This is a specialized function for fusing the k-selection with the distance computation when k < 64. The value of k will be inferred from the number of columns in the output matrices.
Usage example:
#include <raft/core/resources.hpp> #include <raft/neighbors/brute_force.cuh> #include <raft/distance/distance_types.hpp> using namespace raft::neighbors; raft::resources handle; ... auto metric = raft::distance::DistanceType::L2SqrtExpanded; brute_force::fused_l2_knn(handle, index, search, indices, distances, metric);
- Template Parameters:
value_t – type of values
idx_t – type of indices
idx_layout – layout type of index matrix
query_layout – layout type of query matrix
- Parameters:
handle – [in] raft handle for sharing expensive resources
index – [in] input index array on device (size m * d)
query – [in] input query array on device (size n * d)
out_inds – [out] output indices array on device (size n * k)
out_dists – [out] output dists array on device (size n * k)
metric – [in] type of distance computation to perform (must be a variant of L2)
-
template<typename T, typename Accessor>
index<T> build(raft::resources const &res, mdspan<const T, matrix_extent<int64_t>, row_major, Accessor> dataset, raft::distance::DistanceType metric = distance::DistanceType::L2Unexpanded, T metric_arg = 0.0)# Build the index from the dataset for efficient search.
- Template Parameters:
T – data element type
- Parameters:
res – [in]
dataset – [in] a matrix view (host or device) to a row-major matrix [n_rows, dim]
metric – [in] distance metric to use. Euclidean (L2) is used by default
metric_arg – [in] the value of
p
for Minkowski (l-p) distances. This is ignored if the metric_type is not Minkowski.
- Returns:
the constructed brute force index
-
template<typename T, typename IdxT>
void search(raft::resources const &res, const index<T> &idx, raft::device_matrix_view<const T, int64_t, row_major> queries, raft::device_matrix_view<IdxT, int64_t, row_major> neighbors, raft::device_matrix_view<T, int64_t, row_major> distances)# Brute Force search using the constructed index.
- Template Parameters:
T – data element type
IdxT – type of the indices
- Parameters:
res – [in] raft resources
idx – [in] brute force index
queries – [in] a device matrix view to a row-major matrix [n_queries, index->dim()]
neighbors – [out] a device matrix view to the indices of the neighbors in the source dataset [n_queries, k]
distances – [out] a device matrix view to the distances to the selected neighbors [n_queries, k]
-
template<typename T>
struct index : public raft::neighbors::ann::index# - #include <brute_force_types.hpp>
Brute Force index.
The index stores the dataset and norms for the dataset in device memory.
- Template Parameters:
T – data element type
Public Functions
-
inline constexpr raft::distance::DistanceType metric() const noexcept#
Distance metric used for retrieval
-
inline constexpr int64_t size() const noexcept#
Total length of the index (number of vectors).
-
inline constexpr uint32_t dim() const noexcept#
Dimensionality of the data.
-
inline auto dataset() const noexcept -> device_matrix_view<const T, int64_t, row_major>#
Dataset [size, dim]
-
inline auto norms() const -> device_vector_view<const T, int64_t, row_major>#
Dataset norms
-
inline bool has_norms() const noexcept#
Whether or not this index has dataset norms
-
template<typename data_accessor>
inline index(raft::resources const &res, mdspan<const T, matrix_extent<int64_t>, row_major, data_accessor> dataset, std::optional<raft::device_vector<T, int64_t>> &&norms, raft::distance::DistanceType metric, T metric_arg = 0.0)# Construct a brute force index from dataset
Constructs a brute force index from a dataset. This lets us precompute norms for the dataset, providing a speed benefit over doing this at query time.
If the dataset is already in GPU memory, then this class stores a non-owning reference to the dataset. If the dataset is in host memory, it will be copied to the device and the index will own the device memory.
-
inline index(raft::resources const &res, raft::device_matrix_view<const T, int64_t, row_major> dataset_view, std::optional<raft::device_vector_view<const T, int64_t>> norms_view, raft::distance::DistanceType metric, T metric_arg = 0.0)#
Construct a brute force index from dataset
This class stores a non-owning reference to the dataset and norms here. Having precomputed norms gives us a performance advantage at query time.
-
template<typename T, typename IdxT = int64_t>
class batch_k_query# - #include <brute_force_types.hpp>
Interface for performing queries over values of k.
This interface lets you iterate over batches of k from a brute_force::index. This lets you do things like retrieve the first 100 neighbors for a query, apply post processing to remove any unwanted items and then if needed get the next 100 closest neighbors for the query.
This query interface exposes C++ iterators through the begin and end, and is compatible with range based for loops.
Note that this class is an abstract class without any cuda dependencies, meaning that it doesn’t require a cuda compiler to use - but also means it can’t be directly instantiated. See the raft::neighbors::brute_force::make_batch_k_query function for usage examples.
- Template Parameters:
T – data element type
IdxT – type of the indices in the source dataset
-
class iterator#
- #include <brute_force_types.hpp>
Public Functions
-
inline void advance(int64_t next_batch_size)#
Advance the iterator, using a custom size for the next batch.
Using operator++ means that we will load up the same batch_size for each batch. This method allows us to get around this restriction, and load up arbitrary batch sizes on each iteration. See raft::neighbors::brute_force::make_batch_k_query for a usage example.
- Parameters:
next_batch_size – [in] size of the next batch to load up
-
inline void advance(int64_t next_batch_size)#