1
0
mirror of https://github.com/osmarks/meme-search-engine.git synced 2026-06-02 10:52:18 +00:00

Multithread query server

While profiling suggests that most operations are cheap and IO-bound rather than CPU-bound, the GEMM for deduplication is pretty slow. As such, use multiple threads for higher throughput.
This commit is contained in:
osmarks
2025-01-31 13:47:47 +00:00
parent 5215822e39
commit e57931d47f
5 changed files with 130 additions and 86 deletions
+9 -3
View File
@@ -70,7 +70,10 @@ fn main() -> Result<()> {
l: 192,
maxc: 750,
alpha: 65200,
saturate_graph: false
saturate_graph: false,
query_breakpoint: vecs.len() as u32,
query_alpha: 65200,
max_add_per_stitch_iter: 0
};
let mut graph = IndexGraph::empty(vecs.len(), config.r);
@@ -105,13 +108,16 @@ fn main() -> Result<()> {
l: 200,
alpha: 65536,
maxc: 0,
saturate_graph: false
saturate_graph: false,
query_breakpoint: vecs.len() as u32,
query_alpha: 65200,
max_add_per_stitch_iter: 0
};
let mut scratch = Scratch::new(config);
for (i, vec) in tqdm::tqdm(vecs.iter().enumerate()) {
let ctr = greedy_search(&mut scratch, medioid, &vec, &vecs, &graph, config);
let ctr = greedy_search(&mut scratch, medioid, false, &vec, &vecs, &graph, config);
cmps_ctr += ctr.distances;
cmps.push(ctr.distances);
if scratch.neighbour_buffer.ids[0] == (i as u32) {
+24 -2
View File
@@ -1,5 +1,3 @@
use core::f32;
use half::f16;
use simsimd::SpatialSimilarity;
use fastrand::Rng;
@@ -420,6 +418,7 @@ pub fn scale_dot_result_f64(x: f64) -> i64 {
#[cfg(test)]
mod bench {
use super::*;
use half::slice::HalfFloatSliceExt;
use test::Bencher;
#[bench]
@@ -451,4 +450,27 @@ mod bench {
fast_dot_noprefetch(&a, &b)
});
}
/// Benchmarks `ProductQuantizer::preprocess_query` on a single query vector.
///
/// The quantizer is deserialized (MessagePack via `rmp_serde`) from the
/// relative path `opq.msgpack`; the benchmark panics if that file cannot be
/// read or decoded. The query is generated from an RNG seeded with `1`, so
/// the input is the same on every run.
#[bench]
fn bench_preprocess_query(be: &mut Bencher) {
    let mut rng = fastrand::Rng::with_seed(1);
    let serialized = std::fs::read("opq.msgpack").unwrap();
    let pq: ProductQuantizer = rmp_serde::from_slice(&serialized).unwrap();
    // Query has `pq.n_dims` components, converted to an f32 vector.
    let query = Vector::randn(&mut rng, pq.n_dims).to_f32_vec();
    // Only the preprocessing call is inside the timed closure.
    be.iter(|| pq.preprocess_query(&query));
}
/// Benchmarks `ProductQuantizer::asymmetric_dot_product` against a buffer of
/// random codes.
///
/// Setup (quantizer load from `opq.msgpack`, query generation, and
/// `preprocess_query`) happens once, outside the timed closure. The benchmark
/// panics if the quantizer file cannot be read or decoded.
#[bench]
fn bench_asymmetric_dot_product(be: &mut Bencher) {
    let mut rng = fastrand::Rng::with_seed(1);
    let serialized = std::fs::read("opq.msgpack").unwrap();
    let pq: ProductQuantizer = rmp_serde::from_slice(&serialized).unwrap();
    let query = Vector::randn(&mut rng, pq.n_dims).to_f32_vec();
    let lut = pq.preprocess_query(&query);
    // NOTE(review): presumably this is 100 encoded vectors, each holding
    // `n_dims / n_dims_per_code` codes — confirm against ProductQuantizer.
    let code_count = 100 * pq.n_dims / pq.n_dims_per_code;
    let mut pq_vectors = vec![0; code_count];
    // Random contents: the benchmark measures throughput, not accuracy.
    rng.fill(&mut pq_vectors);
    be.iter(|| pq.asymmetric_dot_product(&lut, &pq_vectors));
}
}