1
0
mirror of https://github.com/osmarks/meme-search-engine.git synced 2025-11-07 02:33:03 +00:00
This commit is contained in:
osmarks
2025-01-29 14:48:15 +00:00
parent 3b2664ca98
commit 5215822e39
3 changed files with 36 additions and 3 deletions

22
Cargo.lock generated
View File

@@ -1982,6 +1982,15 @@ dependencies = [
"imgref", "imgref",
] ]
[[package]]
name = "mach2"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b955cdeb2a02b9117f121ce63aa52d08ade45de53e48fe6a38b39c10f6f709"
dependencies = [
"libc",
]
[[package]] [[package]]
name = "matchers" name = "matchers"
version = "0.1.0" version = "0.1.0"
@@ -2096,6 +2105,7 @@ dependencies = [
"num_cpus", "num_cpus",
"prometheus", "prometheus",
"regex", "regex",
"region",
"reqwest", "reqwest",
"rmp-serde", "rmp-serde",
"seahash", "seahash",
@@ -3061,6 +3071,18 @@ version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]]
name = "region"
version = "3.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6b6ebd13bc009aef9cd476c1310d49ac354d36e240cf1bd753290f3dc7199a7"
dependencies = [
"bitflags 1.3.2",
"libc",
"mach2",
"windows-sys 0.52.0",
]
[[package]] [[package]]
name = "reqwest" name = "reqwest"
version = "0.12.8" version = "0.12.8"

View File

@@ -64,6 +64,7 @@ monoio-compat = { version = "0.2", features = ["hyper"] }
http-body-util = "0.1" http-body-util = "0.1"
matrixmultiply = "0.3" matrixmultiply = "0.3"
bitvec = "1" bitvec = "1"
region = "3"
[[bin]] [[bin]]
name = "reddit-dump" name = "reddit-dump"

View File

@@ -45,7 +45,9 @@ struct CLIArguments {
#[argh(switch, description="always use full-precision vectors (slow)")] #[argh(switch, description="always use full-precision vectors (slow)")]
disable_pq: bool, disable_pq: bool,
#[argh(option, short='c', description="server config file")] #[argh(option, short='c', description="server config file")]
config_path: Option<String> config_path: Option<String>,
#[argh(switch, short='l', description="lock memory")]
lock_memory: bool
} }
#[derive(Deserialize, Clone)] #[derive(Deserialize, Clone)]
@@ -657,12 +659,20 @@ async fn main() -> Result<()> {
let pq_codes = unsafe { let pq_codes = unsafe {
// This is unsafe because other processes could in principle edit the mmap'd file. // This is unsafe because other processes could in principle edit the mmap'd file.
// It would be annoying to do anything about this possibility, so ignore it. // It would be annoying to do anything about this possibility, so ignore it.
MmapOptions::new().populate().map(&pq_codes_file)? MmapOptions::new().populate().map_copy_read_only(&pq_codes_file)?
}; };
// contains metadata descriptors // contains metadata descriptors
let descriptors_file = fs::File::open(index_path.join("index.descriptor-codes.bin")).await?; let descriptors_file = fs::File::open(index_path.join("index.descriptor-codes.bin")).await?;
let descriptors = unsafe { let descriptors = unsafe {
MmapOptions::new().populate().map(&descriptors_file)? MmapOptions::new().populate().map_copy_read_only(&descriptors_file)?
};
let _guards = if args.lock_memory {
let g1 = region::lock(descriptors.as_ptr(), descriptors.len())?;
let g2 = region::lock(pq_codes.as_ptr(), pq_codes.len())?;
Some((g1, g2))
} else {
None
}; };
println!("{} items {} dead {} shards", header.count, header.dead_count, header.shards.len()); println!("{} items {} dead {} shards", header.count, header.dead_count, header.shards.len());