diff --git a/sparse_vector/Cargo.toml b/sparse_vector/Cargo.toml index ad670e4..a873e05 100644 --- a/sparse_vector/Cargo.toml +++ b/sparse_vector/Cargo.toml @@ -10,4 +10,7 @@ rand = "0.8.5" futures = "0.3.28" jemalloc-ctl = "0.5.0" jemallocator = "0.5.0" -bytesize = "1.2.0" \ No newline at end of file +bytesize = "1.2.0" + +[features] +binary_search = [] \ No newline at end of file diff --git a/sparse_vector/src/main.rs b/sparse_vector/src/main.rs index 4d01c44..e40ca53 100644 --- a/sparse_vector/src/main.rs +++ b/sparse_vector/src/main.rs @@ -1,7 +1,7 @@ -use std::time::Instant; use bytesize::ByteSize; +use jemalloc_ctl::{epoch, stats}; use rand::Rng; -use jemalloc_ctl::{stats, epoch}; +use std::time::Instant; #[global_allocator] static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; @@ -13,13 +13,35 @@ pub struct SparseVec { } impl SparseVec { - pub fn dot(&self, other: &SparseVec) -> f64 { let mut sum = 0.0; - for index in 0..other.indices.len() { - // exponential search for an element in the second vector to have the same index - sum += binary_search(self.indices[index], &other.indices, &other.values) * self.values[index]; + #[cfg(not(feature="binary_search"))] + { + let mut x = 0; + let mut y = 0; + + while x < self.indices.len() && y < other.indices.len() { + if self.indices[x] == other.indices[y] { + sum += self.values[x] * other.values[y]; + + x += 1; + y += 1; + } else if self.indices[x] > other.indices[y] { + y += 1; + } else { + x += 1; + } + } + } + + #[cfg(feature="binary_search")] + { + for index in 0..other.indices.len() { + // binary search for an element in the second vector to have the same index + sum += binary_search(self.indices[index], &other.indices, &other.values) + * self.values[index]; + } } sum @@ -36,14 +58,12 @@ impl SparseVec { for i in 0..non_zero_elements { values.push(0.5); - let idx = i as f32 / non_zero_elements as f32 * (elements as f32 - 4.0) + rng.gen_range(0.0..3.0); + let idx = i as f32 / non_zero_elements as f32 * (elements as f32 - 4.0) + + rng.gen_range(0.0..3.0); indices.push(idx as usize); } - Self { - values, - indices - } + Self { values, indices } } } @@ -76,11 +96,10 @@ macro_rules! time { let start = Instant::now(); $block; println!("{} took {}s", $name, start.elapsed().as_secs_f64()); - }} + }}; } fn main() { - /// Theoretical size of the vector in elements /// This would mean the we would require 10 GBs of memory to store a single vector const VECTOR_SIZE: usize = 10_000_000_000; @@ -92,7 +111,10 @@ fn main() { let non_zero_elements = (VECTOR_SIZE as f64 * NULL_NON_NULL_RATIO) as usize; let heap_element_size = std::mem::size_of::() + std::mem::size_of::(); - println!("Estimated size on heap: {}", ByteSize::b((non_zero_elements * heap_element_size) as u64)); + println!( + "Estimated size on heap: {}", + ByteSize::b((non_zero_elements * heap_element_size) as u64) + ); println!("Size on stack: {} B", std::mem::size_of::()); let vec: SparseVec; @@ -104,9 +126,12 @@ fn main() { // many statistics are cached and only updated when the epoch is advanced. epoch::advance().unwrap(); - println!("Heap allocated bytes (total): {}", ByteSize::b(stats::allocated::read().unwrap() as u64)); + println!( + "Heap allocated bytes (total): {}", + ByteSize::b(stats::allocated::read().unwrap() as u64) + ); time!("Sparse vector dot product", { vec.dot(&vec); }); -} \ No newline at end of file +}