From 41b8a5d5333b39314cf400d90f9de18230b3857f Mon Sep 17 00:00:00 2001 From: teridax Date: Sun, 30 Apr 2023 18:13:07 +0200 Subject: [PATCH] added compressed index array --- sparse_vector/src/main.rs | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/sparse_vector/src/main.rs b/sparse_vector/src/main.rs index 948087c..56fa6d4 100644 --- a/sparse_vector/src/main.rs +++ b/sparse_vector/src/main.rs @@ -10,10 +10,9 @@ use jemalloc_ctl::{stats, epoch}; #[global_allocator] static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; -/// Only stores more efficiently when at least 50% of all elements are zeros pub struct SparseVec { values: Vec, - indices: Vec, + indices: Vec, } impl SparseVec { @@ -22,8 +21,9 @@ impl SparseVec { let mut sum = 0.0; for index in 0..other.indices.len() { + let uncompressed_index = decompress_index(index, &self.indices); // exponential search for an element in the second vector to have the same index - sum += binary_search(self.indices[index], &other.indices, &other.values) * self.values[index]; + sum += binary_search(uncompressed_index, &other.indices, &other.values) * self.values[index]; } sum @@ -37,11 +37,15 @@ impl SparseVec { let mut rng = rand::thread_rng(); + let mut last_idx = 0; + for i in 0..non_zero_elements { values.push(0.5); - let idx = i as f32 / non_zero_elements as f32 * (elements as f32 - 4.0) + rng.gen_range(0.0..3.0); - indices.push(idx as usize); + let new_idx = i as f32 / non_zero_elements as f32 * (elements as f32 - 4.0) + rng.gen_range(0.0..3.0); + let compressed_idx = new_idx as usize - last_idx; + last_idx = new_idx as usize; + indices.push(compressed_idx as u16); } Self { @@ -52,7 +56,16 @@ impl SparseVec { } #[inline] -fn binary_search(target: usize, indices: &[usize], values: &[f64]) -> f64 { +fn decompress_index(index: usize, indices: &[u16]) -> usize { + let mut sum = 0; + for i in 0..=index { + sum += indices[i] as usize; + } + sum +} + +#[inline] +fn binary_search(target: usize, indices: &[u16], values: &[f64]) -> f64 { let mut range = 0..indices.len(); loop { let mut median = (range.end - range.start) >> 1; @@ -61,11 +74,13 @@ fn binary_search(target: usize, indices: &[usize], values: &[f64]) -> f64 { } median += range.start; - if indices[median] == target { + let index = decompress_index(median, indices); + + if index == target { return values[median]; } - if indices[median] > target { + if index > target { range.end = median; } else { range.start = median; @@ -94,7 +109,7 @@ fn main() { const NULL_NON_NULL_RATIO: f64 = 0.02; let non_zero_elements = (VECTOR_SIZE as f64 * NULL_NON_NULL_RATIO) as usize; - let heap_element_size = std::mem::size_of::() + std::mem::size_of::(); + let heap_element_size = std::mem::size_of::() + std::mem::size_of::(); println!("Estimated size on heap: {}", ByteSize::b((non_zero_elements * heap_element_size) as u64)); println!("Size on stack: {} B", std::mem::size_of::());