fixed memory benchmarking

This commit is contained in:
Sven Vogel 2023-04-30 17:34:23 +02:00
parent 7174c6e423
commit 8b685b2128
1 changed file with 36 additions and 18 deletions

View File

@ -32,15 +32,9 @@ impl SparseVec {
pub fn new(elements: usize, non_null: f64) -> Self { pub fn new(elements: usize, non_null: f64) -> Self {
let non_zero_elements = (elements as f64 * non_null) as usize; let non_zero_elements = (elements as f64 * non_null) as usize;
let heap_element_size = std::mem::size_of::<f64>() + std::mem::size_of::<usize>();
println!("Estimated size on heap: {}", ByteSize::b((non_zero_elements * heap_element_size) as u64));
println!("allocating...");
let mut values = Vec::with_capacity(non_zero_elements); let mut values = Vec::with_capacity(non_zero_elements);
let mut indices = Vec::with_capacity(non_zero_elements); let mut indices = Vec::with_capacity(non_zero_elements);
println!("generating some data...");
let mut rng = rand::thread_rng(); let mut rng = rand::thread_rng();
for i in 0..non_zero_elements { for i in 0..non_zero_elements {
@ -57,8 +51,8 @@ impl SparseVec {
} }
} }
#[inline]
fn binary_search(target: usize, indices: &[usize], values: &[f64]) -> f64 { fn binary_search(target: usize, indices: &[usize], values: &[f64]) -> f64 {
let mut range = 0..indices.len(); let mut range = 0..indices.len();
loop { loop {
let mut median = (range.end - range.start) >> 1; let mut median = (range.end - range.start) >> 1;
@ -69,7 +63,9 @@ fn binary_search(target: usize, indices: &[usize], values: &[f64]) -> f64 {
if indices[median] == target { if indices[median] == target {
return values[median]; return values[median];
} else if indices[median] > target { }
if indices[median] > target {
range.end = median; range.end = median;
} else { } else {
range.start = median; range.start = median;
@ -79,20 +75,42 @@ fn binary_search(target: usize, indices: &[usize], values: &[f64]) -> f64 {
0.0 0.0
} }
fn main() { macro_rules! time {
let now = Instant::now(); ($name:literal, $block:expr) => {{
// generate a sparse vector with 10^10 random elements let start = Instant::now();
// but only with 2% of them being non-null $block;
let vec = SparseVec::new(10_usize.pow(10), 0.02); println!("{} took {}s", $name, start.elapsed().as_secs_f64());
println!("Created sparse vector took: {}s", Instant::now().sub(now).as_secs_f32()); }}
}
println!("Sparse vector stack bytes: {} B", std::mem::size_of_val(&vec)); fn main() {
/// Theoretical size of the vector in elements
/// This would mean that we would require about 80 GB of memory (8 bytes per `f64`) to store a single dense vector
const VECTOR_SIZE: usize = 10_000_000_000;
/// Ratio of non-null elements to the total number of elements per vector;
/// this means that only `NULL_NON_NULL_RATIO * 100%` of the elements of every vector will
/// be non-null and thus actually stored
const NULL_NON_NULL_RATIO: f64 = 0.02;
let non_zero_elements = (VECTOR_SIZE as f64 * NULL_NON_NULL_RATIO) as usize;
let heap_element_size = std::mem::size_of::<f64>() + std::mem::size_of::<usize>();
println!("Estimated size on heap: {}", ByteSize::b((non_zero_elements * heap_element_size) as u64));
println!("Size on stack: {} B", std::mem::size_of::<SparseVec>());
let mut vec: SparseVec;
time!("Sparse vector creation", {
// generate a vector
vec = SparseVec::new(VECTOR_SIZE, NULL_NON_NULL_RATIO);
});
// many statistics are cached and only updated when the epoch is advanced. // many statistics are cached and only updated when the epoch is advanced.
epoch::advance().unwrap(); epoch::advance().unwrap();
println!("Heap allocated bytes (total): {}", ByteSize::b(stats::allocated::read().unwrap() as u64)); println!("Heap allocated bytes (total): {}", ByteSize::b(stats::allocated::read().unwrap() as u64));
let now = Instant::now(); time!("Sparse vector dot product", {
vec.dot(&vec); vec.dot(&vec);
println!("Dot product took: {}s", Instant::now().sub(now).as_secs_f32()); });
} }