added hashmap branch

This commit is contained in:
Sven Vogel 2023-04-30 17:53:23 +02:00
parent 8b685b2128
commit 0bbd3ddc4a
1 changed files with 7 additions and 36 deletions

View File

@ -1,3 +1,4 @@
use std::collections::HashMap;
use std::ops::{Add, Mul, Sub}; use std::ops::{Add, Mul, Sub};
use std::thread; use std::thread;
use std::time::Instant; use std::time::Instant;
@ -12,8 +13,7 @@ static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
/// Only stores more efficiently when at least 50% of all elements are zeros /// Only stores more efficiently when at least 50% of all elements are zeros
pub struct SparseVec { pub struct SparseVec {
values: Vec<f64>, data: HashMap<usize, f64>
indices: Vec<usize>,
} }
impl SparseVec { impl SparseVec {
@ -21,9 +21,8 @@ impl SparseVec {
pub fn dot(&self, other: &SparseVec) -> f64 { pub fn dot(&self, other: &SparseVec) -> f64 {
let mut sum = 0.0; let mut sum = 0.0;
for index in 0..other.indices.len() { for (k, v) in self.data.iter() {
// exponential search for an element in the second vector to have the same index sum += v * other.data.get(k).unwrap_or(&0.0);
sum += binary_search(self.indices[index], &other.indices, &other.values) * self.values[index];
} }
sum sum
@ -32,49 +31,21 @@ impl SparseVec {
pub fn new(elements: usize, non_null: f64) -> Self { pub fn new(elements: usize, non_null: f64) -> Self {
let non_zero_elements = (elements as f64 * non_null) as usize; let non_zero_elements = (elements as f64 * non_null) as usize;
let mut values = Vec::with_capacity(non_zero_elements); let mut data = HashMap::with_capacity(non_zero_elements);
let mut indices = Vec::with_capacity(non_zero_elements);
let mut rng = rand::thread_rng(); let mut rng = rand::thread_rng();
for i in 0..non_zero_elements { for i in 0..non_zero_elements {
values.push(0.5);
let idx = i as f32 / non_zero_elements as f32 * (elements as f32 - 4.0) + rng.gen_range(0.0..3.0); let idx = i as f32 / non_zero_elements as f32 * (elements as f32 - 4.0) + rng.gen_range(0.0..3.0);
indices.push(idx as usize); data.insert(idx as usize, 0.5);
} }
Self { Self {
values, data
indices
} }
} }
} }
/// Looks up the value paired with `target` in a sparse index/value layout.
///
/// `indices` must be sorted in ascending order and `values[i]` is the value
/// stored at position `indices[i]`. Returns the matching value, or `0.0` when
/// `target` is not present (an absent entry of a sparse vector is zero).
///
/// The previous hand-rolled loop stopped as soon as the remaining half-width
/// reached zero, which meant slot 0 was never probed: a target stored in the
/// first position (including any single-element slice) was reported as `0.0`.
/// Delegating to the standard slice search covers every slot.
///
/// If `indices` contains duplicates, the value of any one of the matching
/// positions may be returned.
///
/// # Panics
///
/// Panics if `values` is shorter than the position at which `target` is found.
#[inline]
fn binary_search(target: usize, indices: &[usize], values: &[f64]) -> f64 {
    match indices.binary_search(&target) {
        Ok(position) => values[position],
        // Not stored explicitly, so the element is an implicit zero.
        Err(_) => 0.0,
    }
}
macro_rules! time { macro_rules! time {
($name:literal, $block:expr) => {{ ($name:literal, $block:expr) => {{
let start = Instant::now(); let start = Instant::now();