added btreemap as storage
This commit is contained in:
parent
8b685b2128
commit
968959ade1
|
@ -1,10 +1,7 @@
|
||||||
use std::ops::{Add, Mul, Sub};
|
use std::collections::BTreeMap;
|
||||||
use std::thread;
|
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
use bytesize::ByteSize;
|
use bytesize::ByteSize;
|
||||||
use futures::executor::block_on;
|
|
||||||
use rand::Rng;
|
use rand::Rng;
|
||||||
use futures::future::{join_all};
|
|
||||||
use jemalloc_ctl::{stats, epoch};
|
use jemalloc_ctl::{stats, epoch};
|
||||||
|
|
||||||
#[global_allocator]
|
#[global_allocator]
|
||||||
|
@ -12,8 +9,7 @@ static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
||||||
|
|
||||||
/// Only stores more efficiently when at least 50% of all elements are zeros
|
/// Only stores more efficiently when at least 50% of all elements are zeros
|
||||||
pub struct SparseVec {
|
pub struct SparseVec {
|
||||||
values: Vec<f64>,
|
map: BTreeMap<usize, f64>
|
||||||
indices: Vec<usize>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SparseVec {
|
impl SparseVec {
|
||||||
|
@ -21,9 +17,8 @@ impl SparseVec {
|
||||||
pub fn dot(&self, other: &SparseVec) -> f64 {
|
pub fn dot(&self, other: &SparseVec) -> f64 {
|
||||||
let mut sum = 0.0;
|
let mut sum = 0.0;
|
||||||
|
|
||||||
for index in 0..other.indices.len() {
|
for (k, v) in self.map.iter() {
|
||||||
// exponential search for an element in the second vector to have the same index
|
sum += v * other.map.get(k).unwrap_or(&0.0);
|
||||||
sum += binary_search(self.indices[index], &other.indices, &other.values) * self.values[index];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sum
|
sum
|
||||||
|
@ -32,49 +27,22 @@ impl SparseVec {
|
||||||
pub fn new(elements: usize, non_null: f64) -> Self {
|
pub fn new(elements: usize, non_null: f64) -> Self {
|
||||||
let non_zero_elements = (elements as f64 * non_null) as usize;
|
let non_zero_elements = (elements as f64 * non_null) as usize;
|
||||||
|
|
||||||
let mut values = Vec::with_capacity(non_zero_elements);
|
let mut map = BTreeMap::new();
|
||||||
let mut indices = Vec::with_capacity(non_zero_elements);
|
|
||||||
|
|
||||||
let mut rng = rand::thread_rng();
|
let mut rng = rand::thread_rng();
|
||||||
|
|
||||||
for i in 0..non_zero_elements {
|
for i in 0..non_zero_elements {
|
||||||
values.push(0.5);
|
|
||||||
|
|
||||||
let idx = i as f32 / non_zero_elements as f32 * (elements as f32 - 4.0) + rng.gen_range(0.0..3.0);
|
let idx = i as f32 / non_zero_elements as f32 * (elements as f32 - 4.0) + rng.gen_range(0.0..3.0);
|
||||||
indices.push(idx as usize);
|
|
||||||
|
map.insert(idx as usize, 0.5);
|
||||||
}
|
}
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
values,
|
map
|
||||||
indices
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn binary_search(target: usize, indices: &[usize], values: &[f64]) -> f64 {
|
|
||||||
let mut range = 0..indices.len();
|
|
||||||
loop {
|
|
||||||
let mut median = (range.end - range.start) >> 1;
|
|
||||||
if median == 0 {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
median += range.start;
|
|
||||||
|
|
||||||
if indices[median] == target {
|
|
||||||
return values[median];
|
|
||||||
}
|
|
||||||
|
|
||||||
if indices[median] > target {
|
|
||||||
range.end = median;
|
|
||||||
} else {
|
|
||||||
range.start = median;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
0.0
|
|
||||||
}
|
|
||||||
|
|
||||||
macro_rules! time {
|
macro_rules! time {
|
||||||
($name:literal, $block:expr) => {{
|
($name:literal, $block:expr) => {{
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
@ -99,7 +67,7 @@ fn main() {
|
||||||
println!("Estimated size on heap: {}", ByteSize::b((non_zero_elements * heap_element_size) as u64));
|
println!("Estimated size on heap: {}", ByteSize::b((non_zero_elements * heap_element_size) as u64));
|
||||||
println!("Size on stack: {} B", std::mem::size_of::<SparseVec>());
|
println!("Size on stack: {} B", std::mem::size_of::<SparseVec>());
|
||||||
|
|
||||||
let mut vec: SparseVec;
|
let vec: SparseVec;
|
||||||
|
|
||||||
time!("Sparse vector creation", {
|
time!("Sparse vector creation", {
|
||||||
// generate a vector
|
// generate a vector
|
||||||
|
|
Loading…
Reference in New Issue