added async dot product for sparse vector
This commit is contained in:
parent
6c4ffeddab
commit
205c973ff6
|
@ -5,8 +5,10 @@
|
|||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/duplicates/src" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/str_sort/src" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/sparse_vector/src" isTestSource="false" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/duplicates/target" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/str_sort/target" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/sparse_vector/target" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
[package]
|
||||
name = "sparse_vector"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
rand = "0.8.5"
|
||||
futures = "0.3.28"
|
|
@ -0,0 +1,116 @@
|
|||
use std::ops::{Add, Mul};
|
||||
use std::thread;
|
||||
use futures::executor::block_on;
|
||||
use rand::Rng;
|
||||
use futures::future::{join_all};
|
||||
|
||||
/// Sparse vector of `f32` values stored as `(index, value)` pairs.
///
/// Only the stored entries take up memory, so this layout pays off only when
/// most elements are zero. Every entry carries a `usize` index next to its
/// `f32` value: with 8-byte entries (32-bit targets) the break-even point is
/// about 50% zeros, while on typical 64-bit targets an entry occupies 16 bytes
/// versus 4 bytes for a dense `f32`, so roughly 75% zeros are needed.
#[derive(Debug, Clone, PartialEq)]
pub struct SparseVec {
    /// Stored entries as `(index, value)` pairs. The lookups in this file
    /// search this list by index, so it is expected to be ordered by index.
    column: Vec<(usize, f32)>,
}
|
||||
|
||||
impl SparseVec {
|
||||
|
||||
pub fn dot(&self, other: &SparseVec) -> f32 {
|
||||
|
||||
let future = async move {
|
||||
let divisions = 128;
|
||||
|
||||
let k = self.column.len() / divisions;
|
||||
|
||||
let mut futures = Vec::new();
|
||||
|
||||
for i in 0..divisions {
|
||||
let off = i * k;
|
||||
futures.push(dot_threaded(&self.column[off..(off + k)], &other.column[..]));
|
||||
}
|
||||
|
||||
join_all(futures).await
|
||||
};
|
||||
|
||||
let result = block_on(future);
|
||||
|
||||
block_on(async move {
|
||||
let divisions = 16;
|
||||
|
||||
let k = result.len() / divisions;
|
||||
|
||||
let mut futures = Vec::new();
|
||||
|
||||
for i in 0..divisions {
|
||||
let off = i * k;
|
||||
futures.push(sum_async(&result[off..(off + k)]));
|
||||
}
|
||||
|
||||
join_all(futures).await
|
||||
}).iter().fold(0.0, |acc, x| acc + x)
|
||||
}
|
||||
|
||||
pub fn new(elements: usize, null_prop: f32) -> Self {
|
||||
let non_zero_elements = (elements as f32 * (1.0 - null_prop)) as usize;
|
||||
|
||||
let mut column = Vec::with_capacity(non_zero_elements);
|
||||
|
||||
let mut rng = rand::thread_rng();
|
||||
let mut last_idx = 0;
|
||||
|
||||
for _ in 0..non_zero_elements {
|
||||
last_idx = rng.gen_range(last_idx..elements);
|
||||
column.push((last_idx, rng.gen_range(0.001..1.0)))
|
||||
}
|
||||
|
||||
Self {
|
||||
column
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds up all values of `arr` from left to right, starting at `0.0`.
///
/// Returns `0.0` for an empty slice. The body has no await points; the
/// function is `async` only so that callers can combine it with other futures.
async fn sum_async(arr: &[f32]) -> f32 {
    let mut total = 0.0_f32;
    for &value in arr {
        total += value;
    }
    total
}
|
||||
|
||||
/// Computes the partial dot product of the sparse entries `a` with `b`.
///
/// For every `(index, value)` entry of `a`, `b` is searched for an entry with
/// the same index; if one is found, the product of the two values is added to
/// the result. Entries of `a` without a counterpart in `b` contribute nothing.
///
/// `b` must be sorted by index in ascending order, because the lookup is a
/// binary search. `a` may be in any order. If `b` contains the same index
/// more than once, exactly one of those entries is used for a given lookup.
///
/// Returns `0.0` when either slice is empty. The body has no await points;
/// the function is `async` only so that callers can combine several calls.
async fn dot_threaded(a: &[(usize, f32)], b: &[(usize, f32)]) -> f32 {
    a.iter()
        .filter_map(|&(index, value)| {
            // Compare by index (field 0), never by value, and let the
            // standard library handle the range bounds, including index 0
            // and the last element.
            b.binary_search_by_key(&index, |&(other_index, _)| other_index)
                .ok()
                .map(|pos| b[pos].1 * value)
        })
        .fold(0.0, |sum, product| sum + product)
}
|
||||
|
||||
fn main() {
|
||||
// generate a sparse vector with 10^10 random elements
|
||||
let vec = SparseVec::new(10_000_000_000, 0.99);
|
||||
|
||||
println!("{}", vec.dot(&vec));
|
||||
}
|
Loading…
Reference in New Issue