diff --git a/.idea/Rust-Programming.iml b/.idea/Rust-Programming.iml index a737a93..4779fc9 100644 --- a/.idea/Rust-Programming.iml +++ b/.idea/Rust-Programming.iml @@ -5,8 +5,10 @@ + + diff --git a/sparse_vector/Cargo.toml b/sparse_vector/Cargo.toml new file mode 100644 index 0000000..4803dee --- /dev/null +++ b/sparse_vector/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "sparse_vector" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +rand = "0.8.5" +futures = "0.3.28" \ No newline at end of file diff --git a/sparse_vector/src/main.rs b/sparse_vector/src/main.rs new file mode 100644 index 0000000..cc51f0c --- /dev/null +++ b/sparse_vector/src/main.rs @@ -0,0 +1,116 @@ +use std::ops::{Add, Mul}; +use std::thread; +use futures::executor::block_on; +use rand::Rng; +use futures::future::{join_all}; + +/// Only stores more efficiently when at least 50% of all elements are zeros +pub struct SparseVec { + column: Vec<(usize, f32)> +} + +impl SparseVec { + + pub fn dot(&self, other: &SparseVec) -> f32 { + + let future = async move { + let divisions = 128; + + let k = self.column.len() / divisions; + + let mut futures = Vec::new(); + + for i in 0..divisions { + let off = i * k; + futures.push(dot_threaded(&self.column[off..(off + k)], &other.column[..])); + } + + join_all(futures).await + }; + + let result = block_on(future); + + block_on(async move { + let divisions = 16; + + let k = result.len() / divisions; + + let mut futures = Vec::new(); + + for i in 0..divisions { + let off = i * k; + futures.push(sum_async(&result[off..(off + k)])); + } + + join_all(futures).await + }).iter().fold(0.0, |acc, x| acc + x) + } + + pub fn new(elements: usize, null_prop: f32) -> Self { + let non_zero_elements = (elements as f32 * (1.0 - null_prop)) as usize; + + let mut column = Vec::with_capacity(non_zero_elements); + + let mut rng = rand::thread_rng(); + let mut last_idx = 0; + + for _ in 0..non_zero_elements { + last_idx = rng.gen_range(last_idx..elements); + column.push((last_idx, rng.gen_range(0.001..1.0))) + } + + Self { + column + } + } +} + +async fn sum_async(arr: &[f32]) -> f32 { + arr.iter().fold(0.0, |acc, x| acc + x) +} + +async fn dot_threaded(a: &[(usize, f32)], b: &[(usize, f32)]) -> f32 { + let mut sum = 0.0; + + for pair in a.iter() { + + // exponential search for an element in the second vector to have the same index + let mut bound = 1; + loop { + if bound >= b.len() || b[bound].1 >= pair.1 { + break; + } + + bound *= 2; + } + + let mut range = 0..bound; + loop { + let mut median = (range.end - range.start) / 2; + if median == 0 { + break; + } + median += range.start; + + if b[median].0 == pair.0 { + sum += b[median].1 * pair.1; + break; + } + + if b[median].0 > pair.0 { + range.end = median; + } else { + range.start = median; + } + } + } + + sum +} + +fn main() { + // generate a sparse vector with 10^10 random elements + let vec = SparseVec::new(10_000_000_000, 0.99); + + println!("{}", vec.dot(&vec)); +} \ No newline at end of file