pr-ferrisgroup/benches/multithreading.rs

//! Benchmarking functionality for [Criterion.rs](https://github.com/bheisler/criterion.rs).
//! This benchmark compares the performance of thread pools launched with different maximum
//! numbers of threads.
//! Each job calculates a partial dot product of two different vectors composed of 1,000,000 64-bit
//! double precision floating point values.

use std::sync::Arc;

use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use imsearch::multithreading::ThreadPool;

/// Number of elements in each vector used to calculate the dot product.
const VEC_ELEM_COUNT: usize = 1_000_000;
/// Thread (or job) counts to benchmark; every entry becomes one benchmark of a group.
const THREAD_COUNTS: [usize; 17] = [
    1, 2, 4, 6, 8, 10, 12, 16, 18, 20, 22, 26, 28, 32, 40, 56, 64,
];
/// Seeds added to the element index before hashing, one per vector, so that the two
/// vectors are filled from different hash inputs. These are just arbitrary constants.
const VEC_SEEDS: [u64; 2] = [0xa3f8347abce16, 0xa273048ca9dea];

/// Compute the dot product of two vectors.
///
/// The element-wise products are accumulated front to back, starting from `0.0`,
/// so two empty slices yield `0.0`.
///
/// # Panics
/// Panics if `a` and `b` do not have exactly the same length.
fn dot(a: &[f64], b: &[f64]) -> f64 {
    // Enforce the documented contract up front. Without this check a longer `b`
    // would be truncated silently instead of being reported as an error.
    assert_eq!(
        a.len(),
        b.len(),
        "dot: both vectors must have the same length"
    );

    a.iter().zip(b).fold(0.0, |sum, (x, y)| sum + x * y)
}

/// Computes the dot product using a thread pool created with `ThreadPool::with_limit(threads)`.
///
/// Both vectors are split into `threads` contiguous, nearly equally sized slices which together
/// cover every element. Each pair of slices is enqueued as its own job that computes a partial
/// dot product. After all jobs have finished, the partial dot products are summed to create the
/// final result, which is handed to `black_box` so the computation is not optimized away.
fn dot_parallel(a: Arc<Vec<f64>>, b: Arc<Vec<f64>>, threads: usize) {
    let mut pool = ThreadPool::with_limit(threads);

    // total number of elements that have to be distributed over all jobs
    let len = a.len();

    for i in 0..threads {
        // Derive both bounds from the total length: the remainder of `len / threads`
        // is spread over the slices instead of being dropped from the result.
        let start = i * len / threads;
        let end = (i + 1) * len / threads;
        // create a new strong reference to each vector for the job
        let aa = Arc::clone(&a);
        let bb = Arc::clone(&b);
        // enqueue the job computing the partial dot product of this slice
        pool.enqueue(move || dot(&aa[start..end], &bb[start..end]));
    }

    pool.join_all();

    black_box(pool.get_results().iter().sum::<f64>());
}

/// Compute a simple hash value for the given index value.
///
/// For finite `x` the result lies in the half-open interval `[0, 1)`.
#[inline]
fn hash(x: f64) -> f64 {
    // `fract` keeps the sign of its operand, so a negative scaled sine would produce a
    // negative result. Take the absolute value to stay inside the documented range.
    ((x * 234.8743 + 3.8274).sin() * 87624.58376)
        .fract()
        .abs()
}

/// Build a shared vector holding `size` 64-bit floating point numbers.
///
/// The element at index `i` is `hash(i as f64 + seed as f64)`, i.e. the seed shifts the
/// input handed to `hash`. The finished vector is returned wrapped in an `Arc`.
fn create_vec(size: usize, seed: u64) -> Arc<Vec<f64>> {
    let values: Vec<f64> = (0..size)
        .map(|index| hash(index as f64 + seed as f64))
        .collect();

    Arc::new(values)
}

/// Function for executing the thread pool benchmarks using criterion.rs.
/// It will create two different vectors and benchmark the single thread performance
/// as well as the multi threadded performance for varying amounts of threads.
pub fn bench_threadpool(c: &mut Criterion) {
    let vec_a = create_vec(VEC_ELEM_COUNT, VEC_SEEDS[0]);
    let vec_b = create_vec(VEC_ELEM_COUNT, VEC_SEEDS[1]);

    let mut group = c.benchmark_group("threadpool with various number of threads");

    for threads in THREAD_COUNTS.iter() {
        group.throughput(Throughput::Bytes(*threads as u64));
        group.bench_with_input(BenchmarkId::from_parameter(threads), threads, |b, _| {
            b.iter(|| {
                dot_parallel(vec_a.clone(), vec_b.clone(), *threads);
            });
        });
    }
    group.finish();
}

/// Computes the dot product with a thread pool that chooses its own number of threads.
///
/// The pool is created with `ThreadPool::new()`. The parameter `threads` is the number of
/// jobs that get enqueued, which may be smaller or larger than the amount of threads the
/// pool can offer. Both vectors are split into that many contiguous, nearly equally sized
/// slices which together cover every element; each job computes the partial dot product of
/// one slice pair. The summed result is handed to `black_box` so it is not optimized away.
fn pool_overusage(a: Arc<Vec<f64>>, b: Arc<Vec<f64>>, threads: usize) {
    // automatically choose the number of threads
    let mut pool = ThreadPool::new();

    // total number of elements that have to be distributed over all jobs
    let len = a.len();

    for i in 0..threads {
        // Derive both bounds from the total length: the remainder of `len / threads`
        // is spread over the slices instead of being dropped from the result.
        let start = i * len / threads;
        let end = (i + 1) * len / threads;
        // create a new strong reference to each vector for the job
        let aa = Arc::clone(&a);
        let bb = Arc::clone(&b);
        // enqueue the job computing the partial dot product of this slice
        pool.enqueue(move || dot(&aa[start..end], &bb[start..end]));
    }

    pool.join_all();

    black_box(pool.get_results().iter().sum::<f64>());
}

/// Benchmark the effects of over and underusing a thread pools thread capacity.
/// The thread pool will automatically choose the number of threads to use.
/// We will then run a custom number of jobs with that pool that may be smaller or larger
/// than the amount of threads the pool can offer.
pub fn bench_overusage(c: &mut Criterion) {
    let vec_a = create_vec(VEC_ELEM_COUNT, VEC_SEEDS[0]);
    let vec_b = create_vec(VEC_ELEM_COUNT, VEC_SEEDS[1]);

    let mut group = c.benchmark_group("threadpool overusage");

    for threads in THREAD_COUNTS.iter() {
        group.throughput(Throughput::Bytes(*threads as u64));
        group.bench_with_input(BenchmarkId::from_parameter(threads), threads, |b, _| {
            b.iter(|| {
                pool_overusage(vec_a.clone(), vec_b.clone(), *threads);
            });
        });
    }
    group.finish();
}

/// Benchmark the dot product of the two benchmark vectors computed by a direct call to
/// `dot`, without any thread pool involved.
pub fn bench_single_threaded(c: &mut Criterion) {
    let lhs = create_vec(VEC_ELEM_COUNT, VEC_SEEDS[0]);
    let rhs = create_vec(VEC_ELEM_COUNT, VEC_SEEDS[1]);

    c.bench_function("single threaded", |bencher| {
        bencher.iter(|| {
            // keep the result observable so the computation is not optimized away
            let product = dot(lhs.as_slice(), rhs.as_slice());
            black_box(product);
        });
    });
}

// Collect the three benchmark functions of this file in a group named `benches`.
criterion_group!(
    benches,
    bench_single_threaded,
    bench_threadpool,
    bench_overusage
);
// Generate the `main` function of the benchmark executable, which runs the `benches` group.
criterion_main!(benches);
finished benchmark for threadpool and fixed documentation for threadpool 2023-05-31 15:09:44 +00:00			`//! Benachmarking funcitonality for [Criterion.rs](https://github.com/bheisler/criterion.rs)`
			`//! This benchmark will compare the performance of various thread pools launched with different amounts of`
			`//! maximum threads.`
			`//! Each thread will calculate a partial dot product of two different vectors composed of 1,000,000 64-bit`
			`//! double precision floating point values.`
added benchmark for `threadpool` using `criterion`. 2023-05-31 07:00:49 +00:00
finished documetation for thread pool 2023-06-06 15:56:34 +00:00			`use std::sync::Arc;`
finished benchmark for threadpool and fixed documentation for threadpool 2023-05-31 15:09:44 +00:00
			`use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};`
added benchmark for `threadpool` using `criterion`. 2023-05-31 07:00:49 +00:00			`use imsearch::multithreading::ThreadPool;`

finished benchmark for threadpool and fixed documentation for threadpool 2023-05-31 15:09:44 +00:00			`/// Amount of elements per vector used to calculate the dot product`
			`const VEC_ELEM_COUNT: usize = 1_000_000;`
			`/// Number of threads to test`
			`const THREAD_COUNTS: [usize; 17] = [`
			`1, 2, 4, 6, 8, 10, 12, 16, 18, 20, 22, 26, 28, 32, 40, 56, 64,`
			`];`
			`/// seeds used to scramble up the values produced by the hash function for each vector`
			`/// these are just some pseudo random numbers`
			`const VEC_SEEDS: [u64; 2] = [0xa3f8347abce16, 0xa273048ca9dea];`

			`/// Compute the dot product of two vectors`
			`/// # Panics`
			`/// this function assumes both vectors to be of exactly the same length.`
			`/// If this is not the case the function will panic.`
added benchmark for `threadpool` using `criterion`. 2023-05-31 07:00:49 +00:00			`fn dot(a: &[f64], b: &[f64]) -> f64 {`
			`let mut sum = 0.0;`

			`for i in 0..a.len() {`
			`sum += a[i] * b[i];`
			`}`

			`sum`
			`}`

finished benchmark for threadpool and fixed documentation for threadpool 2023-05-31 15:09:44 +00:00			`/// Computes the dot product using a thread pool with varying number of threads. The vectors will be both splitted into equally`
			`/// sized slices which then get passed ot their own thread to compute the partial dot product. After all threads have`
			`/// finished the partial dot products will be summed to create the final result.`
			`fn dot_parallel(a: Arc<Vec<f64>>, b: Arc<Vec<f64>>, threads: usize) {`
finished documetation for thread pool 2023-06-06 15:56:34 +00:00			`let mut pool = ThreadPool::with_limit(threads);`
added benchmark for `threadpool` using `criterion`. 2023-05-31 07:00:49 +00:00
finished benchmark for threadpool and fixed documentation for threadpool 2023-05-31 15:09:44 +00:00			`// number of elements in each vector for each thread`
			`let steps = a.len() / threads;`
added benchmark for `threadpool` using `criterion`. 2023-05-31 07:00:49 +00:00
finished benchmark for threadpool and fixed documentation for threadpool 2023-05-31 15:09:44 +00:00			`for i in 0..threads {`
			`// offset of the first element for the thread local vec`
added benchmark for `threadpool` using `criterion`. 2023-05-31 07:00:49 +00:00			`let chunk = i * steps;`
finished benchmark for threadpool and fixed documentation for threadpool 2023-05-31 15:09:44 +00:00			`// create a new strong reference to the vector`
added benchmark for `threadpool` using `criterion`. 2023-05-31 07:00:49 +00:00			`let aa = a.clone();`
			`let bb = b.clone();`
finished benchmark for threadpool and fixed documentation for threadpool 2023-05-31 15:09:44 +00:00			`// launch a new thread`
added benchmark for `threadpool` using `criterion`. 2023-05-31 07:00:49 +00:00			`pool.enqueue(move \|\| {`
			`let a = &aa[chunk..(chunk + steps)];`
			`let b = &bb[chunk..(chunk + steps)];`
			`dot(a, b)`
			`});`
			`}`
fixed format in "multithreading" and "lib" 2023-06-06 18:13:07 +00:00
complete rewrite of `multithreading::ThreadPool` removing: - The limitation of Atomics - Multiple Mutexes And added message passing 2023-06-04 20:31:00 +00:00			`pool.join_all();`
added benchmark for `threadpool` using `criterion`. 2023-05-31 07:00:49 +00:00
complete rewrite of `multithreading::ThreadPool` removing: - The limitation of Atomics - Multiple Mutexes And added message passing 2023-06-04 20:31:00 +00:00			`black_box(pool.get_results().iter().sum::<f64>());`
added benchmark for `threadpool` using `criterion`. 2023-05-31 07:00:49 +00:00			`}`

finished benchmark for threadpool and fixed documentation for threadpool 2023-05-31 15:09:44 +00:00			`/// Compute a simple hash value for the given index value.`
			`/// This function will return a value between [0, 1].`
added benchmark for `threadpool` using `criterion`. 2023-05-31 07:00:49 +00:00			`#[inline]`
			`fn hash(x: f64) -> f64 {`
			`((x * 234.8743 + 3.8274).sin() * 87624.58376).fract()`
			`}`

finished benchmark for threadpool and fixed documentation for threadpool 2023-05-31 15:09:44 +00:00			/// Create a vector filled with `size` elements of 64-bit floating point numbers
			/// each initialized with the function `hash` and the given seed. The vector will
			`/// be filled with values between [0, 1].`
			`fn create_vec(size: usize, seed: u64) -> Arc<Vec<f64>> {`
added benchmark for `threadpool` using `criterion`. 2023-05-31 07:00:49 +00:00			`let mut vec = Vec::with_capacity(size);`

			`for i in 0..size {`
finished benchmark for threadpool and fixed documentation for threadpool 2023-05-31 15:09:44 +00:00			`vec.push(hash(i as f64 + seed as f64));`
added benchmark for `threadpool` using `criterion`. 2023-05-31 07:00:49 +00:00			`}`

			`Arc::new(vec)`
			`}`

finished benchmark for threadpool and fixed documentation for threadpool 2023-05-31 15:09:44 +00:00			`/// Function for executing the thread pool benchmarks using criterion.rs.`
			`/// It will create two different vectors and benchmark the single thread performance`
			`/// as well as the multi threadded performance for varying amounts of threads.`
			`pub fn bench_threadpool(c: &mut Criterion) {`
			`let vec_a = create_vec(VEC_ELEM_COUNT, VEC_SEEDS[0]);`
			`let vec_b = create_vec(VEC_ELEM_COUNT, VEC_SEEDS[1]);`

			`let mut group = c.benchmark_group("threadpool with various number of threads");`

			`for threads in THREAD_COUNTS.iter() {`
			`group.throughput(Throughput::Bytes(*threads as u64));`
			`group.bench_with_input(BenchmarkId::from_parameter(threads), threads, \|b, _\| {`
			`b.iter(\|\| {`
			`dot_parallel(vec_a.clone(), vec_b.clone(), *threads);`
			`});`
			`});`
			`}`
			`group.finish();`
			`}`
added benchmark for `threadpool` using `criterion`. 2023-05-31 07:00:49 +00:00
finished benchmark for threadpool and fixed documentation for threadpool 2023-05-31 15:09:44 +00:00			`/// Benchmark the effects of over and underusing a thread pools thread capacity.`
			`/// The thread pool will automatically choose the number of threads to use.`
			`/// We will then run a custom number of jobs with that pool that may be smaller or larger`
			`/// than the amount of threads the pool can offer.`
			`fn pool_overusage(a: Arc<Vec<f64>>, b: Arc<Vec<f64>>, threads: usize) {`
			`// automatically choose the number of threads`
			`let mut pool = ThreadPool::new();`

			`// number of elements in each vector for each thread`
			`let steps = a.len() / threads;`

			`for i in 0..threads {`
			`// offset of the first element for the thread local vec`
			`let chunk = i * steps;`
			`// create a new strong reference to the vector`
			`let aa = a.clone();`
			`let bb = b.clone();`
			`// launch a new thread`
			`pool.enqueue(move \|\| {`
			`let a = &aa[chunk..(chunk + steps)];`
			`let b = &bb[chunk..(chunk + steps)];`
			`dot(a, b)`
			`});`
			`}`

complete rewrite of `multithreading::ThreadPool` removing: - The limitation of Atomics - Multiple Mutexes And added message passing 2023-06-04 20:31:00 +00:00			`pool.join_all();`

			`black_box(pool.get_results().iter().sum::<f64>());`
finished benchmark for threadpool and fixed documentation for threadpool 2023-05-31 15:09:44 +00:00			`}`

			`/// Benchmark the effects of over and underusing a thread pools thread capacity.`
			`/// The thread pool will automatically choose the number of threads to use.`
			`/// We will then run a custom number of jobs with that pool that may be smaller or larger`
			`/// than the amount of threads the pool can offer.`
			`pub fn bench_overusage(c: &mut Criterion) {`
			`let vec_a = create_vec(VEC_ELEM_COUNT, VEC_SEEDS[0]);`
			`let vec_b = create_vec(VEC_ELEM_COUNT, VEC_SEEDS[1]);`

			`let mut group = c.benchmark_group("threadpool overusage");`

			`for threads in THREAD_COUNTS.iter() {`
			`group.throughput(Throughput::Bytes(*threads as u64));`
			`group.bench_with_input(BenchmarkId::from_parameter(threads), threads, \|b, _\| {`
			`b.iter(\|\| {`
			`pool_overusage(vec_a.clone(), vec_b.clone(), *threads);`
			`});`
			`});`
			`}`
			`group.finish();`
			`}`

			`/// Benchmark the performance of a single thread used to calculate the dot product.`
			`pub fn bench_single_threaded(c: &mut Criterion) {`
			`let vec_a = create_vec(VEC_ELEM_COUNT, VEC_SEEDS[0]);`
			`let vec_b = create_vec(VEC_ELEM_COUNT, VEC_SEEDS[1]);`

			`c.bench_function("single threaded", \|s\| {`
			`s.iter(\|\| {`
			`black_box(dot(&vec_a, &vec_b));`
			`});`
added benchmark for `threadpool` using `criterion`. 2023-05-31 07:00:49 +00:00			`});`
			`}`

finished benchmark for threadpool and fixed documentation for threadpool 2023-05-31 15:09:44 +00:00			`criterion_group!(`
			`benches,`
			`bench_single_threaded,`
			`bench_threadpool,`
			`bench_overusage`
			`);`
added benchmark for `threadpool` using `criterion`. 2023-05-31 07:00:49 +00:00			`criterion_main!(benches);`