Search index (#47)

* added the search index module

* changed the search function
deleted test feature

* changed the weighted compare for RGBA

Changed the Weighted compare for F32

Changed the Weighted compare for Vec

Changed the Weighted compare for Indices

added Indexed Images and functions

Changed Database that it uses Indexed Images

Added tests

Added the rgb_to_lab function

added pixels function

* added Documentary

* minor changes in Documentary

---------

Co-authored-by: Felix Müller <felixmueller1620@gmail.com>
This commit is contained in:
teridax 2023-06-17 20:14:06 +00:00 committed by GitHub
parent d83c89c8f0
commit 3bb064870d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 838 additions and 151 deletions

View File

@ -1,173 +1,350 @@
//! Benachmarking funcitonality for [Criterion.rs](https://github.com/bheisler/criterion.rs)
//! This benchmark will compare the performance of various thread pools launched with different amounts of
//! maximum threads.
//! Each thread will calculate a partial dot product of two different vectors composed of 1,000,000 64-bit
//! double precision floating point values.
//! This module provides the functionality to create thread pool to execute tasks in parallel.
//! The amount of threads to be used at maximum can be regulated by using `ThreadPool::with_limit`.
//! This implementation is aimed to be of low runtime cost with minimal sychronisation due to blocking.
//! Note that no threads will be spawned until jobs are supplied to be executed. For every supplied job
//! a new thread will be launched until the maximum number is reached. By then every launched thread will
//! be reused to process the remaining elements of the queue. If no jobs are left to be executed
//! all threads will finish and die. This means that if nothing is done, no threads will run in idle in the background.
//! # Example
//! ```rust
//! # use imsearch::multithreading::ThreadPool;
//! # use imsearch::multithreading::Task;
//! let mut pool = ThreadPool::with_limit(2);
//!
//! for i in 0..10 {
//! pool.enqueue(Task::new(i, |i| i));
//! // ^^^^^^ closure or static function
//! }
//!
//! pool.join_all();
//! assert_eq!(pool.get_results().iter().sum::<i32>(), 45);
//! ```
use std::sync::Arc;
use std::{
any::Any,
collections::VecDeque,
num::NonZeroUsize,
sync::{
mpsc::{channel, Receiver, Sender},
Arc, Mutex,
},
thread::{self, JoinHandle},
};
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use imsearch::multithreading::{Task, ThreadPool};
/// Default number if threads to be used in case [`std::thread::available_parallelism`] fails.
pub const DEFAULT_THREAD_POOL_SIZE: usize = 1;
/// Amount of elements per vector used to calculate the dot product
const VEC_ELEM_COUNT: usize = 1_000_000;
/// Number of threads to test
const THREAD_COUNTS: [usize; 17] = [
1, 2, 4, 6, 8, 10, 12, 16, 18, 20, 22, 26, 28, 32, 40, 56, 64,
/// Indicates the priority level of functions or closures which get supplied to the pool.
/// Use [`Priority::High`] to ensure the closue to be executed before all closures that are already supplied
/// Use [`Priority::Low`] to ensure the closue to be executed after all closures that are already supplied
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum Priority {
/// Indicate that the closure or function supplied to the thread
/// has higher priority than any other given to the pool until now.
/// The item will get enqueued at the start of the waiting-queue.
High,
/// Indicate that the closure or function supplied to the thread pool
/// has lower priority than the already supplied ones in this pool.
/// The item will get enqueued at the end of the waiting-queue.
Low,
}
/// Traits a return value has to implement when being given back by a function or closure.
pub trait Sendable: Any + Send + 'static {}
impl<T> Sendable for T where T: Any + Send + 'static {}
/// A task that will be executed at some point in the future by the thread pool
/// At the heart of this struct is the function to be executed. This may be a closure.
#[derive(Debug, Copy, Clone)]
pub struct Task<I, T>
where
I: Sendable,
T: Sendable,
{
job: fn(I) -> T,
param: I,
}
impl<I, T> Task<I, T>
where
I: Sendable,
T: Sendable,
{
pub fn new(param: I, job: fn(I) -> T) -> Self {
Self { job, param }
}
}
/// Thread pool which can be used to execute functions or closures in parallel.
/// The amount of threads to be used at maximum can be regulated by using `ThreadPool::with_limit`.
/// This implementation is aimed to be of low runtime cost with minimal sychronisation due to blocking.
/// Note that no threads will be spawned until jobs are supplied to be executed. For every supplied job
/// a new thread will be launched until the maximum number is reached. By then every launched thread will
/// be reused to process the remaining elements of the queue. If no jobs are left to be executed
/// all threads will finish and die. This means that if nothing is done, no threads will run in idle in the background.
/// # Example
/// ```rust
/// # use imsearch::multithreading::ThreadPool;
/// # use imsearch::multithreading::Task;
/// let mut pool = ThreadPool::with_limit(2);
///
/// for i in 0..10 {
/// pool.enqueue(Task::new(i, |i| i));
/// }
///
/// pool.join_all();
/// assert_eq!(pool.get_results().iter().sum::<i32>(), 45);
/// ```
/// # Drop
/// This struct implements the `Drop` trait. Upon being dropped the pool will wait for all threads
/// to finsish. This may take up an arbitrary amount of time.
/// # Panics in the thread
/// When a function or closure panics, the executing thread will detect the unwind performed by `panic` causing the
/// thread to print a message on stderr. The thread itself captures panics and won't terminate execution but continue with
/// the next task in the queue.
/// Its not recommend to use this pool with custom panic hooks or special functions which abort the process.
/// Also panicking code from external program written in C++ or others in undefinied behavior according to [`std::panic::catch_unwind`]
#[derive(Debug)]
pub struct ThreadPool<I, T>
where
I: Sendable,
T: Sendable,
{
/// queue for storing the jobs to be executed
queue: Arc<Mutex<VecDeque<Task<I, T>>>>,
/// handles for all threads currently running and processing jobs
handles: Vec<JoinHandle<()>>,
/// reciver end for channel based communication between threads
receiver: Receiver<T>,
/// sender end for channel based communication between threads
sender: Sender<T>,
/// maximum amount of threads to be used in parallel
limit: NonZeroUsize,
}
impl<I, T> Default for ThreadPool<I, T>
where
I: Sendable,
T: Sendable,
{
fn default() -> Self {
let (sender, receiver) = channel::<T>();
// determine default thread count to use based on the system
let default =
NonZeroUsize::new(DEFAULT_THREAD_POOL_SIZE).expect("Thread limit must be non-zero");
let limit = thread::available_parallelism().unwrap_or(default);
Self {
queue: Arc::new(Mutex::new(VecDeque::new())),
handles: Vec::new(),
receiver,
sender,
limit,
}
}
}
impl<I, T> ThreadPool<I, T>
where
I: Sendable,
T: Sendable,
{
/// Creates a new thread pool with default thread count determined by either
/// [`std::thread::available_parallelism`] or [`DEFAULT_THREAD_POOL_SIZE`] in case it fails.
/// No threads will be lauched until jobs are enqueued.
pub fn new() -> Self {
Default::default()
}
/// Creates a new thread pool with the given thread count. The pool will continue to launch new threads even if
/// the system does not allow for that count of parallelism.
/// No threads will be lauched until jobs are enqueued.
/// # Panic
/// This function will fails if `max_threads` is zero.
pub fn with_limit(max_threads: usize) -> Self {
let (sender, receiver) = channel::<T>();
Self {
limit: NonZeroUsize::new(max_threads).expect("Thread limit must be non-zero"),
queue: Arc::new(Mutex::new(VecDeque::new())),
handles: Vec::new(),
sender,
receiver,
}
}
/// Put a new job into the queue to be executed by a thread in the future.
/// The priority of the job will determine if the job will be put at the start or end of the queue.
/// See [`crate::multithreading::Priority`].
/// This function will create a new thread if the maximum number of threads in not reached.
/// In case the maximum number of threads is already used, the job is stalled and will get executed
/// when a thread is ready and its at the start of the queue.
pub fn enqueue_priorize(&mut self, func: Task<I, T>, priority: Priority) {
// put job into queue
let mut queue = self.queue.lock().unwrap();
// insert new job into queue depending on its priority
match priority {
Priority::High => queue.push_front(func),
Priority::Low => queue.push_back(func),
}
if self.handles.len() < self.limit.get() {
// we can still launch threads to run in parallel
// clone the sender
let tx = self.sender.clone();
let queue = self.queue.clone();
self.handles.push(thread::spawn(move || {
while let Some(task) = queue.lock().unwrap().pop_front() {
tx.send((task.job)(task.param))
.expect("unable to send result over channel");
}
}));
}
self.handles.retain(|h| !h.is_finished());
}
/// Put a new job into the queue to be executed by a thread in the future.
/// The priority of the job is automatically set to [`crate::multithreading::Priority::Low`].
/// This function will create a new thread if the maximum number of threads in not reached.
/// In case the maximum number of threads is already used, the job is stalled and will get executed
/// when a thread is ready and its at the start of the queue.
pub fn enqueue(&mut self, func: Task<I, T>) {
self.enqueue_priorize(func, Priority::Low);
}
/// Wait for all threads to finish executing. This means that by the time all threads have finished
/// every task will have been executed too. In other words the threads finsish when the queue of jobs is empty.
/// This function will block the caller thread.
pub fn join_all(&mut self) {
while let Some(handle) = self.handles.pop() {
handle.join().unwrap();
}
}
/// Sendables all results that have been Sendableed by the threads until now
/// and haven't been consumed yet.
/// All results retrieved from this call won't be Sendableed on a second call.
/// This function is non blocking.
pub fn try_get_results(&mut self) -> Vec<T> {
self.receiver.try_iter().collect()
}
/// Sendables all results that have been returned by the threads until now
/// and haven't been consumed yet. The function will also wait for all threads to finish executing (empty the queue).
/// All results retrieved from this call won't be returned on a second call.
/// This function will block the caller thread.
pub fn get_results(&mut self) -> Vec<T> {
self.join_all();
self.try_get_results()
}
}
impl<I, T> Drop for ThreadPool<I, T>
where
I: Sendable,
T: Sendable,
{
fn drop(&mut self) {
self.join_all();
}
}
#[cfg(test)]
mod test {
use std::panic::UnwindSafe;
use super::*;
#[test]
fn test_default() {
let mut pool = ThreadPool::default();
for i in 0..10 {
pool.enqueue_priorize(Task::new(i, |i| i), Priority::High);
}
pool.join_all();
assert_eq!(pool.try_get_results().iter().sum::<i32>(), 45);
}
#[test]
fn test_limit() {
let mut pool = ThreadPool::with_limit(2);
for i in 0..10 {
pool.enqueue(Task::new(i, |i| i));
}
assert_eq!(pool.handles.len(), 2);
assert_eq!(pool.limit.get(), 2);
pool.join_all();
assert_eq!(pool.get_results().iter().sum::<i32>(), 45);
}
trait Object: Send + UnwindSafe {
fn get(&mut self) -> i32;
}
#[derive(Default)]
struct Test1 {
_int: i32,
}
impl Object for Test1 {
fn get(&mut self) -> i32 {
0
}
}
#[derive(Default)]
struct Test2 {
_c: char,
}
impl Object for Test2 {
fn get(&mut self) -> i32 {
0
}
}
#[derive(Default)]
struct Test3 {
_s: String,
}
impl Object for Test3 {
fn get(&mut self) -> i32 {
0
}
}
#[test]
fn test_1() {
let mut pool = ThreadPool::with_limit(2);
let feats: Vec<Box<dyn Object>> = vec![
Box::new(Test1::default()),
Box::new(Test2::default()),
Box::new(Test3::default()),
];
/// seeds used to scramble up the values produced by the hash function for each vector
/// these are just some pseudo random numbers
const VEC_SEEDS: [u64; 2] = [0xa3f8347abce16, 0xa273048ca9dea];
/// Compute the dot product of two vectors
/// # Panics
/// this function assumes both vectors to be of exactly the same length.
/// If this is not the case the function will panic.
fn dot(a: &[f64], b: &[f64]) -> f64 {
let mut sum = 0.0;
for i in 0..a.len() {
sum += a[i] * b[i];
}
sum
}
/// Computes the dot product using a thread pool with varying number of threads. The vectors will be both splitted into equally
/// sized slices which then get passed ot their own thread to compute the partial dot product. After all threads have
/// finished the partial dot products will be summed to create the final result.
fn dot_parallel(a: Arc<Vec<f64>>, b: Arc<Vec<f64>>, threads: usize) {
let mut pool = ThreadPool::with_limit(threads);
// number of elements in each vector for each thread
let steps = a.len() / threads;
for i in 0..threads {
// offset of the first element for the thread local vec
let chunk = i * steps;
// launch a new thread
pool.enqueue(Task::new(
(chunk, steps, a.clone(), b.clone()),
|(block, inc, a, b)| {
let a = &a[block..(block + inc)];
let b = &b[block..(block + inc)];
dot(a, b)
},
));
for feat in feats {
pool.enqueue(Task::new(feat, |mut i| {
let _ = i.get();
i
}));
}
pool.join_all();
black_box(pool.get_results().iter().sum::<f64>());
let _feats: Vec<Box<dyn Object>> = pool.get_results();
}
/// Compute a simple hash value for the given index value.
/// This function will return a value between [0, 1].
#[inline]
fn hash(x: f64) -> f64 {
((x * 234.8743 + 3.8274).sin() * 87624.58376).fract()
}
/// Create a vector filled with `size` elements of 64-bit floating point numbers
/// each initialized with the function `hash` and the given seed. The vector will
/// be filled with values between [0, 1].
fn create_vec(size: usize, seed: u64) -> Arc<Vec<f64>> {
let mut vec = Vec::with_capacity(size);
for i in 0..size {
vec.push(hash(i as f64 + seed as f64));
}
Arc::new(vec)
}
/// Function for executing the thread pool benchmarks using criterion.rs.
/// It will create two different vectors and benchmark the single thread performance
/// as well as the multi threadded performance for varying amounts of threads.
pub fn bench_threadpool(c: &mut Criterion) {
let vec_a = create_vec(VEC_ELEM_COUNT, VEC_SEEDS[0]);
let vec_b = create_vec(VEC_ELEM_COUNT, VEC_SEEDS[1]);
let mut group = c.benchmark_group("threadpool with various number of threads");
for threads in THREAD_COUNTS.iter() {
group.throughput(Throughput::Bytes(*threads as u64));
group.bench_with_input(BenchmarkId::from_parameter(threads), threads, |b, _| {
b.iter(|| {
dot_parallel(vec_a.clone(), vec_b.clone(), *threads);
});
});
}
group.finish();
}
/// Benchmark the effects of over and underusing a thread pools thread capacity.
/// The thread pool will automatically choose the number of threads to use.
/// We will then run a custom number of jobs with that pool that may be smaller or larger
/// than the amount of threads the pool can offer.
fn pool_overusage(a: Arc<Vec<f64>>, b: Arc<Vec<f64>>, threads: usize) {
// automatically choose the number of threads
let mut pool = ThreadPool::new();
// number of elements in each vector for each thread
let steps = a.len() / threads;
for i in 0..threads {
// offset of the first element for the thread local vec
let chunk = i * steps;
// launch a new thread
pool.enqueue(Task::new(
(chunk, steps, a.clone(), b.clone()),
|(block, inc, a, b)| {
let a = &a[block..(block + inc)];
let b = &b[block..(block + inc)];
dot(a, b)
},
));
}
pool.join_all();
black_box(pool.get_results().iter().sum::<f64>());
}
/// Benchmark the effects of over and underusing a thread pools thread capacity.
/// The thread pool will automatically choose the number of threads to use.
/// We will then run a custom number of jobs with that pool that may be smaller or larger
/// than the amount of threads the pool can offer.
pub fn bench_overusage(c: &mut Criterion) {
let vec_a = create_vec(VEC_ELEM_COUNT, VEC_SEEDS[0]);
let vec_b = create_vec(VEC_ELEM_COUNT, VEC_SEEDS[1]);
let mut group = c.benchmark_group("threadpool overusage");
for threads in THREAD_COUNTS.iter() {
group.throughput(Throughput::Bytes(*threads as u64));
group.bench_with_input(BenchmarkId::from_parameter(threads), threads, |b, _| {
b.iter(|| {
pool_overusage(vec_a.clone(), vec_b.clone(), *threads);
});
});
}
group.finish();
}
/// Benchmark the performance of a single thread used to calculate the dot product.
pub fn bench_single_threaded(c: &mut Criterion) {
let vec_a = create_vec(VEC_ELEM_COUNT, VEC_SEEDS[0]);
let vec_b = create_vec(VEC_ELEM_COUNT, VEC_SEEDS[1]);
c.bench_function("single threaded", |s| {
s.iter(|| {
black_box(dot(&vec_a, &vec_b));
});
});
}
criterion_group!(
benches,
bench_single_threaded,
bench_threadpool,
bench_overusage
);
criterion_main!(benches);

View File

@ -87,6 +87,10 @@ where
pub fn pixel(&self, index: usize) -> (T, T, T, T) {
*self.index(index)
}
/// Returns all pixel of the image
pub fn pixels(&self, index: usize) -> &Vec<(T, T, T, T)> {
&self.pixels
}
/// Returns the path of the image
pub fn path(&self) -> &PathBuf {
&self.path

View File

@ -3,6 +3,7 @@ extern crate core;
pub mod image;
pub mod image_loader;
pub mod multithreading;
pub mod search_index;
pub fn add(left: usize, right: usize) -> usize {
left + right

505
src/search_index/mod.rs Normal file
View File

@ -0,0 +1,505 @@
//!
//! This module provides the Database for Images and compare methods to search in it.
//!
//! The database Struct provides the Images and has a threadpool to efficiently process all given features for all Images
//!
//!
//! to generate a database you need a vector of paths of picture that you want to save and search in it.
//! You also need a Vector of Feature generator functions that generates the feature of every image
//!
//!
//!```
//! # use std::path::{Path, PathBuf};
//! # use imsearch::image::Image;
//! # use imsearch::search_index;
//! use imsearch::search_index::FeatureGenerator;
//!
//! let path: Vec<PathBuf> = Vec::new();
//! let features: Vec<FeatureGenerator> = Vec::new();
//!
//! let mut database = search_index::Database::new(&path, features );
//! database.add_image(Path::new("testpath"));
//! ```
//!
//!
//!This Library provides some Feature generator functions but you can also create your own.
//!The Feature generator has to fit in the "FeatureGenerator" type to work with the database.
//!
//!
use crate::image::Image;
use crate::multithreading::{Task, ThreadPool};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::default::Default;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Arc;
///this trait provides a function to compare objects and returns a f32 between 0 and 1.
/// 1 is identical and 0 is different. with this trait you get the similarity between the objects
trait WeightedCmp {
fn weighted(&self, other: &Self) -> f32;
}
/// Every feature returns a known and sized type from this enum
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum FeatureResult {
/// A boolean. Just a boolean
Bool(bool),
/// Signed 32-bit integer
I32(i32),
/// 32-bit single precision floating point
/// can be used for aspect ratio or luminance
F32(f32),
/// Vector for nested multidimensional
Vec(Vec<FeatureResult>),
/// Standard RGBA color
Rgba(f32, f32, f32, f32),
/// Indices intended for the usage in histograms
Indices(Vec<u64>),
///A Character :)
Char(char),
///A String ;)
String(String),
///a f32 between 0 and 1 where 1 is 100% and 0 is 0%
Percent(f32),
}
impl Default for FeatureResult {
fn default() -> Self {
FeatureResult::Bool(false)
}
}
/// For some feature return type we want to implement a custom compare function
impl PartialEq for FeatureResult {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(Self::Bool(l0), Self::Bool(r0)) => l0 == r0,
(Self::I32(l0), Self::I32(r0)) => l0 == r0,
(Self::F32(l0), Self::F32(r0)) => l0 == r0,
(Self::Vec(l0), Self::Vec(r0)) => l0 == r0,
(Self::Rgba(l0, l1, l2, l3), Self::Rgba(r0, r1, r2, r3)) => {
l0 == r0 && l1 == r1 && l2 == r2 && l3 == r3
}
(Self::Indices(l), Self::Indices(r)) => l == r,
(Self::Char(l0), Self::Char(r0)) => l0 == r0,
(Self::String(l), Self::String(r)) => l == r,
(Self::Percent(l0), Self::Percent(r0)) => l0 == r0,
_ => false,
}
}
}
///in this trait we compare the types to get the similarity between them where 1 is identical and 0 is completly different
///
/// the Vec type compares each member recursive.
/// the Rgba type returns the Delta E similarity of the Colors
/// the Indices type is compared with the cosines similarity
/// the Percent type returns the 1 - difference
///
///
impl WeightedCmp for FeatureResult {
fn weighted(&self, other: &Self) -> f32 {
match (self, other) {
(Self::Bool(l0), Self::Bool(r0)) => {
if l0 == r0 {
1.
} else {
0.
}
}
(Self::I32(l0), Self::I32(r0)) => {
if l0 == r0 {
1.
} else {
0.
}
}
(Self::F32(l0), Self::F32(r0)) => {
if (l0 - r0).abs() < 1e-4 {
1.
} else {
0.
}
}
(Self::Vec(l), Self::Vec(r)) => {
if l.len() == r.len() {
let mut b: f32 = 0.;
for a in l.iter().enumerate() {
b += a.1.weighted(&r[a.0]);
}
b / l.len() as f32
} else {
0.
}
}
(Self::Rgba(l0, l1, l2, _), Self::Rgba(r0, r1, r2, _)) => {
let lableft = rgb_to_lab(vec![*l0, *l1, *l2]);
let labright = rgb_to_lab(vec![*r0, *r1, *r2]);
let mut result = ((lableft[0] - labright[0]) * (lableft[0] - labright[0])
+ (lableft[1] - labright[1]) * (lableft[1] - labright[1])
+ (lableft[2] - labright[2]) * (lableft[2] - labright[2]))
.sqrt(); //euclidian distance between two colors: Delta E
if result > 100. {
result = 0.;
} else {
result = 1. - result / 100.;
}
result
}
(Self::Indices(l), Self::Indices(r)) => {
let mut up = 0_u64;
let mut left = 0_u64;
let mut right = 0_u64;
for (a, b) in l.iter().zip(r.iter()).map(|(a, b)| (a, b)) {
left += a * a;
right += b * b;
up += a * b;
}
let mut result = up as f32 / ((left * right) as f32).sqrt(); //cosines similarity
if result.is_nan() {
if left == right {
result = 1.;
} else {
result = 0.
}
}
result
}
(Self::Char(l0), Self::Char(r0)) => {
if l0 == r0 {
1.
} else {
0.
}
}
(Self::String(l0), Self::String(r0)) => {
if l0 == r0 {
1.
} else {
0.
}
}
(Self::Percent(l0), Self::Percent(r0)) => 1. - (l0 - r0).abs(),
_ => 0.,
}
}
}
///this function transforms rgb values to lab values
fn rgb_to_lab(rgb: Vec<f32>) -> [f32; 3] {
let r = rgb[0] / 255.0;
let g = rgb[1] / 255.0;
let b = rgb[2] / 255.0;
let r = if r > 0.04045 {
((r + 0.055) / 1.055).powf(2.4)
} else {
r / 12.92
};
let g = if g > 0.04045 {
((g + 0.055) / 1.055).powf(2.4)
} else {
g / 12.92
};
let b = if b > 0.04045 {
((b + 0.055) / 1.055).powf(2.4)
} else {
b / 12.92
};
let x = r * 0.4124 + g * 0.3576 + b * 0.1805;
let y = r * 0.2126 + g * 0.7152 + b * 0.0722;
let z = r * 0.0193 + g * 0.1192 + b * 0.9505;
let x = x / 0.95047;
let y = y / 1.0;
let z = z / 1.08883;
let x = if x > 0.008856 {
x.powf(1.0 / 3.0)
} else {
(7.787 * x) + (16.0 / 116.0)
};
let y = if y > 0.008856 {
y.powf(1.0 / 3.0)
} else {
(7.787 * y) + (16.0 / 116.0)
};
let z = if z > 0.008856 {
z.powf(1.0 / 3.0)
} else {
(7.787 * z) + (16.0 / 116.0)
};
let l = (116.0 * y) - 16.0;
let a = 500.0 * (x - y);
let b = 200.0 * (y - z);
[l, a, b]
}
pub type FeatureGenerator = fn(Arc<Image<f32>>) -> (String, FeatureResult);
///The Database stores the images with the feature generators.
///It also stores the threadpool
///the images of the Database can get serialized using Serde_Json. the complete Database cant get serialized
#[derive(Default)]
pub struct Database {
images: IndexedImages,
/// keep feature generator for the case when we add a new image
/// this field is not serialized and needs to be wrapped in an option
generators: Vec<FeatureGenerator>,
threadpool: ThreadPool<Arc<Image<f32>>, (String, FeatureResult)>,
}
impl Database {
///This function search the Database after the Similarity to a given Image in a specific feature.
/// It returns a Vector of all images and a f32 value which represents the Similarity in percent.
///
pub fn search(&self, imagepath: &Path, feature: FeatureGenerator) -> Vec<(PathBuf, f32)> {
self.images.search(imagepath, feature)
}
///the new function generates a new Database out of a vector of the Paths of the Images and a Vector of features
pub fn new(imagepaths: &Vec<PathBuf>, features: Vec<FeatureGenerator>) -> Self {
let mut threadpool = ThreadPool::new();
Self {
images: IndexedImages::new(imagepaths, &features, &mut threadpool),
generators: features,
threadpool,
}
}
/// with add_image you can add images in a existing database.
/// databases from a file are read only.
pub fn add_image(&mut self, path: &Path) {
if !self.generators.is_empty() {
self.images
.add_image(path, &self.generators, &mut self.threadpool)
} else {
panic!("database without generator functions is immutable")
}
}
/// with from_file you can generate a Database out of a given path to a serialized database
pub fn from_file(path: &Path) -> Self {
let filestring = fs::read_to_string(path).expect("can't read that file");
let images = serde_json::from_str::<IndexedImages>(&filestring)
.expect("unable to deserialize the file");
Self {
images,
generators: Vec::new(),
threadpool: ThreadPool::new(),
}
}
}
///IndexedImages stores the images of the Database and is serializable
#[derive(Serialize, Deserialize, Default, PartialEq, Debug)]
struct IndexedImages {
images: HashMap<PathBuf, HashMap<String, FeatureResult>>,
}
impl IndexedImages {
///the new function generates all images and generates every feature so it can store these.
fn new(
imagepaths: &Vec<PathBuf>,
features: &[FeatureGenerator],
threadpool: &mut ThreadPool<Arc<Image<f32>>, (String, FeatureResult)>,
) -> Self {
let mut images_with_feats = HashMap::new();
for path in imagepaths {
let image: Arc<Image<f32>> = Arc::new(Image::default()); //todo!("Image reader function")
let mut feats = HashMap::new();
for generator in features.iter() {
threadpool.enqueue(Task::new(image.clone(), *generator));
}
let vec = threadpool.get_results();
for (name, result) in vec {
feats.insert(name, result);
}
images_with_feats.insert(image.path().clone(), feats);
}
Self {
images: images_with_feats,
}
}
///This function search the Database after the Similarity to a given Image in a specific feature.
/// It returns a Vector of all images and a f32 value which represents the Similarity in percent.
///
fn search(&self, imagepath: &Path, feature: FeatureGenerator) -> Vec<(PathBuf, f32)> {
let image: Arc<Image<f32>> = Arc::new(Image::default()); //todo!("Image reader function")
let search_feat = feature(image);
let mut result: Vec<(PathBuf, f32)> = Vec::new();
for image in &self.images {
for feat in image.1 {
if search_feat.0 == *feat.0 {
result.push((image.0.clone(), search_feat.1.weighted(feat.1)));
}
}
}
result
}
///this function lets you add images to the Indexed Image struct
fn add_image(
&mut self,
path: &Path,
generator: &Vec<FeatureGenerator>,
threadpool: &mut ThreadPool<Arc<Image<f32>>, (String, FeatureResult)>,
) {
let image: Arc<Image<f32>> = Arc::new(Image::default()); //todo!("Image reader function")
let mut feats = HashMap::new();
for gen in generator {
threadpool.enqueue(Task::new(image.clone(), *gen));
}
let vec = threadpool.get_results();
for (name, result) in vec {
feats.insert(name, result);
}
self.images.insert(image.path().clone(), feats);
}
}
/// example feature implementation
#[allow(dead_code)]
fn average_luminance(image: Arc<Image<f32>>) -> (String, FeatureResult) {
(String::from("average-brightness"), FeatureResult::F32(0.0))
}
#[cfg(test)]
mod tests {
use super::*;
///this function tests the Serialization of the Database
#[test]
fn conversion() {
let mut images: HashMap<PathBuf, HashMap<String, FeatureResult>> = HashMap::new();
let mut feat: HashMap<String, FeatureResult> = HashMap::new();
feat.insert(String::from("average-brightness"), FeatureResult::F32(0.0));
images.insert(PathBuf::new(), feat);
let data = IndexedImages { images };
let _as_json = serde_json::to_string(&data).expect("couldnt convert");
println!("{:?}", _as_json);
let data_after_conversion =
serde_json::from_str::<IndexedImages>(&_as_json).expect("couldnt convert from string");
assert_eq!(data, data_after_conversion);
}
///this function tests Edgecases for the cosine_similarity in the weightet function
#[test]
fn cosine_similarity() {
let vec1 = FeatureResult::Indices(vec![1, 3, 4]);
let vec2 = FeatureResult::Indices(vec![1, 3, 4]);
assert_eq!(1., vec1.weighted(&vec2)); // both are identical
let vec2 = FeatureResult::Indices(vec![0, 0, 0]);
assert_eq!(0., vec1.weighted(&vec2)); // one is 0
let vec1 = FeatureResult::Indices(vec![0, 0, 0]);
assert_eq!(1., vec1.weighted(&vec2)); // both are 0
assert_eq!(1., vec2.weighted(&vec1)); // it shouldn't change if the Values are switched
let vec1 = FeatureResult::Indices(vec![7, 3, 4]);
let vec2 = FeatureResult::Indices(vec![1, 5, 2]);
assert_eq!(vec1.weighted(&vec2), vec2.weighted(&vec1));
println!("{:?}", vec1.weighted(&vec2));
let mut vec1 = vec![5; 9999];
vec1.push(1);
let vec1 = FeatureResult::Indices(vec1);
let vec2 = FeatureResult::Indices(vec![7; 10000]);
println!("{:?}", vec1.weighted(&vec2));
}
///this function tests all of the weighted function
#[test]
fn weighted() {
let vec1 = FeatureResult::Vec(vec![
FeatureResult::Bool(true),
FeatureResult::Char('c'),
FeatureResult::Vec(vec![FeatureResult::Percent(0.5)]),
FeatureResult::F32(44.543),
]);
let vec2 = FeatureResult::Vec(vec![
FeatureResult::Bool(true),
FeatureResult::Char('c'),
FeatureResult::Vec(vec![FeatureResult::Percent(0.5)]),
FeatureResult::F32(44.543),
]);
assert_eq!(1., vec2.weighted(&vec1));
let vec2 = FeatureResult::Vec(vec![
FeatureResult::Bool(true),
FeatureResult::Char('c'),
FeatureResult::F32(44.543),
FeatureResult::Vec(vec![FeatureResult::Percent(0.5)]),
]);
assert_eq!(0.5, vec2.weighted(&vec1));
println!("{:?}", vec1.weighted(&vec2));
let value1 = FeatureResult::F32(44.543);
let value2 = FeatureResult::F32(44.543);
assert_eq!(1., value1.weighted(&value2));
let value1 = FeatureResult::Bool(true);
let value2 = FeatureResult::Bool(false);
assert_eq!(0., value1.weighted(&value2));
let value1 = FeatureResult::String(String::from("Testing"));
let value2 = FeatureResult::String(String::from("notTesting"));
assert_eq!(0., value1.weighted(&value2));
let value2 = FeatureResult::String(String::from("Testing"));
assert_eq!(1., value1.weighted(&value2));
}
///this test is for the rgba values in the weighted function
#[test]
fn weighted_rgba() {
let value1 = FeatureResult::Rgba(32.6754, 42.432, 43.87, 255.);
let value2 = FeatureResult::Rgba(32.6754, 42.432, 43.87, 255.);
assert_eq!(1., value1.weighted(&value2));
let value1 = FeatureResult::Rgba(255., 255., 0., 255.);
let value2 = FeatureResult::Rgba(0., 0., 0., 255.);
//assert_eq!(1., value1.weighted(&value2)) ;
println!("Yellow to Black: {:?}", value1.weighted(&value2));
let value1 = FeatureResult::Rgba(255., 255., 0., 255.);
let value2 = FeatureResult::Rgba(200., 255., 55., 255.);
//assert_eq!(1., value1.weighted(&value2)) ;
println!("yellow to light green: {:?}", value1.weighted(&value2));
let value1 = FeatureResult::Rgba(3., 8., 255., 255.);
let value2 = FeatureResult::Rgba(3., 106., 255., 255.);
//assert_eq!(1., value1.weighted(&value2)) ;
println!("blue to dark blue: {:?}", value1.weighted(&value2));
let value1 = FeatureResult::Rgba(255., 106., 122., 255.);
let value2 = FeatureResult::Rgba(255., 1., 28., 255.);
//assert_eq!(1., value1.weighted(&value2)) ;
println!("Red to light red: {:?}", value1.weighted(&value2));
}
}