pr-ferrisgroup/src/search_index/mod.rs

438 lines
14 KiB
Rust

use crate::image::Image;
use crate::multithreading::{Task, ThreadPool};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::default::Default;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Arc;
/// Graded similarity between two values of the same type.
///
/// Where `PartialEq` only answers yes or no, this yields a score.
trait WeightedCmp {
/// Returns a similarity score for `self` and `other`.
///
/// In the `FeatureResult` implementation of this file higher means more
/// similar, and `0.0` is returned when the two variants differ.
fn weighted(&self, other: &Self) -> f32;
}
/// Value produced by a feature generator.
///
/// Every feature returns a known and sized type, expressed as one of the
/// variants below. The enum derives the serde traits so it can be stored as
/// part of the serialized index.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum FeatureResult {
/// A plain boolean flag.
Bool(bool),
/// Signed 32-bit integer.
I32(i32),
/// 32-bit single precision floating point;
/// can be used for aspect ratio or luminance.
F32(f32),
/// Nested list of results, for multidimensional features.
Vec(Vec<FeatureResult>),
/// RGBA color. The similarity comparison passes the first three
/// components to `rgb_to_lab`, which divides them by 255, and ignores the
/// fourth (alpha) component.
Rgba(f32, f32, f32, f32),
/// Indices intended for use in histograms; two such values are compared
/// with cosine similarity.
Indices(Vec<u64>),
/// A single character.
Char(char),
/// A string.
String(String),
/// An `f32` intended to lie between 0 and 1 (not enforced by the type).
Percent(f32),
}
impl Default for FeatureResult {
fn default() -> Self {
FeatureResult::Bool(false)
}
}
/// Strict, variant-by-variant equality.
///
/// Two results are equal only when they are the same variant and carry equal
/// payloads. Float payloads are compared with `==`, so a `NaN` payload never
/// equals anything. Graded comparison (for example cosine similarity for
/// histograms) is provided by `WeightedCmp::weighted`, not by this impl.
impl PartialEq for FeatureResult {
    fn eq(&self, other: &Self) -> bool {
        // Matching on `self` alone keeps the match exhaustive: adding a
        // variant to the enum forces a decision here.
        match self {
            Self::Bool(lhs) => matches!(other, Self::Bool(rhs) if lhs == rhs),
            Self::I32(lhs) => matches!(other, Self::I32(rhs) if lhs == rhs),
            Self::F32(lhs) => matches!(other, Self::F32(rhs) if lhs == rhs),
            Self::Vec(lhs) => matches!(other, Self::Vec(rhs) if lhs == rhs),
            Self::Rgba(l0, l1, l2, l3) => matches!(
                other,
                Self::Rgba(r0, r1, r2, r3) if l0 == r0 && l1 == r1 && l2 == r2 && l3 == r3
            ),
            Self::Indices(lhs) => matches!(other, Self::Indices(rhs) if lhs == rhs),
            Self::Char(lhs) => matches!(other, Self::Char(rhs) if lhs == rhs),
            Self::String(lhs) => matches!(other, Self::String(rhs) if lhs == rhs),
            Self::Percent(lhs) => matches!(other, Self::Percent(rhs) if lhs == rhs),
        }
    }
}
impl WeightedCmp for FeatureResult {
    /// Scores how similar `self` and `other` are.
    ///
    /// Mismatching variants always score `0.0`. Per variant:
    ///
    /// * `Bool`, `I32`, `Char`, `String`: `1.0` when equal, `0.0` otherwise.
    /// * `F32`: `1.0` when the values differ by less than `1e-4`, else `0.0`.
    /// * `Vec`: mean of the element-wise scores; `0.0` when the lengths
    ///   differ and `1.0` when both vectors are empty.
    /// * `Rgba`: `1 - ΔE / 100`, where ΔE is the Euclidean distance of the two
    ///   colors after `rgb_to_lab`; distances above 100 score `0.0`. The alpha
    ///   component is ignored.
    /// * `Indices`: cosine similarity over the overlapping prefix of the two
    ///   vectors. Two all-zero vectors score `1.0`; a zero vector against a
    ///   non-zero one scores `0.0`.
    /// * `Percent`: `1 - |l - r|`.
    fn weighted(&self, other: &Self) -> f32 {
        // Maps an exact-match test onto the two extreme scores.
        fn exact(same: bool) -> f32 {
            if same {
                1.
            } else {
                0.
            }
        }

        match (self, other) {
            (Self::Bool(l0), Self::Bool(r0)) => exact(l0 == r0),
            (Self::I32(l0), Self::I32(r0)) => exact(l0 == r0),
            (Self::F32(l0), Self::F32(r0)) => exact((l0 - r0).abs() < 1e-4),
            (Self::Vec(l), Self::Vec(r)) => {
                if l.len() != r.len() {
                    0.
                } else if l.is_empty() {
                    // Two empty vectors are identical. Without this guard the
                    // mean below would be 0.0 / 0.0, i.e. NaN.
                    1.
                } else {
                    let total = l
                        .iter()
                        .zip(r)
                        .fold(0_f32, |acc, (a, b)| acc + a.weighted(b));
                    total / l.len() as f32
                }
            }
            (Self::Rgba(l0, l1, l2, _), Self::Rgba(r0, r1, r2, _)) => {
                let lab_left = rgb_to_lab(vec![*l0, *l1, *l2]);
                let lab_right = rgb_to_lab(vec![*r0, *r1, *r2]);
                // Euclidean distance between the two colors: Delta E.
                let delta_e = lab_left
                    .iter()
                    .zip(lab_right.iter())
                    .fold(0_f32, |acc, (l, r)| acc + (l - r) * (l - r))
                    .sqrt();
                if delta_e > 100. {
                    0.
                } else {
                    1. - delta_e / 100.
                }
            }
            (Self::Indices(l), Self::Indices(r)) => {
                // Accumulate in f64: the squares and especially the product
                // of the two norms overflow u64 for realistic histogram
                // counts (panic in debug builds, silent wrap in release).
                let mut dot = 0_f64;
                let mut norm_left = 0_f64;
                let mut norm_right = 0_f64;
                for (&a, &b) in l.iter().zip(r.iter()) {
                    // Lossy above 2^53, which is acceptable for a similarity score.
                    let (a, b) = (a as f64, b as f64);
                    norm_left += a * a;
                    norm_right += b * b;
                    dot += a * b;
                }
                let denominator = (norm_left * norm_right).sqrt();
                if denominator == 0. {
                    // At least one side is a zero vector, so the cosine is
                    // undefined: call it a perfect match only if both are zero.
                    exact(norm_left == norm_right)
                } else {
                    (dot / denominator) as f32 // cosine similarity
                }
            }
            (Self::Char(l0), Self::Char(r0)) => exact(l0 == r0),
            (Self::String(l0), Self::String(r0)) => exact(l0 == r0),
            (Self::Percent(l0), Self::Percent(r0)) => 1. - (l0 - r0).abs(),
            _ => 0.,
        }
    }
}
/// Converts an RGB color into a three-component `[l, a, b]` array.
///
/// `rgb` must hold at least three elements (red, green, blue); each is
/// divided by 255 before further processing. Indexing panics when fewer than
/// three elements are supplied.
///
/// NOTE(review): the constants look like the commonly published sRGB -> XYZ ->
/// CIE L*a*b* conversion with a D65 reference white - confirm if exact
/// colorimetry matters.
fn rgb_to_lab(rgb: Vec<f32>) -> [f32; 3] {
    // Scale a channel by 1/255 and undo the gamma encoding.
    let linearize = |channel: f32| -> f32 {
        let scaled = channel / 255.0;
        if scaled > 0.04045 {
            ((scaled + 0.055) / 1.055).powf(2.4)
        } else {
            scaled / 12.92
        }
    };
    // Cube root above the threshold, linear segment below it.
    let compress = |t: f32| -> f32 {
        if t > 0.008856 {
            t.powf(1.0 / 3.0)
        } else {
            (7.787 * t) + (16.0 / 116.0)
        }
    };

    let red = linearize(rgb[0]);
    let green = linearize(rgb[1]);
    let blue = linearize(rgb[2]);

    // Linear RGB -> XYZ, each axis normalised by its reference value.
    let fx = compress((red * 0.4124 + green * 0.3576 + blue * 0.1805) / 0.95047);
    let fy = compress((red * 0.2126 + green * 0.7152 + blue * 0.0722) / 1.0);
    let fz = compress((red * 0.0193 + green * 0.1192 + blue * 0.9505) / 1.08883);

    [(116.0 * fy) - 16.0, 500.0 * (fx - fy), 200.0 * (fy - fz)]
}
/// Signature of a feature generator: a plain function that receives a shared
/// image and returns the feature's name together with its computed value.
pub type FeatureGenerator = fn(Arc<Image<f32>>) -> (String, FeatureResult);
/// Searchable image database: the feature index plus what is needed to
/// extend it with further images.
#[derive(Default)]
pub struct Database {
/// Per-image feature results; the only part that `from_file` restores.
images: IndexedImages,
/// Feature generators, kept for the case when we add a new image.
/// `Database` itself is not serialized: `from_file` leaves this list
/// empty, and `add_image` panics on an empty list, which makes databases
/// loaded from a file read-only.
generators: Vec<FeatureGenerator>,
/// Thread pool the feature generators are enqueued on.
threadpool: ThreadPool<Arc<Image<f32>>, (String, FeatureResult)>,
}
impl Database {
    /// Runs `feature` for the query image and scores the indexed images
    /// against it.
    ///
    /// Thin wrapper: the work is delegated to the underlying index, which
    /// returns one `(path, score)` pair per image that has a stored result
    /// for the same feature name.
    pub fn search(&self, imagepath: &Path, feature: FeatureGenerator) -> Vec<(PathBuf, f32)> {
        let index = &self.images;
        index.search(imagepath, feature)
    }

    /// Generates a new database out of a vector of image paths and a vector
    /// of feature generators.
    ///
    /// The generators are kept so that images added later are processed the
    /// same way.
    pub fn new(imagepaths: &Vec<PathBuf>, features: Vec<FeatureGenerator>) -> Self {
        let mut threadpool = ThreadPool::new();
        let images = IndexedImages::new(imagepaths, &features, &mut threadpool);
        Self {
            images,
            generators: features,
            threadpool,
        }
    }

    /// Adds an image to an existing database.
    ///
    /// # Panics
    ///
    /// Panics when the database has no feature generators, which is the case
    /// for databases created with [`Database::from_file`]: those are read only.
    pub fn add_image(&mut self, path: &Path) {
        if self.generators.is_empty() {
            panic!("database without generator functions is immutable")
        }
        self.images
            .add_image(path, &self.generators, &mut self.threadpool)
    }

    /// Loads a previously serialized index from the JSON file at `path`.
    ///
    /// The resulting database has no feature generators.
    ///
    /// # Panics
    ///
    /// Panics when the file cannot be read or does not deserialize into an
    /// index.
    pub fn from_file(path: &Path) -> Self {
        let contents = fs::read_to_string(path).expect("can't read that file");
        let images: IndexedImages =
            serde_json::from_str(&contents).expect("unable to deserialize the file");
        Self {
            images,
            generators: Vec::new(),
            threadpool: ThreadPool::new(),
        }
    }
}
/// The serializable part of the database: the computed feature results of
/// every indexed image.
#[derive(Serialize, Deserialize, Default, PartialEq, Debug)]
struct IndexedImages {
/// Maps an image path to that image's results, keyed by feature name.
images: HashMap<PathBuf, HashMap<String, FeatureResult>>,
}
impl IndexedImages {
    /// Builds the index by running every generator in `features` once per
    /// entry of `imagepaths`.
    ///
    /// NOTE(review): image loading is still a placeholder. Each entry is
    /// represented by `Image::default()` and stored under that image's own
    /// `path()`, so the supplied paths are not read yet.
    fn new(
        imagepaths: &[PathBuf],
        features: &[FeatureGenerator],
        threadpool: &mut ThreadPool<Arc<Image<f32>>, (String, FeatureResult)>,
    ) -> Self {
        let mut images = HashMap::new();
        // `_path` stays unused until the image reader exists.
        for _path in imagepaths {
            let image: Arc<Image<f32>> = Arc::new(Image::default()); //todo!("Image reader function")
            let feats = Self::compute_features(&image, features, threadpool);
            images.insert(image.path().clone(), feats);
        }
        Self { images }
    }

    /// Runs `feature` on the query image and scores it against the stored
    /// result of the same name for every indexed image.
    ///
    /// Returns one `(path, score)` pair per image that has a result stored
    /// under the generated feature name; images without it are skipped.
    ///
    /// NOTE(review): the query image is still `Image::default()`;
    /// `imagepath` is not read yet.
    fn search(&self, imagepath: &Path, feature: FeatureGenerator) -> Vec<(PathBuf, f32)> {
        let _ = imagepath; // unused until the image reader exists
        let image: Arc<Image<f32>> = Arc::new(Image::default()); //todo!("Image reader function")
        let (feature_name, query_value) = feature(image);
        self.images
            .iter()
            .filter_map(|(path, feats)| {
                // Feature names are the map keys, so look the name up
                // directly instead of scanning every stored feature.
                feats
                    .get(&feature_name)
                    .map(|stored| (path.clone(), query_value.weighted(stored)))
            })
            .collect()
    }

    /// Computes all features for one more image and stores them, replacing
    /// any entry already present under the same path.
    ///
    /// NOTE(review): as in `new`, the image is still `Image::default()`;
    /// `path` is not read yet.
    fn add_image(
        &mut self,
        path: &Path,
        generator: &[FeatureGenerator],
        threadpool: &mut ThreadPool<Arc<Image<f32>>, (String, FeatureResult)>,
    ) {
        let _ = path; // unused until the image reader exists
        let image: Arc<Image<f32>> = Arc::new(Image::default()); //todo!("Image reader function")
        let feats = Self::compute_features(&image, generator, threadpool);
        self.images.insert(image.path().clone(), feats);
    }

    /// Enqueues one task per generator for `image` and gathers the pool's
    /// results into a name -> result map.
    fn compute_features(
        image: &Arc<Image<f32>>,
        generators: &[FeatureGenerator],
        threadpool: &mut ThreadPool<Arc<Image<f32>>, (String, FeatureResult)>,
    ) -> HashMap<String, FeatureResult> {
        for generator in generators {
            threadpool.enqueue(Task::new(Arc::clone(image), *generator));
        }
        let mut feats = HashMap::new();
        for (name, result) in threadpool.get_results() {
            feats.insert(name, result);
        }
        feats
    }
}
/// Example feature implementation.
///
/// Placeholder: `image` is not inspected, and the result is always
/// `F32(0.0)` under the feature name `"average-brightness"`.
#[allow(dead_code)]
fn average_luminance(image: Arc<Image<f32>>) -> (String, FeatureResult) {
    let name = "average-brightness".to_owned();
    (name, FeatureResult::F32(0.0))
}
#[cfg(test)]
mod tests {
    use super::*;

    // An index must survive a JSON round trip unchanged.
    #[test]
    fn conversion() {
        let mut feat: HashMap<String, FeatureResult> = HashMap::new();
        feat.insert(String::from("average-brightness"), FeatureResult::F32(0.0));
        let mut images: HashMap<PathBuf, HashMap<String, FeatureResult>> = HashMap::new();
        images.insert(PathBuf::new(), feat);
        let data = IndexedImages { images };

        let as_json = serde_json::to_string(&data).expect("couldnt convert");
        let data_after_conversion =
            serde_json::from_str::<IndexedImages>(&as_json).expect("couldnt convert from string");
        assert_eq!(data, data_after_conversion);
    }

    // Histogram comparison: fixed points, symmetry and value range.
    #[test]
    fn cosine_similarity() {
        let vec1 = FeatureResult::Indices(vec![1, 3, 4]);
        let vec2 = FeatureResult::Indices(vec![1, 3, 4]);
        assert_eq!(1., vec1.weighted(&vec2)); // both are identical

        let vec2 = FeatureResult::Indices(vec![0, 0, 0]);
        assert_eq!(0., vec1.weighted(&vec2)); // one is 0

        let vec1 = FeatureResult::Indices(vec![0, 0, 0]);
        assert_eq!(1., vec1.weighted(&vec2)); // both are 0
        assert_eq!(1., vec2.weighted(&vec1)); // it shouldn't change if the values are switched

        let vec1 = FeatureResult::Indices(vec![7, 3, 4]);
        let vec2 = FeatureResult::Indices(vec![1, 5, 2]);
        let score = vec1.weighted(&vec2);
        assert_eq!(score, vec2.weighted(&vec1));
        assert!(score > 0. && score < 1., "unexpected score {score}");

        // Long, nearly parallel histograms must score just below 1.
        let mut long1 = vec![5; 9999];
        long1.push(1);
        let long1 = FeatureResult::Indices(long1);
        let long2 = FeatureResult::Indices(vec![7; 10000]);
        let score = long1.weighted(&long2);
        assert!(score > 0.99 && score <= 1., "unexpected score {score}");
    }

    // Nested vectors average their element scores; scalar variants are exact.
    #[test]
    fn weighted() {
        let vec1 = FeatureResult::Vec(vec![
            FeatureResult::Bool(true),
            FeatureResult::Char('c'),
            FeatureResult::Vec(vec![FeatureResult::Percent(0.5)]),
            FeatureResult::F32(44.543),
        ]);
        let vec2 = FeatureResult::Vec(vec![
            FeatureResult::Bool(true),
            FeatureResult::Char('c'),
            FeatureResult::Vec(vec![FeatureResult::Percent(0.5)]),
            FeatureResult::F32(44.543),
        ]);
        assert_eq!(1., vec2.weighted(&vec1));

        // Same elements, but the last two are swapped: only half of the
        // positions still line up.
        let vec2 = FeatureResult::Vec(vec![
            FeatureResult::Bool(true),
            FeatureResult::Char('c'),
            FeatureResult::F32(44.543),
            FeatureResult::Vec(vec![FeatureResult::Percent(0.5)]),
        ]);
        assert_eq!(0.5, vec2.weighted(&vec1));
        assert_eq!(0.5, vec1.weighted(&vec2));

        let value1 = FeatureResult::F32(44.543);
        let value2 = FeatureResult::F32(44.543);
        assert_eq!(1., value1.weighted(&value2));

        let value1 = FeatureResult::Bool(true);
        let value2 = FeatureResult::Bool(false);
        assert_eq!(0., value1.weighted(&value2));

        let value1 = FeatureResult::String(String::from("Testing"));
        let value2 = FeatureResult::String(String::from("notTesting"));
        assert_eq!(0., value1.weighted(&value2));
        let value2 = FeatureResult::String(String::from("Testing"));
        assert_eq!(1., value1.weighted(&value2));
    }

    // Color comparison: identical colors match fully, very distant colors
    // not at all, and everything else lands strictly in between.
    #[test]
    fn weighted_rgba() {
        let score = |left: (f32, f32, f32), right: (f32, f32, f32)| {
            let left = FeatureResult::Rgba(left.0, left.1, left.2, 255.);
            let right = FeatureResult::Rgba(right.0, right.1, right.2, 255.);
            left.weighted(&right)
        };

        assert_eq!(1., score((32.6754, 42.432, 43.87), (32.6754, 42.432, 43.87)));

        // Yellow to black: the distance exceeds 100, so the score bottoms out.
        assert_eq!(0., score((255., 255., 0.), (0., 0., 0.)));

        let partial_matches = [
            ("yellow to light green", (255., 255., 0.), (200., 255., 55.)),
            ("blue to lighter blue", (3., 8., 255.), (3., 106., 255.)),
            ("light red to red", (255., 106., 122.), (255., 1., 28.)),
        ];
        for (label, left, right) in partial_matches {
            let value = score(left, right);
            assert!(value > 0. && value < 1., "{label}: unexpected score {value}");
        }
    }
}