Compare commits
3 Commits
main ... compressed

Author | SHA1 | Date |
---|---|---|
Sven Vogel | 1f5a530c60 | |
Sven Vogel | 9b7d91ad5b | |
Sven Vogel | 6c0ec16a9c | |
@@ -6,11 +6,9 @@
       <sourceFolder url="file://$MODULE_DIR$/duplicates/src" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/str_sort/src" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/sparse_vector/src" isTestSource="false" />
-      <sourceFolder url="file://$MODULE_DIR$/container_type/src" isTestSource="false" />
       <excludeFolder url="file://$MODULE_DIR$/duplicates/target" />
       <excludeFolder url="file://$MODULE_DIR$/str_sort/target" />
       <excludeFolder url="file://$MODULE_DIR$/sparse_vector/target" />
-      <excludeFolder url="file://$MODULE_DIR$/container_type/target" />
     </content>
     <orderEntry type="inheritedJdk" />
     <orderEntry type="sourceFolder" forTests="false" />
@@ -1,7 +1,3 @@
 # Rust-Programming
 
-Repository hosting code of the excercises made during class.
-
-The projects are not sorted chronologically.
-
-The entire repository and all of its content are licensed under GPLv2 or later with exceptions to specific pieces of code written by the lecturer without licencse notice.
+This repository contains several cargo projects that implement various tasks from the lesson rust programming at DHBW.
@@ -1,8 +0,0 @@
-[package]
-name = "container_type"
-version = "0.1.0"
-edition = "2021"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
-[dependencies]
@@ -1,130 +0,0 @@
-use std::{vec::{IntoIter}, ops::Index, ops::IndexMut, println};
-
-/**
- * _ _ _ _
- * __ ___ __(_) |_| |_ ___ _ __ | |__ _ _
- * \ \ /\ / / '__| | __| __/ _ \ '_ \ | '_ \| | | |
- * \ V V /| | | | |_| || __/ | | | | |_) | |_| |
- * \_/\_/ |_| |_|\__|\__\___|_| |_| |_.__/ \__, |
- * |___/
- * ____ __ __ _
- * / ___|_ _____ _ __ \ \ / /__ __ _ ___| |
- * \___ \ \ / / _ \ '_ \ \ \ / / _ \ / _` |/ _ \ |
- * ___) \ V / __/ | | | \ V / (_) | (_| | __/ |
- * |____/ \_/ \___|_| |_| \_/ \___/ \__, |\___|_|
- * |___/
- * Licensed under the GPLv2 License, Version 2.0 (the "License");
- * Copyright (c) Sven Vogel
- */
-
-/// A vector based implementation of an associative map.
-/// This strucutre maps a given key to a single value.
-/// The type achives this by storing every pair of key/value pairs
-/// in a single vector.
-/// Thus for looking up a value takes a linear amount of time: O(n) in the worst case.
-/// Adding a new value is a constant time operation since the map is not sorted in any
-/// particular way.
-/// Note that it is not possible to insert a new value with the same key. Instead the old value
-/// associated with the already existing key will be replaced with the new value.
-/// # Example
-/// ```rust ignore
-/// let mut map = HashishMap::new();
-///
-/// map.insert("abc", 99);
-///
-/// map[&"abc"] += 1;
-/// ```
-pub struct HashishMap<K, V> where K: Eq {
-    vec: Vec<(K, V)>
-}
-
-impl<K, V> HashishMap<K, V> where K: Eq {
-    /// Create a new empty instance
-    pub fn new() -> Self {
-        Self { vec: vec![] }
-    }
-
-    /// retrieve a reference to value associated with the specified key
-    /// if no such key exists in the map [`Option::None`] is returned
-    pub fn get(&self, key: &K) -> Option<&V> {
-        return match self.vec.iter().find(|(k, _)| *k == *key) {
-            Some((_, v)) => Some(v),
-            _ => None
-        }
-    }
-
-    /// retrieve a mutable reference to value associated with the specified key
-    /// if no such key exists in the map [`Option::None`] is returned
-    pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {
-        return match self.vec.iter_mut().find(|(k, _)| *k == *key) {
-            Some((_, v)) => Some(v),
-            _ => None
-        }
-    }
-
-    /// insert a new value at the specified key.
-    /// Overrides the existing value if the key already exists.
-    /// The overriden value is discarded.
-    pub fn insert(&mut self, key: K, value: V) {
-        if let Some(val) = self.get_mut(&key) {
-            *val = value;
-        } else {
-            self.vec.push((key, value));
-        }
-    }
-
-    /// Removes the key/value pair with the specified key from the map and return the value of the pair.
-    /// If no such pair can be found, [`Option::None`] is retuned
-    pub fn remove(&mut self, key: &K) -> Option<V> {
-        // find a key matching the parameter and its according index.
-        // remove the item at the found index from the vector and return its value
-        return match self.vec.iter().enumerate().find(|(_, (k, _))| *k == *key) {
-            Some((idx, _)) => {
-                Some(self.vec.remove(idx).1)
-            },
-            _ => None
-        }
-    }
-}
-
-impl<K,V> Index<K> for HashishMap<K, V> where K: Eq {
-    type Output=V;
-
-    fn index(&self, index: K) -> &Self::Output {
-        self.get(&index).unwrap()
-    }
-}
-
-impl<K,V> IndexMut<K> for HashishMap<K, V> where K: Eq {
-
-    fn index_mut(&mut self, index: K) -> &mut Self::Output {
-        self.get_mut(&index).unwrap()
-    }
-}
-
-impl<K, V> IntoIterator for HashishMap<K, V> where K: Eq {
-    type Item=(K, V);
-    type IntoIter = IntoIter<Self::Item>;
-
-    fn into_iter(self) -> Self::IntoIter {
-        self.vec.into_iter()
-    }
-}
-
-fn main() {
-
-    let mut guter_stoff = HashishMap::<&str, u64>::new();
-
-    guter_stoff.insert("helmut", 0xCafeBabe);
-    guter_stoff.insert("dieter", 0xDeadbeef);
-    guter_stoff.insert("eisele", 0xBaadF00d);
-    guter_stoff.insert("bohlen", 0xFaceFeed);
-
-    guter_stoff[&"bohlen"] = 0xBaadB015;
-
-    guter_stoff.remove(&"helmut");
-
-    for (k, v) in guter_stoff {
-        println!("({k}, {:x})", v);
-    }
-}
@@ -1,5 +0,0 @@
-# Duplicates
-
-This project implements functionality to store either a floating point number or a character inside of an enumeration. A function can then find any duplicates in an array of these enumerations.
-
-Licensed under GPLv2 or later, same as the entire repository
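The deleted README only describes the exercise in prose; the `FloatOrChar`, `Float` and `Char` names do appear in the next hunk, but everything else in the following sketch (including the `find_duplicates` helper) is an assumption added here for illustration, not the project's actual code:

```rust
use std::collections::HashSet;

// Assumed shape of the enum; only the names FloatOrChar, Float and Char
// come from the diff below, the rest of this sketch is illustrative.
#[derive(Debug, Clone, Copy)]
enum FloatOrChar {
    Float(f64),
    Char(char),
}

// Collects every element that was already seen before, i.e. each duplicate
// occurrence. Floats are keyed by their bit pattern so they can live in a HashSet.
fn find_duplicates(items: &[FloatOrChar]) -> Vec<FloatOrChar> {
    let mut seen = HashSet::new();
    let mut duplicates = Vec::new();
    for &item in items {
        let key = match item {
            FloatOrChar::Float(f) => (0u8, f.to_bits()),
            FloatOrChar::Char(c) => (1u8, c as u64),
        };
        if !seen.insert(key) {
            duplicates.push(item);
        }
    }
    duplicates
}

fn main() {
    let data = [
        FloatOrChar::Float(1.5),
        FloatOrChar::Char('a'),
        FloatOrChar::Float(1.5),
    ];
    println!("{:?}", find_duplicates(&data));
}
```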
@@ -14,7 +14,9 @@
  * Licensed under the GPLv2 License, Version 2.0 (the "License");
  * Copyright (c) Sven Vogel
  */
 
 use crate::FloatOrChar::{Char, Float};
+use std::collections::{HashMap, HashSet};
 use std::fmt::Debug;
 use std::usize;
+
@@ -1,8 +0,0 @@
-# Fibonacci
-
-This project implements two version of the famous fibonacci sequence:
-
-* an iterative one
-* and a recursive one
-
-Licensed under GPLv2 or later, same as the entire repository
@@ -25,11 +25,11 @@ fn fib_rec(x: u128) -> u128 {
 }
 
 /// iterative variant of the fibonacci function
-fn fib_iter(x: u128) -> u128 {
+fn fib_loop(x: u128) -> u128 {
     let mut sum = 0;
     let mut sum2 = 1;
 
-    for _ in 0..(x - 1) {
+    for x in 0..(x - 1) {
         let t = sum;
         sum = sum2;
         sum2 = t + sum;
@@ -38,7 +38,5 @@ fn fib_iter(x: u128) -> u128 {
 }
 
 fn main() {
-    const VALUE: u128 = 23;
-    println!("fibonacci iterative of {VALUE}: {}", fib_iter(VALUE));
-    println!("fibonacci recursive of {VALUE}: {}", fib_rec(VALUE));
+    println!("{}", fib_loop(5));
 }
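The hunks above only touch the iterative variant; the recursive variant mentioned in the deleted README lies outside the hunk, so its body is not shown. A recursive implementation could look roughly like this sketch (an illustration, not the project's actual `fib_rec`):

```rust
/// Recursive variant of the fibonacci function (illustrative sketch only).
fn fib_rec(x: u128) -> u128 {
    match x {
        0 => 0,
        1 => 1,
        _ => fib_rec(x - 1) + fib_rec(x - 2),
    }
}

fn main() {
    // fib(0..=10) = 0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55
    assert_eq!(fib_rec(10), 55);
}
```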
@@ -1,8 +0,0 @@
-[package]
-name = "line"
-version = "0.1.0"
-edition = "2021"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
-[dependencies]
@@ -1,4 +0,0 @@
-# Shapes and lines
-Implements some traits provided by the lecturer regarding the calculation of various distance to the origin function for various shapes.
-
-Licensed under GPLv2 or later, same as the entire repository
@@ -1,96 +0,0 @@
-/**
- * _ _ _ _
- * __ ___ __(_) |_| |_ ___ _ __ | |__ _ _
- * \ \ /\ / / '__| | __| __/ _ \ '_ \ | '_ \| | | |
- * \ V V /| | | | |_| || __/ | | | | |_) | |_| |
- * \_/\_/ |_| |_|\__|\__\___|_| |_| |_.__/ \__, |
- * |___/
- * ____ __ __ _
- * / ___|_ _____ _ __ \ \ / /__ __ _ ___| |
- * \___ \ \ / / _ \ '_ \ \ \ / / _ \ / _` |/ _ \ |
- * ___) \ V / __/ | | | \ V / (_) | (_| | __/ |
- * |____/ \_/ \___|_| |_| \_/ \___/ \__, |\___|_|
- * |___/
- * Licensed under the GPLv2 License, Version 2.0 (the "License");
- * Copyright (c) Sven Vogel
- */
-use std::cmp::PartialOrd;
-use std::ops::{Add, Div, Mul, Sub};
-
-trait Calculate:
-    Mul<Output = Self> + Add<Output = Self> + Sub<Output = Self> + Div<Output = Self> + Copy
-{
-}
-
-impl<T> Calculate for T where
-    T: Mul<Output = Self> + Add<Output = Self> + Sub<Output = Self> + Div<Output = Self> + Copy
-{
-}
-
-struct Point<T: Mul<Output = T> + Add<Output = T> + Copy> {
-    x: T,
-    y: T,
-}
-
-#[allow(dead_code)]
-impl<T: Mul<Output = T> + Add<Output = T> + Copy> Point<T> {
-    fn squared_dist_to_0(&self) -> T {
-        self.x * self.x + self.y * self.y
-    }
-}
-
-trait MeasureDistanceTo0<T: Calculate> {
-    fn squared_dist_to_0(&self) -> T;
-}
-
-struct Line<T: Calculate> {
-    p: Point<T>,
-    n: Point<T>,
-}
-
-impl<T: Calculate + Default> MeasureDistanceTo0<T> for Line<T> {
-    fn squared_dist_to_0(&self) -> T {
-        let len = self.n.x * self.n.x + self.n.y * self.n.y;
-        let normalized = Point {
-            x: self.n.x / len,
-            y: self.n.y / len,
-        };
-
-        normalized.x * self.p.x + normalized.y * self.p.y
-    }
-}
-
-impl<T: Calculate + Default> Line<T> {
-    pub fn new() -> Self {
-        Self {
-            p: Point {
-                x: Default::default(),
-                y: Default::default(),
-            },
-            n: Point {
-                x: Default::default(),
-                y: Default::default(),
-            },
-        }
-    }
-}
-
-fn longest_dist_to_0<T>(p1: Line<T>, p2: Line<T>) -> T
-where
-    T: Calculate + Default + PartialOrd,
-{
-    let d1 = p1.squared_dist_to_0();
-    let d2 = p2.squared_dist_to_0();
-    if d1 > d2 {
-        d1
-    } else {
-        d2
-    }
-}
-
-fn main() {
-    let l0: Line<f64> = Line::new();
-    let l1: Line<f64> = Line::new();
-
-    println!("{:?}", longest_dist_to_0(l0, l1));
-}
@@ -10,4 +10,7 @@ rand = "0.8.5"
 futures = "0.3.28"
 jemalloc-ctl = "0.5.0"
 jemallocator = "0.5.0"
 bytesize = "1.2.0"
+
+[features]
+binary_search = []
@@ -15,14 +15,9 @@ Though this comes a the cost that we may need to decide between memory saving an
 * Hashmap
 * Index Array
 * Compressed Index Array
-* Binary Heap
 
 ### Index Array
 
-> **NOTE**
->
-> Due to poor choice of names and lazyness the implementation can only be found in the branch `compressed_indices_2`
-
 We can omit all zero elements by storing an index array alongside all non zero values. Each value will be associated with an index in from the index array. This model is only efficient in memory size when the amount of zero elements is at least 50%. Since I used `usize` to store the indices, which is equal to a `u64` in 64-bit architectures, The required memory is:
 
 ```
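As a quick illustration of that estimate (added here, not part of the original README): for the 10^10-element vector with 2% non-zero entries used in the comparison further down, the index-array layout needs roughly 0.02 × 10^10 × (8 B value + 8 B `usize` index) ≈ 3.2 × 10^9 B ≈ 3.2 GB, which is in the same ballpark as the 3.6 GB reported for the Index Array row of the results table.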
@@ -33,22 +28,12 @@ One significant downside is the cost of finding each corresponding entry when pe
 
 ### Hashmap Implementation
 
-> **NOTE**
->
-> Due to poor choice of names and lazyness the implementation can only be found in the branch `hashmap`
-
 This implementation uses a hashmap to associate a value with its corresponding index in the vectors column. In Theory this should be as efficient in memory size as the previous array index method.
 
 But in comparision this method requires signifacantly more memory since a hashmap allocates more memory than it can fill in order to reduce collisions.
 
 It has one significant benefit, that being speed in calculations. Looking up values in a hashmap is generally faster than performing a binary seach. Also inserting and deleting is an O(1) operation.
 
-> NOTE
->
-> Two implementations of the dot product can be found:
->
-> One implemented with a simple loop and one with a binary search. From testing I can say, that the simple loop variant is significanty faster than the crude binary search.
-
 ### Compressed Index Array
 
 In order to reduce the size required to store the indices of each value we can compress them by only storing the relative offset to the previous value:
@@ -62,16 +47,9 @@ This yields smaller values. Thus we can savely reduce the bandwidth of available
 
 In this implementation I reduced to size from 64 to 16 bit. This makes memory usage a lot smaller, but computation gets a lot heavier, since all values have to be decompressed on the fly. A possible improvement would be to cache uncompressed values. May be worth investigating futher.
 
-### Binary Heap
-
-Implementation can be found in the main branch.
-
-The binary heap has the advantage of being fast with inserting, removing and looking up values in logarithmic time.
-We use indices again to sort the values of the vector into to binary heap.
-
 ## Comparision
 
-The following values were achieved by using a randomly initialized vector with a length of 10^10 elements from which 2% were non zero. The dot product implementation was single threaded and run in release mode on hyperthreaded intel hardware.
+The following values were achieved by using a randomly initialized vector with a length of 10^10 elements from which 2% were non zero. The dot product implementation was single threaded.
 
 
 | Implementation | Size on Heap (GB) | Runtime of dot product (s) |
@@ -80,6 +58,3 @@ The following values were achieved by using a randomly initialized vector with a
 | Index Array | 3.6 | 6.254261896 |
 | Hashmap | 5.4 | 0.732189927 |
 | Compressed Index Array | 2.0 | > 120 |
-| Binary Heap | 1.3 | 2.089960966 |
-
-Licensed under GPLv2 or later, same as the entire repository
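The "Compressed Index Array" section above describes the delta encoding only in prose, and the removed note says the real implementation lives in another branch. A minimal sketch of the idea, with entirely made-up helper names and not the project's code, could look like this:

```rust
// Hypothetical sketch of delta-encoding sparse indices into u16 offsets,
// as described in the "Compressed Index Array" section.
fn compress(indices: &[usize]) -> Vec<u16> {
    let mut last = 0usize;
    indices
        .iter()
        .map(|&idx| {
            // store only the gap to the previous index; assumes gaps fit into u16
            let delta = (idx - last) as u16;
            last = idx;
            delta
        })
        .collect()
}

fn decompress(deltas: &[u16]) -> Vec<usize> {
    let mut acc = 0usize;
    deltas
        .iter()
        .map(|&d| {
            // running sum of the gaps restores the absolute indices
            acc += d as usize;
            acc
        })
        .collect()
}

fn main() {
    let indices = vec![3, 10, 11, 400];
    let packed = compress(&indices);
    assert_eq!(decompress(&packed), indices);
}
```

The u16 offsets only work while consecutive non-zero indices are less than 65536 apart; larger gaps would need padding entries or a wider offset type, which is one reason the README flags the on-the-fly decompression as the expensive part.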
@@ -1,85 +1,105 @@
-/**
- * _ _ _ _
- * __ ___ __(_) |_| |_ ___ _ __ | |__ _ _
- * \ \ /\ / / '__| | __| __/ _ \ '_ \ | '_ \| | | |
- * \ V V /| | | | |_| || __/ | | | | |_) | |_| |
- * \_/\_/ |_| |_|\__|\__\___|_| |_| |_.__/ \__, |
- * |___/
- * ____ __ __ _
- * / ___|_ _____ _ __ \ \ / /__ __ _ ___| |
- * \___ \ \ / / _ \ '_ \ \ \ / / _ \ / _` |/ _ \ |
- * ___) \ V / __/ | | | \ V / (_) | (_| | __/ |
- * |____/ \_/ \___|_| |_| \_/ \___/ \__, |\___|_|
- * |___/
- * Licensed under the GPLv2 License, Version 2.0 (the "License");
- * Copyright (c) Sven Vogel
- */
-
-use std::collections::BTreeMap;
-use std::time::Instant;
 use bytesize::ByteSize;
+use jemalloc_ctl::{epoch, stats};
 use rand::Rng;
-use jemalloc_ctl::{stats, epoch};
+use std::time::Instant;
 
-// we use a custom allocator for tracking heap allocations
 #[global_allocator]
 static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
 
-/// Wrapper struct around a BinaryTreeMap that stores the non zero elements of a vector by using the indices
-/// as keys in the tree.
+/// Only stores more efficiently when at least 50% of all elements are zeros
 pub struct SparseVec {
-    map: BTreeMap<usize, f64>
+    values: Vec<f64>,
+    indices: Vec<usize>,
 }
 
 impl SparseVec {
 
-    /// Compute the dot product of two vectors
     pub fn dot(&self, other: &SparseVec) -> f64 {
         let mut sum = 0.0;
 
-        for (k, v) in self.map.iter() {
-            sum += v * other.map.get(k).unwrap_or(&0.0);
+        #[cfg(not(feature="binary_search"))]
+        {
+            let mut x = 0;
+            let mut y = 0;
+
+            while x < self.indices.len() && y < other.indices.len() {
+                if self.indices[x] == other.indices[y] {
+                    sum += self.values[x] * other.values[y];
+
+                    x += 1;
+                    y += 1;
+                } else if self.indices[x] > other.indices[y] {
+                    y += 1;
+                } else {
+                    x += 1;
+                }
+            }
+        }
+
+        #[cfg(feature="binary_search")]
+        {
+            for index in 0..other.indices.len() {
+                // binary search for an element in the second vector to have the same index
+                sum += binary_search(self.indices[index], &other.indices, &other.values)
+                    * self.values[index];
+            }
         }
 
         sum
     }
 
-    /// Create a new SparseVec with a theoretical size of `elements`. `non_null`is the ration of non zero elements
-    /// in the sparse vector. A value of 0.0 means that all elements are zero.
     pub fn new(elements: usize, non_null: f64) -> Self {
-        // calculate the number of non-zero elements
         let non_zero_elements = (elements as f64 * non_null) as usize;
 
-        // create the map
-        let mut map = BTreeMap::new();
+        let mut values = Vec::with_capacity(non_zero_elements);
+        let mut indices = Vec::with_capacity(non_zero_elements);
 
         let mut rng = rand::thread_rng();
 
-        // generate some random values
        for i in 0..non_zero_elements {
-            // generate a random index that continuesly increases
-            let idx = i as f32 / non_zero_elements as f32 * (elements as f32 - 4.0) + rng.gen_range(0.0..3.0);
+            values.push(0.5);
 
-            map.insert(idx as usize, 0.5);
+            let idx = i as f32 / non_zero_elements as f32 * (elements as f32 - 4.0)
+                + rng.gen_range(0.0..3.0);
+            indices.push(idx as usize);
         }
 
-        Self {
-            map
-        }
+        Self { values, indices }
     }
 }
 
-// rudimentary macro for timing a block of code
+#[inline]
+fn binary_search(target: usize, indices: &[usize], values: &[f64]) -> f64 {
+    let mut range = 0..indices.len();
+    loop {
+        let mut median = (range.end - range.start) >> 1;
+        if median == 0 {
+            break;
+        }
+        median += range.start;
+
+        if indices[median] == target {
+            return values[median];
+        }
+
+        if indices[median] > target {
+            range.end = median;
+        } else {
+            range.start = median;
+        }
+    }
+
+    0.0
+}
+
 macro_rules! time {
     ($name:literal, $block:expr) => {{
         let start = Instant::now();
         $block;
         println!("{} took {}s", $name, start.elapsed().as_secs_f64());
-    }}
+    }};
 }
 
 fn main() {
 
     /// Theoretical size of the vector in elements
     /// This would mean the we would require 10 GBs of memory to store a single vector
     const VECTOR_SIZE: usize = 10_000_000_000;
@@ -91,7 +111,10 @@ fn main() {
     let non_zero_elements = (VECTOR_SIZE as f64 * NULL_NON_NULL_RATIO) as usize;
     let heap_element_size = std::mem::size_of::<f64>() + std::mem::size_of::<usize>();
 
-    println!("Estimated size on heap: {}", ByteSize::b((non_zero_elements * heap_element_size) as u64));
+    println!(
+        "Estimated size on heap: {}",
+        ByteSize::b((non_zero_elements * heap_element_size) as u64)
+    );
     println!("Size on stack: {} B", std::mem::size_of::<SparseVec>());
 
     let vec: SparseVec;
@@ -103,9 +126,12 @@ fn main() {
 
     // many statistics are cached and only updated when the epoch is advanced.
     epoch::advance().unwrap();
-    println!("Heap allocated bytes (total): {}", ByteSize::b(stats::allocated::read().unwrap() as u64));
+    println!(
+        "Heap allocated bytes (total): {}",
+        ByteSize::b(stats::allocated::read().unwrap() as u64)
+    );
 
     time!("Sparse vector dot product", {
         vec.dot(&vec);
     });
 }
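Since the rewritten `dot` gates its two code paths behind the `binary_search` cargo feature declared in Cargo.toml above, the variant is presumably selected at build time: a plain `cargo run --release` takes the default two-pointer merge, while `cargo run --release --features binary_search` enables the binary-search path instead.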
@@ -1,8 +0,0 @@
-# String sorting
-This repository contains a set of functions to sort the characters of a string.
-Various different functions are available:
-* sort the ascii characters only (unsafe but efficient)
-* sort the UTF-8 characters (requires memory allocation)
-* sort the UTF-8 characters (more complex implementation but efficient)
-
-Licensed under GPLv2 or later, same as the entire repository
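The deleted README lists these sorting variants only in prose. The allocation-based UTF-8 variant it mentions could look roughly like the following sketch (function name and details are assumptions, not the project's code):

```rust
// Illustrative sketch of the "requires memory allocation" UTF-8 variant:
// collect the chars into a Vec, sort it, and rebuild a String.
fn sort_chars(input: &str) -> String {
    let mut chars: Vec<char> = input.chars().collect();
    chars.sort_unstable();
    chars.into_iter().collect()
}

fn main() {
    assert_eq!(sort_chars("rust"), "rstu");
    println!("{}", sort_chars("häuser"));
}
```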
@@ -1,8 +0,0 @@
-[package]
-name = "threads"
-version = "0.1.0"
-edition = "2021"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
-[dependencies]
@@ -1 +0,0 @@
-af uafi spdh has gh kajs
@@ -1 +0,0 @@
-tj ah ldsh lkjhasl jkasl
@@ -1,33 +0,0 @@
-use std::{thread, sync::mpsc};
-
-fn main() {
-    let (path_send, path_recv) = mpsc::channel();
-    let (count_send, count_recv) = mpsc::channel();
-
-    let handle = thread::spawn(move || {
-        for path in path_recv.iter() {
-            if let Ok(source) = std::fs::read_to_string(&path) {
-                let words = source.split_whitespace().filter(|ws| !ws.is_empty()).count();
-                count_send.send(format!("path: {} words: {words}", &path)).unwrap();
-            } else {
-                drop(count_send);
-                break;
-            }
-        }
-    });
-
-    loop {
-        let mut buf = String::new();
-        std::io::stdin().read_line(&mut buf).expect("unable to read from stdin");
-
-        if let Err(_) = path_send.send(String::from(buf.trim())) {
-            break;
-        }
-    }
-
-    count_recv.try_iter().for_each(|c| {
-        println!("{c}");
-    });
-
-    handle.join().unwrap();
-}
@@ -1,5 +0,0 @@
-# Tuple Arithmetic
-This project contains some implementations of arithmetic operators for pairs of floating point numbers.
-Note: no trait implementation is provided since at the time of writing this project trait were out of scope for the lesson.
-
-Licensed under GPLv2 or later, same as the entire repository
@@ -28,7 +28,6 @@ fn merge_tuple(a: (f64, f64), b: (f64, f64), f: fn(a: f64, b: f64) -> f64) -> (f
     (f(a.0, b.0), f(a.1, b.1))
 }
 
-#[allow(dead_code)]
 fn add_tuple(a: (f64, f64), b: (f64, f64)) -> (f64, f64) {
     merge_tuple(a, b, |a, b| a + b)
 }