From 961626616fcac508f8769546617206dc608efdcf Mon Sep 17 00:00:00 2001
From: lluni
Date: Sun, 15 Jan 2023 23:18:58 +0100
Subject: [PATCH] Initial commit

---
 .gitignore                            |   3 +
 Cargo.toml                            |  12 +++
 README.md                             |   8 ++
 examples/example_sine.rs              | 102 +++++++++++++++++++++++++
 examples/example_xor.rs               |  61 +++++++++++++++
 src/functions/activation_functions.rs | 100 +++++++++++++++++++++++++
 src/functions/loss_functions.rs       |  51 +++++++++++++
 src/functions/mod.rs                  |   2 +
 src/layers/activation_layer.rs        |  40 ++++++++++
 src/layers/fc_layer.rs                | 104 ++++++++++++++++++++++++++
 src/layers/mod.rs                     |   9 +++
 src/lib.rs                            |  77 +++++++++++++++++++
 12 files changed, 569 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Cargo.toml
 create mode 100644 README.md
 create mode 100644 examples/example_sine.rs
 create mode 100644 examples/example_xor.rs
 create mode 100644 src/functions/activation_functions.rs
 create mode 100644 src/functions/loss_functions.rs
 create mode 100644 src/functions/mod.rs
 create mode 100644 src/layers/activation_layer.rs
 create mode 100644 src/layers/fc_layer.rs
 create mode 100644 src/layers/mod.rs
 create mode 100644 src/lib.rs

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..cf6578b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+/.vscode
+/target
+/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..71bcee2
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "rust-nn"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+ndarray = "0.15.6"
+ndarray-rand = "0.14.0"
+plotters = "0.3.4"
+rand = "0.8.5"
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9dc73ab
--- /dev/null
+++ b/README.md
@@ -0,0 +1,8 @@
+# rust-nn
+
+This project is more or less a port of [JavaNN](https://git.lluni.de/lluni/JavaNN) into Rust to learn Rust.
+
+Examples can be run with
+
+- `cargo run -r --package rust-nn --example example_xor`
+- `cargo run -r --package rust-nn --example example_sine`
diff --git a/examples/example_sine.rs b/examples/example_sine.rs
new file mode 100644
index 0000000..7ab0e1a
--- /dev/null
+++ b/examples/example_sine.rs
@@ -0,0 +1,102 @@
+extern crate rust_nn;
+
+use std::error::Error;
+use std::f64::consts::PI;
+
+use ndarray_rand::RandomExt;
+use ndarray_rand::rand_distr::Uniform;
+use plotters::prelude::*;
+use rust_nn::Network;
+use rust_nn::functions::{activation_functions, loss_functions};
+use rust_nn::layers::activation_layer::ActivationLayer;
+use rust_nn::layers::fc_layer::{FCLayer, Initializer};
+use ndarray::Array1;
+
+fn main() -> Result<(), Box<dyn Error>> {
+    // training data
+    let training_interval = (0.0f64, 2.0f64 * PI);
+    let steps = 100000;
+    let training_values = Array1::random(steps, Uniform::new(training_interval.0, training_interval.1)).to_vec();
+    let mut x_train = Vec::new();
+    let mut y_train = Vec::new();
+    for x in training_values {
+        x_train.push(Array1::from_elem(1usize, x));
+        y_train.push(Array1::from_elem(1usize, x.sin()));
+    }
+    // test data
+    let test_steps = 1000;
+    let interval_length = training_interval.1 - training_interval.0;
+    let step_size = interval_length / test_steps as f64;
+    let testing_values = Array1::range(training_interval.0, training_interval.1, step_size);
+    let mut x_test = Vec::new();
+    let mut y_test_true = Vec::new();
+    for x in testing_values {
+        x_test.push(Array1::from_elem(1usize, x));
+        y_test_true.push(Array1::from_elem(1usize, x.sin()));
+    }
+
+    // initialize neural network
+    let mut network = Network::new(loss_functions::Type::MSE);
+
+    // add layers
+    network.add_layer(Box::new(FCLayer::new(
+        8,
+        Initializer::GaussianWFactor(0.0, 1.0, 0.1),
+        Initializer::GaussianWFactor(0.0, 1.0, 0.1)
+    )));
+    network.add_layer(Box::new(ActivationLayer::new(activation_functions::Type::LeakyRelu)));
+    network.add_layer(Box::new(FCLayer::new(
+        8,
+        Initializer::GaussianWFactor(0.0, 1.0, 0.1),
+        Initializer::GaussianWFactor(0.0, 1.0, 0.1)
+    )));
+    network.add_layer(Box::new(ActivationLayer::new(activation_functions::Type::LeakyRelu)));
+    network.add_layer(Box::new(FCLayer::new(
+        1,
+        Initializer::GaussianWFactor(0.0, 1.0, 0.1),
+        Initializer::GaussianWFactor(0.0, 1.0, 0.1)
+    )));
+
+    // train network on training data
+    network.fit(x_train, y_train, 100, 0.05, true);
+
+    // predict test dataset
+    let y_test_pred = network.predict(x_test.clone());
+
+    // create the chart
+    let buf = BitMapBackend::new("./examples/sine.png", (800, 600)).into_drawing_area();
+    buf.fill(&WHITE)?;
+    let mut chart = ChartBuilder::on(&buf)
+        //.caption("sin(x)", ("sans-serif", 30))
+        .x_label_area_size(30)
+        .y_label_area_size(30)
+        .build_cartesian_2d(training_interval.0..training_interval.1, -1.0f64..1.0f64)?;
+
+    chart
+        .configure_mesh()
+        .disable_x_mesh()
+        .disable_y_mesh()
+        .draw()?;
+
+    // add the first plot
+    let mut data1: Vec<(f64, f64)> = x_test.iter().zip(y_test_true.iter())
+        .map(|(x, y)| (x[0], y[0]))
+        .collect();
+    data1.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
+    chart
+        .draw_series(LineSeries::new(data1, &RED)).unwrap()
+        .label("true values")
+        .legend(|(x, y)| PathElement::new(vec![(x, y), (x + 1, y)], &RED));
+
+    // add the second plot
+    let mut data2: Vec<(f64, f64)> = x_test.iter().zip(y_test_pred.iter())
+        .map(|(x, y)| (x[0], y[0]))
+        .collect();
+    data2.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
+    chart
+        .draw_series(LineSeries::new(data2, &BLUE)).unwrap()
+        .label("predicted values")
+        .legend(|(x, y)| PathElement::new(vec![(x, y), (x + 1, y)], &BLUE));
+
+    // draw the legend and make sure the image is written out
+    chart.configure_series_labels().border_style(&BLACK).draw()?;
+    buf.present()?;
+
+    Ok(())
+}
diff --git a/examples/example_xor.rs b/examples/example_xor.rs
new file mode 100644
index 0000000..6d2346b
--- /dev/null
+++ b/examples/example_xor.rs
@@ -0,0 +1,61 @@
+extern crate rust_nn;
+
+use rust_nn::Network;
+use rust_nn::functions::{activation_functions, loss_functions};
+use rust_nn::layers::activation_layer::ActivationLayer;
+use rust_nn::layers::fc_layer::{FCLayer, Initializer};
+use ndarray::array;
+
+fn main() {
+    // training data
+    let x_train = vec![
+        array![0.0, 0.0],
+        array![0.0, 1.0],
+        array![1.0, 0.0],
+        array![1.0, 1.0]
+    ];
+    let y_train = vec![
+        array![0.0],
+        array![1.0],
+        array![1.0],
+        array![0.0]
+    ];
+    // test data
+    let x_test = vec![
+        array![0.0, 0.0],
+        array![0.0, 1.0],
+        array![1.0, 0.0],
+        array![1.0, 1.0]
+    ];
+
+    // initialize neural network
+    let mut network = Network::new(loss_functions::Type::MSE);
+
+    // add layers
+    network.add_layer(Box::new(FCLayer::new(
+        3,
+        Initializer::Gaussian(0.0, 1.0),
+        Initializer::Gaussian(0.0, 1.0)
+    )));
+    network.add_layer(Box::new(ActivationLayer::new(activation_functions::Type::Tanh)));
+    network.add_layer(Box::new(FCLayer::new(
+        1,
+        Initializer::Gaussian(0.0, 1.0),
+        Initializer::Gaussian(0.0, 1.0)
+    )));
+    network.add_layer(Box::new(ActivationLayer::new(activation_functions::Type::Tanh)));
+
+    // train network on training data
+    network.fit(x_train, y_train, 1000, 0.1, false);
+
+    // print predictions
+    let y_test = network.predict(x_test.clone());
+    for i in 0..y_test.len() {
+        print!("input: {}\t\t", x_test.get(i).unwrap());
+        let mut prediction = y_test.get(i).unwrap().to_owned();
+        // comment the following line to see the exact predictions
+        prediction.mapv_inplace(|x| x.round());
+        println!("prediction: {}", prediction);
+    }
+}
\ No newline at end of file
diff --git a/src/functions/activation_functions.rs b/src/functions/activation_functions.rs
new file mode 100644
index 0000000..ac3b6e4
--- /dev/null
+++ b/src/functions/activation_functions.rs
@@ -0,0 +1,100 @@
+use ndarray::Array1;
+
+pub enum Type {
+    Identity,
+    Logistic,
+    Tanh,
+    Relu,
+    LeakyRelu
+}
+
+pub fn parse_type(t: Type) -> (fn(&Array1<f64>) -> Array1<f64>, fn(&Array1<f64>) -> Array1<f64>) {
+    match t {
+        Type::Identity => (identity, identity_prime),
+        Type::Logistic => (logistic, logistic_prime),
+        Type::Tanh => (tanh, tanh_prime),
+        Type::Relu => (relu, relu_prime),
+        Type::LeakyRelu => (leaky_relu, leaky_relu_prime)
+    }
+}
+
+pub fn identity(matrix: &Array1<f64>) -> Array1<f64> {
+    matrix.to_owned()
+}
+
+pub fn identity_prime(matrix: &Array1<f64>) -> Array1<f64> {
+    let mut result = matrix.clone();
+    for x in result.iter_mut() {
+        *x = 1.0;
+    }
+    result
+}
+
+fn sigmoid(x: f64) -> f64 {
+    1.0 / (1.0 + (-x).exp())
+}
+
+pub fn logistic(matrix: &Array1<f64>) -> Array1<f64> {
+    let mut result = matrix.clone();
+    for x in result.iter_mut() {
+        *x = sigmoid(*x);
+    }
+    result
+}
+
+pub fn logistic_prime(matrix: &Array1<f64>) -> Array1<f64> {
+    let mut result = matrix.clone();
+    for x in result.iter_mut() {
+        // derivative of the logistic function: sigma'(x) = sigma(x) * (1 - sigma(x))
+        *x = sigmoid(*x) * (1.0 - sigmoid(*x));
+    }
+    result
+}
+
+pub fn tanh(matrix: &Array1<f64>) -> Array1<f64> {
+    let mut result = matrix.clone();
+    for x in result.iter_mut() {
+        *x = (*x).tanh();
+    }
+    result
+}
+
+pub fn tanh_prime(matrix: &Array1<f64>) -> Array1<f64> {
+    let mut result = matrix.clone();
+    for x in result.iter_mut() {
+        *x = 1.0 - (*x).tanh().powi(2);
+    }
+    result
+}
+
+pub fn relu(matrix: &Array1<f64>) -> Array1<f64> {
+    let mut result = matrix.clone();
+    for x in result.iter_mut() {
+        *x = (*x).max(0.0);
+    }
+    result
+}
+
+pub fn relu_prime(matrix: &Array1<f64>) -> Array1<f64> {
+    let mut result = matrix.clone();
+    for x in result.iter_mut() {
+        *x = if (*x) <= 0.0 { 0.0 } else { 1.0 };
+    }
+    result
+}
+
+pub fn leaky_relu(matrix: &Array1<f64>) -> Array1<f64> {
+    let mut result = matrix.clone();
+    for x in result.iter_mut() {
+        *x = (*x).max(0.001 * (*x));
+    }
+    result
+}
+
+pub fn leaky_relu_prime(matrix: &Array1<f64>) -> Array1<f64> {
+    let mut result = matrix.clone();
+    for x in result.iter_mut() {
+        *x = if (*x) <= 0.0 { 0.001 } else { 1.0 };
+    }
+    result
+}
diff --git a/src/functions/loss_functions.rs b/src/functions/loss_functions.rs
new file mode 100644
index 0000000..40cbc8d
--- /dev/null
+++ b/src/functions/loss_functions.rs
@@ -0,0 +1,51 @@
+use std::ops::MulAssign;
+
+use ndarray::{Array1, ArrayView1};
+
+pub enum Type {
+    MSE,
+    MAE
+}
+
+pub fn parse_type(t: Type) -> (fn(ArrayView1<f64>, ArrayView1<f64>) -> f64, fn(ArrayView1<f64>, ArrayView1<f64>) -> Array1<f64>) {
+    match t {
+        Type::MSE => (mse, mse_prime),
+        Type::MAE => (mae, mae_prime)
+    }
+}
+
+pub fn mse(y_true: ArrayView1<f64>, y_pred: ArrayView1<f64>) -> f64 {
+    let mut temp = &y_true - &y_pred;
+    temp.mul_assign(&temp.clone());
+    let mut sum = 0.0;
+    for i in 0..temp.len() {
+        sum += temp.get(i).unwrap();
+    }
+    sum / temp.len() as f64
+}
+
+// Returns 2/n * (y_true - y_pred), the negative gradient of the MSE w.r.t. y_pred,
+// so that the layers can apply updates as `weights += learning_rate * delta`.
+pub fn mse_prime(y_true: ArrayView1<f64>, y_pred: ArrayView1<f64>) -> Array1<f64> {
+    let temp = &y_true - &y_pred;
+    temp / (y_true.len() as f64 / 2.0)
+}
+
+pub fn mae(y_true: ArrayView1<f64>, y_pred: ArrayView1<f64>) -> f64 {
+    let temp = &y_true - &y_pred;
+    let mut sum = 0.0;
+    for i in 0..temp.len() {
+        sum += temp.get(i).unwrap().abs();
+    }
+    sum / temp.len() as f64
+}
+
+// Same sign convention as mse_prime: the negative gradient of the MAE w.r.t. y_pred.
+pub fn mae_prime(y_true: ArrayView1<f64>, y_pred: ArrayView1<f64>) -> Array1<f64> {
+    let mut result = Array1::zeros(y_true.raw_dim());
+    for i in 0..result.len() {
+        if y_true.get(i).unwrap() < y_pred.get(i).unwrap() {
+            *result.get_mut(i).unwrap() = -1.0;
+        } else {
+            *result.get_mut(i).unwrap() = 1.0;
+        }
+    }
+    result
+}
diff --git a/src/functions/mod.rs b/src/functions/mod.rs
new file mode 100644
index 0000000..d8ee2e3
--- /dev/null
+++ b/src/functions/mod.rs
@@ -0,0 +1,2 @@
+pub mod activation_functions;
+pub mod loss_functions;
diff --git a/src/layers/activation_layer.rs b/src/layers/activation_layer.rs
new file mode 100644
index 0000000..6ba3c54
--- /dev/null
+++ b/src/layers/activation_layer.rs
@@ -0,0 +1,40 @@
+use ndarray::{Array1, arr1, ArrayView1};
+
+use crate::functions::activation_functions::*;
+use super::Layer;
+
+pub struct ActivationLayer {
+    input: Array1<f64>,
+    output: Array1<f64>,
+    activation: fn(&Array1<f64>) -> Array1<f64>,
+    activation_prime: fn(&Array1<f64>) -> Array1<f64>
+}
+
+impl ActivationLayer {
+    pub fn new(activation_fn: Type) -> Self {
+        let (activation, activation_prime) = parse_type(activation_fn);
+        ActivationLayer {
+            input: arr1(&[]),
+            output: arr1(&[]),
+            activation,
+            activation_prime
+        }
+    }
+}
+
+impl Layer for ActivationLayer {
+    fn forward_pass(&mut self, input: ArrayView1<f64>) -> Array1<f64> {
+        self.input = input.to_owned();
+        self.output = (self.activation)(&self.input);
+        self.output.clone()
+    }
+
+    fn backward_pass(&mut self, output_error: ArrayView1<f64>, _learning_rate: f64) -> Array1<f64> {
+        // element-wise product: activation'(input) * output_error
+        let mut temp = (self.activation_prime)(&self.input);
+        temp.zip_mut_with(&output_error, |x, y| *x *= y);
+        temp
+    }
+
+}
diff --git a/src/layers/fc_layer.rs b/src/layers/fc_layer.rs
new file mode 100644
index 0000000..73150e2
--- /dev/null
+++ b/src/layers/fc_layer.rs
@@ -0,0 +1,104 @@
+extern crate ndarray;
+
+use ndarray::{Array1, Array2, arr1, arr2, Array, ArrayView1, ShapeBuilder};
+use ndarray_rand::RandomExt;
+use ndarray_rand::rand_distr::{Normal, Uniform};
+
+use super::Layer;
+
+pub enum Initializer {
+    Zeros,
+    Ones,
+    Gaussian(f64, f64),
+    GaussianWFactor(f64, f64, f64),
+    Uniform(f64, f64)
+}
+
+impl Initializer {
+    pub fn init<Sh, D>(&self, shape: Sh) -> Array<f64, D>
+    where
+        Sh: ShapeBuilder<Dim = D>,
+        D: ndarray::Dimension
+    {
+        match self {
+            Self::Zeros => Array::zeros(shape),
+            Self::Ones => Array::ones(shape),
+            Self::Gaussian(mean, stddev) => Array::random(shape, Normal::new(*mean, *stddev).unwrap()),
+            Self::GaussianWFactor(mean, stddev, factor)
+                => Array::random(shape, Normal::new(*mean, *stddev).unwrap()) * *factor,
+            Self::Uniform(low, high) => Array::random(shape, Uniform::new(low, high))
+        }
+    }
+}
+
+pub struct FCLayer {
+    num_neurons: usize,
+    is_initialized: bool,
+    weight_initializer: Initializer,
+    bias_initializer: Initializer,
+    input: Array1<f64>,
+    output: Array1<f64>,
+    weights: Array2<f64>,
+    biases: Array1<f64>,
+}
+
+impl FCLayer {
+    pub fn new(num_neurons: usize, weight_initializer: Initializer, bias_initializer: Initializer) -> Self {
+        FCLayer {
+            num_neurons,
+            is_initialized: false,
+            weight_initializer,
+            bias_initializer,
+            input: arr1(&[]),
+            output: arr1(&[]),
+            weights: arr2(&[[]]),
+            biases: arr1(&[])
+        }
+    }
+
+    fn initialize(&mut self, input_size: usize) {
+        self.weights = self.weight_initializer.init((input_size, self.num_neurons));
+        self.biases = self.bias_initializer.init(self.num_neurons);
+        self.is_initialized = true;
+    }
+}
+
+impl Layer for FCLayer {
+    fn forward_pass(&mut self, input: ArrayView1<f64>) -> Array1<f64> {
+        if !self.is_initialized {
+            self.initialize(input.len());
+        }
+
+        self.input = input.to_owned();
+        self.output = self.input.dot(&self.weights) + &self.biases;
+        self.output.clone()
+    }
+
+    fn backward_pass(&mut self, output_error: ArrayView1<f64>, learning_rate: f64) -> Array1<f64> {
+        // propagate the error to the previous layer: output_error * W^T
+        let input_error = output_error.dot(&self.weights.t());
+        // outer product input^T * output_error gives the weight update
+        let delta_weights =
+            self.input.to_owned().into_shape((self.input.len(), 1usize)).unwrap()
+            .dot(&output_error.into_shape((1usize, output_error.len())).unwrap());
+        self.weights = &self.weights + learning_rate * &delta_weights;
+        self.biases = &self.biases + learning_rate * &output_error;
+        input_error
+    }
+}
\ No newline at end of file
diff --git a/src/layers/mod.rs b/src/layers/mod.rs
new file mode 100644
index 0000000..53c3eee
--- /dev/null
+++ b/src/layers/mod.rs
@@ -0,0 +1,9 @@
+use ndarray::{Array1, ArrayView1};
+
+pub mod activation_layer;
+pub mod fc_layer;
+
+pub trait Layer {
+    fn forward_pass(&mut self, input: ArrayView1<f64>) -> Array1<f64>;
+    fn backward_pass(&mut self, output_error: ArrayView1<f64>, learning_rate: f64) -> Array1<f64>;
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..eef17fc
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,77 @@
+pub mod functions;
+pub mod layers;
+
+use functions::loss_functions::{self, parse_type};
+use layers::*;
+use ndarray::{Array1, ArrayView1};
+
+pub struct Network {
+    layers: Vec<Box<dyn Layer>>,
+    loss: fn(ArrayView1<f64>, ArrayView1<f64>) -> f64,
+    loss_prime: fn(ArrayView1<f64>, ArrayView1<f64>) -> Array1<f64>
+}
+
+impl Network {
+    pub fn new(loss_fn: loss_functions::Type) -> Self {
+        let (loss, loss_prime) = parse_type(loss_fn);
+        Network {
+            layers: vec![],
+            loss,
+            loss_prime
+        }
+    }
+
+    pub fn add_layer(&mut self, layer: Box<dyn Layer>) {
+        self.layers.push(layer);
+    }
+
+    pub fn predict(&mut self, inputs: Vec<Array1<f64>>) -> Vec<Array1<f64>> {
+        assert!(!inputs.is_empty());
+        let mut result = vec![];
+
+        for input in inputs.iter() {
+            let mut output = input.to_owned();
+            for layer in &mut self.layers {
+                output = layer.forward_pass(output.view());
+            }
+            result.push(output);
+        }
+
+        result
+    }
+
+    pub fn fit(&mut self, x_train: Vec<Array1<f64>>, y_train: Vec<Array1<f64>>, epochs: usize, learning_rate: f64, trivial_optimize: bool) {
+        assert!(!x_train.is_empty());
+        assert!(x_train.len() == y_train.len());
+        let num_samples = x_train.len();
+
+        for i in 0..epochs {
+            let mut err = 0.0;
+            for j in 0..num_samples {
+                // forward propagation
+                let mut output = x_train[j].to_owned();
+                for layer in self.layers.iter_mut() {
+                    output = layer.forward_pass(output.view());
+                }
+
+                // compute loss
+                err += (self.loss)(y_train[j].view(), output.view());
+
+                // backward propagation
+                let mut error = (self.loss_prime)(y_train[j].view(), output.view());
+                for layer in self.layers.iter_mut().rev() {
+                    if trivial_optimize {
+                        // decay the learning rate with the epoch number
+                        error = layer.backward_pass(error.view(), learning_rate / (i+1) as f64);
+                    } else {
+                        error = layer.backward_pass(error.view(), learning_rate);
+                    }
+                }
+            }
+            // calculate average error on all samples
+            err /= num_samples as f64;
+            println!("epoch {}/{} error={}", i+1, epochs, err);
+        }
+    }
+}
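
For reference, a minimal usage sketch of the `Network` API introduced in src/lib.rs. This is not part of the patch itself; it assumes the crate is consumed as the `rust_nn` library defined in Cargo.toml and essentially condenses examples/example_xor.rs:

// Sketch only: train the 2 -> 3 -> 1 XOR network from the example above
// and print the predictions for the four training inputs.
use ndarray::array;
use rust_nn::Network;
use rust_nn::functions::{activation_functions, loss_functions};
use rust_nn::layers::activation_layer::ActivationLayer;
use rust_nn::layers::fc_layer::{FCLayer, Initializer};

fn main() {
    // XOR truth table as training data
    let x = vec![array![0.0, 0.0], array![0.0, 1.0], array![1.0, 0.0], array![1.0, 1.0]];
    let y = vec![array![0.0], array![1.0], array![1.0], array![0.0]];

    // MSE loss, tanh activations, Gaussian-initialized fully connected layers
    let mut net = Network::new(loss_functions::Type::MSE);
    net.add_layer(Box::new(FCLayer::new(3, Initializer::Gaussian(0.0, 1.0), Initializer::Gaussian(0.0, 1.0))));
    net.add_layer(Box::new(ActivationLayer::new(activation_functions::Type::Tanh)));
    net.add_layer(Box::new(FCLayer::new(1, Initializer::Gaussian(0.0, 1.0), Initializer::Gaussian(0.0, 1.0))));
    net.add_layer(Box::new(ActivationLayer::new(activation_functions::Type::Tanh)));

    // 1000 epochs, learning rate 0.1, no learning-rate decay (trivial_optimize = false)
    net.fit(x.clone(), y, 1000, 0.1, false);
    for (input, pred) in x.iter().zip(net.predict(x.clone())) {
        println!("{} -> {}", input, pred);
    }
}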