From 4fc5c1d4b56a0ecd643daa0c76080dc3354e88ce Mon Sep 17 00:00:00 2001 From: AlessandroBregoli Date: Mon, 28 Nov 2022 13:31:37 +0100 Subject: [PATCH 01/10] Refactored reward module --- reCTBN/src/lib.rs | 2 +- reCTBN/src/reward.rs | 41 ++++++++++++++++++++++ reCTBN/src/{ => reward}/reward_function.rs | 40 ++------------------- reCTBN/tests/reward_function.rs | 2 +- 4 files changed, 45 insertions(+), 40 deletions(-) create mode 100644 reCTBN/src/reward.rs rename reCTBN/src/{ => reward}/reward_function.rs (72%) diff --git a/reCTBN/src/lib.rs b/reCTBN/src/lib.rs index 8feddfb..1997fa6 100644 --- a/reCTBN/src/lib.rs +++ b/reCTBN/src/lib.rs @@ -6,7 +6,7 @@ extern crate approx; pub mod parameter_learning; pub mod params; pub mod process; -pub mod reward_function; +pub mod reward; pub mod sampling; pub mod structure_learning; pub mod tools; diff --git a/reCTBN/src/reward.rs b/reCTBN/src/reward.rs new file mode 100644 index 0000000..114ba03 --- /dev/null +++ b/reCTBN/src/reward.rs @@ -0,0 +1,41 @@ +pub mod reward_function; + +use crate::process; + +/// Instantiation of reward function and instantaneous reward +/// +/// +/// # Arguments +/// +/// * `transition_reward`: reward obtained transitioning from one state to another +/// * `instantaneous_reward`: reward per unit of time obtained staying in a specific state + +#[derive(Debug, PartialEq)] +pub struct Reward { + pub transition_reward: f64, + pub instantaneous_reward: f64, +} + +/// The trait RewardFunction describe the methods that all the reward functions must satisfy + +pub trait RewardFunction { + /// Given the current state and the previous state, it compute the reward. + /// + /// # Arguments + /// + /// * `current_state`: the current state of the network represented as a `process::NetworkProcessState` + /// * `previous_state`: an optional argument representing the previous state of the network + + fn call( + &self, + current_state: process::NetworkProcessState, + previous_state: Option, + ) -> Reward; + + /// Initialize the RewardFunction internal accordingly to the structure of a NetworkProcess + /// + /// # Arguments + /// + /// * `p`: any structure that implements the trait `process::NetworkProcess` + fn initialize_from_network_process(p: &T) -> Self; +} diff --git a/reCTBN/src/reward_function.rs b/reCTBN/src/reward/reward_function.rs similarity index 72% rename from reCTBN/src/reward_function.rs rename to reCTBN/src/reward/reward_function.rs index 35e15c8..ae94ff1 100644 --- a/reCTBN/src/reward_function.rs +++ b/reCTBN/src/reward/reward_function.rs @@ -3,46 +3,10 @@ use crate::{ params::{self, ParamsTrait}, process, + reward::{Reward, RewardFunction}, }; -use ndarray; - -/// Instantiation of reward function and instantaneous reward -/// -/// -/// # Arguments -/// -/// * `transition_reward`: reward obtained transitioning from one state to another -/// * `instantaneous_reward`: reward per unit of time obtained staying in a specific state - -#[derive(Debug, PartialEq)] -pub struct Reward { - pub transition_reward: f64, - pub instantaneous_reward: f64, -} - -/// The trait RewardFunction describe the methods that all the reward functions must satisfy -pub trait RewardFunction { - /// Given the current state and the previous state, it compute the reward. 
- /// - /// # Arguments - /// - /// * `current_state`: the current state of the network represented as a `process::NetworkProcessState` - /// * `previous_state`: an optional argument representing the previous state of the network - - fn call( - &self, - current_state: process::NetworkProcessState, - previous_state: Option, - ) -> Reward; - - /// Initialize the RewardFunction internal accordingly to the structure of a NetworkProcess - /// - /// # Arguments - /// - /// * `p`: any structure that implements the trait `process::NetworkProcess` - fn initialize_from_network_process(p: &T) -> Self; -} +use ndarray; /// Reward function over a factored state space /// diff --git a/reCTBN/tests/reward_function.rs b/reCTBN/tests/reward_function.rs index dcc5e69..03f2ab7 100644 --- a/reCTBN/tests/reward_function.rs +++ b/reCTBN/tests/reward_function.rs @@ -2,7 +2,7 @@ mod utils; use ndarray::*; use utils::generate_discrete_time_continous_node; -use reCTBN::{process::{NetworkProcess, ctbn::*, NetworkProcessState}, reward_function::*, params}; +use reCTBN::{process::{NetworkProcess, ctbn::*, NetworkProcessState}, reward::{*, reward_function::*}, params}; #[test] From cecf16a771d6fd53ec7ad1b909c7310c9a8368d8 Mon Sep 17 00:00:00 2001 From: AlessandroBregoli Date: Tue, 29 Nov 2022 09:43:12 +0100 Subject: [PATCH 02/10] Added sigle state evaluation --- reCTBN/src/reward.rs | 20 ++++- reCTBN/src/reward/reward_evaluation.rs | 71 ++++++++++++++++ reCTBN/src/reward/reward_function.rs | 4 +- reCTBN/src/sampling.rs | 27 +++++-- reCTBN/src/tools.rs | 3 +- reCTBN/tests/reward_evaluation.rs | 107 +++++++++++++++++++++++++ reCTBN/tests/reward_function.rs | 61 +++++++------- 7 files changed, 248 insertions(+), 45 deletions(-) create mode 100644 reCTBN/src/reward/reward_evaluation.rs create mode 100644 reCTBN/tests/reward_evaluation.rs diff --git a/reCTBN/src/reward.rs b/reCTBN/src/reward.rs index 114ba03..1ea575c 100644 --- a/reCTBN/src/reward.rs +++ b/reCTBN/src/reward.rs @@ -1,6 +1,8 @@ pub mod reward_function; +pub mod reward_evaluation; use crate::process; +use ndarray; /// Instantiation of reward function and instantaneous reward /// @@ -28,8 +30,8 @@ pub trait RewardFunction { fn call( &self, - current_state: process::NetworkProcessState, - previous_state: Option, + current_state: &process::NetworkProcessState, + previous_state: Option<&process::NetworkProcessState>, ) -> Reward; /// Initialize the RewardFunction internal accordingly to the structure of a NetworkProcess @@ -39,3 +41,17 @@ pub trait RewardFunction { /// * `p`: any structure that implements the trait `process::NetworkProcess` fn initialize_from_network_process(p: &T) -> Self; } + +pub trait RewardEvaluation { + fn call( + &self, + network_process: &N, + reward_function: &R, + ) -> ndarray::Array1; + fn call_state( + &self, + network_process: &N, + reward_function: &R, + state: &process::NetworkProcessState, + ) -> f64; +} diff --git a/reCTBN/src/reward/reward_evaluation.rs b/reCTBN/src/reward/reward_evaluation.rs new file mode 100644 index 0000000..fca7c1a --- /dev/null +++ b/reCTBN/src/reward/reward_evaluation.rs @@ -0,0 +1,71 @@ +use crate::{ + reward::RewardEvaluation, + sampling::{ForwardSampler, Sampler}, + process::NetworkProcessState +}; + +pub struct MonteCarloDiscountedRward { + n_iterations: usize, + end_time: f64, + discount_factor: f64, + seed: Option, +} + +impl MonteCarloDiscountedRward { + pub fn new( + n_iterations: usize, + end_time: f64, + discount_factor: f64, + seed: Option, + ) -> MonteCarloDiscountedRward { + 
MonteCarloDiscountedRward { + n_iterations, + end_time, + discount_factor, + seed, + } + } +} + +impl RewardEvaluation for MonteCarloDiscountedRward { + fn call( + &self, + network_process: &N, + reward_function: &R, + ) -> ndarray::Array1 { + todo!() + } + + fn call_state( + &self, + network_process: &N, + reward_function: &R, + state: &NetworkProcessState, + ) -> f64 { + let mut sampler = ForwardSampler::new(network_process, self.seed.clone(), Some(state.clone())); + let mut ret = 0.0; + + for _i in 0..self.n_iterations { + sampler.reset(); + let mut previous = sampler.next().unwrap(); + while previous.t < self.end_time { + let current = sampler.next().unwrap(); + if current.t > self.end_time { + let r = reward_function.call(&previous.state, None); + let discount = std::f64::consts::E.powf(-self.discount_factor * previous.t) + - std::f64::consts::E.powf(-self.discount_factor * self.end_time); + ret += discount * r.instantaneous_reward; + } else { + let r = reward_function.call(&previous.state, Some(¤t.state)); + let discount = std::f64::consts::E.powf(-self.discount_factor * previous.t) + - std::f64::consts::E.powf(-self.discount_factor * current.t); + ret += discount * r.instantaneous_reward; + ret += std::f64::consts::E.powf(-self.discount_factor * current.t) * r.transition_reward; + } + previous = current; + } + } + + ret / self.n_iterations as f64 + } +} diff --git a/reCTBN/src/reward/reward_function.rs b/reCTBN/src/reward/reward_function.rs index ae94ff1..216df6a 100644 --- a/reCTBN/src/reward/reward_function.rs +++ b/reCTBN/src/reward/reward_function.rs @@ -44,8 +44,8 @@ impl FactoredRewardFunction { impl RewardFunction for FactoredRewardFunction { fn call( &self, - current_state: process::NetworkProcessState, - previous_state: Option, + current_state: &process::NetworkProcessState, + previous_state: Option<&process::NetworkProcessState>, ) -> Reward { let instantaneous_reward: f64 = current_state .iter() diff --git a/reCTBN/src/sampling.rs b/reCTBN/src/sampling.rs index 1384872..73c6d78 100644 --- a/reCTBN/src/sampling.rs +++ b/reCTBN/src/sampling.rs @@ -26,10 +26,15 @@ where current_time: f64, current_state: NetworkProcessState, next_transitions: Vec>, + initial_state: Option, } impl<'a, T: NetworkProcess> ForwardSampler<'a, T> { - pub fn new(net: &'a T, seed: Option) -> ForwardSampler<'a, T> { + pub fn new( + net: &'a T, + seed: Option, + initial_state: Option, + ) -> ForwardSampler<'a, T> { let rng: ChaCha8Rng = match seed { //If a seed is present use it to initialize the random generator. 
Some(seed) => SeedableRng::seed_from_u64(seed), @@ -37,11 +42,12 @@ impl<'a, T: NetworkProcess> ForwardSampler<'a, T> { None => SeedableRng::from_entropy(), }; let mut fs = ForwardSampler { - net: net, - rng: rng, + net, + rng, current_time: 0.0, current_state: vec![], next_transitions: vec![], + initial_state, }; fs.reset(); return fs; @@ -112,11 +118,16 @@ impl<'a, T: NetworkProcess> Iterator for ForwardSampler<'a, T> { impl<'a, T: NetworkProcess> Sampler for ForwardSampler<'a, T> { fn reset(&mut self) { self.current_time = 0.0; - self.current_state = self - .net - .get_node_indices() - .map(|x| self.net.get_node(x).get_random_state_uniform(&mut self.rng)) - .collect(); + match &self.initial_state { + None => { + self.current_state = self + .net + .get_node_indices() + .map(|x| self.net.get_node(x).get_random_state_uniform(&mut self.rng)) + .collect() + } + Some(is) => self.current_state = is.clone(), + }; self.next_transitions = self.net.get_node_indices().map(|_| Option::None).collect(); } } diff --git a/reCTBN/src/tools.rs b/reCTBN/src/tools.rs index ecfeff9..38ebd49 100644 --- a/reCTBN/src/tools.rs +++ b/reCTBN/src/tools.rs @@ -61,8 +61,7 @@ pub fn trajectory_generator( let mut trajectories: Vec = Vec::new(); //Random Generator object - - let mut sampler = ForwardSampler::new(net, seed); + let mut sampler = ForwardSampler::new(net, seed, None); //Each iteration generate one trajectory for _ in 0..n_trajectories { //History of all the moments in which something changed diff --git a/reCTBN/tests/reward_evaluation.rs b/reCTBN/tests/reward_evaluation.rs new file mode 100644 index 0000000..f3938e7 --- /dev/null +++ b/reCTBN/tests/reward_evaluation.rs @@ -0,0 +1,107 @@ +mod utils; + +use approx::{abs_diff_eq, assert_abs_diff_eq}; +use ndarray::*; +use reCTBN::{ + params, + process::{ctbn::*, NetworkProcess, NetworkProcessState}, + reward::{reward_evaluation::*, reward_function::*, *}, +}; +use utils::generate_discrete_time_continous_node; + +#[test] +fn simple_factored_reward_function_binary_node_MC() { + let mut net = CtbnNetwork::new(); + let n1 = net + .add_node(generate_discrete_time_continous_node(String::from("n1"), 2)) + .unwrap(); + + let mut rf = FactoredRewardFunction::initialize_from_network_process(&net); + rf.get_transition_reward_mut(n1) + .assign(&arr2(&[[0.0, 0.0], [0.0, 0.0]])); + rf.get_instantaneous_reward_mut(n1) + .assign(&arr1(&[3.0, 3.0])); + + match &mut net.get_node_mut(n1) { + params::Params::DiscreteStatesContinousTime(param) => { + param.set_cim(arr3(&[[[-3.0, 3.0], [2.0, -2.0]]])).unwrap(); + } + } + + net.initialize_adj_matrix(); + + let s0: NetworkProcessState = vec![params::StateType::Discrete(0)]; + let s1: NetworkProcessState = vec![params::StateType::Discrete(1)]; + + let mc = MonteCarloDiscountedRward::new(100, 10.0, 1.0, Some(215)); + assert_abs_diff_eq!(3.0, mc.call_state(&net, &rf, &s0), epsilon = 1e-2); + assert_abs_diff_eq!(3.0, mc.call_state(&net, &rf, &s1), epsilon = 1e-2); +} + +#[test] +fn simple_factored_reward_function_chain_MC() { + let mut net = CtbnNetwork::new(); + let n1 = net + .add_node(generate_discrete_time_continous_node(String::from("n1"), 2)) + .unwrap(); + + let n2 = net + .add_node(generate_discrete_time_continous_node(String::from("n2"), 2)) + .unwrap(); + + let n3 = net + .add_node(generate_discrete_time_continous_node(String::from("n3"), 2)) + .unwrap(); + + net.add_edge(n1, n2); + net.add_edge(n2, n3); + + match &mut net.get_node_mut(n1) { + params::Params::DiscreteStatesContinousTime(param) => { + 
param.set_cim(arr3(&[[[-0.1, 0.1], [1.0, -1.0]]])).unwrap(); + } + } + + match &mut net.get_node_mut(n2) { + params::Params::DiscreteStatesContinousTime(param) => { + param + .set_cim(arr3(&[ + [[-0.01, 0.01], [5.0, -5.0]], + [[-5.0, 5.0], [0.01, -0.01]], + ])) + .unwrap(); + } + } + + + match &mut net.get_node_mut(n3) { + params::Params::DiscreteStatesContinousTime(param) => { + param + .set_cim(arr3(&[ + [[-0.01, 0.01], [5.0, -5.0]], + [[-5.0, 5.0], [0.01, -0.01]], + ])) + .unwrap(); + } + } + + + let mut rf = FactoredRewardFunction::initialize_from_network_process(&net); + rf.get_transition_reward_mut(n1) + .assign(&arr2(&[[0.0, 1.0], [1.0, 0.0]])); + + rf.get_transition_reward_mut(n2) + .assign(&arr2(&[[0.0, 1.0], [1.0, 0.0]])); + + rf.get_transition_reward_mut(n3) + .assign(&arr2(&[[0.0, 1.0], [1.0, 0.0]])); + + let s000: NetworkProcessState = vec![ + params::StateType::Discrete(1), + params::StateType::Discrete(0), + params::StateType::Discrete(0), + ]; + + let mc = MonteCarloDiscountedRward::new(10000, 100.0, 1.0, Some(215)); + assert_abs_diff_eq!(2.447, mc.call_state(&net, &rf, &s000), epsilon = 1e-1); +} diff --git a/reCTBN/tests/reward_function.rs b/reCTBN/tests/reward_function.rs index 03f2ab7..853efc9 100644 --- a/reCTBN/tests/reward_function.rs +++ b/reCTBN/tests/reward_function.rs @@ -18,15 +18,15 @@ fn simple_factored_reward_function_binary_node() { let s0: NetworkProcessState = vec![params::StateType::Discrete(0)]; let s1: NetworkProcessState = vec![params::StateType::Discrete(1)]; - assert_eq!(rf.call(s0.clone(), None), Reward{transition_reward: 0.0, instantaneous_reward: 3.0}); - assert_eq!(rf.call(s1.clone(), None), Reward{transition_reward: 0.0, instantaneous_reward: 5.0}); + assert_eq!(rf.call(&s0, None), Reward{transition_reward: 0.0, instantaneous_reward: 3.0}); + assert_eq!(rf.call(&s1, None), Reward{transition_reward: 0.0, instantaneous_reward: 5.0}); - assert_eq!(rf.call(s0.clone(), Some(s1.clone())), Reward{transition_reward: 2.0, instantaneous_reward: 3.0}); - assert_eq!(rf.call(s1.clone(), Some(s0.clone())), Reward{transition_reward: 1.0, instantaneous_reward: 5.0}); + assert_eq!(rf.call(&s0, Some(&s1)), Reward{transition_reward: 2.0, instantaneous_reward: 3.0}); + assert_eq!(rf.call(&s1, Some(&s0)), Reward{transition_reward: 1.0, instantaneous_reward: 5.0}); - assert_eq!(rf.call(s0.clone(), Some(s0.clone())), Reward{transition_reward: 0.0, instantaneous_reward: 3.0}); - assert_eq!(rf.call(s1.clone(), Some(s1.clone())), Reward{transition_reward: 0.0, instantaneous_reward: 5.0}); + assert_eq!(rf.call(&s0, Some(&s0)), Reward{transition_reward: 0.0, instantaneous_reward: 3.0}); + assert_eq!(rf.call(&s1, Some(&s1)), Reward{transition_reward: 0.0, instantaneous_reward: 5.0}); } @@ -46,16 +46,16 @@ fn simple_factored_reward_function_ternary_node() { let s2: NetworkProcessState = vec![params::StateType::Discrete(2)]; - assert_eq!(rf.call(s0.clone(), Some(s1.clone())), Reward{transition_reward: 2.0, instantaneous_reward: 3.0}); - assert_eq!(rf.call(s0.clone(), Some(s2.clone())), Reward{transition_reward: 5.0, instantaneous_reward: 3.0}); + assert_eq!(rf.call(&s0, Some(&s1)), Reward{transition_reward: 2.0, instantaneous_reward: 3.0}); + assert_eq!(rf.call(&s0, Some(&s2)), Reward{transition_reward: 5.0, instantaneous_reward: 3.0}); - assert_eq!(rf.call(s1.clone(), Some(s0.clone())), Reward{transition_reward: 1.0, instantaneous_reward: 5.0}); - assert_eq!(rf.call(s1.clone(), Some(s2.clone())), Reward{transition_reward: 6.0, instantaneous_reward: 5.0}); + 
assert_eq!(rf.call(&s1, Some(&s0)), Reward{transition_reward: 1.0, instantaneous_reward: 5.0}); + assert_eq!(rf.call(&s1, Some(&s2)), Reward{transition_reward: 6.0, instantaneous_reward: 5.0}); - assert_eq!(rf.call(s2.clone(), Some(s0.clone())), Reward{transition_reward: 3.0, instantaneous_reward: 9.0}); - assert_eq!(rf.call(s2.clone(), Some(s1.clone())), Reward{transition_reward: 4.0, instantaneous_reward: 9.0}); + assert_eq!(rf.call(&s2, Some(&s0)), Reward{transition_reward: 3.0, instantaneous_reward: 9.0}); + assert_eq!(rf.call(&s2, Some(&s1)), Reward{transition_reward: 4.0, instantaneous_reward: 9.0}); } #[test] @@ -77,7 +77,6 @@ fn factored_reward_function_two_nodes() { rf.get_transition_reward_mut(n2).assign(&arr2(&[[12.0, 1.0],[2.0,12.0]])); rf.get_instantaneous_reward_mut(n2).assign(&arr1(&[3.0,5.0])); - let s00: NetworkProcessState = vec![params::StateType::Discrete(0), params::StateType::Discrete(0)]; let s01: NetworkProcessState = vec![params::StateType::Discrete(1), params::StateType::Discrete(0)]; let s02: NetworkProcessState = vec![params::StateType::Discrete(2), params::StateType::Discrete(0)]; @@ -87,32 +86,32 @@ fn factored_reward_function_two_nodes() { let s11: NetworkProcessState = vec![params::StateType::Discrete(1), params::StateType::Discrete(1)]; let s12: NetworkProcessState = vec![params::StateType::Discrete(2), params::StateType::Discrete(1)]; - assert_eq!(rf.call(s00.clone(), Some(s01.clone())), Reward{transition_reward: 2.0, instantaneous_reward: 6.0}); - assert_eq!(rf.call(s00.clone(), Some(s02.clone())), Reward{transition_reward: 5.0, instantaneous_reward: 6.0}); - assert_eq!(rf.call(s00.clone(), Some(s10.clone())), Reward{transition_reward: 2.0, instantaneous_reward: 6.0}); + assert_eq!(rf.call(&s00, Some(&s01)), Reward{transition_reward: 2.0, instantaneous_reward: 6.0}); + assert_eq!(rf.call(&s00, Some(&s02)), Reward{transition_reward: 5.0, instantaneous_reward: 6.0}); + assert_eq!(rf.call(&s00, Some(&s10)), Reward{transition_reward: 2.0, instantaneous_reward: 6.0}); - assert_eq!(rf.call(s01.clone(), Some(s00.clone())), Reward{transition_reward: 1.0, instantaneous_reward: 8.0}); - assert_eq!(rf.call(s01.clone(), Some(s02.clone())), Reward{transition_reward: 6.0, instantaneous_reward: 8.0}); - assert_eq!(rf.call(s01.clone(), Some(s11.clone())), Reward{transition_reward: 2.0, instantaneous_reward: 8.0}); + assert_eq!(rf.call(&s01, Some(&s00)), Reward{transition_reward: 1.0, instantaneous_reward: 8.0}); + assert_eq!(rf.call(&s01, Some(&s02)), Reward{transition_reward: 6.0, instantaneous_reward: 8.0}); + assert_eq!(rf.call(&s01, Some(&s11)), Reward{transition_reward: 2.0, instantaneous_reward: 8.0}); - assert_eq!(rf.call(s02.clone(), Some(s00.clone())), Reward{transition_reward: 3.0, instantaneous_reward: 12.0}); - assert_eq!(rf.call(s02.clone(), Some(s01.clone())), Reward{transition_reward: 4.0, instantaneous_reward: 12.0}); - assert_eq!(rf.call(s02.clone(), Some(s12.clone())), Reward{transition_reward: 2.0, instantaneous_reward: 12.0}); + assert_eq!(rf.call(&s02, Some(&s00)), Reward{transition_reward: 3.0, instantaneous_reward: 12.0}); + assert_eq!(rf.call(&s02, Some(&s01)), Reward{transition_reward: 4.0, instantaneous_reward: 12.0}); + assert_eq!(rf.call(&s02, Some(&s12)), Reward{transition_reward: 2.0, instantaneous_reward: 12.0}); - assert_eq!(rf.call(s10.clone(), Some(s11.clone())), Reward{transition_reward: 2.0, instantaneous_reward: 8.0}); - assert_eq!(rf.call(s10.clone(), Some(s12.clone())), Reward{transition_reward: 5.0, instantaneous_reward: 8.0}); - 
assert_eq!(rf.call(s10.clone(), Some(s00.clone())), Reward{transition_reward: 1.0, instantaneous_reward: 8.0}); + assert_eq!(rf.call(&s10, Some(&s11)), Reward{transition_reward: 2.0, instantaneous_reward: 8.0}); + assert_eq!(rf.call(&s10, Some(&s12)), Reward{transition_reward: 5.0, instantaneous_reward: 8.0}); + assert_eq!(rf.call(&s10, Some(&s00)), Reward{transition_reward: 1.0, instantaneous_reward: 8.0}); - assert_eq!(rf.call(s11.clone(), Some(s10.clone())), Reward{transition_reward: 1.0, instantaneous_reward: 10.0}); - assert_eq!(rf.call(s11.clone(), Some(s12.clone())), Reward{transition_reward: 6.0, instantaneous_reward: 10.0}); - assert_eq!(rf.call(s11.clone(), Some(s01.clone())), Reward{transition_reward: 1.0, instantaneous_reward: 10.0}); + assert_eq!(rf.call(&s11, Some(&s10)), Reward{transition_reward: 1.0, instantaneous_reward: 10.0}); + assert_eq!(rf.call(&s11, Some(&s12)), Reward{transition_reward: 6.0, instantaneous_reward: 10.0}); + assert_eq!(rf.call(&s11, Some(&s01)), Reward{transition_reward: 1.0, instantaneous_reward: 10.0}); - assert_eq!(rf.call(s12.clone(), Some(s10.clone())), Reward{transition_reward: 3.0, instantaneous_reward: 14.0}); - assert_eq!(rf.call(s12.clone(), Some(s11.clone())), Reward{transition_reward: 4.0, instantaneous_reward: 14.0}); - assert_eq!(rf.call(s12.clone(), Some(s02.clone())), Reward{transition_reward: 1.0, instantaneous_reward: 14.0}); + assert_eq!(rf.call(&s12, Some(&s10)), Reward{transition_reward: 3.0, instantaneous_reward: 14.0}); + assert_eq!(rf.call(&s12, Some(&s11)), Reward{transition_reward: 4.0, instantaneous_reward: 14.0}); + assert_eq!(rf.call(&s12, Some(&s02)), Reward{transition_reward: 1.0, instantaneous_reward: 14.0}); } From bb239aaa0c9b873ac6172a5b92fa85e159b98e16 Mon Sep 17 00:00:00 2001 From: AlessandroBregoli Date: Thu, 1 Dec 2022 08:15:30 +0100 Subject: [PATCH 03/10] Implemented reward_evaluation for an entire process. 
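
The state-space evaluation added here decodes a flat index into one state per
variable with a mixed-radix fold (see evaluate_state_space below). A minimal,
self-contained sketch of that decoding step, assuming illustrative cardinalities
[2, 3, 2] and a hypothetical helper name decode_state (neither is part of the
library):

    // Decode a flat joint-state index into per-variable states,
    // least-significant variable first, as the fold over
    // `variables_domain` in evaluate_state_space does.
    fn decode_state(mut index: usize, cardinalities: &[usize]) -> Vec<usize> {
        let mut state = Vec::with_capacity(cardinalities.len());
        for &card in cardinalities {
            state.push(index % card); // state of the current variable
            index /= card;            // shift to the next variable
        }
        state
    }

    fn main() {
        // 2 * 3 * 2 = 12 joint states; index 7 decodes to [1, 0, 1].
        assert_eq!(decode_state(7, &[2, 3, 2]), vec![1usize, 0, 1]);
    }

Every decoded state is then scored with evaluate_state, so the cost scales with
the product of the variable cardinalities.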
--- reCTBN/src/params.rs | 2 +- reCTBN/src/reward.rs | 9 ++++-- reCTBN/src/reward/reward_evaluation.rs | 41 +++++++++++++++++++++----- reCTBN/tests/reward_evaluation.rs | 17 ++++++++--- 4 files changed, 54 insertions(+), 15 deletions(-) diff --git a/reCTBN/src/params.rs b/reCTBN/src/params.rs index 9f63860..3d08273 100644 --- a/reCTBN/src/params.rs +++ b/reCTBN/src/params.rs @@ -20,7 +20,7 @@ pub enum ParamsError { } /// Allowed type of states -#[derive(Clone)] +#[derive(Clone, Hash, PartialEq, Eq, Debug)] pub enum StateType { Discrete(usize), } diff --git a/reCTBN/src/reward.rs b/reCTBN/src/reward.rs index 1ea575c..b34db7f 100644 --- a/reCTBN/src/reward.rs +++ b/reCTBN/src/reward.rs @@ -1,6 +1,8 @@ pub mod reward_function; pub mod reward_evaluation; +use std::collections::HashMap; + use crate::process; use ndarray; @@ -43,12 +45,13 @@ pub trait RewardFunction { } pub trait RewardEvaluation { - fn call( + fn evaluate_state_space( &self, network_process: &N, reward_function: &R, - ) -> ndarray::Array1; - fn call_state( + ) -> HashMap; + + fn evaluate_state( &self, network_process: &N, reward_function: &R, diff --git a/reCTBN/src/reward/reward_evaluation.rs b/reCTBN/src/reward/reward_evaluation.rs index fca7c1a..67baa5e 100644 --- a/reCTBN/src/reward/reward_evaluation.rs +++ b/reCTBN/src/reward/reward_evaluation.rs @@ -1,7 +1,12 @@ +use std::collections::HashMap; + +use crate::params::{self, ParamsTrait}; +use crate::process; + use crate::{ + process::NetworkProcessState, reward::RewardEvaluation, sampling::{ForwardSampler, Sampler}, - process::NetworkProcessState }; pub struct MonteCarloDiscountedRward { @@ -28,21 +33,42 @@ impl MonteCarloDiscountedRward { } impl RewardEvaluation for MonteCarloDiscountedRward { - fn call( + fn evaluate_state_space( &self, network_process: &N, reward_function: &R, - ) -> ndarray::Array1 { - todo!() + ) -> HashMap { + let variables_domain: Vec> = network_process + .get_node_indices() + .map(|x| match network_process.get_node(x) { + params::Params::DiscreteStatesContinousTime(x) => + (0..x.get_reserved_space_as_parent()).map(|s| params::StateType::Discrete(s)).collect() + }).collect(); + + let n_states:usize = variables_domain.iter().map(|x| x.len()).product(); + + (0..n_states).map(|s| { + let state: process::NetworkProcessState = variables_domain.iter().fold((s, vec![]), |acc, x| { + let mut acc = acc; + let idx_s = acc.0%x.len(); + acc.1.push(x[idx_s].clone()); + acc.0 = acc.0 / x.len(); + acc + }).1; + + let r = self.evaluate_state(network_process, reward_function, &state); + (state, r) + }).collect() } - fn call_state( + fn evaluate_state( &self, network_process: &N, reward_function: &R, state: &NetworkProcessState, ) -> f64 { - let mut sampler = ForwardSampler::new(network_process, self.seed.clone(), Some(state.clone())); + let mut sampler = + ForwardSampler::new(network_process, self.seed.clone(), Some(state.clone())); let mut ret = 0.0; for _i in 0..self.n_iterations { @@ -60,7 +86,8 @@ impl RewardEvaluation for MonteCarloDiscountedRward { let discount = std::f64::consts::E.powf(-self.discount_factor * previous.t) - std::f64::consts::E.powf(-self.discount_factor * current.t); ret += discount * r.instantaneous_reward; - ret += std::f64::consts::E.powf(-self.discount_factor * current.t) * r.transition_reward; + ret += std::f64::consts::E.powf(-self.discount_factor * current.t) + * r.transition_reward; } previous = current; } diff --git a/reCTBN/tests/reward_evaluation.rs b/reCTBN/tests/reward_evaluation.rs index f3938e7..1650507 100644 --- 
a/reCTBN/tests/reward_evaluation.rs +++ b/reCTBN/tests/reward_evaluation.rs @@ -34,8 +34,13 @@ fn simple_factored_reward_function_binary_node_MC() { let s1: NetworkProcessState = vec![params::StateType::Discrete(1)]; let mc = MonteCarloDiscountedRward::new(100, 10.0, 1.0, Some(215)); - assert_abs_diff_eq!(3.0, mc.call_state(&net, &rf, &s0), epsilon = 1e-2); - assert_abs_diff_eq!(3.0, mc.call_state(&net, &rf, &s1), epsilon = 1e-2); + assert_abs_diff_eq!(3.0, mc.evaluate_state(&net, &rf, &s0), epsilon = 1e-2); + assert_abs_diff_eq!(3.0, mc.evaluate_state(&net, &rf, &s1), epsilon = 1e-2); + + let rst = mc.evaluate_state_space(&net, &rf); + assert_abs_diff_eq!(3.0, rst[&s0], epsilon = 1e-2); + assert_abs_diff_eq!(3.0, rst[&s1], epsilon = 1e-2); + } #[test] @@ -102,6 +107,10 @@ fn simple_factored_reward_function_chain_MC() { params::StateType::Discrete(0), ]; - let mc = MonteCarloDiscountedRward::new(10000, 100.0, 1.0, Some(215)); - assert_abs_diff_eq!(2.447, mc.call_state(&net, &rf, &s000), epsilon = 1e-1); + let mc = MonteCarloDiscountedRward::new(1000, 10.0, 1.0, Some(215)); + assert_abs_diff_eq!(2.447, mc.evaluate_state(&net, &rf, &s000), epsilon = 1e-1); + + let rst = mc.evaluate_state_space(&net, &rf); + assert_abs_diff_eq!(2.447, rst[&s000], epsilon = 1e-1); + } From 687f19ff1f7a3cec6abe2c8bd62429aab73ce284 Mon Sep 17 00:00:00 2001 From: AlessandroBregoli Date: Thu, 1 Dec 2022 08:45:10 +0100 Subject: [PATCH 04/10] Added FiniteHorizon --- reCTBN/src/reward/reward_evaluation.rs | 87 +++++++++++++++++--------- reCTBN/tests/reward_evaluation.rs | 10 ++- 2 files changed, 65 insertions(+), 32 deletions(-) diff --git a/reCTBN/src/reward/reward_evaluation.rs b/reCTBN/src/reward/reward_evaluation.rs index 67baa5e..9dcb6d2 100644 --- a/reCTBN/src/reward/reward_evaluation.rs +++ b/reCTBN/src/reward/reward_evaluation.rs @@ -9,30 +9,35 @@ use crate::{ sampling::{ForwardSampler, Sampler}, }; -pub struct MonteCarloDiscountedRward { +pub enum RewardCriteria { + FiniteHorizon, + InfiniteHorizon {discount_factor: f64}, +} + +pub struct MonteCarloRward { n_iterations: usize, end_time: f64, - discount_factor: f64, + reward_criteria: RewardCriteria, seed: Option, } -impl MonteCarloDiscountedRward { +impl MonteCarloRward { pub fn new( n_iterations: usize, end_time: f64, - discount_factor: f64, + reward_criteria: RewardCriteria, seed: Option, - ) -> MonteCarloDiscountedRward { - MonteCarloDiscountedRward { + ) -> MonteCarloRward { + MonteCarloRward { n_iterations, end_time, - discount_factor, + reward_criteria, seed, } } } -impl RewardEvaluation for MonteCarloDiscountedRward { +impl RewardEvaluation for MonteCarloRward { fn evaluate_state_space( &self, network_process: &N, @@ -41,24 +46,32 @@ impl RewardEvaluation for MonteCarloDiscountedRward { let variables_domain: Vec> = network_process .get_node_indices() .map(|x| match network_process.get_node(x) { - params::Params::DiscreteStatesContinousTime(x) => - (0..x.get_reserved_space_as_parent()).map(|s| params::StateType::Discrete(s)).collect() - }).collect(); + params::Params::DiscreteStatesContinousTime(x) => (0..x + .get_reserved_space_as_parent()) + .map(|s| params::StateType::Discrete(s)) + .collect(), + }) + .collect(); + + let n_states: usize = variables_domain.iter().map(|x| x.len()).product(); - let n_states:usize = variables_domain.iter().map(|x| x.len()).product(); - - (0..n_states).map(|s| { - let state: process::NetworkProcessState = variables_domain.iter().fold((s, vec![]), |acc, x| { - let mut acc = acc; - let idx_s = acc.0%x.len(); - 
acc.1.push(x[idx_s].clone()); - acc.0 = acc.0 / x.len(); - acc - }).1; + (0..n_states) + .map(|s| { + let state: process::NetworkProcessState = variables_domain + .iter() + .fold((s, vec![]), |acc, x| { + let mut acc = acc; + let idx_s = acc.0 % x.len(); + acc.1.push(x[idx_s].clone()); + acc.0 = acc.0 / x.len(); + acc + }) + .1; - let r = self.evaluate_state(network_process, reward_function, &state); - (state, r) - }).collect() + let r = self.evaluate_state(network_process, reward_function, &state); + (state, r) + }) + .collect() } fn evaluate_state( @@ -78,16 +91,30 @@ impl RewardEvaluation for MonteCarloDiscountedRward { let current = sampler.next().unwrap(); if current.t > self.end_time { let r = reward_function.call(&previous.state, None); - let discount = std::f64::consts::E.powf(-self.discount_factor * previous.t) - - std::f64::consts::E.powf(-self.discount_factor * self.end_time); + let discount = match self.reward_criteria { + RewardCriteria::FiniteHorizon => self.end_time - previous.t, + RewardCriteria::InfiniteHorizon {discount_factor} => { + std::f64::consts::E.powf(-discount_factor * previous.t) + - std::f64::consts::E.powf(-discount_factor * self.end_time) + } + }; ret += discount * r.instantaneous_reward; } else { let r = reward_function.call(&previous.state, Some(¤t.state)); - let discount = std::f64::consts::E.powf(-self.discount_factor * previous.t) - - std::f64::consts::E.powf(-self.discount_factor * current.t); + let discount = match self.reward_criteria { + RewardCriteria::FiniteHorizon => current.t-previous.t, + RewardCriteria::InfiniteHorizon {discount_factor} => { + std::f64::consts::E.powf(-discount_factor * previous.t) + - std::f64::consts::E.powf(-discount_factor * current.t) + } + }; ret += discount * r.instantaneous_reward; - ret += std::f64::consts::E.powf(-self.discount_factor * current.t) - * r.transition_reward; + ret += match self.reward_criteria { + RewardCriteria::FiniteHorizon => 1.0, + RewardCriteria::InfiniteHorizon {discount_factor} => { + std::f64::consts::E.powf(-discount_factor * current.t) + } + } * r.transition_reward; } previous = current; } diff --git a/reCTBN/tests/reward_evaluation.rs b/reCTBN/tests/reward_evaluation.rs index 1650507..b2cfd29 100644 --- a/reCTBN/tests/reward_evaluation.rs +++ b/reCTBN/tests/reward_evaluation.rs @@ -33,7 +33,7 @@ fn simple_factored_reward_function_binary_node_MC() { let s0: NetworkProcessState = vec![params::StateType::Discrete(0)]; let s1: NetworkProcessState = vec![params::StateType::Discrete(1)]; - let mc = MonteCarloDiscountedRward::new(100, 10.0, 1.0, Some(215)); + let mc = MonteCarloRward::new(100, 10.0, RewardCriteria::InfiniteHorizon { discount_factor: 1.0 }, Some(215)); assert_abs_diff_eq!(3.0, mc.evaluate_state(&net, &rf, &s0), epsilon = 1e-2); assert_abs_diff_eq!(3.0, mc.evaluate_state(&net, &rf, &s1), epsilon = 1e-2); @@ -41,6 +41,12 @@ fn simple_factored_reward_function_binary_node_MC() { assert_abs_diff_eq!(3.0, rst[&s0], epsilon = 1e-2); assert_abs_diff_eq!(3.0, rst[&s1], epsilon = 1e-2); + + let mc = MonteCarloRward::new(100, 10.0, RewardCriteria::FiniteHorizon, Some(215)); + assert_abs_diff_eq!(30.0, mc.evaluate_state(&net, &rf, &s0), epsilon = 1e-2); + assert_abs_diff_eq!(30.0, mc.evaluate_state(&net, &rf, &s1), epsilon = 1e-2); + + } #[test] @@ -107,7 +113,7 @@ fn simple_factored_reward_function_chain_MC() { params::StateType::Discrete(0), ]; - let mc = MonteCarloDiscountedRward::new(1000, 10.0, 1.0, Some(215)); + let mc = MonteCarloRward::new(1000, 10.0, RewardCriteria::InfiniteHorizon { 
discount_factor: 1.0 }, Some(215)); assert_abs_diff_eq!(2.447, mc.evaluate_state(&net, &rf, &s000), epsilon = 1e-1); let rst = mc.evaluate_state_space(&net, &rf); From 414aa3186711b36613bd8980c6860435fac12257 Mon Sep 17 00:00:00 2001 From: AlessandroBregoli Date: Mon, 5 Dec 2022 09:21:37 +0100 Subject: [PATCH 05/10] Bugfix --- reCTBN/src/parameter_learning.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/reCTBN/src/parameter_learning.rs b/reCTBN/src/parameter_learning.rs index 2aa518c..3f505f9 100644 --- a/reCTBN/src/parameter_learning.rs +++ b/reCTBN/src/parameter_learning.rs @@ -144,6 +144,12 @@ impl ParameterLearning for BayesianApproach { .zip(M.mapv(|x| x as f64).axis_iter(Axis(2))) .for_each(|(mut C, m)| C.assign(&(&m.mapv(|y| y + alpha) / &T.mapv(|y| y + tau)))); + + CIM.outer_iter_mut() + .for_each(|mut C| { + C.diag_mut().fill(0.0); + }); + //Set the diagonal of the inner matrices to the the row sum multiplied by -1 let tmp_diag_sum: Array2 = CIM.sum_axis(Axis(2)).mapv(|x| x * -1.0); CIM.outer_iter_mut() From 9284ca5dd2facaa30a7439ca9e9f00d2778479a4 Mon Sep 17 00:00:00 2001 From: AlessandroBregoli Date: Mon, 5 Dec 2022 15:24:32 +0100 Subject: [PATCH 06/10] Implemanted NeighborhoodRelativeReward --- reCTBN/src/reward/reward_evaluation.rs | 54 +++++++++++++++++++++++--- reCTBN/tests/reward_evaluation.rs | 6 +-- 2 files changed, 52 insertions(+), 8 deletions(-) diff --git a/reCTBN/src/reward/reward_evaluation.rs b/reCTBN/src/reward/reward_evaluation.rs index 9dcb6d2..cb7b8f1 100644 --- a/reCTBN/src/reward/reward_evaluation.rs +++ b/reCTBN/src/reward/reward_evaluation.rs @@ -14,21 +14,21 @@ pub enum RewardCriteria { InfiniteHorizon {discount_factor: f64}, } -pub struct MonteCarloRward { +pub struct MonteCarloReward { n_iterations: usize, end_time: f64, reward_criteria: RewardCriteria, seed: Option, } -impl MonteCarloRward { +impl MonteCarloReward { pub fn new( n_iterations: usize, end_time: f64, reward_criteria: RewardCriteria, seed: Option, - ) -> MonteCarloRward { - MonteCarloRward { + ) -> MonteCarloReward { + MonteCarloReward { n_iterations, end_time, reward_criteria, @@ -37,7 +37,7 @@ impl MonteCarloRward { } } -impl RewardEvaluation for MonteCarloRward { +impl RewardEvaluation for MonteCarloReward { fn evaluate_state_space( &self, network_process: &N, @@ -123,3 +123,47 @@ impl RewardEvaluation for MonteCarloRward { ret / self.n_iterations as f64 } } + +pub struct NeighborhoodRelativeReward { + inner_reward: RE +} + +impl NeighborhoodRelativeReward{ + pub fn new(inner_reward: RE) -> NeighborhoodRelativeReward{ + NeighborhoodRelativeReward {inner_reward} + } +} + +impl RewardEvaluation for NeighborhoodRelativeReward { + fn evaluate_state_space( + &self, + network_process: &N, + reward_function: &R, + ) -> HashMap { + + let absolute_reward = self.inner_reward.evaluate_state_space(network_process, reward_function); + + //This approach optimize memory. Maybe optimizing execution time can be better. 
+ absolute_reward.iter().map(|(k1, v1)| { + let mut max_val:f64 = 1.0; + absolute_reward.iter().for_each(|(k2,v2)| { + let count_diff:usize = k1.iter().zip(k2.iter()).map(|(s1, s2)| if s1 == s2 {0} else {1}).sum(); + if count_diff < 2 { + max_val = max_val.max(v1/v2); + } + + }); + (k1.clone(), max_val) + }).collect() + + } + + fn evaluate_state( + &self, + _network_process: &N, + _reward_function: &R, + _state: &process::NetworkProcessState, + ) -> f64 { + unimplemented!(); + } +} diff --git a/reCTBN/tests/reward_evaluation.rs b/reCTBN/tests/reward_evaluation.rs index b2cfd29..63e9c98 100644 --- a/reCTBN/tests/reward_evaluation.rs +++ b/reCTBN/tests/reward_evaluation.rs @@ -33,7 +33,7 @@ fn simple_factored_reward_function_binary_node_MC() { let s0: NetworkProcessState = vec![params::StateType::Discrete(0)]; let s1: NetworkProcessState = vec![params::StateType::Discrete(1)]; - let mc = MonteCarloRward::new(100, 10.0, RewardCriteria::InfiniteHorizon { discount_factor: 1.0 }, Some(215)); + let mc = MonteCarloReward::new(100, 10.0, RewardCriteria::InfiniteHorizon { discount_factor: 1.0 }, Some(215)); assert_abs_diff_eq!(3.0, mc.evaluate_state(&net, &rf, &s0), epsilon = 1e-2); assert_abs_diff_eq!(3.0, mc.evaluate_state(&net, &rf, &s1), epsilon = 1e-2); @@ -42,7 +42,7 @@ fn simple_factored_reward_function_binary_node_MC() { assert_abs_diff_eq!(3.0, rst[&s1], epsilon = 1e-2); - let mc = MonteCarloRward::new(100, 10.0, RewardCriteria::FiniteHorizon, Some(215)); + let mc = MonteCarloReward::new(100, 10.0, RewardCriteria::FiniteHorizon, Some(215)); assert_abs_diff_eq!(30.0, mc.evaluate_state(&net, &rf, &s0), epsilon = 1e-2); assert_abs_diff_eq!(30.0, mc.evaluate_state(&net, &rf, &s1), epsilon = 1e-2); @@ -113,7 +113,7 @@ fn simple_factored_reward_function_chain_MC() { params::StateType::Discrete(0), ]; - let mc = MonteCarloRward::new(1000, 10.0, RewardCriteria::InfiniteHorizon { discount_factor: 1.0 }, Some(215)); + let mc = MonteCarloReward::new(1000, 10.0, RewardCriteria::InfiniteHorizon { discount_factor: 1.0 }, Some(215)); assert_abs_diff_eq!(2.447, mc.evaluate_state(&net, &rf, &s000), epsilon = 1e-1); let rst = mc.evaluate_state_space(&net, &rf); From ff235b4b7735e3796720a018843f061bbf39bd0c Mon Sep 17 00:00:00 2001 From: Alessandro Bregoli Date: Sat, 14 Jan 2023 14:20:37 +0100 Subject: [PATCH 07/10] Parallelize score based strucutre learning --- .../structure_learning/score_based_algorithm.rs | 16 ++++++++++++---- reCTBN/src/structure_learning/score_function.rs | 2 +- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/reCTBN/src/structure_learning/score_based_algorithm.rs b/reCTBN/src/structure_learning/score_based_algorithm.rs index d65ea88..6850027 100644 --- a/reCTBN/src/structure_learning/score_based_algorithm.rs +++ b/reCTBN/src/structure_learning/score_based_algorithm.rs @@ -6,6 +6,9 @@ use crate::structure_learning::score_function::ScoreFunction; use crate::structure_learning::StructureLearningAlgorithm; use crate::{process, tools::Dataset}; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; +use rayon::prelude::ParallelExtend; + pub struct HillClimbing { score_function: S, max_parent_set: Option, @@ -36,8 +39,9 @@ impl StructureLearningAlgorithm for HillClimbing { let max_parent_set = self.max_parent_set.unwrap_or(net.get_number_of_nodes()); //Reset the adj matrix net.initialize_adj_matrix(); + let mut learned_parent_sets: Vec<(usize, BTreeSet)> = vec![]; //Iterate over each node to learn their parent set. 
- for node in net.get_node_indices() { + learned_parent_sets.par_extend(net.get_node_indices().into_par_iter().map(|node| { //Initialize an empty parent set. let mut parent_set: BTreeSet = BTreeSet::new(); //Compute the score for the empty parent set @@ -76,10 +80,14 @@ impl StructureLearningAlgorithm for HillClimbing { } } } - //Apply the learned parent_set to the network struct. - parent_set.iter().for_each(|p| net.add_edge(*p, node)); + (node, parent_set) + })); + + for (child_node, candidate_parent_set) in learned_parent_sets { + for parent_node in candidate_parent_set.iter() { + net.add_edge(*parent_node, child_node); + } } - return net; } } diff --git a/reCTBN/src/structure_learning/score_function.rs b/reCTBN/src/structure_learning/score_function.rs index f8b38b5..5a56594 100644 --- a/reCTBN/src/structure_learning/score_function.rs +++ b/reCTBN/src/structure_learning/score_function.rs @@ -7,7 +7,7 @@ use statrs::function::gamma; use crate::{parameter_learning, params, process, tools}; -pub trait ScoreFunction { +pub trait ScoreFunction: Sync { fn call( &self, net: &T, From 5d676be18033aee322142acef9e7d439e499071f Mon Sep 17 00:00:00 2001 From: Alessandro Bregoli Date: Mon, 16 Jan 2023 06:50:24 +0100 Subject: [PATCH 08/10] parallelized re --- reCTBN/src/reward.rs | 3 +-- reCTBN/src/reward/reward_evaluation.rs | 5 ++++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/reCTBN/src/reward.rs b/reCTBN/src/reward.rs index b34db7f..f0edf2f 100644 --- a/reCTBN/src/reward.rs +++ b/reCTBN/src/reward.rs @@ -4,7 +4,6 @@ pub mod reward_evaluation; use std::collections::HashMap; use crate::process; -use ndarray; /// Instantiation of reward function and instantaneous reward /// @@ -22,7 +21,7 @@ pub struct Reward { /// The trait RewardFunction describe the methods that all the reward functions must satisfy -pub trait RewardFunction { +pub trait RewardFunction: Sync { /// Given the current state and the previous state, it compute the reward. 
/// /// # Arguments diff --git a/reCTBN/src/reward/reward_evaluation.rs b/reCTBN/src/reward/reward_evaluation.rs index cb7b8f1..431efde 100644 --- a/reCTBN/src/reward/reward_evaluation.rs +++ b/reCTBN/src/reward/reward_evaluation.rs @@ -1,8 +1,11 @@ use std::collections::HashMap; +use rayon::prelude::{IntoParallelIterator, ParallelIterator}; + use crate::params::{self, ParamsTrait}; use crate::process; + use crate::{ process::NetworkProcessState, reward::RewardEvaluation, @@ -55,7 +58,7 @@ impl RewardEvaluation for MonteCarloReward { let n_states: usize = variables_domain.iter().map(|x| x.len()).product(); - (0..n_states) + (0..n_states).into_par_iter() .map(|s| { let state: process::NetworkProcessState = variables_domain .iter() From adb0f99419fb6dd2920d385f069e61f61052a8ed Mon Sep 17 00:00:00 2001 From: AlessandroBregoli Date: Tue, 7 Feb 2023 14:34:58 +0100 Subject: [PATCH 09/10] Added automatic stopping for MonteCarloReward --- reCTBN/src/parameter_learning.rs | 8 +- reCTBN/src/reward.rs | 2 +- reCTBN/src/reward/reward_evaluation.rs | 97 +++++++++++++------ .../score_based_algorithm.rs | 2 +- reCTBN/tests/reward_evaluation.rs | 6 +- 5 files changed, 73 insertions(+), 42 deletions(-) diff --git a/reCTBN/src/parameter_learning.rs b/reCTBN/src/parameter_learning.rs index 7e45e58..3c34d06 100644 --- a/reCTBN/src/parameter_learning.rs +++ b/reCTBN/src/parameter_learning.rs @@ -144,11 +144,9 @@ impl ParameterLearning for BayesianApproach { .zip(M.mapv(|x| x as f64).axis_iter(Axis(2))) .for_each(|(mut C, m)| C.assign(&(&m.mapv(|y| y + alpha) / &T.mapv(|y| y + tau)))); - - CIM.outer_iter_mut() - .for_each(|mut C| { - C.diag_mut().fill(0.0); - }); + CIM.outer_iter_mut().for_each(|mut C| { + C.diag_mut().fill(0.0); + }); //Set the diagonal of the inner matrices to the the row sum multiplied by -1 let tmp_diag_sum: Array2 = CIM.sum_axis(Axis(2)).mapv(|x| x * -1.0); diff --git a/reCTBN/src/reward.rs b/reCTBN/src/reward.rs index f0edf2f..910954c 100644 --- a/reCTBN/src/reward.rs +++ b/reCTBN/src/reward.rs @@ -1,5 +1,5 @@ -pub mod reward_function; pub mod reward_evaluation; +pub mod reward_function; use std::collections::HashMap; diff --git a/reCTBN/src/reward/reward_evaluation.rs b/reCTBN/src/reward/reward_evaluation.rs index 431efde..3802489 100644 --- a/reCTBN/src/reward/reward_evaluation.rs +++ b/reCTBN/src/reward/reward_evaluation.rs @@ -1,11 +1,11 @@ use std::collections::HashMap; use rayon::prelude::{IntoParallelIterator, ParallelIterator}; +use statrs::distribution::ContinuousCDF; use crate::params::{self, ParamsTrait}; use crate::process; - use crate::{ process::NetworkProcessState, reward::RewardEvaluation, @@ -14,11 +14,13 @@ use crate::{ pub enum RewardCriteria { FiniteHorizon, - InfiniteHorizon {discount_factor: f64}, + InfiniteHorizon { discount_factor: f64 }, } pub struct MonteCarloReward { - n_iterations: usize, + max_iterations: usize, + max_err_stop: f64, + alpha_stop: f64, end_time: f64, reward_criteria: RewardCriteria, seed: Option, @@ -26,13 +28,17 @@ pub struct MonteCarloReward { impl MonteCarloReward { pub fn new( - n_iterations: usize, + max_iterations: usize, + max_err_stop: f64, + alpha_stop: f64, end_time: f64, reward_criteria: RewardCriteria, seed: Option, ) -> MonteCarloReward { MonteCarloReward { - n_iterations, + max_iterations, + max_err_stop, + alpha_stop, end_time, reward_criteria, seed, @@ -58,7 +64,8 @@ impl RewardEvaluation for MonteCarloReward { let n_states: usize = variables_domain.iter().map(|x| x.len()).product(); - (0..n_states).into_par_iter() + 
(0..n_states) + .into_par_iter() .map(|s| { let state: process::NetworkProcessState = variables_domain .iter() @@ -85,10 +92,13 @@ impl RewardEvaluation for MonteCarloReward { ) -> f64 { let mut sampler = ForwardSampler::new(network_process, self.seed.clone(), Some(state.clone())); - let mut ret = 0.0; + let mut expected_value = 0.0; + let mut squared_expected_value = 0.0; + let normal = statrs::distribution::Normal::new(0.0, 1.0).unwrap(); - for _i in 0..self.n_iterations { + for i in 0..self.max_iterations { sampler.reset(); + let mut ret = 0.0; let mut previous = sampler.next().unwrap(); while previous.t < self.end_time { let current = sampler.next().unwrap(); @@ -96,7 +106,7 @@ impl RewardEvaluation for MonteCarloReward { let r = reward_function.call(&previous.state, None); let discount = match self.reward_criteria { RewardCriteria::FiniteHorizon => self.end_time - previous.t, - RewardCriteria::InfiniteHorizon {discount_factor} => { + RewardCriteria::InfiniteHorizon { discount_factor } => { std::f64::consts::E.powf(-discount_factor * previous.t) - std::f64::consts::E.powf(-discount_factor * self.end_time) } @@ -105,8 +115,8 @@ impl RewardEvaluation for MonteCarloReward { } else { let r = reward_function.call(&previous.state, Some(¤t.state)); let discount = match self.reward_criteria { - RewardCriteria::FiniteHorizon => current.t-previous.t, - RewardCriteria::InfiniteHorizon {discount_factor} => { + RewardCriteria::FiniteHorizon => current.t - previous.t, + RewardCriteria::InfiniteHorizon { discount_factor } => { std::f64::consts::E.powf(-discount_factor * previous.t) - std::f64::consts::E.powf(-discount_factor * current.t) } @@ -114,51 +124,74 @@ impl RewardEvaluation for MonteCarloReward { ret += discount * r.instantaneous_reward; ret += match self.reward_criteria { RewardCriteria::FiniteHorizon => 1.0, - RewardCriteria::InfiniteHorizon {discount_factor} => { + RewardCriteria::InfiniteHorizon { discount_factor } => { std::f64::consts::E.powf(-discount_factor * current.t) } } * r.transition_reward; } previous = current; } + + let float_i = i as f64; + expected_value = + expected_value * float_i as f64 / (float_i + 1.0) + ret / (float_i + 1.0); + squared_expected_value = squared_expected_value * float_i as f64 / (float_i + 1.0) + + ret.powi(2) / (float_i + 1.0); + + if i > 2 { + let var = + (float_i + 1.0) / float_i * (squared_expected_value - expected_value.powi(2)); + if self.alpha_stop + - 2.0 * normal.cdf(-(float_i + 1.0).sqrt() * self.max_err_stop / var.sqrt()) + > 0.0 + { + return expected_value; + } + } } - ret / self.n_iterations as f64 + expected_value } } pub struct NeighborhoodRelativeReward { - inner_reward: RE + inner_reward: RE, } -impl NeighborhoodRelativeReward{ - pub fn new(inner_reward: RE) -> NeighborhoodRelativeReward{ - NeighborhoodRelativeReward {inner_reward} +impl NeighborhoodRelativeReward { + pub fn new(inner_reward: RE) -> NeighborhoodRelativeReward { + NeighborhoodRelativeReward { inner_reward } } } -impl RewardEvaluation for NeighborhoodRelativeReward { +impl RewardEvaluation for NeighborhoodRelativeReward { fn evaluate_state_space( &self, network_process: &N, reward_function: &R, ) -> HashMap { + let absolute_reward = self + .inner_reward + .evaluate_state_space(network_process, reward_function); - let absolute_reward = self.inner_reward.evaluate_state_space(network_process, reward_function); - //This approach optimize memory. Maybe optimizing execution time can be better. 
- absolute_reward.iter().map(|(k1, v1)| { - let mut max_val:f64 = 1.0; - absolute_reward.iter().for_each(|(k2,v2)| { - let count_diff:usize = k1.iter().zip(k2.iter()).map(|(s1, s2)| if s1 == s2 {0} else {1}).sum(); - if count_diff < 2 { - max_val = max_val.max(v1/v2); - } - - }); - (k1.clone(), max_val) - }).collect() - + absolute_reward + .iter() + .map(|(k1, v1)| { + let mut max_val: f64 = 1.0; + absolute_reward.iter().for_each(|(k2, v2)| { + let count_diff: usize = k1 + .iter() + .zip(k2.iter()) + .map(|(s1, s2)| if s1 == s2 { 0 } else { 1 }) + .sum(); + if count_diff < 2 { + max_val = max_val.max(v1 / v2); + } + }); + (k1.clone(), max_val) + }) + .collect() } fn evaluate_state( diff --git a/reCTBN/src/structure_learning/score_based_algorithm.rs b/reCTBN/src/structure_learning/score_based_algorithm.rs index 6850027..9173b86 100644 --- a/reCTBN/src/structure_learning/score_based_algorithm.rs +++ b/reCTBN/src/structure_learning/score_based_algorithm.rs @@ -82,7 +82,7 @@ impl StructureLearningAlgorithm for HillClimbing { } (node, parent_set) })); - + for (child_node, candidate_parent_set) in learned_parent_sets { for parent_node in candidate_parent_set.iter() { net.add_edge(*parent_node, child_node); diff --git a/reCTBN/tests/reward_evaluation.rs b/reCTBN/tests/reward_evaluation.rs index 63e9c98..c0372b1 100644 --- a/reCTBN/tests/reward_evaluation.rs +++ b/reCTBN/tests/reward_evaluation.rs @@ -33,7 +33,7 @@ fn simple_factored_reward_function_binary_node_MC() { let s0: NetworkProcessState = vec![params::StateType::Discrete(0)]; let s1: NetworkProcessState = vec![params::StateType::Discrete(1)]; - let mc = MonteCarloReward::new(100, 10.0, RewardCriteria::InfiniteHorizon { discount_factor: 1.0 }, Some(215)); + let mc = MonteCarloReward::new(10000, 1e-1, 1e-1, 10.0, RewardCriteria::InfiniteHorizon { discount_factor: 1.0 }, Some(215)); assert_abs_diff_eq!(3.0, mc.evaluate_state(&net, &rf, &s0), epsilon = 1e-2); assert_abs_diff_eq!(3.0, mc.evaluate_state(&net, &rf, &s1), epsilon = 1e-2); @@ -42,7 +42,7 @@ fn simple_factored_reward_function_binary_node_MC() { assert_abs_diff_eq!(3.0, rst[&s1], epsilon = 1e-2); - let mc = MonteCarloReward::new(100, 10.0, RewardCriteria::FiniteHorizon, Some(215)); + let mc = MonteCarloReward::new(10000, 1e-1, 1e-1, 10.0, RewardCriteria::FiniteHorizon, Some(215)); assert_abs_diff_eq!(30.0, mc.evaluate_state(&net, &rf, &s0), epsilon = 1e-2); assert_abs_diff_eq!(30.0, mc.evaluate_state(&net, &rf, &s1), epsilon = 1e-2); @@ -113,7 +113,7 @@ fn simple_factored_reward_function_chain_MC() { params::StateType::Discrete(0), ]; - let mc = MonteCarloReward::new(1000, 10.0, RewardCriteria::InfiniteHorizon { discount_factor: 1.0 }, Some(215)); + let mc = MonteCarloReward::new(10000, 1e-1, 1e-1, 10.0, RewardCriteria::InfiniteHorizon { discount_factor: 1.0 }, Some(215)); assert_abs_diff_eq!(2.447, mc.evaluate_state(&net, &rf, &s000), epsilon = 1e-1); let rst = mc.evaluate_state_space(&net, &rf); From 776b9aa030fb25ecce5216ad64abad6f8762c9ba Mon Sep 17 00:00:00 2001 From: AlessandroBregoli Date: Tue, 7 Feb 2023 16:34:57 +0100 Subject: [PATCH 10/10] clippy error solved --- reCTBN/tests/reward_evaluation.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/reCTBN/tests/reward_evaluation.rs b/reCTBN/tests/reward_evaluation.rs index c0372b1..355341c 100644 --- a/reCTBN/tests/reward_evaluation.rs +++ b/reCTBN/tests/reward_evaluation.rs @@ -1,6 +1,6 @@ mod utils; -use approx::{abs_diff_eq, assert_abs_diff_eq}; +use approx::assert_abs_diff_eq; use ndarray::*; use 
reCTBN::{ params, @@ -10,7 +10,7 @@ use reCTBN::{ use utils::generate_discrete_time_continous_node; #[test] -fn simple_factored_reward_function_binary_node_MC() { +fn simple_factored_reward_function_binary_node_mc() { let mut net = CtbnNetwork::new(); let n1 = net .add_node(generate_discrete_time_continous_node(String::from("n1"), 2)) @@ -50,7 +50,7 @@ fn simple_factored_reward_function_binary_node_MC() { } #[test] -fn simple_factored_reward_function_chain_MC() { +fn simple_factored_reward_function_chain_mc() { let mut net = CtbnNetwork::new(); let n1 = net .add_node(generate_discrete_time_continous_node(String::from("n1"), 2))
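
Taken together, the series leaves the Monte Carlo reward API in the shape below.
A minimal usage sketch mirroring tests/reward_evaluation.rs; the node constructor
generate_discrete_time_continous_node is the test-suite helper from tests/utils,
not a library function, and the test name monte_carlo_reward_usage is
illustrative, so the snippet is assumed to sit next to the existing integration
tests so that `mod utils;` resolves:

    mod utils;

    use ndarray::{arr1, arr3};
    use reCTBN::{
        params,
        process::{ctbn::*, NetworkProcess, NetworkProcessState},
        reward::{reward_evaluation::*, reward_function::*, *},
    };
    use utils::generate_discrete_time_continous_node;

    #[test]
    fn monte_carlo_reward_usage() {
        // One binary node; its CIM leaves state 0 at rate 3.0 and state 1 at rate 2.0.
        let mut net = CtbnNetwork::new();
        let n1 = net
            .add_node(generate_discrete_time_continous_node(String::from("n1"), 2))
            .unwrap();
        match &mut net.get_node_mut(n1) {
            params::Params::DiscreteStatesContinousTime(param) => {
                param.set_cim(arr3(&[[[-3.0, 3.0], [2.0, -2.0]]])).unwrap();
            }
        }
        net.initialize_adj_matrix();

        // Constant instantaneous reward of 3.0 per unit of time, no transition reward.
        let mut rf = FactoredRewardFunction::initialize_from_network_process(&net);
        rf.get_instantaneous_reward_mut(n1).assign(&arr1(&[3.0, 3.0]));

        // Post-PATCH-09 constructor:
        // (max_iterations, max_err_stop, alpha_stop, end_time, reward_criteria, seed).
        let mc = MonteCarloReward::new(
            10000,
            1e-1,
            1e-1,
            10.0,
            RewardCriteria::InfiniteHorizon { discount_factor: 1.0 },
            Some(215),
        );

        let s0: NetworkProcessState = vec![params::StateType::Discrete(0)];
        assert!(mc.evaluate_state(&net, &rf, &s0) > 0.0);
        // One binary node => two joint states in the evaluated state space.
        assert_eq!(mc.evaluate_state_space(&net, &rf).len(), 2);
    }

The (max_err_stop, alpha_stop) pair introduced in PATCH 09 lets evaluate_state
return before max_iterations once a normal-approximation bound on the estimate's
standard error falls below max_err_stop at significance alpha_stop.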