@@ -1,7 +1,12 @@
+use std::collections::HashMap;
+
+use crate::params::{self, ParamsTrait};
+use crate::process;
+
 use crate::{
+    process::NetworkProcessState,
     reward::RewardEvaluation,
     sampling::{ForwardSampler, Sampler},
-    process::NetworkProcessState
 };

 pub struct MonteCarloDiscountedRward {
@@ -28,21 +33,42 @@ impl MonteCarloDiscountedRward {
 }

 impl RewardEvaluation for MonteCarloDiscountedRward {
-    fn call<N: crate::process::NetworkProcess, R: super::RewardFunction>(
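+    /// Evaluate the expected discounted reward of every joint state of the
+    /// process, returned as a map from state to its Monte Carlo estimate.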
+    fn evaluate_state_space<N: process::NetworkProcess, R: super::RewardFunction>(
         &self,
         network_process: &N,
         reward_function: &R,
-    ) -> ndarray::Array1<f64> {
-        todo!()
+    ) -> HashMap<process::NetworkProcessState, f64> {
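+        // Build each variable's discrete domain; the joint state space to
+        // enumerate is the Cartesian product of these domains.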
+        let variables_domain: Vec<Vec<params::StateType>> = network_process
+            .get_node_indices()
+            .map(|x| match network_process.get_node(x) {
+                params::Params::DiscreteStatesContinousTime(x) =>
+                    (0..x.get_reserved_space_as_parent()).map(|s| params::StateType::Discrete(s)).collect()
+            }).collect();
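+
+        // Total number of joint states: the product of the domain sizes.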
+        let n_states: usize = variables_domain.iter().map(|x| x.len()).product();
+
+        (0..n_states).map(|s| {
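+            // Decode the flat index `s` into one state per variable
+            // (mixed-radix decomposition, least-significant variable first).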
+            let state: process::NetworkProcessState = variables_domain.iter().fold((s, vec![]), |acc, x| {
+                let mut acc = acc;
+                let idx_s = acc.0 % x.len();
+                acc.1.push(x[idx_s].clone());
+                acc.0 = acc.0 / x.len();
+                acc
+            }).1;
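+
+            // Monte Carlo estimate of the discounted reward for this state.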
+            let r = self.evaluate_state(network_process, reward_function, &state);
+            (state, r)
+        }).collect()
     }

-    fn call_state<N: crate::process::NetworkProcess, R: super::RewardFunction>(
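+    /// Estimate the expected discounted reward accumulated starting from
+    /// `state`, over `self.n_iterations` forward-sampled trajectories.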
+    fn evaluate_state<N: crate::process::NetworkProcess, R: super::RewardFunction>(
         &self,
         network_process: &N,
         reward_function: &R,
         state: &NetworkProcessState,
     ) -> f64 {
-        let mut sampler = ForwardSampler::new(network_process, self.seed.clone(), Some(state.clone()));
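+        // Restart the forward sampler from the queried state so that every
+        // sampled trajectory begins in `state`.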
+        let mut sampler =
+            ForwardSampler::new(network_process, self.seed.clone(), Some(state.clone()));
         let mut ret = 0.0;

         for _i in 0..self.n_iterations {
@@ -60,7 +86,8 @@ impl RewardEvaluation for MonteCarloDiscountedRward {
                 let discount = std::f64::consts::E.powf(-self.discount_factor * previous.t)
                     - std::f64::consts::E.powf(-self.discount_factor * current.t);
                 ret += discount * r.instantaneous_reward;
-                ret += std::f64::consts::E.powf(-self.discount_factor * current.t) * r.transition_reward;
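+                // The transition reward is collected at the jump time `current.t`, so
+                // it is discounted by exp(-discount_factor * current.t); the
+                // instantaneous reward above is weighted by the discount accumulated
+                // over the sojourn interval [previous.t, current.t].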
+                ret += std::f64::consts::E.powf(-self.discount_factor * current.t)
+                    * r.transition_reward;
             }
             previous = current;
         }