@@ -1,11 +1,11 @@
 use std::collections::HashMap;

 use rayon::prelude::{IntoParallelIterator, ParallelIterator};
+use statrs::distribution::ContinuousCDF;

 use crate::params::{self, ParamsTrait};
 use crate::process;

 use crate::{
     process::NetworkProcessState,
     reward::RewardEvaluation,
@@ -18,7 +18,9 @@ pub enum RewardCriteria {
 }

 pub struct MonteCarloReward {
-    n_iterations: usize,
+    max_iterations: usize,
+    max_err_stop: f64,
+    alpha_stop: f64,
     end_time: f64,
     reward_criteria: RewardCriteria,
     seed: Option<u64>,
@@ -26,13 +28,17 @@ pub struct MonteCarloReward {

 impl MonteCarloReward {
     pub fn new(
-        n_iterations: usize,
+        max_iterations: usize,
+        max_err_stop: f64,
+        alpha_stop: f64,
         end_time: f64,
         reward_criteria: RewardCriteria,
         seed: Option<u64>,
     ) -> MonteCarloReward {
         MonteCarloReward {
-            n_iterations,
+            max_iterations,
+            max_err_stop,
+            alpha_stop,
             end_time,
             reward_criteria,
             seed,
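For orientation, here is a minimal usage sketch of the constructor after this change. The argument values and the `RewardCriteria` variant are illustrative assumptions, not taken from the patch:

```rust
// Hypothetical call site: the fixed iteration count is replaced by a hard cap
// (max_iterations) plus an error/confidence pair that drives the early stop.
let evaluator = MonteCarloReward::new(
    100_000,                       // max_iterations: upper bound on sampled trajectories
    0.01,                          // max_err_stop: tolerated absolute error of the estimate
    0.05,                          // alpha_stop: significance level of the stopping test
    10.0,                          // end_time: simulation horizon of each trajectory
    RewardCriteria::FiniteHorizon, // assumed variant, shown only for illustration
    Some(1234),                    // seed: fixes the sampler's RNG for reproducibility
);
```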
@@ -58,7 +64,8 @@ impl RewardEvaluation for MonteCarloReward {

         let n_states: usize = variables_domain.iter().map(|x| x.len()).product();

-        (0..n_states).into_par_iter()
+        (0..n_states)
+            .into_par_iter()
             .map(|s| {
                 let state: process::NetworkProcessState = variables_domain
                     .iter()
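The parallel iterator above walks every joint state by a flat index `s`, which is then unpacked into one value per variable from `variables_domain`. A small sketch of that kind of decoding, assuming each variable's domain is indexed from zero (the helper name, the digit order, and the plain-index representation are illustrative, not the crate's actual types):

```rust
/// Illustrative sketch: interpret a flat index in 0..n_states as a
/// mixed-radix number whose digits are the per-variable state values.
fn index_to_state(mut s: usize, domain_sizes: &[usize]) -> Vec<usize> {
    domain_sizes
        .iter()
        .map(|&size| {
            let value = s % size; // digit for this variable
            s /= size;            // shift to the next variable
            value
        })
        .collect()
}
```

In the patched code the index is turned into a `process::NetworkProcessState` built from `variables_domain`; the sketch uses plain indices only to stay self-contained.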
@@ -85,10 +92,13 @@ impl RewardEvaluation for MonteCarloReward {
     ) -> f64 {
         let mut sampler =
             ForwardSampler::new(network_process, self.seed.clone(), Some(state.clone()));
-        let mut ret = 0.0;
+        let mut expected_value = 0.0;
+        let mut squared_expected_value = 0.0;
+        let normal = statrs::distribution::Normal::new(0.0, 1.0).unwrap();

-        for _i in 0..self.n_iterations {
+        for i in 0..self.max_iterations {
             sampler.reset();
+            let mut ret = 0.0;
             let mut previous = sampler.next().unwrap();
             while previous.t < self.end_time {
                 let current = sampler.next().unwrap();
@@ -121,14 +131,31 @@ impl RewardEvaluation for MonteCarloReward {
                 }
                 previous = current;
             }
+
+            let float_i = i as f64;
+            expected_value =
+                expected_value * float_i as f64 / (float_i + 1.0) + ret / (float_i + 1.0);
+            squared_expected_value = squared_expected_value * float_i as f64 / (float_i + 1.0)
+                + ret.powi(2) / (float_i + 1.0);
+
+            if i > 2 {
+                let var =
+                    (float_i + 1.0) / float_i * (squared_expected_value - expected_value.powi(2));
+                if self.alpha_stop
+                    - 2.0 * normal.cdf(-(float_i + 1.0).sqrt() * self.max_err_stop / var.sqrt())
+                    > 0.0
+                {
+                    return expected_value;
+                }
+            }
         }

-        ret / self.n_iterations as f64
+        expected_value
     }
 }

 pub struct NeighborhoodRelativeReward<RE: RewardEvaluation> {
-    inner_reward: RE
+    inner_reward: RE,
 }

 impl<RE: RewardEvaluation> NeighborhoodRelativeReward<RE> {
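The block added above implements a standard normal-approximation stopping rule for the Monte Carlo estimate: `expected_value` and `squared_expected_value` are running first and second moments of the per-trajectory reward `ret`, `var` is the Bessel-corrected sample variance, and the loop returns early once `2 * Φ(-sqrt(i + 1) * max_err_stop / sqrt(var)) < alpha_stop` (with `Φ` the standard normal CDF), i.e. once the CLT-based probability that the estimate is off by more than `max_err_stop` falls below `alpha_stop`. A standalone restatement of that test (the function name and signature are illustrative only):

```rust
use statrs::distribution::{ContinuousCDF, Normal};

/// Illustrative restatement of the early-stopping test in `evaluate_state`:
/// stop when the normal-approximation probability of an absolute error
/// larger than `max_err` drops below `alpha`.
fn should_stop(n_samples: f64, sample_var: f64, max_err: f64, alpha: f64) -> bool {
    let normal = Normal::new(0.0, 1.0).unwrap();
    // Two-sided tail mass of the estimate outside +/- max_err of the true mean.
    let err_prob = 2.0 * normal.cdf(-n_samples.sqrt() * max_err / sample_var.sqrt());
    err_prob < alpha
}
```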
@@ -143,22 +170,28 @@ impl<RE:RewardEvaluation> RewardEvaluation for NeighborhoodRelativeReward<RE> {
         network_process: &N,
         reward_function: &R,
     ) -> HashMap<process::NetworkProcessState, f64> {
-        let absolute_reward = self.inner_reward.evaluate_state_space(network_process, reward_function);
+        let absolute_reward = self
+            .inner_reward
+            .evaluate_state_space(network_process, reward_function);

         //This approach optimize memory. Maybe optimizing execution time can be better.
-        absolute_reward.iter().map(|(k1, v1)| {
+        absolute_reward
+            .iter()
+            .map(|(k1, v1)| {
                 let mut max_val: f64 = 1.0;
                 absolute_reward.iter().for_each(|(k2, v2)| {
-                let count_diff:usize = k1.iter().zip(k2.iter()).map(|(s1, s2)| if s1 == s2 {0} else {1}).sum();
+                    let count_diff: usize = k1
+                        .iter()
+                        .zip(k2.iter())
+                        .map(|(s1, s2)| if s1 == s2 { 0 } else { 1 })
+                        .sum();
                     if count_diff < 2 {
                         max_val = max_val.max(v1 / v2);
                     }
                 });
                 (k1.clone(), max_val)
-        }).collect()
+            })
+            .collect()
     }
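The `count_diff < 2` filter above restricts the comparison to neighboring joint states, i.e. states that differ from `k1` in at most one variable. The same test as a standalone predicate (a hypothetical helper over plain index vectors, not part of the patch):

```rust
/// Hypothetical helper mirroring the `count_diff < 2` check: two joint states
/// are neighbors when they differ in at most one variable.
fn are_neighbors(k1: &[usize], k2: &[usize]) -> bool {
    k1.iter().zip(k2.iter()).filter(|(a, b)| a != b).count() < 2
}
```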

     fn evaluate_state<N: process::NetworkProcess, R: super::RewardFunction>(