Implemented NeighborhoodRelativeReward

pull/87/head
AlessandroBregoli 2 years ago
parent 414aa31867
commit 9284ca5dd2
1. reCTBN/src/reward/reward_evaluation.rs (54 changes)
2. reCTBN/tests/reward_evaluation.rs (6 changes)

@@ -14,21 +14,21 @@ pub enum RewardCriteria {
    InfiniteHorizon {discount_factor: f64},
}

-pub struct MonteCarloRward {
+pub struct MonteCarloReward {
    n_iterations: usize,
    end_time: f64,
    reward_criteria: RewardCriteria,
    seed: Option<u64>,
}

-impl MonteCarloRward {
+impl MonteCarloReward {
    pub fn new(
        n_iterations: usize,
        end_time: f64,
        reward_criteria: RewardCriteria,
        seed: Option<u64>,
-    ) -> MonteCarloRward {
-        MonteCarloRward {
+    ) -> MonteCarloReward {
+        MonteCarloReward {
            n_iterations,
            end_time,
            reward_criteria,
@@ -37,7 +37,7 @@ impl MonteCarloRward {
    }
}

-impl RewardEvaluation for MonteCarloRward {
+impl RewardEvaluation for MonteCarloReward {
    fn evaluate_state_space<N: process::NetworkProcess, R: super::RewardFunction>(
        &self,
        network_process: &N,
@@ -123,3 +123,47 @@ impl RewardEvaluation for MonteCarloRward {
        ret / self.n_iterations as f64
    }
}
// Evaluates, for each state, its reward relative to the states in its
// neighborhood (states differing in at most one variable), using an inner
// absolute reward evaluator.
pub struct NeighborhoodRelativeReward<RE: RewardEvaluation> {
    inner_reward: RE,
}

impl<RE: RewardEvaluation> NeighborhoodRelativeReward<RE> {
    pub fn new(inner_reward: RE) -> NeighborhoodRelativeReward<RE> {
        NeighborhoodRelativeReward { inner_reward }
    }
}

impl<RE: RewardEvaluation> RewardEvaluation for NeighborhoodRelativeReward<RE> {
    fn evaluate_state_space<N: process::NetworkProcess, R: super::RewardFunction>(
        &self,
        network_process: &N,
        reward_function: &R,
    ) -> HashMap<process::NetworkProcessState, f64> {
        // Absolute reward of every state, computed by the wrapped evaluator.
        let absolute_reward = self
            .inner_reward
            .evaluate_state_space(network_process, reward_function);

        // This approach optimizes for memory; optimizing for execution time instead might be better.
        absolute_reward
            .iter()
            .map(|(k1, v1)| {
                let mut max_val: f64 = 1.0;
                absolute_reward.iter().for_each(|(k2, v2)| {
                    // Hamming distance between the two states: number of variables that differ.
                    let count_diff: usize = k1
                        .iter()
                        .zip(k2.iter())
                        .map(|(s1, s2)| if s1 == s2 { 0 } else { 1 })
                        .sum();
                    // Only states differing in at most one variable belong to the neighborhood.
                    if count_diff < 2 {
                        max_val = max_val.max(v1 / v2);
                    }
                });
                (k1.clone(), max_val)
            })
            .collect()
    }

    fn evaluate_state<N: process::NetworkProcess, R: super::RewardFunction>(
        &self,
        _network_process: &N,
        _reward_function: &R,
        _state: &process::NetworkProcessState,
    ) -> f64 {
        unimplemented!();
    }
}
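For intuition, the neighborhood used by evaluate_state_space above is the set of states that differ from the current state in at most one component (the loop keeps pairs with count_diff < 2, i.e. Hamming distance 0 or 1), and each returned value is the largest ratio between a state's absolute reward and that of such a neighbor, floored at 1.0. The following self-contained sketch of that neighborhood check is illustrative only and not part of the commit; the State alias and the concrete values are invented for the example.

// Standalone illustration of the neighborhood test used by
// NeighborhoodRelativeReward::evaluate_state_space. `State` stands in for
// process::NetworkProcessState (a vector of per-variable discrete states).
type State = Vec<usize>;

// Hamming distance: number of components in which the two states differ.
fn hamming(a: &State, b: &State) -> usize {
    a.iter().zip(b.iter()).filter(|(x, y)| x != y).count()
}

fn main() {
    let s000: State = vec![0, 0, 0];
    let s010: State = vec![0, 1, 0];
    let s011: State = vec![0, 1, 1];

    // s010 is a neighbor of s000 (distance 1); s011 is not (distance 2).
    assert!(hamming(&s000, &s010) < 2);
    assert!(hamming(&s000, &s011) >= 2);
}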

@@ -33,7 +33,7 @@ fn simple_factored_reward_function_binary_node_MC() {
    let s0: NetworkProcessState = vec![params::StateType::Discrete(0)];
    let s1: NetworkProcessState = vec![params::StateType::Discrete(1)];
-    let mc = MonteCarloRward::new(100, 10.0, RewardCriteria::InfiniteHorizon { discount_factor: 1.0 }, Some(215));
+    let mc = MonteCarloReward::new(100, 10.0, RewardCriteria::InfiniteHorizon { discount_factor: 1.0 }, Some(215));
    assert_abs_diff_eq!(3.0, mc.evaluate_state(&net, &rf, &s0), epsilon = 1e-2);
    assert_abs_diff_eq!(3.0, mc.evaluate_state(&net, &rf, &s1), epsilon = 1e-2);
@@ -42,7 +42,7 @@ fn simple_factored_reward_function_binary_node_MC() {
    assert_abs_diff_eq!(3.0, rst[&s1], epsilon = 1e-2);
-    let mc = MonteCarloRward::new(100, 10.0, RewardCriteria::FiniteHorizon, Some(215));
+    let mc = MonteCarloReward::new(100, 10.0, RewardCriteria::FiniteHorizon, Some(215));
    assert_abs_diff_eq!(30.0, mc.evaluate_state(&net, &rf, &s0), epsilon = 1e-2);
    assert_abs_diff_eq!(30.0, mc.evaluate_state(&net, &rf, &s1), epsilon = 1e-2);
@@ -113,7 +113,7 @@ fn simple_factored_reward_function_chain_MC() {
        params::StateType::Discrete(0),
    ];
-    let mc = MonteCarloRward::new(1000, 10.0, RewardCriteria::InfiniteHorizon { discount_factor: 1.0 }, Some(215));
+    let mc = MonteCarloReward::new(1000, 10.0, RewardCriteria::InfiniteHorizon { discount_factor: 1.0 }, Some(215));
    assert_abs_diff_eq!(2.447, mc.evaluate_state(&net, &rf, &s000), epsilon = 1e-1);
    let rst = mc.evaluate_state_space(&net, &rf);
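Finally, a rough usage sketch for the new evaluator (not part of this commit): it wraps any RewardEvaluation implementation, for example the MonteCarloReward used in these tests. The use paths below and the way net and rf are obtained are assumptions and may differ from the actual crate layout; the constructor arguments mirror the values used in the tests above.

use std::collections::HashMap;

use reCTBN::process;
use reCTBN::reward::{self, RewardEvaluation};
use reCTBN::reward::reward_evaluation::{
    MonteCarloReward, NeighborhoodRelativeReward, RewardCriteria,
};

// Hypothetical helper: compute the neighborhood-relative reward of every state
// of `net` under the reward function `rf`, using Monte Carlo estimation for
// the inner absolute rewards.
fn relative_rewards<N, R>(net: &N, rf: &R) -> HashMap<process::NetworkProcessState, f64>
where
    N: process::NetworkProcess,
    R: reward::RewardFunction,
{
    // Inner absolute evaluator: 100 trajectories up to time 10.0,
    // infinite-horizon criterion with discount factor 1.0, fixed seed.
    let mc = MonteCarloReward::new(
        100,
        10.0,
        RewardCriteria::InfiniteHorizon { discount_factor: 1.0 },
        Some(215),
    );

    // Wrap it: every returned value is >= 1.0, and values above 1.0 indicate
    // that the state's absolute reward dominates that of at least one state
    // in its Hamming-distance-1 neighborhood.
    NeighborhoodRelativeReward::new(mc).evaluate_state_space(net, rf)
}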
