Added comments.

pull/42/head
AlessandroBregoli 3 years ago
parent b357c9efa0
commit 43d01d2bf8
  1. 46
      src/structure_learning/score_based_algorithm.rs
  2. 22
      src/structure_learning/score_function.rs
  3. 69
      src/tools.rs

@ -1,17 +1,20 @@
use crate::network;
use crate::structure_learning::score_function::ScoreFunction; use crate::structure_learning::score_function::ScoreFunction;
use crate::structure_learning::StructureLearningAlgorithm; use crate::structure_learning::StructureLearningAlgorithm;
use crate::tools; use crate::tools;
use crate::network;
use std::collections::BTreeSet; use std::collections::BTreeSet;
/// Hill-climbing structure-learning algorithm parameterized by a score function.
/// `max_parent_set` optionally caps the number of parents learned per node;
/// `None` means no explicit cap (the caller falls back to the network size).
pub struct HillClimbing<S: ScoreFunction> {
    // Score function used to evaluate candidate parent sets.
    score_function: S,
    // Optional upper bound on the size of each node's parent set.
    max_parent_set: Option<usize>,
}

impl<S: ScoreFunction> HillClimbing<S> {
    /// Build a `HillClimbing` instance from a score function and an optional
    /// maximum parent-set size.
    pub fn init(score_function: S, max_parent_set: Option<usize>) -> HillClimbing<S> {
        HillClimbing {
            score_function,
            max_parent_set,
        }
    }
}
@ -20,41 +23,58 @@ impl<S: ScoreFunction> StructureLearningAlgorithm for HillClimbing<S> {
where where
T: network::Network, T: network::Network,
{ {
//Check the coherence between dataset and network
if net.get_number_of_nodes() != dataset.get_trajectories()[0].get_events().shape()[1] { if net.get_number_of_nodes() != dataset.get_trajectories()[0].get_events().shape()[1] {
panic!("Dataset and Network must have the same number of variables.") panic!("Dataset and Network must have the same number of variables.")
} }
//Make the network mutable.
let mut net = net; let mut net = net;
//Check if the max_parent_set constraint is present.
let max_parent_set = self.max_parent_set.unwrap_or(net.get_number_of_nodes()); let max_parent_set = self.max_parent_set.unwrap_or(net.get_number_of_nodes());
//Reset the adj matrix
net.initialize_adj_matrix(); net.initialize_adj_matrix();
//Iterate over each node to learn their parent set.
for node in net.get_node_indices() { for node in net.get_node_indices() {
//Initialize an empty parent set.
let mut parent_set: BTreeSet<usize> = BTreeSet::new(); let mut parent_set: BTreeSet<usize> = BTreeSet::new();
let mut current_ll = self.score_function.call(&net, node, &parent_set, dataset); //Compute the score for the empty parent set
let mut old_ll = f64::NEG_INFINITY; let mut current_score = self.score_function.call(&net, node, &parent_set, dataset);
while current_ll > old_ll { //Set the old score to -\infty.
old_ll = current_ll; let mut old_score = f64::NEG_INFINITY;
//Iterate until convergence
while current_score > old_score {
//Save the current_score.
old_score = current_score;
//Iterate over each node.
for parent in net.get_node_indices() { for parent in net.get_node_indices() {
//Continue if the parent and the node are the same.
if parent == node { if parent == node {
continue; continue;
} }
//Try to remove parent from the parent_set.
let is_removed = parent_set.remove(&parent); let is_removed = parent_set.remove(&parent);
//If parent was not in the parent_set add it.
if !is_removed && parent_set.len() < max_parent_set { if !is_removed && parent_set.len() < max_parent_set {
parent_set.insert(parent); parent_set.insert(parent);
} }
//Compute the score with the modified parent_set.
let tmp_ll = self.score_function.call(&net, node, &parent_set, dataset); let tmp_score = self.score_function.call(&net, node, &parent_set, dataset);
//If tmp_score is worst than current_score revert the change to the parent set
if tmp_ll < current_ll { if tmp_score < current_score {
if is_removed { if is_removed {
parent_set.insert(parent); parent_set.insert(parent);
} else { } else {
parent_set.remove(&parent); parent_set.remove(&parent);
} }
} else { }
current_ll = tmp_ll; //Otherwise save the computed score as current_score
else {
current_score = tmp_score;
} }
} }
} }
//Apply the learned parent_set to the network struct.
parent_set.iter().for_each(|p| net.add_edge(*p, node)); parent_set.iter().for_each(|p| net.add_edge(*p, node));
} }

@ -25,6 +25,8 @@ pub struct LogLikelihood {
impl LogLikelihood { impl LogLikelihood {
pub fn init(alpha: usize, tau: f64) -> LogLikelihood { pub fn init(alpha: usize, tau: f64) -> LogLikelihood {
//Tau must be >=0.0
if tau < 0.0 { if tau < 0.0 {
panic!("tau must be >=0.0"); panic!("tau must be >=0.0");
} }
@ -40,14 +42,21 @@ impl LogLikelihood {
) -> (f64, Array3<usize>) ) -> (f64, Array3<usize>)
where where
T: network::Network, T: network::Network,
{ {
//Identify the type of node used
match &net.get_node(node).params { match &net.get_node(node).params {
params::Params::DiscreteStatesContinousTime(params) => { params::Params::DiscreteStatesContinousTime(_params) => {
//Compute the sufficient statistics M (number of transistions) and T (residence
//time)
let (M, T) = let (M, T) =
parameter_learning::sufficient_statistics(net, dataset, node, parent_set); parameter_learning::sufficient_statistics(net, dataset, node, parent_set);
//Scale alpha accordingly to the size of the parent set
let alpha = self.alpha as f64 / M.shape()[0] as f64; let alpha = self.alpha as f64 / M.shape()[0] as f64;
//Scale tau accordingly to the size of the parent set
let tau = self.tau / M.shape()[0] as f64; let tau = self.tau / M.shape()[0] as f64;
//Compute the log likelihood for q
let log_ll_q:f64 = M let log_ll_q:f64 = M
.sum_axis(Axis(2)) .sum_axis(Axis(2))
.iter() .iter()
@ -59,7 +68,8 @@ impl LogLikelihood {
- (alpha + *m as f64 + 1.0) * f64::ln(tau + t) - (alpha + *m as f64 + 1.0) * f64::ln(tau + t)
}) })
.sum(); .sum();
//Compute the log likelihood for theta
let log_ll_theta: f64 = M.outer_iter() let log_ll_theta: f64 = M.outer_iter()
.map(|x| x.outer_iter() .map(|x| x.outer_iter()
.map(|y| gamma::ln_gamma(alpha) .map(|y| gamma::ln_gamma(alpha)
@ -113,10 +123,14 @@ impl ScoreFunction for BIC {
) -> f64 ) -> f64
where where
T: network::Network { T: network::Network {
//Compute the log-likelihood
let (ll, M) = self.ll.compute_score(net, node, parent_set, dataset); let (ll, M) = self.ll.compute_score(net, node, parent_set, dataset);
//Compute the number of parameters
let n_parameters = M.shape()[0] * M.shape()[1] * (M.shape()[2] - 1); let n_parameters = M.shape()[0] * M.shape()[1] * (M.shape()[2] - 1);
//TODO: Optimize this //TODO: Optimize this
//Compute the sample size
let sample_size: usize = dataset.get_trajectories().iter().map(|x| x.get_time().len() - 1).sum(); let sample_size: usize = dataset.get_trajectories().iter().map(|x| x.get_time().len() - 1).sum();
//Compute BIC
ll - f64::ln(sample_size as f64) / 2.0 * n_parameters as f64 ll - f64::ln(sample_size as f64) / 2.0 * n_parameters as f64
} }
} }

@ -13,12 +13,14 @@ pub struct Trajectory {
impl Trajectory { impl Trajectory {
pub fn init(time: Array1<f64>, events: Array2<usize>) -> Trajectory { pub fn init(time: Array1<f64>, events: Array2<usize>) -> Trajectory {
//Events and time are two part of the same trajectory. For this reason they must have the
//same number of sample.
if time.shape()[0] != events.shape()[0] { if time.shape()[0] != events.shape()[0] {
panic!("time.shape[0] must be equal to events.shape[0]"); panic!("time.shape[0] must be equal to events.shape[0]");
} }
Trajectory { time, events } Trajectory { time, events }
} }
/// Borrow the vector of transition times of this trajectory.
pub fn get_time(&self) -> &Array1<f64> {
    &self.time
}
@ -34,6 +36,9 @@ pub struct Dataset {
impl Dataset { impl Dataset {
pub fn init(trajectories: Vec<Trajectory>) -> Dataset { pub fn init(trajectories: Vec<Trajectory>) -> Dataset {
//All the trajectories in the same dataset must represent the same process. For this reason
//each trajectory must represent the same number of variables.
if trajectories if trajectories
.iter() .iter()
.any(|x| trajectories[0].get_events().shape()[1] != x.get_events().shape()[1]) .any(|x| trajectories[0].get_events().shape()[1] != x.get_events().shape()[1])
@ -54,23 +59,38 @@ pub fn trajectory_generator<T: network::Network>(
t_end: f64, t_end: f64,
seed: Option<u64>, seed: Option<u64>,
) -> Dataset { ) -> Dataset {
//Tmp growing vector containing generated trajectories.
let mut trajectories: Vec<Trajectory> = Vec::new(); let mut trajectories: Vec<Trajectory> = Vec::new();
let seed = seed.unwrap_or_else(rand::random);
//Random Generator object
let mut rng = ChaCha8Rng::seed_from_u64(seed); let mut rng: ChaCha8Rng = match seed {
//If a seed is present use it to initialize the random generator.
let node_idx: Vec<_> = net.get_node_indices().collect(); Some(seed) => SeedableRng::seed_from_u64(seed),
//Otherwise create a new random generator using the method `from_entropy`
None => SeedableRng::from_entropy()
};
//Each iteration generate one trajectory
for _ in 0..n_trajectories { for _ in 0..n_trajectories {
//Current time of the sampling process
let mut t = 0.0; let mut t = 0.0;
//History of all the moments in which something changed
let mut time: Vec<f64> = Vec::new(); let mut time: Vec<f64> = Vec::new();
let mut events: Vec<Array1<usize>> = Vec::new(); //Configuration of the process variables at time t initialized with an uniform
let mut current_state: Vec<params::StateType> = node_idx //distribution.
.iter() let mut current_state: Vec<params::StateType> = net.get_node_indices()
.map(|x| net.get_node(*x).params.get_random_state_uniform(&mut rng)) .map(|x| net.get_node(x).params.get_random_state_uniform(&mut rng))
.collect(); .collect();
//History of all the configurations of the process variables.
let mut events: Vec<Array1<usize>> = Vec::new();
//Vector containing to time to the next transition for each variable.
let mut next_transitions: Vec<Option<f64>> = let mut next_transitions: Vec<Option<f64>> =
(0..node_idx.len()).map(|_| Option::None).collect(); net.get_node_indices().map(|_| Option::None).collect();
//Add the starting time for the trajectory.
time.push(t.clone());
//Add the starting configuration of the trajectory.
events.push( events.push(
current_state current_state
.iter() .iter()
@ -79,8 +99,9 @@ pub fn trajectory_generator<T: network::Network>(
}) })
.collect(), .collect(),
); );
time.push(t.clone()); //Generate new samples until ending time is reached.
while t < t_end { while t < t_end {
//Generate the next transition time for each uninitialized variable.
for (idx, val) in next_transitions.iter_mut().enumerate() { for (idx, val) in next_transitions.iter_mut().enumerate() {
if let None = val { if let None = val {
*val = Some( *val = Some(
@ -96,19 +117,24 @@ pub fn trajectory_generator<T: network::Network>(
); );
} }
} }
//Get the variable with the smallest transition time.
let next_node_transition = next_transitions let next_node_transition = next_transitions
.iter() .iter()
.enumerate() .enumerate()
.min_by(|x, y| x.1.unwrap().partial_cmp(&y.1.unwrap()).unwrap()) .min_by(|x, y| x.1.unwrap().partial_cmp(&y.1.unwrap()).unwrap())
.unwrap() .unwrap()
.0; .0;
//Check if the next transition take place after the ending time.
if next_transitions[next_node_transition].unwrap() > t_end { if next_transitions[next_node_transition].unwrap() > t_end {
break; break;
} }
//Get the time in which the next transition occurs.
t = next_transitions[next_node_transition].unwrap().clone(); t = next_transitions[next_node_transition].unwrap().clone();
//Add the transition time to next
time.push(t.clone()); time.push(t.clone());
//Compute the new state of the transitioning variable.
current_state[next_node_transition] = net current_state[next_node_transition] = net
.get_node(next_node_transition) .get_node(next_node_transition)
.params .params
@ -120,7 +146,8 @@ pub fn trajectory_generator<T: network::Network>(
&mut rng, &mut rng,
) )
.unwrap(); .unwrap();
//Add the new state to events
events.push(Array::from_vec( events.push(Array::from_vec(
current_state current_state
.iter() .iter()
@ -129,13 +156,16 @@ pub fn trajectory_generator<T: network::Network>(
}) })
.collect(), .collect(),
)); ));
//Reset the next transition time for the transitioning node.
next_transitions[next_node_transition] = None; next_transitions[next_node_transition] = None;
//Reset the next transition time for each child of the transitioning node.
for child in net.get_children_set(next_node_transition) { for child in net.get_children_set(next_node_transition) {
next_transitions[child] = None next_transitions[child] = None
} }
} }
//Add current_state as last state.
events.push( events.push(
current_state current_state
.iter() .iter()
@ -144,8 +174,10 @@ pub fn trajectory_generator<T: network::Network>(
}) })
.collect(), .collect(),
); );
//Add t_end as last time.
time.push(t_end.clone()); time.push(t_end.clone());
//Add the sampled trajectory to trajectories.
trajectories.push(Trajectory::init( trajectories.push(Trajectory::init(
Array::from_vec(time), Array::from_vec(time),
Array2::from_shape_vec( Array2::from_shape_vec(
@ -155,5 +187,6 @@ pub fn trajectory_generator<T: network::Network>(
.unwrap(), .unwrap(),
)); ));
} }
//Return a dataset object with the sampled trajectories.
Dataset::init(trajectories) Dataset::init(trajectories)
} }

Loading…
Cancel
Save