Added comments.

pull/42/head
AlessandroBregoli 3 years ago
parent b357c9efa0
commit 43d01d2bf8
  1. 46
      src/structure_learning/score_based_algorithm.rs
  2. 22
      src/structure_learning/score_function.rs
  3. 69
      src/tools.rs

@ -1,17 +1,20 @@
use crate::network;
use crate::structure_learning::score_function::ScoreFunction;
use crate::structure_learning::StructureLearningAlgorithm;
use crate::tools;
use crate::network;
use std::collections::BTreeSet;
//Hill-climbing structure-learning algorithm, parameterized over a score function `S`.
//NOTE(review): this diff view interleaves the old and the new version of the
//`max_parent_set` field line below; the underlying file declares the field once.
pub struct HillClimbing<S: ScoreFunction> {
//Score function used to evaluate candidate parent sets.
score_function: S,
//Optional upper bound on the size of each node's parent set; `None` means unbounded.
max_parent_set: Option<usize>
max_parent_set: Option<usize>,
}
impl<S: ScoreFunction> HillClimbing<S> {
//Constructor: wraps the given score function and optional parent-set size limit.
//NOTE(review): the diff view shows both the old one-line constructor body and the
//new multi-line one; the underlying file contains only one of them.
pub fn init(score_function: S, max_parent_set: Option<usize>) -> HillClimbing<S> {
HillClimbing { score_function, max_parent_set }
HillClimbing {
score_function,
max_parent_set,
}
}
}
@ -20,41 +23,58 @@ impl<S: ScoreFunction> StructureLearningAlgorithm for HillClimbing<S> {
where
T: network::Network,
{
//Check the coherence between dataset and network
if net.get_number_of_nodes() != dataset.get_trajectories()[0].get_events().shape()[1] {
panic!("Dataset and Network must have the same number of variables.")
}
//Make the network mutable.
let mut net = net;
//Check if the max_parent_set constraint is present.
let max_parent_set = self.max_parent_set.unwrap_or(net.get_number_of_nodes());
//Reset the adj matrix
net.initialize_adj_matrix();
//Iterate over each node to learn their parent set.
for node in net.get_node_indices() {
//Initialize an empty parent set.
let mut parent_set: BTreeSet<usize> = BTreeSet::new();
let mut current_ll = self.score_function.call(&net, node, &parent_set, dataset);
let mut old_ll = f64::NEG_INFINITY;
while current_ll > old_ll {
old_ll = current_ll;
//Compute the score for the empty parent set
let mut current_score = self.score_function.call(&net, node, &parent_set, dataset);
//Set the old score to -\infty.
let mut old_score = f64::NEG_INFINITY;
//Iterate until convergence
while current_score > old_score {
//Save the current_score.
old_score = current_score;
//Iterate over each node.
for parent in net.get_node_indices() {
//Continue if the parent and the node are the same.
if parent == node {
continue;
}
//Try to remove parent from the parent_set.
let is_removed = parent_set.remove(&parent);
//If parent was not in the parent_set add it.
if !is_removed && parent_set.len() < max_parent_set {
parent_set.insert(parent);
}
let tmp_ll = self.score_function.call(&net, node, &parent_set, dataset);
if tmp_ll < current_ll {
//Compute the score with the modified parent_set.
let tmp_score = self.score_function.call(&net, node, &parent_set, dataset);
//If tmp_score is worse than current_score, revert the change to the parent set
if tmp_score < current_score {
if is_removed {
parent_set.insert(parent);
} else {
parent_set.remove(&parent);
}
} else {
current_ll = tmp_ll;
}
//Otherwise save the computed score as current_score
else {
current_score = tmp_score;
}
}
}
//Apply the learned parent_set to the network struct.
parent_set.iter().for_each(|p| net.add_edge(*p, node));
}

@ -25,6 +25,8 @@ pub struct LogLikelihood {
impl LogLikelihood {
pub fn init(alpha: usize, tau: f64) -> LogLikelihood {
//Tau must be >=0.0
if tau < 0.0 {
panic!("tau must be >=0.0");
}
@ -40,14 +42,21 @@ impl LogLikelihood {
) -> (f64, Array3<usize>)
where
T: network::Network,
{
{
//Identify the type of node used
match &net.get_node(node).params {
params::Params::DiscreteStatesContinousTime(params) => {
params::Params::DiscreteStatesContinousTime(_params) => {
//Compute the sufficient statistics M (number of transitions) and T (residence
//time)
let (M, T) =
parameter_learning::sufficient_statistics(net, dataset, node, parent_set);
//Scale alpha according to the size of the parent set
let alpha = self.alpha as f64 / M.shape()[0] as f64;
//Scale tau according to the size of the parent set
let tau = self.tau / M.shape()[0] as f64;
//Compute the log likelihood for q
let log_ll_q:f64 = M
.sum_axis(Axis(2))
.iter()
@ -59,7 +68,8 @@ impl LogLikelihood {
- (alpha + *m as f64 + 1.0) * f64::ln(tau + t)
})
.sum();
//Compute the log likelihood for theta
let log_ll_theta: f64 = M.outer_iter()
.map(|x| x.outer_iter()
.map(|y| gamma::ln_gamma(alpha)
@ -113,10 +123,14 @@ impl ScoreFunction for BIC {
) -> f64
where
T: network::Network {
//Compute the log-likelihood
let (ll, M) = self.ll.compute_score(net, node, parent_set, dataset);
//Compute the number of parameters
let n_parameters = M.shape()[0] * M.shape()[1] * (M.shape()[2] - 1);
//TODO: Optimize this
//Compute the sample size
let sample_size: usize = dataset.get_trajectories().iter().map(|x| x.get_time().len() - 1).sum();
//Compute BIC
ll - f64::ln(sample_size as f64) / 2.0 * n_parameters as f64
}
}

@ -13,12 +13,14 @@ pub struct Trajectory {
impl Trajectory {
pub fn init(time: Array1<f64>, events: Array2<usize>) -> Trajectory {
    //Time stamps and events are two parts of the same trajectory, so both arrays
    //must contain the same number of samples; otherwise the trajectory is invalid.
    let n_samples = time.shape()[0];
    if n_samples != events.shape()[0] {
        panic!("time.shape[0] must be equal to events.shape[0]");
    }
    Trajectory { time, events }
}
//Borrowing accessor for the vector of sampling times of this trajectory.
pub fn get_time(&self) -> &Array1<f64> {
&self.time
}
@ -34,6 +36,9 @@ pub struct Dataset {
impl Dataset {
pub fn init(trajectories: Vec<Trajectory>) -> Dataset {
//All the trajectories in the same dataset must represent the same process. For this reason
//each trajectory must represent the same number of variables.
if trajectories
.iter()
.any(|x| trajectories[0].get_events().shape()[1] != x.get_events().shape()[1])
@ -54,23 +59,38 @@ pub fn trajectory_generator<T: network::Network>(
t_end: f64,
seed: Option<u64>,
) -> Dataset {
//Tmp growing vector containing generated trajectories.
let mut trajectories: Vec<Trajectory> = Vec::new();
let seed = seed.unwrap_or_else(rand::random);
let mut rng = ChaCha8Rng::seed_from_u64(seed);
let node_idx: Vec<_> = net.get_node_indices().collect();
//Random Generator object
let mut rng: ChaCha8Rng = match seed {
//If a seed is present use it to initialize the random generator.
Some(seed) => SeedableRng::seed_from_u64(seed),
//Otherwise create a new random generator using the method `from_entropy`
None => SeedableRng::from_entropy()
};
//Each iteration generate one trajectory
for _ in 0..n_trajectories {
//Current time of the sampling process
let mut t = 0.0;
//History of all the moments in which something changed
let mut time: Vec<f64> = Vec::new();
let mut events: Vec<Array1<usize>> = Vec::new();
let mut current_state: Vec<params::StateType> = node_idx
.iter()
.map(|x| net.get_node(*x).params.get_random_state_uniform(&mut rng))
//Configuration of the process variables at time t, initialized with a uniform
//distribution.
let mut current_state: Vec<params::StateType> = net.get_node_indices()
.map(|x| net.get_node(x).params.get_random_state_uniform(&mut rng))
.collect();
//History of all the configurations of the process variables.
let mut events: Vec<Array1<usize>> = Vec::new();
//Vector containing the time to the next transition for each variable.
let mut next_transitions: Vec<Option<f64>> =
(0..node_idx.len()).map(|_| Option::None).collect();
net.get_node_indices().map(|_| Option::None).collect();
//Add the starting time for the trajectory.
time.push(t.clone());
//Add the starting configuration of the trajectory.
events.push(
current_state
.iter()
@ -79,8 +99,9 @@ pub fn trajectory_generator<T: network::Network>(
})
.collect(),
);
time.push(t.clone());
//Generate new samples until ending time is reached.
while t < t_end {
//Generate the next transition time for each uninitialized variable.
for (idx, val) in next_transitions.iter_mut().enumerate() {
if let None = val {
*val = Some(
@ -96,19 +117,24 @@ pub fn trajectory_generator<T: network::Network>(
);
}
}
//Get the variable with the smallest transition time.
let next_node_transition = next_transitions
.iter()
.enumerate()
.min_by(|x, y| x.1.unwrap().partial_cmp(&y.1.unwrap()).unwrap())
.unwrap()
.0;
//Check if the next transition takes place after the ending time.
if next_transitions[next_node_transition].unwrap() > t_end {
break;
}
//Get the time in which the next transition occurs.
t = next_transitions[next_node_transition].unwrap().clone();
//Add the transition time to next
time.push(t.clone());
//Compute the new state of the transitioning variable.
current_state[next_node_transition] = net
.get_node(next_node_transition)
.params
@ -120,7 +146,8 @@ pub fn trajectory_generator<T: network::Network>(
&mut rng,
)
.unwrap();
//Add the new state to events
events.push(Array::from_vec(
current_state
.iter()
@ -129,13 +156,16 @@ pub fn trajectory_generator<T: network::Network>(
})
.collect(),
));
//Reset the next transition time for the transitioning node.
next_transitions[next_node_transition] = None;
//Reset the next transition time for each child of the transitioning node.
for child in net.get_children_set(next_node_transition) {
next_transitions[child] = None
}
}
//Add current_state as last state.
events.push(
current_state
.iter()
@ -144,8 +174,10 @@ pub fn trajectory_generator<T: network::Network>(
})
.collect(),
);
//Add t_end as last time.
time.push(t_end.clone());
//Add the sampled trajectory to trajectories.
trajectories.push(Trajectory::init(
Array::from_vec(time),
Array2::from_shape_vec(
@ -155,5 +187,6 @@ pub fn trajectory_generator<T: network::Network>(
.unwrap(),
));
}
//Return a dataset object with the sampled trajectories.
Dataset::init(trajectories)
}

Loading…
Cancel
Save