Added comments.

pull/42/head
AlessandroBregoli 3 years ago
parent b357c9efa0
commit 43d01d2bf8
  1. 46
      src/structure_learning/score_based_algorithm.rs
  2. 22
      src/structure_learning/score_function.rs
  3. 69
      src/tools.rs

@ -1,17 +1,20 @@
use crate::network;
use crate::structure_learning::score_function::ScoreFunction;
use crate::structure_learning::StructureLearningAlgorithm;
use crate::tools;
use crate::network;
use std::collections::BTreeSet;
//Hill-climbing structure-learning algorithm, parameterized over a score function `S`.
//NOTE(review): this diff view interleaves the old and the new version of the
//`max_parent_set` field line below; the underlying file declares the field once.
pub struct HillClimbing<S: ScoreFunction> {
//Score function used to evaluate candidate parent sets.
score_function: S,
//Optional upper bound on the size of each node's parent set; `None` means unbounded.
max_parent_set: Option<usize>
max_parent_set: Option<usize>,
}
impl<S: ScoreFunction> HillClimbing<S> {
//Constructor: wraps the given score function and optional parent-set size limit.
//NOTE(review): the diff view shows both the old one-line constructor body and the
//new multi-line one; the underlying file contains only one of them.
pub fn init(score_function: S, max_parent_set: Option<usize>) -> HillClimbing<S> {
HillClimbing { score_function, max_parent_set }
HillClimbing {
score_function,
max_parent_set,
}
}
}
@ -20,41 +23,58 @@ impl<S: ScoreFunction> StructureLearningAlgorithm for HillClimbing<S> {
where
T: network::Network,
{
//Check the coherence between dataset and network
if net.get_number_of_nodes() != dataset.get_trajectories()[0].get_events().shape()[1] {
panic!("Dataset and Network must have the same number of variables.")
}
//Make the network mutable.
let mut net = net;
//Check if the max_parent_set constraint is present.
let max_parent_set = self.max_parent_set.unwrap_or(net.get_number_of_nodes());
//Reset the adj matrix
net.initialize_adj_matrix();
//Iterate over each node to learn their parent set.
for node in net.get_node_indices() {
//Initialize an empty parent set.
let mut parent_set: BTreeSet<usize> = BTreeSet::new();
let mut current_ll = self.score_function.call(&net, node, &parent_set, dataset);
let mut old_ll = f64::NEG_INFINITY;
while current_ll > old_ll {
old_ll = current_ll;
//Compute the score for the empty parent set
let mut current_score = self.score_function.call(&net, node, &parent_set, dataset);
//Set the old score to -\infty.
let mut old_score = f64::NEG_INFINITY;
//Iterate until convergence
while current_score > old_score {
//Save the current_score.
old_score = current_score;
//Iterate over each node.
for parent in net.get_node_indices() {
//Continue if the parent and the node are the same.
if parent == node {
continue;
}
//Try to remove parent from the parent_set.
let is_removed = parent_set.remove(&parent);
//If parent was not in the parent_set add it.
if !is_removed && parent_set.len() < max_parent_set {
parent_set.insert(parent);
}
let tmp_ll = self.score_function.call(&net, node, &parent_set, dataset);
if tmp_ll < current_ll {
//Compute the score with the modified parent_set.
let tmp_score = self.score_function.call(&net, node, &parent_set, dataset);
//If tmp_score is worse than current_score, revert the change to the parent set
if tmp_score < current_score {
if is_removed {
parent_set.insert(parent);
} else {
parent_set.remove(&parent);
}
} else {
current_ll = tmp_ll;
}
//Otherwise save the computed score as current_score
else {
current_score = tmp_score;
}
}
}
//Apply the learned parent_set to the network struct.
parent_set.iter().for_each(|p| net.add_edge(*p, node));
}

@ -25,6 +25,8 @@ pub struct LogLikelihood {
impl LogLikelihood {
pub fn init(alpha: usize, tau: f64) -> LogLikelihood {
//Tau must be >=0.0
if tau < 0.0 {
panic!("tau must be >=0.0");
}
@ -40,14 +42,21 @@ impl LogLikelihood {
) -> (f64, Array3<usize>)
where
T: network::Network,
{
{
//Identify the type of node used
match &net.get_node(node).params {
params::Params::DiscreteStatesContinousTime(params) => {
params::Params::DiscreteStatesContinousTime(_params) => {
//Compute the sufficient statistics M (number of transitions) and T (residence
//time)
let (M, T) =
parameter_learning::sufficient_statistics(net, dataset, node, parent_set);
//Scale alpha according to the size of the parent set
let alpha = self.alpha as f64 / M.shape()[0] as f64;
//Scale tau according to the size of the parent set
let tau = self.tau / M.shape()[0] as f64;
//Compute the log likelihood for q
let log_ll_q:f64 = M
.sum_axis(Axis(2))
.iter()
@ -59,7 +68,8 @@ impl LogLikelihood {
- (alpha + *m as f64 + 1.0) * f64::ln(tau + t)
})
.sum();
//Compute the log likelihood for theta
let log_ll_theta: f64 = M.outer_iter()
.map(|x| x.outer_iter()
.map(|y| gamma::ln_gamma(alpha)
@ -113,10 +123,14 @@ impl ScoreFunction for BIC {
) -> f64
where
T: network::Network {
//Compute the log-likelihood
let (ll, M) = self.ll.compute_score(net, node, parent_set, dataset);
//Compute the number of parameters
let n_parameters = M.shape()[0] * M.shape()[1] * (M.shape()[2] - 1);
//TODO: Optimize this
//Compute the sample size
let sample_size: usize = dataset.get_trajectories().iter().map(|x| x.get_time().len() - 1).sum();
//Compute BIC
ll - f64::ln(sample_size as f64) / 2.0 * n_parameters as f64
}
}

@ -13,12 +13,14 @@ pub struct Trajectory {
impl Trajectory {
pub fn init(time: Array1<f64>, events: Array2<usize>) -> Trajectory {
    //Time stamps and events are two parts of the same trajectory, so both arrays
    //must contain the same number of samples; otherwise the trajectory is invalid.
    let n_samples = time.shape()[0];
    if n_samples != events.shape()[0] {
        panic!("time.shape[0] must be equal to events.shape[0]");
    }
    Trajectory { time, events }
}
//Borrowing accessor for the vector of sampling times of this trajectory.
pub fn get_time(&self) -> &Array1<f64> {
&self.time
}
@ -34,6 +36,9 @@ pub struct Dataset {
impl Dataset {
pub fn init(trajectories: Vec<Trajectory>) -> Dataset {
//All the trajectories in the same dataset must represent the same process. For this reason
//each trajectory must represent the same number of variables.
if trajectories
.iter()
.any(|x| trajectories[0].get_events().shape()[1] != x.get_events().shape()[1])
@ -54,23 +59,38 @@ pub fn trajectory_generator<T: network::Network>(
t_end: f64,
seed: Option<u64>,
) -> Dataset {
//Tmp growing vector containing generated trajectories.
let mut trajectories: Vec<Trajectory> = Vec::new();
let seed = seed.unwrap_or_else(rand::random);
let mut rng = ChaCha8Rng::seed_from_u64(seed);
let node_idx: Vec<_> = net.get_node_indices().collect();
//Random Generator object
let mut rng: ChaCha8Rng = match seed {
//If a seed is present use it to initialize the random generator.
Some(seed) => SeedableRng::seed_from_u64(seed),
//Otherwise create a new random generator using the method `from_entropy`
None => SeedableRng::from_entropy()
};
//Each iteration generate one trajectory
for _ in 0..n_trajectories {
//Current time of the sampling process
let mut t = 0.0;
//History of all the moments in which something changed
let mut time: Vec<f64> = Vec::new();
let mut events: Vec<Array1<usize>> = Vec::new();
let mut current_state: Vec<params::StateType> = node_idx
.iter()
.map(|x| net.get_node(*x).params.get_random_state_uniform(&mut rng))
//Configuration of the process variables at time t, initialized with a uniform
//distribution.
let mut current_state: Vec<params::StateType> = net.get_node_indices()
.map(|x| net.get_node(x).params.get_random_state_uniform(&mut rng))
.collect();
//History of all the configurations of the process variables.
let mut events: Vec<Array1<usize>> = Vec::new();
//Vector containing the time to the next transition for each variable.
let mut next_transitions: Vec<Option<f64>> =
(0..node_idx.len()).map(|_| Option::None).collect();
net.get_node_indices().map(|_| Option::None).collect();
//Add the starting time for the trajectory.
time.push(t.clone());
//Add the starting configuration of the trajectory.
events.push(
current_state
.iter()
@ -79,8 +99,9 @@ pub fn trajectory_generator<T: network::Network>(
})
.collect(),
);
time.push(t.clone());
//Generate new samples until ending time is reached.
while t < t_end {
//Generate the next transition time for each uninitialized variable.
for (idx, val) in next_transitions.iter_mut().enumerate() {
if let None = val {
*val = Some(
@ -96,19 +117,24 @@ pub fn trajectory_generator<T: network::Network>(
);
}
}
//Get the variable with the smallest transition time.
let next_node_transition = next_transitions
.iter()
.enumerate()
.min_by(|x, y| x.1.unwrap().partial_cmp(&y.1.unwrap()).unwrap())
.unwrap()
.0;
//Check if the next transition takes place after the ending time.
if next_transitions[next_node_transition].unwrap() > t_end {
break;
}
//Get the time in which the next transition occurs.
t = next_transitions[next_node_transition].unwrap().clone();
//Add the transition time to next
time.push(t.clone());
//Compute the new state of the transitioning variable.
current_state[next_node_transition] = net
.get_node(next_node_transition)
.params
@ -120,7 +146,8 @@ pub fn trajectory_generator<T: network::Network>(
&mut rng,
)
.unwrap();
//Add the new state to events
events.push(Array::from_vec(
current_state
.iter()
@ -129,13 +156,16 @@ pub fn trajectory_generator<T: network::Network>(
})
.collect(),
));
//Reset the next transition time for the transitioning node.
next_transitions[next_node_transition] = None;
//Reset the next transition time for each child of the transitioning node.
for child in net.get_children_set(next_node_transition) {
next_transitions[child] = None
}
}
//Add current_state as last state.
events.push(
current_state
.iter()
@ -144,8 +174,10 @@ pub fn trajectory_generator<T: network::Network>(
})
.collect(),
);
//Add t_end as last time.
time.push(t_end.clone());
//Add the sampled trajectory to trajectories.
trajectories.push(Trajectory::init(
Array::from_vec(time),
Array2::from_shape_vec(
@ -155,5 +187,6 @@ pub fn trajectory_generator<T: network::Network>(
.unwrap(),
));
}
//Return a dataset object with the sampled trajectories.
Dataset::init(trajectories)
}

Loading…
Cancel
Save