parent 9eef55fde6
commit e216be304b
@@ -0,0 +1,8 @@
__pycache__
.vscode
**/__pycache__
**/data
**/PyCTBN.egg-info
**/dist
**/results_data
**/.scannerwork
Binary file not shown.
@@ -0,0 +1,8 @@
import PyCTBN.estimators
from PyCTBN.estimators import *
import PyCTBN.optimizers
from PyCTBN.optimizers import *
import PyCTBN.structure_graph
from PyCTBN.structure_graph import *
import PyCTBN.utility
from PyCTBN.utility import *
@@ -0,0 +1,5 @@
from .fam_score_calculator import FamScoreCalculator
from .parameters_estimator import ParametersEstimator
from .structure_estimator import StructureEstimator
from .structure_constraint_based_estimator import StructureConstraintBasedEstimator
from .structure_score_based_estimator import StructureScoreBasedEstimator
@@ -0,0 +1,272 @@

import itertools
import json
import typing

import networkx as nx
import numpy as np
from networkx.readwrite import json_graph

from math import log

from scipy.special import loggamma
from random import choice

from ..structure_graph.set_of_cims import SetOfCims
from ..structure_graph.network_graph import NetworkGraph
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix


class FamScoreCalculator:
    """
    Has the task of calculating the FamScore of a node by using a Bayesian score function
    """

    def __init__(self):
        # np.seterr('raise')
        pass

    # region theta

    def marginal_likelihood_theta(self,
                                  cims: ConditionalIntensityMatrix,
                                  alpha_xu: float,
                                  alpha_xxu: float):
        """
        Calculate the value of the marginal likelihood over theta given all the node's cims

        :param cims: np.array with all the node's cims
        :type cims: np.array
        :param alpha_xu: hyperparameter over the CTBN's q parameters, default to 0.1
        :type alpha_xu: float
        :param alpha_xxu: distributed hyperparameter over the CTBN's theta parameters
        :type alpha_xxu: float

        :return: the value of the marginal likelihood over theta
        :rtype: float
        """
        return np.sum(
            [self.variable_cim_xu_marginal_likelihood_theta(cim,
                                                            alpha_xu,
                                                            alpha_xxu)
             for cim in cims])

    def variable_cim_xu_marginal_likelihood_theta(self,
                                                  cim: ConditionalIntensityMatrix,
                                                  alpha_xu: float,
                                                  alpha_xxu: float):
        """
        Calculate the value of the marginal likelihood over theta given a cim

        :param cim: A conditional_intensity_matrix object with the sufficient statistics
        :type cim: class:'ConditionalIntensityMatrix'
        :param alpha_xu: hyperparameter over the CTBN's q parameters, default to 0.1
        :type alpha_xu: float
        :param alpha_xxu: distributed hyperparameter over the CTBN's theta parameters
        :type alpha_xxu: float

        :return: the value of the marginal likelihood over theta
        :rtype: float
        """
        # get the number of states of the node
        values = len(cim._state_residence_times)

        # compute the marginal likelihood for the current cim
        return np.sum([
            self.single_cim_xu_marginal_likelihood_theta(
                index,
                cim,
                alpha_xu,
                alpha_xxu)
            for index in range(values)])

    def single_cim_xu_marginal_likelihood_theta(self,
                                                index: int,
                                                cim: ConditionalIntensityMatrix,
                                                alpha_xu: float,
                                                alpha_xxu: float):
        """
        Calculate the marginal likelihood over theta of the node when it assumes a specific value
        and a specific parents' assignment

        :param index: the index of the considered node state
        :type index: int
        :param cim: A conditional_intensity_matrix object with the sufficient statistics
        :type cim: class:'ConditionalIntensityMatrix'
        :param alpha_xu: hyperparameter over the CTBN's q parameters
        :type alpha_xu: float
        :param alpha_xxu: distributed hyperparameter over the CTBN's theta parameters
        :type alpha_xxu: float

        :return: the value of the marginal likelihood over theta when the node assumes a specific value
        :rtype: float
        """
        values = list(range(len(cim._state_residence_times)))

        # remove the index because of the x != x' condition in the summation
        values.remove(index)

        # uncomment for a non-uniform alpha_xxu:
        # alpha_xxu = alpha_xu * cim.state_transition_matrix[index, index_x_first] / cim.state_transition_matrix[index, index]

        return (loggamma(alpha_xu) - loggamma(alpha_xu + cim.state_transition_matrix[index, index])) \
               + \
               np.sum([self.single_internal_cim_xxu_marginal_likelihood_theta(
                           cim.state_transition_matrix[index, index_x_first],
                           alpha_xxu)
                       for index_x_first in values])

    def single_internal_cim_xxu_marginal_likelihood_theta(self,
                                                          M_xxu_suff_stats: float,
                                                          alpha_xxu: float = 1):
        """Calculate the second part of the marginal likelihood over theta formula

        :param M_xxu_suff_stats: value of the sufficient statistic M[xx'|u]
        :type M_xxu_suff_stats: float
        :param alpha_xxu: distributed hyperparameter over the CTBN's theta parameters
        :type alpha_xxu: float

        :return: the value of the marginal likelihood over theta when the node assumes a specific value
        :rtype: float
        """
        return loggamma(alpha_xxu + M_xxu_suff_stats) - loggamma(alpha_xxu)

    # endregion

    # region q

    def marginal_likelihood_q(self,
                              cims: np.array,
                              tau_xu: float = 0.1,
                              alpha_xu: float = 1):
        """
        Calculate the value of the marginal likelihood over q of the node identified by the label node_id

        :param cims: np.array with all the node's cims
        :type cims: np.array
        :param tau_xu: hyperparameter over the CTBN's q parameters
        :type tau_xu: float
        :param alpha_xu: hyperparameter over the CTBN's q parameters
        :type alpha_xu: float

        :return: the value of the marginal likelihood over q
        :rtype: float
        """
        return np.sum([self.variable_cim_xu_marginal_likelihood_q(cim, tau_xu, alpha_xu) for cim in cims])

    def variable_cim_xu_marginal_likelihood_q(self,
                                              cim: ConditionalIntensityMatrix,
                                              tau_xu: float = 0.1,
                                              alpha_xu: float = 1):
        """
        Calculate the value of the marginal likelihood over q given a cim

        :param cim: A conditional_intensity_matrix object with the sufficient statistics
        :type cim: class:'ConditionalIntensityMatrix'
        :param tau_xu: hyperparameter over the CTBN's q parameters
        :type tau_xu: float
        :param alpha_xu: hyperparameter over the CTBN's q parameters
        :type alpha_xu: float

        :return: the value of the marginal likelihood over q
        :rtype: float
        """
        # get the number of states of the node
        values = len(cim._state_residence_times)

        # compute the marginal likelihood for the current cim
        return np.sum([
            self.single_cim_xu_marginal_likelihood_q(
                cim.state_transition_matrix[index, index],
                cim._state_residence_times[index],
                tau_xu,
                alpha_xu)
            for index in range(values)])

    def single_cim_xu_marginal_likelihood_q(self,
                                            M_xu_suff_stats: float,
                                            T_xu_suff_stats: float,
                                            tau_xu: float = 0.1,
                                            alpha_xu: float = 1):
        """
        Calculate the marginal likelihood over q of the node when it assumes a specific value
        and a specific parents' assignment

        :param M_xu_suff_stats: value of the sufficient statistic M[x|u]
        :type M_xu_suff_stats: float
        :param T_xu_suff_stats: value of the sufficient statistic T[x|u]
        :type T_xu_suff_stats: float
        :param tau_xu: hyperparameter over the CTBN's q parameters
        :type tau_xu: float
        :param alpha_xu: hyperparameter over the CTBN's q parameters
        :type alpha_xu: float

        :return: the value of the marginal likelihood of the node when it assumes a specific value
        :rtype: float
        """
        return (loggamma(alpha_xu + M_xu_suff_stats + 1) +
                log(tau_xu) * (alpha_xu + 1)) \
               - \
               (loggamma(alpha_xu + 1) +
                log(tau_xu + T_xu_suff_stats) * (alpha_xu + M_xu_suff_stats + 1))

    # endregion

    def get_fam_score(self,
                      cims: np.array,
                      tau_xu: float = 0.1,
                      alpha_xu: float = 1):
        """
        Calculate the FamScore value of the node

        :param cims: np.array with all the node's cims
        :type cims: np.array
        :param tau_xu: hyperparameter over the CTBN's q parameters, default to 0.1
        :type tau_xu: float, optional
        :param alpha_xu: hyperparameter over the CTBN's q parameters, default to 1
        :type alpha_xu: float, optional

        :return: the FamScore value of the node
        :rtype: float
        """
        # calculate alpha_xxu as a uniform distribution over the other states
        alpha_xxu = alpha_xu / (len(cims[0]._state_residence_times) - 1)

        return self.marginal_likelihood_q(cims,
                                          tau_xu,
                                          alpha_xu) \
               + \
               self.marginal_likelihood_theta(cims,
                                              alpha_xu,
                                              alpha_xxu)
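
# Usage sketch (illustrative only; `node_cims` stands for the np.array of
# ConditionalIntensityMatrix objects that a SetOfCims exposes as `actual_cims`,
# e.g. the result of ParametersEstimator.compute_parameters_for_node below):
#
#     calculator = FamScoreCalculator()
#     score = calculator.get_fam_score(node_cims, tau_xu=0.1, alpha_xu=1)
#     print(score)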
@@ -0,0 +1,143 @@
import numpy as np

from ..structure_graph.network_graph import NetworkGraph
from ..structure_graph.set_of_cims import SetOfCims
from ..structure_graph.trajectory import Trajectory


class ParametersEstimator(object):
    """Has the task of computing the cims of a particular node given the trajectories and the net structure
    in the graph ``_net_graph``.

    :param trajectories: the trajectories
    :type trajectories: Trajectory
    :param net_graph: the net structure
    :type net_graph: NetworkGraph
    :_single_set_of_cims: the set of cims object that will hold the cims of the node
    """

    def __init__(self, trajectories: Trajectory, net_graph: NetworkGraph):
        """Constructor Method
        """
        self._trajectories = trajectories
        self._net_graph = net_graph
        self._single_set_of_cims = None

    def fast_init(self, node_id: str) -> None:
        """Initializes all the necessary structures for the parameters estimation for the node ``node_id``.

        :param node_id: the node label
        :type node_id: string
        """
        p_vals = self._net_graph._aggregated_info_about_nodes_parents[2]
        node_states_number = self._net_graph.get_states_number(node_id)
        self._single_set_of_cims = SetOfCims(node_id, p_vals, node_states_number, self._net_graph.p_combs)

    def compute_parameters_for_node(self, node_id: str) -> SetOfCims:
        """Compute the CIMS of the node identified by the label ``node_id``.

        :param node_id: the node label
        :type node_id: string
        :return: A SetOfCims object filled with the computed CIMS
        :rtype: SetOfCims
        """
        node_indx = self._net_graph.get_node_indx(node_id)
        state_res_times = self._single_set_of_cims._state_residence_times
        transition_matrices = self._single_set_of_cims._transition_matrices
        ParametersEstimator.compute_state_res_time_for_node(self._trajectories.times,
                                                            self._trajectories.trajectory,
                                                            self._net_graph.time_filtering,
                                                            self._net_graph.time_scalar_indexing_strucure,
                                                            state_res_times)
        ParametersEstimator.compute_state_transitions_for_a_node(node_indx, self._trajectories.complete_trajectory,
                                                                 self._net_graph.transition_filtering,
                                                                 self._net_graph.transition_scalar_indexing_structure,
                                                                 transition_matrices)
        self._single_set_of_cims.build_cims(state_res_times, transition_matrices)
        return self._single_set_of_cims

    @staticmethod
    def compute_state_res_time_for_node(times: np.ndarray, trajectory: np.ndarray,
                                        cols_filter: np.ndarray, scalar_indexes_struct: np.ndarray,
                                        T: np.ndarray) -> None:
        """Compute the state residence times for a node and fill the matrix ``T`` with the results

        :param times: the time deltas vector
        :type times: numpy.array
        :param trajectory: the trajectory
        :type trajectory: numpy.ndarray
        :param cols_filter: the columns filtering structure
        :type cols_filter: numpy.array
        :param scalar_indexes_struct: the indexing structure
        :type scalar_indexes_struct: numpy.array
        :param T: the state residence times vectors
        :type T: numpy.ndarray
        """
        T[:] = np.bincount(np.sum(trajectory[:, cols_filter] * scalar_indexes_struct / scalar_indexes_struct[0],
                                  axis=1).astype(int),
                           times,
                           minlength=scalar_indexes_struct[-1]).reshape(-1, T.shape[1])

    @staticmethod
    def compute_state_transitions_for_a_node(node_indx: int, trajectory: np.ndarray, cols_filter: np.ndarray,
                                             scalar_indexing: np.ndarray, M: np.ndarray) -> None:
        """Compute the state transition counts for a node and fill the matrices ``M`` with the results.

        :param node_indx: the index of the node
        :type node_indx: int
        :param trajectory: the trajectory
        :type trajectory: numpy.ndarray
        :param cols_filter: the columns filtering structure
        :type cols_filter: numpy.array
        :param scalar_indexing: the indexing structure
        :type scalar_indexing: numpy.array
        :param M: the state transitions matrices
        :type M: numpy.ndarray
        """
        diag_indices = np.array([x * M.shape[1] + x % M.shape[1] for x in range(M.shape[0] * M.shape[1])],
                                dtype=np.int64)
        trj_tmp = trajectory[trajectory[:, int(trajectory.shape[1] / 2) + node_indx].astype(int) >= 0]
        M[:] = np.bincount(np.sum(trj_tmp[:, cols_filter] * scalar_indexing / scalar_indexing[0],
                                  axis=1).astype(int),
                           minlength=scalar_indexing[-1]).reshape(-1, M.shape[1], M.shape[2])
        M_raveled = M.ravel()
        M_raveled[diag_indices] = 0
        M_raveled[diag_indices] = np.sum(M, axis=2).ravel()
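
# Usage sketch (illustrative only; assumes an already built SamplePath
# `sample_path` and its Structure `structure`, as used by the estimators
# below; the node label 'X' is hypothetical):
#
#     graph = NetworkGraph(structure)
#     graph.fast_init('X')
#     estimator = ParametersEstimator(sample_path.trajectories, graph)
#     estimator.fast_init('X')
#     set_of_cims = estimator.compute_parameters_for_node('X')  # a SetOfCims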
@@ -0,0 +1,238 @@

import itertools
import json
import typing

import networkx as nx
import numpy as np
from networkx.readwrite import json_graph
import os
from scipy.stats import chi2 as chi2_dist
from scipy.stats import f as f_dist
from tqdm import tqdm

from ..utility.cache import Cache
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
from ..structure_graph.network_graph import NetworkGraph
from .parameters_estimator import ParametersEstimator
from .structure_estimator import StructureEstimator
from ..structure_graph.sample_path import SamplePath
from ..structure_graph.structure import Structure
from ..optimizers.constraint_based_optimizer import ConstraintBasedOptimizer

import concurrent.futures

import multiprocessing
from multiprocessing import Pool


class StructureConstraintBasedEstimator(StructureEstimator):
    """
    Has the task of estimating the network structure given the trajectories in samplepath by using a constraint-based approach.

    :param sample_path: the _sample_path object containing the trajectories and the real structure
    :type sample_path: SamplePath
    :param exp_test_alfa: the significance level for the exponential Hp test
    :type exp_test_alfa: float
    :param chi_test_alfa: the significance level for the chi Hp test
    :type chi_test_alfa: float
    :_nodes: the nodes labels
    :_nodes_vals: the nodes cardinalities
    :_nodes_indxs: the nodes indexes
    :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes``
    :_cache: the Cache object
    """

    def __init__(self, sample_path: SamplePath, exp_test_alfa: float, chi_test_alfa: float,
                 known_edges: typing.List = [], thumb_threshold: int = 25):
        super().__init__(sample_path, known_edges)
        self._exp_test_sign = exp_test_alfa
        self._chi_test_alfa = chi_test_alfa
        self._thumb_threshold = thumb_threshold
        self._cache = Cache()

    def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int,
                      tot_vars_count: int, parent_indx, child_indx) -> bool:
        """Performs a complete independence test on the directed graphs G1 = {test_child U parent_set}
        G2 = {G1 U test_parent} (added as an additional parent of the test_child).
        Generates all the necessary structures and data to perform the tests.

        :param test_parent: the node label of the test parent
        :type test_parent: string
        :param test_child: the node label of the child
        :type test_child: string
        :param parent_set: the common parent set
        :type parent_set: List
        :param child_states_numb: the cardinality of the ``test_child``
        :type child_states_numb: int
        :param tot_vars_count: the total number of variables in the net
        :type tot_vars_count: int
        :return: True iff test_child and test_parent are independent given the sep_set parent_set. False otherwise
        :rtype: bool
        """
        p_set = parent_set[:]
        complete_info = parent_set[:]
        complete_info.append(test_child)

        parents = np.array(parent_set)
        parents = np.append(parents, test_parent)
        sorted_parents = self._nodes[np.isin(self._nodes, parents)]
        cims_filter = sorted_parents != test_parent

        p_set.insert(0, test_parent)
        sofc2 = self._cache.find(set(p_set))

        if not sofc2:
            complete_info.append(test_parent)
            bool_mask2 = np.isin(self._nodes, complete_info)
            l2 = list(self._nodes[bool_mask2])
            indxs2 = self._nodes_indxs[bool_mask2]
            vals2 = self._nodes_vals[bool_mask2]
            # pair every node in p_set with the test child
            eds2 = list(itertools.product(p_set, [test_child]))
            s2 = Structure(l2, indxs2, vals2, eds2, tot_vars_count)
            g2 = NetworkGraph(s2)
            g2.fast_init(test_child)
            p2 = ParametersEstimator(self._sample_path.trajectories, g2)
            p2.fast_init(test_child)
            sofc2 = p2.compute_parameters_for_node(test_child)
            self._cache.put(set(p_set), sofc2)

        del p_set[0]
        sofc1 = self._cache.find(set(p_set))
        if not sofc1:
            # build G1 = {parent_set U test_child} independently of the G2 branch,
            # which may have been skipped on a cache hit
            g1_nodes = parent_set[:] + [test_child]
            bool_mask1 = np.isin(self._nodes, g1_nodes)
            l1 = list(self._nodes[bool_mask1])
            indxs1 = self._nodes_indxs[bool_mask1]
            vals1 = self._nodes_vals[bool_mask1]
            eds1 = list(itertools.product(parent_set, [test_child]))
            s1 = Structure(l1, indxs1, vals1, eds1, tot_vars_count)
            g1 = NetworkGraph(s1)
            g1.fast_init(test_child)
            p1 = ParametersEstimator(self._sample_path.trajectories, g1)
            p1.fast_init(test_child)
            sofc1 = p1.compute_parameters_for_node(test_child)
            self._cache.put(set(p_set), sofc1)
        thumb_value = 0.0
        if child_states_numb > 2:
            parent_val = self._sample_path.structure.get_states_number(test_parent)
            bool_mask_vals = np.isin(self._nodes, parent_set)
            parents_vals = self._nodes_vals[bool_mask_vals]
            thumb_value = self.compute_thumb_value(parent_val, child_states_numb, parents_vals)
        for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs):
            cond_cims = sofc2.filter_cims_with_mask(cims_filter, p_comb)
            for cim2 in cond_cims:
                if not self.independence_test(child_states_numb, cim1, cim2, thumb_value, parent_indx, child_indx):
                    return False
        return True

    def independence_test(self, child_states_numb: int, cim1: ConditionalIntensityMatrix,
                          cim2: ConditionalIntensityMatrix, thumb_value: float, parent_indx, child_indx) -> bool:
        """Compute the actual independence test using two cims.
        The exponential test is performed first; if its null hypothesis is not rejected,
        the chi-square test is performed as well.

        :param child_states_numb: the cardinality of the test child
        :type child_states_numb: int
        :param cim1: a cim belonging to the graph without test parent
        :type cim1: ConditionalIntensityMatrix
        :param cim2: a cim belonging to the graph with test parent
        :type cim2: ConditionalIntensityMatrix
        :return: True iff both tests do NOT reject the null hypothesis of independence. False otherwise.
        :rtype: bool
        """
        M1 = cim1.state_transition_matrix
        M2 = cim2.state_transition_matrix
        r1s = M1.diagonal()
        r2s = M2.diagonal()
        C1 = cim1.cim
        C2 = cim2.cim
        if child_states_numb > 2:
            if (np.sum(np.diagonal(M1)) / thumb_value) < self._thumb_threshold:
                self._removable_edges_matrix[parent_indx][child_indx] = False
                return False
        F_stats = C2.diagonal() / C1.diagonal()
        exp_alfa = self._exp_test_sign
        for val in range(0, child_states_numb):
            if F_stats[val] < f_dist.ppf(exp_alfa / 2, r1s[val], r2s[val]) or \
                    F_stats[val] > f_dist.ppf(1 - exp_alfa / 2, r1s[val], r2s[val]):
                return False
        M1_no_diag = M1[~np.eye(M1.shape[0], dtype=bool)].reshape(M1.shape[0], -1)
        M2_no_diag = M2[~np.eye(M2.shape[0], dtype=bool)].reshape(M2.shape[0], -1)
        chi_2_quantile = chi2_dist.ppf(1 - self._chi_test_alfa, child_states_numb - 1)
        Ks = np.sqrt(r1s / r2s)
        Ls = np.sqrt(r2s / r1s)
        for val in range(0, child_states_numb):
            Chi = np.sum(np.power(Ks[val] * M2_no_diag[val] - Ls[val] * M1_no_diag[val], 2) /
                         (M1_no_diag[val] + M2_no_diag[val]))
            if Chi > chi_2_quantile:
                return False
        return True

    def compute_thumb_value(self, parent_val, child_val, parent_set_vals):
        """Compute the value to test against the thumb_threshold.

        :param parent_val: test parent's variable cardinality
        :type parent_val: int
        :param child_val: test child's variable cardinality
        :type child_val: int
        :param parent_set_vals: the cardinalities of the nodes in the current sep-set
        :type parent_set_vals: List
        :return: the thumb value for the current independence test
        :rtype: int
        """
        df = (child_val - 1) ** 2
        df = df * parent_val
        for v in parent_set_vals:
            df = df * v
        return df

    def one_iteration_of_CTPC_algorithm(self, var_id: str, tot_vars_count: int) -> typing.List:
        """Performs an iteration of the CTPC algorithm using the node ``var_id`` as ``test_child``.

        :param var_id: the node label of the test child
        :type var_id: string
        :param tot_vars_count: the total number of variables in the net
        :type tot_vars_count: int
        :return: the list of the estimated edges for the node ``var_id``
        :rtype: List
        """
        optimizer_obj = ConstraintBasedOptimizer(
            node_id=var_id,
            structure_estimator=self,
            tot_vars_count=tot_vars_count)
        return optimizer_obj.optimize_structure()

    def ctpc_algorithm(self, disable_multiprocessing: bool = False):
        """Compute the CTPC algorithm over the entire net.

        :param disable_multiprocessing: True if you want to disable the multiprocessing operations, default to False
        :type disable_multiprocessing: bool, optional
        :return: the set of the estimated edges
        :rtype: Set
        """
        ctpc_algo = self.one_iteration_of_CTPC_algorithm
        total_vars_numb = self._sample_path.total_variables_count

        n_nodes = len(self._nodes)

        total_vars_numb_array = [total_vars_numb] * n_nodes

        # get the number of CPUs
        cpu_count = multiprocessing.cpu_count()

        # remove all the edges from the structure
        self._sample_path.structure.clean_structure_edges()

        # estimate the best parents for each node
        if disable_multiprocessing:
            print("Multiprocessing disabled")
            cpu_count = 1
            list_edges_partial = [ctpc_algo(n, total_vars_numb) for n in self._nodes]
        else:
            with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor:
                list_edges_partial = executor.map(ctpc_algo,
                                                  self._nodes,
                                                  total_vars_numb_array)

        return set(itertools.chain.from_iterable(list_edges_partial))

    def estimate_structure(self, disable_multiprocessing: bool = False):
        return self.ctpc_algorithm(disable_multiprocessing=disable_multiprocessing)
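
# Usage sketch (illustrative only; assumes an already built SamplePath
# `sample_path`, e.g. loaded through one of the importers in PyCTBN.utility):
#
#     estimator = StructureConstraintBasedEstimator(sample_path,
#                                                   exp_test_alfa=0.1,
#                                                   chi_test_alfa=0.1)
#     edges = estimator.estimate_structure(disable_multiprocessing=True)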
@@ -0,0 +1,187 @@

import itertools
import json
import typing
import os

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
from networkx.readwrite import json_graph

from abc import ABC

import abc

from ..utility.cache import Cache
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
from ..structure_graph.network_graph import NetworkGraph
from .parameters_estimator import ParametersEstimator
from ..structure_graph.sample_path import SamplePath
from ..structure_graph.structure import Structure


class StructureEstimator(object):
    """Has the task of estimating the network structure given the trajectories in ``samplepath``.

    :param sample_path: the _sample_path object containing the trajectories and the real structure
    :type sample_path: SamplePath
    :_nodes: the nodes labels
    :_nodes_vals: the nodes cardinalities
    :_nodes_indxs: the nodes indexes
    :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes``
    """

    def __init__(self, sample_path: SamplePath, known_edges: typing.List = None):
        self._sample_path = sample_path
        self._nodes = np.array(self._sample_path.structure.nodes_labels)
        self._nodes_vals = self._sample_path.structure.nodes_values
        self._nodes_indxs = self._sample_path.structure.nodes_indexes
        self._removable_edges_matrix = self.build_removable_edges_matrix(known_edges)
        self._complete_graph = StructureEstimator.build_complete_graph(self._sample_path.structure.nodes_labels)

    def build_removable_edges_matrix(self, known_edges: typing.List):
        """Builds a boolean matrix that tells whether each edge can be removed, based on the given prior knowledge:

        :param known_edges: the list of known edges
        :type known_edges: List
        :return: a boolean matrix
        :rtype: np.ndarray
        """
        tot_vars_count = self._sample_path.total_variables_count
        complete_adj_matrix = np.full((tot_vars_count, tot_vars_count), True)
        if known_edges:
            for edge in known_edges:
                i = self._sample_path.structure.get_node_indx(edge[0])
                j = self._sample_path.structure.get_node_indx(edge[1])
                complete_adj_matrix[i][j] = False
        return complete_adj_matrix

    @staticmethod
    def build_complete_graph(node_ids: typing.List) -> nx.DiGraph:
        """Builds a complete directed graph (no self loops) given the nodes labels in the list ``node_ids``:

        :param node_ids: the list of nodes labels
        :type node_ids: List
        :return: a complete Digraph Object
        :rtype: networkx.DiGraph
        """
        complete_graph = nx.DiGraph()
        complete_graph.add_nodes_from(node_ids)
        complete_graph.add_edges_from(itertools.permutations(node_ids, 2))
        return complete_graph

    @staticmethod
    def generate_possible_sub_sets_of_size(u: typing.List, size: int, parent_label: str):
        """Creates a list containing all the possible subsets of the list ``u`` of size ``size``
        that do not contain the node identified by ``parent_label``.

        :param u: the list of nodes
        :type u: List
        :param size: the size of the subsets
        :type size: int
        :param parent_label: the node to exclude in the subsets generation
        :type parent_label: string
        :return: an Iterator Object containing a list of lists
        :rtype: Iterator
        """
        list_without_test_parent = u[:]
        list_without_test_parent.remove(parent_label)
        return map(list, itertools.combinations(list_without_test_parent, size))

    def save_results(self) -> None:
        """Save the estimated Structure to a .json file in the path where the data are loaded from.
        The file is named as the input dataset, with the ``results_`` prefix prepended.
        """
        res = json_graph.node_link_data(self._complete_graph)
        name = self._sample_path._importer.file_path.rsplit('/', 1)[-1]
        name = name.split('.', 1)[0]
        name += '_' + str(self._sample_path._importer.dataset_id())
        name += '.json'
        file_name = 'results_' + name
        with open(file_name, 'w') as f:
            json.dump(res, f)

    def remove_diagonal_elements(self, matrix):
        """Return a view of ``matrix`` with the diagonal elements removed."""
        m = matrix.shape[0]
        strided = np.lib.stride_tricks.as_strided
        s0, s1 = matrix.strides
        return strided(matrix.ravel()[1:], shape=(m - 1, m), strides=(s0 + s1, s1)).reshape(m, -1)

    @abc.abstractmethod
    def estimate_structure(self) -> typing.List:
        """Abstract method to estimate the structure

        :return: List of estimated edges
        :rtype: typing.List
        """
        pass

    def adjacency_matrix(self) -> np.ndarray:
        """Converts the estimated structure ``_complete_graph`` to a boolean adjacency matrix representation.

        :return: The adjacency matrix of the graph ``_complete_graph``
        :rtype: numpy.ndarray
        """
        return nx.adjacency_matrix(self._complete_graph).toarray().astype(bool)

    def spurious_edges(self) -> typing.List:
        """Return the spurious edges present in the estimated structure, if a prior net structure is present in
        ``_sample_path.structure``.

        :return: A list containing the spurious edges
        :rtype: List
        """
        if not self._sample_path.has_prior_net_structure:
            raise RuntimeError("Cannot compute spurious edges with no prior net structure!")
        real_graph = nx.DiGraph()
        real_graph.add_nodes_from(self._sample_path.structure.nodes_labels)
        real_graph.add_edges_from(self._sample_path.structure.edges)
        return nx.difference(real_graph, self._complete_graph).edges

    def save_plot_estimated_structure_graph(self) -> None:
        """Plot the estimated structure in a graphical model style and save it to a .png file.
        Spurious edges are colored in red.
        """
        graph_to_draw = nx.DiGraph()
        spurious_edges = self.spurious_edges()
        non_spurious_edges = list(set(self._complete_graph.edges) - set(spurious_edges))
        edges_colors = ['red' if edge in spurious_edges else 'black' for edge in self._complete_graph.edges]
        graph_to_draw.add_edges_from(spurious_edges)
        graph_to_draw.add_edges_from(non_spurious_edges)
        pos = nx.spring_layout(graph_to_draw, k=0.5 * 1 / np.sqrt(len(graph_to_draw.nodes())), iterations=50, scale=10)
        options = {
            "node_size": 2000,
            "node_color": "white",
            "edgecolors": "black",
            "linewidths": 2,
            "with_labels": True,
            "font_size": 13,
            "connectionstyle": 'arc3, rad = 0.1',
            "arrowsize": 15,
            "arrowstyle": '<|-',
            "width": 1,
            "edge_color": edges_colors,
        }

        nx.draw(graph_to_draw, pos, **options)
        ax = plt.gca()
        ax.margins(0.20)
        plt.axis("off")
        name = self._sample_path._importer.file_path.rsplit('/', 1)[-1]
        name = name.split('.', 1)[0]
        name += '_' + str(self._sample_path._importer.dataset_id())
        name += '.png'
        plt.savefig(name)
        plt.clf()
        print("Estimated Structure Plot Saved At: ", os.path.abspath(name))
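
# Illustrative example of ``generate_possible_sub_sets_of_size`` (hypothetical
# node labels; the helper simply wraps itertools.combinations on ``u`` minus
# ``parent_label``):
#
#     list(StructureEstimator.generate_possible_sub_sets_of_size(
#         ['X', 'Y', 'Z', 'W'], size=2, parent_label='W'))
#     # -> [['X', 'Y'], ['X', 'Z'], ['Y', 'Z']]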
@@ -0,0 +1,244 @@

import itertools
import json
import typing

import networkx as nx
import numpy as np
from networkx.readwrite import json_graph

from random import choice

import concurrent.futures

import copy

from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
from ..structure_graph.network_graph import NetworkGraph
from .parameters_estimator import ParametersEstimator
from .structure_estimator import StructureEstimator
from ..structure_graph.sample_path import SamplePath
from ..structure_graph.structure import Structure
from .fam_score_calculator import FamScoreCalculator
from ..optimizers.hill_climbing_search import HillClimbing
from ..optimizers.tabu_search import TabuSearch

import multiprocessing
from multiprocessing import Pool


class StructureScoreBasedEstimator(StructureEstimator):
    """
    Has the task of estimating the network structure given the trajectories in samplepath by
    using a score-based approach.

    :param sample_path: the _sample_path object containing the trajectories and the real structure
    :type sample_path: SamplePath
    :param tau_xu: hyperparameter over the CTBN's q parameters, default to 0.1
    :type tau_xu: float, optional
    :param alpha_xu: hyperparameter over the CTBN's q parameters, default to 1
    :type alpha_xu: float, optional
    :param known_edges: list of known edges, default to []
    :type known_edges: List, optional
    """

    def __init__(self, sample_path: SamplePath, tau_xu: float = 0.1, alpha_xu: float = 1,
                 known_edges: typing.List = []):
        super().__init__(sample_path, known_edges)
        self.tau_xu = tau_xu
        self.alpha_xu = alpha_xu

    def estimate_structure(self, max_parents: int = None, iterations_number: int = 40,
                           patience: int = None, tabu_length: int = None, tabu_rules_duration: int = None,
                           optimizer: str = 'tabu', disable_multiprocessing: bool = False):
        """
        Compute the score-based algorithm to find the optimal structure

        :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
        :type max_parents: int, optional
        :param iterations_number: maximum number of optimization algorithm's iterations, default to 40
        :type iterations_number: int, optional
        :param patience: number of iterations without any improvement before stopping the search. If None, disabled, default to None
        :type patience: int, optional
        :param tabu_length: maximum length of the data structures used in the optimization process, default to None
        :type tabu_length: int, optional
        :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None
        :type tabu_rules_duration: int, optional
        :param optimizer: name of the optimizer algorithm. Possible values: 'hill' (Hill climbing), 'tabu' (tabu search), default to 'tabu'
        :type optimizer: string, optional
        :param disable_multiprocessing: True if you want to disable the multiprocessing operations, default to False
        :type disable_multiprocessing: bool, optional
        :return: the set of the estimated edges
        :rtype: Set
        """
        # save the true edges structure as tuples
        true_edges = copy.deepcopy(self._sample_path.structure.edges)
        true_edges = set(map(tuple, true_edges))

        # remove all the edges from the structure
        self._sample_path.structure.clean_structure_edges()

        estimate_parents = self.estimate_parents

        n_nodes = len(self._nodes)

        l_max_parents = [max_parents] * n_nodes
        l_iterations_number = [iterations_number] * n_nodes
        l_patience = [patience] * n_nodes
        l_tabu_length = [tabu_length] * n_nodes
        l_tabu_rules_duration = [tabu_rules_duration] * n_nodes
        l_optimizer = [optimizer] * n_nodes

        # get the number of CPUs
        cpu_count = multiprocessing.cpu_count()
        print(f"CPU COUNT: {cpu_count}")

        if disable_multiprocessing:
            cpu_count = 1

        # estimate the best parents for each node
        if disable_multiprocessing:
            list_edges_partial = [estimate_parents(n, max_parents, iterations_number, patience,
                                                   tabu_length, tabu_rules_duration, optimizer)
                                  for n in self._nodes]
        else:
            with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor:
                list_edges_partial = executor.map(estimate_parents,
                                                  self._nodes,
                                                  l_max_parents,
                                                  l_iterations_number,
                                                  l_patience,
                                                  l_tabu_length,
                                                  l_tabu_rules_duration,
                                                  l_optimizer)

        # concatenate all the edges lists
        set_list_edges = set(itertools.chain.from_iterable(list_edges_partial))

        # calculate precision and recall against the true structure
        n_missing_edges = 0
        n_added_fake_edges = 0

        try:
            n_added_fake_edges = len(set_list_edges.difference(true_edges))
            n_missing_edges = len(true_edges.difference(set_list_edges))
            n_true_positive = len(true_edges) - n_missing_edges

            precision = n_true_positive / (n_true_positive + n_added_fake_edges)
            recall = n_true_positive / (n_true_positive + n_missing_edges)

            print(f"precision: {precision} ")
            print(f"recall: {recall} ")
        except Exception as e:
            print(f"error: {e}")

        return set_list_edges

    def estimate_parents(self, node_id: str, max_parents: int = None, iterations_number: int = 40,
                         patience: int = 10, tabu_length: int = None, tabu_rules_duration: int = 5,
                         optimizer: str = 'hill'):
        """
        Use the FamScore of a node in order to find the best parent nodes

        :param node_id: current node's id
        :type node_id: string
        :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
        :type max_parents: int, optional
        :param iterations_number: maximum number of optimization algorithm's iterations, default to 40
        :type iterations_number: int, optional
        :param patience: number of iterations without any improvement before stopping the search. If None, disabled, default to None
        :type patience: int, optional
        :param tabu_length: maximum length of the data structures used in the optimization process, default to None
        :type tabu_length: int, optional
        :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None
        :type tabu_rules_duration: int, optional
        :param optimizer: name of the optimizer algorithm. Possible values: 'hill' (Hill climbing), 'tabu' (tabu search), default to 'tabu'
        :type optimizer: string, optional

        :return: A list of the best edges for the current node
        :rtype: List
        """
        # choose the optimizer algorithm
        if optimizer == 'tabu':
            optimizer = TabuSearch(
                node_id=node_id,
                structure_estimator=self,
                max_parents=max_parents,
                iterations_number=iterations_number,
                patience=patience,
                tabu_length=tabu_length,
                tabu_rules_duration=tabu_rules_duration)
        else:  # optimizer == 'hill'
            optimizer = HillClimbing(
                node_id=node_id,
                structure_estimator=self,
                max_parents=max_parents,
                iterations_number=iterations_number,
                patience=patience)

        # call the optimizer's function that calculates the current node's parents
        return optimizer.optimize_structure()

    def get_score_from_graph(self,
                             graph: NetworkGraph,
                             node_id: str):
        """
        Get the FamScore of a node

        :param node_id: current node's id
        :type node_id: string
        :param graph: current graph to be computed
        :type graph: class:'NetworkGraph'

        :return: The FamScore for this graph structure
        :rtype: float
        """
        # initialize the graph for a single node
        graph.fast_init(node_id)

        params_estimation = ParametersEstimator(self._sample_path.trajectories, graph)

        # initialize and compute the parameters for the node
        params_estimation.fast_init(node_id)
        SoCims = params_estimation.compute_parameters_for_node(node_id)

        # calculate the FamScore for the node
        fam_score_obj = FamScoreCalculator()

        score = fam_score_obj.get_fam_score(SoCims.actual_cims, tau_xu=self.tau_xu, alpha_xu=self.alpha_xu)

        return score
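
# Usage sketch (illustrative only; assumes an already built SamplePath
# `sample_path` with a known true structure, so precision/recall get printed):
#
#     estimator = StructureScoreBasedEstimator(sample_path, tau_xu=0.1, alpha_xu=1)
#     edges = estimator.estimate_structure(max_parents=3,
#                                          iterations_number=100,
#                                          patience=25,
#                                          optimizer='tabu',
#                                          disable_multiprocessing=True)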
@@ -0,0 +1,4 @@
from .optimizer import Optimizer
from .tabu_search import TabuSearch
from .hill_climbing_search import HillClimbing
from .constraint_based_optimizer import ConstraintBasedOptimizer
@@ -0,0 +1,87 @@

import itertools
import json
import typing

import networkx as nx
import numpy as np

from random import choice

from abc import ABC

import copy

from .optimizer import Optimizer
from ..estimators.structure_estimator import StructureEstimator
from ..structure_graph.network_graph import NetworkGraph


class ConstraintBasedOptimizer(Optimizer):
    """
    Optimizer class that implements the CTPC algorithm

    :param node_id: current node's id
    :type node_id: string
    :param structure_estimator: a structure estimator object with the information about the net
    :type structure_estimator: class:'StructureEstimator'
    :param tot_vars_count: number of variables in the dataset
    :type tot_vars_count: int
    """
    def __init__(self,
                 node_id: str,
                 structure_estimator: StructureEstimator,
                 tot_vars_count: int
                 ):
        """
        Constructor
        """
        super().__init__(node_id, structure_estimator)
        self.tot_vars_count = tot_vars_count

    def optimize_structure(self):
        """
        Compute the optimization process for a structure_estimator by using the CTPC algorithm

        :return: the estimated structure for the node
        :rtype: List
        """
        print("Testing variable:", self.node_id)

        graph = NetworkGraph(self.structure_estimator._sample_path.structure)

        other_nodes = [node for node in self.structure_estimator._sample_path.structure.nodes_labels
                       if node != self.node_id]

        # start from the complete parent set
        for possible_parent in other_nodes:
            graph.add_edges([(possible_parent, self.node_id)])

        u = other_nodes
        child_states_numb = self.structure_estimator._sample_path.structure.get_states_number(self.node_id)
        b = 0
        while b < len(u):
            parent_indx = 0
            while parent_indx < len(u):
                removed = False
                test_parent = u[parent_indx]
                i = self.structure_estimator._sample_path.structure.get_node_indx(test_parent)
                j = self.structure_estimator._sample_path.structure.get_node_indx(self.node_id)
                if self.structure_estimator._removable_edges_matrix[i][j]:
                    S = StructureEstimator.generate_possible_sub_sets_of_size(u, b, test_parent)
                    for parents_set in S:
                        if self.structure_estimator.complete_test(test_parent, self.node_id, parents_set,
                                                                  child_states_numb, self.tot_vars_count, i, j):
                            graph.remove_edges([(test_parent, self.node_id)])
                            u.remove(test_parent)
                            removed = True
                            break
                if not removed:
                    parent_indx += 1
            b += 1
        self.structure_estimator._cache.clear()
        return graph.edges
@@ -0,0 +1,135 @@

import itertools
import json
import typing

import networkx as nx
import numpy as np

from random import choice

from abc import ABC

from .optimizer import Optimizer
from ..estimators.structure_estimator import StructureEstimator
from ..structure_graph.network_graph import NetworkGraph


class HillClimbing(Optimizer):
    """
    Optimizer class that implements Hill Climbing Search

    :param node_id: current node's id
    :type node_id: string
    :param structure_estimator: a structure estimator object with the information about the net
    :type structure_estimator: class:'StructureEstimator'
    :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
    :type max_parents: int, optional
    :param iterations_number: maximum number of optimization algorithm's iterations, default to 40
    :type iterations_number: int, optional
    :param patience: number of iterations without any improvement before stopping the search. If None, disabled, default to None
    :type patience: int, optional
    """
    def __init__(self,
                 node_id: str,
                 structure_estimator: StructureEstimator,
                 max_parents: int = None,
                 iterations_number: int = 40,
                 patience: int = None
                 ):
        """
        Constructor
        """
        super().__init__(node_id, structure_estimator)
        self.max_parents = max_parents
        self.iterations_number = iterations_number
        self.patience = patience

    def optimize_structure(self) -> typing.List:
        """
        Compute the optimization process for a structure_estimator by using a Hill Climbing Algorithm

        :return: the estimated structure for the node
        :rtype: List
        """
        # create the graph for the single node
        graph = NetworkGraph(self.structure_estimator._sample_path.structure)

        # get the index for the current node
        node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id)

        # set of prior parents
        prior_parents = set()

        # add the edges from prior knowledge
        for i in range(len(self.structure_estimator._removable_edges_matrix)):
            if not self.structure_estimator._removable_edges_matrix[i][node_index]:
                parent_id = self.structure_estimator._sample_path._structure.get_node_id(i)
                prior_parents.add(parent_id)

                # add the edge to the starting structure
                graph.add_edges([(parent_id, self.node_id)])

        # get all the possible parents
        other_nodes = [node for node in
                       self.structure_estimator._sample_path.structure.nodes_labels if
                       node != self.node_id and
                       node not in prior_parents]

        actual_best_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)

        patience_count = 0
        for i in range(self.iterations_number):
            # choose a new random edge
            current_new_parent = choice(other_nodes)
            current_edge = (current_new_parent, self.node_id)
            added = False
            parent_removed = None

            if graph.has_edge(current_edge):
                graph.remove_edges([current_edge])
            else:
                # check the max_parents constraint
                if self.max_parents is not None:
                    parents_list = graph.get_parents_by_id(self.node_id)
                    if len(parents_list) >= self.max_parents:
                        parent_removed = (choice(parents_list), self.node_id)
                        graph.remove_edges([parent_removed])
                graph.add_edges([current_edge])
                added = True
            current_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)

            if current_score > actual_best_score:
                # update the current best score
                actual_best_score = current_score
                patience_count = 0
            else:
                # undo the last update
                if added:
                    graph.remove_edges([current_edge])
                    # if a parent was removed, add it again to the graph
                    if parent_removed is not None:
                        graph.add_edges([parent_removed])
                else:
                    graph.add_edges([current_edge])
                # update the patience count
                patience_count += 1

            if self.patience is not None and patience_count > self.patience:
                break

        print(f"finished variable: {self.node_id}")
        return graph.edges
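
# Usage sketch (illustrative only; an optimizer is normally built by
# StructureScoreBasedEstimator.estimate_parents, but it can be driven directly,
# assuming `score_estimator` is a StructureScoreBasedEstimator instance and
# 'X' one of its node labels):
#
#     hill = HillClimbing(node_id='X', structure_estimator=score_estimator,
#                         max_parents=3, iterations_number=100, patience=25)
#     best_edges_for_X = hill.optimize_structure()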
@@ -0,0 +1,39 @@

import itertools
import json
import typing

import networkx as nx
import numpy as np

import abc

from ..estimators.structure_estimator import StructureEstimator


class Optimizer(abc.ABC):
    """
    Interface class for all the optimizers in PyCTBN

    :param node_id: the node label
    :type node_id: string
    :param structure_estimator: a StructureEstimator object used to predict the structure
    :type structure_estimator: class:'StructureEstimator'
    """

    def __init__(self, node_id: str, structure_estimator: StructureEstimator):
        self.node_id = node_id
        self.structure_estimator = structure_estimator

    @abc.abstractmethod
    def optimize_structure(self) -> typing.List:
        """
        Compute the optimization process for a structure_estimator

        :return: the estimated structure for the node
        :rtype: List
        """
        pass
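
# Subclassing sketch (illustrative only): a new search strategy only has to
# implement optimize_structure() and return the estimated edges for its node.
# `GreedyOptimizer` and its body are hypothetical.
#
#     class GreedyOptimizer(Optimizer):
#         def optimize_structure(self) -> typing.List:
#             graph = NetworkGraph(self.structure_estimator._sample_path.structure)
#             # ...search over candidate parent sets for self.node_id...
#             return graph.edges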
@ -0,0 +1,199 @@ |
||||
|
||||
import itertools |
||||
import json |
||||
import typing |
||||
|
||||
import networkx as nx |
||||
import numpy as np |
||||
|
||||
from random import choice,sample |
||||
|
||||
from abc import ABC |
||||
|
||||
|
||||
from .optimizer import Optimizer |
||||
from ..estimators.structure_estimator import StructureEstimator |
||||
from ..structure_graph.network_graph import NetworkGraph |
||||
|
||||
import queue |
||||
|
||||
|
||||
class TabuSearch(Optimizer): |
||||
""" |
||||
Optimizer class that implement Tabu Search |
||||
|
||||
|
||||
:param node_id: current node's id |
||||
:type node_id: string |
||||
:param structure_estimator: a structure estimator object with the information about the net |
||||
:type structure_estimator: class:'StructureEstimator' |
||||
:param max_parents: maximum number of parents for each variable. If None, disabled, default to None |
||||
:type max_parents: int, optional |
||||
:param iterations_number: maximum number of optimization algorithm's iteration, default to 40 |
||||
:type iterations_number: int, optional |
||||
:param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None |
||||
:type patience: int, optional |
||||
:param tabu_length: maximum lenght of the data structures used in the optimization process, default to None |
||||
:type tabu_length: int, optional |
||||
:param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None |
||||
:type tabu_rules_duration: int, optional |
||||
|
||||
|
||||
""" |
||||
def __init__(self, |
||||
node_id:str, |
||||
structure_estimator: StructureEstimator, |
||||
max_parents:int = None, |
||||
iterations_number:int= 40, |
||||
patience:int = None, |
||||
tabu_length:int = None, |
||||
tabu_rules_duration = None |
||||
): |
||||
""" |
||||
Constructor |
||||
""" |
||||
super().__init__(node_id, structure_estimator) |
||||
self.max_parents = max_parents |
||||
self.iterations_number = iterations_number |
||||
self.patience = patience |
||||
self.tabu_length = tabu_length |
||||
self.tabu_rules_duration = tabu_rules_duration |
||||
|
||||
|
||||
def optimize_structure(self) -> typing.List: |
||||
""" |
||||
Compute Optimization process for a structure_estimator by using a Hill Climbing Algorithm |
||||
|
||||
:return: the estimated structure for the node |
||||
:rtype: List |
||||
""" |
||||
print(f"tabu search is processing the structure of {self.node_id}") |
||||
|
||||
'Create the graph for the single node' |
||||
graph = NetworkGraph(self.structure_estimator._sample_path.structure) |
||||
|
||||
'get the index for the current node' |
||||
node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id) |
||||
|
||||
'list of prior edges' |
||||
prior_parents = set() |
||||
|
||||
'Add the edges from prior knowledge' |
||||
for i in range(len(self.structure_estimator._removable_edges_matrix)): |
||||
if not self.structure_estimator._removable_edges_matrix[i][node_index]: |
||||
parent_id= self.structure_estimator._sample_path._structure.get_node_id(i) |
||||
prior_parents.add(parent_id) |
||||
|
||||
'Add the node to the starting structure' |
||||
graph.add_edges([(parent_id, self.node_id)]) |
||||
|
||||
|
||||
|
||||
'get all the possible parents' |
||||
other_nodes = set([node for node in |
||||
self.structure_estimator._sample_path.structure.nodes_labels if |
||||
node != self.node_id and |
||||
not prior_parents.__contains__(node)]) |
||||
|
||||
'calculate the score for the node without parents' |
||||
actual_best_score = self.structure_estimator.get_score_from_graph(graph,self.node_id) |
||||
|
||||
|
||||
'initialize tabu_length and tabu_rules_duration if None' |
||||
if self.tabu_length is None: |
||||
self.tabu_length = len(other_nodes) |
||||
|
||||
if self.tabu_rules_duration is None: |
||||
self.tabu_tabu_rules_durationength = len(other_nodes) |
||||
|
||||
'inizialize the data structures' |
||||
tabu_set = set() |
||||
tabu_queue = queue.Queue() |
||||
|
||||
patience_count = 0 |
||||
tabu_count = 0 |
||||
for i in range(self.iterations_number): |
||||
|
||||
current_possible_nodes = other_nodes.difference(tabu_set) |
||||
|
||||
'choose a new random edge according to the tabu restriction' |
||||
if len(current_possible_nodes) > 0: |
||||
current_new_parent = sample(list(current_possible_nodes), k=1)[0] |
||||
else: |
||||
current_new_parent = tabu_queue.get() |
||||
tabu_set.remove(current_new_parent) |
||||
|
||||
|
||||
|
||||
current_edge = (current_new_parent,self.node_id) |
||||
added = False |
||||
parent_removed = None |
||||
|
||||
if graph.has_edge(current_edge): |
||||
graph.remove_edges([current_edge]) |
||||
else: |
||||
'check the max_parents constraint' |
||||
if self.max_parents is not None: |
||||
parents_list = graph.get_parents_by_id(self.node_id) |
||||
if len(parents_list) >= self.max_parents : |
||||
parent_removed = (choice(parents_list), self.node_id) |
||||
graph.remove_edges([parent_removed]) |
||||
graph.add_edges([current_edge]) |
||||
added = True |
||||
#print('**************************') |
||||
current_score = self.structure_estimator.get_score_from_graph(graph,self.node_id) |
||||
|
||||
|
||||
# print("-------------------------------------------") |
||||
# print(f"Current new parent: {current_new_parent}") |
||||
# print(f"Current score: {current_score}") |
||||
# print(f"Current best score: {actual_best_score}") |
||||
# print(f"tabu list : {str(tabu_set)} length: {len(tabu_set)}") |
||||
# print(f"tabu queue : {str(tabu_queue)} length: {tabu_queue.qsize()}") |
||||
# print(f"graph edges: {graph.edges}") |
||||
|
||||
# print("-------------------------------------------") |
||||
# input() |
||||
if current_score > actual_best_score: |
||||
'update current best score' |
||||
actual_best_score = current_score |
||||
patience_count = 0 |
||||
'update tabu list' |
||||
|
||||
|
||||
else: |
||||
'undo the last update' |
||||
if added: |
||||
graph.remove_edges([current_edge]) |
||||
'If a parent was removed, add it again to the graph' |
||||
if parent_removed is not None: |
||||
graph.add_edges([parent_removed]) |
||||
else: |
||||
graph.add_edges([current_edge]) |
||||
'update patience count' |
||||
patience_count += 1 |
||||
|
||||
|
||||
if tabu_queue.qsize() >= self.tabu_length: |
||||
current_removed = tabu_queue.get() |
||||
tabu_set.remove(current_removed) |
||||
'Add the node on the tabu list' |
||||
tabu_queue.put(current_new_parent) |
||||
tabu_set.add(current_new_parent) |
||||
|
||||
tabu_count += 1 |
||||
|
||||
'Every tabu_rules_duration steps remove an item from the tabu list' |
||||
if tabu_count % self.tabu_rules_duration == 0: |
||||
if tabu_queue.qsize() > 0: |
||||
current_removed = tabu_queue.get() |
||||
tabu_set.remove(current_removed) |
||||
tabu_count = 0 |
||||
else: |
||||
tabu_count = 0 |
||||
|
||||
if self.patience is not None and patience_count > self.patience: |
||||
break |
||||
|
||||
print(f"finito variabile: {self.node_id}") |
||||
return graph.edges |
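||||
 |
||||
# Usage sketch (illustrative only, assuming this optimizer is exported as |
||||
# ``TabuSearch`` and that a score-based structure estimator ``se`` has been |
||||
# built on a SamplePath; all literal values below are example assumptions): |
||||
# |
||||
#   optimizer = TabuSearch(node_id='X', structure_estimator=se, |
||||
#                          max_parents=3, iterations_number=100, |
||||
#                          patience=25, tabu_length=10, tabu_rules_duration=5) |
||||
#   estimated_edges = optimizer.optimize_structure() |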
@ -0,0 +1,6 @@ |
||||
from .conditional_intensity_matrix import ConditionalIntensityMatrix |
||||
from .network_graph import NetworkGraph |
||||
from .sample_path import SamplePath |
||||
from .set_of_cims import SetOfCims |
||||
from .structure import Structure |
||||
from .trajectory import Trajectory |
@ -0,0 +1,42 @@ |
||||
import numpy as np |
||||
|
||||
|
||||
class ConditionalIntensityMatrix(object): |
||||
"""Abstracts the Conditional Intesity matrix of a node as aggregation of the state residence times vector |
||||
and state transition matrix and the actual CIM matrix. |
||||
|
||||
:param state_residence_times: state residence times vector |
||||
:type state_residence_times: numpy.array |
||||
:param state_transition_matrix: the transitions count matrix |
||||
:type state_transition_matrix: numpy.ndarray |
||||
:_cim: the actual cim of the node |
||||
""" |
||||
def __init__(self, state_residence_times: np.array, state_transition_matrix: np.array): |
||||
"""Constructor Method |
||||
""" |
||||
self._state_residence_times = state_residence_times |
||||
self._state_transition_matrix = state_transition_matrix |
||||
self._cim = self.state_transition_matrix.astype(np.float64) |
||||
|
||||
def compute_cim_coefficients(self) -> None: |
||||
"""Compute the coefficients of the matrix _cim by using the following equality q_xx' = M[x, x'] / T[x]. |
||||
The class member ``_cim`` will contain the computed cim |
||||
""" |
||||
np.fill_diagonal(self._cim, self._cim.diagonal() * -1) |
||||
self._cim = ((self._cim.T + 1) / (self._state_residence_times + 1)).T |
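||||
# Worked example (illustrative numbers): with M = [[2, 2], [3, 3]] and |
||||
# T = [4.0, 5.0], the diagonal is negated and every entry smoothed, giving |
||||
# q_01 = (2 + 1) / (4.0 + 1) = 0.6 and q_00 = (-2 + 1) / (4.0 + 1) = -0.2. |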
||||
|
||||
@property |
||||
def state_residence_times(self) -> np.ndarray: |
||||
return self._state_residence_times |
||||
|
||||
@property |
||||
def state_transition_matrix(self) -> np.ndarray: |
||||
return self._state_transition_matrix |
||||
|
||||
@property |
||||
def cim(self) -> np.ndarray: |
||||
return self._cim |
||||
|
||||
def __repr__(self): |
||||
return 'CIM:\n' + str(self.cim) |
||||
|
@ -0,0 +1,293 @@ |
||||
|
||||
import typing |
||||
|
||||
import networkx as nx |
||||
import numpy as np |
||||
|
||||
from .structure import Structure |
||||
|
||||
|
||||
class NetworkGraph(object): |
||||
"""Abstracts the infos contained in the Structure class in the form of a directed graph. |
||||
Has the task of creating all the necessary filtering and indexing structures for parameters estimation |
||||
|
||||
:param graph_struct: the ``Structure`` object from which infos about the net will be extracted |
||||
:type graph_struct: Structure |
||||
:_graph: directed graph |
||||
:_aggregated_info_about_nodes_parents: a structure that contains all the necessary information |
||||
about the parents of the node for which all the indexing and filtering structures will be constructed. |
||||
:_time_scalar_indexing_structure: the indexing structure for state res time estimation |
||||
:_transition_scalar_indexing_structure: the indexing structure for transition computation |
||||
:_time_filtering: the columns filtering structure used in the computation of the state res times |
||||
:_transition_filtering: the columns filtering structure used in the computation of the transition |
||||
from one state to another |
||||
:_p_combs_structure: all the possible parents states combination for the node of interest |
||||
""" |
||||
|
||||
def __init__(self, graph_struct: Structure): |
||||
"""Constructor Method |
||||
""" |
||||
self._graph_struct = graph_struct |
||||
self._graph = nx.DiGraph() |
||||
self._aggregated_info_about_nodes_parents = None |
||||
self._time_scalar_indexing_structure = None |
||||
self._transition_scalar_indexing_structure = None |
||||
self._time_filtering = None |
||||
self._transition_filtering = None |
||||
self._p_combs_structure = None |
||||
|
||||
def init_graph(self): |
||||
self.add_nodes(self._graph_struct.nodes_labels) |
||||
self.add_edges(self._graph_struct.edges) |
||||
self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes() |
||||
self._fancy_indexing = self.build_fancy_indexing_structure(0) |
||||
self.build_scalar_indexing_structures() |
||||
self.build_time_columns_filtering_structure() |
||||
self.build_transition_columns_filtering_structure() |
||||
self._p_combs_structure = self.build_p_combs_structure() |
||||
|
||||
def fast_init(self, node_id: str) -> None: |
||||
"""Initializes all the necessary structures for parameters estimation of the node identified by the label |
||||
node_id |
||||
|
||||
:param node_id: the label of the node |
||||
:type node_id: string |
||||
""" |
||||
self.add_nodes(self._graph_struct.nodes_labels) |
||||
self.add_edges(self._graph_struct.edges) |
||||
self._aggregated_info_about_nodes_parents = self.get_ordered_by_indx_set_of_parents(node_id) |
||||
p_indxs = self._aggregated_info_about_nodes_parents[1] |
||||
p_vals = self._aggregated_info_about_nodes_parents[2] |
||||
node_states = self.get_states_number(node_id) |
||||
node_indx = self.get_node_indx(node_id) |
||||
cols_number = self._graph_struct.total_variables_number |
||||
self._time_scalar_indexing_structure = NetworkGraph.\ |
||||
build_time_scalar_indexing_structure_for_a_node(node_states, p_vals) |
||||
self._transition_scalar_indexing_structure = NetworkGraph.\ |
||||
build_transition_scalar_indexing_structure_for_a_node(node_states, p_vals) |
||||
self._time_filtering = NetworkGraph.build_time_columns_filtering_for_a_node(node_indx, p_indxs) |
||||
self._transition_filtering = NetworkGraph.build_transition_filtering_for_a_node(node_indx, p_indxs, cols_number) |
||||
self._p_combs_structure = NetworkGraph.build_p_comb_structure_for_a_node(p_vals) |
||||
|
||||
def add_nodes(self, list_of_nodes: typing.List) -> None: |
||||
"""Adds the nodes to the ``_graph`` contained in the list of nodes ``list_of_nodes``. |
||||
Sets all the properties that identify a nodes (index, positional index, cardinality) |
||||
|
||||
:param list_of_nodes: the nodes to add to ``_graph`` |
||||
:type list_of_nodes: List |
||||
""" |
||||
nodes_indxs = self._graph_struct.nodes_indexes |
||||
nodes_vals = self._graph_struct.nodes_values |
||||
pos = 0 |
||||
for id, node_indx, node_val in zip(list_of_nodes, nodes_indxs, nodes_vals): |
||||
self._graph.add_node(id, indx=node_indx, val=node_val, pos_indx=pos) |
||||
pos += 1 |
||||
|
||||
def has_edge(self, edge: tuple) -> bool: |
||||
"""Check if the graph contains a specific edge |
||||
 |
||||
:param edge: a tuple that represents the edge |
||||
:type edge: tuple |
||||
:return: True if the graph contains the edge, False otherwise |
||||
:rtype: bool |
||||
""" |
||||
return self._graph.has_edge(edge[0], edge[1]) |
||||
|
||||
def add_edges(self, list_of_edges: typing.List) -> None: |
||||
"""Add the edges to the ``_graph`` contained in the list ``list_of_edges``. |
||||
|
||||
:param list_of_edges: the list of tuples containing the edges |
||||
:type list_of_edges: List |
||||
""" |
||||
self._graph.add_edges_from(list_of_edges) |
||||
|
||||
def remove_node(self, node_id: str) -> None: |
||||
"""Remove the node ``node_id`` from all the class members. |
||||
Initialize all the filtering/indexing structures. |
||||
""" |
||||
self._graph.remove_node(node_id) |
||||
self._graph_struct.remove_node(node_id) |
||||
self.clear_indexing_filtering_structures() |
||||
|
||||
def clear_indexing_filtering_structures(self) -> None: |
||||
"""Initialize all the filtering/indexing structures. |
||||
""" |
||||
self._aggregated_info_about_nodes_parents = None |
||||
self._time_scalar_indexing_structure = None |
||||
self._transition_scalar_indexing_structure = None |
||||
self._time_filtering = None |
||||
self._transition_filtering = None |
||||
self._p_combs_structure = None |
||||
|
||||
def get_ordered_by_indx_set_of_parents(self, node: str) -> typing.Tuple: |
||||
"""Builds the aggregated structure that holds all the infos relative to the parent set of the node, namely |
||||
(parents_labels, parents_indexes, parents_cardinalities). |
||||
|
||||
:param node: the label of the node |
||||
:type node: string |
||||
:return: a tuple containing all the parent set information |
||||
:rtype: Tuple |
||||
""" |
||||
parents = self.get_parents_by_id(node) |
||||
nodes = self._graph_struct.nodes_labels |
||||
d = {v: i for i, v in enumerate(nodes)} |
||||
sorted_parents = sorted(parents, key=lambda v: d[v]) |
||||
get_node_indx = self.get_node_indx |
||||
p_indxes = [get_node_indx(node) for node in sorted_parents] |
||||
p_values = [self.get_states_number(node) for node in sorted_parents] |
||||
return sorted_parents, p_indxes, p_values |
||||
|
||||
def remove_edges(self, list_of_edges: typing.List) -> None: |
||||
"""Remove the edges to the graph contained in the list list_of_edges. |
||||
|
||||
:param list_of_edges: The edges to remove from the graph |
||||
:type list_of_edges: List |
||||
""" |
||||
self._graph.remove_edges_from(list_of_edges) |
||||
|
||||
@staticmethod |
||||
def build_time_scalar_indexing_structure_for_a_node(node_states: int, |
||||
parents_vals: typing.List) -> np.ndarray: |
||||
"""Builds an indexing structure for the computation of state residence times values. |
||||
|
||||
:param node_states: the node cardinality |
||||
:type node_states: int |
||||
:param parents_vals: the cardinalities of the node's parents |
||||
:type parents_vals: List |
||||
:return: The time indexing structure |
||||
:rtype: numpy.ndarray |
||||
""" |
||||
T_vector = np.array([node_states]) |
||||
T_vector = np.append(T_vector, parents_vals) |
||||
T_vector = T_vector.cumprod().astype(int) |
||||
return T_vector |
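||||
# Example: node_states = 3 and parents_vals = [2, 4] give |
||||
# T_vector = cumprod([3, 2, 4]) = [3, 6, 24]. |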
||||
|
||||
@staticmethod |
||||
def build_transition_scalar_indexing_structure_for_a_node(node_states_number: int, parents_vals: typing.List) \ |
||||
-> np.ndarray: |
||||
"""Builds an indexing structure for the computation of state transitions values. |
||||
|
||||
:param node_states_number: the node cardinality |
||||
:type node_states_number: int |
||||
:param parents_vals: the cardinalities of the node's parents |
||||
:type parents_vals: List |
||||
:return: The transition indexing structure |
||||
:rtype: numpy.ndarray |
||||
""" |
||||
M_vector = np.array([node_states_number, |
||||
node_states_number]) |
||||
M_vector = np.append(M_vector, parents_vals) |
||||
M_vector = M_vector.cumprod().astype(int) |
||||
return M_vector |
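||||
# Example: node_states_number = 3 and parents_vals = [2] give |
||||
# M_vector = cumprod([3, 3, 2]) = [3, 9, 18]. |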
||||
|
||||
@staticmethod |
||||
def build_time_columns_filtering_for_a_node(node_indx: int, p_indxs: typing.List) -> np.ndarray: |
||||
""" |
||||
Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs`` |
||||
in the dataset. |
||||
This structure will be used in the computation of the state residence times. |
||||
:param node_indx: the index of the node |
||||
:type node_indx: int |
||||
:param p_indxs: the indexes of the node's parents |
||||
:type p_indxs: List |
||||
:return: The filtering structure for times estimation |
||||
:rtype: numpy.ndarray |
||||
""" |
||||
return np.append(np.array([node_indx], dtype=int), p_indxs).astype(int) |
||||
|
||||
@staticmethod |
||||
def build_transition_filtering_for_a_node(node_indx: int, p_indxs: typing.List, nodes_number: int) \ |
||||
-> np.ndarray: |
||||
"""Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs`` |
||||
in the dataset. |
||||
This structure will be used in the computation of the state transitions values. |
||||
:param node_indx: the index of the node |
||||
:type node_indx: int |
||||
:param p_indxs: the indexes of the node's parents |
||||
:type p_indxs: List |
||||
:param nodes_number: the total number of nodes in the dataset |
||||
:type nodes_number: int |
||||
:return: The filtering structure for transitions estimation |
||||
:rtype: numpy.ndarray |
||||
""" |
||||
return np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=int) |
||||
|
||||
@staticmethod |
||||
def build_p_comb_structure_for_a_node(parents_values: typing.List) -> np.ndarray: |
||||
""" |
||||
Builds the combinatorial structure that contains the combinations of all the values contained in |
||||
``parents_values``. |
||||
|
||||
:param parents_values: the cardinalities of the nodes |
||||
:type parents_values: List |
||||
:return: A numpy matrix containing a grid of the combinations |
||||
:rtype: numpy.ndarray |
||||
""" |
||||
tmp = [] |
||||
for val in parents_values: |
||||
tmp.append([x for x in range(val)]) |
||||
if len(parents_values) > 0: |
||||
parents_comb = np.array(np.meshgrid(*tmp)).T.reshape(-1, len(parents_values)) |
||||
if len(parents_values) > 1: |
||||
tmp_comb = parents_comb[:, 1].copy() |
||||
parents_comb[:, 1] = parents_comb[:, 0].copy() |
||||
parents_comb[:, 0] = tmp_comb |
||||
else: |
||||
parents_comb = np.array([[]], dtype=int) |
||||
return parents_comb |
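||||
# Example: parents_values = [2, 3] yields the 6 rows enumerating every joint |
||||
# assignment of the two parents; the column swap restores the intended parent |
||||
# ordering after np.meshgrid. |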
||||
|
||||
def get_parents_by_id(self, node_id) -> typing.List: |
||||
"""Returns a list of labels of the parents of the node ``node_id`` |
||||
|
||||
:param node_id: the node label |
||||
:type node_id: string |
||||
:return: a List of labels of the parents |
||||
:rtype: List |
||||
""" |
||||
return list(self._graph.predecessors(node_id)) |
||||
|
||||
def get_states_number(self, node_id) -> int: |
||||
return self._graph.nodes[node_id]['val'] |
||||
|
||||
def get_node_indx(self, node_id) -> int: |
||||
return nx.get_node_attributes(self._graph, 'indx')[node_id] |
||||
|
||||
def get_positional_node_indx(self, node_id) -> int: |
||||
return self._graph.nodes[node_id]['pos_indx'] |
||||
|
||||
@property |
||||
def nodes(self) -> typing.List: |
||||
return self._graph_struct.nodes_labels |
||||
|
||||
@property |
||||
def edges(self) -> typing.List: |
||||
return list(self._graph.edges) |
||||
|
||||
@property |
||||
def nodes_indexes(self) -> np.ndarray: |
||||
return self._graph_struct.nodes_indexes |
||||
|
||||
@property |
||||
def nodes_values(self) -> np.ndarray: |
||||
return self._graph_struct.nodes_values |
||||
|
||||
@property |
||||
def time_scalar_indexing_strucure(self) -> np.ndarray: |
||||
return self._time_scalar_indexing_structure |
||||
|
||||
@property |
||||
def time_filtering(self) -> np.ndarray: |
||||
return self._time_filtering |
||||
|
||||
@property |
||||
def transition_scalar_indexing_structure(self) -> np.ndarray: |
||||
return self._transition_scalar_indexing_structure |
||||
|
||||
@property |
||||
def transition_filtering(self) -> np.ndarray: |
||||
return self._transition_filtering |
||||
|
||||
@property |
||||
def p_combs(self) -> np.ndarray: |
||||
return self._p_combs_structure |
@ -0,0 +1,91 @@ |
||||
|
||||
|
||||
import numpy as np |
||||
import pandas as pd |
||||
|
||||
from .structure import Structure |
||||
from .trajectory import Trajectory |
||||
from ..utility.abstract_importer import AbstractImporter |
||||
|
||||
|
||||
|
||||
class SamplePath(object): |
||||
"""Aggregates all the informations about the trajectories, the real structure of the sampled net and variables |
||||
cardinalites. Has the task of creating the objects ``Trajectory`` and ``Structure`` that will |
||||
contain the mentioned data. |
||||
|
||||
:param importer: the Importer object which contains the imported and processed data |
||||
:type importer: AbstractImporter |
||||
:_trajectories: the ``Trajectory`` object that will contain all the concatenated trajectories |
||||
:_structure: the ``Structure`` Object that will contain all the structural infos about the net |
||||
:_total_variables_count: the number of variables in the net |
||||
""" |
||||
def __init__(self, importer: AbstractImporter): |
||||
"""Constructor Method |
||||
""" |
||||
self._importer = importer |
||||
if self._importer._df_variables is None or self._importer._concatenated_samples is None: |
||||
raise RuntimeError('The importer object has to contain all the processed data!') |
||||
if self._importer._df_variables.empty: |
||||
raise RuntimeError('The importer object has to contain all the processed data!') |
||||
if isinstance(self._importer._concatenated_samples, pd.DataFrame): |
||||
if self._importer._concatenated_samples.empty: |
||||
raise RuntimeError('The importer object has to contain all the processed data!') |
||||
if isinstance(self._importer._concatenated_samples, np.ndarray): |
||||
if self._importer._concatenated_samples.size == 0: |
||||
raise RuntimeError('The importer object has to contain all the processed data!') |
||||
self._trajectories = None |
||||
self._structure = None |
||||
self._total_variables_count = None |
||||
|
||||
def build_trajectories(self) -> None: |
||||
"""Builds the Trajectory object that will contain all the trajectories. |
||||
Clears all the unused dataframes in ``_importer`` Object |
||||
""" |
||||
self._trajectories = \ |
||||
Trajectory(self._importer.build_list_of_samples_array(self._importer.concatenated_samples), |
||||
len(self._importer.sorter) + 1) |
||||
self._importer.clear_concatenated_frame() |
||||
|
||||
def build_structure(self) -> None: |
||||
""" |
||||
Builds the ``Structure`` object that aggregates all the information about the net. |
||||
""" |
||||
if self._importer.sorter != self._importer.variables.iloc[:, 0].to_list(): |
||||
raise RuntimeError("The Dataset columns order have to match the order of labels in the variables Frame!") |
||||
|
||||
self._total_variables_count = len(self._importer.sorter) |
||||
labels = self._importer.variables.iloc[:, 0].to_list() |
||||
indxs = self._importer.variables.index.to_numpy() |
||||
vals = self._importer.variables.iloc[:, 1].to_numpy() |
||||
if self._importer.structure is None or self._importer.structure.empty: |
||||
edges = [] |
||||
else: |
||||
edges = list(self._importer.structure.to_records(index=False)) |
||||
self._structure = Structure(labels, indxs, vals, edges, |
||||
self._total_variables_count) |
||||
|
||||
def clear_memory(self): |
||||
self._importer._raw_data = [] |
||||
|
||||
@property |
||||
def trajectories(self) -> Trajectory: |
||||
return self._trajectories |
||||
|
||||
@property |
||||
def structure(self) -> Structure: |
||||
return self._structure |
||||
|
||||
@property |
||||
def total_variables_count(self) -> int: |
||||
return self._total_variables_count |
||||
|
||||
@property |
||||
def has_prior_net_structure(self) -> bool: |
||||
return bool(self._structure.edges) |
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -0,0 +1,97 @@ |
||||
|
||||
|
||||
import typing |
||||
|
||||
import numpy as np |
||||
|
||||
from .conditional_intensity_matrix import ConditionalIntensityMatrix |
||||
|
||||
|
||||
class SetOfCims(object): |
||||
"""Aggregates all the CIMS of the node identified by the label _node_id. |
||||
|
||||
:param node_id: the node label |
||||
:type node_id: string |
||||
:param parents_states_number: the cardinalities of the parents |
||||
:type parents_states_number: List |
||||
:param node_states_number: the cardinality of the node |
||||
:type node_states_number: int |
||||
:param p_combs: the p_comb structure bound to this node |
||||
:type p_combs: numpy.ndarray |
||||
:_state_residence_time: matrix containing all the state residence time vectors for the node |
||||
:_transition_matrices: matrix containing all the transition matrices for the node |
||||
:_actual_cims: the cims of the node |
||||
""" |
||||
|
||||
def __init__(self, node_id: str, parents_states_number: typing.List, node_states_number: int, p_combs: np.ndarray): |
||||
"""Constructor Method |
||||
""" |
||||
self._node_id = node_id |
||||
self._parents_states_number = parents_states_number |
||||
self._node_states_number = node_states_number |
||||
self._actual_cims = [] |
||||
self._state_residence_times = None |
||||
self._transition_matrices = None |
||||
self._p_combs = p_combs |
||||
self.build_times_and_transitions_structures() |
||||
|
||||
def build_times_and_transitions_structures(self) -> None: |
||||
"""Initializes at the correct dimensions the state residence times matrix and the state transition matrices. |
||||
""" |
||||
if not self._parents_states_number: |
||||
self._state_residence_times = np.zeros((1, self._node_states_number), dtype=float) |
||||
self._transition_matrices = np.zeros((1, self._node_states_number, self._node_states_number), dtype=int) |
||||
else: |
||||
self._state_residence_times = \ |
||||
np.zeros((np.prod(self._parents_states_number), self._node_states_number), dtype=float) |
||||
self._transition_matrices = np.zeros([np.prod(self._parents_states_number), self._node_states_number, |
||||
self._node_states_number], dtype=int) |
||||
|
||||
def build_cims(self, state_res_times: np.ndarray, transition_matrices: np.ndarray) -> None: |
||||
"""Build the ``ConditionalIntensityMatrix`` objects given the state residence times and transitions matrices. |
||||
Compute the cim coefficients. The class member ``_actual_cims`` will contain the computed cims. |
||||
|
||||
:param state_res_times: the state residence times matrix |
||||
:type state_res_times: numpy.ndarray |
||||
:param transition_matrices: the transition matrices |
||||
:type transition_matrices: numpy.ndarray |
||||
""" |
||||
for state_res_time_vector, transition_matrix in zip(state_res_times, transition_matrices): |
||||
cim_to_add = ConditionalIntensityMatrix(state_res_time_vector, transition_matrix) |
||||
cim_to_add.compute_cim_coefficients() |
||||
self._actual_cims.append(cim_to_add) |
||||
self._actual_cims = np.array(self._actual_cims) |
||||
self._transition_matrices = None |
||||
self._state_residence_times = None |
||||
|
||||
def filter_cims_with_mask(self, mask_arr: np.ndarray, comb: typing.List) -> np.ndarray: |
||||
"""Filter the cims contained in the array ``_actual_cims`` given the boolean mask ``mask_arr`` and the index |
||||
``comb``. |
||||
|
||||
:param mask_arr: the boolean mask that indicates which parent to consider |
||||
:type mask_arr: numpy.array |
||||
:param comb: the state/s of the filtered parents |
||||
:type comb: numpy.array |
||||
:return: Array of ``ConditionalIntensityMatrix`` objects |
||||
:rtype: numpy.array |
||||
""" |
||||
if mask_arr.size <= 1: |
||||
return self._actual_cims |
||||
else: |
||||
flat_indxs = np.argwhere(np.all(self._p_combs[:, mask_arr] == comb, axis=1)).ravel() |
||||
return self._actual_cims[flat_indxs] |
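||||
# Example (illustrative): with two binary parents, mask_arr = [True, False] |
||||
# and comb = [1] select only the cims whose first parent is in state 1. |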
||||
|
||||
@property |
||||
def actual_cims(self) -> np.ndarray: |
||||
return self._actual_cims |
||||
|
||||
@property |
||||
def p_combs(self) -> np.ndarray: |
||||
return self._p_combs |
||||
|
||||
def get_cims_number(self): |
||||
return len(self._actual_cims) |
||||
|
||||
|
||||
|
||||
|
@ -0,0 +1,124 @@ |
||||
|
||||
import typing as ty |
||||
|
||||
import numpy as np |
||||
|
||||
|
||||
class Structure(object): |
||||
"""Contains all the infos about the network structure(nodes labels, nodes caridinalites, edges, indexes) |
||||
|
||||
:param nodes_labels_list: the symbolic names of the variables |
||||
:type nodes_labels_list: List |
||||
:param nodes_indexes_arr: the indexes of the nodes |
||||
:type nodes_indexes_arr: numpy.ndarray |
||||
:param nodes_vals_arr: the cardinalites of the nodes |
||||
:type nodes_vals_arr: numpy.ndarray |
||||
:param edges_list: the edges of the network |
||||
:type edges_list: List |
||||
:param total_variables_number: the total number of variables in the dataset |
||||
:type total_variables_number: int |
||||
""" |
||||
|
||||
def __init__(self, nodes_labels_list: ty.List, nodes_indexes_arr: np.ndarray, nodes_vals_arr: np.ndarray, |
||||
edges_list: ty.List, total_variables_number: int): |
||||
"""Constructor Method |
||||
""" |
||||
self._nodes_labels_list = nodes_labels_list |
||||
self._nodes_indexes_arr = nodes_indexes_arr |
||||
self._nodes_vals_arr = nodes_vals_arr |
||||
self._edges_list = edges_list |
||||
self._total_variables_number = total_variables_number |
||||
|
||||
def remove_node(self, node_id: str) -> None: |
||||
"""Remove the node ``node_id`` from all the class members. |
||||
The class member ``_total_variables_number`` is left unchanged, since it refers to the total number of variables in the dataset. |
||||
""" |
||||
node_positional_indx = self._nodes_labels_list.index(node_id) |
||||
del self._nodes_labels_list[node_positional_indx] |
||||
self._nodes_indexes_arr = np.delete(self._nodes_indexes_arr, node_positional_indx) |
||||
self._nodes_vals_arr = np.delete(self._nodes_vals_arr, node_positional_indx) |
||||
self._edges_list = [(from_node, to_node) for (from_node, to_node) in self._edges_list if (from_node != node_id |
||||
and to_node != node_id)] |
||||
|
||||
@property |
||||
def edges(self) -> ty.List: |
||||
return self._edges_list |
||||
|
||||
@property |
||||
def nodes_labels(self) -> ty.List: |
||||
return self._nodes_labels_list |
||||
|
||||
@property |
||||
def nodes_indexes(self) -> np.ndarray: |
||||
return self._nodes_indexes_arr |
||||
|
||||
@property |
||||
def nodes_values(self) -> np.ndarray: |
||||
return self._nodes_vals_arr |
||||
|
||||
@property |
||||
def total_variables_number(self) -> int: |
||||
return self._total_variables_number |
||||
|
||||
def get_node_id(self, node_indx: int) -> str: |
||||
"""Given the ``node_index`` returns the node label. |
||||
|
||||
:param node_indx: the node index |
||||
:type node_indx: int |
||||
:return: the node label |
||||
:rtype: string |
||||
""" |
||||
return self._nodes_labels_list[node_indx] |
||||
|
||||
def clean_structure_edges(self): |
||||
self._edges_list = list() |
||||
|
||||
def add_edge(self, edge: tuple): |
||||
self._edges_list.append(edge) |
||||
|
||||
def remove_edge(self, edge: tuple): |
||||
self._edges_list.remove(edge) |
||||
|
||||
def contains_edge(self,edge:tuple) -> bool: |
||||
return edge in self._edges_list |
||||
|
||||
def get_node_indx(self, node_id: str) -> int: |
||||
"""Given the ``node_index`` returns the node label. |
||||
|
||||
:param node_id: the node label |
||||
:type node_id: string |
||||
:return: the node index |
||||
:rtype: int |
||||
""" |
||||
pos_indx = self._nodes_labels_list.index(node_id) |
||||
return self._nodes_indexes_arr[pos_indx] |
||||
|
||||
def get_positional_node_indx(self, node_id: str) -> int: |
||||
return self._nodes_labels_list.index(node_id) |
||||
|
||||
def get_states_number(self, node: str) -> int: |
||||
"""Given the node label ``node`` returns the cardinality of the node. |
||||
|
||||
:param node: the node label |
||||
:type node: string |
||||
:return: the node cardinality |
||||
:rtype: int |
||||
""" |
||||
pos_indx = self._nodes_labels_list.index(node) |
||||
return self._nodes_vals_arr[pos_indx] |
||||
|
||||
def __repr__(self): |
||||
return "Variables:\n" + str(self._nodes_labels_list) +"\nValues:\n"+ str(self._nodes_vals_arr) +\ |
||||
"\nEdges: \n" + str(self._edges_list) |
||||
|
||||
def __eq__(self, other): |
||||
"""Overrides the default implementation""" |
||||
if isinstance(other, Structure): |
||||
return set(self._nodes_labels_list) == set(other._nodes_labels_list) and \ |
||||
np.array_equal(self._nodes_vals_arr, other._nodes_vals_arr) and \ |
||||
np.array_equal(self._nodes_indexes_arr, other._nodes_indexes_arr) and \ |
||||
self._edges_list == other._edges_list |
||||
|
||||
return False |
||||
|
@ -0,0 +1,45 @@ |
||||
|
||||
import typing |
||||
|
||||
import numpy as np |
||||
|
||||
|
||||
class Trajectory(object): |
||||
""" Abstracts the infos about a complete set of trajectories, represented as a numpy array of doubles |
||||
(the time deltas) and a numpy matrix of ints (the changes of states). |
||||
|
||||
:param list_of_columns: the list containing the times array and values matrix |
||||
:type list_of_columns: List |
||||
:param original_cols_number: total number of cols in the data |
||||
:type original_cols_number: int |
||||
:_actual_trajectory: the trajectory containing also the duplicated/shifted values |
||||
:_times: the array containing the time deltas |
||||
""" |
||||
|
||||
def __init__(self, list_of_columns: typing.List, original_cols_number: int): |
||||
"""Constructor Method |
||||
""" |
||||
self._times = list_of_columns[0] |
||||
self._actual_trajectory = list_of_columns[1] |
||||
self._original_cols_number = original_cols_number |
||||
|
||||
@property |
||||
def trajectory(self) -> np.ndarray: |
||||
return self._actual_trajectory[:, :self._original_cols_number - 1] |
||||
|
||||
@property |
||||
def complete_trajectory(self) -> np.ndarray: |
||||
return self._actual_trajectory |
||||
|
||||
@property |
||||
def times(self): |
||||
return self._times |
||||
|
||||
def size(self): |
||||
return self._actual_trajectory.shape[0] |
||||
|
||||
def __repr__(self): |
||||
return "Complete Trajectory Rows: " + str(self.size()) + "\n" + self.complete_trajectory.__repr__() + \ |
||||
"\nTimes Rows:" + str(self.times.size) + "\n" + self.times.__repr__() |
||||
|
||||
|
@ -0,0 +1,4 @@ |
||||
from .abstract_importer import AbstractImporter |
||||
from .cache import Cache |
||||
from .json_importer import JsonImporter |
||||
from .sample_importer import SampleImporter |
@ -0,0 +1,164 @@ |
||||
|
||||
import typing |
||||
from abc import ABC, abstractmethod |
||||
|
||||
import numpy as np |
||||
import pandas as pd |
||||
|
||||
import copy |
||||
|
||||
#from sklearn.utils import resample |
||||
|
||||
|
||||
class AbstractImporter(ABC): |
||||
"""Abstract class that exposes all the necessary methods to process the trajectories and the net structure. |
||||
|
||||
:param file_path: the file path, or dataset name if you import already processed data |
||||
:type file_path: str |
||||
:param trajectory_list: Dataframe or numpy array containing the concatenation of all the processed trajectories |
||||
:type trajectory_list: typing.Union[pandas.DataFrame, numpy.ndarray] |
||||
:param variables: Dataframe containing the nodes labels and cardinalities |
||||
:type variables: pandas.DataFrame |
||||
:param prior_net_structure: Dataframe containing the structure of the network (edges) |
||||
:type prior_net_structure: pandas.DataFrame |
||||
:_sorter: A list containing the variables labels in the SAME order as the columns in ``concatenated_samples`` |
||||
|
||||
.. warning:: |
||||
The parameters ``variables`` and ``prior_net_structure`` HAVE to be properly constructed |
||||
as Pandas Dataframes with the following structure: |
||||
Header of _df_structure = [From_Node | To_Node] |
||||
Header of _df_variables = [Variable_Label | Variable_Cardinality] |
||||
See the tutorial on how to construct a correct ``concatenated_samples`` Dataframe/ndarray. |
||||
|
||||
.. note:: |
||||
See :class:``JsonImporter`` for an example implementation |
||||
|
||||
""" |
||||
|
||||
def __init__(self, file_path: str = None, trajectory_list: typing.Union[pd.DataFrame, np.ndarray] = None, |
||||
variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None): |
||||
"""Constructor |
||||
""" |
||||
self._file_path = file_path |
||||
self._df_samples_list = trajectory_list |
||||
self._concatenated_samples = [] |
||||
self._df_variables = variables |
||||
self._df_structure = prior_net_structure |
||||
self._sorter = None |
||||
super().__init__() |
||||
|
||||
@abstractmethod |
||||
def build_sorter(self, trajectory_header: object) -> typing.List: |
||||
"""Initializes the ``_sorter`` class member from a trajectory dataframe, extracting the header of the frame |
||||
and keeping ONLY the variables symbolic labels, cutting out the time label in the header. |
||||
 |
||||
:param trajectory_header: an object that will be used to define the header |
||||
:type trajectory_header: object |
||||
:return: A list containing the processed header. |
||||
:rtype: List |
||||
""" |
||||
pass |
||||
|
||||
def compute_row_delta_single_samples_frame(self, sample_frame: pd.DataFrame, |
||||
columns_header: typing.List, shifted_cols_header: typing.List) \ |
||||
-> pd.DataFrame: |
||||
"""Computes the difference between each value present in th time column. |
||||
Copies and shift by one position up all the values present in the remaining columns. |
||||
|
||||
:param sample_frame: the trajectory to be processed |
||||
:type sample_frame: pandas.Dataframe |
||||
:param columns_header: the original header of sample_frame |
||||
:type columns_header: List |
||||
:param shifted_cols_header: a copy of columns_header with renamed column labels |
||||
:type shifted_cols_header: List |
||||
:return: The processed dataframe |
||||
:rtype: pandas.Dataframe |
||||
|
||||
.. warning:: |
||||
the Dataframe ``sample_frame`` has to follow the column structure of this header: |
||||
Header of sample_frame = [Time | Variable values] |
||||
""" |
||||
sample_frame = copy.deepcopy(sample_frame) |
||||
sample_frame.iloc[:, 0] = sample_frame.iloc[:, 0].diff().shift(-1) |
||||
shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32') |
||||
shifted_cols.columns = shifted_cols_header |
||||
sample_frame = sample_frame.assign(**shifted_cols) |
||||
sample_frame.drop(sample_frame.tail(1).index, inplace=True) |
||||
return sample_frame |
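||||
# Worked example (hypothetical single-variable frame): |
||||
#   Time = [0.0, 1.5, 4.0], X = [0, 1, 1]   becomes |
||||
#   Time = [1.5, 2.5], X = [0, 1], XS = [1, 1] |
||||
# i.e. each row holds a state, its residence delta and the next state; |
||||
# the last (incomplete) row is dropped. |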
||||
|
||||
def compute_row_delta_in_all_samples_frames(self, df_samples_list: typing.List) -> None: |
||||
"""Calls the method ``compute_row_delta_sigle_samples_frame`` on every dataframe present in the list |
||||
``df_samples_list``. |
||||
Concatenates the result in the dataframe ``concatenated_samples`` |
||||
|
||||
:param df_samples_list: the list of dataframes to be processed and concatenated |
||||
:type df_samples_list: List |
||||
|
||||
.. warning:: |
||||
The Dataframe sample_frame has to follow the column structure of this header: |
||||
Header of sample_frame = [Time | Variable values] |
||||
The class member self._sorter HAS to be properly INITIALIZED (See class members definition doc) |
||||
.. note:: |
||||
After the call of this method the class member ``concatenated_samples`` will contain all processed |
||||
and merged trajectories |
||||
""" |
||||
if not self._sorter: |
||||
raise RuntimeError("The class member self._sorter has to be INITIALIZED!") |
||||
shifted_cols_header = [s + "S" for s in self._sorter] |
||||
compute_row_delta = self.compute_row_delta_single_samples_frame |
||||
proc_samples_list = [compute_row_delta(sample, self._sorter, shifted_cols_header) |
||||
for sample in df_samples_list] |
||||
self._concatenated_samples = pd.concat(proc_samples_list) |
||||
|
||||
complete_header = self._sorter[:] |
||||
complete_header.insert(0,'Time') |
||||
complete_header.extend(shifted_cols_header) |
||||
self._concatenated_samples = self._concatenated_samples[complete_header] |
||||
|
||||
def build_list_of_samples_array(self, concatenated_sample: pd.DataFrame) -> typing.List: |
||||
"""Builds a List containing the the delta times numpy array, and the complete transitions matrix |
||||
|
||||
:param concatenated_sample: the dataframe/array from which the time, and transitions matrix have to be extracted |
||||
and converted |
||||
:type concatenated_sample: pandas.Dataframe |
||||
:return: the resulting list of numpy arrays |
||||
:rtype: List |
||||
""" |
||||
|
||||
concatenated_array = concatenated_sample.to_numpy() |
||||
columns_list = [concatenated_array[:, 0], concatenated_array[:, 1:].astype(int)] |
||||
|
||||
return columns_list |
||||
|
||||
def clear_concatenated_frame(self) -> None: |
||||
"""Removes all values in the dataframe concatenated_samples. |
||||
""" |
||||
if isinstance(self._concatenated_samples, pd.DataFrame): |
||||
self._concatenated_samples = self._concatenated_samples.iloc[0:0] |
||||
|
||||
@abstractmethod |
||||
def dataset_id(self) -> object: |
||||
"""If the original dataset contains multiple dataset, this method returns a unique id to identify the current |
||||
dataset |
||||
""" |
||||
pass |
||||
|
||||
@property |
||||
def concatenated_samples(self) -> pd.DataFrame: |
||||
return self._concatenated_samples |
||||
|
||||
@property |
||||
def variables(self) -> pd.DataFrame: |
||||
return self._df_variables |
||||
|
||||
@property |
||||
def structure(self) -> pd.DataFrame: |
||||
return self._df_structure |
||||
|
||||
@property |
||||
def sorter(self) -> typing.List: |
||||
return self._sorter |
||||
|
||||
@property |
||||
def file_path(self) -> str: |
||||
return self._file_path |
@ -0,0 +1,58 @@ |
||||
|
||||
import typing |
||||
|
||||
from ..structure_graph.set_of_cims import SetOfCims |
||||
|
||||
|
||||
class Cache: |
||||
"""This class acts as a cache of ``SetOfCims`` objects for a node. |
||||
|
||||
:_list_of_sets_of_parents: a list of ``Sets`` objects of the parents to which the cim in cache at the SAME |
||||
index is related |
||||
:_actual_cache: a list of ``SetOfCims`` objects |
||||
""" |
||||
|
||||
def __init__(self): |
||||
"""Constructor Method |
||||
""" |
||||
self._list_of_sets_of_parents = [] |
||||
self._actual_cache = [] |
||||
|
||||
def find(self, parents_comb: typing.Set): #typing.Union[typing.Set, str] |
||||
""" |
||||
Tries to find in cache given the symbolic parents combination ``parents_comb`` the ``SetOfCims`` |
||||
related to that ``parents_comb``. |
||||
|
||||
:param parents_comb: the parents related to that ``SetOfCims`` |
||||
:type parents_comb: Set |
||||
:return: A ``SetOfCims`` object if the ``parents_comb`` index is found in ``__list_of_sets_of_parents``. |
||||
None otherwise. |
||||
:rtype: SetOfCims |
||||
""" |
||||
try: |
||||
#print("Cache State:", self.list_of_sets_of_indxs) |
||||
#print("Look For:", parents_comb) |
||||
result = self._actual_cache[self._list_of_sets_of_parents.index(parents_comb)] |
||||
#print("CACHE HIT!!!!", parents_comb) |
||||
return result |
||||
except ValueError: |
||||
return None |
||||
|
||||
def put(self, parents_comb: typing.Set, socim: SetOfCims): |
||||
"""Place in cache the ``SetOfCims`` object, and the related symbolic index ``parents_comb`` in |
||||
``__list_of_sets_of_parents``. |
||||
|
||||
:param parents_comb: the symbolic set index |
||||
:type parents_comb: Set |
||||
:param socim: the related SetOfCims object |
||||
:type socim: SetOfCims |
||||
""" |
||||
#print("Putting in cache:", parents_comb) |
||||
self._list_of_sets_of_parents.append(parents_comb) |
||||
self._actual_cache.append(socim) |
||||
|
||||
def clear(self): |
||||
"""Clear the contents both of ``__actual_cache`` and ``__list_of_sets_of_parents``. |
||||
""" |
||||
del self._list_of_sets_of_parents[:] |
||||
del self._actual_cache[:] |
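||||
 |
||||
# Typical access pattern (sketch): look up before estimating, store on a miss. |
||||
# |
||||
#   cims = cache.find(parents_comb) |
||||
#   if cims is None: |
||||
#       cims = ...  # estimate the SetOfCims for this parents combination |
||||
#       cache.put(parents_comb, cims) |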
@ -0,0 +1,176 @@ |
||||
import json |
||||
import typing |
||||
|
||||
import pandas as pd |
||||
|
||||
|
||||
from .abstract_importer import AbstractImporter |
||||
|
||||
|
||||
class JsonImporter(AbstractImporter): |
||||
"""Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare |
||||
the data in json extension. |
||||
|
||||
:param file_path: the path of the file that contains the data to be imported |
||||
:type file_path: string |
||||
:param samples_label: the reference key for the samples in the trajectories |
||||
:type samples_label: string |
||||
:param structure_label: the reference key for the structure of the network data |
||||
:type structure_label: string |
||||
:param variables_label: the reference key for the cardinalites of the nodes data |
||||
:type variables_label: string |
||||
:param time_key: the key used to identify the timestamps in each trajectory |
||||
:type time_key: string |
||||
:param variables_key: the key used to identify the names of the variables in the net |
||||
:type variables_key: string |
||||
:_array_indx: the index of the outer JsonArray to extract the data from |
||||
:type _array_indx: int |
||||
:_df_samples_list: a Dataframe list in which every dataframe contains a trajectory |
||||
:_raw_data: The raw contents of the json file to import |
||||
:type _raw_data: List |
||||
""" |
||||
|
||||
def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str, |
||||
variables_key: str): |
||||
"""Constructor method |
||||
|
||||
.. note:: |
||||
This constructor calls also the method ``read_json_file()``, so after the construction of the object |
||||
the class member ``_raw_data`` will contain the raw imported json data. |
||||
|
||||
""" |
||||
self._samples_label = samples_label |
||||
self._structure_label = structure_label |
||||
self._variables_label = variables_label |
||||
self._time_key = time_key |
||||
self._variables_key = variables_key |
||||
self._df_samples_list = None |
||||
self._array_indx = None |
||||
super(JsonImporter, self).__init__(file_path) |
||||
self._raw_data = self.read_json_file() |
||||
|
||||
def import_data(self, indx: int) -> None: |
||||
"""Implements the abstract method of :class:`AbstractImporter`. |
||||
|
||||
:param indx: the index of the outer JsonArray to extract the data from |
||||
:type indx: int |
||||
""" |
||||
self._array_indx = indx |
||||
self._df_samples_list = self.import_trajectories(self._raw_data) |
||||
self._sorter = self.build_sorter(self._df_samples_list[0]) |
||||
self.compute_row_delta_in_all_samples_frames(self._df_samples_list) |
||||
self.clear_data_frame_list() |
||||
self._df_structure = self.import_structure(self._raw_data) |
||||
self._df_variables = self.import_variables(self._raw_data) |
||||
|
||||
def import_trajectories(self, raw_data: typing.List) -> typing.List: |
||||
"""Imports the trajectories from the list of dicts ``raw_data``. |
||||
|
||||
:param raw_data: List of Dicts |
||||
:type raw_data: List |
||||
:return: List of dataframes containing all the trajectories |
||||
:rtype: List |
||||
""" |
||||
return self.normalize_trajectories(raw_data, self._array_indx, self._samples_label) |
||||
|
||||
def import_structure(self, raw_data: typing.List) -> pd.DataFrame: |
||||
"""Imports in a dataframe the data in the list raw_data at the key ``_structure_label`` |
||||
|
||||
:param raw_data: List of Dicts |
||||
:type raw_data: List |
||||
:return: Dataframe containing the starting and ending nodes of every arc of the network |
||||
:rtype: pandas.Dataframe |
||||
""" |
||||
return self.one_level_normalizing(raw_data, self._array_indx, self._structure_label) |
||||
|
||||
def import_variables(self, raw_data: typing.List) -> pd.DataFrame: |
||||
"""Imports the data in ``raw_data`` at the key ``_variables_label``. |
||||
|
||||
:param raw_data: List of Dicts |
||||
:type raw_data: List |
||||
:return: Dataframe containing the variables' symbolic labels and their cardinalities |
||||
:rtype: pandas.Dataframe |
||||
""" |
||||
return self.one_level_normalizing(raw_data, self._array_indx, self._variables_label) |
||||
|
||||
def read_json_file(self) -> typing.List: |
||||
"""Reads the JSON file in the path self.filePath. |
||||
|
||||
:return: The contents of the json file |
||||
:rtype: List |
||||
""" |
||||
with open(self._file_path) as f: |
||||
data = json.load(f) |
||||
return data |
||||
|
||||
def one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame: |
||||
"""Extracts the one-level nested data in the list ``raw_data`` at the index ``indx`` at the key ``key``. |
||||
|
||||
:param raw_data: List of Dicts |
||||
:type raw_data: List |
||||
:param indx: The index of the array from which the data have to be extracted |
||||
:type indx: int |
||||
:param key: the key of the Dicts from which to extract data |
||||
:type key: string |
||||
:return: A normalized dataframe |
||||
:rtype: pandas.Dataframe |
||||
""" |
||||
return pd.DataFrame(raw_data[indx][key]) |
||||
|
||||
def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str) -> typing.List: |
||||
""" |
||||
Extracts the trajectories in ``raw_data`` at the index ``indx`` at the key ``trajectories_key``. |
||||
|
||||
:param raw_data: List of Dicts |
||||
:type raw_data: List |
||||
:param indx: The index of the array from which the data have to be extracted |
||||
:type indx: int |
||||
:param trajectories_key: the key of the trajectories objects |
||||
:type trajectories_key: string |
||||
:return: A list of dataframes containing the trajectories |
||||
:rtype: List |
||||
""" |
||||
smps = raw_data[indx][trajectories_key] |
||||
df_samples_list = [pd.DataFrame(sample) for sample in smps] |
||||
return df_samples_list |
||||
|
||||
def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List: |
||||
"""Implements the abstract method build_sorter of the :class:`AbstractImporter` for this dataset. |
||||
""" |
||||
columns_header = list(sample_frame.columns.values) |
||||
columns_header.remove(self._time_key) |
||||
return columns_header |
||||
|
||||
def clear_data_frame_list(self) -> None: |
||||
"""Removes all values present in the dataframes in the list ``_df_samples_list``. |
||||
""" |
||||
for indx in range(len(self._df_samples_list)): |
||||
self._df_samples_list[indx] = self._df_samples_list[indx].iloc[0:0] |
||||
|
||||
def dataset_id(self) -> object: |
||||
return self._array_indx |
||||
|
||||
def import_sampled_cims(self, raw_data: typing.List, indx: int, cims_key: str) -> typing.Dict: |
||||
"""Imports the synthetic CIMS in the dataset in a dictionary, using variables labels |
||||
as keys for the set of CIMS of a particular node. |
||||
|
||||
:param raw_data: List of Dicts |
||||
:type raw_data: List |
||||
:param indx: The index of the array from which the data have to be extracted |
||||
:type indx: int |
||||
:param cims_key: the key where the json object cims are placed |
||||
:type cims_key: string |
||||
:return: a dictionary containing the sampled CIMS for all the variables in the net |
||||
:rtype: Dictionary |
||||
""" |
||||
cims_for_all_vars = {} |
||||
for var in raw_data[indx][cims_key]: |
||||
sampled_cims_list = [] |
||||
cims_for_all_vars[var] = sampled_cims_list |
||||
for p_comb in raw_data[indx][cims_key][var]: |
||||
cims_for_all_vars[var].append(pd.DataFrame(raw_data[indx][cims_key][var][p_comb]).to_numpy()) |
||||
return cims_for_all_vars |
||||
|
||||
|
||||
|
@ -0,0 +1,65 @@ |
||||
import json |
||||
import typing |
||||
|
||||
import pandas as pd |
||||
import numpy as np |
||||
|
||||
from .abstract_importer import AbstractImporter |
||||
|
||||
|
||||
|
||||
class SampleImporter(AbstractImporter): |
||||
"""Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare |
||||
the data loaded directly by using DataFrame |
||||
|
||||
:param trajectory_list: the data that describes the trajectories |
||||
:type trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List] |
||||
:param variables: the data that describes the variables with name and cardinality |
||||
:type variables: typing.Union[pd.DataFrame, np.ndarray, typing.List] |
||||
:param prior_net_structure: the data of the real structure, if it exists |
||||
:type prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List] |
||||
|
||||
:_df_samples_list: a Dataframe list in which every dataframe contains a trajectory |
||||
""" |
||||
|
||||
def __init__(self, |
||||
trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None, |
||||
variables: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None, |
||||
prior_net_structure: typing.Union[pd.DataFrame, np.ndarray,typing.List] = None): |
||||
|
||||
'If the data are not DataFrames, they will be converted' |
||||
if isinstance(variables, list) or isinstance(variables, np.ndarray): |
||||
variables = pd.DataFrame(variables) |
||||
if isinstance(prior_net_structure, list) or isinstance(prior_net_structure, np.ndarray): |
||||
prior_net_structure = pd.DataFrame(prior_net_structure) |
||||
|
||||
super(SampleImporter, self).__init__(trajectory_list =trajectory_list, |
||||
variables= variables, |
||||
prior_net_structure=prior_net_structure) |
||||
|
||||
def import_data(self, header_column = None): |
||||
|
||||
if header_column is not None: |
||||
self._sorter = header_column |
||||
else: |
||||
self._sorter = self.build_sorter(self._df_samples_list[0]) |
||||
|
||||
samples_list= self._df_samples_list |
||||
|
||||
if isinstance(samples_list, np.ndarray): |
||||
samples_list = samples_list.tolist() |
||||
|
||||
self.compute_row_delta_in_all_samples_frames(samples_list) |
||||
|
||||
def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List: |
||||
"""Implements the abstract method build_sorter of the :class:`AbstractImporter` in order to get the ordered variables list. |
||||
""" |
||||
columns_header = list(sample_frame.columns.values) |
||||
del columns_header[0] |
||||
return columns_header |
||||
|
||||
|
||||
def dataset_id(self) -> object: |
||||
pass |
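||||
 |
||||
# Minimal sketch (the column names below are assumptions, not library API): |
||||
# |
||||
#   variables = pd.DataFrame([['X', 2], ['Y', 2]], columns=['Name', 'Value']) |
||||
#   importer = SampleImporter(trajectory_list=[df_traj1, df_traj2], |
||||
#                             variables=variables) |
||||
#   importer.import_data(header_column=['X', 'Y']) |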
@ -0,0 +1,39 @@ |
||||
import glob |
||||
import os |
||||
|
||||
import sys |
||||
sys.path.append("./PyCTBN/") |
||||
|
||||
import structure_graph.network_graph as ng |
||||
import structure_graph.sample_path as sp |
||||
import structure_graph.set_of_cims as sofc |
||||
import estimators.parameters_estimator as pe |
||||
import utility.json_importer as ji |
||||
|
||||
|
||||
def main(): |
||||
read_files = glob.glob(os.path.join('./data', "*.json")) #Take all json files in this dir |
||||
#import data |
||||
importer = ji.JsonImporter(read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
||||
#Create a SamplePath Obj |
||||
s1 = sp.SamplePath(importer) |
||||
#Build the trajectories and the structural information |
||||
s1.build_trajectories() |
||||
s1.build_structure() |
||||
#From The Structure Object build the Graph |
||||
g = ng.NetworkGraph(s1.structure) |
||||
#Select a node whose parameters you want to estimate |
||||
node = g.nodes[1] |
||||
#Init the graph specifically for THIS node |
||||
g.fast_init(node) |
||||
#Use SamplePath and Graph to create a ParametersEstimator Object |
||||
p1 = pe.ParametersEstimator(s1, g) |
||||
#Init the ParametersEstimator specifically for THIS node |
||||
p1.fast_init(node) |
||||
#Compute the parameters |
||||
sofc1 = p1.compute_parameters_for_node(node) |
||||
#The estimated CIMS are inside the resulting SetOfCims Obj |
||||
print(sofc1.actual_cims) |
||||
|
||||
if __name__ == "__main__": |
||||
main() |
@ -0,0 +1,8 @@ |
||||
import PyCTBN.estimators |
||||
from PyCTBN.estimators import * |
||||
import PyCTBN.optimizers |
||||
from PyCTBN.optimizers import * |
||||
import PyCTBN.structure_graph |
||||
from PyCTBN.structure_graph import * |
||||
import PyCTBN.utility |
||||
from PyCTBN.utility import * |
@ -0,0 +1,5 @@ |
||||
from .fam_score_calculator import FamScoreCalculator |
||||
from .parameters_estimator import ParametersEstimator |
||||
from .structure_estimator import StructureEstimator |
||||
from .structure_constraint_based_estimator import StructureConstraintBasedEstimator |
||||
from .structure_score_based_estimator import StructureScoreBasedEstimator |
@ -0,0 +1,272 @@ |
||||
|
||||
import itertools |
||||
import json |
||||
import typing |
||||
|
||||
import networkx as nx |
||||
import numpy as np |
||||
from networkx.readwrite import json_graph |
||||
|
||||
from math import log |
||||
|
||||
from scipy.special import loggamma |
||||
from random import choice |
||||
|
||||
from ..structure_graph.set_of_cims import SetOfCims |
||||
from ..structure_graph.network_graph import NetworkGraph |
||||
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix |
||||
|
||||
|
||||
''' |
||||
|
||||
''' |
||||
|
||||
|
||||
class FamScoreCalculator: |
||||
""" |
||||
Has the task of calculating the FamScore of a node by using a Bayesian score function |
||||
""" |
||||
|
||||
def __init__(self): |
||||
#np.seterr('raise') |
||||
pass |
||||
|
||||
# region theta |
||||
|
||||
def marginal_likelihood_theta(self, |
||||
cims: ConditionalIntensityMatrix, |
||||
alpha_xu: float, |
||||
alpha_xxu: float): |
||||
""" |
||||
Calculate the value of the marginal likelihood over theta of the node identified by the label node_id |
||||
|
||||
:param cims: np.array with all the node's cims |
||||
:type cims: np.array |
||||
:param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 0.1 |
||||
:type alpha_xu: float |
||||
:param alpha_xxu: distributed hyperparameter over the CTBN’s theta parameters |
||||
:type alpha_xxu: float |
||||
|
||||
:return: the value of the marginal likelihood over theta |
||||
:rtype: float |
||||
""" |
||||
return np.sum( |
||||
[self.variable_cim_xu_marginal_likelihood_theta(cim, |
||||
alpha_xu, |
||||
alpha_xxu) |
||||
for cim in cims]) |
||||
|
||||
def variable_cim_xu_marginal_likelihood_theta(self, |
||||
cim: ConditionalIntensityMatrix, |
||||
alpha_xu: float, |
||||
alpha_xxu: float): |
||||
""" |
||||
Calculate the value of the marginal likelihood over theta given a cim |
||||
|
||||
:param cim: A conditional_intensity_matrix object with the sufficient statistics |
||||
:type cim: class:'ConditionalIntensityMatrix' |
||||
:param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 0.1 |
||||
:type alpha_xu: float |
||||
:param alpha_xxu: distributed hyperparameter over the CTBN’s theta parameters |
||||
:type alpha_xxu: float |
||||
|
||||
:return: the value of the marginal likelihood over theta |
||||
:rtype: float |
||||
""" |
||||
|
||||
'get cim length' |
||||
values = len(cim._state_residence_times) |
||||
|
||||
'compute the marginal likelihood for the current cim' |
||||
return np.sum([ |
||||
self.single_cim_xu_marginal_likelihood_theta( |
||||
index, |
||||
cim, |
||||
alpha_xu, |
||||
alpha_xxu) |
||||
for index in range(values)]) |
||||
|
||||
def single_cim_xu_marginal_likelihood_theta(self, |
||||
index: int, |
||||
cim: ConditionalIntensityMatrix, |
||||
alpha_xu: float, |
||||
alpha_xxu: float): |
||||
""" |
||||
Calculate the marginal likelihood over theta of the node when it assumes a specific value |
||||
and a specific parents' assignment |
||||
|
||||
:param cim: A conditional_intensity_matrix object with the sufficient statistics |
||||
:type cim: class:'ConditionalIntensityMatrix' |
||||
:param alpha_xu: hyperparameter over the CTBN’s q parameters |
||||
:type alpha_xu: float |
||||
:param alpha_xxu: distributed hyperparameter over the CTBN’s theta parameters
||||
:type alpha_xxu: float |
||||
|
||||
:return: the value of the marginal likelihood over theta when the node assumes a specific value
||||
:rtype: float |
||||
""" |
||||
|
||||
values = list(range(len(cim._state_residence_times))) |
||||
|
||||
'remove the index because of the x != x^ condition in the summation ' |
||||
values.remove(index) |
||||
|
||||
'uncomment for alpha xx not uniform' |
||||
#alpha_xxu = alpha_xu * cim.state_transition_matrix[index,index_x_first] / cim.state_transition_matrix[index, index]) |
||||
|
||||
return (loggamma(alpha_xu) - loggamma(alpha_xu + cim.state_transition_matrix[index, index])) \ |
||||
+ \ |
||||
np.sum([self.single_internal_cim_xxu_marginal_likelihood_theta( |
||||
cim.state_transition_matrix[index,index_x_first], |
||||
alpha_xxu) |
||||
for index_x_first in values]) |
||||
|
||||
|
||||
def single_internal_cim_xxu_marginal_likelihood_theta(self, |
||||
M_xxu_suff_stats: float, |
||||
alpha_xxu: float=1): |
||||
"""Calculate the second part of the marginal likelihood over theta formula |
||||
|
||||
:param M_xxu_suff_stats: value of the sufficient statistic M[xx'|u]
||||
:type M_xxu_suff_stats: float |
||||
:param alpha_xxu: distributed hyperparameter over the CTBN’s theta parameters
||||
:type alpha_xxu: float |
||||
|
||||
:return: the value of the marginal likelihood over theta when the node assumes a specific value
||||
:rtype: float |
||||
""" |
||||
return loggamma(alpha_xxu+M_xxu_suff_stats) - loggamma(alpha_xxu) |
||||
|
||||
# endregion |
||||
|
||||
# region q |
||||
|
||||
def marginal_likelihood_q(self, |
||||
cims: np.array, |
||||
tau_xu: float=0.1, |
||||
alpha_xu: float=1): |
||||
""" |
||||
Calculate the value of the marginal likelihood over q of the node, summed over all its CIMs
||||
|
||||
:param cims: np.array with all the node's cims |
||||
:type cims: np.array |
||||
:param tau_xu: hyperparameter over the CTBN’s q parameters |
||||
:type tau_xu: float |
||||
:param alpha_xu: hyperparameter over the CTBN’s q parameters |
||||
:type alpha_xu: float |
||||
|
||||
|
||||
:return: the value of the marginal likelihood over q |
||||
:rtype: float |
||||
""" |
||||
|
||||
return np.sum([self.variable_cim_xu_marginal_likelihood_q(cim, tau_xu, alpha_xu) for cim in cims]) |
||||
|
||||
def variable_cim_xu_marginal_likelihood_q(self, |
||||
cim: ConditionalIntensityMatrix, |
||||
tau_xu: float=0.1, |
||||
alpha_xu: float=1): |
||||
""" |
||||
Calculate the value of the marginal likelihood over q given a cim |
||||
|
||||
:param cim: A conditional_intensity_matrix object with the sufficient statistics |
||||
:type cim: class:'ConditionalIntensityMatrix' |
||||
:param tau_xu: hyperparameter over the CTBN’s q parameters |
||||
:type tau_xu: float |
||||
:param alpha_xu: hyperparameter over the CTBN’s q parameters |
||||
:type alpha_xu: float |
||||
|
||||
|
||||
:return: the value of the marginal likelihood over q |
||||
:rtype: float |
||||
""" |
||||
|
||||
'get cim length' |
||||
values = len(cim._state_residence_times)
||||
|
||||
'compute the marginal likelihood for the current cim' |
||||
return np.sum([ |
||||
self.single_cim_xu_marginal_likelihood_q( |
||||
cim.state_transition_matrix[index, index], |
||||
cim._state_residence_times[index], |
||||
tau_xu, |
||||
alpha_xu) |
||||
for index in range(values)]) |
||||
|
||||
|
||||
def single_cim_xu_marginal_likelihood_q(self, |
||||
M_xu_suff_stats: float, |
||||
T_xu_suff_stats: float, |
||||
tau_xu: float=0.1, |
||||
alpha_xu: float=1): |
||||
""" |
||||
Calculate the marginal likelihood over q when the node assumes a specific value
and a specific parent-set assignment
||||
|
||||
:param M_xu_suff_stats: value of the sufficient statistic M[x|u]
:type M_xu_suff_stats: float
:param T_xu_suff_stats: value of the sufficient statistic T[x|u]
:type T_xu_suff_stats: float
||||
:param tau_xu: hyperparameter over the CTBN’s q parameters |
||||
:type tau_xu: float |
||||
:param alpha_xu: hyperparameter over the CTBN’s q parameters |
||||
:type alpha_xu: float |
||||
|
||||
|
||||
:return: the value of the marginal likelihood of the node when it assumes a specific value
||||
:rtype: float |
||||
""" |
||||
return (loggamma(alpha_xu + M_xu_suff_stats + 1) +
        log(tau_xu) * (alpha_xu + 1)) - \
       (loggamma(alpha_xu + 1) +
        log(tau_xu + T_xu_suff_stats) * (alpha_xu + M_xu_suff_stats + 1))
||||
|
||||
# endregion
||||
|
||||
def get_fam_score(self, |
||||
cims: np.array, |
||||
tau_xu: float=0.1, |
||||
alpha_xu: float=1): |
||||
""" |
||||
Calculate the FamScore value of the node |
||||
|
||||
|
||||
:param cims: np.array with all the node's cims |
||||
:type cims: np.array |
||||
:param tau_xu: hyperparameter over the CTBN’s q parameters, default to 0.1 |
||||
:type tau_xu: float, optional |
||||
:param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 1 |
||||
:type alpha_xu: float, optional |
||||
|
||||
|
||||
:return: the FamScore value of the node |
||||
:rtype: float |
||||
""" |
||||
#print("------") |
||||
#print(self.marginal_likelihood_q(cims, |
||||
# tau_xu, |
||||
# alpha_xu)) |
||||
|
||||
#print(self.marginal_likelihood_theta(cims, |
||||
# alpha_xu, |
||||
# alpha_xxu)) |
||||
'calculate alpha_xxu as a uniform distribution' |
||||
alpha_xxu = alpha_xu / (len(cims[0]._state_residence_times) - 1)
||||
|
||||
return self.marginal_likelihood_q(cims, |
||||
tau_xu, |
||||
alpha_xu) \ |
||||
+ \ |
||||
self.marginal_likelihood_theta(cims, |
||||
alpha_xu, |
||||
alpha_xxu) |
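As a quick sanity check of the class above, here is a minimal sketch that builds a single CIM from made-up sufficient statistics (the residence times and transition counts below are purely illustrative) and scores it. Note that ``get_fam_score`` expects an array of CIMs, one per parent-state combination; a parentless node has exactly one.

import numpy as np
from PyCTBN.estimators import FamScoreCalculator
from PyCTBN.structure_graph import ConditionalIntensityMatrix

# Hypothetical sufficient statistics for a binary node with no parents:
# T[x|u] residence times and M[x, x'|u] transition counts (the diagonal
# holds the total number of transitions leaving each state).
state_res_times = np.array([10.0, 5.0])
state_transitions = np.array([[6, 6], [4, 4]])

cim = ConditionalIntensityMatrix(state_res_times, state_transitions)
calculator = FamScoreCalculator()
# One CIM because the node has a single (empty) parent-state combination.
score = calculator.get_fam_score(np.array([cim]), tau_xu=0.1, alpha_xu=1)
print(score)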
@ -0,0 +1,143 @@ |
||||
import sys |
||||
sys.path.append('../') |
||||
import numpy as np |
||||
|
||||
from ..structure_graph.network_graph import NetworkGraph |
||||
from ..structure_graph.set_of_cims import SetOfCims |
||||
from ..structure_graph.trajectory import Trajectory |
||||
|
||||
|
||||
class ParametersEstimator(object): |
||||
Has the task of computing the cims of a particular node given the trajectories and the net structure
||||
in the graph ``_net_graph``. |
||||
|
||||
:param trajectories: the trajectories |
||||
:type trajectories: Trajectory |
||||
:param net_graph: the net structure |
||||
:type net_graph: NetworkGraph |
||||
:_single_set_of_cims: the set of cims object that will hold the cims of the node |
||||
""" |
||||
|
||||
def __init__(self, trajectories: Trajectory, net_graph: NetworkGraph): |
||||
"""Constructor Method |
||||
""" |
||||
self._trajectories = trajectories |
||||
self._net_graph = net_graph |
||||
self._single_set_of_cims = None |
||||
|
||||
def fast_init(self, node_id: str) -> None: |
||||
"""Initializes all the necessary structures for the parameters estimation for the node ``node_id``. |
||||
|
||||
:param node_id: the node label |
||||
:type node_id: string |
||||
""" |
||||
p_vals = self._net_graph._aggregated_info_about_nodes_parents[2] |
||||
node_states_number = self._net_graph.get_states_number(node_id) |
||||
self._single_set_of_cims = SetOfCims(node_id, p_vals, node_states_number, self._net_graph.p_combs) |
||||
|
||||
def compute_parameters_for_node(self, node_id: str) -> SetOfCims: |
||||
"""Compute the CIMS of the node identified by the label ``node_id``. |
||||
|
||||
:param node_id: the node label |
||||
:type node_id: string |
||||
:return: A SetOfCims object filled with the computed CIMS |
||||
:rtype: SetOfCims |
||||
""" |
||||
node_indx = self._net_graph.get_node_indx(node_id) |
||||
state_res_times = self._single_set_of_cims._state_residence_times |
||||
transition_matrices = self._single_set_of_cims._transition_matrices |
||||
ParametersEstimator.compute_state_res_time_for_node(self._trajectories.times, |
||||
self._trajectories.trajectory, |
||||
self._net_graph.time_filtering, |
||||
self._net_graph.time_scalar_indexing_strucure, |
||||
state_res_times) |
||||
ParametersEstimator.compute_state_transitions_for_a_node(node_indx, self._trajectories.complete_trajectory, |
||||
self._net_graph.transition_filtering, |
||||
self._net_graph.transition_scalar_indexing_structure, |
||||
transition_matrices) |
||||
self._single_set_of_cims.build_cims(state_res_times, transition_matrices) |
||||
return self._single_set_of_cims |
||||
|
||||
@staticmethod |
||||
def compute_state_res_time_for_node(times: np.ndarray, trajectory: np.ndarray, |
||||
cols_filter: np.ndarray, scalar_indexes_struct: np.ndarray, |
||||
T: np.ndarray) -> None: |
||||
"""Compute the state residence times for a node and fill the matrix ``T`` with the results |
||||
|
||||
:param times: the time deltas vector
:type times: numpy.array
||||
:param trajectory: the trajectory |
||||
:type trajectory: numpy.ndArray |
||||
:param cols_filter: the columns filtering structure |
||||
:type cols_filter: numpy.array |
||||
:param scalar_indexes_struct: the indexing structure |
||||
:type scalar_indexes_struct: numpy.array |
||||
:param T: the state residence times vectors |
||||
:type T: numpy.ndArray |
||||
""" |
||||
T[:] = np.bincount(np.sum(trajectory[:, cols_filter] * scalar_indexes_struct / scalar_indexes_struct[0],
                          axis=1).astype(int),
                   weights=times,
                   minlength=scalar_indexes_struct[-1]).reshape(-1, T.shape[1])
||||
|
||||
@staticmethod |
||||
def compute_state_transitions_for_a_node(node_indx: int, trajectory: np.ndarray, cols_filter: np.ndarray, |
||||
scalar_indexing: np.ndarray, M: np.ndarray) -> None: |
||||
"""Compute the state residence times for a node and fill the matrices ``M`` with the results. |
||||
|
||||
:param node_indx: the index of the node |
||||
:type node_indx: int |
||||
:param trajectory: the trajectory |
||||
:type trajectory: numpy.ndArray |
||||
:param cols_filter: the columns filtering structure |
||||
:type cols_filter: numpy.array |
||||
:param scalar_indexing: the indexing structure |
||||
:type scalar_indexing: numpy.array |
||||
:param M: the state transitions matrices |
||||
:type M: numpy.ndArray |
||||
""" |
||||
diag_indices = np.array([x * M.shape[1] + x % M.shape[1] for x in range(M.shape[0] * M.shape[1])], |
||||
dtype=np.int64) |
||||
trj_tmp = trajectory[trajectory[:, int(trajectory.shape[1] / 2) + node_indx].astype(int) >= 0]
||||
M[:] = np.bincount(np.sum(trj_tmp[:, cols_filter] * scalar_indexing / scalar_indexing[0],
                          axis=1).astype(int),
                   minlength=scalar_indexing[-1]).reshape(-1, M.shape[1], M.shape[2])
||||
M_raveled = M.ravel() |
||||
M_raveled[diag_indices] = 0 |
||||
M_raveled[diag_indices] = np.sum(M, axis=2).ravel() |
||||
|
||||
def init_sets_cims_container(self):
    # Legacy helper: assumes a ``SetsOfCimsContainer`` class (imported as
    # ``acims`` in an older package layout) that aggregates the CIM sets of
    # every node.
    self.sets_of_cims_struct = acims.SetsOfCimsContainer(self._net_graph.nodes,
                                                         self._net_graph.nodes_values,
                                                         self._net_graph.get_ordered_by_indx_parents_values_for_all_nodes(),
                                                         self._net_graph.p_combs)

def compute_parameters(self):
    # Legacy batch variant of ``compute_parameters_for_node``: fills the
    # container built by ``init_sets_cims_container`` for every node.
    for indx, aggr in enumerate(zip(self._net_graph.nodes, self.sets_of_cims_struct.sets_of_cims)):
        ParametersEstimator.compute_state_res_time_for_node(self._trajectories.times,
                                                            self._trajectories.trajectory,
                                                            self._net_graph.time_filtering[indx],
                                                            self._net_graph.time_scalar_indexing_strucure[indx],
                                                            aggr[1]._state_residence_times)
        ParametersEstimator.compute_state_transitions_for_a_node(self._net_graph.get_node_indx(aggr[0]),
                                                                 self._trajectories.complete_trajectory,
                                                                 self._net_graph.transition_filtering[indx],
                                                                 self._net_graph.transition_scalar_indexing_structure[indx],
                                                                 aggr[1]._transition_matrices)
        aggr[1].build_cims(aggr[1]._state_residence_times, aggr[1]._transition_matrices)
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
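A minimal usage sketch for the estimator above. It assumes ``importer`` is an already-configured importer object (dataset loading is out of scope here) and follows the usual SamplePath build steps; the graph is re-initialized per node before that node's CIMs are computed.

from PyCTBN.estimators import ParametersEstimator
from PyCTBN.structure_graph import NetworkGraph, SamplePath

# `importer` is assumed to be a configured importer instance.
sample_path = SamplePath(importer)
sample_path.build_trajectories()
sample_path.build_structure()

graph = NetworkGraph(sample_path.structure)
for node in graph.nodes:
    # Build the filtering/indexing structures for this node only.
    graph.fast_init(node)
    estimator = ParametersEstimator(sample_path.trajectories, graph)
    estimator.fast_init(node)
    set_of_cims = estimator.compute_parameters_for_node(node)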
@ -0,0 +1,238 @@ |
||||
|
||||
import itertools |
||||
import json |
||||
import typing |
||||
|
||||
import networkx as nx |
||||
import numpy as np |
||||
from networkx.readwrite import json_graph |
||||
import os |
||||
from scipy.stats import chi2 as chi2_dist |
||||
from scipy.stats import f as f_dist |
||||
from tqdm import tqdm |
||||
|
||||
from ..utility.cache import Cache |
||||
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix |
||||
from ..structure_graph.network_graph import NetworkGraph |
||||
from .parameters_estimator import ParametersEstimator |
||||
from .structure_estimator import StructureEstimator |
||||
from ..structure_graph.sample_path import SamplePath |
||||
from ..structure_graph.structure import Structure |
||||
from ..optimizers.constraint_based_optimizer import ConstraintBasedOptimizer |
||||
|
||||
import concurrent.futures |
||||
|
||||
|
||||
|
||||
import multiprocessing |
||||
from multiprocessing import Pool |
||||
|
||||
|
||||
class StructureConstraintBasedEstimator(StructureEstimator): |
||||
""" |
||||
Has the task of estimating the network structure given the trajectories in samplepath by using a constraint-based approach. |
||||
|
||||
:param sample_path: the _sample_path object containing the trajectories and the real structure |
||||
:type sample_path: SamplePath |
||||
:param exp_test_alfa: the significance level for the exponential Hp test |
||||
:type exp_test_alfa: float |
||||
:param chi_test_alfa: the significance level for the chi Hp test |
||||
:type chi_test_alfa: float |
||||
:_nodes: the nodes labels |
||||
:_nodes_vals: the nodes cardinalities |
||||
:_nodes_indxs: the nodes indexes |
||||
:_complete_graph: the complete directed graph built using the nodes labels in ``_nodes`` |
||||
:_cache: the Cache object |
||||
""" |
||||
|
||||
def __init__(self, sample_path: SamplePath, exp_test_alfa: float, chi_test_alfa: float, known_edges: typing.List = None, thumb_threshold: int = 25):
    super().__init__(sample_path, known_edges)
||||
self._exp_test_sign = exp_test_alfa |
||||
self._chi_test_alfa = chi_test_alfa |
||||
self._thumb_threshold = thumb_threshold |
||||
self._cache = Cache() |
||||
|
||||
def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int, |
||||
tot_vars_count: int, parent_indx, child_indx) -> bool: |
||||
"""Performs a complete independence test on the directed graphs G1 = {test_child U parent_set} |
||||
G2 = {G1 U test_parent} (added as an additional parent of the test_child). |
||||
Generates all the necessary structures and data to perform the tests.
||||
|
||||
:param test_parent: the node label of the test parent |
||||
:type test_parent: string |
||||
:param test_child: the node label of the child |
||||
:type test_child: string |
||||
:param parent_set: the common parent set |
||||
:type parent_set: List |
||||
:param child_states_numb: the cardinality of the ``test_child`` |
||||
:type child_states_numb: int |
||||
:param tot_vars_count: the total number of variables in the net |
||||
:type tot_vars_count: int |
||||
:return: True iff test_child and test_parent are independent given the sep_set parent_set. False otherwise |
||||
:rtype: bool |
||||
""" |
||||
p_set = parent_set[:] |
||||
complete_info = parent_set[:] |
||||
complete_info.append(test_child) |
||||
|
||||
parents = np.array(parent_set) |
||||
parents = np.append(parents, test_parent) |
||||
sorted_parents = self._nodes[np.isin(self._nodes, parents)] |
||||
cims_filter = sorted_parents != test_parent |
||||
|
||||
p_set.insert(0, test_parent) |
||||
sofc2 = self._cache.find(set(p_set)) |
||||
|
||||
if not sofc2: |
||||
complete_info.append(test_parent) |
||||
bool_mask2 = np.isin(self._nodes, complete_info) |
||||
l2 = list(self._nodes[bool_mask2]) |
||||
indxs2 = self._nodes_indxs[bool_mask2] |
||||
vals2 = self._nodes_vals[bool_mask2] |
||||
eds2 = list(itertools.product(p_set, [test_child]))  # wrap test_child so labels longer than one character are not split
||||
s2 = Structure(l2, indxs2, vals2, eds2, tot_vars_count) |
||||
g2 = NetworkGraph(s2) |
||||
g2.fast_init(test_child) |
||||
p2 = ParametersEstimator(self._sample_path.trajectories, g2) |
||||
p2.fast_init(test_child) |
||||
sofc2 = p2.compute_parameters_for_node(test_child) |
||||
self._cache.put(set(p_set), sofc2) |
||||
|
||||
del p_set[0] |
||||
sofc1 = self._cache.find(set(p_set)) |
||||
if not sofc1: |
||||
g2.remove_node(test_parent) |
||||
g2.fast_init(test_child) |
||||
p2 = ParametersEstimator(self._sample_path.trajectories, g2) |
||||
p2.fast_init(test_child) |
||||
sofc1 = p2.compute_parameters_for_node(test_child) |
||||
self._cache.put(set(p_set), sofc1) |
||||
thumb_value = 0.0 |
||||
if child_states_numb > 2: |
||||
parent_val = self._sample_path.structure.get_states_number(test_parent) |
||||
bool_mask_vals = np.isin(self._nodes, parent_set) |
||||
parents_vals = self._nodes_vals[bool_mask_vals] |
||||
thumb_value = self.compute_thumb_value(parent_val, child_states_numb, parents_vals) |
||||
for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs): |
||||
cond_cims = sofc2.filter_cims_with_mask(cims_filter, p_comb) |
||||
for cim2 in cond_cims: |
||||
if not self.independence_test(child_states_numb, cim1, cim2, thumb_value, parent_indx, child_indx): |
||||
return False |
||||
return True |
||||
|
||||
def independence_test(self, child_states_numb: int, cim1: ConditionalIntensityMatrix, |
||||
cim2: ConditionalIntensityMatrix, thumb_value: float, parent_indx, child_indx) -> bool: |
||||
"""Compute the actual independence test using two cims. |
||||
The exponential test is performed first; if its null hypothesis is not rejected,
the chi-squared test is performed as well.
||||
|
||||
:param child_states_numb: the cardinality of the test child |
||||
:type child_states_numb: int |
||||
:param cim1: a cim belonging to the graph without test parent |
||||
:type cim1: ConditionalIntensityMatrix |
||||
:param cim2: a cim belonging to the graph with test parent |
||||
:type cim2: ConditionalIntensityMatrix |
||||
:return: True iff both tests do NOT reject the null hypothesis of independence. False otherwise. |
||||
:rtype: bool |
||||
""" |
||||
M1 = cim1.state_transition_matrix |
||||
M2 = cim2.state_transition_matrix |
||||
r1s = M1.diagonal() |
||||
r2s = M2.diagonal() |
||||
C1 = cim1.cim |
||||
C2 = cim2.cim |
||||
if child_states_numb > 2: |
||||
if (np.sum(np.diagonal(M1)) / thumb_value) < self._thumb_threshold: |
||||
self._removable_edges_matrix[parent_indx][child_indx] = False |
||||
return False |
||||
F_stats = C2.diagonal() / C1.diagonal() |
||||
exp_alfa = self._exp_test_sign |
||||
for val in range(0, child_states_numb): |
||||
if F_stats[val] < f_dist.ppf(exp_alfa / 2, r1s[val], r2s[val]) or \ |
||||
F_stats[val] > f_dist.ppf(1 - exp_alfa / 2, r1s[val], r2s[val]): |
||||
return False |
||||
M1_no_diag = M1[~np.eye(M1.shape[0], dtype=bool)].reshape(M1.shape[0], -1) |
||||
M2_no_diag = M2[~np.eye(M2.shape[0], dtype=bool)].reshape( |
||||
M2.shape[0], -1) |
||||
chi_2_quantile = chi2_dist.ppf(1 - self._chi_test_alfa, child_states_numb - 1) |
||||
Ks = np.sqrt(r1s / r2s) |
||||
Ls = np.sqrt(r2s / r1s) |
||||
for val in range(0, child_states_numb): |
||||
Chi = np.sum(np.power(Ks[val] * M2_no_diag[val] - Ls[val] *M1_no_diag[val], 2) / |
||||
(M1_no_diag[val] + M2_no_diag[val])) |
||||
if Chi > chi_2_quantile: |
||||
return False |
||||
return True |
||||
|
||||
def compute_thumb_value(self, parent_val, child_val, parent_set_vals): |
||||
"""Compute the value to test against the thumb_threshold. |
||||
|
||||
:param parent_val: test parent's variable cardinality |
||||
:type parent_val: int |
||||
:param child_val: test child's variable cardinality |
||||
:type child_val: int |
||||
:param parent_set_vals: the cardinalities of the nodes in the current sep-set |
||||
:type parent_set_vals: List |
||||
:return: the thumb value for the current independence test |
||||
:rtype: int |
||||
""" |
||||
df = (child_val - 1) ** 2 |
||||
df = df * parent_val |
||||
for v in parent_set_vals: |
||||
df = df * v |
||||
return df |
||||
|
||||
def one_iteration_of_CTPC_algorithm(self, var_id: str, tot_vars_count: int)-> typing.List: |
||||
"""Performs an iteration of the CTPC algorithm using the node ``var_id`` as ``test_child``. |
||||
|
||||
:param var_id: the node label of the test child |
||||
:type var_id: string |
||||
""" |
||||
optimizer_obj = ConstraintBasedOptimizer( |
||||
node_id = var_id, |
||||
structure_estimator = self, |
||||
tot_vars_count = tot_vars_count) |
||||
return optimizer_obj.optimize_structure() |
||||
|
||||
|
||||
def ctpc_algorithm(self,disable_multiprocessing:bool= False ): |
||||
"""Compute the CTPC algorithm over the entire net. |
||||
""" |
||||
ctpc_algo = self.one_iteration_of_CTPC_algorithm |
||||
total_vars_numb = self._sample_path.total_variables_count |
||||
|
||||
n_nodes= len(self._nodes) |
||||
|
||||
total_vars_numb_array = [total_vars_numb] * n_nodes |
||||
|
||||
'get the number of CPU' |
||||
cpu_count = multiprocessing.cpu_count() |
||||
|
||||
|
||||
|
||||
'Remove all the edges from the structure' |
||||
self._sample_path.structure.clean_structure_edges() |
||||
|
||||
'Estimate the best parents for each node' |
||||
if disable_multiprocessing: |
||||
print("DISABILITATO") |
||||
cpu_count = 1 |
||||
list_edges_partial = [ctpc_algo(n,total_vars_numb) for n in self._nodes] |
||||
else: |
||||
with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor: |
||||
list_edges_partial = executor.map(ctpc_algo, |
||||
self._nodes, |
||||
total_vars_numb_array) |
||||
|
||||
return set(itertools.chain.from_iterable(list_edges_partial)) |
||||
|
||||
|
||||
def estimate_structure(self,disable_multiprocessing:bool=False): |
||||
return self.ctpc_algorithm(disable_multiprocessing=disable_multiprocessing) |
||||
|
||||
|
||||
|
||||
|
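Putting the pieces together, here is a sketch of a full constraint-based run. As before, ``importer`` is assumed to be a configured importer object, and both significance levels are illustrative choices.

from PyCTBN.estimators import StructureConstraintBasedEstimator
from PyCTBN.structure_graph import SamplePath

sample_path = SamplePath(importer)  # `importer` is assumed to be configured
sample_path.build_trajectories()
sample_path.build_structure()

estimator = StructureConstraintBasedEstimator(sample_path,
                                              exp_test_alfa=0.1,
                                              chi_test_alfa=0.1)
# Multiprocessing disabled for reproducible single-process behaviour.
edges = estimator.estimate_structure(disable_multiprocessing=True)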
@ -0,0 +1,187 @@ |
||||
|
||||
import itertools |
||||
import json |
||||
import typing |
||||
|
||||
import matplotlib.pyplot as plt |
||||
import networkx as nx |
||||
import numpy as np |
||||
from networkx.readwrite import json_graph |
||||
|
||||
from abc import ABC |
||||
|
||||
import abc
import os
||||
|
||||
from ..utility.cache import Cache |
||||
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix |
||||
from ..structure_graph.network_graph import NetworkGraph |
||||
from .parameters_estimator import ParametersEstimator |
||||
from ..structure_graph.sample_path import SamplePath |
||||
from ..structure_graph.structure import Structure |
||||
|
||||
|
||||
class StructureEstimator(object): |
||||
"""Has the task of estimating the network structure given the trajectories in ``samplepath``. |
||||
|
||||
:param sample_path: the _sample_path object containing the trajectories and the real structure |
||||
:type sample_path: SamplePath |
||||
:_nodes: the nodes labels |
||||
:_nodes_vals: the nodes cardinalities |
||||
:_nodes_indxs: the nodes indexes |
||||
:_complete_graph: the complete directed graph built using the nodes labels in ``_nodes`` |
||||
""" |
||||
|
||||
def __init__(self, sample_path: SamplePath, known_edges: typing.List = None): |
||||
self._sample_path = sample_path |
||||
self._nodes = np.array(self._sample_path.structure.nodes_labels) |
||||
self._nodes_vals = self._sample_path.structure.nodes_values |
||||
self._nodes_indxs = self._sample_path.structure.nodes_indexes |
||||
self._removable_edges_matrix = self.build_removable_edges_matrix(known_edges) |
||||
self._complete_graph = StructureEstimator.build_complete_graph(self._sample_path.structure.nodes_labels) |
||||
|
||||
|
||||
def build_removable_edges_matrix(self, known_edges: typing.List): |
||||
"""Builds a boolean matrix who shows if a edge could be removed or not, based on prior knowledge given: |
||||
|
||||
:param known_edges: the list of known edges
||||
:type known_edges: List |
||||
:return: a boolean matrix |
||||
:rtype: np.ndarray |
||||
""" |
||||
tot_vars_count = self._sample_path.total_variables_count |
||||
complete_adj_matrix = np.full((tot_vars_count, tot_vars_count), True) |
||||
if known_edges: |
||||
for edge in known_edges: |
||||
i = self._sample_path.structure.get_node_indx(edge[0]) |
||||
j = self._sample_path.structure.get_node_indx(edge[1]) |
||||
complete_adj_matrix[i][j] = False |
||||
return complete_adj_matrix |
||||
|
||||
@staticmethod |
||||
def build_complete_graph(node_ids: typing.List) -> nx.DiGraph: |
||||
"""Builds a complete directed graph (no self loops) given the nodes labels in the list ``node_ids``: |
||||
|
||||
:param node_ids: the list of nodes labels |
||||
:type node_ids: List |
||||
:return: a complete Digraph Object |
||||
:rtype: networkx.DiGraph |
||||
""" |
||||
complete_graph = nx.DiGraph() |
||||
complete_graph.add_nodes_from(node_ids) |
||||
complete_graph.add_edges_from(itertools.permutations(node_ids, 2)) |
||||
return complete_graph |
||||
|
||||
|
||||
@staticmethod |
||||
def generate_possible_sub_sets_of_size( u: typing.List, size: int, parent_label: str): |
||||
"""Creates a list containing all possible subsets of the list ``u`` of size ``size``, |
||||
that do not contains a the node identified by ``parent_label``. |
||||
|
||||
:param u: the list of nodes |
||||
:type u: List |
||||
:param size: the size of the subsets |
||||
:type size: int |
||||
:param parent_label: the node to exclude in the subsets generation |
||||
:type parent_label: string |
||||
:return: an Iterator Object containing a list of lists |
||||
:rtype: Iterator |
||||
""" |
||||
list_without_test_parent = u[:] |
||||
list_without_test_parent.remove(parent_label) |
||||
return map(list, itertools.combinations(list_without_test_parent, size)) |
||||
|
||||
def save_results(self) -> None: |
||||
"""Save the estimated Structure to a .json file in the path where the data are loaded from. |
||||
The file is named after the input dataset, with the prefix `results_` prepended.
||||
""" |
||||
res = json_graph.node_link_data(self._complete_graph) |
||||
name = self._sample_path._importer.file_path.rsplit('/', 1)[-1] |
||||
name = name.split('.', 1)[0] |
||||
name += '_' + str(self._sample_path._importer.dataset_id()) |
||||
name += '.json' |
||||
file_name = 'results_' + name |
||||
with open(file_name, 'w') as f: |
||||
json.dump(res, f) |
||||
|
||||
|
||||
def remove_diagonal_elements(self, matrix): |
||||
m = matrix.shape[0] |
||||
strided = np.lib.stride_tricks.as_strided |
||||
s0, s1 = matrix.strides |
||||
return strided(matrix.ravel()[1:], shape=(m - 1, m), strides=(s0 + s1, s1)).reshape(m, -1) |
||||
|
||||
|
||||
@abc.abstractmethod |
||||
def estimate_structure(self) -> typing.List: |
||||
"""Abstract method to estimate the structure |
||||
|
||||
:return: List of estimated edges |
||||
:rtype: Typing.List |
||||
""" |
||||
pass |
||||
|
||||
|
||||
def adjacency_matrix(self) -> np.ndarray: |
||||
"""Converts the estimated structure ``_complete_graph`` to a boolean adjacency matrix representation. |
||||
|
||||
:return: The adjacency matrix of the graph ``_complete_graph`` |
||||
:rtype: numpy.ndArray |
||||
""" |
||||
return nx.adjacency_matrix(self._complete_graph).toarray().astype(bool)
||||
|
||||
def spurious_edges(self) -> typing.List: |
||||
"""Return the spurious edges present in the estimated structure, if a prior net structure is present in |
||||
``_sample_path.structure``. |
||||
|
||||
:return: A list containing the spurious edges |
||||
:rtype: List |
||||
""" |
||||
if not self._sample_path.has_prior_net_structure: |
||||
raise RuntimeError("Cannot compute spurious edges without a prior net structure!")
||||
real_graph = nx.DiGraph() |
||||
real_graph.add_nodes_from(self._sample_path.structure.nodes_labels) |
||||
real_graph.add_edges_from(self._sample_path.structure.edges) |
||||
return nx.difference(real_graph, self._complete_graph).edges |
||||
|
||||
def save_plot_estimated_structure_graph(self) -> None: |
||||
"""Plot the estimated structure in a graphical model style. |
||||
Spurious edges are colored in red. |
||||
""" |
||||
graph_to_draw = nx.DiGraph() |
||||
spurious_edges = self.spurious_edges() |
||||
non_spurious_edges = list(set(self._complete_graph.edges) - set(spurious_edges)) |
||||
print(non_spurious_edges) |
||||
edges_colors = ['red' if edge in spurious_edges else 'black' for edge in self._complete_graph.edges] |
||||
graph_to_draw.add_edges_from(spurious_edges) |
||||
graph_to_draw.add_edges_from(non_spurious_edges) |
||||
pos = nx.spring_layout(graph_to_draw, k=0.5*1/np.sqrt(len(graph_to_draw.nodes())), iterations=50,scale=10) |
||||
options = { |
||||
"node_size": 2000, |
||||
"node_color": "white", |
||||
"edgecolors": "black", |
||||
'linewidths':2, |
||||
"with_labels":True, |
||||
"font_size":13, |
||||
'connectionstyle': 'arc3, rad = 0.1', |
||||
"arrowsize": 15, |
||||
"arrowstyle": '<|-', |
||||
"width": 1, |
||||
"edge_color":edges_colors, |
||||
} |
||||
|
||||
nx.draw(graph_to_draw, pos, **options) |
||||
ax = plt.gca() |
||||
ax.margins(0.20) |
||||
plt.axis("off") |
||||
name = self._sample_path._importer.file_path.rsplit('/', 1)[-1] |
||||
name = name.split('.', 1)[0] |
||||
name += '_' + str(self._sample_path._importer.dataset_id()) |
||||
name += '.png' |
||||
plt.savefig(name) |
||||
plt.clf() |
||||
print("Estimated Structure Plot Saved At: ", os.path.abspath(name)) |
||||
|
||||
|
||||
|
||||
|
||||
|
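To illustrate the separating-set enumeration used by CTPC, here is what the static subset generator above yields for a small hypothetical node set; the labels are made up for the example.

from PyCTBN.estimators import StructureEstimator

u = ['X', 'Y', 'Z', 'W']
# All size-2 subsets of u that exclude the candidate parent 'Y'.
subsets = list(StructureEstimator.generate_possible_sub_sets_of_size(u, 2, 'Y'))
print(subsets)  # [['X', 'Z'], ['X', 'W'], ['Z', 'W']]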
@ -0,0 +1,244 @@ |
||||
|
||||
import itertools |
||||
import json |
||||
import typing |
||||
|
||||
import networkx as nx |
||||
import numpy as np |
||||
from networkx.readwrite import json_graph |
||||
|
||||
from random import choice |
||||
|
||||
import concurrent.futures |
||||
|
||||
import copy |
||||
|
||||
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix |
||||
from ..structure_graph.network_graph import NetworkGraph |
||||
from .parameters_estimator import ParametersEstimator |
||||
from .structure_estimator import StructureEstimator |
||||
from ..structure_graph.sample_path import SamplePath |
||||
from ..structure_graph.structure import Structure |
||||
from .fam_score_calculator import FamScoreCalculator |
||||
from ..optimizers.hill_climbing_search import HillClimbing |
||||
from ..optimizers.tabu_search import TabuSearch |
||||
|
||||
|
||||
import multiprocessing |
||||
from multiprocessing import Pool |
||||
|
||||
|
||||
|
||||
|
||||
class StructureScoreBasedEstimator(StructureEstimator): |
||||
""" |
||||
Has the task of estimating the network structure given the trajectories in samplepath by |
||||
using a score-based approach.
||||
|
||||
:param sample_path: the _sample_path object containing the trajectories and the real structure |
||||
:type sample_path: SamplePath |
||||
:param tau_xu: hyperparameter over the CTBN’s q parameters, default to 0.1 |
||||
:type tau_xu: float, optional |
||||
:param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 1 |
||||
:type alpha_xu: float, optional |
||||
:param known_edges: List of known edges, default to None
||||
:type known_edges: List, optional |
||||
|
||||
""" |
||||
|
||||
def __init__(self, sample_path: SamplePath, tau_xu: float = 0.1, alpha_xu: float = 1, known_edges: typing.List = None):
    super().__init__(sample_path, known_edges)
    self.tau_xu = tau_xu
    self.alpha_xu = alpha_xu
||||
|
||||
|
||||
def estimate_structure(self, max_parents:int = None, iterations_number:int= 40, |
||||
patience:int = None, tabu_length:int = None, tabu_rules_duration:int = None, |
||||
optimizer: str = 'tabu',disable_multiprocessing:bool= False ): |
||||
""" |
||||
Compute the score-based algorithm to find the optimal structure |
||||
|
||||
:param max_parents: maximum number of parents for each variable. If None, disabled, default to None |
||||
:type max_parents: int, optional |
||||
:param iterations_number: maximum number of optimization algorithm's iteration, default to 40 |
||||
:type iterations_number: int, optional |
||||
:param patience: number of iterations without any improvement before stopping the search. If None, disabled, default to None
||||
:type patience: int, optional |
||||
:param tabu_length: maximum length of the data structures used in the optimization process, default to None
||||
:type tabu_length: int, optional |
||||
:param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None |
||||
:type tabu_rules_duration: int, optional |
||||
:param optimizer: name of the optimizer algorithm. Possible values: 'hill' (Hill climbing), 'tabu' (tabu search), default to 'tabu'
||||
:type optimizer: string, optional |
||||
:param disable_multiprocessing: true if you desire to disable the multiprocessing operations, default to False |
||||
:type disable_multiprocessing: Boolean, optional |
||||
""" |
||||
'Save the true edges structure in tuples' |
||||
true_edges = copy.deepcopy(self._sample_path.structure.edges) |
||||
true_edges = set(map(tuple, true_edges)) |
||||
|
||||
'Remove all the edges from the structure' |
||||
self._sample_path.structure.clean_structure_edges() |
||||
|
||||
estimate_parents = self.estimate_parents |
||||
|
||||
n_nodes= len(self._nodes) |
||||
|
||||
l_max_parents= [max_parents] * n_nodes |
||||
l_iterations_number = [iterations_number] * n_nodes |
||||
l_patience = [patience] * n_nodes |
||||
l_tabu_length = [tabu_length] * n_nodes |
||||
l_tabu_rules_duration = [tabu_rules_duration] * n_nodes |
||||
l_optimizer = [optimizer] * n_nodes |
||||
|
||||
|
||||
'get the number of CPU' |
||||
cpu_count = multiprocessing.cpu_count() |
||||
print(f"CPU COUNT: {cpu_count}") |
||||
|
||||
if disable_multiprocessing: |
||||
cpu_count = 1 |
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#with get_context("spawn").Pool(processes=cpu_count) as pool: |
||||
#with multiprocessing.Pool(processes=cpu_count) as pool: |
||||
|
||||
'Estimate the best parents for each node' |
||||
if disable_multiprocessing: |
||||
list_edges_partial = [estimate_parents(n,max_parents,iterations_number,patience,tabu_length,tabu_rules_duration,optimizer) for n in self._nodes] |
||||
else: |
||||
with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor: |
||||
list_edges_partial = executor.map(estimate_parents, |
||||
self._nodes, |
||||
l_max_parents, |
||||
l_iterations_number, |
||||
l_patience, |
||||
l_tabu_length, |
||||
l_tabu_rules_duration, |
||||
l_optimizer) |
||||
|
||||
|
||||
|
||||
|
||||
'Concatenate all the edges list' |
||||
set_list_edges = set(itertools.chain.from_iterable(list_edges_partial)) |
||||
|
||||
|
||||
|
||||
'calculate precision and recall' |
||||
n_missing_edges = 0 |
||||
n_added_fake_edges = 0 |
||||
|
||||
try: |
||||
n_added_fake_edges = len(set_list_edges.difference(true_edges)) |
||||
|
||||
n_missing_edges = len(true_edges.difference(set_list_edges)) |
||||
|
||||
n_true_positive = len(true_edges) - n_missing_edges |
||||
|
||||
precision = n_true_positive / (n_true_positive + n_added_fake_edges) |
||||
|
||||
recall = n_true_positive / (n_true_positive + n_missing_edges) |
||||
|
||||
|
||||
# print(f"n archi reali non trovati: {n_missing_edges}") |
||||
# print(f"n archi non reali aggiunti: {n_added_fake_edges}") |
||||
print(true_edges) |
||||
print(set_list_edges) |
||||
print(f"precision: {precision} ") |
||||
print(f"recall: {recall} ") |
||||
except Exception as e: |
||||
print(f"errore: {e}") |
||||
|
||||
return set_list_edges |
||||
|
||||
|
||||
def estimate_parents(self,node_id:str, max_parents:int = None, iterations_number:int= 40, |
||||
patience:int = 10, tabu_length:int = None, tabu_rules_duration:int=5, |
||||
optimizer:str = 'hill' ): |
||||
""" |
||||
Use the FamScore of a node in order to find the best parent nodes |
||||
|
||||
:param node_id: current node's id |
||||
:type node_id: string |
||||
:param max_parents: maximum number of parents for each variable. If None, disabled, default to None |
||||
:type max_parents: int, optional |
||||
:param iterations_number: maximum number of optimization algorithm's iteration, default to 40 |
||||
:type iterations_number: int, optional |
||||
:param patience: number of iterations without any improvement before stopping the search. If None, disabled, default to 10
||||
:type patience: int, optional |
||||
:param tabu_length: maximum length of the data structures used in the optimization process, default to None
||||
:type tabu_length: int, optional |
||||
:param tabu_rules_duration: number of iterations in which each rule keeps its value, default to 5
||||
:type tabu_rules_duration: int, optional |
||||
:param optimizer: name of the optimizer algorithm. Possible values: 'hill' (Hill climbing), 'tabu' (tabu search), default to 'hill'
||||
:type optimizer: string, optional |
||||
|
||||
:return: A list of the best edges for the current node
||||
:rtype: List |
||||
""" |
||||
|
||||
"choose the optimizer algotithm" |
||||
if optimizer == 'tabu': |
||||
optimizer = TabuSearch( |
||||
node_id = node_id, |
||||
structure_estimator = self, |
||||
max_parents = max_parents, |
||||
iterations_number = iterations_number, |
||||
patience = patience, |
||||
tabu_length = tabu_length, |
||||
tabu_rules_duration = tabu_rules_duration) |
||||
else: #if optimizer == 'hill': |
||||
optimizer = HillClimbing( |
||||
node_id = node_id, |
||||
structure_estimator = self, |
||||
max_parents = max_parents, |
||||
iterations_number = iterations_number, |
||||
patience = patience) |
||||
|
||||
"call the optmizer's function that calculates the current node's parents" |
||||
return optimizer.optimize_structure() |
||||
|
||||
|
||||
def get_score_from_graph(self, |
||||
graph: NetworkGraph, |
||||
node_id:str): |
||||
""" |
||||
Get the FamScore of a node |
||||
|
||||
:param node_id: current node's id |
||||
:type node_id: string |
||||
:param graph: current graph to be computed |
||||
:type graph: class:'NetworkGraph' |
||||
|
||||
|
||||
:return: The FamScore for this graph structure
||||
:rtype: float |
||||
""" |
||||
|
||||
'initialize the graph for a single node'
||||
graph.fast_init(node_id) |
||||
|
||||
params_estimation = ParametersEstimator(self._sample_path.trajectories, graph) |
||||
|
||||
'Initialize and compute parameters for node'
||||
params_estimation.fast_init(node_id) |
||||
SoCims = params_estimation.compute_parameters_for_node(node_id) |
||||
|
||||
'calculate the FamScore for the node' |
||||
fam_score_obj = FamScoreCalculator() |
||||
|
||||
score = fam_score_obj.get_fam_score(SoCims.actual_cims,tau_xu = self.tau_xu,alpha_xu=self.alpha_xu) |
||||
|
||||
#print(f" lo score per {node_id} risulta: {score} ") |
||||
return score |
||||
|
||||
|
||||
|
||||
|
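A sketch of an end-to-end score-based run, mirroring the constraint-based example earlier; ``importer`` is again an assumed, already-configured importer object and all hyperparameters below are illustrative.

from PyCTBN.estimators import StructureScoreBasedEstimator
from PyCTBN.structure_graph import SamplePath

sample_path = SamplePath(importer)  # `importer` is assumed to be configured
sample_path.build_trajectories()
sample_path.build_structure()

estimator = StructureScoreBasedEstimator(sample_path, tau_xu=0.1, alpha_xu=1)
edges = estimator.estimate_structure(max_parents=3,
                                     iterations_number=100,
                                     patience=20,
                                     optimizer='tabu',
                                     disable_multiprocessing=True)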
@ -0,0 +1,4 @@ |
||||
from .optimizer import Optimizer |
||||
from .tabu_search import TabuSearch |
||||
from .hill_climbing_search import HillClimbing |
||||
from .constraint_based_optimizer import ConstraintBasedOptimizer |
@ -0,0 +1,87 @@ |
||||
|
||||
import itertools |
||||
import json |
||||
import typing |
||||
|
||||
import networkx as nx |
||||
import numpy as np |
||||
|
||||
from random import choice |
||||
|
||||
from abc import ABC |
||||
|
||||
import copy |
||||
|
||||
|
||||
from .optimizer import Optimizer |
||||
from ..estimators.structure_estimator import StructureEstimator |
||||
from ..structure_graph.network_graph import NetworkGraph |
||||
|
||||
|
||||
class ConstraintBasedOptimizer(Optimizer): |
||||
""" |
||||
Optimizer class that implements the CTPC algorithm
||||
|
||||
:param node_id: current node's id |
||||
:type node_id: string |
||||
:param structure_estimator: a structure estimator object with the information about the net |
||||
:type structure_estimator: class:'StructureEstimator' |
||||
:param tot_vars_count: number of variables in the dataset |
||||
:type tot_vars_count: int |
||||
""" |
||||
def __init__(self, |
||||
node_id:str, |
||||
structure_estimator: StructureEstimator, |
||||
tot_vars_count:int |
||||
): |
||||
""" |
||||
Constructor |
||||
""" |
||||
super().__init__(node_id, structure_estimator) |
||||
self.tot_vars_count = tot_vars_count |
||||
|
||||
|
||||
|
||||
def optimize_structure(self): |
||||
""" |
||||
Compute Optimization process for a structure_estimator by using a CTPC Algorithm |
||||
|
||||
:return: the estimated structure for the node |
||||
:rtype: List |
||||
""" |
||||
print("##################TESTING VAR################", self.node_id) |
||||
|
||||
graph = NetworkGraph(self.structure_estimator._sample_path.structure) |
||||
|
||||
other_nodes = [node for node in self.structure_estimator._sample_path.structure.nodes_labels if node != self.node_id] |
||||
|
||||
for possible_parent in other_nodes: |
||||
graph.add_edges([(possible_parent,self.node_id)]) |
||||
|
||||
|
||||
u = other_nodes |
||||
child_states_numb = self.structure_estimator._sample_path.structure.get_states_number(self.node_id) |
||||
b = 0 |
||||
while b < len(u): |
||||
parent_indx = 0 |
||||
while parent_indx < len(u): |
||||
removed = False |
||||
test_parent = u[parent_indx] |
||||
i = self.structure_estimator._sample_path.structure.get_node_indx(test_parent) |
||||
j = self.structure_estimator._sample_path.structure.get_node_indx(self.node_id) |
||||
if self.structure_estimator._removable_edges_matrix[i][j]: |
||||
S = StructureEstimator.generate_possible_sub_sets_of_size(u, b, test_parent) |
||||
for parents_set in S: |
||||
if self.structure_estimator.complete_test(test_parent, self.node_id, parents_set, child_states_numb, self.tot_vars_count,i,j): |
||||
graph.remove_edges([(test_parent, self.node_id)]) |
||||
u.remove(test_parent) |
||||
removed = True |
||||
break |
||||
if not removed: |
||||
parent_indx += 1 |
||||
b += 1 |
||||
self.structure_estimator._cache.clear() |
||||
return graph.edges |
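The optimizer can also be driven directly for a single node; this is exactly what ``one_iteration_of_CTPC_algorithm`` does. In the sketch below, ``estimator`` is an assumed, already-initialized StructureConstraintBasedEstimator and the node label 'X' is illustrative.

from PyCTBN.optimizers import ConstraintBasedOptimizer

optimizer = ConstraintBasedOptimizer(node_id='X',
                                     structure_estimator=estimator,
                                     tot_vars_count=estimator._sample_path.total_variables_count)
parent_edges = optimizer.optimize_structure()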
@ -0,0 +1,135 @@ |
||||
|
||||
import itertools |
||||
import json |
||||
import typing |
||||
|
||||
import networkx as nx |
||||
import numpy as np |
||||
|
||||
from random import choice |
||||
|
||||
from abc import ABC |
||||
|
||||
|
||||
from .optimizer import Optimizer |
||||
from ..estimators.structure_estimator import StructureEstimator |
||||
from ..structure_graph.network_graph import NetworkGraph |
||||
|
||||
|
||||
class HillClimbing(Optimizer): |
||||
""" |
||||
Optimizer class that implements Hill Climbing Search
||||
|
||||
|
||||
:param node_id: current node's id |
||||
:type node_id: string |
||||
:param structure_estimator: a structure estimator object with the information about the net |
||||
:type structure_estimator: class:'StructureEstimator' |
||||
:param max_parents: maximum number of parents for each variable. If None, disabled, default to None |
||||
:type max_parents: int, optional |
||||
:param iterations_number: maximum number of optimization algorithm's iteration, default to 40 |
||||
:type iterations_number: int, optional |
||||
:param patience: number of iterations without any improvement before stopping the search. If None, disabled, default to None
||||
:type patience: int, optional |
||||
|
||||
|
||||
|
||||
""" |
||||
def __init__(self, |
||||
node_id:str, |
||||
structure_estimator: StructureEstimator, |
||||
max_parents:int = None, |
||||
iterations_number:int= 40, |
||||
patience:int = None |
||||
): |
||||
""" |
||||
Constructor |
||||
""" |
||||
super().__init__(node_id, structure_estimator) |
||||
self.max_parents = max_parents |
||||
self.iterations_number = iterations_number |
||||
self.patience = patience |
||||
|
||||
|
||||
|
||||
def optimize_structure(self) -> typing.List: |
||||
""" |
||||
Compute Optimization process for a structure_estimator by using a Hill Climbing Algorithm |
||||
|
||||
:return: the estimated structure for the node |
||||
:rtype: List |
||||
""" |
||||
|
||||
'Create the graph for the single node'
||||
graph = NetworkGraph(self.structure_estimator._sample_path.structure) |
||||
|
||||
'get the index for the current node' |
||||
node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id) |
||||
|
||||
'list of prior edges' |
||||
prior_parents = set() |
||||
|
||||
'Add the edges from prior knowledge' |
||||
for i in range(len(self.structure_estimator._removable_edges_matrix)): |
||||
if not self.structure_estimator._removable_edges_matrix[i][node_index]: |
||||
parent_id= self.structure_estimator._sample_path._structure.get_node_id(i) |
||||
prior_parents.add(parent_id) |
||||
|
||||
'Add the node to the starting structure' |
||||
graph.add_edges([(parent_id, self.node_id)]) |
||||
|
||||
|
||||
|
||||
'get all the possible parents' |
||||
other_nodes = [node for node in self.structure_estimator._sample_path.structure.nodes_labels
               if node != self.node_id and node not in prior_parents]
||||
|
||||
actual_best_score = self.structure_estimator.get_score_from_graph(graph,self.node_id) |
||||
|
||||
patience_count = 0
||||
for i in range(self.iterations_number): |
||||
'choose a new random edge' |
||||
current_new_parent = choice(other_nodes) |
||||
current_edge = (current_new_parent,self.node_id) |
||||
added = False |
||||
parent_removed = None |
||||
|
||||
|
||||
if graph.has_edge(current_edge): |
||||
graph.remove_edges([current_edge]) |
||||
else: |
||||
'check the max_parents constraint' |
||||
if self.max_parents is not None: |
||||
parents_list = graph.get_parents_by_id(self.node_id) |
||||
if len(parents_list) >= self.max_parents : |
||||
parent_removed = (choice(parents_list), self.node_id) |
||||
graph.remove_edges([parent_removed]) |
||||
graph.add_edges([current_edge]) |
||||
added = True |
||||
#print('**************************') |
||||
current_score = self.structure_estimator.get_score_from_graph(graph,self.node_id) |
||||
|
||||
|
||||
if current_score > actual_best_score: |
||||
'update current best score' |
||||
actual_best_score = current_score |
||||
patience_count = 0
||||
else: |
||||
'undo the last update' |
||||
if added: |
||||
graph.remove_edges([current_edge]) |
||||
'If a parent was removed, add it again to the graph' |
||||
if parent_removed is not None: |
||||
graph.add_edges([parent_removed]) |
||||
else: |
||||
graph.add_edges([current_edge]) |
||||
'update patience count' |
||||
patience_count += 1
||||
|
||||
if self.patience is not None and patience_count > self.patience:
||||
break |
||||
|
||||
print(f"finito variabile: {self.node_id}") |
||||
return graph.edges |
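Direct per-node use of the hill climber, which is what ``estimate_parents`` builds internally when optimizer='hill'. Here ``score_estimator`` is an assumed, already-initialized StructureScoreBasedEstimator and the settings are illustrative.

from PyCTBN.optimizers import HillClimbing

hill = HillClimbing(node_id='X',
                    structure_estimator=score_estimator,
                    max_parents=2,
                    iterations_number=40,
                    patience=10)
best_edges = hill.optimize_structure()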
@ -0,0 +1,39 @@ |
||||
|
||||
import itertools |
||||
import json |
||||
import typing |
||||
|
||||
import networkx as nx |
||||
import numpy as np |
||||
|
||||
import abc |
||||
|
||||
from ..estimators.structure_estimator import StructureEstimator |
||||
|
||||
|
||||
|
||||
class Optimizer(abc.ABC): |
||||
""" |
||||
Interface class for all the optimizers in PyCTBN
||||
|
||||
:param node_id: the node label |
||||
:type node_id: string |
||||
:param structure_estimator: A structureEstimator Object to predict the structure |
||||
:type structure_estimator: class:'StructureEstimator' |
||||
|
||||
""" |
||||
|
||||
def __init__(self, node_id:str, structure_estimator: StructureEstimator): |
||||
self.node_id = node_id |
||||
self.structure_estimator = structure_estimator |
||||
|
||||
|
||||
@abc.abstractmethod |
||||
def optimize_structure(self) -> typing.List: |
||||
""" |
||||
Compute Optimization process for a structure_estimator |
||||
|
||||
:return: the estimated structure for the node |
||||
:rtype: List |
||||
""" |
||||
pass |
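Any new search strategy only has to subclass ``Optimizer`` and implement ``optimize_structure``. Below is a hypothetical greedy single-pass optimizer (not part of the library) sketched against a score-based estimator, i.e. one exposing ``get_score_from_graph``; the class name and strategy are assumptions for illustration.

import typing

from PyCTBN.optimizers.optimizer import Optimizer
from PyCTBN.structure_graph import NetworkGraph


class GreedyOnePassOptimizer(Optimizer):
    """Hypothetical optimizer: tries each candidate parent once and keeps it
    only if it improves the node's FamScore."""

    def optimize_structure(self) -> typing.List:
        graph = NetworkGraph(self.structure_estimator._sample_path.structure)
        best_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)
        candidates = [n for n in self.structure_estimator._sample_path.structure.nodes_labels
                      if n != self.node_id]
        for parent in candidates:
            graph.add_edges([(parent, self.node_id)])
            score = self.structure_estimator.get_score_from_graph(graph, self.node_id)
            if score > best_score:
                best_score = score  # keep the edge
            else:
                graph.remove_edges([(parent, self.node_id)])
        return graph.edges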
@ -0,0 +1,199 @@ |
||||
|
||||
import itertools |
||||
import json |
||||
import typing |
||||
|
||||
import networkx as nx |
||||
import numpy as np |
||||
|
||||
from random import choice,sample |
||||
|
||||
from abc import ABC |
||||
|
||||
|
||||
from .optimizer import Optimizer |
||||
from ..estimators.structure_estimator import StructureEstimator |
||||
from ..structure_graph.network_graph import NetworkGraph |
||||
|
||||
import queue |
||||
|
||||
|
||||
class TabuSearch(Optimizer): |
||||
""" |
||||
Optimizer class that implements Tabu Search
||||
|
||||
|
||||
:param node_id: current node's id |
||||
:type node_id: string |
||||
:param structure_estimator: a structure estimator object with the information about the net |
||||
:type structure_estimator: class:'StructureEstimator' |
||||
:param max_parents: maximum number of parents for each variable. If None, disabled, default to None |
||||
:type max_parents: int, optional |
||||
:param iterations_number: maximum number of optimization algorithm's iteration, default to 40 |
||||
:type iterations_number: int, optional |
||||
:param patience: number of iterations without any improvement before stopping the search. If None, disabled, default to None
||||
:type patience: int, optional |
||||
:param tabu_length: maximum length of the data structures used in the optimization process, default to None
||||
:type tabu_length: int, optional |
||||
:param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None |
||||
:type tabu_rules_duration: int, optional |
||||
|
||||
|
||||
""" |
||||
def __init__(self, |
||||
node_id:str, |
||||
structure_estimator: StructureEstimator, |
||||
max_parents:int = None, |
||||
iterations_number:int= 40, |
||||
patience:int = None, |
||||
tabu_length:int = None, |
||||
tabu_rules_duration = None |
||||
): |
||||
""" |
||||
Constructor |
||||
""" |
||||
super().__init__(node_id, structure_estimator) |
||||
self.max_parents = max_parents |
||||
self.iterations_number = iterations_number |
||||
self.patience = patience |
||||
self.tabu_length = tabu_length |
||||
self.tabu_rules_duration = tabu_rules_duration |
||||
|
||||
|
||||
def optimize_structure(self) -> typing.List: |
||||
""" |
||||
Compute Optimization process for a structure_estimator by using a Tabu Search Algorithm
||||
|
||||
:return: the estimated structure for the node |
||||
:rtype: List |
||||
""" |
||||
print(f"tabu search is processing the structure of {self.node_id}") |
||||
|
||||
'Create the graph for the single node' |
||||
graph = NetworkGraph(self.structure_estimator._sample_path.structure) |
||||
|
||||
'get the index for the current node' |
||||
node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id) |
||||
|
||||
'list of prior edges' |
||||
prior_parents = set() |
||||
|
||||
'Add the edges from prior knowledge' |
||||
for i in range(len(self.structure_estimator._removable_edges_matrix)): |
||||
if not self.structure_estimator._removable_edges_matrix[i][node_index]: |
||||
parent_id= self.structure_estimator._sample_path._structure.get_node_id(i) |
||||
prior_parents.add(parent_id) |
||||
|
||||
'Add the node to the starting structure' |
||||
graph.add_edges([(parent_id, self.node_id)]) |
||||
|
||||
|
||||
|
||||
'get all the possible parents' |
||||
other_nodes = set(node for node in self.structure_estimator._sample_path.structure.nodes_labels
                  if node != self.node_id and node not in prior_parents)
||||
|
||||
'calculate the score for the node without parents' |
||||
actual_best_score = self.structure_estimator.get_score_from_graph(graph,self.node_id) |
||||
|
||||
|
||||
'initialize tabu_length and tabu_rules_duration if None' |
||||
if self.tabu_length is None: |
||||
self.tabu_length = len(other_nodes) |
||||
|
||||
if self.tabu_rules_duration is None: |
||||
self.tabu_rules_duration = len(other_nodes)
||||
|
||||
'initialize the data structures'
||||
tabu_set = set() |
||||
tabu_queue = queue.Queue() |
||||
|
||||
patience_count = 0
||||
tabu_count = 0 |
||||
for i in range(self.iterations_number): |
||||
|
||||
current_possible_nodes = other_nodes.difference(tabu_set) |
||||
|
||||
'choose a new random edge according to tabu restriction'
||||
if len(current_possible_nodes) > 0:
    current_new_parent = sample(list(current_possible_nodes), k=1)[0]  # sample needs a sequence, not a set
||||
else: |
||||
current_new_parent = tabu_queue.get() |
||||
tabu_set.remove(current_new_parent) |
||||
|
||||
|
||||
|
||||
current_edge = (current_new_parent,self.node_id) |
||||
added = False |
||||
parent_removed = None |
||||
|
||||
if graph.has_edge(current_edge): |
||||
graph.remove_edges([current_edge]) |
||||
else: |
||||
'check the max_parents constraint' |
||||
if self.max_parents is not None: |
||||
parents_list = graph.get_parents_by_id(self.node_id) |
||||
if len(parents_list) >= self.max_parents : |
||||
parent_removed = (choice(parents_list), self.node_id) |
||||
graph.remove_edges([parent_removed]) |
||||
graph.add_edges([current_edge]) |
||||
added = True |
||||
#print('**************************') |
||||
current_score = self.structure_estimator.get_score_from_graph(graph,self.node_id) |
||||
|
||||
|
||||
# print("-------------------------------------------") |
||||
# print(f"Current new parent: {current_new_parent}") |
||||
# print(f"Current score: {current_score}") |
||||
# print(f"Current best score: {actual_best_score}") |
||||
# print(f"tabu list : {str(tabu_set)} length: {len(tabu_set)}") |
||||
# print(f"tabu queue : {str(tabu_queue)} length: {tabu_queue.qsize()}") |
||||
# print(f"graph edges: {graph.edges}") |
||||
|
||||
# print("-------------------------------------------") |
||||
# input() |
||||
if current_score > actual_best_score: |
||||
'update current best score' |
||||
actual_best_score = current_score |
||||
patience_count = 0
||||
'update tabu list' |
||||
|
||||
|
||||
else: |
||||
'undo the last update' |
||||
if added: |
||||
graph.remove_edges([current_edge]) |
||||
'If a parent was removed, add it again to the graph' |
||||
if parent_removed is not None: |
||||
graph.add_edges([parent_removed]) |
||||
else: |
||||
graph.add_edges([current_edge]) |
||||
'update patience count' |
||||
patience_count += 1
||||
|
||||
|
||||
if tabu_queue.qsize() >= self.tabu_length: |
||||
current_removed = tabu_queue.get() |
||||
tabu_set.remove(current_removed) |
||||
'Add the node on the tabu list' |
||||
tabu_queue.put(current_new_parent) |
||||
tabu_set.add(current_new_parent) |
||||
|
||||
tabu_count += 1 |
||||
|
||||
'Every tabu_rules_duration steps remove an item from the tabu list'
||||
if tabu_count % self.tabu_rules_duration == 0: |
||||
if tabu_queue.qsize() > 0: |
||||
current_removed = tabu_queue.get() |
||||
tabu_set.remove(current_removed) |
||||
tabu_count = 0 |
||||
else: |
||||
tabu_count = 0 |
||||
|
||||
if self.patience is not None and patience_count > self.patience:
||||
break |
||||
|
||||
print(f"finito variabile: {self.node_id}") |
||||
return graph.edges |
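Direct per-node use of the tabu search, with illustrative settings; as above, ``score_estimator`` is an assumed, already-initialized StructureScoreBasedEstimator.

from PyCTBN.optimizers import TabuSearch

tabu = TabuSearch(node_id='X',
                  structure_estimator=score_estimator,
                  max_parents=3,
                  iterations_number=100,
                  patience=25,
                  tabu_length=10,
                  tabu_rules_duration=10)
best_edges = tabu.optimize_structure()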
@ -0,0 +1,6 @@ |
||||
from .conditional_intensity_matrix import ConditionalIntensityMatrix |
||||
from .network_graph import NetworkGraph |
||||
from .sample_path import SamplePath |
||||
from .set_of_cims import SetOfCims |
||||
from .structure import Structure |
||||
from .trajectory import Trajectory |
@ -0,0 +1,42 @@ |
||||
import numpy as np |
||||
|
||||
|
||||
class ConditionalIntensityMatrix(object): |
||||
"""Abstracts the Conditional Intesity matrix of a node as aggregation of the state residence times vector |
||||
and state transition matrix and the actual CIM matrix. |
||||
|
||||
:param state_residence_times: state residence times vector |
||||
:type state_residence_times: numpy.array |
||||
:param state_transition_matrix: the transitions count matrix |
||||
:type state_transition_matrix: numpy.ndArray |
||||
:_cim: the actual cim of the node |
||||
""" |
||||
def __init__(self, state_residence_times: np.array, state_transition_matrix: np.array): |
||||
"""Constructor Method |
||||
""" |
||||
self._state_residence_times = state_residence_times |
||||
self._state_transition_matrix = state_transition_matrix |
||||
self._cim = self.state_transition_matrix.astype(np.float64) |
||||
|
||||
def compute_cim_coefficients(self) -> None: |
||||
"""Compute the coefficients of the matrix _cim by using the following equality q_xx' = M[x, x'] / T[x]. |
||||
The class member ``_cim`` will contain the computed cim |
||||
""" |
||||
np.fill_diagonal(self._cim, self._cim.diagonal() * -1) |
||||
self._cim = ((self._cim.T + 1) / (self._state_residence_times + 1)).T |
||||
|
||||
@property |
||||
def state_residence_times(self) -> np.ndarray: |
||||
return self._state_residence_times |
||||
|
||||
@property |
||||
def state_transition_matrix(self) -> np.ndarray: |
||||
return self._state_transition_matrix |
||||
|
||||
@property |
||||
def cim(self) -> np.ndarray: |
||||
return self._cim |
||||
|
||||
def __repr__(self): |
||||
return 'CIM:\n' + str(self.cim) |
||||
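# A minimal usage sketch (hypothetical numbers): build the CIM of a binary node |
||||
# from its sufficient statistics; state_residence_times[x] holds T[x] and |
||||
# state_transition_matrix[x, x'] holds M[x, x'], with the diagonal counting all |
||||
# the transitions leaving state x. |
||||
#   res_times = np.array([4.0, 6.0]) |
||||
#   transitions = np.array([[3, 3], [2, 2]]) |
||||
#   cim = ConditionalIntensityMatrix(res_times, transitions) |
||||
#   cim.compute_cim_coefficients() |
||||
#   print(cim)  # shows the smoothed intensity matrix held by ``cim.cim`` |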
|
@ -0,0 +1,293 @@ |
||||
|
||||
import typing |
||||
|
||||
import networkx as nx |
||||
import numpy as np |
||||
|
||||
from .structure import Structure |
||||
|
||||
|
||||
class NetworkGraph(object): |
||||
"""Abstracts the infos contained in the Structure class in the form of a directed graph. |
||||
Has the task of creating all the necessary filtering and indexing structures for parameters estimation |
||||
|
||||
:param graph_struct: the ``Structure`` object from which the info about the net will be extracted |
||||
:type graph_struct: Structure |
||||
:_graph: directed graph |
||||
:_aggregated_info_about_nodes_parents: a structure that contains all the necessary info |
||||
about every parent of the node for which all the indexing and filtering structures will be constructed. |
||||
:_time_scalar_indexing_structure: the indexing structure for state res time estimation |
||||
:_transition_scalar_indexing_structure: the indexing structure for transition computation |
||||
:_time_filtering: the columns filtering structure used in the computation of the state res times |
||||
:_transition_filtering: the columns filtering structure used in the computation of the transition |
||||
from one state to another |
||||
:_p_combs_structure: all the possible parents states combination for the node of interest |
||||
""" |
||||
|
||||
def __init__(self, graph_struct: Structure): |
||||
"""Constructor Method |
||||
""" |
||||
self._graph_struct = graph_struct |
||||
self._graph = nx.DiGraph() |
||||
self._aggregated_info_about_nodes_parents = None |
||||
self._time_scalar_indexing_structure = None |
||||
self._transition_scalar_indexing_structure = None |
||||
self._time_filtering = None |
||||
self._transition_filtering = None |
||||
self._p_combs_structure = None |
||||
|
||||
def init_graph(self): |
||||
self.add_nodes(self._graph_struct.nodes_labels) |
||||
self.add_edges(self._graph_struct.edges) |
||||
self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes() |
||||
self._fancy_indexing = self.build_fancy_indexing_structure(0) |
||||
self.build_scalar_indexing_structures() |
||||
self.build_time_columns_filtering_structure() |
||||
self.build_transition_columns_filtering_structure() |
||||
self._p_combs_structure = self.build_p_combs_structure() |
||||
|
||||
def fast_init(self, node_id: str) -> None: |
||||
"""Initializes all the necessary structures for parameters estimation of the node identified by the label |
||||
node_id |
||||
|
||||
:param node_id: the label of the node |
||||
:type node_id: string |
||||
""" |
||||
self.add_nodes(self._graph_struct.nodes_labels) |
||||
self.add_edges(self._graph_struct.edges) |
||||
self._aggregated_info_about_nodes_parents = self.get_ordered_by_indx_set_of_parents(node_id) |
||||
p_indxs = self._aggregated_info_about_nodes_parents[1] |
||||
p_vals = self._aggregated_info_about_nodes_parents[2] |
||||
node_states = self.get_states_number(node_id) |
||||
node_indx = self.get_node_indx(node_id) |
||||
cols_number = self._graph_struct.total_variables_number |
||||
self._time_scalar_indexing_structure = NetworkGraph.\ |
||||
build_time_scalar_indexing_structure_for_a_node(node_states, p_vals) |
||||
self._transition_scalar_indexing_structure = NetworkGraph.\ |
||||
build_transition_scalar_indexing_structure_for_a_node(node_states, p_vals) |
||||
self._time_filtering = NetworkGraph.build_time_columns_filtering_for_a_node(node_indx, p_indxs) |
||||
self._transition_filtering = NetworkGraph.build_transition_filtering_for_a_node(node_indx, p_indxs, cols_number) |
||||
self._p_combs_structure = NetworkGraph.build_p_comb_structure_for_a_node(p_vals) |
||||
|
||||
def add_nodes(self, list_of_nodes: typing.List) -> None: |
||||
"""Adds the nodes to the ``_graph`` contained in the list of nodes ``list_of_nodes``. |
||||
Sets all the properties that identify a nodes (index, positional index, cardinality) |
||||
|
||||
:param list_of_nodes: the nodes to add to ``_graph`` |
||||
:type list_of_nodes: List |
||||
""" |
||||
nodes_indxs = self._graph_struct.nodes_indexes |
||||
nodes_vals = self._graph_struct.nodes_values |
||||
pos = 0 |
||||
for id, node_indx, node_val in zip(list_of_nodes, nodes_indxs, nodes_vals): |
||||
self._graph.add_node(id, indx=node_indx, val=node_val, pos_indx=pos) |
||||
pos += 1 |
||||
|
||||
def has_edge(self,edge:tuple)-> bool: |
||||
""" |
||||
Check if the graph contains a specific edge |
||||
|
||||
Parameters: |
||||
edge: a tuple that rappresents the edge |
||||
Returns: |
||||
bool |
||||
""" |
||||
return self._graph.has_edge(edge[0],edge[1]) |
||||
|
||||
def add_edges(self, list_of_edges: typing.List) -> None: |
||||
"""Add the edges to the ``_graph`` contained in the list ``list_of_edges``. |
||||
|
||||
:param list_of_edges: the list containing of tuples containing the edges |
||||
:type list_of_edges: List |
||||
""" |
||||
self._graph.add_edges_from(list_of_edges) |
||||
|
||||
def remove_node(self, node_id: str) -> None: |
||||
"""Remove the node ``node_id`` from all the class members. |
||||
Initialize all the filtering/indexing structures. |
||||
""" |
||||
self._graph.remove_node(node_id) |
||||
self._graph_struct.remove_node(node_id) |
||||
self.clear_indexing_filtering_structures() |
||||
|
||||
def clear_indexing_filtering_structures(self) -> None: |
||||
"""Initialize all the filtering/indexing structures. |
||||
""" |
||||
self._aggregated_info_about_nodes_parents = None |
||||
self._time_scalar_indexing_structure = None |
||||
self._transition_scalar_indexing_structure = None |
||||
self._time_filtering = None |
||||
self._transition_filtering = None |
||||
self._p_combs_structure = None |
||||
|
||||
def get_ordered_by_indx_set_of_parents(self, node: str) -> typing.Tuple: |
||||
"""Builds the aggregated structure that holds all the infos relative to the parent set of the node, namely |
||||
(parents_labels, parents_indexes, parents_cardinalities). |
||||
|
||||
:param node: the label of the node |
||||
:type node: string |
||||
:return: a tuple containing all the parent set info |
||||
:rtype: Tuple |
||||
""" |
||||
parents = self.get_parents_by_id(node) |
||||
nodes = self._graph_struct.nodes_labels |
||||
d = {v: i for i, v in enumerate(nodes)} |
||||
sorted_parents = sorted(parents, key=lambda v: d[v]) |
||||
get_node_indx = self.get_node_indx |
||||
p_indxes = [get_node_indx(node) for node in sorted_parents] |
||||
p_values = [self.get_states_number(node) for node in sorted_parents] |
||||
return sorted_parents, p_indxes, p_values |
||||
|
||||
def remove_edges(self, list_of_edges: typing.List) -> None: |
||||
"""Remove the edges to the graph contained in the list list_of_edges. |
||||
|
||||
:param list_of_edges: The edges to remove from the graph |
||||
:type list_of_edges: List |
||||
""" |
||||
self._graph.remove_edges_from(list_of_edges) |
||||
|
||||
@staticmethod |
||||
def build_time_scalar_indexing_structure_for_a_node(node_states: int, |
||||
parents_vals: typing.List) -> np.ndarray: |
||||
"""Builds an indexing structure for the computation of state residence times values. |
||||
|
||||
:param node_states: the node cardinality |
||||
:type node_states: int |
||||
:param parents_vals: the cardinalities of the node's parents |
||||
:type parents_vals: List |
||||
:return: The time indexing structure |
||||
:rtype: numpy.ndArray |
||||
""" |
||||
T_vector = np.array([node_states]) |
||||
T_vector = np.append(T_vector, parents_vals) |
||||
T_vector = T_vector.cumprod().astype(np.int) |
||||
return T_vector |
||||
|
||||
@staticmethod |
||||
def build_transition_scalar_indexing_structure_for_a_node(node_states_number: int, parents_vals: typing.List) \ |
||||
-> np.ndarray: |
||||
"""Builds an indexing structure for the computation of state transitions values. |
||||
|
||||
:param node_states_number: the node cardinality |
||||
:type node_states_number: int |
||||
:param parents_vals: the cardinalities of the node's parents |
||||
:type parents_vals: List |
||||
:return: The transition indexing structure |
||||
:rtype: numpy.ndArray |
||||
""" |
||||
M_vector = np.array([node_states_number, |
||||
node_states_number]) |
||||
M_vector = np.append(M_vector, parents_vals) |
||||
M_vector = M_vector.cumprod().astype(np.int) |
||||
return M_vector |
||||
|
||||
@staticmethod |
||||
def build_time_columns_filtering_for_a_node(node_indx: int, p_indxs: typing.List) -> np.ndarray: |
||||
""" |
||||
Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs`` |
||||
in the dataset. |
||||
This structure will be used in the computation of the state residence times. |
||||
:param node_indx: the index of the node |
||||
:type node_indx: int |
||||
:param p_indxs: the indexes of the node's parents |
||||
:type p_indxs: List |
||||
:return: The filtering structure for times estimation |
||||
:rtype: numpy.ndArray |
||||
""" |
||||
return np.append(np.array([node_indx], dtype=np.int), p_indxs).astype(np.int) |
||||
|
||||
@staticmethod |
||||
def build_transition_filtering_for_a_node(node_indx: int, p_indxs: typing.List, nodes_number: int) \ |
||||
-> np.ndarray: |
||||
"""Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs`` |
||||
in the dataset. |
||||
This structure will be used in the computation of the state transitions values. |
||||
:param node_indx: the index of the node |
||||
:type node_indx: int |
||||
:param p_indxs: the indexes of the node's parents |
||||
:type p_indxs: List |
||||
:param nodes_number: the total number of nodes in the dataset |
||||
:type nodes_number: int |
||||
:return: The filtering structure for transitions estimation |
||||
:rtype: numpy.ndArray |
||||
""" |
||||
return np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int) |
||||
|
||||
@staticmethod |
||||
def build_p_comb_structure_for_a_node(parents_values: typing.List) -> np.ndarray: |
||||
""" |
||||
Builds the combinatorial structure that contains the combinations of all the values contained in |
||||
``parents_values``. |
||||
|
||||
:param parents_values: the cardinalities of the node's parents |
||||
:type parents_values: List |
||||
:return: A numpy matrix containing a grid of the combinations |
||||
:rtype: numpy.ndArray |
||||
""" |
||||
tmp = [] |
||||
for val in parents_values: |
||||
tmp.append([x for x in range(val)]) |
||||
if len(parents_values) > 0: |
||||
parents_comb = np.array(np.meshgrid(*tmp)).T.reshape(-1, len(parents_values)) |
||||
if len(parents_values) > 1: |
||||
tmp_comb = parents_comb[:, 1].copy() |
||||
parents_comb[:, 1] = parents_comb[:, 0].copy() |
||||
parents_comb[:, 0] = tmp_comb |
||||
else: |
||||
parents_comb = np.array([[]], dtype=np.int) |
||||
return parents_comb |
||||
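# Worked example (hypothetical cardinalities): with parents_values = [2, 3] the |
||||
# method enumerates all the 6 joint parents' states; note that the implementation |
||||
# swaps the first two columns of the meshgrid result. |
||||
#   NetworkGraph.build_p_comb_structure_for_a_node([2, 3]) |
||||
#   -> array([[0, 0], |
||||
#             [1, 0], |
||||
#             [2, 0], |
||||
#             [0, 1], |
||||
#             [1, 1], |
||||
#             [2, 1]]) |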
|
||||
def get_parents_by_id(self, node_id) -> typing.List: |
||||
"""Returns a list of labels of the parents of the node ``node_id`` |
||||
|
||||
:param node_id: the node label |
||||
:type node_id: string |
||||
:return: a List of labels of the parents |
||||
:rtype: List |
||||
""" |
||||
return list(self._graph.predecessors(node_id)) |
||||
|
||||
def get_states_number(self, node_id) -> int: |
||||
return self._graph.nodes[node_id]['val'] |
||||
|
||||
def get_node_indx(self, node_id) -> int: |
||||
return nx.get_node_attributes(self._graph, 'indx')[node_id] |
||||
|
||||
def get_positional_node_indx(self, node_id) -> int: |
||||
return self._graph.nodes[node_id]['pos_indx'] |
||||
|
||||
@property |
||||
def nodes(self) -> typing.List: |
||||
return self._graph_struct.nodes_labels |
||||
|
||||
@property |
||||
def edges(self) -> typing.List: |
||||
return list(self._graph.edges) |
||||
|
||||
@property |
||||
def nodes_indexes(self) -> np.ndarray: |
||||
return self._graph_struct.nodes_indexes |
||||
|
||||
@property |
||||
def nodes_values(self) -> np.ndarray: |
||||
return self._graph_struct.nodes_values |
||||
|
||||
@property |
||||
def time_scalar_indexing_strucure(self) -> np.ndarray: |
||||
return self._time_scalar_indexing_structure |
||||
|
||||
@property |
||||
def time_filtering(self) -> np.ndarray: |
||||
return self._time_filtering |
||||
|
||||
@property |
||||
def transition_scalar_indexing_structure(self) -> np.ndarray: |
||||
return self._transition_scalar_indexing_structure |
||||
|
||||
@property |
||||
def transition_filtering(self) -> np.ndarray: |
||||
return self._transition_filtering |
||||
|
||||
@property |
||||
def p_combs(self) -> np.ndarray: |
||||
return self._p_combs_structure |
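||||
# A minimal usage sketch (assumes ``sample_path`` is an already-built SamplePath |
||||
# object, see the SamplePath class below): prepare the filtering and indexing |
||||
# structures needed to estimate the parameters of a single node labeled 'X'. |
||||
#   graph = NetworkGraph(sample_path.structure) |
||||
#   graph.fast_init('X') |
||||
#   print(graph.time_filtering, graph.transition_filtering) |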
@ -0,0 +1,91 @@ |
||||
|
||||
|
||||
import numpy as np |
||||
import pandas as pd |
||||
|
||||
from .structure import Structure |
||||
from .trajectory import Trajectory |
||||
from ..utility.abstract_importer import AbstractImporter |
||||
|
||||
|
||||
|
||||
class SamplePath(object): |
||||
"""Aggregates all the informations about the trajectories, the real structure of the sampled net and variables |
||||
cardinalites. Has the task of creating the objects ``Trajectory`` and ``Structure`` that will |
||||
contain the mentioned data. |
||||
|
||||
:param importer: the Importer object which contains the imported and processed data |
||||
:type importer: AbstractImporter |
||||
:_trajectories: the ``Trajectory`` object that will contain all the concatenated trajectories |
||||
:_structure: the ``Structure`` object that will contain all the structural info about the net |
||||
:_total_variables_count: the number of variables in the net |
||||
""" |
||||
def __init__(self, importer: AbstractImporter): |
||||
"""Constructor Method |
||||
""" |
||||
self._importer = importer |
||||
if self._importer._df_variables is None or self._importer._concatenated_samples is None: |
||||
raise RuntimeError('The importer object has to contain all the processed data!') |
||||
if self._importer._df_variables.empty: |
||||
raise RuntimeError('The importer object has to contain all the processed data!') |
||||
if isinstance(self._importer._concatenated_samples, pd.DataFrame): |
||||
if self._importer._concatenated_samples.empty: |
||||
raise RuntimeError('The importer object has to contain all the processed data!') |
||||
if isinstance(self._importer._concatenated_samples, np.ndarray): |
||||
if self._importer._concatenated_samples.size == 0: |
||||
raise RuntimeError('The importer object has to contain all the processed data!') |
||||
self._trajectories = None |
||||
self._structure = None |
||||
self._total_variables_count = None |
||||
|
||||
def build_trajectories(self) -> None: |
||||
"""Builds the Trajectory object that will contain all the trajectories. |
||||
Clears all the unused dataframes in ``_importer`` Object |
||||
""" |
||||
self._trajectories = \ |
||||
Trajectory(self._importer.build_list_of_samples_array(self._importer.concatenated_samples), |
||||
len(self._importer.sorter) + 1) |
||||
self._importer.clear_concatenated_frame() |
||||
|
||||
def build_structure(self) -> None: |
||||
""" |
||||
Builds the ``Structure`` object that aggregates all the info about the net. |
||||
""" |
||||
if self._importer.sorter != self._importer.variables.iloc[:, 0].to_list(): |
||||
raise RuntimeError("The Dataset columns order have to match the order of labels in the variables Frame!") |
||||
|
||||
self._total_variables_count = len(self._importer.sorter) |
||||
labels = self._importer.variables.iloc[:, 0].to_list() |
||||
indxs = self._importer.variables.index.to_numpy() |
||||
vals = self._importer.variables.iloc[:, 1].to_numpy() |
||||
if self._importer.structure is None or self._importer.structure.empty: |
||||
edges = [] |
||||
else: |
||||
edges = list(self._importer.structure.to_records(index=False)) |
||||
self._structure = Structure(labels, indxs, vals, edges, |
||||
self._total_variables_count) |
||||
|
||||
def clear_memory(self): |
||||
self._importer._raw_data = [] |
||||
|
||||
@property |
||||
def trajectories(self) -> Trajectory: |
||||
return self._trajectories |
||||
|
||||
@property |
||||
def structure(self) -> Structure: |
||||
return self._structure |
||||
|
||||
@property |
||||
def total_variables_count(self) -> int: |
||||
return self._total_variables_count |
||||
|
||||
@property |
||||
def has_prior_net_structure(self) -> bool: |
||||
return bool(self._structure.edges) |
||||
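# A minimal usage sketch (assumes ``importer`` is any concrete AbstractImporter |
||||
# on which import_data() has already been called): |
||||
#   sample_path = SamplePath(importer) |
||||
#   sample_path.build_trajectories() |
||||
#   sample_path.build_structure() |
||||
#   print(sample_path.total_variables_count) |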
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -0,0 +1,97 @@ |
||||
|
||||
|
||||
import typing |
||||
|
||||
import numpy as np |
||||
|
||||
from .conditional_intensity_matrix import ConditionalIntensityMatrix |
||||
|
||||
|
||||
class SetOfCims(object): |
||||
"""Aggregates all the CIMS of the node identified by the label _node_id. |
||||
|
||||
:param node_id: the node label |
||||
:type node_id: string |
||||
:param parents_states_number: the cardinalities of the parents |
||||
:type parents_states_number: List |
||||
:param node_states_number: the cardinality of the node |
||||
:type node_states_number: int |
||||
:param p_combs: the p_comb structure bound to this node |
||||
:type p_combs: numpy.ndArray |
||||
:_state_residence_times: matrix containing all the state residence time vectors for the node |
||||
:_transition_matrices: matrix containing all the transition matrices for the node |
||||
:_actual_cims: the cims of the node |
||||
""" |
||||
|
||||
def __init__(self, node_id: str, parents_states_number: typing.List, node_states_number: int, p_combs: np.ndarray): |
||||
"""Constructor Method |
||||
""" |
||||
self._node_id = node_id |
||||
self._parents_states_number = parents_states_number |
||||
self._node_states_number = node_states_number |
||||
self._actual_cims = [] |
||||
self._state_residence_times = None |
||||
self._transition_matrices = None |
||||
self._p_combs = p_combs |
||||
self.build_times_and_transitions_structures() |
||||
|
||||
def build_times_and_transitions_structures(self) -> None: |
||||
"""Initializes at the correct dimensions the state residence times matrix and the state transition matrices. |
||||
""" |
||||
if not self._parents_states_number: |
||||
self._state_residence_times = np.zeros((1, self._node_states_number), dtype=np.float) |
||||
self._transition_matrices = np.zeros((1, self._node_states_number, self._node_states_number), dtype=np.int) |
||||
else: |
||||
self._state_residence_times = \ |
||||
np.zeros((np.prod(self._parents_states_number), self._node_states_number), dtype=np.float) |
||||
self._transition_matrices = np.zeros([np.prod(self._parents_states_number), self._node_states_number, |
||||
self._node_states_number], dtype=np.int) |
||||
|
||||
def build_cims(self, state_res_times: np.ndarray, transition_matrices: np.ndarray) -> None: |
||||
"""Build the ``ConditionalIntensityMatrix`` objects given the state residence times and transitions matrices. |
||||
Compute the cim coefficients. The class member ``_actual_cims`` will contain the computed cims. |
||||
|
||||
:param state_res_times: the state residence times matrix |
||||
:type state_res_times: numpy.ndArray |
||||
:param transition_matrices: the transition matrices |
||||
:type transition_matrices: numpy.ndArray |
||||
""" |
||||
for state_res_time_vector, transition_matrix in zip(state_res_times, transition_matrices): |
||||
cim_to_add = ConditionalIntensityMatrix(state_res_time_vector, transition_matrix) |
||||
cim_to_add.compute_cim_coefficients() |
||||
self._actual_cims.append(cim_to_add) |
||||
self._actual_cims = np.array(self._actual_cims) |
||||
self._transition_matrices = None |
||||
self._state_residence_times = None |
||||
|
||||
def filter_cims_with_mask(self, mask_arr: np.ndarray, comb: typing.List) -> np.ndarray: |
||||
"""Filter the cims contained in the array ``_actual_cims`` given the boolean mask ``mask_arr`` and the index |
||||
``comb``. |
||||
|
||||
:param mask_arr: the boolean mask that indicates which parent to consider |
||||
:type mask_arr: numpy.array |
||||
:param comb: the state/s of the filtered parents |
||||
:type comb: numpy.array |
||||
:return: Array of ``ConditionalIntensityMatrix`` objects |
||||
:rtype: numpy.array |
||||
""" |
||||
if mask_arr.size <= 1: |
||||
return self._actual_cims |
||||
else: |
||||
flat_indxs = np.argwhere(np.all(self._p_combs[:, mask_arr] == comb, axis=1)).ravel() |
||||
return self._actual_cims[flat_indxs] |
||||
|
||||
@property |
||||
def actual_cims(self) -> np.ndarray: |
||||
return self._actual_cims |
||||
|
||||
@property |
||||
def p_combs(self) -> np.ndarray: |
||||
return self._p_combs |
||||
|
||||
def get_cims_number(self): |
||||
return len(self._actual_cims) |
||||
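# Illustration of ``filter_cims_with_mask`` (hypothetical setup): for a node with |
||||
# two parents, mask_arr = np.array([True, False]) keeps only the parent selected |
||||
# by the mask, and comb = [1] returns the cims of every parents' combination in |
||||
# which that parent is in state 1. |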
|
||||
|
||||
|
||||
|
@ -0,0 +1,124 @@ |
||||
|
||||
import typing as ty |
||||
|
||||
import numpy as np |
||||
|
||||
|
||||
class Structure(object): |
||||
"""Contains all the infos about the network structure(nodes labels, nodes caridinalites, edges, indexes) |
||||
|
||||
:param nodes_labels_list: the symbolic names of the variables |
||||
:type nodes_labels_list: List |
||||
:param nodes_indexes_arr: the indexes of the nodes |
||||
:type nodes_indexes_arr: numpy.ndArray |
||||
:param nodes_vals_arr: the cardinalites of the nodes |
||||
:type nodes_vals_arr: numpy.ndArray |
||||
:param edges_list: the edges of the network |
||||
:type edges_list: List |
||||
:param total_variables_number: the total number of variables in the dataset |
||||
:type total_variables_number: int |
||||
""" |
||||
|
||||
def __init__(self, nodes_labels_list: ty.List, nodes_indexes_arr: np.ndarray, nodes_vals_arr: np.ndarray, |
||||
edges_list: ty.List, total_variables_number: int): |
||||
"""Constructor Method |
||||
""" |
||||
self._nodes_labels_list = nodes_labels_list |
||||
self._nodes_indexes_arr = nodes_indexes_arr |
||||
self._nodes_vals_arr = nodes_vals_arr |
||||
self._edges_list = edges_list |
||||
self._total_variables_number = total_variables_number |
||||
|
||||
def remove_node(self, node_id: str) -> None: |
||||
"""Remove the node ``node_id`` from all the class members. |
||||
The class member ``_total_variables_number`` is left unchanged, since it refers to the total number of variables in the dataset. |
||||
""" |
||||
node_positional_indx = self._nodes_labels_list.index(node_id) |
||||
del self._nodes_labels_list[node_positional_indx] |
||||
self._nodes_indexes_arr = np.delete(self._nodes_indexes_arr, node_positional_indx) |
||||
self._nodes_vals_arr = np.delete(self._nodes_vals_arr, node_positional_indx) |
||||
self._edges_list = [(from_node, to_node) for (from_node, to_node) in self._edges_list if (from_node != node_id |
||||
and to_node != node_id)] |
||||
|
||||
@property |
||||
def edges(self) -> ty.List: |
||||
return self._edges_list |
||||
|
||||
@property |
||||
def nodes_labels(self) -> ty.List: |
||||
return self._nodes_labels_list |
||||
|
||||
@property |
||||
def nodes_indexes(self) -> np.ndarray: |
||||
return self._nodes_indexes_arr |
||||
|
||||
@property |
||||
def nodes_values(self) -> np.ndarray: |
||||
return self._nodes_vals_arr |
||||
|
||||
@property |
||||
def total_variables_number(self) -> int: |
||||
return self._total_variables_number |
||||
|
||||
def get_node_id(self, node_indx: int) -> str: |
||||
"""Given the ``node_index`` returns the node label. |
||||
|
||||
:param node_indx: the node index |
||||
:type node_indx: int |
||||
:return: the node label |
||||
:rtype: string |
||||
""" |
||||
return self._nodes_labels_list[node_indx] |
||||
|
||||
def clean_structure_edges(self): |
||||
self._edges_list = list() |
||||
|
||||
def add_edge(self, edge: tuple): |
||||
self._edges_list.append(edge) |
||||
|
||||
def remove_edge(self, edge: tuple): |
||||
self._edges_list.remove(edge) |
||||
|
||||
def contains_edge(self,edge:tuple) -> bool: |
||||
return edge in self._edges_list |
||||
|
||||
def get_node_indx(self, node_id: str) -> int: |
||||
"""Given the ``node_index`` returns the node label. |
||||
|
||||
:param node_id: the node label |
||||
:type node_id: string |
||||
:return: the node index |
||||
:rtype: int |
||||
""" |
||||
pos_indx = self._nodes_labels_list.index(node_id) |
||||
return self._nodes_indexes_arr[pos_indx] |
||||
|
||||
def get_positional_node_indx(self, node_id: str) -> int: |
||||
return self._nodes_labels_list.index(node_id) |
||||
|
||||
def get_states_number(self, node: str) -> int: |
||||
"""Given the node label ``node`` returns the cardinality of the node. |
||||
|
||||
:param node: the node label |
||||
:type node: string |
||||
:return: the node cardinality |
||||
:rtype: int |
||||
""" |
||||
pos_indx = self._nodes_labels_list.index(node) |
||||
return self._nodes_vals_arr[pos_indx] |
||||
|
||||
def __repr__(self): |
||||
return "Variables:\n" + str(self._nodes_labels_list) +"\nValues:\n"+ str(self._nodes_vals_arr) +\ |
||||
"\nEdges: \n" + str(self._edges_list) |
||||
|
||||
def __eq__(self, other): |
||||
"""Overrides the default implementation""" |
||||
if isinstance(other, Structure): |
||||
return set(self._nodes_labels_list) == set(other._nodes_labels_list) and \ |
||||
np.array_equal(self._nodes_vals_arr, other._nodes_vals_arr) and \ |
||||
np.array_equal(self._nodes_indexes_arr, other._nodes_indexes_arr) and \ |
||||
self._edges_list == other._edges_list |
||||
|
||||
return False |
||||
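# A minimal construction sketch (hypothetical three-node net): |
||||
#   s = Structure(['X', 'Y', 'Z'], np.array([0, 1, 2]), np.array([3, 3, 3]), |
||||
#                 [('X', 'Y'), ('Y', 'Z')], 3) |
||||
#   s.get_states_number('Y')    # -> 3 |
||||
#   s.contains_edge(('X', 'Y')) # -> True |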
|
@ -0,0 +1,45 @@ |
||||
|
||||
import typing |
||||
|
||||
import numpy as np |
||||
|
||||
|
||||
class Trajectory(object): |
||||
""" Abstracts the infos about a complete set of trajectories, represented as a numpy array of doubles |
||||
(the time deltas) and a numpy matrix of ints (the changes of states). |
||||
|
||||
:param list_of_columns: the list containing the times array and values matrix |
||||
:type list_of_columns: List |
||||
:param original_cols_number: total number of cols in the data |
||||
:type original_cols_number: int |
||||
:_actual_trajectory: the trajectory containing also the duplicated/shifted values |
||||
:_times: the array containing the time deltas |
||||
""" |
||||
|
||||
def __init__(self, list_of_columns: typing.List, original_cols_number: int): |
||||
"""Constructor Method |
||||
""" |
||||
self._times = list_of_columns[0] |
||||
self._actual_trajectory = list_of_columns[1] |
||||
self._original_cols_number = original_cols_number |
||||
|
||||
@property |
||||
def trajectory(self) -> np.ndarray: |
||||
return self._actual_trajectory[:, :self._original_cols_number - 1] |
||||
|
||||
@property |
||||
def complete_trajectory(self) -> np.ndarray: |
||||
return self._actual_trajectory |
||||
|
||||
@property |
||||
def times(self): |
||||
return self._times |
||||
|
||||
def size(self): |
||||
return self._actual_trajectory.shape[0] |
||||
|
||||
def __repr__(self): |
||||
return "Complete Trajectory Rows: " + str(self.size()) + "\n" + self.complete_trajectory.__repr__() + \ |
||||
"\nTimes Rows:" + str(self.times.size) + "\n" + self.times.__repr__() |
||||
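# A minimal sketch (hypothetical data): ``list_of_columns`` packs the time deltas |
||||
# and the int matrix produced by AbstractImporter.build_list_of_samples_array; |
||||
# with original_cols_number = 3 (Time plus two variables) the first two columns |
||||
# of the matrix are the current states and the last two the shifted ones. |
||||
#   times = np.array([0.1, 0.3, 0.2]) |
||||
#   values = np.array([[0, 1, 0, 1], [1, 1, 1, 0], [1, 0, 0, 0]]) |
||||
#   t = Trajectory([times, values], original_cols_number=3) |
||||
#   t.trajectory.shape          # -> (3, 2) |
||||
#   t.complete_trajectory.shape # -> (3, 4) |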
|
||||
|
@ -0,0 +1,4 @@ |
||||
from .abstract_importer import AbstractImporter |
||||
from .cache import Cache |
||||
from .json_importer import JsonImporter |
||||
from .sample_importer import SampleImporter |
@ -0,0 +1,164 @@ |
||||
|
||||
import typing |
||||
from abc import ABC, abstractmethod |
||||
|
||||
import numpy as np |
||||
import pandas as pd |
||||
|
||||
import copy |
||||
|
||||
#from sklearn.utils import resample |
||||
|
||||
|
||||
class AbstractImporter(ABC): |
||||
"""Abstract class that exposes all the necessary methods to process the trajectories and the net structure. |
||||
|
||||
:param file_path: the file path, or dataset name if you import already processed data |
||||
:type file_path: str |
||||
:param trajectory_list: Dataframe or numpy array containing the concatenation of all the processed trajectories |
||||
:type trajectory_list: typing.Union[pandas.DataFrame, numpy.ndarray] |
||||
:param variables: Dataframe containing the nodes labels and cardinalities |
||||
:type variables: pandas.DataFrame |
||||
:param prior_net_structure: Dataframe containing the structure of the network (edges) |
||||
:type prior_net_structure: pandas.DataFrame |
||||
:_sorter: A list containing the variables labels in the SAME order as the columns in ``concatenated_samples`` |
||||
|
||||
.. warning:: |
||||
The parameters ``variables`` and ``prior_net_structure`` HAVE to be properly constructed |
||||
as Pandas Dataframes with the following structure: |
||||
Header of _df_structure = [From_Node | To_Node] |
||||
Header of _df_variables = [Variable_Label | Variable_Cardinality] |
||||
See the tutorial on how to construct a correct ``concatenated_samples`` Dataframe/ndarray. |
||||
|
||||
.. note:: |
||||
See :class:``JsonImporter`` for an example implementation |
||||
|
||||
""" |
||||
|
||||
def __init__(self, file_path: str = None, trajectory_list: typing.Union[pd.DataFrame, np.ndarray] = None, |
||||
variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None): |
||||
"""Constructor |
||||
""" |
||||
self._file_path = file_path |
||||
self._df_samples_list = trajectory_list |
||||
self._concatenated_samples = [] |
||||
self._df_variables = variables |
||||
self._df_structure = prior_net_structure |
||||
self._sorter = None |
||||
super().__init__() |
||||
|
||||
@abstractmethod |
||||
def build_sorter(self, trajectory_header: object) -> typing.List: |
||||
"""Initializes the ``_sorter`` class member from a trajectory dataframe, extracting the header of the frame |
||||
and keeping ONLY the variables' symbolic labels, cutting out the time label in the header. |
||||
| |
||||
:param trajectory_header: an object that will be used to define the header |
||||
:type trajectory_header: object |
||||
:return: A list containing the processed header. |
||||
:rtype: List |
||||
""" |
||||
pass |
||||
|
||||
def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame, |
||||
columns_header: typing.List, shifted_cols_header: typing.List) \ |
||||
-> pd.DataFrame: |
||||
"""Computes the difference between each value present in th time column. |
||||
Copies and shift by one position up all the values present in the remaining columns. |
||||
|
||||
:param sample_frame: the traj to be processed |
||||
:type sample_frame: pandas.Dataframe |
||||
:param columns_header: the original header of sample_frame |
||||
:type columns_header: List |
||||
:param shifted_cols_header: a copy of columns_header with changed names of the contents |
||||
:type shifted_cols_header: List |
||||
:return: The processed dataframe |
||||
:rtype: pandas.Dataframe |
||||
|
||||
.. warning:: |
||||
the Dataframe ``sample_frame`` has to follow the column structure of this header: |
||||
Header of sample_frame = [Time | Variable values] |
||||
""" |
||||
sample_frame = copy.deepcopy(sample_frame) |
||||
sample_frame.iloc[:, 0] = sample_frame.iloc[:, 0].diff().shift(-1) |
||||
shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32') |
||||
shifted_cols.columns = shifted_cols_header |
||||
sample_frame = sample_frame.assign(**shifted_cols) |
||||
sample_frame.drop(sample_frame.tail(1).index, inplace=True) |
||||
return sample_frame |
||||
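# Worked example (hypothetical trajectory): given columns_header = ['X', 'Y'] and |
||||
# shifted_cols_header = ['XS', 'YS'], |
||||
#      Time  X  Y                 Time  X  Y  XS  YS |
||||
#   0   0.0  0  1              0   1.5  0  1   1   1 |
||||
#   1   1.5  1  1     ==>      1   0.7  1  1   1   0 |
||||
#   2   2.2  1  0 |
||||
# the Time column now holds the deltas, XS/YS hold the next states and the last |
||||
# row is dropped because it has no successor. |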
|
||||
def compute_row_delta_in_all_samples_frames(self, df_samples_list: typing.List) -> None: |
||||
"""Calls the method ``compute_row_delta_sigle_samples_frame`` on every dataframe present in the list |
||||
``df_samples_list``. |
||||
Concatenates the results in the dataframe ``concatenated_samples``. |
||||
|
||||
:param df_samples_list: the list of dataframes to be processed and concatenated |
||||
:type df_samples_list: List |
||||
|
||||
.. warning:: |
||||
The Dataframe sample_frame has to follow the column structure of this header: |
||||
Header of sample_frame = [Time | Variable values] |
||||
The class member self._sorter HAS to be properly INITIALIZED (See class members definition doc) |
||||
.. note:: |
||||
After the call of this method the class member ``concatenated_samples`` will contain all the processed |
||||
and merged trajectories |
||||
""" |
||||
if not self._sorter: |
||||
raise RuntimeError("The class member self._sorter has to be INITIALIZED!") |
||||
shifted_cols_header = [s + "S" for s in self._sorter] |
||||
compute_row_delta = self.compute_row_delta_sigle_samples_frame |
||||
proc_samples_list = [compute_row_delta(sample, self._sorter, shifted_cols_header) |
||||
for sample in df_samples_list] |
||||
self._concatenated_samples = pd.concat(proc_samples_list) |
||||
|
||||
complete_header = self._sorter[:] |
||||
complete_header.insert(0,'Time') |
||||
complete_header.extend(shifted_cols_header) |
||||
self._concatenated_samples = self._concatenated_samples[complete_header] |
||||
|
||||
def build_list_of_samples_array(self, concatenated_sample: pd.DataFrame) -> typing.List: |
||||
"""Builds a List containing the the delta times numpy array, and the complete transitions matrix |
||||
|
||||
:param concatenated_sample: the dataframe/array from which the times and the transitions matrix have to be extracted |
||||
and converted |
||||
:type concatenated_sample: pandas.Dataframe |
||||
:return: the resulting list of numpy arrays |
||||
:rtype: List |
||||
""" |
||||
|
||||
concatenated_array = concatenated_sample.to_numpy() |
||||
columns_list = [concatenated_array[:, 0], concatenated_array[:, 1:].astype(int)] |
||||
|
||||
return columns_list |
||||
|
||||
def clear_concatenated_frame(self) -> None: |
||||
"""Removes all values in the dataframe concatenated_samples. |
||||
""" |
||||
if isinstance(self._concatenated_samples, pd.DataFrame): |
||||
self._concatenated_samples = self._concatenated_samples.iloc[0:0] |
||||
|
||||
@abstractmethod |
||||
def dataset_id(self) -> object: |
||||
"""If the original dataset contains multiple dataset, this method returns a unique id to identify the current |
||||
dataset |
||||
""" |
||||
pass |
||||
|
||||
@property |
||||
def concatenated_samples(self) -> pd.DataFrame: |
||||
return self._concatenated_samples |
||||
|
||||
@property |
||||
def variables(self) -> pd.DataFrame: |
||||
return self._df_variables |
||||
|
||||
@property |
||||
def structure(self) -> pd.DataFrame: |
||||
return self._df_structure |
||||
|
||||
@property |
||||
def sorter(self) -> typing.List: |
||||
return self._sorter |
||||
|
||||
@property |
||||
def file_path(self) -> str: |
||||
return self._file_path |
@ -0,0 +1,58 @@ |
||||
|
||||
import typing |
||||
|
||||
from ..structure_graph.set_of_cims import SetOfCims |
||||
|
||||
|
||||
class Cache: |
||||
"""This class acts as a cache of ``SetOfCims`` objects for a node. |
||||
|
||||
:_list_of_sets_of_parents: a list of the parents' sets to which the ``SetOfCims`` in the cache at the SAME |
||||
index is related |
||||
:_actual_cache: a list of ``SetOfCims`` objects |
||||
""" |
||||
|
||||
def __init__(self): |
||||
"""Constructor Method |
||||
""" |
||||
self._list_of_sets_of_parents = [] |
||||
self._actual_cache = [] |
||||
|
||||
def find(self, parents_comb: typing.Set): #typing.Union[typing.Set, str] |
||||
""" |
||||
Tries to find in the cache, given the symbolic parents combination ``parents_comb``, the ``SetOfCims`` |
||||
related to that ``parents_comb``. |
||||
|
||||
:param parents_comb: the parents related to that ``SetOfCims`` |
||||
:type parents_comb: Set |
||||
:return: A ``SetOfCims`` object if the ``parents_comb`` index is found in ``_list_of_sets_of_parents``. |
||||
None otherwise. |
||||
:rtype: SetOfCims |
||||
""" |
||||
try: |
||||
#print("Cache State:", self.list_of_sets_of_indxs) |
||||
#print("Look For:", parents_comb) |
||||
result = self._actual_cache[self._list_of_sets_of_parents.index(parents_comb)] |
||||
#print("CACHE HIT!!!!", parents_comb) |
||||
return result |
||||
except ValueError: |
||||
return None |
||||
|
||||
def put(self, parents_comb: typing.Set, socim: SetOfCims): |
||||
"""Place in cache the ``SetOfCims`` object, and the related symbolic index ``parents_comb`` in |
||||
``_list_of_sets_of_parents``. |
||||
|
||||
:param parents_comb: the symbolic set index |
||||
:type parents_comb: Set |
||||
:param socim: the related SetOfCims object |
||||
:type socim: SetOfCims |
||||
""" |
||||
#print("Putting in cache:", parents_comb) |
||||
self._list_of_sets_of_parents.append(parents_comb) |
||||
self._actual_cache.append(socim) |
||||
|
||||
def clear(self): |
||||
"""Clear the contents both of ``__actual_cache`` and ``__list_of_sets_of_parents``. |
||||
""" |
||||
del self._list_of_sets_of_parents[:] |
||||
del self._actual_cache[:] |
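||||
# A minimal usage sketch (hypothetical parents' labels; ``socim`` stands for any |
||||
# already-built SetOfCims object): |
||||
#   cache = Cache() |
||||
#   cache.put(frozenset(['Y', 'Z']), socim) |
||||
#   cache.find(frozenset(['Y', 'Z']))  # -> socim (cache hit) |
||||
#   cache.find(frozenset(['Q']))       # -> None (cache miss) |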
@ -0,0 +1,176 @@ |
||||
import json |
||||
import typing |
||||
|
||||
import pandas as pd |
||||
|
||||
|
||||
from .abstract_importer import AbstractImporter |
||||
|
||||
|
||||
class JsonImporter(AbstractImporter): |
||||
"""Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare |
||||
the data in json extension. |
||||
|
||||
:param file_path: the path of the file that contains the data to be imported |
||||
:type file_path: string |
||||
:param samples_label: the reference key for the samples in the trajectories |
||||
:type samples_label: string |
||||
:param structure_label: the reference key for the structure of the network data |
||||
:type structure_label: string |
||||
:param variables_label: the reference key for the cardinalities of the nodes data |
||||
:type variables_label: string |
||||
:param time_key: the key used to identify the timestamps in each trajectory |
||||
:type time_key: string |
||||
:param variables_key: the key used to identify the names of the variables in the net |
||||
:type variables_key: string |
||||
:_array_indx: the index of the outer JsonArray to extract the data from |
||||
:type _array_indx: int |
||||
:_df_samples_list: a Dataframe list in which every dataframe contains a trajectory |
||||
:_raw_data: The raw contents of the json file to import |
||||
:type _raw_data: List |
||||
""" |
||||
|
||||
def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str, |
||||
variables_key: str): |
||||
"""Constructor method |
||||
|
||||
.. note:: |
||||
This constructor calls also the method ``read_json_file()``, so after the construction of the object |
||||
the class member ``_raw_data`` will contain the raw imported json data. |
||||
|
||||
""" |
||||
self._samples_label = samples_label |
||||
self._structure_label = structure_label |
||||
self._variables_label = variables_label |
||||
self._time_key = time_key |
||||
self._variables_key = variables_key |
||||
self._df_samples_list = None |
||||
self._array_indx = None |
||||
super(JsonImporter, self).__init__(file_path) |
||||
self._raw_data = self.read_json_file() |
||||
|
||||
def import_data(self, indx: int) -> None: |
||||
"""Implements the abstract method of :class:`AbstractImporter`. |
||||
|
||||
:param indx: the index of the outer JsonArray to extract the data from |
||||
:type indx: int |
||||
""" |
||||
self._array_indx = indx |
||||
self._df_samples_list = self.import_trajectories(self._raw_data) |
||||
self._sorter = self.build_sorter(self._df_samples_list[0]) |
||||
self.compute_row_delta_in_all_samples_frames(self._df_samples_list) |
||||
self.clear_data_frame_list() |
||||
self._df_structure = self.import_structure(self._raw_data) |
||||
self._df_variables = self.import_variables(self._raw_data) |
||||
|
||||
def import_trajectories(self, raw_data: typing.List) -> typing.List: |
||||
"""Imports the trajectories from the list of dicts ``raw_data``. |
||||
|
||||
:param raw_data: List of Dicts |
||||
:type raw_data: List |
||||
:return: List of dataframes containing all the trajectories |
||||
:rtype: List |
||||
""" |
||||
return self.normalize_trajectories(raw_data, self._array_indx, self._samples_label) |
||||
|
||||
def import_structure(self, raw_data: typing.List) -> pd.DataFrame: |
||||
"""Imports in a dataframe the data in the list raw_data at the key ``_structure_label`` |
||||
|
||||
:param raw_data: List of Dicts |
||||
:type raw_data: List |
||||
:return: Dataframe containing the starting and ending nodes of every arc of the network |
||||
:rtype: pandas.Dataframe |
||||
""" |
||||
return self.one_level_normalizing(raw_data, self._array_indx, self._structure_label) |
||||
|
||||
def import_variables(self, raw_data: typing.List) -> pd.DataFrame: |
||||
"""Imports the data in ``raw_data`` at the key ``_variables_label``. |
||||
|
||||
:param raw_data: List of Dicts |
||||
:type raw_data: List |
||||
:return: Dataframe containing the variables' symbolic labels and their cardinalities |
||||
:rtype: pandas.Dataframe |
||||
""" |
||||
return self.one_level_normalizing(raw_data, self._array_indx, self._variables_label) |
||||
|
||||
def read_json_file(self) -> typing.List: |
||||
"""Reads the JSON file in the path self.filePath. |
||||
|
||||
:return: The contents of the json file |
||||
:rtype: List |
||||
""" |
||||
with open(self._file_path) as f: |
||||
data = json.load(f) |
||||
return data |
||||
|
||||
def one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame: |
||||
"""Extracts the one-level nested data in the list ``raw_data`` at the index ``indx`` at the key ``key``. |
||||
|
||||
:param raw_data: List of Dicts |
||||
:type raw_data: List |
||||
:param indx: The index of the array from which the data have to be extracted |
||||
:type indx: int |
||||
:param key: the key of the Dicts from which to extract the data |
||||
:type key: string |
||||
:return: A normalized dataframe |
||||
:rtype: pandas.Dataframe |
||||
""" |
||||
return pd.DataFrame(raw_data[indx][key]) |
||||
|
||||
def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str) -> typing.List: |
||||
""" |
||||
Extracts the trajectories in ``raw_data`` at the index ``indx`` at the key ``trajectories_key``. |
||||
|
||||
:param raw_data: List of Dicts |
||||
:type raw_data: List |
||||
:param indx: The index of the array from which the data have to be extracted |
||||
:type indx: int |
||||
:param trajectories_key: the key of the trajectories objects |
||||
:type trajectories_key: string |
||||
:return: A list of dataframes containing the trajectories |
||||
:rtype: List |
||||
""" |
||||
dataframe = pd.DataFrame |
||||
smps = raw_data[indx][trajectories_key] |
||||
df_samples_list = [dataframe(sample) for sample in smps] |
||||
return df_samples_list |
||||
|
||||
def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List: |
||||
"""Implements the abstract method build_sorter of the :class:`AbstractImporter` for this dataset. |
||||
""" |
||||
columns_header = list(sample_frame.columns.values) |
||||
columns_header.remove(self._time_key) |
||||
return columns_header |
||||
|
||||
def clear_data_frame_list(self) -> None: |
||||
"""Removes all values present in the dataframes in the list ``_df_samples_list``. |
||||
""" |
||||
for indx in range(len(self._df_samples_list)): |
||||
self._df_samples_list[indx] = self._df_samples_list[indx].iloc[0:0] |
||||
|
||||
def dataset_id(self) -> object: |
||||
return self._array_indx |
||||
|
||||
def import_sampled_cims(self, raw_data: typing.List, indx: int, cims_key: str) -> typing.Dict: |
||||
"""Imports the synthetic CIMS in the dataset in a dictionary, using variables labels |
||||
as keys for the set of CIMS of a particular node. |
||||
|
||||
:param raw_data: List of Dicts |
||||
:type raw_data: List |
||||
:param indx: The index of the array from which the data have to be extracted |
||||
:type indx: int |
||||
:param cims_key: the key where the json object cims are placed |
||||
:type cims_key: string |
||||
:return: a dictionary containing the sampled CIMS for all the variables in the net |
||||
:rtype: Dictionary |
||||
""" |
||||
cims_for_all_vars = {} |
||||
for var in raw_data[indx][cims_key]: |
||||
sampled_cims_list = [] |
||||
cims_for_all_vars[var] = sampled_cims_list |
||||
for p_comb in raw_data[indx][cims_key][var]: |
||||
cims_for_all_vars[var].append(pd.DataFrame(raw_data[indx][cims_key][var][p_comb]).to_numpy()) |
||||
return cims_for_all_vars |
||||
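# A minimal usage sketch (hypothetical file path and keys, to be adapted to the |
||||
# actual layout of your json file): |
||||
#   importer = JsonImporter('./data/networks.json', samples_label='samples', |
||||
#                           structure_label='dyn.str', variables_label='variables', |
||||
#                           time_key='Time', variables_key='Name') |
||||
#   importer.import_data(0)  # work on the first JsonArray entry |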
|
||||
|
||||
|
@ -0,0 +1,65 @@ |
||||
import json |
||||
import typing |
||||
|
||||
import pandas as pd |
||||
import numpy as np |
||||
|
||||
from .abstract_importer import AbstractImporter |
||||
|
||||
|
||||
|
||||
class SampleImporter(AbstractImporter): |
||||
"""Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare |
||||
the data loaded directly by using DataFrame |
||||
|
||||
:param trajectory_list: the data that describes the trajectories |
||||
:type trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List] |
||||
:param variables: the data that describes the variables with name and cardinality |
||||
:type variables: typing.Union[pd.DataFrame, np.ndarray, typing.List] |
||||
:param prior_net_structure: the data of the real structure, if it exists |
||||
:type prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List] |
||||
|
||||
:_df_samples_list: a Dataframe list in which every dataframe contains a trajectory |
||||
:_raw_data: The raw contents of the json file to import |
||||
:type _raw_data: List |
||||
""" |
||||
|
||||
def __init__(self, |
||||
trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None, |
||||
variables: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None, |
||||
prior_net_structure: typing.Union[pd.DataFrame, np.ndarray,typing.List] = None): |
||||
|
||||
'If the data are not DataFrames, they will be converted' |
||||
if isinstance(variables,list) or isinstance(variables,np.ndarray): |
||||
variables = pd.DataFrame(variables) |
||||
if isinstance(prior_net_structure, list) or isinstance(prior_net_structure, np.ndarray): |
||||
prior_net_structure = pd.DataFrame(prior_net_structure) |
||||
|
||||
super(SampleImporter, self).__init__(trajectory_list =trajectory_list, |
||||
variables= variables, |
||||
prior_net_structure=prior_net_structure) |
||||
|
||||
def import_data(self, header_column = None): |
||||
|
||||
if header_column is not None: |
||||
self._sorter = header_column |
||||
else: |
||||
self._sorter = self.build_sorter(self._df_samples_list[0]) |
||||
|
||||
samples_list= self._df_samples_list |
||||
|
||||
if isinstance(samples_list, np.ndarray): |
||||
samples_list = samples_list.tolist() |
||||
|
||||
self.compute_row_delta_in_all_samples_frames(samples_list) |
||||
|
||||
def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List: |
||||
"""Implements the abstract method build_sorter of the :class:`AbstractImporter` in order to get the ordered variables list. |
||||
""" |
||||
columns_header = list(sample_frame.columns.values) |
||||
del columns_header[0] |
||||
return columns_header |
||||
|
||||
|
||||
def dataset_id(self) -> object: |
||||
pass |
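||||
# A minimal usage sketch (assumes ``trajectories`` is a list of pandas DataFrames |
||||
# whose first column is 'Time', followed by the variables' columns): |
||||
#   importer = SampleImporter(trajectory_list=trajectories, |
||||
#                             variables=pd.DataFrame([['X', 3], ['Y', 3]]), |
||||
#                             prior_net_structure=pd.DataFrame([['X', 'Y']])) |
||||
#   importer.import_data(['X', 'Y']) |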
@ -0,0 +1,8 @@ |
||||
import PyCTBN.PyCTBN.estimators |
||||
from PyCTBN.PyCTBN.estimators import * |
||||
import PyCTBN.PyCTBN.optimizers |
||||
from PyCTBN.PyCTBN.optimizers import * |
||||
import PyCTBN.PyCTBN.structure_graph |
||||
from PyCTBN.PyCTBN.structure_graph import * |
||||
import PyCTBN.PyCTBN.utility |
||||
from PyCTBN.PyCTBN.utility import * |
@ -0,0 +1,5 @@ |
||||
from .fam_score_calculator import FamScoreCalculator |
||||
from .parameters_estimator import ParametersEstimator |
||||
from .structure_estimator import StructureEstimator |
||||
from .structure_constraint_based_estimator import StructureConstraintBasedEstimator |
||||
from .structure_score_based_estimator import StructureScoreBasedEstimator |
@ -0,0 +1,272 @@ |
||||
|
||||
import itertools |
||||
import json |
||||
import typing |
||||
|
||||
import networkx as nx |
||||
import numpy as np |
||||
from networkx.readwrite import json_graph |
||||
|
||||
from math import log |
||||
|
||||
from scipy.special import loggamma |
||||
from random import choice |
||||
|
||||
from ..structure_graph.set_of_cims import SetOfCims |
||||
from ..structure_graph.network_graph import NetworkGraph |
||||
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix |
||||
|
||||
|
||||
''' |
||||
|
||||
''' |
||||
|
||||
|
||||
class FamScoreCalculator: |
||||
""" |
||||
Has the task of calculating the FamScore of a node by using a Bayesian score function |
||||
""" |
||||
|
||||
def __init__(self): |
||||
#np.seterr('raise') |
||||
pass |
||||
|
||||
# region theta |
||||
|
||||
def marginal_likelihood_theta(self, |
||||
cims: ConditionalIntensityMatrix, |
||||
alpha_xu: float, |
||||
alpha_xxu: float): |
||||
""" |
||||
Calculate the value of the marginal likelihood over theta of the node |
||||
|
||||
:param cims: np.array with all the node's cims |
||||
:type cims: np.array |
||||
:param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 0.1 |
||||
:type alpha_xu: float |
||||
:param alpha_xxu: distributed hyperparameter over the CTBN’s theta parameters |
||||
:type alpha_xxu: float |
||||
|
||||
:return: the value of the marginal likelihood over theta |
||||
:rtype: float |
||||
""" |
||||
return np.sum( |
||||
[self.variable_cim_xu_marginal_likelihood_theta(cim, |
||||
alpha_xu, |
||||
alpha_xxu) |
||||
for cim in cims]) |
||||
|
||||
def variable_cim_xu_marginal_likelihood_theta(self, |
||||
cim: ConditionalIntensityMatrix, |
||||
alpha_xu: float, |
||||
alpha_xxu: float): |
||||
""" |
||||
Calculate the value of the marginal likelihood over theta given a cim |
||||
|
||||
:param cim: A conditional_intensity_matrix object with the sufficient statistics |
||||
:type cim: class:'ConditionalIntensityMatrix' |
||||
:param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 0.1 |
||||
:type alpha_xu: float |
||||
:param alpha_xxu: distributed hyperparameter over the CTBN’s theta parameters |
||||
:type alpha_xxu: float |
||||
|
||||
:return: the value of the marginal likelihood over theta |
||||
:rtype: float |
||||
""" |
||||
|
||||
'get cim length' |
||||
values = len(cim._state_residence_times) |
||||
|
||||
'compute the marginal likelihood for the current cim' |
||||
return np.sum([ |
||||
self.single_cim_xu_marginal_likelihood_theta( |
||||
index, |
||||
cim, |
||||
alpha_xu, |
||||
alpha_xxu) |
||||
for index in range(values)]) |
||||
|
||||
def single_cim_xu_marginal_likelihood_theta(self, |
||||
index: int, |
||||
cim: ConditionalIntensityMatrix, |
||||
alpha_xu: float, |
||||
alpha_xxu: float): |
||||
""" |
||||
Calculate the marginal likelihood over theta when the node assumes a specific value |
||||
under a specific parents' assignment |
||||
|
||||
:param index: the index of the node's state to consider |
||||
:param cim: A conditional_intensity_matrix object with the sufficient statistics |
||||
:type cim: class:'ConditionalIntensityMatrix' |
||||
:param alpha_xu: hyperparameter over the CTBN’s q parameters |
||||
:type alpha_xu: float |
||||
:param alpha_xxu: distributed hyperparameter over the CTBN’s theta parameters |
||||
:type alpha_xxu: float |
||||
|
||||
:return: the value of the marginal likelihood over theta when the node assumes a specific value |
||||
:rtype: float |
||||
""" |
||||
|
||||
values = list(range(len(cim._state_residence_times))) |
||||
|
||||
'remove the index because of the x != x^ condition in the summation ' |
||||
values.remove(index) |
||||
|
||||
'uncomment for alpha xx not uniform' |
||||
#alpha_xxu = alpha_xu * cim.state_transition_matrix[index,index_x_first] / cim.state_transition_matrix[index, index]) |
||||
|
||||
return (loggamma(alpha_xu) - loggamma(alpha_xu + cim.state_transition_matrix[index, index])) \ |
||||
+ \ |
||||
np.sum([self.single_internal_cim_xxu_marginal_likelihood_theta( |
||||
cim.state_transition_matrix[index,index_x_first], |
||||
alpha_xxu) |
||||
for index_x_first in values]) |
||||
|
||||
|
||||
def single_internal_cim_xxu_marginal_likelihood_theta(self, |
||||
M_xxu_suff_stats: float, |
||||
alpha_xxu: float=1): |
||||
"""Calculate the second part of the marginal likelihood over theta formula |
||||
|
||||
:param M_xxu_suff_stats: value of the sufficient statistic M[xx'|u] |
||||
:type M_xxu_suff_stats: float |
||||
:param alpha_xxu: distributed hyperparameter over the CTBN’s theta parameters |
||||
:type alpha_xxu: float |
||||
|
||||
:return: the value of the marginal likelihood over theta when the node assumes a specific value |
||||
:rtype: float |
||||
""" |
||||
return loggamma(alpha_xxu+M_xxu_suff_stats) - loggamma(alpha_xxu) |
||||
|
||||
# endregion |
||||
|
||||
# region q |
||||
|
||||
def marginal_likelihood_q(self, |
||||
cims: np.array, |
||||
tau_xu: float=0.1, |
||||
alpha_xu: float=1): |
||||
""" |
||||
Calculate the value of the marginal likelihood over q of the node identified by the label node_id |
||||
|
||||
:param cims: np.array with all the node's cims |
||||
:type cims: np.array |
||||
:param tau_xu: hyperparameter over the CTBN’s q parameters |
||||
:type tau_xu: float |
||||
:param alpha_xu: hyperparameter over the CTBN’s q parameters |
||||
:type alpha_xu: float |
||||
|
||||
|
||||
:return: the value of the marginal likelihood over q |
||||
:rtype: float |
||||
""" |
||||
|
||||
return np.sum([self.variable_cim_xu_marginal_likelihood_q(cim, tau_xu, alpha_xu) for cim in cims]) |
||||
|
||||
def variable_cim_xu_marginal_likelihood_q(self, |
||||
cim: ConditionalIntensityMatrix, |
||||
tau_xu: float=0.1, |
||||
alpha_xu: float=1): |
||||
""" |
||||
Calculate the value of the marginal likelihood over q given a cim |
||||
|
||||
:param cim: A conditional_intensity_matrix object with the sufficient statistics |
||||
:type cim: class:'ConditionalIntensityMatrix' |
||||
:param tau_xu: hyperparameter over the CTBN’s q parameters |
||||
:type tau_xu: float |
||||
:param alpha_xu: hyperparameter over the CTBN’s q parameters |
||||
:type alpha_xu: float |
||||
|
||||
|
||||
:return: the value of the marginal likelihood over q |
||||
:rtype: float |
||||
""" |
||||
|
||||
'get cim length' |
||||
values=len(cim._state_residence_times) |
||||
|
||||
'compute the marginal likelihood for the current cim' |
||||
return np.sum([ |
||||
self.single_cim_xu_marginal_likelihood_q( |
||||
cim.state_transition_matrix[index, index], |
||||
cim._state_residence_times[index], |
||||
tau_xu, |
||||
alpha_xu) |
||||
for index in range(values)]) |
||||
|
||||
|
||||
def single_cim_xu_marginal_likelihood_q(self, |
||||
M_xu_suff_stats: float, |
||||
T_xu_suff_stats: float, |
||||
tau_xu: float=0.1, |
||||
alpha_xu: float=1): |
||||
""" |
||||
Calculate the marginal likelihood over q when the node assumes a specific value |
||||
under a specific parents' assignment |
||||
|
||||
:param M_xu_suff_stats: value of the sufficient statistic M[x|u] |
||||
:type M_xu_suff_stats: float |
||||
:param T_xu_suff_stats: value of the sufficient statistic T[x|u] |
||||
:type T_xu_suff_stats: float |
||||
:param tau_xu: hyperparameter over the CTBN’s q parameters |
||||
:type tau_xu: float |
||||
:param alpha_xu: hyperparameter over the CTBN’s q parameters |
||||
:type alpha_xu: float |
||||
|
||||
|
||||
:return: the value of the marginal likelihood over q when the node assumes a specific value |
||||
:rtype: float |
||||
""" |
||||
return ( |
||||
loggamma(alpha_xu + M_xu_suff_stats + 1) + |
||||
(log(tau_xu) |
||||
* |
||||
(alpha_xu+1)) |
||||
) \ |
||||
- \ |
||||
(loggamma(alpha_xu + 1)+( |
||||
log(tau_xu + T_xu_suff_stats) |
||||
* |
||||
(alpha_xu + M_xu_suff_stats + 1)) |
||||
) |
||||
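# In closed form the quantity returned above is the log of |
||||
#   Gamma(alpha + M + 1) * tau^(alpha + 1) |
||||
#   --------------------------------------------- |
||||
#   Gamma(alpha + 1) * (tau + T)^(alpha + M + 1) |
||||
# with alpha = alpha_xu, tau = tau_xu, M = M[x|u] and T = T[x|u]. |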
|
||||
# endregion |
||||
|
||||
def get_fam_score(self, |
||||
cims: np.array, |
||||
tau_xu: float=0.1, |
||||
alpha_xu: float=1): |
||||
""" |
||||
Calculate the FamScore value of the node |
||||
|
||||
|
||||
:param cims: np.array with all the node's cims |
||||
:type cims: np.array |
||||
:param tau_xu: hyperparameter over the CTBN’s q parameters, default to 0.1 |
||||
:type tau_xu: float, optional |
||||
:param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 1 |
||||
:type alpha_xu: float, optional |
||||
|
||||
|
||||
:return: the FamScore value of the node |
||||
:rtype: float |
||||
""" |
||||
#print("------") |
||||
#print(self.marginal_likelihood_q(cims, |
||||
# tau_xu, |
||||
# alpha_xu)) |
||||
|
||||
#print(self.marginal_likelihood_theta(cims, |
||||
# alpha_xu, |
||||
# alpha_xxu)) |
||||
'calculate alpha_xxu as a uniform distribution' |
||||
alpha_xxu = alpha_xu /(len(cims[0]._state_residence_times) - 1) |
||||
|
||||
return self.marginal_likelihood_q(cims, |
||||
tau_xu, |
||||
alpha_xu) \ |
||||
+ \ |
||||
self.marginal_likelihood_theta(cims, |
||||
alpha_xu, |
||||
alpha_xxu) |
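
# Hedged usage sketch (illustrative, not part of the library): a minimal stub
# stands in for ConditionalIntensityMatrix, and every number below is a made-up
# assumption. Copy into a package context to run; the relative imports above
# block direct execution of this module.
if __name__ == '__main__':
    class _CimStub:
        def __init__(self, times, transitions):
            self._state_residence_times = times
            self.state_transition_matrix = transitions

    cim = _CimStub(np.array([1.5, 2.0]), np.array([[3, 3], [2, 2]]))
    calc = FamScoreCalculator()
    # FamScore = marginal likelihood over q + marginal likelihood over theta
    print(calc.get_fam_score(np.array([cim]), tau_xu=0.1, alpha_xu=1))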
@ -0,0 +1,143 @@

import sys
sys.path.append('../')
import numpy as np

from ..structure_graph.network_graph import NetworkGraph
from ..structure_graph.set_of_cims import SetOfCims
from ..structure_graph.trajectory import Trajectory


class ParametersEstimator(object):
    """Has the task of computing the cims of a particular node given the trajectories and the net structure
    in the graph ``_net_graph``.

    :param trajectories: the trajectories
    :type trajectories: Trajectory
    :param net_graph: the net structure
    :type net_graph: NetworkGraph
    :_single_set_of_cims: the set of cims object that will hold the cims of the node
    """

    def __init__(self, trajectories: Trajectory, net_graph: NetworkGraph):
        """Constructor Method
        """
        self._trajectories = trajectories
        self._net_graph = net_graph
        self._single_set_of_cims = None

    def fast_init(self, node_id: str) -> None:
        """Initializes all the necessary structures for the parameters estimation for the node ``node_id``.

        :param node_id: the node label
        :type node_id: string
        """
        p_vals = self._net_graph._aggregated_info_about_nodes_parents[2]
        node_states_number = self._net_graph.get_states_number(node_id)
        self._single_set_of_cims = SetOfCims(node_id, p_vals, node_states_number, self._net_graph.p_combs)

    def compute_parameters_for_node(self, node_id: str) -> SetOfCims:
        """Compute the CIMS of the node identified by the label ``node_id``.

        :param node_id: the node label
        :type node_id: string
        :return: A SetOfCims object filled with the computed CIMS
        :rtype: SetOfCims
        """
        node_indx = self._net_graph.get_node_indx(node_id)
        state_res_times = self._single_set_of_cims._state_residence_times
        transition_matrices = self._single_set_of_cims._transition_matrices
        ParametersEstimator.compute_state_res_time_for_node(self._trajectories.times,
                                                            self._trajectories.trajectory,
                                                            self._net_graph.time_filtering,
                                                            self._net_graph.time_scalar_indexing_strucure,
                                                            state_res_times)
        ParametersEstimator.compute_state_transitions_for_a_node(node_indx, self._trajectories.complete_trajectory,
                                                                 self._net_graph.transition_filtering,
                                                                 self._net_graph.transition_scalar_indexing_structure,
                                                                 transition_matrices)
        self._single_set_of_cims.build_cims(state_res_times, transition_matrices)
        return self._single_set_of_cims

    @staticmethod
    def compute_state_res_time_for_node(times: np.ndarray, trajectory: np.ndarray,
                                        cols_filter: np.ndarray, scalar_indexes_struct: np.ndarray,
                                        T: np.ndarray) -> None:
        """Compute the state residence times for a node and fill the matrix ``T`` with the results

        :param times: the times deltas vector
        :type times: numpy.array
        :param trajectory: the trajectory
        :type trajectory: numpy.ndArray
        :param cols_filter: the columns filtering structure
        :type cols_filter: numpy.array
        :param scalar_indexes_struct: the indexing structure
        :type scalar_indexes_struct: numpy.array
        :param T: the state residence times vectors
        :type T: numpy.ndArray
        """
        # flatten every (node state, parents states) configuration to one scalar index,
        # then sum the time deltas of the rows that share the same index
        T[:] = np.bincount(np.sum(trajectory[:, cols_filter] * scalar_indexes_struct / scalar_indexes_struct[0], axis=1)
                           .astype(np.int),
                           times,
                           minlength=scalar_indexes_struct[-1]).reshape(-1, T.shape[1])

    @staticmethod
    def compute_state_transitions_for_a_node(node_indx: int, trajectory: np.ndarray, cols_filter: np.ndarray,
                                             scalar_indexing: np.ndarray, M: np.ndarray) -> None:
        """Compute the state transitions for a node and fill the matrices ``M`` with the results.

        :param node_indx: the index of the node
        :type node_indx: int
        :param trajectory: the trajectory
        :type trajectory: numpy.ndArray
        :param cols_filter: the columns filtering structure
        :type cols_filter: numpy.array
        :param scalar_indexing: the indexing structure
        :type scalar_indexing: numpy.array
        :param M: the state transitions matrices
        :type M: numpy.ndArray
        """
        diag_indices = np.array([x * M.shape[1] + x % M.shape[1] for x in range(M.shape[0] * M.shape[1])],
                                dtype=np.int64)
        # keep only the rows where the node actually changes state
        trj_tmp = trajectory[trajectory[:, int(trajectory.shape[1] / 2) + node_indx].astype(np.int) >= 0]
        M[:] = np.bincount(np.sum(trj_tmp[:, cols_filter] * scalar_indexing / scalar_indexing[0], axis=1).astype(np.int),
                           minlength=scalar_indexing[-1]).reshape(-1, M.shape[1], M.shape[2])
        # the diagonal holds the total number of transitions leaving each state
        M_raveled = M.ravel()
        M_raveled[diag_indices] = 0
        M_raveled[diag_indices] = np.sum(M, axis=2).ravel()

    # NOTE: the two methods below are legacy helpers; they reference an ``acims``
    # module and attributes (``self.net_graph``, ``self.sample_path``) that are not
    # defined in this file, and their call signatures no longer match the static
    # methods above.
    def init_sets_cims_container(self):
        self.sets_of_cims_struct = acims.SetsOfCimsContainer(self.net_graph.nodes,
                                                             self.net_graph.nodes_values,
                                                             self.net_graph.get_ordered_by_indx_parents_values_for_all_nodes(),
                                                             self.net_graph.p_combs)

    def compute_parameters(self):
        for indx, aggr in enumerate(zip(self.net_graph.nodes, self.sets_of_cims_struct.sets_of_cims)):
            self.compute_state_res_time_for_node(self.net_graph.get_node_indx(aggr[0]), self.sample_path.trajectories.times,
                                                 self.sample_path.trajectories.trajectory,
                                                 self.net_graph.time_filtering[indx],
                                                 self.net_graph.time_scalar_indexing_strucure[indx],
                                                 aggr[1]._state_residence_times)
            self.compute_state_transitions_for_a_node(self.net_graph.get_node_indx(aggr[0]),
                                                      self.sample_path.trajectories.complete_trajectory,
                                                      self.net_graph.transition_filtering[indx],
                                                      self.net_graph.transition_scalar_indexing_structure[indx],
                                                      aggr[1]._transition_matrices)
            aggr[1].build_cims(aggr[1]._state_residence_times, aggr[1]._transition_matrices)
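
# Hedged standalone sketch of the flattening trick used above (copy into a plain
# script to run; this module's relative imports block direct execution). The
# column layout and cardinalities are illustrative assumptions.
if __name__ == '__main__':
    states = np.array([[0, 0], [1, 0], [0, 1], [1, 1]])  # node column + one parent column
    deltas = np.array([0.5, 1.0, 0.25, 2.0])             # time spent in each row's state
    scalar_idx = np.array([2, 4])                        # cumprod of the cardinalities [2, 2]
    flat = np.sum(states * scalar_idx / scalar_idx[0], axis=1).astype(int)
    T = np.bincount(flat, deltas, minlength=scalar_idx[-1]).reshape(-1, 2)
    print(T)  # row = parent configuration, column = node state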
@ -0,0 +1,238 @@

import itertools
import json
import typing

import networkx as nx
import numpy as np
from networkx.readwrite import json_graph
import os
from scipy.stats import chi2 as chi2_dist
from scipy.stats import f as f_dist
from tqdm import tqdm

from ..utility.cache import Cache
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
from ..structure_graph.network_graph import NetworkGraph
from .parameters_estimator import ParametersEstimator
from .structure_estimator import StructureEstimator
from ..structure_graph.sample_path import SamplePath
from ..structure_graph.structure import Structure
from ..optimizers.constraint_based_optimizer import ConstraintBasedOptimizer

import concurrent.futures

import multiprocessing
from multiprocessing import Pool


class StructureConstraintBasedEstimator(StructureEstimator):
    """
    Has the task of estimating the network structure given the trajectories in samplepath by using a constraint-based approach.

    :param sample_path: the _sample_path object containing the trajectories and the real structure
    :type sample_path: SamplePath
    :param exp_test_alfa: the significance level for the exponential Hp test
    :type exp_test_alfa: float
    :param chi_test_alfa: the significance level for the chi Hp test
    :type chi_test_alfa: float
    :_nodes: the nodes labels
    :_nodes_vals: the nodes cardinalities
    :_nodes_indxs: the nodes indexes
    :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes``
    :_cache: the Cache object
    """

    def __init__(self, sample_path: SamplePath, exp_test_alfa: float, chi_test_alfa: float,
                 known_edges: typing.List = [], thumb_threshold: int = 25):
        super().__init__(sample_path, known_edges)
        self._exp_test_sign = exp_test_alfa
        self._chi_test_alfa = chi_test_alfa
        self._thumb_threshold = thumb_threshold
        self._cache = Cache()

    def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int,
                      tot_vars_count: int, parent_indx, child_indx) -> bool:
        """Performs a complete independence test on the directed graphs G1 = {test_child U parent_set}
        G2 = {G1 U test_parent} (added as an additional parent of the test_child).
        Generates all the necessary structures and data to perform the tests.

        :param test_parent: the node label of the test parent
        :type test_parent: string
        :param test_child: the node label of the child
        :type test_child: string
        :param parent_set: the common parent set
        :type parent_set: List
        :param child_states_numb: the cardinality of the ``test_child``
        :type child_states_numb: int
        :param tot_vars_count: the total number of variables in the net
        :type tot_vars_count: int
        :return: True iff test_child and test_parent are independent given the sep_set parent_set. False otherwise
        :rtype: bool
        """
        p_set = parent_set[:]
        complete_info = parent_set[:]
        complete_info.append(test_child)

        parents = np.array(parent_set)
        parents = np.append(parents, test_parent)
        sorted_parents = self._nodes[np.isin(self._nodes, parents)]
        cims_filter = sorted_parents != test_parent

        p_set.insert(0, test_parent)
        sofc2 = self._cache.find(set(p_set))

        if not sofc2:
            complete_info.append(test_parent)
            bool_mask2 = np.isin(self._nodes, complete_info)
            l2 = list(self._nodes[bool_mask2])
            indxs2 = self._nodes_indxs[bool_mask2]
            vals2 = self._nodes_vals[bool_mask2]
            # wrap test_child in a list so that multi-character labels form correct edges
            eds2 = list(itertools.product(p_set, [test_child]))
            s2 = Structure(l2, indxs2, vals2, eds2, tot_vars_count)
            g2 = NetworkGraph(s2)
            g2.fast_init(test_child)
            p2 = ParametersEstimator(self._sample_path.trajectories, g2)
            p2.fast_init(test_child)
            sofc2 = p2.compute_parameters_for_node(test_child)
            self._cache.put(set(p_set), sofc2)

        del p_set[0]
        sofc1 = self._cache.find(set(p_set))
        if not sofc1:
            # NOTE: relies on ``g2`` from the branch above; if ``sofc2`` came from
            # the cache while ``sofc1`` did not, ``g2`` is undefined here
            g2.remove_node(test_parent)
            g2.fast_init(test_child)
            p2 = ParametersEstimator(self._sample_path.trajectories, g2)
            p2.fast_init(test_child)
            sofc1 = p2.compute_parameters_for_node(test_child)
            self._cache.put(set(p_set), sofc1)
        thumb_value = 0.0
        if child_states_numb > 2:
            parent_val = self._sample_path.structure.get_states_number(test_parent)
            bool_mask_vals = np.isin(self._nodes, parent_set)
            parents_vals = self._nodes_vals[bool_mask_vals]
            thumb_value = self.compute_thumb_value(parent_val, child_states_numb, parents_vals)
        for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs):
            cond_cims = sofc2.filter_cims_with_mask(cims_filter, p_comb)
            for cim2 in cond_cims:
                if not self.independence_test(child_states_numb, cim1, cim2, thumb_value, parent_indx, child_indx):
                    return False
        return True

    def independence_test(self, child_states_numb: int, cim1: ConditionalIntensityMatrix,
                          cim2: ConditionalIntensityMatrix, thumb_value: float, parent_indx, child_indx) -> bool:
        """Compute the actual independence test using two cims.
        The exponential test is performed first; if its null hypothesis is not rejected,
        the chi-square test is performed as well.

        :param child_states_numb: the cardinality of the test child
        :type child_states_numb: int
        :param cim1: a cim belonging to the graph without test parent
        :type cim1: ConditionalIntensityMatrix
        :param cim2: a cim belonging to the graph with test parent
        :type cim2: ConditionalIntensityMatrix
        :return: True iff both tests do NOT reject the null hypothesis of independence. False otherwise.
        :rtype: bool
        """
        M1 = cim1.state_transition_matrix
        M2 = cim2.state_transition_matrix
        r1s = M1.diagonal()
        r2s = M2.diagonal()
        C1 = cim1.cim
        C2 = cim2.cim
        if child_states_numb > 2:
            # rule-of-thumb check: refuse to test when the data backing the test is too thin
            if (np.sum(np.diagonal(M1)) / thumb_value) < self._thumb_threshold:
                self._removable_edges_matrix[parent_indx][child_indx] = False
                return False
        # exponential test: compare the ratio of the q parameters against F quantiles
        F_stats = C2.diagonal() / C1.diagonal()
        exp_alfa = self._exp_test_sign
        for val in range(0, child_states_numb):
            if F_stats[val] < f_dist.ppf(exp_alfa / 2, r1s[val], r2s[val]) or \
                    F_stats[val] > f_dist.ppf(1 - exp_alfa / 2, r1s[val], r2s[val]):
                return False
        # chi-square test on the off-diagonal transition counts
        M1_no_diag = M1[~np.eye(M1.shape[0], dtype=bool)].reshape(M1.shape[0], -1)
        M2_no_diag = M2[~np.eye(M2.shape[0], dtype=bool)].reshape(M2.shape[0], -1)
        chi_2_quantile = chi2_dist.ppf(1 - self._chi_test_alfa, child_states_numb - 1)
        Ks = np.sqrt(r1s / r2s)
        Ls = np.sqrt(r2s / r1s)
        for val in range(0, child_states_numb):
            Chi = np.sum(np.power(Ks[val] * M2_no_diag[val] - Ls[val] * M1_no_diag[val], 2) /
                         (M1_no_diag[val] + M2_no_diag[val]))
            if Chi > chi_2_quantile:
                return False
        return True

    def compute_thumb_value(self, parent_val, child_val, parent_set_vals):
        """Compute the value to test against the thumb_threshold.

        :param parent_val: test parent's variable cardinality
        :type parent_val: int
        :param child_val: test child's variable cardinality
        :type child_val: int
        :param parent_set_vals: the cardinalities of the nodes in the current sep-set
        :type parent_set_vals: List
        :return: the thumb value for the current independence test
        :rtype: int
        """
        df = (child_val - 1) ** 2
        df = df * parent_val
        for v in parent_set_vals:
            df = df * v
        return df

    def one_iteration_of_CTPC_algorithm(self, var_id: str, tot_vars_count: int) -> typing.List:
        """Performs an iteration of the CTPC algorithm using the node ``var_id`` as ``test_child``.

        :param var_id: the node label of the test child
        :type var_id: string
        :return: the list of the estimated parent edges for ``var_id``
        :rtype: List
        """
        optimizer_obj = ConstraintBasedOptimizer(
            node_id=var_id,
            structure_estimator=self,
            tot_vars_count=tot_vars_count)
        return optimizer_obj.optimize_structure()

    def ctpc_algorithm(self, disable_multiprocessing: bool = False):
        """Compute the CTPC algorithm over the entire net.

        :param disable_multiprocessing: True to run the estimation on a single process
        :type disable_multiprocessing: bool, optional
        :return: the set of the estimated edges
        :rtype: Set
        """
        ctpc_algo = self.one_iteration_of_CTPC_algorithm
        total_vars_numb = self._sample_path.total_variables_count

        n_nodes = len(self._nodes)

        total_vars_numb_array = [total_vars_numb] * n_nodes

        # get the number of CPUs
        cpu_count = multiprocessing.cpu_count()

        # remove all the edges from the structure
        self._sample_path.structure.clean_structure_edges()

        # estimate the best parents for each node
        if disable_multiprocessing:
            print("Multiprocessing disabled")
            cpu_count = 1
            list_edges_partial = [ctpc_algo(n, total_vars_numb) for n in self._nodes]
        else:
            with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor:
                list_edges_partial = executor.map(ctpc_algo,
                                                  self._nodes,
                                                  total_vars_numb_array)

        return set(itertools.chain.from_iterable(list_edges_partial))

    def estimate_structure(self, disable_multiprocessing: bool = False):
        return self.ctpc_algorithm(disable_multiprocessing=disable_multiprocessing)
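
# Hedged standalone sketch of the exponential (F) part of the test above (copy
# into a plain script to run; the relative imports block direct execution).
# Counts, ratio and significance level are illustrative assumptions.
if __name__ == '__main__':
    r1, r2 = 40, 35   # transition counts backing the two q estimates
    F_stat = 1.2      # ratio of the two estimated q parameters for one state
    alfa = 0.1
    lower = f_dist.ppf(alfa / 2, r1, r2)
    upper = f_dist.ppf(1 - alfa / 2, r1, r2)
    print(lower <= F_stat <= upper)  # True: the null hypothesis is not rejected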
@ -0,0 +1,187 @@

import itertools
import json
import os
import typing

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
from networkx.readwrite import json_graph

from abc import ABC

import abc

from ..utility.cache import Cache
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
from ..structure_graph.network_graph import NetworkGraph
from .parameters_estimator import ParametersEstimator
from ..structure_graph.sample_path import SamplePath
from ..structure_graph.structure import Structure


class StructureEstimator(object):
    """Has the task of estimating the network structure given the trajectories in ``samplepath``.

    :param sample_path: the _sample_path object containing the trajectories and the real structure
    :type sample_path: SamplePath
    :_nodes: the nodes labels
    :_nodes_vals: the nodes cardinalities
    :_nodes_indxs: the nodes indexes
    :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes``
    """

    def __init__(self, sample_path: SamplePath, known_edges: typing.List = None):
        self._sample_path = sample_path
        self._nodes = np.array(self._sample_path.structure.nodes_labels)
        self._nodes_vals = self._sample_path.structure.nodes_values
        self._nodes_indxs = self._sample_path.structure.nodes_indexes
        self._removable_edges_matrix = self.build_removable_edges_matrix(known_edges)
        self._complete_graph = StructureEstimator.build_complete_graph(self._sample_path.structure.nodes_labels)

    def build_removable_edges_matrix(self, known_edges: typing.List):
        """Builds a boolean matrix that shows whether an edge can be removed, based on the given prior knowledge:

        :param known_edges: the list of known edges
        :type known_edges: List
        :return: a boolean matrix
        :rtype: np.ndarray
        """
        tot_vars_count = self._sample_path.total_variables_count
        complete_adj_matrix = np.full((tot_vars_count, tot_vars_count), True)
        if known_edges:
            for edge in known_edges:
                i = self._sample_path.structure.get_node_indx(edge[0])
                j = self._sample_path.structure.get_node_indx(edge[1])
                complete_adj_matrix[i][j] = False
        return complete_adj_matrix

    @staticmethod
    def build_complete_graph(node_ids: typing.List) -> nx.DiGraph:
        """Builds a complete directed graph (no self loops) given the nodes labels in the list ``node_ids``:

        :param node_ids: the list of nodes labels
        :type node_ids: List
        :return: a complete Digraph Object
        :rtype: networkx.DiGraph
        """
        complete_graph = nx.DiGraph()
        complete_graph.add_nodes_from(node_ids)
        complete_graph.add_edges_from(itertools.permutations(node_ids, 2))
        return complete_graph

    @staticmethod
    def generate_possible_sub_sets_of_size(u: typing.List, size: int, parent_label: str):
        """Creates a list containing all possible subsets of the list ``u`` of size ``size``,
        that do not contain the node identified by ``parent_label``.

        :param u: the list of nodes
        :type u: List
        :param size: the size of the subsets
        :type size: int
        :param parent_label: the node to exclude in the subsets generation
        :type parent_label: string
        :return: an Iterator Object containing a list of lists
        :rtype: Iterator
        """
        list_without_test_parent = u[:]
        list_without_test_parent.remove(parent_label)
        return map(list, itertools.combinations(list_without_test_parent, size))

    def save_results(self) -> None:
        """Save the estimated Structure to a .json file in the path where the data are loaded from.
        The file is named after the input dataset, with the ``results_`` prefix prepended.
        """
        res = json_graph.node_link_data(self._complete_graph)
        name = self._sample_path._importer.file_path.rsplit('/', 1)[-1]
        name = name.split('.', 1)[0]
        name += '_' + str(self._sample_path._importer.dataset_id())
        name += '.json'
        file_name = 'results_' + name
        with open(file_name, 'w') as f:
            json.dump(res, f)

    def remove_diagonal_elements(self, matrix):
        """Return a view of ``matrix`` with the diagonal elements removed, one column narrower."""
        m = matrix.shape[0]
        strided = np.lib.stride_tricks.as_strided
        s0, s1 = matrix.strides
        return strided(matrix.ravel()[1:], shape=(m - 1, m), strides=(s0 + s1, s1)).reshape(m, -1)

    @abc.abstractmethod
    def estimate_structure(self) -> typing.List:
        """Abstract method to estimate the structure

        :return: List of estimated edges
        :rtype: Typing.List
        """
        pass

    def adjacency_matrix(self) -> np.ndarray:
        """Converts the estimated structure ``_complete_graph`` to a boolean adjacency matrix representation.

        :return: The adjacency matrix of the graph ``_complete_graph``
        :rtype: numpy.ndArray
        """
        return nx.adj_matrix(self._complete_graph).toarray().astype(bool)

    def spurious_edges(self) -> typing.List:
        """Return the spurious edges present in the estimated structure, if a prior net structure is present in
        ``_sample_path.structure``.

        :return: A list containing the spurious edges
        :rtype: List
        """
        if not self._sample_path.has_prior_net_structure:
            raise RuntimeError("Can not compute spurious edges with no prior net structure!")
        real_graph = nx.DiGraph()
        real_graph.add_nodes_from(self._sample_path.structure.nodes_labels)
        real_graph.add_edges_from(self._sample_path.structure.edges)
        # edges present in the estimated graph but absent from the real one
        return nx.difference(self._complete_graph, real_graph).edges

    def save_plot_estimated_structure_graph(self) -> None:
        """Plot the estimated structure in a graphical model style.
        Spurious edges are colored in red.
        """
        graph_to_draw = nx.DiGraph()
        spurious_edges = self.spurious_edges()
        non_spurious_edges = list(set(self._complete_graph.edges) - set(spurious_edges))
        edges_colors = ['red' if edge in spurious_edges else 'black' for edge in self._complete_graph.edges]
        graph_to_draw.add_edges_from(spurious_edges)
        graph_to_draw.add_edges_from(non_spurious_edges)
        pos = nx.spring_layout(graph_to_draw, k=0.5 * 1 / np.sqrt(len(graph_to_draw.nodes())), iterations=50, scale=10)
        options = {
            "node_size": 2000,
            "node_color": "white",
            "edgecolors": "black",
            "linewidths": 2,
            "with_labels": True,
            "font_size": 13,
            "connectionstyle": 'arc3, rad = 0.1',
            "arrowsize": 15,
            "arrowstyle": '<|-',
            "width": 1,
            "edge_color": edges_colors,
        }

        nx.draw(graph_to_draw, pos, **options)
        ax = plt.gca()
        ax.margins(0.20)
        plt.axis("off")
        name = self._sample_path._importer.file_path.rsplit('/', 1)[-1]
        name = name.split('.', 1)[0]
        name += '_' + str(self._sample_path._importer.dataset_id())
        name += '.png'
        plt.savefig(name)
        plt.clf()
        print("Estimated Structure Plot Saved At: ", os.path.abspath(name))
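
# Hedged usage sketch of the two static helpers above, with illustrative node
# labels (copy into a package context to run; the relative imports above block
# direct execution of this module).
if __name__ == '__main__':
    g = StructureEstimator.build_complete_graph(['X', 'Y', 'Z'])
    print(sorted(g.edges))  # all 6 ordered pairs, no self loops
    subsets = list(StructureEstimator.generate_possible_sub_sets_of_size(['X', 'Y', 'Z'], 2, 'X'))
    print(subsets)          # [['Y', 'Z']]: the only 2-subset that excludes 'X'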
@ -0,0 +1,244 @@

import itertools
import json
import typing

import networkx as nx
import numpy as np
from networkx.readwrite import json_graph

from random import choice

import concurrent.futures

import copy

from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
from ..structure_graph.network_graph import NetworkGraph
from .parameters_estimator import ParametersEstimator
from .structure_estimator import StructureEstimator
from ..structure_graph.sample_path import SamplePath
from ..structure_graph.structure import Structure
from .fam_score_calculator import FamScoreCalculator
from ..optimizers.hill_climbing_search import HillClimbing
from ..optimizers.tabu_search import TabuSearch

import multiprocessing
from multiprocessing import Pool


class StructureScoreBasedEstimator(StructureEstimator):
    """
    Has the task of estimating the network structure given the trajectories in samplepath by
    using a score-based approach.

    :param sample_path: the _sample_path object containing the trajectories and the real structure
    :type sample_path: SamplePath
    :param tau_xu: hyperparameter over the CTBN’s q parameters, default to 0.1
    :type tau_xu: float, optional
    :param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 1
    :type alpha_xu: float, optional
    :param known_edges: List of known edges, default to []
    :type known_edges: List, optional
    """

    def __init__(self, sample_path: SamplePath, tau_xu: float = 0.1, alpha_xu: float = 1,
                 known_edges: typing.List = []):
        super().__init__(sample_path, known_edges)
        self.tau_xu = tau_xu
        self.alpha_xu = alpha_xu

    def estimate_structure(self, max_parents: int = None, iterations_number: int = 40,
                           patience: int = None, tabu_length: int = None, tabu_rules_duration: int = None,
                           optimizer: str = 'tabu', disable_multiprocessing: bool = False):
        """
        Compute the score-based algorithm to find the optimal structure

        :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
        :type max_parents: int, optional
        :param iterations_number: maximum number of optimization algorithm's iterations, default to 40
        :type iterations_number: int, optional
        :param patience: number of iterations without any improvement before stopping the search. If None, disabled, default to None
        :type patience: int, optional
        :param tabu_length: maximum length of the data structures used in the optimization process, default to None
        :type tabu_length: int, optional
        :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None
        :type tabu_rules_duration: int, optional
        :param optimizer: name of the optimizer algorithm. Possible values: 'hill' (Hill Climbing), 'tabu' (Tabu Search), default to 'tabu'
        :type optimizer: string, optional
        :param disable_multiprocessing: True if you desire to disable the multiprocessing operations, default to False
        :type disable_multiprocessing: Boolean, optional
        :return: the set of the estimated edges
        :rtype: Set
        """
        # save the true edges structure in tuples
        true_edges = copy.deepcopy(self._sample_path.structure.edges)
        true_edges = set(map(tuple, true_edges))

        # remove all the edges from the structure
        self._sample_path.structure.clean_structure_edges()

        estimate_parents = self.estimate_parents

        n_nodes = len(self._nodes)

        l_max_parents = [max_parents] * n_nodes
        l_iterations_number = [iterations_number] * n_nodes
        l_patience = [patience] * n_nodes
        l_tabu_length = [tabu_length] * n_nodes
        l_tabu_rules_duration = [tabu_rules_duration] * n_nodes
        l_optimizer = [optimizer] * n_nodes

        # get the number of CPUs
        cpu_count = multiprocessing.cpu_count()
        print(f"CPU COUNT: {cpu_count}")

        if disable_multiprocessing:
            cpu_count = 1

        # estimate the best parents for each node
        if disable_multiprocessing:
            list_edges_partial = [estimate_parents(n, max_parents, iterations_number, patience,
                                                   tabu_length, tabu_rules_duration, optimizer)
                                  for n in self._nodes]
        else:
            with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor:
                list_edges_partial = executor.map(estimate_parents,
                                                  self._nodes,
                                                  l_max_parents,
                                                  l_iterations_number,
                                                  l_patience,
                                                  l_tabu_length,
                                                  l_tabu_rules_duration,
                                                  l_optimizer)

        # concatenate all the edges lists
        set_list_edges = set(itertools.chain.from_iterable(list_edges_partial))

        # calculate precision and recall against the true structure
        n_missing_edges = 0
        n_added_fake_edges = 0

        try:
            n_added_fake_edges = len(set_list_edges.difference(true_edges))
            n_missing_edges = len(true_edges.difference(set_list_edges))
            n_true_positive = len(true_edges) - n_missing_edges

            precision = n_true_positive / (n_true_positive + n_added_fake_edges)
            recall = n_true_positive / (n_true_positive + n_missing_edges)

            print(true_edges)
            print(set_list_edges)
            print(f"precision: {precision} ")
            print(f"recall: {recall} ")
        except Exception as e:
            print(f"error: {e}")

        return set_list_edges

    def estimate_parents(self, node_id: str, max_parents: int = None, iterations_number: int = 40,
                         patience: int = 10, tabu_length: int = None, tabu_rules_duration: int = 5,
                         optimizer: str = 'hill'):
        """
        Use the FamScore of a node in order to find the best parent nodes

        :param node_id: current node's id
        :type node_id: string
        :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
        :type max_parents: int, optional
        :param iterations_number: maximum number of optimization algorithm's iterations, default to 40
        :type iterations_number: int, optional
        :param patience: number of iterations without any improvement before stopping the search, default to 10
        :type patience: int, optional
        :param tabu_length: maximum length of the data structures used in the optimization process, default to None
        :type tabu_length: int, optional
        :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to 5
        :type tabu_rules_duration: int, optional
        :param optimizer: name of the optimizer algorithm. Possible values: 'hill' (Hill Climbing), 'tabu' (Tabu Search), default to 'hill'
        :type optimizer: string, optional

        :return: A list of the best edges for the current node
        :rtype: List
        """
        # choose the optimizer algorithm
        if optimizer == 'tabu':
            optimizer = TabuSearch(
                node_id=node_id,
                structure_estimator=self,
                max_parents=max_parents,
                iterations_number=iterations_number,
                patience=patience,
                tabu_length=tabu_length,
                tabu_rules_duration=tabu_rules_duration)
        else:  # optimizer == 'hill'
            optimizer = HillClimbing(
                node_id=node_id,
                structure_estimator=self,
                max_parents=max_parents,
                iterations_number=iterations_number,
                patience=patience)

        # call the optimizer's function that calculates the current node's parents
        return optimizer.optimize_structure()

    def get_score_from_graph(self,
                             graph: NetworkGraph,
                             node_id: str):
        """
        Get the FamScore of a node

        :param node_id: current node's id
        :type node_id: string
        :param graph: current graph to be computed
        :type graph: class:'NetworkGraph'

        :return: The FamScore for this graph structure
        :rtype: float
        """
        # initialize the graph for a single node
        graph.fast_init(node_id)

        params_estimation = ParametersEstimator(self._sample_path.trajectories, graph)

        # initialize and compute parameters for the node
        params_estimation.fast_init(node_id)
        SoCims = params_estimation.compute_parameters_for_node(node_id)

        # calculate the FamScore for the node
        fam_score_obj = FamScoreCalculator()

        score = fam_score_obj.get_fam_score(SoCims.actual_cims, tau_xu=self.tau_xu, alpha_xu=self.alpha_xu)

        return score
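
# Hedged standalone sketch of the precision/recall bookkeeping above, on made-up
# edge sets (copy into a plain script to run).
if __name__ == '__main__':
    true_edges = {('X', 'Y'), ('Y', 'Z')}
    estimated = {('X', 'Y'), ('Z', 'X')}
    fake = len(estimated - true_edges)      # 1 spurious edge
    missing = len(true_edges - estimated)   # 1 missed edge
    tp = len(true_edges) - missing          # 1 true positive
    print(tp / (tp + fake), tp / (tp + missing))  # precision 0.5, recall 0.5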
@ -0,0 +1,4 @@
from .optimizer import Optimizer
from .tabu_search import TabuSearch
from .hill_climbing_search import HillClimbing
from .constraint_based_optimizer import ConstraintBasedOptimizer
@ -0,0 +1,87 @@

import itertools
import json
import typing

import networkx as nx
import numpy as np

from random import choice

from abc import ABC

import copy

from .optimizer import Optimizer
from ..estimators.structure_estimator import StructureEstimator
from ..structure_graph.network_graph import NetworkGraph


class ConstraintBasedOptimizer(Optimizer):
    """
    Optimizer class that implements the CTPC algorithm

    :param node_id: current node's id
    :type node_id: string
    :param structure_estimator: a structure estimator object with the information about the net
    :type structure_estimator: class:'StructureEstimator'
    :param tot_vars_count: number of variables in the dataset
    :type tot_vars_count: int
    """
    def __init__(self,
                 node_id: str,
                 structure_estimator: StructureEstimator,
                 tot_vars_count: int
                 ):
        """
        Constructor
        """
        super().__init__(node_id, structure_estimator)
        self.tot_vars_count = tot_vars_count

    def optimize_structure(self):
        """
        Compute the optimization process for a structure_estimator by using the CTPC algorithm

        :return: the estimated structure for the node
        :rtype: List
        """
        print("##################TESTING VAR################", self.node_id)

        graph = NetworkGraph(self.structure_estimator._sample_path.structure)

        other_nodes = [node for node in self.structure_estimator._sample_path.structure.nodes_labels
                       if node != self.node_id]

        # start from the complete parent set
        for possible_parent in other_nodes:
            graph.add_edges([(possible_parent, self.node_id)])

        u = other_nodes
        child_states_numb = self.structure_estimator._sample_path.structure.get_states_number(self.node_id)
        b = 0
        # grow the size b of the separating sets until no candidate parent is left to test
        while b < len(u):
            parent_indx = 0
            while parent_indx < len(u):
                removed = False
                test_parent = u[parent_indx]
                i = self.structure_estimator._sample_path.structure.get_node_indx(test_parent)
                j = self.structure_estimator._sample_path.structure.get_node_indx(self.node_id)
                if self.structure_estimator._removable_edges_matrix[i][j]:
                    S = StructureEstimator.generate_possible_sub_sets_of_size(u, b, test_parent)
                    for parents_set in S:
                        # drop the candidate parent as soon as one separating set makes it independent
                        if self.structure_estimator.complete_test(test_parent, self.node_id, parents_set,
                                                                  child_states_numb, self.tot_vars_count, i, j):
                            graph.remove_edges([(test_parent, self.node_id)])
                            u.remove(test_parent)
                            removed = True
                            break
                if not removed:
                    parent_indx += 1
            b += 1
        self.structure_estimator._cache.clear()
        return graph.edges
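
# Hedged standalone sketch of the separating-set sweep above (copy into a plain
# script to run): for each candidate parent, conditioning sets of growing size b
# are drawn from the remaining candidates. Labels are illustrative.
if __name__ == '__main__':
    u = ['A', 'B', 'C']
    for b in range(len(u)):
        for test_parent in u:
            rest = [n for n in u if n != test_parent]
            for sep_set in itertools.combinations(rest, b):
                print(f"test {test_parent} given {sep_set}")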
@ -0,0 +1,135 @@

import itertools
import json
import typing

import networkx as nx
import numpy as np

from random import choice

from abc import ABC

from .optimizer import Optimizer
from ..estimators.structure_estimator import StructureEstimator
from ..structure_graph.network_graph import NetworkGraph


class HillClimbing(Optimizer):
    """
    Optimizer class that implements Hill Climbing search

    :param node_id: current node's id
    :type node_id: string
    :param structure_estimator: a structure estimator object with the information about the net
    :type structure_estimator: class:'StructureEstimator'
    :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
    :type max_parents: int, optional
    :param iterations_number: maximum number of optimization algorithm's iterations, default to 40
    :type iterations_number: int, optional
    :param patience: number of iterations without any improvement before stopping the search. If None, disabled, default to None
    :type patience: int, optional
    """
    def __init__(self,
                 node_id: str,
                 structure_estimator: StructureEstimator,
                 max_parents: int = None,
                 iterations_number: int = 40,
                 patience: int = None
                 ):
        """
        Constructor
        """
        super().__init__(node_id, structure_estimator)
        self.max_parents = max_parents
        self.iterations_number = iterations_number
        self.patience = patience

    def optimize_structure(self) -> typing.List:
        """
        Compute the optimization process for a structure_estimator by using a Hill Climbing algorithm

        :return: the estimated structure for the node
        :rtype: List
        """
        # create the graph for the single node
        graph = NetworkGraph(self.structure_estimator._sample_path.structure)

        # get the index for the current node
        node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id)

        # list of prior edges
        prior_parents = set()

        # add the edges from prior knowledge
        for i in range(len(self.structure_estimator._removable_edges_matrix)):
            if not self.structure_estimator._removable_edges_matrix[i][node_index]:
                parent_id = self.structure_estimator._sample_path._structure.get_node_id(i)
                prior_parents.add(parent_id)

                # add the edge to the starting structure
                graph.add_edges([(parent_id, self.node_id)])

        # get all the possible parents
        other_nodes = [node for node in
                       self.structure_estimator._sample_path.structure.nodes_labels if
                       node != self.node_id and
                       node not in prior_parents]

        actual_best_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)

        patience_count = 0
        for i in range(self.iterations_number):
            # toggle a random candidate edge
            current_new_parent = choice(other_nodes)
            current_edge = (current_new_parent, self.node_id)
            added = False
            parent_removed = None

            if graph.has_edge(current_edge):
                graph.remove_edges([current_edge])
            else:
                # check the max_parents constraint
                if self.max_parents is not None:
                    parents_list = graph.get_parents_by_id(self.node_id)
                    if len(parents_list) >= self.max_parents:
                        parent_removed = (choice(parents_list), self.node_id)
                        graph.remove_edges([parent_removed])
                graph.add_edges([current_edge])
                added = True
            current_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)

            if current_score > actual_best_score:
                # accept the move and update the current best score
                actual_best_score = current_score
                patience_count = 0
            else:
                # undo the last update
                if added:
                    graph.remove_edges([current_edge])
                    # if a parent was removed, add it again to the graph
                    if parent_removed is not None:
                        graph.add_edges([parent_removed])
                else:
                    graph.add_edges([current_edge])
                # update the patience count
                patience_count += 1

            if self.patience is not None and patience_count > self.patience:
                break

        print(f"finished variable: {self.node_id}")
        return graph.edges
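
# Hedged standalone sketch of the accept-or-undo loop above, on a toy score
# function (copy into a plain script to run; the score is NOT FamScore).
if __name__ == '__main__':
    from random import seed
    seed(0)
    parents, candidates = set(), ['A', 'B', 'C']
    score = lambda ps: len(ps) - 2 * ('C' in ps)  # toy score that penalizes 'C'
    best = score(parents)
    for _ in range(10):
        move = choice(candidates)
        parents.symmetric_difference_update({move})  # toggle the candidate edge
        if score(parents) > best:
            best = score(parents)                    # accept the move
        else:
            parents.symmetric_difference_update({move})  # undo the move
    print(parents, best)  # best-so-far parent set under the toy score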
@ -0,0 +1,39 @@

import itertools
import json
import typing

import networkx as nx
import numpy as np

import abc

from ..estimators.structure_estimator import StructureEstimator


class Optimizer(abc.ABC):
    """
    Interface class for all the optimizers in PyCTBN

    :param node_id: the node label
    :type node_id: string
    :param structure_estimator: A structureEstimator Object to predict the structure
    :type structure_estimator: class:'StructureEstimator'
    """

    def __init__(self, node_id: str, structure_estimator: StructureEstimator):
        self.node_id = node_id
        self.structure_estimator = structure_estimator

    @abc.abstractmethod
    def optimize_structure(self) -> typing.List:
        """
        Compute the optimization process for a structure_estimator

        :return: the estimated structure for the node
        :rtype: List
        """
        pass
@ -0,0 +1,199 @@

import itertools
import json
import typing

import networkx as nx
import numpy as np

from random import choice, sample

from abc import ABC

from .optimizer import Optimizer
from ..estimators.structure_estimator import StructureEstimator
from ..structure_graph.network_graph import NetworkGraph

import queue


class TabuSearch(Optimizer):
    """
    Optimizer class that implements Tabu Search

    :param node_id: current node's id
    :type node_id: string
    :param structure_estimator: a structure estimator object with the information about the net
    :type structure_estimator: class:'StructureEstimator'
    :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
    :type max_parents: int, optional
    :param iterations_number: maximum number of optimization algorithm's iterations, default to 40
    :type iterations_number: int, optional
    :param patience: number of iterations without any improvement before stopping the search. If None, disabled, default to None
    :type patience: int, optional
    :param tabu_length: maximum length of the data structures used in the optimization process, default to None
    :type tabu_length: int, optional
    :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None
    :type tabu_rules_duration: int, optional
    """
    def __init__(self,
                 node_id: str,
                 structure_estimator: StructureEstimator,
                 max_parents: int = None,
                 iterations_number: int = 40,
                 patience: int = None,
                 tabu_length: int = None,
                 tabu_rules_duration=None
                 ):
        """
        Constructor
        """
        super().__init__(node_id, structure_estimator)
        self.max_parents = max_parents
        self.iterations_number = iterations_number
        self.patience = patience
        self.tabu_length = tabu_length
        self.tabu_rules_duration = tabu_rules_duration

    def optimize_structure(self) -> typing.List:
        """
        Compute the optimization process for a structure_estimator by using a Tabu Search algorithm

        :return: the estimated structure for the node
        :rtype: List
        """
        print(f"tabu search is processing the structure of {self.node_id}")

        # create the graph for the single node
        graph = NetworkGraph(self.structure_estimator._sample_path.structure)

        # get the index for the current node
        node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id)

        # list of prior edges
        prior_parents = set()

        # add the edges from prior knowledge
        for i in range(len(self.structure_estimator._removable_edges_matrix)):
            if not self.structure_estimator._removable_edges_matrix[i][node_index]:
                parent_id = self.structure_estimator._sample_path._structure.get_node_id(i)
                prior_parents.add(parent_id)

                # add the edge to the starting structure
                graph.add_edges([(parent_id, self.node_id)])

        # get all the possible parents
        other_nodes = set([node for node in
                           self.structure_estimator._sample_path.structure.nodes_labels if
                           node != self.node_id and
                           node not in prior_parents])

        # calculate the score for the node without parents
        actual_best_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)

        # initialize tabu_length and tabu_rules_duration if None
        if self.tabu_length is None:
            self.tabu_length = len(other_nodes)

        if self.tabu_rules_duration is None:
            self.tabu_rules_duration = len(other_nodes)

        # initialize the data structures
        tabu_set = set()
        tabu_queue = queue.Queue()

        patience_count = 0
        tabu_count = 0
        for i in range(self.iterations_number):

            current_possible_nodes = other_nodes.difference(tabu_set)

            # choose a new random edge according to the tabu restriction
            if len(current_possible_nodes) > 0:
                current_new_parent = sample(list(current_possible_nodes), k=1)[0]
            else:
                current_new_parent = tabu_queue.get()
                tabu_set.remove(current_new_parent)

            current_edge = (current_new_parent, self.node_id)
            added = False
            parent_removed = None

            if graph.has_edge(current_edge):
                graph.remove_edges([current_edge])
            else:
                # check the max_parents constraint
                if self.max_parents is not None:
                    parents_list = graph.get_parents_by_id(self.node_id)
                    if len(parents_list) >= self.max_parents:
                        parent_removed = (choice(parents_list), self.node_id)
                        graph.remove_edges([parent_removed])
                graph.add_edges([current_edge])
                added = True
            current_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)

            if current_score > actual_best_score:
                # accept the move and update the current best score
                actual_best_score = current_score
                patience_count = 0
            else:
                # undo the last update
                if added:
                    graph.remove_edges([current_edge])
                    # if a parent was removed, add it again to the graph
                    if parent_removed is not None:
                        graph.add_edges([parent_removed])
                else:
                    graph.add_edges([current_edge])
                # update the patience count
                patience_count += 1

            # add the node to the tabu list, expiring the oldest entry if the list is full
            if tabu_queue.qsize() >= self.tabu_length:
                current_removed = tabu_queue.get()
                tabu_set.remove(current_removed)
            tabu_queue.put(current_new_parent)
            tabu_set.add(current_new_parent)

            tabu_count += 1

            # every tabu_rules_duration steps remove an item from the tabu list
            if tabu_count % self.tabu_rules_duration == 0:
                if tabu_queue.qsize() > 0:
                    current_removed = tabu_queue.get()
                    tabu_set.remove(current_removed)
                    tabu_count = 0
                else:
                    tabu_count = 0

            if self.patience is not None and patience_count > self.patience:
                break

        print(f"finished variable: {self.node_id}")
        return graph.edges
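
# Hedged standalone sketch of the bounded tabu memory above (copy into a plain
# script to run): a FIFO queue fixes the expiry order while a set gives O(1)
# membership tests.
if __name__ == '__main__':
    tabu_set, tabu_queue, tabu_length = set(), queue.Queue(), 2
    for move in ['A', 'B', 'C']:
        if tabu_queue.qsize() >= tabu_length:
            tabu_set.remove(tabu_queue.get())  # expire the oldest entry
        tabu_queue.put(move)
        tabu_set.add(move)
    print(tabu_set)  # {'B', 'C'}: 'A' has expired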
@ -0,0 +1,6 @@
from .conditional_intensity_matrix import ConditionalIntensityMatrix
from .network_graph import NetworkGraph
from .sample_path import SamplePath
from .set_of_cims import SetOfCims
from .structure import Structure
from .trajectory import Trajectory
@ -0,0 +1,42 @@
import numpy as np


class ConditionalIntensityMatrix(object):
    """Abstracts the Conditional Intensity Matrix of a node as an aggregation of the state residence times vector,
    the state transition matrix and the actual CIM matrix.

    :param state_residence_times: state residence times vector
    :type state_residence_times: numpy.array
    :param state_transition_matrix: the transitions count matrix
    :type state_transition_matrix: numpy.ndArray
    :_cim: the actual cim of the node
    """
    def __init__(self, state_residence_times: np.array, state_transition_matrix: np.array):
        """Constructor Method
        """
        self._state_residence_times = state_residence_times
        self._state_transition_matrix = state_transition_matrix
        self._cim = self.state_transition_matrix.astype(np.float64)

    def compute_cim_coefficients(self) -> None:
        """Compute the coefficients of the matrix _cim by using the following equality q_xx' = M[x, x'] / T[x].
        The class member ``_cim`` will contain the computed cim.
        The diagonal is negated first, and a +1 smoothing term is applied to both numerator and denominator.
        """
        np.fill_diagonal(self._cim, self._cim.diagonal() * -1)
        self._cim = ((self._cim.T + 1) / (self._state_residence_times + 1)).T

    @property
    def state_residence_times(self) -> np.ndarray:
        return self._state_residence_times

    @property
    def state_transition_matrix(self) -> np.ndarray:
        return self._state_transition_matrix

    @property
    def cim(self) -> np.ndarray:
        return self._cim

    def __repr__(self):
        return 'CIM:\n' + str(self.cim)
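
# Hedged numeric sketch of compute_cim_coefficients on made-up sufficient
# statistics (this module has no relative imports, so it runs directly).
if __name__ == '__main__':
    times = np.array([4.0, 1.0])    # T[x]: residence time per state
    M = np.array([[3, 3], [1, 1]])  # M[x, x']: the diagonal holds the totals
    cim = ConditionalIntensityMatrix(times, M)
    cim.compute_cim_coefficients()
    print(cim.cim)  # [[-0.4  0.8]
                    #  [ 1.   0. ]]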
@ -0,0 +1,293 @@ |
||||
|
||||
import typing |
||||
|
||||
import networkx as nx |
||||
import numpy as np |
||||
|
||||
from .structure import Structure |
||||
|
||||
|
||||
class NetworkGraph(object): |
||||
"""Abstracts the infos contained in the Structure class in the form of a directed graph. |
||||
Has the task of creating all the necessary filtering and indexing structures for parameters estimation |
||||
|
||||
:param graph_struct: the ``Structure`` object from which infos about the net will be extracted |
||||
:type graph_struct: Structure |
||||
:_graph: directed graph |
||||
:_aggregated_info_about_nodes_parents: a structure that contains all the necessary infos |
||||
about every parents of the node of which all the indexing and filtering structures will be constructed. |
||||
:_time_scalar_indexing_structure: the indexing structure for state res time estimation |
||||
:_transition_scalar_indexing_structure: the indexing structure for transition computation |
||||
:_time_filtering: the columns filtering structure used in the computation of the state res times |
||||
:_transition_filtering: the columns filtering structure used in the computation of the transition |
||||
from one state to another |
||||
:_p_combs_structure: all the possible parents states combination for the node of interest |
||||
""" |
||||
|
||||
def __init__(self, graph_struct: Structure): |
||||
"""Constructor Method |
||||
""" |
||||
self._graph_struct = graph_struct |
||||
self._graph = nx.DiGraph() |
||||
self._aggregated_info_about_nodes_parents = None |
||||
self._time_scalar_indexing_structure = None |
||||
self._transition_scalar_indexing_structure = None |
||||
self._time_filtering = None |
||||
self._transition_filtering = None |
||||
self._p_combs_structure = None |
||||
|
||||
def init_graph(self): |
||||
self.add_nodes(self._nodes_labels) |
||||
self.add_edges(self.graph_struct.edges) |
||||
self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes() |
||||
self._fancy_indexing = self.build_fancy_indexing_structure(0) |
||||
self.build_scalar_indexing_structures() |
||||
self.build_time_columns_filtering_structure() |
||||
self.build_transition_columns_filtering_structure() |
||||
self._p_combs_structure = self.build_p_combs_structure() |
||||
|
||||
def fast_init(self, node_id: str) -> None: |
||||
"""Initializes all the necessary structures for parameters estimation of the node identified by the label |
||||
node_id |
||||
|
||||
:param node_id: the label of the node |
||||
:type node_id: string |
||||
""" |
||||
self.add_nodes(self._graph_struct.nodes_labels) |
||||
self.add_edges(self._graph_struct.edges) |
||||
self._aggregated_info_about_nodes_parents = self.get_ordered_by_indx_set_of_parents(node_id) |
||||
p_indxs = self._aggregated_info_about_nodes_parents[1] |
||||
p_vals = self._aggregated_info_about_nodes_parents[2] |
||||
node_states = self.get_states_number(node_id) |
||||
node_indx = self.get_node_indx(node_id) |
||||
cols_number = self._graph_struct.total_variables_number |
||||
self._time_scalar_indexing_structure = NetworkGraph.\ |
||||
build_time_scalar_indexing_structure_for_a_node(node_states, p_vals) |
||||
self._transition_scalar_indexing_structure = NetworkGraph.\ |
||||
build_transition_scalar_indexing_structure_for_a_node(node_states, p_vals) |
||||
self._time_filtering = NetworkGraph.build_time_columns_filtering_for_a_node(node_indx, p_indxs) |
||||
self._transition_filtering = NetworkGraph.build_transition_filtering_for_a_node(node_indx, p_indxs, cols_number) |
||||
self._p_combs_structure = NetworkGraph.build_p_comb_structure_for_a_node(p_vals) |
||||
|
||||
def add_nodes(self, list_of_nodes: typing.List) -> None: |
||||
"""Adds the nodes to the ``_graph`` contained in the list of nodes ``list_of_nodes``. |
||||
Sets all the properties that identify a nodes (index, positional index, cardinality) |
||||
|
||||
:param list_of_nodes: the nodes to add to ``_graph`` |
||||
:type list_of_nodes: List |
||||
""" |
||||
nodes_indxs = self._graph_struct.nodes_indexes |
||||
nodes_vals = self._graph_struct.nodes_values |
||||
pos = 0 |
||||
for id, node_indx, node_val in zip(list_of_nodes, nodes_indxs, nodes_vals): |
||||
self._graph.add_node(id, indx=node_indx, val=node_val, pos_indx=pos) |
||||
pos += 1 |
||||
|
||||
def has_edge(self, edge: tuple) -> bool: |
||||
"""Checks whether the graph contains a specific edge. |
||||
|
||||
:param edge: a tuple that represents the edge |
||||
:type edge: tuple |
||||
:return: True if the edge is contained in the graph |
||||
:rtype: bool |
||||
""" |
||||
return self._graph.has_edge(edge[0], edge[1]) |
||||
|
||||
def add_edges(self, list_of_edges: typing.List) -> None: |
||||
"""Add the edges to the ``_graph`` contained in the list ``list_of_edges``. |
||||
|
||||
:param list_of_edges: the list of tuples containing the edges |
||||
:type list_of_edges: List |
||||
""" |
||||
self._graph.add_edges_from(list_of_edges) |
||||
|
||||
def remove_node(self, node_id: str) -> None: |
||||
"""Remove the node ``node_id`` from all the class members. |
||||
Resets all the filtering/indexing structures. |
||||
""" |
||||
self._graph.remove_node(node_id) |
||||
self._graph_struct.remove_node(node_id) |
||||
self.clear_indexing_filtering_structures() |
||||
|
||||
def clear_indexing_filtering_structures(self) -> None: |
||||
"""Initialize all the filtering/indexing structures. |
||||
""" |
||||
self._aggregated_info_about_nodes_parents = None |
||||
self._time_scalar_indexing_structure = None |
||||
self._transition_scalar_indexing_structure = None |
||||
self._time_filtering = None |
||||
self._transition_filtering = None |
||||
self._p_combs_structure = None |
||||
|
||||
def get_ordered_by_indx_set_of_parents(self, node: str) -> typing.Tuple: |
||||
"""Builds the aggregated structure that holds all the infos relative to the parent set of the node, namely |
||||
(parents_labels, parents_indexes, parents_cardinalities). |
||||
|
||||
:param node: the label of the node |
||||
:type node: string |
||||
:return: a tuple containing all the parent set info |
||||
:rtype: Tuple |
||||
""" |
||||
parents = self.get_parents_by_id(node) |
||||
nodes = self._graph_struct.nodes_labels |
||||
d = {v: i for i, v in enumerate(nodes)} |
||||
sorted_parents = sorted(parents, key=lambda v: d[v]) |
||||
get_node_indx = self.get_node_indx |
||||
p_indxes = [get_node_indx(node) for node in sorted_parents] |
||||
p_values = [self.get_states_number(node) for node in sorted_parents] |
||||
return sorted_parents, p_indxes, p_values |
||||
|
||||
def remove_edges(self, list_of_edges: typing.List) -> None: |
||||
"""Remove the edges to the graph contained in the list list_of_edges. |
||||
|
||||
:param list_of_edges: The edges to remove from the graph |
||||
:type list_of_edges: List |
||||
""" |
||||
self._graph.remove_edges_from(list_of_edges) |
||||
|
||||
@staticmethod |
||||
def build_time_scalar_indexing_structure_for_a_node(node_states: int, |
||||
parents_vals: typing.List) -> np.ndarray: |
||||
"""Builds an indexing structure for the computation of state residence times values. |
||||
|
||||
:param node_states: the node cardinality |
||||
:type node_states: int |
||||
:param parents_vals: the cardinalities of the node's parents |
||||
:type parents_vals: List |
||||
:return: The time indexing structure |
||||
:rtype: numpy.ndArray |
||||
""" |
||||
T_vector = np.array([node_states]) |
||||
T_vector = np.append(T_vector, parents_vals) |
||||
T_vector = T_vector.cumprod().astype(int) |
||||
return T_vector |
||||
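For example, a minimal sketch of the structure this method returns (cardinalities chosen arbitrarily, not taken from any real dataset): a node with 3 states and parents with cardinalities [2, 4] yields the cumulative-product strides that map a (node state, parents combination) pair to a flat index.

import numpy as np

# Hypothetical cardinalities for illustration only.
node_states = 3
parents_vals = [2, 4]
T_vector = np.append(np.array([node_states]), parents_vals).cumprod().astype(int)
print(T_vector)  # [ 3  6 24]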
|
||||
@staticmethod |
||||
def build_transition_scalar_indexing_structure_for_a_node(node_states_number: int, parents_vals: typing.List) \ |
||||
-> np.ndarray: |
||||
"""Builds an indexing structure for the computation of state transitions values. |
||||
|
||||
:param node_states_number: the node cardinality |
||||
:type node_states_number: int |
||||
:param parents_vals: the cardinalities of the node's parents |
||||
:type parents_vals: List |
||||
:return: The transition indexing structure |
||||
:rtype: numpy.ndArray |
||||
""" |
||||
M_vector = np.array([node_states_number, |
||||
node_states_number]) |
||||
M_vector = np.append(M_vector, parents_vals) |
||||
M_vector = M_vector.cumprod().astype(int) |
||||
return M_vector |
||||
|
||||
@staticmethod |
||||
def build_time_columns_filtering_for_a_node(node_indx: int, p_indxs: typing.List) -> np.ndarray: |
||||
""" |
||||
Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs`` |
||||
in the dataset. |
||||
This structure will be used in the computation of the state res times. |
||||
:param node_indx: the index of the node |
||||
:type node_indx: int |
||||
:param p_indxs: the indexes of the node's parents |
||||
:type p_indxs: List |
||||
:return: The filtering structure for times estimation |
||||
:rtype: numpy.ndArray |
||||
""" |
||||
return np.append(np.array([node_indx], dtype=int), p_indxs).astype(int) |
||||
|
||||
@staticmethod |
||||
def build_transition_filtering_for_a_node(node_indx: int, p_indxs: typing.List, nodes_number: int) \ |
||||
-> np.ndarray: |
||||
"""Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs`` |
||||
in the dataset. |
||||
This structure will be used in the computation of the state transitions values. |
||||
:param node_indx: the index of the node |
||||
:type node_indx: int |
||||
:param p_indxs: the indexes of the node's parents |
||||
:type p_indxs: List |
||||
:param nodes_number: the total number of nodes in the dataset |
||||
:type nodes_number: int |
||||
:return: The filtering structure for transitions estimation |
||||
:rtype: numpy.ndArray |
||||
""" |
||||
return np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=int) |
||||
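A minimal sketch of the resulting column selection (indexes are arbitrary): since the shifted "next state" columns are appended after the original ones in the concatenated samples, the node's shifted column sits at ``node_indx + nodes_number``.

import numpy as np

# Hypothetical layout: 4 variables in the dataset, node 1, parents 0 and 2.
node_indx, p_indxs, nodes_number = 1, [0, 2], 4
filtering = np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=int)
print(filtering)  # [5 1 0 2] -> next state, current state, parents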
|
||||
@staticmethod |
||||
def build_p_comb_structure_for_a_node(parents_values: typing.List) -> np.ndarray: |
||||
""" |
||||
Builds the combinatorial structure that contains the combinations of all the values contained in |
||||
``parents_values``. |
||||
|
||||
:param parents_values: the cardinalities of the nodes |
||||
:type parents_values: List |
||||
:return: A numpy matrix containing a grid of the combinations |
||||
:rtype: numpy.ndArray |
||||
""" |
||||
tmp = [] |
||||
for val in parents_values: |
||||
tmp.append(list(range(val))) |
||||
if len(parents_values) > 0: |
||||
parents_comb = np.array(np.meshgrid(*tmp)).T.reshape(-1, len(parents_values)) |
||||
if len(parents_values) > 1: |
||||
tmp_comb = parents_comb[:, 1].copy() |
||||
parents_comb[:, 1] = parents_comb[:, 0].copy() |
||||
parents_comb[:, 0] = tmp_comb |
||||
else: |
||||
parents_comb = np.array([[]], dtype=int) |
||||
return parents_comb |
||||
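A minimal sketch of the grid this method produces (cardinalities chosen arbitrarily); the fancy-indexing swap below has the same effect as the ``tmp_comb`` copy above.

import numpy as np

parents_values = [2, 3]  # hypothetical cardinalities
tmp = [list(range(val)) for val in parents_values]
parents_comb = np.array(np.meshgrid(*tmp)).T.reshape(-1, len(parents_values))
parents_comb[:, [0, 1]] = parents_comb[:, [1, 0]]  # swap the first two columns
print(parents_comb)  # 6 rows, one per joint state of the two parents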
|
||||
def get_parents_by_id(self, node_id) -> typing.List: |
||||
"""Returns a list of labels of the parents of the node ``node_id`` |
||||
|
||||
:param node_id: the node label |
||||
:type node_id: string |
||||
:return: a List of labels of the parents |
||||
:rtype: List |
||||
""" |
||||
return list(self._graph.predecessors(node_id)) |
||||
|
||||
def get_states_number(self, node_id) -> int: |
||||
return self._graph.nodes[node_id]['val'] |
||||
|
||||
def get_node_indx(self, node_id) -> int: |
||||
return nx.get_node_attributes(self._graph, 'indx')[node_id] |
||||
|
||||
def get_positional_node_indx(self, node_id) -> int: |
||||
return self._graph.nodes[node_id]['pos_indx'] |
||||
|
||||
@property |
||||
def nodes(self) -> typing.List: |
||||
return self._graph_struct.nodes_labels |
||||
|
||||
@property |
||||
def edges(self) -> typing.List: |
||||
return list(self._graph.edges) |
||||
|
||||
@property |
||||
def nodes_indexes(self) -> np.ndarray: |
||||
return self._graph_struct.nodes_indexes |
||||
|
||||
@property |
||||
def nodes_values(self) -> np.ndarray: |
||||
return self._graph_struct.nodes_values |
||||
|
||||
@property |
||||
def time_scalar_indexing_strucure(self) -> np.ndarray: |
||||
return self._time_scalar_indexing_structure |
||||
|
||||
@property |
||||
def time_filtering(self) -> np.ndarray: |
||||
return self._time_filtering |
||||
|
||||
@property |
||||
def transition_scalar_indexing_structure(self) -> np.ndarray: |
||||
return self._transition_scalar_indexing_structure |
||||
|
||||
@property |
||||
def transition_filtering(self) -> np.ndarray: |
||||
return self._transition_filtering |
||||
|
||||
@property |
||||
def p_combs(self) -> np.ndarray: |
||||
return self._p_combs_structure |
@ -0,0 +1,91 @@ |
||||
|
||||
|
||||
import numpy as np |
||||
import pandas as pd |
||||
|
||||
from .structure import Structure |
||||
from .trajectory import Trajectory |
||||
from ..utility.abstract_importer import AbstractImporter |
||||
|
||||
|
||||
|
||||
class SamplePath(object): |
||||
"""Aggregates all the informations about the trajectories, the real structure of the sampled net and variables |
||||
cardinalites. Has the task of creating the objects ``Trajectory`` and ``Structure`` that will |
||||
contain the mentioned data. |
||||
|
||||
:param importer: the Importer object which contains the imported and processed data |
||||
:type importer: AbstractImporter |
||||
:_trajectories: the ``Trajectory`` object that will contain all the concatenated trajectories |
||||
:_structure: the ``Structure`` object that will contain all the structural info about the net |
||||
:_total_variables_count: the number of variables in the net |
||||
""" |
||||
def __init__(self, importer: AbstractImporter): |
||||
"""Constructor Method |
||||
""" |
||||
self._importer = importer |
||||
if self._importer._df_variables is None or self._importer._concatenated_samples is None: |
||||
raise RuntimeError('The importer object has to contain all the processed data!') |
||||
if self._importer._df_variables.empty: |
||||
raise RuntimeError('The importer object has to contain all the processed data!') |
||||
if isinstance(self._importer._concatenated_samples, pd.DataFrame): |
||||
if self._importer._concatenated_samples.empty: |
||||
raise RuntimeError('The importer object has to contain all the processed data!') |
||||
if isinstance(self._importer._concatenated_samples, np.ndarray): |
||||
if self._importer._concatenated_samples.size == 0: |
||||
raise RuntimeError('The importer object has to contain all the processed data!') |
||||
self._trajectories = None |
||||
self._structure = None |
||||
self._total_variables_count = None |
||||
|
||||
def build_trajectories(self) -> None: |
||||
"""Builds the Trajectory object that will contain all the trajectories. |
||||
Clears all the unused dataframes in the ``_importer`` object. |
||||
""" |
||||
self._trajectories = \ |
||||
Trajectory(self._importer.build_list_of_samples_array(self._importer.concatenated_samples), |
||||
len(self._importer.sorter) + 1) |
||||
self._importer.clear_concatenated_frame() |
||||
|
||||
def build_structure(self) -> None: |
||||
""" |
||||
Builds the ``Structure`` object that aggregates all the infos about the net. |
||||
""" |
||||
if self._importer.sorter != self._importer.variables.iloc[:, 0].to_list(): |
||||
raise RuntimeError("The Dataset columns order have to match the order of labels in the variables Frame!") |
||||
|
||||
self._total_variables_count = len(self._importer.sorter) |
||||
labels = self._importer.variables.iloc[:, 0].to_list() |
||||
indxs = self._importer.variables.index.to_numpy() |
||||
vals = self._importer.variables.iloc[:, 1].to_numpy() |
||||
if self._importer.structure is None or self._importer.structure.empty: |
||||
edges = [] |
||||
else: |
||||
edges = list(self._importer.structure.to_records(index=False)) |
||||
self._structure = Structure(labels, indxs, vals, edges, |
||||
self._total_variables_count) |
||||
|
||||
def clear_memory(self): |
||||
self._importer._raw_data = [] |
||||
|
||||
@property |
||||
def trajectories(self) -> Trajectory: |
||||
return self._trajectories |
||||
|
||||
@property |
||||
def structure(self) -> Structure: |
||||
return self._structure |
||||
|
||||
@property |
||||
def total_variables_count(self) -> int: |
||||
return self._total_variables_count |
||||
|
||||
@property |
||||
def has_prior_net_structure(self) -> bool: |
||||
return bool(self._structure.edges) |
||||
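A minimal end-to-end usage sketch with toy data (it assumes ``SampleImporter`` and ``SamplePath`` are exported by the package ``__init__`` files shown above):

import pandas as pd
from PyCTBN.utility import SampleImporter
from PyCTBN.structure_graph import SamplePath

# One toy trajectory: a Time column plus two binary variables.
traj = pd.DataFrame({'Time': [0.0, 0.4, 0.9], 'X': [0, 1, 1], 'Y': [0, 0, 1]})
variables = pd.DataFrame({'Name': ['X', 'Y'], 'Value': [2, 2]})
importer = SampleImporter(trajectory_list=[traj], variables=variables)
importer.import_data(['X', 'Y'])
path = SamplePath(importer)
path.build_trajectories()
path.build_structure()
print(path.trajectories.size(), path.total_variables_count)  # 2 2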
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -0,0 +1,97 @@ |
||||
|
||||
|
||||
import typing |
||||
|
||||
import numpy as np |
||||
|
||||
from .conditional_intensity_matrix import ConditionalIntensityMatrix |
||||
|
||||
|
||||
class SetOfCims(object): |
||||
"""Aggregates all the CIMS of the node identified by the label _node_id. |
||||
|
||||
:param node_id: the node label |
||||
:type node_id: string |
||||
:param parents_states_number: the cardinalities of the parents |
||||
:type parents_states_number: List |
||||
:param node_states_number: the cardinality of the node |
||||
:type node_states_number: int |
||||
:param p_combs: the p_comb structure bound to this node |
||||
:type p_combs: numpy.ndArray |
||||
:_state_residence_times: matrix containing all the state residence time vectors for the node |
||||
:_transition_matrices: matrix containing all the transition matrices for the node |
||||
:_actual_cims: the cims of the node |
||||
""" |
||||
|
||||
def __init__(self, node_id: str, parents_states_number: typing.List, node_states_number: int, p_combs: np.ndarray): |
||||
"""Constructor Method |
||||
""" |
||||
self._node_id = node_id |
||||
self._parents_states_number = parents_states_number |
||||
self._node_states_number = node_states_number |
||||
self._actual_cims = [] |
||||
self._state_residence_times = None |
||||
self._transition_matrices = None |
||||
self._p_combs = p_combs |
||||
self.build_times_and_transitions_structures() |
||||
|
||||
def build_times_and_transitions_structures(self) -> None: |
||||
"""Initializes at the correct dimensions the state residence times matrix and the state transition matrices. |
||||
""" |
||||
if not self._parents_states_number: |
||||
self._state_residence_times = np.zeros((1, self._node_states_number), dtype=float) |
||||
self._transition_matrices = np.zeros((1, self._node_states_number, self._node_states_number), dtype=int) |
||||
else: |
||||
self._state_residence_times = \ |
||||
np.zeros((np.prod(self._parents_states_number), self._node_states_number), dtype=float) |
||||
self._transition_matrices = np.zeros([np.prod(self._parents_states_number), self._node_states_number, |
||||
self._node_states_number], dtype=int) |
||||
|
||||
def build_cims(self, state_res_times: np.ndarray, transition_matrices: np.ndarray) -> None: |
||||
"""Build the ``ConditionalIntensityMatrix`` objects given the state residence times and transitions matrices. |
||||
Compute the cim coefficients. The class member ``_actual_cims`` will contain the computed cims. |
||||
|
||||
:param state_res_times: the state residence times matrix |
||||
:type state_res_times: numpy.ndArray |
||||
:param transition_matrices: the transition matrices |
||||
:type transition_matrices: numpy.ndArray |
||||
""" |
||||
for state_res_time_vector, transition_matrix in zip(state_res_times, transition_matrices): |
||||
cim_to_add = ConditionalIntensityMatrix(state_res_time_vector, transition_matrix) |
||||
cim_to_add.compute_cim_coefficients() |
||||
self._actual_cims.append(cim_to_add) |
||||
self._actual_cims = np.array(self._actual_cims) |
||||
self._transition_matrices = None |
||||
self._state_residence_times = None |
||||
|
||||
def filter_cims_with_mask(self, mask_arr: np.ndarray, comb: typing.List) -> np.ndarray: |
||||
"""Filter the cims contained in the array ``_actual_cims`` given the boolean mask ``mask_arr`` and the index |
||||
``comb``. |
||||
|
||||
:param mask_arr: the boolean mask that indicates which parents to consider |
||||
:type mask_arr: numpy.array |
||||
:param comb: the state/s of the filtered parents |
||||
:type comb: numpy.array |
||||
:return: Array of ``ConditionalIntensityMatrix`` objects |
||||
:rtype: numpy.array |
||||
""" |
||||
if mask_arr.size <= 1: |
||||
return self._actual_cims |
||||
else: |
||||
flat_indxs = np.argwhere(np.all(self._p_combs[:, mask_arr] == comb, axis=1)).ravel() |
||||
return self._actual_cims[flat_indxs] |
||||
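A minimal sketch of the masking logic in isolation (arbitrary values): with a grid over two parents of cardinalities [2, 3], keeping only the second parent (mask ``[False, True]``) fixed in state 1 selects exactly the rows whose second column equals 1.

import numpy as np

p_combs = np.array([[p0, p1] for p1 in range(3) for p0 in range(2)])
mask_arr = np.array([False, True])
comb = np.array([1])
flat_indxs = np.argwhere(np.all(p_combs[:, mask_arr] == comb, axis=1)).ravel()
print(flat_indxs)  # [2 3] -> positions of the compatible CIMs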
|
||||
@property |
||||
def actual_cims(self) -> np.ndarray: |
||||
return self._actual_cims |
||||
|
||||
@property |
||||
def p_combs(self) -> np.ndarray: |
||||
return self._p_combs |
||||
|
||||
def get_cims_number(self): |
||||
return len(self._actual_cims) |
||||
|
||||
|
||||
|
||||
|
@ -0,0 +1,124 @@ |
||||
|
||||
import typing as ty |
||||
|
||||
import numpy as np |
||||
|
||||
|
||||
class Structure(object): |
||||
"""Contains all the infos about the network structure(nodes labels, nodes caridinalites, edges, indexes) |
||||
|
||||
:param nodes_labels_list: the symbolic names of the variables |
||||
:type nodes_labels_list: List |
||||
:param nodes_indexes_arr: the indexes of the nodes |
||||
:type nodes_indexes_arr: numpy.ndArray |
||||
:param nodes_vals_arr: the cardinalites of the nodes |
||||
:type nodes_vals_arr: numpy.ndArray |
||||
:param edges_list: the edges of the network |
||||
:type edges_list: List |
||||
:param total_variables_number: the total number of variables in the dataset |
||||
:type total_variables_number: int |
||||
""" |
||||
|
||||
def __init__(self, nodes_labels_list: ty.List, nodes_indexes_arr: np.ndarray, nodes_vals_arr: np.ndarray, |
||||
edges_list: ty.List, total_variables_number: int): |
||||
"""Constructor Method |
||||
""" |
||||
self._nodes_labels_list = nodes_labels_list |
||||
self._nodes_indexes_arr = nodes_indexes_arr |
||||
self._nodes_vals_arr = nodes_vals_arr |
||||
self._edges_list = edges_list |
||||
self._total_variables_number = total_variables_number |
||||
|
||||
def remove_node(self, node_id: str) -> None: |
||||
"""Remove the node ``node_id`` from all the class members. |
||||
The class member ``_total_variables_number`` is left untouched, since it refers to the total number of variables in the dataset. |
||||
""" |
||||
node_positional_indx = self._nodes_labels_list.index(node_id) |
||||
del self._nodes_labels_list[node_positional_indx] |
||||
self._nodes_indexes_arr = np.delete(self._nodes_indexes_arr, node_positional_indx) |
||||
self._nodes_vals_arr = np.delete(self._nodes_vals_arr, node_positional_indx) |
||||
self._edges_list = [(from_node, to_node) for (from_node, to_node) in self._edges_list if (from_node != node_id |
||||
and to_node != node_id)] |
||||
|
||||
@property |
||||
def edges(self) -> ty.List: |
||||
return self._edges_list |
||||
|
||||
@property |
||||
def nodes_labels(self) -> ty.List: |
||||
return self._nodes_labels_list |
||||
|
||||
@property |
||||
def nodes_indexes(self) -> np.ndarray: |
||||
return self._nodes_indexes_arr |
||||
|
||||
@property |
||||
def nodes_values(self) -> np.ndarray: |
||||
return self._nodes_vals_arr |
||||
|
||||
@property |
||||
def total_variables_number(self) -> int: |
||||
return self._total_variables_number |
||||
|
||||
def get_node_id(self, node_indx: int) -> str: |
||||
"""Given the ``node_index`` returns the node label. |
||||
|
||||
:param node_indx: the node index |
||||
:type node_indx: int |
||||
:return: the node label |
||||
:rtype: string |
||||
""" |
||||
return self._nodes_labels_list[node_indx] |
||||
|
||||
def clean_structure_edges(self): |
||||
self._edges_list = list() |
||||
|
||||
def add_edge(self, edge: tuple): |
||||
self._edges_list.append(edge) |
||||
|
||||
def remove_edge(self, edge: tuple): |
||||
self._edges_list.remove(edge) |
||||
|
||||
def contains_edge(self, edge: tuple) -> bool: |
||||
return edge in self._edges_list |
||||
|
||||
def get_node_indx(self, node_id: str) -> int: |
||||
"""Given the ``node_index`` returns the node label. |
||||
|
||||
:param node_id: the node label |
||||
:type node_id: string |
||||
:return: the node index |
||||
:rtype: int |
||||
""" |
||||
pos_indx = self._nodes_labels_list.index(node_id) |
||||
return self._nodes_indexes_arr[pos_indx] |
||||
|
||||
def get_positional_node_indx(self, node_id: str) -> int: |
||||
return self._nodes_labels_list.index(node_id) |
||||
|
||||
def get_states_number(self, node: str) -> int: |
||||
"""Given the node label ``node`` returns the cardinality of the node. |
||||
|
||||
:param node: the node label |
||||
:type node: string |
||||
:return: the node cardinality |
||||
:rtype: int |
||||
""" |
||||
pos_indx = self._nodes_labels_list.index(node) |
||||
return self._nodes_vals_arr[pos_indx] |
||||
|
||||
def __repr__(self): |
||||
return "Variables:\n" + str(self._nodes_labels_list) +"\nValues:\n"+ str(self._nodes_vals_arr) +\ |
||||
"\nEdges: \n" + str(self._edges_list) |
||||
|
||||
def __eq__(self, other): |
||||
"""Overrides the default implementation""" |
||||
if isinstance(other, Structure): |
||||
return set(self._nodes_labels_list) == set(other._nodes_labels_list) and \ |
||||
np.array_equal(self._nodes_vals_arr, other._nodes_vals_arr) and \ |
||||
np.array_equal(self._nodes_indexes_arr, other._nodes_indexes_arr) and \ |
||||
self._edges_list == other._edges_list |
||||
|
||||
return False |
||||
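A minimal instantiation sketch (toy network, arbitrary labels and cardinalities, assuming the package exports ``Structure``):

import numpy as np
from PyCTBN.structure_graph import Structure

s = Structure(['X', 'Y', 'Z'], np.array([0, 1, 2]), np.array([2, 2, 3]),
              [('X', 'Y'), ('Y', 'Z')], total_variables_number=3)
print(s.get_node_indx('Y'))         # 1
print(s.get_states_number('Z'))     # 3
print(s.contains_edge(('X', 'Y')))  # True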
|
@ -0,0 +1,45 @@ |
||||
|
||||
import typing |
||||
|
||||
import numpy as np |
||||
|
||||
|
||||
class Trajectory(object): |
||||
""" Abstracts the infos about a complete set of trajectories, represented as a numpy array of doubles |
||||
(the time deltas) and a numpy matrix of ints (the changes of states). |
||||
|
||||
:param list_of_columns: the list containing the times array and values matrix |
||||
:type list_of_columns: List |
||||
:param original_cols_number: total number of columns in the original data (time included) |
||||
:type original_cols_number: int |
||||
:_actual_trajectory: the trajectory containing also the duplicated/shifted values |
||||
:_times: the array containing the time deltas |
||||
""" |
||||
|
||||
def __init__(self, list_of_columns: typing.List, original_cols_number: int): |
||||
"""Constructor Method |
||||
""" |
||||
self._times = list_of_columns[0] |
||||
self._actual_trajectory = list_of_columns[1] |
||||
self._original_cols_number = original_cols_number |
||||
|
||||
@property |
||||
def trajectory(self) -> np.ndarray: |
||||
return self._actual_trajectory[:, :self._original_cols_number - 1] |
||||
|
||||
@property |
||||
def complete_trajectory(self) -> np.ndarray: |
||||
return self._actual_trajectory |
||||
|
||||
@property |
||||
def times(self): |
||||
return self._times |
||||
|
||||
def size(self): |
||||
return self._actual_trajectory.shape[0] |
||||
|
||||
def __repr__(self): |
||||
return "Complete Trajectory Rows: " + str(self.size()) + "\n" + self.complete_trajectory.__repr__() + \ |
||||
"\nTimes Rows:" + str(self.times.size) + "\n" + self.times.__repr__() |
||||
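A minimal sketch with toy numbers (assuming the package exports ``Trajectory``): ``list_of_columns`` packs the delta-times array and the integer state matrix, while ``original_cols_number`` counts the columns of the original dataset (time included), so the ``trajectory`` property strips the shifted copies.

import numpy as np
from PyCTBN.structure_graph import Trajectory

times = np.array([0.4, 0.5])
states = np.array([[0, 0, 1, 0],   # current values | shifted copies
                   [1, 0, 1, 1]])
t = Trajectory([times, states], original_cols_number=3)
print(t.trajectory)  # only the two original variable columns
print(t.times)       # the delta times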
|
||||
|
@ -0,0 +1,4 @@ |
||||
from .abstract_importer import AbstractImporter |
||||
from .cache import Cache |
||||
from .json_importer import JsonImporter |
||||
from .sample_importer import SampleImporter |
@ -0,0 +1,164 @@ |
||||
|
||||
import typing |
||||
from abc import ABC, abstractmethod |
||||
|
||||
import numpy as np |
||||
import pandas as pd |
||||
|
||||
import copy |
||||
|
||||
|
||||
|
||||
class AbstractImporter(ABC): |
||||
"""Abstract class that exposes all the necessary methods to process the trajectories and the net structure. |
||||
|
||||
:param file_path: the file path, or dataset name if you import already processed data |
||||
:type file_path: str |
||||
:param trajectory_list: Dataframe or numpy array containing the concatenation of all the processed trajectories |
||||
:type trajectory_list: typing.Union[pandas.DataFrame, numpy.ndarray] |
||||
:param variables: Dataframe containing the nodes labels and cardinalities |
||||
:type variables: pandas.DataFrame |
||||
:param prior_net_structure: Dataframe containing the structure of the network (edges) |
||||
:type prior_net_structure: pandas.DataFrame |
||||
:_sorter: A list containing the variables labels in the SAME order as the columns in ``concatenated_samples`` |
||||
|
||||
.. warning:: |
||||
The parameters ``variables`` and ``prior_net_structure`` HAVE to be properly constructed |
||||
as Pandas Dataframes with the following structure: |
||||
Header of _df_structure = [From_Node | To_Node] |
||||
Header of _df_variables = [Variable_Label | Variable_Cardinality] |
||||
See the tutorial on how to construct a correct ``concatenated_samples`` Dataframe/ndarray. |
||||
|
||||
.. note:: |
||||
See :class:``JsonImporter`` for an example implementation |
||||
|
||||
""" |
||||
|
||||
def __init__(self, file_path: str = None, trajectory_list: typing.Union[pd.DataFrame, np.ndarray] = None, |
||||
variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None): |
||||
"""Constructor |
||||
""" |
||||
self._file_path = file_path |
||||
self._df_samples_list = trajectory_list |
||||
self._concatenated_samples = [] |
||||
self._df_variables = variables |
||||
self._df_structure = prior_net_structure |
||||
self._sorter = None |
||||
super().__init__() |
||||
|
||||
@abstractmethod |
||||
def build_sorter(self, trajectory_header: object) -> typing.List: |
||||
"""Initializes the ``_sorter`` class member from a trajectory dataframe, extracting the header of the frame |
||||
and keeping ONLY the variables' symbolic labels, cutting out the time label in the header. |
||||
|
||||
:param trajectory_header: an object that will be used to define the header |
||||
:type trajectory_header: object |
||||
:return: A list containing the processed header. |
||||
:rtype: List |
||||
""" |
||||
pass |
||||
|
||||
def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame, |
||||
columns_header: typing.List, shifted_cols_header: typing.List) \ |
||||
-> pd.DataFrame: |
||||
"""Computes the difference between each value present in th time column. |
||||
Copies and shifts up by one position all the values present in the remaining columns. |
||||
|
||||
:param sample_frame: the trajectory to be processed |
||||
:type sample_frame: pandas.Dataframe |
||||
:param columns_header: the original header of sample_frame |
||||
:type columns_header: List |
||||
:param shifted_cols_header: a copy of ``columns_header`` with the names changed to mark the shifted columns |
||||
:type shifted_cols_header: List |
||||
:return: The processed dataframe |
||||
:rtype: pandas.Dataframe |
||||
|
||||
.. warning:: |
||||
the Dataframe ``sample_frame`` has to follow the column structure of this header: |
||||
Header of sample_frame = [Time | Variable values] |
||||
""" |
||||
sample_frame = copy.deepcopy(sample_frame) |
||||
sample_frame.iloc[:, 0] = sample_frame.iloc[:, 0].diff().shift(-1) |
||||
shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32') |
||||
shifted_cols.columns = shifted_cols_header |
||||
sample_frame = sample_frame.assign(**shifted_cols) |
||||
sample_frame.drop(sample_frame.tail(1).index, inplace=True) |
||||
return sample_frame |
||||
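A minimal sketch of the same transformation on toy data (hypothetical values): the time column becomes a column of deltas, every variable column gets a shifted "next value" copy, and the last row, which has no successor, is dropped.

import pandas as pd

frame = pd.DataFrame({'Time': [0.0, 0.5, 1.2], 'X': [0, 1, 1], 'Y': [0, 0, 1]})
frame['Time'] = frame['Time'].diff().shift(-1)
shifted = frame[['X', 'Y']].shift(-1).fillna(0).astype('int32')
shifted.columns = ['XS', 'YS']
frame = frame.assign(**shifted).drop(frame.tail(1).index)
print(frame)  # Time holds the deltas 0.5 and 0.7; XS/YS hold the next states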
|
||||
def compute_row_delta_in_all_samples_frames(self, df_samples_list: typing.List) -> None: |
||||
"""Calls the method ``compute_row_delta_sigle_samples_frame`` on every dataframe present in the list |
||||
``df_samples_list``. |
||||
Concatenates the result in the dataframe ``concatenated_samples``. |
||||
|
||||
:param df_samples_list: the list of dataframes to be processed and concatenated |
||||
:type df_samples_list: List |
||||
|
||||
.. warning:: |
||||
The Dataframe sample_frame has to follow the column structure of this header: |
||||
Header of sample_frame = [Time | Variable values] |
||||
The class member self._sorter HAS to be properly INITIALIZED (See class members definition doc) |
||||
.. note:: |
||||
After the call of this method the class member ``concatenated_samples`` will contain all processed |
||||
and merged trajectories |
||||
""" |
||||
if not self._sorter: |
||||
raise RuntimeError("The class member self._sorter has to be INITIALIZED!") |
||||
shifted_cols_header = [s + "S" for s in self._sorter] |
||||
compute_row_delta = self.compute_row_delta_sigle_samples_frame |
||||
proc_samples_list = [compute_row_delta(sample, self._sorter, shifted_cols_header) |
||||
for sample in df_samples_list] |
||||
self._concatenated_samples = pd.concat(proc_samples_list) |
||||
|
||||
complete_header = self._sorter[:] |
||||
complete_header.insert(0,'Time') |
||||
complete_header.extend(shifted_cols_header) |
||||
self._concatenated_samples = self._concatenated_samples[complete_header] |
||||
|
||||
def build_list_of_samples_array(self, concatenated_sample: pd.DataFrame) -> typing.List: |
||||
"""Builds a List containing the the delta times numpy array, and the complete transitions matrix |
||||
|
||||
:param concatenated_sample: the dataframe/array from which the times and the transitions matrix have to be extracted |
||||
and converted |
||||
:type concatenated_sample: pandas.Dataframe |
||||
:return: the resulting list of numpy arrays |
||||
:rtype: List |
||||
""" |
||||
|
||||
concatenated_array = concatenated_sample.to_numpy() |
||||
columns_list = [concatenated_array[:, 0], concatenated_array[:, 1:].astype(int)] |
||||
|
||||
return columns_list |
||||
|
||||
def clear_concatenated_frame(self) -> None: |
||||
"""Removes all values in the dataframe concatenated_samples. |
||||
""" |
||||
if isinstance(self._concatenated_samples, pd.DataFrame): |
||||
self._concatenated_samples = self._concatenated_samples.iloc[0:0] |
||||
|
||||
@abstractmethod |
||||
def dataset_id(self) -> object: |
||||
"""If the original dataset contains multiple dataset, this method returns a unique id to identify the current |
||||
dataset |
||||
""" |
||||
pass |
||||
|
||||
@property |
||||
def concatenated_samples(self) -> pd.DataFrame: |
||||
return self._concatenated_samples |
||||
|
||||
@property |
||||
def variables(self) -> pd.DataFrame: |
||||
return self._df_variables |
||||
|
||||
@property |
||||
def structure(self) -> pd.DataFrame: |
||||
return self._df_structure |
||||
|
||||
@property |
||||
def sorter(self) -> typing.List: |
||||
return self._sorter |
||||
|
||||
@property |
||||
def file_path(self) -> str: |
||||
return self._file_path |
@ -0,0 +1,58 @@ |
||||
|
||||
import typing |
||||
|
||||
from ..structure_graph.set_of_cims import SetOfCims |
||||
|
||||
|
||||
class Cache: |
||||
"""This class acts as a cache of ``SetOfCims`` objects for a node. |
||||
|
||||
:_list_of_sets_of_parents: a list of ``Set`` objects of the parents to which the ``SetOfCims`` in cache at the SAME |
||||
index is related |
||||
:_actual_cache: a list of ``SetOfCims`` objects |
||||
""" |
||||
|
||||
def __init__(self): |
||||
"""Constructor Method |
||||
""" |
||||
self._list_of_sets_of_parents = [] |
||||
self._actual_cache = [] |
||||
|
||||
def find(self, parents_comb: typing.Set): |
||||
""" |
||||
Tries to find in cache, given the symbolic parents combination ``parents_comb``, the ``SetOfCims`` |
||||
related to that ``parents_comb``. |
||||
|
||||
:param parents_comb: the parents related to that ``SetOfCims`` |
||||
:type parents_comb: Set |
||||
:return: A ``SetOfCims`` object if the ``parents_comb`` index is found in ``_list_of_sets_of_parents``. |
||||
None otherwise. |
||||
:rtype: SetOfCims |
||||
""" |
||||
try: |
||||
result = self._actual_cache[self._list_of_sets_of_parents.index(parents_comb)] |
||||
return result |
||||
except ValueError: |
||||
return None |
||||
|
||||
def put(self, parents_comb: typing.Set, socim: SetOfCims): |
||||
"""Place in cache the ``SetOfCims`` object, and the related symbolic index ``parents_comb`` in |
||||
``_list_of_sets_of_parents``. |
||||
|
||||
:param parents_comb: the symbolic set index |
||||
:type parents_comb: Set |
||||
:param socim: the related SetOfCims object |
||||
:type socim: SetOfCims |
||||
""" |
||||
#print("Putting in cache:", parents_comb) |
||||
self._list_of_sets_of_parents.append(parents_comb) |
||||
self._actual_cache.append(socim) |
||||
|
||||
def clear(self): |
||||
"""Clear the contents both of ``__actual_cache`` and ``__list_of_sets_of_parents``. |
||||
""" |
||||
del self._list_of_sets_of_parents[:] |
||||
del self._actual_cache[:] |
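A minimal usage sketch (``socim`` is a placeholder for a real ``SetOfCims``, and the import assumes the package layout above): lookups are based on set equality, so the order of the parent labels does not matter.

from PyCTBN.utility import Cache

cache = Cache()
socim = object()  # stands in for a computed SetOfCims instance
cache.put(frozenset({'X', 'Y'}), socim)
assert cache.find(frozenset({'Y', 'X'})) is socim  # hit: same set of parents
assert cache.find(frozenset({'Z'})) is None        # miss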
@ -0,0 +1,176 @@ |
||||
import json |
||||
import typing |
||||
|
||||
import pandas as pd |
||||
|
||||
|
||||
from .abstract_importer import AbstractImporter |
||||
|
||||
|
||||
class JsonImporter(AbstractImporter): |
||||
"""Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare |
||||
the data in json extension. |
||||
|
||||
:param file_path: the path of the file that contains the data to be imported |
||||
:type file_path: string |
||||
:param samples_label: the reference key for the samples in the trajectories |
||||
:type samples_label: string |
||||
:param structure_label: the reference key for the structure of the network data |
||||
:type structure_label: string |
||||
:param variables_label: the reference key for the cardinalities of the nodes data |
||||
:type variables_label: string |
||||
:param time_key: the key used to identify the timestamps in each trajectory |
||||
:type time_key: string |
||||
:param variables_key: the key used to identify the names of the variables in the net |
||||
:type variables_key: string |
||||
:_array_indx: the index of the outer JsonArray to extract the data from |
||||
:type _array_indx: int |
||||
:_df_samples_list: a Dataframe list in which every dataframe contains a trajectory |
||||
:_raw_data: The raw contents of the json file to import |
||||
:type _raw_data: List |
||||
""" |
||||
|
||||
def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str, |
||||
variables_key: str): |
||||
"""Constructor method |
||||
|
||||
.. note:: |
||||
This constructor also calls the method ``read_json_file()``, so after the construction of the object |
||||
the class member ``_raw_data`` will contain the raw imported json data. |
||||
|
||||
""" |
||||
self._samples_label = samples_label |
||||
self._structure_label = structure_label |
||||
self._variables_label = variables_label |
||||
self._time_key = time_key |
||||
self._variables_key = variables_key |
||||
self._df_samples_list = None |
||||
self._array_indx = None |
||||
super(JsonImporter, self).__init__(file_path) |
||||
self._raw_data = self.read_json_file() |
||||
|
||||
def import_data(self, indx: int) -> None: |
||||
"""Implements the abstract method of :class:`AbstractImporter`. |
||||
|
||||
:param indx: the index of the outer JsonArray to extract the data from |
||||
:type indx: int |
||||
""" |
||||
self._array_indx = indx |
||||
self._df_samples_list = self.import_trajectories(self._raw_data) |
||||
self._sorter = self.build_sorter(self._df_samples_list[0]) |
||||
self.compute_row_delta_in_all_samples_frames(self._df_samples_list) |
||||
self.clear_data_frame_list() |
||||
self._df_structure = self.import_structure(self._raw_data) |
||||
self._df_variables = self.import_variables(self._raw_data) |
||||
|
||||
def import_trajectories(self, raw_data: typing.List) -> typing.List: |
||||
"""Imports the trajectories from the list of dicts ``raw_data``. |
||||
|
||||
:param raw_data: List of Dicts |
||||
:type raw_data: List |
||||
:return: List of dataframes containing all the trajectories |
||||
:rtype: List |
||||
""" |
||||
return self.normalize_trajectories(raw_data, self._array_indx, self._samples_label) |
||||
|
||||
def import_structure(self, raw_data: typing.List) -> pd.DataFrame: |
||||
"""Imports in a dataframe the data in the list raw_data at the key ``_structure_label`` |
||||
|
||||
:param raw_data: List of Dicts |
||||
:type raw_data: List |
||||
:return: Dataframe containing the starting and ending node of every arc of the network |
||||
:rtype: pandas.Dataframe |
||||
""" |
||||
return self.one_level_normalizing(raw_data, self._array_indx, self._structure_label) |
||||
|
||||
def import_variables(self, raw_data: typing.List) -> pd.DataFrame: |
||||
"""Imports the data in ``raw_data`` at the key ``_variables_label``. |
||||
|
||||
:param raw_data: List of Dicts |
||||
:type raw_data: List |
||||
:return: Dataframe containing the variables' symbolic labels and their cardinalities |
||||
:rtype: pandas.Dataframe |
||||
""" |
||||
return self.one_level_normalizing(raw_data, self._array_indx, self._variables_label) |
||||
|
||||
def read_json_file(self) -> typing.List: |
||||
"""Reads the JSON file in the path self.filePath. |
||||
|
||||
:return: The contents of the json file |
||||
:rtype: List |
||||
""" |
||||
with open(self._file_path) as f: |
||||
data = json.load(f) |
||||
return data |
||||
|
||||
def one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame: |
||||
"""Extracts the one-level nested data in the list ``raw_data`` at the index ``indx`` at the key ``key``. |
||||
|
||||
:param raw_data: List of Dicts |
||||
:type raw_data: List |
||||
:param indx: The index of the array from which the data have to be extracted |
||||
:type indx: int |
||||
:param key: the key of the Dicts from which to extract data |
||||
:type key: string |
||||
:return: A normalized dataframe |
||||
:rtype: pandas.Dataframe |
||||
""" |
||||
return pd.DataFrame(raw_data[indx][key]) |
||||
|
||||
def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str) -> typing.List: |
||||
""" |
||||
Extracts the trajectories in ``raw_data`` at the index ``indx`` at the key ``trajectories_key``. |
||||
|
||||
:param raw_data: List of Dicts |
||||
:type raw_data: List |
||||
:param indx: The index of the array from which the data have to be extracted |
||||
:type indx: int |
||||
:param trajectories_key: the key of the trajectories objects |
||||
:type trajectories_key: string |
||||
:return: A list of dataframes containing the trajectories |
||||
:rtype: List |
||||
""" |
||||
smps = raw_data[indx][trajectories_key] |
||||
df_samples_list = [pd.DataFrame(sample) for sample in smps] |
||||
return df_samples_list |
||||
|
||||
def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List: |
||||
"""Implements the abstract method build_sorter of the :class:`AbstractImporter` for this dataset. |
||||
""" |
||||
columns_header = list(sample_frame.columns.values) |
||||
columns_header.remove(self._time_key) |
||||
return columns_header |
||||
|
||||
def clear_data_frame_list(self) -> None: |
||||
"""Removes all values present in the dataframes in the list ``_df_samples_list``. |
||||
""" |
||||
for indx in range(len(self._df_samples_list)): |
||||
self._df_samples_list[indx] = self._df_samples_list[indx].iloc[0:0] |
||||
|
||||
def dataset_id(self) -> object: |
||||
return self._array_indx |
||||
|
||||
def import_sampled_cims(self, raw_data: typing.List, indx: int, cims_key: str) -> typing.Dict: |
||||
"""Imports the synthetic CIMS in the dataset in a dictionary, using variables labels |
||||
as keys for the set of CIMS of a particular node. |
||||
|
||||
:param raw_data: List of Dicts |
||||
:type raw_data: List |
||||
:param indx: The index of the array from which the data have to be extracted |
||||
:type indx: int |
||||
:param cims_key: the key where the json object cims are placed |
||||
:type cims_key: string |
||||
:return: a dictionary containing the sampled CIMS for all the variables in the net |
||||
:rtype: Dictionary |
||||
""" |
||||
cims_for_all_vars = {} |
||||
for var in raw_data[indx][cims_key]: |
||||
sampled_cims_list = [] |
||||
cims_for_all_vars[var] = sampled_cims_list |
||||
for p_comb in raw_data[indx][cims_key][var]: |
||||
cims_for_all_vars[var].append(pd.DataFrame(raw_data[indx][cims_key][var][p_comb]).to_numpy()) |
||||
return cims_for_all_vars |
||||
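A minimal usage sketch (the file path and the json keys below are hypothetical and depend on how the dataset was generated; the import assumes the package layout above):

from PyCTBN.utility import JsonImporter

importer = JsonImporter(file_path='./data/networks_and_trajectories.json',
                        samples_label='samples', structure_label='dyn.str',
                        variables_label='variables', time_key='Time',
                        variables_key='Name')
importer.import_data(0)  # select the first dataset in the outer json array
print(importer.variables)
print(importer.structure)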
|
||||
|
||||
|
@ -0,0 +1,65 @@ |
||||
import json |
||||
import typing |
||||
|
||||
import pandas as pd |
||||
import numpy as np |
||||
|
||||
from .abstract_importer import AbstractImporter |
||||
|
||||
|
||||
|
||||
class SampleImporter(AbstractImporter): |
||||
"""Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare |
||||
the data loaded directly by using DataFrame |
||||
|
||||
:param trajectory_list: the data that describes the trajectories |
||||
:type trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List] |
||||
:param variables: the data that describes the variables with name and cardinality |
||||
:type variables: typing.Union[pd.DataFrame, np.ndarray, typing.List] |
||||
:param prior_net_structure: the data of the real structure, if it exists |
||||
:type prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List] |
||||
|
||||
:_df_samples_list: a Dataframe list in which every dataframe contains a trajectory |
||||
""" |
||||
|
||||
def __init__(self, |
||||
trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None, |
||||
variables: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None, |
||||
prior_net_structure: typing.Union[pd.DataFrame, np.ndarray,typing.List] = None): |
||||
|
||||
# If the data is not a DataFrame, it is converted |
||||
if isinstance(variables, list) or isinstance(variables, np.ndarray): |
||||
variables = pd.DataFrame(variables) |
||||
if isinstance(prior_net_structure, list) or isinstance(prior_net_structure, np.ndarray): |
||||
prior_net_structure = pd.DataFrame(prior_net_structure) |
||||
|
||||
super(SampleImporter, self).__init__(trajectory_list=trajectory_list, |
||||
variables=variables, |
||||
prior_net_structure=prior_net_structure) |
||||
|
||||
def import_data(self, header_column = None): |
||||
|
||||
if header_column is not None: |
||||
self._sorter = header_column |
||||
else: |
||||
self._sorter = self.build_sorter(self._df_samples_list[0]) |
||||
|
||||
samples_list = self._df_samples_list |
||||
|
||||
if isinstance(samples_list, np.ndarray): |
||||
samples_list = samples_list.tolist() |
||||
|
||||
self.compute_row_delta_in_all_samples_frames(samples_list) |
||||
|
||||
def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List: |
||||
"""Implements the abstract method build_sorter of the :class:`AbstractImporter` in order to get the ordered variables list. |
||||
""" |
||||
columns_header = list(sample_frame.columns.values) |
||||
del columns_header[0] |
||||
return columns_header |
||||
|
||||
|
||||
def dataset_id(self) -> object: |
||||
pass |
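A minimal sketch with toy data showing that plain lists are accepted and converted to dataframes before being handed to ``AbstractImporter`` (the import assumes the package layout above):

import pandas as pd
from PyCTBN.utility import SampleImporter

trajectories = [pd.DataFrame({'Time': [0.0, 0.3, 0.8], 'X': [0, 1, 0]})]
variables = [['X', 2]]  # label / cardinality pairs, as a plain list
si = SampleImporter(trajectory_list=trajectories, variables=variables)
si.import_data(header_column=['X'])
print(si.concatenated_samples)  # columns: Time | X | XS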
@ -0,0 +1 @@ |
||||
|
@ -0,0 +1,20 @@ |
||||
from setuptools import setup, find_packages |
||||
|
||||
|
||||
setup(name='PyCTBN', |
||||
version='1.0', |
||||
url='https://github.com/philipMartini/PyCTBN', |
||||
license='MIT', |
||||
author=['Alessandro Bregoli', 'Filippo Martini','Luca Moretti'], |
||||
author_email=['a.bregoli1@campus.unimib.it', 'f.martini@campus.unimib.it','lucamoretti96@gmail.com'], |
||||
description='A Continuous Time Bayesian Networks Library', |
||||
packages=find_packages('.', exclude=['tests']), |
||||
#packages=['PyCTBN.PyCTBN'], |
||||
install_requires=[ |
||||
'numpy', 'pandas', 'networkx', 'scipy', 'matplotlib', 'tqdm'], |
||||
dependency_links=['https://github.com/numpy/numpy', 'https://github.com/pandas-dev/pandas', |
||||
'https://github.com/networkx/networkx', 'https://github.com/scipy/scipy', |
||||
'https://github.com/tqdm/tqdm'], |
||||
#long_description=open('../README.md').read(), |
||||
zip_safe=False, |
||||
python_requires='>=3.6') |
@ -0,0 +1 @@ |
||||
|
@ -0,0 +1,963 @@ |
||||
<?xml version="1.0" ?> |
||||
<coverage version="5.2" timestamp="1597406229874" lines-valid="891" lines-covered="638" line-rate="0.716" branches-covered="0" branches-valid="0" branch-rate="0" complexity="0"> |
||||
<!-- Generated by coverage.py: https://coverage.readthedocs.io --> |
||||
<!-- Based on https://raw.githubusercontent.com/cobertura/web/master/htdocs/xml/coverage-04.dtd --> |
||||
<sources> |
||||
<source></source> |
||||
</sources> |
||||
<packages> |
||||
<package name="." line-rate="1" branch-rate="0" complexity="0"> |
||||
<classes> |
||||
<class name="test_json_importer.py" filename="test_json_importer.py" complexity="0" line-rate="1" branch-rate="0"> |
||||
<methods/> |
||||
<lines> |
||||
<line number="1" hits="1"/> |
||||
<line number="2" hits="1"/> |
||||
<line number="3" hits="1"/> |
||||
<line number="4" hits="1"/> |
||||
<line number="5" hits="1"/> |
||||
<line number="6" hits="1"/> |
||||
<line number="7" hits="1"/> |
||||
<line number="8" hits="1"/> |
||||
<line number="10" hits="1"/> |
||||
<line number="12" hits="1"/> |
||||
<line number="16" hits="1"/> |
||||
<line number="18" hits="1"/> |
||||
<line number="19" hits="1"/> |
||||
<line number="20" hits="1"/> |
||||
<line number="22" hits="1"/> |
||||
<line number="23" hits="1"/> |
||||
<line number="24" hits="1"/> |
||||
<line number="25" hits="1"/> |
||||
<line number="26" hits="1"/> |
||||
<line number="27" hits="1"/> |
||||
<line number="28" hits="1"/> |
||||
<line number="29" hits="1"/> |
||||
<line number="30" hits="1"/> |
||||
<line number="31" hits="1"/> |
||||
<line number="32" hits="1"/> |
||||
<line number="33" hits="1"/> |
||||
<line number="34" hits="1"/> |
||||
<line number="36" hits="1"/> |
||||
<line number="37" hits="1"/> |
||||
<line number="38" hits="1"/> |
||||
<line number="39" hits="1"/> |
||||
<line number="40" hits="1"/> |
||||
<line number="41" hits="1"/> |
||||
<line number="42" hits="1"/> |
||||
<line number="43" hits="1"/> |
||||
<line number="44" hits="1"/> |
||||
<line number="45" hits="1"/> |
||||
<line number="47" hits="1"/> |
||||
<line number="48" hits="1"/> |
||||
<line number="49" hits="1"/> |
||||
<line number="50" hits="1"/> |
||||
<line number="51" hits="1"/> |
||||
<line number="53" hits="1"/> |
||||
<line number="54" hits="1"/> |
||||
<line number="55" hits="1"/> |
||||
<line number="57" hits="1"/> |
||||
<line number="58" hits="1"/> |
||||
<line number="59" hits="1"/> |
||||
<line number="61" hits="1"/> |
||||
<line number="62" hits="1"/> |
||||
<line number="63" hits="1"/> |
||||
<line number="64" hits="1"/> |
||||
<line number="66" hits="1"/> |
||||
<line number="67" hits="1"/> |
||||
<line number="68" hits="1"/> |
||||
<line number="69" hits="1"/> |
||||
<line number="71" hits="1"/> |
||||
<line number="72" hits="1"/> |
||||
<line number="73" hits="1"/> |
||||
<line number="74" hits="1"/> |
||||
<line number="75" hits="1"/> |
||||
<line number="76" hits="1"/> |
||||
<line number="77" hits="1"/> |
||||
<line number="78" hits="1"/> |
||||
<line number="80" hits="1"/> |
||||
<line number="82" hits="1"/> |
||||
<line number="84" hits="1"/> |
||||
<line number="85" hits="1"/> |
||||
<line number="86" hits="1"/> |
||||
<line number="87" hits="1"/> |
||||
<line number="88" hits="1"/> |
||||
<line number="89" hits="1"/> |
||||
<line number="90" hits="1"/> |
||||
<line number="92" hits="1"/> |
||||
<line number="93" hits="1"/> |
||||
<line number="94" hits="1"/> |
||||
<line number="95" hits="1"/> |
||||
<line number="96" hits="1"/> |
||||
<line number="97" hits="1"/> |
||||
<line number="98" hits="1"/> |
||||
<line number="99" hits="1"/> |
||||
<line number="101" hits="1"/> |
||||
<line number="102" hits="1"/> |
||||
<line number="103" hits="1"/> |
||||
<line number="104" hits="1"/> |
||||
<line number="105" hits="1"/> |
||||
<line number="107" hits="1"/> |
||||
<line number="108" hits="1"/> |
||||
<line number="109" hits="1"/> |
||||
<line number="110" hits="1"/> |
||||
<line number="111" hits="1"/> |
||||
<line number="112" hits="1"/> |
||||
<line number="113" hits="1"/> |
||||
<line number="114" hits="1"/> |
||||
<line number="115" hits="1"/> |
||||
<line number="116" hits="1"/> |
||||
<line number="117" hits="1"/> |
||||
<line number="118" hits="1"/> |
||||
<line number="119" hits="1"/> |
||||
<line number="120" hits="1"/> |
||||
<line number="121" hits="1"/> |
||||
<line number="122" hits="1"/> |
||||
<line number="124" hits="1"/> |
||||
<line number="125" hits="1"/> |
||||
<line number="126" hits="1"/> |
||||
<line number="127" hits="1"/> |
||||
<line number="128" hits="1"/> |
||||
<line number="129" hits="1"/> |
||||
<line number="131" hits="1"/> |
||||
<line number="132" hits="1"/> |
||||
<line number="133" hits="1"/> |
||||
<line number="134" hits="1"/> |
||||
<line number="136" hits="1"/> |
||||
<line number="138" hits="1"/> |
||||
<line number="139" hits="1"/> |
||||
<line number="140" hits="1"/> |
||||
<line number="141" hits="1"/> |
||||
<line number="142" hits="1"/> |
||||
<line number="143" hits="1"/> |
||||
<line number="145" hits="1"/> |
||||
<line number="146" hits="1"/> |
||||
<line number="147" hits="1"/> |
||||
<line number="154" hits="1"/> |
||||
<line number="156" hits="1"/> |
||||
<line number="157" hits="1"/> |
||||
<line number="158" hits="1"/> |
||||
<line number="160" hits="1"/> |
||||
<line number="161" hits="1"/> |
||||
<line number="162" hits="1"/> |
||||
<line number="163" hits="1"/> |
||||
<line number="164" hits="1"/> |
||||
<line number="166" hits="1"/> |
||||
<line number="169" hits="1"/> |
||||
<line number="170" hits="1"/> |
||||
</lines> |
||||
</class> |
||||
<class name="test_sample_path.py" filename="test_sample_path.py" complexity="0" line-rate="1" branch-rate="0"> |
||||
<methods/> |
||||
<lines> |
||||
<line number="1" hits="1"/> |
||||
<line number="2" hits="1"/> |
||||
<line number="3" hits="1"/> |
||||
<line number="4" hits="1"/> |
||||
<line number="5" hits="1"/> |
||||
<line number="6" hits="1"/> |
||||
<line number="7" hits="1"/> |
||||
<line number="8" hits="1"/> |
||||
<line number="9" hits="1"/> |
||||
<line number="12" hits="1"/> |
||||
<line number="14" hits="1"/> |
||||
<line number="15" hits="1"/> |
||||
<line number="16" hits="1"/> |
||||
<line number="17" hits="1"/> |
||||
<line number="19" hits="1"/> |
||||
<line number="20" hits="1"/> |
||||
<line number="21" hits="1"/> |
||||
<line number="22" hits="1"/> |
||||
<line number="23" hits="1"/> |
||||
<line number="24" hits="1"/> |
||||
<line number="25" hits="1"/> |
||||
<line number="26" hits="1"/> |
||||
<line number="27" hits="1"/> |
||||
<line number="28" hits="1"/> |
||||
<line number="29" hits="1"/> |
||||
<line number="30" hits="1"/> |
||||
<line number="33" hits="1"/> |
||||
<line number="34" hits="1"/> |
||||
</lines> |
||||
</class> |
||||
<class name="test_trajectory.py" filename="test_trajectory.py" complexity="0" line-rate="1" branch-rate="0"> |
||||
<methods/> |
||||
<lines> |
||||
<line number="1" hits="1"/> |
||||
<line number="2" hits="1"/> |
||||
<line number="3" hits="1"/> |
||||
<line number="4" hits="1"/> |
||||
<line number="6" hits="1"/> |
||||
<line number="9" hits="1"/> |
||||
<line number="11" hits="1"/> |
||||
<line number="12" hits="1"/> |
||||
<line number="13" hits="1"/> |
||||
<line number="14" hits="1"/> |
||||
<line number="15" hits="1"/> |
||||
<line number="16" hits="1"/> |
||||
<line number="17" hits="1"/> |
||||
<line number="18" hits="1"/> |
||||
<line number="20" hits="1"/> |
||||
<line number="21" hits="1"/> |
||||
<line number="22" hits="1"/> |
||||
<line number="24" hits="1"/> |
||||
<line number="25" hits="1"/> |
||||
<line number="26" hits="1"/> |
||||
<line number="27" hits="1"/> |
||||
<line number="28" hits="1"/> |
||||
<line number="30" hits="1"/> |
||||
<line number="31" hits="1"/> |
||||
<line number="32" hits="1"/> |
||||
<line number="33" hits="1"/> |
||||
<line number="35" hits="1"/> |
||||
<line number="36" hits="1"/> |
||||
<line number="37" hits="1"/> |
||||
<line number="38" hits="1"/> |
||||
<line number="40" hits="1"/> |
||||
<line number="41" hits="1"/> |
||||
<line number="42" hits="1"/> |
||||
<line number="43" hits="1"/> |
||||
<line number="46" hits="1"/> |
||||
<line number="47" hits="1"/> |
||||
</lines> |
||||
</class> |
||||
</classes> |
||||
</package> |
||||
<package name=".Users.Zalum.Desktop.Tesi.CTBN_Project.PyCTBN.PyCTBN" line-rate="0.9059" branch-rate="0" complexity="0"> |
||||
<classes> |
||||
<class name="abstract_importer.py" filename="/Users/Zalum/Desktop/Tesi/CTBN_Project/PyCTBN/PyCTBN/abstract_importer.py" complexity="0" line-rate="0.8182" branch-rate="0"> |
||||
<methods/> |
||||
<lines> |
||||
<line number="1" hits="1"/> |
||||
<line number="4" hits="1"/> |
||||
<line number="12" hits="1"/> |
||||
<line number="13" hits="1"/> |
||||
<line number="14" hits="1"/> |
||||
<line number="16" hits="1"/> |
||||
<line number="17" hits="1"/> |
||||
<line number="18" hits="0"/> |
||||
<line number="20" hits="1"/> |
||||
<line number="21" hits="1"/> |
||||
<line number="22" hits="0"/> |
||||
</lines> |
||||
</class> |
||||
<class name="abstract_sample_path.py" filename="/Users/Zalum/Desktop/Tesi/CTBN_Project/PyCTBN/PyCTBN/abstract_sample_path.py" complexity="0" line-rate="0.8571" branch-rate="0"> |
||||
<methods/> |
||||
<lines> |
||||
<line number="1" hits="1"/> |
||||
<line number="2" hits="1"/> |
||||
<line number="5" hits="1"/> |
||||
<line number="7" hits="1"/> |
||||
<line number="8" hits="1"/> |
||||
<line number="9" hits="1"/> |
||||
<line number="10" hits="1"/> |
||||
<line number="11" hits="1"/> |
||||
<line number="13" hits="1"/> |
||||
<line number="14" hits="1"/> |
||||
<line number="25" hits="0"/> |
||||
<line number="27" hits="1"/> |
||||
<line number="28" hits="1"/> |
||||
<line number="37" hits="0"/> |
||||
</lines> |
||||
</class> |
||||
<class name="json_importer.py" filename="/Users/Zalum/Desktop/Tesi/CTBN_Project/PyCTBN/PyCTBN/json_importer.py" complexity="0" line-rate="1" branch-rate="0"> |
||||
<methods/> |
||||
<lines> |
||||
<line number="2" hits="1"/> |
||||
<line number="3" hits="1"/> |
||||
<line number="4" hits="1"/> |
||||
<line number="5" hits="1"/> |
||||
<line number="8" hits="1"/> |
||||
<line number="30" hits="1"/> |
||||
<line number="32" hits="1"/> |
||||
<line number="33" hits="1"/> |
||||
<line number="34" hits="1"/> |
||||
<line number="35" hits="1"/> |
||||
<line number="36" hits="1"/> |
||||
<line number="37" hits="1"/> |
||||
<line number="38" hits="1"/> |
||||
<line number="39" hits="1"/> |
||||
<line number="40" hits="1"/> |
||||
<line number="41" hits="1"/> |
||||
<line number="42" hits="1"/> |
||||
<line number="44" hits="1"/> |
||||
<line number="52" hits="1"/> |
||||
<line number="53" hits="1"/> |
||||
<line number="54" hits="1"/> |
||||
<line number="55" hits="1"/> |
||||
<line number="56" hits="1"/> |
||||
<line number="57" hits="1"/> |
||||
<line number="59" hits="1"/> |
||||
<line number="67" hits="1"/> |
||||
<line number="69" hits="1"/> |
||||
<line number="78" hits="1"/> |
||||
<line number="81" hits="1"/> |
||||
<line number="92" hits="1"/> |
||||
<line number="96" hits="1"/> |
||||
<line number="97" hits="1"/> |
||||
<line number="98" hits="1"/> |
||||
<line number="99" hits="1"/> |
||||
<line number="100" hits="1"/> |
||||
<line number="102" hits="1"/> |
||||
<line number="116" hits="1"/> |
||||
<line number="117" hits="1"/> |
||||
<line number="118" hits="1"/> |
||||
<line number="122" hits="1"/> |
||||
<line number="134" hits="1"/> |
||||
<line number="136" hits="1"/> |
||||
<line number="149" hits="1"/> |
||||
<line number="150" hits="1"/> |
||||
<line number="151" hits="1"/> |
||||
<line number="152" hits="1"/> |
||||
<line number="153" hits="1"/> |
||||
<line number="154" hits="1"/> |
||||
<line number="156" hits="1"/> |
||||
<line number="171" hits="1"/> |
||||
<line number="172" hits="1"/> |
||||
<line number="174" hits="1"/> |
||||
<line number="175" hits="1"/> |
||||
<line number="176" hits="1"/> |
||||
<line number="177" hits="1"/> |
||||
<line number="179" hits="1"/> |
||||
<line number="189" hits="1"/> |
||||
<line number="190" hits="1"/> |
||||
<line number="191" hits="1"/> |
||||
<line number="193" hits="1"/> |
||||
<line number="194" hits="1"/> |
||||
<line number="195" hits="1"/> |
||||
<line number="196" hits="1"/> |
||||
<line number="198" hits="1"/> |
||||
<line number="201" hits="1"/> |
||||
<line number="209" hits="1"/> |
||||
<line number="212" hits="1"/> |
||||
<line number="214" hits="1"/> |
||||
<line number="222" hits="1"/> |
||||
<line number="224" hits="1"/> |
||||
<line number="228" hits="1"/> |
||||
<line number="229" hits="1"/> |
||||
<line number="231" hits="1"/> |
||||
<line number="232" hits="1"/> |
||||
<line number="233" hits="1"/> |
||||
<line number="234" hits="1"/> |
||||
<line number="235" hits="1"/> |
||||
<line number="236" hits="1"/> |
||||
<line number="237" hits="1"/> |
||||
<line number="238" hits="1"/> |
||||
<line number="240" hits="1"/> |
||||
<line number="241" hits="1"/> |
||||
<line number="242" hits="1"/> |
||||
<line number="244" hits="1"/> |
||||
<line number="245" hits="1"/> |
||||
<line number="246" hits="1"/> |
||||
<line number="248" hits="1"/> |
||||
<line number="249" hits="1"/> |
||||
<line number="250" hits="1"/> |
||||
</lines> |
||||
</class> |
||||
<class name="sample_path.py" filename="/Users/Zalum/Desktop/Tesi/CTBN_Project/PyCTBN/PyCTBN/sample_path.py" complexity="0" line-rate="0.963" branch-rate="0"> |
||||
<methods/> |
||||
<lines> |
||||
<line number="1" hits="1"/> |
||||
<line number="2" hits="1"/> |
||||
<line number="3" hits="1"/> |
||||
<line number="4" hits="1"/> |
||||
<line number="7" hits="1"/> |
||||
<line number="22" hits="1"/> |
||||
<line number="24" hits="1"/> |
||||
<line number="27" hits="1"/> |
||||
<line number="29" hits="1"/> |
||||
<line number="39" hits="1"/> |
||||
<line number="40" hits="1"/> |
||||
<line number="44" hits="1"/> |
||||
<line number="46" hits="1"/> |
||||
<line number="54" hits="1"/> |
||||
<line number="55" hits="1"/> |
||||
<line number="57" hits="1"/> |
||||
<line number="58" hits="1"/> |
||||
<line number="59" hits="1"/> |
||||
<line number="60" hits="1"/> |
||||
<line number="63" hits="1"/> |
||||
<line number="64" hits="1"/> |
||||
<line number="65" hits="1"/> |
||||
<line number="67" hits="1"/> |
||||
<line number="68" hits="1"/> |
||||
<line number="69" hits="1"/> |
||||
<line number="71" hits="1"/> |
||||
<line number="72" hits="0"/> |
||||
</lines> |
||||
</class> |
||||
<class name="structure.py" filename="/Users/Zalum/Desktop/Tesi/CTBN_Project/PyCTBN/PyCTBN/structure.py" complexity="0" line-rate="0.65" branch-rate="0"> |
||||
<methods/> |
||||
<lines> |
||||
<line number="1" hits="1"/> |
||||
<line number="2" hits="1"/> |
||||
<line number="5" hits="1"/> |
||||
<line number="16" hits="1"/> |
||||
<line number="18" hits="1"/> |
||||
<line number="19" hits="1"/> |
||||
<line number="20" hits="1"/> |
||||
<line number="21" hits="1"/> |
||||
<line number="22" hits="1"/> |
||||
<line number="24" hits="1"/> |
||||
<line number="25" hits="1"/> |
||||
<line number="28" hits="0"/> |
||||
<line number="30" hits="1"/> |
||||
<line number="31" hits="1"/> |
||||
<line number="32" hits="0"/> |
||||
<line number="34" hits="1"/> |
||||
<line number="35" hits="1"/> |
||||
<line number="36" hits="0"/> |
||||
<line number="38" hits="1"/> |
||||
<line number="39" hits="1"/> |
||||
<line number="40" hits="0"/> |
||||
<line number="42" hits="1"/> |
||||
<line number="43" hits="1"/> |
||||
<line number="44" hits="0"/> |
||||
<line number="46" hits="1"/> |
||||
<line number="47" hits="0"/> |
||||
<line number="49" hits="1"/> |
||||
<line number="50" hits="0"/> |
||||
<line number="51" hits="0"/> |
||||
<line number="53" hits="1"/> |
||||
<line number="54" hits="0"/> |
||||
<line number="56" hits="1"/> |
||||
<line number="57" hits="0"/> |
||||
<line number="58" hits="0"/> |
||||
<line number="60" hits="1"/> |
||||
<line number="61" hits="1"/> |
||||
<line number="64" hits="1"/> |
||||
<line number="66" hits="0"/> |
||||
<line number="67" hits="0"/> |
||||
<line number="72" hits="0"/> |
||||
</lines> |
||||
</class> |
||||
<class name="trajectory.py" filename="/Users/Zalum/Desktop/Tesi/CTBN_Project/PyCTBN/PyCTBN/trajectory.py" complexity="0" line-rate="1" branch-rate="0"> |
||||
<methods/> |
||||
<lines> |
||||
<line number="2" hits="1"/> |
||||
<line number="5" hits="1"/> |
||||
<line number="17" hits="1"/> |
||||
<line number="18" hits="1"/> |
||||
<line number="19" hits="1"/> |
||||
<line number="20" hits="1"/> |
||||
<line number="21" hits="1"/> |
||||
<line number="22" hits="1"/> |
||||
<line number="24" hits="1"/> |
||||
<line number="25" hits="1"/> |
||||
<line number="32" hits="1"/> |
||||
<line number="34" hits="1"/> |
||||
<line number="35" hits="1"/> |
||||
<line number="42" hits="1"/> |
||||
<line number="44" hits="1"/> |
||||
<line number="45" hits="1"/> |
||||
<line number="46" hits="1"/> |
||||
<line number="48" hits="1"/> |
||||
<line number="49" hits="1"/> |
||||
<line number="51" hits="1"/> |
||||
<line number="52" hits="1"/> |
||||
</lines> |
||||
</class> |
||||
</PyCTBN> |
||||
</package> |
||||
<package name=".Users.Zalum.Library.Python.3.8.lib.python.site-packages" line-rate="0.5234" branch-rate="0" complexity="0"> |
||||
<PyCTBN> |
||||
<class name="six.py" filename="/Users/Zalum/Library/Python/3.8/lib/python/site-packages/six.py" complexity="0" line-rate="0.5234" branch-rate="0"> |
||||
<methods/> |
||||
<lines> |
||||
<line number="21" hits="1"/> |
||||
<line number="23" hits="1"/> |
||||
<line number="25" hits="1"/> |
||||
<line number="26" hits="1"/> |
||||
<line number="27" hits="1"/> |
||||
<line number="28" hits="1"/> |
||||
<line number="29" hits="1"/> |
||||
<line number="31" hits="1"/> |
||||
<line number="32" hits="1"/> |
||||
<line number="36" hits="1"/> |
||||
<line number="37" hits="1"/> |
||||
<line number="38" hits="1"/> |
||||
<line number="40" hits="1"/> |
||||
<line number="41" hits="1"/> |
||||
<line number="42" hits="1"/> |
||||
<line number="43" hits="1"/> |
||||
<line number="44" hits="1"/> |
||||
<line number="45" hits="1"/> |
||||
<line number="47" hits="1"/> |
||||
<line number="49" hits="0"/> |
||||
<line number="50" hits="0"/> |
||||
<line number="51" hits="0"/> |
||||
<line number="52" hits="0"/> |
||||
<line number="53" hits="0"/> |
||||
<line number="55" hits="0"/> |
||||
<line number="57" hits="0"/> |
||||
<line number="60" hits="0"/> |
||||
<line number="62" hits="0"/> |
||||
<line number="63" hits="0"/> |
||||
<line number="64" hits="0"/> |
||||
<line number="65" hits="0"/> |
||||
<line number="66" hits="0"/> |
||||
<line number="68" hits="0"/> |
||||
<line number="71" hits="0"/> |
||||
<line number="72" hits="0"/> |
||||
<line number="75" hits="1"/> |
||||
<line number="77" hits="1"/> |
||||
<line number="80" hits="1"/> |
||||
<line number="82" hits="1"/> |
||||
<line number="83" hits="1"/> |
||||
<line number="86" hits="1"/> |
||||
<line number="88" hits="1"/> |
||||
<line number="89" hits="1"/> |
||||
<line number="91" hits="1"/> |
||||
<line number="92" hits="1"/> |
||||
<line number="93" hits="1"/> |
||||
<line number="94" hits="1"/> |
||||
<line number="97" hits="1"/> |
||||
<line number="98" hits="0"/> |
||||
<line number="99" hits="0"/> |
||||
<line number="100" hits="1"/> |
||||
<line number="103" hits="1"/> |
||||
<line number="105" hits="1"/> |
||||
<line number="106" hits="1"/> |
||||
<line number="107" hits="1"/> |
||||
<line number="108" hits="1"/> |
||||
<line number="109" hits="1"/> |
||||
<line number="110" hits="1"/> |
||||
<line number="112" hits="0"/> |
||||
<line number="114" hits="1"/> |
||||
<line number="115" hits="1"/> |
||||
<line number="117" hits="1"/> |
||||
<line number="118" hits="0"/> |
||||
<line number="119" hits="0"/> |
||||
<line number="120" hits="0"/> |
||||
<line number="121" hits="0"/> |
||||
<line number="124" hits="1"/> |
||||
<line number="126" hits="1"/> |
||||
<line number="127" hits="1"/> |
||||
<line number="128" hits="1"/> |
||||
<line number="130" hits="1"/> |
||||
<line number="131" hits="0"/> |
||||
<line number="132" hits="0"/> |
||||
<line number="133" hits="0"/> |
||||
<line number="136" hits="1"/> |
||||
<line number="139" hits="1"/> |
||||
<line number="141" hits="1"/> |
||||
<line number="142" hits="1"/> |
||||
<line number="143" hits="1"/> |
||||
<line number="144" hits="1"/> |
||||
<line number="145" hits="0"/> |
||||
<line number="146" hits="1"/> |
||||
<line number="147" hits="1"/> |
||||
<line number="148" hits="1"/> |
||||
<line number="149" hits="1"/> |
||||
<line number="151" hits="1"/> |
||||
<line number="152" hits="1"/> |
||||
<line number="154" hits="0"/> |
||||
<line number="155" hits="0"/> |
||||
<line number="156" hits="0"/> |
||||
<line number="157" hits="0"/> |
||||
<line number="159" hits="1"/> |
||||
<line number="160" hits="0"/> |
||||
<line number="161" hits="0"/> |
||||
<line number="164" hits="1"/> |
||||
<line number="173" hits="1"/> |
||||
<line number="174" hits="1"/> |
||||
<line number="175" hits="1"/> |
||||
<line number="177" hits="1"/> |
||||
<line number="178" hits="1"/> |
||||
<line number="179" hits="1"/> |
||||
<line number="181" hits="1"/> |
||||
<line number="182" hits="1"/> |
||||
<line number="184" hits="1"/> |
||||
<line number="185" hits="1"/> |
||||
<line number="186" hits="1"/> |
||||
<line number="187" hits="1"/> |
||||
<line number="189" hits="1"/> |
||||
<line number="190" hits="1"/> |
||||
<line number="191" hits="1"/> |
||||
<line number="192" hits="0"/> |
||||
<line number="193" hits="0"/> |
||||
<line number="195" hits="1"/> |
||||
<line number="196" hits="1"/> |
||||
<line number="198" hits="1"/> |
||||
<line number="199" hits="1"/> |
||||
<line number="200" hits="1"/> |
||||
<line number="201" hits="1"/> |
||||
<line number="202" hits="1"/> |
||||
<line number="203" hits="0"/> |
||||
<line number="205" hits="1"/> |
||||
<line number="206" hits="1"/> |
||||
<line number="207" hits="1"/> |
||||
<line number="209" hits="1"/> |
||||
<line number="216" hits="1"/> |
||||
<line number="218" hits="1"/> |
||||
<line number="222" hits="0"/> |
||||
<line number="223" hits="0"/> |
||||
<line number="224" hits="1"/> |
||||
<line number="226" hits="1"/> |
||||
<line number="229" hits="1"/> |
||||
<line number="232" hits="1"/> |
||||
<line number="235" hits="1"/> |
||||
<line number="307" hits="1"/> |
||||
<line number="308" hits="0"/> |
||||
<line number="312" hits="1"/> |
||||
<line number="313" hits="1"/> |
||||
<line number="314" hits="1"/> |
||||
<line number="315" hits="1"/> |
||||
<line number="316" hits="1"/> |
||||
<line number="318" hits="1"/> |
||||
<line number="320" hits="1"/> |
||||
<line number="321" hits="1"/> |
||||
<line number="324" hits="1"/> |
||||
<line number="329" hits="1"/> |
||||
<line number="356" hits="1"/> |
||||
<line number="357" hits="1"/> |
||||
<line number="358" hits="1"/> |
||||
<line number="360" hits="1"/> |
||||
<line number="362" hits="1"/> |
||||
<line number="366" hits="1"/> |
||||
<line number="371" hits="1"/> |
||||
<line number="376" hits="1"/> |
||||
<line number="377" hits="1"/> |
||||
<line number="378" hits="1"/> |
||||
<line number="380" hits="1"/> |
||||
<line number="382" hits="1"/> |
||||
<line number="386" hits="1"/> |
||||
<line number="391" hits="1"/> |
||||
<line number="428" hits="1"/> |
||||
<line number="429" hits="1"/> |
||||
<line number="430" hits="1"/> |
||||
<line number="432" hits="1"/> |
||||
<line number="434" hits="1"/> |
||||
<line number="438" hits="1"/> |
||||
<line number="443" hits="1"/> |
||||
<line number="449" hits="1"/> |
||||
<line number="450" hits="1"/> |
||||
<line number="451" hits="1"/> |
||||
<line number="453" hits="1"/> |
||||
<line number="455" hits="1"/> |
||||
<line number="459" hits="1"/> |
||||
<line number="464" hits="1"/> |
||||
<line number="467" hits="1"/> |
||||
<line number="468" hits="1"/> |
||||
<line number="469" hits="1"/> |
||||
<line number="471" hits="1"/> |
||||
<line number="473" hits="1"/> |
||||
<line number="477" hits="1"/> |
||||
<line number="480" hits="1"/> |
||||
<line number="481" hits="1"/> |
||||
<line number="482" hits="1"/> |
||||
<line number="483" hits="1"/> |
||||
<line number="484" hits="1"/> |
||||
<line number="485" hits="1"/> |
||||
<line number="487" hits="1"/> |
||||
<line number="488" hits="0"/> |
||||
<line number="490" hits="1"/> |
||||
<line number="494" hits="1"/> |
||||
<line number="496" hits="0"/> |
||||
<line number="499" hits="1"/> |
||||
<line number="501" hits="0"/> |
||||
<line number="502" hits="0"/> |
||||
<line number="503" hits="0"/> |
||||
<line number="504" hits="0"/> |
||||
<line number="505" hits="0"/> |
||||
<line number="506" hits="0"/> |
||||
<line number="507" hits="0"/> |
||||
<line number="510" hits="1"/> |
||||
<line number="511" hits="1"/> |
||||
<line number="512" hits="1"/> |
||||
<line number="514" hits="1"/> |
||||
<line number="515" hits="1"/> |
||||
<line number="516" hits="1"/> |
||||
<line number="517" hits="1"/> |
||||
<line number="519" hits="0"/> |
||||
<line number="520" hits="0"/> |
||||
<line number="522" hits="0"/> |
||||
<line number="523" hits="0"/> |
||||
<line number="524" hits="0"/> |
||||
<line number="525" hits="0"/> |
||||
<line number="528" hits="1"/> |
||||
<line number="529" hits="1"/> |
||||
<line number="530" hits="0"/> |
||||
<line number="531" hits="0"/> |
||||
<line number="532" hits="0"/> |
||||
<line number="533" hits="1"/> |
||||
<line number="536" hits="1"/> |
||||
<line number="537" hits="1"/> |
||||
<line number="538" hits="0"/> |
||||
<line number="539" hits="0"/> |
||||
<line number="540" hits="0"/> |
||||
<line number="543" hits="1"/> |
||||
<line number="544" hits="1"/> |
||||
<line number="545" hits="0"/> |
||||
<line number="547" hits="1"/> |
||||
<line number="549" hits="1"/> |
||||
<line number="550" hits="0"/> |
||||
<line number="552" hits="1"/> |
||||
<line number="554" hits="0"/> |
||||
<line number="555" hits="0"/> |
||||
<line number="557" hits="0"/> |
||||
<line number="558" hits="0"/> |
||||
<line number="560" hits="0"/> |
||||
<line number="561" hits="0"/> |
||||
<line number="563" hits="0"/> |
||||
<line number="565" hits="0"/> |
||||
<line number="566" hits="0"/> |
||||
<line number="568" hits="0"/> |
||||
<line number="569" hits="1"/> |
||||
<line number="573" hits="1"/> |
||||
<line number="574" hits="1"/> |
||||
<line number="575" hits="1"/> |
||||
<line number="576" hits="1"/> |
||||
<line number="577" hits="1"/> |
||||
<line number="578" hits="1"/> |
||||
<line number="581" hits="1"/> |
||||
<line number="582" hits="1"/> |
||||
<line number="583" hits="0"/> |
||||
<line number="585" hits="1"/> |
||||
<line number="586" hits="0"/> |
||||
<line number="588" hits="1"/> |
||||
<line number="589" hits="0"/> |
||||
<line number="591" hits="1"/> |
||||
<line number="592" hits="0"/> |
||||
<line number="594" hits="1"/> |
||||
<line number="596" hits="1"/> |
||||
<line number="598" hits="1"/> |
||||
<line number="600" hits="0"/> |
||||
<line number="601" hits="0"/> |
||||
<line number="603" hits="0"/> |
||||
<line number="604" hits="0"/> |
||||
<line number="606" hits="0"/> |
||||
<line number="607" hits="0"/> |
||||
<line number="609" hits="0"/> |
||||
<line number="610" hits="0"/> |
||||
<line number="612" hits="0"/> |
||||
<line number="614" hits="0"/> |
||||
<line number="616" hits="0"/> |
||||
<line number="618" hits="1"/> |
||||
<line number="619" hits="1"/> |
||||
<line number="620" hits="1"/> |
||||
<line number="622" hits="1"/> |
||||
<line number="626" hits="1"/> |
||||
<line number="627" hits="1"/> |
||||
<line number="628" hits="0"/> |
||||
<line number="630" hits="1"/> |
||||
<line number="631" hits="0"/> |
||||
<line number="632" hits="1"/> |
||||
<line number="633" hits="1"/> |
||||
<line number="634" hits="1"/> |
||||
<line number="635" hits="1"/> |
||||
<line number="636" hits="1"/> |
||||
<line number="637" hits="1"/> |
||||
<line number="638" hits="1"/> |
||||
<line number="639" hits="1"/> |
||||
<line number="640" hits="1"/> |
||||
<line number="641" hits="1"/> |
||||
<line number="642" hits="1"/> |
||||
<line number="643" hits="1"/> |
||||
<line number="644" hits="1"/> |
||||
<line number="645" hits="0"/> |
||||
<line number="646" hits="0"/> |
||||
<line number="647" hits="0"/> |
||||
<line number="649" hits="1"/> |
||||
<line number="650" hits="1"/> |
||||
<line number="651" hits="1"/> |
||||
<line number="653" hits="0"/> |
||||
<line number="654" hits="0"/> |
||||
<line number="657" hits="0"/> |
||||
<line number="658" hits="0"/> |
||||
<line number="659" hits="0"/> |
||||
<line number="660" hits="0"/> |
||||
<line number="662" hits="0"/> |
||||
<line number="663" hits="0"/> |
||||
<line number="665" hits="0"/> |
||||
<line number="666" hits="0"/> |
||||
<line number="667" hits="0"/> |
||||
<line number="668" hits="0"/> |
||||
<line number="669" hits="0"/> |
||||
<line number="670" hits="0"/> |
||||
<line number="671" hits="0"/> |
||||
<line number="672" hits="0"/> |
||||
<line number="673" hits="0"/> |
||||
<line number="674" hits="1"/> |
||||
<line number="675" hits="1"/> |
||||
<line number="678" hits="1"/> |
||||
<line number="679" hits="0"/> |
||||
<line number="682" hits="1"/> |
||||
<line number="683" hits="0"/> |
||||
<line number="686" hits="1"/> |
||||
<line number="687" hits="0"/> |
||||
<line number="690" hits="1"/> |
||||
<line number="691" hits="0"/> |
||||
<line number="694" hits="1"/> |
||||
<line number="695" hits="1"/> |
||||
<line number="697" hits="1"/> |
||||
<line number="698" hits="0"/> |
||||
<line number="699" hits="0"/> |
||||
<line number="700" hits="0"/> |
||||
<line number="701" hits="0"/> |
||||
<line number="702" hits="0"/> |
||||
<line number="703" hits="0"/> |
||||
<line number="705" hits="0"/> |
||||
<line number="706" hits="0"/> |
||||
<line number="709" hits="0"/> |
||||
<line number="711" hits="0"/> |
||||
<line number="712" hits="0"/> |
||||
<line number="713" hits="0"/> |
||||
<line number="714" hits="0"/> |
||||
<line number="715" hits="0"/> |
||||
<line number="716" hits="0"/> |
||||
<line number="717" hits="0"/> |
||||
<line number="718" hits="0"/> |
||||
<line number="719" hits="0"/> |
||||
<line number="721" hits="0"/> |
||||
<line number="729" hits="1"/> |
||||
<line number="730" hits="1"/> |
||||
<line number="737" hits="0"/> |
||||
<line number="738" hits="0"/> |
||||
<line number="741" hits="1"/> |
||||
<line number="742" hits="1"/> |
||||
<line number="743" hits="0"/> |
||||
<line number="745" hits="0"/> |
||||
<line number="746" hits="0"/> |
||||
<line number="747" hits="0"/> |
||||
<line number="749" hits="0"/> |
||||
<line number="750" hits="0"/> |
||||
<line number="751" hits="0"/> |
||||
<line number="753" hits="0"/> |
||||
<line number="756" hits="0"/> |
||||
<line number="757" hits="0"/> |
||||
<line number="758" hits="0"/> |
||||
<line number="759" hits="0"/> |
||||
<line number="760" hits="0"/> |
||||
<line number="761" hits="0"/> |
||||
<line number="762" hits="0"/> |
||||
<line number="763" hits="0"/> |
||||
<line number="764" hits="0"/> |
||||
<line number="765" hits="0"/> |
||||
<line number="766" hits="0"/> |
||||
<line number="767" hits="0"/> |
||||
<line number="768" hits="0"/> |
||||
<line number="769" hits="0"/> |
||||
<line number="770" hits="0"/> |
||||
<line number="771" hits="0"/> |
||||
<line number="772" hits="0"/> |
||||
<line number="773" hits="0"/> |
||||
<line number="774" hits="0"/> |
||||
<line number="775" hits="0"/> |
||||
<line number="776" hits="0"/> |
||||
<line number="777" hits="0"/> |
||||
<line number="778" hits="0"/> |
||||
<line number="779" hits="0"/> |
||||
<line number="780" hits="0"/> |
||||
<line number="781" hits="0"/> |
||||
<line number="782" hits="0"/> |
||||
<line number="783" hits="0"/> |
||||
<line number="785" hits="0"/> |
||||
<line number="786" hits="0"/> |
||||
<line number="787" hits="0"/> |
||||
<line number="788" hits="0"/> |
||||
<line number="789" hits="0"/> |
||||
<line number="790" hits="0"/> |
||||
<line number="791" hits="0"/> |
||||
<line number="792" hits="0"/> |
||||
<line number="793" hits="0"/> |
||||
<line number="794" hits="0"/> |
||||
<line number="795" hits="0"/> |
||||
<line number="796" hits="1"/> |
||||
<line number="797" hits="0"/> |
||||
<line number="799" hits="0"/> |
||||
<line number="800" hits="0"/> |
||||
<line number="801" hits="0"/> |
||||
<line number="802" hits="0"/> |
||||
<line number="803" hits="0"/> |
||||
<line number="804" hits="0"/> |
||||
<line number="806" hits="1"/> |
||||
<line number="808" hits="1"/> |
||||
<line number="814" hits="0"/> |
||||
<line number="817" hits="0"/> |
||||
<line number="818" hits="0"/> |
||||
<line number="819" hits="0"/> |
||||
<line number="820" hits="0"/> |
||||
<line number="821" hits="0"/> |
||||
<line number="823" hits="0"/> |
||||
<line number="824" hits="0"/> |
||||
<line number="825" hits="0"/> |
||||
<line number="826" hits="0"/> |
||||
<line number="827" hits="0"/> |
||||
<line number="828" hits="0"/> |
||||
<line number="830" hits="0"/> |
||||
<line number="832" hits="0"/> |
||||
<line number="834" hits="0"/> |
||||
<line number="837" hits="1"/> |
||||
<line number="840" hits="1"/> |
||||
<line number="845" hits="1"/> |
||||
<line number="847" hits="1"/> |
||||
<line number="848" hits="1"/> |
||||
<line number="851" hits="1"/> |
||||
<line number="852" hits="1"/> |
||||
<line number="853" hits="0"/> |
||||
<line number="855" hits="0"/> |
||||
<line number="856" hits="1"/> |
||||
<line number="858" hits="1"/> |
||||
<line number="859" hits="1"/> |
||||
<line number="860" hits="1"/> |
||||
<line number="861" hits="1"/> |
||||
<line number="864" hits="1"/> |
||||
<line number="866" hits="1"/> |
||||
<line number="867" hits="1"/> |
||||
<line number="868" hits="1"/> |
||||
<line number="869" hits="1"/> |
||||
<line number="870" hits="0"/> |
||||
<line number="871" hits="0"/> |
||||
<line number="872" hits="0"/> |
||||
<line number="873" hits="0"/> |
||||
<line number="874" hits="1"/> |
||||
<line number="875" hits="1"/> |
||||
<line number="876" hits="1"/> |
||||
<line number="877" hits="1"/> |
||||
<line number="878" hits="1"/> |
||||
<line number="879" hits="1"/> |
||||
<line number="882" hits="1"/> |
||||
<line number="893" hits="0"/> |
||||
<line number="894" hits="0"/> |
||||
<line number="895" hits="0"/> |
||||
<line number="896" hits="0"/> |
||||
<line number="898" hits="0"/> |
||||
<line number="901" hits="1"/> |
||||
<line number="912" hits="0"/> |
||||
<line number="913" hits="0"/> |
||||
<line number="914" hits="0"/> |
||||
<line number="915" hits="0"/> |
||||
<line number="916" hits="0"/> |
||||
<line number="917" hits="0"/> |
||||
<line number="918" hits="0"/> |
||||
<line number="921" hits="1"/> |
||||
<line number="932" hits="0"/> |
||||
<line number="933" hits="0"/> |
||||
<line number="934" hits="0"/> |
||||
<line number="935" hits="0"/> |
||||
<line number="937" hits="0"/> |
||||
<line number="940" hits="1"/> |
||||
<line number="948" hits="0"/> |
||||
<line number="949" hits="0"/> |
||||
<line number="950" hits="0"/> |
||||
<line number="953" hits="0"/> |
||||
<line number="954" hits="0"/> |
||||
<line number="955" hits="0"/> |
||||
<line number="961" hits="1"/> |
||||
<line number="962" hits="1"/> |
||||
<line number="963" hits="1"/> |
||||
<line number="964" hits="1"/> |
||||
<line number="968" hits="1"/> |
||||
<line number="969" hits="1"/> |
||||
<line number="974" hits="1"/> |
||||
<line number="976" hits="0"/> |
||||
<line number="977" hits="0"/> |
||||
<line number="978" hits="1"/> |
||||
<line number="980" hits="1"/> |
||||
</lines> |
||||
</class> |
||||
</PyCTBN> |
||||
</package> |
||||
</packages> |
||||
</coverage> |
@ -0,0 +1,67 @@ |
||||
|
||||
import unittest |
||||
import numpy as np |
||||
import glob |
||||
import os |
||||
|
||||
from ...PyCTBN.structure_graph.network_graph import NetworkGraph |
||||
from ...PyCTBN.structure_graph.sample_path import SamplePath |
||||
from ...PyCTBN.structure_graph.set_of_cims import SetOfCims |
||||
from ...PyCTBN.estimators.parameters_estimator import ParametersEstimator |
||||
from ...PyCTBN.utility.json_importer import JsonImporter |
||||
|
||||
|
||||
class TestParametersEstimator(unittest.TestCase): |
||||
|
||||
@classmethod |
||||
def setUpClass(cls) -> None: |
||||
cls.read_files = glob.glob(os.path.join('./PyCTBN/test_data', "*.json")) |
||||
cls.array_indx = 0 |
||||
cls.importer = JsonImporter('./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json', 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
||||
cls.importer.import_data(cls.array_indx) |
||||
cls.s1 = SamplePath(cls.importer) |
||||
cls.s1.build_trajectories() |
||||
cls.s1.build_structure() |
||||
print(cls.s1.structure.edges) |
||||
print(cls.s1.structure.nodes_values) |
||||
|
||||
def test_fast_init(self): |
||||
for node in self.s1.structure.nodes_labels: |
||||
g = NetworkGraph(self.s1.structure) |
||||
g.fast_init(node) |
||||
p1 = ParametersEstimator(self.s1.trajectories, g) |
||||
self.assertEqual(p1._trajectories, self.s1.trajectories) |
||||
self.assertEqual(p1._net_graph, g) |
||||
self.assertIsNone(p1._single_set_of_cims) |
||||
p1.fast_init(node) |
||||
self.assertIsInstance(p1._single_set_of_cims, SetOfCims) |
||||
|
||||
def test_compute_parameters_for_node(self): |
||||
for indx, node in enumerate(self.s1.structure.nodes_labels): |
||||
print(node) |
||||
g = NetworkGraph(self.s1.structure) |
||||
g.fast_init(node) |
||||
p1 = ParametersEstimator(self.s1.trajectories, g) |
||||
p1.fast_init(node) |
||||
sofc1 = p1.compute_parameters_for_node(node) |
||||
sampled_cims = self.aux_import_sampled_cims('dyn.cims') |
||||
sc = list(sampled_cims.values()) |
||||
self.equality_of_cims_of_node(sc[indx], sofc1._actual_cims) |
||||
|
||||
def equality_of_cims_of_node(self, sampled_cims, estimated_cims): |
||||
self.assertEqual(len(sampled_cims), len(estimated_cims)) |
||||
for c1, c2 in zip(sampled_cims, estimated_cims): |
||||
self.cim_equality_test(c1, c2.cim) |
||||
|
||||
def cim_equality_test(self, cim1, cim2): |
||||
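# very loose relative tolerance: the estimated rates are only expected to approximate the sampled ones |
||||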
for r1, r2 in zip(cim1, cim2): |
||||
self.assertTrue(np.all(np.isclose(r1, r2, 1e01))) |
||||
|
||||
def aux_import_sampled_cims(self, cims_label): |
||||
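# the field labels are irrelevant here: this importer instance only reads the raw JSON file |
||||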
i1 = JsonImporter('./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json', '', '', '', '', '') |
||||
raw_data = i1.read_json_file() |
||||
return i1.import_sampled_cims(raw_data, self.array_indx, cims_label) |
||||
|
||||
|
||||
if __name__ == '__main__': |
||||
unittest.main() |
@ -0,0 +1,64 @@ |
||||
|
||||
import glob |
||||
import math |
||||
import os |
||||
import unittest |
||||
|
||||
import networkx as nx |
||||
import numpy as np |
||||
import psutil |
||||
from line_profiler import LineProfiler |
||||
|
||||
import json |
||||
import pandas as pd |
||||
|
||||
|
||||
from ...PyCTBN.structure_graph.sample_path import SamplePath |
||||
from ...PyCTBN.estimators.structure_constraint_based_estimator import StructureConstraintBasedEstimator |
||||
from ...PyCTBN.utility.sample_importer import SampleImporter |
||||
|
||||
import copy |
||||
|
||||
|
||||
class TestStructureConstraintBasedEstimator(unittest.TestCase): |
||||
@classmethod |
||||
def setUpClass(cls): |
||||
with open("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json") as f: |
||||
raw_data = json.load(f) |
||||
|
||||
trajectory_list_raw = raw_data[0]["samples"] |
||||
 |
||||
trajectory_list = [pd.DataFrame(sample) for sample in trajectory_list_raw] |
||||
 |
||||
variables = pd.DataFrame(raw_data[0]["variables"]) |
||||
prior_net_structure = pd.DataFrame(raw_data[0]["dyn.str"]) |
||||
|
||||
|
||||
cls.importer = SampleImporter( |
||||
trajectory_list=trajectory_list, |
||||
variables=variables, |
||||
prior_net_structure=prior_net_structure |
||||
) |
||||
|
||||
cls.importer.import_data() |
||||
#cls.s1 = sp.SamplePath(cls.importer) |
||||
|
||||
#cls.traj = cls.s1.concatenated_samples |
||||
|
||||
# print(len(cls.traj)) |
||||
cls.s1 = SamplePath(cls.importer) |
||||
cls.s1.build_trajectories() |
||||
cls.s1.build_structure() |
||||
|
||||
def test_structure(self): |
||||
true_edges = copy.deepcopy(self.s1.structure.edges) |
||||
true_edges = set(map(tuple, true_edges)) |
||||
|
||||
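# 0.1 and 0.1 are presumably the significance levels of the exponential and chi-square independence tests |
||||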
se1 = StructureConstraintBasedEstimator(self.s1, 0.1, 0.1) |
||||
edges = se1.estimate_structure(disable_multiprocessing=False) |
||||
|
||||
|
||||
self.assertEqual(edges, true_edges) |
||||
|
||||
if __name__ == '__main__': |
||||
unittest.main() |
@ -0,0 +1,59 @@ |
||||
|
||||
import glob |
||||
import math |
||||
import os |
||||
import unittest |
||||
|
||||
import networkx as nx |
||||
import numpy as np |
||||
import psutil |
||||
from line_profiler import LineProfiler |
||||
|
||||
from ...PyCTBN.utility.cache import Cache |
||||
from ...PyCTBN.structure_graph.sample_path import SamplePath |
||||
from ...PyCTBN.estimators.structure_constraint_based_estimator import StructureConstraintBasedEstimator |
||||
from ...PyCTBN.utility.json_importer import JsonImporter |
||||
|
||||
from multiprocessing import set_start_method |
||||
|
||||
import copy |
||||
|
||||
|
||||
class TestStructureConstraintBasedEstimator(unittest.TestCase): |
||||
@classmethod |
||||
def setUpClass(cls): |
||||
pass |
||||
|
||||
def test_structure(self): |
||||
#cls.read_files = glob.glob(os.path.join('../../data', "*.json")) |
||||
self.importer = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
||||
self.s1 = SamplePath(self.importer) |
||||
self.s1.build_trajectories() |
||||
self.s1.build_structure() |
||||
|
||||
true_edges = copy.deepcopy(self.s1.structure.edges) |
||||
true_edges = set(map(tuple, true_edges)) |
||||
|
||||
|
||||
se1 = StructureConstraintBasedEstimator(self.s1, 0.1, 0.1) |
||||
edges = se1.estimate_structure(disable_multiprocessing=False) |
||||
self.assertEqual(edges, true_edges) |
||||
 |
||||
self.importer = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
||||
self.s1 = SamplePath(self.importer) |
||||
self.s1.build_trajectories() |
||||
self.s1.build_structure() |
||||
|
||||
true_edges = copy.deepcopy(self.s1.structure.edges) |
||||
true_edges = set(map(tuple, true_edges)) |
||||
|
||||
|
||||
se1 = StructureConstraintBasedEstimator(self.s1, 0.1, 0.1) |
||||
edges = se1.estimate_structure(disable_multiprocessing=True) |
||||
|
||||
|
||||
|
||||
self.assertEqual(edges, true_edges) |
||||
|
||||
if __name__ == '__main__': |
||||
unittest.main() |
@ -0,0 +1,82 @@ |
||||
import glob |
||||
import math |
||||
import os |
||||
import unittest |
||||
|
||||
import networkx as nx |
||||
import numpy as np |
||||
import psutil |
||||
from line_profiler import LineProfiler |
||||
import copy |
||||
|
||||
from ...PyCTBN.utility.cache import Cache |
||||
from ...PyCTBN.structure_graph.sample_path import SamplePath |
||||
from ...PyCTBN.estimators.structure_score_based_estimator import StructureScoreBasedEstimator |
||||
from ...PyCTBN.utility.json_importer import JsonImporter |
||||
from ...PyCTBN.utility.sample_importer import SampleImporter |
||||
|
||||
import json |
||||
|
||||
import pandas as pd |
||||
|
||||
|
||||
|
||||
class TestStructureScoreBasedEstimator(unittest.TestCase): |
||||
|
||||
@classmethod |
||||
def setUpClass(cls): |
||||
with open("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json") as f: |
||||
raw_data = json.load(f) |
||||
|
||||
trajectory_list_raw = raw_data[0]["samples"] |
||||
 |
||||
trajectory_list = [pd.DataFrame(sample) for sample in trajectory_list_raw] |
||||
 |
||||
variables = pd.DataFrame(raw_data[0]["variables"]) |
||||
prior_net_structure = pd.DataFrame(raw_data[0]["dyn.str"]) |
||||
|
||||
|
||||
cls.importer = SampleImporter( |
||||
trajectory_list=trajectory_list, |
||||
variables=variables, |
||||
prior_net_structure=prior_net_structure |
||||
) |
||||
|
||||
cls.importer.import_data() |
||||
#cls.s1 = sp.SamplePath(cls.importer) |
||||
|
||||
#cls.traj = cls.s1.concatenated_samples |
||||
|
||||
# print(len(cls.traj)) |
||||
cls.s1 = SamplePath(cls.importer) |
||||
cls.s1.build_trajectories() |
||||
cls.s1.build_structure() |
||||
|
||||
|
||||
|
||||
def test_structure(self): |
||||
true_edges = copy.deepcopy(self.s1.structure.edges) |
||||
true_edges = set(map(tuple, true_edges)) |
||||
|
||||
|
||||
se1 = StructureScoreBasedEstimator(self.s1, known_edges=[('X', 'Q')]) |
||||
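# hill-climbing run; the tabu_length and tabu_rules_duration arguments are presumably ignored by the 'hill' optimizer |
||||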
edges = se1.estimate_structure( |
||||
max_parents = None, |
||||
iterations_number = 100, |
||||
patience = 35, |
||||
tabu_length = 15, |
||||
tabu_rules_duration = 15, |
||||
optimizer = 'hill', |
||||
disable_multiprocessing=True |
||||
) |
||||
|
||||
|
||||
self.assertEqual(edges, true_edges) |
||||
|
||||
|
||||
|
||||
if __name__ == '__main__': |
||||
unittest.main() |
||||
|
@ -0,0 +1,79 @@ |
||||
|
||||
import glob |
||||
import math |
||||
import os |
||||
import unittest |
||||
|
||||
import networkx as nx |
||||
import numpy as np |
||||
import psutil |
||||
from line_profiler import LineProfiler |
||||
import copy |
||||
|
||||
from ...PyCTBN.utility.cache import Cache |
||||
from ...PyCTBN.structure_graph.sample_path import SamplePath |
||||
from ...PyCTBN.estimators.structure_score_based_estimator import StructureScoreBasedEstimator |
||||
from ...PyCTBN.utility.json_importer import JsonImporter |
||||
|
||||
|
||||
|
||||
class TestStructureScoreBasedEstimator(unittest.TestCase): |
||||
|
||||
@classmethod |
||||
def setUpClass(cls): |
||||
pass |
||||
|
||||
|
||||
|
||||
def test_structure(self): |
||||
#cls.read_files = glob.glob(os.path.join('../../data', "*.json")) |
||||
self.importer = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
||||
self.s1 = SamplePath(self.importer) |
||||
self.s1.build_trajectories() |
||||
self.s1.build_structure() |
||||
|
||||
true_edges = copy.deepcopy(self.s1.structure.edges) |
||||
true_edges = set(map(tuple, true_edges)) |
||||
|
||||
|
||||
se1 = StructureScoreBasedEstimator(self.s1) |
||||
edges = se1.estimate_structure( |
||||
max_parents = None, |
||||
iterations_number = 100, |
||||
patience = 35, |
||||
tabu_length = 15, |
||||
tabu_rules_duration = 15, |
||||
optimizer = 'tabu', |
||||
disable_multiprocessing=False |
||||
) |
||||
self.assertEqual(edges, true_edges) |
||||
 |
||||
self.importer = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
||||
self.s1 = SamplePath(self.importer) |
||||
self.s1.build_trajectories() |
||||
self.s1.build_structure() |
||||
|
||||
true_edges = copy.deepcopy(self.s1.structure.edges) |
||||
true_edges = set(map(tuple, true_edges)) |
||||
|
||||
|
||||
se1 = StructureScoreBasedEstimator(self.s1) |
||||
edges = se1.estimate_structure( |
||||
max_parents = None, |
||||
iterations_number = 100, |
||||
patience = 35, |
||||
tabu_length = 15, |
||||
tabu_rules_duration = 15, |
||||
optimizer = 'tabu', |
||||
disable_multiprocessing=True |
||||
) |
||||
|
||||
|
||||
|
||||
self.assertEqual(edges, true_edges) |
||||
|
||||
|
||||
|
||||
if __name__ == '__main__': |
||||
unittest.main() |
||||
|
@ -0,0 +1,54 @@ |
||||
|
||||
import glob |
||||
import math |
||||
import os |
||||
import unittest |
||||
|
||||
import networkx as nx |
||||
import numpy as np |
||||
import psutil |
||||
from line_profiler import LineProfiler |
||||
import copy |
||||
|
||||
|
||||
from ...PyCTBN.structure_graph.sample_path import SamplePath |
||||
from ...PyCTBN.estimators.structure_score_based_estimator import StructureScoreBasedEstimator |
||||
from ...PyCTBN.utility.json_importer import JsonImporter |
||||
|
||||
|
||||
|
||||
class TestHillClimbingSearch(unittest.TestCase): |
||||
|
||||
@classmethod |
||||
def setUpClass(cls): |
||||
#cls.read_files = glob.glob(os.path.join('../../data', "*.json")) |
||||
|
||||
|
||||
cls.importer = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
||||
cls.importer.import_data(0) |
||||
cls.s1 = SamplePath(cls.importer) |
||||
cls.s1.build_trajectories() |
||||
cls.s1.build_structure() |
||||
|
||||
|
||||
|
||||
def test_structure(self): |
||||
true_edges = copy.deepcopy(self.s1.structure.edges) |
||||
true_edges = set(map(tuple, true_edges)) |
||||
|
||||
se1 = StructureScoreBasedEstimator(self.s1) |
||||
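# patience=None presumably disables early stopping for the hill-climbing search |
||||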
edges = se1.estimate_structure( |
||||
max_parents = None, |
||||
iterations_number = 40, |
||||
patience = None, |
||||
optimizer = 'hill' |
||||
) |
||||
|
||||
|
||||
self.assertEqual(edges, true_edges) |
||||
|
||||
|
||||
|
||||
if __name__ == '__main__': |
||||
unittest.main() |
||||
|
@ -0,0 +1,84 @@ |
||||
import glob |
||||
import math |
||||
import os |
||||
import unittest |
||||
|
||||
import networkx as nx |
||||
import numpy as np |
||||
import pandas as pd |
||||
import psutil |
||||
from line_profiler import LineProfiler |
||||
import copy |
||||
import json |
||||
|
||||
from ...PyCTBN.utility.cache import Cache |
||||
from ...PyCTBN.structure_graph.sample_path import SamplePath |
||||
from ...PyCTBN.estimators.structure_score_based_estimator import StructureScoreBasedEstimator |
||||
from ...PyCTBN.utility.json_importer import JsonImporter |
||||
from ...PyCTBN.utility.sample_importer import SampleImporter |
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class TestTabuSearch(unittest.TestCase): |
||||
|
||||
@classmethod |
||||
def setUpClass(cls): |
||||
#cls.read_files = glob.glob(os.path.join('../../data', "*.json")) |
||||
|
||||
with open("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json") as f: |
||||
raw_data = json.load(f) |
||||
|
||||
trajectory_list_raw = raw_data[0]["samples"] |
||||
 |
||||
trajectory_list = [pd.DataFrame(sample) for sample in trajectory_list_raw] |
||||
 |
||||
variables = pd.DataFrame(raw_data[0]["variables"]) |
||||
prior_net_structure = pd.DataFrame(raw_data[0]["dyn.str"]) |
||||
|
||||
|
||||
cls.importer = SampleImporter( |
||||
trajectory_list=trajectory_list, |
||||
variables=variables, |
||||
prior_net_structure=prior_net_structure |
||||
) |
||||
|
||||
cls.importer.import_data() |
||||
#cls.s1 = sp.SamplePath(cls.importer) |
||||
|
||||
#cls.traj = cls.s1.concatenated_samples |
||||
|
||||
# print(len(cls.traj)) |
||||
cls.s1 = SamplePath(cls.importer) |
||||
cls.s1.build_trajectories() |
||||
cls.s1.build_structure() |
||||
#cls.s1.clear_memory() |
||||
|
||||
|
||||
|
||||
def test_structure(self): |
||||
true_edges = copy.deepcopy(self.s1.structure.edges) |
||||
true_edges = set(map(tuple, true_edges)) |
||||
|
||||
se1 = StructureScoreBasedEstimator(self.s1) |
||||
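# tabu search: tabu_length bounds the forbidden-moves list, tabu_rules_duration presumably sets how many iterations a move stays forbidden |
||||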
edges = se1.estimate_structure( |
||||
max_parents = None, |
||||
iterations_number = 100, |
||||
patience = 20, |
||||
tabu_length = 10, |
||||
tabu_rules_duration = 10, |
||||
optimizer = 'tabu', |
||||
disable_multiprocessing=False |
||||
) |
||||
|
||||
|
||||
self.assertEqual(edges, true_edges) |
||||
|
||||
|
||||
|
||||
if __name__ == '__main__': |
||||
unittest.main() |
||||
|
@ -0,0 +1,46 @@ |
||||
|
||||
import unittest |
||||
import numpy as np |
||||
|
||||
from ...PyCTBN.structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix |
||||
|
||||
|
||||
class TestConditionalIntensityMatrix(unittest.TestCase): |
||||
|
||||
@classmethod |
||||
def setUpClass(cls) -> None: |
||||
cls.state_res_times = np.random.rand(1, 3)[0] |
||||
cls.state_res_times = cls.state_res_times * 1000 |
||||
cls.state_transition_matrix = np.random.randint(1, 10000, (3, 3)) |
||||
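# zero each diagonal entry, then store the row's total number of outgoing transitions on it |
||||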
for i in range(0, len(cls.state_res_times)): |
||||
cls.state_transition_matrix[i, i] = 0 |
||||
cls.state_transition_matrix[i, i] = np.sum(cls.state_transition_matrix[i]) |
||||
|
||||
def test_init(self): |
||||
c1 = ConditionalIntensityMatrix(self.state_res_times, self.state_transition_matrix) |
||||
self.assertTrue(np.array_equal(self.state_res_times, c1.state_residence_times)) |
||||
self.assertTrue(np.array_equal(self.state_transition_matrix, c1.state_transition_matrix)) |
||||
self.assertEqual(c1.cim.dtype, np.float64) |
||||
self.assertEqual(self.state_transition_matrix.shape, c1.cim.shape) |
||||
|
||||
def test_compute_cim_coefficients(self): |
||||
c1 = ConditionalIntensityMatrix(self.state_res_times, self.state_transition_matrix) |
||||
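# rebuild the expected CIM by hand: negate the diagonal counts, then divide by the (smoothed) state residence times |
||||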
c2 = self.state_transition_matrix.astype(np.float64) |
||||
np.fill_diagonal(c2, c2.diagonal() * -1) |
||||
for i in range(0, len(self.state_res_times)): |
||||
for j in range(0, len(self.state_res_times)): |
||||
c2[i, j] = (c2[i, j] + 1) / (self.state_res_times[i] + 1) |
||||
c1.compute_cim_coefficients() |
||||
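# each row of a well-formed CIM sums to (approximately) zero |
||||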
for i in range(0, len(c1.state_residence_times)): |
||||
self.assertTrue(np.isclose(np.sum(c1.cim[i]), 0.0, 1e-02, 1e-01)) |
||||
for i in range(0, len(self.state_res_times)): |
||||
for j in range(0, len(self.state_res_times)): |
||||
self.assertTrue(np.isclose(c1.cim[i, j], c2[i, j], 1e-02, 1e-01)) |
||||
|
||||
def test_repr(self): |
||||
c1 = ConditionalIntensityMatrix(self.state_res_times, self.state_transition_matrix) |
||||
print(c1) |
||||
|
||||
|
||||
if __name__ == '__main__': |
||||
unittest.main() |
@ -0,0 +1,190 @@ |
||||
|
||||
import unittest |
||||
import glob |
||||
import os |
||||
import networkx as nx |
||||
import numpy as np |
||||
import itertools |
||||
|
||||
from ...PyCTBN.structure_graph.sample_path import SamplePath |
||||
from ...PyCTBN.structure_graph.network_graph import NetworkGraph |
||||
from ...PyCTBN.utility.json_importer import JsonImporter |
||||
|
||||
|
||||
class TestNetworkGraph(unittest.TestCase): |
||||
@classmethod |
||||
def setUpClass(cls): |
||||
cls.read_files = glob.glob(os.path.join('./PyCTBN/test_data', "*.json")) |
||||
cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
||||
cls.importer.import_data(0) |
||||
cls.s1 = SamplePath(cls.importer) |
||||
cls.s1.build_trajectories() |
||||
cls.s1.build_structure() |
||||
|
||||
def test_init(self): |
||||
g1 = NetworkGraph(self.s1.structure) |
||||
self.assertEqual(self.s1.structure, g1._graph_struct) |
||||
self.assertIsInstance(g1._graph, nx.DiGraph) |
||||
self.assertIsNone(g1.time_scalar_indexing_strucure) |
||||
self.assertIsNone(g1.transition_scalar_indexing_structure) |
||||
self.assertIsNone(g1.transition_filtering) |
||||
self.assertIsNone(g1.p_combs) |
||||
|
||||
def test_add_nodes(self): |
||||
g1 = NetworkGraph(self.s1.structure) |
||||
g1.add_nodes(self.s1.structure.nodes_labels) |
||||
for n1, n2 in zip(g1.nodes, self.s1.structure.nodes_labels): |
||||
self.assertEqual(n1, n2) |
||||
|
||||
def test_add_edges(self): |
||||
g1 = NetworkGraph(self.s1.structure) |
||||
g1.add_edges(self.s1.structure.edges) |
||||
for e in self.s1.structure.edges: |
||||
self.assertIn(tuple(e), g1.edges) |
||||
|
||||
def test_fast_init(self): |
||||
g1 = NetworkGraph(self.s1.structure) |
||||
for node in self.s1.structure.nodes_labels: |
||||
g1.fast_init(node) |
||||
self.assertIsNotNone(g1._graph.nodes) |
||||
self.assertIsNotNone(g1._graph.edges) |
||||
self.assertIsInstance(g1._time_scalar_indexing_structure, np.ndarray) |
||||
self.assertIsInstance(g1._transition_scalar_indexing_structure, np.ndarray) |
||||
self.assertIsInstance(g1._time_filtering, np.ndarray) |
||||
self.assertIsInstance(g1._transition_filtering, np.ndarray) |
||||
self.assertIsInstance(g1._p_combs_structure, np.ndarray) |
||||
self.assertIsInstance(g1._aggregated_info_about_nodes_parents, tuple) |
||||
|
||||
def test_get_ordered_by_indx_set_of_parents(self): |
||||
g1 = NetworkGraph(self.s1.structure) |
||||
g1.add_nodes(self.s1.structure.nodes_labels) |
||||
g1.add_edges(self.s1.structure.edges) |
||||
for node in self.s1.structure.nodes_labels: |
||||
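# aggr_info is a tuple: (parent labels, parent indexes, parent cardinalities), ordered by node index |
||||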
aggr_info = g1.get_ordered_by_indx_set_of_parents(node) |
||||
for indx in range(len(aggr_info[0]) - 1): |
||||
self.assertLess(g1.get_node_indx(aggr_info[0][indx]), g1.get_node_indx(aggr_info[0][indx + 1])) |
||||
for par, par_indx in zip(aggr_info[0], aggr_info[1]): |
||||
self.assertEqual(g1.get_node_indx(par), par_indx) |
||||
for par, par_val in zip(aggr_info[0], aggr_info[2]): |
||||
self.assertEqual(g1._graph_struct.get_states_number(par), par_val) |
||||
|
||||
def test_build_time_scalar_indexing_structure_for_a_node(self): |
||||
g1 = NetworkGraph(self.s1.structure) |
||||
g1.add_nodes(self.s1.structure.nodes_labels) |
||||
g1.add_edges(self.s1.structure.edges) |
||||
for node in self.s1.structure.nodes_labels: |
||||
aggr_info = g1.get_ordered_by_indx_set_of_parents(node) |
||||
self.aux_build_time_scalar_indexing_structure_for_a_node(g1, node, aggr_info[1], |
||||
aggr_info[0], aggr_info[2]) |
||||
|
||||
def aux_build_time_scalar_indexing_structure_for_a_node(self, graph, node_id, parents_indxs, parents_labels, parents_vals): |
||||
node_states = graph.get_states_number(node_id) |
||||
time_scalar_indexing = NetworkGraph.build_time_scalar_indexing_structure_for_a_node(node_states, parents_vals) |
||||
self.assertEqual(len(time_scalar_indexing), len(parents_indxs) + 1) |
||||
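# expected index vector: cumulative products of the node's cardinality followed by its parents' cardinalities |
||||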
merged_list = parents_labels[:] |
||||
merged_list.insert(0, node_id) |
||||
vals_list = [] |
||||
for node in merged_list: |
||||
vals_list.append(graph.get_states_number(node)) |
||||
t_vec = np.array(vals_list) |
||||
t_vec = t_vec.cumprod() |
||||
self.assertTrue(np.array_equal(time_scalar_indexing, t_vec)) |
||||
|
||||
def test_build_transition_scalar_indexing_structure_for_a_node(self): |
||||
g1 = NetworkGraph(self.s1.structure) |
||||
g1.add_nodes(self.s1.structure.nodes_labels) |
||||
g1.add_edges(self.s1.structure.edges) |
||||
for node in self.s1.structure.nodes_labels: |
||||
aggr_info = g1.get_ordered_by_indx_set_of_parents(node) |
||||
self.aux_build_transition_scalar_indexing_structure_for_a_node(g1, node, aggr_info[1], |
||||
aggr_info[0], aggr_info[2]) |
||||
|
||||
def aux_build_transition_scalar_indexing_structure_for_a_node(self, graph, node_id, parents_indxs, parents_labels, |
||||
parents_values): |
||||
node_states = graph.get_states_number(node_id) |
||||
transition_scalar_indexing = graph.build_transition_scalar_indexing_structure_for_a_node(node_states, |
||||
parents_values) |
||||
self.assertEqual(len(transition_scalar_indexing), len(parents_indxs) + 2) |
||||
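# the node is counted twice: a transition is indexed by both its starting state and its arrival state |
||||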
merged_list = parents_labels[:] |
||||
merged_list.insert(0, node_id) |
||||
merged_list.insert(0, node_id) |
||||
vals_list = [] |
||||
for node_id in merged_list: |
||||
vals_list.append(graph.get_states_number(node_id)) |
||||
m_vec = np.array([vals_list]) |
||||
m_vec = m_vec.cumprod() |
||||
self.assertTrue(np.array_equal(transition_scalar_indexing, m_vec)) |
||||
|
||||
def test_build_time_columns_filtering_structure_for_a_node(self): |
||||
g1 = NetworkGraph(self.s1.structure) |
||||
g1.add_nodes(self.s1.structure.nodes_labels) |
||||
g1.add_edges(self.s1.structure.edges) |
||||
for node in self.s1.structure.nodes_labels: |
||||
aggr_info = g1.get_ordered_by_indx_set_of_parents(node) |
||||
self.aux_build_time_columns_filtering_structure_for_a_node(g1, node, aggr_info[1]) |
||||
|
||||
def aux_build_time_columns_filtering_structure_for_a_node(self, graph, node_id, p_indxs): |
||||
graph.build_time_columns_filtering_for_a_node(graph.get_node_indx(node_id), p_indxs) |
||||
single_filter = [] |
||||
single_filter.append(graph.get_node_indx(node_id)) |
||||
single_filter.extend(p_indxs) |
||||
self.assertTrue(np.array_equal(graph.build_time_columns_filtering_for_a_node(graph.get_node_indx(node_id), |
||||
p_indxs), np.array(single_filter))) |
||||
 |
||||
def test_build_transition_columns_filtering_structure(self): |
||||
g1 = NetworkGraph(self.s1.structure) |
||||
g1.add_nodes(self.s1.structure.nodes_labels) |
||||
g1.add_edges(self.s1.structure.edges) |
||||
for node in self.s1.structure.nodes_labels: |
||||
aggr_info = g1.get_ordered_by_indx_set_of_parents(node) |
||||
self.aux_build_transition_columns_filtering_structure(g1, node, aggr_info[1]) |
||||
|
||||
def aux_build_transition_columns_filtering_structure(self, graph, node_id, p_indxs): |
||||
single_filter = [] |
||||
single_filter.append(graph.get_node_indx(node_id) + graph._graph_struct.total_variables_number) |
||||
single_filter.append(graph.get_node_indx(node_id)) |
||||
single_filter.extend(p_indxs) |
||||
self.assertTrue(np.array_equal(graph.build_transition_filtering_for_a_node(graph.get_node_indx(node_id), |
||||
p_indxs), np.array(single_filter))) |
||||
 |
||||
def test_build_p_combs_structure(self): |
||||
g1 = NetworkGraph(self.s1.structure) |
||||
g1.add_nodes(self.s1.structure.nodes_labels) |
||||
g1.add_edges(self.s1.structure.edges) |
||||
for node in self.s1.structure.nodes_labels: |
||||
aggr_info = g1.get_ordered_by_indx_set_of_parents(node) |
||||
self.aux_build_p_combs_structure(g1, aggr_info[2]) |
||||
|
||||
def aux_build_p_combs_structure(self, graph, p_vals): |
||||
p_combs = graph.build_p_comb_structure_for_a_node(p_vals) |
||||
p_possible_vals = [] |
||||
for val in p_vals: |
||||
vals = [v for v in range(val)] |
||||
p_possible_vals.extend(vals) |
||||
comb_struct = set(itertools.product(p_possible_vals,repeat=len(p_vals))) |
||||
for comb in comb_struct: |
||||
self.assertIn(np.array(comb), p_combs) |
||||
|
||||
def test_get_parents_by_id(self): |
||||
g1 = NetworkGraph(self.s1.structure) |
||||
g1.add_nodes(self.s1.structure.nodes_labels) |
||||
g1.add_edges(self.s1.structure.edges) |
||||
for node in g1.nodes: |
||||
self.assertListEqual(g1.get_parents_by_id(node), list(g1._graph.predecessors(node))) |
||||
|
||||
def test_get_states_number(self): |
||||
g1 = NetworkGraph(self.s1.structure) |
||||
g1.add_nodes(self.s1.structure.nodes_labels) |
||||
g1.add_edges(self.s1.structure.edges) |
||||
for node, val in zip(g1.nodes, g1.nodes_values): |
||||
self.assertEqual(val, g1.get_states_number(node)) |
||||
|
||||
def test_get_node_indx(self): |
||||
g1 = NetworkGraph(self.s1.structure) |
||||
g1.add_nodes(self.s1.structure.nodes_labels) |
||||
g1.add_edges(self.s1.structure.edges) |
||||
for node, indx in zip(g1.nodes, g1.nodes_indexes): |
||||
self.assertEqual(indx, g1.get_node_indx(node)) |
||||
|
||||
|
||||
if __name__ == '__main__': |
||||
unittest.main() |
@ -0,0 +1,72 @@ |
||||
|
||||
import unittest |
||||
import glob |
||||
import os |
||||
import random |
||||
|
||||
from ...PyCTBN.utility.json_importer import JsonImporter |
||||
from ...PyCTBN.structure_graph.sample_path import SamplePath |
||||
from ...PyCTBN.structure_graph.trajectory import Trajectory |
||||
from ...PyCTBN.structure_graph.structure import Structure |
||||
|
||||
|
||||
class TestSamplePath(unittest.TestCase): |
||||
|
||||
@classmethod |
||||
def setUpClass(cls) -> None: |
||||
cls.read_files = glob.glob(os.path.join('./PyCTBN/test_data', "*.json")) |
||||
|
||||
def test_init_not_initialized_importer(self): |
||||
importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
||||
self.assertRaises(RuntimeError, SamplePath, importer) |
||||
|
||||
def test_init_not_filled_dataframes(self): |
||||
importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
||||
importer.import_data(0) |
||||
importer.clear_concatenated_frame() |
||||
self.assertRaises(RuntimeError, SamplePath, importer) |
||||
|
||||
def test_init(self): |
||||
importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
||||
importer.import_data(0) |
||||
s1 = SamplePath(importer) |
||||
self.assertIsNone(s1.trajectories) |
||||
self.assertIsNone(s1.structure) |
||||
self.assertFalse(s1._importer.concatenated_samples.empty) |
||||
self.assertIsNone(s1._total_variables_count) |
||||
|
||||
def test_build_trajectories(self): |
||||
importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
||||
importer.import_data(0) |
||||
s1 = SamplePath(importer) |
||||
s1.build_trajectories() |
||||
self.assertIsInstance(s1.trajectories, Trajectory) |
||||
|
||||
def test_build_structure(self): |
||||
importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
||||
importer.import_data(0) |
||||
s1 = SamplePath(importer) |
||||
s1.build_structure() |
||||
self.assertIsInstance(s1.structure, Structure) |
||||
self.assertEqual(s1._total_variables_count, len(s1._importer.sorter)) |
||||
|
||||
def test_build_structure_bad_sorter(self): |
||||
importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
||||
importer.import_data(0) |
||||
s1 = SamplePath(importer) |
||||
random.shuffle(importer._sorter) |
||||
self.assertRaises(RuntimeError, s1.build_structure) |
||||
|
||||
def test_build_samplepath_no_prior_net_structure(self): |
||||
importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
||||
importer.import_data(0) |
||||
importer._df_structure = None |
||||
s1 = SamplePath(importer) |
||||
s1.build_trajectories() |
||||
s1.build_structure() |
||||
self.assertFalse(s1.structure.edges) |
||||
|
||||
|
||||
|
||||
if __name__ == '__main__': |
||||
unittest.main() |
@ -0,0 +1,133 @@ |
||||
|
||||
import unittest |
||||
import numpy as np |
||||
import itertools |
||||
|
||||
from ...PyCTBN.structure_graph.set_of_cims import SetOfCims |
||||
|
||||
|
||||
class TestSetOfCims(unittest.TestCase): |
||||
|
||||
@classmethod |
||||
def setUpClass(cls) -> None: |
||||
cls.node_id = 'X' |
||||
cls.possible_cardinalities = [2, 3] |
||||
cls.possible_states = [[0,1], [0, 1, 2]] |
||||
cls.node_states_number = range(2, 4) |
||||
|
||||
def test_init(self): |
||||
# empty parent set |
||||
for sn in self.node_states_number: |
||||
p_combs = self.build_p_comb_structure_for_a_node([]) |
||||
self.aux_test_init(self.node_id, [], sn, p_combs) |
||||
# one parent |
||||
for sn in self.node_states_number: |
||||
for p in itertools.product(self.possible_cardinalities, repeat=1): |
||||
p_combs = self.build_p_comb_structure_for_a_node(list(p)) |
||||
self.aux_test_init(self.node_id, list(p), sn, p_combs) |
||||
#two parents |
||||
for sn in self.node_states_number: |
||||
for p in itertools.product(self.possible_cardinalities, repeat=2): |
||||
p_combs = self.build_p_comb_structure_for_a_node(list(p)) |
||||
self.aux_test_init(self.node_id, list(p), sn, p_combs) |
||||
|
||||
def test_build_cims(self): |
||||
# empty parent set |
||||
for sn in self.node_states_number: |
||||
p_combs = self.build_p_comb_structure_for_a_node([]) |
||||
self.aux_test_build_cims(self.node_id, [], sn, p_combs) |
||||
# one parent |
||||
for sn in self.node_states_number: |
||||
for p in itertools.product(self.possible_cardinalities, repeat=1): |
||||
p_combs = self.build_p_comb_structure_for_a_node(list(p)) |
||||
self.aux_test_build_cims(self.node_id, list(p), sn, p_combs) |
||||
#two parents |
||||
for sn in self.node_states_number: |
||||
for p in itertools.product(self.possible_cardinalities, repeat=2): |
||||
p_combs = self.build_p_comb_structure_for_a_node(list(p)) |
||||
self.aux_test_build_cims(self.node_id, list(p), sn, p_combs) |
||||
|
||||
def test_filter_cims_with_mask(self): |
||||
p_combs = self.build_p_comb_structure_for_a_node(self.possible_cardinalities) |
||||
sofc1 = SetOfCims('X', self.possible_cardinalities, 3, p_combs) |
||||
state_res_times_list = [] |
||||
transition_matrices_list = [] |
||||
for i in range(len(p_combs)): |
||||
state_res_times = np.random.rand(1, 3)[0] |
||||
state_res_times = state_res_times * 1000 |
||||
state_transition_matrix = np.random.randint(1, 10000, (3, 3)) |
||||
state_res_times_list.append(state_res_times) |
||||
transition_matrices_list.append(state_transition_matrix) |
||||
sofc1.build_cims(np.array(state_res_times_list), np.array(transition_matrices_list)) |
||||
for length_of_mask in range(3): |
||||
for mask in list(itertools.permutations([True, False], r=length_of_mask)): |
||||
m = np.array(mask) |
||||
for parent_value in range(self.possible_cardinalities[0]): |
||||
cims = sofc1.filter_cims_with_mask(m, [parent_value]) |
||||
if length_of_mask == 0 or length_of_mask == 1: |
||||
self.assertTrue(np.array_equal(sofc1._actual_cims, cims)) |
||||
else: |
||||
indxs = self.another_filtering_method(p_combs, m, [parent_value]) |
||||
self.assertTrue(np.array_equal(cims, sofc1._actual_cims[indxs])) |
||||
|
||||
def aux_test_build_cims(self, node_id, p_values, node_states, p_combs): |
||||
state_res_times_list = [] |
||||
transition_matrices_list = [] |
||||
so1 = SetOfCims(node_id, p_values, node_states, p_combs) |
||||
for i in range(len(p_combs)): |
||||
state_res_times = np.random.rand(1, node_states)[0] |
||||
state_res_times = state_res_times * 1000 |
||||
state_transition_matrix = np.random.randint(1, 10000, (node_states, node_states)) |
||||
state_res_times_list.append(state_res_times) |
||||
transition_matrices_list.append(state_transition_matrix) |
||||
so1.build_cims(np.array(state_res_times_list), np.array(transition_matrices_list)) |
||||
self.assertEqual(len(state_res_times_list), so1.get_cims_number()) |
||||
self.assertIsInstance(so1._actual_cims, np.ndarray) |
||||
self.assertIsNone(so1._transition_matrices) |
||||
self.assertIsNone(so1._state_residence_times) |
||||
|
||||
def aux_test_init(self, node_id, parents_states_number, node_states_number, p_combs): |
||||
sofcims = SetOfCims(node_id, parents_states_number, node_states_number, p_combs) |
||||
self.assertEqual(sofcims._node_id, node_id) |
||||
self.assertTrue(np.array_equal(sofcims._p_combs, p_combs)) |
||||
self.assertTrue(np.array_equal(sofcims._parents_states_number, parents_states_number)) |
||||
self.assertEqual(sofcims._node_states_number, node_states_number) |
||||
self.assertFalse(sofcims._actual_cims) |
||||
self.assertEqual(sofcims._state_residence_times.shape[0], np.prod(np.array(parents_states_number))) |
||||
self.assertEqual(len(sofcims._state_residence_times[0]), node_states_number) |
||||
self.assertEqual(sofcims._transition_matrices.shape[0], np.prod(np.array(parents_states_number))) |
||||
self.assertEqual(len(sofcims._transition_matrices[0][0]), node_states_number) |
||||
|
||||
def build_p_comb_structure_for_a_node(self, parents_values): |
||||
""" |
||||
Builds the combinatory structure that contains the combinations of all the values contained in parents_values. |
||||
|
||||
Parameters: |
||||
parents_values: the cardinalities of the nodes |
||||
Returns: |
||||
a numpy matrix containing a grid of the combinations |
||||
""" |
||||
tmp = [] |
||||
for val in parents_values: |
||||
tmp.append([x for x in range(val)]) |
||||
if len(parents_values) > 0: |
||||
parents_comb = np.array(np.meshgrid(*tmp)).T.reshape(-1, len(parents_values)) |
||||
if len(parents_values) > 1: |
||||
tmp_comb = parents_comb[:, 1].copy() |
||||
parents_comb[:, 1] = parents_comb[:, 0].copy() |
||||
parents_comb[:, 0] = tmp_comb |
||||
else: |
||||
parents_comb = np.array([[]], dtype=np.int) |
||||
return parents_comb |
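For reference, a minimal standalone sketch of the grid this helper builds, using the same numpy calls on a hypothetical pair of binary parents (cardinalities [2, 2]):

import numpy as np

cards = [2, 2]
grid = np.array(np.meshgrid(*[range(c) for c in cards])).T.reshape(-1, len(cards))
grid[:, [0, 1]] = grid[:, [1, 0]]  # the same column swap performed above
print(grid)
# [[0 0]
#  [1 0]
#  [0 1]
#  [1 1]]  -> after the swap, the first column varies fastest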
 |
    def another_filtering_method(self, p_combs, mask, parent_value): |
        masked_combs = p_combs[:, mask] |
        indxs = [] |
        for indx, val in enumerate(masked_combs): |
            if val == parent_value: |
                indxs.append(indx) |
        return np.array(indxs) |
 |
 |
if __name__ == '__main__': |
    unittest.main() |
@ -0,0 +1,81 @@ |
 |
import unittest |
import numpy as np |
 |
from ...PyCTBN.structure_graph.structure import Structure |
 |
 |
class TestStructure(unittest.TestCase): |
    @classmethod |
    def setUpClass(cls): |
        cls.labels = ['X', 'Y', 'Z'] |
        cls.indxs = np.array([0, 1, 2]) |
        cls.vals = np.array([3, 3, 3]) |
        cls.edges = [('X', 'Z'), ('Y', 'Z'), ('Z', 'Y')] |
        cls.vars_numb = len(cls.labels) |
 |
    def test_init(self): |
        s1 = Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb) |
        self.assertListEqual(self.labels, s1.nodes_labels) |
        self.assertIsInstance(s1.nodes_indexes, np.ndarray) |
        self.assertTrue(np.array_equal(self.indxs, s1.nodes_indexes)) |
        self.assertIsInstance(s1.nodes_values, np.ndarray) |
        self.assertTrue(np.array_equal(self.vals, s1.nodes_values)) |
        self.assertListEqual(self.edges, s1.edges) |
        self.assertEqual(self.vars_numb, s1.total_variables_number) |
 |
    def test_get_node_id(self): |
        s1 = Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb) |
        for indx, var in enumerate(self.labels): |
            self.assertEqual(var, s1.get_node_id(indx)) |
 |
    def test_get_node_indx(self): |
        l2 = self.labels[:] |
        l2.remove('Y') |
        i2 = self.indxs.copy() |
        i2 = np.delete(i2, 1)  # np.delete returns a new array; the result must be reassigned |
        v2 = self.vals.copy() |
        v2 = np.delete(v2, 1) |
        e2 = [('X', 'Z')] |
        n2 = self.vars_numb - 1 |
        s1 = Structure(l2, i2, v2, e2, n2) |
        for indx, var in zip(i2, l2): |
            self.assertEqual(indx, s1.get_node_indx(var)) |
 |
    def test_get_positional_node_indx(self): |
        l2 = self.labels[:] |
        l2.remove('Y') |
        i2 = self.indxs.copy() |
        i2 = np.delete(i2, 1) |
        v2 = self.vals.copy() |
        v2 = np.delete(v2, 1) |
        e2 = [('X', 'Z')] |
        n2 = self.vars_numb - 1 |
        s1 = Structure(l2, i2, v2, e2, n2) |
        for indx, var in enumerate(s1.nodes_labels): |
            self.assertEqual(indx, s1.get_positional_node_indx(var)) |
 |
    def test_get_states_number(self): |
        l2 = self.labels[:] |
        l2.remove('Y') |
        i2 = self.indxs.copy() |
        i2 = np.delete(i2, 1) |
        v2 = self.vals.copy() |
        v2 = np.delete(v2, 1) |
        e2 = [('X', 'Z')] |
        n2 = self.vars_numb - 1 |
        s1 = Structure(l2, i2, v2, e2, n2) |
        for val, node in zip(v2, l2): |
            self.assertEqual(val, s1.get_states_number(node)) |
 |
    def test_equality(self): |
        s1 = Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb) |
        s2 = Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb) |
        self.assertEqual(s1, s2) |
 |
    def test_repr(self): |
        s1 = Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb) |
        print(s1) |
 |
 |
if __name__ == '__main__': |
    unittest.main() |
@ -0,0 +1,27 @@ |
 |
import unittest |
import os  # needed for os.path.join below; this import was missing |
import glob |
 |
import numpy as np |
 |
from ...PyCTBN.structure_graph.trajectory import Trajectory |
from ...PyCTBN.utility.json_importer import JsonImporter |
 |
 |
class TestTrajectory(unittest.TestCase): |
 |
    @classmethod |
    def setUpClass(cls) -> None: |
        cls.read_files = glob.glob(os.path.join('./test_data', "*.json")) |
        cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
        cls.importer.import_data(0) |
 |
    def test_init(self): |
        t1 = Trajectory(self.importer.build_list_of_samples_array(self.importer.concatenated_samples), |
                        len(self.importer.sorter) + 1) |
        self.assertTrue(np.array_equal(self.importer.concatenated_samples.iloc[:, 0].to_numpy(), t1.times)) |
        self.assertTrue(np.array_equal(self.importer.concatenated_samples.iloc[:, 1:].to_numpy(), t1.complete_trajectory)) |
        self.assertTrue(np.array_equal(self.importer.concatenated_samples.iloc[:, 1: len(self.importer.sorter) + 1], t1.trajectory)) |
        self.assertEqual(len(self.importer.sorter) + 1, t1._original_cols_number) |
 |
 |
if __name__ == '__main__': |
    unittest.main() |
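The assertions above pin down the column layout Trajectory exposes: column 0 of the concatenated frame holds the times, the next len(sorter) columns hold the current states, and any remaining columns hold the shifted states. A minimal numpy-only sketch of that split on a toy two-variable matrix (not the library's own API):

import numpy as np

samples = np.array([[0.4, 0, 1, 1, 1],   # [Time, X, Y, XS, YS]
                    [0.7, 1, 1, 1, 0]])
n_vars = 2                               # plays the role of len(sorter)
times = samples[:, 0]                    # what t1.times exposes
trajectory = samples[:, 1:n_vars + 1]    # what t1.trajectory exposes
complete = samples[:, 1:]                # what t1.complete_trajectory exposes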
@ -0,0 +1,57 @@ |
 |
import unittest |
import numpy as np |
 |
from ...PyCTBN.utility.cache import Cache |
from ...PyCTBN.structure_graph.set_of_cims import SetOfCims |
 |
 |
class TestCache(unittest.TestCase): |
 |
    def test_init(self): |
        c1 = Cache() |
        self.assertFalse(c1._list_of_sets_of_parents) |
        self.assertFalse(c1._actual_cache) |
 |
    def test_put(self): |
        c1 = Cache() |
        pset1 = {'X', 'Y'} |
        sofc1 = SetOfCims('Z', [], 3, np.array([])) |
        c1.put(pset1, sofc1) |
        self.assertEqual(1, len(c1._actual_cache)) |
        self.assertEqual(1, len(c1._list_of_sets_of_parents)) |
        self.assertEqual(sofc1, c1._actual_cache[0]) |
        pset2 = {'X'} |
        sofc2 = SetOfCims('Z', [], 3, np.array([])) |
        c1.put(pset2, sofc2) |
        self.assertEqual(2, len(c1._actual_cache)) |
        self.assertEqual(2, len(c1._list_of_sets_of_parents)) |
        self.assertEqual(sofc2, c1._actual_cache[1]) |
 |
    def test_find(self): |
        c1 = Cache() |
        pset1 = {'X', 'Y'} |
        sofc1 = SetOfCims('Z', [], 3, np.array([])) |
        c1.put(pset1, sofc1) |
        self.assertEqual(1, len(c1._actual_cache)) |
        self.assertEqual(1, len(c1._list_of_sets_of_parents)) |
        self.assertIsInstance(c1.find(pset1), SetOfCims) |
        self.assertEqual(sofc1, c1.find(pset1)) |
        self.assertIsInstance(c1.find({'Y', 'X'}), SetOfCims) |
        self.assertEqual(sofc1, c1.find({'Y', 'X'})) |
        self.assertIsNone(c1.find({'X'})) |
 |
    def test_clear(self): |
        c1 = Cache() |
        pset1 = {'X', 'Y'} |
        sofc1 = SetOfCims('Z', [], 3, np.array([])) |
        c1.put(pset1, sofc1) |
        self.assertEqual(1, len(c1._actual_cache)) |
        self.assertEqual(1, len(c1._list_of_sets_of_parents)) |
        c1.clear() |
        self.assertFalse(c1._list_of_sets_of_parents) |
        self.assertFalse(c1._actual_cache) |
 |
 |
if __name__ == '__main__': |
    unittest.main() |
@ -0,0 +1,176 @@ |
 |
import unittest |
import os |
import glob |
import numpy as np |
import pandas as pd |
from ...PyCTBN.utility.json_importer import JsonImporter |
 |
import json |
 |
 |
class TestJsonImporter(unittest.TestCase): |
 |
    @classmethod |
    def setUpClass(cls) -> None: |
        cls.read_files = glob.glob(os.path.join('./PyCTBN/test_data', "*.json")) |
 |
    def test_init(self): |
        j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
        self.assertEqual(j1._samples_label, 'samples') |
        self.assertEqual(j1._structure_label, 'dyn.str') |
        self.assertEqual(j1._variables_label, 'variables') |
        self.assertEqual(j1._time_key, 'Time') |
        self.assertEqual(j1._variables_key, 'Name') |
        self.assertEqual(j1._file_path, "./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json") |
        self.assertIsNone(j1._df_samples_list) |
        self.assertIsNone(j1.variables) |
        self.assertIsNone(j1.structure) |
        self.assertEqual(j1.concatenated_samples, []) |
        self.assertIsNone(j1.sorter) |
        self.assertIsNone(j1._array_indx) |
        self.assertIsInstance(j1._raw_data, list) |
 |
    def test_read_json_file_found(self): |
        data_set = {"key1": [1, 2, 3], "key2": [4, 5, 6]} |
        with open('data.json', 'w') as f: |
            json.dump(data_set, f) |
        path = os.getcwd() |
        path = path + '/data.json' |
        j1 = JsonImporter(path, '', '', '', '', '') |
        self.assertTrue(self.ordered(data_set) == self.ordered(j1._raw_data)) |
        os.remove('data.json') |
 |
    def test_read_json_file_not_found(self): |
        path = os.getcwd() |
        path = path + '/data.json' |
        self.assertRaises(FileNotFoundError, JsonImporter, path, '', '', '', '', '') |
 |
    def test_build_sorter(self): |
        j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
        df_samples_list = j1.normalize_trajectories(j1._raw_data, 0, j1._samples_label) |
        sorter = j1.build_sorter(df_samples_list[0]) |
        self.assertListEqual(sorter, list(df_samples_list[0].columns.values)[1:]) |
 |
    def test_normalize_trajectories(self): |
        j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
        df_samples_list = j1.normalize_trajectories(j1._raw_data, 0, j1._samples_label) |
        self.assertEqual(len(df_samples_list), len(j1._raw_data[0][j1._samples_label])) |
 |
    def test_normalize_trajectories_wrong_indx(self): |
        j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
        self.assertRaises(IndexError, j1.normalize_trajectories, j1._raw_data, 474, j1._samples_label) |
 |
    def test_normalize_trajectories_wrong_key(self): |
        j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'sample', 'dyn.str', 'variables', 'Time', 'Name') |
        self.assertRaises(KeyError, j1.normalize_trajectories, j1._raw_data, 0, j1._samples_label) |
 |
    def test_compute_row_delta_single_samples_frame(self): |
        j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
        j1._array_indx = 0 |
        j1._df_samples_list = j1.import_trajectories(j1._raw_data) |
        sample_frame = j1._df_samples_list[0] |
        original_copy = sample_frame.copy() |
        columns_header = list(sample_frame.columns.values) |
        shifted_cols_header = [s + "S" for s in columns_header[1:]] |
        # 'sigle' below is the method's actual spelling in the library API |
        new_sample_frame = j1.compute_row_delta_sigle_samples_frame(sample_frame, columns_header[1:], |
                                                                    shifted_cols_header) |
        self.assertEqual(len(list(sample_frame.columns.values)) + len(shifted_cols_header), |
                         len(list(new_sample_frame.columns.values))) |
        self.assertEqual(sample_frame.shape[0] - 1, new_sample_frame.shape[0]) |
        for indx, row in new_sample_frame.iterrows(): |
            self.assertAlmostEqual(row['Time'], |
                                   original_copy.iloc[indx + 1]['Time'] - original_copy.iloc[indx]['Time']) |
        for indx, row in new_sample_frame.iterrows(): |
            # np.array_equal returns a bool; it must be asserted, or the check is silently discarded |
            self.assertTrue(np.array_equal(np.array(row[columns_header[1:]], dtype=int), |
                                           np.array(original_copy.iloc[indx][columns_header[1:]], dtype=int))) |
            self.assertTrue(np.array_equal(np.array(row[shifted_cols_header], dtype=int), |
                                           np.array(original_copy.iloc[indx + 1][columns_header[1:]], dtype=int))) |
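For intuition, a toy pandas sketch of the transform this test checks, independent of the importer: Time becomes the dwell time t[i+1] - t[i], and each shifted column ('XS', 'YS' here) carries the next row's state:

import pandas as pd

# Toy frame standing in for one imported trajectory.
df = pd.DataFrame({'Time': [0.0, 0.4, 1.1], 'X': [0, 1, 1], 'Y': [1, 1, 0]})
shifted = df[['X', 'Y']].shift(-1).add_suffix('S')  # next-row states -> 'XS', 'YS'
out = pd.concat([df, shifted], axis=1)
out['Time'] = out['Time'].diff().shift(-1)          # dwell time t[i+1] - t[i]
out = out.iloc[:-1]                                 # the last row has no successor
print(out)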
 |
    def test_compute_row_delta_in_all_frames(self): |
        j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
        j1._array_indx = 0 |
        j1._df_samples_list = j1.import_trajectories(j1._raw_data) |
        j1._sorter = j1.build_sorter(j1._df_samples_list[0]) |
        j1.compute_row_delta_in_all_samples_frames(j1._df_samples_list) |
        self.assertEqual(list(j1._df_samples_list[0].columns.values), |
                         list(j1.concatenated_samples.columns.values)[:len(list(j1._df_samples_list[0].columns.values))]) |
        self.assertEqual(list(j1.concatenated_samples.columns.values)[0], j1._time_key) |
 |
    def test_compute_row_delta_in_all_frames_not_init_sorter(self): |
        j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
        j1._array_indx = 0 |
        j1._df_samples_list = j1.import_trajectories(j1._raw_data) |
        self.assertRaises(RuntimeError, j1.compute_row_delta_in_all_samples_frames, j1._df_samples_list) |
 |
    def test_clear_data_frame_list(self): |
        j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
        j1._array_indx = 0 |
        j1._df_samples_list = j1.import_trajectories(j1._raw_data) |
        j1._sorter = j1.build_sorter(j1._df_samples_list[0]) |
        j1.compute_row_delta_in_all_samples_frames(j1._df_samples_list) |
        j1.clear_data_frame_list() |
        for df in j1._df_samples_list: |
            self.assertTrue(df.empty) |
 |
    def test_clear_concatenated_frame(self): |
        j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
        j1.import_data(0) |
        j1.clear_concatenated_frame() |
        self.assertTrue(j1.concatenated_samples.empty) |
 |
    def test_import_variables(self): |
        j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
        sorter = ['X', 'Y', 'Z'] |
        raw_data = [{'variables': {"Name": ['X', 'Y', 'Z'], "value": [3, 3, 3]}}] |
        j1._array_indx = 0 |
        df_var = j1.import_variables(raw_data) |
        self.assertEqual(list(df_var[j1._variables_key]), sorter) |
 |
    def test_import_structure(self): |
        j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
        raw_data = [{"dyn.str": [{"From": "X", "To": "Z"}, {"From": "Y", "To": "Z"}, {"From": "Z", "To": "Y"}]}] |
        j1._array_indx = 0 |
        df_struct = j1.import_structure(raw_data) |
        self.assertIsInstance(df_struct, pd.DataFrame) |
 |
    def test_import_sampled_cims(self): |
        j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
        raw_data = j1.read_json_file() |
        j1._array_indx = 0 |
        j1._df_samples_list = j1.import_trajectories(raw_data) |
        j1._sorter = j1.build_sorter(j1._df_samples_list[0]) |
        cims = j1.import_sampled_cims(raw_data, 0, 'dyn.cims') |
        self.assertEqual(list(cims.keys()), j1.sorter) |
 |
    def test_dataset_id(self): |
        j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
        array_indx = 0 |
        j1.import_data(array_indx) |
        self.assertEqual(array_indx, j1.dataset_id()) |
 |
    def test_file_path(self): |
        j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
        # the expected value must match the path passed to the constructor above |
        self.assertEqual(j1.file_path, "./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json") |
 |
    def test_import_data(self): |
        j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') |
        j1.import_data(0) |
        self.assertEqual(list(j1.variables[j1._variables_key]), |
                         list(j1.concatenated_samples.columns.values[1:len(j1.variables[j1._variables_key]) + 1])) |
        print(j1.variables) |
        print(j1.structure) |
        print(j1.concatenated_samples) |
 |
    def ordered(self, obj): |
        if isinstance(obj, dict): |
            return sorted((k, self.ordered(v)) for k, v in obj.items()) |
        if isinstance(obj, list): |
            return sorted(self.ordered(x) for x in obj) |
        return obj |
 |
 |
if __name__ == '__main__': |
    unittest.main() |
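The ordered helper above canonicalizes nested dicts and lists so two JSON payloads compare equal regardless of key or element order; a standalone check of the same idea:

def ordered(obj):
    if isinstance(obj, dict):
        return sorted((k, ordered(v)) for k, v in obj.items())
    if isinstance(obj, list):
        return sorted(ordered(x) for x in obj)
    return obj

assert ordered({"b": 1, "a": [2, 1]}) == ordered({"a": [1, 2], "b": 1})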
@ -0,0 +1,80 @@ |
 |
import unittest |
import os |
import glob |
import numpy as np |
import pandas as pd |
from ...PyCTBN.utility.sample_importer import SampleImporter |
from ...PyCTBN.structure_graph.sample_path import SamplePath |
 |
import json |
 |
 |
class TestSampleImporter(unittest.TestCase): |
 |
    @classmethod |
    def setUpClass(cls) -> None: |
        with open("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json") as f: |
            raw_data = json.load(f) |
 |
        trajectory_list_raw = raw_data[0]["samples"] |
        cls.trajectory_list = [pd.DataFrame(sample) for sample in trajectory_list_raw] |
        cls.variables = pd.DataFrame(raw_data[0]["variables"]) |
        cls.prior_net_structure = pd.DataFrame(raw_data[0]["dyn.str"]) |
 |
    def test_init(self): |
        sample_importer = SampleImporter( |
            trajectory_list=self.trajectory_list, |
            variables=self.variables, |
            prior_net_structure=self.prior_net_structure |
        ) |
        sample_importer.import_data() |
 |
        s1 = SamplePath(sample_importer) |
        s1.build_trajectories() |
        s1.build_structure() |
        s1.clear_memory() |
 |
        self.assertEqual(len(s1._importer._df_samples_list), 300) |
        self.assertIsInstance(s1._importer._df_samples_list, list) |
        self.assertIsInstance(s1._importer._df_samples_list[0], pd.DataFrame) |
        self.assertEqual(len(s1._importer._df_variables), 3) |
        self.assertIsInstance(s1._importer._df_variables, pd.DataFrame) |
        self.assertEqual(len(s1._importer._df_structure), 2) |
        self.assertIsInstance(s1._importer._df_structure, pd.DataFrame) |
 |
    def test_order(self): |
        sample_importer = SampleImporter( |
            trajectory_list=self.trajectory_list, |
            variables=self.variables, |
            prior_net_structure=self.prior_net_structure |
        ) |
        sample_importer.import_data() |
 |
        s1 = SamplePath(sample_importer) |
        s1.build_trajectories() |
        s1.build_structure() |
        s1.clear_memory() |
 |
        for count, var in enumerate(s1._importer._df_samples_list[0].columns[1:]): |
            self.assertEqual(s1._importer._sorter[count], var) |
 |
    def ordered(self, obj): |
        if isinstance(obj, dict): |
            return sorted((k, self.ordered(v)) for k, v in obj.items()) |
        if isinstance(obj, list): |
            return sorted(self.ordered(x) for x in obj) |
        return obj |
 |
 |
if __name__ == '__main__': |
    unittest.main() |
@ -0,0 +1,20 @@ |
# Minimal makefile for Sphinx documentation |
# |
 |
# You can set these variables from the command line, and also |
# from the environment for the first two. |
SPHINXOPTS ?= |
SPHINXBUILD ?= sphinx-build |
SOURCEDIR = . |
BUILDDIR = _build |
 |
# Put it first so that "make" without argument is like "make help". |
help: |
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) |
 |
.PHONY: help Makefile |
 |
# Catch-all target: route all unknown targets to Sphinx using the new |
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). |
%: Makefile |
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) |
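With this in place, running make html from the docs directory (assuming sphinx-build is on the PATH) is routed through the catch-all target to sphinx-build -M html . _build, leaving the rendered site under _build/html.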
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff