diff --git a/main_package/classes/estimators/fam_score_calculator.py b/main_package/classes/estimators/fam_score_calculator.py index 37902b0..f72f002 100644 --- a/main_package/classes/estimators/fam_score_calculator.py +++ b/main_package/classes/estimators/fam_score_calculator.py @@ -1,6 +1,3 @@ -import sys -sys.path.append('../') - import itertools import json @@ -15,9 +12,9 @@ from math import log from scipy.special import loggamma from random import choice -import structure_graph.set_of_cims as soCims -import structure_graph.network_graph as net_graph -import structure_graph.conditional_intensity_matrix as cim_class +from ..structure_graph.set_of_cims import SetOfCims +from ..structure_graph.network_graph import NetworkGraph +from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix ''' @@ -37,7 +34,7 @@ class FamScoreCalculator: # region theta def marginal_likelihood_theta(self, - cims: cim_class.ConditionalIntensityMatrix, + cims: ConditionalIntensityMatrix, alpha_xu: float, alpha_xxu: float): """ @@ -60,7 +57,7 @@ class FamScoreCalculator: for cim in cims]) def variable_cim_xu_marginal_likelihood_theta(self, - cim: cim_class.ConditionalIntensityMatrix, + cim: ConditionalIntensityMatrix, alpha_xu: float, alpha_xxu: float): """ @@ -91,7 +88,7 @@ class FamScoreCalculator: def single_cim_xu_marginal_likelihood_theta(self, index: int, - cim: cim_class.ConditionalIntensityMatrix, + cim: ConditionalIntensityMatrix, alpha_xu: float, alpha_xxu: float): """ @@ -168,7 +165,7 @@ class FamScoreCalculator: return np.sum([self.variable_cim_xu_marginal_likelihood_q(cim, tau_xu, alpha_xu) for cim in cims]) def variable_cim_xu_marginal_likelihood_q(self, - cim: cim_class.ConditionalIntensityMatrix, + cim: ConditionalIntensityMatrix, tau_xu: float=0.1, alpha_xu: float=1): """ diff --git a/main_package/classes/estimators/parameters_estimator.py b/main_package/classes/estimators/parameters_estimator.py index ec3cfc6..4754d58 100644 --- a/main_package/classes/estimators/parameters_estimator.py +++ b/main_package/classes/estimators/parameters_estimator.py @@ -2,13 +2,12 @@ import sys sys.path.append('../') import numpy as np -import structure_graph.network_graph as ng -import structure_graph.sample_path as sp -import structure_graph.set_of_cims as sofc -import structure_graph.sets_of_cims_container as acims +from ..structure_graph.network_graph import NetworkGraph +from ..structure_graph.set_of_cims import SetOfCims +from ..structure_graph.trajectory import Trajectory -class ParametersEstimator: +class ParametersEstimator(object): """Has the task of computing the cims of particular node given the trajectories and the net structure in the graph ``_net_graph``. @@ -19,25 +18,24 @@ class ParametersEstimator: :_single_set_of_cims: the set of cims object that will hold the cims of the node """ - def __init__(self, sample_path: sp.SamplePath, net_graph: ng.NetworkGraph): + def __init__(self, trajectories: Trajectory, net_graph: NetworkGraph): """Constructor Method """ - self.sample_path = sample_path - self.net_graph = net_graph - self.sets_of_cims_struct = None - self.single_set_of_cims = None + self._trajectories = trajectories + self._net_graph = net_graph + self._single_set_of_cims = None - def fast_init(self, node_id: str): + def fast_init(self, node_id: str) -> None: """Initializes all the necessary structures for the parameters estimation for the node ``node_id``. :param node_id: the node label :type node_id: string """ - p_vals = self.net_graph.aggregated_info_about_nodes_parents[2] - node_states_number = self.net_graph.get_states_number(node_id) - self.single_set_of_cims = sofc.SetOfCims(node_id, p_vals, node_states_number, self.net_graph.p_combs) + p_vals = self._net_graph._aggregated_info_about_nodes_parents[2] + node_states_number = self._net_graph.get_states_number(node_id) + self._single_set_of_cims = SetOfCims(node_id, p_vals, node_states_number, self._net_graph.p_combs) - def compute_parameters_for_node(self, node_id: str) -> sofc.SetOfCims: + def compute_parameters_for_node(self, node_id: str) -> SetOfCims: """Compute the CIMS of the node identified by the label ``node_id``. :param node_id: the node label @@ -45,25 +43,25 @@ class ParametersEstimator: :return: A SetOfCims object filled with the computed CIMS :rtype: SetOfCims """ - node_indx = self.net_graph.get_node_indx(node_id) - state_res_times = self.single_set_of_cims._state_residence_times - transition_matrices = self.single_set_of_cims._transition_matrices - trajectory = self.sample_path.trajectories.trajectory - self.compute_state_res_time_for_node(node_indx, self.sample_path.trajectories.times, - trajectory, - self.net_graph.time_filtering, - self.net_graph.time_scalar_indexing_strucure, + node_indx = self._net_graph.get_node_indx(node_id) + state_res_times = self._single_set_of_cims._state_residence_times + transition_matrices = self._single_set_of_cims._transition_matrices + ParametersEstimator.compute_state_res_time_for_node(self._trajectories.times, + self._trajectories.trajectory, + self._net_graph.time_filtering, + self._net_graph.time_scalar_indexing_strucure, state_res_times) - self.compute_state_transitions_for_a_node(node_indx, - self.sample_path.trajectories.complete_trajectory, - self.net_graph.transition_filtering, - self.net_graph.transition_scalar_indexing_structure, - transition_matrices) - self.single_set_of_cims.build_cims(state_res_times, transition_matrices) - return self.single_set_of_cims - - def compute_state_res_time_for_node(self, node_indx: int, times: np.ndarray, trajectory: np.ndarray, - cols_filter: np.ndarray, scalar_indexes_struct: np.ndarray, T: np.ndarray): + ParametersEstimator.compute_state_transitions_for_a_node(node_indx, self._trajectories.complete_trajectory, + self._net_graph.transition_filtering, + self._net_graph.transition_scalar_indexing_structure, + transition_matrices) + self._single_set_of_cims.build_cims(state_res_times, transition_matrices) + return self._single_set_of_cims + + @staticmethod + def compute_state_res_time_for_node(times: np.ndarray, trajectory: np.ndarray, + cols_filter: np.ndarray, scalar_indexes_struct: np.ndarray, + T: np.ndarray) -> None: """Compute the state residence times for a node and fill the matrix ``T`` with the results :param node_indx: the index of the node @@ -84,7 +82,9 @@ class ParametersEstimator: times, minlength=scalar_indexes_struct[-1]).reshape(-1, T.shape[1]) - def compute_state_transitions_for_a_node(self, node_indx, trajectory, cols_filter, scalar_indexing, M): + @staticmethod + def compute_state_transitions_for_a_node(node_indx: int, trajectory: np.ndarray, cols_filter: np.ndarray, + scalar_indexing: np.ndarray, M: np.ndarray) -> None: """Compute the state residence times for a node and fill the matrices ``M`` with the results. :param node_indx: the index of the node @@ -101,8 +101,8 @@ class ParametersEstimator: diag_indices = np.array([x * M.shape[1] + x % M.shape[1] for x in range(M.shape[0] * M.shape[1])], dtype=np.int64) trj_tmp = trajectory[trajectory[:, int(trajectory.shape[1] / 2) + node_indx].astype(np.int) >= 0] - M[:] = np.bincount(np.sum(trj_tmp[:, cols_filter] * scalar_indexing / scalar_indexing[0], axis=1).astype(np.int), - minlength=scalar_indexing[-1]).reshape(-1, M.shape[1], M.shape[2]) + M[:] = np.bincount(np.sum(trj_tmp[:, cols_filter] * scalar_indexing / scalar_indexing[0], axis=1).astype(np.int) + , minlength=scalar_indexing[-1]).reshape(-1, M.shape[1], M.shape[2]) M_raveled = M.ravel() M_raveled[diag_indices] = 0 M_raveled[diag_indices] = np.sum(M, axis=2).ravel() diff --git a/main_package/classes/estimators/parameters_estimator.py.bak b/main_package/classes/estimators/parameters_estimator.py.bak new file mode 100644 index 0000000..2805819 --- /dev/null +++ b/main_package/classes/estimators/parameters_estimator.py.bak @@ -0,0 +1,143 @@ +import sys +sys.path.append('../') +import numpy as np + +from ..structure_graph.network_graph import NetworkGraph +from ..structure_graph.sample_path import SetOfCims +from ..structure_graph.trajectory import Trajectory + + +class ParametersEstimator(object): + """Has the task of computing the cims of particular node given the trajectories and the net structure + in the graph ``_net_graph``. + + :param trajectories: the trajectories + :type trajectories: Trajectory + :param net_graph: the net structure + :type net_graph: NetworkGraph + :_single_set_of_cims: the set of cims object that will hold the cims of the node + """ + + def __init__(self, trajectories: Trajectory, net_graph: NetworkGraph): + """Constructor Method + """ + self._trajectories = trajectories + self._net_graph = net_graph + self._single_set_of_cims = None + + def fast_init(self, node_id: str) -> None: + """Initializes all the necessary structures for the parameters estimation for the node ``node_id``. + + :param node_id: the node label + :type node_id: string + """ + p_vals = self._net_graph._aggregated_info_about_nodes_parents[2] + node_states_number = self._net_graph.get_states_number(node_id) + self._single_set_of_cims = SetOfCims(node_id, p_vals, node_states_number, self._net_graph.p_combs) + + def compute_parameters_for_node(self, node_id: str) -> SetOfCims: + """Compute the CIMS of the node identified by the label ``node_id``. + + :param node_id: the node label + :type node_id: string + :return: A SetOfCims object filled with the computed CIMS + :rtype: SetOfCims + """ + node_indx = self._net_graph.get_node_indx(node_id) + state_res_times = self._single_set_of_cims._state_residence_times + transition_matrices = self._single_set_of_cims._transition_matrices + ParametersEstimator.compute_state_res_time_for_node(self._trajectories.times, + self._trajectories.trajectory, + self._net_graph.time_filtering, + self._net_graph.time_scalar_indexing_strucure, + state_res_times) + ParametersEstimator.compute_state_transitions_for_a_node(node_indx, self._trajectories.complete_trajectory, + self._net_graph.transition_filtering, + self._net_graph.transition_scalar_indexing_structure, + transition_matrices) + self._single_set_of_cims.build_cims(state_res_times, transition_matrices) + return self._single_set_of_cims + + @staticmethod + def compute_state_res_time_for_node(times: np.ndarray, trajectory: np.ndarray, + cols_filter: np.ndarray, scalar_indexes_struct: np.ndarray, + T: np.ndarray) -> None: + """Compute the state residence times for a node and fill the matrix ``T`` with the results + + :param node_indx: the index of the node + :type node_indx: int + :param times: the times deltas vector + :type times: numpy.array + :param trajectory: the trajectory + :type trajectory: numpy.ndArray + :param cols_filter: the columns filtering structure + :type cols_filter: numpy.array + :param scalar_indexes_struct: the indexing structure + :type scalar_indexes_struct: numpy.array + :param T: the state residence times vectors + :type T: numpy.ndArray + """ + T[:] = np.bincount(np.sum(trajectory[:, cols_filter] * scalar_indexes_struct / scalar_indexes_struct[0], axis=1) + .astype(np.int), \ + times, + minlength=scalar_indexes_struct[-1]).reshape(-1, T.shape[1]) + + @staticmethod + def compute_state_transitions_for_a_node(node_indx: int, trajectory: np.ndarray, cols_filter: np.ndarray, + scalar_indexing: np.ndarray, M: np.ndarray) -> None: + """Compute the state residence times for a node and fill the matrices ``M`` with the results. + + :param node_indx: the index of the node + :type node_indx: int + :param trajectory: the trajectory + :type trajectory: numpy.ndArray + :param cols_filter: the columns filtering structure + :type cols_filter: numpy.array + :param scalar_indexing: the indexing structure + :type scalar_indexing: numpy.array + :param M: the state transitions matrices + :type M: numpy.ndArray + """ + diag_indices = np.array([x * M.shape[1] + x % M.shape[1] for x in range(M.shape[0] * M.shape[1])], + dtype=np.int64) + trj_tmp = trajectory[trajectory[:, int(trajectory.shape[1] / 2) + node_indx].astype(np.int) >= 0] + M[:] = np.bincount(np.sum(trj_tmp[:, cols_filter] * scalar_indexing / scalar_indexing[0], axis=1).astype(np.int) + , minlength=scalar_indexing[-1]).reshape(-1, M.shape[1], M.shape[2]) + M_raveled = M.ravel() + M_raveled[diag_indices] = 0 + M_raveled[diag_indices] = np.sum(M, axis=2).ravel() + + def init_sets_cims_container(self): + self.sets_of_cims_struct = acims.SetsOfCimsContainer(self.net_graph.nodes, + self.net_graph.nodes_values, + self.net_graph.get_ordered_by_indx_parents_values_for_all_nodes(), + self.net_graph.p_combs) + + def compute_parameters(self): + #print(self.net_graph.get_nodes()) + #print(self.amalgamated_cims_struct.sets_of_cims) + #enumerate(zip(self.net_graph.get_nodes(), self.amalgamated_cims_struct.sets_of_cims)) + for indx, aggr in enumerate(zip(self.net_graph.nodes, self.sets_of_cims_struct.sets_of_cims)): + #print(self.net_graph.time_filtering[indx]) + #print(self.net_graph.time_scalar_indexing_strucure[indx]) + self.compute_state_res_time_for_node(self.net_graph.get_node_indx(aggr[0]), self.sample_path.trajectories.times, + self.sample_path.trajectories.trajectory, + self.net_graph.time_filtering[indx], + self.net_graph.time_scalar_indexing_strucure[indx], + aggr[1]._state_residence_times) + #print(self.net_graph.transition_filtering[indx]) + #print(self.net_graph.transition_scalar_indexing_structure[indx]) + self.compute_state_transitions_for_a_node(self.net_graph.get_node_indx(aggr[0]), + self.sample_path.trajectories.complete_trajectory, + self.net_graph.transition_filtering[indx], + self.net_graph.transition_scalar_indexing_structure[indx], + aggr[1]._transition_matrices) + aggr[1].build_cims(aggr[1]._state_residence_times, aggr[1]._transition_matrices) + + + + + + + + diff --git a/main_package/classes/estimators/structure_constraint_based_estimator.py b/main_package/classes/estimators/structure_constraint_based_estimator.py index a78b44f..11ff5ad 100644 --- a/main_package/classes/estimators/structure_constraint_based_estimator.py +++ b/main_package/classes/estimators/structure_constraint_based_estimator.py @@ -1,5 +1,4 @@ -import sys -sys.path.append('../') + import itertools import json import typing @@ -7,43 +6,54 @@ import typing import networkx as nx import numpy as np from networkx.readwrite import json_graph +import os from scipy.stats import chi2 as chi2_dist from scipy.stats import f as f_dist +from tqdm import tqdm -import utility.cache as ch -import structure_graph.conditional_intensity_matrix as condim -import structure_graph.network_graph as ng -import estimators.parameters_estimator as pe -import estimators.structure_estimator as se -import structure_graph.sample_path as sp -import structure_graph.structure as st -import optimizers.constraint_based_optimizer as optimizer +from ..utility.cache import Cache +from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix +from ..structure_graph.network_graph import NetworkGraph +from .parameters_estimator import ParametersEstimator +from .structure_estimator import StructureEstimator +from ..structure_graph.sample_path import SamplePath +from ..structure_graph.structure import Structure +from ..optimizers.constraint_based_optimizer import ConstraintBasedOptimizer import concurrent.futures -from utility.decorators import timing,timing_write + import multiprocessing from multiprocessing import Pool -from multiprocessing import get_context -class StructureConstraintBasedEstimator(se.StructureEstimator): +class StructureConstraintBasedEstimator(StructureEstimator): """ - Has the task of estimating the network structure given the trajectories in samplepath. - - :exp_test_sign: the significance level for the exponential Hp test - :chi_test_alfa: the significance level for the chi Hp test + Has the task of estimating the network structure given the trajectories in samplepath by using a constraint-based approach. + + :param sample_path: the _sample_path object containing the trajectories and the real structure + :type sample_path: SamplePath + :param exp_test_alfa: the significance level for the exponential Hp test + :type exp_test_alfa: float + :param chi_test_alfa: the significance level for the chi Hp test + :type chi_test_alfa: float + :_nodes: the nodes labels + :_nodes_vals: the nodes cardinalities + :_nodes_indxs: the nodes indexes + :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes`` + :_cache: the Cache object """ - def __init__(self, sample_path: sp.SamplePath, exp_test_alfa: float, chi_test_alfa: float,known_edges: typing.List= []): + def __init__(self, sample_path: SamplePath, exp_test_alfa: float, chi_test_alfa: float,known_edges: typing.List= [],thumb_threshold:int = 25): super().__init__(sample_path,known_edges) - self.exp_test_sign = exp_test_alfa - self.chi_test_alfa = chi_test_alfa - + self._exp_test_sign = exp_test_alfa + self._chi_test_alfa = chi_test_alfa + self._thumb_threshold = thumb_threshold + self._cache = Cache() def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int, - tot_vars_count: int): + tot_vars_count: int, parent_indx, child_indx) -> bool: """Performs a complete independence test on the directed graphs G1 = {test_child U parent_set} G2 = {G1 U test_parent} (added as an additional parent of the test_child). Generates all the necessary structures and datas to perform the tests. @@ -61,93 +71,57 @@ class StructureConstraintBasedEstimator(se.StructureEstimator): :return: True iff test_child and test_parent are independent given the sep_set parent_set. False otherwise :rtype: bool """ - #print("Test Parent:", test_parent) - #print("Sep Set", parent_set) p_set = parent_set[:] complete_info = parent_set[:] complete_info.append(test_child) parents = np.array(parent_set) parents = np.append(parents, test_parent) - #print("PARENTS", parents) - #parents.sort() - sorted_parents = self.nodes[np.isin(self.nodes, parents)] - #print("SORTED PARENTS", sorted_parents) + sorted_parents = self._nodes[np.isin(self._nodes, parents)] cims_filter = sorted_parents != test_parent - #print("PARENTS NO FROM MASK", cims_filter) - #if not p_set: - #print("EMPTY PSET TRYING TO FIND", test_child) - #sofc1 = self.cache.find(test_child) - #else: - sofc1 = self.cache.find(set(p_set)) - if not sofc1: - #print("CACHE MISSS SOFC1") - bool_mask1 = np.isin(self.nodes,complete_info) - #print("Bool mask 1", bool_mask1) - l1 = list(self.nodes[bool_mask1]) - #print("L1", l1) - indxs1 = self.nodes_indxs[bool_mask1] - #print("INDXS 1", indxs1) - vals1 = self.nodes_vals[bool_mask1] - eds1 = list(itertools.product(parent_set,test_child)) - s1 = st.Structure(l1, indxs1, vals1, eds1, tot_vars_count) - g1 = ng.NetworkGraph(s1) - g1.fast_init(test_child) - p1 = pe.ParametersEstimator(self._sample_path, g1) - p1.fast_init(test_child) - sofc1 = p1.compute_parameters_for_node(test_child) - #if not p_set: - #self.cache.put(test_child, sofc1) - #else: - self.cache.put(set(p_set), sofc1) - sofc2 = None - #p_set.append(test_parent) p_set.insert(0, test_parent) - if p_set: - #print("FULL PSET TRYING TO FIND", p_set) - #p_set.append(test_parent) - #print("PSET ", p_set) - #set_p_set = set(p_set) - sofc2 = self.cache.find(set(p_set)) - #if sofc2: - #print("Sofc2 in CACHE ", sofc2.actual_cims) - #print(self.cache.list_of_sets_of_indxs) + sofc2 = self._cache.find(set(p_set)) + if not sofc2: - #print("Cache MISSS SOFC2") complete_info.append(test_parent) - bool_mask2 = np.isin(self.nodes, complete_info) - #print("BOOL MASK 2",bool_mask2) - l2 = list(self.nodes[bool_mask2]) - #print("L2", l2) - indxs2 = self.nodes_indxs[bool_mask2] - #print("INDXS 2", indxs2) - vals2 = self.nodes_vals[bool_mask2] + bool_mask2 = np.isin(self._nodes, complete_info) + l2 = list(self._nodes[bool_mask2]) + indxs2 = self._nodes_indxs[bool_mask2] + vals2 = self._nodes_vals[bool_mask2] eds2 = list(itertools.product(p_set, test_child)) - s2 = st.Structure(l2, indxs2, vals2, eds2, tot_vars_count) - g2 = ng.NetworkGraph(s2) + s2 = Structure(l2, indxs2, vals2, eds2, tot_vars_count) + g2 = NetworkGraph(s2) g2.fast_init(test_child) - p2 = pe.ParametersEstimator(self._sample_path, g2) + p2 = ParametersEstimator(self._sample_path.trajectories, g2) p2.fast_init(test_child) sofc2 = p2.compute_parameters_for_node(test_child) - self.cache.put(set(p_set), sofc2) + self._cache.put(set(p_set), sofc2) + + del p_set[0] + sofc1 = self._cache.find(set(p_set)) + if not sofc1: + g2.remove_node(test_parent) + g2.fast_init(test_child) + p2 = ParametersEstimator(self._sample_path.trajectories, g2) + p2.fast_init(test_child) + sofc1 = p2.compute_parameters_for_node(test_child) + self._cache.put(set(p_set), sofc1) + thumb_value = 0.0 + if child_states_numb > 2: + parent_val = self._sample_path.structure.get_states_number(test_parent) + bool_mask_vals = np.isin(self._nodes, parent_set) + parents_vals = self._nodes_vals[bool_mask_vals] + thumb_value = self.compute_thumb_value(parent_val, child_states_numb, parents_vals) for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs): - #print("GETTING THIS P COMB", p_comb) - #if len(parent_set) > 1: cond_cims = sofc2.filter_cims_with_mask(cims_filter, p_comb) - #else: - #cond_cims = sofc2.actual_cims - #print("COnd Cims", cond_cims) for cim2 in cond_cims: - #cim2 = sofc2.actual_cims[j] - #print(indx) - #print("Run Test", i, j) - if not self.independence_test(child_states_numb, cim1, cim2): + if not self.independence_test(child_states_numb, cim1, cim2, thumb_value, parent_indx, child_indx): return False return True - def independence_test(self, child_states_numb: int, cim1: condim.ConditionalIntensityMatrix, - cim2: condim.ConditionalIntensityMatrix): + def independence_test(self, child_states_numb: int, cim1: ConditionalIntensityMatrix, + cim2: ConditionalIntensityMatrix, thumb_value: float, parent_indx, child_indx) -> bool: """Compute the actual independence test using two cims. It is performed first the exponential test and if the null hypothesis is not rejected, it is performed also the chi_test. @@ -167,48 +141,54 @@ class StructureConstraintBasedEstimator(se.StructureEstimator): r2s = M2.diagonal() C1 = cim1.cim C2 = cim2.cim + if child_states_numb > 2: + if (np.sum(np.diagonal(M1)) / thumb_value) < self._thumb_threshold: + self._removable_edges_matrix[parent_indx][child_indx] = False + return False F_stats = C2.diagonal() / C1.diagonal() - exp_alfa = self.exp_test_sign + exp_alfa = self._exp_test_sign for val in range(0, child_states_numb): if F_stats[val] < f_dist.ppf(exp_alfa / 2, r1s[val], r2s[val]) or \ F_stats[val] > f_dist.ppf(1 - exp_alfa / 2, r1s[val], r2s[val]): - #print("CONDITIONALLY DEPENDENT EXP") return False - #M1_no_diag = self.remove_diagonal_elements(cim1.state_transition_matrix) - #M2_no_diag = self.remove_diagonal_elements(cim2.state_transition_matrix) M1_no_diag = M1[~np.eye(M1.shape[0], dtype=bool)].reshape(M1.shape[0], -1) M2_no_diag = M2[~np.eye(M2.shape[0], dtype=bool)].reshape( M2.shape[0], -1) - chi_2_quantile = chi2_dist.ppf(1 - self.chi_test_alfa, child_states_numb - 1) - """ - Ks = np.sqrt(cim1.state_transition_matrix.diagonal() / cim2.state_transition_matrix.diagonal()) - Ls = np.reciprocal(Ks) - chi_stats = np.sum((np.power((M2_no_diag.T * Ks).T - (M1_no_diag.T * Ls).T, 2) \ - / (M1_no_diag + M2_no_diag)), axis=1)""" + chi_2_quantile = chi2_dist.ppf(1 - self._chi_test_alfa, child_states_numb - 1) Ks = np.sqrt(r1s / r2s) Ls = np.sqrt(r2s / r1s) for val in range(0, child_states_numb): - #K = math.sqrt(cim1.state_transition_matrix[val][val] / cim2.state_transition_matrix[val][val]) - #L = 1 / K Chi = np.sum(np.power(Ks[val] * M2_no_diag[val] - Ls[val] *M1_no_diag[val], 2) / (M1_no_diag[val] + M2_no_diag[val])) - - #print("Chi Stats", Chi) - #print("Chi Quantile", chi_2_quantile) if Chi > chi_2_quantile: - #if np.any(chi_stats > chi_2_quantile): - #print("CONDITIONALLY DEPENDENT CHI") return False - #print("Chi test", Chi) return True - - def one_iteration_of_CTPC_algorithm(self, var_id: str, tot_vars_count: int): + + def compute_thumb_value(self, parent_val, child_val, parent_set_vals): + """Compute the value to test against the thumb_threshold. + + :param parent_val: test parent's variable cardinality + :type parent_val: int + :param child_val: test child's variable cardinality + :type child_val: int + :param parent_set_vals: the cardinalities of the nodes in the current sep-set + :type parent_set_vals: List + :return: the thumb value for the current independence test + :rtype: int + """ + df = (child_val - 1) ** 2 + df = df * parent_val + for v in parent_set_vals: + df = df * v + return df + + def one_iteration_of_CTPC_algorithm(self, var_id: str, tot_vars_count: int)-> typing.List: """Performs an iteration of the CTPC algorithm using the node ``var_id`` as ``test_child``. :param var_id: the node label of the test child :type var_id: string """ - optimizer_obj = optimizer.ConstraintBasedOptimizer( + optimizer_obj = ConstraintBasedOptimizer( node_id = var_id, structure_estimator = self, tot_vars_count = tot_vars_count) @@ -226,7 +206,7 @@ class StructureConstraintBasedEstimator(se.StructureEstimator): ctpc_algo = self.one_iteration_of_CTPC_algorithm total_vars_numb = self._sample_path.total_variables_count - n_nodes= len(self.nodes) + n_nodes= len(self._nodes) total_vars_numb_array = [total_vars_numb] * n_nodes @@ -244,18 +224,17 @@ class StructureConstraintBasedEstimator(se.StructureEstimator): if disable_multiprocessing: print("DISABILITATO") cpu_count = 1 - list_edges_partial = [ctpc_algo(n,total_vars_numb) for n in self.nodes] + list_edges_partial = [ctpc_algo(n,total_vars_numb) for n in self._nodes] else: with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor: list_edges_partial = executor.map(ctpc_algo, - self.nodes, + self._nodes, total_vars_numb_array) - #list_edges_partial = [ctpc_algo(n,total_vars_numb) for n in self.nodes] + #list_edges_partial = [ctpc_algo(n,total_vars_numb) for n in self._nodes] return set(itertools.chain.from_iterable(list_edges_partial)) - @timing def estimate_structure(self,disable_multiprocessing:bool=False): return self.ctpc_algorithm(disable_multiprocessing=disable_multiprocessing) diff --git a/main_package/classes/estimators/structure_constraint_based_estimator.py.bak b/main_package/classes/estimators/structure_constraint_based_estimator.py.bak new file mode 100644 index 0000000..558a625 --- /dev/null +++ b/main_package/classes/estimators/structure_constraint_based_estimator.py.bak @@ -0,0 +1,245 @@ +import sys +sys.path.append('../') +import itertools +import json +import typing + +import networkx as nx +import numpy as np +from networkx.readwrite import json_graph +import os +from scipy.stats import chi2 as chi2_dist +from scipy.stats import f as f_dist +from tqdm import tqdm + +from ..utility.cache as ch +from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix +from ..structure_graph.network_graph import NetworkGraph +from .parameters_estimator import ParametersEstimator +from .structure_estimator import StructureEstimator +from ..structure_graph.sample_path import SamplePath +from ..structure_graph.structure import Structure +from ..optimizers.constraint_based_optimizer import ConstraintBasedOptimizer + +import concurrent.futures + +from utility.decorators import timing,timing_write + +import multiprocessing +from multiprocessing import Pool + + +class StructureConstraintBasedEstimator(se.StructureEstimator): + """ + Has the task of estimating the network structure given the trajectories in samplepath by using a constraint-based approach. + + :param sample_path: the _sample_path object containing the trajectories and the real structure + :type sample_path: SamplePath + :param exp_test_alfa: the significance level for the exponential Hp test + :type exp_test_alfa: float + :param chi_test_alfa: the significance level for the chi Hp test + :type chi_test_alfa: float + :_nodes: the nodes labels + :_nodes_vals: the nodes cardinalities + :_nodes_indxs: the nodes indexes + :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes`` + :_cache: the Cache object + """ + + def __init__(self, sample_path: SamplePath, exp_test_alfa: float, chi_test_alfa: float,known_edges: typing.List= [],thumb_threshold:int = 25): + super().__init__(sample_path,known_edges) + self._exp_test_sign = exp_test_alfa + self._chi_test_alfa = chi_test_alfa + self._thumb_threshold = thumb_threshold + tot_vars_count: int, parent_indx, child_indx) -> bool: + + def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int, + tot_vars_count: int, parent_indx, child_indx) -> bool: + """Performs a complete independence test on the directed graphs G1 = {test_child U parent_set} + G2 = {G1 U test_parent} (added as an additional parent of the test_child). + Generates all the necessary structures and datas to perform the tests. + + :param test_parent: the node label of the test parent + :type test_parent: string + :param test_child: the node label of the child + :type test_child: string + :param parent_set: the common parent set + :type parent_set: List + :param child_states_numb: the cardinality of the ``test_child`` + :type child_states_numb: int + :param tot_vars_count: the total number of variables in the net + :type tot_vars_count: int + :return: True iff test_child and test_parent are independent given the sep_set parent_set. False otherwise + :rtype: bool + """ + p_set = parent_set[:] + complete_info = parent_set[:] + complete_info.append(test_child) + + parents = np.array(parent_set) + parents = np.append(parents, test_parent) + sorted_parents = self._nodes[np.isin(self._nodes, parents)] + cims_filter = sorted_parents != test_parent + + p_set.insert(0, test_parent) + sofc2 = self._cache.find(set(p_set)) + + if not sofc2: + complete_info.append(test_parent) + bool_mask2 = np.isin(self._nodes, complete_info) + l2 = list(self._nodes[bool_mask2]) + indxs2 = self._nodes_indxs[bool_mask2] + vals2 = self._nodes_vals[bool_mask2] + eds2 = list(itertools.product(p_set, test_child)) + s2 = Structure(l2, indxs2, vals2, eds2, tot_vars_count) + g2 = NetworkGraph(s2) + g2.fast_init(test_child) + p2 = ParametersEstimator(self._sample_path.trajectories, g2) + p2.fast_init(test_child) + sofc2 = p2.compute_parameters_for_node(test_child) + self._cache.put(set(p_set), sofc2) + + del p_set[0] + sofc1 = self._cache.find(set(p_set)) + if not sofc1: + g2.remove_node(test_parent) + g2.fast_init(test_child) + p2 = ParametersEstimator(self._sample_path.trajectories, g2) + p2.fast_init(test_child) + sofc1 = p2.compute_parameters_for_node(test_child) + self._cache.put(set(p_set), sofc1) + thumb_value = 0.0 + if child_states_numb > 2: + parent_val = self._sample_path.structure.get_states_number(test_parent) + bool_mask_vals = np.isin(self._nodes, parent_set) + parents_vals = self._nodes_vals[bool_mask_vals] + thumb_value = self.compute_thumb_value(parent_val, child_states_numb, parents_vals) + for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs): + cond_cims = sofc2.filter_cims_with_mask(cims_filter, p_comb) + for cim2 in cond_cims: + if not self.independence_test(child_states_numb, cim1, cim2, thumb_value, parent_indx, child_indx): + return False + return True + + def independence_test(self, child_states_numb: int, cim1: ConditionalIntensityMatrix, + cim2: ConditionalIntensityMatrix, thumb_value: float, parent_indx, child_indx) -> bool: + """Compute the actual independence test using two cims. + It is performed first the exponential test and if the null hypothesis is not rejected, + it is performed also the chi_test. + + :param child_states_numb: the cardinality of the test child + :type child_states_numb: int + :param cim1: a cim belonging to the graph without test parent + :type cim1: ConditionalIntensityMatrix + :param cim2: a cim belonging to the graph with test parent + :type cim2: ConditionalIntensityMatrix + :return: True iff both tests do NOT reject the null hypothesis of independence. False otherwise. + :rtype: bool + """ + M1 = cim1.state_transition_matrix + M2 = cim2.state_transition_matrix + r1s = M1.diagonal() + r2s = M2.diagonal() + C1 = cim1.cim + C2 = cim2.cim + if child_states_numb > 2: + if (np.sum(np.diagonal(M1)) / thumb_value) < self._thumb_threshold: + self._removable_edges_matrix[parent_indx][child_indx] = False + return False + F_stats = C2.diagonal() / C1.diagonal() + exp_alfa = self._exp_test_sign + for val in range(0, child_states_numb): + if F_stats[val] < f_dist.ppf(exp_alfa / 2, r1s[val], r2s[val]) or \ + F_stats[val] > f_dist.ppf(1 - exp_alfa / 2, r1s[val], r2s[val]): + return False + M1_no_diag = M1[~np.eye(M1.shape[0], dtype=bool)].reshape(M1.shape[0], -1) + M2_no_diag = M2[~np.eye(M2.shape[0], dtype=bool)].reshape( + M2.shape[0], -1) + chi_2_quantile = chi2_dist.ppf(1 - self._chi_test_alfa, child_states_numb - 1) + Ks = np.sqrt(r1s / r2s) + Ls = np.sqrt(r2s / r1s) + for val in range(0, child_states_numb): + Chi = np.sum(np.power(Ks[val] * M2_no_diag[val] - Ls[val] *M1_no_diag[val], 2) / + (M1_no_diag[val] + M2_no_diag[val])) + if Chi > chi_2_quantile: + return False + return True + + def compute_thumb_value(self, parent_val, child_val, parent_set_vals): + """Compute the value to test against the thumb_threshold. + + :param parent_val: test parent's variable cardinality + :type parent_val: int + :param child_val: test child's variable cardinality + :type child_val: int + :param parent_set_vals: the cardinalities of the nodes in the current sep-set + :type parent_set_vals: List + :return: the thumb value for the current independence test + :rtype: int + """ + df = (child_val - 1) ** 2 + df = df * parent_val + for v in parent_set_vals: + df = df * v + return df + + def one_iteration_of_CTPC_algorithm(self, var_id: str, tot_vars_count: int)-> typing.List: + """Performs an iteration of the CTPC algorithm using the node ``var_id`` as ``test_child``. + + :param var_id: the node label of the test child + :type var_id: string + """ + optimizer_obj = optimizer.ConstraintBasedOptimizer( + node_id = var_id, + structure_estimator = self, + tot_vars_count = tot_vars_count) + return optimizer_obj.optimize_structure() + + + def ctpc_algorithm(self,disable_multiprocessing:bool= False ): + """ + Compute the CTPC algorithm. + Parameters: + void + Returns: + void + """ + ctpc_algo = self.one_iteration_of_CTPC_algorithm + total_vars_numb = self._sample_path.total_variables_count + + n_nodes= len(self.nodes) + + total_vars_numb_array = [total_vars_numb] * n_nodes + + 'get the number of CPU' + cpu_count = multiprocessing.cpu_count() + + + + 'Remove all the edges from the structure' + self._sample_path.structure.clean_structure_edges() + + 'Estimate the best parents for each node' + #with multiprocessing.Pool(processes=cpu_count) as pool: + #with get_context("spawn").Pool(processes=cpu_count) as pool: + if disable_multiprocessing: + print("DISABILITATO") + cpu_count = 1 + list_edges_partial = [ctpc_algo(n,total_vars_numb) for n in self.nodes] + else: + with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor: + list_edges_partial = executor.map(ctpc_algo, + self.nodes, + total_vars_numb_array) + #list_edges_partial = [ctpc_algo(n,total_vars_numb) for n in self.nodes] + + return set(itertools.chain.from_iterable(list_edges_partial)) + + + @timing + def estimate_structure(self,disable_multiprocessing:bool=False): + return self.ctpc_algorithm(disable_multiprocessing=disable_multiprocessing) + + + + diff --git a/main_package/classes/estimators/structure_estimator.py b/main_package/classes/estimators/structure_estimator.py index c3f5646..fbf8ea9 100644 --- a/main_package/classes/estimators/structure_estimator.py +++ b/main_package/classes/estimators/structure_estimator.py @@ -1,9 +1,9 @@ -import sys -sys.path.append('../') + import itertools import json import typing +import matplotlib.pyplot as plt import networkx as nx import numpy as np from networkx.readwrite import json_graph @@ -12,33 +12,33 @@ from abc import ABC import abc -import utility.cache as ch -import structure_graph.conditional_intensity_matrix as condim -import structure_graph.network_graph as ng -import estimators.parameters_estimator as pe -import structure_graph.sample_path as sp -import structure_graph.structure as st +from ..utility.cache import Cache +from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix +from ..structure_graph.network_graph import NetworkGraph +from .parameters_estimator import ParametersEstimator +from ..structure_graph.sample_path import SamplePath +from ..structure_graph.structure import Structure -class StructureEstimator(ABC): - """ - Has the task of estimating the network structure given the trajectories in samplepath. +class StructureEstimator(object): + """Has the task of estimating the network structure given the trajectories in ``samplepath``. :param sample_path: the _sample_path object containing the trajectories and the real structure :type sample_path: SamplePath - :param known_edges: List of known edges - :type known_edges: List - + :_nodes: the nodes labels + :_nodes_vals: the nodes cardinalities + :_nodes_indxs: the nodes indexes + :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes`` """ - def __init__(self, sample_path: sp.SamplePath, known_edges: typing.List = None): + def __init__(self, sample_path: SamplePath, known_edges: typing.List = None): self._sample_path = sample_path - self.nodes = np.array(self._sample_path.structure.nodes_labels) - self.nodes_vals = self._sample_path.structure.nodes_values - self.nodes_indxs = self._sample_path.structure.nodes_indexes + self._nodes = np.array(self._sample_path.structure.nodes_labels) + self._nodes_vals = self._sample_path.structure.nodes_values + self._nodes_indxs = self._sample_path.structure.nodes_indexes self._removable_edges_matrix = self.build_removable_edges_matrix(known_edges) - self.complete_graph = self.build_complete_graph(self._sample_path.structure.nodes_labels) - self.cache = ch.Cache() + self._complete_graph = StructureEstimator.build_complete_graph(self._sample_path.structure.nodes_labels) + def build_removable_edges_matrix(self, known_edges: typing.List): """Builds a boolean matrix who shows if a edge could be removed or not, based on prior knowledge given: @@ -57,7 +57,8 @@ class StructureEstimator(ABC): complete_adj_matrix[i][j] = False return complete_adj_matrix - def build_complete_graph(self, node_ids: typing.List): + @staticmethod + def build_complete_graph(node_ids: typing.List) -> nx.DiGraph: """Builds a complete directed graph (no self loops) given the nodes labels in the list ``node_ids``: :param node_ids: the list of nodes labels @@ -71,7 +72,8 @@ class StructureEstimator(ABC): return complete_graph - def generate_possible_sub_sets_of_size(self, u: typing.List, size: int, parent_label: str): + @staticmethod + def generate_possible_sub_sets_of_size( u: typing.List, size: int, parent_label: str): """Creates a list containing all possible subsets of the list ``u`` of size ``size``, that do not contains a the node identified by ``parent_label``. @@ -88,15 +90,17 @@ class StructureEstimator(ABC): list_without_test_parent.remove(parent_label) return map(list, itertools.combinations(list_without_test_parent, size)) - def save_results(self): + def save_results(self) -> None: """Save the estimated Structure to a .json file in the path where the data are loaded from. The file is named as the input dataset but the `results_` word is appended to the results file. """ - res = json_graph.node_link_data(self.complete_graph) - name = self._sample_path.importer.file_path.rsplit('/',1)[-1] - #print(name) - name = '../results_' + name - with open(name, 'w+') as f: + res = json_graph.node_link_data(self._complete_graph) + name = self._sample_path._importer.file_path.rsplit('/', 1)[-1] + name = name.split('.', 1)[0] + name += '_' + str(self._sample_path._importer.dataset_id()) + name += '.json' + file_name = 'results_' + name + with open(file_name, 'w') as f: json.dump(res, f) @@ -177,3 +181,7 @@ class StructureEstimator(ABC): plt.clf() print("Estimated Structure Plot Saved At: ", os.path.abspath(name)) + + + + diff --git a/main_package/classes/estimators/structure_estimator.py.bak b/main_package/classes/estimators/structure_estimator.py.bak new file mode 100644 index 0000000..36fcb04 --- /dev/null +++ b/main_package/classes/estimators/structure_estimator.py.bak @@ -0,0 +1,189 @@ +import sys +sys.path.append('../') +import itertools +import json +import typing + +import matplotlib.pyplot as plt +import networkx as nx +import numpy as np +from networkx.readwrite import json_graph + +from abc import ABC + +import abc + +import ..utility.cache as ch +import ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix +import ..structure_graph.network_graph import NetworkGraph +import .parameters_estimator import ParametersEstimator +import ..structure_graph.sample_path import SamplePath +from ..structure_graph.structure import Structure + + +class StructureEstimator(object): + """Has the task of estimating the network structure given the trajectories in ``samplepath``. + + :param sample_path: the _sample_path object containing the trajectories and the real structure + :type sample_path: SamplePath + :_nodes: the nodes labels + :_nodes_vals: the nodes cardinalities + :_nodes_indxs: the nodes indexes + :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes`` + """ + + def __init__(self, sample_path: SamplePath, known_edges: typing.List = None): + self._sample_path = sample_path + self.nodes = np.array(self._sample_path.structure.nodes_labels) + self.nodes_vals = self._sample_path.structure.nodes_values + self.nodes_indxs = self._sample_path.structure.nodes_indexes + self._removable_edges_matrix = self. + (known_edges) + self.complete_graph = self.build_complete_graph(self._sample_path.structure.nodes_labels) + self.cache = ch.Cache() + + def build_removable_edges_matrix(self, known_edges: typing.List): + """Builds a boolean matrix who shows if a edge could be removed or not, based on prior knowledge given: + + :param known_edges: the list of nodes labels + :type known_edges: List + :return: a boolean matrix + :rtype: np.ndarray + """ + tot_vars_count = self._sample_path.total_variables_count + complete_adj_matrix = np.full((tot_vars_count, tot_vars_count), True) + if known_edges: + for edge in known_edges: + i = self._sample_path.structure.get_node_indx(edge[0]) + j = self._sample_path.structure.get_node_indx(edge[1]) + complete_adj_matrix[i][j] = False + return complete_adj_matrix + + @staticmethod + def build_complete_graph(node_ids: typing.List) -> nx.DiGraph: + """Builds a complete directed graph (no self loops) given the nodes labels in the list ``node_ids``: + + :param node_ids: the list of nodes labels + :type node_ids: List + :return: a complete Digraph Object + :rtype: networkx.DiGraph + """ + complete_graph = nx.DiGraph() + complete_graph.add_nodes_from(node_ids) + complete_graph.add_edges_from(itertools.permutations(node_ids, 2)) + return complete_graph + + + + def generate_possible_sub_sets_of_size(self, u: typing.List, size: int, parent_label: str): + """Creates a list containing all possible subsets of the list ``u`` of size ``size``, + that do not contains a the node identified by ``parent_label``. + + :param u: the list of nodes + :type u: List + :param size: the size of the subsets + :type size: int + :param parent_label: the node to exclude in the subsets generation + :type parent_label: string + :return: an Iterator Object containing a list of lists + :rtype: Iterator + """ + list_without_test_parent = u[:] + list_without_test_parent.remove(parent_label) + return map(list, itertools.combinations(list_without_test_parent, size)) + + def save_results(self) -> None: + """Save the estimated Structure to a .json file in the path where the data are loaded from. + The file is named as the input dataset but the `results_` word is appended to the results file. + """ + res = json_graph.node_link_data(self._complete_graph) + name = self._sample_path._importer.file_path.rsplit('/', 1)[-1] + name = name.split('.', 1)[0] + name += '_' + str(self._sample_path._importer.dataset_id()) + name += '.json' + file_name = 'results_' + name + with open(file_name, 'w') as f: + json.dump(res, f) + + + def remove_diagonal_elements(self, matrix): + m = matrix.shape[0] + strided = np.lib.stride_tricks.as_strided + s0, s1 = matrix.strides + return strided(matrix.ravel()[1:], shape=(m - 1, m), strides=(s0 + s1, s1)).reshape(m, -1) + + + @abc.abstractmethod + def estimate_structure(self) -> typing.List: + """Abstract method to estimate the structure + + :return: List of estimated edges + :rtype: Typing.List + """ + pass + + + def adjacency_matrix(self) -> np.ndarray: + """Converts the estimated structure ``_complete_graph`` to a boolean adjacency matrix representation. + + :return: The adjacency matrix of the graph ``_complete_graph`` + :rtype: numpy.ndArray + """ + return nx.adj_matrix(self._complete_graph).toarray().astype(bool) + + def spurious_edges(self) -> typing.List: + """Return the spurious edges present in the estimated structure, if a prior net structure is present in + ``_sample_path.structure``. + + :return: A list containing the spurious edges + :rtype: List + """ + if not self._sample_path.has_prior_net_structure: + raise RuntimeError("Can not compute spurious edges with no prior net structure!") + real_graph = nx.DiGraph() + real_graph.add_nodes_from(self._sample_path.structure.nodes_labels) + real_graph.add_edges_from(self._sample_path.structure.edges) + return nx.difference(real_graph, self._complete_graph).edges + + def save_plot_estimated_structure_graph(self) -> None: + """Plot the estimated structure in a graphical model style. + Spurious edges are colored in red. + """ + graph_to_draw = nx.DiGraph() + spurious_edges = self.spurious_edges() + non_spurious_edges = list(set(self._complete_graph.edges) - set(spurious_edges)) + print(non_spurious_edges) + edges_colors = ['red' if edge in spurious_edges else 'black' for edge in self._complete_graph.edges] + graph_to_draw.add_edges_from(spurious_edges) + graph_to_draw.add_edges_from(non_spurious_edges) + pos = nx.spring_layout(graph_to_draw, k=0.5*1/np.sqrt(len(graph_to_draw.nodes())), iterations=50,scale=10) + options = { + "node_size": 2000, + "node_color": "white", + "edgecolors": "black", + 'linewidths':2, + "with_labels":True, + "font_size":13, + 'connectionstyle': 'arc3, rad = 0.1', + "arrowsize": 15, + "arrowstyle": '<|-', + "width": 1, + "edge_color":edges_colors, + } + + nx.draw(graph_to_draw, pos, **options) + ax = plt.gca() + ax.margins(0.20) + plt.axis("off") + name = self._sample_path._importer.file_path.rsplit('/', 1)[-1] + name = name.split('.', 1)[0] + name += '_' + str(self._sample_path._importer.dataset_id()) + name += '.png' + plt.savefig(name) + plt.clf() + print("Estimated Structure Plot Saved At: ", os.path.abspath(name)) + + + + + diff --git a/main_package/classes/estimators/structure_score_based_estimator.py b/main_package/classes/estimators/structure_score_based_estimator.py index e07e8c8..19e7fc4 100644 --- a/main_package/classes/estimators/structure_score_based_estimator.py +++ b/main_package/classes/estimators/structure_score_based_estimator.py @@ -1,5 +1,4 @@ -import sys -sys.path.append('../') + import itertools import json import typing @@ -13,23 +12,18 @@ from random import choice import concurrent.futures import copy -import utility.cache as ch -import structure_graph.conditional_intensity_matrix as condim -import structure_graph.network_graph as ng -import estimators.parameters_estimator as pe -import estimators.structure_estimator as se -import structure_graph.sample_path as sp -import structure_graph.structure as st -import estimators.fam_score_calculator as fam_score -import optimizers.hill_climbing_search as hill -import optimizers.tabu_search as tabu - -from utility.decorators import timing,timing_write - -from multiprocessing import get_context +from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix +from ..structure_graph.network_graph import NetworkGraph +from .parameters_estimator import ParametersEstimator +from .structure_estimator import StructureEstimator +from ..structure_graph.sample_path import SamplePath +from ..structure_graph.structure import Structure +from .fam_score_calculator import FamScoreCalculator +from ..optimizers.hill_climbing_search import HillClimbing +from ..optimizers.tabu_search import TabuSearch -#from numba import njit +from ..utility.decorators import timing,timing_write import multiprocessing from multiprocessing import Pool @@ -37,7 +31,7 @@ from multiprocessing import Pool -class StructureScoreBasedEstimator(se.StructureEstimator): +class StructureScoreBasedEstimator(StructureEstimator): """ Has the task of estimating the network structure given the trajectories in samplepath by using a score based approach. @@ -53,7 +47,7 @@ class StructureScoreBasedEstimator(se.StructureEstimator): """ - def __init__(self, sample_path: sp.SamplePath, tau_xu:int=0.1, alpha_xu:int = 1,known_edges: typing.List= []): + def __init__(self, sample_path: SamplePath, tau_xu:int=0.1, alpha_xu:int = 1,known_edges: typing.List= []): super().__init__(sample_path,known_edges) self.tau_xu=tau_xu self.alpha_xu=alpha_xu @@ -90,7 +84,7 @@ class StructureScoreBasedEstimator(se.StructureEstimator): estimate_parents = self.estimate_parents - n_nodes= len(self.nodes) + n_nodes= len(self._nodes) l_max_parents= [max_parents] * n_nodes l_iterations_number = [iterations_number] * n_nodes @@ -116,11 +110,11 @@ class StructureScoreBasedEstimator(se.StructureEstimator): 'Estimate the best parents for each node' if disable_multiprocessing: - list_edges_partial = [estimate_parents(n,max_parents,iterations_number,patience,tabu_length,tabu_rules_duration,optimizer) for n in self.nodes] + list_edges_partial = [estimate_parents(n,max_parents,iterations_number,patience,tabu_length,tabu_rules_duration,optimizer) for n in self._nodes] else: with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor: list_edges_partial = executor.map(estimate_parents, - self.nodes, + self._nodes, l_max_parents, l_iterations_number, l_patience, @@ -130,7 +124,7 @@ class StructureScoreBasedEstimator(se.StructureEstimator): - #list_edges_partial = p.map(estimate_parents, self.nodes) + #list_edges_partial = p.map(estimate_parents, self._nodes) #list_edges_partial= estimate_parents('Q',max_parents,iterations_number,patience,tabu_length,tabu_rules_duration,optimizer) 'Concatenate all the edges list' @@ -194,7 +188,7 @@ class StructureScoreBasedEstimator(se.StructureEstimator): "choose the optimizer algotithm" if optimizer == 'tabu': - optimizer = tabu.TabuSearch( + optimizer = TabuSearch( node_id = node_id, structure_estimator = self, max_parents = max_parents, @@ -203,7 +197,7 @@ class StructureScoreBasedEstimator(se.StructureEstimator): tabu_length = tabu_length, tabu_rules_duration = tabu_rules_duration) else: #if optimizer == 'hill': - optimizer = hill.HillClimbing( + optimizer = HillClimbing( node_id = node_id, structure_estimator = self, max_parents = max_parents, @@ -215,7 +209,7 @@ class StructureScoreBasedEstimator(se.StructureEstimator): def get_score_from_graph(self, - graph: ng.NetworkGraph, + graph: NetworkGraph, node_id:str): """ Get the FamScore of a node @@ -233,14 +227,14 @@ class StructureScoreBasedEstimator(se.StructureEstimator): 'inizialize the graph for a single node' graph.fast_init(node_id) - params_estimation = pe.ParametersEstimator(self._sample_path, graph) + params_estimation = ParametersEstimator(self._sample_path.trajectories, graph) 'Inizialize and compute parameters for node' params_estimation.fast_init(node_id) SoCims = params_estimation.compute_parameters_for_node(node_id) 'calculate the FamScore for the node' - fam_score_obj = fam_score.FamScoreCalculator() + fam_score_obj = FamScoreCalculator() score = fam_score_obj.get_fam_score(SoCims.actual_cims,tau_xu = self.tau_xu,alpha_xu=self.alpha_xu) diff --git a/main_package/classes/optimizers/constraint_based_optimizer.py b/main_package/classes/optimizers/constraint_based_optimizer.py index 7579992..65bc19c 100644 --- a/main_package/classes/optimizers/constraint_based_optimizer.py +++ b/main_package/classes/optimizers/constraint_based_optimizer.py @@ -1,12 +1,10 @@ -import sys -sys.path.append('../') + import itertools import json import typing import networkx as nx import numpy as np -from networkx.readwrite import json_graph from random import choice @@ -15,15 +13,14 @@ from abc import ABC import copy -from optimizers.optimizer import Optimizer -from estimators import structure_estimator as se -import structure_graph.network_graph as ng +from .optimizer import Optimizer +from ..estimators.structure_estimator import StructureEstimator +from ..structure_graph.network_graph import NetworkGraph class ConstraintBasedOptimizer(Optimizer): """ Optimizer class that implement a CTPC Algorithm - :param node_id: current node's id :type node_id: string @@ -31,12 +28,10 @@ class ConstraintBasedOptimizer(Optimizer): :type structure_estimator: class:'StructureEstimator' :param tot_vars_count: number of variables in the dataset :type tot_vars_count: int - - """ def __init__(self, node_id:str, - structure_estimator: se.StructureEstimator, + structure_estimator: StructureEstimator, tot_vars_count:int ): """ @@ -56,7 +51,7 @@ class ConstraintBasedOptimizer(Optimizer): """ print("##################TESTING VAR################", self.node_id) - graph = ng.NetworkGraph(self.structure_estimator._sample_path.structure) + graph = NetworkGraph(self.structure_estimator._sample_path.structure) other_nodes = [node for node in self.structure_estimator._sample_path.structure.nodes_labels if node != self.node_id] @@ -74,16 +69,19 @@ class ConstraintBasedOptimizer(Optimizer): parent_indx = 0 while parent_indx < len(u): removed = False - S = self.structure_estimator.generate_possible_sub_sets_of_size(u, b, u[parent_indx]) test_parent = u[parent_indx] - for parents_set in S: - if self.structure_estimator.complete_test(test_parent, self.node_id, parents_set, child_states_numb, self.tot_vars_count): - graph.remove_edges([(test_parent, self.node_id)]) - u.remove(test_parent) - removed = True - break + i = self.structure_estimator._sample_path.structure.get_node_indx(test_parent) + j = self.structure_estimator._sample_path.structure.get_node_indx(self.node_id) + if self.structure_estimator._removable_edges_matrix[i][j]: + S = StructureEstimator.generate_possible_sub_sets_of_size(u, b, test_parent) + for parents_set in S: + if self.structure_estimator.complete_test(test_parent, self.node_id, parents_set, child_states_numb, self.tot_vars_count,i,j): + graph.remove_edges([(test_parent, self.node_id)]) + u.remove(test_parent) + removed = True + break if not removed: parent_indx += 1 b += 1 - self.structure_estimator.cache.clear() - return graph.edges + self.structure_estimator._cache.clear() + return graph.edges \ No newline at end of file diff --git a/main_package/classes/optimizers/hill_climbing_search.py b/main_package/classes/optimizers/hill_climbing_search.py index 70b9eec..6783be0 100644 --- a/main_package/classes/optimizers/hill_climbing_search.py +++ b/main_package/classes/optimizers/hill_climbing_search.py @@ -1,21 +1,19 @@ -import sys -sys.path.append('../') + import itertools import json import typing import networkx as nx import numpy as np -from networkx.readwrite import json_graph from random import choice from abc import ABC -from optimizers.optimizer import Optimizer -from estimators import structure_estimator as se -import structure_graph.network_graph as ng +from .optimizer import Optimizer +from ..estimators.structure_estimator import StructureEstimator +from ..structure_graph.network_graph import NetworkGraph class HillClimbing(Optimizer): @@ -39,7 +37,7 @@ class HillClimbing(Optimizer): """ def __init__(self, node_id:str, - structure_estimator: se.StructureEstimator, + structure_estimator: StructureEstimator, max_parents:int = None, iterations_number:int= 40, patience:int = None @@ -63,7 +61,7 @@ class HillClimbing(Optimizer): """ #'Create the graph for the single node' - graph = ng.NetworkGraph(self.structure_estimator._sample_path.structure) + graph = NetworkGraph(self.structure_estimator._sample_path.structure) 'get the index for the current node' node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id) diff --git a/main_package/classes/optimizers/optimizer.py b/main_package/classes/optimizers/optimizer.py index c749185..98d5e4c 100644 --- a/main_package/classes/optimizers/optimizer.py +++ b/main_package/classes/optimizers/optimizer.py @@ -1,16 +1,14 @@ -import sys -sys.path.append('../') + import itertools import json import typing import networkx as nx import numpy as np -from networkx.readwrite import json_graph import abc -from estimators import structure_estimator as se +from ..estimators.structure_estimator import StructureEstimator @@ -25,7 +23,7 @@ class Optimizer(abc.ABC): """ - def __init__(self, node_id:str, structure_estimator: se.StructureEstimator): + def __init__(self, node_id:str, structure_estimator: StructureEstimator): self.node_id = node_id self.structure_estimator = structure_estimator diff --git a/main_package/classes/optimizers/tabu_search.py b/main_package/classes/optimizers/tabu_search.py index 4414dc7..e15dd40 100644 --- a/main_package/classes/optimizers/tabu_search.py +++ b/main_package/classes/optimizers/tabu_search.py @@ -1,21 +1,19 @@ -import sys -sys.path.append('../') + import itertools import json import typing import networkx as nx import numpy as np -from networkx.readwrite import json_graph from random import choice,sample from abc import ABC -from optimizers.optimizer import Optimizer -from estimators import structure_estimator as se -import structure_graph.network_graph as ng +from .optimizer import Optimizer +from ..estimators.structure_estimator import StructureEstimator +from ..structure_graph.network_graph import NetworkGraph import queue @@ -44,7 +42,7 @@ class TabuSearch(Optimizer): """ def __init__(self, node_id:str, - structure_estimator: se.StructureEstimator, + structure_estimator: StructureEstimator, max_parents:int = None, iterations_number:int= 40, patience:int = None, @@ -72,7 +70,7 @@ class TabuSearch(Optimizer): print(f"tabu search is processing the structure of {self.node_id}") 'Create the graph for the single node' - graph = ng.NetworkGraph(self.structure_estimator._sample_path.structure) + graph = NetworkGraph(self.structure_estimator._sample_path.structure) 'get the index for the current node' node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id) diff --git a/main_package/classes/structure_graph/conditional_intensity_matrix.py b/main_package/classes/structure_graph/conditional_intensity_matrix.py index e87b662..4abfdd0 100644 --- a/main_package/classes/structure_graph/conditional_intensity_matrix.py +++ b/main_package/classes/structure_graph/conditional_intensity_matrix.py @@ -1,9 +1,7 @@ import numpy as np -import sys -sys.path.append('../') -class ConditionalIntensityMatrix: +class ConditionalIntensityMatrix(object): """Abstracts the Conditional Intesity matrix of a node as aggregation of the state residence times vector and state transition matrix and the actual CIM matrix. @@ -20,7 +18,7 @@ class ConditionalIntensityMatrix: self._state_transition_matrix = state_transition_matrix self._cim = self.state_transition_matrix.astype(np.float64) - def compute_cim_coefficients(self): + def compute_cim_coefficients(self) -> None: """Compute the coefficients of the matrix _cim by using the following equality q_xx' = M[x, x'] / T[x]. The class member ``_cim`` will contain the computed cim """ @@ -28,15 +26,15 @@ class ConditionalIntensityMatrix: self._cim = ((self._cim.T + 1) / (self._state_residence_times + 1)).T @property - def state_residence_times(self): + def state_residence_times(self) -> np.ndarray: return self._state_residence_times @property - def state_transition_matrix(self): + def state_transition_matrix(self) -> np.ndarray: return self._state_transition_matrix @property - def cim(self): + def cim(self) -> np.ndarray: return self._cim def __repr__(self): diff --git a/main_package/classes/structure_graph/conditional_intensity_matrix.py.bak b/main_package/classes/structure_graph/conditional_intensity_matrix.py.bak new file mode 100644 index 0000000..e87b662 --- /dev/null +++ b/main_package/classes/structure_graph/conditional_intensity_matrix.py.bak @@ -0,0 +1,44 @@ +import numpy as np + +import sys +sys.path.append('../') + +class ConditionalIntensityMatrix: + """Abstracts the Conditional Intesity matrix of a node as aggregation of the state residence times vector + and state transition matrix and the actual CIM matrix. + + :param state_residence_times: state residence times vector + :type state_residence_times: numpy.array + :param state_transition_matrix: the transitions count matrix + :type state_transition_matrix: numpy.ndArray + :_cim: the actual cim of the node + """ + def __init__(self, state_residence_times: np.array, state_transition_matrix: np.array): + """Constructor Method + """ + self._state_residence_times = state_residence_times + self._state_transition_matrix = state_transition_matrix + self._cim = self.state_transition_matrix.astype(np.float64) + + def compute_cim_coefficients(self): + """Compute the coefficients of the matrix _cim by using the following equality q_xx' = M[x, x'] / T[x]. + The class member ``_cim`` will contain the computed cim + """ + np.fill_diagonal(self._cim, self._cim.diagonal() * -1) + self._cim = ((self._cim.T + 1) / (self._state_residence_times + 1)).T + + @property + def state_residence_times(self): + return self._state_residence_times + + @property + def state_transition_matrix(self): + return self._state_transition_matrix + + @property + def cim(self): + return self._cim + + def __repr__(self): + return 'CIM:\n' + str(self.cim) + diff --git a/main_package/classes/structure_graph/network_graph.py b/main_package/classes/structure_graph/network_graph.py index d0d0992..813eb92 100644 --- a/main_package/classes/structure_graph/network_graph.py +++ b/main_package/classes/structure_graph/network_graph.py @@ -4,37 +4,32 @@ import typing import networkx as nx import numpy as np -import sys -sys.path.append('../') +from .structure import Structure -class NetworkGraph: - """ - Abstracts the infos contained in the Structure class in the form of a directed graph. - Has the task of creating all the necessary filtering structures for parameters estimation - - :graph_struct: the Structure object from which infos about the net will be extracted - :graph: directed graph - :nodes_labels: the symbolic names of the variables - :nodes_indexes: the indexes of the nodes - :nodes_values: the cardinalites of the nodes - :aggregated_info_about_nodes_parents: a structure that contains all the necessary infos about every parents of every - node in the net - :_fancy_indexing: the indexes of every parent of every node in the net + +class NetworkGraph(object): + """Abstracts the infos contained in the Structure class in the form of a directed graph. + Has the task of creating all the necessary filtering and indexing structures for parameters estimation + + :param graph_struct: the ``Structure`` object from which infos about the net will be extracted + :type graph_struct: Structure + :_graph: directed graph + :_aggregated_info_about_nodes_parents: a structure that contains all the necessary infos + about every parents of the node of which all the indexing and filtering structures will be constructed. :_time_scalar_indexing_structure: the indexing structure for state res time estimation :_transition_scalar_indexing_structure: the indexing structure for transition computation :_time_filtering: the columns filtering structure used in the computation of the state res times - :_transition_filtering: the columns filtering structure used in the computation of the transition from one state to another - :self._p_combs_structure: all the possible parents states combination for every node in the net + :_transition_filtering: the columns filtering structure used in the computation of the transition + from one state to another + :_p_combs_structure: all the possible parents states combination for the node of interest """ - def __init__(self, graph_struct): - self.graph_struct = graph_struct - self.graph = nx.DiGraph() - self._nodes_indexes = self.graph_struct.nodes_indexes - self._nodes_labels = self.graph_struct.nodes_labels - self._nodes_values = self.graph_struct.nodes_values - self.aggregated_info_about_nodes_parents = None - self._fancy_indexing = None + def __init__(self, graph_struct: Structure): + """Constructor Method + """ + self._graph_struct = graph_struct + self._graph = nx.DiGraph() + self._aggregated_info_about_nodes_parents = None self._time_scalar_indexing_structure = None self._transition_scalar_indexing_structure = None self._time_filtering = None @@ -51,44 +46,41 @@ class NetworkGraph: self.build_transition_columns_filtering_structure() self._p_combs_structure = self.build_p_combs_structure() - def fast_init(self, node_id: str): - """ - Initializes all the necessary structures for parameters estimation of the node identified by the label node_id - Parameters: - node_id: the label of the node - Returns: - void - """ - self.add_nodes(self._nodes_labels) - self.add_edges(self.graph_struct.edges) - self.aggregated_info_about_nodes_parents = self.get_ordered_by_indx_set_of_parents(node_id) - self._fancy_indexing = self.aggregated_info_about_nodes_parents[1] - p_indxs = self._fancy_indexing - p_vals = self.aggregated_info_about_nodes_parents[2] - self._time_scalar_indexing_structure = self.build_time_scalar_indexing_structure_for_a_node(node_id, - p_vals) - self._transition_scalar_indexing_structure = self.build_transition_scalar_indexing_structure_for_a_node(node_id, - p_vals) - node_indx = self.get_node_indx(node_id) - self._time_filtering = self.build_time_columns_filtering_for_a_node(node_indx, p_indxs) - self._transition_filtering = self.build_transition_filtering_for_a_node(node_indx, p_indxs) - self._p_combs_structure = self.build_p_comb_structure_for_a_node(p_vals) + def fast_init(self, node_id: str) -> None: + """Initializes all the necessary structures for parameters estimation of the node identified by the label + node_id - def add_nodes(self, list_of_nodes: typing.List): + :param node_id: the label of the node + :type node_id: string """ - Adds the nodes to the graph contained in the list of nodes list_of_nodes. + self.add_nodes(self._graph_struct.nodes_labels) + self.add_edges(self._graph_struct.edges) + self._aggregated_info_about_nodes_parents = self.get_ordered_by_indx_set_of_parents(node_id) + p_indxs = self._aggregated_info_about_nodes_parents[1] + p_vals = self._aggregated_info_about_nodes_parents[2] + node_states = self.get_states_number(node_id) + node_indx = self.get_node_indx(node_id) + cols_number = self._graph_struct.total_variables_number + self._time_scalar_indexing_structure = NetworkGraph.\ + build_time_scalar_indexing_structure_for_a_node(node_states, p_vals) + self._transition_scalar_indexing_structure = NetworkGraph.\ + build_transition_scalar_indexing_structure_for_a_node(node_states, p_vals) + self._time_filtering = NetworkGraph.build_time_columns_filtering_for_a_node(node_indx, p_indxs) + self._transition_filtering = NetworkGraph.build_transition_filtering_for_a_node(node_indx, p_indxs, cols_number) + self._p_combs_structure = NetworkGraph.build_p_comb_structure_for_a_node(p_vals) + + def add_nodes(self, list_of_nodes: typing.List) -> None: + """Adds the nodes to the ``_graph`` contained in the list of nodes ``list_of_nodes``. Sets all the properties that identify a nodes (index, positional index, cardinality) - Parameters: - list_of_nodes: the nodes to add to graph - Returns: - void + :param list_of_nodes: the nodes to add to ``_graph`` + :type list_of_nodes: List """ - nodes_indxs = self._nodes_indexes - nodes_vals = self.graph_struct.nodes_values + nodes_indxs = self._graph_struct.nodes_indexes + nodes_vals = self._graph_struct.nodes_values pos = 0 for id, node_indx, node_val in zip(list_of_nodes, nodes_indxs, nodes_vals): - self.graph.add_node(id, indx=node_indx, val=node_val, pos_indx=pos) + self._graph.add_node(id, indx=node_indx, val=node_val, pos_indx=pos) pos += 1 def has_edge(self,edge:tuple)-> bool: @@ -100,135 +92,139 @@ class NetworkGraph: Returns: bool """ - return self.graph.has_edge(edge[0],edge[1]) + return self._graph.has_edge(edge[0],edge[1]) - def add_edges(self, list_of_edges: typing.List): - """ - Add the edges to the graph contained in the list list_of_edges. + def add_edges(self, list_of_edges: typing.List) -> None: + """Add the edges to the ``_graph`` contained in the list ``list_of_edges``. - Parameters: - list_of_edges - Returns: - void + :param list_of_edges: the list containing of tuples containing the edges + :type list_of_edges: List """ - self.graph.add_edges_from(list_of_edges) + self._graph.add_edges_from(list_of_edges) - def remove_edges(self, list_of_edges: typing.List): + def remove_node(self, node_id: str) -> None: + """Remove the node ``node_id`` from all the class members. + Initialize all the filtering/indexing structures. """ - Remove the edges to the graph contained in the list list_of_edges. + self._graph.remove_node(node_id) + self._graph_struct.remove_node(node_id) + self.clear_indexing_filtering_structures() - Parameters: - list_of_edges - Returns: - void + def clear_indexing_filtering_structures(self) -> None: + """Initialize all the filtering/indexing structures. """ - self.graph.remove_edges_from(list_of_edges) + self._aggregated_info_about_nodes_parents = None + self._time_scalar_indexing_structure = None + self._transition_scalar_indexing_structure = None + self._time_filtering = None + self._transition_filtering = None + self._p_combs_structure = None - def get_ordered_by_indx_set_of_parents(self, node: str): - """ - Builds the aggregated structure that holds all the infos relative to the parent set of the node, namely + def get_ordered_by_indx_set_of_parents(self, node: str) -> typing.Tuple: + """Builds the aggregated structure that holds all the infos relative to the parent set of the node, namely (parents_labels, parents_indexes, parents_cardinalities). - N.B. The parent set is sorted using the list of sorted nodes nodes - - Parameters: - node: the label of the node - Returns: - a tuple containing all the parent set infos + :param node: the label of the node + :type node: string + :return: a tuple containing all the parent set infos + :rtype: Tuple """ parents = self.get_parents_by_id(node) - nodes = self._nodes_labels + nodes = self._graph_struct.nodes_labels d = {v: i for i, v in enumerate(nodes)} sorted_parents = sorted(parents, key=lambda v: d[v]) get_node_indx = self.get_node_indx p_indxes = [get_node_indx(node) for node in sorted_parents] p_values = [self.get_states_number(node) for node in sorted_parents] - return (sorted_parents, p_indxes, p_values) - - def get_ord_set_of_par_of_all_nodes(self): - get_ordered_by_indx_set_of_parents = self.get_ordered_by_indx_set_of_parents - result = [get_ordered_by_indx_set_of_parents(node) for node in self._nodes_labels] - return result - - def get_ordered_by_indx_parents_values_for_all_nodes(self): - pars_values = [i[2] for i in self.aggregated_info_about_nodes_parents] - return pars_values + return sorted_parents, p_indxes, p_values - def build_fancy_indexing_structure(self, start_indx): - if start_indx > 0: - pass - else: - fancy_indx = [i[1] for i in self.aggregated_info_about_nodes_parents] - return fancy_indx - - def build_time_scalar_indexing_structure_for_a_node(self, node_id: str, parents_vals: typing.List) -> np.ndarray: + def remove_edges(self, list_of_edges: typing.List) -> None: """ - Builds an indexing structure for the computation of state residence times values. - + Remove the edges to the graph contained in the list list_of_edges. Parameters: - node_id: the node label - parents_vals: the caridinalites of the node's parents + list_of_edges Returns: - a numpy array. - + void + """ + self._graph.remove_edges_from(list_of_edges) + + @staticmethod + def build_time_scalar_indexing_structure_for_a_node(node_states: int, + parents_vals: typing.List) -> np.ndarray: + """Builds an indexing structure for the computation of state residence times values. + + :param node_states: the node cardinality + :type node_states: int + :param parents_vals: the caridinalites of the node's parents + :type parents_vals: List + :return: The time indexing structure + :rtype: numpy.ndArray """ - T_vector = np.array([self.get_states_number(node_id)]) + T_vector = np.array([node_states]) T_vector = np.append(T_vector, parents_vals) T_vector = T_vector.cumprod().astype(np.int) return T_vector - - def build_transition_scalar_indexing_structure_for_a_node(self, node_id: str, parents_vals: typing.List) -> np.ndarray: - """ - Builds an indexing structure for the computation of state transitions values. - - Parameters: - node_id: the node label - parents_vals: the caridinalites of the node's parents - Returns: - a numpy array. - + @staticmethod + def build_transition_scalar_indexing_structure_for_a_node(node_states_number: int, parents_vals: typing.List) \ + -> np.ndarray: + """Builds an indexing structure for the computation of state transitions values. + + :param node_states_number: the node cardinality + :type node_states_number: int + :param parents_vals: the caridinalites of the node's parents + :type parents_vals: List + :return: The transition indexing structure + :rtype: numpy.ndArray """ - node_states_number = self.get_states_number(node_id) M_vector = np.array([node_states_number, node_states_number]) M_vector = np.append(M_vector, parents_vals) M_vector = M_vector.cumprod().astype(np.int) return M_vector - def build_time_columns_filtering_for_a_node(self, node_indx: int, p_indxs: typing.List) -> np.ndarray: + @staticmethod + def build_time_columns_filtering_for_a_node(node_indx: int, p_indxs: typing.List) -> np.ndarray: """ - Builds the necessary structure to filter the desired columns indicated by node_indx and p_indxs in the dataset. + Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs`` + in the dataset. This structute will be used in the computation of the state res times. - Parameters: - node_indx: the index of the node - p_indxs: the indexes of the node's parents - Returns: - a numpy array + :param node_indx: the index of the node + :type node_indx: int + :param p_indxs: the indexes of the node's parents + :type p_indxs: List + :return: The filtering structure for times estimation + :rtype: numpy.ndArray """ return np.append(np.array([node_indx], dtype=np.int), p_indxs).astype(np.int) - def build_transition_filtering_for_a_node(self, node_indx, p_indxs) -> np.ndarray: + @staticmethod + def build_transition_filtering_for_a_node(node_indx: int, p_indxs: typing.List, nodes_number: int) \ + -> np.ndarray: + """Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs`` + in the dataset. + This structure will be used in the computation of the state transitions values. + :param node_indx: the index of the node + :type node_indx: int + :param p_indxs: the indexes of the node's parents + :type p_indxs: List + :param nodes_number: the total number of nodes in the dataset + :type nodes_number: int + :return: The filtering structure for transitions estimation + :rtype: numpy.ndArray """ - Builds the necessary structure to filter the desired columns indicated by node_indx and p_indxs in the dataset. - This structute will be used in the computation of the state transitions values. - Parameters: - node_indx: the index of the node - p_indxs: the indexes of the node's parents - Returns: - a numpy array - """ - nodes_number = self.graph_struct.total_variables_number return np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int) - def build_p_comb_structure_for_a_node(self, parents_values: typing.List) -> np.ndarray: + @staticmethod + def build_p_comb_structure_for_a_node(parents_values: typing.List) -> np.ndarray: """ - Builds the combinatory structure that contains the combinations of all the values contained in parents_values. + Builds the combinatorial structure that contains the combinations of all the values contained in + ``parents_values``. - Parameters: - parents_values: the cardinalities of the nodes - Returns: - a numpy matrix containinga grid of the combinations + :param parents_values: the cardinalities of the nodes + :type parents_values: List + :return: A numpy matrix containing a grid of the combinations + :rtype: numpy.ndArray """ tmp = [] for val in parents_values: @@ -243,81 +239,57 @@ class NetworkGraph: parents_comb = np.array([[]], dtype=np.int) return parents_comb - def build_time_columns_filtering_structure(self): - nodes_indxs = self._nodes_indexes - self._time_filtering = [np.append(np.array([node_indx], dtype=np.int), p_indxs).astype(np.int) - for node_indx, p_indxs in zip(nodes_indxs, self._fancy_indexing)] - - def build_transition_columns_filtering_structure(self): - nodes_number = self.graph_struct.total_variables_number - nodes_indxs = self._nodes_indexes - self._transition_filtering = [np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int) - for node_indx, p_indxs in zip(nodes_indxs, - self._fancy_indexing)] - - def build_scalar_indexing_structures(self): - parents_values_for_all_nodes = self.get_ordered_by_indx_parents_values_for_all_nodes() - build_transition_scalar_indexing_structure_for_a_node = self.build_transition_scalar_indexing_structure_for_a_node - build_time_scalar_indexing_structure_for_a_node = self.build_time_scalar_indexing_structure_for_a_node - aggr = [(build_transition_scalar_indexing_structure_for_a_node(node_id, p_vals), - build_time_scalar_indexing_structure_for_a_node(node_id, p_vals)) - for node_id, p_vals in - zip(self._nodes_labels, - parents_values_for_all_nodes)] - self._transition_scalar_indexing_structure = [i[0] for i in aggr] - self._time_scalar_indexing_structure = [i[1] for i in aggr] - - def build_p_combs_structure(self): - parents_values_for_all_nodes = self.get_ordered_by_indx_parents_values_for_all_nodes() - p_combs_struct = [self.build_p_comb_structure_for_a_node(p_vals) for p_vals in parents_values_for_all_nodes] - return p_combs_struct - - def get_parents_by_id(self, node_id): - return list(self.graph.predecessors(node_id)) - - def get_states_number(self, node_id): - return self.graph.nodes[node_id]['val'] - - def get_node_indx(self, node_id): - return nx.get_node_attributes(self.graph, 'indx')[node_id] - - def get_positional_node_indx(self, node_id): - return self.graph.nodes[node_id]['pos_indx'] + def get_parents_by_id(self, node_id) -> typing.List: + """Returns a list of labels of the parents of the node ``node_id`` + + :param node_id: the node label + :type node_id: string + :return: a List of labels of the parents + :rtype: List + """ + return list(self._graph.predecessors(node_id)) + + def get_states_number(self, node_id) -> int: + return self._graph.nodes[node_id]['val'] + + def get_node_indx(self, node_id) -> int: + return nx.get_node_attributes(self._graph, 'indx')[node_id] + + def get_positional_node_indx(self, node_id) -> int: + return self._graph.nodes[node_id]['pos_indx'] @property - def nodes(self): - return self._nodes_labels + def nodes(self) -> typing.List: + return self._graph_struct.nodes_labels @property - def edges(self): - return list(self.graph.edges) + def edges(self) -> typing.List: + return list(self._graph.edges) @property - def nodes_indexes(self): - return self._nodes_indexes + def nodes_indexes(self) -> np.ndarray: + return self._graph_struct.nodes_indexes @property - def nodes_values(self): - return self._nodes_values + def nodes_values(self) -> np.ndarray: + return self._graph_struct.nodes_values @property - def time_scalar_indexing_strucure(self): + def time_scalar_indexing_strucure(self) -> np.ndarray: return self._time_scalar_indexing_structure @property - def time_filtering(self): + def time_filtering(self) -> np.ndarray: return self._time_filtering @property - def transition_scalar_indexing_structure(self): + def transition_scalar_indexing_structure(self) -> np.ndarray: return self._transition_scalar_indexing_structure @property - def transition_filtering(self): + def transition_filtering(self) -> np.ndarray: return self._transition_filtering @property - def p_combs(self): + def p_combs(self) -> np.ndarray: return self._p_combs_structure - - diff --git a/main_package/classes/structure_graph/network_graph.py.bak b/main_package/classes/structure_graph/network_graph.py.bak new file mode 100644 index 0000000..b1ced85 --- /dev/null +++ b/main_package/classes/structure_graph/network_graph.py.bak @@ -0,0 +1,285 @@ + +import typing + +import networkx as nx +import numpy as np + +from .structure import Structure + + +class NetworkGraph(object): + """Abstracts the infos contained in the Structure class in the form of a directed graph. + Has the task of creating all the necessary filtering and indexing structures for parameters estimation + + :param graph_struct: the ``Structure`` object from which infos about the net will be extracted + :type graph_struct: Structure + :_graph: directed graph + :_aggregated_info_about_nodes_parents: a structure that contains all the necessary infos + about every parents of the node of which all the indexing and filtering structures will be constructed. + :_time_scalar_indexing_structure: the indexing structure for state res time estimation + :_transition_scalar_indexing_structure: the indexing structure for transition computation + :_time_filtering: the columns filtering structure used in the computation of the state res times + :_transition_filtering: the columns filtering structure used in the computation of the transition + from one state to another + :_p_combs_structure: all the possible parents states combination for the node of interest + """ + + def __init__(self, graph_struct: Structure): + """Constructor Method + """ + self._graph_struct = graph_struct + self._graph = nx.DiGraph() + self._aggregated_info_about_nodes_parents = None + self._time_scalar_indexing_structure = None + self._transition_scalar_indexing_structure = None + self._time_filtering = None + self._transition_filtering = None + self._p_combs_structure = None + + def init_graph(self): + self.add_nodes(self._nodes_labels) + self.add_edges(self.graph_struct.edges) + self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes() + self._fancy_indexing = self.build_fancy_indexing_structure(0) + self.build_scalar_indexing_structures() + self.build_time_columns_filtering_structure() + self.build_transition_columns_filtering_structure() + self._p_combs_structure = self.build_p_combs_structure() + + def fast_init(self, node_id: str) -> None: + """Initializes all the necessary structures for parameters estimation of the node identified by the label + node_id + + :param node_id: the label of the node + :type node_id: string + """ + self.add_nodes(self._graph_struct.nodes_labels) + self.add_edges(self._graph_struct.edges) + self._aggregated_info_about_nodes_parents = self.get_ordered_by_indx_set_of_parents(node_id) + p_indxs = self._aggregated_info_about_nodes_parents[1] + p_vals = self._aggregated_info_about_nodes_parents[2] + node_states = self.get_states_number(node_id) + node_indx = self.get_node_indx(node_id) + cols_number = self._graph_struct.total_variables_number + self._time_scalar_indexing_structure = NetworkGraph.\ + build_time_scalar_indexing_structure_for_a_node(node_states, p_vals) + self._transition_scalar_indexing_structure = NetworkGraph.\ + build_transition_scalar_indexing_structure_for_a_node(node_states, p_vals) + self._time_filtering = NetworkGraph.build_time_columns_filtering_for_a_node(node_indx, p_indxs) + self._transition_filtering = NetworkGraph.build_transition_filtering_for_a_node(node_indx, p_indxs, cols_number) + self._p_combs_structure = NetworkGraph.build_p_comb_structure_for_a_node(p_vals) + + def add_nodes(self, list_of_nodes: typing.List) -> None: + """Adds the nodes to the ``_graph`` contained in the list of nodes ``list_of_nodes``. + Sets all the properties that identify a nodes (index, positional index, cardinality) + + :param list_of_nodes: the nodes to add to ``_graph`` + :type list_of_nodes: List + """ + nodes_indxs = self._graph_struct.nodes_indexes + nodes_vals = self._graph_struct.nodes_values + pos = 0 + for id, node_indx, node_val in zip(list_of_nodes, nodes_indxs, nodes_vals): + self._graph.add_node(id, indx=node_indx, val=node_val, pos_indx=pos) + pos += 1 + + def has_edge(self,edge:tuple)-> bool: + """ + Check if the graph contains a specific edge + + Parameters: + edge: a tuple that rappresents the edge + Returns: + bool + """ + return self.graph.has_edge(edge[0],edge[1]) + + def add_edges(self, list_of_edges: typing.List) -> None: + """Add the edges to the ``_graph`` contained in the list ``list_of_edges``. + + :param list_of_edges: the list containing of tuples containing the edges + :type list_of_edges: List + """ + self._graph.add_edges_from(list_of_edges) + + def remove_node(self, node_id: str) -> None: + """Remove the node ``node_id`` from all the class members. + Initialize all the filtering/indexing structures. + """ + self._graph.remove_node(node_id) + self._graph_struct.remove_node(node_id) + self.clear_indexing_filtering_structures() + + def clear_indexing_filtering_structures(self) -> None: + """Initialize all the filtering/indexing structures. + """ + self._aggregated_info_about_nodes_parents = None + self._time_scalar_indexing_structure = None + self._transition_scalar_indexing_structure = None + self._time_filtering = None + self._transition_filtering = None + self._p_combs_structure = None + + def get_ordered_by_indx_set_of_parents(self, node: str) -> typing.Tuple: + """Builds the aggregated structure that holds all the infos relative to the parent set of the node, namely + (parents_labels, parents_indexes, parents_cardinalities). + + :param node: the label of the node + :type node: string + :return: a tuple containing all the parent set infos + :rtype: Tuple + """ + parents = self.get_parents_by_id(node) + nodes = self._graph_struct.nodes_labels + d = {v: i for i, v in enumerate(nodes)} + sorted_parents = sorted(parents, key=lambda v: d[v]) + get_node_indx = self.get_node_indx + p_indxes = [get_node_indx(node) for node in sorted_parents] + p_values = [self.get_states_number(node) for node in sorted_parents] + return sorted_parents, p_indxes, p_values + + @staticmethod + def build_time_scalar_indexing_structure_for_a_node(node_states: int, + parents_vals: typing.List) -> np.ndarray: + """Builds an indexing structure for the computation of state residence times values. + + :param node_states: the node cardinality + :type node_states: int + :param parents_vals: the caridinalites of the node's parents + :type parents_vals: List + :return: The time indexing structure + :rtype: numpy.ndArray + """ + T_vector = np.array([node_states]) + T_vector = np.append(T_vector, parents_vals) + T_vector = T_vector.cumprod().astype(np.int) + return T_vector + + @staticmethod + def build_transition_scalar_indexing_structure_for_a_node(node_states_number: int, parents_vals: typing.List) \ + -> np.ndarray: + """Builds an indexing structure for the computation of state transitions values. + + :param node_states_number: the node cardinality + :type node_states_number: int + :param parents_vals: the caridinalites of the node's parents + :type parents_vals: List + :return: The transition indexing structure + :rtype: numpy.ndArray + """ + M_vector = np.array([node_states_number, + node_states_number]) + M_vector = np.append(M_vector, parents_vals) + M_vector = M_vector.cumprod().astype(np.int) + return M_vector + + @staticmethod + def build_time_columns_filtering_for_a_node(node_indx: int, p_indxs: typing.List) -> np.ndarray: + """ + Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs`` + in the dataset. + This structute will be used in the computation of the state res times. + :param node_indx: the index of the node + :type node_indx: int + :param p_indxs: the indexes of the node's parents + :type p_indxs: List + :return: The filtering structure for times estimation + :rtype: numpy.ndArray + """ + return np.append(np.array([node_indx], dtype=np.int), p_indxs).astype(np.int) + + @staticmethod + def build_transition_filtering_for_a_node(node_indx: int, p_indxs: typing.List, nodes_number: int) \ + -> np.ndarray: + """Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs`` + in the dataset. + This structure will be used in the computation of the state transitions values. + :param node_indx: the index of the node + :type node_indx: int + :param p_indxs: the indexes of the node's parents + :type p_indxs: List + :param nodes_number: the total number of nodes in the dataset + :type nodes_number: int + :return: The filtering structure for transitions estimation + :rtype: numpy.ndArray + """ + return np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int) + + @staticmethod + def build_p_comb_structure_for_a_node(parents_values: typing.List) -> np.ndarray: + """ + Builds the combinatorial structure that contains the combinations of all the values contained in + ``parents_values``. + + :param parents_values: the cardinalities of the nodes + :type parents_values: List + :return: A numpy matrix containing a grid of the combinations + :rtype: numpy.ndArray + """ + tmp = [] + for val in parents_values: + tmp.append([x for x in range(val)]) + if len(parents_values) > 0: + parents_comb = np.array(np.meshgrid(*tmp)).T.reshape(-1, len(parents_values)) + if len(parents_values) > 1: + tmp_comb = parents_comb[:, 1].copy() + parents_comb[:, 1] = parents_comb[:, 0].copy() + parents_comb[:, 0] = tmp_comb + else: + parents_comb = np.array([[]], dtype=np.int) + return parents_comb + + def get_parents_by_id(self, node_id) -> typing.List: + """Returns a list of labels of the parents of the node ``node_id`` + + :param node_id: the node label + :type node_id: string + :return: a List of labels of the parents + :rtype: List + """ + return list(self._graph.predecessors(node_id)) + + def get_states_number(self, node_id) -> int: + return self._graph.nodes[node_id]['val'] + + def get_node_indx(self, node_id) -> int: + return nx.get_node_attributes(self._graph, 'indx')[node_id] + + def get_positional_node_indx(self, node_id) -> int: + return self._graph.nodes[node_id]['pos_indx'] + + @property + def nodes(self) -> typing.List: + return self._graph_struct.nodes_labels + + @property + def edges(self) -> typing.List: + return list(self._graph.edges) + + @property + def nodes_indexes(self) -> np.ndarray: + return self._graph_struct.nodes_indexes + + @property + def nodes_values(self) -> np.ndarray: + return self._graph_struct.nodes_values + + @property + def time_scalar_indexing_strucure(self) -> np.ndarray: + return self._time_scalar_indexing_structure + + @property + def time_filtering(self) -> np.ndarray: + return self._time_filtering + + @property + def transition_scalar_indexing_structure(self) -> np.ndarray: + return self._transition_scalar_indexing_structure + + @property + def transition_filtering(self) -> np.ndarray: + return self._transition_filtering + + @property + def p_combs(self) -> np.ndarray: + return self._p_combs_structure \ No newline at end of file diff --git a/main_package/classes/structure_graph/sample_path.py b/main_package/classes/structure_graph/sample_path.py index 5951661..80b51d9 100644 --- a/main_package/classes/structure_graph/sample_path.py +++ b/main_package/classes/structure_graph/sample_path.py @@ -1,14 +1,11 @@ -import sys -sys.path.append('../') -import pandas as pd + import numpy as np +import pandas as pd -import structure_graph.abstract_sample_path as asam -import utility.json_importer as imp -from structure_graph.structure import Structure -from structure_graph.trajectory import Trajectory -import utility.abstract_importer as ai +from .structure import Structure +from .trajectory import Trajectory +from ..utility.abstract_importer import AbstractImporter @@ -23,7 +20,7 @@ class SamplePath(object): :_structure: the ``Structure`` Object that will contain all the structural infos about the net :_total_variables_count: the number of variables in the net """ - def __init__(self, importer: ai.AbstractImporter): + def __init__(self, importer: AbstractImporter): """Constructor Method """ self._importer = importer diff --git a/main_package/classes/structure_graph/sample_path.py.bak b/main_package/classes/structure_graph/sample_path.py.bak new file mode 100644 index 0000000..4162b80 --- /dev/null +++ b/main_package/classes/structure_graph/sample_path.py.bak @@ -0,0 +1,95 @@ +import sys +sys.path.append('../') + + +import numpy as np +import pandas as pd + +import .abstract_sample_path as asam +import ..utility.json_importer as imp +from .structure import Structure +from .trajectory import Trajectory +import ..utility.abstract_importer as ai + + + +class SamplePath(object): + """Aggregates all the informations about the trajectories, the real structure of the sampled net and variables + cardinalites. Has the task of creating the objects ``Trajectory`` and ``Structure`` that will + contain the mentioned data. + + :param importer: the Importer object which contains the imported and processed data + :type importer: AbstractImporter + :_trajectories: the ``Trajectory`` object that will contain all the concatenated trajectories + :_structure: the ``Structure`` Object that will contain all the structural infos about the net + :_total_variables_count: the number of variables in the net + """ + def __init__(self, importer: ai.AbstractImporter): + """Constructor Method + """ + self._importer = importer + if self._importer._df_variables is None or self._importer._concatenated_samples is None: + raise RuntimeError('The importer object has to contain the all processed data!') + if self._importer._df_variables.empty: + raise RuntimeError('The importer object has to contain the all processed data!') + if isinstance(self._importer._concatenated_samples, pd.DataFrame): + if self._importer._concatenated_samples.empty: + raise RuntimeError('The importer object has to contain the all processed data!') + if isinstance(self._importer._concatenated_samples, np.ndarray): + if self._importer._concatenated_samples.size == 0: + raise RuntimeError('The importer object has to contain the all processed data!') + self._trajectories = None + self._structure = None + self._total_variables_count = None + + def build_trajectories(self) -> None: + """Builds the Trajectory object that will contain all the trajectories. + Clears all the unused dataframes in ``_importer`` Object + """ + self._trajectories = \ + Trajectory(self._importer.build_list_of_samples_array(self._importer.concatenated_samples), + len(self._importer.sorter) + 1) + self._importer.clear_concatenated_frame() + + def build_structure(self) -> None: + """ + Builds the ``Structure`` object that aggregates all the infos about the net. + """ + if self._importer.sorter != self._importer.variables.iloc[:, 0].to_list(): + raise RuntimeError("The Dataset columns order have to match the order of labels in the variables Frame!") + + self._total_variables_count = len(self._importer.sorter) + labels = self._importer.variables.iloc[:, 0].to_list() + indxs = self._importer.variables.index.to_numpy() + vals = self._importer.variables.iloc[:, 1].to_numpy() + if self._importer.structure is None or self._importer.structure.empty: + edges = [] + else: + edges = list(self._importer.structure.to_records(index=False)) + self._structure = Structure(labels, indxs, vals, edges, + self._total_variables_count) + + def clear_memory(self): + self._importer._raw_data = [] + + @property + def trajectories(self) -> Trajectory: + return self._trajectories + + @property + def structure(self) -> Structure: + return self._structure + + @property + def total_variables_count(self) -> int: + return self._total_variables_count + + @property + def has_prior_net_structure(self) -> bool: + return bool(self._structure.edges) + + + + + + diff --git a/main_package/classes/structure_graph/set_of_cims.py b/main_package/classes/structure_graph/set_of_cims.py index 62d2365..81caff5 100644 --- a/main_package/classes/structure_graph/set_of_cims.py +++ b/main_package/classes/structure_graph/set_of_cims.py @@ -1,11 +1,10 @@ -import sys -sys.path.append('../') + import typing import numpy as np -import structure_graph.conditional_intensity_matrix as cim +from .conditional_intensity_matrix import ConditionalIntensityMatrix class SetOfCims(object): @@ -58,7 +57,7 @@ class SetOfCims(object): :type transition_matrices: numpy.ndArray """ for state_res_time_vector, transition_matrix in zip(state_res_times, transition_matrices): - cim_to_add = cim.ConditionalIntensityMatrix(state_res_time_vector, transition_matrix) + cim_to_add = ConditionalIntensityMatrix(state_res_time_vector, transition_matrix) cim_to_add.compute_cim_coefficients() self._actual_cims.append(cim_to_add) self._actual_cims = np.array(self._actual_cims) diff --git a/main_package/classes/structure_graph/set_of_cims.py.bak b/main_package/classes/structure_graph/set_of_cims.py.bak new file mode 100644 index 0000000..62d2365 --- /dev/null +++ b/main_package/classes/structure_graph/set_of_cims.py.bak @@ -0,0 +1,98 @@ +import sys +sys.path.append('../') + +import typing + +import numpy as np + +import structure_graph.conditional_intensity_matrix as cim + + +class SetOfCims(object): + """Aggregates all the CIMS of the node identified by the label _node_id. + + :param node_id: the node label + :type node_ind: string + :param parents_states_number: the cardinalities of the parents + :type parents_states_number: List + :param node_states_number: the caridinality of the node + :type node_states_number: int + :param p_combs: the p_comb structure bound to this node + :type p_combs: numpy.ndArray + :_state_residence_time: matrix containing all the state residence time vectors for the node + :_transition_matrices: matrix containing all the transition matrices for the node + :_actual_cims: the cims of the node + """ + + def __init__(self, node_id: str, parents_states_number: typing.List, node_states_number: int, p_combs: np.ndarray): + """Constructor Method + """ + self._node_id = node_id + self._parents_states_number = parents_states_number + self._node_states_number = node_states_number + self._actual_cims = [] + self._state_residence_times = None + self._transition_matrices = None + self._p_combs = p_combs + self.build_times_and_transitions_structures() + + def build_times_and_transitions_structures(self) -> None: + """Initializes at the correct dimensions the state residence times matrix and the state transition matrices. + """ + if not self._parents_states_number: + self._state_residence_times = np.zeros((1, self._node_states_number), dtype=np.float) + self._transition_matrices = np.zeros((1, self._node_states_number, self._node_states_number), dtype=np.int) + else: + self._state_residence_times = \ + np.zeros((np.prod(self._parents_states_number), self._node_states_number), dtype=np.float) + self._transition_matrices = np.zeros([np.prod(self._parents_states_number), self._node_states_number, + self._node_states_number], dtype=np.int) + + def build_cims(self, state_res_times: np.ndarray, transition_matrices: np.ndarray) -> None: + """Build the ``ConditionalIntensityMatrix`` objects given the state residence times and transitions matrices. + Compute the cim coefficients.The class member ``_actual_cims`` will contain the computed cims. + + :param state_res_times: the state residence times matrix + :type state_res_times: numpy.ndArray + :param transition_matrices: the transition matrices + :type transition_matrices: numpy.ndArray + """ + for state_res_time_vector, transition_matrix in zip(state_res_times, transition_matrices): + cim_to_add = cim.ConditionalIntensityMatrix(state_res_time_vector, transition_matrix) + cim_to_add.compute_cim_coefficients() + self._actual_cims.append(cim_to_add) + self._actual_cims = np.array(self._actual_cims) + self._transition_matrices = None + self._state_residence_times = None + + def filter_cims_with_mask(self, mask_arr: np.ndarray, comb: typing.List) -> np.ndarray: + """Filter the cims contained in the array ``_actual_cims`` given the boolean mask ``mask_arr`` and the index + ``comb``. + + :param mask_arr: the boolean mask that indicates which parent to consider + :type mask_arr: numpy.array + :param comb: the state/s of the filtered parents + :type comb: numpy.array + :return: Array of ``ConditionalIntensityMatrix`` objects + :rtype: numpy.array + """ + if mask_arr.size <= 1: + return self._actual_cims + else: + flat_indxs = np.argwhere(np.all(self._p_combs[:, mask_arr] == comb, axis=1)).ravel() + return self._actual_cims[flat_indxs] + + @property + def actual_cims(self) -> np.ndarray: + return self._actual_cims + + @property + def p_combs(self) -> np.ndarray: + return self._p_combs + + def get_cims_number(self): + return len(self._actual_cims) + + + + diff --git a/main_package/classes/structure_graph/structure.py b/main_package/classes/structure_graph/structure.py index d739d7f..a9d60cc 100644 --- a/main_package/classes/structure_graph/structure.py +++ b/main_package/classes/structure_graph/structure.py @@ -1,38 +1,51 @@ -import sys -sys.path.append('../') import typing as ty import numpy as np -class Structure: - """ - Contains all the infos about the network structure(nodes names, nodes caridinalites, edges...) +class Structure(object): + """Contains all the infos about the network structure(nodes labels, nodes caridinalites, edges, indexes) - :nodes_labels_list: the symbolic names of the variables - :nodes_indexes_arr: the indexes of the nodes - :nodes_vals_arr: the cardinalites of the nodes - :edges_list: the edges of the network - :total_variables_number: the total number of variables in the net + :param nodes_labels_list: the symbolic names of the variables + :type nodes_labels_list: List + :param nodes_indexes_arr: the indexes of the nodes + :type nodes_indexes_arr: numpy.ndArray + :param nodes_vals_arr: the cardinalites of the nodes + :type nodes_vals_arr: numpy.ndArray + :param edges_list: the edges of the network + :type edges_list: List + :param total_variables_number: the total number of variables in the dataset + :type total_variables_number: int """ - def __init__(self, nodes_label_list: ty.List, node_indexes_arr: np.ndarray, nodes_vals_arr: np.ndarray, + def __init__(self, nodes_labels_list: ty.List, nodes_indexes_arr: np.ndarray, nodes_vals_arr: np.ndarray, edges_list: ty.List, total_variables_number: int): - self._nodes_labels_list = nodes_label_list - self._nodes_indexes_arr = node_indexes_arr + """Constructor Method + """ + self._nodes_labels_list = nodes_labels_list + self._nodes_indexes_arr = nodes_indexes_arr self._nodes_vals_arr = nodes_vals_arr self._edges_list = edges_list self._total_variables_number = total_variables_number + def remove_node(self, node_id: str) -> None: + """Remove the node ``node_id`` from all the class members. + The class member ``_total_variables_number`` since it refers to the total number of variables in the dataset. + """ + node_positional_indx = self._nodes_labels_list.index(node_id) + del self._nodes_labels_list[node_positional_indx] + self._nodes_indexes_arr = np.delete(self._nodes_indexes_arr, node_positional_indx) + self._nodes_vals_arr = np.delete(self._nodes_vals_arr, node_positional_indx) + self._edges_list = [(from_node, to_node) for (from_node, to_node) in self._edges_list if (from_node != node_id + and to_node != node_id)] + @property - def edges(self): - #records = self.structure_frame.to_records(index=False) - #edges_list = list(records) + def edges(self) -> ty.List: return self._edges_list @property - def nodes_labels(self): + def nodes_labels(self) -> ty.List: return self._nodes_labels_list @property @@ -44,10 +57,17 @@ class Structure: return self._nodes_vals_arr @property - def total_variables_number(self): + def total_variables_number(self) -> int: return self._total_variables_number def get_node_id(self, node_indx: int) -> str: + """Given the ``node_index`` returns the node label. + + :param node_indx: the node index + :type node_indx: int + :return: the node label + :rtype: string + """ return self._nodes_labels_list[node_indx] def clean_structure_edges(self): @@ -64,6 +84,13 @@ class Structure: return edge in self._edges_list def get_node_indx(self, node_id: str) -> int: + """Given the ``node_index`` returns the node label. + + :param node_id: the node label + :type node_id: string + :return: the node index + :rtype: int + """ pos_indx = self._nodes_labels_list.index(node_id) return self._nodes_indexes_arr[pos_indx] @@ -71,6 +98,13 @@ class Structure: return self._nodes_labels_list.index(node_id) def get_states_number(self, node: str) -> int: + """Given the node label ``node`` returns the cardinality of the node. + + :param node: the node label + :type node: string + :return: the node cardinality + :rtype: int + """ pos_indx = self._nodes_labels_list.index(node) return self._nodes_vals_arr[pos_indx] @@ -86,5 +120,5 @@ class Structure: np.array_equal(self._nodes_indexes_arr, other._nodes_indexes_arr) and \ self._edges_list == other._edges_list - return NotImplemented + return False diff --git a/main_package/classes/structure_graph/structure.py.bak b/main_package/classes/structure_graph/structure.py.bak new file mode 100644 index 0000000..0c74b60 --- /dev/null +++ b/main_package/classes/structure_graph/structure.py.bak @@ -0,0 +1,128 @@ +import sys +sys.path.append('../') + +import typing as ty + +import numpy as np + + +class Structure(object): + """Contains all the infos about the network structure(nodes labels, nodes caridinalites, edges, indexes) + + :param nodes_labels_list: the symbolic names of the variables + :type nodes_labels_list: List + :param nodes_indexes_arr: the indexes of the nodes + :type nodes_indexes_arr: numpy.ndArray + :param nodes_vals_arr: the cardinalites of the nodes + :type nodes_vals_arr: numpy.ndArray + :param edges_list: the edges of the network + :type edges_list: List + :param total_variables_number: the total number of variables in the dataset + :type total_variables_number: int + """ + + def __init__(self, nodes_labels_list: ty.List, nodes_indexes_arr: np.ndarray, nodes_vals_arr: np.ndarray, + edges_list: ty.List, total_variables_number: int): + """Constructor Method + """ + self._nodes_labels_list = nodes_labels_list + self._nodes_indexes_arr = nodes_indexes_arr + self._nodes_vals_arr = nodes_vals_arr + self._edges_list = edges_list + self._total_variables_number = total_variables_number + + def remove_node(self, node_id: str) -> None: + """Remove the node ``node_id`` from all the class members. + The class member ``_total_variables_number`` since it refers to the total number of variables in the dataset. + """ + node_positional_indx = self._nodes_labels_list.index(node_id) + del self._nodes_labels_list[node_positional_indx] + self._nodes_indexes_arr = np.delete(self._nodes_indexes_arr, node_positional_indx) + self._nodes_vals_arr = np.delete(self._nodes_vals_arr, node_positional_indx) + self._edges_list = [(from_node, to_node) for (from_node, to_node) in self._edges_list if (from_node != node_id + and to_node != node_id)] + + @property + def edges(self) -> ty.List: + return self._edges_list + + @property + def nodes_labels(self) -> ty.List: + return self._nodes_labels_list + + @property + def nodes_indexes(self) -> np.ndarray: + return self._nodes_indexes_arr + + @property + def nodes_values(self) -> np.ndarray: + return self._nodes_vals_arr + + @property + def total_variables_number(self) -> int: + return self._total_variables_number + + def get_node_id(self, node_indx: int) -> str: + """ + Given the ``node_index`` returns the node label. + + :param node_indx: the node index + :type node_indx: int + :return: the node label + :rtype: string + """ + return self._nodes_labels_list[node_indx] + + def clean_structure_edges(self): + self._edges_list = list() + + def add_edge(self,edge: tuple): + self._edges_list.append(tuple) + print(self._edges_list) + + def remove_edge(self,edge: tuple): + self._edges_list.remove(tuple) + + def contains_edge(self,edge:tuple) -> bool: + return edge in self._edges_list + + def get_node_indx(self, node_id: str) -> int: + """ + Given the ``node_index`` returns the node label. + + :param node_id: the node label + :type node_id: string + :return: the node index + :rtype: int + """ + pos_indx = self._nodes_labels_list.index(node_id) + return self._nodes_indexes_arr[pos_indx] + + def get_positional_node_indx(self, node_id: str) -> int: + return self._nodes_labels_list.index(node_id) + + def get_states_number(self, node: str) -> int: + """Given the node label ``node`` returns the cardinality of the node. + + :param node: the node label + :type node: string + :return: the node cardinality + :rtype: int + """ + pos_indx = self._nodes_labels_list.index(node) + return self._nodes_vals_arr[pos_indx] + + def __repr__(self): + return "Variables:\n" + str(self._nodes_labels_list) +"\nValues:\n"+ str(self._nodes_vals_arr) +\ + "\nEdges: \n" + str(self._edges_list) + + def __eq__(self, other): + """Overrides the default implementation""" + if isinstance(other, Structure): + return set(self._nodes_labels_list) == set(other._nodes_labels_list) and \ + np.array_equal(self._nodes_vals_arr, other._nodes_vals_arr) and \ + np.array_equal(self._nodes_indexes_arr, other._nodes_indexes_arr) and \ + self._edges_list == other._edges_list + + return False + diff --git a/main_package/classes/utility/cache.py b/main_package/classes/utility/cache.py index 96cb545..e8eca47 100644 --- a/main_package/classes/utility/cache.py +++ b/main_package/classes/utility/cache.py @@ -1,9 +1,7 @@ -import sys -sys.path.append('../') import typing -import structure_graph.set_of_cims as sofc +from ..structure_graph.set_of_cims import SetOfCims class Cache: @@ -40,7 +38,7 @@ class Cache: except ValueError: return None - def put(self, parents_comb: typing.Union[typing.Set, str], socim: sofc.SetOfCims): + def put(self, parents_comb: typing.Set, socim: SetOfCims): """Place in cache the ``SetOfCims`` object, and the related symbolic index ``parents_comb`` in ``_list_of_sets_of_parents``. diff --git a/main_package/classes/utility/json_importer.py b/main_package/classes/utility/json_importer.py index c813128..edff212 100644 --- a/main_package/classes/utility/json_importer.py +++ b/main_package/classes/utility/json_importer.py @@ -2,13 +2,12 @@ import json import typing import pandas as pd -import sys -sys.path.append('../') -import utility.abstract_importer as ai +from .abstract_importer import AbstractImporter -class JsonImporter(ai.AbstractImporter): + +class JsonImporter(AbstractImporter): """Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare the data in json extension. diff --git a/main_package/classes/utility/sample_importer.py b/main_package/classes/utility/sample_importer.py index 3c049ef..05073c8 100644 --- a/main_package/classes/utility/sample_importer.py +++ b/main_package/classes/utility/sample_importer.py @@ -3,14 +3,12 @@ import typing import pandas as pd import numpy as np -import sys -sys.path.append('../') -import utility.abstract_importer as ai +from .abstract_importer import AbstractImporter -class SampleImporter(ai.AbstractImporter): +class SampleImporter(AbstractImporter): """Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare the data loaded directly by using DataFrame diff --git a/main_package/tests/estimators/test_structure_constraint_based_estimator.py b/main_package/tests/estimators/test_structure_constraint_based_estimator.py index 26d6fb2..339405a 100644 --- a/main_package/tests/estimators/test_structure_constraint_based_estimator.py +++ b/main_package/tests/estimators/test_structure_constraint_based_estimator.py @@ -1,5 +1,4 @@ -import sys -sys.path.append("../../classes/") + import glob import math import os @@ -13,10 +12,10 @@ from line_profiler import LineProfiler import json import pandas as pd -import utility.cache as ch -import structure_graph.sample_path as sp -import estimators.structure_constraint_based_estimator as se -import utility.sample_importer as si + +from ...classes.structure_graph.sample_path import SamplePath +from ...classes.estimators.structure_constraint_based_estimator import StructureConstraintBasedEstimator +from ...classes.utility.sample_importer import SampleImporter import copy @@ -24,7 +23,7 @@ import copy class TestStructureConstraintBasedEstimator(unittest.TestCase): @classmethod def setUpClass(cls): - with open("../../data/networks_and_trajectories_ternary_data_3.json") as f: + with open("./main_package/data/networks_and_trajectories_ternary_data_3.json") as f: raw_data = json.load(f) trajectory_list_raw= raw_data[0]["samples"] @@ -35,7 +34,7 @@ class TestStructureConstraintBasedEstimator(unittest.TestCase): prior_net_structure = pd.DataFrame(raw_data[0]["dyn.str"]) - cls.importer = si.SampleImporter( + cls.importer = SampleImporter( trajectory_list=trajectory_list, variables=variables, prior_net_structure=prior_net_structure @@ -47,7 +46,7 @@ class TestStructureConstraintBasedEstimator(unittest.TestCase): #cls.traj = cls.s1.concatenated_samples # print(len(cls.traj)) - cls.s1 = sp.SamplePath(cls.importer) + cls.s1 = SamplePath(cls.importer) cls.s1.build_trajectories() cls.s1.build_structure() @@ -55,7 +54,7 @@ class TestStructureConstraintBasedEstimator(unittest.TestCase): true_edges = copy.deepcopy(self.s1.structure.edges) true_edges = set(map(tuple, true_edges)) - se1 = se.StructureConstraintBasedEstimator(self.s1,0.1,0.1) + se1 = StructureConstraintBasedEstimator(self.s1,0.1,0.1) edges = se1.estimate_structure(disable_multiprocessing=False) diff --git a/main_package/tests/optimizers/test_hill_climbing_search.py b/main_package/tests/optimizers/test_hill_climbing_search.py index dfc87ef..4dc33e5 100644 --- a/main_package/tests/optimizers/test_hill_climbing_search.py +++ b/main_package/tests/optimizers/test_hill_climbing_search.py @@ -1,5 +1,4 @@ -import sys -sys.path.append("../../classes/") + import glob import math import os @@ -11,10 +10,10 @@ import psutil from line_profiler import LineProfiler import copy -import utility.cache as ch -import structure_graph.sample_path as sp -import estimators.structure_score_based_estimator as se -import utility.json_importer as ji + +from ...classes.structure_graph.sample_path import SamplePath +from ...classes.estimators.structure_score_based_estimator import StructureScoreBasedEstimator +from ...classes.utility.json_importer import JsonImporter @@ -23,8 +22,11 @@ class TestHillClimbingSearch(unittest.TestCase): @classmethod def setUpClass(cls): #cls.read_files = glob.glob(os.path.join('../../data', "*.json")) - cls.importer = ji.JsonImporter("../../data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') - cls.s1 = sp.SamplePath(cls.importer) + + + cls.importer = JsonImporter("./main_package/data/networks_and_trajectories_binary_data_01_10.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') + cls.importer.import_data(0) + cls.s1 = SamplePath(cls.importer) cls.s1.build_trajectories() cls.s1.build_structure() @@ -34,7 +36,7 @@ class TestHillClimbingSearch(unittest.TestCase): true_edges = copy.deepcopy(self.s1.structure.edges) true_edges = set(map(tuple, true_edges)) - se1 = se.StructureScoreBasedEstimator(self.s1) + se1 = StructureScoreBasedEstimator(self.s1) edges = se1.estimate_structure( max_parents = None, iterations_number = 40, diff --git a/main_package/tests/structure_graph/test_networkgraph.py b/main_package/tests/structure_graph/test_networkgraph.py index a9d8c47..aa54914 100644 --- a/main_package/tests/structure_graph/test_networkgraph.py +++ b/main_package/tests/structure_graph/test_networkgraph.py @@ -1,5 +1,4 @@ -import sys -sys.path.append("../../classes/") + import unittest import glob import os @@ -7,100 +6,80 @@ import networkx as nx import numpy as np import itertools -import structure_graph.sample_path as sp -import structure_graph.network_graph as ng -import utility.json_importer as ji - +from ...classes.structure_graph.sample_path import SamplePath +from ...classes.structure_graph.network_graph import NetworkGraph +from ...classes.utility.json_importer import JsonImporter class TestNetworkGraph(unittest.TestCase): @classmethod def setUpClass(cls): - cls.read_files = glob.glob(os.path.join('../../data', "*.json")) - cls.importer = ji.JsonImporter("../../data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') - cls.s1 = sp.SamplePath(cls.importer) + cls.read_files = glob.glob(os.path.join('./main_package/data', "*.json")) + cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') + cls.importer.import_data(0) + cls.s1 = SamplePath(cls.importer) cls.s1.build_trajectories() cls.s1.build_structure() def test_init(self): - g1 = ng.NetworkGraph(self.s1.structure) - self.assertEqual(self.s1.structure, g1.graph_struct) - self.assertIsInstance(g1.graph, nx.DiGraph) - self.assertTrue(np.array_equal(g1._nodes_indexes, self.s1.structure.nodes_indexes)) - self.assertListEqual(g1._nodes_labels, self.s1.structure.nodes_labels) - self.assertTrue(np.array_equal(g1._nodes_values, self.s1.structure.nodes_values)) - self.assertIsNone(g1._fancy_indexing) + g1 = NetworkGraph(self.s1.structure) + self.assertEqual(self.s1.structure, g1._graph_struct) + self.assertIsInstance(g1._graph, nx.DiGraph) self.assertIsNone(g1.time_scalar_indexing_strucure) self.assertIsNone(g1.transition_scalar_indexing_structure) self.assertIsNone(g1.transition_filtering) self.assertIsNone(g1.p_combs) def test_add_nodes(self): - g1 = ng.NetworkGraph(self.s1.structure) + g1 = NetworkGraph(self.s1.structure) g1.add_nodes(self.s1.structure.nodes_labels) for n1, n2 in zip(g1.nodes, self.s1.structure.nodes_labels): self.assertEqual(n1, n2) def test_add_edges(self): - g1 = ng.NetworkGraph(self.s1.structure) + g1 = NetworkGraph(self.s1.structure) g1.add_edges(self.s1.structure.edges) for e in self.s1.structure.edges: self.assertIn(tuple(e), g1.edges) - def aux_aggregated_par_list_data(self, graph, node_id, sorted_par_list_aggregated_info): - for indx, element in enumerate(sorted_par_list_aggregated_info): - if indx == 0: - self.assertEqual(graph.get_parents_by_id(node_id), element) - for j in range(0, len(sorted_par_list_aggregated_info[0]) - 1): - self.assertLess(self.s1.structure.get_node_indx(sorted_par_list_aggregated_info[0][j]), - self.s1.structure.get_node_indx(sorted_par_list_aggregated_info[0][j + 1])) - elif indx == 1: - for node, node_indx in zip(sorted_par_list_aggregated_info[0], sorted_par_list_aggregated_info[1]): - self.assertEqual(graph.get_node_indx(node), node_indx) - else: - for node, node_val in zip(sorted_par_list_aggregated_info[0], sorted_par_list_aggregated_info[2]): - self.assertEqual(graph.graph_struct.get_states_number(node), node_val) - - def test_get_ord_set_of_par_of_all_nodes(self): - g1 = ng.NetworkGraph(self.s1.structure) + def test_fast_init(self): + g1 = NetworkGraph(self.s1.structure) + for node in self.s1.structure.nodes_labels: + g1.fast_init(node) + self.assertIsNotNone(g1._graph.nodes) + self.assertIsNotNone(g1._graph.edges) + self.assertIsInstance(g1._time_scalar_indexing_structure, np.ndarray) + self.assertIsInstance(g1._transition_scalar_indexing_structure, np.ndarray) + self.assertIsInstance(g1._time_filtering, np.ndarray) + self.assertIsInstance(g1._transition_filtering, np.ndarray) + self.assertIsInstance(g1._p_combs_structure, np.ndarray) + self.assertIsInstance(g1._aggregated_info_about_nodes_parents, tuple) + + def test_get_ordered_by_indx_set_of_parents(self): + g1 = NetworkGraph(self.s1.structure) g1.add_nodes(self.s1.structure.nodes_labels) g1.add_edges(self.s1.structure.edges) - sorted_list_of_par_lists = g1.get_ord_set_of_par_of_all_nodes() - for node, par_list in zip(g1.nodes, sorted_list_of_par_lists): - self.aux_aggregated_par_list_data(g1, node, par_list) - """ - def test_get_ordered_by_indx_parents_values_for_all_nodes(self): - g1 = ng.NetworkGraph(self.s1.structure) - g1.add_nodes(self.s1.structure.list_of_nodes_labels()) - g1.add_edges(self.s1.structure.list_of_edges()) - g1.aggregated_info_about_nodes_parents = g1.get_ord_set_of_par_of_all_nodes() - #print(g1.get_ordered_by_indx_parents_values_for_all_nodes()) - parents_values_list = g1.get_ordered_by_indx_parents_values_for_all_nodes() - for pv1, aggr in zip(parents_values_list, g1.aggregated_info_about_nodes_parents): - self.assertEqual(pv1, aggr[2]) - - def test_get_states_number_of_all_nodes_sorted(self): - g1 = ng.NetworkGraph(self.s1.structure) - g1.add_nodes(self.s1.structure.list_of_nodes_labels()) - g1.add_edges(self.s1.structure.list_of_edges()) - nodes_cardinality_list = g1.get_states_number_of_all_nodes_sorted() - for val, node in zip(nodes_cardinality_list, g1.get_nodes_sorted_by_indx()): - self.assertEqual(val, g1.get_states_number(node)) - - def test_build_fancy_indexing_structure_no_offset(self): - g1 = ng.NetworkGraph(self.s1.structure) - g1.add_nodes(self.s1.structure.list_of_nodes_labels()) - g1.add_edges(self.s1.structure.list_of_edges()) - g1.aggregated_info_about_nodes_parents = g1.get_ord_set_of_par_of_all_nodes() - fancy_indx = g1.build_fancy_indexing_structure(0) - for par_indxs, aggr in zip(fancy_indx, g1.aggregated_info_about_nodes_parents): - self.assertEqual(par_indxs, aggr[1]) - - def test_build_fancy_indexing_structure_offset(self): - pass #TODO il codice di netgraph deve gestire questo caso""" + for node in self.s1.structure.nodes_labels: + aggr_info = g1.get_ordered_by_indx_set_of_parents(node) + for indx in range(len(aggr_info[0]) - 1 ): + self.assertLess(g1.get_node_indx(aggr_info[0][indx]), g1.get_node_indx(aggr_info[0][indx + 1])) + for par, par_indx in zip(aggr_info[0], aggr_info[1]): + self.assertEqual(g1.get_node_indx(par), par_indx) + for par, par_val in zip(aggr_info[0], aggr_info[2]): + self.assertEqual(g1._graph_struct.get_states_number(par), par_val) + + def test_build_time_scalar_indexing_structure_for_a_node(self): + g1 = NetworkGraph(self.s1.structure) + g1.add_nodes(self.s1.structure.nodes_labels) + g1.add_edges(self.s1.structure.edges) + for node in self.s1.structure.nodes_labels: + aggr_info = g1.get_ordered_by_indx_set_of_parents(node) + self.aux_build_time_scalar_indexing_structure_for_a_node(g1, node, aggr_info[1], + aggr_info[0], aggr_info[2]) def aux_build_time_scalar_indexing_structure_for_a_node(self, graph, node_id, parents_indxs, parents_labels, parents_vals): - time_scalar_indexing = graph.build_time_scalar_indexing_structure_for_a_node(node_id, parents_vals) + node_states = graph.get_states_number(node_id) + time_scalar_indexing = NetworkGraph.build_time_scalar_indexing_structure_for_a_node(node_states, parents_vals) self.assertEqual(len(time_scalar_indexing), len(parents_indxs) + 1) merged_list = parents_labels[:] merged_list.insert(0, node_id) @@ -111,9 +90,19 @@ class TestNetworkGraph(unittest.TestCase): t_vec = t_vec.cumprod() self.assertTrue(np.array_equal(time_scalar_indexing, t_vec)) + def test_build_transition_scalar_indexing_structure_for_a_node(self): + g1 = NetworkGraph(self.s1.structure) + g1.add_nodes(self.s1.structure.nodes_labels) + g1.add_edges(self.s1.structure.edges) + for node in self.s1.structure.nodes_labels: + aggr_info = g1.get_ordered_by_indx_set_of_parents(node) + self.aux_build_transition_scalar_indexing_structure_for_a_node(g1, node, aggr_info[1], + aggr_info[0], aggr_info[2]) + def aux_build_transition_scalar_indexing_structure_for_a_node(self, graph, node_id, parents_indxs, parents_labels, parents_values): - transition_scalar_indexing = graph.build_transition_scalar_indexing_structure_for_a_node(node_id, + node_states = graph.get_states_number(node_id) + transition_scalar_indexing = graph.build_transition_scalar_indexing_structure_for_a_node(node_states, parents_values) self.assertEqual(len(transition_scalar_indexing), len(parents_indxs) + 2) merged_list = parents_labels[:] @@ -126,114 +115,76 @@ class TestNetworkGraph(unittest.TestCase): m_vec = m_vec.cumprod() self.assertTrue(np.array_equal(transition_scalar_indexing, m_vec)) - def test_build_transition_scalar_indexing_structure(self): - g1 = ng.NetworkGraph(self.s1.structure) - g1.add_nodes(self.s1.structure.nodes_labels) - g1.add_edges(self.s1.structure.edges) - g1.aggregated_info_about_nodes_parents = g1.get_ord_set_of_par_of_all_nodes() - p_labels = [i[0] for i in g1.aggregated_info_about_nodes_parents] - p_vals = g1.get_ordered_by_indx_parents_values_for_all_nodes() - fancy_indx = g1.build_fancy_indexing_structure(0) - for node_id, p_i ,p_l, p_v in zip(g1.graph_struct.nodes_labels, fancy_indx, p_labels, p_vals): - self.aux_build_transition_scalar_indexing_structure_for_a_node(g1, node_id, p_i ,p_l, p_v) - - def test_build_time_scalar_indexing_structure(self): - g1 = ng.NetworkGraph(self.s1.structure) - g1.add_nodes(self.s1.structure.nodes_labels) - g1.add_edges(self.s1.structure.edges) - g1.aggregated_info_about_nodes_parents = g1.get_ord_set_of_par_of_all_nodes() - fancy_indx = g1.build_fancy_indexing_structure(0) - p_labels = [i[0] for i in g1.aggregated_info_about_nodes_parents] - p_vals = g1.get_ordered_by_indx_parents_values_for_all_nodes() - #print(fancy_indx) - for node_id, p_indxs, p_labels, p_v in zip(g1.graph_struct.nodes_labels, fancy_indx, p_labels, p_vals): - self.aux_build_time_scalar_indexing_structure_for_a_node(g1, node_id, p_indxs, p_labels, p_v) - - def test_build_time_columns_filtering_structure(self): - g1 = ng.NetworkGraph(self.s1.structure) + def test_build_time_columns_filtering_structure_for_a_node(self): + g1 = NetworkGraph(self.s1.structure) g1.add_nodes(self.s1.structure.nodes_labels) g1.add_edges(self.s1.structure.edges) - g1.aggregated_info_about_nodes_parents = g1.get_ord_set_of_par_of_all_nodes() - g1._fancy_indexing = g1.build_fancy_indexing_structure(0) - g1.build_time_columns_filtering_structure() - t_filter = [] - for node_id, p_indxs in zip(g1.nodes, g1._fancy_indexing): - single_filter = [] - single_filter.append(g1.get_node_indx(node_id)) - single_filter.extend(p_indxs) - t_filter.append(np.array(single_filter)) - #print(t_filter) - for a1, a2 in zip(g1.time_filtering, t_filter): - self.assertTrue(np.array_equal(a1, a2)) - + for node in self.s1.structure.nodes_labels: + aggr_info = g1.get_ordered_by_indx_set_of_parents(node) + self.aux_build_time_columns_filtering_structure_for_a_node(g1, node, aggr_info[1]) + + def aux_build_time_columns_filtering_structure_for_a_node(self, graph, node_id, p_indxs): + graph.build_time_columns_filtering_for_a_node(graph.get_node_indx(node_id), p_indxs) + single_filter = [] + single_filter.append(graph.get_node_indx(node_id)) + single_filter.extend(p_indxs) + self.assertTrue(np.array_equal(graph.build_time_columns_filtering_for_a_node(graph.get_node_indx(node_id), + p_indxs),np.array(single_filter))) def test_build_transition_columns_filtering_structure(self): - g1 = ng.NetworkGraph(self.s1.structure) + g1 = NetworkGraph(self.s1.structure) g1.add_nodes(self.s1.structure.nodes_labels) g1.add_edges(self.s1.structure.edges) - g1.aggregated_info_about_nodes_parents = g1.get_ord_set_of_par_of_all_nodes() - g1._fancy_indexing = g1.build_fancy_indexing_structure(0) - g1.build_transition_columns_filtering_structure() - m_filter = [] - for node_id, p_indxs in zip(g1.nodes, g1._fancy_indexing): - single_filter = [] - single_filter.append(g1.get_node_indx(node_id) + g1.graph_struct.total_variables_number) - single_filter.append(g1.get_node_indx(node_id)) - single_filter.extend(p_indxs) - m_filter.append(np.array(single_filter)) - for a1, a2 in zip(g1.transition_filtering, m_filter): - self.assertTrue(np.array_equal(a1, a2)) - + for node in self.s1.structure.nodes_labels: + aggr_info = g1.get_ordered_by_indx_set_of_parents(node) + self.aux_build_time_columns_filtering_structure_for_a_node(g1, node, aggr_info[1]) + + def aux_build_transition_columns_filtering_structure(self, graph, node_id, p_indxs): + single_filter = [] + single_filter.append(graph.get_node_indx(node_id) + graph._graph_struct.total_variables_number) + single_filter.append(graph.get_node_indx(node_id)) + single_filter.extend(p_indxs) + self.assertTrue(np.array_equal(graph.build_transition_filtering_for_a_node(graph.get_node_indx(node_id), + + p_indxs), np.array(single_filter))) def test_build_p_combs_structure(self): - g1 = ng.NetworkGraph(self.s1.structure) + g1 = NetworkGraph(self.s1.structure) g1.add_nodes(self.s1.structure.nodes_labels) g1.add_edges(self.s1.structure.edges) - g1.aggregated_info_about_nodes_parents = g1.get_ord_set_of_par_of_all_nodes() - p_vals = g1.get_ordered_by_indx_parents_values_for_all_nodes() - p_combs = g1.build_p_combs_structure() - - for matrix, p_v in zip(p_combs, p_vals): - p_possible_vals = [] - for val in p_v: - vals = [v for v in range(val)] - p_possible_vals.extend(vals) - comb_struct = set(itertools.product(p_possible_vals,repeat=len(p_v))) - #print(comb_struct) - for comb in comb_struct: - self.assertIn(np.array(comb), matrix) - - def test_fast_init(self): - g1 = ng.NetworkGraph(self.s1.structure) - g2 = ng.NetworkGraph(self.s1.structure) - g1.init_graph() - for indx, node in enumerate(g1.nodes): - g2.fast_init(node) - self.assertListEqual(g2._fancy_indexing, g1._fancy_indexing[indx]) - self.assertTrue(np.array_equal(g2.time_scalar_indexing_strucure, g1.time_scalar_indexing_strucure[indx])) - self.assertTrue(np.array_equal(g2.transition_scalar_indexing_structure, g1.transition_scalar_indexing_structure[indx])) - self.assertTrue(np.array_equal(g2.time_filtering, g1.time_filtering[indx])) - self.assertTrue(np.array_equal(g2.transition_filtering, g1.transition_filtering[indx])) - self.assertTrue(np.array_equal(g2.p_combs, g1.p_combs[indx])) + for node in self.s1.structure.nodes_labels: + aggr_info = g1.get_ordered_by_indx_set_of_parents(node) + self.aux_build_p_combs_structure(g1, aggr_info[2]) + + def aux_build_p_combs_structure(self, graph, p_vals): + p_combs = graph.build_p_comb_structure_for_a_node(p_vals) + p_possible_vals = [] + for val in p_vals: + vals = [v for v in range(val)] + p_possible_vals.extend(vals) + comb_struct = set(itertools.product(p_possible_vals,repeat=len(p_vals))) + for comb in comb_struct: + self.assertIn(np.array(comb), p_combs) def test_get_parents_by_id(self): - g1 = ng.NetworkGraph(self.s1.structure) + g1 = NetworkGraph(self.s1.structure) g1.add_nodes(self.s1.structure.nodes_labels) g1.add_edges(self.s1.structure.edges) for node in g1.nodes: - self.assertListEqual(g1.get_parents_by_id(node), list(g1.graph.predecessors(node))) + self.assertListEqual(g1.get_parents_by_id(node), list(g1._graph.predecessors(node))) def test_get_states_number(self): - g1 = ng.NetworkGraph(self.s1.structure) + g1 = NetworkGraph(self.s1.structure) g1.add_nodes(self.s1.structure.nodes_labels) g1.add_edges(self.s1.structure.edges) for node, val in zip(g1.nodes, g1.nodes_values): self.assertEqual(val, g1.get_states_number(node)) def test_get_node_indx(self): - g1 = ng.NetworkGraph(self.s1.structure) + g1 = NetworkGraph(self.s1.structure) g1.add_nodes(self.s1.structure.nodes_labels) g1.add_edges(self.s1.structure.edges) for node, indx in zip(g1.nodes, g1.nodes_indexes): self.assertEqual(indx, g1.get_node_indx(node)) + if __name__ == '__main__': unittest.main() diff --git a/main_package/tests/structure_graph/test_sample_path.py b/main_package/tests/structure_graph/test_sample_path.py index 1931f26..e4befa8 100644 --- a/main_package/tests/structure_graph/test_sample_path.py +++ b/main_package/tests/structure_graph/test_sample_path.py @@ -1,33 +1,71 @@ -import sys -sys.path.append("../../classes/") + import unittest import glob import os -import utility.json_importer as ji -import structure_graph.sample_path as sp -import structure_graph.trajectory as tr -import structure_graph.structure as st +import random + +from ...classes.utility.json_importer import JsonImporter +from ...classes.structure_graph.sample_path import SamplePath +from ...classes.structure_graph.trajectory import Trajectory +from ...classes.structure_graph.structure import Structure class TestSamplePath(unittest.TestCase): @classmethod def setUpClass(cls) -> None: - cls.read_files = glob.glob(os.path.join('../../data', "*.json")) - cls.importer = ji.JsonImporter("../../data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') + cls.read_files = glob.glob(os.path.join('./main_package/data', "*.json")) + + def test_init_not_initialized_importer(self): + importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') + self.assertRaises(RuntimeError, SamplePath, importer) + + def test_init_not_filled_dataframse(self): + importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') + importer.import_data(0) + importer.clear_concatenated_frame() + self.assertRaises(RuntimeError, SamplePath, importer) def test_init(self): - s1 = sp.SamplePath(self.importer) + importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') + importer.import_data(0) + s1 = SamplePath(importer) + self.assertIsNone(s1.trajectories) + self.assertIsNone(s1.structure) + self.assertFalse(s1._importer.concatenated_samples.empty) + self.assertIsNone(s1._total_variables_count) + + def test_build_trajectories(self): + importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') + importer.import_data(0) + s1 = SamplePath(importer) s1.build_trajectories() - self.assertIsNotNone(s1.trajectories) - self.assertIsInstance(s1.trajectories, tr.Trajectory) + self.assertIsInstance(s1.trajectories, Trajectory) + + def test_build_structure(self): + importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') + importer.import_data(0) + s1 = SamplePath(importer) s1.build_structure() - self.assertIsNotNone(s1.structure) - self.assertIsInstance(s1.structure, st.Structure) - self.assertTrue(s1.importer.concatenated_samples.empty) - self.assertEqual(s1.total_variables_count, len(s1.importer.sorter)) - print(s1.structure) - print(s1.trajectories) + self.assertIsInstance(s1.structure, Structure) + self.assertEqual(s1._total_variables_count, len(s1._importer.sorter)) + + def test_build_structure_bad_sorter(self): + importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') + importer.import_data(0) + s1 = SamplePath(importer) + random.shuffle(importer._sorter) + self.assertRaises(RuntimeError, s1.build_structure) + + def test_build_saplepath_no_prior_net_structure(self): + importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') + importer.import_data(0) + importer._df_structure = None + s1 = SamplePath(importer) + s1.build_trajectories() + s1.build_structure() + self.assertFalse(s1.structure.edges) + if __name__ == '__main__': diff --git a/main_package/tests/structure_graph/test_setofcims.py b/main_package/tests/structure_graph/test_setofcims.py index db7eadd..7e13075 100644 --- a/main_package/tests/structure_graph/test_setofcims.py +++ b/main_package/tests/structure_graph/test_setofcims.py @@ -1,12 +1,9 @@ -import sys -sys.path.append("../../classes/") + import unittest import numpy as np import itertools -from structure_graph.set_of_cims import SetOfCims - - +from ...classes.structure_graph.set_of_cims import SetOfCims class TestSetOfCims(unittest.TestCase): diff --git a/main_package/tests/structure_graph/test_sets_of_cims_container.py b/main_package/tests/structure_graph/test_sets_of_cims_container.py deleted file mode 100644 index 43c1096..0000000 --- a/main_package/tests/structure_graph/test_sets_of_cims_container.py +++ /dev/null @@ -1,26 +0,0 @@ -import sys -sys.path.append("../../classes/") -import unittest -import structure_graph.set_of_cims as sc -import structure_graph.sets_of_cims_container as scc - - -class TestSetsOfCimsContainer(unittest.TestCase): - - @classmethod - def setUpClass(cls) -> None: - cls.variables = ['X', 'Y', 'Z'] - cls.states_per_node = [3, 3, 3] - cls.parents_states_list = [[], [3], [3, 3]] - - def test_init(self): - #TODO: Fix this initialization - c1 = scc.SetsOfCimsContainer(self.variables, self.states_per_node, self.parents_states_list) - self.assertEqual(len(c1.sets_of_cims), len(self.variables)) - for set_of_cims in c1.sets_of_cims: - self.assertIsInstance(set_of_cims, sc.SetOfCims) - - - -if __name__ == '__main__': - unittest.main() diff --git a/main_package/tests/structure_graph/test_trajectory.py b/main_package/tests/structure_graph/test_trajectory.py index b05edd4..89c5c36 100644 --- a/main_package/tests/structure_graph/test_trajectory.py +++ b/main_package/tests/structure_graph/test_trajectory.py @@ -1,16 +1,20 @@ -import sys -sys.path.append("../../classes/") + import unittest import numpy as np -import structure_graph.trajectory as tr +from ...classes.structure_graph.trajectory import Trajectory class TestTrajectory(unittest.TestCase): + @classmethod + def setUpClass(cls): + print("123") + pass + def test_init(self): cols_list = [np.array([1.2,1.3,.14]), np.arange(1,4), np.arange(4,7)] - t1 = tr.Trajectory(cols_list, len(cols_list) - 2) + t1 = Trajectory(cols_list, len(cols_list) - 2) self.assertTrue(np.array_equal(cols_list[0], t1.times)) self.assertTrue(np.array_equal(np.ravel(t1.complete_trajectory[:, : 1]), cols_list[1])) self.assertTrue(np.array_equal(np.ravel(t1.complete_trajectory[:, 1: 2]), cols_list[2])) @@ -19,27 +23,27 @@ class TestTrajectory(unittest.TestCase): def test_init_first_array_not_float_type(self): cols_list = [np.arange(1, 4), np.arange(4, 7), np.array([1.2, 1.3, .14])] - self.assertRaises(TypeError, tr.Trajectory, cols_list, len(cols_list)) + self.assertRaises(TypeError, Trajectory, cols_list, len(cols_list)) def test_complete_trajectory(self): cols_list = [np.array([1.2, 1.3, .14]), np.arange(1, 4), np.arange(4, 7)] - t1 = tr.Trajectory(cols_list, len(cols_list) - 2) + t1 = Trajectory(cols_list, len(cols_list) - 2) complete = np.column_stack((cols_list[1], cols_list[2])) self.assertTrue(np.array_equal(t1.complete_trajectory, complete)) def test_trajectory(self): cols_list = [np.array([1.2, 1.3, .14]), np.arange(1, 4), np.arange(4, 7)] - t1 = tr.Trajectory(cols_list, len(cols_list) - 2) + t1 = Trajectory(cols_list, len(cols_list) - 2) self.assertTrue(np.array_equal(cols_list[1], t1.trajectory.ravel())) def test_times(self): cols_list = [np.array([1.2, 1.3, .14]), np.arange(1, 4), np.arange(4, 7)] - t1 = tr.Trajectory(cols_list, len(cols_list) - 2) + t1 = Trajectory(cols_list, len(cols_list) - 2) self.assertTrue(np.array_equal(cols_list[0], t1.times)) def test_repr(self): cols_list = [np.array([1.2, 1.3, .14]), np.arange(1, 4), np.arange(4, 7)] - t1 = tr.Trajectory(cols_list, len(cols_list) - 2) + t1 = Trajectory(cols_list, len(cols_list) - 2) print(t1)