diff --git a/PyCTBN/classes/structure_estimator.py b/PyCTBN/classes/structure_estimator.py index b28c95d..854c948 100644 --- a/PyCTBN/classes/structure_estimator.py +++ b/PyCTBN/classes/structure_estimator.py @@ -36,7 +36,8 @@ class StructureEstimator(object): :_cache: the Cache object """ - def __init__(self, sample_path: SamplePath, exp_test_alfa: float, chi_test_alfa: float): + def __init__(self, sample_path: SamplePath, exp_test_alfa: float, chi_test_alfa: float, + thumb_threshold:int = 25, known_edges: typing.List = None): """Constructor Method """ self._sample_path = sample_path @@ -46,6 +47,8 @@ class StructureEstimator(object): self._complete_graph = StructureEstimator.build_complete_graph(self._sample_path.structure.nodes_labels) self._exp_test_sign = exp_test_alfa self._chi_test_alfa = chi_test_alfa + self._thumb_threshold = thumb_threshold + self._removable_edges_matrix = self.build_removable_edges_matrix(known_edges) self._cache = Cache() @staticmethod @@ -62,8 +65,18 @@ class StructureEstimator(object): complete_graph.add_edges_from(itertools.permutations(node_ids, 2)) return complete_graph + def build_removable_edges_matrix(self, known_edges: typing.List): + tot_vars_count = self._sample_path.total_variables_count + complete_adj_matrix = np.full((tot_vars_count, tot_vars_count), True) + if known_edges: + for edge in known_edges: + i = self._sample_path.structure.get_node_indx(edge[0]) + j = self._sample_path.structure.get_node_indx(edge[1]) + complete_adj_matrix[i][j] = False + return complete_adj_matrix + def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int, - tot_vars_count: int) -> bool: + tot_vars_count: int, parent_indx, child_indx) -> bool: """Performs a complete independence test on the directed graphs G1 = {test_child U parent_set} G2 = {G1 U test_parent} (added as an additional parent of the test_child). Generates all the necessary structures and datas to perform the tests. @@ -117,16 +130,21 @@ class StructureEstimator(object): p2.fast_init(test_child) sofc1 = p2.compute_parameters_for_node(test_child) self._cache.put(set(p_set), sofc1) - + thumb_value = 0.0 + if child_states_numb > 2: + parent_val = self._sample_path.structure.get_states_number(test_parent) + bool_mask_vals = np.isin(self._nodes, parent_set) + parents_vals = self._nodes_vals[bool_mask_vals] + thumb_value = self.compute_thumb_value(parent_val, child_states_numb, parents_vals) for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs): cond_cims = sofc2.filter_cims_with_mask(cims_filter, p_comb) for cim2 in cond_cims: - if not self.independence_test(child_states_numb, cim1, cim2): + if not self.independence_test(child_states_numb, cim1, cim2, thumb_value, parent_indx, child_indx): return False return True def independence_test(self, child_states_numb: int, cim1: ConditionalIntensityMatrix, - cim2: ConditionalIntensityMatrix) -> bool: + cim2: ConditionalIntensityMatrix, thumb_value: float, parent_indx, child_indx) -> bool: """Compute the actual independence test using two cims. It is performed first the exponential test and if the null hypothesis is not rejected, it is performed also the chi_test. @@ -146,6 +164,10 @@ class StructureEstimator(object): r2s = M2.diagonal() C1 = cim1.cim C2 = cim2.cim + if child_states_numb > 2: + if (np.sum(np.diagonal(M1)) / thumb_value) < self._thumb_threshold: + self._removable_edges_matrix[parent_indx][child_indx] = False + return False F_stats = C2.diagonal() / C1.diagonal() exp_alfa = self._exp_test_sign for val in range(0, child_states_numb): @@ -165,6 +187,14 @@ class StructureEstimator(object): return False return True + + def compute_thumb_value(self, parent_val, child_val, parent_set_vals): + df = (child_val - 1) ** 2 + df = df * parent_val + for v in parent_set_vals: + df = df * v + return df + def one_iteration_of_CTPC_algorithm(self, var_id: str) -> None: """Performs an iteration of the CTPC algorithm using the node ``var_id`` as ``test_child``. @@ -179,14 +209,17 @@ class StructureEstimator(object): parent_indx = 0 while parent_indx < len(u): removed = False - S = StructureEstimator.generate_possible_sub_sets_of_size(u, b, u[parent_indx]) test_parent = u[parent_indx] - for parents_set in S: - if self.complete_test(test_parent, var_id, parents_set, child_states_numb, tot_vars_count): - self._complete_graph.remove_edge(test_parent, var_id) - u.remove(test_parent) - removed = True - break + i = self._sample_path.structure.get_node_indx(test_parent) + j = self._sample_path.structure.get_node_indx(var_id) + if self._removable_edges_matrix[i][j]: + S = StructureEstimator.generate_possible_sub_sets_of_size(u, b, test_parent) + for parents_set in S: + if self.complete_test(test_parent, var_id, parents_set, child_states_numb, tot_vars_count, i, j): + self._complete_graph.remove_edge(test_parent, var_id) + u.remove(test_parent) + removed = True + break if not removed: parent_indx += 1 b += 1 @@ -263,7 +296,7 @@ class StructureEstimator(object): edges_colors = ['red' if edge in spurious_edges else 'black' for edge in self._complete_graph.edges] graph_to_draw.add_edges_from(spurious_edges) graph_to_draw.add_edges_from(non_spurious_edges) - pos = nx.spring_layout(graph_to_draw, k=5, scale=3) + pos = nx.spring_layout(graph_to_draw, k=0.5*1/np.sqrt(len(graph_to_draw.nodes())), iterations=50,scale=10) options = { "node_size": 2000, "node_color": "white", diff --git a/PyCTBN/tests/original_ctpc_algorithm.py b/PyCTBN/tests/original_ctpc_algorithm.py index e63154b..233bf67 100644 --- a/PyCTBN/tests/original_ctpc_algorithm.py +++ b/PyCTBN/tests/original_ctpc_algorithm.py @@ -242,27 +242,12 @@ class OriginalCTPCAlgorithm(AbstractImporter): dtype=np.int64) #print(diag_indices) T_filter = np.array([child_id, *parents_id], dtype=np.int) + 1 - #print("TFilter",T_filter) - #print("TVector", T_vector) - #print("Trajectories", trajectories) - #print("Actual TVect",T_vector / T_vector[0]) - #print("Masked COlumns", trajectories[:, T_filter]) # Colonne non shiftate dei values - #print("Masked Multiplied COlumns",trajectories[:, T_filter] * (T_vector / T_vector[0]) ) - #print("Summing",np.sum(trajectories[:, T_filter] * (T_vector / T_vector[0]), axis=1)) - #print("Deltas",trajectories[:, int(trajectories.shape[1] / 2)]) # i delta times assert np.sum(trajectories[:, T_filter] * (T_vector / T_vector[0]), axis=1).size == \ trajectories[:, int(trajectories.shape[1] / 2)].size #print(T_vector[-1]) T[:] = np.bincount(np.sum(trajectories[:, T_filter] * T_vector / T_vector[0], axis=1).astype(np.int), \ trajectories[:, int(trajectories.shape[1] / 2)], minlength=T_vector[-1]).reshape(-1, T.shape[1]) - #print("Shape", T.shape[1]) - #print(np.bincount(np.sum(trajectories[:, T_filter] * T_vector / T_vector[0], axis=1).astype(np.int), \ - #trajectories[:, int(trajectories.shape[1] / 2)], minlength=T_vector[-1])) - ###### Transitions ####### - - #print("Shifted Node column", trajectories[:, int(trajectories.shape[1] / 2) + 1 + child_id].astype(np.int)) - #print("Step 2", trajectories[:, int(trajectories.shape[1] / 2) + 1 + child_id].astype(np.int) >= 0) trj_tmp = trajectories[trajectories[:, int(trajectories.shape[1] / 2) + 1 + child_id].astype(np.int) >= 0] #print("Trj Temp", trj_tmp) @@ -270,13 +255,6 @@ class OriginalCTPCAlgorithm(AbstractImporter): M_filter = np.array([child_id, child_id, *parents_id], dtype=np.int) + 1 #print("MFilter", M_filter) M_filter[0] += int(trj_tmp.shape[1] / 2) - #print("MFilter", M_filter) - #print("MVector", M_vector) - #print("Division", M_vector / M_vector[0]) - #print("Masked Traj temp", (trj_tmp[:, M_filter])) - #print("Masked Multiplied Traj temp", trj_tmp[:, M_filter] * M_vector / M_vector[0]) - #print("Summing", np.sum(trj_tmp[:, M_filter] * M_vector / M_vector[0], axis=1)) - #print(M.shape[2]) M[:] = np.bincount(np.sum(trj_tmp[:, M_filter] * M_vector / M_vector[0], axis=1).astype(np.int), \ minlength=M_vector[-1]).reshape(-1, M.shape[1], M.shape[2]) @@ -390,6 +368,7 @@ class OriginalCTPCAlgorithm(AbstractImporter): #print("C2:", CIM_from) + if self.variables.loc[to_var, "Value"] > 2: df = (self.variables.loc[to_var, "Value"] - 1) ** 2 df = df * (self.variables.loc[from_var, "Value"]) diff --git a/PyCTBN/tests/performance_comparisons.py b/PyCTBN/tests/performance_comparisons.py index 3fee2d1..849490d 100644 --- a/PyCTBN/tests/performance_comparisons.py +++ b/PyCTBN/tests/performance_comparisons.py @@ -5,7 +5,7 @@ import numpy as np import pandas as pd import networkx as nx import timeit -import psutil +#import psutil from ..classes.sample_path import SamplePath from ..classes.structure_estimator import StructureEstimator @@ -66,9 +66,9 @@ class PerformanceComparisons(unittest.TestCase): self.aux_build_importer(0) se1 = StructureEstimator(self.s1, 0.1, 0.1) se1.ctpc_algorithm() - current_process = psutil.Process(os.getpid()) - mem = current_process.memory_info().rss - self.memory_usages.append((mem / 10 ** 6)) + #current_process = psutil.Process(os.getpid()) + #mem = current_process.memory_info().rss + #self.memory_usages.append((mem / 10 ** 6)) self.save_memory_usage_data(self.memory_usages) def aux_build_importer(self, indx): diff --git a/PyCTBN/tests/test_structure_estimator.py b/PyCTBN/tests/test_structure_estimator.py index 0283ce0..c630cea 100644 --- a/PyCTBN/tests/test_structure_estimator.py +++ b/PyCTBN/tests/test_structure_estimator.py @@ -50,6 +50,17 @@ class TestStructureEstimator(unittest.TestCase): no_self_loops.remove(node) for n2 in no_self_loops: self.assertIn((node, n2), cg.edges) + #se1.save_plot_estimated_structure_graph() + + def test_build_removable_edges_matrix(self): + exp_alfa = 0.1 + chi_alfa = 0.1 + known_edges = self.s1.structure.edges[0:2] + se1 = StructureEstimator(self.s1, exp_alfa, chi_alfa, known_edges) + for edge in known_edges: + i = self.s1.structure.get_node_indx(edge[0]) + j = self.s1.structure.get_node_indx(edge[1]) + self.assertFalse(se1._removable_edges_matrix[i][j]) def test_generate_possible_sub_sets_of_size(self): exp_alfa = 0.1 @@ -67,7 +78,8 @@ class TestStructureEstimator(unittest.TestCase): self.assertFalse(node in sset) def test_time(self): - se1 = StructureEstimator(self.s1, 0.1, 0.1) + known_edges = [self.s1.structure.edges[0]] + se1 = StructureEstimator(self.s1, 0.1, 0.1, 25, known_edges) exec_time = timeit.timeit(se1.ctpc_algorithm, number=1) print("Execution Time: ", exec_time) for ed in self.s1.structure.edges: