From 7edfb6c962fcc3c9a5db1b5aa7f9b994554d62a8 Mon Sep 17 00:00:00 2001 From: philpMartin Date: Sat, 8 Aug 2020 18:24:05 +0200 Subject: [PATCH] Add Struct estim tests and DOCSTRING --- main_package/classes/cache.py | 3 +- main_package/classes/structure_estimator.py | 125 +++++++++++++----- .../tests/test_structure_estimator.py | 59 +++++++-- 3 files changed, 140 insertions(+), 47 deletions(-) diff --git a/main_package/classes/cache.py b/main_package/classes/cache.py index 16eabef..f9a1f7a 100644 --- a/main_package/classes/cache.py +++ b/main_package/classes/cache.py @@ -14,10 +14,9 @@ class Cache: self.list_of_sets_of_parents = [] self.actual_cache = [] - def find(self, parents_comb: typing.Union[typing.Set, str]): + def find(self, parents_comb: typing.Set): #typing.Union[typing.Set, str] """ Tries to find in cache given the symbolic parents combination parents_comb the SetOfCims related to that parents_comb. - N.B. if the parents_comb is not a Set, than the index refers to the SetOfCims of the actual node with no parents. 
Parameters: parents_comb: the parents related to that SetOfCims Returns: diff --git a/main_package/classes/structure_estimator.py b/main_package/classes/structure_estimator.py index d128b28..35a00c9 100644 --- a/main_package/classes/structure_estimator.py +++ b/main_package/classes/structure_estimator.py @@ -2,12 +2,14 @@ import numpy as np import itertools import networkx as nx +import typing from scipy.stats import f as f_dist from scipy.stats import chi2 as chi2_dist import sample_path as sp import structure as st import network_graph as ng +import conditional_intensity_matrix as condim import parameters_estimator as pe import cache as ch @@ -37,13 +39,37 @@ class StructureEstimator: self.chi_test_alfa = chi_test_alfa self.cache = ch.Cache() - def build_complete_graph(self, node_ids): + def build_complete_graph(self, node_ids: typing.List): + """ + Builds a complete directed graph (no self loops) given the nodes labels in the list node_ids: + + Parameters: + node_ids: the list of nodes labels + Returns: + a complete Digraph Object + """ complete_graph = nx.DiGraph() complete_graph.add_nodes_from(node_ids) complete_graph.add_edges_from(itertools.permutations(node_ids, 2)) return complete_graph - def complete_test(self, test_parent, test_child, parent_set, child_states_numb, tot_vars_count): + def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int, + tot_vars_count: int): + """ + Performs a complete independence test on the directed graphs G1 = test_child U parent_set + G2 = G1 U test_parent (added as an additional parent of the test_child). + Generates all the necessary structures and data to perform the tests. 
+ + Parameters: + test_parent: the node label of the test parent + test_child: the node label of the child + parent_set: the common parent set + child_states_numb: the cardinality of the test_child + tot_vars_count: the total number of variables in the net + Returns: + True iff test_child and test_parent are independent given the sep_set parent_set + False otherwise + """ #print("Test Parent:", test_parent) #print("Sep Set", parent_set) p_set = parent_set[:] @@ -58,11 +84,11 @@ class StructureEstimator: #print("SORTED PARENTS", sorted_parents) cims_filter = sorted_parents != test_parent #print("PARENTS NO FROM MASK", cims_filter) - if not p_set: + #if not p_set: #print("EMPTY PSET TRYING TO FIND", test_child) - sofc1 = self.cache.find(test_child) - else: - sofc1 = self.cache.find(set(p_set)) + #sofc1 = self.cache.find(test_child) + #else: + sofc1 = self.cache.find(set(p_set)) if not sofc1: #print("CACHE MISSS SOFC1") @@ -76,18 +102,14 @@ class StructureEstimator: eds1 = list(itertools.product(parent_set,test_child)) s1 = st.Structure(l1, indxs1, vals1, eds1, tot_vars_count) g1 = ng.NetworkGraph(s1) - #g1.init_graph() g1.fast_init(test_child) p1 = pe.ParametersEstimator(self.sample_path, g1) - #p1.init_sets_cims_container() p1.fast_init(test_child) sofc1 = p1.compute_parameters_for_node(test_child) - - #sofc1 = p1.sets_of_cims_struct.sets_of_cims[g1.get_positional_node_indx(test_child)] - if not p_set: - self.cache.put(test_child, sofc1) - else: - self.cache.put(set(p_set), sofc1) + #if not p_set: + #self.cache.put(test_child, sofc1) + #else: + self.cache.put(set(p_set), sofc1) sofc2 = None #p_set.append(test_parent) p_set.insert(0, test_parent) @@ -100,12 +122,6 @@ class StructureEstimator: #if sofc2: #print("Sofc2 in CACHE ", sofc2.actual_cims) #print(self.cache.list_of_sets_of_indxs) - - """p2 = pe.ParametersEstimator(self.sample_path, g2) - p2.init_sets_cims_container() - #p2.compute_parameters() - p2.compute_parameters_for_node(test_child) - sofc2 = 
p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)]""" if not sofc2: #print("Cache MISSS SOFC2") complete_info.append(test_parent) @@ -119,15 +135,10 @@ class StructureEstimator: eds2 = list(itertools.product(p_set, test_child)) s2 = st.Structure(l2, indxs2, vals2, eds2, tot_vars_count) g2 = ng.NetworkGraph(s2) - #g2.init_graph() g2.fast_init(test_child) p2 = pe.ParametersEstimator(self.sample_path, g2) - #p2.init_sets_cims_container() p2.fast_init(test_child) sofc2 = p2.compute_parameters_for_node(test_child) - #sofc2 = p2.sets_of_cims_struct.sets_of_cims[g2.get_positional_node_indx(test_child)] - #if p_set: - #set_p_set = set(p_set) self.cache.put(set(p_set), sofc2) for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs): #print("GETTING THIS P COMB", p_comb) @@ -144,15 +155,28 @@ class StructureEstimator: return False return True - def independence_test(self, child_states_numb, cim1, cim2): + def independence_test(self, child_states_numb: int, cim1: condim.ConditionalIntensityMatrix, + cim2: condim.ConditionalIntensityMatrix): + """ + Compute the actual independence test using two cims. + It is performed first the exponential test and if the null hypothesis is not rejected, + it is performed also the chi_test. 
+ + Parameters: + child_states_numb: the cardinality of the test child + cim1: a cim belonging to the graph without test parent + cim2: a cim belonging to the graph with test parent + + Returns: + True iff both tests do NOT reject the null hypothesis of independence + False otherwise + """ M1 = cim1.state_transition_matrix M2 = cim2.state_transition_matrix r1s = M1.diagonal() r2s = M2.diagonal() C1 = cim1.cim C2 = cim2.cim - #print("C1", C1) - #print("C2", C2) F_stats = C2.diagonal() / C1.diagonal() exp_alfa = self.exp_test_sign for val in range(0, child_states_numb): @@ -188,7 +212,16 @@ class StructureEstimator: #print("Chi test", Chi) return True - def one_iteration_of_CTPC_algorithm(self, var_id, tot_vars_count): + def one_iteration_of_CTPC_algorithm(self, var_id: str, tot_vars_count: int): + """ + Performs an iteration of the CTPC algorithm using the node var_id as test_child. + + Parameters: + var_id: the node label of the test child + tot_vars_count: the number of nodes in the net + Returns: + void + """ print("##################TESTING VAR################", var_id) u = list(self.complete_graph.predecessors(var_id)) #tests_parents_numb = len(u) @@ -227,19 +260,39 @@ class StructureEstimator: b += 1 self.cache.clear() - def generate_possible_sub_sets_of_size(self, u, size, parent_indx): + def generate_possible_sub_sets_of_size(self, u: typing.List, size: int, parent_label: str): + """ + Creates a list containing all possible subsets of the list u of size size, + that do not contain the node identified by parent_label. 
+ + Parameters: + u: the list of nodes + size: the size of the subsets + parent_label: the nodes to exclude in the subsets generation + Returns: + a Map Object containing a list of lists + + """ list_without_test_parent = u[:] - list_without_test_parent.remove(parent_indx) + list_without_test_parent.remove(parent_label) return map(list, itertools.combinations(list_without_test_parent, size)) + def ctpc_algorithm(self): + """ + Compute the CTPC algorithm. + Parameters: + void + Returns: + void + """ + ctpc_algo = self.one_iteration_of_CTPC_algorithm + total_vars_numb = self.sample_path.total_variables_count + [ctpc_algo(n, total_vars_numb) for n in self.nodes] + + def remove_diagonal_elements(self, matrix): m = matrix.shape[0] strided = np.lib.stride_tricks.as_strided s0, s1 = matrix.strides return strided(matrix.ravel()[1:], shape=(m - 1, m), strides=(s0 + s1, s1)).reshape(m, -1) - def ctpc_algorithm(self): - ctpc_algo = self.one_iteration_of_CTPC_algorithm - total_vars_numb = self.sample_path.total_variables_count - [ctpc_algo(n, total_vars_numb) for n in self.nodes] - diff --git a/main_package/tests/test_structure_estimator.py b/main_package/tests/test_structure_estimator.py index c3f56f3..e760039 100644 --- a/main_package/tests/test_structure_estimator.py +++ b/main_package/tests/test_structure_estimator.py @@ -1,8 +1,12 @@ import unittest +import numpy as np +import networkx as nx +import math from line_profiler import LineProfiler import sample_path as sp import structure_estimator as se +import cache as ch class TestStructureEstimator(unittest.TestCase): @@ -14,15 +18,48 @@ class TestStructureEstimator(unittest.TestCase): cls.s1.build_structure() def test_init(self): - se1 = se.StructureEstimator(self.s1) + exp_alfa = 0.1 + chi_alfa = 0.1 + se1 = se.StructureEstimator(self.s1, exp_alfa, chi_alfa) self.assertEqual(self.s1, se1.sample_path) - self.assertEqual(se1.complete_graph_frame.shape[0], - self.s1.total_variables_count *(self.s1.total_variables_count - 
1)) + self.assertTrue(np.array_equal(se1.nodes, np.array(self.s1.structure.nodes_labels))) + self.assertTrue(np.array_equal(se1.nodes_indxs, self.s1.structure.nodes_indexes)) + self.assertTrue(np.array_equal(se1.nodes_vals, self.s1.structure.nodes_values)) + self.assertEqual(se1.exp_test_sign, exp_alfa) + self.assertEqual(se1.chi_test_alfa, chi_alfa) + self.assertIsInstance(se1.complete_graph, nx.DiGraph) + self.assertIsInstance(se1.cache, ch.Cache) + + def test_build_complete_graph(self): + exp_alfa = 0.1 + chi_alfa = 0.1 + nodes_numb = len(self.s1.structure.nodes_labels) + se1 = se.StructureEstimator(self.s1, exp_alfa, chi_alfa) + cg = se1.build_complete_graph(self.s1.structure.nodes_labels) + self.assertEqual(len(cg.edges), nodes_numb*(nodes_numb - 1)) + for node in self.s1.structure.nodes_labels: + no_self_loops = self.s1.structure.nodes_labels[:] + no_self_loops.remove(node) + for n2 in no_self_loops: + self.assertIn((node, n2), cg.edges) + + def test_generate_possible_sub_sets_of_size(self): + exp_alfa = 0.1 + chi_alfa = 0.1 + nodes_numb = len(self.s1.structure.nodes_labels) + se1 = se.StructureEstimator(self.s1, exp_alfa, chi_alfa) + + for node in self.s1.structure.nodes_labels: + for b in range(nodes_numb): + sets = se1.generate_possible_sub_sets_of_size(self.s1.structure.nodes_labels, b, node) + sets2 = se1.generate_possible_sub_sets_of_size(self.s1.structure.nodes_labels, b, node) + self.assertEqual(len(list(sets)), math.floor(math.factorial(nodes_numb - 1) / + (math.factorial(b)*math.factorial(nodes_numb -1 - b)))) + for sset in sets2: + self.assertFalse(node in sset) def test_one_iteration(self): se1 = se.StructureEstimator(self.s1, 0.1, 0.1) - #se1.one_iteration_of_CTPC_algorithm('X') - #self.aux_test_complete_test(se1, 'X', 'Y', ['Z']) lp = LineProfiler() lp.add_function(se1.complete_test) lp.add_function(se1.one_iteration_of_CTPC_algorithm) @@ -30,12 +67,16 @@ class TestStructureEstimator(unittest.TestCase): lp_wrapper = lp(se1.ctpc_algorithm) 
lp_wrapper() lp.print_stats() - #se1.ctpc_algorithm() print(se1.complete_graph.edges) print(self.s1.structure.edges) - - def aux_test_complete_test(self, estimator, test_par, test_child, p_set): - estimator.complete_test(test_par, test_child, p_set) + for ed in self.s1.structure.edges: + self.assertIn(tuple(ed), se1.complete_graph.edges) + tuples_edges = [tuple(rec) for rec in self.s1.structure.edges] + spurious_edges = [] + for ed in se1.complete_graph.edges: + if not(ed in tuples_edges): + spurious_edges.append(ed) + print("Spurious Edges:",spurious_edges) if __name__ == '__main__':