From 7561bfbcf8e6352d6b950d0fff6b264cacf4ed76 Mon Sep 17 00:00:00 2001 From: philpMartin Date: Tue, 28 Jul 2020 19:38:17 +0200 Subject: [PATCH] Add some optimizations --- main_package/classes/cache.py | 1 + .../classes/conditional_intensity_matrix.py | 5 +- main_package/classes/network_graph.py | 63 +++++++++-------- .../classes/sets_of_cims_container.py | 1 - main_package/classes/structure.py | 6 +- main_package/classes/structure_estimator.py | 68 +++++++++++-------- main_package/tests/test_networkgraph.py | 8 ++- .../tests/test_parameters_estimator.py | 10 ++- 8 files changed, 90 insertions(+), 72 deletions(-) diff --git a/main_package/classes/cache.py b/main_package/classes/cache.py index 13d7f6c..b3e15e1 100644 --- a/main_package/classes/cache.py +++ b/main_package/classes/cache.py @@ -1,6 +1,7 @@ import typing import set_of_cims as sofc + class Cache: def __init__(self): diff --git a/main_package/classes/conditional_intensity_matrix.py b/main_package/classes/conditional_intensity_matrix.py index 035c48b..3d78665 100644 --- a/main_package/classes/conditional_intensity_matrix.py +++ b/main_package/classes/conditional_intensity_matrix.py @@ -2,18 +2,15 @@ import numpy as np class ConditionalIntensityMatrix: - def __init__(self, state_residence_times, state_transition_matrix): self._state_residence_times = state_residence_times self._state_transition_matrix = state_transition_matrix #self.cim = np.zeros(shape=(dimension, dimension), dtype=float) - self._cim = self.state_transition_matrix.astype(np.float) + self._cim = self.state_transition_matrix.astype(np.float64) def compute_cim_coefficients(self): np.fill_diagonal(self._cim, self._cim.diagonal() * -1) - #print(self._cim) self._cim = ((self._cim.T + 1) / (self._state_residence_times + 1)).T - #np.fill_diagonal(self.state_transition_matrix, 0) @property def state_residence_times(self): diff --git a/main_package/classes/network_graph.py b/main_package/classes/network_graph.py index 833d40f..548be3a 100644 --- 
a/main_package/classes/network_graph.py +++ b/main_package/classes/network_graph.py @@ -39,25 +39,26 @@ class NetworkGraph(): def add_nodes(self, list_of_nodes): #self.graph.add_nodes_from(list_of_nodes) - for id in list_of_nodes: - self.graph.add_node(id) - nx.set_node_attributes(self.graph, {id:self.graph_struct.get_node_indx(id)}, 'indx') + set_node_attr = nx.set_node_attributes + nodes_indxs = self.graph_struct.list_of_nodes_indexes() + nodes_vals = self.graph_struct.nodes_values() + for id, node_indx, node_val in zip(list_of_nodes, nodes_indxs, nodes_vals): + self.graph.add_node(id, indx=node_indx, val=node_val) + #set_node_attr(self.graph, {id:node_indx}, 'indx') def add_edges(self, list_of_edges): self.graph.add_edges_from(list_of_edges) def get_ordered_by_indx_set_of_parents(self, node): - #print(node) - #ordered_set = {} parents = self.get_parents_by_id(node) - #print(parents) - sorted_parents = [x for _, x in sorted(zip(self.graph_struct.list_of_nodes_labels(), parents))] - #print(sorted_parents) - #print(parents) + nodes = self.get_nodes() + sorted_parents = [x for _, x in sorted(zip(nodes, parents))] #p_indxes= [] #p_values = [] - p_indxes = [self.get_node_indx(node) for node in parents] - p_values = [self.get_states_number_by_indx(indx) for indx in p_indxes] + get_node_indx = self.get_node_indx + get_states_number_by_indx = self.get_states_number_by_indx + p_indxes = [get_node_indx(node) for node in sorted_parents] + p_values = [get_states_number_by_indx(indx) for indx in p_indxes] """for n in parents: #indx = self.graph_struct.get_node_indx(n) @@ -67,17 +68,15 @@ class NetworkGraph(): p_indxes.append(node_indx) #p_values.append(self.graph_struct.get_states_number(n)) p_values.append(self.get_states_number_by_indx(node_indx))""" - ordered_set = (sorted_parents, p_indxes, p_values) - #print(ordered_set) - - #ordered_set = {k: v for k, v in sorted(ordered_set.items(), key=lambda item: item[1])} - return ordered_set + #ordered_set = (sorted_parents, 
p_indxes, p_values) + return (sorted_parents, p_indxes, p_values) def get_ord_set_of_par_of_all_nodes(self): - result = [] + #result = [] #for node in self._nodes_labels: #result.append(self.get_ordered_by_indx_set_of_parents(node)) - result = [self.get_ordered_by_indx_set_of_parents(node) for node in self._nodes_labels] + get_ordered_by_indx_set_of_parents = self.get_ordered_by_indx_set_of_parents + result = [get_ordered_by_indx_set_of_parents(node) for node in self._nodes_labels] return result """def get_ordered_by_indx_parents_values(self, node): @@ -96,10 +95,11 @@ class NetworkGraph(): return pars_values def get_states_number_of_all_nodes_sorted(self): - states_number_list = [] + #states_number_list = [] #for node in self._nodes_labels: #states_number_list.append(self.get_states_number(node)) - states_number_list = [self.get_states_number(node) for node in self._nodes_labels] + get_states_number = self.get_states_number + states_number_list = [get_states_number(node) for node in self._nodes_labels] return states_number_list def build_fancy_indexing_structure(self, start_indx): @@ -124,10 +124,10 @@ class NetworkGraph(): #print(node_indx) #print("Parents_id", parents_indxs) #T_vector = np.array([self.graph_struct.variables_frame.iloc[node_id, 1].astype(np.int)]) - T_vector = np.array([self.get_states_number_by_indx(node_indx)]) + get_states_number_by_indx = self.graph_struct.get_states_number_by_indx + T_vector = np.array([get_states_number_by_indx(node_indx)]) #print(T_vector) - #print("Here ", self.graph_struct.variables_frame.iloc[parents_id[0], 1]) - T_vector = np.append(T_vector, [self.graph_struct.get_states_number_by_indx(x) for x in parents_indxs]) + T_vector = np.append(T_vector, [get_states_number_by_indx(x) for x in parents_indxs]) #print(T_vector) T_vector = T_vector.cumprod().astype(np.int) return T_vector @@ -138,16 +138,19 @@ class NetworkGraph(): """for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing): 
self._time_scalar_indexing_structure.append( self.build_time_scalar_indexing_structure_for_a_node(node_indx, p_indxs))""" - self._time_scalar_indexing_structure = [self.build_time_scalar_indexing_structure_for_a_node(node_indx, p_indxs) + build_time_scalar_indexing_structure_for_a_node = self.build_time_scalar_indexing_structure_for_a_node + self._time_scalar_indexing_structure = [build_time_scalar_indexing_structure_for_a_node(node_indx, p_indxs) for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing)] def build_transition_scalar_indexing_structure_for_a_node(self, node_indx, parents_indxs): #M_vector = np.array([self.graph_struct.variables_frame.iloc[node_id, 1], #self.graph_struct.variables_frame.iloc[node_id, 1].astype(np.int)]) - M_vector = np.array([self.get_states_number_by_indx(node_indx), - self.get_states_number_by_indx(node_indx)]) - M_vector = np.append(M_vector, [self.graph_struct.get_states_number_by_indx(x) for x in parents_indxs]) + node_states_number = self.get_states_number_by_indx(node_indx) + get_states_number_by_indx = self.graph_struct.get_states_number_by_indx + M_vector = np.array([node_states_number, + node_states_number]) + M_vector = np.append(M_vector, [get_states_number_by_indx(x) for x in parents_indxs]) M_vector = M_vector.cumprod().astype(np.int) return M_vector @@ -156,8 +159,9 @@ class NetworkGraph(): """for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing): self._transition_scalar_indexing_structure.append( self.build_transition_scalar_indexing_structure_for_a_node(node_indx, p_indxs))""" + build_transition_scalar_indexing_structure_for_a_node = self.build_transition_scalar_indexing_structure_for_a_node self._transition_scalar_indexing_structure = \ - [self.build_transition_scalar_indexing_structure_for_a_node(node_indx, p_indxs) + [build_transition_scalar_indexing_structure_for_a_node(node_indx, p_indxs) for node_indx, p_indxs in 
zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing) ] @@ -191,7 +195,8 @@ class NetworkGraph(): return list(self.graph.predecessors(node_id)) def get_states_number(self, node_id): - return self.graph_struct.get_states_number(node_id) + #return self.graph_struct.get_states_number(node_id) + return self.graph.nodes[node_id]['val'] def get_states_number_by_indx(self, node_indx): return self.graph_struct.get_states_number_by_indx(node_indx) diff --git a/main_package/classes/sets_of_cims_container.py b/main_package/classes/sets_of_cims_container.py index a15f73b..2bfc6ca 100644 --- a/main_package/classes/sets_of_cims_container.py +++ b/main_package/classes/sets_of_cims_container.py @@ -6,7 +6,6 @@ class SetsOfCimsContainer: """ Aggrega un insieme di oggetti SetOfCims """ - # list_of_vars_orders contiene tutte le liste con i parent ordinati secondo il valore indx def __init__(self, list_of_keys, states_number_per_node, list_of_parents_states_number): self.sets_of_cims = None self.init_cims_structure(list_of_keys, states_number_per_node, list_of_parents_states_number) diff --git a/main_package/classes/structure.py b/main_package/classes/structure.py index 83d2e47..c706ee3 100644 --- a/main_package/classes/structure.py +++ b/main_package/classes/structure.py @@ -1,5 +1,6 @@ import numpy as np + class Structure: """ Contiene tutte il informazioni sulla struttura della rete (connessione dei nodi, valori assumibili dalle variabili) @@ -25,7 +26,7 @@ class Structure: return self.variables_frame[self.name_label].values.tolist() def list_of_nodes_indexes(self): - return list(self.variables_frame.index) + return self.variables_frame.index.to_list() def get_node_id(self, node_indx): return self.variables_frame[self.name_label][node_indx] @@ -45,6 +46,9 @@ class Structure: #print("Node indx", node_indx) return self.variables_frame[self.value_label][node_indx] + def nodes_values(self): + return self.variables_frame[self.value_label].to_list() + def 
total_variables_number(self): return self.total_variables_number diff --git a/main_package/classes/structure_estimator.py b/main_package/classes/structure_estimator.py index 6963aa0..d19b63f 100644 --- a/main_package/classes/structure_estimator.py +++ b/main_package/classes/structure_estimator.py @@ -4,7 +4,7 @@ import itertools import networkx as nx from scipy.stats import f as f_dist from scipy.stats import chi2 as chi2_dist -from numba import njit + @@ -36,16 +36,17 @@ class StructureEstimator: complete_graph.add_edges_from(itertools.permutations(node_ids, 2)) return complete_graph - def complete_test(self, test_parent, test_child, parent_set): + def complete_test(self, tmp_df, test_parent, test_child, parent_set): p_set = parent_set[:] complete_info = parent_set[:] complete_info.append(test_parent) - tmp_df = self.complete_graph_frame.loc[self.complete_graph_frame['To'].isin([test_child])] + #tmp_df = self.complete_graph_frame.loc[self.complete_graph_frame['To'].isin([test_child])] #tmp_df = self.complete_graph_frame.loc[np.in1d(self.complete_graph_frame['To'], test_child)] d2 = tmp_df.loc[tmp_df['From'].isin(complete_info)] complete_info.append(test_child) - v2 = self.sample_path.structure.variables_frame.loc[ - self.sample_path.structure.variables_frame['Name'].isin(complete_info)] + values_frame = self.sample_path.structure.variables_frame + v2 = values_frame.loc[ + values_frame['Name'].isin(complete_info)] #print(tmp_df) #d1 = tmp_df.loc[tmp_df['From'].isin(parent_set)] @@ -63,10 +64,13 @@ class StructureEstimator: g2 = ng.NetworkGraph(s2) g2.init_graph()""" #parent_set.append(test_child) - sofc1 = None + #sofc1 = None #if not sofc1: if not p_set: sofc1 = self.cache.find(test_child) + else: + sofc1 = self.cache.find(set(p_set)) + if not sofc1: #d1 = tmp_df.loc[tmp_df['From'].isin(parent_set)] d1 = d2[d2.From != test_parent] @@ -81,17 +85,19 @@ class StructureEstimator: g1.init_graph() p1 = pe.ParametersEstimator(self.sample_path, g1) 
p1.init_sets_cims_container() - #print("Computing params for",test_child, test_parent, parent_set) p1.compute_parameters_for_node(test_child) sofc1 = p1.sets_of_cims_struct.sets_of_cims[s1.get_positional_node_indx(test_child)] - self.cache.put(test_child,sofc1) + if not p_set: + self.cache.put(test_child, sofc1) + else: + self.cache.put(set(p_set), sofc1) sofc2 = None p_set.append(test_parent) if p_set: #p_set.append(test_parent) #print("PSET ", p_set) - set_p_set = set(p_set) - sofc2 = self.cache.find(set_p_set) + #set_p_set = set(p_set) + sofc2 = self.cache.find(set(p_set)) #print("Sofc2 ", sofc2) #print(self.cache.list_of_sets_of_indxs) @@ -100,7 +106,7 @@ class StructureEstimator: #p2.compute_parameters() p2.compute_parameters_for_node(test_child) sofc2 = p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)]""" - if not sofc2 or p_set: + if not sofc2: print("Cache Miss SOC2") #parent_set.append(test_parent) #d2 = tmp_df.loc[tmp_df['From'].isin(p_set)] @@ -114,12 +120,11 @@ class StructureEstimator: g2.init_graph() p2 = pe.ParametersEstimator(self.sample_path, g2) p2.init_sets_cims_container() - # p2.compute_parameters() p2.compute_parameters_for_node(test_child) sofc2 = p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)] if p_set: #set_p_set = set(p_set) - self.cache.put(set_p_set, sofc2) + self.cache.put(set(p_set), sofc2) end = 0 increment = self.sample_path.structure.get_states_number(test_parent) for cim1 in sofc1.actual_cims: @@ -143,8 +148,11 @@ class StructureEstimator: F_stats[val] > f_dist.ppf(1 - self.exp_test_sign / 2, r1s[val], r2s[val]): print("CONDITIONALLY DEPENDENT EXP") return False - M1_no_diag = self.remove_diagonal_elements(cim1.state_transition_matrix) - M2_no_diag = self.remove_diagonal_elements(cim2.state_transition_matrix) + #M1_no_diag = self.remove_diagonal_elements(cim1.state_transition_matrix) + #M2_no_diag = self.remove_diagonal_elements(cim2.state_transition_matrix) + M1_no_diag = 
cim1.state_transition_matrix[~np.eye(cim1.state_transition_matrix.shape[0], dtype=bool)].reshape(cim1.state_transition_matrix.shape[0], -1) + M2_no_diag = cim2.state_transition_matrix[~np.eye(cim2.state_transition_matrix.shape[0], dtype=bool)].reshape( + cim2.state_transition_matrix.shape[0], -1) chi_2_quantile = chi2_dist.ppf(1 - self.chi_test_alfa, child_states_numb - 1) """ Ks = np.sqrt(cim1.state_transition_matrix.diagonal() / cim2.state_transition_matrix.diagonal()) @@ -170,12 +178,10 @@ class StructureEstimator: def one_iteration_of_CTPC_algorithm(self, var_id): u = list(self.complete_graph.predecessors(var_id)) - #TODO aggiungere qui il filtraggio del complete_graph_frame verso il nodo di arrivo 'To' var_id e passare il frame a complete test - #TODO trovare un modo per passare direttamente anche i valori delle variabili comprese nel test del nodo var_id tests_parents_numb = len(u) - #print(u) + complete_frame = self.complete_graph_frame + test_frame = complete_frame.loc[complete_frame['To'].isin([var_id])] b = 0 - #parent_indx = 0 while b < len(u): #for parent_id in u: parent_indx = 0 @@ -186,13 +192,13 @@ class StructureEstimator: #print("Parent Indx", parent_indx) #if not list(self.generate_possible_sub_sets_of_size(u, b, u[parent_indx])): #break - S = self.generate_possible_sub_sets_of_size(u, b, u[parent_indx]) + S = self.generate_possible_sub_sets_of_size(u, b, parent_indx) #print("U Set", u) #print("S", S) for parents_set in S: #print("Parent Set", parents_set) #print("Test Parent", u[parent_indx]) - if self.complete_test(u[parent_indx], var_id, parents_set): + if self.complete_test(test_frame, u[parent_indx], var_id, parents_set): #print("Removing EDGE:", u[parent_indx], var_id) self.complete_graph.remove_edge(u[parent_indx], var_id) #print(self.complete_graph_frame) @@ -200,7 +206,9 @@ class StructureEstimator: self.complete_graph_frame.drop( self.complete_graph_frame[(self.complete_graph_frame.From == u[parent_indx]) & (self.complete_graph_frame.To 
== var_id)].index)""" - self.complete_graph_frame.drop(self.complete_graph_frame[(self.complete_graph_frame.From == u[parent_indx]) & (self.complete_graph_frame.To == var_id)].index) + + complete_frame.drop(complete_frame[(complete_frame.From == u[parent_indx]) & + (complete_frame.To == var_id)].index, inplace=True) #print(self.complete_graph_frame) #u.remove(u[parent_indx]) del u[parent_indx] @@ -210,13 +218,13 @@ class StructureEstimator: if not removed: parent_indx += 1 b += 1 - self.cache.clear() + self.cache.clear() - def generate_possible_sub_sets_of_size(self, u, size, parent_id): + def generate_possible_sub_sets_of_size(self, u, size, parent_indx): #print("Inside Generate subsets", u) #print("InsideGenerate Subsets", parent_id) list_without_test_parent = u[:] - list_without_test_parent.remove(parent_id) + del list_without_test_parent[parent_indx] # u.remove(parent_id) #print(list(map(list, itertools.combinations(list_without_test_parent, size)))) return map(list, itertools.combinations(list_without_test_parent, size)) @@ -228,7 +236,11 @@ class StructureEstimator: return strided(matrix.ravel()[1:], shape=(m - 1, m), strides=(s0 + s1, s1)).reshape(m, -1) def ctpc_algorithm(self): - for node_id in self.sample_path.structure.list_of_nodes_labels(): - print("TESTING VAR:", node_id) - self.one_iteration_of_CTPC_algorithm(node_id) + ctpc_algo = self.one_iteration_of_CTPC_algorithm + nodes = self.sample_path.structure.list_of_nodes_labels() + #for node_id in self.sample_path.structure.list_of_nodes_labels(): + #print("TESTING VAR:", node_id) + #self.one_iteration_of_CTPC_algorithm(node_id) + #print(self.complete_graph_frame) + [ctpc_algo(n) for n in nodes] diff --git a/main_package/tests/test_networkgraph.py b/main_package/tests/test_networkgraph.py index 438cfa9..c2e21e6 100644 --- a/main_package/tests/test_networkgraph.py +++ b/main_package/tests/test_networkgraph.py @@ -179,10 +179,12 @@ class TestNetworkGraph(unittest.TestCase): def test_init_graph(self): g1 = 
ng.NetworkGraph(self.s1.structure) + #g1.init_graph() lp = LineProfiler() - lp.add_function(g1.get_ordered_by_indx_set_of_parents) - lp_wrapper = lp(g1.init_graph) - lp_wrapper() + #lp.add_function(g1.get_ordered_by_indx_set_of_parents) + #lp.add_function(g1.get_states_number) + lp_wrapper = lp(g1.get_states_number) + lp_wrapper('X') lp.print_stats() """def test_remove_node(self): diff --git a/main_package/tests/test_parameters_estimator.py b/main_package/tests/test_parameters_estimator.py index b4c70c0..d647c78 100644 --- a/main_package/tests/test_parameters_estimator.py +++ b/main_package/tests/test_parameters_estimator.py @@ -57,14 +57,12 @@ class TestParametersEstimatior(unittest.TestCase): self.assertTrue(np.all(np.isclose(r1, r2, 1e-01, 1e-01) == True)) def test_compute_parameters_for_node(self):#TODO Questo non è un test - self.g1.remove_node('Y') - print(self.g1.time_filtering) pe1 = pe.ParametersEstimator(self.s1, self.g1) - pe1.init_sets_cims_container() + #pe1.init_sets_cims_container() lp = LineProfiler() - #lp.add_function(pe1.init_sets_cims_container) - lp_wrapper = lp(pe1.compute_parameters_for_node) - lp_wrapper('X') + lp_wrapper = lp(pe1.init_sets_cims_container) + #lp.add_function(pe1.sets_of_cims_struct.init_cims_structure) + lp_wrapper() lp.print_stats() #pe1.init_sets_cims_container() #pe1.compute_parameters_for_node('Y')