1
0
Fork 0

Add refactors

parallel_struct_est
philpMartin 4 years ago
parent 7561bfbcf8
commit cef0280c09
  1. 34
      main_package/classes/network_graph.py
  2. 36
      main_package/classes/structure_estimator.py
  3. 10
      main_package/tests/test_networkgraph.py
  4. 1
      main_package/tests/test_structure_estimator.py

@ -32,9 +32,10 @@ class NetworkGraph():
self.add_edges(self.graph_struct.list_of_edges()) self.add_edges(self.graph_struct.list_of_edges())
self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes() self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes()
self._fancy_indexing = self.build_fancy_indexing_structure(0) self._fancy_indexing = self.build_fancy_indexing_structure(0)
self.build_time_scalar_indexing_structure() self.build_scalar_indexing_structures()
#self.build_time_scalar_indexing_structure()
self.build_time_columns_filtering_structure() self.build_time_columns_filtering_structure()
self.build_transition_scalar_indexing_structure() #self.build_transition_scalar_indexing_structure()
self.build_transition_columns_filtering_structure() self.build_transition_columns_filtering_structure()
def add_nodes(self, list_of_nodes): def add_nodes(self, list_of_nodes):
@ -42,8 +43,10 @@ class NetworkGraph():
set_node_attr = nx.set_node_attributes set_node_attr = nx.set_node_attributes
nodes_indxs = self.graph_struct.list_of_nodes_indexes() nodes_indxs = self.graph_struct.list_of_nodes_indexes()
nodes_vals = self.graph_struct.nodes_values() nodes_vals = self.graph_struct.nodes_values()
pos = 0
for id, node_indx, node_val in zip(list_of_nodes, nodes_indxs, nodes_vals): for id, node_indx, node_val in zip(list_of_nodes, nodes_indxs, nodes_vals):
self.graph.add_node(id, indx=node_indx, val=node_val) self.graph.add_node(id, indx=node_indx, val=node_val, pos_indx=pos)
pos += 1
#set_node_attr(self.graph, {id:node_indx}, 'indx') #set_node_attr(self.graph, {id:node_indx}, 'indx')
def add_edges(self, list_of_edges): def add_edges(self, list_of_edges):
@ -127,7 +130,8 @@ class NetworkGraph():
get_states_number_by_indx = self.graph_struct.get_states_number_by_indx get_states_number_by_indx = self.graph_struct.get_states_number_by_indx
T_vector = np.array([get_states_number_by_indx(node_indx)]) T_vector = np.array([get_states_number_by_indx(node_indx)])
#print(T_vector) #print(T_vector)
T_vector = np.append(T_vector, [get_states_number_by_indx(x) for x in parents_indxs]) #T_vector = np.append(T_vector, [get_states_number_by_indx(x) for x in parents_indxs])
T_vector = np.append(T_vector, parents_indxs)
#print(T_vector) #print(T_vector)
T_vector = T_vector.cumprod().astype(np.int) T_vector = T_vector.cumprod().astype(np.int)
return T_vector return T_vector
@ -141,7 +145,7 @@ class NetworkGraph():
build_time_scalar_indexing_structure_for_a_node = self.build_time_scalar_indexing_structure_for_a_node build_time_scalar_indexing_structure_for_a_node = self.build_time_scalar_indexing_structure_for_a_node
self._time_scalar_indexing_structure = [build_time_scalar_indexing_structure_for_a_node(node_indx, p_indxs) self._time_scalar_indexing_structure = [build_time_scalar_indexing_structure_for_a_node(node_indx, p_indxs)
for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(),
self._fancy_indexing)] self.get_ordered_by_indx_parents_values_for_all_nodes())]
def build_transition_scalar_indexing_structure_for_a_node(self, node_indx, parents_indxs): def build_transition_scalar_indexing_structure_for_a_node(self, node_indx, parents_indxs):
#M_vector = np.array([self.graph_struct.variables_frame.iloc[node_id, 1], #M_vector = np.array([self.graph_struct.variables_frame.iloc[node_id, 1],
@ -150,7 +154,8 @@ class NetworkGraph():
get_states_number_by_indx = self.graph_struct.get_states_number_by_indx get_states_number_by_indx = self.graph_struct.get_states_number_by_indx
M_vector = np.array([node_states_number, M_vector = np.array([node_states_number,
node_states_number]) node_states_number])
M_vector = np.append(M_vector, [get_states_number_by_indx(x) for x in parents_indxs]) #M_vector = np.append(M_vector, [get_states_number_by_indx(x) for x in parents_indxs])
M_vector = np.append(M_vector, parents_indxs)
M_vector = M_vector.cumprod().astype(np.int) M_vector = M_vector.cumprod().astype(np.int)
return M_vector return M_vector
@ -164,7 +169,7 @@ class NetworkGraph():
[build_transition_scalar_indexing_structure_for_a_node(node_indx, p_indxs) [build_transition_scalar_indexing_structure_for_a_node(node_indx, p_indxs)
for node_indx, p_indxs in for node_indx, p_indxs in
zip(self.graph_struct.list_of_nodes_indexes(), zip(self.graph_struct.list_of_nodes_indexes(),
self._fancy_indexing) ] self.get_ordered_by_indx_parents_values_for_all_nodes())]
def build_time_columns_filtering_structure(self): def build_time_columns_filtering_structure(self):
#parents_indexes_list = self._fancy_indexing #parents_indexes_list = self._fancy_indexing
@ -182,6 +187,18 @@ class NetworkGraph():
for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(),
self._fancy_indexing)] self._fancy_indexing)]
def build_scalar_indexing_structures(self):
    """Build the transition and time scalar indexing structures in one pass.

    The parents' values for all nodes are fetched once through
    ``get_ordered_by_indx_parents_values_for_all_nodes`` and paired, in
    order, with the node indexes returned by
    ``graph_struct.list_of_nodes_indexes``.  For every pair the
    per-node transition builder is called first, then the per-node time
    builder.

    The results are stored in ``_transition_scalar_indexing_structure``
    and ``_time_scalar_indexing_structure`` (one entry per node); nothing
    is returned.
    """
    all_parents_vals = self.get_ordered_by_indx_parents_values_for_all_nodes()
    # Bind the per-node builders once instead of looking them up per node.
    make_transition = self.build_transition_scalar_indexing_structure_for_a_node
    make_time = self.build_time_scalar_indexing_structure_for_a_node

    transition_structs = []
    time_structs = []
    nodes_indexes = self.graph_struct.list_of_nodes_indexes()
    for indx, parents_vals in zip(nodes_indexes, all_parents_vals):
        transition_structs.append(make_transition(indx, parents_vals))
        time_structs.append(make_time(indx, parents_vals))

    self._transition_scalar_indexing_structure = transition_structs
    self._time_scalar_indexing_structure = time_structs
def get_nodes(self): def get_nodes(self):
return list(self.graph.nodes) return list(self.graph.nodes)
@ -208,6 +225,9 @@ class NetworkGraph():
return nx.get_node_attributes(self.graph, 'indx')[node_id] return nx.get_node_attributes(self.graph, 'indx')[node_id]
#return self.graph_struct.get_node_indx(node_id) #return self.graph_struct.get_node_indx(node_id)
def get_positional_node_indx(self, node_id):
    """Return the value stored under ``'pos_indx'`` for graph node ``node_id``.

    The lookup goes straight to the node's attribute mapping in
    ``self.graph.nodes``; a missing node or missing attribute propagates
    the underlying lookup error.
    """
    node_attrs = self.graph.nodes[node_id]
    return node_attrs['pos_indx']
@property @property
def time_scalar_indexing_strucure(self): def time_scalar_indexing_strucure(self):
return self._time_scalar_indexing_structure return self._time_scalar_indexing_structure

@ -35,8 +35,8 @@ class StructureEstimator:
complete_graph.add_nodes_from(node_ids) complete_graph.add_nodes_from(node_ids)
complete_graph.add_edges_from(itertools.permutations(node_ids, 2)) complete_graph.add_edges_from(itertools.permutations(node_ids, 2))
return complete_graph return complete_graph
#TODO All the values that concern the test child can be set just once
def complete_test(self, tmp_df, test_parent, test_child, parent_set): def complete_test(self, tmp_df, test_parent, test_child, parent_set, child_states_numb):
p_set = parent_set[:] p_set = parent_set[:]
complete_info = parent_set[:] complete_info = parent_set[:]
complete_info.append(test_parent) complete_info.append(test_parent)
@ -80,13 +80,14 @@ class StructureEstimator:
v1 = v2[v2.Name != test_parent] v1 = v2[v2.Name != test_parent]
#print("D1", d1) #print("D1", d1)
#print("V1", v1) #print("V1", v1)
#TODO the number of variables can be passed in from outside
s1 = st.Structure(d1, v1, self.sample_path.total_variables_count) s1 = st.Structure(d1, v1, self.sample_path.total_variables_count)
g1 = ng.NetworkGraph(s1) g1 = ng.NetworkGraph(s1)
g1.init_graph() g1.init_graph()
p1 = pe.ParametersEstimator(self.sample_path, g1) p1 = pe.ParametersEstimator(self.sample_path, g1)
p1.init_sets_cims_container() p1.init_sets_cims_container()
p1.compute_parameters_for_node(test_child) p1.compute_parameters_for_node(test_child)
sofc1 = p1.sets_of_cims_struct.sets_of_cims[s1.get_positional_node_indx(test_child)] sofc1 = p1.sets_of_cims_struct.sets_of_cims[g1.get_positional_node_indx(test_child)]
if not p_set: if not p_set:
self.cache.put(test_child, sofc1) self.cache.put(test_child, sofc1)
else: else:
@ -107,7 +108,7 @@ class StructureEstimator:
p2.compute_parameters_for_node(test_child) p2.compute_parameters_for_node(test_child)
sofc2 = p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)]""" sofc2 = p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)]"""
if not sofc2: if not sofc2:
print("Cache Miss SOC2") #print("Cache Miss SOC2")
#parent_set.append(test_parent) #parent_set.append(test_parent)
#d2 = tmp_df.loc[tmp_df['From'].isin(p_set)] #d2 = tmp_df.loc[tmp_df['From'].isin(p_set)]
#v2 = self.sample_path.structure.variables_frame.loc[ #v2 = self.sample_path.structure.variables_frame.loc[
@ -121,7 +122,7 @@ class StructureEstimator:
p2 = pe.ParametersEstimator(self.sample_path, g2) p2 = pe.ParametersEstimator(self.sample_path, g2)
p2.init_sets_cims_container() p2.init_sets_cims_container()
p2.compute_parameters_for_node(test_child) p2.compute_parameters_for_node(test_child)
sofc2 = p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)] sofc2 = p2.sets_of_cims_struct.sets_of_cims[g2.get_positional_node_indx(test_child)]
if p_set: if p_set:
#set_p_set = set(p_set) #set_p_set = set(p_set)
self.cache.put(set(p_set), sofc2) self.cache.put(set(p_set), sofc2)
@ -134,15 +135,19 @@ class StructureEstimator:
#cim2 = sofc2.actual_cims[j] #cim2 = sofc2.actual_cims[j]
#print(indx) #print(indx)
#print("Run Test", i, j) #print("Run Test", i, j)
if not self.independence_test(test_child, cim1, sofc2.actual_cims[j]): if not self.independence_test(child_states_numb, cim1, sofc2.actual_cims[j]):
return False return False
return True return True
def independence_test(self, tested_child, cim1, cim2): def independence_test(self, child_states_numb, cim1, cim2):
r1s = cim1.state_transition_matrix.diagonal() M1 = cim1.state_transition_matrix
r2s = cim2.state_transition_matrix.diagonal() M2 = cim2.state_transition_matrix
F_stats = cim2.cim.diagonal() / cim1.cim.diagonal() r1s = M1.diagonal()
child_states_numb = self.sample_path.structure.get_states_number(tested_child) r2s = M2.diagonal()
C1 = cim1.cim
C2 = cim2.cim
F_stats = C2.diagonal() / C1.diagonal()
#child_states_numb = self.sample_path.structure.get_states_number(tested_child)
for val in range(0, child_states_numb): for val in range(0, child_states_numb):
if F_stats[val] < f_dist.ppf(self.exp_test_sign / 2, r1s[val], r2s[val]) or \ if F_stats[val] < f_dist.ppf(self.exp_test_sign / 2, r1s[val], r2s[val]) or \
F_stats[val] > f_dist.ppf(1 - self.exp_test_sign / 2, r1s[val], r2s[val]): F_stats[val] > f_dist.ppf(1 - self.exp_test_sign / 2, r1s[val], r2s[val]):
@ -150,9 +155,9 @@ class StructureEstimator:
return False return False
#M1_no_diag = self.remove_diagonal_elements(cim1.state_transition_matrix) #M1_no_diag = self.remove_diagonal_elements(cim1.state_transition_matrix)
#M2_no_diag = self.remove_diagonal_elements(cim2.state_transition_matrix) #M2_no_diag = self.remove_diagonal_elements(cim2.state_transition_matrix)
M1_no_diag = cim1.state_transition_matrix[~np.eye(cim1.state_transition_matrix.shape[0], dtype=bool)].reshape(cim1.state_transition_matrix.shape[0], -1) M1_no_diag = M1[~np.eye(M1.shape[0], dtype=bool)].reshape(M1.shape[0], -1)
M2_no_diag = cim2.state_transition_matrix[~np.eye(cim2.state_transition_matrix.shape[0], dtype=bool)].reshape( M2_no_diag = M2[~np.eye(M2.shape[0], dtype=bool)].reshape(
cim2.state_transition_matrix.shape[0], -1) M2.shape[0], -1)
chi_2_quantile = chi2_dist.ppf(1 - self.chi_test_alfa, child_states_numb - 1) chi_2_quantile = chi2_dist.ppf(1 - self.chi_test_alfa, child_states_numb - 1)
""" """
Ks = np.sqrt(cim1.state_transition_matrix.diagonal() / cim2.state_transition_matrix.diagonal()) Ks = np.sqrt(cim1.state_transition_matrix.diagonal() / cim2.state_transition_matrix.diagonal())
@ -181,6 +186,7 @@ class StructureEstimator:
tests_parents_numb = len(u) tests_parents_numb = len(u)
complete_frame = self.complete_graph_frame complete_frame = self.complete_graph_frame
test_frame = complete_frame.loc[complete_frame['To'].isin([var_id])] test_frame = complete_frame.loc[complete_frame['To'].isin([var_id])]
child_states_numb = self.sample_path.structure.get_states_number(var_id)
b = 0 b = 0
while b < len(u): while b < len(u):
#for parent_id in u: #for parent_id in u:
@ -198,7 +204,7 @@ class StructureEstimator:
for parents_set in S: for parents_set in S:
#print("Parent Set", parents_set) #print("Parent Set", parents_set)
#print("Test Parent", u[parent_indx]) #print("Test Parent", u[parent_indx])
if self.complete_test(test_frame, u[parent_indx], var_id, parents_set): if self.complete_test(test_frame, u[parent_indx], var_id, parents_set, child_states_numb):
#print("Removing EDGE:", u[parent_indx], var_id) #print("Removing EDGE:", u[parent_indx], var_id)
self.complete_graph.remove_edge(u[parent_indx], var_id) self.complete_graph.remove_edge(u[parent_indx], var_id)
#print(self.complete_graph_frame) #print(self.complete_graph_frame)

@ -179,12 +179,16 @@ class TestNetworkGraph(unittest.TestCase):
def test_init_graph(self):
    """Profile ``NetworkGraph.init_graph`` and print the scalar indexing structures.

    This is a profiling run rather than an assertion-based test: it wraps
    ``init_graph`` with a ``LineProfiler``, executes it, prints the two
    scalar indexing structures it produced and then the profiler stats.
    """
    g1 = ng.NetworkGraph(self.s1.structure)
    lp = LineProfiler()
    lp_wrapper = lp(g1.init_graph)
    # Run the profiled init_graph first: the structures printed below are
    # built during init_graph, so printing them earlier shows nothing useful.
    lp_wrapper()
    print(g1.time_scalar_indexing_strucure)
    print(g1.transition_scalar_indexing_structure)
    # Output recorded by the original author (input data not visible here):
    #   [array([3]), array([3, 9]), array([ 3, 9, 27])]
    #   [array([3, 9]), array([ 3, 9, 27]), array([ 3, 9, 27, 81])]
    lp.print_stats()
"""def test_remove_node(self): """def test_remove_node(self):

@ -26,6 +26,7 @@ class TestStructureEstimator(unittest.TestCase):
lp = LineProfiler() lp = LineProfiler()
lp.add_function(se1.complete_test) lp.add_function(se1.complete_test)
lp.add_function(se1.one_iteration_of_CTPC_algorithm) lp.add_function(se1.one_iteration_of_CTPC_algorithm)
lp.add_function(se1.independence_test)
lp_wrapper = lp(se1.ctpc_algorithm) lp_wrapper = lp(se1.ctpc_algorithm)
lp_wrapper() lp_wrapper()
lp.print_stats() lp.print_stats()