1
0
Fork 0

Add some optimizations

parallel_struct_est
philpMartin 4 years ago
parent ee029dd3d5
commit 7561bfbcf8
  1. 1
      main_package/classes/cache.py
  2. 5
      main_package/classes/conditional_intensity_matrix.py
  3. 63
      main_package/classes/network_graph.py
  4. 1
      main_package/classes/sets_of_cims_container.py
  5. 6
      main_package/classes/structure.py
  6. 64
      main_package/classes/structure_estimator.py
  7. 8
      main_package/tests/test_networkgraph.py
  8. 10
      main_package/tests/test_parameters_estimator.py

@ -1,6 +1,7 @@
import typing
import set_of_cims as sofc
class Cache:
def __init__(self):

@ -2,18 +2,15 @@ import numpy as np
class ConditionalIntensityMatrix:
def __init__(self, state_residence_times, state_transition_matrix):
    """Store the sufficient statistics and create the CIM working matrix.

    :param state_residence_times: residence-time values kept in
        ``self._state_residence_times``
    :param state_transition_matrix: object exposing ``astype`` (e.g. a
        numpy array); kept in ``self._state_transition_matrix``
    """
    self._state_residence_times = state_residence_times
    self._state_transition_matrix = state_transition_matrix
    # np.float was only an alias of the builtin float and was removed in
    # NumPy 1.24; np.float64 names the same dtype explicitly.
    # NOTE(review): state_transition_matrix is read back through the class
    # accessor of that name, which is defined outside this block.
    self._cim = self.state_transition_matrix.astype(np.float64)
def compute_cim_coefficients(self):
    """Recompute ``self._cim`` from the counts it currently holds.

    The diagonal of ``self._cim`` is negated first; afterwards one is added
    to every entry and the result is divided by the residence times plus one.
    """
    negated_diagonal = self._cim.diagonal() * -1
    np.fill_diagonal(self._cim, negated_diagonal)
    # Dividing the transposed matrix and transposing back makes the
    # residence times broadcast along the rows instead of the columns.
    shifted_counts = self._cim.T + 1
    shifted_times = self._state_residence_times + 1
    self._cim = (shifted_counts / shifted_times).T
@property
def state_residence_times(self):

@ -39,25 +39,26 @@ class NetworkGraph():
def add_nodes(self, list_of_nodes):
    """Add the given nodes to the graph together with their attributes.

    Every node is stored with two attributes: ``indx``, taken from
    ``graph_struct.list_of_nodes_indexes()``, and ``val``, taken from
    ``graph_struct.nodes_values()``. The three sequences are paired
    positionally, so iteration stops at the shortest one.

    :param list_of_nodes: iterable with the identifiers of the nodes to add
    """
    nodes_indxs = self.graph_struct.list_of_nodes_indexes()
    nodes_vals = self.graph_struct.nodes_values()
    # A single add_node call carries both attributes, so no separate
    # per-node attribute pass is needed.
    for node_id, node_indx, node_val in zip(list_of_nodes, nodes_indxs, nodes_vals):
        self.graph.add_node(node_id, indx=node_indx, val=node_val)
def add_edges(self, list_of_edges):
    """Insert every edge of *list_of_edges* into the underlying graph.

    :param list_of_edges: edges, forwarded unchanged to the graph's
        ``add_edges_from``
    """
    graph = self.graph
    graph.add_edges_from(list_of_edges)
def get_ordered_by_indx_set_of_parents(self, node):
    """Return the parents of *node* ordered by their node index.

    :param node: identifier of the node whose parents are requested
    :return: tuple ``(sorted_parents, p_indxes, p_values)`` of three
        parallel lists: the parent identifiers, their indexes as given by
        ``get_node_indx`` and the states number of each index as given by
        ``get_states_number_by_indx``
    """
    get_node_indx = self.get_node_indx
    # Sort on the index of each parent itself. Zipping the parents with the
    # graph's node labels and sorting on those labels ordered the parents by
    # names that have nothing to do with them.
    indexed_parents = sorted(
        ((get_node_indx(parent), parent) for parent in self.get_parents_by_id(node)),
        key=lambda pair: pair[0],
    )
    sorted_parents = [parent for _, parent in indexed_parents]
    p_indxes = [indx for indx, _ in indexed_parents]
    p_values = [self.get_states_number_by_indx(indx) for indx in p_indxes]
    return (sorted_parents, p_indxes, p_values)
def get_ord_set_of_par_of_all_nodes(self):
    """Return the ordered parent set of every node in ``self._nodes_labels``.

    :return: list holding one ``get_ordered_by_indx_set_of_parents`` result
        per label, in the order of ``self._nodes_labels``
    """
    # Built exactly once: each call performs a parent lookup per node.
    return [self.get_ordered_by_indx_set_of_parents(node) for node in self._nodes_labels]
"""def get_ordered_by_indx_parents_values(self, node):
@ -96,10 +95,11 @@ class NetworkGraph():
return pars_values
def get_states_number_of_all_nodes_sorted(self):
    """Return the states number of every node in ``self._nodes_labels``.

    :return: list of ``get_states_number`` results, in the order of
        ``self._nodes_labels``
    """
    # Built exactly once; no placeholder list or repeated pass is needed.
    return [self.get_states_number(node) for node in self._nodes_labels]
def build_fancy_indexing_structure(self, start_indx):
@ -124,10 +124,10 @@ class NetworkGraph():
#print(node_indx)
#print("Parents_id", parents_indxs)
#T_vector = np.array([self.graph_struct.variables_frame.iloc[node_id, 1].astype(np.int)])
T_vector = np.array([self.get_states_number_by_indx(node_indx)])
get_states_number_by_indx = self.graph_struct.get_states_number_by_indx
T_vector = np.array([get_states_number_by_indx(node_indx)])
#print(T_vector)
#print("Here ", self.graph_struct.variables_frame.iloc[parents_id[0], 1])
T_vector = np.append(T_vector, [self.graph_struct.get_states_number_by_indx(x) for x in parents_indxs])
T_vector = np.append(T_vector, [get_states_number_by_indx(x) for x in parents_indxs])
#print(T_vector)
T_vector = T_vector.cumprod().astype(np.int)
return T_vector
@ -138,16 +138,19 @@ class NetworkGraph():
"""for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing):
self._time_scalar_indexing_structure.append(
self.build_time_scalar_indexing_structure_for_a_node(node_indx, p_indxs))"""
self._time_scalar_indexing_structure = [self.build_time_scalar_indexing_structure_for_a_node(node_indx, p_indxs)
build_time_scalar_indexing_structure_for_a_node = self.build_time_scalar_indexing_structure_for_a_node
self._time_scalar_indexing_structure = [build_time_scalar_indexing_structure_for_a_node(node_indx, p_indxs)
for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(),
self._fancy_indexing)]
def build_transition_scalar_indexing_structure_for_a_node(self, node_indx, parents_indxs):
    """Build the transition scalar-indexing vector of one node.

    The vector is the running product of ``[n, n, p_1, ..., p_k]``, where
    ``n`` is the states number of the node and ``p_j`` the states number of
    its j-th parent.

    :param node_indx: index of the node
    :param parents_indxs: iterable with the indexes of the node's parents
    :return: integer numpy array with ``2 + len(parents_indxs)`` entries
    """
    node_states_number = self.get_states_number_by_indx(node_indx)
    get_states_number_by_indx = self.graph_struct.get_states_number_by_indx
    parents_states_numbers = [get_states_number_by_indx(x) for x in parents_indxs]
    M_vector = np.append(
        np.array([node_states_number, node_states_number]),
        parents_states_numbers,
    )
    # np.int was an alias of the builtin int and was removed in NumPy 1.24;
    # the builtin selects the same dtype.
    return M_vector.cumprod().astype(int)
@ -156,8 +159,9 @@ class NetworkGraph():
"""for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing):
self._transition_scalar_indexing_structure.append(
self.build_transition_scalar_indexing_structure_for_a_node(node_indx, p_indxs))"""
build_transition_scalar_indexing_structure_for_a_node = self.build_transition_scalar_indexing_structure_for_a_node
self._transition_scalar_indexing_structure = \
[self.build_transition_scalar_indexing_structure_for_a_node(node_indx, p_indxs)
[build_transition_scalar_indexing_structure_for_a_node(node_indx, p_indxs)
for node_indx, p_indxs in
zip(self.graph_struct.list_of_nodes_indexes(),
self._fancy_indexing) ]
@ -191,7 +195,8 @@ class NetworkGraph():
return list(self.graph.predecessors(node_id))
def get_states_number(self, node_id):
    """Return the number of states of *node_id*.

    The value is read from the ``val`` attribute stored on the graph node.

    :param node_id: identifier of a node present in ``self.graph``
    :return: the content of the node's ``val`` attribute
    """
    # Single return: the attribute lookup on the graph node replaces the
    # delegation to graph_struct instead of sitting unreachable behind it.
    return self.graph.nodes[node_id]['val']
def get_states_number_by_indx(self, node_indx):
    """Return the states number that the graph structure reports for *node_indx*.

    :param node_indx: index of the node, forwarded unchanged to
        ``graph_struct.get_states_number_by_indx``
    """
    structure = self.graph_struct
    return structure.get_states_number_by_indx(node_indx)

@ -6,7 +6,6 @@ class SetsOfCimsContainer:
"""
Aggregates a set of SetOfCims objects
"""
# list_of_vars_orders contains all the lists of parents, ordered by their indx value
def __init__(self, list_of_keys, states_number_per_node, list_of_parents_states_number):
self.sets_of_cims = None
self.init_cims_structure(list_of_keys, states_number_per_node, list_of_parents_states_number)

@ -1,5 +1,6 @@
import numpy as np
class Structure:
"""
Contains all the information about the network structure (how the nodes are connected, the values the variables can take)
@ -25,7 +26,7 @@ class Structure:
return self.variables_frame[self.name_label].values.tolist()
def list_of_nodes_indexes(self):
    """Return the index labels of ``self.variables_frame`` as a plain list."""
    # Single return: Index.to_list() converts directly, without the generic
    # list() iteration over the index.
    return self.variables_frame.index.to_list()
def get_node_id(self, node_indx):
    """Return the entry of the ``name_label`` column found under *node_indx*.

    :param node_indx: key used to index the ``name_label`` column of
        ``self.variables_frame``
    """
    names_column = self.variables_frame[self.name_label]
    return names_column[node_indx]
@ -45,6 +46,9 @@ class Structure:
#print("Node indx", node_indx)
return self.variables_frame[self.value_label][node_indx]
def nodes_values(self):
    """Return the whole ``value_label`` column of ``self.variables_frame`` as a list."""
    values_column = self.variables_frame[self.value_label]
    return values_column.to_list()
def total_variables_number(self):
    """Return ``self.total_variables_number``."""
    # NOTE(review): the attribute read here has the same name as this method.
    # Unless an instance attribute of that name is assigned elsewhere (not
    # visible from this block), the lookup resolves to the bound method
    # itself; if one is assigned, it shadows this method on the instance.
    # TODO confirm which is intended and rename one of the two.
    return self.total_variables_number

@ -4,7 +4,7 @@ import itertools
import networkx as nx
from scipy.stats import f as f_dist
from scipy.stats import chi2 as chi2_dist
from numba import njit
@ -36,16 +36,17 @@ class StructureEstimator:
complete_graph.add_edges_from(itertools.permutations(node_ids, 2))
return complete_graph
def complete_test(self, test_parent, test_child, parent_set):
def complete_test(self, tmp_df, test_parent, test_child, parent_set):
p_set = parent_set[:]
complete_info = parent_set[:]
complete_info.append(test_parent)
tmp_df = self.complete_graph_frame.loc[self.complete_graph_frame['To'].isin([test_child])]
#tmp_df = self.complete_graph_frame.loc[self.complete_graph_frame['To'].isin([test_child])]
#tmp_df = self.complete_graph_frame.loc[np.in1d(self.complete_graph_frame['To'], test_child)]
d2 = tmp_df.loc[tmp_df['From'].isin(complete_info)]
complete_info.append(test_child)
v2 = self.sample_path.structure.variables_frame.loc[
self.sample_path.structure.variables_frame['Name'].isin(complete_info)]
values_frame = self.sample_path.structure.variables_frame
v2 = values_frame.loc[
values_frame['Name'].isin(complete_info)]
#print(tmp_df)
#d1 = tmp_df.loc[tmp_df['From'].isin(parent_set)]
@ -63,10 +64,13 @@ class StructureEstimator:
g2 = ng.NetworkGraph(s2)
g2.init_graph()"""
#parent_set.append(test_child)
sofc1 = None
#sofc1 = None
#if not sofc1:
if not p_set:
sofc1 = self.cache.find(test_child)
else:
sofc1 = self.cache.find(set(p_set))
if not sofc1:
#d1 = tmp_df.loc[tmp_df['From'].isin(parent_set)]
d1 = d2[d2.From != test_parent]
@ -81,17 +85,19 @@ class StructureEstimator:
g1.init_graph()
p1 = pe.ParametersEstimator(self.sample_path, g1)
p1.init_sets_cims_container()
#print("Computing params for",test_child, test_parent, parent_set)
p1.compute_parameters_for_node(test_child)
sofc1 = p1.sets_of_cims_struct.sets_of_cims[s1.get_positional_node_indx(test_child)]
if not p_set:
self.cache.put(test_child, sofc1)
else:
self.cache.put(set(p_set), sofc1)
sofc2 = None
p_set.append(test_parent)
if p_set:
#p_set.append(test_parent)
#print("PSET ", p_set)
set_p_set = set(p_set)
sofc2 = self.cache.find(set_p_set)
#set_p_set = set(p_set)
sofc2 = self.cache.find(set(p_set))
#print("Sofc2 ", sofc2)
#print(self.cache.list_of_sets_of_indxs)
@ -100,7 +106,7 @@ class StructureEstimator:
#p2.compute_parameters()
p2.compute_parameters_for_node(test_child)
sofc2 = p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)]"""
if not sofc2 or p_set:
if not sofc2:
print("Cache Miss SOC2")
#parent_set.append(test_parent)
#d2 = tmp_df.loc[tmp_df['From'].isin(p_set)]
@ -114,12 +120,11 @@ class StructureEstimator:
g2.init_graph()
p2 = pe.ParametersEstimator(self.sample_path, g2)
p2.init_sets_cims_container()
# p2.compute_parameters()
p2.compute_parameters_for_node(test_child)
sofc2 = p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)]
if p_set:
#set_p_set = set(p_set)
self.cache.put(set_p_set, sofc2)
self.cache.put(set(p_set), sofc2)
end = 0
increment = self.sample_path.structure.get_states_number(test_parent)
for cim1 in sofc1.actual_cims:
@ -143,8 +148,11 @@ class StructureEstimator:
F_stats[val] > f_dist.ppf(1 - self.exp_test_sign / 2, r1s[val], r2s[val]):
print("CONDITIONALLY DEPENDENT EXP")
return False
M1_no_diag = self.remove_diagonal_elements(cim1.state_transition_matrix)
M2_no_diag = self.remove_diagonal_elements(cim2.state_transition_matrix)
#M1_no_diag = self.remove_diagonal_elements(cim1.state_transition_matrix)
#M2_no_diag = self.remove_diagonal_elements(cim2.state_transition_matrix)
M1_no_diag = cim1.state_transition_matrix[~np.eye(cim1.state_transition_matrix.shape[0], dtype=bool)].reshape(cim1.state_transition_matrix.shape[0], -1)
M2_no_diag = cim2.state_transition_matrix[~np.eye(cim2.state_transition_matrix.shape[0], dtype=bool)].reshape(
cim2.state_transition_matrix.shape[0], -1)
chi_2_quantile = chi2_dist.ppf(1 - self.chi_test_alfa, child_states_numb - 1)
"""
Ks = np.sqrt(cim1.state_transition_matrix.diagonal() / cim2.state_transition_matrix.diagonal())
@ -170,12 +178,10 @@ class StructureEstimator:
def one_iteration_of_CTPC_algorithm(self, var_id):
u = list(self.complete_graph.predecessors(var_id))
#TODO aggiungere qui il filtraggio del complete_graph_frame verso il nodo di arrivo 'To' var_id e passare il frame a complete test
#TODO trovare un modo per passare direttamente anche i valori delle variabili comprese nel test del nodo var_id
tests_parents_numb = len(u)
#print(u)
complete_frame = self.complete_graph_frame
test_frame = complete_frame.loc[complete_frame['To'].isin([var_id])]
b = 0
#parent_indx = 0
while b < len(u):
#for parent_id in u:
parent_indx = 0
@ -186,13 +192,13 @@ class StructureEstimator:
#print("Parent Indx", parent_indx)
#if not list(self.generate_possible_sub_sets_of_size(u, b, u[parent_indx])):
#break
S = self.generate_possible_sub_sets_of_size(u, b, u[parent_indx])
S = self.generate_possible_sub_sets_of_size(u, b, parent_indx)
#print("U Set", u)
#print("S", S)
for parents_set in S:
#print("Parent Set", parents_set)
#print("Test Parent", u[parent_indx])
if self.complete_test(u[parent_indx], var_id, parents_set):
if self.complete_test(test_frame, u[parent_indx], var_id, parents_set):
#print("Removing EDGE:", u[parent_indx], var_id)
self.complete_graph.remove_edge(u[parent_indx], var_id)
#print(self.complete_graph_frame)
@ -200,7 +206,9 @@ class StructureEstimator:
self.complete_graph_frame.drop(
self.complete_graph_frame[(self.complete_graph_frame.From ==
u[parent_indx]) & (self.complete_graph_frame.To == var_id)].index)"""
self.complete_graph_frame.drop(self.complete_graph_frame[(self.complete_graph_frame.From == u[parent_indx]) & (self.complete_graph_frame.To == var_id)].index)
complete_frame.drop(complete_frame[(complete_frame.From == u[parent_indx]) &
(complete_frame.To == var_id)].index, inplace=True)
#print(self.complete_graph_frame)
#u.remove(u[parent_indx])
del u[parent_indx]
@ -212,11 +220,11 @@ class StructureEstimator:
b += 1
self.cache.clear()
def generate_possible_sub_sets_of_size(self, u, size, parent_indx):
    """Lazily generate the subsets of *u* that leave out the tested parent.

    :param u: list of candidate parents; it is copied, never modified
    :param size: number of elements of every generated subset
    :param parent_indx: position inside *u* of the parent under test, which
        is excluded from all the subsets
    :return: a ``map`` object yielding each combination as a list
    :raises IndexError: if *parent_indx* is not a valid position of *u*
    """
    list_without_test_parent = u[:]
    # Deleting by position drops exactly the tested entry and avoids the
    # linear search (and first-match ambiguity) of list.remove.
    del list_without_test_parent[parent_indx]
    return map(list, itertools.combinations(list_without_test_parent, size))
@ -228,7 +236,11 @@ class StructureEstimator:
return strided(matrix.ravel()[1:], shape=(m - 1, m), strides=(s0 + s1, s1)).reshape(m, -1)
def ctpc_algorithm(self):
    """Run one CTPC iteration for every node label of the sample path's structure.

    The labels come from ``self.sample_path.structure.list_of_nodes_labels()``
    and are processed in that order, one call to
    ``one_iteration_of_CTPC_algorithm`` per label.
    """
    # A plain loop, executed once: the calls matter only for their side
    # effects, so no throwaway list of results is built.
    for node_id in self.sample_path.structure.list_of_nodes_labels():
        self.one_iteration_of_CTPC_algorithm(node_id)

@ -179,10 +179,12 @@ class TestNetworkGraph(unittest.TestCase):
def test_init_graph(self):
    # Profiling harness rather than a real test: it makes no assertions and
    # only prints the LineProfiler statistics.
    g1 = ng.NetworkGraph(self.s1.structure)
    #g1.init_graph()
    lp = LineProfiler()
    # Also collect line timings for this method while init_graph runs.
    lp.add_function(g1.get_ordered_by_indx_set_of_parents)
    lp_wrapper = lp(g1.init_graph)
    lp_wrapper()
    #lp.add_function(g1.get_ordered_by_indx_set_of_parents)
    #lp.add_function(g1.get_states_number)
    # Profile a single states-number lookup for node 'X'.
    lp_wrapper = lp(g1.get_states_number)
    lp_wrapper('X')
    lp.print_stats()
"""def test_remove_node(self):

@ -57,14 +57,12 @@ class TestParametersEstimatior(unittest.TestCase):
self.assertTrue(np.all(np.isclose(r1, r2, 1e-01, 1e-01) == True))
def test_compute_parameters_for_node(self):#TODO Questo non è un test
self.g1.remove_node('Y')
print(self.g1.time_filtering)
pe1 = pe.ParametersEstimator(self.s1, self.g1)
pe1.init_sets_cims_container()
#pe1.init_sets_cims_container()
lp = LineProfiler()
#lp.add_function(pe1.init_sets_cims_container)
lp_wrapper = lp(pe1.compute_parameters_for_node)
lp_wrapper('X')
lp_wrapper = lp(pe1.init_sets_cims_container)
#lp.add_function(pe1.sets_of_cims_struct.init_cims_structure)
lp_wrapper()
lp.print_stats()
#pe1.init_sets_cims_container()
#pe1.compute_parameters_for_node('Y')