1
0
Fork 0

Add Cache class

parallel_struct_est
philpMartin 4 years ago
parent 4b9ff23e0b
commit 55ec35d084
  1. 24
      main_package/classes/cache.py
  2. 75
      main_package/classes/network_graph.py
  3. 7
      main_package/classes/parameters_estimator.py
  4. 5
      main_package/classes/structure.py
  5. 138
      main_package/classes/structure_estimator.py
  6. 19
      main_package/tests/test_networkgraph.py
  7. 13
      main_package/tests/test_parameters_estimator.py
  8. 8
      main_package/tests/test_structure.py
  9. 9
      main_package/tests/test_structure_estimator.py

@ -0,0 +1,24 @@
import typing
import set_of_cims as sofc
class Cache:
    """Small in-memory lookup table from a parents combination to its result.

    Keys and values live in two parallel lists: the entry at position ``i`` of
    ``list_of_sets_of_indxs`` is the key of the value at position ``i`` of
    ``actual_cache``. Keys are matched by equality (``list.index``), so
    unhashable keys such as plain ``set`` objects are accepted.
    """

    def __init__(self):
        # Keys, in insertion order.
        self.list_of_sets_of_indxs = []
        # Cached values, aligned index-by-index with the keys above.
        self.actual_cache = []

    def find(self, parents_comb: typing.Set):
        """Return the value cached for ``parents_comb``, or ``None`` on a miss.

        :param parents_comb: the key to look up (compared with ``==``).
        :return: the stored value, or ``None`` when no equal key is present.
        """
        try:
            # Only the key search can raise ValueError, so keep the try minimal.
            position = self.list_of_sets_of_indxs.index(parents_comb)
        except ValueError:
            return None
        # NOTE: debug trace kept as-is so existing console output is unchanged.
        print("CACHE HIT!!!!")
        return self.actual_cache[position]

    def put(self, parents_comb: typing.Set, socim: "sofc.SetOfCims"):
        """Store ``socim`` under ``parents_comb``.

        If an equal key is already cached its value is replaced in place, so
        repeated insertions of the same key never create duplicate entries
        (which would make ``find`` return the oldest value and make every
        lookup scan a longer list).

        :param parents_comb: the key to store the value under.
        :param socim: the value to cache.
        """
        try:
            position = self.list_of_sets_of_indxs.index(parents_comb)
        except ValueError:
            # First time this key is seen: grow both lists together.
            self.list_of_sets_of_indxs.append(parents_comb)
            self.actual_cache.append(socim)
        else:
            self.actual_cache[position] = socim

    def clear(self):
        """Remove every entry, emptying both lists in place."""
        # Slice deletion keeps the same list objects alive for any holder
        # of a reference to them.
        del self.list_of_sets_of_indxs[:]
        del self.actual_cache[:]

@ -1,5 +1,4 @@
import os
import sample_path as sp
import networkx as nx
import numpy as np
@ -39,6 +38,7 @@ class NetworkGraph():
self.build_transition_columns_filtering_structure()
def add_nodes(self, list_of_nodes):
#self.graph.add_nodes_from(list_of_nodes)
for id in list_of_nodes:
self.graph.add_node(id)
nx.set_node_attributes(self.graph, {id:self.graph_struct.get_node_indx(id)}, 'indx')
@ -48,7 +48,7 @@ class NetworkGraph():
def get_ordered_by_indx_set_of_parents(self, node):
#print(node)
ordered_set = {}
#ordered_set = {}
parents = self.get_parents_by_id(node)
#print(parents)
sorted_parents = [x for _, x in sorted(zip(self.graph_struct.list_of_nodes_labels(), parents))]
@ -61,8 +61,10 @@ class NetworkGraph():
#print(indx)
#ordered_set[n] = indx
p_indxes.append(self.graph_struct.get_node_indx(n))
p_values.append(self.graph_struct.get_states_number(n))
node_indx = self.get_node_indx(n)
p_indxes.append(node_indx)
#p_values.append(self.graph_struct.get_states_number(n))
p_values.append(self.get_states_number_by_indx(node_indx))
ordered_set = (sorted_parents, p_indxes, p_values)
#print(ordered_set)
@ -129,8 +131,8 @@ class NetworkGraph():
#print(T_vector)
def build_time_scalar_indexing_structure(self):
parents_indexes_list = self._fancy_indexing
for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), parents_indexes_list):
#parents_indexes_list = self._fancy_indexing
for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing):
#if not p_indxs:
#self._time_scalar_indexing_structure.append(np.array([self.get_states_number_by_indx(node_indx)],
#dtype=np.int))
@ -148,23 +150,23 @@ class NetworkGraph():
return M_vector
def build_transition_scalar_indexing_structure(self):
parents_indexes_list = self._fancy_indexing
for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), parents_indexes_list):
#parents_indexes_list = self._fancy_indexing
for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing):
self._transition_scalar_indexing_structure.append(
self.build_transition_scalar_indexing_structure_for_a_node(node_indx, p_indxs))
def build_time_columns_filtering_structure(self):
parents_indexes_list = self._fancy_indexing
for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), parents_indexes_list):
#parents_indexes_list = self._fancy_indexing
for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing):
#if p_indxs.size == 0:
#self._time_filtering.append(np.append(p_indxs, np.array([node_indx], dtype=np.int)))
#else:
self._time_filtering.append(np.append(np.array([node_indx], dtype=np.int), p_indxs).astype(np.int))
def build_transition_columns_filtering_structure(self):
parents_indexes_list = self._fancy_indexing
#parents_indexes_list = self._fancy_indexing
nodes_number = self.graph_struct.total_variables_number
for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), parents_indexes_list):
for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing):
self._transition_filtering.append(np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int))
def get_nodes(self):
@ -177,7 +179,7 @@ class NetworkGraph():
return self.graph_struct.list_of_nodes_labels()
def get_parents_by_id(self, node_id):
return list(self.graph.predecessors(node_id))
return list(self.graph.predecessors(node_id))
def get_states_number(self, node_id):
return self.graph_struct.get_states_number(node_id)
@ -190,6 +192,7 @@ class NetworkGraph():
def get_node_indx(self, node_id):
return nx.get_node_attributes(self.graph, 'indx')[node_id]
#return self.graph_struct.get_node_indx(node_id)
@property
def time_scalar_indexing_strucure(self):
@ -207,36 +210,16 @@ class NetworkGraph():
def transition_filtering(self):
return self._transition_filtering
######Veloci Tests#######
"""os.getcwd()
os.chdir('..')
path = os.getcwd() + '/data'
s1 = sp.SamplePath(path)
s1.build_trajectories()
s1.build_structure()
g1 = NetworkGraph(s1.structure)
g1.init_graph()
print(g1.transition_scalar_indexing_structure)
print(g1.transition_filtering)
print(g1.time_scalar_indexing_strucure)
print(g1.time_filering)
#print(g1.build_fancy_indexing_structure(0))
#print(g1.get_states_number_of_all_nodes_sorted())
g1.build_scalar_indexing_structure()
print(g1.scalar_indexing_structure)
print(g1.build_columns_filtering_structure())
g1.build_transition_scalar_indexing_structure()
print(g1.transition_scalar_indexing_structure)
g1.build_transition_columns_filtering_structure()
print(g1.transition_filtering)
[array([3, 9]), array([ 3, 9, 27]), array([ 3, 9, 27, 81])]
[array([3, 0]), array([4, 1, 2]), array([5, 2, 0, 1])]"""
"""def remove_node(self, node_id):
node_indx = self.get_node_indx(node_id)
self.graph_struct.remove_node(node_id)
self.graph.remove_node(node_id)
del self._fancy_indexing[node_indx]
del self._time_filtering[node_indx]
del self._nodes_labels[node_indx]
del self._transition_scalar_indexing_structure[node_indx]
del self._transition_filtering[node_indx]
del self._time_scalar_indexing_structure[node_indx]
del self.aggregated_info_about_nodes_parents[node_indx]
del self._nodes_indexes[node_indx]"""

@ -45,17 +45,16 @@ class ParametersEstimator:
def compute_parameters_for_node(self, node_id):
pos_index = self.net_graph.graph_struct.get_positional_node_indx(node_id)
node_indx = self.net_graph.get_node_indx(node_id)
#print("Nodes", self.net_graph.get_nodes())
#print(pos_index)
#print(self.net_graph.time_filtering)
self.compute_state_res_time_for_node(self.net_graph.get_node_indx(node_id), self.sample_path.trajectories.times,
self.compute_state_res_time_for_node(node_indx, self.sample_path.trajectories.times,
self.sample_path.trajectories.trajectory,
self.net_graph.time_filtering[pos_index],
self.net_graph.time_scalar_indexing_strucure[pos_index],
self.sets_of_cims_struct.sets_of_cims[pos_index].state_residence_times)
# print(self.net_graph.transition_filtering[indx])
# print(self.net_graph.transition_scalar_indexing_structure[indx])
self.compute_state_transitions_for_a_node(self.net_graph.get_node_indx(node_id),
self.compute_state_transitions_for_a_node(node_indx,
self.sample_path.trajectories.complete_trajectory,
self.net_graph.transition_filtering[pos_index],
self.net_graph.transition_scalar_indexing_structure[pos_index],

@ -57,3 +57,8 @@ class Structure:
return self.structure_frame.equals(other.structure_frame) and \
self.variables_frame.equals(other.variables_frame)
return NotImplemented
"""def remove_node(self, node_id):
self.variables_frame = self.variables_frame[self.variables_frame.Name != node_id]
self.structure_frame = self.structure_frame[(self.structure_frame.From != node_id) &
(self.structure_frame.To != node_id)]"""

@ -1,16 +1,17 @@
import pandas as pd
import numpy as np
import math
import itertools
import networkx as nx
from scipy.stats import f as f_dist
from scipy.stats import chi2 as chi2_dist
import sample_path as sp
import structure as st
import network_graph as ng
import parameters_estimator as pe
import cache as ch
class StructureEstimator:
@ -21,6 +22,7 @@ class StructureEstimator:
self.complete_graph = self.build_complete_graph(self.sample_path.structure.list_of_nodes_labels())
self.exp_test_sign = exp_test_alfa
self.chi_test_alfa = chi_test_alfa
self.cache = ch.Cache()
def build_complete_graph_frame(self, node_ids):
complete_frame = pd.DataFrame(itertools.permutations(node_ids, 2))
@ -34,54 +36,99 @@ class StructureEstimator:
return complete_graph
def complete_test(self, test_parent, test_child, parent_set):
p_set = parent_set[:]
complete_info = parent_set[:]
complete_info.append(test_parent)
tmp_df = self.complete_graph_frame.loc[self.complete_graph_frame['To'].isin([test_child])]
#tmp_df = self.complete_graph_frame.loc[np.in1d(self.complete_graph_frame['To'], test_child)]
d2 = tmp_df.loc[tmp_df['From'].isin(complete_info)]
complete_info.append(test_child)
v2 = self.sample_path.structure.variables_frame.loc[
self.sample_path.structure.variables_frame['Name'].isin(complete_info)]
#print(tmp_df)
d1 = tmp_df.loc[tmp_df['From'].isin(parent_set)]
parent_set.append(test_child)
#d1 = tmp_df.loc[tmp_df['From'].isin(parent_set)]
#parent_set.append(test_child)
#print(parent_set)
v1 = self.sample_path.structure.variables_frame.loc[self.sample_path.structure.variables_frame['Name'].isin(parent_set)]
"""v1 = self.sample_path.structure.variables_frame.loc[self.sample_path.structure.variables_frame['Name'].isin(parent_set)]
s1 = st.Structure(d1, v1, self.sample_path.total_variables_count)
g1 = ng.NetworkGraph(s1)
g1.init_graph()
g1.init_graph()"""
parent_set.append(test_parent)
d2 = tmp_df.loc[tmp_df['From'].isin(parent_set)]
#parent_set.append(test_parent)
"""d2 = tmp_df.loc[tmp_df['From'].isin(parent_set)]
v2 = self.sample_path.structure.variables_frame.loc[self.sample_path.structure.variables_frame['Name'].isin(parent_set)]
#print(d2)
#print(v2)
s2 = st.Structure(d2, v2, self.sample_path.total_variables_count)
g2 = ng.NetworkGraph(s2)
g2.init_graph()
p1 = pe.ParametersEstimator(self.sample_path, g1)
p1.init_sets_cims_container()
#print("Computing params for",test_child, test_parent, parent_set)
p1.compute_parameters_for_node(test_child)
#p1.compute_parameters()
p2 = pe.ParametersEstimator(self.sample_path, g2)
g2.init_graph()"""
#parent_set.append(test_child)
sofc1 = None
#if not sofc1:
if not p_set:
sofc1 = self.cache.find(test_child)
if not sofc1:
#d1 = tmp_df.loc[tmp_df['From'].isin(parent_set)]
d1 = d2[d2.From != test_parent]
#v1 = self.sample_path.structure.variables_frame.loc[
#self.sample_path.structure.variables_frame['Name'].isin(parent_set)]
v1 = v2[v2.Name != test_parent]
#print("D1", d1)
#print("V1", v1)
s1 = st.Structure(d1, v1, self.sample_path.total_variables_count)
g1 = ng.NetworkGraph(s1)
g1.init_graph()
p1 = pe.ParametersEstimator(self.sample_path, g1)
p1.init_sets_cims_container()
#print("Computing params for",test_child, test_parent, parent_set)
p1.compute_parameters_for_node(test_child)
sofc1 = p1.sets_of_cims_struct.sets_of_cims[s1.get_positional_node_indx(test_child)]
self.cache.put(test_child,sofc1)
sofc2 = None
p_set.append(test_parent)
if p_set:
#p_set.append(test_parent)
#print("PSET ", p_set)
set_p_set = set(p_set)
sofc2 = self.cache.find(set_p_set)
#print("Sofc2 ", sofc2)
#print(self.cache.list_of_sets_of_indxs)
"""p2 = pe.ParametersEstimator(self.sample_path, g2)
p2.init_sets_cims_container()
#p2.compute_parameters()
p2.compute_parameters_for_node(test_child)
#for cim in p1.sets_of_cims_struct.sets_of_cims[s1.get_positional_node_indx(test_child)].actual_cims:
#print(cim)
#print(cim.state_transition_matrix)
#print("C_1", p1.sets_of_cims_struct.sets_of_cims[s1.get_positional_node_indx(test_child)].transition_matrices)
indx = 0
for i, cim1 in enumerate(
p1.sets_of_cims_struct.sets_of_cims[s1.get_positional_node_indx(test_child)].actual_cims):
#for j, cim2 in enumerate(
#p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)].actual_cims):
for j in range(indx, self.sample_path.structure.get_states_number(test_parent) + indx):
#print("J", j)
#print("Pos Index", p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)].actual_cims)
cim2 = p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)].actual_cims[j]
indx += 1
sofc2 = p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)]"""
if not sofc2 or p_set:
print("Cache Miss SOC2")
#parent_set.append(test_parent)
#d2 = tmp_df.loc[tmp_df['From'].isin(p_set)]
#v2 = self.sample_path.structure.variables_frame.loc[
#self.sample_path.structure.variables_frame['Name'].isin(parent_set)]
#print("D2", d2)
#print("V2", v2)
#s2 = st.Structure(d2, v2, self.sample_path.total_variables_count)
s2 = st.Structure(d2, v2, self.sample_path.total_variables_count)
g2 = ng.NetworkGraph(s2)
g2.init_graph()
p2 = pe.ParametersEstimator(self.sample_path, g2)
p2.init_sets_cims_container()
# p2.compute_parameters()
p2.compute_parameters_for_node(test_child)
sofc2 = p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)]
if p_set:
#set_p_set = set(p_set)
self.cache.put(set_p_set, sofc2)
end = 0
increment = self.sample_path.structure.get_states_number(test_parent)
for cim1 in sofc1.actual_cims:
start = end
end = start + increment
for j in range(start, end):
#cim2 = sofc2.actual_cims[j]
#print(indx)
print("Run Test", i, j)
if not self.independence_test(test_child, cim1, cim2):
#print("Run Test", i, j)
if not self.independence_test(test_child, cim1, sofc2.actual_cims[j]):
return False
return True
@ -90,7 +137,8 @@ class StructureEstimator:
r1s = cim1.state_transition_matrix.diagonal()
r2s = cim2.state_transition_matrix.diagonal()
F_stats = cim2.cim.diagonal() / cim1.cim.diagonal()
for val in range(0, self.sample_path.structure.get_states_number(tested_child)): # i possibili valori di tested child TODO QUESTO CONTO DEVE ESSERE VETTORIZZATO
child_states_numb = self.sample_path.structure.get_states_number(tested_child)
for val in range(0, child_states_numb): # i possibili valori di tested child TODO QUESTO CONTO DEVE ESSERE VETTORIZZATO
#r1 = cim1.state_transition_matrix[val][val]
#r2 = cim2.state_transition_matrix[val][val]
#print("No Test Parent:",cim1.cim[val][val],"With Test Parent", cim2.cim[val][val])
@ -108,7 +156,7 @@ class StructureEstimator:
M2_no_diag = self.remove_diagonal_elements(cim2.state_transition_matrix)
#print("M1 no diag", M1_no_diag)
#print("M2 no diag", M2_no_diag)
chi_2_quantile = chi2_dist.ppf(1 - self.chi_test_alfa, self.sample_path.structure.get_states_number(tested_child) - 1)
chi_2_quantile = chi2_dist.ppf(1 - self.chi_test_alfa, child_states_numb - 1)
"""
Ks = np.sqrt(cim1.state_transition_matrix.diagonal() / cim2.state_transition_matrix.diagonal())
Ls = np.reciprocal(Ks)
@ -116,7 +164,7 @@ class StructureEstimator:
/ (M1_no_diag + M2_no_diag)), axis=1)"""
Ks = np.sqrt(r1s / r2s)
Ls = np.sqrt(r2s / r1s)
for val in range(0, self.sample_path.structure.get_states_number(tested_child)):
for val in range(0, child_states_numb):
#K = math.sqrt(cim1.state_transition_matrix[val][val] / cim2.state_transition_matrix[val][val])
#L = 1 / K
Chi = np.sum(np.power(Ks[val] * M2_no_diag[val] - Ls[val] *M1_no_diag[val], 2) /
@ -154,21 +202,24 @@ class StructureEstimator:
#print("Parent Set", parents_set)
#print("Test Parent", u[parent_indx])
if self.complete_test(u[parent_indx], var_id, parents_set):
print("Removing EDGE:", u[parent_indx], var_id)
#print("Removing EDGE:", u[parent_indx], var_id)
self.complete_graph.remove_edge(u[parent_indx], var_id)
#print(self.complete_graph_frame)
self.complete_graph_frame = \
"""self.complete_graph_frame = \
self.complete_graph_frame.drop(
self.complete_graph_frame[(self.complete_graph_frame.From ==
u[parent_indx]) & (self.complete_graph_frame.To == var_id)].index)
u[parent_indx]) & (self.complete_graph_frame.To == var_id)].index)"""
self.complete_graph_frame.drop(self.complete_graph_frame[(self.complete_graph_frame.From == u[parent_indx]) & (self.complete_graph_frame.To == var_id)].index)
#print(self.complete_graph_frame)
u.remove(u[parent_indx])
#u.remove(u[parent_indx])
del u[parent_indx]
removed = True
#else:
#parent_indx += 1
if not removed:
parent_indx += 1
b += 1
self.cache.clear()
def generate_possible_sub_sets_of_size(self, u, size, parent_id):
#print("Inside Generate subsets", u)
@ -187,5 +238,6 @@ class StructureEstimator:
def ctpc_algorithm(self):
for node_id in self.sample_path.structure.list_of_nodes_labels():
print("TESTING VAR:", node_id)
self.one_iteration_of_CTPC_algorithm(node_id)

@ -1,6 +1,7 @@
import unittest
import networkx as nx
import numpy as np
from line_profiler import LineProfiler
import sample_path as sp
import network_graph as ng
@ -176,6 +177,24 @@ class TestNetworkGraph(unittest.TestCase):
for a1, a2 in zip(g1.transition_filtering, m_filter):
self.assertTrue(np.array_equal(a1, a2))
def test_init_graph(self):
g1 = ng.NetworkGraph(self.s1.structure)
lp = LineProfiler()
lp.add_function(g1.get_ordered_by_indx_set_of_parents)
lp_wrapper = lp(g1.init_graph)
lp_wrapper()
lp.print_stats()
"""def test_remove_node(self):
g1 = ng.NetworkGraph(self.s1.structure)
g1.init_graph()
g1.remove_node('Y')
print(g1.get_nodes())
print(g1.get_edges())"""
#TODO tests for the properties and the various getters are still missing
if __name__ == '__main__':
unittest.main()

@ -1,5 +1,6 @@
import unittest
import numpy as np
from line_profiler import LineProfiler
import network_graph as ng
import sample_path as sp
@ -56,10 +57,18 @@ class TestParametersEstimatior(unittest.TestCase):
self.assertTrue(np.all(np.isclose(r1, r2, 1e-01, 1e-01) == True))
def test_compute_parameters_for_node(self):#TODO Questo non è un test
self.g1.remove_node('Y')
print(self.g1.time_filtering)
pe1 = pe.ParametersEstimator(self.s1, self.g1)
pe1.init_sets_cims_container()
pe1.compute_parameters_for_node('Y')
print(pe1.sets_of_cims_struct.get_set_of_cims(1).actual_cims)
lp = LineProfiler()
#lp.add_function(pe1.init_sets_cims_container)
lp_wrapper = lp(pe1.compute_parameters_for_node)
lp_wrapper('X')
lp.print_stats()
#pe1.init_sets_cims_container()
#pe1.compute_parameters_for_node('Y')
print(pe1.sets_of_cims_struct.get_set_of_cims(0).actual_cims)
def aux_import_sampled_cims(self, cims_label):

@ -68,5 +68,13 @@ class TestStructure(unittest.TestCase):
for indx, row in self.variables_frame.iterrows():
self.assertEqual(row[1], s1.get_states_number_by_indx(indx))
def test_remove_node(self):
s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index))
s1.remove_node('Y')
print(s1.variables_frame)
print(s1.structure_frame)
print(s1.get_node_indx('Z'))
print(s1.get_positional_node_indx('Z'))
if __name__ == '__main__':
unittest.main()

@ -1,4 +1,5 @@
import unittest
from line_profiler import LineProfiler
import sample_path as sp
import structure_estimator as se
@ -21,7 +22,13 @@ class TestStructureEstimator(unittest.TestCase):
se1 = se.StructureEstimator(self.s1, 0.1, 0.1)
#se1.one_iteration_of_CTPC_algorithm('X')
#self.aux_test_complete_test(se1, 'X', 'Y', ['Z'])
se1.ctpc_algorithm()
lp = LineProfiler()
lp.add_function(se1.complete_test)
lp.add_function(se1.one_iteration_of_CTPC_algorithm)
lp_wrapper = lp(se1.ctpc_algorithm)
lp_wrapper()
lp.print_stats()
#se1.ctpc_algorithm()
print(se1.complete_graph.edges)
def aux_test_complete_test(self, estimator, test_par, test_child, p_set):