@ -1,16 +1,17 @@
import pandas as pd
import pandas as pd
import numpy as np
import numpy as np
import math
import itertools
import itertools
import networkx as nx
import networkx as nx
from scipy . stats import f as f_dist
from scipy . stats import f as f_dist
from scipy . stats import chi2 as chi2_dist
from scipy . stats import chi2 as chi2_dist
import sample_path as sp
import sample_path as sp
import structure as st
import structure as st
import network_graph as ng
import network_graph as ng
import parameters_estimator as pe
import parameters_estimator as pe
import cache as ch
class StructureEstimator :
class StructureEstimator :
@ -21,6 +22,7 @@ class StructureEstimator:
self . complete_graph = self . build_complete_graph ( self . sample_path . structure . list_of_nodes_labels ( ) )
self . complete_graph = self . build_complete_graph ( self . sample_path . structure . list_of_nodes_labels ( ) )
self . exp_test_sign = exp_test_alfa
self . exp_test_sign = exp_test_alfa
self . chi_test_alfa = chi_test_alfa
self . chi_test_alfa = chi_test_alfa
self . cache = ch . Cache ( )
def build_complete_graph_frame ( self , node_ids ) :
def build_complete_graph_frame ( self , node_ids ) :
complete_frame = pd . DataFrame ( itertools . permutations ( node_ids , 2 ) )
complete_frame = pd . DataFrame ( itertools . permutations ( node_ids , 2 ) )
@ -34,54 +36,99 @@ class StructureEstimator:
return complete_graph
return complete_graph
def complete_test ( self , test_parent , test_child , parent_set ) :
def complete_test ( self , test_parent , test_child , parent_set ) :
p_set = parent_set [ : ]
complete_info = parent_set [ : ]
complete_info . append ( test_parent )
tmp_df = self . complete_graph_frame . loc [ self . complete_graph_frame [ ' To ' ] . isin ( [ test_child ] ) ]
tmp_df = self . complete_graph_frame . loc [ self . complete_graph_frame [ ' To ' ] . isin ( [ test_child ] ) ]
#tmp_df = self.complete_graph_frame.loc[np.in1d(self.complete_graph_frame['To'], test_child)]
d2 = tmp_df . loc [ tmp_df [ ' From ' ] . isin ( complete_info ) ]
complete_info . append ( test_child )
v2 = self . sample_path . structure . variables_frame . loc [
self . sample_path . structure . variables_frame [ ' Name ' ] . isin ( complete_info ) ]
#print(tmp_df)
#print(tmp_df)
d1 = tmp_df . loc [ tmp_df [ ' From ' ] . isin ( parent_set ) ]
#d1 = tmp_df.loc[tmp_df['From'].isin(parent_set) ]
parent_set . append ( test_child )
#parent_set.append(test_child )
#print(parent_set)
#print(parent_set)
v1 = self . sample_path . structure . variables_frame . loc [ self . sample_path . structure . variables_frame [ ' Name ' ] . isin ( parent_set ) ]
""" v1 = self.sample_path.structure.variables_frame.loc[self.sample_path.structure.variables_frame[ ' Name ' ].isin(parent_set) ]
s1 = st . Structure ( d1 , v1 , self . sample_path . total_variables_count )
s1 = st . Structure ( d1 , v1 , self . sample_path . total_variables_count )
g1 = ng . NetworkGraph ( s1 )
g1 = ng . NetworkGraph ( s1 )
g1 . init_graph ( )
g1 . init_graph ( ) """
parent_set . append ( test_parent )
#parent_set.append(test_parent )
d2 = tmp_df . loc [ tmp_df [ ' From ' ] . isin ( parent_set ) ]
""" d2 = tmp_df.loc[tmp_df[ ' From ' ].isin(parent_set) ]
v2 = self . sample_path . structure . variables_frame . loc [ self . sample_path . structure . variables_frame [ ' Name ' ] . isin ( parent_set ) ]
v2 = self . sample_path . structure . variables_frame . loc [ self . sample_path . structure . variables_frame [ ' Name ' ] . isin ( parent_set ) ]
#print(d2)
#print(v2)
s2 = st . Structure ( d2 , v2 , self . sample_path . total_variables_count )
s2 = st . Structure ( d2 , v2 , self . sample_path . total_variables_count )
g2 = ng . NetworkGraph ( s2 )
g2 = ng . NetworkGraph ( s2 )
g2 . init_graph ( )
g2 . init_graph ( ) """
#parent_set.append(test_child)
p1 = pe . ParametersEstimator ( self . sample_path , g1 )
sofc1 = None
p1 . init_sets_cims_container ( )
#if not sofc1:
#print("Computing params for",test_child, test_parent, parent_set)
if not p_set :
p1 . compute_parameters_for_node ( test_child )
sofc1 = self . cache . find ( test_child )
#p1.compute_parameters()
if not sofc1 :
#d1 = tmp_df.loc[tmp_df['From'].isin(parent_set)]
p2 = pe . ParametersEstimator ( self . sample_path , g2 )
d1 = d2 [ d2 . From != test_parent ]
#v1 = self.sample_path.structure.variables_frame.loc[
#self.sample_path.structure.variables_frame['Name'].isin(parent_set)]
v1 = v2 [ v2 . Name != test_parent ]
#print("D1", d1)
#print("V1", v1)
s1 = st . Structure ( d1 , v1 , self . sample_path . total_variables_count )
g1 = ng . NetworkGraph ( s1 )
g1 . init_graph ( )
p1 = pe . ParametersEstimator ( self . sample_path , g1 )
p1 . init_sets_cims_container ( )
#print("Computing params for",test_child, test_parent, parent_set)
p1 . compute_parameters_for_node ( test_child )
sofc1 = p1 . sets_of_cims_struct . sets_of_cims [ s1 . get_positional_node_indx ( test_child ) ]
self . cache . put ( test_child , sofc1 )
sofc2 = None
p_set . append ( test_parent )
if p_set :
#p_set.append(test_parent)
#print("PSET ", p_set)
set_p_set = set ( p_set )
sofc2 = self . cache . find ( set_p_set )
#print("Sofc2 ", sofc2)
#print(self.cache.list_of_sets_of_indxs)
""" p2 = pe.ParametersEstimator(self.sample_path, g2)
p2 . init_sets_cims_container ( )
p2 . init_sets_cims_container ( )
#p2.compute_parameters()
#p2.compute_parameters()
p2 . compute_parameters_for_node ( test_child )
p2 . compute_parameters_for_node ( test_child )
sofc2 = p2 . sets_of_cims_struct . sets_of_cims [ s2 . get_positional_node_indx ( test_child ) ] """
#for cim in p1.sets_of_cims_struct.sets_of_cims[s1.get_positional_node_indx(test_child)].actual_cims:
if not sofc2 or p_set :
#print(cim)
print ( " Cache Miss SOC2 " )
#print(cim.state_transition_matrix)
#parent_set.append(test_parent)
#print("C_1", p1.sets_of_cims_struct.sets_of_cims[s1.get_positional_node_indx(test_child)].transition_matrices)
#d2 = tmp_df.loc[tmp_df['From'].isin(p_set)]
indx = 0
#v2 = self.sample_path.structure.variables_frame.loc[
for i , cim1 in enumerate (
#self.sample_path.structure.variables_frame['Name'].isin(parent_set)]
p1 . sets_of_cims_struct . sets_of_cims [ s1 . get_positional_node_indx ( test_child ) ] . actual_cims ) :
#print("D2", d2)
#print("V2", v2)
#for j, cim2 in enumerate(
#s2 = st.Structure(d2, v2, self.sample_path.total_variables_count)
#p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)].actual_cims):
s2 = st . Structure ( d2 , v2 , self . sample_path . total_variables_count )
for j in range ( indx , self . sample_path . structure . get_states_number ( test_parent ) + indx ) :
g2 = ng . NetworkGraph ( s2 )
#print("J", j)
g2 . init_graph ( )
#print("Pos Index", p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)].actual_cims)
p2 = pe . ParametersEstimator ( self . sample_path , g2 )
cim2 = p2 . sets_of_cims_struct . sets_of_cims [ s2 . get_positional_node_indx ( test_child ) ] . actual_cims [ j ]
p2 . init_sets_cims_container ( )
indx + = 1
# p2.compute_parameters()
p2 . compute_parameters_for_node ( test_child )
sofc2 = p2 . sets_of_cims_struct . sets_of_cims [ s2 . get_positional_node_indx ( test_child ) ]
if p_set :
#set_p_set = set(p_set)
self . cache . put ( set_p_set , sofc2 )
end = 0
increment = self . sample_path . structure . get_states_number ( test_parent )
for cim1 in sofc1 . actual_cims :
start = end
end = start + increment
for j in range ( start , end ) :
#cim2 = sofc2.actual_cims[j]
#print(indx)
#print(indx)
print ( " Run Test " , i , j )
#print("Run Test", i, j )
if not self . independence_test ( test_child , cim1 , cim2 ) :
if not self . independence_test ( test_child , cim1 , sofc2 . actual_cims [ j ] ) :
return False
return False
return True
return True
@ -90,7 +137,8 @@ class StructureEstimator:
r1s = cim1 . state_transition_matrix . diagonal ( )
r1s = cim1 . state_transition_matrix . diagonal ( )
r2s = cim2 . state_transition_matrix . diagonal ( )
r2s = cim2 . state_transition_matrix . diagonal ( )
F_stats = cim2 . cim . diagonal ( ) / cim1 . cim . diagonal ( )
F_stats = cim2 . cim . diagonal ( ) / cim1 . cim . diagonal ( )
for val in range ( 0 , self . sample_path . structure . get_states_number ( tested_child ) ) : # i possibili valori di tested child TODO QUESTO CONTO DEVE ESSERE VETTORIZZATO
child_states_numb = self . sample_path . structure . get_states_number ( tested_child )
for val in range ( 0 , child_states_numb ) : # i possibili valori di tested child TODO QUESTO CONTO DEVE ESSERE VETTORIZZATO
#r1 = cim1.state_transition_matrix[val][val]
#r1 = cim1.state_transition_matrix[val][val]
#r2 = cim2.state_transition_matrix[val][val]
#r2 = cim2.state_transition_matrix[val][val]
#print("No Test Parent:",cim1.cim[val][val],"With Test Parent", cim2.cim[val][val])
#print("No Test Parent:",cim1.cim[val][val],"With Test Parent", cim2.cim[val][val])
@ -108,7 +156,7 @@ class StructureEstimator:
M2_no_diag = self . remove_diagonal_elements ( cim2 . state_transition_matrix )
M2_no_diag = self . remove_diagonal_elements ( cim2 . state_transition_matrix )
#print("M1 no diag", M1_no_diag)
#print("M1 no diag", M1_no_diag)
#print("M2 no diag", M2_no_diag)
#print("M2 no diag", M2_no_diag)
chi_2_quantile = chi2_dist . ppf ( 1 - self . chi_test_alfa , self . sample_path . structure . get_states_number ( tested_child ) - 1 )
chi_2_quantile = chi2_dist . ppf ( 1 - self . chi_test_alfa , child_states_numb - 1 )
"""
"""
Ks = np . sqrt ( cim1 . state_transition_matrix . diagonal ( ) / cim2 . state_transition_matrix . diagonal ( ) )
Ks = np . sqrt ( cim1 . state_transition_matrix . diagonal ( ) / cim2 . state_transition_matrix . diagonal ( ) )
Ls = np . reciprocal ( Ks )
Ls = np . reciprocal ( Ks )
@ -116,7 +164,7 @@ class StructureEstimator:
/ ( M1_no_diag + M2_no_diag ) ) , axis = 1 ) """
/ ( M1_no_diag + M2_no_diag ) ) , axis = 1 ) """
Ks = np . sqrt ( r1s / r2s )
Ks = np . sqrt ( r1s / r2s )
Ls = np . sqrt ( r2s / r1s )
Ls = np . sqrt ( r2s / r1s )
for val in range ( 0 , self . sample_path . structure . get_states_number ( tested_child ) ) :
for val in range ( 0 , child_states_numb ) :
#K = math.sqrt(cim1.state_transition_matrix[val][val] / cim2.state_transition_matrix[val][val])
#K = math.sqrt(cim1.state_transition_matrix[val][val] / cim2.state_transition_matrix[val][val])
#L = 1 / K
#L = 1 / K
Chi = np . sum ( np . power ( Ks [ val ] * M2_no_diag [ val ] - Ls [ val ] * M1_no_diag [ val ] , 2 ) /
Chi = np . sum ( np . power ( Ks [ val ] * M2_no_diag [ val ] - Ls [ val ] * M1_no_diag [ val ] , 2 ) /
@ -154,21 +202,24 @@ class StructureEstimator:
#print("Parent Set", parents_set)
#print("Parent Set", parents_set)
#print("Test Parent", u[parent_indx])
#print("Test Parent", u[parent_indx])
if self . complete_test ( u [ parent_indx ] , var_id , parents_set ) :
if self . complete_test ( u [ parent_indx ] , var_id , parents_set ) :
print ( " Removing EDGE: " , u [ parent_indx ] , var_id )
#print("Removing EDGE:", u[parent_indx], var_id )
self . complete_graph . remove_edge ( u [ parent_indx ] , var_id )
self . complete_graph . remove_edge ( u [ parent_indx ] , var_id )
#print(self.complete_graph_frame)
#print(self.complete_graph_frame)
self . complete_graph_frame = \
""" self.complete_graph_frame = \
self . complete_graph_frame . drop (
self . complete_graph_frame . drop (
self . complete_graph_frame [ ( self . complete_graph_frame . From ==
self . complete_graph_frame [ ( self . complete_graph_frame . From ==
u [ parent_indx ] ) & ( self . complete_graph_frame . To == var_id ) ] . index )
u [ parent_indx ] ) & ( self . complete_graph_frame . To == var_id ) ] . index ) """
self . complete_graph_frame . drop ( self . complete_graph_frame [ ( self . complete_graph_frame . From == u [ parent_indx ] ) & ( self . complete_graph_frame . To == var_id ) ] . index )
#print(self.complete_graph_frame)
#print(self.complete_graph_frame)
u . remove ( u [ parent_indx ] )
#u.remove(u[parent_indx])
del u [ parent_indx ]
removed = True
removed = True
#else:
#else:
#parent_indx += 1
#parent_indx += 1
if not removed :
if not removed :
parent_indx + = 1
parent_indx + = 1
b + = 1
b + = 1
self . cache . clear ( )
def generate_possible_sub_sets_of_size ( self , u , size , parent_id ) :
def generate_possible_sub_sets_of_size ( self , u , size , parent_id ) :
#print("Inside Generate subsets", u)
#print("Inside Generate subsets", u)
@ -187,5 +238,6 @@ class StructureEstimator:
def ctpc_algorithm ( self ) :
def ctpc_algorithm ( self ) :
for node_id in self . sample_path . structure . list_of_nodes_labels ( ) :
for node_id in self . sample_path . structure . list_of_nodes_labels ( ) :
print ( " TESTING VAR: " , node_id )
self . one_iteration_of_CTPC_algorithm ( node_id )
self . one_iteration_of_CTPC_algorithm ( node_id )