@ -1,16 +1,17 @@
import pandas as pd
import numpy as np
import math
import itertools
import networkx as nx
from scipy . stats import f as f_dist
from scipy . stats import chi2 as chi2_dist
import sample_path as sp
import structure as st
import network_graph as ng
import parameters_estimator as pe
import cache as ch
class StructureEstimator :
@ -21,6 +22,7 @@ class StructureEstimator:
self . complete_graph = self . build_complete_graph ( self . sample_path . structure . list_of_nodes_labels ( ) )
self . exp_test_sign = exp_test_alfa
self . chi_test_alfa = chi_test_alfa
self . cache = ch . Cache ( )
def build_complete_graph_frame ( self , node_ids ) :
complete_frame = pd . DataFrame ( itertools . permutations ( node_ids , 2 ) )
@ -34,54 +36,99 @@ class StructureEstimator:
return complete_graph
def complete_test ( self , test_parent , test_child , parent_set ) :
p_set = parent_set [ : ]
complete_info = parent_set [ : ]
complete_info . append ( test_parent )
tmp_df = self . complete_graph_frame . loc [ self . complete_graph_frame [ ' To ' ] . isin ( [ test_child ] ) ]
#tmp_df = self.complete_graph_frame.loc[np.in1d(self.complete_graph_frame['To'], test_child)]
d2 = tmp_df . loc [ tmp_df [ ' From ' ] . isin ( complete_info ) ]
complete_info . append ( test_child )
v2 = self . sample_path . structure . variables_frame . loc [
self . sample_path . structure . variables_frame [ ' Name ' ] . isin ( complete_info ) ]
#print(tmp_df)
d1 = tmp_df . loc [ tmp_df [ ' From ' ] . isin ( parent_set ) ]
parent_set . append ( test_child )
#d1 = tmp_df.loc[tmp_df['From'].isin(parent_set) ]
#parent_set.append(test_child )
#print(parent_set)
v1 = self . sample_path . structure . variables_frame . loc [ self . sample_path . structure . variables_frame [ ' Name ' ] . isin ( parent_set ) ]
""" v1 = self.sample_path.structure.variables_frame.loc[self.sample_path.structure.variables_frame[ ' Name ' ].isin(parent_set) ]
s1 = st . Structure ( d1 , v1 , self . sample_path . total_variables_count )
g1 = ng . NetworkGraph ( s1 )
g1 . init_graph ( )
g1 . init_graph ( ) """
parent_set . append ( test_parent )
d2 = tmp_df . loc [ tmp_df [ ' From ' ] . isin ( parent_set ) ]
#parent_set.append(test_parent )
""" d2 = tmp_df.loc[tmp_df[ ' From ' ].isin(parent_set) ]
v2 = self . sample_path . structure . variables_frame . loc [ self . sample_path . structure . variables_frame [ ' Name ' ] . isin ( parent_set ) ]
#print(d2)
#print(v2)
s2 = st . Structure ( d2 , v2 , self . sample_path . total_variables_count )
g2 = ng . NetworkGraph ( s2 )
g2 . init_graph ( )
g2 . init_graph ( ) """
#parent_set.append(test_child)
sofc1 = None
#if not sofc1:
if not p_set :
sofc1 = self . cache . find ( test_child )
if not sofc1 :
#d1 = tmp_df.loc[tmp_df['From'].isin(parent_set)]
d1 = d2 [ d2 . From != test_parent ]
#v1 = self.sample_path.structure.variables_frame.loc[
#self.sample_path.structure.variables_frame['Name'].isin(parent_set)]
v1 = v2 [ v2 . Name != test_parent ]
#print("D1", d1)
#print("V1", v1)
s1 = st . Structure ( d1 , v1 , self . sample_path . total_variables_count )
g1 = ng . NetworkGraph ( s1 )
g1 . init_graph ( )
p1 = pe . ParametersEstimator ( self . sample_path , g1 )
p1 . init_sets_cims_container ( )
#print("Computing params for",test_child, test_parent, parent_set)
p1 . compute_parameters_for_node ( test_child )
#p1.compute_parameters()
sofc1 = p1 . sets_of_cims_struct . sets_of_cims [ s1 . get_positional_node_indx ( test_child ) ]
self . cache . put ( test_child , sofc1 )
sofc2 = None
p_set . append ( test_parent )
if p_set :
#p_set.append(test_parent)
#print("PSET ", p_set)
set_p_set = set ( p_set )
sofc2 = self . cache . find ( set_p_set )
#print("Sofc2 ", sofc2)
#print(self.cache.list_of_sets_of_indxs)
""" p2 = pe.ParametersEstimator(self.sample_path, g2)
p2 . init_sets_cims_container ( )
#p2.compute_parameters()
p2 . compute_parameters_for_node ( test_child )
sofc2 = p2 . sets_of_cims_struct . sets_of_cims [ s2 . get_positional_node_indx ( test_child ) ] """
if not sofc2 or p_set :
print ( " Cache Miss SOC2 " )
#parent_set.append(test_parent)
#d2 = tmp_df.loc[tmp_df['From'].isin(p_set)]
#v2 = self.sample_path.structure.variables_frame.loc[
#self.sample_path.structure.variables_frame['Name'].isin(parent_set)]
#print("D2", d2)
#print("V2", v2)
#s2 = st.Structure(d2, v2, self.sample_path.total_variables_count)
s2 = st . Structure ( d2 , v2 , self . sample_path . total_variables_count )
g2 = ng . NetworkGraph ( s2 )
g2 . init_graph ( )
p2 = pe . ParametersEstimator ( self . sample_path , g2 )
p2 . init_sets_cims_container ( )
# p2.compute_parameters()
p2 . compute_parameters_for_node ( test_child )
#for cim in p1.sets_of_cims_struct.sets_of_cims[s1.get_positional_node_indx(test_child)].actual_cims:
#print(cim)
#print(cim.state_transition_matrix)
#print("C_1", p1.sets_of_cims_struct.sets_of_cims[s1.get_positional_node_indx(test_child)].transition_matrices)
indx = 0
for i , cim1 in enumerate (
p1 . sets_of_cims_struct . sets_of_cims [ s1 . get_positional_node_indx ( test_child ) ] . actual_cims ) :
#for j, cim2 in enumerate(
#p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)].actual_cims):
for j in range ( indx , self . sample_path . structure . get_states_number ( test_parent ) + indx ) :
#print("J", j)
#print("Pos Index", p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)].actual_cims)
cim2 = p2 . sets_of_cims_struct . sets_of_cims [ s2 . get_positional_node_indx ( test_child ) ] . actual_cims [ j ]
indx + = 1
sofc2 = p2 . sets_of_cims_struct . sets_of_cims [ s2 . get_positional_node_indx ( test_child ) ]
if p_set :
#set_p_set = set(p_set)
self . cache . put ( set_p_set , sofc2 )
end = 0
increment = self . sample_path . structure . get_states_number ( test_parent )
for cim1 in sofc1 . actual_cims :
start = end
end = start + increment
for j in range ( start , end ) :
#cim2 = sofc2.actual_cims[j]
#print(indx)
print ( " Run Test " , i , j )
if not self . independence_test ( test_child , cim1 , cim2 ) :
#print("Run Test", i, j)
if not self . independence_test ( test_child , cim1 , sofc2 . actual_cims [ j ] ) :
return False
return True
@ -90,7 +137,8 @@ class StructureEstimator:
r1s = cim1 . state_transition_matrix . diagonal ( )
r2s = cim2 . state_transition_matrix . diagonal ( )
F_stats = cim2 . cim . diagonal ( ) / cim1 . cim . diagonal ( )
for val in range ( 0 , self . sample_path . structure . get_states_number ( tested_child ) ) : # i possibili valori di tested child TODO QUESTO CONTO DEVE ESSERE VETTORIZZATO
child_states_numb = self . sample_path . structure . get_states_number ( tested_child )
for val in range ( 0 , child_states_numb ) : # i possibili valori di tested child TODO QUESTO CONTO DEVE ESSERE VETTORIZZATO
#r1 = cim1.state_transition_matrix[val][val]
#r2 = cim2.state_transition_matrix[val][val]
#print("No Test Parent:",cim1.cim[val][val],"With Test Parent", cim2.cim[val][val])
@ -108,7 +156,7 @@ class StructureEstimator:
M2_no_diag = self . remove_diagonal_elements ( cim2 . state_transition_matrix )
#print("M1 no diag", M1_no_diag)
#print("M2 no diag", M2_no_diag)
chi_2_quantile = chi2_dist . ppf ( 1 - self . chi_test_alfa , self . sample_path . structure . get_states_number ( tested_child ) - 1 )
chi_2_quantile = chi2_dist . ppf ( 1 - self . chi_test_alfa , child_states_numb - 1 )
"""
Ks = np . sqrt ( cim1 . state_transition_matrix . diagonal ( ) / cim2 . state_transition_matrix . diagonal ( ) )
Ls = np . reciprocal ( Ks )
@ -116,7 +164,7 @@ class StructureEstimator:
/ ( M1_no_diag + M2_no_diag ) ) , axis = 1 ) """
Ks = np . sqrt ( r1s / r2s )
Ls = np . sqrt ( r2s / r1s )
for val in range ( 0 , self . sample_path . structure . get_states_number ( tested_child ) ) :
for val in range ( 0 , child_states_numb ) :
#K = math.sqrt(cim1.state_transition_matrix[val][val] / cim2.state_transition_matrix[val][val])
#L = 1 / K
Chi = np . sum ( np . power ( Ks [ val ] * M2_no_diag [ val ] - Ls [ val ] * M1_no_diag [ val ] , 2 ) /
@ -154,21 +202,24 @@ class StructureEstimator:
#print("Parent Set", parents_set)
#print("Test Parent", u[parent_indx])
if self . complete_test ( u [ parent_indx ] , var_id , parents_set ) :
print ( " Removing EDGE: " , u [ parent_indx ] , var_id )
#print("Removing EDGE:", u[parent_indx], var_id )
self . complete_graph . remove_edge ( u [ parent_indx ] , var_id )
#print(self.complete_graph_frame)
self . complete_graph_frame = \
""" self.complete_graph_frame = \
self . complete_graph_frame . drop (
self . complete_graph_frame [ ( self . complete_graph_frame . From ==
u [ parent_indx ] ) & ( self . complete_graph_frame . To == var_id ) ] . index )
u [ parent_indx ] ) & ( self . complete_graph_frame . To == var_id ) ] . index ) """
self . complete_graph_frame . drop ( self . complete_graph_frame [ ( self . complete_graph_frame . From == u [ parent_indx ] ) & ( self . complete_graph_frame . To == var_id ) ] . index )
#print(self.complete_graph_frame)
u . remove ( u [ parent_indx ] )
#u.remove(u[parent_indx])
del u [ parent_indx ]
removed = True
#else:
#parent_indx += 1
if not removed :
parent_indx + = 1
b + = 1
self . cache . clear ( )
def generate_possible_sub_sets_of_size ( self , u , size , parent_id ) :
#print("Inside Generate subsets", u)
@ -187,5 +238,6 @@ class StructureEstimator:
def ctpc_algorithm ( self ) :
for node_id in self . sample_path . structure . list_of_nodes_labels ( ) :
print ( " TESTING VAR: " , node_id )
self . one_iteration_of_CTPC_algorithm ( node_id )