|
|
|
@ -2,12 +2,14 @@ |
|
|
|
|
import numpy as np |
|
|
|
|
import itertools |
|
|
|
|
import networkx as nx |
|
|
|
|
import typing |
|
|
|
|
from scipy.stats import f as f_dist |
|
|
|
|
from scipy.stats import chi2 as chi2_dist |
|
|
|
|
|
|
|
|
|
import sample_path as sp |
|
|
|
|
import structure as st |
|
|
|
|
import network_graph as ng |
|
|
|
|
import conditional_intensity_matrix as condim |
|
|
|
|
import parameters_estimator as pe |
|
|
|
|
import cache as ch |
|
|
|
|
|
|
|
|
@ -37,13 +39,37 @@ class StructureEstimator: |
|
|
|
|
self.chi_test_alfa = chi_test_alfa |
|
|
|
|
self.cache = ch.Cache() |
|
|
|
|
|
|
|
|
|
def build_complete_graph(self, node_ids: typing.List):
    """
    Builds a complete directed graph (no self loops) given the nodes labels in the list node_ids.

    Each ordered pair taken from node_ids becomes a directed edge, so for labels
    a and b both a -> b and b -> a are present (assuming the labels are unique).

    Parameters:
        node_ids: the list of nodes labels
    Returns:
        a networkx DiGraph containing all the nodes and all the edges between them
    """
    complete_graph = nx.DiGraph()
    # Nodes are added explicitly: with fewer than two labels there are no
    # permutations of size 2, so the edges alone would not create any node.
    complete_graph.add_nodes_from(node_ids)
    complete_graph.add_edges_from(itertools.permutations(node_ids, 2))
    return complete_graph
|
|
|
|
|
|
|
|
|
def complete_test(self, test_parent, test_child, parent_set, child_states_numb, tot_vars_count): |
|
|
|
|
def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int, |
|
|
|
|
tot_vars_count: int): |
|
|
|
|
""" |
|
|
|
|
Performs a complete independence test on the directed graphs G1 = test_child U parent_set |
|
|
|
|
G2 = G1 U test_parent (added as an additional parent of the test_child). |
|
|
|
|
Generates all the necessary structures and datas to perform the tests. |
|
|
|
|
|
|
|
|
|
Parameters: |
|
|
|
|
test_parent: the node label of the test parent |
|
|
|
|
test_child: the node label of the child |
|
|
|
|
parent_set: the common parent set |
|
|
|
|
child_states_numb: the cardinality of the test_child |
|
|
|
|
tot_vars_count: the total number of variables in the net |
|
|
|
|
Returns: |
|
|
|
|
True iff test_child and test_parent are independent given the sep_set parent_set |
|
|
|
|
False otherwise |
|
|
|
|
""" |
|
|
|
|
#print("Test Parent:", test_parent) |
|
|
|
|
#print("Sep Set", parent_set) |
|
|
|
|
p_set = parent_set[:] |
|
|
|
@ -58,11 +84,11 @@ class StructureEstimator: |
|
|
|
|
#print("SORTED PARENTS", sorted_parents) |
|
|
|
|
cims_filter = sorted_parents != test_parent |
|
|
|
|
#print("PARENTS NO FROM MASK", cims_filter) |
|
|
|
|
if not p_set: |
|
|
|
|
#if not p_set: |
|
|
|
|
#print("EMPTY PSET TRYING TO FIND", test_child) |
|
|
|
|
sofc1 = self.cache.find(test_child) |
|
|
|
|
else: |
|
|
|
|
sofc1 = self.cache.find(set(p_set)) |
|
|
|
|
#sofc1 = self.cache.find(test_child) |
|
|
|
|
#else: |
|
|
|
|
sofc1 = self.cache.find(set(p_set)) |
|
|
|
|
|
|
|
|
|
if not sofc1: |
|
|
|
|
#print("CACHE MISSS SOFC1") |
|
|
|
@ -76,18 +102,14 @@ class StructureEstimator: |
|
|
|
|
eds1 = list(itertools.product(parent_set,test_child)) |
|
|
|
|
s1 = st.Structure(l1, indxs1, vals1, eds1, tot_vars_count) |
|
|
|
|
g1 = ng.NetworkGraph(s1) |
|
|
|
|
#g1.init_graph() |
|
|
|
|
g1.fast_init(test_child) |
|
|
|
|
p1 = pe.ParametersEstimator(self.sample_path, g1) |
|
|
|
|
#p1.init_sets_cims_container() |
|
|
|
|
p1.fast_init(test_child) |
|
|
|
|
sofc1 = p1.compute_parameters_for_node(test_child) |
|
|
|
|
|
|
|
|
|
#sofc1 = p1.sets_of_cims_struct.sets_of_cims[g1.get_positional_node_indx(test_child)] |
|
|
|
|
if not p_set: |
|
|
|
|
self.cache.put(test_child, sofc1) |
|
|
|
|
else: |
|
|
|
|
self.cache.put(set(p_set), sofc1) |
|
|
|
|
#if not p_set: |
|
|
|
|
#self.cache.put(test_child, sofc1) |
|
|
|
|
#else: |
|
|
|
|
self.cache.put(set(p_set), sofc1) |
|
|
|
|
sofc2 = None |
|
|
|
|
#p_set.append(test_parent) |
|
|
|
|
p_set.insert(0, test_parent) |
|
|
|
@ -100,12 +122,6 @@ class StructureEstimator: |
|
|
|
|
#if sofc2: |
|
|
|
|
#print("Sofc2 in CACHE ", sofc2.actual_cims) |
|
|
|
|
#print(self.cache.list_of_sets_of_indxs) |
|
|
|
|
|
|
|
|
|
"""p2 = pe.ParametersEstimator(self.sample_path, g2) |
|
|
|
|
p2.init_sets_cims_container() |
|
|
|
|
#p2.compute_parameters() |
|
|
|
|
p2.compute_parameters_for_node(test_child) |
|
|
|
|
sofc2 = p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)]""" |
|
|
|
|
if not sofc2: |
|
|
|
|
#print("Cache MISSS SOFC2") |
|
|
|
|
complete_info.append(test_parent) |
|
|
|
@ -119,15 +135,10 @@ class StructureEstimator: |
|
|
|
|
eds2 = list(itertools.product(p_set, test_child)) |
|
|
|
|
s2 = st.Structure(l2, indxs2, vals2, eds2, tot_vars_count) |
|
|
|
|
g2 = ng.NetworkGraph(s2) |
|
|
|
|
#g2.init_graph() |
|
|
|
|
g2.fast_init(test_child) |
|
|
|
|
p2 = pe.ParametersEstimator(self.sample_path, g2) |
|
|
|
|
#p2.init_sets_cims_container() |
|
|
|
|
p2.fast_init(test_child) |
|
|
|
|
sofc2 = p2.compute_parameters_for_node(test_child) |
|
|
|
|
#sofc2 = p2.sets_of_cims_struct.sets_of_cims[g2.get_positional_node_indx(test_child)] |
|
|
|
|
#if p_set: |
|
|
|
|
#set_p_set = set(p_set) |
|
|
|
|
self.cache.put(set(p_set), sofc2) |
|
|
|
|
for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs): |
|
|
|
|
#print("GETTING THIS P COMB", p_comb) |
|
|
|
@ -144,15 +155,28 @@ class StructureEstimator: |
|
|
|
|
return False |
|
|
|
|
return True |
|
|
|
|
|
|
|
|
|
def independence_test(self, child_states_numb, cim1, cim2): |
|
|
|
|
def independence_test(self, child_states_numb: int, cim1: condim.ConditionalIntensityMatrix, |
|
|
|
|
cim2: condim.ConditionalIntensityMatrix): |
|
|
|
|
""" |
|
|
|
|
Compute the actual independence test using two cims. |
|
|
|
|
It is performed first the exponential test and if the null hypothesis is not rejected, |
|
|
|
|
the chi_test is performed as well. |
|
|
|
|
|
|
|
|
|
Parameters: |
|
|
|
|
child_states_numb: the cardinality of the test child |
|
|
|
|
cim1: a cim belonging to the graph without test parent |
|
|
|
|
cim2: a cim belonging to the graph with test parent |
|
|
|
|
|
|
|
|
|
Returns: |
|
|
|
|
True iff both tests do NOT reject the null hypothesis of independence |
|
|
|
|
False otherwise |
|
|
|
|
""" |
|
|
|
|
M1 = cim1.state_transition_matrix |
|
|
|
|
M2 = cim2.state_transition_matrix |
|
|
|
|
r1s = M1.diagonal() |
|
|
|
|
r2s = M2.diagonal() |
|
|
|
|
C1 = cim1.cim |
|
|
|
|
C2 = cim2.cim |
|
|
|
|
#print("C1", C1) |
|
|
|
|
#print("C2", C2) |
|
|
|
|
F_stats = C2.diagonal() / C1.diagonal() |
|
|
|
|
exp_alfa = self.exp_test_sign |
|
|
|
|
for val in range(0, child_states_numb): |
|
|
|
@ -188,7 +212,16 @@ class StructureEstimator: |
|
|
|
|
#print("Chi test", Chi) |
|
|
|
|
return True |
|
|
|
|
|
|
|
|
|
def one_iteration_of_CTPC_algorithm(self, var_id, tot_vars_count): |
|
|
|
|
def one_iteration_of_CTPC_algorithm(self, var_id: str, tot_vars_count: int): |
|
|
|
|
""" |
|
|
|
|
Performs an iteration of the CTPC algorithm using the node var_id as test_child. |
|
|
|
|
|
|
|
|
|
Parameters: |
|
|
|
|
var_id: the node label of the test child |
|
|
|
|
tot_vars_count: the number of nodes in the net |
|
|
|
|
Returns: |
|
|
|
|
void |
|
|
|
|
""" |
|
|
|
|
print("##################TESTING VAR################", var_id) |
|
|
|
|
u = list(self.complete_graph.predecessors(var_id)) |
|
|
|
|
#tests_parents_numb = len(u) |
|
|
|
@ -227,19 +260,39 @@ class StructureEstimator: |
|
|
|
|
b += 1 |
|
|
|
|
self.cache.clear() |
|
|
|
|
|
|
|
|
|
def generate_possible_sub_sets_of_size(self, u: typing.List, size: int, parent_label: str):
    """
    Creates all the possible subsets of the list u of size size,
    that do not contain the node identified by parent_label.

    Parameters:
        u: the list of nodes (it is not modified, a copy is used)
        size: the size of the subsets
        parent_label: the node to exclude in the subsets generation
    Returns:
        a map object that lazily yields each subset as a list
    Raises:
        ValueError: if parent_label is not contained in u
    """
    # Work on a shallow copy so the caller's list keeps the test parent.
    list_without_test_parent = u[:]
    list_without_test_parent.remove(parent_label)
    return map(list, itertools.combinations(list_without_test_parent, size))
|
|
|
|
|
|
|
|
|
def ctpc_algorithm(self):
    """
    Compute the CTPC algorithm.

    Runs one_iteration_of_CTPC_algorithm once for every node in self.nodes,
    in order, passing the total number of variables read from the sample path.

    Parameters:
        void
    Returns:
        void
    """
    ctpc_algo = self.one_iteration_of_CTPC_algorithm
    total_vars_numb = self.sample_path.total_variables_count
    # Plain loop: the iterations are executed only for their side effects,
    # so no result list needs to be built.
    for n in self.nodes:
        ctpc_algo(n, total_vars_numb)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def remove_diagonal_elements(self, matrix):
    """
    Returns a copy of a square matrix with the main diagonal removed.

    Row i of the result holds the elements of row i of matrix, in the same
    order, without the element in position (i, i).

    Parameters:
        matrix: a square 2-D array (m x m)
    Returns:
        a new array of shape (m, m - 1) with the same dtype as matrix
    Raises:
        ValueError: if matrix is not 2-dimensional or not square
    """
    matrix = np.asarray(matrix)
    if matrix.ndim != 2 or matrix.shape[0] != matrix.shape[1]:
        raise ValueError("matrix must be a square 2-D array, got shape {}".format(matrix.shape))
    m = matrix.shape[0]
    # A boolean mask selects the off-diagonal entries in row-major order
    # independently of the memory layout of the input; stride tricks on the
    # raveled buffer are only valid for C-contiguous arrays.
    off_diagonal = ~np.eye(m, dtype=bool)
    return matrix[off_diagonal].reshape(m, max(m - 1, 0))
|
|
|
|
|
|
|
|
|
def ctpc_algorithm(self):
    """
    Compute the CTPC algorithm.

    Runs one_iteration_of_CTPC_algorithm once for every node in self.nodes,
    in order, passing the total number of variables read from the sample path.

    Parameters:
        void
    Returns:
        void
    """
    # NOTE(review): a ctpc_algorithm definition also appears earlier in this
    # file; the two look like the old/new sides of a diff -- keep only one.
    ctpc_algo = self.one_iteration_of_CTPC_algorithm
    total_vars_numb = self.sample_path.total_variables_count
    # Plain loop: the iterations are executed only for their side effects,
    # so no result list needs to be built.
    for n in self.nodes:
        ctpc_algo(n, total_vars_numb)
|
|
|
|
|
|
|
|
|