1
0
Fork 0

Add Struct estim tests and DOCSTRING

parallel_struct_est
philpMartin 4 years ago
parent 5c6aa186db
commit 7edfb6c962
  1. 3
      main_package/classes/cache.py
  2. 125
      main_package/classes/structure_estimator.py
  3. 59
      main_package/tests/test_structure_estimator.py

@ -14,10 +14,9 @@ class Cache:
self.list_of_sets_of_parents = []
self.actual_cache = []
def find(self, parents_comb: typing.Union[typing.Set, str]):
def find(self, parents_comb: typing.Set): #typing.Union[typing.Set, str]
"""
Tries to find in cache given the symbolic parents combination parents_comb the SetOfCims related to that parents_comb.
N.B. if the parents_comb is not a Set, then the index refers to the SetOfCims of the actual node with no parents.
Parameters:
parents_comb: the parents related to that SetOfCims
Returns:

@ -2,12 +2,14 @@
import numpy as np
import itertools
import networkx as nx
import typing
from scipy.stats import f as f_dist
from scipy.stats import chi2 as chi2_dist
import sample_path as sp
import structure as st
import network_graph as ng
import conditional_intensity_matrix as condim
import parameters_estimator as pe
import cache as ch
@ -37,13 +39,37 @@ class StructureEstimator:
self.chi_test_alfa = chi_test_alfa
self.cache = ch.Cache()
def build_complete_graph(self, node_ids: typing.List) -> "nx.DiGraph":
    """
    Builds a complete directed graph (no self loops) given the nodes labels in the list node_ids.

    Parameters:
        node_ids: the list of nodes labels
    Returns:
        a complete DiGraph object with one node per label and one edge for every
        ordered pair of labels taken from two different positions of node_ids
    """
    complete_graph = nx.DiGraph()
    complete_graph.add_nodes_from(node_ids)
    # permutations(..., 2) pairs elements by position, so self loops are excluded
    # as long as the labels in node_ids are distinct (assumed here).
    complete_graph.add_edges_from(itertools.permutations(node_ids, 2))
    return complete_graph
def complete_test(self, test_parent, test_child, parent_set, child_states_numb, tot_vars_count):
def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int,
tot_vars_count: int):
"""
Performs a complete independence test on the directed graphs G1 = test_child U parent_set and
G2 = G1 U test_parent (added as an additional parent of the test_child).
Generates all the necessary structures and data to perform the tests.
Parameters:
test_parent: the node label of the test parent
test_child: the node label of the child
parent_set: the common parent set
child_states_numb: the cardinality of the test_child
tot_vars_count: the total number of variables in the net
Returns:
True iff test_child and test_parent are independent given the sep_set parent_set
False otherwise
"""
#print("Test Parent:", test_parent)
#print("Sep Set", parent_set)
p_set = parent_set[:]
@ -58,11 +84,11 @@ class StructureEstimator:
#print("SORTED PARENTS", sorted_parents)
cims_filter = sorted_parents != test_parent
#print("PARENTS NO FROM MASK", cims_filter)
if not p_set:
#if not p_set:
#print("EMPTY PSET TRYING TO FIND", test_child)
sofc1 = self.cache.find(test_child)
else:
sofc1 = self.cache.find(set(p_set))
#sofc1 = self.cache.find(test_child)
#else:
sofc1 = self.cache.find(set(p_set))
if not sofc1:
#print("CACHE MISSS SOFC1")
@ -76,18 +102,14 @@ class StructureEstimator:
eds1 = list(itertools.product(parent_set,test_child))
s1 = st.Structure(l1, indxs1, vals1, eds1, tot_vars_count)
g1 = ng.NetworkGraph(s1)
#g1.init_graph()
g1.fast_init(test_child)
p1 = pe.ParametersEstimator(self.sample_path, g1)
#p1.init_sets_cims_container()
p1.fast_init(test_child)
sofc1 = p1.compute_parameters_for_node(test_child)
#sofc1 = p1.sets_of_cims_struct.sets_of_cims[g1.get_positional_node_indx(test_child)]
if not p_set:
self.cache.put(test_child, sofc1)
else:
self.cache.put(set(p_set), sofc1)
#if not p_set:
#self.cache.put(test_child, sofc1)
#else:
self.cache.put(set(p_set), sofc1)
sofc2 = None
#p_set.append(test_parent)
p_set.insert(0, test_parent)
@ -100,12 +122,6 @@ class StructureEstimator:
#if sofc2:
#print("Sofc2 in CACHE ", sofc2.actual_cims)
#print(self.cache.list_of_sets_of_indxs)
"""p2 = pe.ParametersEstimator(self.sample_path, g2)
p2.init_sets_cims_container()
#p2.compute_parameters()
p2.compute_parameters_for_node(test_child)
sofc2 = p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)]"""
if not sofc2:
#print("Cache MISSS SOFC2")
complete_info.append(test_parent)
@ -119,15 +135,10 @@ class StructureEstimator:
eds2 = list(itertools.product(p_set, test_child))
s2 = st.Structure(l2, indxs2, vals2, eds2, tot_vars_count)
g2 = ng.NetworkGraph(s2)
#g2.init_graph()
g2.fast_init(test_child)
p2 = pe.ParametersEstimator(self.sample_path, g2)
#p2.init_sets_cims_container()
p2.fast_init(test_child)
sofc2 = p2.compute_parameters_for_node(test_child)
#sofc2 = p2.sets_of_cims_struct.sets_of_cims[g2.get_positional_node_indx(test_child)]
#if p_set:
#set_p_set = set(p_set)
self.cache.put(set(p_set), sofc2)
for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs):
#print("GETTING THIS P COMB", p_comb)
@ -144,15 +155,28 @@ class StructureEstimator:
return False
return True
def independence_test(self, child_states_numb, cim1, cim2):
def independence_test(self, child_states_numb: int, cim1: condim.ConditionalIntensityMatrix,
cim2: condim.ConditionalIntensityMatrix):
"""
Compute the actual independence test using two cims.
The exponential test is performed first and, if the null hypothesis is not rejected,
the chi_test is performed as well.
Parameters:
child_states_numb: the cardinality of the test child
cim1: a cim belonging to the graph without test parent
cim2: a cim belonging to the graph with test parent
Returns:
True iff both tests do NOT reject the null hypothesis of independence
False otherwise
"""
M1 = cim1.state_transition_matrix
M2 = cim2.state_transition_matrix
r1s = M1.diagonal()
r2s = M2.diagonal()
C1 = cim1.cim
C2 = cim2.cim
#print("C1", C1)
#print("C2", C2)
F_stats = C2.diagonal() / C1.diagonal()
exp_alfa = self.exp_test_sign
for val in range(0, child_states_numb):
@ -188,7 +212,16 @@ class StructureEstimator:
#print("Chi test", Chi)
return True
def one_iteration_of_CTPC_algorithm(self, var_id, tot_vars_count):
def one_iteration_of_CTPC_algorithm(self, var_id: str, tot_vars_count: int):
"""
Performs an iteration of the CTPC algorithm using the node var_id as test_child.
Parameters:
var_id: the node label of the test child
tot_vars_count: the number of nodes in the net
Returns:
void
"""
print("##################TESTING VAR################", var_id)
u = list(self.complete_graph.predecessors(var_id))
#tests_parents_numb = len(u)
@ -227,19 +260,39 @@ class StructureEstimator:
b += 1
self.cache.clear()
def generate_possible_sub_sets_of_size(self, u: typing.List, size: int, parent_label: str):
    """
    Creates all the possible subsets of size `size` of the list u
    that do not contain the node identified by parent_label.

    Parameters:
        u: the list of nodes
        size: the size of the subsets
        parent_label: the node to exclude in the subsets generation
    Returns:
        a map object that lazily yields each subset as a list; it can be consumed only once
    Raises:
        ValueError: if parent_label is not an element of u
    """
    # Work on a shallow copy so the caller's list is left untouched.
    list_without_test_parent = u[:]
    # list.remove drops the first occurrence only and raises ValueError when absent.
    list_without_test_parent.remove(parent_label)
    return map(list, itertools.combinations(list_without_test_parent, size))
def ctpc_algorithm(self):
    """
    Compute the CTPC algorithm: runs one_iteration_of_CTPC_algorithm once for every
    node in self.nodes, in order, passing the total number of variables of the sample path.

    Parameters:
        void
    Returns:
        void
    """
    ctpc_algo = self.one_iteration_of_CTPC_algorithm
    total_vars_numb = self.sample_path.total_variables_count
    # Plain loop: the iterations are run for their side effects only,
    # so there is no point in collecting their (None) results in a list.
    for n in self.nodes:
        ctpc_algo(n, total_vars_numb)
def remove_diagonal_elements(self, matrix):
    """
    Removes the main diagonal from a square matrix.

    Parameters:
        matrix: a 2-D numpy array of shape (m, m)
    Returns:
        a new array of shape (m, m - 1) where row i holds the elements of row i
        of matrix, in their original order, without the element at column i
    """
    m = matrix.shape[0]
    # A boolean mask selects the off-diagonal entries in row-major order whatever
    # the memory layout of the input is (C-ordered, Fortran-ordered, transposed view),
    # which stride arithmetic on the flattened buffer cannot guarantee.
    off_diagonal = ~np.eye(m, dtype=bool)
    return matrix[off_diagonal].reshape(m, -1)

@ -1,8 +1,12 @@
import unittest
import numpy as np
import networkx as nx
import math
from line_profiler import LineProfiler
import sample_path as sp
import structure_estimator as se
import cache as ch
class TestStructureEstimator(unittest.TestCase):
@ -14,15 +18,48 @@ class TestStructureEstimator(unittest.TestCase):
cls.s1.build_structure()
def test_init(self):
    """
    Checks that the constructor stores the sample path, the two significance levels
    and the node information, and that it builds the complete graph and the cache.
    """
    exp_alfa = 0.1
    chi_alfa = 0.1
    # The estimator is built once, with both significance levels, like in the other tests.
    se1 = se.StructureEstimator(self.s1, exp_alfa, chi_alfa)
    self.assertEqual(self.s1, se1.sample_path)
    # A complete graph without self loops has n * (n - 1) directed edges.
    self.assertEqual(se1.complete_graph_frame.shape[0],
                     self.s1.total_variables_count * (self.s1.total_variables_count - 1))
    self.assertTrue(np.array_equal(se1.nodes, np.array(self.s1.structure.nodes_labels)))
    self.assertTrue(np.array_equal(se1.nodes_indxs, self.s1.structure.nodes_indexes))
    self.assertTrue(np.array_equal(se1.nodes_vals, self.s1.structure.nodes_values))
    self.assertEqual(se1.exp_test_sign, exp_alfa)
    self.assertEqual(se1.chi_test_alfa, chi_alfa)
    self.assertIsInstance(se1.complete_graph, nx.DiGraph)
    self.assertIsInstance(se1.cache, ch.Cache)
def test_build_complete_graph(self):
    """
    Checks that build_complete_graph returns n * (n - 1) edges, that every ordered
    pair of distinct labels is an edge and that no node is connected to itself.
    """
    exp_alfa = 0.1
    chi_alfa = 0.1
    labels = self.s1.structure.nodes_labels
    nodes_numb = len(labels)
    se1 = se.StructureEstimator(self.s1, exp_alfa, chi_alfa)
    cg = se1.build_complete_graph(labels)
    self.assertEqual(len(cg.edges), nodes_numb * (nodes_numb - 1))
    for node in labels:
        # The graph must not contain self loops.
        self.assertNotIn((node, node), cg.edges)
        no_self_loops = labels[:]
        no_self_loops.remove(node)
        for n2 in no_self_loops:
            self.assertIn((node, n2), cg.edges)
def test_generate_possible_sub_sets_of_size(self):
    """
    Checks, for every node and every subset size b, that the generated subsets are
    as many as the binomial coefficient C(n - 1, b) and never contain the excluded node.
    """
    exp_alfa = 0.1
    chi_alfa = 0.1
    labels = self.s1.structure.nodes_labels
    nodes_numb = len(labels)
    se1 = se.StructureEstimator(self.s1, exp_alfa, chi_alfa)
    for node in labels:
        for b in range(nodes_numb):
            # The method returns a single-use map object: materialize it once
            # so it can be both counted and inspected.
            sets = list(se1.generate_possible_sub_sets_of_size(labels, b, node))
            # Exact integer arithmetic: the quotient of these factorials is always an integer.
            expected_numb = math.factorial(nodes_numb - 1) // (
                math.factorial(b) * math.factorial(nodes_numb - 1 - b))
            self.assertEqual(len(sets), expected_numb)
            for sset in sets:
                self.assertNotIn(node, sset)
def test_one_iteration(self):
se1 = se.StructureEstimator(self.s1, 0.1, 0.1)
#se1.one_iteration_of_CTPC_algorithm('X')
#self.aux_test_complete_test(se1, 'X', 'Y', ['Z'])
lp = LineProfiler()
lp.add_function(se1.complete_test)
lp.add_function(se1.one_iteration_of_CTPC_algorithm)
@ -30,12 +67,16 @@ class TestStructureEstimator(unittest.TestCase):
lp_wrapper = lp(se1.ctpc_algorithm)
lp_wrapper()
lp.print_stats()
#se1.ctpc_algorithm()
print(se1.complete_graph.edges)
print(self.s1.structure.edges)
def aux_test_complete_test(self, estimator, test_par, test_child, p_set):
estimator.complete_test(test_par, test_child, p_set)
for ed in self.s1.structure.edges:
self.assertIn(tuple(ed), se1.complete_graph.edges)
tuples_edges = [tuple(rec) for rec in self.s1.structure.edges]
spurious_edges = []
for ed in se1.complete_graph.edges:
if not(ed in tuples_edges):
spurious_edges.append(ed)
print("Spurious Edges:",spurious_edges)
if __name__ == '__main__':