1
0
Fork 0

Add removable_edges_matrix to StructureEstimator

better_develop
Filippo Martini 4 years ago
parent b54b2edf7a
commit b9fc914962
  1. 59
      PyCTBN/classes/structure_estimator.py
  2. 23
      PyCTBN/tests/original_ctpc_algorithm.py
  3. 8
      PyCTBN/tests/performance_comparisons.py
  4. 14
      PyCTBN/tests/test_structure_estimator.py

@ -36,7 +36,8 @@ class StructureEstimator(object):
:_cache: the Cache object :_cache: the Cache object
""" """
def __init__(self, sample_path: SamplePath, exp_test_alfa: float, chi_test_alfa: float): def __init__(self, sample_path: SamplePath, exp_test_alfa: float, chi_test_alfa: float,
thumb_threshold:int = 25, known_edges: typing.List = None):
"""Constructor Method """Constructor Method
""" """
self._sample_path = sample_path self._sample_path = sample_path
@ -46,6 +47,8 @@ class StructureEstimator(object):
self._complete_graph = StructureEstimator.build_complete_graph(self._sample_path.structure.nodes_labels) self._complete_graph = StructureEstimator.build_complete_graph(self._sample_path.structure.nodes_labels)
self._exp_test_sign = exp_test_alfa self._exp_test_sign = exp_test_alfa
self._chi_test_alfa = chi_test_alfa self._chi_test_alfa = chi_test_alfa
self._thumb_threshold = thumb_threshold
self._removable_edges_matrix = self.build_removable_edges_matrix(known_edges)
self._cache = Cache() self._cache = Cache()
@staticmethod @staticmethod
@ -62,8 +65,18 @@ class StructureEstimator(object):
complete_graph.add_edges_from(itertools.permutations(node_ids, 2)) complete_graph.add_edges_from(itertools.permutations(node_ids, 2))
return complete_graph return complete_graph
def build_removable_edges_matrix(self, known_edges: typing.List):
tot_vars_count = self._sample_path.total_variables_count
complete_adj_matrix = np.full((tot_vars_count, tot_vars_count), True)
if known_edges:
for edge in known_edges:
i = self._sample_path.structure.get_node_indx(edge[0])
j = self._sample_path.structure.get_node_indx(edge[1])
complete_adj_matrix[i][j] = False
return complete_adj_matrix
def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int, def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int,
tot_vars_count: int) -> bool: tot_vars_count: int, parent_indx, child_indx) -> bool:
"""Performs a complete independence test on the directed graphs G1 = {test_child U parent_set} """Performs a complete independence test on the directed graphs G1 = {test_child U parent_set}
G2 = {G1 U test_parent} (added as an additional parent of the test_child). G2 = {G1 U test_parent} (added as an additional parent of the test_child).
Generates all the necessary structures and datas to perform the tests. Generates all the necessary structures and datas to perform the tests.
@ -117,16 +130,21 @@ class StructureEstimator(object):
p2.fast_init(test_child) p2.fast_init(test_child)
sofc1 = p2.compute_parameters_for_node(test_child) sofc1 = p2.compute_parameters_for_node(test_child)
self._cache.put(set(p_set), sofc1) self._cache.put(set(p_set), sofc1)
thumb_value = 0.0
if child_states_numb > 2:
parent_val = self._sample_path.structure.get_states_number(test_parent)
bool_mask_vals = np.isin(self._nodes, parent_set)
parents_vals = self._nodes_vals[bool_mask_vals]
thumb_value = self.compute_thumb_value(parent_val, child_states_numb, parents_vals)
for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs): for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs):
cond_cims = sofc2.filter_cims_with_mask(cims_filter, p_comb) cond_cims = sofc2.filter_cims_with_mask(cims_filter, p_comb)
for cim2 in cond_cims: for cim2 in cond_cims:
if not self.independence_test(child_states_numb, cim1, cim2): if not self.independence_test(child_states_numb, cim1, cim2, thumb_value, parent_indx, child_indx):
return False return False
return True return True
def independence_test(self, child_states_numb: int, cim1: ConditionalIntensityMatrix, def independence_test(self, child_states_numb: int, cim1: ConditionalIntensityMatrix,
cim2: ConditionalIntensityMatrix) -> bool: cim2: ConditionalIntensityMatrix, thumb_value: float, parent_indx, child_indx) -> bool:
"""Compute the actual independence test using two cims. """Compute the actual independence test using two cims.
It is performed first the exponential test and if the null hypothesis is not rejected, It is performed first the exponential test and if the null hypothesis is not rejected,
it is performed also the chi_test. it is performed also the chi_test.
@ -146,6 +164,10 @@ class StructureEstimator(object):
r2s = M2.diagonal() r2s = M2.diagonal()
C1 = cim1.cim C1 = cim1.cim
C2 = cim2.cim C2 = cim2.cim
if child_states_numb > 2:
if (np.sum(np.diagonal(M1)) / thumb_value) < self._thumb_threshold:
self._removable_edges_matrix[parent_indx][child_indx] = False
return False
F_stats = C2.diagonal() / C1.diagonal() F_stats = C2.diagonal() / C1.diagonal()
exp_alfa = self._exp_test_sign exp_alfa = self._exp_test_sign
for val in range(0, child_states_numb): for val in range(0, child_states_numb):
@ -165,6 +187,14 @@ class StructureEstimator(object):
return False return False
return True return True
def compute_thumb_value(self, parent_val, child_val, parent_set_vals):
df = (child_val - 1) ** 2
df = df * parent_val
for v in parent_set_vals:
df = df * v
return df
def one_iteration_of_CTPC_algorithm(self, var_id: str) -> None: def one_iteration_of_CTPC_algorithm(self, var_id: str) -> None:
"""Performs an iteration of the CTPC algorithm using the node ``var_id`` as ``test_child``. """Performs an iteration of the CTPC algorithm using the node ``var_id`` as ``test_child``.
@ -179,14 +209,17 @@ class StructureEstimator(object):
parent_indx = 0 parent_indx = 0
while parent_indx < len(u): while parent_indx < len(u):
removed = False removed = False
S = StructureEstimator.generate_possible_sub_sets_of_size(u, b, u[parent_indx])
test_parent = u[parent_indx] test_parent = u[parent_indx]
for parents_set in S: i = self._sample_path.structure.get_node_indx(test_parent)
if self.complete_test(test_parent, var_id, parents_set, child_states_numb, tot_vars_count): j = self._sample_path.structure.get_node_indx(var_id)
self._complete_graph.remove_edge(test_parent, var_id) if self._removable_edges_matrix[i][j]:
u.remove(test_parent) S = StructureEstimator.generate_possible_sub_sets_of_size(u, b, test_parent)
removed = True for parents_set in S:
break if self.complete_test(test_parent, var_id, parents_set, child_states_numb, tot_vars_count, i, j):
self._complete_graph.remove_edge(test_parent, var_id)
u.remove(test_parent)
removed = True
break
if not removed: if not removed:
parent_indx += 1 parent_indx += 1
b += 1 b += 1
@ -263,7 +296,7 @@ class StructureEstimator(object):
edges_colors = ['red' if edge in spurious_edges else 'black' for edge in self._complete_graph.edges] edges_colors = ['red' if edge in spurious_edges else 'black' for edge in self._complete_graph.edges]
graph_to_draw.add_edges_from(spurious_edges) graph_to_draw.add_edges_from(spurious_edges)
graph_to_draw.add_edges_from(non_spurious_edges) graph_to_draw.add_edges_from(non_spurious_edges)
pos = nx.spring_layout(graph_to_draw, k=5, scale=3) pos = nx.spring_layout(graph_to_draw, k=0.5*1/np.sqrt(len(graph_to_draw.nodes())), iterations=50,scale=10)
options = { options = {
"node_size": 2000, "node_size": 2000,
"node_color": "white", "node_color": "white",

@ -242,27 +242,12 @@ class OriginalCTPCAlgorithm(AbstractImporter):
dtype=np.int64) dtype=np.int64)
#print(diag_indices) #print(diag_indices)
T_filter = np.array([child_id, *parents_id], dtype=np.int) + 1 T_filter = np.array([child_id, *parents_id], dtype=np.int) + 1
#print("TFilter",T_filter)
#print("TVector", T_vector)
#print("Trajectories", trajectories)
#print("Actual TVect",T_vector / T_vector[0])
#print("Masked COlumns", trajectories[:, T_filter]) # Colonne non shiftate dei values
#print("Masked Multiplied COlumns",trajectories[:, T_filter] * (T_vector / T_vector[0]) )
#print("Summing",np.sum(trajectories[:, T_filter] * (T_vector / T_vector[0]), axis=1))
#print("Deltas",trajectories[:, int(trajectories.shape[1] / 2)]) # i delta times
assert np.sum(trajectories[:, T_filter] * (T_vector / T_vector[0]), axis=1).size == \ assert np.sum(trajectories[:, T_filter] * (T_vector / T_vector[0]), axis=1).size == \
trajectories[:, int(trajectories.shape[1] / 2)].size trajectories[:, int(trajectories.shape[1] / 2)].size
#print(T_vector[-1]) #print(T_vector[-1])
T[:] = np.bincount(np.sum(trajectories[:, T_filter] * T_vector / T_vector[0], axis=1).astype(np.int), \ T[:] = np.bincount(np.sum(trajectories[:, T_filter] * T_vector / T_vector[0], axis=1).astype(np.int), \
trajectories[:, int(trajectories.shape[1] / 2)], minlength=T_vector[-1]).reshape(-1, trajectories[:, int(trajectories.shape[1] / 2)], minlength=T_vector[-1]).reshape(-1,
T.shape[1]) T.shape[1])
#print("Shape", T.shape[1])
#print(np.bincount(np.sum(trajectories[:, T_filter] * T_vector / T_vector[0], axis=1).astype(np.int), \
#trajectories[:, int(trajectories.shape[1] / 2)], minlength=T_vector[-1]))
###### Transitions #######
#print("Shifted Node column", trajectories[:, int(trajectories.shape[1] / 2) + 1 + child_id].astype(np.int))
#print("Step 2", trajectories[:, int(trajectories.shape[1] / 2) + 1 + child_id].astype(np.int) >= 0)
trj_tmp = trajectories[trajectories[:, int(trajectories.shape[1] / 2) + 1 + child_id].astype(np.int) >= 0] trj_tmp = trajectories[trajectories[:, int(trajectories.shape[1] / 2) + 1 + child_id].astype(np.int) >= 0]
#print("Trj Temp", trj_tmp) #print("Trj Temp", trj_tmp)
@ -270,13 +255,6 @@ class OriginalCTPCAlgorithm(AbstractImporter):
M_filter = np.array([child_id, child_id, *parents_id], dtype=np.int) + 1 M_filter = np.array([child_id, child_id, *parents_id], dtype=np.int) + 1
#print("MFilter", M_filter) #print("MFilter", M_filter)
M_filter[0] += int(trj_tmp.shape[1] / 2) M_filter[0] += int(trj_tmp.shape[1] / 2)
#print("MFilter", M_filter)
#print("MVector", M_vector)
#print("Division", M_vector / M_vector[0])
#print("Masked Traj temp", (trj_tmp[:, M_filter]))
#print("Masked Multiplied Traj temp", trj_tmp[:, M_filter] * M_vector / M_vector[0])
#print("Summing", np.sum(trj_tmp[:, M_filter] * M_vector / M_vector[0], axis=1))
#print(M.shape[2])
M[:] = np.bincount(np.sum(trj_tmp[:, M_filter] * M_vector / M_vector[0], axis=1).astype(np.int), \ M[:] = np.bincount(np.sum(trj_tmp[:, M_filter] * M_vector / M_vector[0], axis=1).astype(np.int), \
minlength=M_vector[-1]).reshape(-1, M.shape[1], M.shape[2]) minlength=M_vector[-1]).reshape(-1, M.shape[1], M.shape[2])
@ -390,6 +368,7 @@ class OriginalCTPCAlgorithm(AbstractImporter):
#print("C2:", CIM_from) #print("C2:", CIM_from)
if self.variables.loc[to_var, "Value"] > 2: if self.variables.loc[to_var, "Value"] > 2:
df = (self.variables.loc[to_var, "Value"] - 1) ** 2 df = (self.variables.loc[to_var, "Value"] - 1) ** 2
df = df * (self.variables.loc[from_var, "Value"]) df = df * (self.variables.loc[from_var, "Value"])

@ -5,7 +5,7 @@ import numpy as np
import pandas as pd import pandas as pd
import networkx as nx import networkx as nx
import timeit import timeit
import psutil #import psutil
from ..classes.sample_path import SamplePath from ..classes.sample_path import SamplePath
from ..classes.structure_estimator import StructureEstimator from ..classes.structure_estimator import StructureEstimator
@ -66,9 +66,9 @@ class PerformanceComparisons(unittest.TestCase):
self.aux_build_importer(0) self.aux_build_importer(0)
se1 = StructureEstimator(self.s1, 0.1, 0.1) se1 = StructureEstimator(self.s1, 0.1, 0.1)
se1.ctpc_algorithm() se1.ctpc_algorithm()
current_process = psutil.Process(os.getpid()) #current_process = psutil.Process(os.getpid())
mem = current_process.memory_info().rss #mem = current_process.memory_info().rss
self.memory_usages.append((mem / 10 ** 6)) #self.memory_usages.append((mem / 10 ** 6))
self.save_memory_usage_data(self.memory_usages) self.save_memory_usage_data(self.memory_usages)
def aux_build_importer(self, indx): def aux_build_importer(self, indx):

@ -50,6 +50,17 @@ class TestStructureEstimator(unittest.TestCase):
no_self_loops.remove(node) no_self_loops.remove(node)
for n2 in no_self_loops: for n2 in no_self_loops:
self.assertIn((node, n2), cg.edges) self.assertIn((node, n2), cg.edges)
#se1.save_plot_estimated_structure_graph()
def test_build_removable_edges_matrix(self):
exp_alfa = 0.1
chi_alfa = 0.1
known_edges = self.s1.structure.edges[0:2]
se1 = StructureEstimator(self.s1, exp_alfa, chi_alfa, known_edges)
for edge in known_edges:
i = self.s1.structure.get_node_indx(edge[0])
j = self.s1.structure.get_node_indx(edge[1])
self.assertFalse(se1._removable_edges_matrix[i][j])
def test_generate_possible_sub_sets_of_size(self): def test_generate_possible_sub_sets_of_size(self):
exp_alfa = 0.1 exp_alfa = 0.1
@ -67,7 +78,8 @@ class TestStructureEstimator(unittest.TestCase):
self.assertFalse(node in sset) self.assertFalse(node in sset)
def test_time(self): def test_time(self):
se1 = StructureEstimator(self.s1, 0.1, 0.1) known_edges = [self.s1.structure.edges[0]]
se1 = StructureEstimator(self.s1, 0.1, 0.1, 25, known_edges)
exec_time = timeit.timeit(se1.ctpc_algorithm, number=1) exec_time = timeit.timeit(se1.ctpc_algorithm, number=1)
print("Execution Time: ", exec_time) print("Execution Time: ", exec_time)
for ed in self.s1.structure.edges: for ed in self.s1.structure.edges: