1
0
Fork 0

Add removable_edges_matrix to StructureEstimator

better_develop
Filippo Martini 4 years ago
parent b54b2edf7a
commit b9fc914962
  1. 49
      PyCTBN/classes/structure_estimator.py
  2. 23
      PyCTBN/tests/original_ctpc_algorithm.py
  3. 8
      PyCTBN/tests/performance_comparisons.py
  4. 14
      PyCTBN/tests/test_structure_estimator.py

@ -36,7 +36,8 @@ class StructureEstimator(object):
:_cache: the Cache object
"""
def __init__(self, sample_path: SamplePath, exp_test_alfa: float, chi_test_alfa: float):
def __init__(self, sample_path: SamplePath, exp_test_alfa: float, chi_test_alfa: float,
thumb_threshold:int = 25, known_edges: typing.List = None):
"""Constructor Method
"""
self._sample_path = sample_path
@ -46,6 +47,8 @@ class StructureEstimator(object):
self._complete_graph = StructureEstimator.build_complete_graph(self._sample_path.structure.nodes_labels)
self._exp_test_sign = exp_test_alfa
self._chi_test_alfa = chi_test_alfa
self._thumb_threshold = thumb_threshold
self._removable_edges_matrix = self.build_removable_edges_matrix(known_edges)
self._cache = Cache()
@staticmethod
@ -62,8 +65,18 @@ class StructureEstimator(object):
complete_graph.add_edges_from(itertools.permutations(node_ids, 2))
return complete_graph
def build_removable_edges_matrix(self, known_edges: typing.List):
tot_vars_count = self._sample_path.total_variables_count
complete_adj_matrix = np.full((tot_vars_count, tot_vars_count), True)
if known_edges:
for edge in known_edges:
i = self._sample_path.structure.get_node_indx(edge[0])
j = self._sample_path.structure.get_node_indx(edge[1])
complete_adj_matrix[i][j] = False
return complete_adj_matrix
def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int,
tot_vars_count: int) -> bool:
tot_vars_count: int, parent_indx, child_indx) -> bool:
"""Performs a complete independence test on the directed graphs G1 = {test_child U parent_set}
G2 = {G1 U test_parent} (added as an additional parent of the test_child).
Generates all the necessary structures and datas to perform the tests.
@ -117,16 +130,21 @@ class StructureEstimator(object):
p2.fast_init(test_child)
sofc1 = p2.compute_parameters_for_node(test_child)
self._cache.put(set(p_set), sofc1)
thumb_value = 0.0
if child_states_numb > 2:
parent_val = self._sample_path.structure.get_states_number(test_parent)
bool_mask_vals = np.isin(self._nodes, parent_set)
parents_vals = self._nodes_vals[bool_mask_vals]
thumb_value = self.compute_thumb_value(parent_val, child_states_numb, parents_vals)
for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs):
cond_cims = sofc2.filter_cims_with_mask(cims_filter, p_comb)
for cim2 in cond_cims:
if not self.independence_test(child_states_numb, cim1, cim2):
if not self.independence_test(child_states_numb, cim1, cim2, thumb_value, parent_indx, child_indx):
return False
return True
def independence_test(self, child_states_numb: int, cim1: ConditionalIntensityMatrix,
cim2: ConditionalIntensityMatrix) -> bool:
cim2: ConditionalIntensityMatrix, thumb_value: float, parent_indx, child_indx) -> bool:
"""Compute the actual independence test using two cims.
It is performed first the exponential test and if the null hypothesis is not rejected,
it is performed also the chi_test.
@ -146,6 +164,10 @@ class StructureEstimator(object):
r2s = M2.diagonal()
C1 = cim1.cim
C2 = cim2.cim
if child_states_numb > 2:
if (np.sum(np.diagonal(M1)) / thumb_value) < self._thumb_threshold:
self._removable_edges_matrix[parent_indx][child_indx] = False
return False
F_stats = C2.diagonal() / C1.diagonal()
exp_alfa = self._exp_test_sign
for val in range(0, child_states_numb):
@ -165,6 +187,14 @@ class StructureEstimator(object):
return False
return True
def compute_thumb_value(self, parent_val, child_val, parent_set_vals):
df = (child_val - 1) ** 2
df = df * parent_val
for v in parent_set_vals:
df = df * v
return df
def one_iteration_of_CTPC_algorithm(self, var_id: str) -> None:
"""Performs an iteration of the CTPC algorithm using the node ``var_id`` as ``test_child``.
@ -179,10 +209,13 @@ class StructureEstimator(object):
parent_indx = 0
while parent_indx < len(u):
removed = False
S = StructureEstimator.generate_possible_sub_sets_of_size(u, b, u[parent_indx])
test_parent = u[parent_indx]
i = self._sample_path.structure.get_node_indx(test_parent)
j = self._sample_path.structure.get_node_indx(var_id)
if self._removable_edges_matrix[i][j]:
S = StructureEstimator.generate_possible_sub_sets_of_size(u, b, test_parent)
for parents_set in S:
if self.complete_test(test_parent, var_id, parents_set, child_states_numb, tot_vars_count):
if self.complete_test(test_parent, var_id, parents_set, child_states_numb, tot_vars_count, i, j):
self._complete_graph.remove_edge(test_parent, var_id)
u.remove(test_parent)
removed = True
@ -263,7 +296,7 @@ class StructureEstimator(object):
edges_colors = ['red' if edge in spurious_edges else 'black' for edge in self._complete_graph.edges]
graph_to_draw.add_edges_from(spurious_edges)
graph_to_draw.add_edges_from(non_spurious_edges)
pos = nx.spring_layout(graph_to_draw, k=5, scale=3)
pos = nx.spring_layout(graph_to_draw, k=0.5*1/np.sqrt(len(graph_to_draw.nodes())), iterations=50,scale=10)
options = {
"node_size": 2000,
"node_color": "white",

@ -242,27 +242,12 @@ class OriginalCTPCAlgorithm(AbstractImporter):
dtype=np.int64)
#print(diag_indices)
T_filter = np.array([child_id, *parents_id], dtype=np.int) + 1
#print("TFilter",T_filter)
#print("TVector", T_vector)
#print("Trajectories", trajectories)
#print("Actual TVect",T_vector / T_vector[0])
#print("Masked COlumns", trajectories[:, T_filter]) # Colonne non shiftate dei values
#print("Masked Multiplied COlumns",trajectories[:, T_filter] * (T_vector / T_vector[0]) )
#print("Summing",np.sum(trajectories[:, T_filter] * (T_vector / T_vector[0]), axis=1))
#print("Deltas",trajectories[:, int(trajectories.shape[1] / 2)]) # i delta times
assert np.sum(trajectories[:, T_filter] * (T_vector / T_vector[0]), axis=1).size == \
trajectories[:, int(trajectories.shape[1] / 2)].size
#print(T_vector[-1])
T[:] = np.bincount(np.sum(trajectories[:, T_filter] * T_vector / T_vector[0], axis=1).astype(np.int), \
trajectories[:, int(trajectories.shape[1] / 2)], minlength=T_vector[-1]).reshape(-1,
T.shape[1])
#print("Shape", T.shape[1])
#print(np.bincount(np.sum(trajectories[:, T_filter] * T_vector / T_vector[0], axis=1).astype(np.int), \
#trajectories[:, int(trajectories.shape[1] / 2)], minlength=T_vector[-1]))
###### Transitions #######
#print("Shifted Node column", trajectories[:, int(trajectories.shape[1] / 2) + 1 + child_id].astype(np.int))
#print("Step 2", trajectories[:, int(trajectories.shape[1] / 2) + 1 + child_id].astype(np.int) >= 0)
trj_tmp = trajectories[trajectories[:, int(trajectories.shape[1] / 2) + 1 + child_id].astype(np.int) >= 0]
#print("Trj Temp", trj_tmp)
@ -270,13 +255,6 @@ class OriginalCTPCAlgorithm(AbstractImporter):
M_filter = np.array([child_id, child_id, *parents_id], dtype=np.int) + 1
#print("MFilter", M_filter)
M_filter[0] += int(trj_tmp.shape[1] / 2)
#print("MFilter", M_filter)
#print("MVector", M_vector)
#print("Division", M_vector / M_vector[0])
#print("Masked Traj temp", (trj_tmp[:, M_filter]))
#print("Masked Multiplied Traj temp", trj_tmp[:, M_filter] * M_vector / M_vector[0])
#print("Summing", np.sum(trj_tmp[:, M_filter] * M_vector / M_vector[0], axis=1))
#print(M.shape[2])
M[:] = np.bincount(np.sum(trj_tmp[:, M_filter] * M_vector / M_vector[0], axis=1).astype(np.int), \
minlength=M_vector[-1]).reshape(-1, M.shape[1], M.shape[2])
@ -390,6 +368,7 @@ class OriginalCTPCAlgorithm(AbstractImporter):
#print("C2:", CIM_from)
if self.variables.loc[to_var, "Value"] > 2:
df = (self.variables.loc[to_var, "Value"] - 1) ** 2
df = df * (self.variables.loc[from_var, "Value"])

@ -5,7 +5,7 @@ import numpy as np
import pandas as pd
import networkx as nx
import timeit
import psutil
#import psutil
from ..classes.sample_path import SamplePath
from ..classes.structure_estimator import StructureEstimator
@ -66,9 +66,9 @@ class PerformanceComparisons(unittest.TestCase):
self.aux_build_importer(0)
se1 = StructureEstimator(self.s1, 0.1, 0.1)
se1.ctpc_algorithm()
current_process = psutil.Process(os.getpid())
mem = current_process.memory_info().rss
self.memory_usages.append((mem / 10 ** 6))
#current_process = psutil.Process(os.getpid())
#mem = current_process.memory_info().rss
#self.memory_usages.append((mem / 10 ** 6))
self.save_memory_usage_data(self.memory_usages)
def aux_build_importer(self, indx):

@ -50,6 +50,17 @@ class TestStructureEstimator(unittest.TestCase):
no_self_loops.remove(node)
for n2 in no_self_loops:
self.assertIn((node, n2), cg.edges)
#se1.save_plot_estimated_structure_graph()
def test_build_removable_edges_matrix(self):
exp_alfa = 0.1
chi_alfa = 0.1
known_edges = self.s1.structure.edges[0:2]
se1 = StructureEstimator(self.s1, exp_alfa, chi_alfa, known_edges)
for edge in known_edges:
i = self.s1.structure.get_node_indx(edge[0])
j = self.s1.structure.get_node_indx(edge[1])
self.assertFalse(se1._removable_edges_matrix[i][j])
def test_generate_possible_sub_sets_of_size(self):
exp_alfa = 0.1
@ -67,7 +78,8 @@ class TestStructureEstimator(unittest.TestCase):
self.assertFalse(node in sset)
def test_time(self):
se1 = StructureEstimator(self.s1, 0.1, 0.1)
known_edges = [self.s1.structure.edges[0]]
se1 = StructureEstimator(self.s1, 0.1, 0.1, 25, known_edges)
exec_time = timeit.timeit(se1.ctpc_algorithm, number=1)
print("Execution Time: ", exec_time)
for ed in self.s1.structure.edges: