
Add refactors

parallel_struct_est
philpMartin 4 years ago
parent 55ec35d084
commit ee029dd3d5
  1. main_package/classes/json_importer.py (11 lines changed)
  2. main_package/classes/network_graph.py (65 lines changed)
  3. main_package/classes/parameters_estimator.py (3 lines changed)
  4. main_package/classes/sets_of_cims_container.py (8 lines changed)
  5. main_package/classes/structure_estimator.py (17 lines changed)
  6. main_package/tests/test_structure_estimator.py (1 line changed)

main_package/classes/json_importer.py

@@ -102,8 +102,9 @@ class JsonImporter(AbstractImporter):
         Returns:
             void
         """
-        for sample_indx, sample in enumerate(raw_data[indx][trajectories_key]):
-            self.df_samples_list.append(pd.DataFrame(sample))
+        self.df_samples_list = [pd.DataFrame(sample) for sample in raw_data[indx][trajectories_key]]
+        #for sample_indx, sample in enumerate(raw_data[indx][trajectories_key]):
+            #self.df_samples_list.append(pd.DataFrame(sample))
         self.sorter = list(self.df_samples_list[0].columns.values)[1:]

     def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame, time_header_label: str,
@@ -135,9 +136,9 @@ class JsonImporter(AbstractImporter):
         :columns_list: the list containing the columns converted to numpy arrays
         """
-        columns_list = []
-        for column in data_frame:
-            columns_list.append(data_frame[column].to_numpy())
+        columns_list = [data_frame[column].to_numpy() for column in data_frame]
+        #for column in data_frame:
+            #columns_list.append(data_frame[column].to_numpy())
         return columns_list

     def clear_concatenated_frame(self):

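Note on the json_importer.py hunks: both changes replace an explicit append loop with an equivalent list comprehension. A minimal, self-contained sketch of the same pattern, using made-up trajectory data rather than the repository's JSON layout:

import pandas as pd

# Hypothetical stand-in for raw_data[indx][trajectories_key].
raw_trajectories = [
    {"Time": [0.0, 1.5], "X": [0, 1], "Y": [1, 1]},
    {"Time": [0.0, 0.7], "X": [1, 0], "Y": [0, 1]},
]

# Old style: build the list with an explicit loop.
df_samples_list = []
for sample in raw_trajectories:
    df_samples_list.append(pd.DataFrame(sample))

# New style: the equivalent list comprehension.
df_samples_list = [pd.DataFrame(sample) for sample in raw_trajectories]

# As in JsonImporter, keep every column name except the leading time column.
sorter = list(df_samples_list[0].columns.values)[1:]
print(sorter)  # ['X', 'Y']
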
main_package/classes/network_graph.py

@@ -22,10 +22,10 @@ class NetworkGraph():
         self._nodes_labels = self.graph_struct.list_of_nodes_labels()
         self.aggregated_info_about_nodes_parents = None
         self._fancy_indexing = None
-        self._time_scalar_indexing_structure = []
-        self._transition_scalar_indexing_structure = []
-        self._time_filtering = []
-        self._transition_filtering = []
+        self._time_scalar_indexing_structure = None
+        self._transition_scalar_indexing_structure = None
+        self._time_filtering = None
+        self._transition_filtering = None

     def init_graph(self):
         self.add_nodes(self.graph_struct.list_of_nodes_labels())
@@ -54,9 +54,11 @@ class NetworkGraph():
         sorted_parents = [x for _, x in sorted(zip(self.graph_struct.list_of_nodes_labels(), parents))]
         #print(sorted_parents)
         #print(parents)
-        p_indxes= []
-        p_values = []
-        for n in parents:
+        #p_indxes= []
+        #p_values = []
+        p_indxes = [self.get_node_indx(node) for node in parents]
+        p_values = [self.get_states_number_by_indx(indx) for indx in p_indxes]
+        """for n in parents:
             #indx = self.graph_struct.get_node_indx(n)
             #print(indx)
@@ -64,7 +66,7 @@ class NetworkGraph():
             node_indx = self.get_node_indx(n)
             p_indxes.append(node_indx)
             #p_values.append(self.graph_struct.get_states_number(n))
-            p_values.append(self.get_states_number_by_indx(node_indx))
+            p_values.append(self.get_states_number_by_indx(node_indx))"""
         ordered_set = (sorted_parents, p_indxes, p_values)
         #print(ordered_set)
@@ -73,9 +75,9 @@ class NetworkGraph():
     def get_ord_set_of_par_of_all_nodes(self):
         result = []
-        for node in self._nodes_labels:
-            result.append(self.get_ordered_by_indx_set_of_parents(node))
-        #print(result)
+        #for node in self._nodes_labels:
+            #result.append(self.get_ordered_by_indx_set_of_parents(node))
+        result = [self.get_ordered_by_indx_set_of_parents(node) for node in self._nodes_labels]
         return result

     """def get_ordered_by_indx_parents_values(self, node):
@@ -95,8 +97,9 @@ class NetworkGraph():
     def get_states_number_of_all_nodes_sorted(self):
         states_number_list = []
-        for node in self._nodes_labels:
-            states_number_list.append(self.get_states_number(node))
+        #for node in self._nodes_labels:
+            #states_number_list.append(self.get_states_number(node))
+        states_number_list = [self.get_states_number(node) for node in self._nodes_labels]
         return states_number_list

     def build_fancy_indexing_structure(self, start_indx):
@@ -132,13 +135,12 @@ class NetworkGraph():
     def build_time_scalar_indexing_structure(self):
         #parents_indexes_list = self._fancy_indexing
-        for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing):
-            #if not p_indxs:
-                #self._time_scalar_indexing_structure.append(np.array([self.get_states_number_by_indx(node_indx)],
-                                                                     #dtype=np.int))
-            #else:
+        """for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing):
             self._time_scalar_indexing_structure.append(
-                self.build_time_scalar_indexing_structure_for_a_node(node_indx, p_indxs))
+                self.build_time_scalar_indexing_structure_for_a_node(node_indx, p_indxs))"""
+        self._time_scalar_indexing_structure = [self.build_time_scalar_indexing_structure_for_a_node(node_indx, p_indxs)
+                                                for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(),
+                                                                              self._fancy_indexing)]

     def build_transition_scalar_indexing_structure_for_a_node(self, node_indx, parents_indxs):
         #M_vector = np.array([self.graph_struct.variables_frame.iloc[node_id, 1],
@@ -151,23 +153,30 @@ class NetworkGraph():
     def build_transition_scalar_indexing_structure(self):
         #parents_indexes_list = self._fancy_indexing
-        for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing):
+        """for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing):
             self._transition_scalar_indexing_structure.append(
-                self.build_transition_scalar_indexing_structure_for_a_node(node_indx, p_indxs))
+                self.build_transition_scalar_indexing_structure_for_a_node(node_indx, p_indxs))"""
+        self._transition_scalar_indexing_structure = \
+            [self.build_transition_scalar_indexing_structure_for_a_node(node_indx, p_indxs)
+             for node_indx, p_indxs in
+             zip(self.graph_struct.list_of_nodes_indexes(),
+                 self._fancy_indexing)]

     def build_time_columns_filtering_structure(self):
         #parents_indexes_list = self._fancy_indexing
-        for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing):
-            #if p_indxs.size == 0:
-                #self._time_filtering.append(np.append(p_indxs, np.array([node_indx], dtype=np.int)))
-            #else:
-            self._time_filtering.append(np.append(np.array([node_indx], dtype=np.int), p_indxs).astype(np.int))
+        """for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing):
+            self._time_filtering.append(np.append(np.array([node_indx], dtype=np.int), p_indxs).astype(np.int))"""
+        self._time_filtering = [np.append(np.array([node_indx], dtype=np.int), p_indxs).astype(np.int)
+                                for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing)]

     def build_transition_columns_filtering_structure(self):
         #parents_indexes_list = self._fancy_indexing
         nodes_number = self.graph_struct.total_variables_number
-        for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing):
-            self._transition_filtering.append(np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int))
+        """for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing):
+            self._transition_filtering.append(np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int))"""
+        self._transition_filtering = [np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int)
+                                      for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(),
+                                                                    self._fancy_indexing)]

     def get_nodes(self):
         return list(self.graph.nodes)

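Note on the network_graph.py filtering hunks: the new comprehensions build plain integer index arrays. The sketch below uses made-up node and parent indices to show what they contain; note also that np.int is only a deprecated alias for the builtin int (removed in NumPy 1.24), so int or np.int64 would be safer in new code.

import numpy as np

node_indx = 2
p_indxs = np.array([0, 3])   # hypothetical parent indices of node 2
nodes_number = 4             # hypothetical total number of variables

# Time filtering entry: [node index, parent indices...]
time_filtering = np.append(np.array([node_indx], dtype=int), p_indxs).astype(int)
print(time_filtering)        # [2 0 3]

# Transition filtering entry: [node index shifted by the number of variables,
# node index, parent indices...]
transition_filtering = np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=int)
print(transition_filtering)  # [6 2 0 3]
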
main_package/classes/parameters_estimator.py

@@ -2,7 +2,7 @@ import os
 from line_profiler import LineProfiler
-import numba as nb
+from numba.experimental import jitclass
 import numpy as np
 import network_graph as ng
 import sample_path as sp
@@ -63,7 +63,6 @@ class ParametersEstimator:
             self.sets_of_cims_struct.sets_of_cims[pos_index].state_residence_times,
             self.sets_of_cims_struct.sets_of_cims[pos_index].transition_matrices)
-
     def compute_state_res_time_for_node(self, node_indx, times, trajectory, cols_filter, scalar_indexes_struct, T):
         #print(times.size)
         #print(trajectory)

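Note on the parameters_estimator.py import change: recent numba releases expose jitclass from numba.experimental rather than the top-level package, which is what the new import tracks. A minimal, standalone jitclass sketch, unrelated to ParametersEstimator itself:

from numba import int64, float64
from numba.experimental import jitclass

spec = [('count', int64), ('total', float64)]

@jitclass(spec)
class RunningSum:
    def __init__(self):
        self.count = 0
        self.total = 0.0

    def add(self, x):
        self.count += 1
        self.total += x

acc = RunningSum()
acc.add(2.5)
print(acc.count, acc.total)  # 1 2.5
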
main_package/classes/sets_of_cims_container.py

@@ -8,14 +8,16 @@ class SetsOfCimsContainer:
     """
     # list_of_vars_orders contains all the lists with the parents ordered by their indx value
     def __init__(self, list_of_keys, states_number_per_node, list_of_parents_states_number):
-        self.sets_of_cims = []
+        self.sets_of_cims = None
         self.init_cims_structure(list_of_keys, states_number_per_node, list_of_parents_states_number)
         #self.states_per_variable = states_number

     def init_cims_structure(self, keys, states_number_per_node, list_of_parents_states_number):
-        for indx, key in enumerate(keys):
+        """for indx, key in enumerate(keys):
             self.sets_of_cims.append(
-                socim.SetOfCims(key, list_of_parents_states_number[indx], states_number_per_node[indx]))
+                socim.SetOfCims(key, list_of_parents_states_number[indx], states_number_per_node[indx]))"""
+        self.sets_of_cims = [socim.SetOfCims(pair[1], list_of_parents_states_number[pair[0]], states_number_per_node[pair[0]])
+                             for pair in enumerate(keys)]

     def get_set_of_cims(self, node_indx):
         return self.sets_of_cims[node_indx]

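Note on the sets_of_cims_container.py hunk: the new comprehension indexes each enumerate() pair as pair[0]/pair[1]; unpacking the tuple inside the comprehension expresses the same construction more readably. A sketch with a placeholder standing in for socim.SetOfCims:

keys = ['X', 'Y', 'Z']                       # hypothetical node labels
list_of_parents_states_number = [[2], [2, 3], []]
states_number_per_node = [2, 3, 2]

def make_set_of_cims(key, parents_states, node_states):
    # Placeholder for socim.SetOfCims(key, parents_states, node_states).
    return (key, parents_states, node_states)

# Same result as the committed comprehension, with tuple unpacking instead of pair[0]/pair[1].
sets_of_cims = [make_set_of_cims(key, list_of_parents_states_number[indx], states_number_per_node[indx])
                for indx, key in enumerate(keys)]
print(sets_of_cims[1])                       # ('Y', [2, 3], 3)
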
main_package/classes/structure_estimator.py

@@ -4,6 +4,7 @@ import itertools
 import networkx as nx
 from scipy.stats import f as f_dist
 from scipy.stats import chi2 as chi2_dist
+from numba import njit
@@ -133,29 +134,17 @@ class StructureEstimator:
         return True

     def independence_test(self, tested_child, cim1, cim2):
-        # Fake exp test
         r1s = cim1.state_transition_matrix.diagonal()
         r2s = cim2.state_transition_matrix.diagonal()
         F_stats = cim2.cim.diagonal() / cim1.cim.diagonal()
         child_states_numb = self.sample_path.structure.get_states_number(tested_child)
-        for val in range(0, child_states_numb):  # the possible values of tested_child  TODO THIS COMPUTATION MUST BE VECTORIZED
-            #r1 = cim1.state_transition_matrix[val][val]
-            #r2 = cim2.state_transition_matrix[val][val]
-            #print("No Test Parent:",cim1.cim[val][val],"With Test Parent", cim2.cim[val][val])
-            #F = cim2.cim[val][val] / cim1.cim[val][val]
-            #print("Exponential test", F_stats[val], r1s[val], r2s[val])
-            #print(f_dist.ppf(1 - self.exp_test_sign / 2, r1, r2))
-            #print(f_dist.ppf(self.exp_test_sign / 2, r1, r2))
+        for val in range(0, child_states_numb):
             if F_stats[val] < f_dist.ppf(self.exp_test_sign / 2, r1s[val], r2s[val]) or \
                     F_stats[val] > f_dist.ppf(1 - self.exp_test_sign / 2, r1s[val], r2s[val]):
                 print("CONDITIONALLY DEPENDENT EXP")
                 return False
-        # fake chi test
         M1_no_diag = self.remove_diagonal_elements(cim1.state_transition_matrix)
         M2_no_diag = self.remove_diagonal_elements(cim2.state_transition_matrix)
-        #print("M1 no diag", M1_no_diag)
-        #print("M2 no diag", M2_no_diag)
         chi_2_quantile = chi2_dist.ppf(1 - self.chi_test_alfa, child_states_numb - 1)
         """
         Ks = np.sqrt(cim1.state_transition_matrix.diagonal() / cim2.state_transition_matrix.diagonal())
@@ -181,6 +170,8 @@ class StructureEstimator:
     def one_iteration_of_CTPC_algorithm(self, var_id):
         u = list(self.complete_graph.predecessors(var_id))
+        #TODO add here the filtering of complete_graph_frame towards the destination node 'To' var_id and pass the frame to the complete test
+        #TODO find a way to also pass directly the values of the variables involved in the test of node var_id
         tests_parents_numb = len(u)
         #print(u)
         b = 0

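Note on the structure_estimator.py hunks: the removed inline TODO asked for the per-state F test loop to be vectorized. scipy.stats.f.ppf broadcasts over array arguments, so one possible (hypothetical) vectorization looks like the sketch below; the array names mirror those in independence_test, but the values are made up.

import numpy as np
from scipy.stats import f as f_dist

r1s = np.array([30.0, 42.0, 25.0])   # hypothetical diagonal of cim1.state_transition_matrix
r2s = np.array([28.0, 40.0, 27.0])   # hypothetical diagonal of cim2.state_transition_matrix
F_stats = np.array([1.1, 0.9, 1.3])  # hypothetical ratio of the cim diagonals
exp_test_sign = 0.1

# ppf broadcasts element-wise, so both quantiles come out as arrays.
lower = f_dist.ppf(exp_test_sign / 2, r1s, r2s)
upper = f_dist.ppf(1 - exp_test_sign / 2, r1s, r2s)

# One vectorized check replaces the per-value loop.
conditionally_dependent = np.any((F_stats < lower) | (F_stats > upper))
print(conditionally_dependent)
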
main_package/tests/test_structure_estimator.py

@@ -1,5 +1,6 @@
 import unittest
 from line_profiler import LineProfiler
+from multiprocessing import Pool
 import sample_path as sp
 import structure_estimator as se
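
Note on the test_structure_estimator.py hunk: only the multiprocessing import is added; how Pool will be used is not part of this commit. A hedged sketch of one way a Pool could drive the per-node CTPC iterations, with a hypothetical top-level worker (the real estimator would have to be built or loaded inside each process to stay picklable):

from multiprocessing import Pool

def estimate_node(var_id):
    # Hypothetical worker: construct/load the StructureEstimator inside the
    # process, then run one CTPC iteration for a single variable.
    # estimator = build_structure_estimator()   # not part of this commit
    # return estimator.one_iteration_of_CTPC_algorithm(var_id)
    return var_id  # placeholder so the sketch runs on its own

if __name__ == '__main__':
    node_ids = ['X', 'Y', 'Z']                 # hypothetical variable labels
    with Pool(processes=3) as pool:
        results = pool.map(estimate_node, node_ids)
    print(results)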