1
0
Fork 0

Add parameters estimation per variable

parallel_struct_est
philpMartin 4 years ago
parent a852c465d3
commit 4b9ff23e0b
  1. 28
      main_package/classes/json_importer.py
  2. 81
      main_package/classes/parameters_estimator.py
  3. 85
      main_package/classes/structure_estimator.py
  4. 7
      main_package/tests/test_parameters_estimator.py
  5. 3
      main_package/tests/test_structure_estimator.py

@ -2,6 +2,7 @@ import os
import glob
import pandas as pd
import json
import typing
from abstract_importer import AbstractImporter
from line_profiler import LineProfiler
@ -22,7 +23,8 @@ class JsonImporter(AbstractImporter):
"""
def __init__(self, files_path, samples_label, structure_label, variables_label, time_key, variables_key):
def __init__(self, files_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
variables_key: str):
self.samples_label = samples_label
self.structure_label = structure_label
self.variables_label = variables_label
@ -43,19 +45,19 @@ class JsonImporter(AbstractImporter):
self.import_structure(raw_data)
self.import_variables(raw_data, self.sorter)
def import_trajectories(self, raw_data):
def import_trajectories(self, raw_data: pd.DataFrame):
self.normalize_trajectories(raw_data, 0, self.samples_label)
def import_structure(self, raw_data):
def import_structure(self, raw_data: pd.DataFrame):
self._df_structure = self.one_level_normalizing(raw_data, 0, self.structure_label)
def import_variables(self, raw_data, sorter):
def import_variables(self, raw_data: pd.DataFrame, sorter: typing.List):
self._df_variables = self.one_level_normalizing(raw_data, 0, self.variables_label)
self._df_variables[self.variables_key] = self._df_variables[self.variables_key].astype("category")
self._df_variables[self.variables_key] = self._df_variables[self.variables_key].cat.set_categories(sorter)
self._df_variables = self._df_variables.sort_values([self.variables_key])
def read_json_file(self):
def read_json_file(self) -> typing.List:
"""
Legge il primo file .json nel path self.filepath
@ -75,7 +77,7 @@ class JsonImporter(AbstractImporter):
except ValueError as err:
print(err.args)
def one_level_normalizing(self, raw_data, indx, key):
def one_level_normalizing(self, raw_data: pd.DataFrame, indx: int, key: str) -> pd.DataFrame:
"""
Estrae i dati innestati di un livello, presenti nel dataset raw_data,
presenti nel json array all'indice indx nel json object key
@ -90,7 +92,7 @@ class JsonImporter(AbstractImporter):
"""
return pd.DataFrame(raw_data[indx][key])
def normalize_trajectories(self, raw_data, indx, trajectories_key):
def normalize_trajectories(self, raw_data: pd.DataFrame, indx: int, trajectories_key: str):
"""
Estrae le traiettorie presenti in rawdata nel json array all'indice indx, nel json object trajectories_key.
Aggiunge le traj estratte nella lista di dataframe self.df_samples_list
@ -104,7 +106,9 @@ class JsonImporter(AbstractImporter):
self.df_samples_list.append(pd.DataFrame(sample))
self.sorter = list(self.df_samples_list[0].columns.values)[1:]
def compute_row_delta_sigle_samples_frame(self, sample_frame, time_header_label, columns_header, shifted_cols_header):
def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame, time_header_label: str,
columns_header: typing.List, shifted_cols_header: typing.List) \
-> pd.DataFrame:
sample_frame[time_header_label] = sample_frame[time_header_label].diff().shift(-1)
shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32')
#print(shifted_cols)
@ -113,8 +117,8 @@ class JsonImporter(AbstractImporter):
sample_frame.drop(sample_frame.tail(1).index, inplace=True)
return sample_frame
def compute_row_delta_in_all_samples_frames(self, time_header_label):
columns_header = list(self.df_samples_list[0].columns.values)
def compute_row_delta_in_all_samples_frames(self, time_header_label: str):
#columns_header = list(self.df_samples_list[0].columns.values)
#self.sorter = columns_header[1:]
shifted_cols_header = [s + "S" for s in self.sorter]
for indx, sample in enumerate(self.df_samples_list):
@ -122,7 +126,7 @@ class JsonImporter(AbstractImporter):
time_header_label, self.sorter, shifted_cols_header)
self._concatenated_samples = pd.concat(self.df_samples_list)
def build_list_of_samples_array(self, data_frame):
def build_list_of_samples_array(self, data_frame: pd.DataFrame) -> typing.List:
"""
Costruisce una lista contenente le colonne presenti nel dataframe data_frame convertendole in numpy_array
Parameters:
@ -150,7 +154,7 @@ class JsonImporter(AbstractImporter):
for indx in range(len(self.df_samples_list)): # Le singole traj non servono più
self.df_samples_list[indx] = self.df_samples_list[indx].iloc[0:0]
def import_sampled_cims(self, raw_data, indx, cims_key):
def import_sampled_cims(self, raw_data: pd.DataFrame, indx: int, cims_key: str) -> typing.Dict:
cims_for_all_vars = {}
for var in raw_data[indx][cims_key]:
sampled_cims_list = []

@ -43,6 +43,26 @@ class ParametersEstimator:
aggr[1].transition_matrices)
aggr[1].build_cims(aggr[1].state_residence_times, aggr[1].transition_matrices)
def compute_parameters_for_node(self, node_id):
pos_index = self.net_graph.graph_struct.get_positional_node_indx(node_id)
#print("Nodes", self.net_graph.get_nodes())
#print(pos_index)
#print(self.net_graph.time_filtering)
self.compute_state_res_time_for_node(self.net_graph.get_node_indx(node_id), self.sample_path.trajectories.times,
self.sample_path.trajectories.trajectory,
self.net_graph.time_filtering[pos_index],
self.net_graph.time_scalar_indexing_strucure[pos_index],
self.sets_of_cims_struct.sets_of_cims[pos_index].state_residence_times)
# print(self.net_graph.transition_filtering[indx])
# print(self.net_graph.transition_scalar_indexing_structure[indx])
self.compute_state_transitions_for_a_node(self.net_graph.get_node_indx(node_id),
self.sample_path.trajectories.complete_trajectory,
self.net_graph.transition_filtering[pos_index],
self.net_graph.transition_scalar_indexing_structure[pos_index],
self.sets_of_cims_struct.sets_of_cims[pos_index].transition_matrices)
self.sets_of_cims_struct.sets_of_cims[pos_index].build_cims(
self.sets_of_cims_struct.sets_of_cims[pos_index].state_residence_times,
self.sets_of_cims_struct.sets_of_cims[pos_index].transition_matrices)
def compute_state_res_time_for_node(self, node_indx, times, trajectory, cols_filter, scalar_indexes_struct, T):
@ -96,64 +116,3 @@ class ParametersEstimator:
# Simple Test #
"""os.getcwd()
os.chdir('..')
path = os.getcwd() + '/data'
s1 = sp.SamplePath(path)
s1.build_trajectories()
s1.build_structure()
g1 = ng.NetworkGraph(s1.structure)
g1.init_graph()
pe = ParametersEstimator(s1, g1)
pe.init_amalgamated_cims_struct()
lp = LineProfiler()
[[2999.2966 2749.2298 3301.5975]
[3797.1737 3187.8345 2939.2009]
[3432.224 3062.5402 4530.9028]]
[[ 827.6058 838.1515 686.1365]
[1426.384 2225.2093 1999.8528]
[ 745.3068 733.8129 746.2347]
[ 520.8113 690.9502 853.4022]
[1590.8609 1853.0021 1554.1874]
[ 637.5576 643.8822 654.9506]
[ 718.7632 742.2117 998.5844]
[1811.984 1598.0304 2547.988 ]
[ 770.8503 598.9588 984.3304]]
lp_wrapper = lp(pe.compute_state_residence_time_for_all_nodes)
lp_wrapper()
lp.print_stats()
#pe.compute_state_residence_time_for_all_nodes()
print(pe.amalgamated_cims_struct.sets_of_cims[0].state_residence_times)
[[[14472, 3552, 10920],
[12230, 25307, 13077],
[ 9707, 14408, 24115]],
[[22918, 6426, 16492],
[10608, 16072, 5464],
[10746, 11213, 21959]],
[[23305, 6816, 16489],
[ 3792, 19190, 15398],
[13718, 18243, 31961]]])
Raveled [14472 3552 10920 12230 25307 13077 9707 14408 24115 22918 6426 16492
10608 16072 5464 10746 11213 21959 23305 6816 16489 3792 19190 15398
13718 18243 31961]
lp_wrapper = lp(pe.compute_parameters)
lp_wrapper()
#for variable in pe.amalgamated_cims_struct.sets_of_cims:
#for cond in variable.get_cims():
#print(cond.cim)
print(pe.amalgamated_cims_struct.get_cims_of_node(1,[2]))
lp.print_stats()"""

@ -55,11 +55,14 @@ class StructureEstimator:
p1 = pe.ParametersEstimator(self.sample_path, g1)
p1.init_sets_cims_container()
p1.compute_parameters()
#print("Computing params for",test_child, test_parent, parent_set)
p1.compute_parameters_for_node(test_child)
#p1.compute_parameters()
p2 = pe.ParametersEstimator(self.sample_path, g2)
p2.init_sets_cims_container()
p2.compute_parameters()
#p2.compute_parameters()
p2.compute_parameters_for_node(test_child)
#for cim in p1.sets_of_cims_struct.sets_of_cims[s1.get_positional_node_indx(test_child)].actual_cims:
#print(cim)
@ -72,12 +75,11 @@ class StructureEstimator:
#for j, cim2 in enumerate(
#p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)].actual_cims):
for j in range(indx, self.sample_path.structure.get_states_number(test_parent) + indx):
print("J", j)
#print("J", j)
#print("Pos Index", p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)].actual_cims)
cim2 = p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)].actual_cims[j]
indx += 1
print(indx)
#print(indx)
print("Run Test", i, j)
if not self.independence_test(test_child, cim1, cim2):
return False
@ -85,33 +87,45 @@ class StructureEstimator:
def independence_test(self, tested_child, cim1, cim2):
# Fake exp test
r1s = cim1.state_transition_matrix.diagonal()
r2s = cim2.state_transition_matrix.diagonal()
F_stats = cim2.cim.diagonal() / cim1.cim.diagonal()
for val in range(0, self.sample_path.structure.get_states_number(tested_child)): # i possibili valori di tested child TODO QUESTO CONTO DEVE ESSERE VETTORIZZATO
r1 = cim1.state_transition_matrix[val][val]
r2 = cim2.state_transition_matrix[val][val]
print("No Test Parent:",cim1.cim[val][val],"With Test Parent", cim2.cim[val][val])
F = cim2.cim[val][val] / cim1.cim[val][val]
#r1 = cim1.state_transition_matrix[val][val]
#r2 = cim2.state_transition_matrix[val][val]
#print("No Test Parent:",cim1.cim[val][val],"With Test Parent", cim2.cim[val][val])
#F = cim2.cim[val][val] / cim1.cim[val][val]
print("Exponential test", F, r1, r2)
#print("Exponential test", F_stats[val], r1s[val], r2s[val])
#print(f_dist.ppf(1 - self.exp_test_sign / 2, r1, r2))
#print(f_dist.ppf(self.exp_test_sign / 2, r1, r2))
if F < f_dist.ppf(self.exp_test_sign / 2, r1, r2) or \
F > f_dist.ppf(1 - self.exp_test_sign / 2, r1, r2):
if F_stats[val] < f_dist.ppf(self.exp_test_sign / 2, r1s[val], r2s[val]) or \
F_stats[val] > f_dist.ppf(1 - self.exp_test_sign / 2, r1s[val], r2s[val]):
print("CONDITIONALLY DEPENDENT EXP")
return False
# fake chi test
M1_no_diag = self.remove_diagonal_elements(cim1.state_transition_matrix)
M2_no_diag = self.remove_diagonal_elements(cim2.state_transition_matrix)
print("M1 no diag", M1_no_diag)
print("M2 no diag", M2_no_diag)
#print("M1 no diag", M1_no_diag)
#print("M2 no diag", M2_no_diag)
chi_2_quantile = chi2_dist.ppf(1 - self.chi_test_alfa, self.sample_path.structure.get_states_number(tested_child) - 1)
"""
Ks = np.sqrt(cim1.state_transition_matrix.diagonal() / cim2.state_transition_matrix.diagonal())
Ls = np.reciprocal(Ks)
chi_stats = np.sum((np.power((M2_no_diag.T * Ks).T - (M1_no_diag.T * Ls).T, 2) \
/ (M1_no_diag + M2_no_diag)), axis=1)"""
Ks = np.sqrt(r1s / r2s)
Ls = np.sqrt(r2s / r1s)
for val in range(0, self.sample_path.structure.get_states_number(tested_child)):
K = math.sqrt(cim1.state_transition_matrix[val][val] / cim2.state_transition_matrix[val][val])
L = 1 / K
Chi = np.sum(np.power(K * M2_no_diag[val] - L *M1_no_diag[val], 2) /
#K = math.sqrt(cim1.state_transition_matrix[val][val] / cim2.state_transition_matrix[val][val])
#L = 1 / K
Chi = np.sum(np.power(Ks[val] * M2_no_diag[val] - Ls[val] *M1_no_diag[val], 2) /
(M1_no_diag[val] + M2_no_diag[val]))
print("Chi Stats", Chi)
print("Chi Quantile", chi_2_quantile)
#print("Chi Stats", Chi)
#print("Chi Quantile", chi_2_quantile)
if Chi > chi_2_quantile:
#if np.any(chi_stats > chi_2_quantile):
print("CONDITIONALLY DEPENDENT CHI")
return False
#print("Chi test", Chi)
@ -122,29 +136,32 @@ class StructureEstimator:
tests_parents_numb = len(u)
#print(u)
b = 0
parent_indx = 0
#parent_indx = 0
while b < len(u):
#for parent_id in u:
parent_indx = 0
while u and parent_indx < tests_parents_numb and b < len(u):
# list_without_test_parent = u.remove(parent_id)
removed = False
print("b", b)
print("Parent Indx", parent_indx)
#print("b", b)
#print("Parent Indx", parent_indx)
#if not list(self.generate_possible_sub_sets_of_size(u, b, u[parent_indx])):
#break
S = self.generate_possible_sub_sets_of_size(u, b, u[parent_indx])
print("U Set", u)
print("S", S)
#print("U Set", u)
#print("S", S)
for parents_set in S:
print("Parent Set", parents_set)
print("Test Parent", u[parent_indx])
#print("Parent Set", parents_set)
#print("Test Parent", u[parent_indx])
if self.complete_test(u[parent_indx], var_id, parents_set):
print("Removing EDGE:", u[parent_indx], var_id)
self.complete_graph.remove_edge(u[parent_indx], var_id)
#self.complete_graph_frame = \
#self.complete_graph_frame[(self.complete_graph_frame.From !=
# u[parent_indx]) & (self.complete_graph_frame.To != var_id)]
#print(self.complete_graph_frame)
self.complete_graph_frame = \
self.complete_graph_frame.drop(
self.complete_graph_frame[(self.complete_graph_frame.From ==
u[parent_indx]) & (self.complete_graph_frame.To == var_id)].index)
#print(self.complete_graph_frame)
u.remove(u[parent_indx])
removed = True
#else:
@ -154,8 +171,8 @@ class StructureEstimator:
b += 1
def generate_possible_sub_sets_of_size(self, u, size, parent_id):
print("Inside Generate subsets", u)
print("InsideGenerate Subsets", parent_id)
#print("Inside Generate subsets", u)
#print("InsideGenerate Subsets", parent_id)
list_without_test_parent = u[:]
list_without_test_parent.remove(parent_id)
# u.remove(parent_id)
@ -168,3 +185,7 @@ class StructureEstimator:
s0, s1 = matrix.strides
return strided(matrix.ravel()[1:], shape=(m - 1, m), strides=(s0 + s1, s1)).reshape(m, -1)
def ctpc_algorithm(self):
for node_id in self.sample_path.structure.list_of_nodes_labels():
self.one_iteration_of_CTPC_algorithm(node_id)

@ -55,6 +55,13 @@ class TestParametersEstimatior(unittest.TestCase):
for r1, r2 in zip(cim1, cim2):
self.assertTrue(np.all(np.isclose(r1, r2, 1e-01, 1e-01) == True))
def test_compute_parameters_for_node(self):#TODO Questo non è un test
pe1 = pe.ParametersEstimator(self.s1, self.g1)
pe1.init_sets_cims_container()
pe1.compute_parameters_for_node('Y')
print(pe1.sets_of_cims_struct.get_set_of_cims(1).actual_cims)
def aux_import_sampled_cims(self, cims_label):
i1 = ji.JsonImporter('../data', '', '', '', '', '')
raw_data = i1.read_json_file()

@ -19,8 +19,9 @@ class TestStructureEstimator(unittest.TestCase):
def test_one_iteration(self):
se1 = se.StructureEstimator(self.s1, 0.1, 0.1)
se1.one_iteration_of_CTPC_algorithm('X')
#se1.one_iteration_of_CTPC_algorithm('X')
#self.aux_test_complete_test(se1, 'X', 'Y', ['Z'])
se1.ctpc_algorithm()
print(se1.complete_graph.edges)
def aux_test_complete_test(self, estimator, test_par, test_child, p_set):