From 4b9ff23e0bcca13db2d72b245779a3d87c6b1404 Mon Sep 17 00:00:00 2001
From: philpMartin
Date: Sun, 26 Jul 2020 21:39:01 +0200
Subject: [PATCH] Add parameters estimation per variable

---
 main_package/classes/json_importer.py        | 28 +++---
 main_package/classes/parameters_estimator.py | 81 +++++-------------
 main_package/classes/structure_estimator.py  | 85 ++++++++++++-------
 .../tests/test_parameters_estimator.py       |  7 ++
 .../tests/test_structure_estimator.py        |  3 +-
 5 files changed, 98 insertions(+), 106 deletions(-)

diff --git a/main_package/classes/json_importer.py b/main_package/classes/json_importer.py
index 3595a2b..f774492 100644
--- a/main_package/classes/json_importer.py
+++ b/main_package/classes/json_importer.py
@@ -2,6 +2,7 @@ import os
 import glob
 import pandas as pd
 import json
+import typing
 from abstract_importer import AbstractImporter
 from line_profiler import LineProfiler
 
@@ -22,7 +23,8 @@ class JsonImporter(AbstractImporter):
 
     """
 
-    def __init__(self, files_path, samples_label, structure_label, variables_label, time_key, variables_key):
+    def __init__(self, files_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
+                 variables_key: str):
         self.samples_label = samples_label
         self.structure_label = structure_label
         self.variables_label = variables_label
@@ -43,19 +45,19 @@ class JsonImporter(AbstractImporter):
         self.import_structure(raw_data)
         self.import_variables(raw_data, self.sorter)
 
-    def import_trajectories(self, raw_data):
+    def import_trajectories(self, raw_data: pd.DataFrame):
         self.normalize_trajectories(raw_data, 0, self.samples_label)
 
-    def import_structure(self, raw_data):
+    def import_structure(self, raw_data: pd.DataFrame):
         self._df_structure = self.one_level_normalizing(raw_data, 0, self.structure_label)
 
-    def import_variables(self, raw_data, sorter):
+    def import_variables(self, raw_data: pd.DataFrame, sorter: typing.List):
         self._df_variables = self.one_level_normalizing(raw_data, 0, self.variables_label)
         self._df_variables[self.variables_key] = self._df_variables[self.variables_key].astype("category")
         self._df_variables[self.variables_key] = self._df_variables[self.variables_key].cat.set_categories(sorter)
         self._df_variables = self._df_variables.sort_values([self.variables_key])
 
-    def read_json_file(self):
+    def read_json_file(self) -> typing.List:
         """
         Reads the first .json file in the path self.filepath
 
@@ -75,7 +77,7 @@ class JsonImporter(AbstractImporter):
         except ValueError as err:
             print(err.args)
 
-    def one_level_normalizing(self, raw_data, indx, key):
+    def one_level_normalizing(self, raw_data: pd.DataFrame, indx: int, key: str) -> pd.DataFrame:
         """
         Extracts the one-level nested data in the dataset raw_data,
         contained in the json array at index indx in the json object key
@@ -90,7 +92,7 @@ class JsonImporter(AbstractImporter):
         """
         return pd.DataFrame(raw_data[indx][key])
 
-    def normalize_trajectories(self, raw_data, indx, trajectories_key):
+    def normalize_trajectories(self, raw_data: pd.DataFrame, indx: int, trajectories_key: str):
         """
         Extracts the trajectories in raw_data, contained in the json array at index indx, in the json object trajectories_key.
         Adds the extracted trajectories to the list of dataframes self.df_samples_list
@@ -104,7 +106,9 @@ class JsonImporter(AbstractImporter):
             self.df_samples_list.append(pd.DataFrame(sample))
         self.sorter = list(self.df_samples_list[0].columns.values)[1:]
 
-    def compute_row_delta_sigle_samples_frame(self, sample_frame, time_header_label, columns_header, shifted_cols_header):
+    def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame, time_header_label: str,
+                                              columns_header: typing.List, shifted_cols_header: typing.List) \
+            -> pd.DataFrame:
         sample_frame[time_header_label] = sample_frame[time_header_label].diff().shift(-1)
         shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32')
         #print(shifted_cols)
@@ -113,8 +117,8 @@ class JsonImporter(AbstractImporter):
         sample_frame.drop(sample_frame.tail(1).index, inplace=True)
         return sample_frame
 
-    def compute_row_delta_in_all_samples_frames(self, time_header_label):
-        columns_header = list(self.df_samples_list[0].columns.values)
+    def compute_row_delta_in_all_samples_frames(self, time_header_label: str):
+        #columns_header = list(self.df_samples_list[0].columns.values)
         #self.sorter = columns_header[1:]
         shifted_cols_header = [s + "S" for s in self.sorter]
         for indx, sample in enumerate(self.df_samples_list):
@@ -122,7 +126,7 @@ class JsonImporter(AbstractImporter):
                                                                      time_header_label, self.sorter, shifted_cols_header)
         self._concatenated_samples = pd.concat(self.df_samples_list)
 
-    def build_list_of_samples_array(self, data_frame):
+    def build_list_of_samples_array(self, data_frame: pd.DataFrame) -> typing.List:
         """
         Builds a list containing the columns of the dataframe data_frame, converting them to numpy arrays
         Parameters:
@@ -150,7 +154,7 @@ class JsonImporter(AbstractImporter):
         for indx in range(len(self.df_samples_list)):  # The single trajectories are no longer needed
             self.df_samples_list[indx] = self.df_samples_list[indx].iloc[0:0]
 
-    def import_sampled_cims(self, raw_data, indx, cims_key):
+    def import_sampled_cims(self, raw_data: pd.DataFrame, indx: int, cims_key: str) -> typing.Dict:
         cims_for_all_vars = {}
         for var in raw_data[indx][cims_key]:
             sampled_cims_list = []
diff --git a/main_package/classes/parameters_estimator.py b/main_package/classes/parameters_estimator.py
index ab998aa..2a07689 100644
--- a/main_package/classes/parameters_estimator.py
+++ b/main_package/classes/parameters_estimator.py
@@ -43,6 +43,26 @@ class ParametersEstimator:
                                                        aggr[1].transition_matrices)
             aggr[1].build_cims(aggr[1].state_residence_times, aggr[1].transition_matrices)
 
+    def compute_parameters_for_node(self, node_id):
+        pos_index = self.net_graph.graph_struct.get_positional_node_indx(node_id)
+        #print("Nodes", self.net_graph.get_nodes())
+        #print(pos_index)
+        #print(self.net_graph.time_filtering)
+        self.compute_state_res_time_for_node(self.net_graph.get_node_indx(node_id), self.sample_path.trajectories.times,
+                                             self.sample_path.trajectories.trajectory,
+                                             self.net_graph.time_filtering[pos_index],
+                                             self.net_graph.time_scalar_indexing_strucure[pos_index],
+                                             self.sets_of_cims_struct.sets_of_cims[pos_index].state_residence_times)
+        # print(self.net_graph.transition_filtering[indx])
+        # print(self.net_graph.transition_scalar_indexing_structure[indx])
+        self.compute_state_transitions_for_a_node(self.net_graph.get_node_indx(node_id),
+                                                  self.sample_path.trajectories.complete_trajectory,
+                                                  self.net_graph.transition_filtering[pos_index],
+                                                  self.net_graph.transition_scalar_indexing_structure[pos_index],
+                                                  self.sets_of_cims_struct.sets_of_cims[pos_index].transition_matrices)
+        self.sets_of_cims_struct.sets_of_cims[pos_index].build_cims(
+            self.sets_of_cims_struct.sets_of_cims[pos_index].state_residence_times,
+            self.sets_of_cims_struct.sets_of_cims[pos_index].transition_matrices)
 
     def compute_state_res_time_for_node(self, node_indx, times, trajectory, cols_filter, scalar_indexes_struct, T):
 
@@ -96,64 +116,3 @@ class ParametersEstimator:
 
 
 
-# Simple Test #
-"""os.getcwd()
-os.chdir('..')
-path = os.getcwd() + '/data'
-
-s1 = sp.SamplePath(path)
-s1.build_trajectories()
-s1.build_structure()
-
-g1 = ng.NetworkGraph(s1.structure)
-g1.init_graph()
-
-pe = ParametersEstimator(s1, g1)
-pe.init_amalgamated_cims_struct()
-lp = LineProfiler()
-
-[[2999.2966 2749.2298 3301.5975]
- [3797.1737 3187.8345 2939.2009]
- [3432.224  3062.5402 4530.9028]]
-
-[[ 827.6058  838.1515  686.1365]
- [1426.384  2225.2093 1999.8528]
- [ 745.3068  733.8129  746.2347]
- [ 520.8113  690.9502  853.4022]
- [1590.8609 1853.0021 1554.1874]
- [ 637.5576  643.8822  654.9506]
- [ 718.7632  742.2117  998.5844]
- [1811.984  1598.0304 2547.988 ]
- [ 770.8503  598.9588  984.3304]]
-
-lp_wrapper = lp(pe.compute_state_residence_time_for_all_nodes)
-lp_wrapper()
-lp.print_stats()
-
-#pe.compute_state_residence_time_for_all_nodes()
-print(pe.amalgamated_cims_struct.sets_of_cims[0].state_residence_times)
-
-[[[14472,  3552, 10920],
-  [12230, 25307, 13077],
-  [ 9707, 14408, 24115]],
-
- [[22918,  6426, 16492],
-  [10608, 16072,  5464],
-  [10746, 11213, 21959]],
-
- [[23305,  6816, 16489],
-  [ 3792, 19190, 15398],
-  [13718, 18243, 31961]]])
-
- Raveled [14472  3552 10920 12230 25307 13077  9707 14408 24115 22918  6426 16492
- 10608 16072  5464 10746 11213 21959 23305  6816 16489  3792 19190 15398
- 13718 18243 31961]
-
-lp_wrapper = lp(pe.compute_parameters)
-lp_wrapper()
-#for variable in pe.amalgamated_cims_struct.sets_of_cims:
-    #for cond in variable.get_cims():
-        #print(cond.cim)
-print(pe.amalgamated_cims_struct.get_cims_of_node(1,[2]))
-lp.print_stats()"""
-
diff --git a/main_package/classes/structure_estimator.py b/main_package/classes/structure_estimator.py
index 59555b1..c7dd6ad 100644
--- a/main_package/classes/structure_estimator.py
+++ b/main_package/classes/structure_estimator.py
@@ -55,11 +55,14 @@ class StructureEstimator:
         p1 = pe.ParametersEstimator(self.sample_path, g1)
         p1.init_sets_cims_container()
-        p1.compute_parameters()
+        #print("Computing params for",test_child, test_parent, parent_set)
+        p1.compute_parameters_for_node(test_child)
+        #p1.compute_parameters()
 
         p2 = pe.ParametersEstimator(self.sample_path, g2)
         p2.init_sets_cims_container()
-        p2.compute_parameters()
+        #p2.compute_parameters()
+        p2.compute_parameters_for_node(test_child)
         #for cim in p1.sets_of_cims_struct.sets_of_cims[s1.get_positional_node_indx(test_child)].actual_cims:
             #print(cim)
 
@@ -72,12 +75,11 @@ class StructureEstimator:
             #for j, cim2 in enumerate(
                     #p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)].actual_cims):
             for j in range(indx, self.sample_path.structure.get_states_number(test_parent) + indx):
-                print("J", j)
+                #print("J", j)
+                #print("Pos Index", p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)].actual_cims)
                 cim2 = p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)].actual_cims[j]
                 indx += 1
-                print(indx)
-
-
+                #print(indx)
                 print("Run Test", i, j)
                 if not self.independence_test(test_child, cim1, cim2):
                     return False
@@ -85,33 +87,45 @@ class StructureEstimator:
     def independence_test(self, tested_child, cim1, cim2):
         # Fake exp test
+        r1s = cim1.state_transition_matrix.diagonal()
+        r2s = cim2.state_transition_matrix.diagonal()
+        F_stats = cim2.cim.diagonal() / cim1.cim.diagonal()
         for val in range(0, self.sample_path.structure.get_states_number(tested_child)):  # the possible values of tested child TODO THIS COMPUTATION MUST BE VECTORIZED
-            r1 = cim1.state_transition_matrix[val][val]
-            r2 = cim2.state_transition_matrix[val][val]
-            print("No Test Parent:",cim1.cim[val][val],"With Test Parent", cim2.cim[val][val])
-            F = cim2.cim[val][val] / cim1.cim[val][val]
+            #r1 = cim1.state_transition_matrix[val][val]
+            #r2 = cim2.state_transition_matrix[val][val]
+            #print("No Test Parent:",cim1.cim[val][val],"With Test Parent", cim2.cim[val][val])
+            #F = cim2.cim[val][val] / cim1.cim[val][val]
 
-            print("Exponential test", F, r1, r2)
+            #print("Exponential test", F_stats[val], r1s[val], r2s[val])
             #print(f_dist.ppf(1 - self.exp_test_sign / 2, r1, r2))
             #print(f_dist.ppf(self.exp_test_sign / 2, r1, r2))
-            if F < f_dist.ppf(self.exp_test_sign / 2, r1, r2) or \
-                    F > f_dist.ppf(1 - self.exp_test_sign / 2, r1, r2):
+            if F_stats[val] < f_dist.ppf(self.exp_test_sign / 2, r1s[val], r2s[val]) or \
+                    F_stats[val] > f_dist.ppf(1 - self.exp_test_sign / 2, r1s[val], r2s[val]):
                 print("CONDITIONALLY DEPENDENT EXP")
                 return False
         # fake chi test
         M1_no_diag = self.remove_diagonal_elements(cim1.state_transition_matrix)
         M2_no_diag = self.remove_diagonal_elements(cim2.state_transition_matrix)
-        print("M1 no diag", M1_no_diag)
-        print("M2 no diag", M2_no_diag)
+        #print("M1 no diag", M1_no_diag)
+        #print("M2 no diag", M2_no_diag)
         chi_2_quantile = chi2_dist.ppf(1 - self.chi_test_alfa,
                                        self.sample_path.structure.get_states_number(tested_child) - 1)
+        """
+        Ks = np.sqrt(cim1.state_transition_matrix.diagonal() / cim2.state_transition_matrix.diagonal())
+        Ls = np.reciprocal(Ks)
+        chi_stats = np.sum((np.power((M2_no_diag.T * Ks).T - (M1_no_diag.T * Ls).T, 2) \
+            / (M1_no_diag + M2_no_diag)), axis=1)"""
+        Ks = np.sqrt(r1s / r2s)
+        Ls = np.sqrt(r2s / r1s)
         for val in range(0, self.sample_path.structure.get_states_number(tested_child)):
-            K = math.sqrt(cim1.state_transition_matrix[val][val] / cim2.state_transition_matrix[val][val])
-            L = 1 / K
-            Chi = np.sum(np.power(K * M2_no_diag[val] - L *M1_no_diag[val], 2) /
+            #K = math.sqrt(cim1.state_transition_matrix[val][val] / cim2.state_transition_matrix[val][val])
+            #L = 1 / K
+            Chi = np.sum(np.power(Ks[val] * M2_no_diag[val] - Ls[val] *M1_no_diag[val], 2) /
                          (M1_no_diag[val] + M2_no_diag[val]))
-            print("Chi Stats", Chi)
-            print("Chi Quantile", chi_2_quantile)
+
+            #print("Chi Stats", Chi)
+            #print("Chi Quantile", chi_2_quantile)
             if Chi > chi_2_quantile:
+            #if np.any(chi_stats > chi_2_quantile):
                 print("CONDITIONALLY DEPENDENT CHI")
                 return False
         #print("Chi test", Chi)
@@ -122,29 +136,32 @@ class StructureEstimator:
         tests_parents_numb = len(u)
         #print(u)
         b = 0
-        parent_indx = 0
+        #parent_indx = 0
         while b < len(u):
            #for parent_id in u:
            parent_indx = 0
            while u and parent_indx < tests_parents_numb and b < len(u):
                # list_without_test_parent = u.remove(parent_id)
                removed = False
-               print("b", b)
-               print("Parent Indx", parent_indx)
+               #print("b", b)
+               #print("Parent Indx", parent_indx)
               #if not list(self.generate_possible_sub_sets_of_size(u, b, u[parent_indx])):
                   #break
               S = self.generate_possible_sub_sets_of_size(u, b, u[parent_indx])
-              print("U Set", u)
-              print("S", S)
+              #print("U Set", u)
+              #print("S", S)
               for parents_set in S:
-                  print("Parent Set", parents_set)
-                  print("Test Parent", u[parent_indx])
+                  #print("Parent Set", parents_set)
+                  #print("Test Parent", u[parent_indx])
                   if self.complete_test(u[parent_indx], var_id, parents_set):
                       print("Removing EDGE:", u[parent_indx], var_id)
                       self.complete_graph.remove_edge(u[parent_indx], var_id)
-                      #self.complete_graph_frame = \
-                          #self.complete_graph_frame[(self.complete_graph_frame.From !=
-                                                    # u[parent_indx]) & (self.complete_graph_frame.To != var_id)]
+                      #print(self.complete_graph_frame)
+                      self.complete_graph_frame = \
+                          self.complete_graph_frame.drop(
+                              self.complete_graph_frame[(self.complete_graph_frame.From ==
+                                                         u[parent_indx]) & (self.complete_graph_frame.To == var_id)].index)
+                      #print(self.complete_graph_frame)
                       u.remove(u[parent_indx])
                       removed = True
                   #else:
@@ -154,8 +171,8 @@ class StructureEstimator:
            b += 1
 
     def generate_possible_sub_sets_of_size(self, u, size, parent_id):
-        print("Inside Generate subsets", u)
-        print("InsideGenerate Subsets", parent_id)
+        #print("Inside Generate subsets", u)
+        #print("InsideGenerate Subsets", parent_id)
         list_without_test_parent = u[:]
         list_without_test_parent.remove(parent_id)
         # u.remove(parent_id)
@@ -168,3 +185,7 @@ class StructureEstimator:
         s0, s1 = matrix.strides
         return strided(matrix.ravel()[1:], shape=(m - 1, m), strides=(s0 + s1, s1)).reshape(m, -1)
 
+    def ctpc_algorithm(self):
+        for node_id in self.sample_path.structure.list_of_nodes_labels():
+            self.one_iteration_of_CTPC_algorithm(node_id)
+
diff --git a/main_package/tests/test_parameters_estimator.py b/main_package/tests/test_parameters_estimator.py
index 5170295..2e484e3 100644
--- a/main_package/tests/test_parameters_estimator.py
+++ b/main_package/tests/test_parameters_estimator.py
@@ -55,6 +55,13 @@ class TestParametersEstimatior(unittest.TestCase):
             for r1, r2 in zip(cim1, cim2):
                 self.assertTrue(np.all(np.isclose(r1, r2, 1e-01, 1e-01) == True))
 
+    def test_compute_parameters_for_node(self):  #TODO This is not a test
+        pe1 = pe.ParametersEstimator(self.s1, self.g1)
+        pe1.init_sets_cims_container()
+        pe1.compute_parameters_for_node('Y')
+        print(pe1.sets_of_cims_struct.get_set_of_cims(1).actual_cims)
+
+
     def aux_import_sampled_cims(self, cims_label):
         i1 = ji.JsonImporter('../data', '', '', '', '', '')
         raw_data = i1.read_json_file()
diff --git a/main_package/tests/test_structure_estimator.py b/main_package/tests/test_structure_estimator.py
index 571d5a5..53c3119 100644
--- a/main_package/tests/test_structure_estimator.py
+++ b/main_package/tests/test_structure_estimator.py
@@ -19,8 +19,9 @@ class TestStructureEstimator(unittest.TestCase):
     def test_one_iteration(self):
         se1 = se.StructureEstimator(self.s1, 0.1, 0.1)
-        se1.one_iteration_of_CTPC_algorithm('X')
+        #se1.one_iteration_of_CTPC_algorithm('X')
         #self.aux_test_complete_test(se1, 'X', 'Y', ['Z'])
+        se1.ctpc_algorithm()
         print(se1.complete_graph.edges)
 
     def aux_test_complete_test(self, estimator, test_par, test_child, p_set):
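
Note (not part of the patch): how the new per-variable API is meant to be driven end to end. This sketch is assembled from the updated tests and the removed "Simple Test" snippet above; the module names used in the imports and the '../data' path are assumptions, not something this patch defines.

import sample_path as sp
import network_graph as ng
import parameters_estimator as pe
import structure_estimator as se

# Load trajectories and structure from the json files in the data folder.
s1 = sp.SamplePath('../data')
s1.build_trajectories()
s1.build_structure()

# Build the graph over the imported structure.
g1 = ng.NetworkGraph(s1.structure)
g1.init_graph()

# Estimate the CIMs of a single variable only, instead of the whole network.
p1 = pe.ParametersEstimator(s1, g1)
p1.init_sets_cims_container()
p1.compute_parameters_for_node('Y')

# Run the full CTPC structure search: ctpc_algorithm() calls
# one_iteration_of_CTPC_algorithm(node_id) for every variable.
se1 = se.StructureEstimator(s1, 0.1, 0.1)   # exp_test_sign, chi_test_alfa
se1.ctpc_algorithm()
print(se1.complete_graph.edges)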
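The independence_test above interleaves the vectorized statistics with the commented-out scalar version, so here is a standalone restatement of the same two checks for readability. It is a minimal sketch: the function name, the argument layout (cim = estimated intensity matrix, trans = state-transition count matrix whose diagonal holds the r1/r2 totals) and the default significance levels are assumptions for illustration, not part of the patch, and zero off-diagonal counts are not handled, exactly as in the original code.

import numpy as np
from scipy.stats import chi2 as chi2_dist
from scipy.stats import f as f_dist

def cims_conditionally_independent(cim1, trans1, cim2, trans2,
                                   exp_test_sign=0.1, chi_test_alfa=0.1):
    """Return False as soon as either the exponential or the chi-square test rejects."""
    r1s = trans1.diagonal()                      # totals for the CIM without the test parent
    r2s = trans2.diagonal()                      # totals for the CIM with the test parent
    F_stats = cim2.diagonal() / cim1.diagonal()  # ratio of the estimated exit rates
    # Exponential test: every F statistic must lie inside the central F interval.
    lower = f_dist.ppf(exp_test_sign / 2, r1s, r2s)
    upper = f_dist.ppf(1 - exp_test_sign / 2, r1s, r2s)
    if np.any((F_stats < lower) | (F_stats > upper)):
        return False                             # conditionally dependent (exp test)
    # Chi-square test on the off-diagonal transition counts, row by row.
    n_states = cim1.shape[0]
    off_diag = ~np.eye(n_states, dtype=bool)
    M1 = trans1[off_diag].reshape(n_states, n_states - 1)
    M2 = trans2[off_diag].reshape(n_states, n_states - 1)
    Ks = np.sqrt(r1s / r2s)
    Ls = np.sqrt(r2s / r1s)
    chi_stats = np.sum((Ks[:, np.newaxis] * M2 - Ls[:, np.newaxis] * M1) ** 2 / (M1 + M2), axis=1)
    chi_2_quantile = chi2_dist.ppf(1 - chi_test_alfa, n_states - 1)
    if np.any(chi_stats > chi_2_quantile):
        return False                             # conditionally dependent (chi test)
    return True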