From b48fd3e67f7080d76611bc89a2f962d41e10d18d Mon Sep 17 00:00:00 2001 From: philpMartin Date: Sat, 4 Jul 2020 15:09:06 +0200 Subject: [PATCH 1/2] Add Merged Trajectories in a single frame --- main_package/classes/amalgamated_cims.py | 8 ++-- main_package/classes/json_importer.py | 28 ++++++++++++- main_package/classes/parameters_estimator.py | 44 ++++++++++++-------- 3 files changed, 57 insertions(+), 23 deletions(-) diff --git a/main_package/classes/amalgamated_cims.py b/main_package/classes/amalgamated_cims.py index 65138e5..26bcf03 100644 --- a/main_package/classes/amalgamated_cims.py +++ b/main_package/classes/amalgamated_cims.py @@ -9,7 +9,7 @@ class AmalgamatedCims: """ # list_of_vars_orders contiene tutte le liste con i parent ordinati secondo il valore indx def __init__(self, states_number_per_node, list_of_keys, list_of_parents_states_number): - self.sets_of_cims = {} + self.sets_of_cims = [] self.init_cims_structure(list_of_keys, states_number_per_node, list_of_parents_states_number) #self.states_per_variable = states_number @@ -17,10 +17,10 @@ class AmalgamatedCims: print(keys) print(list_of_parents_states_number) for indx, key in enumerate(keys): - self.sets_of_cims[key] = socim.SetOfCims(key, list_of_parents_states_number[indx], states_number_per_node[indx]) + self.sets_of_cims.append(socim.SetOfCims(key, list_of_parents_states_number[indx], states_number_per_node[indx])) - def get_set_of_cims(self, node_id): - return self.sets_of_cims[node_id] + def get_set_of_cims(self, node_indx): + return self.sets_of_cims[node_indx] def get_vars_order(self, node): return self.actual_cims[node][1] diff --git a/main_package/classes/json_importer.py b/main_package/classes/json_importer.py index 4a73d13..cbe1631 100644 --- a/main_package/classes/json_importer.py +++ b/main_package/classes/json_importer.py @@ -23,6 +23,7 @@ class JsonImporter(AbstractImporter): def __init__(self, files_path): self.df_samples_list = [] + self.concatenated_samples = None self.df_structure = pd.DataFrame() self.df_variables = pd.DataFrame() super(JsonImporter, self).__init__(files_path) @@ -89,6 +90,26 @@ class JsonImporter(AbstractImporter): for sample_indx, sample in enumerate(raw_data[indx][trajectories_key]): self.df_samples_list.append(pd.json_normalize(raw_data[indx][trajectories_key][sample_indx])) + def compute_row_delta_sigle_samples_frame(self, sample_frame): + columns_header = list(sample_frame.columns.values) + #print(columns_header) + for col_name in columns_header: + if col_name == 'Time': + sample_frame[col_name + 'Delta'] = sample_frame[col_name].diff() + else: + sample_frame[col_name + 'Delta'] = (sample_frame[col_name].diff().bfill() != 0).astype(int) + #sample_frame['Delta'] = sample_frame['Time'].diff() + #print(sample_frame) + + def compute_row_delta_in_all_samples_frames(self): + for sample in self.df_samples_list: + self.compute_row_delta_sigle_samples_frame(sample) + self.concatenated_samples = pd.concat(self.df_samples_list) + self.concatenated_samples['Time'] = self.concatenated_samples['TimeDelta'] + del self.concatenated_samples['TimeDelta'] + self.concatenated_samples['Time'] = self.concatenated_samples['Time'].fillna(0) + + def build_list_of_samples_array(self, data_frame): """ Costruisce una lista contenente le colonne presenti nel dataframe data_frame convertendole in numpy_array @@ -115,9 +136,12 @@ class JsonImporter(AbstractImporter): self.df_samples_list[indx] = self.df_samples_list[indx].iloc[0:0] -"""ij = JsonImporter("../data") +ij = JsonImporter("../data") ij.import_data() #print(ij.df_samples_list[7]) print(ij.df_structure) print(ij.df_variables) -print((ij.build_list_of_samples_array(0)[1].size))""" +#print((ij.build_list_of_samples_array(0)[1].size)) +#ij.compute_row_delta_sigle_samples_frame(ij.df_samples_list[0]) +ij.compute_row_delta_in_all_samples_frames() +print(ij.concatenated_samples.to_numpy()) diff --git a/main_package/classes/parameters_estimator.py b/main_package/classes/parameters_estimator.py index 4ac0563..370b5f2 100644 --- a/main_package/classes/parameters_estimator.py +++ b/main_package/classes/parameters_estimator.py @@ -1,5 +1,6 @@ import os import time as tm +from line_profiler import LineProfiler import network_graph as ng import sample_path as sp @@ -20,22 +21,22 @@ class ParametersEstimator: self.net_graph.get_ordered_by_indx_parents_values_for_all_nodes()) def parameters_estimation(self): - print("Starting computing") - t0 = tm.time() + #print("Starting computing") + #t0 = tm.time() for indx, trajectory in enumerate(self.sample_path.trajectories): self.parameters_estimation_single_trajectory(trajectory.get_trajectory()) #print("Finished Trajectory number", indx) - t1 = tm.time() - t0 - print("Elapsed Time ", t1) + #t1 = tm.time() - t0 + #print("Elapsed Time ", t1) def parameters_estimation_single_trajectory(self, trajectory): - #print(type(trajectory[0][0])) + t0 = tm.time() for indx, row in enumerate(trajectory): if trajectory[indx][1] == -1: break if trajectory[indx + 1][1] != -1: transition = self.find_transition(trajectory[indx], trajectory[indx + 1]) - which_node = self.net_graph.get_node_by_index(transition[0]) + which_node = transition[0] # print(which_node) which_matrix = self.which_matrix_to_update(row, transition[0]) which_element = transition[1] @@ -43,17 +44,22 @@ class ParametersEstimator: #changed_node = which_node time = self.compute_time_delta(trajectory[indx], trajectory[indx + 1]) + which_element = transition[1][0] + self.amalgamated_cims_struct.update_state_residence_time_for_matrix(which_node, which_matrix, which_element, + time) - for node_indx, node in enumerate(self.net_graph.get_nodes()): - #if node != changed_node: + for node_indx in range(0, 3): + if node_indx != transition[0]: # print(node) - which_node = node + which_node = node_indx which_matrix = self.which_matrix_to_update(row, node_indx) which_element = row[node_indx + 1] # print("State res time element " + str(which_element) + node) # print("State res time matrix indx" + str(which_matrix)) - self.amalgamated_cims_struct.update_state_residence_time_for_matrix(which_node, which_matrix, which_element, time) - + self.amalgamated_cims_struct.update_state_residence_time_for_matrix(which_node, which_matrix, + which_element, time) + t1 = tm.time() - t0 + print("Elapsed Time ", t1) def find_transition(self, current_row, next_row): for indx in range(1, len(current_row)): @@ -85,14 +91,18 @@ g1.init_graph() pe = ParametersEstimator(s1, g1) pe.init_amalgamated_cims_struct() -print(pe.amalgamated_cims_struct.get_set_of_cims('X').get_cims_number()) -print(pe.amalgamated_cims_struct.get_set_of_cims('Y').get_cims_number()) -print(pe.amalgamated_cims_struct.get_set_of_cims('Z').get_cims_number()) +print(pe.amalgamated_cims_struct.get_set_of_cims(0).get_cims_number()) +print(pe.amalgamated_cims_struct.get_set_of_cims(1).get_cims_number()) +print(pe.amalgamated_cims_struct.get_set_of_cims(2).get_cims_number()) #pe.parameters_estimation_single_trajectory(pe.sample_path.trajectories[0].get_trajectory()) -pe.parameters_estimation() -for matrix in pe.amalgamated_cims_struct.get_set_of_cims('Y').actual_cims: +lp = LineProfiler() +lp_wrapper = lp(pe.parameters_estimation_single_trajectory) +lp_wrapper(pe.sample_path.trajectories[0].get_trajectory()) +lp.print_stats() +#pe.parameters_estimation() +"""for matrix in pe.amalgamated_cims_struct.get_set_of_cims(1).actual_cims: print(matrix.state_residence_times) print(matrix.state_transition_matrix) matrix.compute_cim_coefficients() - print(matrix.cim) + print(matrix.cim)""" From 2425f491dfc4c43fa561a3bdb8b0e93b32325746 Mon Sep 17 00:00:00 2001 From: philpMartin Date: Sat, 4 Jul 2020 16:19:38 +0200 Subject: [PATCH 2/2] Merging traje but keep type of columns --- .../classes/conditional_intensity_matrix.py | 1 + main_package/classes/json_importer.py | 9 ++++--- main_package/classes/parameters_estimator.py | 26 ++++++++++++------- main_package/classes/sample_path.py | 17 +++++++++--- 4 files changed, 36 insertions(+), 17 deletions(-) diff --git a/main_package/classes/conditional_intensity_matrix.py b/main_package/classes/conditional_intensity_matrix.py index 82445dd..7523c8e 100644 --- a/main_package/classes/conditional_intensity_matrix.py +++ b/main_package/classes/conditional_intensity_matrix.py @@ -15,6 +15,7 @@ class ConditionalIntensityMatrix: def update_state_residence_time_for_state(self, state, time): #print("Time updating In state", state, time) + #print(state) self.state_residence_times[state] = self.state_residence_times[state] + time def compute_cim_coefficients(self): diff --git a/main_package/classes/json_importer.py b/main_package/classes/json_importer.py index cbe1631..ebbb7eb 100644 --- a/main_package/classes/json_importer.py +++ b/main_package/classes/json_importer.py @@ -96,8 +96,8 @@ class JsonImporter(AbstractImporter): for col_name in columns_header: if col_name == 'Time': sample_frame[col_name + 'Delta'] = sample_frame[col_name].diff() - else: - sample_frame[col_name + 'Delta'] = (sample_frame[col_name].diff().bfill() != 0).astype(int) + #else: + #sample_frame[col_name + 'Delta'] = (sample_frame[col_name].diff().bfill() != 0).astype(int) #sample_frame['Delta'] = sample_frame['Time'].diff() #print(sample_frame) @@ -134,9 +134,10 @@ class JsonImporter(AbstractImporter): """ for indx in range(len(self.df_samples_list)): self.df_samples_list[indx] = self.df_samples_list[indx].iloc[0:0] + self.concatenated_samples = self.concatenated_samples.iloc[0:0] -ij = JsonImporter("../data") +"""ij = JsonImporter("../data") ij.import_data() #print(ij.df_samples_list[7]) print(ij.df_structure) @@ -144,4 +145,4 @@ print(ij.df_variables) #print((ij.build_list_of_samples_array(0)[1].size)) #ij.compute_row_delta_sigle_samples_frame(ij.df_samples_list[0]) ij.compute_row_delta_in_all_samples_frames() -print(ij.concatenated_samples.to_numpy()) +print(ij.concatenated_samples.to_numpy())""" diff --git a/main_package/classes/parameters_estimator.py b/main_package/classes/parameters_estimator.py index 370b5f2..9a2032f 100644 --- a/main_package/classes/parameters_estimator.py +++ b/main_package/classes/parameters_estimator.py @@ -1,6 +1,7 @@ import os import time as tm from line_profiler import LineProfiler +import numpy as np import network_graph as ng import sample_path as sp @@ -30,12 +31,18 @@ class ParametersEstimator: #print("Elapsed Time ", t1) def parameters_estimation_single_trajectory(self, trajectory): + tr_len = trajectory.shape[0] + row_length = trajectory.shape[1] + print(tr_len) + print(row_length) t0 = tm.time() for indx, row in enumerate(trajectory): - if trajectory[indx][1] == -1: + """ #if int(trajectory[indx][1]) == -1: + #break + if indx == tr_len - 2: break if trajectory[indx + 1][1] != -1: - transition = self.find_transition(trajectory[indx], trajectory[indx + 1]) + transition = self.find_transition(trajectory[indx], trajectory[indx + 1], row_length) which_node = transition[0] # print(which_node) which_matrix = self.which_matrix_to_update(row, transition[0]) @@ -43,7 +50,9 @@ class ParametersEstimator: self.amalgamated_cims_struct.update_state_transition_for_matrix(which_node, which_matrix, which_element) #changed_node = which_node - time = self.compute_time_delta(trajectory[indx], trajectory[indx + 1]) + if int(trajectory[indx][0]) == 0: + time = trajectory[indx + 1][0] + #time = self.compute_time_delta(trajectory[indx], trajectory[indx + 1]) which_element = transition[1][0] self.amalgamated_cims_struct.update_state_residence_time_for_matrix(which_node, which_matrix, which_element, time) @@ -53,20 +62,19 @@ class ParametersEstimator: # print(node) which_node = node_indx which_matrix = self.which_matrix_to_update(row, node_indx) - which_element = row[node_indx + 1] + which_element = int(row[node_indx + 1]) # print("State res time element " + str(which_element) + node) # print("State res time matrix indx" + str(which_matrix)) self.amalgamated_cims_struct.update_state_residence_time_for_matrix(which_node, which_matrix, which_element, time) t1 = tm.time() - t0 - print("Elapsed Time ", t1) + print("Elapsed Time ", t1)""" - def find_transition(self, current_row, next_row): - for indx in range(1, len(current_row)): + def find_transition(self, current_row, next_row, row_length): + for indx in range(1, row_length): if current_row[indx] != next_row[indx]: return [indx - 1, (current_row[indx], next_row[indx])] - def compute_time_delta(self, current_row, next_row): return next_row[0] - current_row[0] @@ -97,7 +105,7 @@ print(pe.amalgamated_cims_struct.get_set_of_cims(2).get_cims_number()) #pe.parameters_estimation_single_trajectory(pe.sample_path.trajectories[0].get_trajectory()) lp = LineProfiler() lp_wrapper = lp(pe.parameters_estimation_single_trajectory) -lp_wrapper(pe.sample_path.trajectories[0].get_trajectory()) +lp_wrapper(pe.sample_path.trajectories.get_trajectory()) lp.print_stats() #pe.parameters_estimation() """for matrix in pe.amalgamated_cims_struct.get_set_of_cims(1).actual_cims: diff --git a/main_package/classes/sample_path.py b/main_package/classes/sample_path.py index 9f5b7dc..a7f2be3 100644 --- a/main_package/classes/sample_path.py +++ b/main_package/classes/sample_path.py @@ -21,14 +21,16 @@ class SamplePath: def __init__(self, files_path): print() self.importer = imp.JsonImporter(files_path) - self.trajectories = [] + self.trajectories = None self.structure = None def build_trajectories(self): self.importer.import_data() - for traj_data_frame in self.importer.df_samples_list: - trajectory = tr.Trajectory(self.importer.build_list_of_samples_array(traj_data_frame)) - self.trajectories.append(trajectory) + self.importer.compute_row_delta_in_all_samples_frames() + #self.trajectories = self.importer.concatenated_samples.to_numpy() + #for traj_data_frame in self.importer.df_samples_list: + self.trajectories = tr.Trajectory(self.importer.build_list_of_samples_array(self.importer.concatenated_samples)) + #self.trajectories.append(trajectory) self.importer.clear_data_frames() def build_structure(self): @@ -38,4 +40,11 @@ class SamplePath: return len(self.trajectories) +"""os.getcwd() +os.chdir('..') +path = os.getcwd() + '/data' +s1 = SamplePath(path) +s1.build_trajectories() +s1.build_structure() +print(s1.trajectories.get_trajectory())"""