diff --git a/main_package/classes/amalgamated_cims.py b/main_package/classes/amalgamated_cims.py
index 26bcf03..dfd154a 100644
--- a/main_package/classes/amalgamated_cims.py
+++ b/main_package/classes/amalgamated_cims.py
@@ -14,14 +14,19 @@ class AmalgamatedCims:
         #self.states_per_variable = states_number
 
     def init_cims_structure(self, keys, states_number_per_node, list_of_parents_states_number):
-        print(keys)
-        print(list_of_parents_states_number)
+        #print(keys)
+        #print(list_of_parents_states_number)
         for indx, key in enumerate(keys):
-            self.sets_of_cims.append(socim.SetOfCims(key, list_of_parents_states_number[indx], states_number_per_node[indx]))
+            self.sets_of_cims.append(
+                socim.SetOfCims(key, list_of_parents_states_number[indx], states_number_per_node[indx]))
+
     def get_set_of_cims(self, node_indx):
         return self.sets_of_cims[node_indx]
 
+    def get_cims_of_node(self, node_indx, cim_indx):
+        return self.sets_of_cims[node_indx].get_cim(cim_indx)
+
     def get_vars_order(self, node):
         return self.actual_cims[node][1]
diff --git a/main_package/classes/conditional_intensity_matrix.py b/main_package/classes/conditional_intensity_matrix.py
index 7523c8e..bffd040 100644
--- a/main_package/classes/conditional_intensity_matrix.py
+++ b/main_package/classes/conditional_intensity_matrix.py
@@ -3,27 +3,26 @@
 
 
 class ConditionalIntensityMatrix:
-    def __init__(self, dimension):
-        self.state_residence_times = np.zeros(shape=dimension)
-        self.state_transition_matrix = np.zeros(shape=(dimension, dimension), dtype=int)
-        self.cim = np.zeros(shape=(dimension, dimension), dtype=float)
+    def __init__(self, dimension, state_residence_times, state_transition_matrix):
+        self.state_residence_times = state_residence_times
+        self.state_transition_matrix = state_transition_matrix
+        #self.cim = np.zeros(shape=(dimension, dimension), dtype=float)
+        self.cim = self.state_transition_matrix.astype(np.float)
 
     def update_state_transition_count(self, element_indx):
         #print(element_indx)
-        self.state_transition_matrix[element_indx[0]][element_indx[1]] = \
-            self.state_transition_matrix[element_indx[0]][element_indx[1]] + 1
+        #self.state_transition_matrix[element_indx[0]][element_indx[1]] += 1
+        self.state_transition_matrix[element_indx] += 1
 
     def update_state_residence_time_for_state(self, state, time):
         #print("Time updating In state", state, time)
-        #print(state)
-        self.state_residence_times[state] = self.state_residence_times[state] + time
+        self.state_residence_times[state] += time
+
 
     def compute_cim_coefficients(self):
-        for i, row in enumerate(self.state_transition_matrix):
-            row_sum = 0.0
-            for j, elem in enumerate(row):
-                rate_coefficient = elem / self.state_residence_times[i]
-                self.cim[i][j] = rate_coefficient
-                row_sum = row_sum + rate_coefficient
-            self.cim[i][i] = -1 * row_sum
+        np.fill_diagonal(self.cim, self.cim.diagonal() * -1)
+        self.cim = ((self.cim.T + 1) / (self.state_residence_times + 1)).T
+
+    def __repr__(self):
+        return 'CIM:\n' + str(self.cim)
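The rewritten compute_cim_coefficients assumes the transition-count matrix handed to the constructor already carries the per-row transition totals on its diagonal (the estimator below fills them in), negates that diagonal, and divides each row by the node's residence times, adding 1 to both counts and times as a smoothing term. A minimal stand-alone sketch of the same arithmetic, with made-up M and T values (names and numbers are illustrative only, not taken from the patch):

import numpy as np

M = np.array([[5., 2., 3.],
              [4., 9., 5.],
              [1., 6., 7.]])     # off-diagonal: transition counts; diagonal: row totals
T = np.array([10., 20., 5.])     # state residence times

cim = M.copy()
np.fill_diagonal(cim, -cim.diagonal())   # diagonal becomes minus the total outgoing count
cim = ((cim.T + 1) / (T + 1)).T          # row x divided by T[x], with +1 pseudo-counts
# without the +1 terms this is q[x, x'] = M[x, x'] / T[x] off the diagonal and
# q[x, x] = -(sum of the row's off-diagonal rates) on the diagonal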
diff --git a/main_package/classes/json_importer.py b/main_package/classes/json_importer.py
index ebbb7eb..45ecd6a 100644
--- a/main_package/classes/json_importer.py
+++ b/main_package/classes/json_importer.py
@@ -3,6 +3,7 @@ import glob
 import pandas as pd
 import json
 from abstract_importer import AbstractImporter
+from line_profiler import LineProfiler
 
 
 class JsonImporter(AbstractImporter):
@@ -23,25 +24,30 @@ class JsonImporter(AbstractImporter):
 
     def __init__(self, files_path):
         self.df_samples_list = []
-        self.concatenated_samples = None
-        self.df_structure = pd.DataFrame()
-        self.df_variables = pd.DataFrame()
+        self._df_structure = pd.DataFrame()
+        self._df_variables = pd.DataFrame()
+        self._concatenated_samples = None
+        super(JsonImporter, self).__init__(files_path)
 
     def import_data(self):
         raw_data = self.read_json_file()
         self.import_trajectories(raw_data)
+        self.compute_row_delta_in_all_samples_frames('Time')
         self.import_structure(raw_data)
         self.import_variables(raw_data)
+        # The variables MUST be ordered like the columns of the dataset
+        assert list(self._df_variables['Name']) == \
+               (list(self._concatenated_samples.columns.values[1:len(self.variables['Name']) + 1]))
 
     def import_trajectories(self, raw_data):
         self.normalize_trajectories(raw_data, 0, 'samples')
 
     def import_structure(self, raw_data):
-        self.df_structure = self.one_level_normalizing(raw_data, 0, 'dyn.str')
+        self._df_structure = self.one_level_normalizing(raw_data, 0, 'dyn.str')
 
     def import_variables(self, raw_data):
-        self.df_variables = self.one_level_normalizing(raw_data, 0, 'variables')
+        self._df_variables = self.one_level_normalizing(raw_data, 0, 'variables')
 
     def read_json_file(self):
         """
@@ -88,7 +94,27 @@ class JsonImporter(AbstractImporter):
         void
         """
         for sample_indx, sample in enumerate(raw_data[indx][trajectories_key]):
-            self.df_samples_list.append(pd.json_normalize(raw_data[indx][trajectories_key][sample_indx]))
+            self.df_samples_list.append(pd.DataFrame(sample))
+
+    def compute_row_delta_sigle_samples_frame(self, sample_frame, time_header_label, columns_header, shifted_cols_header):
+        sample_frame[time_header_label] = sample_frame[time_header_label].diff().shift(-1)
+        shifted_cols = sample_frame[columns_header[1:]].shift(-1)
+        shifted_cols.columns = shifted_cols_header
+        sample_frame = sample_frame.assign(**shifted_cols)
+        sample_frame.drop(sample_frame.tail(1).index, inplace=True)
+        return sample_frame
+
+    def compute_row_delta_in_all_samples_frames(self, time_header_label):
+        columns_header = list(self.df_samples_list[0].columns.values)
+        shifted_cols_header = [s + "S" for s in columns_header[1:]]
+        for indx, sample in enumerate(self.df_samples_list):
+            self.df_samples_list[indx] = self.compute_row_delta_sigle_samples_frame(sample,
+                                                    time_header_label, columns_header, shifted_cols_header)
+            #print(self.df_samples_list[indx])
+        self._concatenated_samples = pd.concat(self.df_samples_list)
+        #print("Concatenated", self._concatenated_samples)
+        for indx in range(len(self.df_samples_list)):  # the individual trajectories are no longer needed
+            self.df_samples_list[indx] = self.df_samples_list[indx].iloc[0:0]
 
     def compute_row_delta_sigle_samples_frame(self, sample_frame):
         columns_header = list(sample_frame.columns.values)
@@ -132,17 +158,34 @@ class JsonImporter(AbstractImporter):
         Returns:
         void
         """
-        for indx in range(len(self.df_samples_list)):
-            self.df_samples_list[indx] = self.df_samples_list[indx].iloc[0:0]
-        self.concatenated_samples = self.concatenated_samples.iloc[0:0]
+        self._concatenated_samples = self._concatenated_samples.iloc[0:0]
+
+    @property
+    def concatenated_samples(self):
+        return self._concatenated_samples
+
+    @property
+    def variables(self):
+        return self._df_variables
+
+    @property
+    def structure(self):
+        return self._df_structure
 
 """ij = JsonImporter("../data")
+#raw_data = ij.read_json_file()
+lp = LineProfiler()
+lp_wrapper = lp(ij.import_data)
+lp_wrapper()
+lp.print_stats()
+
+
 ij.import_data()
 #print(ij.df_samples_list[7])
 print(ij.df_structure)
 print(ij.df_variables)
-#print((ij.build_list_of_samples_array(0)[1].size))
-#ij.compute_row_delta_sigle_samples_frame(ij.df_samples_list[0])
-ij.compute_row_delta_in_all_samples_frames()
-print(ij.concatenated_samples.to_numpy())"""
+print(ij.concatenated_samples)"""
+
+
+
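The two methods added above reshape each sample into the layout the estimator expects: compute_row_delta_sigle_samples_frame turns the absolute Time column into the sojourn time of each row and appends, for every state column, a shifted copy (suffix "S") holding the state reached at the next row; the successor-less last row is dropped. A small illustration with invented data (the column names 'Time', 'X', 'Y' are placeholders):

import pandas as pd

sample = pd.DataFrame({'Time': [0.0, 1.2, 2.0, 3.5],
                       'X': [0, 1, 1, 2],
                       'Y': [1, 1, 0, 0]})
sample['Time'] = sample['Time'].diff().shift(-1)   # how long each row's state lasted
shifted = sample[['X', 'Y']].shift(-1)             # state reached at the next row
shifted.columns = ['XS', 'YS']
sample = sample.assign(**shifted).drop(sample.tail(1).index)
# columns are now: Time (delta), X, Y, XS, YS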
diff --git a/main_package/classes/network_graph.py b/main_package/classes/network_graph.py
index 616e381..f2b5e28 100644
--- a/main_package/classes/network_graph.py
+++ b/main_package/classes/network_graph.py
@@ -19,10 +19,22 @@ class NetworkGraph():
     def __init__(self, graph_struct):
         self.graph_struct = graph_struct
         self.graph = nx.DiGraph()
+        self._nodes_indexes = self.graph_struct.list_of_nodes_indexes()
+        self._nodes_labels = self.graph_struct.list_of_nodes_labels()
+        self._fancy_indexing = None
+        self._time_scalar_indexing_structure = []
+        self._transition_scalar_indexing_structure = []
+        self._time_filtering = []
+        self._transition_filtering = []
 
     def init_graph(self):
-        self.add_nodes(self.graph_struct.list_of_nodes())
+        self.add_nodes(self.graph_struct.list_of_nodes_labels())
         self.add_edges(self.graph_struct.list_of_edges())
+        self._fancy_indexing = self.build_fancy_indexing_structure(0)
+        self.build_time_scalar_indexing_structure()
+        self.build_time_columns_filtering_structure()
+        self.build_transition_scalar_indexing_structure()
+        self.build_transition_columns_filtering_structure()
 
     def add_nodes(self, list_of_nodes):
         for indx, id in enumerate(list_of_nodes):
@@ -36,34 +48,33 @@
         ordered_set = {}
         parents = self.get_parents_by_id(node)
         for n in parents:
-            indx = self.graph_struct.get_node_indx(n)
+            indx = self._nodes_labels.index(n)
             ordered_set[n] = indx
         {k: v for k, v in sorted(ordered_set.items(), key=lambda item: item[1])}
         return list(ordered_set.keys())
 
     def get_ord_set_of_par_of_all_nodes(self):
         result = []
-        for node in self.get_nodes():
+        for node in self._nodes_labels:
             result.append(self.get_ordered_by_indx_set_of_parents(node))
         return result
 
     def get_ordered_by_indx_parents_values(self, node):
         parents_values = []
-        parents = self.get_parents_by_id(node)
-        parents.sort()  # I assume the structure reflects the column order of the dataset
+        parents = self.get_ordered_by_indx_set_of_parents(node)
        for n in parents:
             parents_values.append(self.graph_struct.get_states_number(n))
         return parents_values
 
     def get_ordered_by_indx_parents_values_for_all_nodes(self):
         result = []
-        for node in self.get_nodes():  #TODO we must be sure this order is consistent with the dataset's; a get_nodes_sort_by_indx method is needed
+        for node in self._nodes_labels:
             result.append(self.get_ordered_by_indx_parents_values(node))
         return result
 
     def get_states_number_of_all_nodes_sorted(self):
         states_number_list = []
-        for node in self.get_nodes():  #TODO a get_nodes_ordered method is NEEDED!!!!!!
+        for node in self._nodes_labels:
             states_number_list.append(self.get_states_number(node))
         return states_number_list
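One detail worth flagging in get_ordered_by_indx_set_of_parents above: the sorted dict comprehension is built but its result appears to be discarded, so list(ordered_set.keys()) comes back in the graph's adjacency order rather than in index order. A minimal sketch of the sort the method seems to aim for (hypothetical names, assuming self._nodes_labels holds the dataset column order):

parents = ['Z', 'X']
nodes_labels = ['X', 'Y', 'Z']
ordered_parents = sorted(parents, key=nodes_labels.index)   # -> ['X', 'Z']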
@@ -74,9 +85,55 @@
             indexes_for_a_node = []
             for j, node in enumerate(list_of_parents):
                 indexes_for_a_node.append(self.get_node_indx(node) + start_indx)
-            index_structure.append(indexes_for_a_node)
+            index_structure.append(np.array(indexes_for_a_node, dtype=np.int))
         return index_structure
 
+    def build_time_scalar_indexing_structure_for_a_node(self, node_id, parents_id):
+        #print(parents_id)
+        T_vector = np.array([self.graph_struct.variables_frame.iloc[node_id, 1].astype(np.int)])
+        #print(T_vector)
+        T_vector = np.append(T_vector, [self.graph_struct.variables_frame.iloc[x, 1] for x in parents_id])
+        #print(T_vector)
+        T_vector = T_vector.cumprod().astype(np.int)
+        return T_vector
+        #print(T_vector)
+
+    def build_time_scalar_indexing_structure(self):
+        parents_indexes_list = self._fancy_indexing
+        for node_indx, p_indxs in enumerate(parents_indexes_list):
+            if p_indxs.size == 0:
+                self._time_scalar_indexing_structure.append(np.array([self.get_states_number_by_indx(node_indx)], dtype=np.int))
+            else:
+                self._time_scalar_indexing_structure.append(
+                    self.build_time_scalar_indexing_structure_for_a_node(node_indx, p_indxs))
+
+    def build_transition_scalar_indexing_structure_for_a_node(self, node_id, parents_id):
+        M_vector = np.array([self.graph_struct.variables_frame.iloc[node_id, 1],
+                             self.graph_struct.variables_frame.iloc[node_id, 1].astype(np.int)])
+        M_vector = np.append(M_vector, [self.graph_struct.variables_frame.iloc[x, 1] for x in parents_id])
+        M_vector = M_vector.cumprod().astype(np.int)
+        return M_vector
+
+    def build_transition_scalar_indexing_structure(self):
+        parents_indexes_list = self._fancy_indexing
+        for node_indx, p_indxs in enumerate(parents_indexes_list):
+            self._transition_scalar_indexing_structure.append(
+                self.build_transition_scalar_indexing_structure_for_a_node(node_indx, p_indxs))
+
+    def build_time_columns_filtering_structure(self):
+        parents_indexes_list = self._fancy_indexing
+        for node_indx, p_indxs in enumerate(parents_indexes_list):
+            if p_indxs.size == 0:
+                self._time_filtering.append(np.append(p_indxs, np.array([node_indx], dtype=np.int)))
+            else:
+                self._time_filtering.append(np.append(np.array([node_indx], dtype=np.int), p_indxs))
+
+    def build_transition_columns_filtering_structure(self):
+        parents_indexes_list = self._fancy_indexing
+        nodes_number = len(parents_indexes_list)
+        for node_indx, p_indxs in enumerate(parents_indexes_list):
+            self._transition_filtering.append(np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int))
+
     def get_nodes(self):
         return list(self.graph.nodes)
@@ -89,12 +146,30 @@
     def get_states_number(self, node_id):
         return self.graph_struct.get_states_number(node_id)
 
+    def get_states_number_by_indx(self, node_indx):
+        return self.graph_struct.get_states_number_by_indx(node_indx)
+
     def get_node_by_index(self, node_indx):
         return self.graph_struct.get_node_id(node_indx)
 
     def get_node_indx(self, node_id):
         return nx.get_node_attributes(self.graph, 'indx')[node_id]
 
+    @property
+    def time_scalar_indexing_strucure(self):
+        return self._time_scalar_indexing_structure
+
+    @property
+    def time_filtering(self):
+        return self._time_filtering
+
+    @property
+    def transition_scalar_indexing_structure(self):
+        return self._transition_scalar_indexing_structure
+
+    @property
+    def transition_filtering(self):
+        return self._transition_filtering
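The hunk above builds, per node, a cumulative-product vector (node cardinality first, then its parents') and a column filter; the estimator multiplies a trajectory row by that vector, divides by its first entry and sums, which packs (node state, parents combination) into one flat index for np.bincount. A small check with hypothetical values, all variables having 3 states as in the commented example output at the end of this file:

import numpy as np

T_vector = np.cumprod(np.array([3, 3, 3]))      # node first, then its parents -> [3, 9, 27]
weights = T_vector / T_vector[0]                # -> [1., 3., 9.]
row_states = np.array([2, 0, 1])                # [node state, parent 1 state, parent 2 state]
flat_index = int(np.sum(row_states * weights))  # 2 + 0*3 + 1*9 = 11
# bincount(..., minlength=T_vector[-1]).reshape(-1, 3) sends this row to
# cell [parents combination 3, node state 2]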
@@ -110,16 +185,22 @@
 s1.build_structure()
 g1 = NetworkGraph(s1.structure)
 g1.init_graph()
-print(g1.graph.number_of_nodes())
-print(g1.graph.number_of_edges())
-
-print(nx.get_node_attributes(g1.graph, 'indx')['X'])
-for node in g1.get_parents_by_id('Z'):
-    # print(g1.get_node_by_index(node))
-    print(node)
-print(g1.get_ordered_by_indx_parents_values_for_all_nodes())
-print(g1.build_fancy_indexing_structure())
-print(g1.get_states_number_of_all_nodes_sorted())"""
-
-
+print(g1.transition_scalar_indexing_structure)
+print(g1.transition_filtering)
+print(g1.time_scalar_indexing_strucure)
+print(g1.time_filtering)
+
+
+#print(g1.build_fancy_indexing_structure(0))
+#print(g1.get_states_number_of_all_nodes_sorted())
+g1.build_scalar_indexing_structure()
+print(g1.scalar_indexing_structure)
+print(g1.build_columns_filtering_structure())
+g1.build_transition_scalar_indexing_structure()
+print(g1.transition_scalar_indexing_structure)
+g1.build_transition_columns_filtering_structure()
+print(g1.transition_filtering)
+
+[array([3, 9]), array([ 3, 9, 27]), array([ 3, 9, 27, 81])]
+[array([3, 0]), array([4, 1, 2]), array([5, 2, 0, 1])]"""
diff --git a/main_package/classes/parameters_estimator.py b/main_package/classes/parameters_estimator.py
index 9a2032f..6073484 100644
--- a/main_package/classes/parameters_estimator.py
+++ b/main_package/classes/parameters_estimator.py
@@ -1,8 +1,9 @@
 import os
-import time as tm
+
 from line_profiler import LineProfiler
-import numpy as np
+import numba as nb
+import numpy as np
 import network_graph as ng
 import sample_path as sp
 import amalgamated_cims as acims
@@ -13,7 +14,6 @@ class ParametersEstimator:
 
     def __init__(self, sample_path, net_graph):
         self.sample_path = sample_path
         self.net_graph = net_graph
-        self.fancy_indexing_structure = self.net_graph.build_fancy_indexing_structure(1)
         self.amalgamated_cims_struct = None
 
@@ -21,67 +21,71 @@
     def init_amalgamated_cims_struct(self):
         self.amalgamated_cims_struct = acims.AmalgamatedCims(
             self.net_graph.get_nodes(),
             self.net_graph.get_ordered_by_indx_parents_values_for_all_nodes())
 
-    def parameters_estimation(self):
-        #print("Starting computing")
-        #t0 = tm.time()
-        for indx, trajectory in enumerate(self.sample_path.trajectories):
-            self.parameters_estimation_single_trajectory(trajectory.get_trajectory())
-            #print("Finished Trajectory number", indx)
-        #t1 = tm.time() - t0
-        #print("Elapsed Time ", t1)
-
-    def parameters_estimation_single_trajectory(self, trajectory):
-        tr_len = trajectory.shape[0]
-        row_length = trajectory.shape[1]
-        print(tr_len)
-        print(row_length)
-        t0 = tm.time()
-        for indx, row in enumerate(trajectory):
-            """ #if int(trajectory[indx][1]) == -1:
-                #break
-            if indx == tr_len - 2:
-                break
-            if trajectory[indx + 1][1] != -1:
-                transition = self.find_transition(trajectory[indx], trajectory[indx + 1], row_length)
-                which_node = transition[0]
-                # print(which_node)
-                which_matrix = self.which_matrix_to_update(row, transition[0])
-                which_element = transition[1]
-                self.amalgamated_cims_struct.update_state_transition_for_matrix(which_node, which_matrix, which_element)
-
-            #changed_node = which_node
-            if int(trajectory[indx][0]) == 0:
-                time = trajectory[indx + 1][0]
-                #time = self.compute_time_delta(trajectory[indx], trajectory[indx + 1])
-                which_element = transition[1][0]
-                self.amalgamated_cims_struct.update_state_residence_time_for_matrix(which_node, which_matrix, which_element,
-                                                                                    time)
-
-                for node_indx in range(0, 3):
-                    if node_indx != transition[0]:
-                        # print(node)
-                        which_node = node_indx
-                        which_matrix = self.which_matrix_to_update(row, node_indx)
-                        which_element = int(row[node_indx + 1])
-                        # print("State res time element " + str(which_element) + node)
-                        # print("State res time matrix indx" + str(which_matrix))
-                        self.amalgamated_cims_struct.update_state_residence_time_for_matrix(which_node, which_matrix,
-                                                                                            which_element, time)
-        t1 = tm.time() - t0
-        print("Elapsed Time ", t1)"""
-
-    def find_transition(self, current_row, next_row, row_length):
-        for indx in range(1, row_length):
-            if current_row[indx] != next_row[indx]:
-                return [indx - 1, (current_row[indx], next_row[indx])]
-
-    def compute_time_delta(self, current_row, next_row):
-        return next_row[0] - current_row[0]
-
-    def which_matrix_to_update(self, current_row, node_indx):  # produces structures {'X':1, 'Y':2} where X and Y are the parents of node_id
-        return current_row[self.fancy_indexing_structure[node_indx]]
-
+    def compute_parameters(self):
+        for node_indx, set_of_cims in enumerate(self.amalgamated_cims_struct.sets_of_cims):
+            self.compute_state_res_time_for_node(node_indx, self.sample_path.trajectories.times,
+                                                 self.sample_path.trajectories.trajectory,
+                                                 self.net_graph.time_filtering[node_indx],
+                                                 self.net_graph.time_scalar_indexing_strucure[node_indx],
+                                                 set_of_cims.state_residence_times)
+            self.compute_state_transitions_for_a_node(node_indx,
+                                                      self.sample_path.trajectories.complete_trajectory,
+                                                      self.net_graph.transition_filtering[node_indx],
+                                                      self.net_graph.transition_scalar_indexing_structure[node_indx],
+                                                      set_of_cims.transition_matrices)
+            set_of_cims.build_cims(set_of_cims.state_residence_times, set_of_cims.transition_matrices)
+
+
+
+    def compute_state_res_time_for_node(self, node_indx, times, trajectory, cols_filter, scalar_indexes_struct, T):
+        #print(times.size)
+        #print(trajectory)
+        #print(cols_filter)
+        #print(scalar_indexes_struct)
+        #print(T)
+        T[:] = np.bincount(np.sum(trajectory[:, cols_filter] * scalar_indexes_struct / scalar_indexes_struct[0], axis=1)
+                           .astype(np.int), \
+                           times,
+                           minlength=scalar_indexes_struct[-1]).reshape(-1, T.shape[1])
+        #print("Done This NODE", T)
+
+    def compute_state_residence_time_for_all_nodes(self):
+        for node_indx, set_of_cims in enumerate(self.amalgamated_cims_struct.sets_of_cims):
+            self.compute_state_res_time_for_node(node_indx, self.sample_path.trajectories[0].get_times(),
+                    self.sample_path.trajectories[0].get_trajectory(), self.columns_filtering_structure[node_indx],
+                    self.scalar_indexes_converter[node_indx], set_of_cims.state_residence_times)
+
+
+    def compute_state_transitions_for_a_node(self, node_indx, trajectory, cols_filter, scalar_indexing, M):
+        #print(node_indx)
+        #print(trajectory)
+        #print(cols_filter)
+        #print(scalar_indexing)
+        #print(M)
+        diag_indices = np.array([x * M.shape[1] + x % M.shape[1] for x in range(M.shape[0] * M.shape[1])],
+                                dtype=np.int64)
+        trj_tmp = trajectory[trajectory[:, int(trajectory.shape[1] / 2) + node_indx].astype(np.int) >= 0]
+        #print(trj_tmp)
+        #print("Summing", np.sum(trj_tmp[:, cols_filter] * scalar_indexing / scalar_indexing[0], axis=1).astype(np.int))
+        #print(M.shape[1])
+        #print(M.shape[2])
+
+        M[:] = np.bincount(np.sum(trj_tmp[:, cols_filter] * scalar_indexing / scalar_indexing[0], axis=1).astype(np.int),
+                           minlength=scalar_indexing[-1]).reshape(-1, M.shape[1], M.shape[2])
+        M_raveled = M.ravel()
+        M_raveled[diag_indices] = 0
+        #print(M_raveled)
+        M_raveled[diag_indices] = np.sum(M, axis=2).ravel()
+        #print(M_raveled)
+
+        #print(M)
+
+    def compute_state_transitions_for_all_nodes(self):
+        for node_indx, set_of_cims in enumerate(self.amalgamated_cims_struct.sets_of_cims):
+            self.compute_state_transitions_for_a_node(node_indx, self.sample_path.trajectories[0].get_complete_trajectory(),
+                    self.transition_filtering[node_indx],
+                    self.transition_scalar_index_converter[node_indx], set_of_cims.transition_matrices)
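compute_state_res_time_for_node above relies on the weighted form of np.bincount: every trajectory row contributes its delta time to the cell addressed by its flat index, and one reshape yields the residence-time matrix. A self-contained illustration with made-up numbers (the indices are assumed to come from the scalar-indexing vectors built in network_graph.py):

import numpy as np

flat_indices = np.array([0, 2, 0, 5, 2])       # one flat index per trajectory row
times = np.array([0.5, 1.0, 0.25, 2.0, 0.5])   # per-row sojourn times
T = np.bincount(flat_indices, weights=times, minlength=6).reshape(-1, 3)
# T == [[0.75, 0.  , 1.5 ],
#       [0.  , 0.  , 2.  ]]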
@@ -99,18 +103,50 @@ g1.init_graph()
 
 pe = ParametersEstimator(s1, g1)
 pe.init_amalgamated_cims_struct()
-print(pe.amalgamated_cims_struct.get_set_of_cims(0).get_cims_number())
-print(pe.amalgamated_cims_struct.get_set_of_cims(1).get_cims_number())
-print(pe.amalgamated_cims_struct.get_set_of_cims(2).get_cims_number())
-#pe.parameters_estimation_single_trajectory(pe.sample_path.trajectories[0].get_trajectory())
 lp = LineProfiler()
-lp_wrapper = lp(pe.parameters_estimation_single_trajectory)
-lp_wrapper(pe.sample_path.trajectories.get_trajectory())
+
+"""[[2999.2966 2749.2298 3301.5975]
+ [3797.1737 3187.8345 2939.2009]
+ [3432.224  3062.5402 4530.9028]]
+
+[[ 827.6058  838.1515  686.1365]
+ [1426.384  2225.2093 1999.8528]
+ [ 745.3068  733.8129  746.2347]
+ [ 520.8113  690.9502  853.4022]
+ [1590.8609 1853.0021 1554.1874]
+ [ 637.5576  643.8822  654.9506]
+ [ 718.7632  742.2117  998.5844]
+ [1811.984  1598.0304 2547.988 ]
+ [ 770.8503  598.9588  984.3304]]
+
+lp_wrapper = lp(pe.compute_state_residence_time_for_all_nodes)
+lp_wrapper()
+lp.print_stats()
+
+#pe.compute_state_residence_time_for_all_nodes()
+print(pe.amalgamated_cims_struct.sets_of_cims[0].state_residence_times)
+
+[[[14472,  3552, 10920],
+  [12230, 25307, 13077],
+  [ 9707, 14408, 24115]],
+
+ [[22918,  6426, 16492],
+  [10608, 16072,  5464],
+  [10746, 11213, 21959]],
+
+ [[23305,  6816, 16489],
+  [ 3792, 19190, 15398],
+  [13718, 18243, 31961]]])
+
+ Raveled [14472  3552 10920 12230 25307 13077  9707 14408 24115 22918  6426 16492
+ 10608 16072  5464 10746 11213 21959 23305  6816 16489  3792 19190 15398
+ 13718 18243 31961]"""
+
+lp_wrapper = lp(pe.compute_parameters)
+lp_wrapper()
+#for variable in pe.amalgamated_cims_struct.sets_of_cims:
+    #for cond in variable.get_cims():
+        #print(cond.cim)
+print(pe.amalgamated_cims_struct.get_cims_of_node(1,[2]))
 lp.print_stats()
-#pe.parameters_estimation()
-"""for matrix in pe.amalgamated_cims_struct.get_set_of_cims(1).actual_cims:
-    print(matrix.state_residence_times)
-    print(matrix.state_transition_matrix)
-    matrix.compute_cim_coefficients()
-    print(matrix.cim)"""
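For reference, the call sequence the new estimator expects, reconstructed from the test snippets kept in this patch (the '../data' path and the node/CIM indices are the placeholders those snippets use, not fixed values):

s1 = sp.SamplePath('../data')
s1.build_trajectories()
s1.build_structure()
g1 = ng.NetworkGraph(s1.structure)
g1.init_graph()
pe = ParametersEstimator(s1, g1)
pe.init_amalgamated_cims_struct()
pe.compute_parameters()
print(pe.amalgamated_cims_struct.get_cims_of_node(1, [2]))   # CIM of node 1 for parent combination [2]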
diff --git a/main_package/classes/sample_path.py b/main_package/classes/sample_path.py
index a7f2be3..acee6c1 100644
--- a/main_package/classes/sample_path.py
+++ b/main_package/classes/sample_path.py
@@ -19,25 +19,30 @@ class SamplePath:
     """
 
     def __init__(self, files_path):
-        print()
         self.importer = imp.JsonImporter(files_path)
-        self.trajectories = None
-        self.structure = None
+        self._trajectories = None
+        self._structure = None
 
     def build_trajectories(self):
         self.importer.import_data()
-        self.importer.compute_row_delta_in_all_samples_frames()
-        #self.trajectories = self.importer.concatenated_samples.to_numpy()
-        #for traj_data_frame in self.importer.df_samples_list:
-        self.trajectories = tr.Trajectory(self.importer.build_list_of_samples_array(self.importer.concatenated_samples))
-        #self.trajectories.append(trajectory)
+        self._trajectories = tr.Trajectory(self.importer.build_list_of_samples_array(self.importer.concatenated_samples))
+        #self.trajectories.append(trajectory)
         self.importer.clear_data_frames()
 
     def build_structure(self):
-        self.structure = st.Structure(self.importer.df_structure, self.importer.df_variables)
+        self._structure = st.Structure(self.importer.structure, self.importer.variables)
 
-    def get_number_trajectories(self):
-        return len(self.trajectories)
+    @property
+    def trajectories(self):
+        return self._trajectories
+
+    @property
+    def structure(self):
+        return self._structure
+
+"""os.getcwd()
+os.chdir('..')
+path = os.getcwd() + '/data'
 
 
 """os.getcwd()
@@ -47,4 +52,5 @@ path = os.getcwd() + '/data'
 s1 = SamplePath(path)
 s1.build_trajectories()
 s1.build_structure()
-print(s1.trajectories.get_trajectory())"""
+print(s1.trajectories[0].get_complete_trajectory())"""
+
diff --git a/main_package/classes/set_of_cims.py b/main_package/classes/set_of_cims.py
index 2a970a6..4d0dac8 100644
--- a/main_package/classes/set_of_cims.py
+++ b/main_package/classes/set_of_cims.py
@@ -1,4 +1,5 @@
 import numpy as np
+from numba import njit, int32
 
 import conditional_intensity_matrix as cim
 
@@ -16,45 +17,91 @@ class SetOfCims:
         self.node_id = node_id
         self.parents_states_number = parents_states_number
         self.node_states_number = node_states_number
-        self.actual_cims = None
+        self.actual_cims = []
+        self.state_residence_times = None
+        self.transition_matrices = None
         self.build_actual_cims_structure()
 
     def build_actual_cims_structure(self):
-        cims_number = 1
-        for state_number in self.parents_states_number:
-            cims_number = cims_number * state_number
-        self.actual_cims = np.empty(cims_number, dtype=cim.ConditionalIntensityMatrix)
-        for indx, matrix in enumerate(self.actual_cims):
-            self.actual_cims[indx] = cim.ConditionalIntensityMatrix(self.node_states_number)
+        #cims_number = 1
+        #for state_number in self.parents_states_number:
+            #cims_number = cims_number * state_number
+        if not self.parents_states_number:
+            #self.actual_cims = np.empty(1, dtype=cim.ConditionalIntensityMatrix)
+            #self.actual_cims[0] = cim.ConditionalIntensityMatrix(self.node_states_number)
+            self.state_residence_times = np.zeros((1, self.node_states_number), dtype=np.float)
+            self.transition_matrices = np.zeros((1,self.node_states_number, self.node_states_number), dtype=np.int)
+        else:
+            #self.actual_cims = np.empty(self.parents_states_number, dtype=cim.ConditionalIntensityMatrix)
+            #self.build_actual_cims(self.actual_cims)
+            #for indx, matrix in enumerate(self.actual_cims):
+                #self.actual_cims[indx] = cim.ConditionalIntensityMatrix(self.node_states_number)
+            self.state_residence_times = \
+                np.zeros((np.prod(self.parents_states_number), self.node_states_number), dtype=np.float)
+            self.transition_matrices = np.zeros([np.prod(self.parents_states_number), self.node_states_number,
+                                                 self.node_states_number], dtype=np.int)
 
     def update_state_transition(self, indexes, element_indx_tuple):
-        matrix_indx = self.indexes_converter(indexes)
-        self.actual_cims[matrix_indx].update_state_transition_count(element_indx_tuple)
+        #matrix_indx = self.indexes_converter(indexes)
+        #print(indexes)
+        if not indexes:
+            self.actual_cims[0].update_state_transition_count(element_indx_tuple)
+        else:
+            self.actual_cims[indexes].update_state_transition_count(element_indx_tuple)
 
     def update_state_residence_time(self, which_matrix, which_element, time):
-        matrix_indx = self.indexes_converter(which_matrix)
-        self.actual_cims[matrix_indx].update_state_residence_time_for_state(which_element, time)
+        #matrix_indx = self.indexes_converter(which_matrix)
+        if not which_matrix:
+            self.actual_cims[0].update_state_residence_time_for_state(which_element, time)
+        else:
+            #print(type(which_matrix))
+            #print(self.actual_cims[(2,2)])
+            self.actual_cims[which_matrix].update_state_residence_time_for_state(which_element, time)
+
+    def build_actual_cims(self, cim_structure):
+        for indx in range(len(cim_structure)):
+            if cim_structure[indx] is None:
+                cim_structure[indx] = cim.ConditionalIntensityMatrix(self.node_states_number)
+            else:
+                self.build_actual_cims(cim_structure[indx])
 
     def get_cims_number(self):
         return len(self.actual_cims)
+
     def indexes_converter(self, indexes):  # expects an array like [2,2] where
-        #print(type(indexes))
-        if indexes.size == 0:
-            return 0
+        assert len(indexes) == len(self.parents_states_number)
+        vector_index = 0
+        if not indexes:
+            return vector_index
         else:
-            vector_index = 0
             for indx, value in enumerate(indexes):
                 vector_index = vector_index*self.parents_states_number[indx] + indexes[indx]
             return vector_index
 
+    def build_cims(self, state_res_times, transition_matrices):
+        for state_res_time_vector, transition_matrix in zip(state_res_times, transition_matrices):
+            #print(state_res_time_vector, transition_matrix)
+            cim_to_add = cim.ConditionalIntensityMatrix(self.node_states_number,
+                                                        state_res_time_vector, transition_matrix)
+            cim_to_add.compute_cim_coefficients()
+            #print(cim_to_add)
+            self.actual_cims.append(cim_to_add)
+        self.transition_matrices = None
+        self.state_residence_times = None
 
-"""
-sofc = SetOfCims('W', [], 2)
-sofc.build_actual_cims_structure()
-print(sofc.actual_cims)
-print(sofc.indexes_converter([]))"""
+    def get_cims(self):
+        return self.actual_cims
 
+    def get_cim(self, index):
+        flat_index = self.indexes_converter(index)
+        return self.actual_cims[flat_index]
+"""sofc = SetOfCims('Z', [3, 3], 3)
+sofc.build_actual_cims_structure()
+print(sofc.actual_cims)
+print(sofc.actual_cims[0,0])
+print(sofc.actual_cims[1,2])
+#print(sofc.indexes_converter([]))"""
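SetOfCims.indexes_converter flattens a combination of parent states row-major, with the first parent most significant, and get_cim uses it to pick the matching CIM out of actual_cims. A worked example with made-up cardinalities:

parents_states_number = [3, 4]          # first parent has 3 states, second has 4
indexes = [2, 1]                        # first parent in state 2, second in state 1
vector_index = 0
for indx, value in enumerate(indexes):
    vector_index = vector_index * parents_states_number[indx] + indexes[indx]
# vector_index == 2*4 + 1 == 9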
diff --git a/main_package/classes/structure.py b/main_package/classes/structure.py
index ee687f9..7f1bea8 100644
--- a/main_package/classes/structure.py
+++ b/main_package/classes/structure.py
@@ -12,6 +12,9 @@ class Structure:
     def __init__(self, structure, variables):
         self.structure_frame = structure
         self.variables_frame = variables
+        #self._nodes_indexes = self.list_of_nodes_indexes()
+        self.name_label = variables.columns.values[0]
+        self.value_label = variables.columns.values[1]
 
     def list_of_edges(self):
         edges_list = []
@@ -20,14 +23,24 @@
             edges_list.append(row_tuple)
         return edges_list
 
-    def list_of_nodes(self):
-        return self.variables_frame['Name'].values.tolist()  #TODO remove the direct dependency on the 'Name' key
+    def list_of_nodes_labels(self):
+        return self.variables_frame[self.name_label].values.tolist()
+
+    def list_of_nodes_indexes(self):
+        nodes_indexes = []
+        for indx in self.list_of_nodes_labels():
+            nodes_indexes.append(indx)
+        return nodes_indexes
 
     def get_node_id(self, node_indx):
-        return self.variables_frame['Name'][node_indx]
+        return self.variables_frame[self.name_label][node_indx]
 
     def get_node_indx(self, node_id):
-        return list(self.variables_frame['Name']).index(node_id)
+        return list(self.variables_frame[self.name_label]).index(node_id)
 
     def get_states_number(self, node):
-        return self.variables_frame['Value'][self.get_node_indx(node)]
+        return self.variables_frame[self.value_label][self.get_node_indx(node)]
+
+    def get_states_number_by_indx(self, node_indx):
+        #print(self.value_label)
+        return self.variables_frame[self.value_label][node_indx]
diff --git a/main_package/classes/trajectory.py b/main_package/classes/trajectory.py
index 7ee004c..a41fb6a 100644
--- a/main_package/classes/trajectory.py
+++ b/main_package/classes/trajectory.py
@@ -1,8 +1,8 @@
-import pandas as pd
+
 import numpy as np
 
 
-class Trajectory():
+class Trajectory:
""" Rappresenta una traiettoria come un numpy_array contenente n-ple (indx, T_k,S_i,.....,Sj) Offre i metodi utili alla computazione sulla struttura stessa. @@ -15,11 +15,24 @@ class Trajectory(): """ def __init__(self, list_of_columns): - self.actual_trajectory = np.array(list_of_columns, dtype=object).T - - def get_trajectory(self): - return self.actual_trajectory + print(list_of_columns) + self._actual_trajectory = np.array(list_of_columns[1:], dtype=np.int).T + self._times = np.array(list_of_columns[0], dtype=np.float) + print(self._times) + + @property + def trajectory(self): + return self._actual_trajectory[:, :4] + + @property + def complete_trajectory(self): + return self._actual_trajectory + + @property + def times(self): + return self._times + + def size(self): + return self.actual_trajectory.shape[0] - def merge_columns(self, list_of_cols): - return np.vstack(list_of_cols).T