diff --git a/main_package/classes/cache.py b/main_package/classes/cache.py index 16adc09..235e823 100644 --- a/main_package/classes/cache.py +++ b/main_package/classes/cache.py @@ -8,17 +8,17 @@ class Cache: self.list_of_sets_of_indxs = [] self.actual_cache = [] - def find(self, parents_comb: typing.Set): + def find(self, parents_comb: typing.Union[typing.Set, str]): try: #print("Cache State:", self.list_of_sets_of_indxs) #print("Look For:", parents_comb) result = self.actual_cache[self.list_of_sets_of_indxs.index(parents_comb)] - print("CACHE HIT!!!!") + print("CACHE HIT!!!!", parents_comb) return result except ValueError: return None - def put(self, parents_comb: typing.Set, socim: sofc.SetOfCims): + def put(self, parents_comb: typing.Union[typing.Set, str], socim: sofc.SetOfCims): #print("Putting in cache:", parents_comb) self.list_of_sets_of_indxs.append(parents_comb) self.actual_cache.append(socim) diff --git a/main_package/classes/json_importer.py b/main_package/classes/json_importer.py index 45add07..2d11d0e 100644 --- a/main_package/classes/json_importer.py +++ b/main_package/classes/json_importer.py @@ -4,23 +4,28 @@ import pandas as pd import json import typing from abstract_importer import AbstractImporter -from line_profiler import LineProfiler class JsonImporter(AbstractImporter): """ - Implementa l'interfaccia AbstractImporter e aggiunge i metodi necessari a costruire le trajectories e la struttura della rete - del dataset in formato json con la seguente struttura: + Implements the Interface AbstractImporter and adds all the necessary methods to process and prepare the data in json ext. 
+ with the following structure: [] 0 |_ dyn.cims |_ dyn.str |_ samples |_ variabels - - :df_samples_list: lista di dataframe, ogni dataframe contiene una traj - :df_structure: dataframe contenente la struttura della rete - :df_variables: dataframe contenente le infromazioni sulle variabili della rete - + :files_path: the path that contains tha data to be imported + :samples_label: the reference key for the samples in the trajectories + :structure_label: the reference key for the structure of the network data + :variables_label: the reference key for the cardinalites of the nodes data + :time_key: the key used to identify the timestamps in each trajectory + :variables_key: the key used to identify the names of the variables in the net + :df_samples_list: a Dataframe list in which every df contains a trajectory + :df_structure: Dataframe containing the structure of the network (edges) + :df_variables: Dataframe containing the nodes cardinalities + :df_concatenated_samples: the concatenation and processing of all the trajectories present in the list df_samples list + :sorter: the columns header(excluding the time column) of the Dataframe concatenated_samples """ def __init__(self, files_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str, @@ -38,6 +43,13 @@ class JsonImporter(AbstractImporter): super(JsonImporter, self).__init__(files_path) def import_data(self): + """ + Imports and prepares all data present needed for susequent computation. + Parameters: + void + Returns: + void + """ raw_data = self.read_json_file() #self.import_variables(raw_data) self.import_trajectories(raw_data) @@ -46,14 +58,38 @@ class JsonImporter(AbstractImporter): self.import_structure(raw_data) self.import_variables(raw_data, self.sorter) - def import_trajectories(self, raw_data: pd.DataFrame): + def import_trajectories(self, raw_data: typing.List): + """ + Imports the trajectories in the list of dicts raw_data. 
+ Parameters: + :raw_data: List of Dicts + Returns: + void + """ self.normalize_trajectories(raw_data, 0, self.samples_label) - def import_structure(self, raw_data: pd.DataFrame): + def import_structure(self, raw_data: typing.List): + """ + Imports in a dataframe the data in the list raw_data at the key structure_label + + Parameters: + raw_data: the data + Returns: + void + """ self._df_structure = self.one_level_normalizing(raw_data, 0, self.structure_label) - #TODO Attenzione l'ordine delle vars non è alfabetico come nel dataset -> agire di conseguenza - #Ordinando la vars alfabeticamente - def import_variables(self, raw_data: pd.DataFrame, sorter): + + def import_variables(self, raw_data: typing.List, sorter: typing.List): + """ + Imports the data in raw_data at the key variables_label. + Sorts the row of the dataframe df_variables using the list sorter. + + Parameters: + raw_data: the data + sorter: the list used to sort the dataframe self.df_variables + Returns: + void + """ self._df_variables = self.one_level_normalizing(raw_data, 0, self.variables_label) #self.sorter = self._df_variables[self.variables_key].to_list() #self.sorter.sort() @@ -62,16 +98,16 @@ class JsonImporter(AbstractImporter): self._df_variables[self.variables_key] = self._df_variables[self.variables_key].cat.set_categories(sorter) self._df_variables = self._df_variables.sort_values([self.variables_key]) self._df_variables.reset_index(inplace=True) - #print("Var Frame", self._df_variables) + print("Var Frame", self._df_variables) def read_json_file(self) -> typing.List: """ - Legge il primo file .json nel path self.filepath + Reads the first json file in the path self.filePath Parameters: void Returns: - :data: il contenuto del file json + data: the contents of the json file """ try: @@ -84,39 +120,55 @@ class JsonImporter(AbstractImporter): except ValueError as err: print(err.args) - def one_level_normalizing(self, raw_data: pd.DataFrame, indx: int, key: str) -> pd.DataFrame: + def 
one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame: """ - Estrae i dati innestati di un livello, presenti nel dataset raw_data, - presenti nel json array all'indice indx nel json object key + Extracts the one-level nested data in the list raw_data at the index indx at the key key Parameters: - :raw_data: il dataset json completo - :indx: l'indice del json array da cui estrarre i dati - :key: il json object da cui estrarre i dati + raw_data: List of Dicts + indx: The index of the array from which the data have to be extracted + key: the key for the Dicts from which exctract data Returns: - Il dataframe contenente i dati normalizzati + a normalized dataframe """ return pd.DataFrame(raw_data[indx][key]) - def normalize_trajectories(self, raw_data: pd.DataFrame, indx: int, trajectories_key: str): + def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str): """ - Estrae le traiettorie presenti in rawdata nel json array all'indice indx, nel json object trajectories_key. - Aggiunge le traj estratte nella lista di dataframe self.df_samples_list + Extracts the traj in raw_data at the index index at the key trajectories key. + Adds the extracted traj in the dataframe list self._df_samples_list. + Initializes the list self.sorter. 
Parameters: - void + raw_data: the data + indx: the index of the array from which extract data + trajectories_key: the key of the trajectories objects Returns: void """ - self.df_samples_list = [pd.DataFrame(sample) for sample in raw_data[indx][trajectories_key]] - #for sample_indx, sample in enumerate(raw_data[indx][trajectories_key]): - #self.df_samples_list.append(pd.DataFrame(sample)) - #self.sorter = list(self.df_samples_list[0].columns.values)[1:] #TODO Qui ci deve essere la colonna NAME ordinata alfabeticamente + dataframe = pd.DataFrame + smps = raw_data[indx][trajectories_key] + self.df_samples_list = [dataframe(sample) for sample in smps] + columns_header = list(self.df_samples_list[0].columns.values) + columns_header.remove(self.time_key) + self.sorter = columns_header def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame, time_header_label: str, columns_header: typing.List, shifted_cols_header: typing.List) \ -> pd.DataFrame: + """ + Computes the difference between each value present in th time column. + Copies and shift by one position up all the values present in the remaining columns. + Parameters: + sample_frame: the traj to be processed + time_header_label: the label for the times + columns_header: the original header of sample_frame + shifted_cols_header: a copy of columns_header with changed names of the contents + Returns: + sample_frame: the processed dataframe + + """ sample_frame[time_header_label] = sample_frame[time_header_label].diff().shift(-1) shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32') #print(shifted_cols) @@ -126,15 +178,25 @@ class JsonImporter(AbstractImporter): return sample_frame def compute_row_delta_in_all_samples_frames(self, time_header_label: str): - columns_header = list(self.df_samples_list[0].columns.values) + """ + Calls the method compute_row_delta_sigle_samples_frame on every dataframe present in the list self.df_samples_list. 
+ Concatenates the result in the dataframe concatanated_samples + + Parameters: + time_header_label: the label of the time column + Returns: + void + """ + """columns_header = list(self.df_samples_list[0].columns.values) columns_header.remove('Time') - self.sorter = columns_header + self.sorter = columns_header""" shifted_cols_header = [s + "S" for s in self.sorter] compute_row_delta = self.compute_row_delta_sigle_samples_frame """for indx, sample in enumerate(self.df_samples_list): self.df_samples_list[indx] = self.compute_row_delta_sigle_samples_frame(sample, time_header_label, self.sorter, shifted_cols_header)""" - self.df_samples_list = [compute_row_delta(sample, time_header_label, self.sorter, shifted_cols_header) for sample in self.df_samples_list] + self.df_samples_list = [compute_row_delta(sample, time_header_label, self.sorter, shifted_cols_header) + for sample in self.df_samples_list] self._concatenated_samples = pd.concat(self.df_samples_list) complete_header = self.sorter[:] complete_header.insert(0,'Time') @@ -146,10 +208,11 @@ class JsonImporter(AbstractImporter): def build_list_of_samples_array(self, data_frame: pd.DataFrame) -> typing.List: """ Costruisce una lista contenente le colonne presenti nel dataframe data_frame convertendole in numpy_array + Builds a List containing the columns of dataframe and converts them to a numpy array. 
Parameters: - :data_frame: il dataframe da cui estrarre e convertire le colonne + :data_frame: the dataframe from which the columns have to be extracted and converted Returns: - :columns_list: la lista contenente le colonne convertite in numpyarray + :columns_list: the resulting list of numpy arrays """ columns_list = [data_frame[column].to_numpy() for column in data_frame] @@ -159,7 +222,7 @@ class JsonImporter(AbstractImporter): def clear_concatenated_frame(self): """ - Rimuove tutti i valori contenuti nei data_frames presenti in df_samples_list + Removes all values in the dataframe concatenated_samples Parameters: void Returns: @@ -168,6 +231,9 @@ class JsonImporter(AbstractImporter): self._concatenated_samples = self._concatenated_samples.iloc[0:0] def clear_data_frame_list(self): + """ + Removes all values present in the dataframes in the list df_samples_list + """ for indx in range(len(self.df_samples_list)): # Le singole traj non servono più #TODO usare list comprens self.df_samples_list[indx] = self.df_samples_list[indx].iloc[0:0] @@ -180,7 +246,6 @@ class JsonImporter(AbstractImporter): cims_for_all_vars[var].append(pd.DataFrame(raw_data[indx][cims_key][var][p_comb]).to_numpy()) return cims_for_all_vars - @property def concatenated_samples(self): return self._concatenated_samples diff --git a/main_package/classes/network_graph.py b/main_package/classes/network_graph.py index 4d31c00..08d986d 100644 --- a/main_package/classes/network_graph.py +++ b/main_package/classes/network_graph.py @@ -3,7 +3,7 @@ import networkx as nx import numpy as np -class NetworkGraph(): +class NetworkGraph: """ Rappresenta il grafo che contiene i nodi e gli archi presenti nell'oggetto Structure graph_struct. 
Ogni nodo contine la label node_id, al nodo è anche associato un id numerico progressivo indx che rappresenta la posizione @@ -17,9 +17,9 @@ class NetworkGraph(): def __init__(self, graph_struct): self.graph_struct = graph_struct self.graph = nx.DiGraph() - self._nodes_indexes = self.graph_struct.list_of_nodes_indexes() - self._nodes_labels = self.graph_struct.list_of_nodes_labels() - self._nodes_values = self.graph_struct.nodes_values() + self._nodes_indexes = self.graph_struct.nodes_indexes + self._nodes_labels = self.graph_struct.nodes_labels + self._nodes_values = self.graph_struct.nodes_values self.aggregated_info_about_nodes_parents = None self._fancy_indexing = None self._time_scalar_indexing_structure = None @@ -30,7 +30,7 @@ class NetworkGraph(): def init_graph(self): self.add_nodes(self._nodes_labels) - self.add_edges(self.graph_struct.list_of_edges()) + self.add_edges(self.graph_struct.edges) self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes() self._fancy_indexing = self.build_fancy_indexing_structure(0) self.build_scalar_indexing_structures() @@ -41,7 +41,7 @@ class NetworkGraph(): def add_nodes(self, list_of_nodes): #self.graph.add_nodes_from(list_of_nodes) nodes_indxs = self._nodes_indexes - nodes_vals = self.graph_struct.nodes_values() + nodes_vals = self.graph_struct.nodes_values pos = 0 #print("LIST OF NODES", list_of_nodes) for id, node_indx, node_val in zip(list_of_nodes, nodes_indxs, nodes_vals): @@ -134,7 +134,7 @@ class NetworkGraph(): #parents_indexes_list = self._fancy_indexing """for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing): self._time_filtering.append(np.append(np.array([node_indx], dtype=np.int), p_indxs).astype(np.int))""" - nodes_indxs = self.graph_struct.list_of_nodes_indexes() + nodes_indxs = self._nodes_indexes #print("FINDXING", self._fancy_indexing) #print("Nodes Indxs", nodes_indxs) self._time_filtering = [np.append(np.array([node_indx], dtype=np.int), 
p_indxs).astype(np.int) @@ -145,7 +145,7 @@ class NetworkGraph(): nodes_number = self.graph_struct.total_variables_number """for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing): self._transition_filtering.append(np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int))""" - nodes_indxs = self.graph_struct.list_of_nodes_indexes() + nodes_indxs = self._nodes_indexes self._transition_filtering = [np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int) for node_indx, p_indxs in zip(nodes_indxs, self._fancy_indexing)] diff --git a/main_package/classes/sample_path.py b/main_package/classes/sample_path.py index e5b6f63..570deac 100644 --- a/main_package/classes/sample_path.py +++ b/main_package/classes/sample_path.py @@ -6,17 +6,26 @@ import structure as st class SamplePath: """ - Contiene l'aggregazione di una o più traiettorie e la struttura della rete. - Ha il compito dato di costruire tutte gli oggetti Trajectory e l'oggetto Structure - a partire dai dataframe contenuti in self.importer + Aggregates all the informations about the trajectories, the real structure of the sampled net and variables + cardinalites. + Has the task of creating the objects that will contain the mentioned data. 
+ + :files_path: the path that contains tha data to be imported + :samples_label: the reference key for the samples in the trajectories + :structure_label: the reference key for the structure of the network data + :variables_label: the reference key for the cardinalites of the nodes data + :time_key: the key used to identify the timestamps in each trajectory + :variables_key: the key used to identify the names of the variables in the net + + :importer: the Importer objects that will import ad process data + :trajectories: the Trajectory object that will contain all the concatenated trajectories + :structure: the Structure Object that will contain all the structurral infos about the net + :total_variables_count: the number of variables in the net - - :importer: l'oggetto Importer che ha il compito di caricare i dataset - :trajectories: lista di oggetti Trajectories - :structure: oggetto Structure """ - def __init__(self, files_path, samples_label, structure_label, variables_label, time_key, variables_key): + def __init__(self, files_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str, + variables_key: str): self.importer = imp.JsonImporter(files_path, samples_label, structure_label, variables_label, time_key, variables_key) self._trajectories = None @@ -24,6 +33,15 @@ class SamplePath: self.total_variables_count = None def build_trajectories(self): + """ + Builds the Trajectory object that will contain all the trajectories. + Clears all the unsed dataframes in Importer Object + + Parameters: + void + Returns: + void + """ self.importer.import_data() self._trajectories = \ tr.Trajectory(self.importer.build_list_of_samples_array(self.importer.concatenated_samples), @@ -32,12 +50,19 @@ class SamplePath: self.importer.clear_concatenated_frame() def build_structure(self): + """ + Builds the Structure object that aggregates all the infos about the net. 
+ Parameters: + void + Returns: + void + """ self.total_variables_count = len(self.importer.sorter) - labels = self.importer._df_variables['Name'].to_list() + labels = self.importer.variables[self.importer.variables_key].to_list() #print("SAMPLE PATH LABELS",labels) - indxs = self.importer._df_variables.index.to_numpy() - vals = self.importer._df_variables['Value'].to_numpy() - edges = list(self.importer._df_structure.to_records(index=False)) + indxs = self.importer.variables.index.to_numpy() + vals = self.importer.variables['Value'].to_numpy() + edges = list(self.importer.structure.to_records(index=False)) self._structure = st.Structure(labels, indxs, vals, edges, self.total_variables_count) diff --git a/main_package/classes/structure.py b/main_package/classes/structure.py index c588e9f..f4f5322 100644 --- a/main_package/classes/structure.py +++ b/main_package/classes/structure.py @@ -1,68 +1,78 @@ +import typing as ty import numpy as np class Structure: """ - Contiene tutte il informazioni sulla struttura della rete (connessione dei nodi, valori assumibili dalle variabili) + Contains all the infos about the network structure(nodes names, nodes caridinalites, edges...) 
- :structure_frame: il dataframe contenente le connessioni dei nodi della rete - :variables_frame: il data_frame contenente i valori assumibili dalle variabili e si suppone il corretto ordinamento - rispetto alle colonne del dataset + :nodes_labels_list: the symbolic names of the variables + :nodes_indexes_arr: the indexes of the nodes + :nodes_vals_arr: the cardinalites of the nodes + :edges_list: the edges of the network + :total_variables_number: the total number of variables in the net """ - def __init__(self, nodes_label_list, node_indexes_arr, nodes_vals_arr, edges_list, total_variables_number): - #self.structure_frame = structure - #self.variables_frame = variables - self.nodes_labels_list = nodes_label_list - self.nodes_indexes_arr = node_indexes_arr - self.nodes_vals_arr = nodes_vals_arr - self.edges_list = edges_list - self.total_variables_number = total_variables_number - #self.name_label = variables.columns.values[0] - #self.value_label = variables.columns.values[1] - - def list_of_edges(self): + def __init__(self, nodes_label_list: ty.List, node_indexes_arr: np.array, nodes_vals_arr: np.array, + edges_list: ty.List, total_variables_number: int): + self._nodes_labels_list = nodes_label_list + self._nodes_indexes_arr = node_indexes_arr + self._nodes_vals_arr = nodes_vals_arr + self._edges_list = edges_list + self._total_variables_number = total_variables_number + + @property + def edges(self): #records = self.structure_frame.to_records(index=False) #edges_list = list(records) - return self.edges_list + return self._edges_list - def list_of_nodes_labels(self): - return self.nodes_labels_list + @property + def nodes_labels(self): + return self._nodes_labels_list - def list_of_nodes_indexes(self): - return self.nodes_indexes_arr + @property + def nodes_indexes(self): + return self._nodes_indexes_arr - def get_node_id(self, node_indx): - return self.nodes_labels_list[node_indx] + @property + def nodes_values(self): + return self._nodes_vals_arr - def 
get_node_indx(self, node_id): - return self.nodes_indexes_arr[self.nodes_labels_list.index(node_id)] + @property + def total_variables_number(self): + return self._total_variables_number - def get_positional_node_indx(self, node_id): - return self.nodes_labels_list.index(node_id) + def get_node_id(self, node_indx: int): + return self._nodes_labels_list[node_indx] - def get_states_number(self, node): - #print("node", node) - return self.nodes_vals_arr[self.get_positional_node_indx(node)] + def get_node_indx(self, node_id: str): + pos_indx = self._nodes_labels_list.index(node_id) + return self._nodes_indexes_arr[pos_indx] - def get_states_number_by_indx(self, node_indx): - #print(self.value_label) - #print("Node indx", node_indx) - return self.nodes_vals_arr[node_indx] + def get_positional_node_indx(self, node_id: str): + return self._nodes_labels_list.index(node_id) - def nodes_values(self): - return self.nodes_vals_arr + def get_states_number(self, node: str): + pos_indx = self._nodes_labels_list.index(node) + return self._nodes_vals_arr[pos_indx] - def total_variables_number(self): - return self.total_variables_number + def get_states_number_by_indx(self, node_indx: int): + #print(self.value_label) + #print("Node indx", node_indx) + return self._nodes_vals_arr[node_indx] def __repr__(self): - return "Variables:\n" + str(self.variables_frame) + "\nEdges: \n" + str(self.structure_frame) + return "Variables:\n" + str(self._nodes_labels_list) +"\nValues:\n"+ str(self._nodes_vals_arr) +\ + "\nEdges: \n" + str(self._edges_list) def __eq__(self, other): """Overrides the default implementation""" if isinstance(other, Structure): - return self.structure_frame.equals(other.structure_frame) and \ - self.variables_frame.equals(other.variables_frame) + return set(self._nodes_labels_list) == set(other._nodes_labels_list) and \ + np.array_equal(self._nodes_vals_arr, other._nodes_vals_arr) and \ + np.array_equal(self._nodes_indexes_arr, other._nodes_indexes_arr) and \ + 
set(self._edges_list) == set(other._edges_list) + return NotImplemented diff --git a/main_package/classes/structure_estimator.py b/main_package/classes/structure_estimator.py index 3da2869..3fe954d 100644 --- a/main_package/classes/structure_estimator.py +++ b/main_package/classes/structure_estimator.py @@ -15,13 +15,13 @@ class StructureEstimator: def __init__(self, sample_path, exp_test_alfa, chi_test_alfa): self.sample_path = sample_path - self.nodes = np.array(self.sample_path.structure.list_of_nodes_labels()) + self.nodes = np.array(self.sample_path.structure.nodes_labels) #print("NODES", self.nodes) - self.nodes_vals = self.sample_path.structure.nodes_vals_arr - self.nodes_indxs = self.sample_path.structure.nodes_indexes_arr + self.nodes_vals = self.sample_path.structure.nodes_values + self.nodes_indxs = self.sample_path.structure.nodes_indexes #self.nodes_indxs = np.array(range(0,4)) #print("INDXS", self.nodes_indxs) - self.complete_graph = self.build_complete_graph(self.sample_path.structure.list_of_nodes_labels()) + self.complete_graph = self.build_complete_graph(self.sample_path.structure.nodes_labels) self.exp_test_sign = exp_test_alfa self.chi_test_alfa = chi_test_alfa self.cache = ch.Cache() @@ -53,11 +53,13 @@ class StructureEstimator: cims_filter = sorted_parents != test_parent #print("PARENTS NO FROM MASK", cims_filter) if not p_set: + print("EMPTY PSET TRYING TO FIND", test_child) sofc1 = self.cache.find(test_child) else: sofc1 = self.cache.find(set(p_set)) if not sofc1: + print("CACHE MISSS SOFC1") bool_mask1 = np.isin(self.nodes,complete_info) #print("Bool mask 1", bool_mask1) l1 = list(self.nodes[bool_mask1]) @@ -88,6 +90,7 @@ class StructureEstimator: #p_set.append(test_parent) p_set.insert(0, test_parent) if p_set: + print("FULL PSET TRYING TO FIND", p_set) #p_set.append(test_parent) #print("PSET ", p_set) #set_p_set = set(p_set) @@ -102,6 +105,7 @@ class StructureEstimator: p2.compute_parameters_for_node(test_child) sofc2 = 
p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)]""" if not sofc2: + print("Cache MISSS SOFC2") complete_info.append(test_parent) bool_mask2 = np.isin(self.nodes, complete_info) #print("BOOL MASK 2",bool_mask2) diff --git a/main_package/classes/trajectory.py b/main_package/classes/trajectory.py index b7ce12d..d134927 100644 --- a/main_package/classes/trajectory.py +++ b/main_package/classes/trajectory.py @@ -4,13 +4,13 @@ import numpy as np class Trajectory: """ - Rappresenta una traiettoria come un numpy_array contenente n-ple (indx, T_k,S_i,.....,Sj) - Offre i metodi utili alla computazione sulla struttura stessa. + Abstracts the infos about a complete set of trajectories, represented as a numpy array of doubles and a numpy matrix + of ints. - Una Trajectory viene costruita a partire da una lista di numpyarray dove ogni elemento rappresenta una colonna - della traj - - :actual_trajectory: il numpy_array contenente la successione di n-ple (indx, T_k,S_i,.....,Sj) + :list_of_columns: the list containing the times array and values matrix + :original_cols_numb: total number of cols in the data + :actual_trajectory: the trajectory containing also the duplicated and shifted values + :times: the array containing the time deltas """ @@ -23,10 +23,22 @@ class Trajectory: @property def trajectory(self): + """ + Parameters: + void + Returns: + a numpy matrix containing ONLY the original columns values, not the shifted ones + """ return self._actual_trajectory[:, :self.original_cols_number] @property def complete_trajectory(self): + """ + Parameters: + void + Returns: + a numpy matrix containing all the values + """ return self._actual_trajectory @property diff --git a/main_package/tests/test_json_importer.py b/main_package/tests/test_json_importer.py index d48420e..f28c2d6 100644 --- a/main_package/tests/test_json_importer.py +++ b/main_package/tests/test_json_importer.py @@ -5,6 +5,8 @@ import numpy as np import pandas as pd import json_importer 
as ji +from line_profiler import LineProfiler + import os import json @@ -44,6 +46,7 @@ class TestJsonImporter(unittest.TestCase): def test_normalize_trajectories(self): j1 = ji.JsonImporter('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name') raw_data = j1.read_json_file() + #print(raw_data) j1.normalize_trajectories(raw_data, 0, j1.samples_label) self.assertEqual(len(j1.df_samples_list), len(raw_data[0][j1.samples_label])) self.assertEqual(list(j1.df_samples_list[0].columns.values)[1:], j1.sorter) @@ -51,7 +54,7 @@ class TestJsonImporter(unittest.TestCase): def test_normalize_trajectories_wrong_indx(self): j1 = ji.JsonImporter('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name') raw_data = j1.read_json_file() - self.assertRaises(IndexError, j1.normalize_trajectories, raw_data, 1, j1.samples_label) + self.assertRaises(IndexError, j1.normalize_trajectories, raw_data, 474, j1.samples_label) def test_normalize_trajectories_wrong_key(self): j1 = ji.JsonImporter('../data', 'sample', 'dyn.str', 'variables', 'Time', 'Name') @@ -77,6 +80,7 @@ class TestJsonImporter(unittest.TestCase): j1.import_trajectories(raw_data) j1.compute_row_delta_in_all_samples_frames(j1.time_key) self.assertEqual(list(j1.df_samples_list[0].columns.values), list(j1.concatenated_samples.columns.values)) + self.assertEqual(list(j1.concatenated_samples.columns.values)[0], j1.time_key) def test_clear_data_frame_list(self): j1 = ji.JsonImporter('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name') @@ -112,7 +116,12 @@ class TestJsonImporter(unittest.TestCase): def test_import_data(self): j1 = ji.JsonImporter('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name') - j1.import_data() + lp = LineProfiler() + + lp_wrapper = lp(j1.import_data) + lp_wrapper() + lp.print_stats() + #j1.import_data() self.assertEqual(list(j1.variables[j1.variables_key]), list(j1.concatenated_samples.columns.values[1:len(j1.variables[j1.variables_key]) + 1])) print(j1.variables) diff --git 
a/main_package/tests/sample_path_test.py b/main_package/tests/test_sample_path.py similarity index 100% rename from main_package/tests/sample_path_test.py rename to main_package/tests/test_sample_path.py diff --git a/main_package/tests/test_structure.py b/main_package/tests/test_structure.py index 1c18166..56f45ddc 100644 --- a/main_package/tests/test_structure.py +++ b/main_package/tests/test_structure.py @@ -11,71 +11,74 @@ import parameters_estimator as pe class TestStructure(unittest.TestCase): - def setUp(self): - self.structure_frame = pd.DataFrame([{"From":"X","To":"Z"}, {"From":"Y","To":"Z"}, - {"From":"Z","To":"Y"} ]) - self.variables_frame = pd.DataFrame([{"Name":"X","Value":3},{"Name":"Y","Value":3},{"Name":"Z","Value":3}]) + @classmethod + def setUpClass(cls): + cls.labels = ['X','Y','Z'] + cls.indxs = np.array([0,1,2]) + cls.vals = np.array([3,3,3]) + cls.edges = [('X','Z'),('Y','Z'), ('Z','Y')] + cls.vars_numb = len(cls.labels) def test_init(self): - s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index)) - self.assertTrue(self.structure_frame.equals(s1.structure_frame)) - self.assertTrue(self.variables_frame.equals(s1.variables_frame)) - self.assertEqual(self.variables_frame.columns.values[0], s1.name_label) - self.assertEqual(self.variables_frame.columns.values[1], s1.value_label) - #print(len(self.variables_frame.index)) - self.assertEqual(len(self.variables_frame.index), s1.total_variables_number) - - def test_list_of_edges(self): - s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index)) - records = self.structure_frame.to_records(index=False) - result = list(records) - for e1, e2 in zip(result, s1.list_of_edges()): - self.assertEqual(e1, e2) - - def test_list_of_nodes_labels(self): - s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index)) - self.assertEqual(list(self.variables_frame['Name']), s1.list_of_nodes_labels()) + s1 = 
st.Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb) + self.assertListEqual(self.labels,s1.nodes_labels) + self.assertTrue(np.array_equal(self.indxs, s1.nodes_indexes)) + self.assertTrue(np.array_equal(self.vals, s1.nodes_values)) + self.assertListEqual(self.edges, s1.edges) + self.assertEqual(self.vars_numb, s1.total_variables_number) def test_get_node_id(self): - s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index)) - for indx, var in enumerate(list(self.variables_frame['Name'])): + s1 = st.Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb) + for indx, var in enumerate(self.labels): self.assertEqual(var, s1.get_node_id(indx)) def test_get_node_indx(self): - filtered_frame = self.variables_frame.drop(self.variables_frame[self.variables_frame['Name'] == 'Y'].index) - #print(filtered_frame) - s1 = st.Structure(self.structure_frame, filtered_frame, len(self.variables_frame.index)) - for indx, var in zip(filtered_frame.index, filtered_frame['Name']): + l2 = self.labels[:] + l2.remove('Y') + i2 = self.indxs.copy() + np.delete(i2, 1) + v2 = self.vals.copy() + np.delete(v2, 1) + e2 = [('X','Z')] + n2 = self.vars_numb - 1 + s1 = st.Structure(l2, i2, v2, e2, n2) + for indx, var in zip(i2, l2): self.assertEqual(indx, s1.get_node_indx(var)) - def test_list_of_node_indxs(self): - filtered_frame = self.variables_frame.drop(self.variables_frame[self.variables_frame['Name'] == 'Y'].index) - # print(filtered_frame) - s1 = st.Structure(self.structure_frame, filtered_frame, len(self.variables_frame.index)) - - for indx1, indx2 in zip(filtered_frame.index, s1.list_of_nodes_indexes()): - self.assertEqual(indx1, indx2) - def test_get_positional_node_indx(self): - filtered_frame = self.variables_frame.drop(self.variables_frame[self.variables_frame['Name'] == 'Y'].index) - # print(filtered_frame) - s1 = st.Structure(self.structure_frame, filtered_frame, len(self.variables_frame.index)) - for indx, 
var in enumerate(s1.list_of_nodes_labels()): + l2 = self.labels[:] + l2.remove('Y') + i2 = self.indxs.copy() + i2 = np.delete(i2, 1) + v2 = self.vals.copy() + v2 = np.delete(v2, 1) + e2 = [('X', 'Z')] + n2 = self.vars_numb - 1 + s1 = st.Structure(l2, i2, v2, e2, n2) + for indx, var in enumerate(s1.nodes_labels): self.assertEqual(indx, s1.get_positional_node_indx(var)) def test_get_states_number(self): - s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index)) - for indx, row in self.variables_frame.iterrows(): - self.assertEqual(row[1], s1.get_states_number(row[0])) - - def test_get_states_numeber_by_indx(self): + l2 = self.labels[:] + l2.remove('Y') + i2 = self.indxs.copy() + i2 = np.delete(i2, 1) + v2 = self.vals.copy() + v2 = np.delete(v2, 1) + e2 = [('X', 'Z')] + n2 = self.vars_numb - 1 + s1 = st.Structure(l2, i2, v2, e2, n2) + for val, node in zip(v2, l2): + self.assertEqual(val, s1.get_states_number(node)) +#TODO maybe this test is not needed: verify whether this method is really useful + """def test_get_states_numeber_by_indx(self): s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index)) for indx, row in self.variables_frame.iterrows(): self.assertEqual(row[1], s1.get_states_number_by_indx(indx)) def test_new_init(self): #self.variables_frame.drop(self.variables_frame[(self.variables_frame['Name'] == 'Y')].index, inplace=True) - """labels = self.variables_frame['Name'].to_list() + labels = self.variables_frame['Name'].to_list() indxs = self.variables_frame.index.to_numpy() vals = self.variables_frame['Value'].to_numpy() edges = list(self.structure_frame.to_records(index=False)) @@ -103,7 +106,7 @@ class TestStructure(unittest.TestCase): array([3, 9]) array([1, 2]) array([4, 1, 2]) - """ + sp1 = sp.SamplePath('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name') sp1.build_trajectories() sp1.build_structure() @@ -122,7 +125,7 @@ class TestStructure(unittest.TestCase): 
#print(p1.sets_of_cims_struct.get_cims_of_node(0,[1,0])) print(p1.sets_of_cims_struct.sets_of_cims[1].actual_cims) #print(p1.sets_of_cims_struct.sets_of_cims[2].get_cims_where_parents_except_last_are_in_state(np.array([0]))) - #print(p1.sets_of_cims_struct.sets_of_cims[0].p_combs) + #print(p1.sets_of_cims_struct.sets_of_cims[0].p_combs)""" if __name__ == '__main__': diff --git a/main_package/tests/test_structure_estimator.py b/main_package/tests/test_structure_estimator.py index fb30ee9..c3f56f3 100644 --- a/main_package/tests/test_structure_estimator.py +++ b/main_package/tests/test_structure_estimator.py @@ -4,6 +4,7 @@ from line_profiler import LineProfiler import sample_path as sp import structure_estimator as se + class TestStructureEstimator(unittest.TestCase): @classmethod @@ -31,7 +32,7 @@ class TestStructureEstimator(unittest.TestCase): lp.print_stats() #se1.ctpc_algorithm() print(se1.complete_graph.edges) - print(self.s1.structure.list_of_edges()) + print(self.s1.structure.edges) def aux_test_complete_test(self, estimator, test_par, test_child, p_set): estimator.complete_test(test_par, test_child, p_set) diff --git a/main_package/tests/test_trajectory.py b/main_package/tests/test_trajectory.py index a29de9c..09a776e 100644 --- a/main_package/tests/test_trajectory.py +++ b/main_package/tests/test_trajectory.py @@ -8,7 +8,7 @@ class TestTrajectory(unittest.TestCase): def test_init(self): cols_list = [np.array([1.2,1.3,.14]), np.arange(1,4), np.arange(4,7)] - t1 = tr.Trajectory(cols_list, len(cols_list)) + t1 = tr.Trajectory(cols_list, len(cols_list) - 2) self.assertTrue(np.array_equal(cols_list[0], t1.times)) self.assertTrue(np.array_equal(np.ravel(t1.complete_trajectory[:, : 1]), cols_list[1])) self.assertTrue(np.array_equal(np.ravel(t1.complete_trajectory[:, 1: 2]), cols_list[2])) @@ -19,6 +19,21 @@ class TestTrajectory(unittest.TestCase): cols_list = [np.arange(1, 4), np.arange(4, 7), np.array([1.2, 1.3, .14])] self.assertRaises(TypeError, tr.Trajectory, 
cols_list, len(cols_list)) + def test_complete_trajectory(self): + cols_list = [np.array([1.2, 1.3, .14]), np.arange(1, 4), np.arange(4, 7)] + t1 = tr.Trajectory(cols_list, len(cols_list) - 2) + complete = np.column_stack((cols_list[1], cols_list[2])) + self.assertTrue(np.array_equal(t1.complete_trajectory, complete)) + + def test_trajectory(self): + cols_list = [np.array([1.2, 1.3, .14]), np.arange(1, 4), np.arange(4, 7)] + t1 = tr.Trajectory(cols_list, len(cols_list) - 2) + self.assertTrue(np.array_equal(cols_list[1], t1.trajectory.ravel())) + + def test_times(self): + cols_list = [np.array([1.2, 1.3, .14]), np.arange(1, 4), np.arange(4, 7)] + t1 = tr.Trajectory(cols_list, len(cols_list) - 2) + self.assertTrue(np.array_equal(cols_list[0], t1.times)) if __name__ == '__main__':