Refactor test_structure and test_jsonImporter

4 years ago · b374d60117
parent 864ffbd319
commit b374d60117
12 changed files with 308 additions and 164 deletions
--- a/main_package/classes/cache.py
+++ b/main_package/classes/cache.py
@ -8,17 +8,17 @@ class Cache:
        self.list_of_sets_of_indxs = []
        self.actual_cache = []
-    def find(self, parents_comb: typing.Set):
+    def find(self, parents_comb: typing.Union[typing.Set, str]):
            try:
                #print("Cache State:", self.list_of_sets_of_indxs)
                #print("Look For:", parents_comb)
                result = self.actual_cache[self.list_of_sets_of_indxs.index(parents_comb)]
-                print("CACHE HIT!!!!")
+                print("CACHE HIT!!!!", parents_comb)
                return result
            except ValueError:
                return None
-    def put(self, parents_comb: typing.Set, socim: sofc.SetOfCims):
+    def put(self, parents_comb: typing.Union[typing.Set, str], socim: sofc.SetOfCims):
        #print("Putting in cache:", parents_comb)
        self.list_of_sets_of_indxs.append(parents_comb)
        self.actual_cache.append(socim)
--- a/main_package/classes/json_importer.py
+++ b/main_package/classes/json_importer.py
@ -4,23 +4,28 @@ import pandas as pd
 import json
 import typing
 from abstract_importer import AbstractImporter
 from line_profiler import LineProfiler
 class JsonImporter(AbstractImporter):
    """
-    Implementa l'interfaccia AbstractImporter e aggiunge i metodi necessari a costruire le trajectories e la struttura della rete
+    Implements the AbstractImporter interface and adds all the methods necessary to process and prepare data in json format
-    del dataset in formato json con la seguente struttura:
+    with the following structure:
    [] 0
        |_ dyn.cims
        |_ dyn.str
        |_ samples
        |_ variabels
-
+    :files_path: the path that contains the data to be imported
-    :df_samples_list: lista di dataframe, ogni dataframe contiene una traj
+    :samples_label: the reference key for the samples in the trajectories
-    :df_structure: dataframe contenente la struttura della rete
+    :structure_label: the reference key for the structure of the network data
-    :df_variables: dataframe contenente le infromazioni sulle variabili della rete
+    :variables_label: the reference key for the cardinalities of the nodes data
-
+    :time_key: the key used to identify the timestamps in each trajectory
    :variables_key: the key used to identify the names of the variables in the net
    :df_samples_list: a Dataframe list in which every df contains a trajectory
    :df_structure: Dataframe containing the structure of the network (edges)
    :df_variables: Dataframe containing the nodes cardinalities
    :df_concatenated_samples: the concatenation and processing of all the trajectories present in the list df_samples list
    :sorter: the columns header(excluding the time column) of the Dataframe concatenated_samples
    """
    def __init__(self, files_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
@ -38,6 +43,13 @@ class JsonImporter(AbstractImporter):
        super(JsonImporter, self).__init__(files_path)
    def import_data(self):
        """
        Imports and prepares all the data needed for subsequent computation.
        Parameters:
            void
        Returns:
            void
        """
        raw_data = self.read_json_file()
        #self.import_variables(raw_data)
        self.import_trajectories(raw_data)
@ -46,14 +58,38 @@ class JsonImporter(AbstractImporter):
        self.import_structure(raw_data)
        self.import_variables(raw_data, self.sorter)
-    def import_trajectories(self, raw_data: pd.DataFrame):
+    def import_trajectories(self, raw_data: typing.List):
        """
        Imports the trajectories in the list of dicts raw_data.
        Parameters:
            :raw_data: List of Dicts
        Returns:
            void
        """
        self.normalize_trajectories(raw_data, 0, self.samples_label)
-    def import_structure(self, raw_data: pd.DataFrame):
+    def import_structure(self, raw_data: typing.List):
        """
        Imports in a dataframe the data in the list raw_data at the key structure_label
        Parameters:
            raw_data: the data
        Returns:
            void
        """
        self._df_structure = self.one_level_normalizing(raw_data, 0, self.structure_label)
-    #TODO Attenzione l'ordine delle vars non è alfabetico come nel dataset -> agire di conseguenza
+
-    #Ordinando la vars alfabeticamente
+    def import_variables(self, raw_data: typing.List, sorter: typing.List):
-    def import_variables(self, raw_data: pd.DataFrame, sorter):
+        """
        Imports the data in raw_data at the key variables_label.
        Sorts the row of the dataframe df_variables using the list sorter.
        Parameters:
            raw_data: the data
            sorter: the list used to sort the dataframe self.df_variables
        Returns:
            void
        """
        self._df_variables = self.one_level_normalizing(raw_data, 0, self.variables_label)
        #self.sorter = self._df_variables[self.variables_key].to_list()
        #self.sorter.sort()
@ -62,16 +98,16 @@ class JsonImporter(AbstractImporter):
        self._df_variables[self.variables_key] = self._df_variables[self.variables_key].cat.set_categories(sorter)
        self._df_variables = self._df_variables.sort_values([self.variables_key])
        self._df_variables.reset_index(inplace=True)
-        #print("Var Frame", self._df_variables)
+        print("Var Frame", self._df_variables)
    def read_json_file(self) -> typing.List:
        """
-        Legge il primo file .json nel path self.filepath
+        Reads the first json file in the path self.filePath
        Parameters:
              void
        Returns:
-              :data: il contenuto del file json
+              data: the contents of the json file
        """
        try:
@ -84,39 +120,55 @@ class JsonImporter(AbstractImporter):
        except ValueError as err:
            print(err.args)
-    def one_level_normalizing(self, raw_data: pd.DataFrame, indx: int, key: str) -> pd.DataFrame:
+    def one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame:
        """
-        Estrae i dati innestati di un livello, presenti nel dataset raw_data,
+        Extracts the one-level nested data in the list raw_data at the index indx at the key key
        presenti nel json array all'indice indx nel json object key
        Parameters:
-            :raw_data: il dataset json completo
+            raw_data: List of Dicts
-            :indx: l'indice del json array da cui estrarre i dati
+            indx: The index of the array from which the data have to be extracted
-            :key: il json object da cui estrarre i dati
+            key: the key of the Dicts from which to extract data
        Returns:
-            Il dataframe contenente i dati normalizzati
+            a normalized dataframe
        """
        return pd.DataFrame(raw_data[indx][key])
-    def normalize_trajectories(self, raw_data: pd.DataFrame, indx: int, trajectories_key: str):
+    def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str):
        """
-        Estrae le traiettorie presenti in rawdata nel json array all'indice indx, nel json object trajectories_key.
+        Extracts the trajectories in raw_data at the index indx, under the key trajectories_key.
-        Aggiunge le traj estratte nella lista di dataframe self.df_samples_list
+        Adds the extracted traj in the dataframe list self._df_samples_list.
        Initializes the list self.sorter.
        Parameters:
-            void
+            raw_data: the data
            indx: the index of the array from which extract data
            trajectories_key: the key of the trajectories objects
        Returns:
            void
        """
-        self.df_samples_list = [pd.DataFrame(sample) for sample in raw_data[indx][trajectories_key]]
+        dataframe = pd.DataFrame
-        #for sample_indx, sample in enumerate(raw_data[indx][trajectories_key]):
+        smps = raw_data[indx][trajectories_key]
-            #self.df_samples_list.append(pd.DataFrame(sample))
+        self.df_samples_list = [dataframe(sample) for sample in smps]
-        #self.sorter = list(self.df_samples_list[0].columns.values)[1:] #TODO Qui ci deve essere la colonna NAME ordinata alfabeticamente
+        columns_header = list(self.df_samples_list[0].columns.values)
        columns_header.remove(self.time_key)
        self.sorter = columns_header
    def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame, time_header_label: str,
                                              columns_header: typing.List, shifted_cols_header: typing.List) \
            -> pd.DataFrame:
        """
        Computes the difference between each value present in the time column.
        Copies and shift by one position up all the values present in the remaining columns.
        Parameters:
            sample_frame: the traj to be processed
            time_header_label: the label for the times
            columns_header: the original header of sample_frame
            shifted_cols_header: a copy of columns_header with changed names of the contents
        Returns:
            sample_frame: the processed dataframe
        """
        sample_frame[time_header_label] = sample_frame[time_header_label].diff().shift(-1)
        shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32')
        #print(shifted_cols)
@ -126,15 +178,25 @@ class JsonImporter(AbstractImporter):
        return sample_frame
    def compute_row_delta_in_all_samples_frames(self, time_header_label: str):
-        columns_header = list(self.df_samples_list[0].columns.values)
+        """
        Calls the method compute_row_delta_sigle_samples_frame on every dataframe present in the list self.df_samples_list.
        Concatenates the result in the dataframe concatenated_samples
        Parameters:
            time_header_label: the label of the time column
        Returns:
            void
        """
        """columns_header = list(self.df_samples_list[0].columns.values)
        columns_header.remove('Time')
-        self.sorter = columns_header
+        self.sorter = columns_header"""
        shifted_cols_header = [s + "S" for s in self.sorter]
        compute_row_delta = self.compute_row_delta_sigle_samples_frame
        """for indx, sample in enumerate(self.df_samples_list):
            self.df_samples_list[indx] = self.compute_row_delta_sigle_samples_frame(sample,
                                                        time_header_label, self.sorter, shifted_cols_header)"""
-        self.df_samples_list = [compute_row_delta(sample, time_header_label, self.sorter, shifted_cols_header) for sample in self.df_samples_list]
+        self.df_samples_list = [compute_row_delta(sample, time_header_label, self.sorter, shifted_cols_header)
                                for sample in self.df_samples_list]
        self._concatenated_samples = pd.concat(self.df_samples_list)
        complete_header = self.sorter[:]
        complete_header.insert(0,'Time')
@ -146,10 +208,11 @@ class JsonImporter(AbstractImporter):
    def build_list_of_samples_array(self, data_frame: pd.DataFrame) -> typing.List:
        """
        Costruisce una lista contenente le colonne presenti nel dataframe data_frame convertendole in numpy_array
        Builds a List containing the columns of dataframe and converts them to a numpy array.
        Parameters:
-            :data_frame: il dataframe da cui estrarre e convertire le colonne
+            :data_frame: the dataframe from which the columns have to be extracted and converted
        Returns:
-            :columns_list: la lista contenente le colonne convertite in numpyarray
+            :columns_list: the resulting list of numpy arrays
        """
        columns_list = [data_frame[column].to_numpy() for column in data_frame]
@ -159,7 +222,7 @@ class JsonImporter(AbstractImporter):
    def clear_concatenated_frame(self):
        """
-        Rimuove tutti i valori contenuti nei data_frames presenti in df_samples_list
+        Removes all values in the dataframe concatenated_samples
        Parameters:
            void
        Returns:
@ -168,6 +231,9 @@ class JsonImporter(AbstractImporter):
        self._concatenated_samples = self._concatenated_samples.iloc[0:0]
    def clear_data_frame_list(self):
        """
        Removes all values present in the dataframes in the list df_samples_list
        """
        for indx in range(len(self.df_samples_list)):  # Le singole traj non servono più #TODO usare list comprens
            self.df_samples_list[indx] = self.df_samples_list[indx].iloc[0:0]
@ -180,7 +246,6 @@ class JsonImporter(AbstractImporter):
                cims_for_all_vars[var].append(pd.DataFrame(raw_data[indx][cims_key][var][p_comb]).to_numpy())
        return cims_for_all_vars
    @property
    def concatenated_samples(self):
        return self._concatenated_samples
--- a/main_package/classes/network_graph.py
+++ b/main_package/classes/network_graph.py
@ -3,7 +3,7 @@ import networkx as nx
 import numpy as np
-class NetworkGraph():
+class NetworkGraph:
    """
    Rappresenta il grafo che contiene i nodi e gli archi presenti nell'oggetto Structure graph_struct.
    Ogni nodo contine la label node_id, al nodo è anche associato un id numerico progressivo indx che rappresenta la posizione
@ -17,9 +17,9 @@ class NetworkGraph():
    def __init__(self, graph_struct):
        self.graph_struct = graph_struct
        self.graph = nx.DiGraph()
-        self._nodes_indexes = self.graph_struct.list_of_nodes_indexes()
+        self._nodes_indexes = self.graph_struct.nodes_indexes
-        self._nodes_labels = self.graph_struct.list_of_nodes_labels()
+        self._nodes_labels = self.graph_struct.nodes_labels
-        self._nodes_values = self.graph_struct.nodes_values()
+        self._nodes_values = self.graph_struct.nodes_values
        self.aggregated_info_about_nodes_parents = None
        self._fancy_indexing = None
        self._time_scalar_indexing_structure = None
@ -30,7 +30,7 @@ class NetworkGraph():
    def init_graph(self):
        self.add_nodes(self._nodes_labels)
-        self.add_edges(self.graph_struct.list_of_edges())
+        self.add_edges(self.graph_struct.edges)
        self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes()
        self._fancy_indexing = self.build_fancy_indexing_structure(0)
        self.build_scalar_indexing_structures()
@ -41,7 +41,7 @@ class NetworkGraph():
    def add_nodes(self, list_of_nodes):
        #self.graph.add_nodes_from(list_of_nodes)
        nodes_indxs = self._nodes_indexes
-        nodes_vals = self.graph_struct.nodes_values()
+        nodes_vals = self.graph_struct.nodes_values
        pos = 0
        #print("LIST OF NODES", list_of_nodes)
        for id, node_indx, node_val in zip(list_of_nodes, nodes_indxs, nodes_vals):
@ -134,7 +134,7 @@ class NetworkGraph():
        #parents_indexes_list = self._fancy_indexing
        """for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing):
                self._time_filtering.append(np.append(np.array([node_indx], dtype=np.int), p_indxs).astype(np.int))"""
-        nodes_indxs = self.graph_struct.list_of_nodes_indexes()
+        nodes_indxs = self._nodes_indexes
        #print("FINDXING", self._fancy_indexing)
        #print("Nodes Indxs", nodes_indxs)
        self._time_filtering = [np.append(np.array([node_indx], dtype=np.int), p_indxs).astype(np.int)
@ -145,7 +145,7 @@ class NetworkGraph():
        nodes_number = self.graph_struct.total_variables_number
        """for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing):
            self._transition_filtering.append(np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int))"""
-        nodes_indxs = self.graph_struct.list_of_nodes_indexes()
+        nodes_indxs = self._nodes_indexes
        self._transition_filtering = [np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int)
                                      for node_indx, p_indxs in zip(nodes_indxs,
                                                                    self._fancy_indexing)]
--- a/main_package/classes/sample_path.py
+++ b/main_package/classes/sample_path.py
@ -6,17 +6,26 @@ import structure as st
 class SamplePath:
    """
-    Contiene l'aggregazione di una o più traiettorie e la struttura della rete.
+    Aggregates all the information about the trajectories, the real structure of the sampled net and variables
-    Ha il compito dato di costruire tutte gli oggetti Trajectory e l'oggetto Structure
+    cardinalities.
-    a partire dai dataframe contenuti in self.importer
+    Has the task of creating the objects that will contain the mentioned data.
    :files_path: the path that contains the data to be imported
    :samples_label: the reference key for the samples in the trajectories
    :structure_label: the reference key for the structure of the network data
    :variables_label: the reference key for the cardinalities of the nodes data
    :time_key: the key used to identify the timestamps in each trajectory
    :variables_key: the key used to identify the names of the variables in the net
    :importer: the Importer object that will import and process data
    :trajectories: the Trajectory object that will contain all the concatenated trajectories
    :structure: the Structure object that will contain all the structural infos about the net
    :total_variables_count: the number of variables in the net
    :importer: l'oggetto Importer che ha il compito di caricare i dataset
    :trajectories: lista di oggetti Trajectories
    :structure: oggetto Structure
    """
-    def __init__(self, files_path, samples_label, structure_label, variables_label, time_key, variables_key):
+    def __init__(self, files_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
                 variables_key: str):
        self.importer = imp.JsonImporter(files_path, samples_label, structure_label,
                                         variables_label, time_key, variables_key)
        self._trajectories = None
@ -24,6 +33,15 @@ class SamplePath:
        self.total_variables_count = None
    def build_trajectories(self):
        """
        Builds the Trajectory object that will contain all the trajectories.
        Clears all the unused dataframes in the Importer object
        Parameters:
            void
        Returns:
            void
        """
        self.importer.import_data()
        self._trajectories = \
            tr.Trajectory(self.importer.build_list_of_samples_array(self.importer.concatenated_samples),
@ -32,12 +50,19 @@ class SamplePath:
        self.importer.clear_concatenated_frame()
    def build_structure(self):
        """
        Builds the Structure object that aggregates all the infos about the net.
        Parameters:
            void
        Returns:
            void
        """
        self.total_variables_count = len(self.importer.sorter)
-        labels = self.importer._df_variables['Name'].to_list()
+        labels = self.importer.variables[self.importer.variables_key].to_list()
        #print("SAMPLE PATH LABELS",labels)
-        indxs = self.importer._df_variables.index.to_numpy()
+        indxs = self.importer.variables.index.to_numpy()
-        vals = self.importer._df_variables['Value'].to_numpy()
+        vals = self.importer.variables['Value'].to_numpy()
-        edges = list(self.importer._df_structure.to_records(index=False))
+        edges = list(self.importer.structure.to_records(index=False))
        self._structure = st.Structure(labels, indxs, vals, edges,
                                       self.total_variables_count)
--- a/main_package/classes/structure.py
+++ b/main_package/classes/structure.py
@ -1,68 +1,78 @@
 import typing as ty
 import numpy as np
 class Structure:
    """
-    Contiene tutte il informazioni sulla struttura della rete (connessione dei nodi, valori assumibili dalle variabili)
+    Contains all the infos about the network structure (nodes names, nodes cardinalities, edges...)
-    :structure_frame: il dataframe contenente le connessioni dei nodi della rete
+    :nodes_labels_list: the symbolic names of the variables
-    :variables_frame: il data_frame contenente i valori assumibili dalle variabili e si suppone il corretto ordinamento
+    :nodes_indexes_arr: the indexes of the nodes
-    rispetto alle colonne del dataset
+    :nodes_vals_arr: the cardinalities of the nodes
    :edges_list: the edges of the network
    :total_variables_number: the total number of variables in the net
    """
-    def __init__(self, nodes_label_list, node_indexes_arr, nodes_vals_arr, edges_list, total_variables_number):
+    def __init__(self, nodes_label_list: ty.List, node_indexes_arr: np.array, nodes_vals_arr: np.array,
-        #self.structure_frame = structure
+                 edges_list: ty.List, total_variables_number: int):
-        #self.variables_frame = variables
+        self._nodes_labels_list = nodes_label_list
-        self.nodes_labels_list = nodes_label_list
+        self._nodes_indexes_arr = node_indexes_arr
-        self.nodes_indexes_arr = node_indexes_arr
+        self._nodes_vals_arr = nodes_vals_arr
-        self.nodes_vals_arr = nodes_vals_arr
+        self._edges_list = edges_list
-        self.edges_list = edges_list
+        self._total_variables_number = total_variables_number
-        self.total_variables_number = total_variables_number
+
-        #self.name_label = variables.columns.values[0]
+    @property
-        #self.value_label = variables.columns.values[1]
+    def edges(self):
    def list_of_edges(self):
        #records = self.structure_frame.to_records(index=False)
        #edges_list = list(records)
-        return self.edges_list
+        return self._edges_list
-    def list_of_nodes_labels(self):
+    @property
-        return self.nodes_labels_list
+    def nodes_labels(self):
        return self._nodes_labels_list
-    def list_of_nodes_indexes(self):
+    @property
-        return self.nodes_indexes_arr
+    def nodes_indexes(self):
        return self._nodes_indexes_arr
-    def get_node_id(self, node_indx):
+    @property
-        return self.nodes_labels_list[node_indx]
+    def nodes_values(self):
        return self._nodes_vals_arr
-    def get_node_indx(self, node_id):
+    @property
-        return self.nodes_indexes_arr[self.nodes_labels_list.index(node_id)]
+    def total_variables_number(self):
        return self._total_variables_number
-    def get_positional_node_indx(self, node_id):
+    def get_node_id(self, node_indx: int):
-        return self.nodes_labels_list.index(node_id)
+        return self._nodes_labels_list[node_indx]
-    def get_states_number(self, node):
+    def get_node_indx(self, node_id: str):
-        #print("node", node)
+        pos_indx = self._nodes_labels_list.index(node_id)
-        return self.nodes_vals_arr[self.get_positional_node_indx(node)]
+        return self._nodes_indexes_arr[pos_indx]
-    def get_states_number_by_indx(self, node_indx):
+    def get_positional_node_indx(self, node_id: str):
-        #print(self.value_label)
+        return self._nodes_labels_list.index(node_id)
        #print("Node indx", node_indx)
        return self.nodes_vals_arr[node_indx]
-    def nodes_values(self):
+    def get_states_number(self, node: str):
-        return self.nodes_vals_arr
+        pos_indx = self._nodes_labels_list.index(node)
        return self._nodes_vals_arr[pos_indx]
-    def total_variables_number(self):
+    def get_states_number_by_indx(self, node_indx: int):
-        return self.total_variables_number
+        #print(self.value_label)
        #print("Node indx", node_indx)
        return self._nodes_vals_arr[node_indx]
    def __repr__(self):
-        return "Variables:\n" + str(self.variables_frame) + "\nEdges: \n" + str(self.structure_frame)
+        return "Variables:\n" + str(self._nodes_labels_list) +"\nValues:\n"+ str(self._nodes_vals_arr) +\
               "\nEdges: \n" + str(self._edges_list)
    def __eq__(self, other):
        """Overrides the default implementation"""
        if isinstance(other, Structure):
-            return self.structure_frame.equals(other.structure_frame) and \
+            return set(self._nodes_labels_list) == set(other._nodes_labels_list) and \
-                   self.variables_frame.equals(other.variables_frame)
+                   np.array_equal(self._nodes_vals_arr, other._nodes_vals_arr) and \
                   np.array_equal(self._nodes_indexes_arr, other._nodes_indexes_arr) and \
                   set(self._edges_list) == set(other._edges_list)
        return NotImplemented
--- a/main_package/classes/structure_estimator.py
+++ b/main_package/classes/structure_estimator.py
@ -15,13 +15,13 @@ class StructureEstimator:
    def __init__(self, sample_path, exp_test_alfa, chi_test_alfa):
        self.sample_path = sample_path
-        self.nodes = np.array(self.sample_path.structure.list_of_nodes_labels())
+        self.nodes = np.array(self.sample_path.structure.nodes_labels)
        #print("NODES", self.nodes)
-        self.nodes_vals = self.sample_path.structure.nodes_vals_arr
+        self.nodes_vals = self.sample_path.structure.nodes_values
-        self.nodes_indxs = self.sample_path.structure.nodes_indexes_arr
+        self.nodes_indxs = self.sample_path.structure.nodes_indexes
        #self.nodes_indxs = np.array(range(0,4))
        #print("INDXS", self.nodes_indxs)
-        self.complete_graph = self.build_complete_graph(self.sample_path.structure.list_of_nodes_labels())
+        self.complete_graph = self.build_complete_graph(self.sample_path.structure.nodes_labels)
        self.exp_test_sign = exp_test_alfa
        self.chi_test_alfa = chi_test_alfa
        self.cache = ch.Cache()
@ -53,11 +53,13 @@ class StructureEstimator:
        cims_filter = sorted_parents != test_parent
        #print("PARENTS NO FROM MASK", cims_filter)
        if not p_set:
            print("EMPTY PSET TRYING TO FIND", test_child)
            sofc1 = self.cache.find(test_child)
        else:
            sofc1 = self.cache.find(set(p_set))
        if not sofc1:
            print("CACHE MISSS SOFC1")
            bool_mask1 = np.isin(self.nodes,complete_info)
            #print("Bool mask 1", bool_mask1)
            l1 = list(self.nodes[bool_mask1])
@ -88,6 +90,7 @@ class StructureEstimator:
        #p_set.append(test_parent)
        p_set.insert(0, test_parent)
        if p_set:
            print("FULL PSET TRYING TO FIND", p_set)
            #p_set.append(test_parent)
            #print("PSET ", p_set)
            #set_p_set = set(p_set)
@ -102,6 +105,7 @@ class StructureEstimator:
        p2.compute_parameters_for_node(test_child)
        sofc2 = p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)]"""
        if not sofc2:
            print("Cache MISSS SOFC2")
            complete_info.append(test_parent)
            bool_mask2 = np.isin(self.nodes, complete_info)
            #print("BOOL MASK 2",bool_mask2)
--- a/main_package/classes/trajectory.py
+++ b/main_package/classes/trajectory.py
@ -4,13 +4,13 @@ import numpy as np
 class Trajectory:
    """ 
-    Rappresenta una traiettoria come un numpy_array contenente n-ple (indx, T_k,S_i,.....,Sj)
+    Abstracts the infos about a complete set of trajectories, represented as a numpy array of doubles and a numpy matrix
-    Offre i metodi utili alla computazione sulla struttura stessa.
+    of ints.
-    Una Trajectory viene costruita a partire da una lista di numpyarray dove ogni elemento rappresenta una colonna
+    :list_of_columns: the list containing the times array and values matrix
-    della traj
+    :original_cols_numb: total number of cols in the data
-
+    :actual_trajectory: the trajectory containing also the duplicated and shifted values
-    :actual_trajectory: il numpy_array contenente la successione di n-ple (indx, T_k,S_i,.....,Sj)
+    :times: the array containing the time deltas
    """
@ -23,10 +23,22 @@ class Trajectory:
    @property
    def trajectory(self):
        """
        Parameters:
            void
        Returns:
            a numpy matrix containing ONLY the original columns values, not the shifted ones
        """
        return self._actual_trajectory[:, :self.original_cols_number]
    @property
    def complete_trajectory(self):
        """
                Parameters:
                    void
                Returns:
                    a numpy matrix containing all the values
                """
        return self._actual_trajectory
    @property
--- a/main_package/tests/test_json_importer.py
+++ b/main_package/tests/test_json_importer.py
@ -5,6 +5,8 @@ import numpy as np
 import pandas as pd
 import json_importer as ji
 from line_profiler import LineProfiler
 import os
 import json
@ -44,6 +46,7 @@ class TestJsonImporter(unittest.TestCase):
    def test_normalize_trajectories(self):
        j1 = ji.JsonImporter('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name')
        raw_data = j1.read_json_file()
        #print(raw_data)
        j1.normalize_trajectories(raw_data, 0, j1.samples_label)
        self.assertEqual(len(j1.df_samples_list), len(raw_data[0][j1.samples_label]))
        self.assertEqual(list(j1.df_samples_list[0].columns.values)[1:], j1.sorter)
@ -51,7 +54,7 @@ class TestJsonImporter(unittest.TestCase):
    def test_normalize_trajectories_wrong_indx(self):
        j1 = ji.JsonImporter('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name')
        raw_data = j1.read_json_file()
-        self.assertRaises(IndexError, j1.normalize_trajectories, raw_data, 1, j1.samples_label)
+        self.assertRaises(IndexError, j1.normalize_trajectories, raw_data, 474, j1.samples_label)
    def test_normalize_trajectories_wrong_key(self):
        j1 = ji.JsonImporter('../data', 'sample', 'dyn.str', 'variables', 'Time', 'Name')
@ -77,6 +80,7 @@ class TestJsonImporter(unittest.TestCase):
        j1.import_trajectories(raw_data)
        j1.compute_row_delta_in_all_samples_frames(j1.time_key)
        self.assertEqual(list(j1.df_samples_list[0].columns.values), list(j1.concatenated_samples.columns.values))
        self.assertEqual(list(j1.concatenated_samples.columns.values)[0], j1.time_key)
    def test_clear_data_frame_list(self):
        j1 = ji.JsonImporter('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name')
@ -112,7 +116,12 @@ class TestJsonImporter(unittest.TestCase):
    def test_import_data(self):
        j1 = ji.JsonImporter('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name')
-        j1.import_data()
+        lp = LineProfiler()
        lp_wrapper = lp(j1.import_data)
        lp_wrapper()
        lp.print_stats()
        #j1.import_data()
        self.assertEqual(list(j1.variables[j1.variables_key]),
                         list(j1.concatenated_samples.columns.values[1:len(j1.variables[j1.variables_key]) + 1]))
        print(j1.variables)
--- a/main_package/tests/test_sample_path.py
+++ b/main_package/tests/test_sample_path.py
--- a/main_package/tests/test_structure.py
+++ b/main_package/tests/test_structure.py
@ -11,71 +11,74 @@ import parameters_estimator as pe
 class TestStructure(unittest.TestCase):
-    def setUp(self):
+    @classmethod
-        self.structure_frame = pd.DataFrame([{"From":"X","To":"Z"}, {"From":"Y","To":"Z"},
+    def setUpClass(cls):
        # Class-level fixture built once for all tests: node labels, node indexes, per-node values,
        # directed edges as (from, to) label pairs, and the number of variables.
-                                             {"From":"Z","To":"Y"} ])
+        cls.labels = ['X','Y','Z']
-        self.variables_frame = pd.DataFrame([{"Name":"X","Value":3},{"Name":"Y","Value":3},{"Name":"Z","Value":3}])
+        cls.indxs = np.array([0,1,2])
        cls.vals = np.array([3,3,3])
        cls.edges = [('X','Z'),('Y','Z'), ('Z','Y')]
        cls.vars_numb = len(cls.labels)
    def test_init(self):
        # A Structure built from the fixture must expose each constructor argument unchanged
        # through nodes_labels, nodes_indexes, nodes_values, edges and total_variables_number.
-        s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index))
+        s1 = st.Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb)
-        self.assertTrue(self.structure_frame.equals(s1.structure_frame))
+        self.assertListEqual(self.labels,s1.nodes_labels)
-        self.assertTrue(self.variables_frame.equals(s1.variables_frame))
+        self.assertTrue(np.array_equal(self.indxs, s1.nodes_indexes))
-        self.assertEqual(self.variables_frame.columns.values[0], s1.name_label)
+        self.assertTrue(np.array_equal(self.vals, s1.nodes_values))
-        self.assertEqual(self.variables_frame.columns.values[1], s1.value_label)
+        self.assertListEqual(self.edges, s1.edges)
-        #print(len(self.variables_frame.index))
+        self.assertEqual(self.vars_numb, s1.total_variables_number)
        # NOTE(review): the next line reads self.variables_frame, which the class fixture no longer defines;
        # it looks like a removed line whose '-' marker was lost - confirm against the repository.
        self.assertEqual(len(self.variables_frame.index), s1.total_variables_number)
    def test_list_of_edges(self):
        # Compares list_of_edges() element by element with the records of the structure frame.
        # NOTE(review): uses self.structure_frame / self.variables_frame and the frame-based Structure
        # constructor; presumably a removed test whose '-' markers were lost - confirm.
        s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index))
        records = self.structure_frame.to_records(index=False)
        result = list(records)
        for e1, e2 in zip(result, s1.list_of_edges()):
           self.assertEqual(e1, e2)
    def test_list_of_nodes_labels(self):
        # list_of_nodes_labels() must return the 'Name' column of the variables frame as a list.
        # NOTE(review): relies on the frame-based fixtures; presumably a removed test whose '-' markers were lost - confirm.
        s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index))
        self.assertEqual(list(self.variables_frame['Name']), s1.list_of_nodes_labels())
    def test_get_node_id(self):
        # For every position in the label list, get_node_id(position) must return that label.
-        s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index))
+        s1 = st.Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb)
-        for indx, var in enumerate(list(self.variables_frame['Name'])):
+        for indx, var in enumerate(self.labels):
            self.assertEqual(var, s1.get_node_id(indx))
    def test_get_node_indx(self):
        # With node 'Y' removed, get_node_indx must still return each remaining label's own index
        # (0 for 'X', 2 for 'Z'), not its position in the shortened list.
-        filtered_frame = self.variables_frame.drop(self.variables_frame[self.variables_frame['Name'] == 'Y'].index)
+        l2 = self.labels[:]
-        #print(filtered_frame)
+        l2.remove('Y')
-        s1 = st.Structure(self.structure_frame, filtered_frame, len(self.variables_frame.index))
+        i2 = self.indxs.copy()
-        for indx, var in zip(filtered_frame.index, filtered_frame['Name']):
+        i2 = np.delete(i2, 1)
        # np.delete returns a new array and leaves its argument untouched, so the result
        # has to be rebound; otherwise the arrays keep the entry that belongs to 'Y'.
        v2 = self.vals.copy()
        v2 = np.delete(v2, 1)
        e2 = [('X','Z')]
        n2 = self.vars_numb - 1
        s1 = st.Structure(l2, i2, v2, e2, n2)
        for indx, var in zip(i2, l2):
            self.assertEqual(indx, s1.get_node_indx(var))
    def test_list_of_node_indxs(self):
        # After dropping the 'Y' row, list_of_nodes_indexes() must match the remaining frame index values in order.
        # NOTE(review): relies on the frame-based fixtures; presumably a removed test whose '-' markers were lost - confirm.
        filtered_frame = self.variables_frame.drop(self.variables_frame[self.variables_frame['Name'] == 'Y'].index)
        # print(filtered_frame)
        s1 = st.Structure(self.structure_frame, filtered_frame, len(self.variables_frame.index))
        for indx1, indx2 in zip(filtered_frame.index, s1.list_of_nodes_indexes()):
            self.assertEqual(indx1, indx2)
    def test_get_positional_node_indx(self):
        # With node 'Y' removed, get_positional_node_indx must return each label's position
        # inside nodes_labels (0, 1, ...), independent of the node's own index value.
-        filtered_frame = self.variables_frame.drop(self.variables_frame[self.variables_frame['Name'] == 'Y'].index)
+        l2 = self.labels[:]
-        # print(filtered_frame)
+        l2.remove('Y')
-        s1 = st.Structure(self.structure_frame, filtered_frame, len(self.variables_frame.index))
+        i2 = self.indxs.copy()
-        for indx, var in enumerate(s1.list_of_nodes_labels()):
+        i2 = np.delete(i2, 1)
        # np.delete returns a new array and leaves its argument untouched, so the result
        # has to be rebound; otherwise the arrays keep the entry that belongs to 'Y'.
        v2 = self.vals.copy()
        v2 = np.delete(v2, 1)
        e2 = [('X', 'Z')]
        n2 = self.vars_numb - 1
        s1 = st.Structure(l2, i2, v2, e2, n2)
        for indx, var in enumerate(s1.nodes_labels):
            self.assertEqual(indx, s1.get_positional_node_indx(var))
    def test_get_states_number(self):
        # With node 'Y' removed, get_states_number(label) must return the value stored for that label.
-        s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index))
+        l2 = self.labels[:]
-        for indx, row in self.variables_frame.iterrows():
+        l2.remove('Y')
-            self.assertEqual(row[1], s1.get_states_number(row[0]))
+        i2 = self.indxs.copy()
-
+        i2 = np.delete(i2, 1)
-    def test_get_states_numeber_by_indx(self):
+        v2 = self.vals.copy()
        # np.delete returns a new array and leaves its argument untouched, so the result
        # has to be rebound; otherwise the arrays keep the entry that belongs to 'Y'.
        v2 = np.delete(v2, 1)
        e2 = [('X', 'Z')]
        n2 = self.vars_numb - 1
        s1 = st.Structure(l2, i2, v2, e2, n2)
        for val, node in zip(v2, l2):
            self.assertEqual(val, s1.get_states_number(node))
 #TODO: this test may be unnecessary; verify whether this method is actually useful
    """def test_get_states_numeber_by_indx(self):
        s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index))
        for indx, row in self.variables_frame.iterrows():
            self.assertEqual(row[1], s1.get_states_number_by_indx(indx))
    def test_new_init(self):
        #self.variables_frame.drop(self.variables_frame[(self.variables_frame['Name'] == 'Y')].index, inplace=True)
-        """labels = self.variables_frame['Name'].to_list()
+        labels = self.variables_frame['Name'].to_list()
        indxs = self.variables_frame.index.to_numpy()
        vals = self.variables_frame['Value'].to_numpy()
        edges = list(self.structure_frame.to_records(index=False))
@ -103,7 +106,7 @@ class TestStructure(unittest.TestCase):
 array([3, 9])
 array([1, 2])
 array([4, 1, 2])
-        """
+
        sp1 = sp.SamplePath('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name')
        sp1.build_trajectories()
        sp1.build_structure()
@ -122,7 +125,7 @@ class TestStructure(unittest.TestCase):
        #print(p1.sets_of_cims_struct.get_cims_of_node(0,[1,0]))
        print(p1.sets_of_cims_struct.sets_of_cims[1].actual_cims)
        #print(p1.sets_of_cims_struct.sets_of_cims[2].get_cims_where_parents_except_last_are_in_state(np.array([0])))
-        #print(p1.sets_of_cims_struct.sets_of_cims[0].p_combs)
+        #print(p1.sets_of_cims_struct.sets_of_cims[0].p_combs)"""
 if __name__ == '__main__':
--- a/main_package/tests/test_structure_estimator.py
+++ b/main_package/tests/test_structure_estimator.py
@ -4,6 +4,7 @@ from line_profiler import LineProfiler
 import sample_path as sp
 import structure_estimator as se
 class TestStructureEstimator(unittest.TestCase):
    @classmethod
@ -31,7 +32,7 @@ class TestStructureEstimator(unittest.TestCase):
        lp.print_stats()
        #se1.ctpc_algorithm()
        print(se1.complete_graph.edges)
-        print(self.s1.structure.list_of_edges())
+        print(self.s1.structure.edges)
    def aux_test_complete_test(self, estimator, test_par, test_child, p_set):
        estimator.complete_test(test_par, test_child, p_set)
--- a/main_package/tests/test_trajectory.py
+++ b/main_package/tests/test_trajectory.py
@ -8,7 +8,7 @@ class TestTrajectory(unittest.TestCase):
    def test_init(self):
        cols_list = [np.array([1.2,1.3,.14]), np.arange(1,4), np.arange(4,7)]
-        t1 = tr.Trajectory(cols_list, len(cols_list))
+        t1 = tr.Trajectory(cols_list, len(cols_list) - 2)
        self.assertTrue(np.array_equal(cols_list[0], t1.times))
        self.assertTrue(np.array_equal(np.ravel(t1.complete_trajectory[:, : 1]), cols_list[1]))
        self.assertTrue(np.array_equal(np.ravel(t1.complete_trajectory[:, 1: 2]), cols_list[2]))
@ -19,6 +19,21 @@ class TestTrajectory(unittest.TestCase):
        cols_list = [np.arange(1, 4), np.arange(4, 7), np.array([1.2, 1.3, .14])]
        self.assertRaises(TypeError, tr.Trajectory, cols_list, len(cols_list))
    def test_complete_trajectory(self):
        """complete_trajectory must equal the two non-time columns stacked side by side."""
        first_col = np.array([1.2, 1.3, .14])
        other_cols = (np.arange(1, 4), np.arange(4, 7))
        columns = [first_col, other_cols[0], other_cols[1]]
        traj = tr.Trajectory(columns, len(columns) - 2)
        expected = np.column_stack(other_cols)
        same = np.array_equal(traj.complete_trajectory, expected)
        self.assertTrue(same)
        self.assertTrue(np.array_equal(t1.complete_trajectory, complete))
    def test_trajectory(self):
        cols_list = [np.array([1.2, 1.3, .14]), np.arange(1, 4), np.arange(4, 7)]
        t1 = tr.Trajectory(cols_list, len(cols_list) - 2)
        self.assertTrue(np.array_equal(cols_list[1], t1.trajectory.ravel()))
    def test_times(self):
        cols_list = [np.array([1.2, 1.3, .14]), np.arange(1, 4), np.arange(4, 7)]
        t1 = tr.Trajectory(cols_list, len(cols_list) - 2)
        self.assertTrue(np.array_equal(cols_list[0], t1.times))
 if __name__ == '__main__':