
Updated importer and added clear_memory

master
Luca Moretti 4 years ago
parent 9251de0fb6
commit 44a8f5a6dc
Changed files:
  1. main_package/classes/estimators/structure_score_based_estimator.py (1 line changed)
  2. main_package/classes/structure_graph/sample_path.py (126 lines changed)
  3. main_package/classes/structure_graph/trajectory.py (48 lines changed)
  4. main_package/classes/utility/abstract_importer.py (149 lines changed)
  5. main_package/classes/utility/json_importer.py (320 lines changed)
  6. main_package/tests/optimizers/test_tabu_search.py (20 lines changed)

main_package/classes/estimators/structure_score_based_estimator.py

@@ -90,6 +90,7 @@ class StructureScoreBasedEstimator(se.StructureEstimator):
         'get the number of CPU'
         cpu_count = multiprocessing.cpu_count()
+        print(f"CPU COUNT: {cpu_count}")
         if disable_multiprocessing:
             cpu_count = 1

main_package/classes/structure_graph/sample_path.py

@@ -1,86 +1,92 @@
 import sys
 sys.path.append('../')
+import pandas as pd
+import numpy as np
 import structure_graph.abstract_sample_path as asam
 import utility.json_importer as imp
-import structure_graph.structure as st
-import structure_graph.trajectory as tr
-import pandas as pd
+from structure_graph.structure import Structure
+from structure_graph.trajectory import Trajectory
+import utility.abstract_importer as ai


-class SamplePath(asam.AbstractSamplePath):
-    """
-    Aggregates all the informations about the trajectories, the real structure of the sampled net and variables
-    cardinalites.
-    Has the task of creating the objects that will contain the mentioned data.
-    :importer: the Importer objects that will import ad process data
-    :trajectories: the Trajectory object that will contain all the concatenated trajectories
-    :structure: the Structure Object that will contain all the structurral infos about the net
-    :total_variables_count: the number of variables in the net
-    """
-
-    #def __init__(self, files_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
-                 #variables_key: str):
-    def __init__(self, importer: imp.JsonImporter):
-        #self.importer = importer
-        super().__init__(importer)
-        #self._trajectories = None
-        #self._structure = None
-        self.total_variables_count = None
+class SamplePath(object):
+    """Aggregates all the information about the trajectories, the real structure of the sampled net and variables
+    cardinalities. Has the task of creating the objects ``Trajectory`` and ``Structure`` that will
+    contain the mentioned data.
+
+    :param importer: the Importer object which contains the imported and processed data
+    :type importer: AbstractImporter
+    :_trajectories: the ``Trajectory`` object that will contain all the concatenated trajectories
+    :_structure: the ``Structure`` object that will contain all the structural infos about the net
+    :_total_variables_count: the number of variables in the net
+    """
+
+    def __init__(self, importer: ai.AbstractImporter):
+        """Constructor Method
+        """
+        self._importer = importer
+        if self._importer._df_variables is None or self._importer._concatenated_samples is None:
+            raise RuntimeError('The importer object has to contain all the processed data!')
+        if self._importer._df_variables.empty:
+            raise RuntimeError('The importer object has to contain all the processed data!')
+        if isinstance(self._importer._concatenated_samples, pd.DataFrame):
+            if self._importer._concatenated_samples.empty:
+                raise RuntimeError('The importer object has to contain all the processed data!')
+        if isinstance(self._importer._concatenated_samples, np.ndarray):
+            if self._importer._concatenated_samples.size == 0:
+                raise RuntimeError('The importer object has to contain all the processed data!')
+        self._trajectories = None
+        self._structure = None
+        self._total_variables_count = None

-    def build_trajectories(self):
-        """
-        Builds the Trajectory object that will contain all the trajectories.
-        Clears all the unused dataframes in Importer Object
-        Parameters:
-            void
-        Returns:
-            void
-        """
-        self.importer.import_data()
-        #TODO: evaluate a parameter for data augmentation
-        #trajects_samples = pd.concat([self.importer.concatenated_samples,
-        #                              self.importer.concatenated_samples])
+    def build_trajectories(self) -> None:
+        """Builds the Trajectory object that will contain all the trajectories.
+        Clears all the unused dataframes in the ``_importer`` object.
+        """
         self._trajectories = \
-            tr.Trajectory(self.importer.build_list_of_samples_array(self.importer.concatenated_samples),
-                          len(self.importer.sorter) + 1)
-        #self.trajectories.append(trajectory)
-        self.importer.clear_concatenated_frame()
+            Trajectory(self._importer.build_list_of_samples_array(self._importer.concatenated_samples),
+                       len(self._importer.sorter) + 1)
+        self._importer.clear_concatenated_frame()

-    def build_structure(self):
+    def build_structure(self) -> None:
         """
-        Builds the Structure object that aggregates all the infos about the net.
-        Parameters:
-            void
-        Returns:
-            void
+        Builds the ``Structure`` object that aggregates all the infos about the net.
         """
-        self.total_variables_count = len(self.importer.sorter)
-        labels = self.importer.variables[self.importer.variables_key].to_list()
-        #print("SAMPLE PATH LABELS", labels)
-        indxs = self.importer.variables.index.to_numpy()
-        vals = self.importer.variables['Value'].to_numpy()
-        edges = list(self.importer.structure.to_records(index=False))
-        self._structure = st.Structure(labels, indxs, vals, edges,
-                                       self.total_variables_count)
+        if self._importer.sorter != self._importer.variables.iloc[:, 0].to_list():
+            raise RuntimeError("The dataset columns order has to match the order of labels in the variables frame!")
+
+        self._total_variables_count = len(self._importer.sorter)
+        labels = self._importer.variables.iloc[:, 0].to_list()
+        indxs = self._importer.variables.index.to_numpy()
+        vals = self._importer.variables.iloc[:, 1].to_numpy()
+        if self._importer.structure is None or self._importer.structure.empty:
+            edges = []
+        else:
+            edges = list(self._importer.structure.to_records(index=False))
+        self._structure = Structure(labels, indxs, vals, edges,
+                                    self._total_variables_count)
+
+    def clear_memory(self):
+        self._importer._raw_data = []

     @property
-    def trajectories(self):
+    def trajectories(self) -> Trajectory:
         return self._trajectories

     @property
-    def structure(self):
+    def structure(self) -> Structure:
         return self._structure

-    def total_variables_count(self):
-        return self.total_variables_count
+    @property
+    def total_variables_count(self) -> int:
+        return self._total_variables_count
+
+    @property
+    def has_prior_net_structure(self) -> bool:
+        return bool(self._structure.edges)
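With this commit the importer must already hold fully processed data before a SamplePath is constructed, and the new clear_memory() drops the importer's raw JSON once the Trajectory and Structure objects are built. A minimal usage sketch (the file path and labels mirror the updated test below and are illustrative):

    import utility.json_importer as ji
    import structure_graph.sample_path as sp

    importer = ji.JsonImporter("../../data/networks_and_trajectories_ternary_data_5.json",
                               'samples', 'dyn.str', 'variables', 'Time', 'Name')
    importer.import_data(0)       # process the dataset at outer-array index 0

    s1 = sp.SamplePath(importer)  # raises RuntimeError if the importer holds no processed data
    s1.build_trajectories()       # wraps the concatenated samples in a Trajectory
    s1.build_structure()          # wraps labels, cardinalities and edges in a Structure
    s1.clear_memory()             # frees the raw JSON held by the importer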

main_package/classes/structure_graph/trajectory.py

@@ -1,46 +1,34 @@
-import sys
-sys.path.append('../')
+import typing
 import numpy as np


-class Trajectory:
-    """
-    Abstracts the infos about a complete set of trajectories, represented as a numpy array of doubles and a numpy matrix
-    of ints.
-    :list_of_columns: the list containing the times array and values matrix
-    :original_cols_numb: total number of cols in the data
-    :actual_trajectory: the trajectory containing also the duplicated and shifted values (the time deltas)
-    :times: the array containing the time deltas
+class Trajectory(object):
+    """Abstracts the infos about a complete set of trajectories, represented as a numpy array of doubles
+    and a numpy matrix of ints (the changes of states).
+
+    :param list_of_columns: the list containing the times array and values matrix
+    :type list_of_columns: List
+    :param original_cols_number: total number of cols in the data
+    :type original_cols_number: int
+    :_actual_trajectory: the trajectory containing also the duplicated/shifted values
+    :_times: the array containing the time deltas
     """

-    def __init__(self, list_of_columns, original_cols_number):
-        if type(list_of_columns[0][0]) != np.float64:
-            raise TypeError('The first array in the list has to be Times')
-        self.original_cols_number = original_cols_number
-        self._actual_trajectory = np.array(list_of_columns[1:], dtype=np.int).T
-        self._times = np.array(list_of_columns[0], dtype=np.float)
+    def __init__(self, list_of_columns: typing.List, original_cols_number: int):
+        """Constructor Method
+        """
+        self._times = list_of_columns[0]
+        self._actual_trajectory = list_of_columns[1]
+        self._original_cols_number = original_cols_number

     @property
     def trajectory(self) -> np.ndarray:
-        """
-        Parameters:
-            void
-        Returns:
-            a numpy matrix containing ONLY the original columns values, not the shifted ones
-        """
-        return self._actual_trajectory[:, :self.original_cols_number]
+        return self._actual_trajectory[:, :self._original_cols_number]

     @property
     def complete_trajectory(self) -> np.ndarray:
-        """
-        Parameters:
-            void
-        Returns:
-            a numpy matrix containing all the values
-        """
         return self._actual_trajectory

     @property
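Trajectory no longer copies or transposes its inputs: it stores the delta-times array and the transitions matrix exactly as build_list_of_samples_array returns them. A small sketch of the two properties (the numbers are made up; with two original state columns, columns 2-3 hold the shifted next-state values):

    import numpy as np

    times = np.array([1.5, 2.5, 0.7])
    transitions = np.array([[0, 0, 1, 0],
                            [1, 0, 1, 1],
                            [1, 1, 0, 0]])
    t = Trajectory([times, transitions], original_cols_number=2)

    t.trajectory           # -> only the first 2 columns, shape (3, 2)
    t.complete_trajectory  # -> all 4 columns, shape (3, 4)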

main_package/classes/utility/abstract_importer.py

@@ -1,23 +1,158 @@
+import typing
 from abc import ABC, abstractmethod
+import numpy as np
+import pandas as pd


 class AbstractImporter(ABC):
-    """
-    Interface that exposes all the necessary methods to import the trajectories and the net structure.
-    :file_path: the file path
+    """Abstract class that exposes all the necessary methods to process the trajectories and the net structure.
+
+    :param file_path: the file path, or dataset name if you import already processed data
+    :type file_path: str
+    :param concatenated_samples: Dataframe or numpy array containing the concatenation of all the processed trajectories
+    :type concatenated_samples: typing.Union[pandas.DataFrame, numpy.ndarray]
+    :param variables: Dataframe containing the nodes labels and cardinalities
+    :type variables: pandas.DataFrame
+    :param prior_net_structure: Dataframe containing the structure of the network (edges)
+    :type prior_net_structure: pandas.DataFrame
+    :_sorter: A list containing the variables labels in the SAME order as the columns in ``concatenated_samples``
+
+    .. warning::
+        The parameters ``variables`` and ``prior_net_structure`` HAVE to be properly constructed
+        as Pandas Dataframes with the following structure:
+        Header of _df_structure = [From_Node | To_Node]
+        Header of _df_variables = [Variable_Label | Variable_Cardinality]
+        See the tutorial on how to construct a correct ``concatenated_samples`` Dataframe/ndarray.
+
+    .. note::
+        See :class:`JsonImporter` for an example implementation
     """

-    def __init__(self, file_path: str):
-        self.file_path = file_path
+    def __init__(self, file_path: str = None, concatenated_samples: typing.Union[pd.DataFrame, np.ndarray] = None,
+                 variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None):
+        """Constructor
+        """
+        self._file_path = file_path
+        self._concatenated_samples = concatenated_samples
+        self._df_variables = variables
+        self._df_structure = prior_net_structure
+        self._sorter = None
         super().__init__()

     @abstractmethod
-    def import_trajectories(self, raw_data):
+    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
+        """Initializes the ``_sorter`` class member from a trajectory dataframe, extracting the header of the frame
+        and keeping ONLY the variables symbolic labels, cutting out the time label in the header.
+
+        :param sample_frame: The dataframe from which to extract the header
+        :type sample_frame: pandas.DataFrame
+        :return: A list containing the processed header.
+        :rtype: List
+        """
         pass

+    def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame,
+                                              columns_header: typing.List, shifted_cols_header: typing.List) \
+            -> pd.DataFrame:
+        """Computes the difference between each value present in the time column.
+        Copies and shifts by one position up all the values present in the remaining columns.
+
+        :param sample_frame: the trajectory to be processed
+        :type sample_frame: pandas.Dataframe
+        :param columns_header: the original header of sample_frame
+        :type columns_header: List
+        :param shifted_cols_header: a copy of columns_header with changed names of the contents
+        :type shifted_cols_header: List
+        :return: The processed dataframe
+        :rtype: pandas.Dataframe
+
+        .. warning::
+            the Dataframe ``sample_frame`` has to follow the column structure of this header:
+            Header of sample_frame = [Time | Variable values]
+        """
+        sample_frame.iloc[:, 0] = sample_frame.iloc[:, 0].diff().shift(-1)
+        shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32')
+        shifted_cols.columns = shifted_cols_header
+        sample_frame = sample_frame.assign(**shifted_cols)
+        sample_frame.drop(sample_frame.tail(1).index, inplace=True)
+        return sample_frame
+
+    def compute_row_delta_in_all_samples_frames(self, df_samples_list: typing.List) -> None:
+        """Calls the method ``compute_row_delta_sigle_samples_frame`` on every dataframe present in the list
+        ``df_samples_list``.
+        Concatenates the result in the dataframe ``concatenated_samples``.
+
+        :param df_samples_list: the list of dataframes to be processed and concatenated
+        :type df_samples_list: List
+
+        .. warning::
+            The Dataframe sample_frame has to follow the column structure of this header:
+            Header of sample_frame = [Time | Variable values]
+            The class member self._sorter HAS to be properly INITIALIZED (See class members definition doc)
+
+        .. note::
+            After the call of this method the class member ``concatenated_samples`` will contain all processed
+            and merged trajectories
+        """
+        if not self._sorter:
+            raise RuntimeError("The class member self._sorter has to be INITIALIZED!")
+        shifted_cols_header = [s + "S" for s in self._sorter]
+        compute_row_delta = self.compute_row_delta_sigle_samples_frame
+        proc_samples_list = [compute_row_delta(sample, self._sorter, shifted_cols_header)
+                             for sample in df_samples_list]
+        self._concatenated_samples = pd.concat(proc_samples_list)
+        complete_header = self._sorter[:]
+        complete_header.insert(0, 'Time')
+        complete_header.extend(shifted_cols_header)
+        self._concatenated_samples = self._concatenated_samples[complete_header]
+
+    def build_list_of_samples_array(self, concatenated_sample: typing.Union[pd.DataFrame, np.ndarray]) -> typing.List:
+        """Builds a List containing the delta times numpy array and the complete transitions matrix.
+
+        :param concatenated_sample: the dataframe/array from which the times and transitions matrix have to be extracted
+            and converted
+        :type concatenated_sample: typing.Union[pandas.Dataframe, numpy.ndarray]
+        :return: the resulting list of numpy arrays
+        :rtype: List
+        """
+        if isinstance(concatenated_sample, pd.DataFrame):
+            concatenated_array = concatenated_sample.to_numpy()
+            columns_list = [concatenated_array[:, 0], concatenated_array[:, 1:].astype(int)]
+        else:
+            columns_list = [concatenated_sample[:, 0], concatenated_sample[:, 1:].astype(int)]
+        return columns_list
+
+    def clear_concatenated_frame(self) -> None:
+        """Removes all values in the dataframe concatenated_samples.
+        """
+        if isinstance(self._concatenated_samples, pd.DataFrame):
+            self._concatenated_samples = self._concatenated_samples.iloc[0:0]
+
     @abstractmethod
-    def import_structure(self, raw_data):
+    def dataset_id(self) -> object:
+        """If the original dataset contains multiple datasets, this method returns a unique id to identify the current
+        dataset.
+        """
         pass
+
+    @property
+    def concatenated_samples(self) -> pd.DataFrame:
+        return self._concatenated_samples
+
+    @property
+    def variables(self) -> pd.DataFrame:
+        return self._df_variables
+
+    @property
+    def structure(self) -> pd.DataFrame:
+        return self._df_structure
+
+    @property
+    def sorter(self) -> typing.List:
+        return self._sorter
+
+    @property
+    def file_path(self) -> str:
+        return self._file_path

@ -1,239 +1,170 @@
import sys
sys.path.append('../')
import json import json
import typing import typing
import pandas as pd import pandas as pd
import sys
sys.path.append('../')
import utility.abstract_importer as ai import utility.abstract_importer as ai
class JsonImporter(ai.AbstractImporter): class JsonImporter(ai.AbstractImporter):
""" """Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare
Implements the Interface AbstractImporter and adds all the necessary methods to process and prepare the data in json ext. the data in json extension.
with the following structure:
[] 0 :param file_path: the path of the file that contains tha data to be imported
|_ dyn.cims :type file_path: string
|_ dyn.str :param samples_label: the reference key for the samples in the trajectories
|_ samples :type samples_label: string
|_ variabels :param structure_label: the reference key for the structure of the network data
:file_path: the path of the file that contains tha data to be imported :type structure_label: string
:samples_label: the reference key for the samples in the trajectories :param variables_label: the reference key for the cardinalites of the nodes data
:structure_label: the reference key for the structure of the network data :type variables_label: string
:variables_label: the reference key for the cardinalites of the nodes data :param time_key: the key used to identify the timestamps in each trajectory
:time_key: the key used to identify the timestamps in each trajectory :type time_key: string
:variables_key: the key used to identify the names of the variables in the net :param variables_key: the key used to identify the names of the variables in the net
:df_samples_list: a Dataframe list in which every df contains a trajectory :type variables_key: string
:df_structure: Dataframe containing the structure of the network (edges) :_array_indx: the index of the outer JsonArray to extract the data from
:df_variables: Dataframe containing the nodes cardinalities :type _array_indx: int
:df_concatenated_samples: the concatenation and processing of all the trajectories present in the list df_samples list :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory
:sorter: the columns header(excluding the time column) of the Dataframe concatenated_samples :_raw_data: The raw contents of the json file to import
:type _raw_data: List
""" """
def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str, def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
variables_key: str, network_number:int=0): variables_key: str):
self.samples_label = samples_label """Constructor method
self.structure_label = structure_label
self.variables_label = variables_label
self.time_key = time_key
self.variables_key = variables_key
self.df_samples_list = []
self._df_structure = pd.DataFrame()
self._df_variables = pd.DataFrame()
self._concatenated_samples = None
self.sorter = None
self.network_number= network_number
super(JsonImporter, self).__init__(file_path)
def import_data(self): .. note::
""" This constructor calls also the method ``read_json_file()``, so after the construction of the object
Imports and prepares all data present needed for susequent computation. the class member ``_raw_data`` will contain the raw imported json data.
Parameters:
void
Returns:
void
"""
raw_data = self.read_json_file()
self.import_trajectories(raw_data)
self.compute_row_delta_in_all_samples_frames(self.time_key)
self.clear_data_frame_list()
self.import_structure(raw_data)
self.import_variables(raw_data, self.sorter)
def import_trajectories(self, raw_data: typing.List):
""" """
Imports the trajectories in the list of dicts raw_data. self._samples_label = samples_label
Parameters: self._structure_label = structure_label
:raw_data: List of Dicts self._variables_label = variables_label
Returns: self._time_key = time_key
void self._variables_key = variables_key
""" self._df_samples_list = None
self.normalize_trajectories(raw_data, self.network_number, self.samples_label) self._array_indx = None
super(JsonImporter, self).__init__(file_path)
self._raw_data = self.read_json_file()
def import_structure(self, raw_data: typing.List): def import_data(self, indx: int) -> None:
""" """Implements the abstract method of :class:`AbstractImporter`.
Imports in a dataframe the data in the list raw_data at the key structure_label
Parameters: :param indx: the index of the outer JsonArray to extract the data from
raw_data: the data :type indx: int
Returns:
void
""" """
self._df_structure = self.one_level_normalizing(raw_data, self.network_number, self.structure_label) self._array_indx = indx
self._df_samples_list = self.import_trajectories(self._raw_data)
self._sorter = self.build_sorter(self._df_samples_list[0])
self.compute_row_delta_in_all_samples_frames(self._df_samples_list)
self.clear_data_frame_list()
self._df_structure = self.import_structure(self._raw_data)
self._df_variables = self.import_variables(self._raw_data)
def import_trajectories(self, raw_data: typing.List) -> typing.List:
"""Imports the trajectories from the list of dicts ``raw_data``.
def import_variables(self, raw_data: typing.List, sorter: typing.List): :param raw_data: List of Dicts
:type raw_data: List
:return: List of dataframes containing all the trajectories
:rtype: List
""" """
Imports the data in raw_data at the key variables_label. return self.normalize_trajectories(raw_data, self._array_indx, self._samples_label)
Sorts the row of the dataframe df_variables using the list sorter.
def import_structure(self, raw_data: typing.List) -> pd.DataFrame:
"""Imports in a dataframe the data in the list raw_data at the key ``_structure_label``
Parameters: :param raw_data: List of Dicts
raw_data: the data :type raw_data: List
sorter: the list used to sort the dataframe self.df_variables :return: Dataframe containg the starting node a ending node of every arc of the network
Returns: :rtype: pandas.Dataframe
void
""" """
self._df_variables = self.one_level_normalizing(raw_data, self.network_number, self.variables_label) return self.one_level_normalizing(raw_data, self._array_indx, self._structure_label)
#self.sorter = self._df_variables[self.variables_key].to_list()
#self.sorter.sort()
#print("Sorter:", self.sorter)
self._df_variables[self.variables_key] = self._df_variables[self.variables_key].astype("category")
self._df_variables[self.variables_key] = self._df_variables[self.variables_key].cat.set_categories(sorter)
self._df_variables = self._df_variables.sort_values([self.variables_key])
self._df_variables.reset_index(inplace=True)
print("Var Frame", self._df_variables)
def read_json_file(self) -> typing.List: def import_variables(self, raw_data: typing.List) -> pd.DataFrame:
"""Imports the data in ``raw_data`` at the key ``_variables_label``.
:param raw_data: List of Dicts
:type raw_data: List
:return: Datframe containg the variables simbolic labels and their cardinalities
:rtype: pandas.Dataframe
""" """
Reads the first json file in the path self.filePath return self.one_level_normalizing(raw_data, self._array_indx, self._variables_label)
Parameters: def read_json_file(self) -> typing.List:
void """Reads the JSON file in the path self.filePath.
Returns:
data: the contents of the json file
:return: The contents of the json file
:rtype: List
""" """
#try: with open(self._file_path) as f:
#read_files = glob.glob(os.path.join(self.files_path, "*.json"))
#if not read_files:
#raise ValueError('No .json file found in the entered path!')
with open(self.file_path) as f:
data = json.load(f) data = json.load(f)
return data return data
#except ValueError as err:
#print(err.args)
def one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame: def one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame:
""" """Extracts the one-level nested data in the list ``raw_data`` at the index ``indx`` at the key ``key``.
Extracts the one-level nested data in the list raw_data at the index indx at the key key
Parameters:
raw_data: List of Dicts
indx: The index of the array from which the data have to be extracted
key: the key for the Dicts from which exctract data
Returns:
a normalized dataframe
:param raw_data: List of Dicts
:type raw_data: List
:param indx: The index of the array from which the data have to be extracted
:type indx: int
:param key: the key for the Dicts from which exctract data
:type key: string
:return: A normalized dataframe
:rtype: pandas.Datframe
""" """
return pd.DataFrame(raw_data[indx][key]) return pd.DataFrame(raw_data[indx][key])
def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str): def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str) -> typing.List:
""" """
Extracts the traj in raw_data at the index index at the key trajectories key. Extracts the trajectories in ``raw_data`` at the index ``index`` at the key ``trajectories key``.
Adds the extracted traj in the dataframe list self._df_samples_list.
Initializes the list self.sorter.
Parameters: :param raw_data: List of Dicts
raw_data: the data :type raw_data: List
indx: the index of the array from which extract data :param indx: The index of the array from which the data have to be extracted
trajectories_key: the key of the trajectories objects :type indx: int
Returns: :param trajectories_key: the key of the trajectories objects
void :type trajectories_key: string
:return: A list of daframes containg the trajectories
:rtype: List
""" """
dataframe = pd.DataFrame dataframe = pd.DataFrame
smps = raw_data[indx][trajectories_key] smps = raw_data[indx][trajectories_key]
self.df_samples_list = [dataframe(sample) for sample in smps] df_samples_list = [dataframe(sample) for sample in smps]
columns_header = list(self.df_samples_list[0].columns.values) return df_samples_list
columns_header.remove(self.time_key)
self.sorter = columns_header
def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame, time_header_label: str,
columns_header: typing.List, shifted_cols_header: typing.List) \
-> pd.DataFrame:
"""
Computes the difference between each value present in th time column.
Copies and shift by one position up all the values present in the remaining columns.
Parameters:
sample_frame: the traj to be processed
time_header_label: the label for the times
columns_header: the original header of sample_frame
shifted_cols_header: a copy of columns_header with changed names of the contents
Returns:
sample_frame: the processed dataframe
"""
sample_frame[time_header_label] = sample_frame[time_header_label].diff().shift(-1)
shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32')
#print(shifted_cols)
shifted_cols.columns = shifted_cols_header
sample_frame = sample_frame.assign(**shifted_cols)
sample_frame.drop(sample_frame.tail(1).index, inplace=True)
return sample_frame
def compute_row_delta_in_all_samples_frames(self, time_header_label: str):
"""
Calls the method compute_row_delta_sigle_samples_frame on every dataframe present in the list self.df_samples_list.
Concatenates the result in the dataframe concatanated_samples
Parameters:
time_header_label: the label of the time column
Returns:
void
"""
shifted_cols_header = [s + "S" for s in self.sorter]
compute_row_delta = self.compute_row_delta_sigle_samples_frame
self.df_samples_list = [compute_row_delta(sample, time_header_label, self.sorter, shifted_cols_header)
for sample in self.df_samples_list]
self._concatenated_samples = pd.concat(self.df_samples_list)
complete_header = self.sorter[:]
complete_header.insert(0,'Time')
complete_header.extend(shifted_cols_header)
#print("Complete Header", complete_header)
self._concatenated_samples = self._concatenated_samples[complete_header]
#print("Concat Samples",self._concatenated_samples)
def build_list_of_samples_array(self, data_frame: pd.DataFrame) -> typing.List: def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
"""Implements the abstract method build_sorter of the :class:`AbstractImporter` for this dataset.
""" """
Builds a List containing the columns of dataframe and converts them to a numpy array. columns_header = list(sample_frame.columns.values)
Parameters: columns_header.remove(self._time_key)
:data_frame: the dataframe from which the columns have to be extracted and converted return columns_header
Returns:
:columns_list: the resulting list of numpy arrays
"""
columns_list = [data_frame[column].to_numpy() for column in data_frame]
#for column in data_frame:
#columns_list.append(data_frame[column].to_numpy())
return columns_list
def clear_concatenated_frame(self): def clear_data_frame_list(self) -> None:
""" """Removes all values present in the dataframes in the list ``_df_samples_list``.
Removes all values in the dataframe concatenated_samples
Parameters:
void
Returns:
void
""" """
self._concatenated_samples = self._concatenated_samples.iloc[0:0] for indx in range(len(self._df_samples_list)):
self._df_samples_list[indx] = self._df_samples_list[indx].iloc[0:0]
def clear_data_frame_list(self): def dataset_id(self) -> object:
""" return self._array_indx
Removes all values present in the dataframes in the list df_samples_list
"""
for indx in range(len(self.df_samples_list)): # Le singole traj non servono più #TODO usare list comprens
self.df_samples_list[indx] = self.df_samples_list[indx].iloc[0:0]
def import_sampled_cims(self, raw_data: typing.List, indx: int, cims_key: str) -> typing.Dict: def import_sampled_cims(self, raw_data: typing.List, indx: int, cims_key: str) -> typing.Dict:
"""Imports the synthetic CIMS in the dataset in a dictionary, using variables labels
as keys for the set of CIMS of a particular node.
:param raw_data: List of Dicts
:type raw_data: List
:param indx: The index of the array from which the data have to be extracted
:type indx: int
:param cims_key: the key where the json object cims are placed
:type cims_key: string
:return: a dictionary containing the sampled CIMS for all the variables in the net
:rtype: Dictionary
"""
cims_for_all_vars = {} cims_for_all_vars = {}
for var in raw_data[indx][cims_key]: for var in raw_data[indx][cims_key]:
sampled_cims_list = [] sampled_cims_list = []
@ -242,18 +173,5 @@ class JsonImporter(ai.AbstractImporter):
cims_for_all_vars[var].append(pd.DataFrame(raw_data[indx][cims_key][var][p_comb]).to_numpy()) cims_for_all_vars[var].append(pd.DataFrame(raw_data[indx][cims_key][var][p_comb]).to_numpy())
return cims_for_all_vars return cims_for_all_vars
@property
def concatenated_samples(self):
return self._concatenated_samples
@property
def variables(self):
return self._df_variables
@property
def structure(self):
return self._df_structure
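The constructor keys map onto the raw JSON as sketched below; the tiny document is made up (including the 'From'/'To' field names) just to show the expected shape:

    import pandas as pd

    raw_data = [{                                        # one dataset in the outer JsonArray
        'samples':   [[{'Time': 0.0, 'X': 0, 'Y': 0},    # one trajectory: a list of records
                       {'Time': 1.5, 'X': 1, 'Y': 0}]],
        'dyn.str':   [{'From': 'X', 'To': 'Y'}],         # the prior edges
        'variables': [{'Name': 'X', 'Value': 3},         # labels and cardinalities
                      {'Name': 'Y', 'Value': 3}],
    }]

    # one_level_normalizing(raw_data, 0, 'variables') then reduces to:
    pd.DataFrame(raw_data[0]['variables'])
    #   Name  Value
    # 0    X      3
    # 1    Y      3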

main_package/tests/optimizers/test_tabu_search.py

@@ -18,16 +18,24 @@ import utility.json_importer as ji
 class TestTabuSearch(unittest.TestCase):

     @classmethod
     def setUpClass(cls):
         #cls.read_files = glob.glob(os.path.join('../../data', "*.json"))
-        cls.importer = ji.JsonImporter("../../data/networks_and_trajectories_ternary_data_20.json",
-                                       'samples', 'dyn.str', 'variables', 'Time', 'Name', 2 )
+        cls.importer = ji.JsonImporter("../../data/networks_and_trajectories_ternary_data_5.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
+        cls.importer.import_data(0)
         cls.s1 = sp.SamplePath(cls.importer)
-        #cls.traj = cls.s1.concatenated_samples
-        # print(len(cls.traj))
         cls.s1.build_trajectories()
         cls.s1.build_structure()
+        cls.s1.clear_memory()

@@ -39,11 +47,11 @@ class TestTabuSearch(unittest.TestCase):
         edges = se1.estimate_structure(
             max_parents = None,
             iterations_number = 100,
-            patience = None,
-            tabu_length = 15,
-            tabu_rules_duration = 15,
+            patience = 20,
+            tabu_length = 10,
+            tabu_rules_duration = 10,
             optimizer = 'tabu',
-            disable_multiprocessing=True
+            disable_multiprocessing=False
         )