diff --git a/main_package/classes/estimators/structure_score_based_estimator.py b/main_package/classes/estimators/structure_score_based_estimator.py
index 5c3108d..32de24c 100644
--- a/main_package/classes/estimators/structure_score_based_estimator.py
+++ b/main_package/classes/estimators/structure_score_based_estimator.py
@@ -90,6 +90,7 @@ class StructureScoreBasedEstimator(se.StructureEstimator):
 
         'get the number of CPU'
         cpu_count = multiprocessing.cpu_count()
+        print(f"CPU COUNT: {cpu_count}")
 
         if disable_multiprocessing:
             cpu_count = 1
diff --git a/main_package/classes/structure_graph/sample_path.py b/main_package/classes/structure_graph/sample_path.py
index 0c4a287..5951661 100644
--- a/main_package/classes/structure_graph/sample_path.py
+++ b/main_package/classes/structure_graph/sample_path.py
@@ -1,86 +1,92 @@
 import sys
 sys.path.append('../')
-import structure_graph.abstract_sample_path as asam
-import utility.json_importer as imp
-import structure_graph.structure as st
-import structure_graph.trajectory as tr
-
 import pandas as pd
+import numpy as np
 
+import structure_graph.abstract_sample_path as asam
+import utility.json_importer as imp
+from structure_graph.structure import Structure
+from structure_graph.trajectory import Trajectory
+import utility.abstract_importer as ai
 
-class SamplePath(asam.AbstractSamplePath):
-    """
-    Aggregates all the informations about the trajectories, the real structure of the sampled net and variables
-    cardinalites.
-    Has the task of creating the objects that will contain the mentioned data.
-    :importer: the Importer objects that will import ad process data
-    :trajectories: the Trajectory object that will contain all the concatenated trajectories
-    :structure: the Structure Object that will contain all the structurral infos about the net
-    :total_variables_count: the number of variables in the net
+class SamplePath(object):
+    """Aggregates all the information about the trajectories, the real structure of the sampled net and the
+    variables' cardinalities. Has the task of creating the objects ``Trajectory`` and ``Structure`` that will
+    contain the mentioned data.
+
+    :param importer: the Importer object which contains the imported and processed data
+    :type importer: AbstractImporter
+    :_trajectories: the ``Trajectory`` object that will contain all the concatenated trajectories
+    :_structure: the ``Structure`` object that will contain all the structural information about the net
+    :_total_variables_count: the number of variables in the net
     """
-
-    #def __init__(self, files_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
-                 #variables_key: str):
-    def __init__(self, importer: imp.JsonImporter):
-        #self.importer =importer
-        super().__init__(importer)
-        #self._trajectories = None
-        #self._structure = None
-        self.total_variables_count = None
-
-    def build_trajectories(self):
+    def __init__(self, importer: ai.AbstractImporter):
+        """Constructor Method
         """
-        Builds the Trajectory object that will contain all the trajectories.
-        Clears all the unused dataframes in Importer Object
-
-        Parameters:
-            void
-        Returns:
-            void
+        self._importer = importer
+        if self._importer._df_variables is None or self._importer._concatenated_samples is None:
+            raise RuntimeError('The importer object has to contain all the processed data!')
+        if self._importer._df_variables.empty:
+            raise RuntimeError('The importer object has to contain all the processed data!')
+        if isinstance(self._importer._concatenated_samples, pd.DataFrame):
+            if self._importer._concatenated_samples.empty:
+                raise RuntimeError('The importer object has to contain all the processed data!')
+        if isinstance(self._importer._concatenated_samples, np.ndarray):
+            if self._importer._concatenated_samples.size == 0:
+                raise RuntimeError('The importer object has to contain all the processed data!')
+        self._trajectories = None
+        self._structure = None
+        self._total_variables_count = None
+
+    def build_trajectories(self) -> None:
+        """Builds the Trajectory object that will contain all the trajectories.
+        Clears all the unused dataframes in the ``_importer`` object.
         """
-        self.importer.import_data()
-
-        #TODO: EVALUATE A PARAMETER FOR DATA AUGMENTATION
-        #trajects_samples = pd.concat([self.importer.concatenated_samples,
-        #                             self.importer.concatenated_samples])
-        self._trajectories = \
-            tr.Trajectory(self.importer.build_list_of_samples_array(self.importer.concatenated_samples),
-                          len(self.importer.sorter) + 1)
-        #self.trajectories.append(trajectory)
-        self.importer.clear_concatenated_frame()
+        self._trajectories = \
+            Trajectory(self._importer.build_list_of_samples_array(self._importer.concatenated_samples),
+                       len(self._importer.sorter) + 1)
+        self._importer.clear_concatenated_frame()
 
-    def build_structure(self):
+    def build_structure(self) -> None:
         """
-        Builds the Structure object that aggregates all the infos about the net.
-        Parameters:
-            void
-        Returns:
-            void
+        Builds the ``Structure`` object that aggregates all the information about the net.
""" - self.total_variables_count = len(self.importer.sorter) - labels = self.importer.variables[self.importer.variables_key].to_list() - #print("SAMPLE PATH LABELS",labels) - indxs = self.importer.variables.index.to_numpy() - vals = self.importer.variables['Value'].to_numpy() - edges = list(self.importer.structure.to_records(index=False)) - self._structure = st.Structure(labels, indxs, vals, edges, - self.total_variables_count) + if self._importer.sorter != self._importer.variables.iloc[:, 0].to_list(): + raise RuntimeError("The Dataset columns order have to match the order of labels in the variables Frame!") + + self._total_variables_count = len(self._importer.sorter) + labels = self._importer.variables.iloc[:, 0].to_list() + indxs = self._importer.variables.index.to_numpy() + vals = self._importer.variables.iloc[:, 1].to_numpy() + if self._importer.structure is None or self._importer.structure.empty: + edges = [] + else: + edges = list(self._importer.structure.to_records(index=False)) + self._structure = Structure(labels, indxs, vals, edges, + self._total_variables_count) + + def clear_memory(self): + self._importer._raw_data = [] @property - def trajectories(self): + def trajectories(self) -> Trajectory: return self._trajectories @property - def structure(self): + def structure(self) -> Structure: return self._structure - def total_variables_count(self): - return self.total_variables_count + @property + def total_variables_count(self) -> int: + return self._total_variables_count + + @property + def has_prior_net_structure(self) -> bool: + return bool(self._structure.edges) + diff --git a/main_package/classes/structure_graph/trajectory.py b/main_package/classes/structure_graph/trajectory.py index 518ae25..0a0a861 100644 --- a/main_package/classes/structure_graph/trajectory.py +++ b/main_package/classes/structure_graph/trajectory.py @@ -1,46 +1,34 @@ -import sys -sys.path.append('../') -import numpy as np +import typing +import numpy as np -class Trajectory: - """ - Abstracts the infos about a complete set of trajectories, represented as a numpy array of doubles and a numpy matrix - of ints. - :list_of_columns: the list containing the times array and values matrix - :original_cols_numb: total number of cols in the data - :actual_trajectory: the trajectory containing also the duplicated and shifted values - :times: the array containing the time deltas +class Trajectory(object): + """ Abstracts the infos about a complete set of trajectories, represented as a numpy array of doubles + (the time deltas) and a numpy matrix of ints (the changes of states). 
diff --git a/main_package/classes/structure_graph/trajectory.py b/main_package/classes/structure_graph/trajectory.py
index 518ae25..0a0a861 100644
--- a/main_package/classes/structure_graph/trajectory.py
+++ b/main_package/classes/structure_graph/trajectory.py
@@ -1,46 +1,34 @@
-import sys
-sys.path.append('../')
-import numpy as np
+import typing
 
+import numpy as np
 
-class Trajectory:
-    """
-    Abstracts the infos about a complete set of trajectories, represented as a numpy array of doubles and a numpy matrix
-    of ints.
-    :list_of_columns: the list containing the times array and values matrix
-    :original_cols_numb: total number of cols in the data
-    :actual_trajectory: the trajectory containing also the duplicated and shifted values
-    :times: the array containing the time deltas
+class Trajectory(object):
+    """ Abstracts the information about a complete set of trajectories, represented as a numpy array of doubles
+    (the time deltas) and a numpy matrix of ints (the changes of states).
+
+    :param list_of_columns: the list containing the times array and values matrix
+    :type list_of_columns: List
+    :param original_cols_number: total number of columns in the data
+    :type original_cols_number: int
+    :_actual_trajectory: the trajectory containing also the duplicated/shifted values
+    :_times: the array containing the time deltas
     """
 
-    def __init__(self, list_of_columns, original_cols_number):
-        if type(list_of_columns[0][0]) != np.float64:
-            raise TypeError('The first array in the list has to be Times')
-        self.original_cols_number = original_cols_number
-        self._actual_trajectory = np.array(list_of_columns[1:], dtype=np.int).T
-        self._times = np.array(list_of_columns[0], dtype=np.float)
+    def __init__(self, list_of_columns: typing.List, original_cols_number: int):
+        """Constructor Method
+        """
+        self._times = list_of_columns[0]
+        self._actual_trajectory = list_of_columns[1]
+        self._original_cols_number = original_cols_number
 
     @property
     def trajectory(self) -> np.ndarray:
-        """
-        Parameters:
-            void
-        Returns:
-            a numpy matrix containing ONLY the original columns values, not the shifted ones
-        """
-        return self._actual_trajectory[:, :self.original_cols_number]
+        return self._actual_trajectory[:, :self._original_cols_number]
 
     @property
     def complete_trajectory(self) -> np.ndarray:
-        """
-        Parameters:
-            void
-        Returns:
-            a numpy matrix containing all the values
-        """
        return self._actual_trajectory
 
     @property
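The slimmer Trajectory now stores the two arrays produced by build_list_of_samples_array as-is, and the trajectory property simply slices off the leading original columns. A toy sketch with invented values (in the real pipeline SamplePath passes len(sorter) + 1 as original_cols_number):

import numpy as np
from structure_graph.trajectory import Trajectory

times = np.array([0.3, 0.7, 1.1])        # float time deltas
states = np.array([[0, 1, 1, 0],
                   [1, 1, 0, 2],
                   [1, 0, 2, 0]])        # int states: original columns + shifted copies

t = Trajectory([times, states], original_cols_number=2)
print(t.trajectory.shape)                # (3, 2): only the leading original columns
print(t.complete_trajectory.shape)       # (3, 4): originals plus the shifted copies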
diff --git a/main_package/classes/utility/abstract_importer.py b/main_package/classes/utility/abstract_importer.py
index 707f0bd..c7b7503 100644
--- a/main_package/classes/utility/abstract_importer.py
+++ b/main_package/classes/utility/abstract_importer.py
@@ -1,23 +1,158 @@
+
+import typing
 from abc import ABC, abstractmethod
 
+import numpy as np
+import pandas as pd
+
 
 class AbstractImporter(ABC):
-    """
-    Interface that exposes all the necessary methods to import the trajectories and the net structure.
+    """Abstract class that exposes all the necessary methods to process the trajectories and the net structure.
+
+    :param file_path: the file path, or dataset name if you import already processed data
+    :type file_path: str
+    :param concatenated_samples: Dataframe or numpy array containing the concatenation of all the processed trajectories
+    :type concatenated_samples: typing.Union[pandas.DataFrame, numpy.ndarray]
+    :param variables: Dataframe containing the nodes' labels and cardinalities
+    :type variables: pandas.DataFrame
+    :param prior_net_structure: Dataframe containing the structure of the network (edges)
+    :type prior_net_structure: pandas.DataFrame
+    :_sorter: A list containing the variables' labels in the SAME order as the columns in ``concatenated_samples``
+
+    .. warning::
+        The parameters ``variables`` and ``prior_net_structure`` HAVE to be properly constructed
+        as Pandas Dataframes with the following structure:
+        Header of _df_structure = [From_Node | To_Node]
+        Header of _df_variables = [Variable_Label | Variable_Cardinality]
+        See the tutorial on how to construct a correct ``concatenated_samples`` Dataframe/ndarray.
 
-    :file_path: the file path
+    .. note::
+        See :class:`JsonImporter` for an example implementation
     """
-    def __init__(self, file_path: str):
-        self.file_path = file_path
+    def __init__(self, file_path: str = None, concatenated_samples: typing.Union[pd.DataFrame, np.ndarray] = None,
+                 variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None):
+        """Constructor
+        """
+        self._file_path = file_path
+        self._concatenated_samples = concatenated_samples
+        self._df_variables = variables
+        self._df_structure = prior_net_structure
+        self._sorter = None
         super().__init__()
 
     @abstractmethod
-    def import_trajectories(self, raw_data):
+    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
+        """Initializes the ``_sorter`` class member from a trajectory dataframe, extracting the header of the frame
+        and keeping ONLY the variables' symbolic labels, cutting out the time label in the header.
+
+        :param sample_frame: The dataframe from which to extract the header
+        :type sample_frame: pandas.DataFrame
+        :return: A list containing the processed header.
+        :rtype: List
+        """
         pass
 
+    def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame,
+                                              columns_header: typing.List, shifted_cols_header: typing.List) \
+            -> pd.DataFrame:
+        """Computes the difference between each value present in the time column.
+        Copies and shifts up by one position all the values present in the remaining columns.
+
+        :param sample_frame: the trajectory to be processed
+        :type sample_frame: pandas.Dataframe
+        :param columns_header: the original header of sample_frame
+        :type columns_header: List
+        :param shifted_cols_header: a copy of columns_header with changed names of the contents
+        :type shifted_cols_header: List
+        :return: The processed dataframe
+        :rtype: pandas.Dataframe
+
+        .. warning::
+            The Dataframe ``sample_frame`` has to follow the column structure of this header:
+            Header of sample_frame = [Time | Variable values]
+        """
+        sample_frame.iloc[:, 0] = sample_frame.iloc[:, 0].diff().shift(-1)
+        shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32')
+        shifted_cols.columns = shifted_cols_header
+        sample_frame = sample_frame.assign(**shifted_cols)
+        sample_frame.drop(sample_frame.tail(1).index, inplace=True)
+        return sample_frame
+
+    def compute_row_delta_in_all_samples_frames(self, df_samples_list: typing.List) -> None:
+        """Calls the method ``compute_row_delta_sigle_samples_frame`` on every dataframe present in the list
+        ``df_samples_list``.
+        Concatenates the result in the dataframe ``concatenated_samples``
+
+        :param df_samples_list: the list of dataframes to be processed and concatenated
+        :type df_samples_list: List
+
+        .. warning::
+            The Dataframe sample_frame has to follow the column structure of this header:
+            Header of sample_frame = [Time | Variable values]
+            The class member self._sorter HAS to be properly INITIALIZED (See class members definition doc)
+        .. note::
+            After the call of this method the class member ``concatenated_samples`` will contain all processed
+            and merged trajectories
+        """
+        if not self._sorter:
+            raise RuntimeError("The class member self._sorter has to be INITIALIZED!")
+        shifted_cols_header = [s + "S" for s in self._sorter]
+        compute_row_delta = self.compute_row_delta_sigle_samples_frame
+        proc_samples_list = [compute_row_delta(sample, self._sorter, shifted_cols_header)
+                             for sample in df_samples_list]
+        self._concatenated_samples = pd.concat(proc_samples_list)
+        complete_header = self._sorter[:]
+        complete_header.insert(0, 'Time')
+        complete_header.extend(shifted_cols_header)
+        self._concatenated_samples = self._concatenated_samples[complete_header]
+
+    def build_list_of_samples_array(self, concatenated_sample: typing.Union[pd.DataFrame, np.ndarray]) -> typing.List:
+        """Builds a List containing the delta-times numpy array and the complete transitions matrix
+
+        :param concatenated_sample: the dataframe/array from which the times and transitions matrix have to be
+            extracted and converted
+        :type concatenated_sample: typing.Union[pandas.Dataframe, numpy.ndarray]
+        :return: the resulting list of numpy arrays
+        :rtype: List
+        """
+        if isinstance(concatenated_sample, pd.DataFrame):
+            concatenated_array = concatenated_sample.to_numpy()
+            columns_list = [concatenated_array[:, 0], concatenated_array[:, 1:].astype(int)]
+        else:
+            columns_list = [concatenated_sample[:, 0], concatenated_sample[:, 1:].astype(int)]
+        return columns_list
+
+    def clear_concatenated_frame(self) -> None:
+        """Removes all values in the dataframe concatenated_samples.
+        """
+        if isinstance(self._concatenated_samples, pd.DataFrame):
+            self._concatenated_samples = self._concatenated_samples.iloc[0:0]
+
     @abstractmethod
-    def import_structure(self, raw_data):
+    def dataset_id(self) -> object:
+        """If the original dataset contains multiple datasets, this method returns a unique id to identify the
+        current dataset.
+        """
         pass
 
+    @property
+    def concatenated_samples(self) -> pd.DataFrame:
+        return self._concatenated_samples
+
+    @property
+    def variables(self) -> pd.DataFrame:
+        return self._df_variables
+
+    @property
+    def structure(self) -> pd.DataFrame:
+        return self._df_structure
+
+    @property
+    def sorter(self) -> typing.List:
+        return self._sorter
+
+    @property
+    def file_path(self) -> str:
+        return self._file_path
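The row-delta transformation above is easiest to read on a concrete frame. The following standalone snippet replays the same pandas steps on an invented two-variable trajectory:

import pandas as pd

frame = pd.DataFrame({'Time': [0.0, 0.4, 0.9],
                      'X': [0, 1, 1],
                      'Y': [1, 1, 0]})

frame.iloc[:, 0] = frame.iloc[:, 0].diff().shift(-1)                  # row i gets t[i+1] - t[i]
shifted_cols = frame[['X', 'Y']].shift(-1).fillna(0).astype('int32')  # next-state copies
shifted_cols.columns = ['XS', 'YS']
frame = frame.assign(**shifted_cols)
frame.drop(frame.tail(1).index, inplace=True)                         # last row has no delta
print(frame)
#    Time  X  Y  XS  YS
# 0   0.4  0  1   1   1
# 1   0.5  1  1   1   0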
diff --git a/main_package/classes/utility/json_importer.py b/main_package/classes/utility/json_importer.py
index b75a1e5..c813128 100644
--- a/main_package/classes/utility/json_importer.py
+++ b/main_package/classes/utility/json_importer.py
@@ -1,239 +1,170 @@
-import sys
-sys.path.append('../')
-
 import json
 import typing
 import pandas as pd
 
+import sys
+sys.path.append('../')
 import utility.abstract_importer as ai
 
 
 class JsonImporter(ai.AbstractImporter):
-    """
-    Implements the Interface AbstractImporter and adds all the necessary methods to process and prepare the data in json ext.
-    with the following structure:
-    [] 0
-        |_ dyn.cims
-        |_ dyn.str
-        |_ samples
-        |_ variabels
-    :file_path: the path of the file that contains tha data to be imported
-    :samples_label: the reference key for the samples in the trajectories
-    :structure_label: the reference key for the structure of the network data
-    :variables_label: the reference key for the cardinalites of the nodes data
-    :time_key: the key used to identify the timestamps in each trajectory
-    :variables_key: the key used to identify the names of the variables in the net
-    :df_samples_list: a Dataframe list in which every df contains a trajectory
-    :df_structure: Dataframe containing the structure of the network (edges)
-    :df_variables: Dataframe containing the nodes cardinalities
-    :df_concatenated_samples: the concatenation and processing of all the trajectories present in the list df_samples list
-    :sorter: the columns header(excluding the time column) of the Dataframe concatenated_samples
+    """Implements the abstract methods of AbstractImporter and adds all the necessary methods to process and prepare
+    the data in json extension.
+
+    :param file_path: the path of the file that contains the data to be imported
+    :type file_path: string
+    :param samples_label: the reference key for the samples in the trajectories
+    :type samples_label: string
+    :param structure_label: the reference key for the structure of the network data
+    :type structure_label: string
+    :param variables_label: the reference key for the cardinalities of the nodes data
+    :type variables_label: string
+    :param time_key: the key used to identify the timestamps in each trajectory
+    :type time_key: string
+    :param variables_key: the key used to identify the names of the variables in the net
+    :type variables_key: string
+    :_array_indx: the index of the outer JsonArray to extract the data from
+    :type _array_indx: int
+    :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory
+    :_raw_data: The raw contents of the json file to import
+    :type _raw_data: List
     """
 
     def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
-                 variables_key: str, network_number:int=0):
-        self.samples_label = samples_label
-        self.structure_label = structure_label
-        self.variables_label = variables_label
-        self.time_key = time_key
-        self.variables_key = variables_key
-        self.df_samples_list = []
-        self._df_structure = pd.DataFrame()
-        self._df_variables = pd.DataFrame()
-        self._concatenated_samples = None
-        self.sorter = None
-        self.network_number= network_number
-        super(JsonImporter, self).__init__(file_path)
+                 variables_key: str):
+        """Constructor method
 
-    def import_data(self):
-        """
-        Imports and prepares all data present needed for susequent computation.
-        Parameters:
-            void
-        Returns:
-            void
-        """
-        raw_data = self.read_json_file()
-        self.import_trajectories(raw_data)
-        self.compute_row_delta_in_all_samples_frames(self.time_key)
-        self.clear_data_frame_list()
-        self.import_structure(raw_data)
-        self.import_variables(raw_data, self.sorter)
+        .. note::
+            This constructor also calls the method ``read_json_file()``, so after the construction of the object
+            the class member ``_raw_data`` will contain the raw imported json data.
 
-    def import_trajectories(self, raw_data: typing.List):
-        """
-        Imports the trajectories in the list of dicts raw_data.
-        Parameters:
-            :raw_data: List of Dicts
-        Returns:
-            void
         """
-        self.normalize_trajectories(raw_data, self.network_number, self.samples_label)
+        self._samples_label = samples_label
+        self._structure_label = structure_label
+        self._variables_label = variables_label
+        self._time_key = time_key
+        self._variables_key = variables_key
+        self._df_samples_list = None
+        self._array_indx = None
+        super(JsonImporter, self).__init__(file_path)
+        self._raw_data = self.read_json_file()
 
-    def import_structure(self, raw_data: typing.List):
-        """
-        Imports in a dataframe the data in the list raw_data at the key structure_label
+    def import_data(self, indx: int) -> None:
+        """Implements the abstract method of :class:`AbstractImporter`.
 
-        Parameters:
-            raw_data: the data
-        Returns:
-            void
+        :param indx: the index of the outer JsonArray to extract the data from
+        :type indx: int
         """
-        self._df_structure = self.one_level_normalizing(raw_data, self.network_number, self.structure_label)
+        self._array_indx = indx
+        self._df_samples_list = self.import_trajectories(self._raw_data)
+        self._sorter = self.build_sorter(self._df_samples_list[0])
+        self.compute_row_delta_in_all_samples_frames(self._df_samples_list)
+        self.clear_data_frame_list()
+        self._df_structure = self.import_structure(self._raw_data)
+        self._df_variables = self.import_variables(self._raw_data)
 
+    def import_trajectories(self, raw_data: typing.List) -> typing.List:
+        """Imports the trajectories from the list of dicts ``raw_data``.
 
-    def import_variables(self, raw_data: typing.List, sorter: typing.List):
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :return: List of dataframes containing all the trajectories
+        :rtype: List
         """
-        Imports the data in raw_data at the key variables_label.
-        Sorts the row of the dataframe df_variables using the list sorter.
+        return self.normalize_trajectories(raw_data, self._array_indx, self._samples_label)
 
-        Parameters:
-            raw_data: the data
-            sorter: the list used to sort the dataframe self.df_variables
-        Returns:
-            void
+    def import_structure(self, raw_data: typing.List) -> pd.DataFrame:
+        """Imports in a dataframe the data in the list raw_data at the key ``_structure_label``
+
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :return: Dataframe containing the starting and ending nodes of every arc of the network
+        :rtype: pandas.Dataframe
         """
-        self._df_variables = self.one_level_normalizing(raw_data, self.network_number, self.variables_label)
-        #self.sorter = self._df_variables[self.variables_key].to_list()
-        #self.sorter.sort()
-        #print("Sorter:", self.sorter)
-        self._df_variables[self.variables_key] = self._df_variables[self.variables_key].astype("category")
-        self._df_variables[self.variables_key] = self._df_variables[self.variables_key].cat.set_categories(sorter)
-        self._df_variables = self._df_variables.sort_values([self.variables_key])
-        self._df_variables.reset_index(inplace=True)
-        print("Var Frame", self._df_variables)
+        return self.one_level_normalizing(raw_data, self._array_indx, self._structure_label)
 
-    def read_json_file(self) -> typing.List:
+    def import_variables(self, raw_data: typing.List) -> pd.DataFrame:
+        """Imports the data in ``raw_data`` at the key ``_variables_label``.
+
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :return: Dataframe containing the variables' symbolic labels and their cardinalities
+        :rtype: pandas.Dataframe
         """
-        Reads the first json file in the path self.filePath
+        return self.one_level_normalizing(raw_data, self._array_indx, self._variables_label)
 
-        Parameters:
-            void
-        Returns:
-            data: the contents of the json file
+    def read_json_file(self) -> typing.List:
+        """Reads the JSON file in the path ``_file_path``.
+
+        :return: The contents of the json file
+        :rtype: List
         """
-        #try:
-            #read_files = glob.glob(os.path.join(self.files_path, "*.json"))
-            #if not read_files:
-                #raise ValueError('No .json file found in the entered path!')
-        with open(self.file_path) as f:
+        with open(self._file_path) as f:
             data = json.load(f)
             return data
-        #except ValueError as err:
-            #print(err.args)
 
     def one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame:
-        """
-        Extracts the one-level nested data in the list raw_data at the index indx at the key key
-
-        Parameters:
-            raw_data: List of Dicts
-            indx: The index of the array from which the data have to be extracted
-            key: the key for the Dicts from which exctract data
-        Returns:
-            a normalized dataframe
+        """Extracts the one-level nested data in the list ``raw_data`` at the index ``indx`` at the key ``key``.
+
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :param indx: The index of the array from which the data have to be extracted
+        :type indx: int
+        :param key: the key for the Dicts from which to extract data
+        :type key: string
+        :return: A normalized dataframe
+        :rtype: pandas.Dataframe
         """
         return pd.DataFrame(raw_data[indx][key])
 
-    def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str):
+    def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str) -> typing.List:
         """
-        Extracts the traj in raw_data at the index index at the key trajectories key.
-        Adds the extracted traj in the dataframe list self._df_samples_list.
-        Initializes the list self.sorter.
+        Extracts the trajectories in ``raw_data`` at the index ``indx`` at the key ``trajectories_key``.
 
-        Parameters:
-            raw_data: the data
-            indx: the index of the array from which extract data
-            trajectories_key: the key of the trajectories objects
-        Returns:
-            void
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :param indx: The index of the array from which the data have to be extracted
+        :type indx: int
+        :param trajectories_key: the key of the trajectories objects
+        :type trajectories_key: string
+        :return: A list of dataframes containing the trajectories
+        :rtype: List
         """
         dataframe = pd.DataFrame
         smps = raw_data[indx][trajectories_key]
-        self.df_samples_list = [dataframe(sample) for sample in smps]
-        columns_header = list(self.df_samples_list[0].columns.values)
-        columns_header.remove(self.time_key)
-        self.sorter = columns_header
-
-    def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame, time_header_label: str,
-                                              columns_header: typing.List, shifted_cols_header: typing.List) \
-            -> pd.DataFrame:
-        """
-        Computes the difference between each value present in th time column.
-        Copies and shift by one position up all the values present in the remaining columns.
-        Parameters:
-            sample_frame: the traj to be processed
-            time_header_label: the label for the times
-            columns_header: the original header of sample_frame
-            shifted_cols_header: a copy of columns_header with changed names of the contents
-        Returns:
-            sample_frame: the processed dataframe
-
-        """
-        sample_frame[time_header_label] = sample_frame[time_header_label].diff().shift(-1)
-        shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32')
-        #print(shifted_cols)
-        shifted_cols.columns = shifted_cols_header
-        sample_frame = sample_frame.assign(**shifted_cols)
-        sample_frame.drop(sample_frame.tail(1).index, inplace=True)
-        return sample_frame
-
-    def compute_row_delta_in_all_samples_frames(self, time_header_label: str):
-        """
-        Calls the method compute_row_delta_sigle_samples_frame on every dataframe present in the list self.df_samples_list.
-        Concatenates the result in the dataframe concatanated_samples
-
-        Parameters:
-            time_header_label: the label of the time column
-        Returns:
-            void
-        """
-        shifted_cols_header = [s + "S" for s in self.sorter]
-        compute_row_delta = self.compute_row_delta_sigle_samples_frame
-        self.df_samples_list = [compute_row_delta(sample, time_header_label, self.sorter, shifted_cols_header)
-                                for sample in self.df_samples_list]
-        self._concatenated_samples = pd.concat(self.df_samples_list)
-        complete_header = self.sorter[:]
-        complete_header.insert(0,'Time')
-        complete_header.extend(shifted_cols_header)
-        #print("Complete Header", complete_header)
-        self._concatenated_samples = self._concatenated_samples[complete_header]
-        #print("Concat Samples",self._concatenated_samples)
+        df_samples_list = [dataframe(sample) for sample in smps]
+        return df_samples_list
 
-    def build_list_of_samples_array(self, data_frame: pd.DataFrame) -> typing.List:
-        """
-        Builds a List containing the columns of dataframe and converts them to a numpy array.
-        Parameters:
-            :data_frame: the dataframe from which the columns have to be extracted and converted
-        Returns:
-            :columns_list: the resulting list of numpy arrays
+    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
+        """Implements the abstract method build_sorter of the :class:`AbstractImporter` for this dataset.
         """
-        columns_list = [data_frame[column].to_numpy() for column in data_frame]
-        #for column in data_frame:
-            #columns_list.append(data_frame[column].to_numpy())
-        return columns_list
+        columns_header = list(sample_frame.columns.values)
+        columns_header.remove(self._time_key)
+        return columns_header
 
-    def clear_concatenated_frame(self):
+    def clear_data_frame_list(self) -> None:
+        """Removes all values present in the dataframes in the list ``_df_samples_list``.
         """
-        Removes all values in the dataframe concatenated_samples
-        Parameters:
-            void
-        Returns:
-            void
-        """
-        self._concatenated_samples = self._concatenated_samples.iloc[0:0]
+        for indx in range(len(self._df_samples_list)):
+            self._df_samples_list[indx] = self._df_samples_list[indx].iloc[0:0]
 
-    def clear_data_frame_list(self):
-        """
-        Removes all values present in the dataframes in the list df_samples_list
-        """
-        for indx in range(len(self.df_samples_list)):  # the single trajectories are no longer needed  #TODO: use a list comprehension
-            self.df_samples_list[indx] = self.df_samples_list[indx].iloc[0:0]
+    def dataset_id(self) -> object:
+        return self._array_indx
 
     def import_sampled_cims(self, raw_data: typing.List, indx: int, cims_key: str) -> typing.Dict:
+        """Imports the synthetic CIMS in the dataset in a dictionary, using the variables' labels
+        as keys for the set of CIMS of a particular node.
+
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :param indx: The index of the array from which the data have to be extracted
+        :type indx: int
+        :param cims_key: the key where the json object cims are placed
+        :type cims_key: string
+        :return: a dictionary containing the sampled CIMS for all the variables in the net
+        :rtype: Dictionary
        """
         cims_for_all_vars = {}
         for var in raw_data[indx][cims_key]:
             sampled_cims_list = []
@@ -242,18 +173,5 @@ class JsonImporter(ai.AbstractImporter):
                 cims_for_all_vars[var].append(pd.DataFrame(raw_data[indx][cims_key][var][p_comb]).to_numpy())
         return cims_for_all_vars
 
-    @property
-    def concatenated_samples(self):
-        return self._concatenated_samples
-
-    @property
-    def variables(self):
-        return self._df_variables
-
-    @property
-    def structure(self):
-        return self._df_structure
-
-
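Since the removed docstring was the only place documenting the file layout, here is the expected shape of a compatible JSON file, reconstructed from the keys used above and in the tests (field names in "dyn.str" and all concrete values are invented for illustration):

# [                                   <- outer JsonArray, one entry per network
#   {
#     "samples":   [ [ {"Time": 0.0, "X": 0, "Y": 1}, ... ],   # one inner list per trajectory
#                    ... ],
#     "dyn.str":   [ {"From": "X", "To": "Y"}, ... ],
#     "variables": [ {"Name": "X", "Value": 3}, ... ],
#     "dyn.cims":  { ... }
#   },
#   ...
# ]
import utility.json_importer as ji

importer = ji.JsonImporter("../../data/my_networks.json",  # hypothetical path
                           'samples', 'dyn.str', 'variables', 'Time', 'Name')
importer.import_data(1)       # extract the second network of the outer array
print(importer.dataset_id())  # -> 1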
""" - Removes all values in the dataframe concatenated_samples - Parameters: - void - Returns: - void - """ - self._concatenated_samples = self._concatenated_samples.iloc[0:0] + for indx in range(len(self._df_samples_list)): + self._df_samples_list[indx] = self._df_samples_list[indx].iloc[0:0] - def clear_data_frame_list(self): - """ - Removes all values present in the dataframes in the list df_samples_list - """ - for indx in range(len(self.df_samples_list)): # Le singole traj non servono piĆ¹ #TODO usare list comprens - self.df_samples_list[indx] = self.df_samples_list[indx].iloc[0:0] + def dataset_id(self) -> object: + return self._array_indx def import_sampled_cims(self, raw_data: typing.List, indx: int, cims_key: str) -> typing.Dict: + """Imports the synthetic CIMS in the dataset in a dictionary, using variables labels + as keys for the set of CIMS of a particular node. + + :param raw_data: List of Dicts + :type raw_data: List + :param indx: The index of the array from which the data have to be extracted + :type indx: int + :param cims_key: the key where the json object cims are placed + :type cims_key: string + :return: a dictionary containing the sampled CIMS for all the variables in the net + :rtype: Dictionary + """ cims_for_all_vars = {} for var in raw_data[indx][cims_key]: sampled_cims_list = [] @@ -242,18 +173,5 @@ class JsonImporter(ai.AbstractImporter): cims_for_all_vars[var].append(pd.DataFrame(raw_data[indx][cims_key][var][p_comb]).to_numpy()) return cims_for_all_vars - @property - def concatenated_samples(self): - return self._concatenated_samples - - @property - def variables(self): - return self._df_variables - - @property - def structure(self): - return self._df_structure - - diff --git a/main_package/tests/optimizers/test_tabu_search.py b/main_package/tests/optimizers/test_tabu_search.py index 2d4998a..8fcc196 100644 --- a/main_package/tests/optimizers/test_tabu_search.py +++ b/main_package/tests/optimizers/test_tabu_search.py @@ -18,16 +18,24 @@ import utility.json_importer as ji + + class TestTabuSearch(unittest.TestCase): @classmethod def setUpClass(cls): #cls.read_files = glob.glob(os.path.join('../../data', "*.json")) - cls.importer = ji.JsonImporter("../../data/networks_and_trajectories_ternary_data_20.json", - 'samples', 'dyn.str', 'variables', 'Time', 'Name', 2 ) + cls.importer = ji.JsonImporter("../../data/networks_and_trajectories_ternary_data_5.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') + cls.importer.import_data(0) + cls.s1 = sp.SamplePath(cls.importer) + + #cls.traj = cls.s1.concatenated_samples + + # print(len(cls.traj)) cls.s1 = sp.SamplePath(cls.importer) cls.s1.build_trajectories() cls.s1.build_structure() + cls.s1.clear_memory() @@ -39,11 +47,11 @@ class TestTabuSearch(unittest.TestCase): edges = se1.estimate_structure( max_parents = None, iterations_number = 100, - patience = None, - tabu_length = 15, - tabu_rules_duration = 15, + patience = 20, + tabu_length = 10, + tabu_rules_duration = 10, optimizer = 'tabu', - disable_multiprocessing=True + disable_multiprocessing=False )