
Updated importer and added clear_memory

master
Luca Moretti 4 years ago
parent 9251de0fb6
commit 44a8f5a6dc
Changed files:
  1. main_package/classes/estimators/structure_score_based_estimator.py (1 line changed)
  2. main_package/classes/structure_graph/sample_path.py (126 lines changed)
  3. main_package/classes/structure_graph/trajectory.py (48 lines changed)
  4. main_package/classes/utility/abstract_importer.py (149 lines changed)
  5. main_package/classes/utility/json_importer.py (320 lines changed)
  6. main_package/tests/optimizers/test_tabu_search.py (20 lines changed)

main_package/classes/estimators/structure_score_based_estimator.py

@@ -90,6 +90,7 @@ class StructureScoreBasedEstimator(se.StructureEstimator):
         'get the number of CPU'
         cpu_count = multiprocessing.cpu_count()
+        print(f"CPU COUNT: {cpu_count}")
         if disable_multiprocessing:
             cpu_count = 1

main_package/classes/structure_graph/sample_path.py

@@ -1,86 +1,92 @@
 import sys
 sys.path.append('../')
+import pandas as pd
+import numpy as np
 import structure_graph.abstract_sample_path as asam
 import utility.json_importer as imp
-import structure_graph.structure as st
-import structure_graph.trajectory as tr
-import pandas as pd
+from structure_graph.structure import Structure
+from structure_graph.trajectory import Trajectory
+import utility.abstract_importer as ai


-class SamplePath(asam.AbstractSamplePath):
-    """
-    Aggregates all the informations about the trajectories, the real structure of the sampled net and variables
-    cardinalites.
-    Has the task of creating the objects that will contain the mentioned data.
-    :importer: the Importer objects that will import ad process data
-    :trajectories: the Trajectory object that will contain all the concatenated trajectories
-    :structure: the Structure Object that will contain all the structurral infos about the net
-    :total_variables_count: the number of variables in the net
-    """
-
-    #def __init__(self, files_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
-                 #variables_key: str):
-    def __init__(self, importer: imp.JsonImporter):
-        #self.importer = importer
-        super().__init__(importer)
-        #self._trajectories = None
-        #self._structure = None
-        self.total_variables_count = None
+class SamplePath(object):
+    """Aggregates all the information about the trajectories, the real structure of the sampled net and variables
+    cardinalities. Has the task of creating the objects ``Trajectory`` and ``Structure`` that will
+    contain the mentioned data.
+
+    :param importer: the Importer object which contains the imported and processed data
+    :type importer: AbstractImporter
+    :_trajectories: the ``Trajectory`` object that will contain all the concatenated trajectories
+    :_structure: the ``Structure`` object that will contain all the structural infos about the net
+    :_total_variables_count: the number of variables in the net
+    """
+
+    def __init__(self, importer: ai.AbstractImporter):
+        """Constructor Method
+        """
+        self._importer = importer
+        if self._importer._df_variables is None or self._importer._concatenated_samples is None:
+            raise RuntimeError('The importer object has to contain all the processed data!')
+        if self._importer._df_variables.empty:
+            raise RuntimeError('The importer object has to contain all the processed data!')
+        if isinstance(self._importer._concatenated_samples, pd.DataFrame):
+            if self._importer._concatenated_samples.empty:
+                raise RuntimeError('The importer object has to contain all the processed data!')
+        if isinstance(self._importer._concatenated_samples, np.ndarray):
+            if self._importer._concatenated_samples.size == 0:
+                raise RuntimeError('The importer object has to contain all the processed data!')
+        self._trajectories = None
+        self._structure = None
+        self._total_variables_count = None

-    def build_trajectories(self):
-        """
-        Builds the Trajectory object that will contain all the trajectories.
-        Clears all the unused dataframes in Importer Object
-        Parameters:
-            void
-        Returns:
-            void
-        """
-        self.importer.import_data()
-        #TODO: evaluate a parameter for data augmentation
-        #trajects_samples = pd.concat([self.importer.concatenated_samples,
-        #                              self.importer.concatenated_samples])
+    def build_trajectories(self) -> None:
+        """Builds the Trajectory object that will contain all the trajectories.
+        Clears all the unused dataframes in the ``_importer`` object.
+        """
         self._trajectories = \
-            tr.Trajectory(self.importer.build_list_of_samples_array(self.importer.concatenated_samples),
-                          len(self.importer.sorter) + 1)
-        #self.trajectories.append(trajectory)
-        self.importer.clear_concatenated_frame()
+            Trajectory(self._importer.build_list_of_samples_array(self._importer.concatenated_samples),
+                       len(self._importer.sorter) + 1)
+        self._importer.clear_concatenated_frame()

-    def build_structure(self):
+    def build_structure(self) -> None:
         """
-        Builds the Structure object that aggregates all the infos about the net.
-        Parameters:
-            void
-        Returns:
-            void
+        Builds the ``Structure`` object that aggregates all the infos about the net.
         """
-        self.total_variables_count = len(self.importer.sorter)
-        labels = self.importer.variables[self.importer.variables_key].to_list()
-        #print("SAMPLE PATH LABELS", labels)
-        indxs = self.importer.variables.index.to_numpy()
-        vals = self.importer.variables['Value'].to_numpy()
-        edges = list(self.importer.structure.to_records(index=False))
-        self._structure = st.Structure(labels, indxs, vals, edges,
-                                       self.total_variables_count)
+        if self._importer.sorter != self._importer.variables.iloc[:, 0].to_list():
+            raise RuntimeError("The dataset columns order has to match the order of labels in the variables frame!")
+
+        self._total_variables_count = len(self._importer.sorter)
+        labels = self._importer.variables.iloc[:, 0].to_list()
+        indxs = self._importer.variables.index.to_numpy()
+        vals = self._importer.variables.iloc[:, 1].to_numpy()
+        if self._importer.structure is None or self._importer.structure.empty:
+            edges = []
+        else:
+            edges = list(self._importer.structure.to_records(index=False))
+        self._structure = Structure(labels, indxs, vals, edges,
+                                    self._total_variables_count)
+
+    def clear_memory(self):
+        self._importer._raw_data = []

     @property
-    def trajectories(self):
+    def trajectories(self) -> Trajectory:
         return self._trajectories

     @property
-    def structure(self):
+    def structure(self) -> Structure:
         return self._structure

-    def total_variables_count(self):
-        return self.total_variables_count
+    @property
+    def total_variables_count(self) -> int:
+        return self._total_variables_count
+
+    @property
+    def has_prior_net_structure(self) -> bool:
+        return bool(self._structure.edges)
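With this commit the importer must already hold fully processed data before a SamplePath is constructed, and the new clear_memory() drops the importer's raw JSON once the Trajectory and Structure objects are built. A minimal usage sketch (the file path and labels mirror the updated test below and are illustrative):

    import utility.json_importer as ji
    import structure_graph.sample_path as sp

    importer = ji.JsonImporter("../../data/networks_and_trajectories_ternary_data_5.json",
                               'samples', 'dyn.str', 'variables', 'Time', 'Name')
    importer.import_data(0)       # process the dataset at outer-array index 0

    s1 = sp.SamplePath(importer)  # raises RuntimeError if the importer holds no processed data
    s1.build_trajectories()       # wraps the concatenated samples in a Trajectory
    s1.build_structure()          # wraps labels, cardinalities and edges in a Structure
    s1.clear_memory()             # frees the raw JSON held by the importer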

main_package/classes/structure_graph/trajectory.py

@@ -1,46 +1,34 @@
-import sys
-sys.path.append('../')
+import typing
 import numpy as np


-class Trajectory:
-    """
-    Abstracts the infos about a complete set of trajectories, represented as a numpy array of doubles and a numpy matrix
-    of ints.
-    :list_of_columns: the list containing the times array and values matrix
-    :original_cols_numb: total number of cols in the data
-    :actual_trajectory: the trajectory containing also the duplicated and shifted values (the time deltas)
-    :times: the array containing the time deltas
+class Trajectory(object):
+    """Abstracts the infos about a complete set of trajectories, represented as a numpy array of doubles
+    and a numpy matrix of ints (the changes of states).
+
+    :param list_of_columns: the list containing the times array and values matrix
+    :type list_of_columns: List
+    :param original_cols_number: total number of cols in the data
+    :type original_cols_number: int
+    :_actual_trajectory: the trajectory containing also the duplicated/shifted values
+    :_times: the array containing the time deltas
     """

-    def __init__(self, list_of_columns, original_cols_number):
-        if type(list_of_columns[0][0]) != np.float64:
-            raise TypeError('The first array in the list has to be Times')
-        self.original_cols_number = original_cols_number
-        self._actual_trajectory = np.array(list_of_columns[1:], dtype=np.int).T
-        self._times = np.array(list_of_columns[0], dtype=np.float)
+    def __init__(self, list_of_columns: typing.List, original_cols_number: int):
+        """Constructor Method
+        """
+        self._times = list_of_columns[0]
+        self._actual_trajectory = list_of_columns[1]
+        self._original_cols_number = original_cols_number

     @property
     def trajectory(self) -> np.ndarray:
-        """
-        Parameters:
-            void
-        Returns:
-            a numpy matrix containing ONLY the original columns values, not the shifted ones
-        """
-        return self._actual_trajectory[:, :self.original_cols_number]
+        return self._actual_trajectory[:, :self._original_cols_number]

     @property
     def complete_trajectory(self) -> np.ndarray:
-        """
-        Parameters:
-            void
-        Returns:
-            a numpy matrix containing all the values
-        """
         return self._actual_trajectory

     @property
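Trajectory no longer copies or transposes its inputs: it stores the delta-times array and the transitions matrix exactly as build_list_of_samples_array returns them. A small sketch of the two properties (the numbers are made up; with two original state columns, columns 2-3 hold the shifted next-state values):

    import numpy as np

    times = np.array([1.5, 2.5, 0.7])
    transitions = np.array([[0, 0, 1, 0],
                            [1, 0, 1, 1],
                            [1, 1, 0, 0]])
    t = Trajectory([times, transitions], original_cols_number=2)

    t.trajectory           # -> only the first 2 columns, shape (3, 2)
    t.complete_trajectory  # -> all 4 columns, shape (3, 4)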

main_package/classes/utility/abstract_importer.py

@@ -1,23 +1,158 @@
+import typing
 from abc import ABC, abstractmethod
+import numpy as np
+import pandas as pd


 class AbstractImporter(ABC):
-    """
-    Interface that exposes all the necessary methods to import the trajectories and the net structure.
-    :file_path: the file path
+    """Abstract class that exposes all the necessary methods to process the trajectories and the net structure.
+
+    :param file_path: the file path, or dataset name if you import already processed data
+    :type file_path: str
+    :param concatenated_samples: Dataframe or numpy array containing the concatenation of all the processed trajectories
+    :type concatenated_samples: typing.Union[pandas.DataFrame, numpy.ndarray]
+    :param variables: Dataframe containing the nodes labels and cardinalities
+    :type variables: pandas.DataFrame
+    :param prior_net_structure: Dataframe containing the structure of the network (edges)
+    :type prior_net_structure: pandas.DataFrame
+    :_sorter: A list containing the variables labels in the SAME order as the columns in ``concatenated_samples``
+
+    .. warning::
+        The parameters ``variables`` and ``prior_net_structure`` HAVE to be properly constructed
+        as Pandas Dataframes with the following structure:
+        Header of _df_structure = [From_Node | To_Node]
+        Header of _df_variables = [Variable_Label | Variable_Cardinality]
+        See the tutorial on how to construct a correct ``concatenated_samples`` Dataframe/ndarray.
+
+    .. note::
+        See :class:`JsonImporter` for an example implementation
     """

-    def __init__(self, file_path: str):
-        self.file_path = file_path
+    def __init__(self, file_path: str = None, concatenated_samples: typing.Union[pd.DataFrame, np.ndarray] = None,
+                 variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None):
+        """Constructor
+        """
+        self._file_path = file_path
+        self._concatenated_samples = concatenated_samples
+        self._df_variables = variables
+        self._df_structure = prior_net_structure
+        self._sorter = None
         super().__init__()

     @abstractmethod
-    def import_trajectories(self, raw_data):
+    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
+        """Initializes the ``_sorter`` class member from a trajectory dataframe, extracting the header of the frame
+        and keeping ONLY the variables symbolic labels, cutting out the time label in the header.
+
+        :param sample_frame: The dataframe from which to extract the header
+        :type sample_frame: pandas.DataFrame
+        :return: A list containing the processed header.
+        :rtype: List
+        """
         pass

+    def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame,
+                                              columns_header: typing.List, shifted_cols_header: typing.List) \
+            -> pd.DataFrame:
+        """Computes the difference between each value present in the time column.
+        Copies and shifts by one position up all the values present in the remaining columns.
+
+        :param sample_frame: the trajectory to be processed
+        :type sample_frame: pandas.Dataframe
+        :param columns_header: the original header of sample_frame
+        :type columns_header: List
+        :param shifted_cols_header: a copy of columns_header with changed names of the contents
+        :type shifted_cols_header: List
+        :return: The processed dataframe
+        :rtype: pandas.Dataframe
+
+        .. warning::
+            the Dataframe ``sample_frame`` has to follow the column structure of this header:
+            Header of sample_frame = [Time | Variable values]
+        """
+        sample_frame.iloc[:, 0] = sample_frame.iloc[:, 0].diff().shift(-1)
+        shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32')
+        shifted_cols.columns = shifted_cols_header
+        sample_frame = sample_frame.assign(**shifted_cols)
+        sample_frame.drop(sample_frame.tail(1).index, inplace=True)
+        return sample_frame
+
+    def compute_row_delta_in_all_samples_frames(self, df_samples_list: typing.List) -> None:
+        """Calls the method ``compute_row_delta_sigle_samples_frame`` on every dataframe present in the list
+        ``df_samples_list``.
+        Concatenates the result in the dataframe ``concatenated_samples``.
+
+        :param df_samples_list: the list of dataframes to be processed and concatenated
+        :type df_samples_list: List
+
+        .. warning::
+            The Dataframe sample_frame has to follow the column structure of this header:
+            Header of sample_frame = [Time | Variable values]
+            The class member self._sorter HAS to be properly INITIALIZED (See class members definition doc)
+
+        .. note::
+            After the call of this method the class member ``concatenated_samples`` will contain all processed
+            and merged trajectories
+        """
+        if not self._sorter:
+            raise RuntimeError("The class member self._sorter has to be INITIALIZED!")
+        shifted_cols_header = [s + "S" for s in self._sorter]
+        compute_row_delta = self.compute_row_delta_sigle_samples_frame
+        proc_samples_list = [compute_row_delta(sample, self._sorter, shifted_cols_header)
+                             for sample in df_samples_list]
+        self._concatenated_samples = pd.concat(proc_samples_list)
+        complete_header = self._sorter[:]
+        complete_header.insert(0, 'Time')
+        complete_header.extend(shifted_cols_header)
+        self._concatenated_samples = self._concatenated_samples[complete_header]
+
+    def build_list_of_samples_array(self, concatenated_sample: typing.Union[pd.DataFrame, np.ndarray]) -> typing.List:
+        """Builds a List containing the delta times numpy array and the complete transitions matrix.
+
+        :param concatenated_sample: the dataframe/array from which the times and transitions matrix have to be extracted
+            and converted
+        :type concatenated_sample: typing.Union[pandas.Dataframe, numpy.ndarray]
+        :return: the resulting list of numpy arrays
+        :rtype: List
+        """
+        if isinstance(concatenated_sample, pd.DataFrame):
+            concatenated_array = concatenated_sample.to_numpy()
+            columns_list = [concatenated_array[:, 0], concatenated_array[:, 1:].astype(int)]
+        else:
+            columns_list = [concatenated_sample[:, 0], concatenated_sample[:, 1:].astype(int)]
+        return columns_list
+
+    def clear_concatenated_frame(self) -> None:
+        """Removes all values in the dataframe concatenated_samples.
+        """
+        if isinstance(self._concatenated_samples, pd.DataFrame):
+            self._concatenated_samples = self._concatenated_samples.iloc[0:0]
+
     @abstractmethod
-    def import_structure(self, raw_data):
+    def dataset_id(self) -> object:
+        """If the original dataset contains multiple datasets, this method returns a unique id to identify the current
+        dataset.
+        """
         pass
+
+    @property
+    def concatenated_samples(self) -> pd.DataFrame:
+        return self._concatenated_samples
+
+    @property
+    def variables(self) -> pd.DataFrame:
+        return self._df_variables
+
+    @property
+    def structure(self) -> pd.DataFrame:
+        return self._df_structure
+
+    @property
+    def sorter(self) -> typing.List:
+        return self._sorter
+
+    @property
+    def file_path(self) -> str:
+        return self._file_path

@ -1,239 +1,170 @@
import sys
sys.path.append('../')
import json import json
import typing import typing
import pandas as pd import pandas as pd
import sys
sys.path.append('../')
import utility.abstract_importer as ai import utility.abstract_importer as ai
class JsonImporter(ai.AbstractImporter): class JsonImporter(ai.AbstractImporter):
""" """Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare
Implements the Interface AbstractImporter and adds all the necessary methods to process and prepare the data in json ext. the data in json extension.
with the following structure:
[] 0 :param file_path: the path of the file that contains tha data to be imported
|_ dyn.cims :type file_path: string
|_ dyn.str :param samples_label: the reference key for the samples in the trajectories
|_ samples :type samples_label: string
|_ variabels :param structure_label: the reference key for the structure of the network data
:file_path: the path of the file that contains tha data to be imported :type structure_label: string
:samples_label: the reference key for the samples in the trajectories :param variables_label: the reference key for the cardinalites of the nodes data
:structure_label: the reference key for the structure of the network data :type variables_label: string
:variables_label: the reference key for the cardinalites of the nodes data :param time_key: the key used to identify the timestamps in each trajectory
:time_key: the key used to identify the timestamps in each trajectory :type time_key: string
:variables_key: the key used to identify the names of the variables in the net :param variables_key: the key used to identify the names of the variables in the net
:df_samples_list: a Dataframe list in which every df contains a trajectory :type variables_key: string
:df_structure: Dataframe containing the structure of the network (edges) :_array_indx: the index of the outer JsonArray to extract the data from
:df_variables: Dataframe containing the nodes cardinalities :type _array_indx: int
:df_concatenated_samples: the concatenation and processing of all the trajectories present in the list df_samples list :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory
:sorter: the columns header(excluding the time column) of the Dataframe concatenated_samples :_raw_data: The raw contents of the json file to import
:type _raw_data: List
""" """
def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str, def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
variables_key: str, network_number:int=0): variables_key: str):
self.samples_label = samples_label """Constructor method
self.structure_label = structure_label
self.variables_label = variables_label
self.time_key = time_key
self.variables_key = variables_key
self.df_samples_list = []
self._df_structure = pd.DataFrame()
self._df_variables = pd.DataFrame()
self._concatenated_samples = None
self.sorter = None
self.network_number= network_number
super(JsonImporter, self).__init__(file_path)
def import_data(self): .. note::
""" This constructor calls also the method ``read_json_file()``, so after the construction of the object
Imports and prepares all data present needed for susequent computation. the class member ``_raw_data`` will contain the raw imported json data.
Parameters:
void
Returns:
void
"""
raw_data = self.read_json_file()
self.import_trajectories(raw_data)
self.compute_row_delta_in_all_samples_frames(self.time_key)
self.clear_data_frame_list()
self.import_structure(raw_data)
self.import_variables(raw_data, self.sorter)
def import_trajectories(self, raw_data: typing.List):
""" """
Imports the trajectories in the list of dicts raw_data. self._samples_label = samples_label
Parameters: self._structure_label = structure_label
:raw_data: List of Dicts self._variables_label = variables_label
Returns: self._time_key = time_key
void self._variables_key = variables_key
""" self._df_samples_list = None
self.normalize_trajectories(raw_data, self.network_number, self.samples_label) self._array_indx = None
super(JsonImporter, self).__init__(file_path)
self._raw_data = self.read_json_file()
def import_structure(self, raw_data: typing.List): def import_data(self, indx: int) -> None:
""" """Implements the abstract method of :class:`AbstractImporter`.
Imports in a dataframe the data in the list raw_data at the key structure_label
Parameters: :param indx: the index of the outer JsonArray to extract the data from
raw_data: the data :type indx: int
Returns:
void
""" """
self._df_structure = self.one_level_normalizing(raw_data, self.network_number, self.structure_label) self._array_indx = indx
self._df_samples_list = self.import_trajectories(self._raw_data)
self._sorter = self.build_sorter(self._df_samples_list[0])
self.compute_row_delta_in_all_samples_frames(self._df_samples_list)
self.clear_data_frame_list()
self._df_structure = self.import_structure(self._raw_data)
self._df_variables = self.import_variables(self._raw_data)
def import_trajectories(self, raw_data: typing.List) -> typing.List:
"""Imports the trajectories from the list of dicts ``raw_data``.
def import_variables(self, raw_data: typing.List, sorter: typing.List): :param raw_data: List of Dicts
:type raw_data: List
:return: List of dataframes containing all the trajectories
:rtype: List
""" """
Imports the data in raw_data at the key variables_label. return self.normalize_trajectories(raw_data, self._array_indx, self._samples_label)
Sorts the row of the dataframe df_variables using the list sorter.
def import_structure(self, raw_data: typing.List) -> pd.DataFrame:
"""Imports in a dataframe the data in the list raw_data at the key ``_structure_label``
Parameters: :param raw_data: List of Dicts
raw_data: the data :type raw_data: List
sorter: the list used to sort the dataframe self.df_variables :return: Dataframe containg the starting node a ending node of every arc of the network
Returns: :rtype: pandas.Dataframe
void
""" """
self._df_variables = self.one_level_normalizing(raw_data, self.network_number, self.variables_label) return self.one_level_normalizing(raw_data, self._array_indx, self._structure_label)
#self.sorter = self._df_variables[self.variables_key].to_list()
#self.sorter.sort()
#print("Sorter:", self.sorter)
self._df_variables[self.variables_key] = self._df_variables[self.variables_key].astype("category")
self._df_variables[self.variables_key] = self._df_variables[self.variables_key].cat.set_categories(sorter)
self._df_variables = self._df_variables.sort_values([self.variables_key])
self._df_variables.reset_index(inplace=True)
print("Var Frame", self._df_variables)
def read_json_file(self) -> typing.List: def import_variables(self, raw_data: typing.List) -> pd.DataFrame:
"""Imports the data in ``raw_data`` at the key ``_variables_label``.
:param raw_data: List of Dicts
:type raw_data: List
:return: Datframe containg the variables simbolic labels and their cardinalities
:rtype: pandas.Dataframe
""" """
Reads the first json file in the path self.filePath return self.one_level_normalizing(raw_data, self._array_indx, self._variables_label)
Parameters: def read_json_file(self) -> typing.List:
void """Reads the JSON file in the path self.filePath.
Returns:
data: the contents of the json file
:return: The contents of the json file
:rtype: List
""" """
#try: with open(self._file_path) as f:
#read_files = glob.glob(os.path.join(self.files_path, "*.json"))
#if not read_files:
#raise ValueError('No .json file found in the entered path!')
with open(self.file_path) as f:
data = json.load(f) data = json.load(f)
return data return data
#except ValueError as err:
#print(err.args)
def one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame: def one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame:
""" """Extracts the one-level nested data in the list ``raw_data`` at the index ``indx`` at the key ``key``.
Extracts the one-level nested data in the list raw_data at the index indx at the key key
Parameters:
raw_data: List of Dicts
indx: The index of the array from which the data have to be extracted
key: the key for the Dicts from which exctract data
Returns:
a normalized dataframe
:param raw_data: List of Dicts
:type raw_data: List
:param indx: The index of the array from which the data have to be extracted
:type indx: int
:param key: the key for the Dicts from which exctract data
:type key: string
:return: A normalized dataframe
:rtype: pandas.Datframe
""" """
return pd.DataFrame(raw_data[indx][key]) return pd.DataFrame(raw_data[indx][key])
def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str): def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str) -> typing.List:
""" """
Extracts the traj in raw_data at the index index at the key trajectories key. Extracts the trajectories in ``raw_data`` at the index ``index`` at the key ``trajectories key``.
Adds the extracted traj in the dataframe list self._df_samples_list.
Initializes the list self.sorter.
Parameters: :param raw_data: List of Dicts
raw_data: the data :type raw_data: List
indx: the index of the array from which extract data :param indx: The index of the array from which the data have to be extracted
trajectories_key: the key of the trajectories objects :type indx: int
Returns: :param trajectories_key: the key of the trajectories objects
void :type trajectories_key: string
:return: A list of daframes containg the trajectories
:rtype: List
""" """
dataframe = pd.DataFrame dataframe = pd.DataFrame
smps = raw_data[indx][trajectories_key] smps = raw_data[indx][trajectories_key]
self.df_samples_list = [dataframe(sample) for sample in smps] df_samples_list = [dataframe(sample) for sample in smps]
columns_header = list(self.df_samples_list[0].columns.values) return df_samples_list
columns_header.remove(self.time_key)
self.sorter = columns_header
def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame, time_header_label: str,
columns_header: typing.List, shifted_cols_header: typing.List) \
-> pd.DataFrame:
"""
Computes the difference between each value present in th time column.
Copies and shift by one position up all the values present in the remaining columns.
Parameters:
sample_frame: the traj to be processed
time_header_label: the label for the times
columns_header: the original header of sample_frame
shifted_cols_header: a copy of columns_header with changed names of the contents
Returns:
sample_frame: the processed dataframe
"""
sample_frame[time_header_label] = sample_frame[time_header_label].diff().shift(-1)
shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32')
#print(shifted_cols)
shifted_cols.columns = shifted_cols_header
sample_frame = sample_frame.assign(**shifted_cols)
sample_frame.drop(sample_frame.tail(1).index, inplace=True)
return sample_frame
def compute_row_delta_in_all_samples_frames(self, time_header_label: str):
"""
Calls the method compute_row_delta_sigle_samples_frame on every dataframe present in the list self.df_samples_list.
Concatenates the result in the dataframe concatanated_samples
Parameters:
time_header_label: the label of the time column
Returns:
void
"""
shifted_cols_header = [s + "S" for s in self.sorter]
compute_row_delta = self.compute_row_delta_sigle_samples_frame
self.df_samples_list = [compute_row_delta(sample, time_header_label, self.sorter, shifted_cols_header)
for sample in self.df_samples_list]
self._concatenated_samples = pd.concat(self.df_samples_list)
complete_header = self.sorter[:]
complete_header.insert(0,'Time')
complete_header.extend(shifted_cols_header)
#print("Complete Header", complete_header)
self._concatenated_samples = self._concatenated_samples[complete_header]
#print("Concat Samples",self._concatenated_samples)
def build_list_of_samples_array(self, data_frame: pd.DataFrame) -> typing.List: def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
"""Implements the abstract method build_sorter of the :class:`AbstractImporter` for this dataset.
""" """
Builds a List containing the columns of dataframe and converts them to a numpy array. columns_header = list(sample_frame.columns.values)
Parameters: columns_header.remove(self._time_key)
:data_frame: the dataframe from which the columns have to be extracted and converted return columns_header
Returns:
:columns_list: the resulting list of numpy arrays
"""
columns_list = [data_frame[column].to_numpy() for column in data_frame]
#for column in data_frame:
#columns_list.append(data_frame[column].to_numpy())
return columns_list
def clear_concatenated_frame(self): def clear_data_frame_list(self) -> None:
""" """Removes all values present in the dataframes in the list ``_df_samples_list``.
Removes all values in the dataframe concatenated_samples
Parameters:
void
Returns:
void
""" """
self._concatenated_samples = self._concatenated_samples.iloc[0:0] for indx in range(len(self._df_samples_list)):
self._df_samples_list[indx] = self._df_samples_list[indx].iloc[0:0]
def clear_data_frame_list(self): def dataset_id(self) -> object:
""" return self._array_indx
Removes all values present in the dataframes in the list df_samples_list
"""
for indx in range(len(self.df_samples_list)): # Le singole traj non servono più #TODO usare list comprens
self.df_samples_list[indx] = self.df_samples_list[indx].iloc[0:0]
def import_sampled_cims(self, raw_data: typing.List, indx: int, cims_key: str) -> typing.Dict: def import_sampled_cims(self, raw_data: typing.List, indx: int, cims_key: str) -> typing.Dict:
"""Imports the synthetic CIMS in the dataset in a dictionary, using variables labels
as keys for the set of CIMS of a particular node.
:param raw_data: List of Dicts
:type raw_data: List
:param indx: The index of the array from which the data have to be extracted
:type indx: int
:param cims_key: the key where the json object cims are placed
:type cims_key: string
:return: a dictionary containing the sampled CIMS for all the variables in the net
:rtype: Dictionary
"""
cims_for_all_vars = {} cims_for_all_vars = {}
for var in raw_data[indx][cims_key]: for var in raw_data[indx][cims_key]:
sampled_cims_list = [] sampled_cims_list = []
@ -242,18 +173,5 @@ class JsonImporter(ai.AbstractImporter):
cims_for_all_vars[var].append(pd.DataFrame(raw_data[indx][cims_key][var][p_comb]).to_numpy()) cims_for_all_vars[var].append(pd.DataFrame(raw_data[indx][cims_key][var][p_comb]).to_numpy())
return cims_for_all_vars return cims_for_all_vars
@property
def concatenated_samples(self):
return self._concatenated_samples
@property
def variables(self):
return self._df_variables
@property
def structure(self):
return self._df_structure
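The constructor keys map onto the raw JSON as sketched below; the tiny document is made up (including the 'From'/'To' field names) just to show the expected shape:

    import pandas as pd

    raw_data = [{                                        # one dataset in the outer JsonArray
        'samples':   [[{'Time': 0.0, 'X': 0, 'Y': 0},    # one trajectory: a list of records
                       {'Time': 1.5, 'X': 1, 'Y': 0}]],
        'dyn.str':   [{'From': 'X', 'To': 'Y'}],         # the prior edges
        'variables': [{'Name': 'X', 'Value': 3},         # labels and cardinalities
                      {'Name': 'Y', 'Value': 3}],
    }]

    # one_level_normalizing(raw_data, 0, 'variables') then reduces to:
    pd.DataFrame(raw_data[0]['variables'])
    #   Name  Value
    # 0    X      3
    # 1    Y      3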

main_package/tests/optimizers/test_tabu_search.py

@@ -18,16 +18,24 @@ import utility.json_importer as ji
 class TestTabuSearch(unittest.TestCase):

     @classmethod
     def setUpClass(cls):
         #cls.read_files = glob.glob(os.path.join('../../data', "*.json"))
-        cls.importer = ji.JsonImporter("../../data/networks_and_trajectories_ternary_data_20.json",
-                                       'samples', 'dyn.str', 'variables', 'Time', 'Name', 2 )
+        cls.importer = ji.JsonImporter("../../data/networks_and_trajectories_ternary_data_5.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
+        cls.importer.import_data(0)
         cls.s1 = sp.SamplePath(cls.importer)
-        #cls.traj = cls.s1.concatenated_samples
-        # print(len(cls.traj))
         cls.s1.build_trajectories()
         cls.s1.build_structure()
+        cls.s1.clear_memory()

@@ -39,11 +47,11 @@ class TestTabuSearch(unittest.TestCase):
         edges = se1.estimate_structure(
             max_parents = None,
             iterations_number = 100,
-            patience = None,
-            tabu_length = 15,
-            tabu_rules_duration = 15,
+            patience = 20,
+            tabu_length = 10,
+            tabu_rules_duration = 10,
             optimizer = 'tabu',
-            disable_multiprocessing=True
+            disable_multiprocessing=False
         )