1
0
Fork 0

Implemented Sample_Importer

master
Luca Moretti 4 years ago
parent 44a8f5a6dc
commit 2819d1f1a0
  1. 30
      main_package/classes/utility/abstract_importer.py
  2. 66
      main_package/classes/utility/sample_importer.py
  3. 22
      main_package/tests/optimizers/test_tabu_search.py

@ -5,14 +5,18 @@ from abc import ABC, abstractmethod
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import copy
from sklearn.utils import resample
class AbstractImporter(ABC): class AbstractImporter(ABC):
"""Abstract class that exposes all the necessary methods to process the trajectories and the net structure. """Abstract class that exposes all the necessary methods to process the trajectories and the net structure.
:param file_path: the file path, or dataset name if you import already processed data :param file_path: the file path, or dataset name if you import already processed data
:type file_path: str :type file_path: str
:param concatenated_samples: Dataframe or numpy array containing the concatenation of all the processed trajectories :param trajectory_list: Dataframe or numpy array containing the concatenation of all the processed trajectories
:type concatenated_samples: typing.Union[pandas.DataFrame, numpy.ndarray] :type trajectory_list: typing.Union[pandas.DataFrame, numpy.ndarray]
:param variables: Dataframe containing the nodes labels and cardinalities :param variables: Dataframe containing the nodes labels and cardinalities
:type variables: pandas.DataFrame :type variables: pandas.DataFrame
:prior_net_structure: Dataframe containing the structure of the network (edges) :prior_net_structure: Dataframe containing the structure of the network (edges)
@ -31,24 +35,25 @@ class AbstractImporter(ABC):
""" """
def __init__(self, file_path: str = None, concatenated_samples: typing.Union[pd.DataFrame, np.ndarray] = None, def __init__(self, file_path: str = None, trajectory_list: typing.Union[pd.DataFrame, np.ndarray] = None,
variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None): variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None):
"""Constructor """Constructor
""" """
self._file_path = file_path self._file_path = file_path
self._concatenated_samples = concatenated_samples self._df_samples_list = trajectory_list
self._concatenated_samples = []
self._df_variables = variables self._df_variables = variables
self._df_structure = prior_net_structure self._df_structure = prior_net_structure
self._sorter = None self._sorter = None
super().__init__() super().__init__()
@abstractmethod @abstractmethod
def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List: def build_sorter(self, trajecory_header: object) -> typing.List:
"""Initializes the ``_sorter`` class member from a trajectory dataframe, exctracting the header of the frame """Initializes the ``_sorter`` class member from a trajectory dataframe, exctracting the header of the frame
and keeping ONLY the variables symbolic labels, cutting out the time label in the header. and keeping ONLY the variables symbolic labels, cutting out the time label in the header.
:param sample_frame: The dataframe from which extract the header :param trajecory_header: an object that will be used to define the header
:type sample_frame: pandas.DataFrame :type trajecory_header: object
:return: A list containing the processed header. :return: A list containing the processed header.
:rtype: List :rtype: List
""" """
@ -73,6 +78,7 @@ class AbstractImporter(ABC):
the Dataframe ``sample_frame`` has to follow the column structure of this header: the Dataframe ``sample_frame`` has to follow the column structure of this header:
Header of sample_frame = [Time | Variable values] Header of sample_frame = [Time | Variable values]
""" """
sample_frame = copy.deepcopy(sample_frame)
sample_frame.iloc[:, 0] = sample_frame.iloc[:, 0].diff().shift(-1) sample_frame.iloc[:, 0] = sample_frame.iloc[:, 0].diff().shift(-1)
shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32') shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32')
shifted_cols.columns = shifted_cols_header shifted_cols.columns = shifted_cols_header
@ -103,25 +109,25 @@ class AbstractImporter(ABC):
proc_samples_list = [compute_row_delta(sample, self._sorter, shifted_cols_header) proc_samples_list = [compute_row_delta(sample, self._sorter, shifted_cols_header)
for sample in df_samples_list] for sample in df_samples_list]
self._concatenated_samples = pd.concat(proc_samples_list) self._concatenated_samples = pd.concat(proc_samples_list)
complete_header = self._sorter[:] complete_header = self._sorter[:]
complete_header.insert(0,'Time') complete_header.insert(0,'Time')
complete_header.extend(shifted_cols_header) complete_header.extend(shifted_cols_header)
self._concatenated_samples = self._concatenated_samples[complete_header] self._concatenated_samples = self._concatenated_samples[complete_header]
def build_list_of_samples_array(self, concatenated_sample: typing.Union[pd.DataFrame, np.ndarray]) -> typing.List: def build_list_of_samples_array(self, concatenated_sample: pd.DataFrame) -> typing.List:
"""Builds a List containing the the delta times numpy array, and the complete transitions matrix """Builds a List containing the the delta times numpy array, and the complete transitions matrix
:param concatenated_sample: the dataframe/array from which the time, and transitions matrix have to be extracted :param concatenated_sample: the dataframe/array from which the time, and transitions matrix have to be extracted
and converted and converted
:type concatenated_sample: typing.Union[pandas.Dataframe, numpy.ndarray] :type concatenated_sample: pandas.Dataframe
:return: the resulting list of numpy arrays :return: the resulting list of numpy arrays
:rtype: List :rtype: List
""" """
if isinstance(concatenated_sample, pd.DataFrame):
concatenated_array = concatenated_sample.to_numpy() concatenated_array = concatenated_sample.to_numpy()
columns_list = [concatenated_array[:, 0], concatenated_array[:, 1:].astype(int)] columns_list = [concatenated_array[:, 0], concatenated_array[:, 1:].astype(int)]
else:
columns_list = [concatenated_sample[:, 0], concatenated_sample[:, 1:].astype(int)]
return columns_list return columns_list
def clear_concatenated_frame(self) -> None: def clear_concatenated_frame(self) -> None:

@ -0,0 +1,66 @@
import json
import typing
import pandas as pd
import numpy as np
import sys
sys.path.append('../')
import utility.abstract_importer as ai
class SampleImporter(ai.AbstractImporter):
    """Implements the abstract methods of :class:`AbstractImporter` for data that the
    caller has already loaded into memory (e.g. trajectories parsed from a json file),
    so no file reading happens here.

    :param trajectory_list: the trajectories to process; each element is a trajectory
        frame whose first column is the time label followed by the variables' values
    :type trajectory_list: typing.Union[pandas.DataFrame, numpy.ndarray]
    :param variables: Dataframe containing the nodes labels and cardinalities
    :type variables: pandas.DataFrame
    :param prior_net_structure: Dataframe containing the structure of the network (edges)
    :type prior_net_structure: pandas.DataFrame
    """

    def __init__(self, trajectory_list: typing.Union[pd.DataFrame, np.ndarray] = None,
                 variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None):
        """Constructor; unlike file-based importers no ``file_path`` is needed,
        since all the data is handed over directly.
        """
        super(SampleImporter, self).__init__(trajectory_list=trajectory_list,
                                             variables=variables,
                                             prior_net_structure=prior_net_structure)

    def import_data(self, header_column=None) -> None:
        """Processes the in-memory trajectories: initializes ``_sorter`` and builds
        the concatenated samples frame via ``compute_row_delta_in_all_samples_frames``.

        :param header_column: optional pre-computed header; the visible caller passes
            ``0`` (see the test suite), which routes through ``build_sorter``
        :type header_column: object
        """
        # NOTE(review): this branch looks inverted — when header_column is None the
        # sorter is set to None, while any non-None value (the test passes 0) is
        # discarded and the sorter is rebuilt from the first trajectory instead.
        # The visible caller relies on the current behavior, so it is kept as-is;
        # confirm the intended contract before changing the condition.
        if header_column is None:
            self._sorter = header_column
        else:
            self._sorter = self.build_sorter(self._df_samples_list[0])

        samples_list = self._df_samples_list

        # A numpy array of trajectory frames is accepted too; normalize it to a
        # plain list before handing it to the shared processing routine.
        if isinstance(samples_list, np.ndarray):
            samples_list = samples_list.tolist()

        self.compute_row_delta_in_all_samples_frames(samples_list)

    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
        """Implements the abstract method build_sorter of the :class:`AbstractImporter` for this dataset.

        :param sample_frame: a trajectory dataframe; its first column is the time
            label and the remaining columns are the variables
        :type sample_frame: pandas.DataFrame
        :return: the variables' labels in header order (time column removed)
        :rtype: List
        """
        columns_header = list(sample_frame.columns.values)
        # Drop the leading time label so only the variables' symbolic labels remain.
        del columns_header[0]
        return columns_header

    def dataset_id(self) -> object:
        # In-memory samples carry no file path or dataset name; intentionally a no-op.
        pass

@ -7,14 +7,17 @@ import unittest
import networkx as nx import networkx as nx
import numpy as np import numpy as np
import pandas as pd
import psutil import psutil
from line_profiler import LineProfiler from line_profiler import LineProfiler
import copy import copy
import json
import utility.cache as ch import utility.cache as ch
import structure_graph.sample_path as sp import structure_graph.sample_path as sp
import estimators.structure_score_based_estimator as se import estimators.structure_score_based_estimator as se
import utility.json_importer as ji import utility.json_importer as ji
import utility.sample_importer as si
@ -25,7 +28,24 @@ class TestTabuSearch(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
#cls.read_files = glob.glob(os.path.join('../../data', "*.json")) #cls.read_files = glob.glob(os.path.join('../../data', "*.json"))
cls.importer = ji.JsonImporter("../../data/networks_and_trajectories_ternary_data_5.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
with open("../../data/networks_and_trajectories_binary_data_01_3.json") as f:
raw_data = json.load(f)
trajectory_list_raw= raw_data[0]["samples"]
trajectory_list = [pd.DataFrame(sample) for sample in trajectory_list_raw]
variables= pd.DataFrame(raw_data[0]["variables"])
prior_net_structure = pd.DataFrame(raw_data[0]["dyn.str"])
cls.importer = si.SampleImporter(
trajectory_list=trajectory_list,
variables=variables,
prior_net_structure=prior_net_structure
)
cls.importer.import_data(0) cls.importer.import_data(0)
cls.s1 = sp.SamplePath(cls.importer) cls.s1 = sp.SamplePath(cls.importer)