1
0
Fork 0

Implemented Sample_Importer

master
Luca Moretti 4 years ago
parent 44a8f5a6dc
commit 2819d1f1a0
  1. 34
      main_package/classes/utility/abstract_importer.py
  2. 66
      main_package/classes/utility/sample_importer.py
  3. 22
      main_package/tests/optimizers/test_tabu_search.py

@ -5,14 +5,18 @@ from abc import ABC, abstractmethod
import numpy as np
import pandas as pd
import copy
from sklearn.utils import resample
class AbstractImporter(ABC):
"""Abstract class that exposes all the necessary methods to process the trajectories and the net structure.
:param file_path: the file path, or dataset name if you import already processed data
:type file_path: str
:param concatenated_samples: Dataframe or numpy array containing the concatenation of all the processed trajectories
:type concatenated_samples: typing.Union[pandas.DataFrame, numpy.ndarray]
:param trajectory_list: Dataframe or numpy array containing the concatenation of all the processed trajectories
:type trajectory_list: typing.Union[pandas.DataFrame, numpy.ndarray]
:param variables: Dataframe containing the nodes labels and cardinalities
:type variables: pandas.DataFrame
:prior_net_structure: Dataframe containing the structure of the network (edges)
@ -31,24 +35,25 @@ class AbstractImporter(ABC):
"""
def __init__(self, file_path: str = None, concatenated_samples: typing.Union[pd.DataFrame, np.ndarray] = None,
def __init__(self, file_path: str = None, trajectory_list: typing.Union[pd.DataFrame, np.ndarray] = None,
variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None):
"""Constructor
"""
self._file_path = file_path
self._concatenated_samples = concatenated_samples
self._df_samples_list = trajectory_list
self._concatenated_samples = []
self._df_variables = variables
self._df_structure = prior_net_structure
self._sorter = None
super().__init__()
@abstractmethod
def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
def build_sorter(self, trajecory_header: object) -> typing.List:
"""Initializes the ``_sorter`` class member from a trajectory dataframe, exctracting the header of the frame
and keeping ONLY the variables symbolic labels, cutting out the time label in the header.
:param sample_frame: The dataframe from which extract the header
:type sample_frame: pandas.DataFrame
:param trajecory_header: an object that will be used to define the header
:type trajecory_header: object
:return: A list containing the processed header.
:rtype: List
"""
@ -73,6 +78,7 @@ class AbstractImporter(ABC):
the Dataframe ``sample_frame`` has to follow the column structure of this header:
Header of sample_frame = [Time | Variable values]
"""
sample_frame = copy.deepcopy(sample_frame)
sample_frame.iloc[:, 0] = sample_frame.iloc[:, 0].diff().shift(-1)
shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32')
shifted_cols.columns = shifted_cols_header
@ -103,25 +109,25 @@ class AbstractImporter(ABC):
proc_samples_list = [compute_row_delta(sample, self._sorter, shifted_cols_header)
for sample in df_samples_list]
self._concatenated_samples = pd.concat(proc_samples_list)
complete_header = self._sorter[:]
complete_header.insert(0,'Time')
complete_header.extend(shifted_cols_header)
self._concatenated_samples = self._concatenated_samples[complete_header]
def build_list_of_samples_array(self, concatenated_sample: typing.Union[pd.DataFrame, np.ndarray]) -> typing.List:
def build_list_of_samples_array(self, concatenated_sample: pd.DataFrame) -> typing.List:
"""Builds a List containing the the delta times numpy array, and the complete transitions matrix
:param concatenated_sample: the dataframe/array from which the time, and transitions matrix have to be extracted
and converted
:type concatenated_sample: typing.Union[pandas.Dataframe, numpy.ndarray]
:type concatenated_sample: pandas.Dataframe
:return: the resulting list of numpy arrays
:rtype: List
"""
if isinstance(concatenated_sample, pd.DataFrame):
concatenated_array = concatenated_sample.to_numpy()
columns_list = [concatenated_array[:, 0], concatenated_array[:, 1:].astype(int)]
else:
columns_list = [concatenated_sample[:, 0], concatenated_sample[:, 1:].astype(int)]
concatenated_array = concatenated_sample.to_numpy()
columns_list = [concatenated_array[:, 0], concatenated_array[:, 1:].astype(int)]
return columns_list
def clear_concatenated_frame(self) -> None:

@ -0,0 +1,66 @@
import json
import typing
import pandas as pd
import numpy as np
import sys
sys.path.append('../')
import utility.abstract_importer as ai
class SampleImporter(ai.AbstractImporter):
    """Importer for trajectories that are already loaded in memory.

    Implements the abstract methods of :class:`AbstractImporter` for data that is handed over directly as
    objects; this class itself does not read any file.

    :param trajectory_list: the trajectories to process, forwarded unchanged to :class:`AbstractImporter`
        (presumably one frame per trajectory, with the time column first -- confirm against callers)
    :type trajectory_list: typing.Union[pandas.DataFrame, numpy.ndarray]
    :param variables: Dataframe containing the nodes labels and cardinalities
    :type variables: pandas.DataFrame
    :param prior_net_structure: Dataframe containing the structure of the network (edges)
    :type prior_net_structure: pandas.DataFrame
    """

    def __init__(self, trajectory_list: typing.Union[pd.DataFrame, np.ndarray] = None,
                 variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None):
        """Constructor: forwards every argument to :class:`AbstractImporter`.
        """
        super().__init__(trajectory_list=trajectory_list,
                         variables=variables,
                         prior_net_structure=prior_net_structure)

    def import_data(self, header_column=None):
        """Initializes ``_sorter`` and then processes all the trajectories held by the importer by calling
        ``compute_row_delta_in_all_samples_frames``.

        :param header_column: optional collection of variables labels to use as ``_sorter``. When it is ``None``
            or a scalar value (such as ``0``) the labels are extracted from the first trajectory instead.
        """
        if header_column is None or np.isscalar(header_column):
            # No usable header was supplied: derive it from the first trajectory.
            self._sorter = self.build_sorter(self._df_samples_list[0])
        else:
            # Stored as a plain list, the same type that build_sorter returns.
            self._sorter = list(header_column)

        samples_list = self._df_samples_list
        if isinstance(samples_list, np.ndarray):
            samples_list = samples_list.tolist()
        self.compute_row_delta_in_all_samples_frames(samples_list)

    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
        """Implements the abstract method build_sorter of the :class:`AbstractImporter` for this dataset.

        :param sample_frame: the dataframe from which the header is extracted
        :type sample_frame: pandas.DataFrame
        :return: the column labels of ``sample_frame`` without the first one
        :rtype: List
        """
        # The first column is dropped; slicing also copes with a frame that has no columns.
        return list(sample_frame.columns.values)[1:]

    def dataset_id(self) -> object:
        """Placeholder implementation: no identifier is defined for this importer.

        :return: always ``None``
        """
        return None

@ -7,14 +7,17 @@ import unittest
import networkx as nx
import numpy as np
import pandas as pd
import psutil
from line_profiler import LineProfiler
import copy
import json
import utility.cache as ch
import structure_graph.sample_path as sp
import estimators.structure_score_based_estimator as se
import utility.json_importer as ji
import utility.sample_importer as si
@ -25,7 +28,24 @@ class TestTabuSearch(unittest.TestCase):
@classmethod
def setUpClass(cls):
#cls.read_files = glob.glob(os.path.join('../../data', "*.json"))
cls.importer = ji.JsonImporter("../../data/networks_and_trajectories_ternary_data_5.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
with open("../../data/networks_and_trajectories_binary_data_01_3.json") as f:
raw_data = json.load(f)
trajectory_list_raw= raw_data[0]["samples"]
trajectory_list = [pd.DataFrame(sample) for sample in trajectory_list_raw]
variables= pd.DataFrame(raw_data[0]["variables"])
prior_net_structure = pd.DataFrame(raw_data[0]["dyn.str"])
cls.importer = si.SampleImporter(
trajectory_list=trajectory_list,
variables=variables,
prior_net_structure=prior_net_structure
)
cls.importer.import_data(0)
cls.s1 = sp.SamplePath(cls.importer)