
Add SampleImporter class; Refactor AbstractImporter method

better_develop
Filippo Martini 4 years ago
parent b9fc914962
commit be5baf8b31
  1. PyCTBN/classes/abstract_importer.py (11 changed lines)
  2. PyCTBN/classes/sample_importer.py (61 changed lines)
  3. PyCTBN/tests/test_json_importer.py (10 changed lines)

@@ -31,14 +31,15 @@ class AbstractImporter(ABC):
     """
-    def __init__(self, file_path: str = None, concatenated_samples: typing.Union[pd.DataFrame, np.ndarray] = None,
+    def __init__(self, file_path: str = None, trajectories_list: typing.Union[typing.List, np.ndarray] = None,
                  variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None):
         """Constructor
         """
         self._file_path = file_path
-        self._concatenated_samples = concatenated_samples
+        self._df_samples_list = trajectories_list
         self._df_variables = variables
         self._df_structure = prior_net_structure
+        self._concatenated_samples = None
         self._sorter = None
         super().__init__()
@@ -117,17 +118,15 @@ class AbstractImporter(ABC):
         :return: the resulting list of numpy arrays
         :rtype: List
         """
-        if isinstance(concatenated_sample, pd.DataFrame):
-            concatenated_array = concatenated_sample.to_numpy()
-            columns_list = [concatenated_array[:, 0], concatenated_array[:, 1:].astype(int)]
-        else:
-            columns_list = [concatenated_sample[:, 0], concatenated_sample[:, 1:].astype(int)]
+        concatenated_array = concatenated_sample.to_numpy()
+        columns_list = [concatenated_array[:, 0], concatenated_array[:, 1:].astype(int)]
         return columns_list

     def clear_concatenated_frame(self) -> None:
         """Removes all values in the dataframe concatenated_samples.
         """
-        self._concatenated_samples = self._concatenated_samples.iloc[0:0]
+        if isinstance(self._concatenated_samples, pd.DataFrame):
+            self._concatenated_samples = self._concatenated_samples.iloc[0:0]

     @abstractmethod
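The net effect of these two hunks: _concatenated_samples is no longer passed into the constructor, it starts as None and is only filled once the trajectories have been processed, so clear_concatenated_frame now checks the type before slicing. A minimal sketch of that guard (not part of the commit; the toy DataFrame below is assumed):

import pandas as pd

_concatenated_samples = None                      # state right after __init__
if isinstance(_concatenated_samples, pd.DataFrame):
    _concatenated_samples = _concatenated_samples.iloc[0:0]   # skipped: nothing to clear yet

# once trajectories have been concatenated, the attribute holds a DataFrame
_concatenated_samples = pd.DataFrame({'Time': [0.0, 1.0], 'X': [0, 1]})
if isinstance(_concatenated_samples, pd.DataFrame):
    _concatenated_samples = _concatenated_samples.iloc[0:0]   # emptied in place, columns kept
assert _concatenated_samples.empty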

@@ -0,0 +1,61 @@
+import numpy as np
+import pandas as pd
+import typing
+
+from .abstract_importer import AbstractImporter
+
+
+class SampleImporter(AbstractImporter):
+    """Implements the abstract methods of AbstractImporter and adds all the necessary methods to process and
+    prepare data loaded directly as DataFrames.
+
+    :param trajectory_list: the data that describes the trajectories
+    :type trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List]
+    :param variables: the data that describes the variables with name and cardinality
+    :type variables: typing.Union[pd.DataFrame, np.ndarray, typing.List]
+    :param prior_net_structure: the data of the real structure, if it exists
+    :type prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List]
+    :_df_samples_list: a DataFrame list in which every DataFrame contains a trajectory
+    """
+
+    def __init__(self,
+                 trajectory_list: typing.Union[typing.List, np.ndarray] = None,
+                 variables: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None,
+                 prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None):
+        # If the data are not DataFrames, they are converted
+        if isinstance(variables, list) or isinstance(variables, np.ndarray):
+            variables = pd.DataFrame(variables)
+        if isinstance(prior_net_structure, list) or isinstance(prior_net_structure, np.ndarray):
+            prior_net_structure = pd.DataFrame(prior_net_structure)
+        super(SampleImporter, self).__init__(file_path=None, trajectories_list=trajectory_list,
+                                             variables=variables,
+                                             prior_net_structure=prior_net_structure)
+
+    def import_data(self, header_column=None):
+        if header_column is not None:
+            self._sorter = header_column
+        else:
+            self._sorter = self.build_sorter(self._df_samples_list[0])
+        samples_list = self._df_samples_list
+        if isinstance(samples_list, np.ndarray):
+            samples_list = samples_list.tolist()
+        self.compute_row_delta_in_all_samples_frames(samples_list)
+
+    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
+        """Implements the abstract method build_sorter of the :class:`AbstractImporter` in order to get the
+        ordered variables list.
+        """
+        columns_header = list(sample_frame.columns.values)
+        del columns_header[0]
+        return columns_header
+
+    def dataset_id(self) -> object:
+        # no external dataset to identify when the data is passed in memory
+        pass
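A usage sketch for the new class, not taken from the commit: the import path is inferred from the file list above, and the trajectory column names plus the variables/structure encodings are placeholders; whatever format the rest of the library expects for them is not visible in this diff.

import pandas as pd
from PyCTBN.classes.sample_importer import SampleImporter   # path assumed from the file layout

# one DataFrame per trajectory; first column is the time axis, the rest are the variables
traj = pd.DataFrame({'Time': [0.0, 0.8, 1.9], 'X': [0, 1, 1], 'Y': [0, 0, 1]})

variables = [['X', 2], ['Y', 2]]        # name/cardinality rows (assumed encoding)
prior_net_structure = [['X', 'Y']]      # optional known edges (assumed encoding)

importer = SampleImporter(trajectory_list=[traj],
                          variables=variables,            # converted to a DataFrame in __init__
                          prior_net_structure=prior_net_structure)
importer.import_data()   # sorter taken from the first trajectory's columns, time column dropped

Passing header_column explicitly skips build_sorter and uses the given variable ordering instead.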

@@ -121,6 +121,7 @@ class TestJsonImporter(unittest.TestCase):
         self.assertTrue(j1.concatenated_samples.empty)

     def test_build_list_of_samples_array(self):
+        """
         data_set = {"key1": [1, 2, 3], "key2": [4.1, 5.2, 6.3]}
         with open('data.json', 'w') as f:
             json.dump(data_set, f)
@@ -136,6 +137,15 @@ class TestJsonImporter(unittest.TestCase):
         for a1, a2 in zip(col_list, forced_list):
             self.assertTrue(np.array_equal(a1, a2))
         os.remove('data.json')
+        """
+        j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
+        j1.import_data(0)
+        times_state_changes_list = j1.build_list_of_samples_array(j1._concatenated_samples)
+        self.assertEqual(len(times_state_changes_list), 2)
+        self.assertIsInstance(times_state_changes_list[0], np.ndarray)
+        self.assertIsInstance(times_state_changes_list[1], np.ndarray)
+        self.assertIsInstance(times_state_changes_list[0][0], float)
+        self.assertIsInstance(times_state_changes_list[1][0][0], np.int64)

     def test_import_variables(self):
         j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
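The rewritten test exercises build_list_of_samples_array directly on the importer's concatenated samples instead of a hand-built JSON file, whose old body is kept only as a docstring. A self-contained toy version of the same structural check (assumed data; the concrete integer dtype asserted in the test can vary by platform, so only the general structure is shown here):

import numpy as np
import pandas as pd

concatenated = pd.DataFrame({'Time': [0.0, 0.5, 1.2], 'X': [0, 1, 1], 'Y': [1, 1, 0]})
arr = concatenated.to_numpy()
times, states = arr[:, 0], arr[:, 1:].astype(int)   # the pair build_list_of_samples_array returns

assert times.ndim == 1 and isinstance(times[0], float)               # timestamps as floats
assert states.ndim == 2 and np.issubdtype(states.dtype, np.integer)  # states as integers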