From be5baf8b31fd0f65abc20cd8cbf8f4106f4b37c1 Mon Sep 17 00:00:00 2001
From: Filippo Martini
Date: Tue, 12 Jan 2021 15:55:34 +0100
Subject: [PATCH] Add SampleImporter class; Refactor AbstractImporter method

---
Notes:
  * AbstractImporter.__init__ takes `trajectories_list` and stores it in
    `_df_samples_list`; `_concatenated_samples` now starts as None.
  * build_list_of_samples_array and clear_concatenated_frame no longer branch
    on the argument type; they call the DataFrame API unconditionally.
  * SampleImporter (new file) converts `variables` and `prior_net_structure`
    to DataFrames when they are passed as a list or a numpy array.
  * The old body of test_build_list_of_samples_array is wrapped in a string
    literal and replaced by checks on the output of a JsonImporter.

 PyCTBN/classes/abstract_importer.py | 17 ++++----
 PyCTBN/classes/sample_importer.py   | 61 +++++++++++++++++++++++++++++
 PyCTBN/tests/test_json_importer.py  | 10 +++++
 3 files changed, 79 insertions(+), 9 deletions(-)
 create mode 100644 PyCTBN/classes/sample_importer.py

diff --git a/PyCTBN/classes/abstract_importer.py b/PyCTBN/classes/abstract_importer.py
index c7b7503..1a8e941 100644
--- a/PyCTBN/classes/abstract_importer.py
+++ b/PyCTBN/classes/abstract_importer.py
@@ -31,14 +31,15 @@ class AbstractImporter(ABC):
 
     """
 
-    def __init__(self, file_path: str = None, concatenated_samples: typing.Union[pd.DataFrame, np.ndarray] = None,
+    def __init__(self, file_path: str = None, trajectories_list: typing.Union[typing.List, np.ndarray] = None,
                  variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None):
         """Constructor
         """
         self._file_path = file_path
-        self._concatenated_samples = concatenated_samples
+        self._df_samples_list = trajectories_list
         self._df_variables = variables
         self._df_structure = prior_net_structure
+        self._concatenated_samples = None
         self._sorter = None
         super().__init__()
 
@@ -117,18 +118,16 @@ class AbstractImporter(ABC):
         :return: the resulting list of numpy arrays
         :rtype: List
         """
-        if isinstance(concatenated_sample, pd.DataFrame):
-            concatenated_array = concatenated_sample.to_numpy()
-            columns_list = [concatenated_array[:, 0], concatenated_array[:, 1:].astype(int)]
-        else:
-            columns_list = [concatenated_sample[:, 0], concatenated_sample[:, 1:].astype(int)]
+
+        concatenated_array = concatenated_sample.to_numpy()
+        columns_list = [concatenated_array[:, 0], concatenated_array[:, 1:].astype(int)]
+
         return columns_list
 
     def clear_concatenated_frame(self) -> None:
         """Removes all values in the dataframe concatenated_samples.
         """
-        if isinstance(self._concatenated_samples, pd.DataFrame):
-            self._concatenated_samples = self._concatenated_samples.iloc[0:0]
+        self._concatenated_samples = self._concatenated_samples.iloc[0:0]
 
     @abstractmethod
     def dataset_id(self) -> object:
diff --git a/PyCTBN/classes/sample_importer.py b/PyCTBN/classes/sample_importer.py
new file mode 100644
index 0000000..43df9d1
--- /dev/null
+++ b/PyCTBN/classes/sample_importer.py
@@ -0,0 +1,61 @@
+
+
+import numpy as np
+import pandas as pd
+import typing
+
+from .abstract_importer import AbstractImporter
+
+
+class SampleImporter(AbstractImporter):
+    """Implements the abstract methods of AbstractImporter and adds all the necessary methods to process and prepare
+    the data loaded directly by using DataFrame
+    :param trajectory_list: the data that describes the trajectories
+    :type trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List]
+    :param variables: the data that describes the variables with name and cardinality
+    :type variables: typing.Union[pd.DataFrame, np.ndarray, typing.List]
+    :param prior_net_structure: the data of the real structure, if it exists
+    :type prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List]
+    :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory
+    :_raw_data: The raw contents of the json file to import
+    :type _raw_data: List
+    """
+
+    def __init__(self,
+                 trajectory_list: typing.Union[typing.List, np.ndarray] = None,
+                 variables: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None,
+                 prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None):
+
+        # If the data are not DataFrames, they are converted here.
+        if isinstance(variables, (list, np.ndarray)):
+            variables = pd.DataFrame(variables)
+        if isinstance(prior_net_structure, (list, np.ndarray)):
+            prior_net_structure = pd.DataFrame(prior_net_structure)
+
+        super(SampleImporter, self).__init__(file_path=None, trajectories_list=trajectory_list,
+                                             variables=variables,
+                                             prior_net_structure=prior_net_structure)
+
+    def import_data(self, header_column = None):
+
+        if header_column is not None:
+            self._sorter = header_column
+        else:
+            self._sorter = self.build_sorter(self._df_samples_list[0])
+
+        samples_list= self._df_samples_list
+
+        if isinstance(samples_list, np.ndarray):
+            samples_list = samples_list.tolist()
+
+        self.compute_row_delta_in_all_samples_frames(samples_list)
+
+    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
+        """Implements the abstract method build_sorter of the :class:`AbstractImporter` in order to get the ordered variables list.
+        """
+        columns_header = list(sample_frame.columns.values)
+        del columns_header[0]
+        return columns_header
+
+    def dataset_id(self) -> object:
+        pass
\ No newline at end of file
diff --git a/PyCTBN/tests/test_json_importer.py b/PyCTBN/tests/test_json_importer.py
index fde0067..99f218c 100644
--- a/PyCTBN/tests/test_json_importer.py
+++ b/PyCTBN/tests/test_json_importer.py
@@ -121,6 +121,7 @@ class TestJsonImporter(unittest.TestCase):
         self.assertTrue(j1.concatenated_samples.empty)
 
     def test_build_list_of_samples_array(self):
+        """
         data_set = {"key1": [1, 2, 3], "key2": [4.1, 5.2, 6.3]}
         with open('data.json', 'w') as f:
             json.dump(data_set, f)
@@ -136,6 +137,15 @@ class TestJsonImporter(unittest.TestCase):
         for a1, a2 in zip(col_list, forced_list):
             self.assertTrue(np.array_equal(a1, a2))
         os.remove('data.json')
+        """
+        j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
+        j1.import_data(0)
+        times_state_changes_list = j1.build_list_of_samples_array(j1._concatenated_samples)
+        self.assertEqual(len(times_state_changes_list), 2)
+        self.assertIsInstance(times_state_changes_list[0], np.ndarray)
+        self.assertIsInstance(times_state_changes_list[1], np.ndarray)
+        self.assertIsInstance(times_state_changes_list[0][0], float)
+        self.assertIsInstance(times_state_changes_list[1][0][0], np.int64)
 
     def test_import_variables(self):
         j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')