diff --git a/PyCTBN/PyCTBN/abstract_importer.py b/PyCTBN/PyCTBN/abstract_importer.py index 283841a..ff57d5e 100644 --- a/PyCTBN/PyCTBN/abstract_importer.py +++ b/PyCTBN/PyCTBN/abstract_importer.py @@ -13,6 +13,14 @@ class AbstractImporter(ABC): :_df_structure: Dataframe containing the structure of the network (edges) :_df_variables: Dataframe containing the nodes cardinalities :_sorter: A list containing the columns header (excluding the time column) of the `_concatenated_samples` + + .. warning:: + The class members ``_df_variables`` and ``_df_structure`` HAVE to be properly constructed + as Pandas Dataframes with the following structure: + Header of _df_structure = [From_Node | To_Node] + Header of _df_variables = [Variable_Label | Variable_Cardinality] + .. note:: + See :class:``JsonImporter`` for an example implementation """ def __init__(self, file_path: str): @@ -24,21 +32,7 @@ class AbstractImporter(ABC): self._concatenated_samples = None self._sorter = None super().__init__() - - @abstractmethod - def import_data(self) -> None: - """Imports all the trajectories, variables cardinalities, and net edges. - - .. warning:: - The class members ``_df_variables`` and ``_df_structure`` HAVE to be properly constructed - as Pandas Dataframes with the following structure: - Header of _df_structure = [From_Node | To_Node] - Header of _df_variables = [Variable_Label | Variable_Cardinality] - .. note:: - See :class:``JsonImporter`` for an example of implementation of this method. 
- """ - pass - + @abstractmethod def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List: """Initializes the ``_sorter`` class member from a trajectory dataframe, exctracting the header of the frame diff --git a/PyCTBN/PyCTBN/json_importer.py b/PyCTBN/PyCTBN/json_importer.py index 283057d..7579492 100644 --- a/PyCTBN/PyCTBN/json_importer.py +++ b/PyCTBN/PyCTBN/json_importer.py @@ -8,8 +8,8 @@ from .abstract_importer import AbstractImporter class JsonImporter(AbstractImporter): - """Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare the data in json ext. - with the following structure: + """Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare + the data in json extension with the following structure: [0] |_ dyn.cims |_ dyn.str @@ -27,14 +27,20 @@ class JsonImporter(AbstractImporter): :type time_key: string :param variables_key: the key used to identify the names of the variables in the net :type variables_key: string - :param array_indx: the index of the outer JsonArray to exctract the data from - :type array_indx: int + :_array_indx: the index of the outer JsonArray to extract the data from + :type _array_indx: int :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory + :_raw_data: The raw contents of the json file to import + :type _raw_data: List """ def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str, - variables_key: str, array_indx: int): + variables_key: str): """Constructor method + + .. note:: + This constructor calls also the method ``read_json_file()``, so after the construction of the object + the class member ``_raw_data`` will contain the raw imported json data. 
""" self._samples_label = samples_label self._structure_label = structure_label @@ -42,19 +48,23 @@ class JsonImporter(AbstractImporter): self._time_key = time_key self._variables_key = variables_key self._df_samples_list = None - self._array_indx = array_indx + self._array_indx = None super(JsonImporter, self).__init__(file_path) + self._raw_data = self.read_json_file() + + def import_data(self, indx: int) -> None: + """Implements the abstract method of :class:`AbstractImporter`. - def import_data(self) -> None: - """Implements the abstract method of :class:`AbstractImporter` + :param indx: the index of the outer JsonArray to extract the data from + :type indx: int """ - raw_data = self.read_json_file() - self._df_samples_list = self.import_trajectories(raw_data) + self._array_indx = indx + self._df_samples_list = self.import_trajectories(self._raw_data) self._sorter = self.build_sorter(self._df_samples_list[0]) self.compute_row_delta_in_all_samples_frames(self._df_samples_list) self.clear_data_frame_list() - self._df_structure = self.import_structure(raw_data) - self._df_variables = self.import_variables(raw_data) + self._df_structure = self.import_structure(self._raw_data) + self._df_variables = self.import_variables(self._raw_data) def import_trajectories(self, raw_data: typing.List) -> typing.List: """Imports the trajectories from the list of dicts ``raw_data``. @@ -87,7 +97,7 @@ class JsonImporter(AbstractImporter): return self.one_level_normalizing(raw_data, self._array_indx, self._variables_label) def read_json_file(self) -> typing.List: - """Reads the JSON file in the path self.filePath + """Reads the JSON file in the path self.filePath. 
:return: The contents of the json file :rtype: List diff --git a/PyCTBN/PyCTBN/original_ctpc_algorithm.py b/PyCTBN/PyCTBN/original_ctpc_algorithm.py index 45e539a..d1558e6 100644 --- a/PyCTBN/PyCTBN/original_ctpc_algorithm.py +++ b/PyCTBN/PyCTBN/original_ctpc_algorithm.py @@ -1,12 +1,10 @@ -import glob + import json -import os from itertools import combinations import typing import numpy as np import pandas as pd -from line_profiler import LineProfiler from scipy.stats import chi2 as chi2_dist from scipy.stats import f as f_dist from tqdm import tqdm @@ -36,7 +34,7 @@ class OriginalCTPCAlgorithm(AbstractImporter): pass def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str, - variables_key: str, array_indx: int): + variables_key: str, raw_data: typing.List): """ Parameters: file_path: the path of the file that contains tha data to be imported @@ -53,11 +51,12 @@ class OriginalCTPCAlgorithm(AbstractImporter): self.variables_key = variables_key self.df_samples_list = None self.trajectories = None - self._array_indx = array_indx + self._array_indx = None self.matrix = None super(OriginalCTPCAlgorithm, self).__init__(file_path) + self._raw_data = raw_data - def import_data(self): + def import_data(self, indx): """ Imports and prepares all data present needed for subsequent processing. 
Parameters: @@ -65,14 +64,16 @@ class OriginalCTPCAlgorithm(AbstractImporter): Returns: _void """ - raw_data = self.read_json_file() - self.df_samples_list = self.import_trajectories(raw_data) + self._array_indx = indx + self.df_samples_list = self.import_trajectories(self._raw_data) self._sorter = self.build_sorter(self.df_samples_list[0]) #self.compute_row_delta_in_all_samples_frames(self._df_samples_list) #self.clear_data_frame_list() - self._df_structure = self.import_structure(raw_data) - self._df_variables = self.import_variables(raw_data, self._sorter) + self._df_structure = self.import_structure(self._raw_data) + self._df_variables = self.import_variables(self._raw_data, self._sorter) + def datasets_numb(self): + return len(self._raw_data) def import_trajectories(self, raw_data: typing.List): """ @@ -107,15 +108,7 @@ class OriginalCTPCAlgorithm(AbstractImporter): :Datframe containg the variables simbolic labels and their cardinalities """ return self.one_level_normalizing(raw_data, self._array_indx, self.variables_label) - #TODO Usando come Pre-requisito l'ordinamento del frame _df_variables uguale a quello presente in - #TODO self _sorter questo codice risulta inutile - """self._df_variables[self._variables_key] = self._df_variables[self._variables_key].astype("category") - self._df_variables[self._variables_key] = self._df_variables[self._variables_key].cat.set_categories(sorter) - self._df_variables = self._df_variables.sort_values([self._variables_key]) - self._df_variables.reset_index(inplace=True) - self._df_variables.drop('index', axis=1, inplace=True) - #print("Var Frame", self._df_variables) - """ + def read_json_file(self) -> typing.List: """ @@ -160,9 +153,6 @@ class OriginalCTPCAlgorithm(AbstractImporter): smps = raw_data[indx][trajectories_key] df_samples_list = [dataframe(sample) for sample in smps] return df_samples_list - #columns_header = list(self._df_samples_list[0].columns.values) - #columns_header.remove(self._time_key) - #self._sorter 
= columns_header def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List: """ diff --git a/PyCTBN/PyCTBN/sample_path.py b/PyCTBN/PyCTBN/sample_path.py index 1d6c6c0..232822e 100644 --- a/PyCTBN/PyCTBN/sample_path.py +++ b/PyCTBN/PyCTBN/sample_path.py @@ -12,20 +12,25 @@ class SamplePath: cardinalites. Has the task of creating the objects ``Trajectory`` and ``Structure`` that will contain the mentioned data. - :param importer: the Importer objects that will import ad process data + :param importer: the Importer object which contains the imported and processed data :type importer: AbstractImporter :_trajectories: the ``Trajectory`` object that will contain all the concatenated trajectories - :_structure: the ``Structure`` Object that will contain all the structurral infos about the net + :_structure: the ``Structure`` Object that will contain all the structural infos about the net :_total_variables_count: the number of variables in the net """ def __init__(self, importer: AbstractImporter): """Constructor Method """ self._importer = importer + if (self._importer._df_variables is None or self._importer._df_structure is None + or self._importer._concatenated_samples is None): + raise RuntimeError('The importer object has to contain the all processed data!') + if(self._importer._df_variables.empty or self._importer._df_structure.empty + or self._importer._concatenated_samples.empty): + raise RuntimeError('The importer object has to contain the all processed data!') self._trajectories = None self._structure = None self._total_variables_count = None - self._importer.import_data() def build_trajectories(self) -> None: """Builds the Trajectory object that will contain all the trajectories. 
@@ -60,7 +65,7 @@ class SamplePath: return self._structure @property - def total_variables_count(self): + def total_variables_count(self) -> int: return self._total_variables_count diff --git a/PyCTBN/PyCTBN/simple_cvs_importer.py b/PyCTBN/PyCTBN/simple_cvs_importer.py index 2f04fb6..196ab73 100644 --- a/PyCTBN/PyCTBN/simple_cvs_importer.py +++ b/PyCTBN/PyCTBN/simple_cvs_importer.py @@ -4,8 +4,6 @@ import os import typing -#import abstract_importer as ai -#import sample_path as sp from .abstract_importer import AbstractImporter from .sample_path import SamplePath diff --git a/PyCTBN/tests/test_json_importer.py b/PyCTBN/tests/test_json_importer.py index f806ebd..4bbbe6d 100644 --- a/PyCTBN/tests/test_json_importer.py +++ b/PyCTBN/tests/test_json_importer.py @@ -6,6 +6,7 @@ import numpy as np import pandas as pd import json + from ..PyCTBN.json_importer import JsonImporter @@ -14,10 +15,9 @@ class TestJsonImporter(unittest.TestCase): @classmethod def setUpClass(cls) -> None: cls.read_files = glob.glob(os.path.join('./data', "*.json")) - #print(os.path.join('../data')) def test_init(self): - j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) + j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') self.assertEqual(j1._samples_label, 'samples') self.assertEqual(j1._structure_label, 'dyn.str') self.assertEqual(j1._variables_label, 'variables') @@ -29,6 +29,8 @@ class TestJsonImporter(unittest.TestCase): self.assertIsNone(j1.structure) self.assertIsNone(j1.concatenated_samples) self.assertIsNone(j1.sorter) + self.assertIsNone(j1._array_indx) + self.assertIsInstance(j1._raw_data, list) def test_read_json_file_found(self): data_set = {"key1": [1, 2, 3], "key2": [4, 5, 6]} @@ -36,39 +38,37 @@ class TestJsonImporter(unittest.TestCase): json.dump(data_set, f) path = os.getcwd() path = path + '/data.json' - j1 = JsonImporter(path, '', '', '', '', '', 0) - imported_data = j1.read_json_file() - 
self.assertTrue(self.ordered(data_set) == self.ordered(imported_data)) + j1 = JsonImporter(path, '', '', '', '', '') + #imported_data = j1.read_json_file() + self.assertTrue(self.ordered(data_set) == self.ordered(j1._raw_data)) os.remove('data.json') def test_read_json_file_not_found(self): path = os.getcwd() path = path + '/data.json' - j1 = JsonImporter(path, '', '', '', '', '', 0) - self.assertRaises(FileNotFoundError, j1.read_json_file) + #j1 = JsonImporter(path, '', '', '', '', '') + self.assertRaises(FileNotFoundError, JsonImporter, path, '', '', '', '', '') def test_normalize_trajectories(self): - j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) - raw_data = j1.read_json_file() + j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') + #raw_data = j1.read_json_file() #print(raw_data) - df_samples_list = j1.normalize_trajectories(raw_data, 0, j1._samples_label) - self.assertEqual(len(df_samples_list), len(raw_data[0][j1._samples_label])) - #self.assertEqual(list(j1._df_samples_list[0].columns.values)[1:], j1.sorter) + df_samples_list = j1.normalize_trajectories(j1._raw_data, 0, j1._samples_label) + self.assertEqual(len(df_samples_list), len(j1._raw_data[0][j1._samples_label])) def test_normalize_trajectories_wrong_indx(self): - j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) - raw_data = j1.read_json_file() - self.assertRaises(IndexError, j1.normalize_trajectories, raw_data, 474, j1._samples_label) + j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') + self.assertRaises(IndexError, j1.normalize_trajectories, j1._raw_data, 474, j1._samples_label) def test_normalize_trajectories_wrong_key(self): - j1 = JsonImporter(self.read_files[0], 'sample', 'dyn.str', 'variables', 'Time', 'Name', 0) - raw_data = j1.read_json_file() - self.assertRaises(KeyError, j1.normalize_trajectories, raw_data, 0, 
j1._samples_label) + j1 = JsonImporter(self.read_files[0], 'sample', 'dyn.str', 'variables', 'Time', 'Name') + self.assertRaises(KeyError, j1.normalize_trajectories, j1._raw_data, 0, j1._samples_label) def test_compute_row_delta_single_samples_frame(self): - j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) - raw_data = j1.read_json_file() - j1._df_samples_list = j1.import_trajectories(raw_data) + j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') + #raw_data = j1.read_json_file() + j1._array_indx = 0 + j1._df_samples_list = j1.import_trajectories(j1._raw_data) sample_frame = j1._df_samples_list[0] original_copy = sample_frame.copy() columns_header = list(sample_frame.columns.values) @@ -88,9 +88,10 @@ class TestJsonImporter(unittest.TestCase): np.array(original_copy.iloc[indx + 1][columns_header[1:]], dtype=int)) def test_compute_row_delta_in_all_frames(self): - j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) - raw_data = j1.read_json_file() - j1._df_samples_list = j1.import_trajectories(raw_data) + j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') + #raw_data = j1.read_json_file() + j1._array_indx = 0 + j1._df_samples_list = j1.import_trajectories(j1._raw_data) j1._sorter = j1.build_sorter(j1._df_samples_list[0]) j1.compute_row_delta_in_all_samples_frames(j1._df_samples_list) self.assertEqual(list(j1._df_samples_list[0].columns.values), @@ -98,9 +99,10 @@ class TestJsonImporter(unittest.TestCase): self.assertEqual(list(j1.concatenated_samples.columns.values)[0], j1._time_key) def test_clear_data_frame_list(self): - j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) - raw_data = j1.read_json_file() - j1._df_samples_list = j1.import_trajectories(raw_data) + j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') + #raw_data = 
j1.read_json_file() + j1._array_indx = 0 + j1._df_samples_list = j1.import_trajectories(j1._raw_data) j1._sorter = j1.build_sorter(j1._df_samples_list[0]) j1.compute_row_delta_in_all_samples_frames(j1._df_samples_list) j1.clear_data_frame_list() @@ -108,8 +110,8 @@ class TestJsonImporter(unittest.TestCase): self.assertTrue(df.empty) def test_clear_concatenated_frame(self): - j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) - j1.import_data() + j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') + j1.import_data(0) j1.clear_concatenated_frame() self.assertTrue(j1.concatenated_samples.empty) @@ -119,7 +121,7 @@ class TestJsonImporter(unittest.TestCase): json.dump(data_set, f) path = os.getcwd() path = path + '/data.json' - j1 = JsonImporter(path, '', '', '', '', '', 0) + j1 = JsonImporter(path, '', '', '', '', '') raw_data = j1.read_json_file() frame = pd.DataFrame(raw_data) col_list = j1.build_list_of_samples_array(frame) @@ -131,22 +133,25 @@ class TestJsonImporter(unittest.TestCase): os.remove('data.json') def test_import_variables(self): - j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) + j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') sorter = ['X', 'Y', 'Z'] raw_data = [{'variables':{"Name": ['X', 'Y', 'Z'], "value": [3, 3, 3]}}] + j1._array_indx = 0 df_var = j1.import_variables(raw_data) self.assertEqual(list(df_var[j1._variables_key]), sorter) def test_import_structure(self): - j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) + j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') raw_data = [{"dyn.str":[{"From":"X","To":"Z"},{"From":"Y","To":"Z"},{"From":"Z","To":"Y"}]}] + j1._array_indx = 0 df_struct = j1.import_structure(raw_data) #print(raw_data[0]['dyn.str'][0].items()) self.assertIsInstance(df_struct, pd.DataFrame) 
def test_import_sampled_cims(self): - j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) + j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') raw_data = j1.read_json_file() + j1._array_indx = 0 j1._df_samples_list = j1.import_trajectories(raw_data) j1._sorter = j1.build_sorter(j1._df_samples_list[0]) cims = j1.import_sampled_cims(raw_data, 0, 'dyn.cims') @@ -154,8 +159,8 @@ class TestJsonImporter(unittest.TestCase): self.assertEqual(list(cims.keys()), j1.sorter) def test_import_data(self): - j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 2) - j1.import_data() + j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') + j1.import_data(0) self.assertEqual(list(j1.variables[j1._variables_key]), list(j1.concatenated_samples.columns.values[1:len(j1.variables[j1._variables_key]) + 1])) print(j1.variables) diff --git a/PyCTBN/tests/test_networkgraph.py b/PyCTBN/tests/test_networkgraph.py index 6bb819b..db1f727 100644 --- a/PyCTBN/tests/test_networkgraph.py +++ b/PyCTBN/tests/test_networkgraph.py @@ -15,7 +15,8 @@ class TestNetworkGraph(unittest.TestCase): @classmethod def setUpClass(cls): cls.read_files = glob.glob(os.path.join('./data', "*.json")) - cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) + cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') + cls.importer.import_data(0) cls.s1 = SamplePath(cls.importer) cls.s1.build_trajectories() cls.s1.build_structure() diff --git a/PyCTBN/tests/test_parameters_estimator.py b/PyCTBN/tests/test_parameters_estimator.py index 9314f84..44a9059 100644 --- a/PyCTBN/tests/test_parameters_estimator.py +++ b/PyCTBN/tests/test_parameters_estimator.py @@ -17,8 +17,8 @@ class TestParametersEstimatior(unittest.TestCase): def setUpClass(cls) -> None: cls.read_files = 
glob.glob(os.path.join('./data', "*.json")) cls.array_indx = 0 - cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', - cls.array_indx) + cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') + cls.importer.import_data(cls.array_indx) cls.s1 = SamplePath(cls.importer) cls.s1.build_trajectories() cls.s1.build_structure() @@ -58,7 +58,7 @@ class TestParametersEstimatior(unittest.TestCase): self.assertTrue(np.all(np.isclose(r1, r2, 1e-01, 1e-01) == True)) def aux_import_sampled_cims(self, cims_label): - i1 = JsonImporter(self.read_files[0], '', '', '', '', '', self.array_indx) + i1 = JsonImporter(self.read_files[0], '', '', '', '', '') raw_data = i1.read_json_file() return i1.import_sampled_cims(raw_data, self.array_indx, cims_label) diff --git a/PyCTBN/tests/test_sample_path.py b/PyCTBN/tests/test_sample_path.py index e2f10b1..c372781 100644 --- a/PyCTBN/tests/test_sample_path.py +++ b/PyCTBN/tests/test_sample_path.py @@ -14,22 +14,31 @@ class TestSamplePath(unittest.TestCase): @classmethod def setUpClass(cls) -> None: cls.read_files = glob.glob(os.path.join('./data', "*.json")) - cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) + + def test_init_not_initialized_importer(self): + importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') + self.assertRaises(RuntimeError, SamplePath, importer) def test_init(self): - s1 = SamplePath(self.importer) + importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') + importer.import_data(0) + s1 = SamplePath(importer) self.assertIsNone(s1.trajectories) self.assertIsNone(s1.structure) self.assertFalse(s1._importer.concatenated_samples.empty) self.assertIsNone(s1._total_variables_count) def test_build_trajectories(self): - s1 = SamplePath(self.importer) + importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 
'variables', 'Time', 'Name') + importer.import_data(0) + s1 = SamplePath(importer) s1.build_trajectories() self.assertIsInstance(s1.trajectories, Trajectory) def test_build_structure(self): - s1 = SamplePath(self.importer) + importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') + importer.import_data(0) + s1 = SamplePath(importer) s1.build_structure() self.assertIsInstance(s1.structure, Structure) self.assertEqual(s1._total_variables_count, len(s1._importer.sorter)) diff --git a/PyCTBN/tests/test_structure_estimator.py b/PyCTBN/tests/test_structure_estimator.py index ca21cab..fe2e140 100644 --- a/PyCTBN/tests/test_structure_estimator.py +++ b/PyCTBN/tests/test_structure_estimator.py @@ -21,7 +21,8 @@ class TestStructureEstimator(unittest.TestCase): @classmethod def setUpClass(cls): cls.read_files = glob.glob(os.path.join('./data', "*.json")) - cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) + cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') + cls.importer.import_data(0) cls.s1 = SamplePath(cls.importer) cls.s1.build_trajectories() cls.s1.build_structure() diff --git a/basic_main.py b/basic_main.py index f8124d7..17cc15a 100644 --- a/basic_main.py +++ b/basic_main.py @@ -12,7 +12,8 @@ from PyCTBN.parameters_estimator import ParametersEstimator def main(): read_files = glob.glob(os.path.join('./data', "*.json")) #Take all json files in this dir #import data - importer = JsonImporter(read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 1) + importer = JsonImporter(read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') + importer.import_data(0) #Create a SamplePath Obj s1 = SamplePath(importer) #Build The trajectries and the structural infos diff --git a/documentation/rst/abstract_importer.rst b/documentation/rst/abstract_importer.rst index 3283578..5289911 100644 --- 
a/documentation/rst/abstract_importer.rst +++ b/documentation/rst/abstract_importer.rst @@ -5,3 +5,44 @@ abstract\_importer module :members: :undoc-members: :show-inheritance: + +An example of a simple CSV Importer +=================================== +Suppose you have a csv dataset containing only the trajectories, three variables labels and cardinalities. +Then the resulting importer that inherits from and extends AbstractImporter would be: + +.. code-block:: python + class CSVImporter(AbstractImporter): + + def __init__(self, file_path): + self._df_samples_list = None + super(CSVImporter, self).__init__(file_path) + + def import_data(self): + self.read_csv_file() + self._sorter = self.build_sorter(self._df_samples_list[0]) + self.import_variables() + self.import_structure() + self.compute_row_delta_in_all_samples_frames(self._df_samples_list) + + def read_csv_file(self): + df = pd.read_csv(self._file_path) + df.drop(df.columns[[0]], axis=1, inplace=True) + self._df_samples_list = [df] + + def import_variables(self): + values_list = [3 for var in self._sorter] + # initialize dict of lists + data = {'Name':self._sorter, 'Value':values_list} + # Create the pandas DataFrame + self._df_variables = pd.DataFrame(data) + + def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List: + return list(sample_frame.columns)[1:] + + def import_structure(self): + data = {'From':['X','Y','Z'], 'To':['Z','Z','Y']} + self._df_structure = pd.DataFrame(data) + + def dataset_id(self) -> object: + pass