diff --git a/main_package/classes/abstract_importer.py b/main_package/classes/abstract_importer.py
index cf34738..b09f2a9 100644
--- a/main_package/classes/abstract_importer.py
+++ b/main_package/classes/abstract_importer.py
@@ -5,17 +5,22 @@
 import typing
 
 
 class AbstractImporter(ABC):
     """
-    Interface that exposes all the necessary methods to import the trajectories and the net structure.
+    Abstract class that exposes all the necessary methods to process the trajectories and the net structure.
 
     :file_path: the file path
     :_concatenated_samples: the concatenation of all the processed trajectories
     :df_structure: Dataframe containing the structure of the network (edges)
     :df_variables: Dataframe containing the nodes cardinalities
-    :df_concatenated_samples: the concatenation and processing of all the trajectories present in the list df_samples list
+    :df_concatenated_samples: the concatenation and processing of all the trajectories present
+        in the df_samples list
     :sorter: the columns header (excluding the time column) of the Dataframe concatenated_samples
 
     """
 
     def __init__(self, file_path: str):
+        """
+        Parameters:
+            :file_path: the path to the file containing the data
+        """
         self.file_path = file_path
         self._df_variables = None
         self._df_structure = None
@@ -23,26 +28,30 @@ class AbstractImporter(ABC):
         self._sorter = None
         super().__init__()
 
-    """
     @abstractmethod
-    def import_trajectories(self, raw_data):
-        pass
-
-    @abstractmethod
-    def import_structure(self, raw_data):
+    def import_data(self):
+        """
+        Imports and prepares all the data needed for subsequent computation.
+        Parameters:
+            :void
+        Returns:
+            :void
+        post[self]: the class members self._df_variables and self._df_structure HAVE to be properly constructed
+            as Pandas Dataframes with the following structure:
+            Header of self._df_structure = [From_Node | To_Node]
+            Header of self._df_variables = [Variable_Label | Variable_Cardinality]
+        """
        pass
-    """
 
     @abstractmethod
-    def import_data(self):
+    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
         """
-        Imports and prepares all data present needed for susequent computation.
+        Initializes the self._sorter class member from a trajectory dataframe, extracting the header of the frame
+        and keeping ONLY the variables' symbolic labels, cutting out the time label in the header.
         Parameters:
-            void
+            :sample_frame: the dataframe from which to extract the header
         Returns:
-            void
-        POSTCONDITION: the class members self._df_variables and self._df_structure HAVE to be properly constructed
-        as Pandas Dataframes
+            :a list containing the processed header
         """
         pass
 
@@ -52,16 +61,15 @@ class AbstractImporter(ABC):
         """
         Computes the difference between each value present in the time column.
         Copies and shifts up by one position all the values present in the remaining columns.
-        PREREQUISITE: the Dataframe in input has to follow the column structure of this header:
-        [Time|Variable values], so it is assumed TIME is ALWAYS the FIRST column.
         Parameters:
-            sample_frame: the traj to be processed
-            time_header_label: the label for the times
-            columns_header: the original header of sample_frame
-            shifted_cols_header: a copy of columns_header with changed names of the contents
+            :sample_frame: the trajectory to be processed
+            :time_header_label: the label for the times
+            :columns_header: the original header of sample_frame
+            :shifted_cols_header: a copy of columns_header with changed names of the contents
         Returns:
-            sample_frame: the processed dataframe
-
+            :sample_frame: the processed dataframe
+        pre: the Dataframe sample_frame has to follow the column structure of this header:
+            Header of sample_frame = [Time | Variable values]
         """
         #sample_frame[time_header_label] = sample_frame[time_header_label].diff().shift(-1)
         sample_frame.iloc[:, 0] = sample_frame.iloc[:, 0].diff().shift(-1)
@@ -75,16 +83,18 @@ class AbstractImporter(ABC):
         """
         Calls the method compute_row_delta_sigle_samples_frame on every dataframe present in the list df_samples_list.
         Concatenates the result in the dataframe concatenated_samples
-        PREREQUISITE: the Dataframe in input has to follow the column structure of this header:
-        [Time|Variable values], so it is assumed TIME is ALWAYS the FIRST column.
-        The class member self._sorter HAS to be properly INITIALIZED
         Parameters:
             time_header_label: the label of the time column
             df_samples_list: the dataframes' list to be processed and concatenated
         Returns:
             void
+        pre: the Dataframe sample_frame has to follow the column structure of this header:
+            Header of sample_frame = [Time | Variable values]
+            The class member self._sorter HAS to be properly INITIALIZED (See class members definition doc)
         """
+        if not self.sorter:
+            raise RuntimeError("The class member self._sorter has to be INITIALIZED!")
         shifted_cols_header = [s + "S" for s in self._sorter]
         compute_row_delta = self.compute_row_delta_sigle_samples_frame
         proc_samples_list = [compute_row_delta(sample, self._sorter, shifted_cols_header)
@@ -112,9 +122,9 @@ class AbstractImporter(ABC):
         """
         Removes all values in the dataframe concatenated_samples
         Parameters:
-            void
+            :void
         Returns:
-            void
+            :void
         """
         self._concatenated_samples = self._concatenated_samples.iloc[0:0]
 
@@ -131,5 +141,5 @@ class AbstractImporter(ABC):
         return self._df_structure
 
     @property
-    def sorter(self):
+    def sorter(self) -> typing.List:
         return self._sorter
diff --git a/main_package/classes/json_importer.py b/main_package/classes/json_importer.py
index 675ee06..22f3563 100644
--- a/main_package/classes/json_importer.py
+++ b/main_package/classes/json_importer.py
@@ -1,7 +1,6 @@
 import json
 import typing
-
 import pandas as pd
 
 import abstract_importer as ai
@@ -9,9 +8,9 @@ import abstract_importer as ai
 
 class JsonImporter(ai.AbstractImporter):
     """
-    Implements the Interface AbstractImporter and adds all the necessary methods to process and prepare the data in json ext.
+    Implements the abstract methods of AbstractImporter and adds all the necessary methods to process and prepare the data in JSON extension
     with the following structure:
-    [] 0
+    [0]
         |_ dyn.cims
         |_ dyn.str
         |_ samples
@@ -27,28 +26,38 @@ class JsonImporter(ai.AbstractImporter):
 
     def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str,
                  time_key: str, variables_key: str):
+        """
+        Parameters:
+            :file_path: the path of the file that contains the data to be imported
+            :samples_label: the reference key for the samples in the trajectories
+            :structure_label: the reference key for the structure of the network data
+            :variables_label: the reference key for the cardinalities of the nodes data
+            :time_key: the key used to identify the timestamps in each trajectory
+            :variables_key: the key used to identify the names of the variables in the net
+        """
         self.samples_label = samples_label
         self.structure_label = structure_label
         self.variables_label = variables_label
         self.time_key = time_key
         self.variables_key = variables_key
-        self.df_samples_list = []
+        self.df_samples_list = None
         super(JsonImporter, self).__init__(file_path)
 
     def import_data(self):
         """
-        Imports and prepares all data present needed for susequent computation.
+        Imports and prepares all the data needed for subsequent processing.
         Parameters:
-            void
+            :void
         Returns:
-            void
+            :void
         """
         raw_data = self.read_json_file()
-        self.import_trajectories(raw_data)
+        self.df_samples_list = self.import_trajectories(raw_data)
+        self._sorter = self.build_sorter(self.df_samples_list[0])
         self.compute_row_delta_in_all_samples_frames(self.df_samples_list)
         self.clear_data_frame_list()
-        self.import_structure(raw_data)
-        self.import_variables(raw_data, self._sorter)
+        self._df_structure = self.import_structure(raw_data)
+        self._df_variables = self.import_variables(raw_data, self._sorter)
 
     def import_trajectories(self, raw_data: typing.List):
         """
@@ -56,50 +65,51 @@ class JsonImporter(ai.AbstractImporter):
         Parameters:
             :raw_data: List of Dicts
         Returns:
-            void
+            :List of dataframes containing all the trajectories
         """
-        self.normalize_trajectories(raw_data, 0, self.samples_label)
+        return self.normalize_trajectories(raw_data, 0, self.samples_label)
 
-    def import_structure(self, raw_data: typing.List):
+    def import_structure(self, raw_data: typing.List) -> pd.DataFrame:
         """
         Imports in a dataframe the data in the list raw_data at the key structure_label
         Parameters:
-            raw_data: the data
+            :raw_data: the data
         Returns:
-            void
+            :Dataframe containing the starting node and ending node of every arc of the network
         """
-        self._df_structure = self.one_level_normalizing(raw_data, 0, self.structure_label)
+        return self.one_level_normalizing(raw_data, 0, self.structure_label)
 
-    def import_variables(self, raw_data: typing.List, sorter: typing.List):
+    def import_variables(self, raw_data: typing.List, sorter: typing.List) -> pd.DataFrame:
         """
         Imports the data in raw_data at the key variables_label.
         Sorts the rows of the dataframe df_variables using the list sorter.
         Parameters:
-            raw_data: the data
-            sorter: the list used to sort the dataframe self.df_variables
+            :raw_data: the data
+            :sorter: the header of the dataset containing only the variables' symbolic labels
         Returns:
-            void
+            :Dataframe containing the variables' symbolic labels and their cardinalities
         """
-        self._df_variables = self.one_level_normalizing(raw_data, 0, self.variables_label)
+        return self.one_level_normalizing(raw_data, 0, self.variables_label)
         #TODO Given the precondition that the ordering of the frame _df_variables matches the one in
        #TODO self._sorter, this code is useless
-        self._df_variables[self.variables_key] = self._df_variables[self.variables_key].astype("category")
+        """self._df_variables[self.variables_key] = self._df_variables[self.variables_key].astype("category")
         self._df_variables[self.variables_key] = self._df_variables[self.variables_key].cat.set_categories(sorter)
         self._df_variables = self._df_variables.sort_values([self.variables_key])
         self._df_variables.reset_index(inplace=True)
         self._df_variables.drop('index', axis=1, inplace=True)
-        print("Var Frame", self._df_variables)
+        #print("Var Frame", self._df_variables)
+        """
 
     def read_json_file(self) -> typing.List:
         """
-        Reads the first json file in the path self.filePath
+        Reads the JSON file in the path self.file_path
         Parameters:
-            void
+            :void
         Returns:
-            data: the contents of the json file
+            :data: the contents of the json file
 
         """
         with open(self.file_path) as f:
@@ -111,11 +121,11 @@
         Extracts the one-level nested data in the list raw_data at the index indx at the key key
 
         Parameters:
-            raw_data: List of Dicts
-            indx: The index of the array from which the data have to be extracted
-            key: the key for the Dicts from which exctract data
+            :raw_data: List of Dicts
+            :indx: the index of the array from which the data have to be extracted
+            :key: the key for the Dicts from which to extract data
         Returns:
-            a normalized dataframe
+            :a normalized dataframe
 
         """
         return pd.DataFrame(raw_data[indx][key])
 
@@ -123,32 +133,52 @@
     def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str):
         """
         Extracts the trajectories in raw_data at the index indx at the key trajectories_key.
-        Adds the extracted traj in the dataframe list self._df_samples_list.
-        Initializes the list self.sorter.
         Parameters:
-            raw_data: the data
-            indx: the index of the array from which extract data
-            trajectories_key: the key of the trajectories objects
+            :raw_data: the data
+            :indx: the index of the array from which to extract data
+            :trajectories_key: the key of the trajectories objects
         Returns:
-            void
+            :A list of dataframes containing the trajectories
         """
         dataframe = pd.DataFrame
         smps = raw_data[indx][trajectories_key]
-        self.df_samples_list = [dataframe(sample) for sample in smps]
-        columns_header = list(self.df_samples_list[0].columns.values)
-        #print("COLUMNs HEADER", columns_header)
+        df_samples_list = [dataframe(sample) for sample in smps]
+        return df_samples_list
+        #columns_header = list(self.df_samples_list[0].columns.values)
+        #columns_header.remove(self.time_key)
+        #self._sorter = columns_header
+
+    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
+        """
+        Implements the abstract method build_sorter for this dataset.
+        """
+        columns_header = list(sample_frame.columns.values)
         columns_header.remove(self.time_key)
-        self._sorter = columns_header
+        return columns_header
 
     def clear_data_frame_list(self):
         """
         Removes all values present in the dataframes in the list df_samples_list
+        Parameters:
+            :void
+        Returns:
+            :void
         """
         for indx in range(len(self.df_samples_list)):
             self.df_samples_list[indx] = self.df_samples_list[indx].iloc[0:0]
 
     def import_sampled_cims(self, raw_data: typing.List, indx: int, cims_key: str) -> typing.Dict:
+        """
+        Imports the synthetic CIMs in the dataset into a dictionary, using the variables' labels
+        as keys for the set of CIMs of a particular node.
+        Parameters:
+            :raw_data: the data
+            :indx: the json array index
+            :cims_key: the key where the json object cims are placed
+        Returns:
+            :a dictionary containing the sampled CIMs for all the variables in the net
+        """
         cims_for_all_vars = {}
         for var in raw_data[indx][cims_key]:
             sampled_cims_list = []
diff --git a/main_package/classes/sample_path.py b/main_package/classes/sample_path.py
index a104b7e..ae7b8c5 100644
--- a/main_package/classes/sample_path.py
+++ b/main_package/classes/sample_path.py
@@ -10,7 +10,7 @@ class SamplePath:
     cardinalities.
     Has the task of creating the objects that will contain the mentioned data.
 
-
+    :importer: the Importer object that will import and process data
     :trajectories: the Trajectory object that will contain all the concatenated trajectories
     :structure: the Structure Object that will contain all the structural infos about the net
     :total_variables_count: the number of variables in the net
 
     """
     def __init__(self, importer: imp.AbstractImporter):
         """
-        :importer: the Importer objects that will import ad process data
+        Parameters:
+            :importer: the Importer object that will import and process data
         """
         self.importer = importer
         self._trajectories = None
         self._structure = None
         self.total_variables_count = None
+        self.importer.import_data()
 
     def build_trajectories(self):
         """
         Builds the Trajectory object that will contain all the trajectories.
         Clears all the unused dataframes in Importer Object
 
         Parameters:
-            void
+            :void
         Returns:
-            void
+            :void
         """
-        self.importer.import_data()
+        #self.importer.import_data()
         self._trajectories = \
             tr.Trajectory(self.importer.build_list_of_samples_array(self.importer.concatenated_samples),
                           len(self.importer.sorter) + 1)
@@ -46,12 +48,13 @@
         """
         Builds the Structure object that aggregates all the infos about the net.
         Parameters:
-            void
+            :void
         Returns:
-            void
+            :void
         """
         if self.importer.sorter != self.importer.variables.iloc[:, 0].to_list():
             raise RuntimeError("The Dataset columns order have to match the order of labels in the variables Frame!")
+        self.total_variables_count = len(self.importer.sorter)
         #labels = self.importer.variables[self.importer.variables_key].to_list()
         #print("SAMPLE PATH LABELS",labels)
@@ -64,11 +67,11 @@
                                     self.total_variables_count)
 
     @property
-    def trajectories(self):
+    def trajectories(self) -> tr.Trajectory:
         return self._trajectories
 
     @property
-    def structure(self):
+    def structure(self) -> st.Structure:
         return self._structure
 
     def total_variables_count(self):
diff --git a/main_package/tests/test_json_importer.py b/main_package/tests/test_json_importer.py
index 026f229..095de7b 100644
--- a/main_package/tests/test_json_importer.py
+++ b/main_package/tests/test_json_importer.py
@@ -23,7 +23,7 @@ class TestJsonImporter(unittest.TestCase):
         self.assertEqual(j1.time_key, 'Time')
         self.assertEqual(j1.variables_key, 'Name')
         self.assertEqual(j1.file_path, self.read_files[0])
-        self.assertFalse(j1.df_samples_list)
+        self.assertIsNone(j1.df_samples_list)
         self.assertIsNone(j1.variables)
         self.assertIsNone(j1.structure)
         self.assertIsNone(j1.concatenated_samples)
@@ -50,9 +50,9 @@ class TestJsonImporter(unittest.TestCase):
         j1 = ji.JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
         raw_data = j1.read_json_file()
         #print(raw_data)
-        j1.normalize_trajectories(raw_data, 0, j1.samples_label)
-        self.assertEqual(len(j1.df_samples_list), len(raw_data[0][j1.samples_label]))
-        self.assertEqual(list(j1.df_samples_list[0].columns.values)[1:], j1.sorter)
+        df_samples_list = j1.normalize_trajectories(raw_data, 0, j1.samples_label)
+        self.assertEqual(len(df_samples_list), len(raw_data[0][j1.samples_label]))
+        #self.assertEqual(list(j1.df_samples_list[0].columns.values)[1:], j1.sorter)
 
     def test_normalize_trajectories_wrong_indx(self):
         j1 = ji.JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
@@ -67,8 +67,9 @@
     def test_compute_row_delta_single_samples_frame(self):
         j1 = ji.JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
         raw_data = j1.read_json_file()
-        j1.normalize_trajectories(raw_data, 0, j1.samples_label)
+        j1.df_samples_list = j1.import_trajectories(raw_data)
         sample_frame = j1.df_samples_list[0]
+        original_copy = sample_frame.copy()
         columns_header = list(sample_frame.columns.values)
         shifted_cols_header = [s + "S" for s in columns_header[1:]]
         new_sample_frame = j1.compute_row_delta_sigle_samples_frame(sample_frame, columns_header[1:],
@@ -76,11 +77,20 @@
         self.assertEqual(len(list(sample_frame.columns.values)) + len(shifted_cols_header),
                          len(list(new_sample_frame.columns.values)))
         self.assertEqual(sample_frame.shape[0] - 1, new_sample_frame.shape[0])
+        for indx, row in new_sample_frame.iterrows():
+            self.assertAlmostEqual(row['Time'],
+                                   original_copy.iloc[indx + 1]['Time'] - original_copy.iloc[indx]['Time'])
+        for indx, row in new_sample_frame.iterrows():
+            self.assertTrue(np.array_equal(np.array(row[columns_header[1:]], dtype=int),
+                                           np.array(original_copy.iloc[indx][columns_header[1:]], dtype=int)))
+            self.assertTrue(np.array_equal(np.array(row[shifted_cols_header], dtype=int),
+                                           np.array(original_copy.iloc[indx + 1][columns_header[1:]], dtype=int)))
 
     def test_compute_row_delta_in_all_frames(self):
         j1 = ji.JsonImporter(self.read_files[0],
                              'samples', 'dyn.str', 'variables', 'Time', 'Name')
         raw_data = j1.read_json_file()
-        j1.import_trajectories(raw_data)
+        j1.df_samples_list = j1.import_trajectories(raw_data)
+        j1._sorter = j1.build_sorter(j1.df_samples_list[0])
         j1.compute_row_delta_in_all_samples_frames(j1.df_samples_list)
         self.assertEqual(list(j1.df_samples_list[0].columns.values),
                          list(j1.concatenated_samples.columns.values)[:len(list(j1.df_samples_list[0].columns.values))])
@@ -89,7 +99,8 @@ class TestJsonImporter(unittest.TestCase):
     def test_clear_data_frame_list(self):
         j1 = ji.JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
         raw_data = j1.read_json_file()
-        j1.import_trajectories(raw_data)
+        j1.df_samples_list = j1.import_trajectories(raw_data)
+        j1._sorter = j1.build_sorter(j1.df_samples_list[0])
         j1.compute_row_delta_in_all_samples_frames(j1.df_samples_list)
         j1.clear_data_frame_list()
         for df in j1.df_samples_list:
@@ -121,23 +132,25 @@ class TestJsonImporter(unittest.TestCase):
     def test_import_variables(self):
         j1 = ji.JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
         sorter = ['X', 'Y', 'Z']
-        raw_data = [{'variables':{"Name": ['Z', 'Y', 'X'], "value": [3, 3, 3]}}]
-        j1.import_variables(raw_data, sorter)
-        self.assertEqual(list(j1.variables[j1.variables_key]), sorter)
+        raw_data = [{'variables':{"Name": ['X', 'Y', 'Z'], "value": [3, 3, 3]}}]
+        df_var = j1.import_variables(raw_data, sorter)
+        self.assertEqual(list(df_var[j1.variables_key]), sorter)
 
     def test_import_structure(self):
         j1 = ji.JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
         raw_data = [{"dyn.str":[{"From":"X","To":"Z"},{"From":"Y","To":"Z"},{"From":"Z","To":"Y"}]}]
-        j1.import_structure(raw_data)
+        df_struct = j1.import_structure(raw_data)
         #print(raw_data[0]['dyn.str'][0].items())
-        self.assertIsInstance(j1.structure, pd.DataFrame)
+        self.assertIsInstance(df_struct, pd.DataFrame)
 
     def test_import_sampled_cims(self):
         j1 = ji.JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
         raw_data = j1.read_json_file()
+        j1.df_samples_list = j1.import_trajectories(raw_data)
+        j1._sorter = j1.build_sorter(j1.df_samples_list[0])
         cims = j1.import_sampled_cims(raw_data, 0, 'dyn.cims')
-        j1.import_variables(raw_data, ['X','Y','Z']) #TODO NON PUò dipendere direttamente da questo sorter
-        self.assertEqual(list(cims.keys()), j1.variables['Name'].tolist())
+        #j1.import_variables(raw_data, j1.sorter)
+        self.assertEqual(list(cims.keys()), j1.sorter)
 
     def test_import_data(self):
         j1 = ji.JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
diff --git a/main_package/tests/test_sample_path.py b/main_package/tests/test_sample_path.py
index 49e3641..2dd9aa6 100644
--- a/main_package/tests/test_sample_path.py
+++ b/main_package/tests/test_sample_path.py
@@ -17,18 +17,22 @@ class TestSamplePath(unittest.TestCase):
         cls.importer = ji.JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
 
     def test_init(self):
+        s1 = sp.SamplePath(self.importer)
+        self.assertIsNone(s1.trajectories)
+        self.assertIsNone(s1.structure)
+        self.assertFalse(s1.importer.concatenated_samples.empty)
+        self.assertIsNone(s1.total_variables_count)
+
+    def test_build_trajectories(self):
         s1 = sp.SamplePath(self.importer)
         s1.build_trajectories()
-        self.assertIsNotNone(s1.trajectories)
         self.assertIsInstance(s1.trajectories, tr.Trajectory)
+
+    def test_build_structure(self):
+        s1 = sp.SamplePath(self.importer)
         s1.build_structure()
-        self.assertIsNotNone(s1.structure)
         self.assertIsInstance(s1.structure, st.Structure)
-        self.assertTrue(s1.importer.concatenated_samples.empty)
         self.assertEqual(s1.total_variables_count, len(s1.importer.sorter))
-        print(s1.structure)
-        print(s1.trajectories)
-
 
 if __name__ == '__main__':
     unittest.main()
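
Reviewer note (not part of the diff): the sketch below shows how a concrete importer could satisfy the reworked AbstractImporter contract, with import_data() as the single entry point and build_sorter() supplying the variable labels before the row-delta step. The CsvImporter class, CSV layout, edge list and cardinalities are all hypothetical, chosen only to illustrate the documented post-conditions.

import typing

import pandas as pd

import abstract_importer as ai


class CsvImporter(ai.AbstractImporter):
    """Hypothetical importer for a single-trajectory CSV file with header [Time | Variable values]."""

    def import_data(self):
        trajectory = pd.read_csv(self.file_path)      # TIME is assumed to be the FIRST column
        self._sorter = self.build_sorter(trajectory)  # MUST run before the row-delta computation
        self.compute_row_delta_in_all_samples_frames([trajectory])
        # Post-conditions required by the abstract method (placeholder values here):
        #   Header of self._df_structure = [From_Node | To_Node]
        #   Header of self._df_variables = [Variable_Label | Variable_Cardinality]
        self._df_structure = pd.DataFrame({'From_Node': ['X'], 'To_Node': ['Y']})
        self._df_variables = pd.DataFrame({'Variable_Label': self._sorter,
                                           'Variable_Cardinality': [2] * len(self._sorter)})

    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
        # Keep only the variables' symbolic labels, cutting out the time column.
        return list(sample_frame.columns.values)[1:]

Keeping _df_variables in the same order as _sorter matters: build_structure() in SamplePath now raises a RuntimeError when the two orderings disagree.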
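
A second hedged sketch, this time of the new control flow from the caller's side: SamplePath now triggers importer.import_data() in its constructor, so the importer is handed over fully configured, and build_trajectories()/build_structure() only wrap data that has already been imported. The JSON file path below is hypothetical; the keys mirror the ones used in the tests.

import json_importer as ji
import sample_path as sp

importer = ji.JsonImporter('../data/networks_sample.json',  # hypothetical path
                           'samples', 'dyn.str', 'variables', 'Time', 'Name')
s1 = sp.SamplePath(importer)   # import_data() runs here and fills concatenated_samples
s1.build_trajectories()        # wraps the concatenated samples in a Trajectory object
s1.build_structure()           # checks the label ordering, then builds the Structure object
print(s1.total_variables_count)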