Refactors in JsonImporter class

4 years ago · da3533007c
parent a020b9c004
commit da3533007c
7 changed files with 33 additions and 1882 deletions
--- a/main_package/classes/json_importer.py
+++ b/main_package/classes/json_importer.py
@ -39,22 +39,27 @@ class JsonImporter(AbstractImporter):

    def import_data(self):
        raw_data = self.read_json_file()
+        self.import_variables(raw_data)
        self.import_trajectories(raw_data)
        self.compute_row_delta_in_all_samples_frames(self.time_key)
        self.clear_data_frame_list()
        self.import_structure(raw_data)
-        self.import_variables(raw_data, self.sorter)
+        #self.import_variables(raw_data, self.sorter)

    def import_trajectories(self, raw_data: pd.DataFrame):
        self.normalize_trajectories(raw_data, 0, self.samples_label)

    def import_structure(self, raw_data: pd.DataFrame):
        self._df_structure = self.one_level_normalizing(raw_data, 0, self.structure_label)
-
-    def import_variables(self, raw_data: pd.DataFrame, sorter: typing.List):
+    #TODO Warning: the order of the vars is not alphabetical as it is in the dataset -> act accordingly
+    #by sorting the vars alphabetically
+    def import_variables(self, raw_data: pd.DataFrame):
        self._df_variables = self.one_level_normalizing(raw_data, 0, self.variables_label)
+        self.sorter = self._df_variables[self.variables_key].to_list()
+        self.sorter.sort()
+        print("Sorter:", self.sorter)
        self._df_variables[self.variables_key] = self._df_variables[self.variables_key].astype("category")
-        self._df_variables[self.variables_key] = self._df_variables[self.variables_key].cat.set_categories(sorter)
+        self._df_variables[self.variables_key] = self._df_variables[self.variables_key].cat.set_categories(self.sorter)
        self._df_variables = self._df_variables.sort_values([self.variables_key])

    def read_json_file(self) -> typing.List:
@ -105,7 +110,7 @@ class JsonImporter(AbstractImporter):
        self.df_samples_list = [pd.DataFrame(sample) for sample in raw_data[indx][trajectories_key]]
        #for sample_indx, sample in enumerate(raw_data[indx][trajectories_key]):
            #self.df_samples_list.append(pd.DataFrame(sample))
-        self.sorter = list(self.df_samples_list[0].columns.values)[1:]
+        #self.sorter = list(self.df_samples_list[0].columns.values)[1:] #TODO The NAME column, sorted alphabetically, must be used here

    def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame, time_header_label: str,
                                              columns_header: typing.List, shifted_cols_header: typing.List) \
@ -122,10 +127,19 @@ class JsonImporter(AbstractImporter):
        #columns_header = list(self.df_samples_list[0].columns.values)
        #self.sorter = columns_header[1:]
        shifted_cols_header = [s + "S" for s in self.sorter]
-        for indx, sample in enumerate(self.df_samples_list):
+        compute_row_delta = self.compute_row_delta_sigle_samples_frame
+        """for indx, sample in enumerate(self.df_samples_list):
            self.df_samples_list[indx] = self.compute_row_delta_sigle_samples_frame(sample,
-                                                        time_header_label, self.sorter, shifted_cols_header)
+                                                        time_header_label, self.sorter, shifted_cols_header)"""
+        self.df_samples_list = [compute_row_delta(sample, time_header_label, self.sorter, shifted_cols_header) for sample in self.df_samples_list]
        self._concatenated_samples = pd.concat(self.df_samples_list)
+        #TODO Warning: the column at index 0 is not always the time column; reorder the concatenated dataframe accordingly
+        complete_header = self.sorter[:]
+        complete_header.insert(0,'Time')
+        complete_header.extend(shifted_cols_header)
+        print("Complete Header", complete_header)
+        self._concatenated_samples = self._concatenated_samples[complete_header]
+        print("Concat Samples",self._concatenated_samples)

    def build_list_of_samples_array(self, data_frame: pd.DataFrame) -> typing.List:
        """
@ -152,7 +166,7 @@ class JsonImporter(AbstractImporter):
        self._concatenated_samples = self._concatenated_samples.iloc[0:0]

    def clear_data_frame_list(self):
-        for indx in range(len(self.df_samples_list)):  # Le singole traj non servono più
+        for indx in range(len(self.df_samples_list)):  # Le singole traj non servono più #TODO usare list comprens
            self.df_samples_list[indx] = self.df_samples_list[indx].iloc[0:0]

    def import_sampled_cims(self, raw_data: pd.DataFrame, indx: int, cims_key: str) -> typing.Dict:
--- a/main_package/classes/sample_path.py
+++ b/main_package/classes/sample_path.py
@ -51,12 +51,6 @@ class SamplePath:
    def total_variables_count(self):
        return self.total_variables_count

-    """def build_possible_values_variables_structure(self):
-        possible_val_list = []
-        print(self.importer.variables)
-        for cardinality in self.importer.variables['Value']:
-            possible_val_list.append(list(range(0, cardinality)))
-        self.possible_variables_values = possible_val_list"""



--- a/main_package/classes/structure_estimator.py
+++ b/main_package/classes/structure_estimator.py
@ -52,9 +52,12 @@ class StructureEstimator:
            s1 = st.Structure(l1, indxs1, vals1, eds1, tot_vars_count)
            g1 = ng.NetworkGraph(s1)
            g1.init_graph()
+            print("G1 NODES", g1.get_nodes())
+            print("G1 Edges", g1.get_edges())
            p1 = pe.ParametersEstimator(self.sample_path, g1)
            p1.init_sets_cims_container()
            p1.compute_parameters_for_node(test_child)
+            
            sofc1 = p1.sets_of_cims_struct.sets_of_cims[g1.get_positional_node_indx(test_child)]
            if not p_set:
                self.cache.put(test_child, sofc1)
@ -86,6 +89,8 @@ class StructureEstimator:
            s2 = st.Structure(l2, indxs2, vals2, eds2, tot_vars_count)
            g2 = ng.NetworkGraph(s2)
            g2.init_graph()
+            print("G2 Nodes", g2.get_nodes())
+            print("G2 Edges", g2.get_edges())
            p2 = pe.ParametersEstimator(self.sample_path, g2)
            p2.init_sets_cims_container()
            p2.compute_parameters_for_node(test_child)
@ -113,6 +118,8 @@ class StructureEstimator:
        r2s = M2.diagonal()
        C1 = cim1.cim
        C2 = cim2.cim
+        print("C1", C1)
+        print("C2", C2)
        F_stats = C2.diagonal() / C1.diagonal()
        exp_alfa = self.exp_test_sign
        for val in range(0, child_states_numb):
@ -149,7 +156,7 @@ class StructureEstimator:
        return True

    def one_iteration_of_CTPC_algorithm(self, var_id, tot_vars_count):
-        #print("TESTING VAR", var_id)
+        print("TESTING VAR", var_id)
        u = list(self.complete_graph.predecessors(var_id))
        #tests_parents_numb = len(u)
        #complete_frame = self.complete_graph_frame
--- a/main_package/classes/trajectory.py
+++ b/main_package/classes/trajectory.py
@ -17,7 +17,6 @@ class Trajectory:
    def __init__(self, list_of_columns, original_cols_number):
        if type(list_of_columns[0][0]) != np.float64:
            raise TypeError('The first array in the list has to be Times')
-        #TODO valutare se vale la pena ordinare la lista di numpy array per tipo
        self.original_cols_number = original_cols_number
        self._actual_trajectory = np.array(list_of_columns[1:], dtype=np.int).T
        self._times = np.array(list_of_columns[0], dtype=np.float)
--- a/main_package/data/esempio_dataset.csv
+++ b/main_package/data/esempio_dataset.csv
--- a/main_package/tests/test_structure.py
+++ b/main_package/tests/test_structure.py
@ -107,7 +107,7 @@ class TestStructure(unittest.TestCase):
        sp1 = sp.SamplePath('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name')
        sp1.build_trajectories()
        sp1.build_structure()
-        st1 = st.Structure(['X', 'Y', 'Z'], np.array([0,1,2]), np.array([3,3,3]), [('X', 'Y'), ('Z', 'Y')], sp1.total_variables_count)
+        st1 = st.Structure(['Y'], np.array([2]), np.array([2]), [], sp1.total_variables_count)
        g1 = ng.NetworkGraph(st1)
        g1.init_graph()
        print(g1.transition_scalar_indexing_structure)
@ -117,7 +117,7 @@ class TestStructure(unittest.TestCase):
        p1 = pe.ParametersEstimator(sp1,g1)
        p1.init_sets_cims_container()
        p1.compute_parameters_for_node('Y')
-        print(p1.sets_of_cims_struct.sets_of_cims[1].actual_cims)
+        print(p1.sets_of_cims_struct.sets_of_cims[0].actual_cims)


 if __name__ == '__main__':
--- a/main_package/tests/test_structure_estimator.py
+++ b/main_package/tests/test_structure_estimator.py
@ -31,6 +31,7 @@ class TestStructureEstimator(unittest.TestCase):
        lp.print_stats()
        #se1.ctpc_algorithm()
        print(se1.complete_graph.edges)
+        print(self.s1.structure.list_of_edges())

    def aux_test_complete_test(self, estimator, test_par, test_child, p_set):
        estimator.complete_test(test_par, test_child, p_set)