From 4b9ff23e0bcca13db2d72b245779a3d87c6b1404 Mon Sep 17 00:00:00 2001
From: philpMartin
Date: Sun, 26 Jul 2020 21:39:01 +0200
Subject: [PATCH] Add parameters estimation per variable

---
 main_package/classes/json_importer.py        | 28 +++---
 main_package/classes/parameters_estimator.py | 81 +++++-------------
 main_package/classes/structure_estimator.py  | 85 ++++++++++++-------
 .../tests/test_parameters_estimator.py       |  7 ++
 .../tests/test_structure_estimator.py        |  3 +-
 5 files changed, 98 insertions(+), 106 deletions(-)

diff --git a/main_package/classes/json_importer.py b/main_package/classes/json_importer.py
index 3595a2b..f774492 100644
--- a/main_package/classes/json_importer.py
+++ b/main_package/classes/json_importer.py
@@ -2,6 +2,7 @@ import os
 import glob
 import pandas as pd
 import json
+import typing
 from abstract_importer import AbstractImporter
 from line_profiler import LineProfiler
 
@@ -22,7 +23,8 @@ class JsonImporter(AbstractImporter):
 
     """
 
-    def __init__(self, files_path, samples_label, structure_label, variables_label, time_key, variables_key):
+    def __init__(self, files_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
+                 variables_key: str):
         self.samples_label = samples_label
         self.structure_label = structure_label
         self.variables_label = variables_label
@@ -43,19 +45,19 @@ class JsonImporter(AbstractImporter):
         self.import_structure(raw_data)
         self.import_variables(raw_data, self.sorter)
 
-    def import_trajectories(self, raw_data):
+    def import_trajectories(self, raw_data: pd.DataFrame):
         self.normalize_trajectories(raw_data, 0, self.samples_label)
 
-    def import_structure(self, raw_data):
+    def import_structure(self, raw_data: pd.DataFrame):
         self._df_structure = self.one_level_normalizing(raw_data, 0, self.structure_label)
 
-    def import_variables(self, raw_data, sorter):
+    def import_variables(self, raw_data: pd.DataFrame, sorter: typing.List):
         self._df_variables = self.one_level_normalizing(raw_data, 0, self.variables_label)
         self._df_variables[self.variables_key] = self._df_variables[self.variables_key].astype("category")
         self._df_variables[self.variables_key] = self._df_variables[self.variables_key].cat.set_categories(sorter)
         self._df_variables = self._df_variables.sort_values([self.variables_key])
 
-    def read_json_file(self):
+    def read_json_file(self) -> typing.List:
         """
         Reads the first .json file in the path self.filepath
 
@@ -75,7 +77,7 @@ class JsonImporter(AbstractImporter):
         except ValueError as err:
             print(err.args)
 
-    def one_level_normalizing(self, raw_data, indx, key):
+    def one_level_normalizing(self, raw_data: pd.DataFrame, indx: int, key: str) -> pd.DataFrame:
         """
         Extracts the one-level nested data in the dataset raw_data,
         contained in the json array at index indx in the json object key
@@ -90,7 +92,7 @@ class JsonImporter(AbstractImporter):
         """
         return pd.DataFrame(raw_data[indx][key])
 
-    def normalize_trajectories(self, raw_data, indx, trajectories_key):
+    def normalize_trajectories(self, raw_data: pd.DataFrame, indx: int, trajectories_key: str):
         """
         Extracts the trajectories in raw_data, contained in the json array at index indx, in the json object trajectories_key.
         Adds the extracted trajectories to the list of dataframes self.df_samples_list
@@ -104,7 +106,9 @@ class JsonImporter(AbstractImporter):
             self.df_samples_list.append(pd.DataFrame(sample))
         self.sorter = list(self.df_samples_list[0].columns.values)[1:]
 
-    def compute_row_delta_sigle_samples_frame(self, sample_frame, time_header_label, columns_header, shifted_cols_header):
+    def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame, time_header_label: str,
+                                              columns_header: typing.List, shifted_cols_header: typing.List) \
+            -> pd.DataFrame:
         sample_frame[time_header_label] = sample_frame[time_header_label].diff().shift(-1)
         shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32')
         #print(shifted_cols)
@@ -113,8 +117,8 @@ class JsonImporter(AbstractImporter):
         sample_frame.drop(sample_frame.tail(1).index, inplace=True)
         return sample_frame
 
-    def compute_row_delta_in_all_samples_frames(self, time_header_label):
-        columns_header = list(self.df_samples_list[0].columns.values)
+    def compute_row_delta_in_all_samples_frames(self, time_header_label: str):
+        #columns_header = list(self.df_samples_list[0].columns.values)
         #self.sorter = columns_header[1:]
         shifted_cols_header = [s + "S" for s in self.sorter]
         for indx, sample in enumerate(self.df_samples_list):
@@ -122,7 +126,7 @@ class JsonImporter(AbstractImporter):
                                                                      time_header_label, self.sorter, shifted_cols_header)
         self._concatenated_samples = pd.concat(self.df_samples_list)
 
-    def build_list_of_samples_array(self, data_frame):
+    def build_list_of_samples_array(self, data_frame: pd.DataFrame) -> typing.List:
         """
         Builds a list containing the columns of the dataframe data_frame, converting them to numpy arrays
         Parameters:
@@ -150,7 +154,7 @@ class JsonImporter(AbstractImporter):
         for indx in range(len(self.df_samples_list)):  # The single trajectories are no longer needed
             self.df_samples_list[indx] = self.df_samples_list[indx].iloc[0:0]
 
-    def import_sampled_cims(self, raw_data, indx, cims_key):
+    def import_sampled_cims(self, raw_data: pd.DataFrame, indx: int, cims_key: str) -> typing.Dict:
         cims_for_all_vars = {}
         for var in raw_data[indx][cims_key]:
             sampled_cims_list = []
diff --git a/main_package/classes/parameters_estimator.py b/main_package/classes/parameters_estimator.py
index ab998aa..2a07689 100644
--- a/main_package/classes/parameters_estimator.py
+++ b/main_package/classes/parameters_estimator.py
@@ -43,6 +43,26 @@ class ParametersEstimator:
                                                        aggr[1].transition_matrices)
             aggr[1].build_cims(aggr[1].state_residence_times, aggr[1].transition_matrices)
 
+    def compute_parameters_for_node(self, node_id):
+        pos_index = self.net_graph.graph_struct.get_positional_node_indx(node_id)
+        #print("Nodes", self.net_graph.get_nodes())
+        #print(pos_index)
+        #print(self.net_graph.time_filtering)
+        self.compute_state_res_time_for_node(self.net_graph.get_node_indx(node_id), self.sample_path.trajectories.times,
+                                             self.sample_path.trajectories.trajectory,
+                                             self.net_graph.time_filtering[pos_index],
+                                             self.net_graph.time_scalar_indexing_strucure[pos_index],
+                                             self.sets_of_cims_struct.sets_of_cims[pos_index].state_residence_times)
+        # print(self.net_graph.transition_filtering[indx])
+        # print(self.net_graph.transition_scalar_indexing_structure[indx])
+        self.compute_state_transitions_for_a_node(self.net_graph.get_node_indx(node_id),
+                                                  self.sample_path.trajectories.complete_trajectory,
+                                                  self.net_graph.transition_filtering[pos_index],
+                                                  self.net_graph.transition_scalar_indexing_structure[pos_index],
+                                                  self.sets_of_cims_struct.sets_of_cims[pos_index].transition_matrices)
+        self.sets_of_cims_struct.sets_of_cims[pos_index].build_cims(
+            self.sets_of_cims_struct.sets_of_cims[pos_index].state_residence_times,
+            self.sets_of_cims_struct.sets_of_cims[pos_index].transition_matrices)
 
     def compute_state_res_time_for_node(self, node_indx, times, trajectory, cols_filter, scalar_indexes_struct, T):
 
@@ -96,64 +116,3 @@ class ParametersEstimator:
 
 
 
-# Simple Test #
-"""os.getcwd()
-os.chdir('..')
-path = os.getcwd() + '/data'
-
-s1 = sp.SamplePath(path)
-s1.build_trajectories()
-s1.build_structure()
-
-g1 = ng.NetworkGraph(s1.structure)
-g1.init_graph()
-
-pe = ParametersEstimator(s1, g1)
-pe.init_amalgamated_cims_struct()
-lp = LineProfiler()
-
-[[2999.2966 2749.2298 3301.5975]
- [3797.1737 3187.8345 2939.2009]
- [3432.224  3062.5402 4530.9028]]
-
-[[ 827.6058  838.1515  686.1365]
- [1426.384  2225.2093 1999.8528]
- [ 745.3068  733.8129  746.2347]
- [ 520.8113  690.9502  853.4022]
- [1590.8609 1853.0021 1554.1874]
- [ 637.5576  643.8822  654.9506]
- [ 718.7632  742.2117  998.5844]
- [1811.984  1598.0304 2547.988 ]
- [ 770.8503  598.9588  984.3304]]
-
-lp_wrapper = lp(pe.compute_state_residence_time_for_all_nodes)
-lp_wrapper()
-lp.print_stats()
-
-#pe.compute_state_residence_time_for_all_nodes()
-print(pe.amalgamated_cims_struct.sets_of_cims[0].state_residence_times)
-
-[[[14472,  3552, 10920],
-  [12230, 25307, 13077],
-  [ 9707, 14408, 24115]],
-
- [[22918,  6426, 16492],
-  [10608, 16072,  5464],
-  [10746, 11213, 21959]],
-
- [[23305,  6816, 16489],
-  [ 3792, 19190, 15398],
-  [13718, 18243, 31961]]])
-
- Raveled [14472  3552 10920 12230 25307 13077  9707 14408 24115 22918  6426 16492
- 10608 16072  5464 10746 11213 21959 23305  6816 16489  3792 19190 15398
- 13718 18243 31961]
-
-lp_wrapper = lp(pe.compute_parameters)
-lp_wrapper()
-#for variable in pe.amalgamated_cims_struct.sets_of_cims:
-    #for cond in variable.get_cims():
-        #print(cond.cim)
-print(pe.amalgamated_cims_struct.get_cims_of_node(1,[2]))
-lp.print_stats()"""
-
diff --git a/main_package/classes/structure_estimator.py b/main_package/classes/structure_estimator.py
index 59555b1..c7dd6ad 100644
--- a/main_package/classes/structure_estimator.py
+++ b/main_package/classes/structure_estimator.py
@@ -55,11 +55,14 @@ class StructureEstimator:
         p1 = pe.ParametersEstimator(self.sample_path, g1)
         p1.init_sets_cims_container()
-        p1.compute_parameters()
+        #print("Computing params for",test_child, test_parent, parent_set)
+        p1.compute_parameters_for_node(test_child)
+        #p1.compute_parameters()
 
         p2 = pe.ParametersEstimator(self.sample_path, g2)
         p2.init_sets_cims_container()
-        p2.compute_parameters()
+        #p2.compute_parameters()
+        p2.compute_parameters_for_node(test_child)
         #for cim in p1.sets_of_cims_struct.sets_of_cims[s1.get_positional_node_indx(test_child)].actual_cims:
             #print(cim)
 
@@ -72,12 +75,11 @@ class StructureEstimator:
             #for j, cim2 in enumerate(
                     #p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)].actual_cims):
             for j in range(indx, self.sample_path.structure.get_states_number(test_parent) + indx):
-                print("J", j)
+                #print("J", j)
+                #print("Pos Index", p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)].actual_cims)
                 cim2 = p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)].actual_cims[j]
                 indx += 1
-                print(indx)
-
-
+                #print(indx)
                 print("Run Test", i, j)
                 if not self.independence_test(test_child, cim1, cim2):
                     return False
@@ -85,33 +87,45 @@ class StructureEstimator:
     def independence_test(self, tested_child, cim1, cim2):
         # Fake exp test
+        r1s = cim1.state_transition_matrix.diagonal()
+        r2s = cim2.state_transition_matrix.diagonal()
+        F_stats = cim2.cim.diagonal() / cim1.cim.diagonal()
         for val in range(0, self.sample_path.structure.get_states_number(tested_child)):  # the possible values of tested child TODO THIS COMPUTATION MUST BE VECTORIZED
-            r1 = cim1.state_transition_matrix[val][val]
-            r2 = cim2.state_transition_matrix[val][val]
-            print("No Test Parent:",cim1.cim[val][val],"With Test Parent", cim2.cim[val][val])
-            F = cim2.cim[val][val] / cim1.cim[val][val]
+            #r1 = cim1.state_transition_matrix[val][val]
+            #r2 = cim2.state_transition_matrix[val][val]
+            #print("No Test Parent:",cim1.cim[val][val],"With Test Parent", cim2.cim[val][val])
+            #F = cim2.cim[val][val] / cim1.cim[val][val]
 
-            print("Exponential test", F, r1, r2)
+            #print("Exponential test", F_stats[val], r1s[val], r2s[val])
             #print(f_dist.ppf(1 - self.exp_test_sign / 2, r1, r2))
             #print(f_dist.ppf(self.exp_test_sign / 2, r1, r2))
-            if F < f_dist.ppf(self.exp_test_sign / 2, r1, r2) or \
-                    F > f_dist.ppf(1 - self.exp_test_sign / 2, r1, r2):
+            if F_stats[val] < f_dist.ppf(self.exp_test_sign / 2, r1s[val], r2s[val]) or \
+                    F_stats[val] > f_dist.ppf(1 - self.exp_test_sign / 2, r1s[val], r2s[val]):
                 print("CONDITIONALLY DEPENDENT EXP")
                 return False
         # fake chi test
         M1_no_diag = self.remove_diagonal_elements(cim1.state_transition_matrix)
         M2_no_diag = self.remove_diagonal_elements(cim2.state_transition_matrix)
-        print("M1 no diag", M1_no_diag)
-        print("M2 no diag", M2_no_diag)
+        #print("M1 no diag", M1_no_diag)
+        #print("M2 no diag", M2_no_diag)
         chi_2_quantile = chi2_dist.ppf(1 - self.chi_test_alfa,
                                        self.sample_path.structure.get_states_number(tested_child) - 1)
+        """
+        Ks = np.sqrt(cim1.state_transition_matrix.diagonal() / cim2.state_transition_matrix.diagonal())
+        Ls = np.reciprocal(Ks)
+        chi_stats = np.sum((np.power((M2_no_diag.T * Ks).T - (M1_no_diag.T * Ls).T, 2) \
+            / (M1_no_diag + M2_no_diag)), axis=1)"""
+        Ks = np.sqrt(r1s / r2s)
+        Ls = np.sqrt(r2s / r1s)
         for val in range(0, self.sample_path.structure.get_states_number(tested_child)):
-            K = math.sqrt(cim1.state_transition_matrix[val][val] / cim2.state_transition_matrix[val][val])
-            L = 1 / K
-            Chi = np.sum(np.power(K * M2_no_diag[val] - L *M1_no_diag[val], 2) /
+            #K = math.sqrt(cim1.state_transition_matrix[val][val] / cim2.state_transition_matrix[val][val])
+            #L = 1 / K
+            Chi = np.sum(np.power(Ks[val] * M2_no_diag[val] - Ls[val] *M1_no_diag[val], 2) /
                          (M1_no_diag[val] + M2_no_diag[val]))
-            print("Chi Stats", Chi)
-            print("Chi Quantile", chi_2_quantile)
+
+            #print("Chi Stats", Chi)
+            #print("Chi Quantile", chi_2_quantile)
             if Chi > chi_2_quantile:
+            #if np.any(chi_stats > chi_2_quantile):
                 print("CONDITIONALLY DEPENDENT CHI")
                 return False
         #print("Chi test", Chi)
@@ -122,29 +136,32 @@ class StructureEstimator:
         tests_parents_numb = len(u)
         #print(u)
         b = 0
-        parent_indx = 0
+        #parent_indx = 0
         while b < len(u):
            #for parent_id in u:
            parent_indx = 0
            while u and parent_indx < tests_parents_numb and b < len(u):
                # list_without_test_parent = u.remove(parent_id)
                removed = False
-               print("b", b)
-               print("Parent Indx", parent_indx)
+               #print("b", b)
+               #print("Parent Indx", parent_indx)
               #if not list(self.generate_possible_sub_sets_of_size(u, b, u[parent_indx])):
                   #break
               S = self.generate_possible_sub_sets_of_size(u, b, u[parent_indx])
-              print("U Set", u)
-              print("S", S)
+              #print("U Set", u)
+              #print("S", S)
               for parents_set in S:
-                  print("Parent Set", parents_set)
-                  print("Test Parent", u[parent_indx])
+                  #print("Parent Set", parents_set)
+                  #print("Test Parent", u[parent_indx])
                   if self.complete_test(u[parent_indx], var_id, parents_set):
                       print("Removing EDGE:", u[parent_indx], var_id)
                       self.complete_graph.remove_edge(u[parent_indx], var_id)
-                      #self.complete_graph_frame = \
-                          #self.complete_graph_frame[(self.complete_graph_frame.From !=
-                                                    # u[parent_indx]) & (self.complete_graph_frame.To != var_id)]
+                      #print(self.complete_graph_frame)
+                      self.complete_graph_frame = \
+                          self.complete_graph_frame.drop(
+                              self.complete_graph_frame[(self.complete_graph_frame.From ==
+                                                         u[parent_indx]) & (self.complete_graph_frame.To == var_id)].index)
+                      #print(self.complete_graph_frame)
                       u.remove(u[parent_indx])
                       removed = True
                   #else:
@@ -154,8 +171,8 @@ class StructureEstimator:
            b += 1
 
     def generate_possible_sub_sets_of_size(self, u, size, parent_id):
-        print("Inside Generate subsets", u)
-        print("InsideGenerate Subsets", parent_id)
+        #print("Inside Generate subsets", u)
+        #print("InsideGenerate Subsets", parent_id)
         list_without_test_parent = u[:]
         list_without_test_parent.remove(parent_id)
         # u.remove(parent_id)
@@ -168,3 +185,7 @@ class StructureEstimator:
         s0, s1 = matrix.strides
         return strided(matrix.ravel()[1:], shape=(m - 1, m), strides=(s0 + s1, s1)).reshape(m, -1)
 
+    def ctpc_algorithm(self):
+        for node_id in self.sample_path.structure.list_of_nodes_labels():
+            self.one_iteration_of_CTPC_algorithm(node_id)
+
diff --git a/main_package/tests/test_parameters_estimator.py b/main_package/tests/test_parameters_estimator.py
index 5170295..2e484e3 100644
--- a/main_package/tests/test_parameters_estimator.py
+++ b/main_package/tests/test_parameters_estimator.py
@@ -55,6 +55,13 @@ class TestParametersEstimatior(unittest.TestCase):
             for r1, r2 in zip(cim1, cim2):
                 self.assertTrue(np.all(np.isclose(r1, r2, 1e-01, 1e-01) == True))
 
+    def test_compute_parameters_for_node(self):  #TODO This is not a test
+        pe1 = pe.ParametersEstimator(self.s1, self.g1)
+        pe1.init_sets_cims_container()
+        pe1.compute_parameters_for_node('Y')
+        print(pe1.sets_of_cims_struct.get_set_of_cims(1).actual_cims)
+
+
     def aux_import_sampled_cims(self, cims_label):
         i1 = ji.JsonImporter('../data', '', '', '', '', '')
         raw_data = i1.read_json_file()
diff --git a/main_package/tests/test_structure_estimator.py b/main_package/tests/test_structure_estimator.py
index 571d5a5..53c3119 100644
--- a/main_package/tests/test_structure_estimator.py
+++ b/main_package/tests/test_structure_estimator.py
@@ -19,8 +19,9 @@ class TestStructureEstimator(unittest.TestCase):
     def test_one_iteration(self):
         se1 = se.StructureEstimator(self.s1, 0.1, 0.1)
-        se1.one_iteration_of_CTPC_algorithm('X')
+        #se1.one_iteration_of_CTPC_algorithm('X')
         #self.aux_test_complete_test(se1, 'X', 'Y', ['Z'])
+        se1.ctpc_algorithm()
         print(se1.complete_graph.edges)
 
     def aux_test_complete_test(self, estimator, test_par, test_child, p_set):
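
Note (not part of the patch): how the new per-variable API is meant to be driven end to end. This sketch is assembled from the updated tests and the removed "Simple Test" snippet above; the module names used in the imports and the '../data' path are assumptions, not something this patch defines.

import sample_path as sp
import network_graph as ng
import parameters_estimator as pe
import structure_estimator as se

# Load trajectories and structure from the json files in the data folder.
s1 = sp.SamplePath('../data')
s1.build_trajectories()
s1.build_structure()

# Build the graph over the imported structure.
g1 = ng.NetworkGraph(s1.structure)
g1.init_graph()

# Estimate the CIMs of a single variable only, instead of the whole network.
p1 = pe.ParametersEstimator(s1, g1)
p1.init_sets_cims_container()
p1.compute_parameters_for_node('Y')

# Run the full CTPC structure search: ctpc_algorithm() calls
# one_iteration_of_CTPC_algorithm(node_id) for every variable.
se1 = se.StructureEstimator(s1, 0.1, 0.1)   # exp_test_sign, chi_test_alfa
se1.ctpc_algorithm()
print(se1.complete_graph.edges)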
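The independence_test above interleaves the vectorized statistics with the commented-out scalar version, so here is a standalone restatement of the same two checks for readability. It is a minimal sketch: the function name, the argument layout (cim = estimated intensity matrix, trans = state-transition count matrix whose diagonal holds the r1/r2 totals) and the default significance levels are assumptions for illustration, not part of the patch, and zero off-diagonal counts are not handled, exactly as in the original code.

import numpy as np
from scipy.stats import chi2 as chi2_dist
from scipy.stats import f as f_dist

def cims_conditionally_independent(cim1, trans1, cim2, trans2,
                                   exp_test_sign=0.1, chi_test_alfa=0.1):
    """Return False as soon as either the exponential or the chi-square test rejects."""
    r1s = trans1.diagonal()                      # totals for the CIM without the test parent
    r2s = trans2.diagonal()                      # totals for the CIM with the test parent
    F_stats = cim2.diagonal() / cim1.diagonal()  # ratio of the estimated exit rates
    # Exponential test: every F statistic must lie inside the central F interval.
    lower = f_dist.ppf(exp_test_sign / 2, r1s, r2s)
    upper = f_dist.ppf(1 - exp_test_sign / 2, r1s, r2s)
    if np.any((F_stats < lower) | (F_stats > upper)):
        return False                             # conditionally dependent (exp test)
    # Chi-square test on the off-diagonal transition counts, row by row.
    n_states = cim1.shape[0]
    off_diag = ~np.eye(n_states, dtype=bool)
    M1 = trans1[off_diag].reshape(n_states, n_states - 1)
    M2 = trans2[off_diag].reshape(n_states, n_states - 1)
    Ks = np.sqrt(r1s / r2s)
    Ls = np.sqrt(r2s / r1s)
    chi_stats = np.sum((Ks[:, np.newaxis] * M2 - Ls[:, np.newaxis] * M1) ** 2 / (M1 + M2), axis=1)
    chi_2_quantile = chi2_dist.ppf(1 - chi_test_alfa, n_states - 1)
    if np.any(chi_stats > chi_2_quantile):
        return False                             # conditionally dependent (chi test)
    return True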