1
0
Fork 0

Refactors in JsonImporter class

parallel_struct_est
philpMartin 4 years ago
parent a020b9c004
commit da3533007c
  1. 30
      main_package/classes/json_importer.py
  2. 6
      main_package/classes/sample_path.py
  3. 9
      main_package/classes/structure_estimator.py
  4. 1
      main_package/classes/trajectory.py
  5. 1864
      main_package/data/esempio_dataset.csv
  6. 4
      main_package/tests/test_structure.py
  7. 1
      main_package/tests/test_structure_estimator.py

@ -39,22 +39,27 @@ class JsonImporter(AbstractImporter):
def import_data(self):
# Read the raw JSON once, then import variables, trajectories and structure from it.
raw_data = self.read_json_file()
self.import_variables(raw_data)
self.import_trajectories(raw_data)
# The row deltas are computed on the per-sample frames before those frames are cleared.
self.compute_row_delta_in_all_samples_frames(self.time_key)
self.clear_data_frame_list()
self.import_structure(raw_data)
# NOTE(review): the two-argument call below is presumably the pre-change line that the
# commented-out line after it replaces — confirm which one is current.
self.import_variables(raw_data, self.sorter)
#self.import_variables(raw_data, self.sorter)
def import_trajectories(self, raw_data: pd.DataFrame):
# Delegates to normalize_trajectories, passing index 0 and the samples label.
self.normalize_trajectories(raw_data, 0, self.samples_label)
def import_structure(self, raw_data: pd.DataFrame):
# Stores the result of one_level_normalizing (index 0, structure label) in _df_structure.
self._df_structure = self.one_level_normalizing(raw_data, 0, self.structure_label)
def import_variables(self, raw_data: pd.DataFrame, sorter: typing.List):
#TODO Warning: the order of the vars is not alphabetical as in the dataset -> act accordingly
#By sorting the vars alphabetically
def import_variables(self, raw_data: pd.DataFrame):
# NOTE(review): two signatures and two set_categories calls appear in this span; presumably
# the pre- and post-change lines of the diff shown together — confirm which ones are current.
self._df_variables = self.one_level_normalizing(raw_data, 0, self.variables_label)
# Values of the variables key column as a list, sorted in place and kept on self.sorter.
self.sorter = self._df_variables[self.variables_key].to_list()
self.sorter.sort()
print("Sorter:", self.sorter)
# Make the key column categorical with the sorter as its category order, so that
# sort_values below orders the rows by that category order.
self._df_variables[self.variables_key] = self._df_variables[self.variables_key].astype("category")
self._df_variables[self.variables_key] = self._df_variables[self.variables_key].cat.set_categories(sorter)
self._df_variables[self.variables_key] = self._df_variables[self.variables_key].cat.set_categories(self.sorter)
self._df_variables = self._df_variables.sort_values([self.variables_key])
def read_json_file(self) -> typing.List:
@ -105,7 +110,7 @@ class JsonImporter(AbstractImporter):
self.df_samples_list = [pd.DataFrame(sample) for sample in raw_data[indx][trajectories_key]]
#for sample_indx, sample in enumerate(raw_data[indx][trajectories_key]):
#self.df_samples_list.append(pd.DataFrame(sample))
self.sorter = list(self.df_samples_list[0].columns.values)[1:]
#self.sorter = list(self.df_samples_list[0].columns.values)[1:] #TODO Here there must be the NAME column sorted alphabetically
def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame, time_header_label: str,
columns_header: typing.List, shifted_cols_header: typing.List) \
@ -122,10 +127,19 @@ class JsonImporter(AbstractImporter):
#columns_header = list(self.df_samples_list[0].columns.values)
#self.sorter = columns_header[1:]
shifted_cols_header = [s + "S" for s in self.sorter]
for indx, sample in enumerate(self.df_samples_list):
compute_row_delta = self.compute_row_delta_sigle_samples_frame
"""for indx, sample in enumerate(self.df_samples_list):
self.df_samples_list[indx] = self.compute_row_delta_sigle_samples_frame(sample,
time_header_label, self.sorter, shifted_cols_header)
time_header_label, self.sorter, shifted_cols_header)"""
self.df_samples_list = [compute_row_delta(sample, time_header_label, self.sorter, shifted_cols_header) for sample in self.df_samples_list]
self._concatenated_samples = pd.concat(self.df_samples_list)
#TODO Warning: the column at index 0 is not always the time column; sort the concatenated dataframe accordingly
complete_header = self.sorter[:]
complete_header.insert(0,'Time')
complete_header.extend(shifted_cols_header)
print("Complete Header", complete_header)
self._concatenated_samples = self._concatenated_samples[complete_header]
print("Concat Samples",self._concatenated_samples)
def build_list_of_samples_array(self, data_frame: pd.DataFrame) -> typing.List:
"""
@ -152,7 +166,7 @@ class JsonImporter(AbstractImporter):
self._concatenated_samples = self._concatenated_samples.iloc[0:0]
def clear_data_frame_list(self):
# Replaces every frame in df_samples_list with an empty slice of itself (iloc[0:0]).
for indx in range(len(self.df_samples_list)): # The individual trajectories are no longer needed
for indx in range(len(self.df_samples_list)): # The individual trajectories are no longer needed #TODO use a list comprehension
self.df_samples_list[indx] = self.df_samples_list[indx].iloc[0:0]
def import_sampled_cims(self, raw_data: pd.DataFrame, indx: int, cims_key: str) -> typing.Dict:

@ -51,12 +51,6 @@ class SamplePath:
def total_variables_count(self):
# NOTE(review): this returns the attribute that has the same name as this method. If a
# @property decorator sits above (not visible here), the lookup would re-enter this getter;
# if not, it yields whatever instance attribute shadows the method, or the bound method
# itself. Presumably a separate backing attribute was intended — confirm.
return self.total_variables_count
"""def build_possible_values_variables_structure(self):
possible_val_list = []
print(self.importer.variables)
for cardinality in self.importer.variables['Value']:
possible_val_list.append(list(range(0, cardinality)))
self.possible_variables_values = possible_val_list"""

@ -52,9 +52,12 @@ class StructureEstimator:
s1 = st.Structure(l1, indxs1, vals1, eds1, tot_vars_count)
g1 = ng.NetworkGraph(s1)
g1.init_graph()
print("G1 NODES", g1.get_nodes())
print("G1 Edges", g1.get_edges())
p1 = pe.ParametersEstimator(self.sample_path, g1)
p1.init_sets_cims_container()
p1.compute_parameters_for_node(test_child)
sofc1 = p1.sets_of_cims_struct.sets_of_cims[g1.get_positional_node_indx(test_child)]
if not p_set:
self.cache.put(test_child, sofc1)
@ -86,6 +89,8 @@ class StructureEstimator:
s2 = st.Structure(l2, indxs2, vals2, eds2, tot_vars_count)
g2 = ng.NetworkGraph(s2)
g2.init_graph()
print("G2 Nodes", g2.get_nodes())
print("G2 Edges", g2.get_edges())
p2 = pe.ParametersEstimator(self.sample_path, g2)
p2.init_sets_cims_container()
p2.compute_parameters_for_node(test_child)
@ -113,6 +118,8 @@ class StructureEstimator:
r2s = M2.diagonal()
C1 = cim1.cim
C2 = cim2.cim
print("C1", C1)
print("C2", C2)
F_stats = C2.diagonal() / C1.diagonal()
exp_alfa = self.exp_test_sign
for val in range(0, child_states_numb):
@ -149,7 +156,7 @@ class StructureEstimator:
return True
def one_iteration_of_CTPC_algorithm(self, var_id, tot_vars_count):
#print("TESTING VAR", var_id)
print("TESTING VAR", var_id)
u = list(self.complete_graph.predecessors(var_id))
#tests_parents_numb = len(u)
#complete_frame = self.complete_graph_frame

@ -17,7 +17,6 @@ class Trajectory:
def __init__(self, list_of_columns, original_cols_number):
# The first entry of list_of_columns must hold the times: its first element has to be np.float64.
if type(list_of_columns[0][0]) != np.float64:
raise TypeError('The first array in the list has to be Times')
#TODO evaluate whether it is worth sorting the list of numpy arrays by type
self.original_cols_number = original_cols_number
# Every entry after the first is stacked into an integer array and transposed;
# the first entry is stored separately as the float times array.
# NOTE(review): the np.int and np.float aliases were deprecated in NumPy 1.20 and removed
# in 1.24; the builtins int and float are the documented replacements.
self._actual_trajectory = np.array(list_of_columns[1:], dtype=np.int).T
self._times = np.array(list_of_columns[0], dtype=np.float)

File diff suppressed because it is too large Load Diff

@ -107,7 +107,7 @@ class TestStructure(unittest.TestCase):
sp1 = sp.SamplePath('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name')
sp1.build_trajectories()
sp1.build_structure()
st1 = st.Structure(['X', 'Y', 'Z'], np.array([0,1,2]), np.array([3,3,3]), [('X', 'Y'), ('Z', 'Y')], sp1.total_variables_count)
st1 = st.Structure(['Y'], np.array([2]), np.array([2]), [], sp1.total_variables_count)
g1 = ng.NetworkGraph(st1)
g1.init_graph()
print(g1.transition_scalar_indexing_structure)
@ -117,7 +117,7 @@ class TestStructure(unittest.TestCase):
p1 = pe.ParametersEstimator(sp1,g1)
p1.init_sets_cims_container()
p1.compute_parameters_for_node('Y')
print(p1.sets_of_cims_struct.sets_of_cims[1].actual_cims)
print(p1.sets_of_cims_struct.sets_of_cims[0].actual_cims)
if __name__ == '__main__':

@ -31,6 +31,7 @@ class TestStructureEstimator(unittest.TestCase):
lp.print_stats()
#se1.ctpc_algorithm()
print(se1.complete_graph.edges)
print(self.s1.structure.list_of_edges())
def aux_test_complete_test(self, estimator, test_par, test_child, p_set):
estimator.complete_test(test_par, test_child, p_set)