1
0
Fork 0

Refactor test_structure and test_jsonImporter

parallel_struct_est
philpMartin 4 years ago
parent 864ffbd319
commit b374d60117
  1. 6
      main_package/classes/cache.py
  2. 141
      main_package/classes/json_importer.py
  3. 16
      main_package/classes/network_graph.py
  4. 49
      main_package/classes/sample_path.py
  5. 92
      main_package/classes/structure.py
  6. 12
      main_package/classes/structure_estimator.py
  7. 24
      main_package/classes/trajectory.py
  8. 13
      main_package/tests/test_json_importer.py
  9. 0
      main_package/tests/test_sample_path.py
  10. 99
      main_package/tests/test_structure.py
  11. 3
      main_package/tests/test_structure_estimator.py
  12. 17
      main_package/tests/test_trajectory.py

@ -8,17 +8,17 @@ class Cache:
self.list_of_sets_of_indxs = [] self.list_of_sets_of_indxs = []
self.actual_cache = [] self.actual_cache = []
def find(self, parents_comb: typing.Set): def find(self, parents_comb: typing.Union[typing.Set, str]):
try: try:
#print("Cache State:", self.list_of_sets_of_indxs) #print("Cache State:", self.list_of_sets_of_indxs)
#print("Look For:", parents_comb) #print("Look For:", parents_comb)
result = self.actual_cache[self.list_of_sets_of_indxs.index(parents_comb)] result = self.actual_cache[self.list_of_sets_of_indxs.index(parents_comb)]
print("CACHE HIT!!!!") print("CACHE HIT!!!!", parents_comb)
return result return result
except ValueError: except ValueError:
return None return None
def put(self, parents_comb: typing.Set, socim: sofc.SetOfCims): def put(self, parents_comb: typing.Union[typing.Set, str], socim: sofc.SetOfCims):
#print("Putting in cache:", parents_comb) #print("Putting in cache:", parents_comb)
self.list_of_sets_of_indxs.append(parents_comb) self.list_of_sets_of_indxs.append(parents_comb)
self.actual_cache.append(socim) self.actual_cache.append(socim)

@ -4,23 +4,28 @@ import pandas as pd
import json import json
import typing import typing
from abstract_importer import AbstractImporter from abstract_importer import AbstractImporter
from line_profiler import LineProfiler
class JsonImporter(AbstractImporter): class JsonImporter(AbstractImporter):
""" """
Implementa l'interfaccia AbstractImporter e aggiunge i metodi necessari a costruire le trajectories e la struttura della rete Implements the Interface AbstractImporter and adds all the necessary methods to process and prepare the data in json ext.
del dataset in formato json con la seguente struttura: with the following structure:
[] 0 [] 0
|_ dyn.cims |_ dyn.cims
|_ dyn.str |_ dyn.str
|_ samples |_ samples
|_ variabels |_ variabels
:files_path: the path that contains the data to be imported
:df_samples_list: lista di dataframe, ogni dataframe contiene una traj :samples_label: the reference key for the samples in the trajectories
:df_structure: dataframe contenente la struttura della rete :structure_label: the reference key for the structure of the network data
:df_variables: dataframe contenente le infromazioni sulle variabili della rete :variables_label: the reference key for the cardinalites of the nodes data
:time_key: the key used to identify the timestamps in each trajectory
:variables_key: the key used to identify the names of the variables in the net
:df_samples_list: a Dataframe list in which every df contains a trajectory
:df_structure: Dataframe containing the structure of the network (edges)
:df_variables: Dataframe containing the nodes cardinalities
:df_concatenated_samples: the concatenation and processing of all the trajectories present in the list df_samples list
:sorter: the columns header(excluding the time column) of the Dataframe concatenated_samples
""" """
def __init__(self, files_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str, def __init__(self, files_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
@ -38,6 +43,13 @@ class JsonImporter(AbstractImporter):
super(JsonImporter, self).__init__(files_path) super(JsonImporter, self).__init__(files_path)
def import_data(self): def import_data(self):
"""
Imports and prepares all the data needed for subsequent computation.
Parameters:
void
Returns:
void
"""
raw_data = self.read_json_file() raw_data = self.read_json_file()
#self.import_variables(raw_data) #self.import_variables(raw_data)
self.import_trajectories(raw_data) self.import_trajectories(raw_data)
@ -46,14 +58,38 @@ class JsonImporter(AbstractImporter):
self.import_structure(raw_data) self.import_structure(raw_data)
self.import_variables(raw_data, self.sorter) self.import_variables(raw_data, self.sorter)
def import_trajectories(self, raw_data: pd.DataFrame): def import_trajectories(self, raw_data: typing.List):
"""
Imports the trajectories in the list of dicts raw_data.
Parameters:
:raw_data: List of Dicts
Returns:
void
"""
self.normalize_trajectories(raw_data, 0, self.samples_label) self.normalize_trajectories(raw_data, 0, self.samples_label)
def import_structure(self, raw_data: pd.DataFrame): def import_structure(self, raw_data: typing.List):
"""
Imports in a dataframe the data in the list raw_data at the key structure_label
Parameters:
raw_data: the data
Returns:
void
"""
self._df_structure = self.one_level_normalizing(raw_data, 0, self.structure_label) self._df_structure = self.one_level_normalizing(raw_data, 0, self.structure_label)
#TODO Attenzione l'ordine delle vars non è alfabetico come nel dataset -> agire di conseguenza
#Ordinando la vars alfabeticamente def import_variables(self, raw_data: typing.List, sorter: typing.List):
def import_variables(self, raw_data: pd.DataFrame, sorter): """
Imports the data in raw_data at the key variables_label.
Sorts the row of the dataframe df_variables using the list sorter.
Parameters:
raw_data: the data
sorter: the list used to sort the dataframe self.df_variables
Returns:
void
"""
self._df_variables = self.one_level_normalizing(raw_data, 0, self.variables_label) self._df_variables = self.one_level_normalizing(raw_data, 0, self.variables_label)
#self.sorter = self._df_variables[self.variables_key].to_list() #self.sorter = self._df_variables[self.variables_key].to_list()
#self.sorter.sort() #self.sorter.sort()
@ -62,16 +98,16 @@ class JsonImporter(AbstractImporter):
self._df_variables[self.variables_key] = self._df_variables[self.variables_key].cat.set_categories(sorter) self._df_variables[self.variables_key] = self._df_variables[self.variables_key].cat.set_categories(sorter)
self._df_variables = self._df_variables.sort_values([self.variables_key]) self._df_variables = self._df_variables.sort_values([self.variables_key])
self._df_variables.reset_index(inplace=True) self._df_variables.reset_index(inplace=True)
#print("Var Frame", self._df_variables) print("Var Frame", self._df_variables)
def read_json_file(self) -> typing.List: def read_json_file(self) -> typing.List:
""" """
Legge il primo file .json nel path self.filepath Reads the first json file in the path self.filePath
Parameters: Parameters:
void void
Returns: Returns:
:data: il contenuto del file json data: the contents of the json file
""" """
try: try:
@ -84,39 +120,55 @@ class JsonImporter(AbstractImporter):
except ValueError as err: except ValueError as err:
print(err.args) print(err.args)
def one_level_normalizing(self, raw_data: pd.DataFrame, indx: int, key: str) -> pd.DataFrame: def one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame:
""" """
Estrae i dati innestati di un livello, presenti nel dataset raw_data, Extracts the one-level nested data in the list raw_data at the index indx at the key key
presenti nel json array all'indice indx nel json object key
Parameters: Parameters:
:raw_data: il dataset json completo raw_data: List of Dicts
:indx: l'indice del json array da cui estrarre i dati indx: The index of the array from which the data have to be extracted
:key: il json object da cui estrarre i dati key: the key for the Dicts from which exctract data
Returns: Returns:
Il dataframe contenente i dati normalizzati a normalized dataframe
""" """
return pd.DataFrame(raw_data[indx][key]) return pd.DataFrame(raw_data[indx][key])
def normalize_trajectories(self, raw_data: pd.DataFrame, indx: int, trajectories_key: str): def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str):
""" """
Estrae le traiettorie presenti in rawdata nel json array all'indice indx, nel json object trajectories_key. Extracts the traj in raw_data at the index index at the key trajectories key.
Aggiunge le traj estratte nella lista di dataframe self.df_samples_list Adds the extracted traj in the dataframe list self._df_samples_list.
Initializes the list self.sorter.
Parameters: Parameters:
void raw_data: the data
indx: the index of the array from which extract data
trajectories_key: the key of the trajectories objects
Returns: Returns:
void void
""" """
self.df_samples_list = [pd.DataFrame(sample) for sample in raw_data[indx][trajectories_key]] dataframe = pd.DataFrame
#for sample_indx, sample in enumerate(raw_data[indx][trajectories_key]): smps = raw_data[indx][trajectories_key]
#self.df_samples_list.append(pd.DataFrame(sample)) self.df_samples_list = [dataframe(sample) for sample in smps]
#self.sorter = list(self.df_samples_list[0].columns.values)[1:] #TODO Qui ci deve essere la colonna NAME ordinata alfabeticamente columns_header = list(self.df_samples_list[0].columns.values)
columns_header.remove(self.time_key)
self.sorter = columns_header
def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame, time_header_label: str, def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame, time_header_label: str,
columns_header: typing.List, shifted_cols_header: typing.List) \ columns_header: typing.List, shifted_cols_header: typing.List) \
-> pd.DataFrame: -> pd.DataFrame:
"""
Computes the difference between each value present in the time column.
Copies and shifts up by one position all the values present in the remaining columns.
Parameters:
sample_frame: the traj to be processed
time_header_label: the label for the times
columns_header: the original header of sample_frame
shifted_cols_header: a copy of columns_header with changed names of the contents
Returns:
sample_frame: the processed dataframe
"""
sample_frame[time_header_label] = sample_frame[time_header_label].diff().shift(-1) sample_frame[time_header_label] = sample_frame[time_header_label].diff().shift(-1)
shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32') shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32')
#print(shifted_cols) #print(shifted_cols)
@ -126,15 +178,25 @@ class JsonImporter(AbstractImporter):
return sample_frame return sample_frame
def compute_row_delta_in_all_samples_frames(self, time_header_label: str): def compute_row_delta_in_all_samples_frames(self, time_header_label: str):
columns_header = list(self.df_samples_list[0].columns.values) """
Calls the method compute_row_delta_sigle_samples_frame on every dataframe present in the list self.df_samples_list.
Concatenates the result in the dataframe concatenated_samples
Parameters:
time_header_label: the label of the time column
Returns:
void
"""
"""columns_header = list(self.df_samples_list[0].columns.values)
columns_header.remove('Time') columns_header.remove('Time')
self.sorter = columns_header self.sorter = columns_header"""
shifted_cols_header = [s + "S" for s in self.sorter] shifted_cols_header = [s + "S" for s in self.sorter]
compute_row_delta = self.compute_row_delta_sigle_samples_frame compute_row_delta = self.compute_row_delta_sigle_samples_frame
"""for indx, sample in enumerate(self.df_samples_list): """for indx, sample in enumerate(self.df_samples_list):
self.df_samples_list[indx] = self.compute_row_delta_sigle_samples_frame(sample, self.df_samples_list[indx] = self.compute_row_delta_sigle_samples_frame(sample,
time_header_label, self.sorter, shifted_cols_header)""" time_header_label, self.sorter, shifted_cols_header)"""
self.df_samples_list = [compute_row_delta(sample, time_header_label, self.sorter, shifted_cols_header) for sample in self.df_samples_list] self.df_samples_list = [compute_row_delta(sample, time_header_label, self.sorter, shifted_cols_header)
for sample in self.df_samples_list]
self._concatenated_samples = pd.concat(self.df_samples_list) self._concatenated_samples = pd.concat(self.df_samples_list)
complete_header = self.sorter[:] complete_header = self.sorter[:]
complete_header.insert(0,'Time') complete_header.insert(0,'Time')
@ -146,10 +208,11 @@ class JsonImporter(AbstractImporter):
def build_list_of_samples_array(self, data_frame: pd.DataFrame) -> typing.List: def build_list_of_samples_array(self, data_frame: pd.DataFrame) -> typing.List:
""" """
Costruisce una lista contenente le colonne presenti nel dataframe data_frame convertendole in numpy_array Costruisce una lista contenente le colonne presenti nel dataframe data_frame convertendole in numpy_array
Builds a List containing the columns of dataframe and converts them to a numpy array.
Parameters: Parameters:
:data_frame: il dataframe da cui estrarre e convertire le colonne :data_frame: the dataframe from which the columns have to be extracted and converted
Returns: Returns:
:columns_list: la lista contenente le colonne convertite in numpyarray :columns_list: the resulting list of numpy arrays
""" """
columns_list = [data_frame[column].to_numpy() for column in data_frame] columns_list = [data_frame[column].to_numpy() for column in data_frame]
@ -159,7 +222,7 @@ class JsonImporter(AbstractImporter):
def clear_concatenated_frame(self): def clear_concatenated_frame(self):
""" """
Rimuove tutti i valori contenuti nei data_frames presenti in df_samples_list Removes all values in the dataframe concatenated_samples
Parameters: Parameters:
void void
Returns: Returns:
@ -168,6 +231,9 @@ class JsonImporter(AbstractImporter):
self._concatenated_samples = self._concatenated_samples.iloc[0:0] self._concatenated_samples = self._concatenated_samples.iloc[0:0]
def clear_data_frame_list(self): def clear_data_frame_list(self):
"""
Removes all values present in the dataframes in the list df_samples_list
"""
for indx in range(len(self.df_samples_list)): # Le singole traj non servono più #TODO usare list comprens for indx in range(len(self.df_samples_list)): # Le singole traj non servono più #TODO usare list comprens
self.df_samples_list[indx] = self.df_samples_list[indx].iloc[0:0] self.df_samples_list[indx] = self.df_samples_list[indx].iloc[0:0]
@ -180,7 +246,6 @@ class JsonImporter(AbstractImporter):
cims_for_all_vars[var].append(pd.DataFrame(raw_data[indx][cims_key][var][p_comb]).to_numpy()) cims_for_all_vars[var].append(pd.DataFrame(raw_data[indx][cims_key][var][p_comb]).to_numpy())
return cims_for_all_vars return cims_for_all_vars
@property @property
def concatenated_samples(self): def concatenated_samples(self):
return self._concatenated_samples return self._concatenated_samples

@ -3,7 +3,7 @@ import networkx as nx
import numpy as np import numpy as np
class NetworkGraph(): class NetworkGraph:
""" """
Rappresenta il grafo che contiene i nodi e gli archi presenti nell'oggetto Structure graph_struct. Rappresenta il grafo che contiene i nodi e gli archi presenti nell'oggetto Structure graph_struct.
Ogni nodo contine la label node_id, al nodo è anche associato un id numerico progressivo indx che rappresenta la posizione Ogni nodo contine la label node_id, al nodo è anche associato un id numerico progressivo indx che rappresenta la posizione
@ -17,9 +17,9 @@ class NetworkGraph():
def __init__(self, graph_struct): def __init__(self, graph_struct):
self.graph_struct = graph_struct self.graph_struct = graph_struct
self.graph = nx.DiGraph() self.graph = nx.DiGraph()
self._nodes_indexes = self.graph_struct.list_of_nodes_indexes() self._nodes_indexes = self.graph_struct.nodes_indexes
self._nodes_labels = self.graph_struct.list_of_nodes_labels() self._nodes_labels = self.graph_struct.nodes_labels
self._nodes_values = self.graph_struct.nodes_values() self._nodes_values = self.graph_struct.nodes_values
self.aggregated_info_about_nodes_parents = None self.aggregated_info_about_nodes_parents = None
self._fancy_indexing = None self._fancy_indexing = None
self._time_scalar_indexing_structure = None self._time_scalar_indexing_structure = None
@ -30,7 +30,7 @@ class NetworkGraph():
def init_graph(self): def init_graph(self):
self.add_nodes(self._nodes_labels) self.add_nodes(self._nodes_labels)
self.add_edges(self.graph_struct.list_of_edges()) self.add_edges(self.graph_struct.edges)
self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes() self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes()
self._fancy_indexing = self.build_fancy_indexing_structure(0) self._fancy_indexing = self.build_fancy_indexing_structure(0)
self.build_scalar_indexing_structures() self.build_scalar_indexing_structures()
@ -41,7 +41,7 @@ class NetworkGraph():
def add_nodes(self, list_of_nodes): def add_nodes(self, list_of_nodes):
#self.graph.add_nodes_from(list_of_nodes) #self.graph.add_nodes_from(list_of_nodes)
nodes_indxs = self._nodes_indexes nodes_indxs = self._nodes_indexes
nodes_vals = self.graph_struct.nodes_values() nodes_vals = self.graph_struct.nodes_values
pos = 0 pos = 0
#print("LIST OF NODES", list_of_nodes) #print("LIST OF NODES", list_of_nodes)
for id, node_indx, node_val in zip(list_of_nodes, nodes_indxs, nodes_vals): for id, node_indx, node_val in zip(list_of_nodes, nodes_indxs, nodes_vals):
@ -134,7 +134,7 @@ class NetworkGraph():
#parents_indexes_list = self._fancy_indexing #parents_indexes_list = self._fancy_indexing
"""for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing): """for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing):
self._time_filtering.append(np.append(np.array([node_indx], dtype=np.int), p_indxs).astype(np.int))""" self._time_filtering.append(np.append(np.array([node_indx], dtype=np.int), p_indxs).astype(np.int))"""
nodes_indxs = self.graph_struct.list_of_nodes_indexes() nodes_indxs = self._nodes_indexes
#print("FINDXING", self._fancy_indexing) #print("FINDXING", self._fancy_indexing)
#print("Nodes Indxs", nodes_indxs) #print("Nodes Indxs", nodes_indxs)
self._time_filtering = [np.append(np.array([node_indx], dtype=np.int), p_indxs).astype(np.int) self._time_filtering = [np.append(np.array([node_indx], dtype=np.int), p_indxs).astype(np.int)
@ -145,7 +145,7 @@ class NetworkGraph():
nodes_number = self.graph_struct.total_variables_number nodes_number = self.graph_struct.total_variables_number
"""for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing): """for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing):
self._transition_filtering.append(np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int))""" self._transition_filtering.append(np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int))"""
nodes_indxs = self.graph_struct.list_of_nodes_indexes() nodes_indxs = self._nodes_indexes
self._transition_filtering = [np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int) self._transition_filtering = [np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int)
for node_indx, p_indxs in zip(nodes_indxs, for node_indx, p_indxs in zip(nodes_indxs,
self._fancy_indexing)] self._fancy_indexing)]

@ -6,17 +6,26 @@ import structure as st
class SamplePath: class SamplePath:
""" """
Contiene l'aggregazione di una o più traiettorie e la struttura della rete. Aggregates all the informations about the trajectories, the real structure of the sampled net and variables
Ha il compito dato di costruire tutte gli oggetti Trajectory e l'oggetto Structure cardinalites.
a partire dai dataframe contenuti in self.importer Has the task of creating the objects that will contain the mentioned data.
:files_path: the path that contains the data to be imported
:samples_label: the reference key for the samples in the trajectories
:structure_label: the reference key for the structure of the network data
:variables_label: the reference key for the cardinalities of the nodes data
:time_key: the key used to identify the timestamps in each trajectory
:variables_key: the key used to identify the names of the variables in the net
:importer: the Importer object that will import and process data
:trajectories: the Trajectory object that will contain all the concatenated trajectories
:structure: the Structure Object that will contain all the structural infos about the net
:total_variables_count: the number of variables in the net
:importer: l'oggetto Importer che ha il compito di caricare i dataset
:trajectories: lista di oggetti Trajectories
:structure: oggetto Structure
""" """
def __init__(self, files_path, samples_label, structure_label, variables_label, time_key, variables_key): def __init__(self, files_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
variables_key: str):
self.importer = imp.JsonImporter(files_path, samples_label, structure_label, self.importer = imp.JsonImporter(files_path, samples_label, structure_label,
variables_label, time_key, variables_key) variables_label, time_key, variables_key)
self._trajectories = None self._trajectories = None
@ -24,6 +33,15 @@ class SamplePath:
self.total_variables_count = None self.total_variables_count = None
def build_trajectories(self): def build_trajectories(self):
"""
Builds the Trajectory object that will contain all the trajectories.
Clears all the unused dataframes in the Importer object
Parameters:
void
Returns:
void
"""
self.importer.import_data() self.importer.import_data()
self._trajectories = \ self._trajectories = \
tr.Trajectory(self.importer.build_list_of_samples_array(self.importer.concatenated_samples), tr.Trajectory(self.importer.build_list_of_samples_array(self.importer.concatenated_samples),
@ -32,12 +50,19 @@ class SamplePath:
self.importer.clear_concatenated_frame() self.importer.clear_concatenated_frame()
def build_structure(self): def build_structure(self):
"""
Builds the Structure object that aggregates all the infos about the net.
Parameters:
void
Returns:
void
"""
self.total_variables_count = len(self.importer.sorter) self.total_variables_count = len(self.importer.sorter)
labels = self.importer._df_variables['Name'].to_list() labels = self.importer.variables[self.importer.variables_key].to_list()
#print("SAMPLE PATH LABELS",labels) #print("SAMPLE PATH LABELS",labels)
indxs = self.importer._df_variables.index.to_numpy() indxs = self.importer.variables.index.to_numpy()
vals = self.importer._df_variables['Value'].to_numpy() vals = self.importer.variables['Value'].to_numpy()
edges = list(self.importer._df_structure.to_records(index=False)) edges = list(self.importer.structure.to_records(index=False))
self._structure = st.Structure(labels, indxs, vals, edges, self._structure = st.Structure(labels, indxs, vals, edges,
self.total_variables_count) self.total_variables_count)

@ -1,68 +1,78 @@
import typing as ty
import numpy as np import numpy as np
class Structure: class Structure:
""" """
Contiene tutte il informazioni sulla struttura della rete (connessione dei nodi, valori assumibili dalle variabili) Contains all the infos about the network structure(nodes names, nodes caridinalites, edges...)
:structure_frame: il dataframe contenente le connessioni dei nodi della rete :nodes_labels_list: the symbolic names of the variables
:variables_frame: il data_frame contenente i valori assumibili dalle variabili e si suppone il corretto ordinamento :nodes_indexes_arr: the indexes of the nodes
rispetto alle colonne del dataset :nodes_vals_arr: the cardinalites of the nodes
:edges_list: the edges of the network
:total_variables_number: the total number of variables in the net
""" """
def __init__(self, nodes_label_list, node_indexes_arr, nodes_vals_arr, edges_list, total_variables_number): def __init__(self, nodes_label_list: ty.List, node_indexes_arr: np.array, nodes_vals_arr: np.array,
#self.structure_frame = structure edges_list: ty.List, total_variables_number: int):
#self.variables_frame = variables self._nodes_labels_list = nodes_label_list
self.nodes_labels_list = nodes_label_list self._nodes_indexes_arr = node_indexes_arr
self.nodes_indexes_arr = node_indexes_arr self._nodes_vals_arr = nodes_vals_arr
self.nodes_vals_arr = nodes_vals_arr self._edges_list = edges_list
self.edges_list = edges_list self._total_variables_number = total_variables_number
self.total_variables_number = total_variables_number
#self.name_label = variables.columns.values[0] @property
#self.value_label = variables.columns.values[1] def edges(self):
def list_of_edges(self):
#records = self.structure_frame.to_records(index=False) #records = self.structure_frame.to_records(index=False)
#edges_list = list(records) #edges_list = list(records)
return self.edges_list return self._edges_list
def list_of_nodes_labels(self): @property
return self.nodes_labels_list def nodes_labels(self):
return self._nodes_labels_list
def list_of_nodes_indexes(self): @property
return self.nodes_indexes_arr def nodes_indexes(self):
return self._nodes_indexes_arr
def get_node_id(self, node_indx): @property
return self.nodes_labels_list[node_indx] def nodes_values(self):
return self._nodes_vals_arr
def get_node_indx(self, node_id): @property
return self.nodes_indexes_arr[self.nodes_labels_list.index(node_id)] def total_variables_number(self):
return self._total_variables_number
def get_positional_node_indx(self, node_id): def get_node_id(self, node_indx: int):
return self.nodes_labels_list.index(node_id) return self._nodes_labels_list[node_indx]
def get_states_number(self, node): def get_node_indx(self, node_id: str):
#print("node", node) pos_indx = self._nodes_labels_list.index(node_id)
return self.nodes_vals_arr[self.get_positional_node_indx(node)] return self._nodes_indexes_arr[pos_indx]
def get_states_number_by_indx(self, node_indx): def get_positional_node_indx(self, node_id: str):
#print(self.value_label) return self._nodes_labels_list.index(node_id)
#print("Node indx", node_indx)
return self.nodes_vals_arr[node_indx]
def nodes_values(self): def get_states_number(self, node: str):
return self.nodes_vals_arr pos_indx = self._nodes_labels_list.index(node)
return self._nodes_vals_arr[pos_indx]
def total_variables_number(self): def get_states_number_by_indx(self, node_indx: int):
return self.total_variables_number #print(self.value_label)
#print("Node indx", node_indx)
return self._nodes_vals_arr[node_indx]
def __repr__(self): def __repr__(self):
return "Variables:\n" + str(self.variables_frame) + "\nEdges: \n" + str(self.structure_frame) return "Variables:\n" + str(self._nodes_labels_list) +"\nValues:\n"+ str(self._nodes_vals_arr) +\
"\nEdges: \n" + str(self._edges_list)
def __eq__(self, other): def __eq__(self, other):
"""Overrides the default implementation""" """Overrides the default implementation"""
if isinstance(other, Structure): if isinstance(other, Structure):
return self.structure_frame.equals(other.structure_frame) and \ return set(self._nodes_labels_list) == set(other._nodes_labels_list) and \
self.variables_frame.equals(other.variables_frame) np.array_equal(self._nodes_vals_arr, other._nodes_vals_arr) and \
np.array_equal(self._nodes_indexes_arr, other._nodes_indexes_arr) and \
set(self._edges_list) == set(other._edges_list)
return NotImplemented return NotImplemented

@ -15,13 +15,13 @@ class StructureEstimator:
def __init__(self, sample_path, exp_test_alfa, chi_test_alfa): def __init__(self, sample_path, exp_test_alfa, chi_test_alfa):
self.sample_path = sample_path self.sample_path = sample_path
self.nodes = np.array(self.sample_path.structure.list_of_nodes_labels()) self.nodes = np.array(self.sample_path.structure.nodes_labels)
#print("NODES", self.nodes) #print("NODES", self.nodes)
self.nodes_vals = self.sample_path.structure.nodes_vals_arr self.nodes_vals = self.sample_path.structure.nodes_values
self.nodes_indxs = self.sample_path.structure.nodes_indexes_arr self.nodes_indxs = self.sample_path.structure.nodes_indexes
#self.nodes_indxs = np.array(range(0,4)) #self.nodes_indxs = np.array(range(0,4))
#print("INDXS", self.nodes_indxs) #print("INDXS", self.nodes_indxs)
self.complete_graph = self.build_complete_graph(self.sample_path.structure.list_of_nodes_labels()) self.complete_graph = self.build_complete_graph(self.sample_path.structure.nodes_labels)
self.exp_test_sign = exp_test_alfa self.exp_test_sign = exp_test_alfa
self.chi_test_alfa = chi_test_alfa self.chi_test_alfa = chi_test_alfa
self.cache = ch.Cache() self.cache = ch.Cache()
@ -53,11 +53,13 @@ class StructureEstimator:
cims_filter = sorted_parents != test_parent cims_filter = sorted_parents != test_parent
#print("PARENTS NO FROM MASK", cims_filter) #print("PARENTS NO FROM MASK", cims_filter)
if not p_set: if not p_set:
print("EMPTY PSET TRYING TO FIND", test_child)
sofc1 = self.cache.find(test_child) sofc1 = self.cache.find(test_child)
else: else:
sofc1 = self.cache.find(set(p_set)) sofc1 = self.cache.find(set(p_set))
if not sofc1: if not sofc1:
print("CACHE MISSS SOFC1")
bool_mask1 = np.isin(self.nodes,complete_info) bool_mask1 = np.isin(self.nodes,complete_info)
#print("Bool mask 1", bool_mask1) #print("Bool mask 1", bool_mask1)
l1 = list(self.nodes[bool_mask1]) l1 = list(self.nodes[bool_mask1])
@ -88,6 +90,7 @@ class StructureEstimator:
#p_set.append(test_parent) #p_set.append(test_parent)
p_set.insert(0, test_parent) p_set.insert(0, test_parent)
if p_set: if p_set:
print("FULL PSET TRYING TO FIND", p_set)
#p_set.append(test_parent) #p_set.append(test_parent)
#print("PSET ", p_set) #print("PSET ", p_set)
#set_p_set = set(p_set) #set_p_set = set(p_set)
@ -102,6 +105,7 @@ class StructureEstimator:
p2.compute_parameters_for_node(test_child) p2.compute_parameters_for_node(test_child)
sofc2 = p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)]""" sofc2 = p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)]"""
if not sofc2: if not sofc2:
print("Cache MISSS SOFC2")
complete_info.append(test_parent) complete_info.append(test_parent)
bool_mask2 = np.isin(self.nodes, complete_info) bool_mask2 = np.isin(self.nodes, complete_info)
#print("BOOL MASK 2",bool_mask2) #print("BOOL MASK 2",bool_mask2)

@ -4,13 +4,13 @@ import numpy as np
class Trajectory: class Trajectory:
""" """
Rappresenta una traiettoria come un numpy_array contenente n-ple (indx, T_k,S_i,.....,Sj) Abstracts the infos about a complete set of trajectories, represented as a numpy array of doubles and a numpy matrix
Offre i metodi utili alla computazione sulla struttura stessa. of ints.
Una Trajectory viene costruita a partire da una lista di numpyarray dove ogni elemento rappresenta una colonna :list_of_columns: the list containing the times array and values matrix
della traj :original_cols_numb: total number of cols in the data
:actual_trajectory: the trajectory containing also the duplicated and shifted values
:actual_trajectory: il numpy_array contenente la successione di n-ple (indx, T_k,S_i,.....,Sj) :times: the array containing the time deltas
""" """
@ -23,10 +23,22 @@ class Trajectory:
@property @property
def trajectory(self): def trajectory(self):
"""
Parameters:
void
Returns:
a numpy matrix containing ONLY the original columns values, not the shifted ones
"""
return self._actual_trajectory[:, :self.original_cols_number] return self._actual_trajectory[:, :self.original_cols_number]
@property @property
def complete_trajectory(self): def complete_trajectory(self):
"""
Parameters:
void
Returns:
a numpy matrix containing all the values
"""
return self._actual_trajectory return self._actual_trajectory
@property @property

@ -5,6 +5,8 @@ import numpy as np
import pandas as pd import pandas as pd
import json_importer as ji import json_importer as ji
from line_profiler import LineProfiler
import os import os
import json import json
@ -44,6 +46,7 @@ class TestJsonImporter(unittest.TestCase):
def test_normalize_trajectories(self):
    """Normalizing entry 0 must yield one frame per raw sample, with the columns after the first ordered as the sorter."""
    importer = ji.JsonImporter('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name')
    raw_data = importer.read_json_file()
    importer.normalize_trajectories(raw_data, 0, importer.samples_label)
    frames_built = len(importer.df_samples_list)
    samples_in_file = len(raw_data[0][importer.samples_label])
    self.assertEqual(frames_built, samples_in_file)
    # The first column is skipped by the comparison; only the remaining ones are checked against the sorter.
    first_frame_columns = list(importer.df_samples_list[0].columns.values)
    self.assertEqual(first_frame_columns[1:], importer.sorter)
@ -51,7 +54,7 @@ class TestJsonImporter(unittest.TestCase):
def test_normalize_trajectories_wrong_indx(self):
    """Asking for entry 474 of the raw data must make normalize_trajectories raise IndexError."""
    importer = ji.JsonImporter('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name')
    raw_data = importer.read_json_file()
    with self.assertRaises(IndexError):
        importer.normalize_trajectories(raw_data, 474, importer.samples_label)
def test_normalize_trajectories_wrong_key(self): def test_normalize_trajectories_wrong_key(self):
j1 = ji.JsonImporter('../data', 'sample', 'dyn.str', 'variables', 'Time', 'Name') j1 = ji.JsonImporter('../data', 'sample', 'dyn.str', 'variables', 'Time', 'Name')
@ -77,6 +80,7 @@ class TestJsonImporter(unittest.TestCase):
j1.import_trajectories(raw_data) j1.import_trajectories(raw_data)
j1.compute_row_delta_in_all_samples_frames(j1.time_key) j1.compute_row_delta_in_all_samples_frames(j1.time_key)
self.assertEqual(list(j1.df_samples_list[0].columns.values), list(j1.concatenated_samples.columns.values)) self.assertEqual(list(j1.df_samples_list[0].columns.values), list(j1.concatenated_samples.columns.values))
self.assertEqual(list(j1.concatenated_samples.columns.values)[0], j1.time_key)
def test_clear_data_frame_list(self): def test_clear_data_frame_list(self):
j1 = ji.JsonImporter('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name') j1 = ji.JsonImporter('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name')
@ -112,7 +116,12 @@ class TestJsonImporter(unittest.TestCase):
def test_import_data(self): def test_import_data(self):
j1 = ji.JsonImporter('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name') j1 = ji.JsonImporter('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name')
j1.import_data() lp = LineProfiler()
lp_wrapper = lp(j1.import_data)
lp_wrapper()
lp.print_stats()
#j1.import_data()
self.assertEqual(list(j1.variables[j1.variables_key]), self.assertEqual(list(j1.variables[j1.variables_key]),
list(j1.concatenated_samples.columns.values[1:len(j1.variables[j1.variables_key]) + 1])) list(j1.concatenated_samples.columns.values[1:len(j1.variables[j1.variables_key]) + 1]))
print(j1.variables) print(j1.variables)

@ -11,71 +11,74 @@ import parameters_estimator as pe
class TestStructure(unittest.TestCase): class TestStructure(unittest.TestCase):
@classmethod
def setUpClass(cls):
    """
    Build, once for the whole class, the fixture shared by every test:
    three nodes with their indexes and values, plus the edges between them.
    """
    node_labels = ['X', 'Y', 'Z']
    cls.labels = node_labels
    cls.vars_numb = len(node_labels)
    # Indexes and values are listed in the same order as the labels.
    cls.indxs = np.array([0, 1, 2])
    cls.vals = np.array([3, 3, 3])
    cls.edges = [('X', 'Z'), ('Y', 'Z'), ('Z', 'Y')]
def test_init(self):
    """A freshly built Structure must expose exactly the values it was constructed with."""
    structure = st.Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb)
    self.assertListEqual(self.labels, structure.nodes_labels)
    # Arrays are compared with np.array_equal.
    indexes_match = np.array_equal(self.indxs, structure.nodes_indexes)
    self.assertTrue(indexes_match)
    values_match = np.array_equal(self.vals, structure.nodes_values)
    self.assertTrue(values_match)
    self.assertListEqual(self.edges, structure.edges)
    self.assertEqual(self.vars_numb, structure.total_variables_number)
def test_list_of_edges(self):
s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index))
records = self.structure_frame.to_records(index=False)
result = list(records)
for e1, e2 in zip(result, s1.list_of_edges()):
self.assertEqual(e1, e2)
def test_list_of_nodes_labels(self):
s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index))
self.assertEqual(list(self.variables_frame['Name']), s1.list_of_nodes_labels())
def test_get_node_id(self):
    """get_node_id must give back, for each position, the label stored at that position."""
    structure = st.Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb)
    for position, expected_label in enumerate(self.labels):
        found_label = structure.get_node_id(position)
        self.assertEqual(expected_label, found_label)
def test_get_node_indx(self):
    """
    get_node_indx must return the index paired with each label, also when the
    indexes are not contiguous: node 'Y' is dropped, leaving indexes 0 and 2.
    """
    labels = self.labels[:]
    labels.remove('Y')
    # np.delete returns a new array and leaves its input untouched, so the
    # result has to be bound; a bare call would keep all three entries.
    indexes = np.delete(self.indxs, 1)
    values = np.delete(self.vals, 1)
    edges = [('X', 'Z')]
    vars_numb = self.vars_numb - 1
    structure = st.Structure(labels, indexes, values, edges, vars_numb)
    for indx, label in zip(indexes, labels):
        self.assertEqual(indx, structure.get_node_indx(label))
def test_list_of_node_indxs(self):
filtered_frame = self.variables_frame.drop(self.variables_frame[self.variables_frame['Name'] == 'Y'].index)
# print(filtered_frame)
s1 = st.Structure(self.structure_frame, filtered_frame, len(self.variables_frame.index))
for indx1, indx2 in zip(filtered_frame.index, s1.list_of_nodes_indexes()):
self.assertEqual(indx1, indx2)
def test_get_positional_node_indx(self):
    """
    get_positional_node_indx must return the position of each label inside
    nodes_labels, checked on a structure from which node 'Y' was dropped.
    """
    labels = self.labels[:]
    labels.remove('Y')
    # np.delete returns a new array and leaves its input untouched, so the
    # result has to be bound to keep indexes/values aligned with the labels.
    indexes = np.delete(self.indxs, 1)
    values = np.delete(self.vals, 1)
    edges = [('X', 'Z')]
    vars_numb = self.vars_numb - 1
    structure = st.Structure(labels, indexes, values, edges, vars_numb)
    for position, label in enumerate(structure.nodes_labels):
        self.assertEqual(position, structure.get_positional_node_indx(label))
def test_get_states_number(self):
    """
    get_states_number must return, for each label, the value paired with it,
    checked on a structure from which node 'Y' was dropped.
    """
    labels = self.labels[:]
    labels.remove('Y')
    # np.delete returns a new array and leaves its input untouched, so the
    # result has to be bound to keep indexes/values aligned with the labels.
    indexes = np.delete(self.indxs, 1)
    values = np.delete(self.vals, 1)
    edges = [('X', 'Z')]
    vars_numb = self.vars_numb - 1
    structure = st.Structure(labels, indexes, values, edges, vars_numb)
    for val, label in zip(values, labels):
        self.assertEqual(val, structure.get_states_number(label))
#TODO: this test may be unnecessary; check whether the method it covers is actually useful
"""def test_get_states_numeber_by_indx(self):
s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index)) s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index))
for indx, row in self.variables_frame.iterrows(): for indx, row in self.variables_frame.iterrows():
self.assertEqual(row[1], s1.get_states_number_by_indx(indx)) self.assertEqual(row[1], s1.get_states_number_by_indx(indx))
def test_new_init(self): def test_new_init(self):
#self.variables_frame.drop(self.variables_frame[(self.variables_frame['Name'] == 'Y')].index, inplace=True) #self.variables_frame.drop(self.variables_frame[(self.variables_frame['Name'] == 'Y')].index, inplace=True)
"""labels = self.variables_frame['Name'].to_list() labels = self.variables_frame['Name'].to_list()
indxs = self.variables_frame.index.to_numpy() indxs = self.variables_frame.index.to_numpy()
vals = self.variables_frame['Value'].to_numpy() vals = self.variables_frame['Value'].to_numpy()
edges = list(self.structure_frame.to_records(index=False)) edges = list(self.structure_frame.to_records(index=False))
@ -103,7 +106,7 @@ class TestStructure(unittest.TestCase):
array([3, 9]) array([3, 9])
array([1, 2]) array([1, 2])
array([4, 1, 2]) array([4, 1, 2])
"""
sp1 = sp.SamplePath('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name') sp1 = sp.SamplePath('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name')
sp1.build_trajectories() sp1.build_trajectories()
sp1.build_structure() sp1.build_structure()
@ -122,7 +125,7 @@ class TestStructure(unittest.TestCase):
#print(p1.sets_of_cims_struct.get_cims_of_node(0,[1,0])) #print(p1.sets_of_cims_struct.get_cims_of_node(0,[1,0]))
print(p1.sets_of_cims_struct.sets_of_cims[1].actual_cims) print(p1.sets_of_cims_struct.sets_of_cims[1].actual_cims)
#print(p1.sets_of_cims_struct.sets_of_cims[2].get_cims_where_parents_except_last_are_in_state(np.array([0]))) #print(p1.sets_of_cims_struct.sets_of_cims[2].get_cims_where_parents_except_last_are_in_state(np.array([0])))
#print(p1.sets_of_cims_struct.sets_of_cims[0].p_combs) #print(p1.sets_of_cims_struct.sets_of_cims[0].p_combs)"""
if __name__ == '__main__': if __name__ == '__main__':

@ -4,6 +4,7 @@ from line_profiler import LineProfiler
import sample_path as sp import sample_path as sp
import structure_estimator as se import structure_estimator as se
class TestStructureEstimator(unittest.TestCase): class TestStructureEstimator(unittest.TestCase):
@classmethod @classmethod
@ -31,7 +32,7 @@ class TestStructureEstimator(unittest.TestCase):
lp.print_stats() lp.print_stats()
#se1.ctpc_algorithm() #se1.ctpc_algorithm()
print(se1.complete_graph.edges) print(se1.complete_graph.edges)
print(self.s1.structure.list_of_edges()) print(self.s1.structure.edges)
def aux_test_complete_test(self, estimator, test_par, test_child, p_set): def aux_test_complete_test(self, estimator, test_par, test_child, p_set):
estimator.complete_test(test_par, test_child, p_set) estimator.complete_test(test_par, test_child, p_set)

@ -8,7 +8,7 @@ class TestTrajectory(unittest.TestCase):
def test_init(self): def test_init(self):
cols_list = [np.array([1.2,1.3,.14]), np.arange(1,4), np.arange(4,7)] cols_list = [np.array([1.2,1.3,.14]), np.arange(1,4), np.arange(4,7)]
t1 = tr.Trajectory(cols_list, len(cols_list)) t1 = tr.Trajectory(cols_list, len(cols_list) - 2)
self.assertTrue(np.array_equal(cols_list[0], t1.times)) self.assertTrue(np.array_equal(cols_list[0], t1.times))
self.assertTrue(np.array_equal(np.ravel(t1.complete_trajectory[:, : 1]), cols_list[1])) self.assertTrue(np.array_equal(np.ravel(t1.complete_trajectory[:, : 1]), cols_list[1]))
self.assertTrue(np.array_equal(np.ravel(t1.complete_trajectory[:, 1: 2]), cols_list[2])) self.assertTrue(np.array_equal(np.ravel(t1.complete_trajectory[:, 1: 2]), cols_list[2]))
@ -19,6 +19,21 @@ class TestTrajectory(unittest.TestCase):
cols_list = [np.arange(1, 4), np.arange(4, 7), np.array([1.2, 1.3, .14])] cols_list = [np.arange(1, 4), np.arange(4, 7), np.array([1.2, 1.3, .14])]
self.assertRaises(TypeError, tr.Trajectory, cols_list, len(cols_list)) self.assertRaises(TypeError, tr.Trajectory, cols_list, len(cols_list))
def test_complete_trajectory(self):
    """complete_trajectory must hold every column after the times one, stacked side by side."""
    cols_list = [np.array([1.2, 1.3, .14]), np.arange(1, 4), np.arange(4, 7)]
    t1 = tr.Trajectory(cols_list, len(cols_list) - 2)
    # Everything but the leading times array is expected in the matrix.
    value_columns = (cols_list[1], cols_list[2])
    expected = np.column_stack(value_columns)
    matches = np.array_equal(t1.complete_trajectory, expected)
    self.assertTrue(matches)
def test_trajectory(self):
    """With a single original column declared, trajectory must expose only the first value column."""
    cols_list = [np.array([1.2, 1.3, .14]), np.arange(1, 4), np.arange(4, 7)]
    original_cols = len(cols_list) - 2
    t1 = tr.Trajectory(cols_list, original_cols)
    flattened = t1.trajectory.ravel()
    self.assertTrue(np.array_equal(cols_list[1], flattened))
def test_times(self):
    """times must give back the first array passed to the constructor."""
    cols_list = [np.array([1.2, 1.3, .14]), np.arange(1, 4), np.arange(4, 7)]
    expected_times = cols_list[0]
    t1 = tr.Trajectory(cols_list, len(cols_list) - 2)
    times_match = np.array_equal(expected_times, t1.times)
    self.assertTrue(times_match)
if __name__ == '__main__': if __name__ == '__main__':