1
0
Fork 0

Refactor test_structure and test_jsonImporter

parallel_struct_est
philpMartin 4 years ago
parent 864ffbd319
commit b374d60117
  1. 6
      main_package/classes/cache.py
  2. 141
      main_package/classes/json_importer.py
  3. 16
      main_package/classes/network_graph.py
  4. 49
      main_package/classes/sample_path.py
  5. 92
      main_package/classes/structure.py
  6. 12
      main_package/classes/structure_estimator.py
  7. 24
      main_package/classes/trajectory.py
  8. 13
      main_package/tests/test_json_importer.py
  9. 0
      main_package/tests/test_sample_path.py
  10. 99
      main_package/tests/test_structure.py
  11. 3
      main_package/tests/test_structure_estimator.py
  12. 17
      main_package/tests/test_trajectory.py

@ -8,17 +8,17 @@ class Cache:
self.list_of_sets_of_indxs = []
self.actual_cache = []
def find(self, parents_comb: typing.Union[typing.Set, str]):
    """
    Looks for the object cached under the key parents_comb.

    Parameters:
        parents_comb: the key to search for (a set of labels or a single label)
    Returns:
        the cached object stored at the same position as the key, None if the key is not present
    """
    try:
        # list.index raises ValueError when the key has never been stored
        position = self.list_of_sets_of_indxs.index(parents_comb)
    except ValueError:
        return None
    result = self.actual_cache[position]
    print("CACHE HIT!!!!", parents_comb)
    return result
def put(self, parents_comb: typing.Union[typing.Set, str], socim: "sofc.SetOfCims"):
    """
    Stores socim in the cache under the key parents_comb.
    Key and value are appended to two parallel lists, so they always share the same position.

    Parameters:
        parents_comb: the key (a set of labels or a single label)
        socim: the object to be cached
    Returns:
        void
    """
    self.list_of_sets_of_indxs.append(parents_comb)
    self.actual_cache.append(socim)

@ -4,23 +4,28 @@ import pandas as pd
import json
import typing
from abstract_importer import AbstractImporter
from line_profiler import LineProfiler
class JsonImporter(AbstractImporter):
"""
Implementa l'interfaccia AbstractImporter e aggiunge i metodi necessari a costruire le trajectories e la struttura della rete
del dataset in formato json con la seguente struttura:
Implements the Interface AbstractImporter and adds all the necessary methods to process and prepare the data in json ext.
with the following structure:
[] 0
|_ dyn.cims
|_ dyn.str
|_ samples
|_ variabels
:df_samples_list: lista di dataframe, ogni dataframe contiene una traj
:df_structure: dataframe contenente la struttura della rete
:df_variables: dataframe contenente le infromazioni sulle variabili della rete
:files_path: the path that contains the data to be imported
:samples_label: the reference key for the samples in the trajectories
:structure_label: the reference key for the structure of the network data
:variables_label: the reference key for the cardinalities of the nodes data
:time_key: the key used to identify the timestamps in each trajectory
:variables_key: the key used to identify the names of the variables in the net
:df_samples_list: a Dataframe list in which every df contains a trajectory
:df_structure: Dataframe containing the structure of the network (edges)
:df_variables: Dataframe containing the nodes cardinalities
:df_concatenated_samples: the concatenation and processing of all the trajectories present in the list df_samples list
:sorter: the columns header(excluding the time column) of the Dataframe concatenated_samples
"""
def __init__(self, files_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
@ -38,6 +43,13 @@ class JsonImporter(AbstractImporter):
super(JsonImporter, self).__init__(files_path)
def import_data(self):
"""
Imports and prepares all the data needed for subsequent computation.
Parameters:
void
Returns:
void
"""
raw_data = self.read_json_file()
#self.import_variables(raw_data)
self.import_trajectories(raw_data)
@ -46,14 +58,38 @@ class JsonImporter(AbstractImporter):
self.import_structure(raw_data)
self.import_variables(raw_data, self.sorter)
def import_trajectories(self, raw_data: typing.List):
    """
    Imports the trajectories in the list of dicts raw_data.
    Delegates to normalize_trajectories, always reading the element at index 0
    of raw_data at the key self.samples_label.

    Parameters:
        raw_data: List of Dicts
    Returns:
        void
    """
    self.normalize_trajectories(raw_data, 0, self.samples_label)
def import_structure(self, raw_data: typing.List):
    """
    Imports in a dataframe the data in the list raw_data at the key structure_label.
    The result is stored in self._df_structure; the element at index 0 of raw_data is always used.

    Parameters:
        raw_data: the data
    Returns:
        void
    """
    self._df_structure = self.one_level_normalizing(raw_data, 0, self.structure_label)
#TODO Attenzione l'ordine delle vars non è alfabetico come nel dataset -> agire di conseguenza
#Ordinando la vars alfabeticamente
def import_variables(self, raw_data: pd.DataFrame, sorter):
def import_variables(self, raw_data: typing.List, sorter: typing.List):
"""
Imports the data in raw_data at the key variables_label.
Sorts the row of the dataframe df_variables using the list sorter.
Parameters:
raw_data: the data
sorter: the list used to sort the dataframe self.df_variables
Returns:
void
"""
self._df_variables = self.one_level_normalizing(raw_data, 0, self.variables_label)
#self.sorter = self._df_variables[self.variables_key].to_list()
#self.sorter.sort()
@ -62,16 +98,16 @@ class JsonImporter(AbstractImporter):
self._df_variables[self.variables_key] = self._df_variables[self.variables_key].cat.set_categories(sorter)
self._df_variables = self._df_variables.sort_values([self.variables_key])
self._df_variables.reset_index(inplace=True)
#print("Var Frame", self._df_variables)
print("Var Frame", self._df_variables)
def read_json_file(self) -> typing.List:
"""
Legge il primo file .json nel path self.filepath
Reads the first json file in the path self.filePath
Parameters:
void
Returns:
:data: il contenuto del file json
data: the contents of the json file
"""
try:
@ -84,39 +120,55 @@ class JsonImporter(AbstractImporter):
except ValueError as err:
print(err.args)
def one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame:
    """
    Extracts the one-level nested data in the list raw_data at the index indx at the key key.

    Parameters:
        raw_data: List of Dicts
        indx: the index of the array from which the data have to be extracted
        key: the key of the Dict from which the data have to be extracted
    Returns:
        a normalized dataframe built from raw_data[indx][key]
    """
    return pd.DataFrame(raw_data[indx][key])
def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str):
    """
    Extracts the trajectories in raw_data at the index indx at the key trajectories_key.
    Stores the extracted trajectories in the dataframe list self.df_samples_list.
    Initializes the list self.sorter with the columns header of the first trajectory,
    excluding the time column.

    Parameters:
        raw_data: the data
        indx: the index of the array from which the data have to be extracted
        trajectories_key: the key of the trajectories objects
    Returns:
        void
    """
    samples = raw_data[indx][trajectories_key]
    self.df_samples_list = [pd.DataFrame(sample) for sample in samples]
    # the header is read from the first trajectory only
    columns_header = list(self.df_samples_list[0].columns.values)
    columns_header.remove(self.time_key)
    self.sorter = columns_header
def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame, time_header_label: str,
columns_header: typing.List, shifted_cols_header: typing.List) \
-> pd.DataFrame:
"""
Computes the difference between each value present in the time column.
Copies and shifts by one position up all the values present in the remaining columns.
Parameters:
sample_frame: the traj to be processed
time_header_label: the label for the times
columns_header: the original header of sample_frame
shifted_cols_header: a copy of columns_header with changed names of the contents
Returns:
sample_frame: the processed dataframe
"""
sample_frame[time_header_label] = sample_frame[time_header_label].diff().shift(-1)
shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32')
#print(shifted_cols)
@ -126,15 +178,25 @@ class JsonImporter(AbstractImporter):
return sample_frame
def compute_row_delta_in_all_samples_frames(self, time_header_label: str):
columns_header = list(self.df_samples_list[0].columns.values)
"""
Calls the method compute_row_delta_sigle_samples_frame on every dataframe present in the list self.df_samples_list.
Concatenates the result in the dataframe concatanated_samples
Parameters:
time_header_label: the label of the time column
Returns:
void
"""
"""columns_header = list(self.df_samples_list[0].columns.values)
columns_header.remove('Time')
self.sorter = columns_header
self.sorter = columns_header"""
shifted_cols_header = [s + "S" for s in self.sorter]
compute_row_delta = self.compute_row_delta_sigle_samples_frame
"""for indx, sample in enumerate(self.df_samples_list):
self.df_samples_list[indx] = self.compute_row_delta_sigle_samples_frame(sample,
time_header_label, self.sorter, shifted_cols_header)"""
self.df_samples_list = [compute_row_delta(sample, time_header_label, self.sorter, shifted_cols_header) for sample in self.df_samples_list]
self.df_samples_list = [compute_row_delta(sample, time_header_label, self.sorter, shifted_cols_header)
for sample in self.df_samples_list]
self._concatenated_samples = pd.concat(self.df_samples_list)
complete_header = self.sorter[:]
complete_header.insert(0,'Time')
@ -146,10 +208,11 @@ class JsonImporter(AbstractImporter):
def build_list_of_samples_array(self, data_frame: pd.DataFrame) -> typing.List:
"""
Costruisce una lista contenente le colonne presenti nel dataframe data_frame convertendole in numpy_array
Builds a List containing the columns of dataframe and converts them to a numpy array.
Parameters:
:data_frame: il dataframe da cui estrarre e convertire le colonne
:data_frame: the dataframe from which the columns have to be extracted and converted
Returns:
:columns_list: la lista contenente le colonne convertite in numpyarray
:columns_list: the resulting list of numpy arrays
"""
columns_list = [data_frame[column].to_numpy() for column in data_frame]
@ -159,7 +222,7 @@ class JsonImporter(AbstractImporter):
def clear_concatenated_frame(self):
"""
Rimuove tutti i valori contenuti nei data_frames presenti in df_samples_list
Removes all values in the dataframe concatenated_samples
Parameters:
void
Returns:
@ -168,6 +231,9 @@ class JsonImporter(AbstractImporter):
self._concatenated_samples = self._concatenated_samples.iloc[0:0]
def clear_data_frame_list(self):
    """
    Removes all values present in the dataframes in the list df_samples_list.
    Every dataframe is replaced by an empty one that keeps the same columns.

    Parameters:
        void
    Returns:
        void
    """
    # slice assignment keeps the same list object, exactly as the index-based replacement did
    self.df_samples_list[:] = [sample.iloc[0:0] for sample in self.df_samples_list]
@ -180,7 +246,6 @@ class JsonImporter(AbstractImporter):
cims_for_all_vars[var].append(pd.DataFrame(raw_data[indx][cims_key][var][p_comb]).to_numpy())
return cims_for_all_vars
@property
def concatenated_samples(self):
return self._concatenated_samples

@ -3,7 +3,7 @@ import networkx as nx
import numpy as np
class NetworkGraph():
class NetworkGraph:
"""
Rappresenta il grafo che contiene i nodi e gli archi presenti nell'oggetto Structure graph_struct.
Ogni nodo contine la label node_id, al nodo è anche associato un id numerico progressivo indx che rappresenta la posizione
@ -17,9 +17,9 @@ class NetworkGraph():
def __init__(self, graph_struct):
self.graph_struct = graph_struct
self.graph = nx.DiGraph()
self._nodes_indexes = self.graph_struct.list_of_nodes_indexes()
self._nodes_labels = self.graph_struct.list_of_nodes_labels()
self._nodes_values = self.graph_struct.nodes_values()
self._nodes_indexes = self.graph_struct.nodes_indexes
self._nodes_labels = self.graph_struct.nodes_labels
self._nodes_values = self.graph_struct.nodes_values
self.aggregated_info_about_nodes_parents = None
self._fancy_indexing = None
self._time_scalar_indexing_structure = None
@ -30,7 +30,7 @@ class NetworkGraph():
def init_graph(self):
self.add_nodes(self._nodes_labels)
self.add_edges(self.graph_struct.list_of_edges())
self.add_edges(self.graph_struct.edges)
self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes()
self._fancy_indexing = self.build_fancy_indexing_structure(0)
self.build_scalar_indexing_structures()
@ -41,7 +41,7 @@ class NetworkGraph():
def add_nodes(self, list_of_nodes):
#self.graph.add_nodes_from(list_of_nodes)
nodes_indxs = self._nodes_indexes
nodes_vals = self.graph_struct.nodes_values()
nodes_vals = self.graph_struct.nodes_values
pos = 0
#print("LIST OF NODES", list_of_nodes)
for id, node_indx, node_val in zip(list_of_nodes, nodes_indxs, nodes_vals):
@ -134,7 +134,7 @@ class NetworkGraph():
#parents_indexes_list = self._fancy_indexing
"""for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing):
self._time_filtering.append(np.append(np.array([node_indx], dtype=np.int), p_indxs).astype(np.int))"""
nodes_indxs = self.graph_struct.list_of_nodes_indexes()
nodes_indxs = self._nodes_indexes
#print("FINDXING", self._fancy_indexing)
#print("Nodes Indxs", nodes_indxs)
self._time_filtering = [np.append(np.array([node_indx], dtype=np.int), p_indxs).astype(np.int)
@ -145,7 +145,7 @@ class NetworkGraph():
nodes_number = self.graph_struct.total_variables_number
"""for node_indx, p_indxs in zip(self.graph_struct.list_of_nodes_indexes(), self._fancy_indexing):
self._transition_filtering.append(np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int))"""
nodes_indxs = self.graph_struct.list_of_nodes_indexes()
nodes_indxs = self._nodes_indexes
self._transition_filtering = [np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int)
for node_indx, p_indxs in zip(nodes_indxs,
self._fancy_indexing)]

@ -6,17 +6,26 @@ import structure as st
class SamplePath:
"""
Contiene l'aggregazione di una o più traiettorie e la struttura della rete.
Ha il compito dato di costruire tutte gli oggetti Trajectory e l'oggetto Structure
a partire dai dataframe contenuti in self.importer
Aggregates all the information about the trajectories, the real structure of the sampled net and the variables'
cardinalities.
Has the task of creating the objects that will contain the mentioned data.
:files_path: the path that contains tha data to be imported
:samples_label: the reference key for the samples in the trajectories
:structure_label: the reference key for the structure of the network data
:variables_label: the reference key for the cardinalites of the nodes data
:time_key: the key used to identify the timestamps in each trajectory
:variables_key: the key used to identify the names of the variables in the net
:importer: the Importer object that will import and process the data
:trajectories: the Trajectory object that will contain all the concatenated trajectories
:structure: the Structure object that will contain all the structural infos about the net
:total_variables_count: the number of variables in the net
:importer: l'oggetto Importer che ha il compito di caricare i dataset
:trajectories: lista di oggetti Trajectories
:structure: oggetto Structure
"""
def __init__(self, files_path, samples_label, structure_label, variables_label, time_key, variables_key):
def __init__(self, files_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
variables_key: str):
self.importer = imp.JsonImporter(files_path, samples_label, structure_label,
variables_label, time_key, variables_key)
self._trajectories = None
@ -24,6 +33,15 @@ class SamplePath:
self.total_variables_count = None
def build_trajectories(self):
"""
Builds the Trajectory object that will contain all the trajectories.
Clears all the unused dataframes in the Importer object
Parameters:
void
Returns:
void
"""
self.importer.import_data()
self._trajectories = \
tr.Trajectory(self.importer.build_list_of_samples_array(self.importer.concatenated_samples),
@ -32,12 +50,19 @@ class SamplePath:
self.importer.clear_concatenated_frame()
def build_structure(self):
    """
    Builds the Structure object that aggregates all the infos about the net.
    Labels, indexes and cardinalities are read from the importer variables dataframe,
    the edges from the importer structure dataframe.

    Parameters:
        void
    Returns:
        void
    """
    self.total_variables_count = len(self.importer.sorter)
    variables_frame = self.importer.variables
    labels = variables_frame[self.importer.variables_key].to_list()
    indxs = variables_frame.index.to_numpy()
    vals = variables_frame['Value'].to_numpy()
    # every row of the structure dataframe becomes one edge record
    edges = list(self.importer.structure.to_records(index=False))
    self._structure = st.Structure(labels, indxs, vals, edges,
                                   self.total_variables_count)

@ -1,68 +1,78 @@
import typing as ty
import numpy as np
class Structure:
    """
    Contains all the infos about the network structure (nodes names, nodes cardinalities, edges...)

    :nodes_labels_list: the symbolic names of the variables
    :nodes_indexes_arr: the indexes of the nodes
    :nodes_vals_arr: the cardinalities of the nodes
    :edges_list: the edges of the network
    :total_variables_number: the total number of variables in the net
    """

    def __init__(self, nodes_label_list: ty.List, node_indexes_arr: np.ndarray, nodes_vals_arr: np.ndarray,
                 edges_list: ty.List, total_variables_number: int):
        self._nodes_labels_list = nodes_label_list
        self._nodes_indexes_arr = node_indexes_arr
        self._nodes_vals_arr = nodes_vals_arr
        self._edges_list = edges_list
        self._total_variables_number = total_variables_number

    @property
    def edges(self):
        """The list of edges passed to the constructor."""
        return self._edges_list

    @property
    def nodes_labels(self):
        """The list of nodes labels passed to the constructor."""
        return self._nodes_labels_list

    @property
    def nodes_indexes(self):
        """The array of nodes indexes passed to the constructor."""
        return self._nodes_indexes_arr

    @property
    def nodes_values(self):
        """The array of nodes cardinalities passed to the constructor."""
        return self._nodes_vals_arr

    @property
    def total_variables_number(self):
        """The total number of variables passed to the constructor."""
        return self._total_variables_number

    def get_node_id(self, node_indx: int):
        """
        Parameters:
            node_indx: a position in the labels list
        Returns:
            the label stored at position node_indx
        """
        return self._nodes_labels_list[node_indx]

    def get_node_indx(self, node_id: str):
        """
        Parameters:
            node_id: the label of the node
        Returns:
            the entry of the indexes array found at the position of node_id in the labels list
        Raises:
            ValueError if node_id is not in the labels list
        """
        pos_indx = self._nodes_labels_list.index(node_id)
        return self._nodes_indexes_arr[pos_indx]

    def get_positional_node_indx(self, node_id: str):
        """
        Parameters:
            node_id: the label of the node
        Returns:
            the position of node_id in the labels list
        Raises:
            ValueError if node_id is not in the labels list
        """
        return self._nodes_labels_list.index(node_id)

    def get_states_number(self, node: str):
        """
        Parameters:
            node: the label of the node
        Returns:
            the entry of the values array found at the position of node in the labels list
        Raises:
            ValueError if node is not in the labels list
        """
        pos_indx = self._nodes_labels_list.index(node)
        return self._nodes_vals_arr[pos_indx]

    def get_states_number_by_indx(self, node_indx: int):
        """
        Parameters:
            node_indx: a position in the values array
        Returns:
            the entry of the values array at position node_indx
        """
        return self._nodes_vals_arr[node_indx]

    def __repr__(self):
        return "Variables:\n" + str(self._nodes_labels_list) + "\nValues:\n" + str(self._nodes_vals_arr) + \
               "\nEdges: \n" + str(self._edges_list)

    def __eq__(self, other):
        """
        Overrides the default implementation.
        Labels and edges are compared as sets (order is ignored), values and indexes element-wise.
        """
        if not isinstance(other, Structure):
            return NotImplemented
        return set(self._nodes_labels_list) == set(other._nodes_labels_list) and \
            np.array_equal(self._nodes_vals_arr, other._nodes_vals_arr) and \
            np.array_equal(self._nodes_indexes_arr, other._nodes_indexes_arr) and \
            set(self._edges_list) == set(other._edges_list)

    # Defining __eq__ already makes instances unhashable; this only states it explicitly.
    __hash__ = None

@ -15,13 +15,13 @@ class StructureEstimator:
def __init__(self, sample_path, exp_test_alfa, chi_test_alfa):
self.sample_path = sample_path
self.nodes = np.array(self.sample_path.structure.list_of_nodes_labels())
self.nodes = np.array(self.sample_path.structure.nodes_labels)
#print("NODES", self.nodes)
self.nodes_vals = self.sample_path.structure.nodes_vals_arr
self.nodes_indxs = self.sample_path.structure.nodes_indexes_arr
self.nodes_vals = self.sample_path.structure.nodes_values
self.nodes_indxs = self.sample_path.structure.nodes_indexes
#self.nodes_indxs = np.array(range(0,4))
#print("INDXS", self.nodes_indxs)
self.complete_graph = self.build_complete_graph(self.sample_path.structure.list_of_nodes_labels())
self.complete_graph = self.build_complete_graph(self.sample_path.structure.nodes_labels)
self.exp_test_sign = exp_test_alfa
self.chi_test_alfa = chi_test_alfa
self.cache = ch.Cache()
@ -53,11 +53,13 @@ class StructureEstimator:
cims_filter = sorted_parents != test_parent
#print("PARENTS NO FROM MASK", cims_filter)
if not p_set:
print("EMPTY PSET TRYING TO FIND", test_child)
sofc1 = self.cache.find(test_child)
else:
sofc1 = self.cache.find(set(p_set))
if not sofc1:
print("CACHE MISSS SOFC1")
bool_mask1 = np.isin(self.nodes,complete_info)
#print("Bool mask 1", bool_mask1)
l1 = list(self.nodes[bool_mask1])
@ -88,6 +90,7 @@ class StructureEstimator:
#p_set.append(test_parent)
p_set.insert(0, test_parent)
if p_set:
print("FULL PSET TRYING TO FIND", p_set)
#p_set.append(test_parent)
#print("PSET ", p_set)
#set_p_set = set(p_set)
@ -102,6 +105,7 @@ class StructureEstimator:
p2.compute_parameters_for_node(test_child)
sofc2 = p2.sets_of_cims_struct.sets_of_cims[s2.get_positional_node_indx(test_child)]"""
if not sofc2:
print("Cache MISSS SOFC2")
complete_info.append(test_parent)
bool_mask2 = np.isin(self.nodes, complete_info)
#print("BOOL MASK 2",bool_mask2)

@ -4,13 +4,13 @@ import numpy as np
class Trajectory:
"""
Rappresenta una traiettoria come un numpy_array contenente n-ple (indx, T_k,S_i,.....,Sj)
Offre i metodi utili alla computazione sulla struttura stessa.
Abstracts the infos about a complete set of trajectories, represented as a numpy array of doubles and a numpy matrix
of ints.
Una Trajectory viene costruita a partire da una lista di numpyarray dove ogni elemento rappresenta una colonna
della traj
:actual_trajectory: il numpy_array contenente la successione di n-ple (indx, T_k,S_i,.....,Sj)
:list_of_columns: the list containing the times array and values matrix
:original_cols_numb: total number of cols in the data
:actual_trajectory: the trajectory containing also the duplicated and shifted values
:times: the array containing the time deltas
"""
@ -23,10 +23,22 @@ class Trajectory:
@property
def trajectory(self):
"""
Parameters:
void
Returns:
a numpy matrix containing ONLY the original columns values, not the shifted ones
"""
return self._actual_trajectory[:, :self.original_cols_number]
@property
def complete_trajectory(self):
"""
Parameters:
void
Returns:
a numpy matrix containing all the values
"""
return self._actual_trajectory
@property

@ -5,6 +5,8 @@ import numpy as np
import pandas as pd
import json_importer as ji
from line_profiler import LineProfiler
import os
import json
@ -44,6 +46,7 @@ class TestJsonImporter(unittest.TestCase):
def test_normalize_trajectories(self):
j1 = ji.JsonImporter('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name')
raw_data = j1.read_json_file()
#print(raw_data)
j1.normalize_trajectories(raw_data, 0, j1.samples_label)
self.assertEqual(len(j1.df_samples_list), len(raw_data[0][j1.samples_label]))
self.assertEqual(list(j1.df_samples_list[0].columns.values)[1:], j1.sorter)
@ -51,7 +54,7 @@ class TestJsonImporter(unittest.TestCase):
def test_normalize_trajectories_wrong_indx(self):
j1 = ji.JsonImporter('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name')
raw_data = j1.read_json_file()
self.assertRaises(IndexError, j1.normalize_trajectories, raw_data, 1, j1.samples_label)
self.assertRaises(IndexError, j1.normalize_trajectories, raw_data, 474, j1.samples_label)
def test_normalize_trajectories_wrong_key(self):
j1 = ji.JsonImporter('../data', 'sample', 'dyn.str', 'variables', 'Time', 'Name')
@ -77,6 +80,7 @@ class TestJsonImporter(unittest.TestCase):
j1.import_trajectories(raw_data)
j1.compute_row_delta_in_all_samples_frames(j1.time_key)
self.assertEqual(list(j1.df_samples_list[0].columns.values), list(j1.concatenated_samples.columns.values))
self.assertEqual(list(j1.concatenated_samples.columns.values)[0], j1.time_key)
def test_clear_data_frame_list(self):
j1 = ji.JsonImporter('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name')
@ -112,7 +116,12 @@ class TestJsonImporter(unittest.TestCase):
def test_import_data(self):
j1 = ji.JsonImporter('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name')
j1.import_data()
lp = LineProfiler()
lp_wrapper = lp(j1.import_data)
lp_wrapper()
lp.print_stats()
#j1.import_data()
self.assertEqual(list(j1.variables[j1.variables_key]),
list(j1.concatenated_samples.columns.values[1:len(j1.variables[j1.variables_key]) + 1]))
print(j1.variables)

@ -11,71 +11,74 @@ import parameters_estimator as pe
class TestStructure(unittest.TestCase):
def setUp(self):
self.structure_frame = pd.DataFrame([{"From":"X","To":"Z"}, {"From":"Y","To":"Z"},
{"From":"Z","To":"Y"} ])
self.variables_frame = pd.DataFrame([{"Name":"X","Value":3},{"Name":"Y","Value":3},{"Name":"Z","Value":3}])
@classmethod
def setUpClass(cls):
cls.labels = ['X','Y','Z']
cls.indxs = np.array([0,1,2])
cls.vals = np.array([3,3,3])
cls.edges = [('X','Z'),('Y','Z'), ('Z','Y')]
cls.vars_numb = len(cls.labels)
def test_init(self):
    """Every constructor argument must be readable back through the matching property."""
    s1 = st.Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb)
    self.assertListEqual(self.labels, s1.nodes_labels)
    self.assertTrue(np.array_equal(self.indxs, s1.nodes_indexes))
    self.assertTrue(np.array_equal(self.vals, s1.nodes_values))
    self.assertListEqual(self.edges, s1.edges)
    self.assertEqual(self.vars_numb, s1.total_variables_number)
def test_get_node_id(self):
    """get_node_id must return the label stored at the given position."""
    s1 = st.Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb)
    for indx, var in enumerate(self.labels):
        self.assertEqual(var, s1.get_node_id(indx))
def test_get_node_indx(self):
    """get_node_indx must return the node index even when indexes and positions differ."""
    removed_pos = self.labels.index('Y')
    l2 = self.labels[:]
    l2.remove('Y')
    # np.delete returns a new array and leaves its input untouched, so the result has to be kept
    i2 = np.delete(self.indxs, removed_pos)
    v2 = np.delete(self.vals, removed_pos)
    e2 = [('X', 'Z')]
    n2 = self.vars_numb - 1
    s1 = st.Structure(l2, i2, v2, e2, n2)
    for indx, var in zip(i2, l2):
        self.assertEqual(indx, s1.get_node_indx(var))
def test_list_of_node_indxs(self):
filtered_frame = self.variables_frame.drop(self.variables_frame[self.variables_frame['Name'] == 'Y'].index)
# print(filtered_frame)
s1 = st.Structure(self.structure_frame, filtered_frame, len(self.variables_frame.index))
for indx1, indx2 in zip(filtered_frame.index, s1.list_of_nodes_indexes()):
self.assertEqual(indx1, indx2)
def test_get_positional_node_indx(self):
    """get_positional_node_indx must return the position of the label in the labels list."""
    removed_pos = self.labels.index('Y')
    l2 = self.labels[:]
    l2.remove('Y')
    # np.delete returns a new array and leaves its input untouched, so the result has to be kept
    i2 = np.delete(self.indxs, removed_pos)
    v2 = np.delete(self.vals, removed_pos)
    e2 = [('X', 'Z')]
    n2 = self.vars_numb - 1
    s1 = st.Structure(l2, i2, v2, e2, n2)
    for indx, var in enumerate(s1.nodes_labels):
        self.assertEqual(indx, s1.get_positional_node_indx(var))
def test_get_states_number(self):
    """get_states_number must return the cardinality stored at the position of the label."""
    removed_pos = self.labels.index('Y')
    l2 = self.labels[:]
    l2.remove('Y')
    # np.delete returns a new array and leaves its input untouched, so the result has to be kept
    i2 = np.delete(self.indxs, removed_pos)
    v2 = np.delete(self.vals, removed_pos)
    e2 = [('X', 'Z')]
    n2 = self.vars_numb - 1
    s1 = st.Structure(l2, i2, v2, e2, n2)
    for val, node in zip(v2, l2):
        self.assertEqual(val, s1.get_states_number(node))
#TODO FORSE QUESTO TEST NON serve verificare se questo metodo sia davvero utile
"""def test_get_states_numeber_by_indx(self):
s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index))
for indx, row in self.variables_frame.iterrows():
self.assertEqual(row[1], s1.get_states_number_by_indx(indx))
def test_new_init(self):
#self.variables_frame.drop(self.variables_frame[(self.variables_frame['Name'] == 'Y')].index, inplace=True)
"""labels = self.variables_frame['Name'].to_list()
labels = self.variables_frame['Name'].to_list()
indxs = self.variables_frame.index.to_numpy()
vals = self.variables_frame['Value'].to_numpy()
edges = list(self.structure_frame.to_records(index=False))
@ -103,7 +106,7 @@ class TestStructure(unittest.TestCase):
array([3, 9])
array([1, 2])
array([4, 1, 2])
"""
sp1 = sp.SamplePath('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name')
sp1.build_trajectories()
sp1.build_structure()
@ -122,7 +125,7 @@ class TestStructure(unittest.TestCase):
#print(p1.sets_of_cims_struct.get_cims_of_node(0,[1,0]))
print(p1.sets_of_cims_struct.sets_of_cims[1].actual_cims)
#print(p1.sets_of_cims_struct.sets_of_cims[2].get_cims_where_parents_except_last_are_in_state(np.array([0])))
#print(p1.sets_of_cims_struct.sets_of_cims[0].p_combs)
#print(p1.sets_of_cims_struct.sets_of_cims[0].p_combs)"""
if __name__ == '__main__':

@ -4,6 +4,7 @@ from line_profiler import LineProfiler
import sample_path as sp
import structure_estimator as se
class TestStructureEstimator(unittest.TestCase):
@classmethod
@ -31,7 +32,7 @@ class TestStructureEstimator(unittest.TestCase):
lp.print_stats()
#se1.ctpc_algorithm()
print(se1.complete_graph.edges)
print(self.s1.structure.list_of_edges())
print(self.s1.structure.edges)
def aux_test_complete_test(self, estimator, test_par, test_child, p_set):
estimator.complete_test(test_par, test_child, p_set)

@ -8,7 +8,7 @@ class TestTrajectory(unittest.TestCase):
def test_init(self):
cols_list = [np.array([1.2,1.3,.14]), np.arange(1,4), np.arange(4,7)]
t1 = tr.Trajectory(cols_list, len(cols_list))
t1 = tr.Trajectory(cols_list, len(cols_list) - 2)
self.assertTrue(np.array_equal(cols_list[0], t1.times))
self.assertTrue(np.array_equal(np.ravel(t1.complete_trajectory[:, : 1]), cols_list[1]))
self.assertTrue(np.array_equal(np.ravel(t1.complete_trajectory[:, 1: 2]), cols_list[2]))
@ -19,6 +19,21 @@ class TestTrajectory(unittest.TestCase):
cols_list = [np.arange(1, 4), np.arange(4, 7), np.array([1.2, 1.3, .14])]
self.assertRaises(TypeError, tr.Trajectory, cols_list, len(cols_list))
def test_complete_trajectory(self):
    """complete_trajectory must equal the non-time columns stacked column-wise."""
    times = np.array([1.2, 1.3, .14])
    first_col = np.arange(1, 4)
    second_col = np.arange(4, 7)
    cols_list = [times, first_col, second_col]
    t1 = tr.Trajectory(cols_list, len(cols_list) - 2)
    expected = np.column_stack((first_col, second_col))
    self.assertTrue(np.array_equal(t1.complete_trajectory, expected))
def test_trajectory(self):
    """trajectory must expose only the original columns, here the first non-time column."""
    times = np.array([1.2, 1.3, .14])
    first_col = np.arange(1, 4)
    cols_list = [times, first_col, np.arange(4, 7)]
    t1 = tr.Trajectory(cols_list, len(cols_list) - 2)
    flattened = t1.trajectory.ravel()
    self.assertTrue(np.array_equal(first_col, flattened))
def test_times(self):
    """times must return the first array of the columns list unchanged."""
    times = np.array([1.2, 1.3, .14])
    cols_list = [times, np.arange(1, 4), np.arange(4, 7)]
    t1 = tr.Trajectory(cols_list, len(cols_list) - 2)
    same_times = np.array_equal(times, t1.times)
    self.assertTrue(same_times)
if __name__ == '__main__':