
Manual local merging

parallel_struct_est
philpMartin 4 years ago
commit 1bdc8a7231
23 changed files (lines changed):

     23  main_package/classes/abstract_importer.py
     37  main_package/classes/abstract_sample_path.py
     50  main_package/classes/cache.py
     19  main_package/classes/conditional_intensity_matrix.py
    186  main_package/classes/json_importer.py
    339  main_package/classes/network_graph.py
    143  main_package/classes/parameters_estimator.py
     58  main_package/classes/sample_path.py
     93  main_package/classes/set_of_cims.py
     19  main_package/classes/sets_of_cims_container.py
    101  main_package/classes/structure.py
    326  main_package/classes/structure_estimator.py
     29  main_package/classes/trajectory.py
   1864  main_package/data/esempio_dataset.csv
     61  main_package/tests/test_cache.py
     45  main_package/tests/test_json_importer.py
    178  main_package/tests/test_networkgraph.py
     92  main_package/tests/test_parameters_estimator.py
     10  main_package/tests/test_sample_path.py
    109  main_package/tests/test_setofcims.py
    105  main_package/tests/test_structure.py
     81  main_package/tests/test_structure_estimator.py
     17  main_package/tests/test_trajectory.py

@@ -3,34 +3,21 @@ from abc import ABC, abstractmethod


class AbstractImporter(ABC):
    """
    Interface that exposes all the necessary methods to import the trajectories and the net structure.

    :file_path: the path of the file to import
    """

    def __init__(self, file_path: str):
        self.file_path = file_path
        super().__init__()

    @abstractmethod
    def import_trajectories(self, raw_data):
        pass

    @abstractmethod
    def import_structure(self, raw_data):
        pass

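The interface above only fixes the two import hooks and the file_path handshake with super().__init__(). As a quick illustration, here is a minimal, hypothetical subclass (not part of this commit; the class name and dict keys are invented) showing what a concrete importer has to provide:

from abc import ABC, abstractmethod


class AbstractImporter(ABC):
    def __init__(self, file_path: str):
        self.file_path = file_path
        super().__init__()

    @abstractmethod
    def import_trajectories(self, raw_data):
        pass

    @abstractmethod
    def import_structure(self, raw_data):
        pass


class DictImporter(AbstractImporter):
    # Toy importer that reads from an already-loaded dict instead of a file.
    def import_trajectories(self, raw_data):
        self.trajectories = raw_data['samples']

    def import_structure(self, raw_data):
        self.structure = raw_data['structure']


imp = DictImporter('unused.json')
imp.import_trajectories({'samples': [[0, 1], [1, 0]]})
print(imp.trajectories)  # [[0, 1], [1, 0]]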
@@ -0,0 +1,37 @@
from abc import ABC, abstractmethod
import abstract_importer as ai


class AbstractSamplePath(ABC):

    def __init__(self, importer: ai.AbstractImporter):
        self.importer = importer
        self._trajectories = None
        self._structure = None
        super().__init__()

    @abstractmethod
    def build_trajectories(self):
        """
        Builds the Trajectory object that will contain all the trajectories.
        Assigns the Trajectory object to the instance attribute _trajectories.
        Clears all the unused dataframes in the Importer object.

        Parameters:
            void
        Returns:
            void
        """
        pass

    @abstractmethod
    def build_structure(self):
        """
        Builds the Structure object that aggregates all the information about the net.
        Assigns the Structure object to the instance attribute _structure.

        Parameters:
            void
        Returns:
            void
        """
        pass

@@ -3,23 +3,59 @@ import set_of_cims as sofc


class Cache:
    """
    This class has the role of a cache for the SetOfCims objects of a test node that have already been computed
    during the CTPC algorithm.

    :list_of_sets_of_parents: a list of the parent sets to which the cached SetOfCims at the SAME index is related
    :actual_cache: a list of SetOfCims objects
    """

    def __init__(self):
        self.list_of_sets_of_parents = []
        self.actual_cache = []

    def find(self, parents_comb: typing.Set):  # typing.Union[typing.Set, str]
        """
        Tries to find in cache the SetOfCims related to the symbolic parents combination parents_comb.

        Parameters:
            parents_comb: the parents related to that SetOfCims
        Returns:
            a SetOfCims object if the parents_comb index is found in list_of_sets_of_parents,
            None otherwise
        """
        try:
            result = self.actual_cache[self.list_of_sets_of_parents.index(parents_comb)]
            print("CACHE HIT!!!!", parents_comb)
            return result
        except ValueError:
            return None

    def put(self, parents_comb: typing.Union[typing.Set, str], socim: sofc.SetOfCims):
        """
        Places in cache the SetOfCims object socim and the related symbolic index parents_comb
        in list_of_sets_of_parents.

        Parameters:
            parents_comb: the symbolic set index
            socim: the related SetOfCims object
        Returns:
            void
        """
        self.list_of_sets_of_parents.append(parents_comb)
        self.actual_cache.append(socim)

    def clear(self):
        """
        Clears the contents of both caches.

        Parameters:
            void
        Returns:
            void
        """
        del self.list_of_sets_of_parents[:]
        del self.actual_cache[:]

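A minimal, self-contained sketch of the find/put round trip implemented above; a plain string stands in for the SetOfCims object that the real code caches:

import typing


class Cache:
    def __init__(self):
        self.list_of_sets_of_parents = []
        self.actual_cache = []

    def find(self, parents_comb: typing.Set):
        try:
            # list.index raises ValueError on a miss, which find turns into None
            return self.actual_cache[self.list_of_sets_of_parents.index(parents_comb)]
        except ValueError:
            return None

    def put(self, parents_comb, socim):
        self.list_of_sets_of_parents.append(parents_comb)
        self.actual_cache.append(socim)


cache = Cache()
print(cache.find(frozenset({'X', 'Y'})))            # None: nothing cached yet
cache.put(frozenset({'X', 'Y'}), 'cims for {X,Y}')  # stand-in for a SetOfCims
print(cache.find(frozenset({'X', 'Y'})))            # cims for {X,Y}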
@@ -2,13 +2,28 @@ import numpy as np


class ConditionalIntensityMatrix:
    """
    Abstracts the Conditional Intensity Matrix of a node as an aggregation of the state residence times vector,
    the state transition matrix and the actual CIM matrix.

    :_state_residence_times: the state residence times vector
    :_state_transition_matrix: the transition counts matrix
    :_cim: the actual CIM of the node
    """

    def __init__(self, state_residence_times: np.array, state_transition_matrix: np.array):
        self._state_residence_times = state_residence_times
        self._state_transition_matrix = state_transition_matrix
        self._cim = self.state_transition_matrix.astype(np.float64)

    def compute_cim_coefficients(self):
        """
        Computes the coefficients of the matrix _cim by using the following equality: q_xx' = M[x, x'] / T[x]

        Parameters:
            void
        Returns:
            void
        """
        np.fill_diagonal(self._cim, self._cim.diagonal() * -1)
        self._cim = ((self._cim.T + 1) / (self._state_residence_times + 1)).T

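A self-contained numeric check of compute_cim_coefficients above. With the +1 smoothing used in the code, each off-diagonal entry becomes q_xx' = (M[x, x'] + 1) / (T[x] + 1) and the diagonal is built from the negated total count; the toy values below are invented:

import numpy as np

T = np.array([4.0, 2.0])  # time spent in states 0 and 1
M = np.array([[3, 3],     # M[x, x']: transition counts; the diagonal holds
              [1, 1]])    # the total number of transitions leaving state x

cim = M.astype(np.float64)
np.fill_diagonal(cim, cim.diagonal() * -1)
cim = ((cim.T + 1) / (T + 1)).T
print(cim)
# [[-0.4         0.8       ]
#  [ 0.66666667  0.        ]]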
@@ -1,29 +1,33 @@
import pandas as pd
import json
import typing
from abstract_importer import AbstractImporter


class JsonImporter(AbstractImporter):
    """
    Implements the interface AbstractImporter and adds all the necessary methods to process and prepare
    the data in json ext. with the following structure:
    [] 0
        |_ dyn.cims
        |_ dyn.str
        |_ samples
        |_ variabels

    :file_path: the path of the file that contains the data to be imported
    :samples_label: the reference key for the samples in the trajectories
    :structure_label: the reference key for the structure of the network data
    :variables_label: the reference key for the cardinalities of the nodes data
    :time_key: the key used to identify the timestamps in each trajectory
    :variables_key: the key used to identify the names of the variables in the net
    :df_samples_list: a Dataframe list in which every df contains a trajectory
    :df_structure: Dataframe containing the structure of the network (edges)
    :df_variables: Dataframe containing the nodes cardinalities
    :df_concatenated_samples: the concatenation and processing of all the trajectories present in the list df_samples_list
    :sorter: the columns header (excluding the time column) of the Dataframe concatenated_samples
    """

    def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
                 variables_key: str):
        self.samples_label = samples_label
        self.structure_label = structure_label
@@ -35,85 +39,135 @@ class JsonImporter(AbstractImporter):
        self._df_variables = pd.DataFrame()
        self._concatenated_samples = None
        self.sorter = None
        super(JsonImporter, self).__init__(file_path)

    def import_data(self):
        """
        Imports and prepares all the data needed for subsequent computation.

        Parameters:
            void
        Returns:
            void
        """
        raw_data = self.read_json_file()
        self.import_trajectories(raw_data)
        self.compute_row_delta_in_all_samples_frames(self.time_key)
        self.clear_data_frame_list()
        self.import_structure(raw_data)
        self.import_variables(raw_data, self.sorter)

    def import_trajectories(self, raw_data: typing.List):
        """
        Imports the trajectories in the list of dicts raw_data.

        Parameters:
            raw_data: List of Dicts
        Returns:
            void
        """
        self.normalize_trajectories(raw_data, 0, self.samples_label)

    def import_structure(self, raw_data: typing.List):
        """
        Imports in a dataframe the data in the list raw_data at the key structure_label.

        Parameters:
            raw_data: the data
        Returns:
            void
        """
        self._df_structure = self.one_level_normalizing(raw_data, 0, self.structure_label)

    def import_variables(self, raw_data: typing.List, sorter: typing.List):
        """
        Imports the data in raw_data at the key variables_label.
        Sorts the rows of the dataframe df_variables using the list sorter.

        Parameters:
            raw_data: the data
            sorter: the list used to sort the dataframe self.df_variables
        Returns:
            void
        """
        self._df_variables = self.one_level_normalizing(raw_data, 0, self.variables_label)
        self._df_variables[self.variables_key] = self._df_variables[self.variables_key].astype("category")
        self._df_variables[self.variables_key] = self._df_variables[self.variables_key].cat.set_categories(self.sorter)
        self._df_variables = self._df_variables.sort_values([self.variables_key])
        self._df_variables.reset_index(inplace=True)
        print("Var Frame", self._df_variables)

    def read_json_file(self) -> typing.List:
        """
        Reads the json file in the path self.file_path.

        Parameters:
            void
        Returns:
            data: the contents of the json file
        """
        with open(self.file_path) as f:
            data = json.load(f)
            return data

    def one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame:
        """
        Extracts the one-level nested data in the list raw_data at the index indx at the key key.

        Parameters:
            raw_data: List of Dicts
            indx: the index of the array from which the data have to be extracted
            key: the key for the Dicts from which to extract data
        Returns:
            a normalized dataframe
        """
        return pd.DataFrame(raw_data[indx][key])

    def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str):
        """
        Extracts the trajectories in raw_data at the index indx at the key trajectories_key.
        Adds the extracted trajectories to the dataframe list self.df_samples_list.
        Initializes the list self.sorter.

        Parameters:
            raw_data: the data
            indx: the index of the array from which to extract data
            trajectories_key: the key of the trajectories objects
        Returns:
            void
        """
        dataframe = pd.DataFrame
        smps = raw_data[indx][trajectories_key]
        self.df_samples_list = [dataframe(sample) for sample in smps]
        columns_header = list(self.df_samples_list[0].columns.values)
        columns_header.remove(self.time_key)
        self.sorter = columns_header

    def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame, time_header_label: str,
                                              columns_header: typing.List, shifted_cols_header: typing.List) \
            -> pd.DataFrame:
        """
        Computes the difference between each consecutive value in the time column.
        Copies and shifts up by one position all the values in the remaining columns.

        Parameters:
            sample_frame: the trajectory to be processed
            time_header_label: the label for the times
            columns_header: the original header of sample_frame
            shifted_cols_header: a copy of columns_header with changed names of the contents
        Returns:
            sample_frame: the processed dataframe
        """
        sample_frame[time_header_label] = sample_frame[time_header_label].diff().shift(-1)
        shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32')
@@ -123,28 +177,40 @@ class JsonImporter(AbstractImporter):
        return sample_frame

    def compute_row_delta_in_all_samples_frames(self, time_header_label: str):
        """
        Calls the method compute_row_delta_sigle_samples_frame on every dataframe present in the list
        self.df_samples_list.
        Concatenates the results in the dataframe concatenated_samples.

        Parameters:
            time_header_label: the label of the time column
        Returns:
            void
        """
        shifted_cols_header = [s + "S" for s in self.sorter]
        compute_row_delta = self.compute_row_delta_sigle_samples_frame
        self.df_samples_list = [compute_row_delta(sample, time_header_label, self.sorter, shifted_cols_header)
                                for sample in self.df_samples_list]
        self._concatenated_samples = pd.concat(self.df_samples_list)
        complete_header = self.sorter[:]
        complete_header.insert(0, 'Time')
        complete_header.extend(shifted_cols_header)
        self._concatenated_samples = self._concatenated_samples[complete_header]

    def build_list_of_samples_array(self, data_frame: pd.DataFrame) -> typing.List:
        """
        Builds a list containing the columns of the dataframe data_frame, converted to numpy arrays.

        Parameters:
            :data_frame: the dataframe from which the columns have to be extracted and converted
        Returns:
            :columns_list: the resulting list of numpy arrays
        """
        columns_list = [data_frame[column].to_numpy() for column in data_frame]
@@ -153,7 +219,7 @@ class JsonImporter(AbstractImporter):

    def clear_concatenated_frame(self):
        """
        Removes all values in the dataframe concatenated_samples.

        Parameters:
            void
        Returns:
@@ -162,10 +228,13 @@ class JsonImporter(AbstractImporter):
        self._concatenated_samples = self._concatenated_samples.iloc[0:0]

    def clear_data_frame_list(self):
        """
        Removes all values present in the dataframes in the list df_samples_list.
        """
        for indx in range(len(self.df_samples_list)):  # the single trajectories are no longer needed  # TODO: use a list comprehension
            self.df_samples_list[indx] = self.df_samples_list[indx].iloc[0:0]

    def import_sampled_cims(self, raw_data: typing.List, indx: int, cims_key: str) -> typing.Dict:
        cims_for_all_vars = {}
        for var in raw_data[indx][cims_key]:
            sampled_cims_list = []
@@ -174,7 +243,6 @@ class JsonImporter(AbstractImporter):
                cims_for_all_vars[var].append(pd.DataFrame(raw_data[indx][cims_key][var][p_comb]).to_numpy())
        return cims_for_all_vars

    @property
    def concatenated_samples(self):
        return self._concatenated_samples

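A self-contained sketch of the row-delta transformation performed by compute_row_delta_sigle_samples_frame above: the time column becomes the delta to the next row, and each variable column gets a shifted "S" twin holding the next-row value (the tiny frame below is invented):

import pandas as pd

sample = pd.DataFrame({'Time': [0.0, 0.5, 1.2], 'X': [0, 1, 1], 'Y': [1, 1, 0]})
cols = ['X', 'Y']

sample['Time'] = sample['Time'].diff().shift(-1)  # delta to the next timestamp
shifted = sample[cols].shift(-1).fillna(0).astype('int32')
shifted.columns = [c + 'S' for c in cols]         # XS, YS: next-row states
print(pd.concat([sample, shifted], axis=1))
#    Time  X  Y  XS  YS
# 0   0.5  0  1   1   1
# 1   0.7  1  1   1   0
# 2   NaN  1  0   0   0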
@@ -1,233 +1,280 @@
import networkx as nx
import numpy as np
import typing


class NetworkGraph:
    """
    Abstracts the information contained in the Structure class in the form of a directed graph.
    Has the task of creating all the necessary filtering structures for parameters estimation.

    :graph_struct: the Structure object from which the information about the net will be extracted
    :graph: the directed graph
    :nodes_labels: the symbolic names of the variables
    :nodes_indexes: the indexes of the nodes
    :nodes_values: the cardinalities of the nodes
    :aggregated_info_about_nodes_parents: a structure that contains all the necessary information about every
        parent of every node in the net
    :_fancy_indexing: the indexes of every parent of every node in the net
    :_time_scalar_indexing_structure: the indexing structure for state residence time estimation
    :_transition_scalar_indexing_structure: the indexing structure for transition computation
    :_time_filtering: the columns filtering structure used in the computation of the state residence times
    :_transition_filtering: the columns filtering structure used in the computation of the transitions
        from one state to another
    :_p_combs_structure: all the possible parents states combinations for every node in the net
    """

    def __init__(self, graph_struct):
        self.graph_struct = graph_struct
        self.graph = nx.DiGraph()
        self._nodes_indexes = self.graph_struct.nodes_indexes
        self._nodes_labels = self.graph_struct.nodes_labels
        self._nodes_values = self.graph_struct.nodes_values
        self.aggregated_info_about_nodes_parents = None
        self._fancy_indexing = None
        self._time_scalar_indexing_structure = None
        self._transition_scalar_indexing_structure = None
        self._time_filtering = None
        self._transition_filtering = None
        self._p_combs_structure = None

    def init_graph(self):
        self.add_nodes(self._nodes_labels)
        self.add_edges(self.graph_struct.edges)
        self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes()
        self._fancy_indexing = self.build_fancy_indexing_structure(0)
        self.build_scalar_indexing_structures()
        self.build_time_columns_filtering_structure()
        self.build_transition_columns_filtering_structure()
        self._p_combs_structure = self.build_p_combs_structure()

    def fast_init(self, node_id: str):
        """
        Initializes all the necessary structures for parameters estimation of the node identified by the label node_id.

        Parameters:
            node_id: the label of the node
        Returns:
            void
        """
        self.add_nodes(self._nodes_labels)
        self.add_edges(self.graph_struct.edges)
        self.aggregated_info_about_nodes_parents = self.get_ordered_by_indx_set_of_parents(node_id)
        self._fancy_indexing = self.aggregated_info_about_nodes_parents[1]
        p_indxs = self._fancy_indexing
        p_vals = self.aggregated_info_about_nodes_parents[2]
        self._time_scalar_indexing_structure = self.build_time_scalar_indexing_structure_for_a_node(node_id, p_vals)
        self._transition_scalar_indexing_structure = self.build_transition_scalar_indexing_structure_for_a_node(node_id,
                                                                                                                p_vals)
        node_indx = self.get_node_indx(node_id)
        self._time_filtering = self.build_time_columns_filtering_for_a_node(node_indx, p_indxs)
        self._transition_filtering = self.build_transition_filtering_for_a_node(node_indx, p_indxs)
        self._p_combs_structure = self.build_p_comb_structure_for_a_node(p_vals)

    def add_nodes(self, list_of_nodes: typing.List):
        """
        Adds the nodes contained in the list list_of_nodes to the graph.
        Sets all the properties that identify a node (index, positional index, cardinality).

        Parameters:
            list_of_nodes: the nodes to add to the graph
        Returns:
            void
        """
        nodes_indxs = self._nodes_indexes
        nodes_vals = self.graph_struct.nodes_values
        pos = 0
        for id, node_indx, node_val in zip(list_of_nodes, nodes_indxs, nodes_vals):
            self.graph.add_node(id, indx=node_indx, val=node_val, pos_indx=pos)
            pos += 1

    def add_edges(self, list_of_edges: typing.List):
        """
        Adds the edges contained in the list list_of_edges to the graph.

        Parameters:
            list_of_edges: the edges to add to the graph
        Returns:
            void
        """
        self.graph.add_edges_from(list_of_edges)

    def get_ordered_by_indx_set_of_parents(self, node: str):
        """
        Builds the aggregated structure that holds all the information relative to the parent set of the node, namely
        (parents_labels, parents_indexes, parents_cardinalities).
        N.B. The parent set is sorted using the list of sorted nodes nodes.

        Parameters:
            node: the label of the node
        Returns:
            a tuple containing all the parent set information
        """
        parents = self.get_parents_by_id(node)
        nodes = self._nodes_labels
        d = {v: i for i, v in enumerate(nodes)}
        sorted_parents = sorted(parents, key=lambda v: d[v])
        get_node_indx = self.get_node_indx
        p_indxes = [get_node_indx(node) for node in sorted_parents]
        p_values = [self.get_states_number(node) for node in sorted_parents]
        return (sorted_parents, p_indxes, p_values)

    def get_ord_set_of_par_of_all_nodes(self):
        get_ordered_by_indx_set_of_parents = self.get_ordered_by_indx_set_of_parents
        result = [get_ordered_by_indx_set_of_parents(node) for node in self._nodes_labels]
        return result

    def get_ordered_by_indx_parents_values_for_all_nodes(self):
        pars_values = [i[2] for i in self.aggregated_info_about_nodes_parents]
        return pars_values

    def build_fancy_indexing_structure(self, start_indx):
        if start_indx > 0:
            pass
        else:
            fancy_indx = [i[1] for i in self.aggregated_info_about_nodes_parents]
            return fancy_indx

    def build_time_scalar_indexing_structure_for_a_node(self, node_id: str, parents_vals: typing.List) -> np.ndarray:
        """
        Builds an indexing structure for the computation of state residence times values.

        Parameters:
            node_id: the node label
            parents_vals: the cardinalities of the node's parents
        Returns:
            a numpy array
        """
        T_vector = np.array([self.get_states_number(node_id)])
        T_vector = np.append(T_vector, parents_vals)
        T_vector = T_vector.cumprod().astype(np.int)
        return T_vector

    def build_transition_scalar_indexing_structure_for_a_node(self, node_id: str, parents_vals: typing.List) \
            -> np.ndarray:
        """
        Builds an indexing structure for the computation of state transitions values.

        Parameters:
            node_id: the node label
            parents_vals: the cardinalities of the node's parents
        Returns:
            a numpy array
        """
        node_states_number = self.get_states_number(node_id)
        M_vector = np.array([node_states_number,
                             node_states_number])
        M_vector = np.append(M_vector, parents_vals)
        M_vector = M_vector.cumprod().astype(np.int)
        return M_vector

    def build_time_columns_filtering_for_a_node(self, node_indx: int, p_indxs: typing.List) -> np.ndarray:
        """
        Builds the structure to filter the desired columns indicated by node_indx and p_indxs in the dataset.
        This structure will be used in the computation of the state residence times.

        Parameters:
            node_indx: the index of the node
            p_indxs: the indexes of the node's parents
        Returns:
            a numpy array
        """
        return np.append(np.array([node_indx], dtype=np.int), p_indxs).astype(np.int)

    def build_transition_filtering_for_a_node(self, node_indx, p_indxs) -> np.ndarray:
        """
        Builds the structure to filter the desired columns indicated by node_indx and p_indxs in the dataset.
        This structure will be used in the computation of the state transitions values.

        Parameters:
            node_indx: the index of the node
            p_indxs: the indexes of the node's parents
        Returns:
            a numpy array
        """
        nodes_number = self.graph_struct.total_variables_number
        return np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int)

    def build_p_comb_structure_for_a_node(self, parents_values: typing.List) -> np.ndarray:
        """
        Builds the combinatorial structure that contains the combinations of all the values contained in
        parents_values.

        Parameters:
            parents_values: the cardinalities of the parents
        Returns:
            a numpy matrix containing a grid of the combinations
        """
        tmp = []
        for val in parents_values:
            tmp.append([x for x in range(val)])
        if len(parents_values) > 0:
            parents_comb = np.array(np.meshgrid(*tmp)).T.reshape(-1, len(parents_values))
            if len(parents_values) > 1:
                tmp_comb = parents_comb[:, 1].copy()
                parents_comb[:, 1] = parents_comb[:, 0].copy()
                parents_comb[:, 0] = tmp_comb
        else:
            parents_comb = np.array([[]], dtype=np.int)
        return parents_comb

    def build_time_columns_filtering_structure(self):
        nodes_indxs = self._nodes_indexes
        self._time_filtering = [np.append(np.array([node_indx], dtype=np.int), p_indxs).astype(np.int)
                                for node_indx, p_indxs in zip(nodes_indxs, self._fancy_indexing)]

    def build_transition_columns_filtering_structure(self):
        nodes_number = self.graph_struct.total_variables_number
        nodes_indxs = self._nodes_indexes
        self._transition_filtering = [np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int)
                                      for node_indx, p_indxs in zip(nodes_indxs, self._fancy_indexing)]

    def build_scalar_indexing_structures(self):
        parents_values_for_all_nodes = self.get_ordered_by_indx_parents_values_for_all_nodes()
        build_transition_scalar_indexing_structure_for_a_node = self.build_transition_scalar_indexing_structure_for_a_node
        build_time_scalar_indexing_structure_for_a_node = self.build_time_scalar_indexing_structure_for_a_node
        aggr = [(build_transition_scalar_indexing_structure_for_a_node(node_id, p_vals),
                 build_time_scalar_indexing_structure_for_a_node(node_id, p_vals))
                for node_id, p_vals in zip(self._nodes_labels, parents_values_for_all_nodes)]
        self._transition_scalar_indexing_structure = [i[0] for i in aggr]
        self._time_scalar_indexing_structure = [i[1] for i in aggr]

    def build_p_combs_structure(self):
        parents_values_for_all_nodes = self.get_ordered_by_indx_parents_values_for_all_nodes()
        p_combs_struct = [self.build_p_comb_structure_for_a_node(p_vals) for p_vals in parents_values_for_all_nodes]
        return p_combs_struct

    def get_parents_by_id(self, node_id):
        return list(self.graph.predecessors(node_id))

    def get_states_number(self, node_id):
        return self.graph.nodes[node_id]['val']

    def get_node_indx(self, node_id):
        return nx.get_node_attributes(self.graph, 'indx')[node_id]

    def get_positional_node_indx(self, node_id):
        return self.graph.nodes[node_id]['pos_indx']

    @property
    def nodes(self):
        return self._nodes_labels

    @property
    def edges(self):
        return list(self.graph.edges)

    @property
    def nodes_indexes(self):
        return self._nodes_indexes

    @property
    def nodes_values(self):
        return self._nodes_values

    @property
    def time_scalar_indexing_strucure(self):
        return self._time_scalar_indexing_structure
@@ -244,16 +291,8 @@ class NetworkGraph:
    def transition_filtering(self):
        return self._transition_filtering

    @property
    def p_combs(self):
        return self._p_combs_structure

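A self-contained sketch of build_p_comb_structure_for_a_node above: one range per parent, a meshgrid over all of them, and a swap of the first two columns. With two binary parents (an invented example) the resulting grid makes the first column vary fastest, which matches the cumprod order of the scalar indexing vectors:

import numpy as np


def build_p_comb(parents_values):
    tmp = [list(range(val)) for val in parents_values]
    if len(parents_values) > 0:
        parents_comb = np.array(np.meshgrid(*tmp)).T.reshape(-1, len(parents_values))
        if len(parents_values) > 1:
            # swap the first two columns, as in the method above
            parents_comb[:, [0, 1]] = parents_comb[:, [1, 0]]
    else:
        parents_comb = np.array([[]], dtype=int)
    return parents_comb


print(build_p_comb([2, 2]))
# [[0 0]
#  [1 0]
#  [0 1]
#  [1 1]]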
@@ -1,32 +1,52 @@
import numpy as np
import sets_of_cims_container as acims
import set_of_cims as sofc
import sample_path as sp
import network_graph as ng


class ParametersEstimator:
    """
    Has the task of computing the CIMs of a particular node, given the trajectories in sample_path and the
    net structure in the graph net_graph.

    :sample_path: the container of the trajectories
    :net_graph: the net structure
    :single_set_of_cims: the SetOfCims object that will hold the CIMs of the node
    """

    def __init__(self, sample_path: sp.SamplePath, net_graph: ng.NetworkGraph):
        self.sample_path = sample_path
        self.net_graph = net_graph
        self.sets_of_cims_struct = None
        self.single_set_of_cims = None

    def init_sets_cims_container(self):
        self.sets_of_cims_struct = acims.SetsOfCimsContainer(self.net_graph.nodes,
                                                             self.net_graph.nodes_values,
                                                             self.net_graph.get_ordered_by_indx_parents_values_for_all_nodes(),
                                                             self.net_graph.p_combs)

    def fast_init(self, node_id: str):
        """
        Initializes all the necessary structures for the parameters estimation.

        Parameters:
            node_id: the node label
        Returns:
            void
        """
        p_vals = self.net_graph.aggregated_info_about_nodes_parents[2]
        node_states_number = self.net_graph.get_states_number(node_id)
        self.single_set_of_cims = sofc.SetOfCims(node_id, p_vals, node_states_number, self.net_graph.p_combs)

    def compute_parameters(self):
        for indx, aggr in enumerate(zip(self.net_graph.nodes, self.sets_of_cims_struct.sets_of_cims)):
            self.compute_state_res_time_for_node(self.net_graph.get_node_indx(aggr[0]), self.sample_path.trajectories.times,
@@ -43,74 +63,79 @@ class ParametersEstimator:
                                                      aggr[1].transition_matrices)
            aggr[1].build_cims(aggr[1].state_residence_times, aggr[1].transition_matrices)

    def compute_parameters_for_node(self, node_id: str) -> sofc.SetOfCims:
        """
        Computes the CIMs of the node identified by the label node_id.

        Parameters:
            node_id: the node label
        Returns:
            a SetOfCims object filled with the computed CIMs
        """
        node_indx = self.net_graph.get_node_indx(node_id)
        state_res_times = self.single_set_of_cims.state_residence_times
        transition_matrices = self.single_set_of_cims.transition_matrices
        trajectory = self.sample_path.trajectories.trajectory
        self.compute_state_res_time_for_node(node_indx, self.sample_path.trajectories.times,
                                             trajectory,
                                             self.net_graph.time_filtering,
                                             self.net_graph.time_scalar_indexing_strucure,
                                             state_res_times)
        self.compute_state_transitions_for_a_node(node_indx,
                                                  self.sample_path.trajectories.complete_trajectory,
                                                  self.net_graph.transition_filtering,
                                                  self.net_graph.transition_scalar_indexing_structure,
                                                  transition_matrices)
        self.single_set_of_cims.build_cims(state_res_times, transition_matrices)
        return self.single_set_of_cims

    def compute_state_res_time_for_node(self, node_indx: int, times: np.ndarray, trajectory: np.ndarray,
                                        cols_filter: np.ndarray, scalar_indexes_struct: np.ndarray, T: np.ndarray):
        """
        Computes the state residence times for a node and fills the matrix T with the results.

        Parameters:
            node_indx: the index of the node
            times: the times deltas vector
            trajectory: the trajectory
            cols_filter: the columns filtering structure
            scalar_indexes_struct: the indexing structure
            T: the state residence times vectors
        Returns:
            void
        """
        T[:] = np.bincount(np.sum(trajectory[:, cols_filter] * scalar_indexes_struct / scalar_indexes_struct[0], axis=1)
                           .astype(np.int),
                           times,
                           minlength=scalar_indexes_struct[-1]).reshape(-1, T.shape[1])

    def compute_state_transitions_for_a_node(self, node_indx, trajectory, cols_filter, scalar_indexing, M):
        """
        Computes the state transitions for a node and fills the matrices M with the results.

        Parameters:
            node_indx: the index of the node
            trajectory: the trajectory
            cols_filter: the columns filtering structure
            scalar_indexing: the indexing structure
            M: the state transition matrices
        Returns:
            void
        """
        diag_indices = np.array([x * M.shape[1] + x % M.shape[1] for x in range(M.shape[0] * M.shape[1])],
                                dtype=np.int64)
        trj_tmp = trajectory[trajectory[:, int(trajectory.shape[1] / 2) + node_indx].astype(np.int) >= 0]
        M[:] = np.bincount(np.sum(trj_tmp[:, cols_filter] * scalar_indexing / scalar_indexing[0], axis=1).astype(np.int),
                           minlength=scalar_indexing[-1]).reshape(-1, M.shape[1], M.shape[2])
        M_raveled = M.ravel()
        M_raveled[diag_indices] = 0
        M_raveled[diag_indices] = np.sum(M, axis=2).ravel()

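A self-contained sketch of the bincount trick used by compute_state_res_time_for_node above: each row's (node state, parents states) tuple is flattened to a scalar index with the cumprod vector, and np.bincount sums the time deltas per index in one pass (toy data invented):

import numpy as np

# A binary node (column 0) with one binary parent (column 1).
trajectory = np.array([[0, 0],
                       [1, 0],
                       [1, 1],
                       [0, 1]])
times = np.array([0.5, 0.2, 0.3, 1.0])  # time delta spent in each row's state

cols_filter = np.array([0, 1])          # node column first, then its parents
scalar_idx = np.array([2, 4])           # cumprod of [2, 2]: node and parent cardinalities

T = np.bincount(np.sum(trajectory[:, cols_filter] * scalar_idx / scalar_idx[0], axis=1).astype(int),
                times,
                minlength=scalar_idx[-1]).reshape(-1, 2)
print(T)
# [[0.5 0.2]   row = parent state, column = node state
#  [1.  0.3]]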
@@ -1,29 +1,41 @@
import abstract_sample_path as asam
import json_importer as imp
import trajectory as tr
import structure as st


class SamplePath(asam.AbstractSamplePath):
    """
    Aggregates all the information about the trajectories, the real structure of the sampled net and the
    variables cardinalities.
    Has the task of creating the objects that will contain the mentioned data.

    :importer: the Importer object that will import and process the data
    :trajectories: the Trajectory object that will contain all the concatenated trajectories
    :structure: the Structure object that will contain all the structural information about the net
    :total_variables_count: the number of variables in the net
    """

    def __init__(self, importer: imp.JsonImporter):
        super().__init__(importer)
        self._total_variables_count = None

    def build_trajectories(self):
        """
        Builds the Trajectory object that will contain all the trajectories.
        Clears all the unused dataframes in the Importer object.

        Parameters:
            void
        Returns:
            void
        """
        self.importer.import_data()
        self._trajectories = \
            tr.Trajectory(self.importer.build_list_of_samples_array(self.importer.concatenated_samples),
@@ -32,8 +44,20 @@ class SamplePath:
        self.importer.clear_concatenated_frame()

    def build_structure(self):
        """
        Builds the Structure object that aggregates all the information about the net.

        Parameters:
            void
        Returns:
            void
        """
        self._total_variables_count = len(self.importer.sorter)
        labels = self.importer.variables[self.importer.variables_key].to_list()
        indxs = self.importer.variables.index.to_numpy()
        vals = self.importer.variables['Value'].to_numpy()
        edges = list(self.importer.structure.to_records(index=False))
        self._structure = st.Structure(labels, indxs, vals, edges,
                                       self._total_variables_count)

    @property
@@ -47,12 +71,6 @@ class SamplePath:
    def total_variables_count(self):
        return self._total_variables_count

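Since the constructor now takes an Importer instead of building one, client code composes the two objects explicitly. A hypothetical usage sketch (file name and label arguments are illustrative only, not taken from this commit):

import json_importer as imp
import sample_path as sp

# The label arguments must match the keys actually used in the json file.
importer = imp.JsonImporter('../data/networks_and_trajectories.json',
                            'samples', 'dyn.str', 'variables', 'Time', 'Name')
s1 = sp.SamplePath(importer)
s1.build_trajectories()  # imports the data and builds the Trajectory object
s1.build_structure()     # builds the Structure object from the imported frames
print(s1.total_variables_count)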
@@ -1,27 +1,41 @@
import numpy as np
import typing
import conditional_intensity_matrix as cim


class SetOfCims:
    """
    Aggregates all the CIMs of the node identified by the label node_id.

    :node_id: the node label
    :parents_states_number: the cardinalities of the parents
    :node_states_number: the cardinality of the node
    :p_combs: the relative p_combs structure
    :state_residence_times: matrix containing all the state residence time vectors for the node
    :transition_matrices: matrix containing all the transition matrices for the node
    :actual_cims: the CIMs of the node
    """

    def __init__(self, node_id: str, parents_states_number: typing.List, node_states_number: int, p_combs: np.ndarray):
        self.node_id = node_id
        self.parents_states_number = parents_states_number
        self.node_states_number = node_states_number
        self.actual_cims = []
        self.state_residence_times = None
        self.transition_matrices = None
        self.p_combs = p_combs
        self.build_times_and_transitions_structures()

    def build_times_and_transitions_structures(self):
        """
        Initializes at the correct dimensions the state residence times matrix and the state transition matrices.

        Parameters:
            void
        Returns:
            void
        """
        if not self.parents_states_number:
            self.state_residence_times = np.zeros((1, self.node_states_number), dtype=np.float)
            self.transition_matrices = np.zeros((1, self.node_states_number, self.node_states_number), dtype=np.int)
@ -31,41 +45,60 @@ class SetOfCims:
            self.transition_matrices = np.zeros([np.prod(self.parents_states_number), self.node_states_number,
                                                 self.node_states_number], dtype=np.int)

    def build_cims(self, state_res_times: typing.List, transition_matrices: typing.List):
        """
        Builds the ConditionalIntensityMatrix objects given the state residence times and transition matrices,
        and computes the cim coefficients.

        Parameters:
            state_res_times: the state residence times matrix
            transition_matrices: the transition matrices
        Returns:
            void
        """
        for state_res_time_vector, transition_matrix in zip(state_res_times, transition_matrices):
            cim_to_add = cim.ConditionalIntensityMatrix(state_res_time_vector, transition_matrix)
            cim_to_add.compute_cim_coefficients()
            self.actual_cims.append(cim_to_add)
        self.actual_cims = np.array(self.actual_cims)
        self.transition_matrices = None
        self.state_residence_times = None
def filter_cims_with_mask(self, mask_arr: np.ndarray, comb: typing.List) -> np.ndarray:
"""
Filter the cims contained in the array actual_cims given the boolean mask mask_arr and the index comb.
Parameters:
mask_arr: the boolean mask
comb: the indexes of the selected cims
Returns:
Array of ConditionalIntensityMatrix
"""
if mask_arr.size <= 1:
return self.actual_cims
else:
tmp_parents_comb_from_ids = np.argwhere(np.all(self.p_combs[:, mask_arr] == comb, axis=1)).ravel()
return self.actual_cims[tmp_parents_comb_from_ids]
@property
    def get_cims(self):
        return self.actual_cims
def get_cims_number(self):
return len(self.actual_cims)
"""
    def get_cim(self, index):
        flat_index = self.indexes_converter(index)
        return self.actual_cims[flat_index]
def indexes_converter(self, indexes):
assert len(indexes) == len(self.parents_states_number)
vector_index = 0
if not indexes:
return vector_index
else:
for indx, value in enumerate(indexes):
vector_index = vector_index*self.parents_states_number[indx] + indexes[indx]
return vector_index"""
"""sofc = SetOfCims('Z', [3, 3], 3)
sofc.build_actual_cims_structure()
print(sofc.actual_cims)
print(sofc.actual_cims[0,0])
print(sofc.actual_cims[1,2])
#print(sofc.indexes_converter([]))"""
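A small usage sketch of the new four-argument constructor (it assumes this module's SetOfCims is in scope; the p_combs array is hand-built here for two ternary parents):

import itertools
import numpy as np

p_combs = np.array(list(itertools.product(range(3), repeat=2)))  # the 9 joint parent configurations
sofc = SetOfCims('Z', [3, 3], 3, p_combs)
print(sofc.transition_matrices.shape)  # (9, 3, 3): one transition counts matrix per parents configuration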

@ -1,21 +1,20 @@
import set_of_cims as socim


class SetsOfCimsContainer:
    """
    Aggregates a collection of SetOfCims objects.
    """
    def __init__(self, list_of_keys, states_number_per_node, list_of_parents_states_number, p_combs_list):
        self.sets_of_cims = None
        self.init_cims_structure(list_of_keys, states_number_per_node, list_of_parents_states_number, p_combs_list)

    def init_cims_structure(self, keys, states_number_per_node, list_of_parents_states_number, p_combs_list):
        self.sets_of_cims = [socim.SetOfCims(pair[1], list_of_parents_states_number[pair[0]],
                                             states_number_per_node[pair[0]], p_combs_list[pair[0]])
                             for pair in enumerate(keys)]
    def get_set_of_cims(self, node_indx):
@ -24,13 +23,3 @@ class SetsOfCimsContainer:
    def get_cims_of_node(self, node_indx, cim_indx):
        return self.sets_of_cims[node_indx].get_cim(cim_indx)

@ -1,68 +1,73 @@
import typing as ty
import numpy as np


class Structure:
    """
    Contains all the information about the network structure (nodes names, nodes cardinalities, edges...).

    :nodes_labels_list: the symbolic names of the variables
    :nodes_indexes_arr: the indexes of the nodes
    :nodes_vals_arr: the cardinalities of the nodes
    :edges_list: the edges of the network
    :total_variables_number: the total number of variables in the net
    """

    def __init__(self, nodes_label_list: ty.List, node_indexes_arr: np.ndarray, nodes_vals_arr: np.ndarray,
                 edges_list: ty.List, total_variables_number: int):
        self._nodes_labels_list = nodes_label_list
        self._nodes_indexes_arr = node_indexes_arr
        self._nodes_vals_arr = nodes_vals_arr
        self._edges_list = edges_list
        self._total_variables_number = total_variables_number

    @property
    def edges(self):
        return self._edges_list

    @property
    def nodes_labels(self):
        return self._nodes_labels_list

    @property
    def nodes_indexes(self) -> np.ndarray:
        return self._nodes_indexes_arr

    @property
    def nodes_values(self) -> np.ndarray:
        return self._nodes_vals_arr

    @property
    def total_variables_number(self):
        return self._total_variables_number

    def get_node_id(self, node_indx: int) -> str:
        return self._nodes_labels_list[node_indx]

    def get_node_indx(self, node_id: str) -> int:
        pos_indx = self._nodes_labels_list.index(node_id)
        return self._nodes_indexes_arr[pos_indx]

    def get_positional_node_indx(self, node_id: str) -> int:
        return self._nodes_labels_list.index(node_id)

    def get_states_number(self, node: str) -> int:
        pos_indx = self._nodes_labels_list.index(node)
        return self._nodes_vals_arr[pos_indx]

    def __repr__(self):
        return "Variables:\n" + str(self._nodes_labels_list) + "\nValues:\n" + str(self._nodes_vals_arr) + \
               "\nEdges: \n" + str(self._edges_list)

    def __eq__(self, other):
        """Overrides the default implementation"""
        if isinstance(other, Structure):
            return set(self._nodes_labels_list) == set(other._nodes_labels_list) and \
                   np.array_equal(self._nodes_vals_arr, other._nodes_vals_arr) and \
                   np.array_equal(self._nodes_indexes_arr, other._nodes_indexes_arr) and \
                   self._edges_list == other._edges_list
        return NotImplemented
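An illustrative construction of the refactored Structure (all values made up):

import numpy as np

s = Structure(['X', 'Y', 'Z'], np.array([0, 1, 2]), np.array([3, 3, 3]),
              [('X', 'Y'), ('Z', 'Y')], 3)
print(s.get_states_number('Y'))         # 3
print(s.get_positional_node_indx('Z'))  # 2
print(s.get_node_indx('Z'))             # 2, taken from the indexes array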

@ -1,145 +1,179 @@
import numpy as np
import itertools
import networkx as nx
from networkx.readwrite import json_graph
import json
import typing
from scipy.stats import f as f_dist
from scipy.stats import chi2 as chi2_dist

import sample_path as sp
import structure as st
import network_graph as ng
import conditional_intensity_matrix as condim
import parameters_estimator as pe
import cache as ch
class StructureEstimator:
    """
    Has the task of estimating the network structure given the trajectories in sample_path.

    :sample_path: the sample_path object containing the trajectories and the real structure
    :exp_test_sign: the significance level for the exponential Hp test
    :chi_test_alfa: the significance level for the chi Hp test
    :nodes: the nodes labels
    :nodes_vals: the nodes cardinalities
    :nodes_indxs: the nodes indexes
    :complete_graph: the complete directed graph built using the nodes labels in nodes
    :cache: the cache object
    """

    def __init__(self, sample_path: sp.SamplePath, exp_test_alfa: float, chi_test_alfa: float):
        self.sample_path = sample_path
        self.nodes = np.array(self.sample_path.structure.nodes_labels)
        self.nodes_vals = self.sample_path.structure.nodes_values
        self.nodes_indxs = self.sample_path.structure.nodes_indexes
        self.complete_graph = self.build_complete_graph(self.sample_path.structure.nodes_labels)
        self.exp_test_sign = exp_test_alfa
        self.chi_test_alfa = chi_test_alfa
        self.cache = ch.Cache()

    def build_complete_graph(self, node_ids: typing.List):
        """
        Builds a complete directed graph (no self loops) given the nodes labels in the list node_ids.

        Parameters:
            node_ids: the list of nodes labels
        Returns:
            a complete DiGraph object
        """
        complete_graph = nx.DiGraph()
        complete_graph.add_nodes_from(node_ids)
        complete_graph.add_edges_from(itertools.permutations(node_ids, 2))
        return complete_graph
    def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int,
                      tot_vars_count: int):
        """
        Performs a complete independence test on the directed graphs G1 = test_child U parent_set and
        G2 = G1 U test_parent (added as an additional parent of the test_child).
        Generates all the necessary structures and data to perform the tests.

        Parameters:
            test_parent: the node label of the test parent
            test_child: the node label of the child
            parent_set: the common parent set
            child_states_numb: the cardinality of the test_child
            tot_vars_count: the total number of variables in the net
        Returns:
            True iff test_child and test_parent are independent given the sep_set parent_set,
            False otherwise
        """
        p_set = parent_set[:]
        complete_info = parent_set[:]
        complete_info.append(test_child)

        parents = np.array(parent_set)
        parents = np.append(parents, test_parent)
        sorted_parents = self.nodes[np.isin(self.nodes, parents)]
        cims_filter = sorted_parents != test_parent

        sofc1 = self.cache.find(set(p_set))
        if not sofc1:
            bool_mask1 = np.isin(self.nodes, complete_info)
            l1 = list(self.nodes[bool_mask1])
            indxs1 = self.nodes_indxs[bool_mask1]
            vals1 = self.nodes_vals[bool_mask1]
            # wrap test_child in a list so product yields (parent, child) pairs
            # instead of iterating over the characters of the label
            eds1 = list(itertools.product(parent_set, [test_child]))
            s1 = st.Structure(l1, indxs1, vals1, eds1, tot_vars_count)
            g1 = ng.NetworkGraph(s1)
            g1.fast_init(test_child)
            p1 = pe.ParametersEstimator(self.sample_path, g1)
            p1.fast_init(test_child)
            sofc1 = p1.compute_parameters_for_node(test_child)
            self.cache.put(set(p_set), sofc1)

        sofc2 = None
        p_set.insert(0, test_parent)
        if p_set:
            sofc2 = self.cache.find(set(p_set))
        if not sofc2:
            complete_info.append(test_parent)
            bool_mask2 = np.isin(self.nodes, complete_info)
            l2 = list(self.nodes[bool_mask2])
            indxs2 = self.nodes_indxs[bool_mask2]
            vals2 = self.nodes_vals[bool_mask2]
            eds2 = list(itertools.product(p_set, [test_child]))
            s2 = st.Structure(l2, indxs2, vals2, eds2, tot_vars_count)
            g2 = ng.NetworkGraph(s2)
            g2.fast_init(test_child)
            p2 = pe.ParametersEstimator(self.sample_path, g2)
            p2.fast_init(test_child)
            sofc2 = p2.compute_parameters_for_node(test_child)
            self.cache.put(set(p_set), sofc2)

        for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs):
            cond_cims = sofc2.filter_cims_with_mask(cims_filter, p_comb)
            for cim2 in cond_cims:
                if not self.independence_test(child_states_numb, cim1, cim2):
                    return False
        return True
    def independence_test(self, child_states_numb: int, cim1: condim.ConditionalIntensityMatrix,
                          cim2: condim.ConditionalIntensityMatrix):
        """
        Computes the actual independence test using two cims.
        The exponential test is performed first; if its null hypothesis is not rejected,
        the chi test is performed as well.

        Parameters:
            child_states_numb: the cardinality of the test child
            cim1: a cim belonging to the graph without test parent
            cim2: a cim belonging to the graph with test parent
        Returns:
            True iff both tests do NOT reject the null hypothesis of independence,
            False otherwise
        """
        M1 = cim1.state_transition_matrix
        M2 = cim2.state_transition_matrix
        r1s = M1.diagonal()
@ -147,11 +181,11 @@ class StructureEstimator:
        C1 = cim1.cim
        C2 = cim2.cim
        F_stats = C2.diagonal() / C1.diagonal()
        exp_alfa = self.exp_test_sign
        for val in range(0, child_states_numb):
            if F_stats[val] < f_dist.ppf(exp_alfa / 2, r1s[val], r2s[val]) or \
                    F_stats[val] > f_dist.ppf(1 - exp_alfa / 2, r1s[val], r2s[val]):
                return False
@ -176,49 +210,49 @@ class StructureEstimator:
#print("Chi Quantile", chi_2_quantile) #print("Chi Quantile", chi_2_quantile)
if Chi > chi_2_quantile: if Chi > chi_2_quantile:
#if np.any(chi_stats > chi_2_quantile): #if np.any(chi_stats > chi_2_quantile):
print("CONDITIONALLY DEPENDENT CHI") #print("CONDITIONALLY DEPENDENT CHI")
return False return False
#print("Chi test", Chi) #print("Chi test", Chi)
return True return True
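The exponential part above is a two-sided F test on the ratio of the exit-rate estimates, with the diagonal transition counts as degrees of freedom; a standalone sketch with made-up numbers:

from scipy.stats import f as f_dist

q1, r1 = 1.20, 150  # exit rate and transition count without the extra parent (made up)
q2, r2 = 1.35, 140  # the same quantities with the extra parent (made up)
F = q2 / q1
alfa = 0.1
lower = f_dist.ppf(alfa / 2, r1, r2)
upper = f_dist.ppf(1 - alfa / 2, r1, r2)
print(lower < F < upper)  # True -> the exponential null hypothesis is not rejected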
    def one_iteration_of_CTPC_algorithm(self, var_id: str, tot_vars_count: int):
        """
        Performs an iteration of the CTPC algorithm using the node var_id as test_child.

        Parameters:
            var_id: the node label of the test child
            tot_vars_count: the number of nodes in the net
        Returns:
            void
        """
        print("##################TESTING VAR################", var_id)
        u = list(self.complete_graph.predecessors(var_id))
        child_states_numb = self.sample_path.structure.get_states_number(var_id)
        b = 0
        while b < len(u):
            parent_indx = 0
            while parent_indx < len(u):
                removed = False
                S = self.generate_possible_sub_sets_of_size(u, b, u[parent_indx])
                test_parent = u[parent_indx]
                for parents_set in S:
                    if self.complete_test(test_parent, var_id, parents_set, child_states_numb, tot_vars_count):
                        self.complete_graph.remove_edge(test_parent, var_id)
                        u.remove(test_parent)
                        removed = True
                        break
                if not removed:
@ -226,27 +260,55 @@ class StructureEstimator:
            b += 1
        self.cache.clear()
    def generate_possible_sub_sets_of_size(self, u: typing.List, size: int, parent_label: str):
        """
        Creates a list containing all the possible subsets of the list u of size size
        that do not contain the node identified by parent_label.

        Parameters:
            u: the list of nodes
            size: the size of the subsets
            parent_label: the node to exclude from the subsets generation
        Returns:
            a map object containing a list of lists
        """
        list_without_test_parent = u[:]
        list_without_test_parent.remove(parent_label)
        return map(list, itertools.combinations(list_without_test_parent, size))
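For instance, with u = ['X', 'Y', 'Z'], size 1 and parent_label 'Y', the generated separating-set candidates are drawn only from the remaining parents:

import itertools

u = ['X', 'Y', 'Z']
rest = [n for n in u if n != 'Y']
print(list(map(list, itertools.combinations(rest, 1))))  # [['X'], ['Z']]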
    def ctpc_algorithm(self):
        """
        Runs the CTPC algorithm on the whole net.

        Parameters:
            void
        Returns:
            void
        """
        ctpc_algo = self.one_iteration_of_CTPC_algorithm
        total_vars_numb = self.sample_path.total_variables_count
        [ctpc_algo(n, total_vars_numb) for n in self.nodes]
    def save_results(self):
        """
        Saves the estimated Structure to a .json file.

        Parameters:
            void
        Returns:
            void
        """
        res = json_graph.node_link_data(self.complete_graph)
        name = self.sample_path.importer.file_path.rsplit('/', 1)[-1]
        name = 'results_' + name
        with open(name, 'w') as f:
            json.dump(res, f)
    def remove_diagonal_elements(self, matrix):
        m = matrix.shape[0]
        strided = np.lib.stride_tricks.as_strided
        s0, s1 = matrix.strides
        return strided(matrix.ravel()[1:], shape=(m - 1, m), strides=(s0 + s1, s1)).reshape(m, -1)
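The as_strided call above views matrix.ravel()[1:] with a row stride of one full row plus one element, so each virtual row skips exactly one diagonal entry; a quick check on a toy matrix:

import numpy as np

m = np.arange(9).reshape(3, 3)
s0, s1 = m.strides
strided = np.lib.stride_tricks.as_strided
out = strided(m.ravel()[1:], shape=(2, 3), strides=(s0 + s1, s1)).reshape(3, -1)
print(out)  # [[1 2] [3 5] [6 7]]: every row of m with its diagonal entry removed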

@ -4,30 +4,41 @@ import numpy as np
class Trajectory:
    """
    Abstracts the information about a complete set of trajectories, represented as a numpy array of doubles
    (the times) and a numpy matrix of ints (the values).

    :list_of_columns: the list containing the times array and values matrix
    :original_cols_number: total number of cols in the data
    :actual_trajectory: the trajectory containing also the duplicated and shifted values
    :times: the array containing the time deltas
    """

    def __init__(self, list_of_columns, original_cols_number):
        if type(list_of_columns[0][0]) != np.float64:
            raise TypeError('The first array in the list has to be Times')
        self.original_cols_number = original_cols_number
        self._actual_trajectory = np.array(list_of_columns[1:], dtype=np.int).T
        self._times = np.array(list_of_columns[0], dtype=np.float)

    @property
    def trajectory(self) -> np.ndarray:
        """
        Parameters:
            void
        Returns:
            a numpy matrix containing ONLY the original columns values, not the shifted ones
        """
        return self._actual_trajectory[:, :self.original_cols_number]

    @property
    def complete_trajectory(self) -> np.ndarray:
        """
        Parameters:
            void
        Returns:
            a numpy matrix containing all the values
        """
        return self._actual_trajectory
    @property

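A hedged sketch of how a Trajectory is assembled (toy columns; in the project the list comes from the importer's concatenated samples, and the class relies on the legacy np.int/np.float aliases, so it targets older NumPy releases):

import numpy as np

times = np.array([0.0, 0.4, 1.1])
x_col = np.array([0, 1, 1])
y_col = np.array([2, 2, 0])
t = Trajectory([times, x_col, y_col], original_cols_number=2)
print(t.trajectory.shape)           # (3, 2): only the original columns
print(t.complete_trajectory.shape)  # (3, 2) here, since no shifted columns were appended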
File diff suppressed because it is too large

@ -0,0 +1,61 @@
import unittest
import numpy as np
import cache as ch
import set_of_cims as soci
class TestCache(unittest.TestCase):
def test_init(self):
c1 = ch.Cache()
self.assertFalse(c1.list_of_sets_of_parents)
self.assertFalse(c1.actual_cache)
def test_put(self):
c1 = ch.Cache()
pset1 = {'X', 'Y'}
sofc1 = soci.SetOfCims('Z', [], 3, np.array([]))
c1.put(pset1, sofc1)
self.assertEqual(1, len(c1.actual_cache))
self.assertEqual(1, len(c1.list_of_sets_of_parents))
self.assertEqual(sofc1, c1.actual_cache[0])
pset2 = {'X'}
sofc2 = soci.SetOfCims('Z', [], 3, np.array([]))
c1.put(pset2, sofc2)
self.assertEqual(2, len(c1.actual_cache))
self.assertEqual(2, len(c1.list_of_sets_of_parents))
self.assertEqual(sofc2, c1.actual_cache[1])
def test_find(self):
c1 = ch.Cache()
pset1 = {'X', 'Y'}
sofc1 = soci.SetOfCims('Z', [], 3, np.array([]))
c1.put(pset1, sofc1)
self.assertEqual(1, len(c1.actual_cache))
self.assertEqual(1, len(c1.list_of_sets_of_parents))
self.assertIsInstance(c1.find(pset1), soci.SetOfCims)
self.assertEqual(sofc1, c1.find(pset1))
self.assertIsInstance(c1.find({'Y', 'X'}), soci.SetOfCims)
self.assertEqual(sofc1, c1.find({'Y', 'X'}))
self.assertIsNone(c1.find({'X'}))
def test_clear(self):
c1 = ch.Cache()
pset1 = {'X', 'Y'}
sofc1 = soci.SetOfCims('Z', [], 3, np.array([]))
c1.put(pset1, sofc1)
self.assertEqual(1, len(c1.actual_cache))
self.assertEqual(1, len(c1.list_of_sets_of_parents))
c1.clear()
self.assertFalse(c1.list_of_sets_of_parents)
self.assertFalse(c1.actual_cache)
if __name__ == '__main__':
unittest.main()

@ -1,25 +1,32 @@
import sys
sys.path.append("/Users/Zalum/Desktop/Tesi/CTBN_Project/main_package/classes/")
import unittest
import os
import glob
import json
import numpy as np
import pandas as pd
from line_profiler import LineProfiler

import json_importer as ji
class TestJsonImporter(unittest.TestCase):

    @classmethod
    def setUpClass(cls) -> None:
        cls.read_files = glob.glob(os.path.join('../data', "*.json"))

    def test_init(self):
        j1 = ji.JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
        self.assertEqual(j1.samples_label, 'samples')
        self.assertEqual(j1.structure_label, 'dyn.str')
        self.assertEqual(j1.variables_label, 'variables')
        self.assertEqual(j1.time_key, 'Time')
        self.assertEqual(j1.variables_key, 'Name')
        self.assertEqual(j1.file_path, self.read_files[0])
        self.assertFalse(j1.df_samples_list)
        self.assertTrue(j1.variables.empty)
        self.assertTrue(j1.structure.empty)
@ -31,6 +38,7 @@ class TestJsonImporter(unittest.TestCase):
        with open('data.json', 'w') as f:
            json.dump(data_set, f)
        path = os.getcwd()
        path = path + '/data.json'
        j1 = ji.JsonImporter(path, '', '', '', '', '')
        imported_data = j1.read_json_file()
        self.assertTrue(self.ordered(data_set) == self.ordered(imported_data))
@ -38,28 +46,30 @@ class TestJsonImporter(unittest.TestCase):
    def test_read_json_file_not_found(self):
        path = os.getcwd()
        path = path + '/data.json'
        j1 = ji.JsonImporter(path, '', '', '', '', '')
        self.assertRaises(FileNotFoundError, j1.read_json_file)

    def test_normalize_trajectories(self):
        j1 = ji.JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
        raw_data = j1.read_json_file()
        j1.normalize_trajectories(raw_data, 0, j1.samples_label)
        self.assertEqual(len(j1.df_samples_list), len(raw_data[0][j1.samples_label]))
        self.assertEqual(list(j1.df_samples_list[0].columns.values)[1:], j1.sorter)

    def test_normalize_trajectories_wrong_indx(self):
        j1 = ji.JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
        raw_data = j1.read_json_file()
        self.assertRaises(IndexError, j1.normalize_trajectories, raw_data, 474, j1.samples_label)

    def test_normalize_trajectories_wrong_key(self):
        j1 = ji.JsonImporter(self.read_files[0], 'sample', 'dyn.str', 'variables', 'Time', 'Name')
        raw_data = j1.read_json_file()
        self.assertRaises(KeyError, j1.normalize_trajectories, raw_data, 0, j1.samples_label)

    def test_compute_row_delta_single_samples_frame(self):
        j1 = ji.JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
        raw_data = j1.read_json_file()
        j1.normalize_trajectories(raw_data, 0, j1.samples_label)
        sample_frame = j1.df_samples_list[0]
@ -72,14 +82,15 @@ class TestJsonImporter(unittest.TestCase):
        self.assertEqual(sample_frame.shape[0] - 1, new_sample_frame.shape[0])

    def test_compute_row_delta_in_all_frames(self):
        j1 = ji.JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
        raw_data = j1.read_json_file()
        j1.import_trajectories(raw_data)
        j1.compute_row_delta_in_all_samples_frames(j1.time_key)
        self.assertEqual(list(j1.df_samples_list[0].columns.values), list(j1.concatenated_samples.columns.values))
        self.assertEqual(list(j1.concatenated_samples.columns.values)[0], j1.time_key)

    def test_clear_data_frame_list(self):
        j1 = ji.JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
        raw_data = j1.read_json_file()
        j1.import_trajectories(raw_data)
        j1.compute_row_delta_in_all_samples_frames(j1.time_key)
@ -92,6 +103,7 @@ class TestJsonImporter(unittest.TestCase):
        with open('data.json', 'w') as f:
            json.dump(data_set, f)
        path = os.getcwd()
        path = path + '/data.json'
        j1 = ji.JsonImporter(path, '', '', '', '', '')
        raw_data = j1.read_json_file()
        frame = pd.DataFrame(raw_data)
@ -104,15 +116,20 @@ class TestJsonImporter(unittest.TestCase):
        os.remove('data.json')

    def test_import_variables(self):
        j1 = ji.JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
        sorter = ['X', 'Y', 'Z']
        raw_data = [{'variables': {"Name": ['Z', 'Y', 'X'], "value": [3, 3, 3]}}]
        j1.import_variables(raw_data, sorter)
        self.assertEqual(list(j1.variables[j1.variables_key]), sorter)

    def test_import_data(self):
        j1 = ji.JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
        lp = LineProfiler()
        lp_wrapper = lp(j1.import_data)
        lp_wrapper()
        lp.print_stats()
        self.assertEqual(list(j1.variables[j1.variables_key]),
                         list(j1.concatenated_samples.columns.values[1:len(j1.variables[j1.variables_key]) + 1]))
        print(j1.variables)

@ -1,16 +1,23 @@
import unittest
import glob
import os
import itertools
import networkx as nx
import numpy as np
from line_profiler import LineProfiler

import sample_path as sp
import network_graph as ng
import json_importer as ji


class TestNetworkGraph(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        cls.read_files = glob.glob(os.path.join('../data', "*.json"))
        cls.importer = ji.JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
        cls.s1 = sp.SamplePath(cls.importer)
        cls.s1.build_trajectories()
        cls.s1.build_structure()
@ -18,26 +25,26 @@ class TestNetworkGraph(unittest.TestCase):
        g1 = ng.NetworkGraph(self.s1.structure)
        self.assertEqual(self.s1.structure, g1.graph_struct)
        self.assertIsInstance(g1.graph, nx.DiGraph)
        self.assertTrue(np.array_equal(g1._nodes_indexes, self.s1.structure.nodes_indexes))
        self.assertListEqual(g1._nodes_labels, self.s1.structure.nodes_labels)
        self.assertTrue(np.array_equal(g1._nodes_values, self.s1.structure.nodes_values))
        self.assertIsNone(g1._fancy_indexing)
        self.assertIsNone(g1.time_scalar_indexing_strucure)
        self.assertIsNone(g1.transition_scalar_indexing_structure)
        self.assertIsNone(g1.transition_filtering)
        self.assertIsNone(g1.p_combs)

    def test_add_nodes(self):
        g1 = ng.NetworkGraph(self.s1.structure)
        g1.add_nodes(self.s1.structure.nodes_labels)
        for n1, n2 in zip(g1.nodes, self.s1.structure.nodes_labels):
            self.assertEqual(n1, n2)

    def test_add_edges(self):
        g1 = ng.NetworkGraph(self.s1.structure)
        g1.add_edges(self.s1.structure.edges)
        for e in self.s1.structure.edges:
            self.assertIn(tuple(e), g1.edges)

    """def test_get_ordered_by_indx_set_of_parents(self):
        g1 = ng.NetworkGraph(self.s1.structure)
        g1.add_nodes(self.s1.structure.list_of_nodes_labels())
        g1.add_edges(self.s1.structure.list_of_edges())
        sorted_par_list_aggregated_info = g1.get_ordered_by_indx_set_of_parents(g1.get_nodes()[2])
        self.test_aggregated_par_list_data(g1, g1.get_nodes()[2], sorted_par_list_aggregated_info)"""

    def aux_aggregated_par_list_data(self, graph, node_id, sorted_par_list_aggregated_info):
        for indx, element in enumerate(sorted_par_list_aggregated_info):
@ -55,12 +62,12 @@ class TestNetworkGraph(unittest.TestCase):
    def test_get_ord_set_of_par_of_all_nodes(self):
        g1 = ng.NetworkGraph(self.s1.structure)
        g1.add_nodes(self.s1.structure.nodes_labels)
        g1.add_edges(self.s1.structure.edges)
        sorted_list_of_par_lists = g1.get_ord_set_of_par_of_all_nodes()
        for node, par_list in zip(g1.nodes, sorted_list_of_par_lists):
            self.aux_aggregated_par_list_data(g1, node, par_list)

    """
    def test_get_ordered_by_indx_parents_values_for_all_nodes(self):
        g1 = ng.NetworkGraph(self.s1.structure)
        g1.add_nodes(self.s1.structure.list_of_nodes_labels())
@ -89,67 +96,67 @@ class TestNetworkGraph(unittest.TestCase):
        self.assertEqual(par_indxs, aggr[1])

    def test_build_fancy_indexing_structure_offset(self):
        pass  # TODO: the netgraph code must handle this case"""

    def aux_build_time_scalar_indexing_structure_for_a_node(self, graph, node_id, parents_indxs, parents_labels,
                                                            parents_vals):
        time_scalar_indexing = graph.build_time_scalar_indexing_structure_for_a_node(node_id, parents_vals)
        self.assertEqual(len(time_scalar_indexing), len(parents_indxs) + 1)
        merged_list = parents_labels[:]
        merged_list.insert(0, node_id)
        vals_list = []
        for node in merged_list:
            vals_list.append(graph.get_states_number(node))
        t_vec = np.array(vals_list)
        t_vec = t_vec.cumprod()
        self.assertTrue(np.array_equal(time_scalar_indexing, t_vec))

    def aux_build_transition_scalar_indexing_structure_for_a_node(self, graph, node_id, parents_indxs, parents_labels,
                                                                  parents_values):
        transition_scalar_indexing = graph.build_transition_scalar_indexing_structure_for_a_node(node_id,
                                                                                                 parents_values)
        self.assertEqual(len(transition_scalar_indexing), len(parents_indxs) + 2)
        merged_list = parents_labels[:]
        merged_list.insert(0, node_id)
        merged_list.insert(0, node_id)
        vals_list = []
        for node_id in merged_list:
            vals_list.append(graph.get_states_number(node_id))
        m_vec = np.array([vals_list])
        m_vec = m_vec.cumprod()
        self.assertTrue(np.array_equal(transition_scalar_indexing, m_vec))

    def test_build_transition_scalar_indexing_structure(self):
        g1 = ng.NetworkGraph(self.s1.structure)
        g1.add_nodes(self.s1.structure.nodes_labels)
        g1.add_edges(self.s1.structure.edges)
        g1.aggregated_info_about_nodes_parents = g1.get_ord_set_of_par_of_all_nodes()
        p_labels = [i[0] for i in g1.aggregated_info_about_nodes_parents]
        p_vals = g1.get_ordered_by_indx_parents_values_for_all_nodes()
        fancy_indx = g1.build_fancy_indexing_structure(0)
        for node_id, p_i, p_l, p_v in zip(g1.graph_struct.nodes_labels, fancy_indx, p_labels, p_vals):
            self.aux_build_transition_scalar_indexing_structure_for_a_node(g1, node_id, p_i, p_l, p_v)

    def test_build_time_scalar_indexing_structure(self):
        g1 = ng.NetworkGraph(self.s1.structure)
        g1.add_nodes(self.s1.structure.nodes_labels)
        g1.add_edges(self.s1.structure.edges)
        g1.aggregated_info_about_nodes_parents = g1.get_ord_set_of_par_of_all_nodes()
        fancy_indx = g1.build_fancy_indexing_structure(0)
        p_labels = [i[0] for i in g1.aggregated_info_about_nodes_parents]
        p_vals = g1.get_ordered_by_indx_parents_values_for_all_nodes()
        # the loop variable is named p_l so it does not shadow the p_labels list built above
        for node_id, p_indxs, p_l, p_v in zip(g1.graph_struct.nodes_labels, fancy_indx, p_labels, p_vals):
            self.aux_build_time_scalar_indexing_structure_for_a_node(g1, node_id, p_indxs, p_l, p_v)

    def test_build_time_columns_filtering_structure(self):
        g1 = ng.NetworkGraph(self.s1.structure)
        g1.add_nodes(self.s1.structure.nodes_labels)
        g1.add_edges(self.s1.structure.edges)
        g1.aggregated_info_about_nodes_parents = g1.get_ord_set_of_par_of_all_nodes()
        g1._fancy_indexing = g1.build_fancy_indexing_structure(0)
        g1.build_time_columns_filtering_structure()
        t_filter = []
        for node_id, p_indxs in zip(g1.nodes, g1._fancy_indexing):
            single_filter = []
            single_filter.append(g1.get_node_indx(node_id))
            single_filter.extend(p_indxs)
@ -160,47 +167,72 @@ class TestNetworkGraph(unittest.TestCase):
    def test_build_transition_columns_filtering_structure(self):
        g1 = ng.NetworkGraph(self.s1.structure)
        g1.add_nodes(self.s1.structure.nodes_labels)
        g1.add_edges(self.s1.structure.edges)
        g1.aggregated_info_about_nodes_parents = g1.get_ord_set_of_par_of_all_nodes()
        g1._fancy_indexing = g1.build_fancy_indexing_structure(0)
        g1.build_transition_columns_filtering_structure()
        m_filter = []
        for node_id, p_indxs in zip(g1.nodes, g1._fancy_indexing):
            single_filter = []
            single_filter.append(g1.get_node_indx(node_id) + g1.graph_struct.total_variables_number)
            single_filter.append(g1.get_node_indx(node_id))
            single_filter.extend(p_indxs)
            m_filter.append(np.array(single_filter))
        for a1, a2 in zip(g1.transition_filtering, m_filter):
            self.assertTrue(np.array_equal(a1, a2))

    def test_build_p_combs_structure(self):
        g1 = ng.NetworkGraph(self.s1.structure)
        g1.add_nodes(self.s1.structure.nodes_labels)
        g1.add_edges(self.s1.structure.edges)
        g1.aggregated_info_about_nodes_parents = g1.get_ord_set_of_par_of_all_nodes()
        p_vals = g1.get_ordered_by_indx_parents_values_for_all_nodes()
        p_combs = g1.build_p_combs_structure()
        for matrix, p_v in zip(p_combs, p_vals):
            p_possible_vals = []
            for val in p_v:
                vals = [v for v in range(val)]
                p_possible_vals.extend(vals)
            comb_struct = set(itertools.product(p_possible_vals, repeat=len(p_v)))
            for comb in comb_struct:
                self.assertIn(np.array(comb), matrix)

    def test_fast_init(self):
        g1 = ng.NetworkGraph(self.s1.structure)
        g2 = ng.NetworkGraph(self.s1.structure)
        g1.init_graph()
        for indx, node in enumerate(g1.nodes):
            g2.fast_init(node)
            self.assertListEqual(g2._fancy_indexing, g1._fancy_indexing[indx])
            self.assertTrue(np.array_equal(g2.time_scalar_indexing_strucure, g1.time_scalar_indexing_strucure[indx]))
            self.assertTrue(np.array_equal(g2.transition_scalar_indexing_structure,
                                           g1.transition_scalar_indexing_structure[indx]))
            self.assertTrue(np.array_equal(g2.time_filtering, g1.time_filtering[indx]))
            self.assertTrue(np.array_equal(g2.transition_filtering, g1.transition_filtering[indx]))
            self.assertTrue(np.array_equal(g2.p_combs, g1.p_combs[indx]))
def test_get_parents_by_id(self):
g1 = ng.NetworkGraph(self.s1.structure)
g1.add_nodes(self.s1.structure.nodes_labels)
g1.add_edges(self.s1.structure.edges)
for node in g1.nodes:
self.assertListEqual(g1.get_parents_by_id(node), list(g1.graph.predecessors(node)))
def test_get_states_number(self):
g1 = ng.NetworkGraph(self.s1.structure)
g1.add_nodes(self.s1.structure.nodes_labels)
g1.add_edges(self.s1.structure.edges)
for node, val in zip(g1.nodes, g1.nodes_values):
self.assertEqual(val, g1.get_states_number(node))
def test_get_node_indx(self):
g1 = ng.NetworkGraph(self.s1.structure)
g1.add_nodes(self.s1.structure.nodes_labels)
g1.add_edges(self.s1.structure.edges)
for node, indx in zip(g1.nodes, g1.nodes_indexes):
self.assertEqual(indx, g1.get_node_indx(node))
    # TODO: tests for the properties and the various getters are still missing

if __name__ == '__main__':
    unittest.main()

@ -1,24 +1,69 @@
import unittest
import glob
import os
import numpy as np
from line_profiler import LineProfiler

import network_graph as ng
import sample_path as sp
import set_of_cims as sofc
import parameters_estimator as pe
import json_importer as ji


class TestParametersEstimatior(unittest.TestCase):

    @classmethod
    def setUpClass(cls) -> None:
        cls.read_files = glob.glob(os.path.join('../data', "*.json"))
        cls.importer = ji.JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
        cls.s1 = sp.SamplePath(cls.importer)
        cls.s1.build_trajectories()
        cls.s1.build_structure()
        cls.g1 = ng.NetworkGraph(cls.s1.structure)
        cls.g1.init_graph()
def test_fast_init(self):
for node in self.g1.nodes:
g = ng.NetworkGraph(self.s1.structure)
g.fast_init(node)
p1 = pe.ParametersEstimator(self.s1, g)
self.assertEqual(p1.sample_path, self.s1)
self.assertEqual(p1.net_graph, g)
self.assertIsNone(p1.single_set_of_cims)
p1.fast_init(node)
self.assertIsInstance(p1.single_set_of_cims, sofc.SetOfCims)
def test_compute_parameters_for_node(self):
for indx, node in enumerate(self.g1.nodes):
print(node)
g = ng.NetworkGraph(self.s1.structure)
g.fast_init(node)
p1 = pe.ParametersEstimator(self.s1, g)
p1.fast_init(node)
sofc1 = p1.compute_parameters_for_node(node)
sampled_cims = self.aux_import_sampled_cims('dyn.cims')
sc = list(sampled_cims.values())
#print(sc[indx])
self.equality_of_cims_of_node(sc[indx], sofc1.actual_cims)
def equality_of_cims_of_node(self, sampled_cims, estimated_cims):
#print(sampled_cims)
#print(estimated_cims)
self.assertEqual(len(sampled_cims), len(estimated_cims))
for c1, c2 in zip(sampled_cims, estimated_cims):
self.cim_equality_test(c1, c2.cim)
def cim_equality_test(self, cim1, cim2):
for r1, r2 in zip(cim1, cim2):
self.assertTrue(np.all(np.isclose(r1, r2, 1e-01, 1e-01) == True))
def aux_import_sampled_cims(self, cims_label):
i1 = ji.JsonImporter(self.read_files[0], '', '', '', '', '')
raw_data = i1.read_json_file()
return i1.import_sampled_cims(raw_data, 0, cims_label)
"""
def test_init(self): def test_init(self):
self.aux_test_init(self.s1, self.g1) self.aux_test_init(self.s1, self.g1)
@ -38,45 +83,6 @@ class TestParametersEstimatior(unittest.TestCase):
def test_compute_parameters(self): def test_compute_parameters(self):
self.aux_test_compute_parameters(self.s1, self.g1) self.aux_test_compute_parameters(self.s1, self.g1)
"""
def aux_test_compute_parameters(self, sample_p, graph):
pe1 = pe.ParametersEstimator(sample_p, graph)
pe1.init_sets_cims_container()
pe1.compute_parameters()
samples_cims = self.aux_import_sampled_cims('dyn.cims')
for indx, sc in enumerate(samples_cims.values()):
self.equality_of_cims_of_node(sc, pe1.sets_of_cims_struct.get_set_of_cims(indx).get_cims())
def equality_of_cims_of_node(self, sampled_cims, estimated_cims):
self.assertEqual(len(sampled_cims), len(estimated_cims))
for c1, c2 in zip(sampled_cims, estimated_cims):
self.cim_equality_test(c1, c2.cim)
def cim_equality_test(self, cim1, cim2):
for r1, r2 in zip(cim1, cim2):
self.assertTrue(np.all(np.isclose(r1, r2, 1e-01, 1e-01) == True))
def test_compute_parameters_for_node(self):#TODO Questo non è un test
pe1 = pe.ParametersEstimator(self.s1, self.g1)
#pe1.init_sets_cims_container()
lp = LineProfiler()
lp_wrapper = lp(pe1.init_sets_cims_container)
#lp.add_function(pe1.sets_of_cims_struct.init_cims_structure)
lp_wrapper()
lp.print_stats()
#pe1.init_sets_cims_container()
#pe1.compute_parameters_for_node('Y')
print(pe1.sets_of_cims_struct.get_set_of_cims(0).actual_cims)
def aux_import_sampled_cims(self, cims_label):
i1 = ji.JsonImporter('../data', '', '', '', '', '')
raw_data = i1.read_json_file()
return i1.import_sampled_cims(raw_data, 0, cims_label)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
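The added tests exercise a two-step, per-node estimation flow: fast_init restricts the graph and the estimator to one node, and compute_parameters_for_node returns its SetOfCims. A minimal sketch of that flow, assuming the same '../data' JSON samples used by the fixtures and the node label 'X' (an assumed label taken from the other tests):

    import glob, os
    import json_importer as ji
    import sample_path as sp
    import network_graph as ng
    import parameters_estimator as pe

    read_files = glob.glob(os.path.join('../data', "*.json"))
    importer = ji.JsonImporter(read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
    s1 = sp.SamplePath(importer)
    s1.build_trajectories()
    s1.build_structure()
    g = ng.NetworkGraph(s1.structure)
    g.fast_init('X')                             # structures for node 'X' and its parents only
    p1 = pe.ParametersEstimator(s1, g)
    p1.fast_init('X')
    sofc1 = p1.compute_parameters_for_node('X')  # SetOfCims: one CIM per combination of the parents' values

Note also that cim_equality_test passes 1e-01 positionally to np.isclose, i.e. as rtol and atol, so the estimated CIMs only have to match the sampled ones within a 10% relative tolerance.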

@@ -1,4 +1,7 @@
 import unittest
+import glob
+import os
+import json_importer as ji
 import sample_path as sp
 import trajectory as tr
 import structure as st
@@ -6,8 +9,13 @@ import structure as st
 class TestSamplePath(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls) -> None:
+        cls.read_files = glob.glob(os.path.join('../data', "*.json"))
+        cls.importer = ji.JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
     def test_init(self):
-        s1 = sp.SamplePath('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name')
+        s1 = sp.SamplePath(self.importer)
         s1.build_trajectories()
         self.assertIsNotNone(s1.trajectories)
         self.assertIsInstance(s1.trajectories, tr.Trajectory)

@@ -11,64 +11,84 @@ class TestSetOfCims(unittest.TestCase):
     def setUpClass(cls) -> None:
         cls.node_id = 'X'
         cls.possible_cardinalities = [2, 3]
-        #cls.possible_states = [[0,1], [0, 1, 2]]
+        cls.possible_states = [[0,1], [0, 1, 2]]
         cls.node_states_number = range(2, 4)
     def test_init(self):
         # empty parent set
         for sn in self.node_states_number:
-            self.aux_test_init(self.node_id, [], sn)
+            p_combs = self.build_p_comb_structure_for_a_node([])
+            self.aux_test_init(self.node_id, [], sn, p_combs)
         # one parent
         for sn in self.node_states_number:
             for p in itertools.product(self.possible_cardinalities, repeat=1):
-                self.aux_test_init(self.node_id, list(p), sn)
+                p_combs = self.build_p_comb_structure_for_a_node(list(p))
+                self.aux_test_init(self.node_id, list(p), sn, p_combs)
         #two parents
         for sn in self.node_states_number:
             for p in itertools.product(self.possible_cardinalities, repeat=2):
-                self.aux_test_init(self.node_id, list(p), sn)
+                p_combs = self.build_p_comb_structure_for_a_node(list(p))
+                self.aux_test_init(self.node_id, list(p), sn, p_combs)
-    def test_indexes_converter(self):
+    def test_build_cims(self):
         # empty parent set
         for sn in self.node_states_number:
-            self.aux_test_indexes_converter(self.node_id, [], sn)
+            p_combs = self.build_p_comb_structure_for_a_node([])
+            self.aux_test_build_cims(self.node_id, [], sn, p_combs)
         # one parent
         for sn in self.node_states_number:
             for p in itertools.product(self.possible_cardinalities, repeat=1):
-                self.aux_test_init(self.node_id, list(p), sn)
-            # two parents
+                p_combs = self.build_p_comb_structure_for_a_node(list(p))
+                self.aux_test_build_cims(self.node_id, list(p), sn, p_combs)
+        #two parents
         for sn in self.node_states_number:
             for p in itertools.product(self.possible_cardinalities, repeat=2):
-                self.aux_test_init(self.node_id, list(p), sn)
+                p_combs = self.build_p_comb_structure_for_a_node(list(p))
+                self.aux_test_build_cims(self.node_id, list(p), sn, p_combs)
-    def aux_test_indexes_converter(self, node_id, parents_states_number, node_states_number):
-        sofcims = soci.SetOfCims(node_id, parents_states_number, node_states_number)
-        if not parents_states_number:
-            self.assertEqual(sofcims.indexes_converter([]), 0)
-        else:
-            parents_possible_values = []
-            for cardi in parents_states_number:
-                parents_possible_values.extend(range(0, cardi))
-            for p in itertools.permutations(parents_possible_values, len(parents_states_number)):
-                self.assertEqual(sofcims.indexes_converter(list(p)), np.ravel_multi_index(list(p), parents_states_number))
-    def test_build_cims(self):
+    def test_filter_cims_with_mask(self):
+        p_combs = self.build_p_comb_structure_for_a_node(self.possible_cardinalities)
+        sofc1 = soci.SetOfCims('X', self.possible_cardinalities, 3, p_combs)
         state_res_times_list = []
         transition_matrices_list = []
-        so1 = soci.SetOfCims('X',[3], 3)
-        for i in range(0, 3):
+        for i in range(len(p_combs)):
             state_res_times = np.random.rand(1, 3)[0]
             state_res_times = state_res_times * 1000
             state_transition_matrix = np.random.randint(1, 10000, (3, 3))
             state_res_times_list.append(state_res_times)
             transition_matrices_list.append(state_transition_matrix)
+        sofc1.build_cims(state_res_times_list, transition_matrices_list)
+        for length_of_mask in range(3):
+            for mask in list(itertools.permutations([True, False], r=length_of_mask)):
+                m = np.array(mask)
+                for parent_value in range(self.possible_cardinalities[0]):
+                    cims = sofc1.filter_cims_with_mask(m, [parent_value])
+                    if length_of_mask == 0 or length_of_mask == 1:
+                        self.assertTrue(np.array_equal(sofc1.actual_cims, cims))
+                    else:
+                        indxs = self.another_filtering_method(p_combs, m, [parent_value])
+                        self.assertTrue(np.array_equal(cims, sofc1.actual_cims[indxs]))
+    def aux_test_build_cims(self, node_id, p_values, node_states, p_combs):
+        state_res_times_list = []
+        transition_matrices_list = []
+        so1 = soci.SetOfCims(node_id, p_values, node_states, p_combs)
+        for i in range(len(p_combs)):
+            state_res_times = np.random.rand(1, node_states)[0]
+            state_res_times = state_res_times * 1000
+            state_transition_matrix = np.random.randint(1, 10000, (node_states, node_states))
+            state_res_times_list.append(state_res_times)
+            transition_matrices_list.append(state_transition_matrix)
         so1.build_cims(state_res_times_list, transition_matrices_list)
         self.assertEqual(len(state_res_times_list), so1.get_cims_number())
+        self.assertIsInstance(so1.actual_cims, np.ndarray)
         self.assertIsNone(so1.transition_matrices)
         self.assertIsNone(so1.state_residence_times)
-    def aux_test_init(self, node_id, parents_states_number, node_states_number):
-        sofcims = soci.SetOfCims(node_id, parents_states_number, node_states_number)
+    def aux_test_init(self, node_id, parents_states_number, node_states_number, p_combs):
+        sofcims = soci.SetOfCims(node_id, parents_states_number, node_states_number, p_combs)
         self.assertEqual(sofcims.node_id, node_id)
+        self.assertTrue(np.array_equal(sofcims.p_combs, p_combs))
         self.assertTrue(np.array_equal(sofcims.parents_states_number, parents_states_number))
         self.assertEqual(sofcims.node_states_number, node_states_number)
         self.assertFalse(sofcims.actual_cims)
@@ -77,7 +97,46 @@ class TestSetOfCims(unittest.TestCase):
         self.assertEqual(sofcims.transition_matrices.shape[0], np.prod(np.array(parents_states_number)))
         self.assertEqual(len(sofcims.transition_matrices[0][0]), node_states_number)
+    def aux_test_indexes_converter(self, node_id, parents_states_number, node_states_number):
+        sofcims = soci.SetOfCims(node_id, parents_states_number, node_states_number)
+        if not parents_states_number:
+            self.assertEqual(sofcims.indexes_converter([]), 0)
+        else:
+            parents_possible_values = []
+            for cardi in parents_states_number:
+                parents_possible_values.extend(range(0, cardi))
+            for p in itertools.permutations(parents_possible_values, len(parents_states_number)):
+                self.assertEqual(sofcims.indexes_converter(list(p)), np.ravel_multi_index(list(p), parents_states_number))
+    def build_p_comb_structure_for_a_node(self, parents_values):
+        """
+        Builds the combinatory structure that contains the combinations of all the values contained in parents_values.
+        Parameters:
+            parents_values: the cardinalities of the nodes
+        Returns:
+            a numpy matrix containing a grid of the combinations
+        """
+        tmp = []
+        for val in parents_values:
+            tmp.append([x for x in range(val)])
+        if len(parents_values) > 0:
+            parents_comb = np.array(np.meshgrid(*tmp)).T.reshape(-1, len(parents_values))
+            if len(parents_values) > 1:
+                tmp_comb = parents_comb[:, 1].copy()
+                parents_comb[:, 1] = parents_comb[:, 0].copy()
+                parents_comb[:, 0] = tmp_comb
+        else:
+            parents_comb = np.array([[]], dtype=int)  # plain int: np.int is deprecated in recent NumPy
+        return parents_comb
+    def another_filtering_method(self, p_combs, mask, parent_value):
+        masked_combs = p_combs[:, mask]
+        indxs = []
+        for indx, val in enumerate(masked_combs):
+            if val == parent_value:
+                indxs.append(indx)
+        return np.array(indxs)
 if __name__ == '__main__':
     unittest.main()
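For reference, a worked example of the grid that build_p_comb_structure_for_a_node returns: with parents_values = [2, 3], tmp becomes [[0, 1], [0, 1, 2]], and after the meshgrid and the column swap the result is

    # p_combs for parents_values = [2, 3]:
    # [[0 0]
    #  [1 0]
    #  [2 0]
    #  [0 1]
    #  [1 1]
    #  [2 1]]

one row per combination of the parents' values, with the first column cycling fastest. The reference implementation another_filtering_method compares the masked rows of p_combs against the parent value and uses the matching row indices to index actual_cims, so the CIMs are stored in the same row order as p_combs.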

@@ -1,80 +1,73 @@
 import sys
 sys.path.append("/Users/Zalum/Desktop/Tesi/CTBN_Project/main_package/classes/")
 import unittest
-import pandas as pd
+import numpy as np
 import structure as st
 class TestStructure(unittest.TestCase):
-    def setUp(self):
-        self.structure_frame = pd.DataFrame([{"From":"X","To":"Z"}, {"From":"X","To":"Y"},{"From":"Y","To":"X"},
-                                             {"From":"Y","To":"Z"},{"From":"Z","To":"Y"}, {"From":"Z","To":"X"} ])
-        self.variables_frame = pd.DataFrame([{"Name":"X","Value":3},{"Name":"Y","Value":3},{"Name":"Z","Value":3}])
+    @classmethod
+    def setUpClass(cls):
+        cls.labels = ['X','Y','Z']
+        cls.indxs = np.array([0,1,2])
+        cls.vals = np.array([3,3,3])
+        cls.edges = [('X','Z'),('Y','Z'), ('Z','Y')]
+        cls.vars_numb = len(cls.labels)
     def test_init(self):
-        s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index))
-        self.assertTrue(self.structure_frame.equals(s1.structure_frame))
-        self.assertTrue(self.variables_frame.equals(s1.variables_frame))
-        self.assertEqual(self.variables_frame.columns.values[0], s1.name_label)
-        self.assertEqual(self.variables_frame.columns.values[1], s1.value_label)
-        #print(len(self.variables_frame.index))
-        self.assertEqual(len(self.variables_frame.index), s1.total_variables_number)
+        s1 = st.Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb)
+        self.assertListEqual(self.labels, s1.nodes_labels)
+        self.assertIsInstance(s1.nodes_indexes, np.ndarray)
+        self.assertTrue(np.array_equal(self.indxs, s1.nodes_indexes))
+        self.assertIsInstance(s1.nodes_values, np.ndarray)
+        self.assertTrue(np.array_equal(self.vals, s1.nodes_values))
+        self.assertListEqual(self.edges, s1.edges)
+        self.assertEqual(self.vars_numb, s1.total_variables_number)
-    def test_list_of_edges(self):
-        s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index))
-        records = self.structure_frame.to_records(index=False)
-        result = list(records)
-        for e1, e2 in zip(result, s1.list_of_edges()):
-            self.assertEqual(e1, e2)
-    def test_list_of_nodes_labels(self):
-        s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index))
-        self.assertEqual(list(self.variables_frame['Name']), s1.list_of_nodes_labels())
     def test_get_node_id(self):
-        s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index))
-        for indx, var in enumerate(list(self.variables_frame['Name'])):
+        s1 = st.Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb)
+        for indx, var in enumerate(self.labels):
             self.assertEqual(var, s1.get_node_id(indx))
     def test_get_node_indx(self):
-        filtered_frame = self.variables_frame.drop(self.variables_frame[self.variables_frame['Name'] == 'Y'].index)
-        #print(filtered_frame)
-        s1 = st.Structure(self.structure_frame, filtered_frame, len(self.variables_frame.index))
-        for indx, var in zip(filtered_frame.index, filtered_frame['Name']):
+        l2 = self.labels[:]
+        l2.remove('Y')
+        i2 = self.indxs.copy()
+        i2 = np.delete(i2, 1)  # np.delete returns a new array; the result must be reassigned
+        v2 = self.vals.copy()
+        v2 = np.delete(v2, 1)
+        e2 = [('X','Z')]
+        n2 = self.vars_numb - 1
+        s1 = st.Structure(l2, i2, v2, e2, n2)
+        for indx, var in zip(i2, l2):
             self.assertEqual(indx, s1.get_node_indx(var))
-    def test_list_of_node_indxs(self):
-        filtered_frame = self.variables_frame.drop(self.variables_frame[self.variables_frame['Name'] == 'Y'].index)
-        # print(filtered_frame)
-        s1 = st.Structure(self.structure_frame, filtered_frame, len(self.variables_frame.index))
-        for indx1, indx2 in zip(filtered_frame.index, s1.list_of_nodes_indexes()):
-            self.assertEqual(indx1, indx2)
     def test_get_positional_node_indx(self):
-        filtered_frame = self.variables_frame.drop(self.variables_frame[self.variables_frame['Name'] == 'Y'].index)
-        # print(filtered_frame)
-        s1 = st.Structure(self.structure_frame, filtered_frame, len(self.variables_frame.index))
-        for indx, var in enumerate(s1.list_of_nodes_labels()):
+        l2 = self.labels[:]
+        l2.remove('Y')
+        i2 = self.indxs.copy()
+        i2 = np.delete(i2, 1)
+        v2 = self.vals.copy()
+        v2 = np.delete(v2, 1)
+        e2 = [('X', 'Z')]
+        n2 = self.vars_numb - 1
+        s1 = st.Structure(l2, i2, v2, e2, n2)
+        for indx, var in enumerate(s1.nodes_labels):
             self.assertEqual(indx, s1.get_positional_node_indx(var))
     def test_get_states_number(self):
-        s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index))
-        for indx, row in self.variables_frame.iterrows():
-            self.assertEqual(row[1], s1.get_states_number(row[0]))
-    def test_get_states_numeber_by_indx(self):
-        s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index))
-        for indx, row in self.variables_frame.iterrows():
-            self.assertEqual(row[1], s1.get_states_number_by_indx(indx))
+        l2 = self.labels[:]
+        l2.remove('Y')
+        i2 = self.indxs.copy()
+        i2 = np.delete(i2, 1)
+        v2 = self.vals.copy()
+        v2 = np.delete(v2, 1)
+        e2 = [('X', 'Z')]
+        n2 = self.vars_numb - 1
+        s1 = st.Structure(l2, i2, v2, e2, n2)
+        for val, node in zip(v2, l2):
+            self.assertEqual(val, s1.get_states_number(node))
-    def test_remove_node(self):
-        s1 = st.Structure(self.structure_frame, self.variables_frame, len(self.variables_frame.index))
-        s1.remove_node('Y')
-        print(s1.variables_frame)
-        print(s1.structure_frame)
-        print(s1.get_node_indx('Z'))
-        print(s1.get_positional_node_indx('Z'))
 if __name__ == '__main__':
     unittest.main()
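A note on the array surgery in the tests above: np.delete does not operate in place but returns a new array with the given entries removed, which is why its result is reassigned. For example:

    import numpy as np

    i2 = np.array([0, 1, 2])
    i2 = np.delete(i2, 1)  # i2 is now array([0, 2]); without the reassignment i2 would be unchanged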

@@ -1,28 +1,71 @@
 import unittest
+import numpy as np
+import networkx as nx
+import glob
+import os
+import math
 from line_profiler import LineProfiler
-from multiprocessing import Pool
+import psutil
+import json_importer as ji
 import sample_path as sp
 import structure_estimator as se
+import cache as ch
 class TestStructureEstimator(unittest.TestCase):
     @classmethod
     def setUpClass(cls) -> None:
-        cls.s1 = sp.SamplePath('../data', 'samples', 'dyn.str', 'variables', 'Time', 'Name')
+        cls.read_files = glob.glob(os.path.join('../data', "*.json"))
+        cls.importer = ji.JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
+        cls.s1 = sp.SamplePath(cls.importer)
         cls.s1.build_trajectories()
         cls.s1.build_structure()
     def test_init(self):
-        se1 = se.StructureEstimator(self.s1)
+        exp_alfa = 0.1
+        chi_alfa = 0.1
+        se1 = se.StructureEstimator(self.s1, exp_alfa, chi_alfa)
         self.assertEqual(self.s1, se1.sample_path)
-        self.assertEqual(se1.complete_graph_frame.shape[0],
-                         self.s1.total_variables_count *(self.s1.total_variables_count - 1))
+        self.assertTrue(np.array_equal(se1.nodes, np.array(self.s1.structure.nodes_labels)))
+        self.assertTrue(np.array_equal(se1.nodes_indxs, self.s1.structure.nodes_indexes))
+        self.assertTrue(np.array_equal(se1.nodes_vals, self.s1.structure.nodes_values))
+        self.assertEqual(se1.exp_test_sign, exp_alfa)
+        self.assertEqual(se1.chi_test_alfa, chi_alfa)
+        self.assertIsInstance(se1.complete_graph, nx.DiGraph)
+        self.assertIsInstance(se1.cache, ch.Cache)
+    def test_build_complete_graph(self):
+        exp_alfa = 0.1
+        chi_alfa = 0.1
+        nodes_numb = len(self.s1.structure.nodes_labels)
+        se1 = se.StructureEstimator(self.s1, exp_alfa, chi_alfa)
+        cg = se1.build_complete_graph(self.s1.structure.nodes_labels)
+        self.assertEqual(len(cg.edges), nodes_numb*(nodes_numb - 1))
+        for node in self.s1.structure.nodes_labels:
+            no_self_loops = self.s1.structure.nodes_labels[:]
+            no_self_loops.remove(node)
+            for n2 in no_self_loops:
+                self.assertIn((node, n2), cg.edges)
+    def test_generate_possible_sub_sets_of_size(self):
+        exp_alfa = 0.1
+        chi_alfa = 0.1
+        nodes_numb = len(self.s1.structure.nodes_labels)
+        se1 = se.StructureEstimator(self.s1, exp_alfa, chi_alfa)
-    def test_one_iteration(self):
+        for node in self.s1.structure.nodes_labels:
+            for b in range(nodes_numb):
+                sets = se1.generate_possible_sub_sets_of_size(self.s1.structure.nodes_labels, b, node)
+                sets2 = se1.generate_possible_sub_sets_of_size(self.s1.structure.nodes_labels, b, node)
+                self.assertEqual(len(list(sets)), math.floor(math.factorial(nodes_numb - 1) /
+                                                             (math.factorial(b)*math.factorial(nodes_numb - 1 - b))))
+                for sset in sets2:
+                    self.assertFalse(node in sset)
+    def test_time(self):
         se1 = se.StructureEstimator(self.s1, 0.1, 0.1)
-        #se1.one_iteration_of_CTPC_algorithm('X')
-        #self.aux_test_complete_test(se1, 'X', 'Y', ['Z'])
         lp = LineProfiler()
         lp.add_function(se1.complete_test)
         lp.add_function(se1.one_iteration_of_CTPC_algorithm)
@@ -30,13 +73,29 @@ class TestStructureEstimator(unittest.TestCase):
         lp_wrapper = lp(se1.ctpc_algorithm)
         lp_wrapper()
         lp.print_stats()
-        #se1.ctpc_algorithm()
         print(se1.complete_graph.edges)
-        print(self.s1.structure.list_of_edges())
+        print(self.s1.structure.edges)
+        for ed in self.s1.structure.edges:
+            self.assertIn(tuple(ed), se1.complete_graph.edges)
+        tuples_edges = [tuple(rec) for rec in self.s1.structure.edges]
+        spurious_edges = []
+        for ed in se1.complete_graph.edges:
+            if not(ed in tuples_edges):
+                spurious_edges.append(ed)
+        print("Spurious Edges:", spurious_edges)
+        se1.save_results()
-    def aux_test_complete_test(self, estimator, test_par, test_child, p_set):
-        estimator.complete_test(test_par, test_child, p_set)
+    def test_memory(self):
+        se1 = se.StructureEstimator(self.s1, 0.1, 0.1)
+        se1.ctpc_algorithm()
+        current_process = psutil.Process(os.getpid())
+        mem = current_process.memory_info().rss
+        print("Average Memory Usage in MB:", mem / 10**6)
 if __name__ == '__main__':
     unittest.main()
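The cardinality check in test_generate_possible_sub_sets_of_size is the binomial coefficient C(n-1, b): the candidate separating sets for a node are all size-b subsets of the other n-1 nodes. With the three nodes X, Y, Z and node = 'X', the subsets of size 1 are {Y} and {Z}, i.e. C(2, 1) = 2. A minimal sketch of the same count, assuming an itertools-based generation (the estimator's actual implementation is not shown here):

    import math
    from itertools import combinations

    nodes = ['X', 'Y', 'Z']
    node = 'X'
    others = [n for n in nodes if n != node]
    for b in range(len(nodes)):
        subsets = list(combinations(others, b))
        # same value the test computes with factorials: C(n-1, b)
        assert len(subsets) == math.factorial(len(others)) // (math.factorial(b) * math.factorial(len(others) - b))

The two identical calls in the test (sets and sets2) suggest the method returns a one-shot iterator: len(list(sets)) exhausts the first, so a fresh one is needed for the membership loop.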

@@ -8,7 +8,7 @@ class TestTrajectory(unittest.TestCase):
     def test_init(self):
         cols_list = [np.array([1.2,1.3,.14]), np.arange(1,4), np.arange(4,7)]
-        t1 = tr.Trajectory(cols_list, len(cols_list))
+        t1 = tr.Trajectory(cols_list, len(cols_list) - 2)
         self.assertTrue(np.array_equal(cols_list[0], t1.times))
         self.assertTrue(np.array_equal(np.ravel(t1.complete_trajectory[:, : 1]), cols_list[1]))
         self.assertTrue(np.array_equal(np.ravel(t1.complete_trajectory[:, 1: 2]), cols_list[2]))
@@ -19,6 +19,21 @@ class TestTrajectory(unittest.TestCase):
         cols_list = [np.arange(1, 4), np.arange(4, 7), np.array([1.2, 1.3, .14])]
         self.assertRaises(TypeError, tr.Trajectory, cols_list, len(cols_list))
+    def test_complete_trajectory(self):
+        cols_list = [np.array([1.2, 1.3, .14]), np.arange(1, 4), np.arange(4, 7)]
+        t1 = tr.Trajectory(cols_list, len(cols_list) - 2)
+        complete = np.column_stack((cols_list[1], cols_list[2]))
+        self.assertTrue(np.array_equal(t1.complete_trajectory, complete))
+    def test_trajectory(self):
+        cols_list = [np.array([1.2, 1.3, .14]), np.arange(1, 4), np.arange(4, 7)]
+        t1 = tr.Trajectory(cols_list, len(cols_list) - 2)
+        self.assertTrue(np.array_equal(cols_list[1], t1.trajectory.ravel()))
+    def test_times(self):
+        cols_list = [np.array([1.2, 1.3, .14]), np.arange(1, 4), np.arange(4, 7)]
+        t1 = tr.Trajectory(cols_list, len(cols_list) - 2)
+        self.assertTrue(np.array_equal(cols_list[0], t1.times))
 if __name__ == '__main__':
     unittest.main()
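A minimal sketch of the column convention these tests pin down, using the same constructor arguments as the tests (the meaning of the second argument is inferred only from the assertions above): the first array in cols_list is the times column, and the remaining arrays are stacked in order as the state columns:

    import numpy as np
    import trajectory as tr

    cols_list = [np.array([1.2, 1.3, .14]), np.arange(1, 4), np.arange(4, 7)]
    t1 = tr.Trajectory(cols_list, len(cols_list) - 2)
    t1.times                 # cols_list[0], the time stamps
    t1.complete_trajectory   # np.column_stack((cols_list[1], cols_list[2]))
    t1.trajectory            # per test_trajectory, cols_list[1] reshaped as a column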