parent
01f53c5607
commit
788bdc73af
@ -0,0 +1,9 @@ |
|||||||
|
include MANIFEST.in |
||||||
|
include setup.py |
||||||
|
include README.rst |
||||||
|
prune PyCTBN/test_data |
||||||
|
prune PyCTBN/tests |
||||||
|
prune tests |
||||||
|
prune test_data |
||||||
|
prune *tests* |
||||||
|
prune *test* |
@ -0,0 +1,2 @@ |
|||||||
|
import PyCTBN.PyCTBN |
||||||
|
from PyCTBN.PyCTBN import * |
@ -1,8 +0,0 @@ |
|||||||
import PyCTBN.estimators |
|
||||||
from PyCTBN.estimators import * |
|
||||||
import PyCTBN.optimizers |
|
||||||
from PyCTBN.optimizers import * |
|
||||||
import PyCTBN.structure_graph |
|
||||||
from PyCTBN.structure_graph import * |
|
||||||
import PyCTBN.utility |
|
||||||
from PyCTBN.utility import * |
|
@ -1,5 +0,0 @@ |
|||||||
from .fam_score_calculator import FamScoreCalculator |
|
||||||
from .parameters_estimator import ParametersEstimator |
|
||||||
from .structure_estimator import StructureEstimator |
|
||||||
from .structure_constraint_based_estimator import StructureConstraintBasedEstimator |
|
||||||
from .structure_score_based_estimator import StructureScoreBasedEstimator |
|
@ -1,272 +0,0 @@ |
|||||||
|
|
||||||
import itertools |
|
||||||
import json |
|
||||||
import typing |
|
||||||
|
|
||||||
import networkx as nx |
|
||||||
import numpy as np |
|
||||||
from networkx.readwrite import json_graph |
|
||||||
|
|
||||||
from math import log |
|
||||||
|
|
||||||
from scipy.special import loggamma |
|
||||||
from random import choice |
|
||||||
|
|
||||||
from ..structure_graph.set_of_cims import SetOfCims |
|
||||||
from ..structure_graph.network_graph import NetworkGraph |
|
||||||
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix |
|
||||||
|
|
||||||
|
|
||||||
''' |
|
||||||
|
|
||||||
''' |
|
||||||
|
|
||||||
|
|
||||||
class FamScoreCalculator:
    """
    Computes the FamScore of a node with a Bayesian score function.

    The score is the sum of two terms, both expressed through log-gamma
    functions of the sufficient statistics stored in the node's cims:
    one over the ``q`` parameters and one over the ``theta`` parameters.
    """

    def __init__(self):
        # The calculator keeps no state; every method works on its arguments.
        pass

    # region theta

    def marginal_likelihood_theta(self,
                                  cims: 'ConditionalIntensityMatrix',
                                  alpha_xu: float,
                                  alpha_xxu: float):
        """
        Sum the theta marginal likelihood term over all the cims of a node.

        :param cims: iterable with all the node's cims
        :type cims: np.array
        :param alpha_xu: hyperparameter over the CTBN's q parameters
        :type alpha_xu: float
        :param alpha_xxu: distributed hyperparameter over the CTBN's theta parameters
        :type alpha_xxu: float

        :return: the value of the marginal likelihood over theta
        :rtype: float
        """
        per_cim_scores = [
            self.variable_cim_xu_marginal_likelihood_theta(cim, alpha_xu, alpha_xxu)
            for cim in cims
        ]
        return np.sum(per_cim_scores)

    def variable_cim_xu_marginal_likelihood_theta(self,
                                                  cim: 'ConditionalIntensityMatrix',
                                                  alpha_xu: float,
                                                  alpha_xxu: float):
        """
        Sum the theta marginal likelihood term over every state of one cim.

        :param cim: object exposing ``_state_residence_times`` and
            ``state_transition_matrix``
        :type cim: class:'ConditionalIntensityMatrix'
        :param alpha_xu: hyperparameter over the CTBN's q parameters
        :type alpha_xu: float
        :param alpha_xxu: distributed hyperparameter over the CTBN's theta parameters
        :type alpha_xxu: float

        :return: the value of the marginal likelihood over theta
        :rtype: float
        """
        # The number of states is the length of the residence-times vector.
        states_count = len(cim._state_residence_times)

        per_state_scores = [
            self.single_cim_xu_marginal_likelihood_theta(state, cim, alpha_xu, alpha_xxu)
            for state in range(states_count)
        ]
        return np.sum(per_state_scores)

    def single_cim_xu_marginal_likelihood_theta(self,
                                                index: int,
                                                cim: 'ConditionalIntensityMatrix',
                                                alpha_xu: float,
                                                alpha_xxu: float):
        """
        Compute the theta marginal likelihood term for one state of the node
        under one assignment of its parents.

        :param index: the state of the node (row of the transition matrix)
        :type index: int
        :param cim: object exposing ``_state_residence_times`` and
            ``state_transition_matrix``
        :type cim: class:'ConditionalIntensityMatrix'
        :param alpha_xu: hyperparameter over the CTBN's q parameters
        :type alpha_xu: float
        :param alpha_xxu: distributed hyperparameter over the CTBN's theta parameters
        :type alpha_xxu: float

        :return: the value of the marginal likelihood over theta for the state ``index``
        :rtype: float
        """
        other_states = list(range(len(cim._state_residence_times)))

        # The summation runs over x' != x, so the current state is dropped.
        other_states.remove(index)

        transitions = cim.state_transition_matrix
        leaving_term = loggamma(alpha_xu) - loggamma(alpha_xu + transitions[index, index])
        arrival_terms = [
            self.single_internal_cim_xxu_marginal_likelihood_theta(
                transitions[index, target], alpha_xxu)
            for target in other_states
        ]
        return leaving_term + np.sum(arrival_terms)

    def single_internal_cim_xxu_marginal_likelihood_theta(self,
                                                          M_xxu_suff_stats: float,
                                                          alpha_xxu: float=1):
        """Compute the second part of the marginal likelihood over theta formula.

        :param M_xxu_suff_stats: value of the sufficient statistic M[xx'|u]
        :type M_xxu_suff_stats: float
        :param alpha_xxu: distributed hyperparameter over the CTBN's theta parameters,
            default to 1
        :type alpha_xxu: float

        :return: ``loggamma(alpha_xxu + M[xx'|u]) - loggamma(alpha_xxu)``
        :rtype: float
        """
        with_observations = loggamma(alpha_xxu + M_xxu_suff_stats)
        prior_only = loggamma(alpha_xxu)
        return with_observations - prior_only

    # endregion

    # region q

    def marginal_likelihood_q(self,
                              cims: np.array,
                              tau_xu: float=0.1,
                              alpha_xu: float=1):
        """
        Sum the q marginal likelihood term over all the cims of a node.

        :param cims: iterable with all the node's cims
        :type cims: np.array
        :param tau_xu: hyperparameter over the CTBN's q parameters, default to 0.1
        :type tau_xu: float
        :param alpha_xu: hyperparameter over the CTBN's q parameters, default to 1
        :type alpha_xu: float

        :return: the value of the marginal likelihood over q
        :rtype: float
        """
        per_cim_scores = [
            self.variable_cim_xu_marginal_likelihood_q(cim, tau_xu, alpha_xu)
            for cim in cims
        ]
        return np.sum(per_cim_scores)

    def variable_cim_xu_marginal_likelihood_q(self,
                                              cim: 'ConditionalIntensityMatrix',
                                              tau_xu: float=0.1,
                                              alpha_xu: float=1):
        """
        Sum the q marginal likelihood term over every state of one cim.

        :param cim: object exposing ``_state_residence_times`` and
            ``state_transition_matrix``
        :type cim: class:'ConditionalIntensityMatrix'
        :param tau_xu: hyperparameter over the CTBN's q parameters, default to 0.1
        :type tau_xu: float
        :param alpha_xu: hyperparameter over the CTBN's q parameters, default to 1
        :type alpha_xu: float

        :return: the value of the marginal likelihood over q
        :rtype: float
        """
        # The number of states is the length of the residence-times vector.
        states_count = len(cim._state_residence_times)

        # For each state, M[x|u] is read from the diagonal of the transition
        # matrix and T[x|u] from the residence-times vector.
        per_state_scores = [
            self.single_cim_xu_marginal_likelihood_q(
                cim.state_transition_matrix[state, state],
                cim._state_residence_times[state],
                tau_xu,
                alpha_xu)
            for state in range(states_count)
        ]
        return np.sum(per_state_scores)

    def single_cim_xu_marginal_likelihood_q(self,
                                            M_xu_suff_stats: float,
                                            T_xu_suff_stats: float,
                                            tau_xu: float=0.1,
                                            alpha_xu: float=1):
        """
        Compute the q marginal likelihood term for one state of the node
        under one assignment of its parents.

        :param M_xu_suff_stats: value of the sufficient statistic M[x|u]
        :type M_xu_suff_stats: float
        :param T_xu_suff_stats: value of the sufficient statistic T[x|u]
        :type T_xu_suff_stats: float
        :param tau_xu: hyperparameter over the CTBN's q parameters, default to 0.1
        :type tau_xu: float
        :param alpha_xu: hyperparameter over the CTBN's q parameters, default to 1
        :type alpha_xu: float

        :return: the value of the marginal likelihood over q for the given statistics
        :rtype: float
        """
        positive_part = (
            loggamma(alpha_xu + M_xu_suff_stats + 1)
            + (log(tau_xu) * (alpha_xu + 1))
        )
        negative_part = (
            loggamma(alpha_xu + 1)
            + (log(tau_xu + T_xu_suff_stats) * (alpha_xu + M_xu_suff_stats + 1))
        )
        return positive_part - negative_part

    # endregion

    def get_fam_score(self,
                      cims: np.array,
                      tau_xu: float=0.1,
                      alpha_xu: float=1):
        """
        Compute the FamScore value of the node.

        ``alpha_xxu`` is derived from ``alpha_xu`` by spreading it uniformly
        over the states different from the current one; the number of states
        is taken from the first cim.

        :param cims: sequence with all the node's cims
        :type cims: np.array
        :param tau_xu: hyperparameter over the CTBN's q parameters, default to 0.1
        :type tau_xu: float, optional
        :param alpha_xu: hyperparameter over the CTBN's q parameters, default to 1
        :type alpha_xu: float, optional

        :return: the FamScore value of the node
        :rtype: float
        """
        # Uniform split of alpha_xu over the (states - 1) possible destinations.
        states_count = len(cims[0]._state_residence_times)
        alpha_xxu = alpha_xu / (states_count - 1)

        q_score = self.marginal_likelihood_q(cims, tau_xu, alpha_xu)
        theta_score = self.marginal_likelihood_theta(cims, alpha_xu, alpha_xxu)
        return q_score + theta_score
|
@ -1,143 +0,0 @@ |
|||||||
import sys |
|
||||||
sys.path.append('../') |
|
||||||
import numpy as np |
|
||||||
|
|
||||||
from ..structure_graph.network_graph import NetworkGraph |
|
||||||
from ..structure_graph.set_of_cims import SetOfCims |
|
||||||
from ..structure_graph.trajectory import Trajectory |
|
||||||
|
|
||||||
|
|
||||||
class ParametersEstimator(object):
    """Has the task of computing the cims of a particular node given the trajectories
    and the net structure in the graph ``_net_graph``.

    :param trajectories: the trajectories
    :type trajectories: Trajectory
    :param net_graph: the net structure
    :type net_graph: NetworkGraph
    :_single_set_of_cims: the set of cims object that will hold the cims of the node;
        it is ``None`` until :meth:`fast_init` is called
    """

    def __init__(self, trajectories: 'Trajectory', net_graph: 'NetworkGraph'):
        """Constructor Method
        """
        self._trajectories = trajectories
        self._net_graph = net_graph
        self._single_set_of_cims = None

    def fast_init(self, node_id: str) -> None:
        """Initializes all the necessary structures for the parameters estimation
        for the node ``node_id``.

        :param node_id: the node label
        :type node_id: string
        """
        p_vals = self._net_graph._aggregated_info_about_nodes_parents[2]
        node_states_number = self._net_graph.get_states_number(node_id)
        self._single_set_of_cims = SetOfCims(node_id, p_vals, node_states_number,
                                             self._net_graph.p_combs)

    def compute_parameters_for_node(self, node_id: str) -> 'SetOfCims':
        """Compute the CIMS of the node identified by the label ``node_id``.

        :meth:`fast_init` must have been called first, because the buffers filled
        here belong to the ``SetOfCims`` object it creates.

        :param node_id: the node label
        :type node_id: string
        :return: A SetOfCims object filled with the computed CIMS
        :rtype: SetOfCims
        """
        node_indx = self._net_graph.get_node_indx(node_id)
        state_res_times = self._single_set_of_cims._state_residence_times
        transition_matrices = self._single_set_of_cims._transition_matrices
        # Both helpers write their results in place into the buffers read above.
        ParametersEstimator.compute_state_res_time_for_node(
            self._trajectories.times,
            self._trajectories.trajectory,
            self._net_graph.time_filtering,
            self._net_graph.time_scalar_indexing_strucure,
            state_res_times)
        ParametersEstimator.compute_state_transitions_for_a_node(
            node_indx,
            self._trajectories.complete_trajectory,
            self._net_graph.transition_filtering,
            self._net_graph.transition_scalar_indexing_structure,
            transition_matrices)
        self._single_set_of_cims.build_cims(state_res_times, transition_matrices)
        return self._single_set_of_cims

    @staticmethod
    def compute_state_res_time_for_node(times: np.ndarray, trajectory: np.ndarray,
                                        cols_filter: np.ndarray, scalar_indexes_struct: np.ndarray,
                                        T: np.ndarray) -> None:
        """Compute the state residence times for a node and fill the matrix ``T``
        with the results.

        Every row of ``trajectory`` is reduced to a single integer index (weighted
        sum of the columns selected by ``cols_filter``); the time deltas are then
        accumulated per index and reshaped to ``T``'s row length.

        :param times: the times deltas vector
        :type times: numpy.array
        :param trajectory: the trajectory
        :type trajectory: numpy.ndArray
        :param cols_filter: the columns filtering structure
        :type cols_filter: numpy.array
        :param scalar_indexes_struct: the indexing structure; its last element is
            used as the minimum number of bins
        :type scalar_indexes_struct: numpy.array
        :param T: the state residence times vectors, overwritten in place
        :type T: numpy.ndArray
        """
        # ``np.int`` was only an alias of the builtin ``int`` and no longer exists
        # in NumPy >= 1.24, so the builtin is used directly.
        flat_indexes = np.sum(
            trajectory[:, cols_filter] * scalar_indexes_struct / scalar_indexes_struct[0],
            axis=1).astype(int)
        T[:] = np.bincount(flat_indexes,
                           times,
                           minlength=scalar_indexes_struct[-1]).reshape(-1, T.shape[1])

    @staticmethod
    def compute_state_transitions_for_a_node(node_indx: int, trajectory: np.ndarray, cols_filter: np.ndarray,
                                             scalar_indexing: np.ndarray, M: np.ndarray) -> None:
        """Compute the state transitions for a node and fill the matrices ``M``
        with the results.

        Off-diagonal entries receive the transition counts; every diagonal entry
        is then set to the sum of the off-diagonal counts of its row.

        :param node_indx: the index of the node
        :type node_indx: int
        :param trajectory: the trajectory
        :type trajectory: numpy.ndArray
        :param cols_filter: the columns filtering structure
        :type cols_filter: numpy.array
        :param scalar_indexing: the indexing structure; its last element is used
            as the minimum number of bins
        :type scalar_indexing: numpy.array
        :param M: the state transitions matrices, overwritten in place
        :type M: numpy.ndArray
        """
        # Position, in the flattened ``M``, of the diagonal element of every row
        # of every matrix.
        diag_indices = np.array([x * M.shape[1] + x % M.shape[1] for x in range(M.shape[0] * M.shape[1])],
                                dtype=np.int64)
        # Keep only the rows whose entry for this node in the second half of the
        # columns is non-negative.
        # NOTE(review): presumably a negative value marks "no transition of this
        # node" -- confirm against the Trajectory builder.
        trj_tmp = trajectory[trajectory[:, int(trajectory.shape[1] / 2) + node_indx].astype(int) >= 0]
        flat_indexes = np.sum(trj_tmp[:, cols_filter] * scalar_indexing / scalar_indexing[0],
                              axis=1).astype(int)
        M[:] = np.bincount(flat_indexes,
                           minlength=scalar_indexing[-1]).reshape(-1, M.shape[1], M.shape[2])
        M_raveled = M.ravel()
        # Clear the diagonal first so that the row sums below count only the
        # off-diagonal transitions.
        M_raveled[diag_indices] = 0
        M_raveled[diag_indices] = np.sum(M, axis=2).ravel()

    def init_sets_cims_container(self):
        """Build the container with the sets of cims of all the nodes.

        NOTE(review): this method uses ``acims`` and ``self.net_graph``, neither of
        which is defined by this class or by the imports visible in this file (the
        constructor stores ``self._net_graph``). It looks like legacy code; confirm
        before calling it.
        """
        self.sets_of_cims_struct = acims.SetsOfCimsContainer(self.net_graph.nodes,
                                                             self.net_graph.nodes_values,
                                                             self.net_graph.get_ordered_by_indx_parents_values_for_all_nodes(),
                                                             self.net_graph.p_combs)

    def compute_parameters(self):
        """Compute the cims of every node of the net.

        NOTE(review): this method reads ``self.net_graph``, ``self.sample_path`` and
        ``self.sets_of_cims_struct``, which the constructor does not set, and it
        passes one more positional argument to ``compute_state_res_time_for_node``
        than that method accepts. It looks like legacy code; confirm before
        calling it.
        """
        for indx, aggr in enumerate(zip(self.net_graph.nodes, self.sets_of_cims_struct.sets_of_cims)):
            self.compute_state_res_time_for_node(self.net_graph.get_node_indx(aggr[0]), self.sample_path.trajectories.times,
                                                 self.sample_path.trajectories.trajectory,
                                                 self.net_graph.time_filtering[indx],
                                                 self.net_graph.time_scalar_indexing_strucure[indx],
                                                 aggr[1]._state_residence_times)
            self.compute_state_transitions_for_a_node(self.net_graph.get_node_indx(aggr[0]),
                                                      self.sample_path.trajectories.complete_trajectory,
                                                      self.net_graph.transition_filtering[indx],
                                                      self.net_graph.transition_scalar_indexing_structure[indx],
                                                      aggr[1]._transition_matrices)
            aggr[1].build_cims(aggr[1]._state_residence_times, aggr[1]._transition_matrices)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,238 +0,0 @@ |
|||||||
|
|
||||||
import itertools |
|
||||||
import json |
|
||||||
import typing |
|
||||||
|
|
||||||
import networkx as nx |
|
||||||
import numpy as np |
|
||||||
from networkx.readwrite import json_graph |
|
||||||
import os |
|
||||||
from scipy.stats import chi2 as chi2_dist |
|
||||||
from scipy.stats import f as f_dist |
|
||||||
from tqdm import tqdm |
|
||||||
|
|
||||||
from ..utility.cache import Cache |
|
||||||
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix |
|
||||||
from ..structure_graph.network_graph import NetworkGraph |
|
||||||
from .parameters_estimator import ParametersEstimator |
|
||||||
from .structure_estimator import StructureEstimator |
|
||||||
from ..structure_graph.sample_path import SamplePath |
|
||||||
from ..structure_graph.structure import Structure |
|
||||||
from ..optimizers.constraint_based_optimizer import ConstraintBasedOptimizer |
|
||||||
|
|
||||||
import concurrent.futures |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
import multiprocessing |
|
||||||
from multiprocessing import Pool |
|
||||||
|
|
||||||
|
|
||||||
class StructureConstraintBasedEstimator(StructureEstimator):
    """
    Has the task of estimating the network structure given the trajectories in samplepath
    by using a constraint-based approach.

    :param sample_path: the _sample_path object containing the trajectories and the real structure
    :type sample_path: SamplePath
    :param exp_test_alfa: the significance level for the exponential Hp test
    :type exp_test_alfa: float
    :param chi_test_alfa: the significance level for the chi Hp test
    :type chi_test_alfa: float
    :param known_edges: edges given as prior knowledge, forwarded to the base class;
        default to ``None`` (no prior knowledge)
    :type known_edges: List
    :param thumb_threshold: threshold compared against the thumb value in
        :meth:`independence_test`, default to 25
    :type thumb_threshold: int
    :_nodes: the nodes labels
    :_nodes_vals: the nodes cardinalities
    :_nodes_indxs: the nodes indexes
    :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes``
    :_cache: the Cache object
    """

    def __init__(self, sample_path: SamplePath, exp_test_alfa: float, chi_test_alfa: float,
                 known_edges: typing.List = None, thumb_threshold: int = 25):
        # ``None`` replaces the former shared mutable default ``[]``; the base
        # class treats both as "no known edges".
        super().__init__(sample_path, known_edges)
        self._exp_test_sign = exp_test_alfa
        self._chi_test_alfa = chi_test_alfa
        self._thumb_threshold = thumb_threshold
        self._cache = Cache()

    def _build_full_test_graph(self, test_parent: str, test_child: str, parent_set: typing.List,
                               tot_vars_count: int):
        """Build and initialise the graph in which ``test_parent`` and every node of
        ``parent_set`` are parents of ``test_child``."""
        family = list(parent_set) + [test_child, test_parent]
        bool_mask = np.isin(self._nodes, family)
        labels = list(self._nodes[bool_mask])
        indxs = self._nodes_indxs[bool_mask]
        vals = self._nodes_vals[bool_mask]
        # One (parent, child) pair per candidate parent. The child label is kept
        # whole: iterating it (as itertools.product would) splits a
        # multi-character label into single characters.
        edges = [(parent, test_child) for parent in [test_parent] + list(parent_set)]
        graph = NetworkGraph(Structure(labels, indxs, vals, edges, tot_vars_count))
        graph.fast_init(test_child)
        return graph

    def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int,
                      tot_vars_count: int, parent_indx, child_indx) -> bool:
        """Performs a complete independence test on the directed graphs G1 = {test_child U parent_set}
        G2 = {G1 U test_parent} (added as an additional parent of the test_child).
        Generates all the necessary structures and datas to perform the tests.

        :param test_parent: the node label of the test parent
        :type test_parent: string
        :param test_child: the node label of the child
        :type test_child: string
        :param parent_set: the common parent set
        :type parent_set: List
        :param child_states_numb: the cardinality of the ``test_child``
        :type child_states_numb: int
        :param tot_vars_count: the total number of variables in the net
        :type tot_vars_count: int
        :param parent_indx: index of ``test_parent``, forwarded to :meth:`independence_test`
        :param child_indx: index of ``test_child``, forwarded to :meth:`independence_test`
        :return: True iff test_child and test_parent are independent given the sep_set parent_set. False otherwise
        :rtype: bool
        """
        p_set = parent_set[:]

        parents = np.array(parent_set)
        parents = np.append(parents, test_parent)
        sorted_parents = self._nodes[np.isin(self._nodes, parents)]
        # Mask selecting, among the parents of G2, the ones that also belong to G1.
        cims_filter = sorted_parents != test_parent

        p_set.insert(0, test_parent)
        # The graph is built only when at least one of the two cache lookups misses.
        g2 = None
        sofc2 = self._cache.find(set(p_set))

        if not sofc2:
            g2 = self._build_full_test_graph(test_parent, test_child, parent_set, tot_vars_count)
            p2 = ParametersEstimator(self._sample_path.trajectories, g2)
            p2.fast_init(test_child)
            sofc2 = p2.compute_parameters_for_node(test_child)
            self._cache.put(set(p_set), sofc2)

        del p_set[0]
        sofc1 = self._cache.find(set(p_set))
        if not sofc1:
            if g2 is None:
                # sofc2 came from the cache, so the graph was never built in this
                # call; build it now instead of touching an unbound name.
                g2 = self._build_full_test_graph(test_parent, test_child, parent_set, tot_vars_count)
            g2.remove_node(test_parent)
            g2.fast_init(test_child)
            p2 = ParametersEstimator(self._sample_path.trajectories, g2)
            p2.fast_init(test_child)
            sofc1 = p2.compute_parameters_for_node(test_child)
            self._cache.put(set(p_set), sofc1)

        thumb_value = 0.0
        if child_states_numb > 2:
            parent_val = self._sample_path.structure.get_states_number(test_parent)
            bool_mask_vals = np.isin(self._nodes, parent_set)
            parents_vals = self._nodes_vals[bool_mask_vals]
            thumb_value = self.compute_thumb_value(parent_val, child_states_numb, parents_vals)

        # Every cim of G1 is compared with the cims of G2 that share its parents
        # combination; one rejected test is enough to declare dependence.
        for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs):
            cond_cims = sofc2.filter_cims_with_mask(cims_filter, p_comb)
            for cim2 in cond_cims:
                if not self.independence_test(child_states_numb, cim1, cim2, thumb_value, parent_indx, child_indx):
                    return False
        return True

    def independence_test(self, child_states_numb: int, cim1: ConditionalIntensityMatrix,
                          cim2: ConditionalIntensityMatrix, thumb_value: float, parent_indx, child_indx) -> bool:
        """Compute the actual independence test using two cims.
        It is performed first the exponential test and if the null hypothesis is not rejected,
        it is performed also the chi_test.

        :param child_states_numb: the cardinality of the test child
        :type child_states_numb: int
        :param cim1: a cim belonging to the graph without test parent
        :type cim1: ConditionalIntensityMatrix
        :param cim2: a cim belonging to the graph with test parent
        :type cim2: ConditionalIntensityMatrix
        :param thumb_value: the value returned by :meth:`compute_thumb_value`; only used
            when ``child_states_numb`` is greater than 2
        :type thumb_value: float
        :param parent_indx: row of ``_removable_edges_matrix`` set to False when the
            thumb check fails
        :param child_indx: column of ``_removable_edges_matrix`` set to False when the
            thumb check fails
        :return: True iff both tests do NOT reject the null hypothesis of independence. False otherwise.
        :rtype: bool
        """
        M1 = cim1.state_transition_matrix
        M2 = cim2.state_transition_matrix
        r1s = M1.diagonal()
        r2s = M2.diagonal()
        C1 = cim1.cim
        C2 = cim2.cim
        if child_states_numb > 2:
            # Thumb check: when too few transitions were observed relative to
            # thumb_value, the edge is marked as not removable and the test fails.
            if (np.sum(np.diagonal(M1)) / thumb_value) < self._thumb_threshold:
                self._removable_edges_matrix[parent_indx][child_indx] = False
                return False
        # Exponential (F) test on the ratio of the diagonals of the two cims.
        F_stats = C2.diagonal() / C1.diagonal()
        exp_alfa = self._exp_test_sign
        for val in range(0, child_states_numb):
            if F_stats[val] < f_dist.ppf(exp_alfa / 2, r1s[val], r2s[val]) or \
                    F_stats[val] > f_dist.ppf(1 - exp_alfa / 2, r1s[val], r2s[val]):
                return False
        # Chi-square test on the off-diagonal transition counts.
        M1_no_diag = M1[~np.eye(M1.shape[0], dtype=bool)].reshape(M1.shape[0], -1)
        M2_no_diag = M2[~np.eye(M2.shape[0], dtype=bool)].reshape(
            M2.shape[0], -1)
        chi_2_quantile = chi2_dist.ppf(1 - self._chi_test_alfa, child_states_numb - 1)
        Ks = np.sqrt(r1s / r2s)
        Ls = np.sqrt(r2s / r1s)
        for val in range(0, child_states_numb):
            Chi = np.sum(np.power(Ks[val] * M2_no_diag[val] - Ls[val] * M1_no_diag[val], 2) /
                         (M1_no_diag[val] + M2_no_diag[val]))
            if Chi > chi_2_quantile:
                return False
        return True

    def compute_thumb_value(self, parent_val, child_val, parent_set_vals):
        """Compute the value to test against the thumb_threshold.

        The value is ``(child_val - 1) ** 2 * parent_val`` multiplied by every
        cardinality in ``parent_set_vals``.

        :param parent_val: test parent's variable cardinality
        :type parent_val: int
        :param child_val: test child's variable cardinality
        :type child_val: int
        :param parent_set_vals: the cardinalities of the nodes in the current sep-set
        :type parent_set_vals: List
        :return: the thumb value for the current independence test
        :rtype: int
        """
        df = (child_val - 1) ** 2
        df = df * parent_val
        for v in parent_set_vals:
            df = df * v
        return df

    def one_iteration_of_CTPC_algorithm(self, var_id: str, tot_vars_count: int) -> typing.List:
        """Performs an iteration of the CTPC algorithm using the node ``var_id`` as ``test_child``.

        :param var_id: the node label of the test child
        :type var_id: string
        :param tot_vars_count: the total number of variables in the net
        :type tot_vars_count: int
        :return: the result of ``ConstraintBasedOptimizer.optimize_structure`` for the node
        """
        optimizer_obj = ConstraintBasedOptimizer(
            node_id=var_id,
            structure_estimator=self,
            tot_vars_count=tot_vars_count)
        return optimizer_obj.optimize_structure()

    def ctpc_algorithm(self, disable_multiprocessing: bool = False):
        """Compute the CTPC algorithm over the entire net.

        :param disable_multiprocessing: when True the nodes are processed sequentially
            in the current process, otherwise in a process pool with one worker per CPU
        :type disable_multiprocessing: bool
        :return: the set of all the items produced by the per-node iterations
        :rtype: set
        """
        ctpc_algo = self.one_iteration_of_CTPC_algorithm
        total_vars_numb = self._sample_path.total_variables_count

        # Remove all the edges from the structure
        self._sample_path.structure.clean_structure_edges()

        # Estimate the best parents for each node
        if disable_multiprocessing:
            print("DISABILITATO")
            list_edges_partial = [ctpc_algo(n, total_vars_numb) for n in self._nodes]
        else:
            cpu_count = multiprocessing.cpu_count()
            # executor.map needs one iterable per positional argument.
            total_vars_numb_array = [total_vars_numb] * len(self._nodes)
            with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor:
                list_edges_partial = executor.map(ctpc_algo,
                                                  self._nodes,
                                                  total_vars_numb_array)

        return set(itertools.chain.from_iterable(list_edges_partial))

    def estimate_structure(self, disable_multiprocessing: bool = False):
        """Estimate the structure by running :meth:`ctpc_algorithm`.

        :param disable_multiprocessing: forwarded to :meth:`ctpc_algorithm`
        :type disable_multiprocessing: bool
        :return: the value returned by :meth:`ctpc_algorithm`
        """
        return self.ctpc_algorithm(disable_multiprocessing=disable_multiprocessing)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,187 +0,0 @@ |
|||||||
|
|
||||||
import itertools |
|
||||||
import json |
|
||||||
import typing |
|
||||||
|
|
||||||
import matplotlib.pyplot as plt |
|
||||||
import networkx as nx |
|
||||||
import numpy as np |
|
||||||
from networkx.readwrite import json_graph |
|
||||||
|
|
||||||
from abc import ABC |
|
||||||
|
|
||||||
import abc |
|
||||||
|
|
||||||
from ..utility.cache import Cache |
|
||||||
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix |
|
||||||
from ..structure_graph.network_graph import NetworkGraph |
|
||||||
from .parameters_estimator import ParametersEstimator |
|
||||||
from ..structure_graph.sample_path import SamplePath |
|
||||||
from ..structure_graph.structure import Structure |
|
||||||
|
|
||||||
|
|
||||||
class StructureEstimator(object): |
|
||||||
"""Has the task of estimating the network structure given the trajectories in ``samplepath``. |
|
||||||
|
|
||||||
:param sample_path: the _sample_path object containing the trajectories and the real structure |
|
||||||
:type sample_path: SamplePath |
|
||||||
:_nodes: the nodes labels |
|
||||||
:_nodes_vals: the nodes cardinalities |
|
||||||
:_nodes_indxs: the nodes indexes |
|
||||||
:_complete_graph: the complete directed graph built using the nodes labels in ``_nodes`` |
|
||||||
""" |
|
||||||
|
|
||||||
def __init__(self, sample_path: SamplePath, known_edges: typing.List = None):
    """Store the sample path and derive the node information from its structure.

    :param sample_path: the object containing the trajectories and the structure
    :type sample_path: SamplePath
    :param known_edges: edges given as prior knowledge, passed to
        ``build_removable_edges_matrix``; default to ``None``
    :type known_edges: List
    """
    self._sample_path = sample_path
    structure = self._sample_path.structure
    # Labels are wrapped in an array; values and indexes are stored as provided.
    self._nodes = np.array(structure.nodes_labels)
    self._nodes_vals = structure.nodes_values
    self._nodes_indxs = structure.nodes_indexes
    # Must come after ``_sample_path`` is set: the builder reads it.
    self._removable_edges_matrix = self.build_removable_edges_matrix(known_edges)
    self._complete_graph = StructureEstimator.build_complete_graph(structure.nodes_labels)
|
||||||
|
|
||||||
|
|
||||||
def build_removable_edges_matrix(self, known_edges: typing.List):
    """Build the boolean matrix telling whether each edge may be removed.

    The matrix is square, with one row and one column per variable, and starts
    all True. For every known edge the cell at (index of the first node, index
    of the second node) is set to False.

    :param known_edges: the edges given as prior knowledge; a falsy value
        leaves the matrix all True
    :type known_edges: List
    :return: a boolean matrix
    :rtype: np.ndarray
    """
    variables_count = self._sample_path.total_variables_count
    removable = np.full((variables_count, variables_count), True)
    if not known_edges:
        return removable
    index_of = self._sample_path.structure.get_node_indx
    for edge in known_edges:
        removable[index_of(edge[0]), index_of(edge[1])] = False
    return removable
|
||||||
|
|
||||||
@staticmethod |
|
||||||
def build_complete_graph(node_ids: typing.List) -> nx.DiGraph: |
|
||||||
"""Builds a complete directed graph (no self loops) given the nodes labels in the list ``node_ids``: |
|
||||||
|
|
||||||
:param node_ids: the list of nodes labels |
|
||||||
:type node_ids: List |
|
||||||
:return: a complete Digraph Object |
|
||||||
:rtype: networkx.DiGraph |
|
||||||
""" |
|
||||||
complete_graph = nx.DiGraph() |
|
||||||
complete_graph.add_nodes_from(node_ids) |
|
||||||
complete_graph.add_edges_from(itertools.permutations(node_ids, 2)) |
|
||||||
return complete_graph |
|
||||||
|
|
||||||
|
|
||||||
@staticmethod |
|
||||||
def generate_possible_sub_sets_of_size( u: typing.List, size: int, parent_label: str): |
|
||||||
"""Creates a list containing all possible subsets of the list ``u`` of size ``size``, |
|
||||||
that do not contains a the node identified by ``parent_label``. |
|
||||||
|
|
||||||
:param u: the list of nodes |
|
||||||
:type u: List |
|
||||||
:param size: the size of the subsets |
|
||||||
:type size: int |
|
||||||
:param parent_label: the node to exclude in the subsets generation |
|
||||||
:type parent_label: string |
|
||||||
:return: an Iterator Object containing a list of lists |
|
||||||
:rtype: Iterator |
|
||||||
""" |
|
||||||
list_without_test_parent = u[:] |
|
||||||
list_without_test_parent.remove(parent_label) |
|
||||||
return map(list, itertools.combinations(list_without_test_parent, size)) |
|
||||||
|
|
||||||
def save_results(self) -> None: |
|
||||||
"""Save the estimated Structure to a .json file in the path where the data are loaded from. |
|
||||||
The file is named as the input dataset but the `results_` word is appended to the results file. |
|
||||||
""" |
|
||||||
res = json_graph.node_link_data(self._complete_graph) |
|
||||||
name = self._sample_path._importer.file_path.rsplit('/', 1)[-1] |
|
||||||
name = name.split('.', 1)[0] |
|
||||||
name += '_' + str(self._sample_path._importer.dataset_id()) |
|
||||||
name += '.json' |
|
||||||
file_name = 'results_' + name |
|
||||||
with open(file_name, 'w') as f: |
|
||||||
json.dump(res, f) |
|
||||||
|
|
||||||
|
|
||||||
def remove_diagonal_elements(self, matrix): |
|
||||||
m = matrix.shape[0] |
|
||||||
strided = np.lib.stride_tricks.as_strided |
|
||||||
s0, s1 = matrix.strides |
|
||||||
return strided(matrix.ravel()[1:], shape=(m - 1, m), strides=(s0 + s1, s1)).reshape(m, -1) |
|
||||||
|
|
||||||
|
|
||||||
@abc.abstractmethod |
|
||||||
def estimate_structure(self) -> typing.List: |
|
||||||
"""Abstract method to estimate the structure |
|
||||||
|
|
||||||
:return: List of estimated edges |
|
||||||
:rtype: Typing.List |
|
||||||
""" |
|
||||||
pass |
|
||||||
|
|
||||||
|
|
||||||
def adjacency_matrix(self) -> np.ndarray: |
|
||||||
"""Converts the estimated structure ``_complete_graph`` to a boolean adjacency matrix representation. |
|
||||||
|
|
||||||
:return: The adjacency matrix of the graph ``_complete_graph`` |
|
||||||
:rtype: numpy.ndArray |
|
||||||
""" |
|
||||||
return nx.adj_matrix(self._complete_graph).toarray().astype(bool) |
|
||||||
|
|
||||||
def spurious_edges(self) -> typing.List: |
|
||||||
"""Return the spurious edges present in the estimated structure, if a prior net structure is present in |
|
||||||
``_sample_path.structure``. |
|
||||||
|
|
||||||
:return: A list containing the spurious edges |
|
||||||
:rtype: List |
|
||||||
""" |
|
||||||
if not self._sample_path.has_prior_net_structure: |
|
||||||
raise RuntimeError("Can not compute spurious edges with no prior net structure!") |
|
||||||
real_graph = nx.DiGraph() |
|
||||||
real_graph.add_nodes_from(self._sample_path.structure.nodes_labels) |
|
||||||
real_graph.add_edges_from(self._sample_path.structure.edges) |
|
||||||
return nx.difference(real_graph, self._complete_graph).edges |
|
||||||
|
|
||||||
def save_plot_estimated_structure_graph(self) -> None: |
|
||||||
"""Plot the estimated structure in a graphical model style. |
|
||||||
Spurious edges are colored in red. |
|
||||||
""" |
|
||||||
graph_to_draw = nx.DiGraph() |
|
||||||
spurious_edges = self.spurious_edges() |
|
||||||
non_spurious_edges = list(set(self._complete_graph.edges) - set(spurious_edges)) |
|
||||||
print(non_spurious_edges) |
|
||||||
edges_colors = ['red' if edge in spurious_edges else 'black' for edge in self._complete_graph.edges] |
|
||||||
graph_to_draw.add_edges_from(spurious_edges) |
|
||||||
graph_to_draw.add_edges_from(non_spurious_edges) |
|
||||||
pos = nx.spring_layout(graph_to_draw, k=0.5*1/np.sqrt(len(graph_to_draw.nodes())), iterations=50,scale=10) |
|
||||||
options = { |
|
||||||
"node_size": 2000, |
|
||||||
"node_color": "white", |
|
||||||
"edgecolors": "black", |
|
||||||
'linewidths':2, |
|
||||||
"with_labels":True, |
|
||||||
"font_size":13, |
|
||||||
'connectionstyle': 'arc3, rad = 0.1', |
|
||||||
"arrowsize": 15, |
|
||||||
"arrowstyle": '<|-', |
|
||||||
"width": 1, |
|
||||||
"edge_color":edges_colors, |
|
||||||
} |
|
||||||
|
|
||||||
nx.draw(graph_to_draw, pos, **options) |
|
||||||
ax = plt.gca() |
|
||||||
ax.margins(0.20) |
|
||||||
plt.axis("off") |
|
||||||
name = self._sample_path._importer.file_path.rsplit('/', 1)[-1] |
|
||||||
name = name.split('.', 1)[0] |
|
||||||
name += '_' + str(self._sample_path._importer.dataset_id()) |
|
||||||
name += '.png' |
|
||||||
plt.savefig(name) |
|
||||||
plt.clf() |
|
||||||
print("Estimated Structure Plot Saved At: ", os.path.abspath(name)) |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,244 +0,0 @@ |
|||||||
|
|
||||||
import itertools |
|
||||||
import json |
|
||||||
import typing |
|
||||||
|
|
||||||
import networkx as nx |
|
||||||
import numpy as np |
|
||||||
from networkx.readwrite import json_graph |
|
||||||
|
|
||||||
from random import choice |
|
||||||
|
|
||||||
import concurrent.futures |
|
||||||
|
|
||||||
import copy |
|
||||||
|
|
||||||
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix |
|
||||||
from ..structure_graph.network_graph import NetworkGraph |
|
||||||
from .parameters_estimator import ParametersEstimator |
|
||||||
from .structure_estimator import StructureEstimator |
|
||||||
from ..structure_graph.sample_path import SamplePath |
|
||||||
from ..structure_graph.structure import Structure |
|
||||||
from .fam_score_calculator import FamScoreCalculator |
|
||||||
from ..optimizers.hill_climbing_search import HillClimbing |
|
||||||
from ..optimizers.tabu_search import TabuSearch |
|
||||||
|
|
||||||
|
|
||||||
import multiprocessing |
|
||||||
from multiprocessing import Pool |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class StructureScoreBasedEstimator(StructureEstimator):
    """
    Has the task of estimating the network structure given the trajectories in samplepath by
    using a score based approach.

    :param sample_path: the _sample_path object containing the trajectories and the real structure
    :type sample_path: SamplePath
    :param tau_xu: hyperparameter over the CTBN's q parameters, default to 0.1
    :type tau_xu: float, optional
    :param alpha_xu: hyperparameter over the CTBN's q parameters, default to 1
    :type alpha_xu: float, optional
    :param known_edges: List of known edges, default to None (no known edges)
    :type known_edges: List, optional
    """

    def __init__(self, sample_path: SamplePath, tau_xu: float = 0.1, alpha_xu: float = 1,
                 known_edges: typing.List = None):
        # None replaces the former mutable ``[]`` default; the parent treats both as "no known edges"
        super().__init__(sample_path, known_edges)
        self.tau_xu = tau_xu
        self.alpha_xu = alpha_xu

    def estimate_structure(self, max_parents: int = None, iterations_number: int = 40,
                           patience: int = None, tabu_length: int = None, tabu_rules_duration: int = None,
                           optimizer: str = 'tabu', disable_multiprocessing: bool = False):
        """
        Compute the score-based algorithm to find the optimal structure.

        The edges currently stored in ``_sample_path.structure`` are taken as the true edges and
        then removed from the structure; precision and recall of the estimate against them are
        printed when they can be computed.

        :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
        :type max_parents: int, optional
        :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
        :type iterations_number: int, optional
        :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None
        :type patience: int, optional
        :param tabu_length: maximum length of the data structures used in the optimization process, default to None
        :type tabu_length: int, optional
        :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None
        :type tabu_rules_duration: int, optional
        :param optimizer: name of the optimizer algorithm. Possible values: 'hill' (Hill climbing),'tabu' (tabu search), default to 'tabu'
        :type optimizer: string, optional
        :param disable_multiprocessing: true if you desire to disable the multiprocessing operations, default to False
        :type disable_multiprocessing: Boolean, optional
        :return: the set of the estimated edges
        :rtype: set
        """
        # Save the true edges as tuples before they are wiped from the structure
        true_edges = {tuple(edge) for edge in self._sample_path.structure.edges}

        # Remove all the edges from the structure
        self._sample_path.structure.clean_structure_edges()

        # Arguments shared by the search of every node
        search_args = (max_parents, iterations_number, patience,
                       tabu_length, tabu_rules_duration, optimizer)

        cpu_count = multiprocessing.cpu_count()
        print(f"CPU COUNT: {cpu_count}")

        # Estimate the best parents for each node
        if disable_multiprocessing:
            list_edges_partial = [self.estimate_parents(node, *search_args) for node in self._nodes]
        else:
            with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor:
                # repeat() broadcasts each shared argument; map stops when the nodes are exhausted
                list_edges_partial = list(executor.map(self.estimate_parents,
                                                       self._nodes,
                                                       *map(itertools.repeat, search_args)))

        # Concatenate all the edges lists
        set_list_edges = set(itertools.chain.from_iterable(list_edges_partial))

        # Calculate precision and recall
        n_added_fake_edges = len(set_list_edges.difference(true_edges))
        n_missing_edges = len(true_edges.difference(set_list_edges))
        n_true_positive = len(true_edges) - n_missing_edges
        try:
            precision = n_true_positive / (n_true_positive + n_added_fake_edges)
            recall = n_true_positive / (n_true_positive + n_missing_edges)
        except ZeroDivisionError as e:
            # no estimated edges or no true edges: the metrics are undefined
            print(f"errore: {e}")
        else:
            print(true_edges)
            print(set_list_edges)
            print(f"precision: {precision} ")
            print(f"recall: {recall} ")

        return set_list_edges

    def estimate_parents(self, node_id: str, max_parents: int = None, iterations_number: int = 40,
                         patience: int = 10, tabu_length: int = None, tabu_rules_duration: int = 5,
                         optimizer: str = 'hill'):
        """
        Use the FamScore of a node in order to find the best parent nodes

        :param node_id: current node's id
        :type node_id: string
        :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
        :type max_parents: int, optional
        :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
        :type iterations_number: int, optional
        :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to 10
        :type patience: int, optional
        :param tabu_length: maximum length of the data structures used in the optimization process, default to None
        :type tabu_length: int, optional
        :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to 5
        :type tabu_rules_duration: int, optional
        :param optimizer: name of the optimizer algorithm: 'tabu' selects the tabu search, any other
            value selects the Hill climbing, default to 'hill'
        :type optimizer: string, optional

        :return: A list of the best edges for the current node
        :rtype: List
        """
        # Choose the optimizer algorithm
        if optimizer == 'tabu':
            search = TabuSearch(
                node_id=node_id,
                structure_estimator=self,
                max_parents=max_parents,
                iterations_number=iterations_number,
                patience=patience,
                tabu_length=tabu_length,
                tabu_rules_duration=tabu_rules_duration)
        else:
            search = HillClimbing(
                node_id=node_id,
                structure_estimator=self,
                max_parents=max_parents,
                iterations_number=iterations_number,
                patience=patience)

        # Call the optimizer's function that calculates the current node's parents
        return search.optimize_structure()

    def get_score_from_graph(self,
                             graph: NetworkGraph,
                             node_id: str):
        """
        Get the FamScore of a node

        :param graph: current graph to be computed
        :type graph: class:'NetworkGraph'
        :param node_id: current node's id
        :type node_id: string

        :return: The FamScore for this graph structure
        :rtype: float
        """
        # Initialize the graph for a single node
        graph.fast_init(node_id)

        params_estimation = ParametersEstimator(self._sample_path.trajectories, graph)

        # Initialize and compute the parameters for the node
        params_estimation.fast_init(node_id)
        set_of_cims = params_estimation.compute_parameters_for_node(node_id)

        # Calculate the FamScore for the node
        fam_score_obj = FamScoreCalculator()
        score = fam_score_obj.get_fam_score(set_of_cims.actual_cims,
                                            tau_xu=self.tau_xu,
                                            alpha_xu=self.alpha_xu)
        return score
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,4 +0,0 @@ |
|||||||
from .optimizer import Optimizer |
|
||||||
from .tabu_search import TabuSearch |
|
||||||
from .hill_climbing_search import HillClimbing |
|
||||||
from .constraint_based_optimizer import ConstraintBasedOptimizer |
|
@ -1,87 +0,0 @@ |
|||||||
|
|
||||||
import itertools |
|
||||||
import json |
|
||||||
import typing |
|
||||||
|
|
||||||
import networkx as nx |
|
||||||
import numpy as np |
|
||||||
|
|
||||||
from random import choice |
|
||||||
|
|
||||||
from abc import ABC |
|
||||||
|
|
||||||
import copy |
|
||||||
|
|
||||||
|
|
||||||
from .optimizer import Optimizer |
|
||||||
from ..estimators.structure_estimator import StructureEstimator |
|
||||||
from ..structure_graph.network_graph import NetworkGraph |
|
||||||
|
|
||||||
|
|
||||||
class ConstraintBasedOptimizer(Optimizer):
    """
    Optimizer class that implement a CTPC Algorithm

    :param node_id: current node's id
    :type node_id: string
    :param structure_estimator: a structure estimator object with the information about the net
    :type structure_estimator: class:'StructureEstimator'
    :param tot_vars_count: number of variables in the dataset
    :type tot_vars_count: int
    """

    def __init__(self,
                 node_id: str,
                 structure_estimator: StructureEstimator,
                 tot_vars_count: int
                 ):
        """
        Constructor
        """
        super().__init__(node_id, structure_estimator)
        self.tot_vars_count = tot_vars_count

    def optimize_structure(self):
        """
        Compute Optimization process for a structure_estimator by using a CTPC Algorithm.

        Starts from the graph where every other node is a parent of ``node_id`` and removes a
        parent whenever ``structure_estimator.complete_test`` succeeds for some separating set
        of size ``b``, with ``b`` growing from 0. Edges flagged as not removable in
        ``_removable_edges_matrix`` are never tested. The estimator's ``_cache`` is cleared at the end.

        :return: the estimated structure for the node
        :rtype: List
        """
        print("##################TESTING VAR################", self.node_id)

        estimator = self.structure_estimator
        structure = estimator._sample_path.structure
        graph = NetworkGraph(structure)

        # u holds the current candidate parents; it shrinks while edges are removed
        u = [node for node in structure.nodes_labels if node != self.node_id]
        for possible_parent in u:
            graph.add_edges([(possible_parent, self.node_id)])

        child_states_numb = structure.get_states_number(self.node_id)
        # the child index does not change during the search: look it up once
        child_indx = structure.get_node_indx(self.node_id)

        b = 0
        while b < len(u):
            parent_indx = 0
            while parent_indx < len(u):
                removed = False
                test_parent = u[parent_indx]
                test_parent_indx = structure.get_node_indx(test_parent)
                if estimator._removable_edges_matrix[test_parent_indx][child_indx]:
                    # the subsets are generated from a copy of u, so removing from u below is safe
                    candidate_sets = StructureEstimator.generate_possible_sub_sets_of_size(u, b, test_parent)
                    for parents_set in candidate_sets:
                        if estimator.complete_test(test_parent, self.node_id, parents_set,
                                                   child_states_numb, self.tot_vars_count,
                                                   test_parent_indx, child_indx):
                            graph.remove_edges([(test_parent, self.node_id)])
                            u.remove(test_parent)
                            removed = True
                            break
                # after a removal the next candidate has shifted into the same position
                if not removed:
                    parent_indx += 1
            b += 1
        estimator._cache.clear()
        return graph.edges
|
@ -1,135 +0,0 @@ |
|||||||
|
|
||||||
import itertools |
|
||||||
import json |
|
||||||
import typing |
|
||||||
|
|
||||||
import networkx as nx |
|
||||||
import numpy as np |
|
||||||
|
|
||||||
from random import choice |
|
||||||
|
|
||||||
from abc import ABC |
|
||||||
|
|
||||||
|
|
||||||
from .optimizer import Optimizer |
|
||||||
from ..estimators.structure_estimator import StructureEstimator |
|
||||||
from ..structure_graph.network_graph import NetworkGraph |
|
||||||
|
|
||||||
|
|
||||||
class HillClimbing(Optimizer):
    """
    Optimizer class that implement Hill Climbing Search

    :param node_id: current node's id
    :type node_id: string
    :param structure_estimator: a structure estimator object with the information about the net
    :type structure_estimator: class:'StructureEstimator'
    :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
    :type max_parents: int, optional
    :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
    :type iterations_number: int, optional
    :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None
    :type patience: int, optional
    """

    def __init__(self,
                 node_id: str,
                 structure_estimator: StructureEstimator,
                 max_parents: int = None,
                 iterations_number: int = 40,
                 patience: int = None
                 ):
        """
        Constructor
        """
        super().__init__(node_id, structure_estimator)
        self.max_parents = max_parents
        self.iterations_number = iterations_number
        self.patience = patience

    def optimize_structure(self) -> typing.List:
        """
        Compute Optimization process for a structure_estimator by using a Hill Climbing Algorithm.

        At each iteration a random candidate parent is toggled (added if absent, removed if
        present) and the move is kept only when the score of the node strictly improves.
        Parents coming from prior knowledge are added at the start and are never removed,
        not even to make room under ``max_parents``; when no room can be made the move is
        skipped and counted as a non-improving iteration.

        :return: the estimated structure for the node
        :rtype: List
        """
        estimator = self.structure_estimator
        structure = estimator._sample_path.structure

        # Create the graph for the single node
        graph = NetworkGraph(structure)

        # Get the index for the current node
        node_index = structure.get_node_indx(self.node_id)

        # Add the edges from prior knowledge to the starting structure
        prior_parents = set()
        removable_matrix = estimator._removable_edges_matrix
        for i in range(len(removable_matrix)):
            if not removable_matrix[i][node_index]:
                parent_id = structure.get_node_id(i)
                prior_parents.add(parent_id)
                graph.add_edges([(parent_id, self.node_id)])

        # Get all the possible parents
        other_nodes = [node for node in structure.nodes_labels
                       if node != self.node_id and node not in prior_parents]

        actual_best_score = estimator.get_score_from_graph(graph, self.node_id)

        patience_count = 0
        # with no candidate parents there is nothing to search (choice() would raise)
        iterations = range(self.iterations_number) if other_nodes else range(0)
        for _ in iterations:
            # Choose a new random edge
            current_new_parent = choice(other_nodes)
            current_edge = (current_new_parent, self.node_id)
            added = False
            parent_removed = None
            feasible = True

            if graph.has_edge(current_edge):
                graph.remove_edges([current_edge])
            else:
                # Check the max_parents constraint
                if self.max_parents is not None:
                    parents_list = graph.get_parents_by_id(self.node_id)
                    if len(parents_list) >= self.max_parents:
                        # only parents not fixed by prior knowledge may be evicted
                        evictable = [parent for parent in parents_list if parent not in prior_parents]
                        if evictable:
                            parent_removed = (choice(evictable), self.node_id)
                            graph.remove_edges([parent_removed])
                        else:
                            feasible = False
                if feasible:
                    graph.add_edges([current_edge])
                    added = True

            improved = False
            if feasible:
                current_score = estimator.get_score_from_graph(graph, self.node_id)
                improved = current_score > actual_best_score

            if improved:
                # Update current best score
                actual_best_score = current_score
                patience_count = 0
            else:
                # Undo the last update
                if added:
                    graph.remove_edges([current_edge])
                    # If a parent was removed, add it again to the graph
                    if parent_removed is not None:
                        graph.add_edges([parent_removed])
                elif feasible:
                    graph.add_edges([current_edge])
                # Update patience count
                patience_count += 1

            if self.patience is not None and patience_count > self.patience:
                break

        print(f"finito variabile: {self.node_id}")
        return graph.edges
|
@ -1,39 +0,0 @@ |
|||||||
|
|
||||||
import itertools |
|
||||||
import json |
|
||||||
import typing |
|
||||||
|
|
||||||
import networkx as nx |
|
||||||
import numpy as np |
|
||||||
|
|
||||||
import abc |
|
||||||
|
|
||||||
from ..estimators.structure_estimator import StructureEstimator |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Optimizer(abc.ABC):
    """
    Base interface shared by all the optimizers of the package: an optimizer works on a single
    node and relies on a structure estimator to search for the structure of that node.

    :param node_id: the node label
    :type node_id: string
    :param structure_estimator: A structureEstimator Object to predict the structure
    :type structure_estimator: class:'StructureEstimator'
    """

    def __init__(self, node_id: str, structure_estimator: StructureEstimator):
        # keep a reference to the estimator together with the label of the node to optimize
        self.structure_estimator = structure_estimator
        self.node_id = node_id

    @abc.abstractmethod
    def optimize_structure(self) -> typing.List:
        """
        Run the optimization process for the node; concrete optimizers must override it.

        :return: the estimated structure for the node
        :rtype: List
        """
|
@ -1,199 +0,0 @@ |
|||||||
|
|
||||||
import itertools |
|
||||||
import json |
|
||||||
import typing |
|
||||||
|
|
||||||
import networkx as nx |
|
||||||
import numpy as np |
|
||||||
|
|
||||||
from random import choice,sample |
|
||||||
|
|
||||||
from abc import ABC |
|
||||||
|
|
||||||
|
|
||||||
from .optimizer import Optimizer |
|
||||||
from ..estimators.structure_estimator import StructureEstimator |
|
||||||
from ..structure_graph.network_graph import NetworkGraph |
|
||||||
|
|
||||||
import queue |
|
||||||
|
|
||||||
|
|
||||||
class TabuSearch(Optimizer): |
|
||||||
""" |
|
||||||
Optimizer class that implement Tabu Search |
|
||||||
|
|
||||||
|
|
||||||
:param node_id: current node's id |
|
||||||
:type node_id: string |
|
||||||
:param structure_estimator: a structure estimator object with the information about the net |
|
||||||
:type structure_estimator: class:'StructureEstimator' |
|
||||||
:param max_parents: maximum number of parents for each variable. If None, disabled, default to None |
|
||||||
:type max_parents: int, optional |
|
||||||
:param iterations_number: maximum number of optimization algorithm's iteration, default to 40 |
|
||||||
:type iterations_number: int, optional |
|
||||||
:param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None |
|
||||||
:type patience: int, optional |
|
||||||
:param tabu_length: maximum lenght of the data structures used in the optimization process, default to None |
|
||||||
:type tabu_length: int, optional |
|
||||||
:param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None |
|
||||||
:type tabu_rules_duration: int, optional |
|
||||||
|
|
||||||
|
|
||||||
""" |
|
||||||
def __init__(self,
             node_id:str,
             structure_estimator: StructureEstimator,
             max_parents:int = None,
             iterations_number:int= 40,
             patience:int = None,
             tabu_length:int = None,
             tabu_rules_duration = None
             ):
    """
    Constructor: forwards the node label and the structure estimator to the parent
    class and stores the search settings as instance attributes.
    """
    super().__init__(node_id, structure_estimator)
    # settings of the tabu data structures
    self.tabu_rules_duration = tabu_rules_duration
    self.tabu_length = tabu_length
    # settings of the search loop
    self.patience = patience
    self.iterations_number = iterations_number
    self.max_parents = max_parents
|
||||||
|
|
||||||
|
|
||||||
def optimize_structure(self) -> typing.List: |
|
||||||
""" |
|
||||||
Compute Optimization process for a structure_estimator by using a Hill Climbing Algorithm |
|
||||||
|
|
||||||
:return: the estimated structure for the node |
|
||||||
:rtype: List |
|
||||||
""" |
|
||||||
print(f"tabu search is processing the structure of {self.node_id}") |
|
||||||
|
|
||||||
'Create the graph for the single node' |
|
||||||
graph = NetworkGraph(self.structure_estimator._sample_path.structure) |
|
||||||
|
|
||||||
'get the index for the current node' |
|
||||||
node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id) |
|
||||||
|
|
||||||
'list of prior edges' |
|
||||||
prior_parents = set() |
|
||||||
|
|
||||||
'Add the edges from prior knowledge' |
|
||||||
for i in range(len(self.structure_estimator._removable_edges_matrix)): |
|
||||||
if not self.structure_estimator._removable_edges_matrix[i][node_index]: |
|
||||||
parent_id= self.structure_estimator._sample_path._structure.get_node_id(i) |
|
||||||
prior_parents.add(parent_id) |
|
||||||
|
|
||||||
'Add the node to the starting structure' |
|
||||||
graph.add_edges([(parent_id, self.node_id)]) |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
'get all the possible parents' |
|
||||||
other_nodes = set([node for node in |
|
||||||
self.structure_estimator._sample_path.structure.nodes_labels if |
|
||||||
node != self.node_id and |
|
||||||
not prior_parents.__contains__(node)]) |
|
||||||
|
|
||||||
'calculate the score for the node without parents' |
|
||||||
actual_best_score = self.structure_estimator.get_score_from_graph(graph,self.node_id) |
|
||||||
|
|
||||||
|
|
||||||
'initialize tabu_length and tabu_rules_duration if None' |
|
||||||
if self.tabu_length is None: |
|
||||||
self.tabu_length = len(other_nodes) |
|
||||||
|
|
||||||
if self.tabu_rules_duration is None: |
|
||||||
self.tabu_tabu_rules_durationength = len(other_nodes) |
|
||||||
|
|
||||||
'inizialize the data structures' |
|
||||||
tabu_set = set() |
|
||||||
tabu_queue = queue.Queue() |
|
||||||
|
|
||||||
patince_count = 0 |
|
||||||
tabu_count = 0 |
|
||||||
for i in range(self.iterations_number): |
|
||||||
|
|
||||||
current_possible_nodes = other_nodes.difference(tabu_set) |
|
||||||
|
|
||||||
'choose a new random edge according to tabu restiction' |
|
||||||
if(len(current_possible_nodes) > 0): |
|
||||||
current_new_parent = sample(current_possible_nodes,k=1)[0] |
|
||||||
else: |
|
||||||
current_new_parent = tabu_queue.get() |
|
||||||
tabu_set.remove(current_new_parent) |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
current_edge = (current_new_parent,self.node_id) |
|
||||||
added = False |
|
||||||
parent_removed = None |
|
||||||
|
|
||||||
if graph.has_edge(current_edge): |
|
||||||
graph.remove_edges([current_edge]) |
|
||||||
else: |
|
||||||
'check the max_parents constraint' |
|
||||||
if self.max_parents is not None: |
|
||||||
parents_list = graph.get_parents_by_id(self.node_id) |
|
||||||
if len(parents_list) >= self.max_parents : |
|
||||||
parent_removed = (choice(parents_list), self.node_id) |
|
||||||
graph.remove_edges([parent_removed]) |
|
||||||
graph.add_edges([current_edge]) |
|
||||||
added = True |
|
||||||
#print('**************************') |
|
||||||
current_score = self.structure_estimator.get_score_from_graph(graph,self.node_id) |
|
||||||
|
|
||||||
|
|
||||||
# print("-------------------------------------------") |
|
||||||
# print(f"Current new parent: {current_new_parent}") |
|
||||||
# print(f"Current score: {current_score}") |
|
||||||
# print(f"Current best score: {actual_best_score}") |
|
||||||
# print(f"tabu list : {str(tabu_set)} length: {len(tabu_set)}") |
|
||||||
# print(f"tabu queue : {str(tabu_queue)} length: {tabu_queue.qsize()}") |
|
||||||
# print(f"graph edges: {graph.edges}") |
|
||||||
|
|
||||||
# print("-------------------------------------------") |
|
||||||
# input() |
|
||||||
if current_score > actual_best_score: |
|
||||||
'update current best score' |
|
||||||
actual_best_score = current_score |
|
||||||
patince_count = 0 |
|
||||||
'update tabu list' |
|
||||||
|
|
||||||
|
|
||||||
else: |
|
||||||
'undo the last update' |
|
||||||
if added: |
|
||||||
graph.remove_edges([current_edge]) |
|
||||||
'If a parent was removed, add it again to the graph' |
|
||||||
if parent_removed is not None: |
|
||||||
graph.add_edges([parent_removed]) |
|
||||||
else: |
|
||||||
graph.add_edges([current_edge]) |
|
||||||
'update patience count' |
|
||||||
patince_count += 1 |
|
||||||
|
|
||||||
|
|
||||||
if tabu_queue.qsize() >= self.tabu_length: |
|
||||||
current_removed = tabu_queue.get() |
|
||||||
tabu_set.remove(current_removed) |
|
||||||
'Add the node on the tabu list' |
|
||||||
tabu_queue.put(current_new_parent) |
|
||||||
tabu_set.add(current_new_parent) |
|
||||||
|
|
||||||
tabu_count += 1 |
|
||||||
|
|
||||||
'Every tabu_rules_duration step remove an item from the tabu list ' |
|
||||||
if tabu_count % self.tabu_rules_duration == 0: |
|
||||||
if tabu_queue.qsize() > 0: |
|
||||||
current_removed = tabu_queue.get() |
|
||||||
tabu_set.remove(current_removed) |
|
||||||
tabu_count = 0 |
|
||||||
else: |
|
||||||
tabu_count = 0 |
|
||||||
|
|
||||||
if self.patience is not None and patince_count > self.patience: |
|
||||||
break |
|
||||||
|
|
||||||
print(f"finito variabile: {self.node_id}") |
|
||||||
return graph.edges |
|
@ -1,6 +0,0 @@ |
|||||||
from .conditional_intensity_matrix import ConditionalIntensityMatrix |
|
||||||
from .network_graph import NetworkGraph |
|
||||||
from .sample_path import SamplePath |
|
||||||
from .set_of_cims import SetOfCims |
|
||||||
from .structure import Structure |
|
||||||
from .trajectory import Trajectory |
|
@ -1,42 +0,0 @@ |
|||||||
import numpy as np |
|
||||||
|
|
||||||
|
|
||||||
class ConditionalIntensityMatrix(object):
    """Conditional Intensity Matrix (CIM) of a node.

    Groups together the state residence times vector, the state transition counts matrix and the
    CIM that is derived from them.

    :param state_residence_times: state residence times vector
    :type state_residence_times: numpy.ndarray
    :param state_transition_matrix: the transitions count matrix
    :type state_transition_matrix: numpy.ndarray
    :_cim: the actual cim of the node; until ``compute_cim_coefficients`` is called it is just a
        float64 copy of the transitions count matrix
    """

    def __init__(self, state_residence_times: np.array, state_transition_matrix: np.array):
        """Constructor Method
        """
        self._state_residence_times = state_residence_times
        self._state_transition_matrix = state_transition_matrix
        # ``astype`` returns a new array, so the counts matrix itself is never modified.
        self._cim = state_transition_matrix.astype(np.float64)

    def compute_cim_coefficients(self) -> None:
        """Turn the copied transition counts held in ``_cim`` into the CIM coefficients.

        The diagonal of ``_cim`` is negated in place; then one is added to every entry and the
        result is divided by the residence times plus one, i.e.
        q_xx' = (M[x, x'] + 1) / (T[x] + 1) with M already carrying the negated diagonal
        (assuming ``state_residence_times`` is a 1-D vector with one entry per row of M).
        The class member ``_cim`` is rebound to the computed matrix.
        """
        negated_diagonal = self._cim.diagonal() * -1
        np.fill_diagonal(self._cim, negated_diagonal)
        # Work on the transpose so that the residence times broadcast along the rows of ``_cim``.
        shifted_counts = self._cim.T + 1
        shifted_times = self._state_residence_times + 1
        self._cim = (shifted_counts / shifted_times).T

    @property
    def state_residence_times(self) -> np.ndarray:
        """The state residence times vector given to the constructor."""
        return self._state_residence_times

    @property
    def state_transition_matrix(self) -> np.ndarray:
        """The transitions count matrix given to the constructor."""
        return self._state_transition_matrix

    @property
    def cim(self) -> np.ndarray:
        """The current content of ``_cim``."""
        return self._cim

    def __repr__(self):
        matrix_text = str(self.cim)
        return 'CIM:\n' + matrix_text
|
||||||
|
|
@ -1,293 +0,0 @@ |
|||||||
|
|
||||||
import typing |
|
||||||
|
|
||||||
import networkx as nx |
|
||||||
import numpy as np |
|
||||||
|
|
||||||
from .structure import Structure |
|
||||||
|
|
||||||
|
|
||||||
class NetworkGraph(object):
    """Abstracts the infos contained in the Structure class in the form of a directed graph.
    Has the task of creating all the necessary filtering and indexing structures for parameters estimation

    :param graph_struct: the ``Structure`` object from which infos about the net will be extracted
    :type graph_struct: Structure
    :_graph: directed graph
    :_aggregated_info_about_nodes_parents: a structure that contains all the necessary infos
        about every parents of the node of which all the indexing and filtering structures will be constructed.
    :_time_scalar_indexing_structure: the indexing structure for state res time estimation
    :_transition_scalar_indexing_structure: the indexing structure for transition computation
    :_time_filtering: the columns filtering structure used in the computation of the state res times
    :_transition_filtering: the columns filtering structure used in the computation of the transition
        from one state to another
    :_p_combs_structure: all the possible parents states combination for the node of interest
    """

    def __init__(self, graph_struct: "Structure"):
        """Constructor Method
        """
        self._graph_struct = graph_struct
        self._graph = nx.DiGraph()
        self._aggregated_info_about_nodes_parents = None
        self._time_scalar_indexing_structure = None
        self._transition_scalar_indexing_structure = None
        self._time_filtering = None
        self._transition_filtering = None
        self._p_combs_structure = None

    def init_graph(self):
        """Legacy whole-graph initialization.

        NOTE(review): this method reads ``self._nodes_labels`` and ``self.graph_struct`` and calls
        ``get_ord_set_of_par_of_all_nodes``, ``build_fancy_indexing_structure``,
        ``build_scalar_indexing_structures``, ``build_time_columns_filtering_structure``,
        ``build_transition_columns_filtering_structure`` and ``build_p_combs_structure``, none of
        which is defined in this class. It looks like dead code that would raise ``AttributeError``
        when called -- confirm before relying on it; ``fast_init`` is the per-node initializer.
        """
        self.add_nodes(self._nodes_labels)
        self.add_edges(self.graph_struct.edges)
        self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes()
        self._fancy_indexing = self.build_fancy_indexing_structure(0)
        self.build_scalar_indexing_structures()
        self.build_time_columns_filtering_structure()
        self.build_transition_columns_filtering_structure()
        self._p_combs_structure = self.build_p_combs_structure()

    def fast_init(self, node_id: str) -> None:
        """Initializes all the necessary structures for parameters estimation of the node identified by the label
        node_id

        :param node_id: the label of the node
        :type node_id: string
        """
        self.add_nodes(self._graph_struct.nodes_labels)
        self.add_edges(self._graph_struct.edges)
        # Tuple (parents labels, parents indexes, parents cardinalities) of ``node_id``.
        self._aggregated_info_about_nodes_parents = self.get_ordered_by_indx_set_of_parents(node_id)
        p_indxs = self._aggregated_info_about_nodes_parents[1]
        p_vals = self._aggregated_info_about_nodes_parents[2]
        node_states = self.get_states_number(node_id)
        node_indx = self.get_node_indx(node_id)
        cols_number = self._graph_struct.total_variables_number
        self._time_scalar_indexing_structure = NetworkGraph.\
            build_time_scalar_indexing_structure_for_a_node(node_states, p_vals)
        self._transition_scalar_indexing_structure = NetworkGraph.\
            build_transition_scalar_indexing_structure_for_a_node(node_states, p_vals)
        self._time_filtering = NetworkGraph.build_time_columns_filtering_for_a_node(node_indx, p_indxs)
        self._transition_filtering = NetworkGraph.build_transition_filtering_for_a_node(node_indx, p_indxs, cols_number)
        self._p_combs_structure = NetworkGraph.build_p_comb_structure_for_a_node(p_vals)

    def add_nodes(self, list_of_nodes: typing.List) -> None:
        """Adds the nodes to the ``_graph`` contained in the list of nodes ``list_of_nodes``.
        Sets all the properties that identify a nodes (index, positional index, cardinality)

        The indexes and cardinalities are read from the underlying ``Structure`` and paired with the
        labels positionally; the positional index is the position of the label in ``list_of_nodes``.

        :param list_of_nodes: the nodes to add to ``_graph``
        :type list_of_nodes: List
        """
        nodes_indxs = self._graph_struct.nodes_indexes
        nodes_vals = self._graph_struct.nodes_values
        for pos, (node_id, node_indx, node_val) in enumerate(zip(list_of_nodes, nodes_indxs, nodes_vals)):
            self._graph.add_node(node_id, indx=node_indx, val=node_val, pos_indx=pos)

    def has_edge(self, edge: tuple) -> bool:
        """Check if the graph contains a specific edge

        :param edge: a tuple ``(from_node, to_node)`` that represents the edge
        :type edge: tuple
        :return: True if the edge is in the graph, False otherwise
        :rtype: bool
        """
        return self._graph.has_edge(edge[0], edge[1])

    def add_edges(self, list_of_edges: typing.List) -> None:
        """Add the edges to the ``_graph`` contained in the list ``list_of_edges``.

        :param list_of_edges: the list containing of tuples containing the edges
        :type list_of_edges: List
        """
        self._graph.add_edges_from(list_of_edges)

    def remove_node(self, node_id: str) -> None:
        """Remove the node ``node_id`` from all the class members.
        Initialize all the filtering/indexing structures.

        :param node_id: the label of the node to remove
        :type node_id: string
        """
        self._graph.remove_node(node_id)
        self._graph_struct.remove_node(node_id)
        self.clear_indexing_filtering_structures()

    def clear_indexing_filtering_structures(self) -> None:
        """Initialize all the filtering/indexing structures (every one of them is reset to None).
        """
        self._aggregated_info_about_nodes_parents = None
        self._time_scalar_indexing_structure = None
        self._transition_scalar_indexing_structure = None
        self._time_filtering = None
        self._transition_filtering = None
        self._p_combs_structure = None

    def get_ordered_by_indx_set_of_parents(self, node: str) -> typing.Tuple:
        """Builds the aggregated structure that holds all the infos relative to the parent set of the node, namely
        (parents_labels, parents_indexes, parents_cardinalities).

        The parents are sorted by the position of their label in the nodes labels list of the
        underlying ``Structure``.

        :param node: the label of the node
        :type node: string
        :return: a tuple containing all the parent set infos
        :rtype: Tuple
        """
        parents = self.get_parents_by_id(node)
        # Position of every label in the structure, used as the sorting key.
        position_of = {label: pos for pos, label in enumerate(self._graph_struct.nodes_labels)}
        sorted_parents = sorted(parents, key=lambda label: position_of[label])
        p_indxes = [self.get_node_indx(parent) for parent in sorted_parents]
        p_values = [self.get_states_number(parent) for parent in sorted_parents]
        return sorted_parents, p_indxes, p_values

    def remove_edges(self, list_of_edges: typing.List) -> None:
        """Remove the edges to the graph contained in the list list_of_edges.

        :param list_of_edges: The edges to remove from the graph
        :type list_of_edges: List
        """
        self._graph.remove_edges_from(list_of_edges)

    @staticmethod
    def build_time_scalar_indexing_structure_for_a_node(node_states: int,
                                                        parents_vals: typing.List) -> np.ndarray:
        """Builds an indexing structure for the computation of state residence times values.

        The result is the cumulative product of ``[node_states, *parents_vals]``.

        :param node_states: the node cardinality
        :type node_states: int
        :param parents_vals: the caridinalites of the node's parents
        :type parents_vals: List
        :return: The time indexing structure
        :rtype: numpy.ndArray
        """
        T_vector = np.append(np.array([node_states]), parents_vals)
        # ``np.append`` yields floats when ``parents_vals`` is empty, hence the explicit cast.
        # Builtin ``int`` replaces the ``np.int`` alias, which was removed in NumPy 1.24.
        return T_vector.cumprod().astype(int)

    @staticmethod
    def build_transition_scalar_indexing_structure_for_a_node(node_states_number: int, parents_vals: typing.List) \
            -> np.ndarray:
        """Builds an indexing structure for the computation of state transitions values.

        The result is the cumulative product of
        ``[node_states_number, node_states_number, *parents_vals]``.

        :param node_states_number: the node cardinality
        :type node_states_number: int
        :param parents_vals: the caridinalites of the node's parents
        :type parents_vals: List
        :return: The transition indexing structure
        :rtype: numpy.ndArray
        """
        M_vector = np.append(np.array([node_states_number, node_states_number]), parents_vals)
        # Builtin ``int`` replaces the ``np.int`` alias, which was removed in NumPy 1.24.
        return M_vector.cumprod().astype(int)

    @staticmethod
    def build_time_columns_filtering_for_a_node(node_indx: int, p_indxs: typing.List) -> np.ndarray:
        """
        Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs``
        in the dataset.
        This structute will be used in the computation of the state res times.

        :param node_indx: the index of the node
        :type node_indx: int
        :param p_indxs: the indexes of the node's parents
        :type p_indxs: List
        :return: The filtering structure for times estimation, i.e. ``[node_indx, *p_indxs]``
        :rtype: numpy.ndArray
        """
        # The trailing cast is needed because appending an empty list promotes the array to float.
        return np.append(np.array([node_indx], dtype=int), p_indxs).astype(int)

    @staticmethod
    def build_transition_filtering_for_a_node(node_indx: int, p_indxs: typing.List, nodes_number: int) \
            -> np.ndarray:
        """Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs``
        in the dataset.
        This structure will be used in the computation of the state transitions values.

        :param node_indx: the index of the node
        :type node_indx: int
        :param p_indxs: the indexes of the node's parents
        :type p_indxs: List
        :param nodes_number: the total number of nodes in the dataset
        :type nodes_number: int
        :return: The filtering structure for transitions estimation, i.e.
            ``[node_indx + nodes_number, node_indx, *p_indxs]``
        :rtype: numpy.ndArray
        """
        return np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=int)

    @staticmethod
    def build_p_comb_structure_for_a_node(parents_values: typing.List) -> np.ndarray:
        """
        Builds the combinatorial structure that contains the combinations of all the values contained in
        ``parents_values``.

        :param parents_values: the cardinalities of the nodes
        :type parents_values: List
        :return: A numpy matrix containing a grid of the combinations; an array of shape (1, 0) when
            ``parents_values`` is empty
        :rtype: numpy.ndArray
        """
        if len(parents_values) == 0:
            return np.array([[]], dtype=int)
        value_ranges = [list(range(val)) for val in parents_values]
        parents_comb = np.array(np.meshgrid(*value_ranges)).T.reshape(-1, len(parents_values))
        if len(parents_values) > 1:
            # Swap the first two columns of the grid produced by meshgrid.
            # NOTE(review): when the first two parents have different cardinalities, column 0 ends
            # up holding the value range of the second parent -- confirm this is intended.
            tmp_comb = parents_comb[:, 1].copy()
            parents_comb[:, 1] = parents_comb[:, 0].copy()
            parents_comb[:, 0] = tmp_comb
        return parents_comb

    def get_parents_by_id(self, node_id) -> typing.List:
        """Returns a list of labels of the parents of the node ``node_id``

        :param node_id: the node label
        :type node_id: string
        :return: a List of labels of the parents
        :rtype: List
        """
        return list(self._graph.predecessors(node_id))

    def get_states_number(self, node_id) -> int:
        """Returns the ``val`` attribute (cardinality) stored on the node ``node_id``."""
        return self._graph.nodes[node_id]['val']

    def get_node_indx(self, node_id) -> int:
        """Returns the ``indx`` attribute stored on the node ``node_id``."""
        # Direct lookup instead of materializing the attribute dict of the whole graph at each call.
        return self._graph.nodes[node_id]['indx']

    def get_positional_node_indx(self, node_id) -> int:
        """Returns the ``pos_indx`` attribute (positional index) stored on the node ``node_id``."""
        return self._graph.nodes[node_id]['pos_indx']

    @property
    def nodes(self) -> typing.List:
        return self._graph_struct.nodes_labels

    @property
    def edges(self) -> typing.List:
        return list(self._graph.edges)

    @property
    def nodes_indexes(self) -> np.ndarray:
        return self._graph_struct.nodes_indexes

    @property
    def nodes_values(self) -> np.ndarray:
        return self._graph_struct.nodes_values

    @property
    def time_scalar_indexing_strucure(self) -> np.ndarray:
        # The misspelled name is part of the public interface and is kept for the callers.
        return self._time_scalar_indexing_structure

    @property
    def time_filtering(self) -> np.ndarray:
        return self._time_filtering

    @property
    def transition_scalar_indexing_structure(self) -> np.ndarray:
        return self._transition_scalar_indexing_structure

    @property
    def transition_filtering(self) -> np.ndarray:
        return self._transition_filtering

    @property
    def p_combs(self) -> np.ndarray:
        return self._p_combs_structure
|
@ -1,91 +0,0 @@ |
|||||||
|
|
||||||
|
|
||||||
import numpy as np |
|
||||||
import pandas as pd |
|
||||||
|
|
||||||
from .structure import Structure |
|
||||||
from .trajectory import Trajectory |
|
||||||
from ..utility.abstract_importer import AbstractImporter |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class SamplePath(object):
    """Gathers the trajectories, the real structure of the sampled net and the variables cardinalities.
    It is responsible for creating the ``Trajectory`` and ``Structure`` objects that hold those data.

    :param importer: the Importer object which contains the imported and processed data
    :type importer: AbstractImporter
    :_trajectories: the ``Trajectory`` object that will contain all the concatenated trajectories
    :_structure: the ``Structure`` Object that will contain all the structural infos about the net
    :_total_variables_count: the number of variables in the net
    """

    def __init__(self, importer: AbstractImporter):
        """Constructor Method

        :raises RuntimeError: if the variables frame or the concatenated samples of the importer are
            None or empty
        """
        self._importer = importer
        error_msg = 'The importer object has to contain the all processed data!'
        if importer._df_variables is None or importer._concatenated_samples is None:
            raise RuntimeError(error_msg)
        if importer._df_variables.empty:
            raise RuntimeError(error_msg)
        # The samples may be stored either as a DataFrame or as a numpy array.
        samples = importer._concatenated_samples
        if isinstance(samples, pd.DataFrame) and samples.empty:
            raise RuntimeError(error_msg)
        if isinstance(samples, np.ndarray) and samples.size == 0:
            raise RuntimeError(error_msg)
        self._trajectories = None
        self._structure = None
        self._total_variables_count = None

    def build_trajectories(self) -> None:
        """Creates the ``Trajectory`` object out of the concatenated samples of the importer, then
        asks the importer to clear its concatenated frame.
        """
        importer = self._importer
        samples_arrays = importer.build_list_of_samples_array(importer.concatenated_samples)
        # One more column than the number of labels in the importer's sorter.
        columns_number = len(importer.sorter) + 1
        self._trajectories = Trajectory(samples_arrays, columns_number)
        importer.clear_concatenated_frame()

    def build_structure(self) -> None:
        """Creates the ``Structure`` object that aggregates all the infos about the net.

        :raises RuntimeError: if the importer's sorter does not match the labels listed in the
            first column of the variables frame
        """
        importer = self._importer
        if importer.sorter != importer.variables.iloc[:, 0].to_list():
            raise RuntimeError("The Dataset columns order have to match the order of labels in the variables Frame!")

        self._total_variables_count = len(importer.sorter)
        labels = importer.variables.iloc[:, 0].to_list()
        indxs = importer.variables.index.to_numpy()
        vals = importer.variables.iloc[:, 1].to_numpy()
        # No prior structure available: the net starts without edges.
        has_no_edges = importer.structure is None or importer.structure.empty
        edges = [] if has_no_edges else list(importer.structure.to_records(index=False))
        self._structure = Structure(labels, indxs, vals, edges,
                                    self._total_variables_count)

    def clear_memory(self):
        """Replaces the raw data held by the importer with an empty list."""
        self._importer._raw_data = []

    @property
    def trajectories(self) -> Trajectory:
        return self._trajectories

    @property
    def structure(self) -> Structure:
        return self._structure

    @property
    def total_variables_count(self) -> int:
        return self._total_variables_count

    @property
    def has_prior_net_structure(self) -> bool:
        """True when the built structure contains at least one edge."""
        return bool(self._structure.edges)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,97 +0,0 @@ |
|||||||
|
|
||||||
|
|
||||||
import typing |
|
||||||
|
|
||||||
import numpy as np |
|
||||||
|
|
||||||
from .conditional_intensity_matrix import ConditionalIntensityMatrix |
|
||||||
|
|
||||||
|
|
||||||
class SetOfCims(object):
    """Aggregates all the CIMS of the node identified by the label _node_id.

    :param node_id: the node label
    :type node_id: string
    :param parents_states_number: the cardinalities of the parents
    :type parents_states_number: List
    :param node_states_number: the caridinality of the node
    :type node_states_number: int
    :param p_combs: the p_comb structure bound to this node
    :type p_combs: numpy.ndArray
    :_state_residence_times: matrix containing all the state residence time vectors for the node
    :_transition_matrices: matrix containing all the transition matrices for the node
    :_actual_cims: the cims of the node
    """

    def __init__(self, node_id: str, parents_states_number: typing.List, node_states_number: int, p_combs: np.ndarray):
        """Constructor Method
        """
        self._node_id = node_id
        self._parents_states_number = parents_states_number
        self._node_states_number = node_states_number
        self._actual_cims = []
        self._state_residence_times = None
        self._transition_matrices = None
        self._p_combs = p_combs
        self.build_times_and_transitions_structures()

    def build_times_and_transitions_structures(self) -> None:
        """Initializes at the correct dimensions the state residence times matrix and the state transition matrices.

        Both structures are zero filled and have one entry per combination of the parents states
        (a single entry when the node has no parents).
        """
        if not self._parents_states_number:
            combinations_number = 1
        else:
            combinations_number = np.prod(self._parents_states_number)
        states_number = self._node_states_number
        # Builtin ``float``/``int`` replace the ``np.float``/``np.int`` aliases, which were removed
        # in NumPy 1.24; they select exactly the same dtypes.
        self._state_residence_times = np.zeros((combinations_number, states_number), dtype=float)
        self._transition_matrices = np.zeros((combinations_number, states_number, states_number), dtype=int)

    def build_cims(self, state_res_times: np.ndarray, transition_matrices: np.ndarray) -> None:
        """Build the ``ConditionalIntensityMatrix`` objects given the state residence times and transitions matrices.
        Compute the cim coefficients.The class member ``_actual_cims`` will contain the computed cims.

        After the call ``_actual_cims`` is a numpy array and the members ``_transition_matrices`` and
        ``_state_residence_times`` are reset to None.

        :param state_res_times: the state residence times matrix
        :type state_res_times: numpy.ndArray
        :param transition_matrices: the transition matrices
        :type transition_matrices: numpy.ndArray
        """
        for state_res_time_vector, transition_matrix in zip(state_res_times, transition_matrices):
            cim_to_add = ConditionalIntensityMatrix(state_res_time_vector, transition_matrix)
            cim_to_add.compute_cim_coefficients()
            self._actual_cims.append(cim_to_add)
        self._actual_cims = np.array(self._actual_cims)
        # The accumulation buffers are dropped once the cims have been built.
        self._transition_matrices = None
        self._state_residence_times = None

    def filter_cims_with_mask(self, mask_arr: np.ndarray, comb: typing.List) -> np.ndarray:
        """Filter the cims contained in the array ``_actual_cims`` given the boolean mask ``mask_arr`` and the index
        ``comb``.

        When ``mask_arr`` has at most one element all the cims are returned unfiltered.

        :param mask_arr: the boolean mask that indicates which parent to consider
        :type mask_arr: numpy.array
        :param comb: the state/s of the filtered parents
        :type comb: numpy.array
        :return: Array of ``ConditionalIntensityMatrix`` objects
        :rtype: numpy.array
        """
        if mask_arr.size <= 1:
            return self._actual_cims
        # Rows of ``_p_combs`` whose masked columns all match ``comb`` select the cims to return.
        matching_rows = np.all(self._p_combs[:, mask_arr] == comb, axis=1)
        flat_indxs = np.argwhere(matching_rows).ravel()
        return self._actual_cims[flat_indxs]

    @property
    def actual_cims(self) -> np.ndarray:
        return self._actual_cims

    @property
    def p_combs(self) -> np.ndarray:
        return self._p_combs

    def get_cims_number(self):
        """Returns the number of cims currently stored in ``_actual_cims``."""
        return len(self._actual_cims)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,124 +0,0 @@ |
|||||||
|
|
||||||
import typing as ty |
|
||||||
|
|
||||||
import numpy as np |
|
||||||
|
|
||||||
|
|
||||||
class Structure(object):
    """Contains all the infos about the network structure(nodes labels, nodes caridinalites, edges, indexes)

    :param nodes_labels_list: the symbolic names of the variables
    :type nodes_labels_list: List
    :param nodes_indexes_arr: the indexes of the nodes
    :type nodes_indexes_arr: numpy.ndArray
    :param nodes_vals_arr: the cardinalites of the nodes
    :type nodes_vals_arr: numpy.ndArray
    :param edges_list: the edges of the network
    :type edges_list: List
    :param total_variables_number: the total number of variables in the dataset
    :type total_variables_number: int
    """

    def __init__(self, nodes_labels_list: ty.List, nodes_indexes_arr: np.ndarray, nodes_vals_arr: np.ndarray,
                 edges_list: ty.List, total_variables_number: int):
        """Constructor Method
        """
        self._nodes_labels_list = nodes_labels_list
        self._nodes_indexes_arr = nodes_indexes_arr
        self._nodes_vals_arr = nodes_vals_arr
        self._edges_list = edges_list
        self._total_variables_number = total_variables_number

    def remove_node(self, node_id: str) -> None:
        """Remove the node ``node_id`` from all the class members.
        The class member ``_total_variables_number`` is left unchanged since it refers to the total
        number of variables in the dataset.

        :param node_id: the label of the node to remove
        :type node_id: string
        :raises ValueError: if ``node_id`` is not one of the nodes labels
        """
        node_positional_indx = self._nodes_labels_list.index(node_id)
        del self._nodes_labels_list[node_positional_indx]
        self._nodes_indexes_arr = np.delete(self._nodes_indexes_arr, node_positional_indx)
        self._nodes_vals_arr = np.delete(self._nodes_vals_arr, node_positional_indx)
        # Drop every edge entering or leaving the removed node.
        self._edges_list = [(from_node, to_node) for (from_node, to_node) in self._edges_list
                            if from_node != node_id and to_node != node_id]

    @property
    def edges(self) -> ty.List:
        return self._edges_list

    @property
    def nodes_labels(self) -> ty.List:
        return self._nodes_labels_list

    @property
    def nodes_indexes(self) -> np.ndarray:
        return self._nodes_indexes_arr

    @property
    def nodes_values(self) -> np.ndarray:
        return self._nodes_vals_arr

    @property
    def total_variables_number(self) -> int:
        return self._total_variables_number

    def get_node_id(self, node_indx: int) -> str:
        """Given the ``node_index`` returns the node label.

        :param node_indx: the node index
        :type node_indx: int
        :return: the node label
        :rtype: string
        """
        return self._nodes_labels_list[node_indx]

    def clean_structure_edges(self):
        """Replace the edges list with a new empty list."""
        self._edges_list = list()

    def add_edge(self, edge: tuple):
        """Append ``edge`` to the edges list.

        :param edge: the edge to add, as a tuple ``(from_node, to_node)``
        :type edge: tuple
        """
        # The edge itself is stored (previously the builtin ``tuple`` type was appended by mistake).
        self._edges_list.append(edge)

    def remove_edge(self, edge: tuple):
        """Remove ``edge`` from the edges list.

        :param edge: the edge to remove, as a tuple ``(from_node, to_node)``
        :type edge: tuple
        :raises ValueError: if ``edge`` is not in the edges list
        """
        self._edges_list.remove(edge)

    def contains_edge(self, edge: tuple) -> bool:
        """Check whether ``edge`` is in the edges list.

        :param edge: the edge to look for, as a tuple ``(from_node, to_node)``
        :type edge: tuple
        :return: True if the edge is present, False otherwise
        :rtype: bool
        """
        return edge in self._edges_list

    def get_node_indx(self, node_id: str) -> int:
        """Given the node label ``node_id`` returns the node index.

        :param node_id: the node label
        :type node_id: string
        :return: the node index
        :rtype: int
        """
        pos_indx = self._nodes_labels_list.index(node_id)
        return self._nodes_indexes_arr[pos_indx]

    def get_positional_node_indx(self, node_id: str) -> int:
        """Given the node label ``node_id`` returns its position in the nodes labels list.

        :param node_id: the node label
        :type node_id: string
        :return: the positional index of the node
        :rtype: int
        """
        return self._nodes_labels_list.index(node_id)

    def get_states_number(self, node: str) -> int:
        """Given the node label ``node`` returns the cardinality of the node.

        :param node: the node label
        :type node: string
        :return: the node cardinality
        :rtype: int
        """
        pos_indx = self._nodes_labels_list.index(node)
        return self._nodes_vals_arr[pos_indx]

    def __repr__(self):
        return "Variables:\n" + str(self._nodes_labels_list) + "\nValues:\n" + str(self._nodes_vals_arr) + \
               "\nEdges: \n" + str(self._edges_list)

    def __eq__(self, other):
        """Overrides the default implementation

        Two structures are equal when they have the same set of labels, equal cardinalities and
        indexes arrays and equal edges lists.
        """
        if isinstance(other, Structure):
            return set(self._nodes_labels_list) == set(other._nodes_labels_list) and \
                   np.array_equal(self._nodes_vals_arr, other._nodes_vals_arr) and \
                   np.array_equal(self._nodes_indexes_arr, other._nodes_indexes_arr) and \
                   self._edges_list == other._edges_list

        return False
|
||||||
|
|
@ -1,45 +0,0 @@ |
|||||||
|
|
||||||
import typing |
|
||||||
|
|
||||||
import numpy as np |
|
||||||
|
|
||||||
|
|
||||||
class Trajectory(object):
    """Holds a complete set of trajectories as a pair of arrays: the time
    deltas and the matrix of state values.

    :param list_of_columns: a list whose first element is the times array and whose second
        element is the values matrix
    :type list_of_columns: List
    :param original_cols_number: total number of cols in the data
    :type original_cols_number: int
    :_actual_trajectory: the values matrix, containing also the duplicated/shifted values
    :_times: the array containing the time deltas
    """

    def __init__(self, list_of_columns: typing.List, original_cols_number: int):
        """Constructor Method
        """
        times, transitions = list_of_columns[0], list_of_columns[1]
        self._times = times
        self._actual_trajectory = transitions
        self._original_cols_number = original_cols_number

    @property
    def trajectory(self) -> np.ndarray:
        """The first ``original_cols_number - 1`` columns of the values matrix."""
        kept_columns = self._original_cols_number - 1
        return self._actual_trajectory[:, :kept_columns]

    @property
    def complete_trajectory(self) -> np.ndarray:
        """The whole values matrix, shifted columns included."""
        return self._actual_trajectory

    @property
    def times(self):
        """The array of time deltas."""
        return self._times

    def size(self):
        """Return the number of rows of the values matrix."""
        shape = self._actual_trajectory.shape
        return shape[0]

    def __repr__(self):
        return (f"Complete Trajectory Rows: {self.size()}\n"
                f"{self.complete_trajectory!r}"
                f"\nTimes Rows:{self.times.size}\n"
                f"{self.times!r}")
|
||||||
|
|
||||||
|
|
@ -1,4 +0,0 @@ |
|||||||
from .abstract_importer import AbstractImporter |
|
||||||
from .cache import Cache |
|
||||||
from .json_importer import JsonImporter |
|
||||||
from .sample_importer import SampleImporter |
|
@ -1,164 +0,0 @@ |
|||||||
|
|
||||||
import typing |
|
||||||
from abc import ABC, abstractmethod |
|
||||||
|
|
||||||
import numpy as np |
|
||||||
import pandas as pd |
|
||||||
|
|
||||||
import copy |
|
||||||
|
|
||||||
#from sklearn.utils import resample |
|
||||||
|
|
||||||
|
|
||||||
class AbstractImporter(ABC):
    """Abstract base exposing the methods needed to process the trajectories and the net structure.

    :param file_path: the file path, or dataset name if you import already processed data
    :type file_path: str
    :param trajectory_list: Dataframe or numpy array containing the concatenation of all the processed trajectories
    :type trajectory_list: typing.Union[pandas.DataFrame, numpy.ndarray]
    :param variables: Dataframe containing the nodes labels and cardinalities
    :type variables: pandas.DataFrame
    :param prior_net_structure: Dataframe containing the structure of the network (edges)
    :type prior_net_structure: pandas.DataFrame
    :_sorter: A list containing the variables labels in the SAME order as the columns in ``concatenated_samples``

    .. warning::
        The parameters ``variables`` and ``prior_net_structure`` HAVE to be properly constructed
        as Pandas Dataframes with the following structure:
        Header of _df_structure = [From_Node | To_Node]
        Header of _df_variables = [Variable_Label | Variable_Cardinality]
        See the tutorial on how to construct a correct ``concatenated_samples`` Dataframe/ndarray.

    .. note::
        See :class:``JsonImporter`` for an example implementation
    """

    def __init__(self, file_path: str = None, trajectory_list: typing.Union[pd.DataFrame, np.ndarray] = None,
                 variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None):
        """Constructor
        """
        self._file_path = file_path
        self._df_samples_list = trajectory_list
        # Stays an empty list until the trajectories have been processed.
        self._concatenated_samples = []
        self._df_variables = variables
        self._df_structure = prior_net_structure
        self._sorter = None
        super().__init__()

    @abstractmethod
    def build_sorter(self, trajecory_header: object) -> typing.List:
        """Builds the value for the ``_sorter`` class member from a trajectory header, keeping ONLY the
        variables symbolic labels and cutting out the time label.

        :param trajecory_header: an object that will be used to define the header
        :type trajecory_header: object
        :return: A list containing the processed header.
        :rtype: List
        """

    def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame,
                                              columns_header: typing.List, shifted_cols_header: typing.List) \
            -> pd.DataFrame:
        """Replaces the first column with the difference between each pair of consecutive values (time deltas)
        and adds, for every column in ``columns_header``, a copy shifted up by one row.
        The last row is dropped. The input frame is not modified: a deep copy is processed.

        :param sample_frame: the traj to be processed
        :type sample_frame: pandas.Dataframe
        :param columns_header: the original header of sample_frame
        :type columns_header: List
        :param shifted_cols_header: the names given to the shifted copies of ``columns_header``
        :type shifted_cols_header: List
        :return: The processed dataframe
        :rtype: pandas.Dataframe

        .. warning::
            the Dataframe ``sample_frame`` has to follow the column structure of this header:
            Header of sample_frame = [Time | Variable values]
        """
        working_frame = copy.deepcopy(sample_frame)
        # delta[i] = time[i + 1] - time[i]; the last row gets NaN and is dropped below.
        time_deltas = working_frame.iloc[:, 0].diff().shift(-1)
        working_frame.iloc[:, 0] = time_deltas
        next_states = working_frame[columns_header].shift(-1).fillna(0).astype('int32')
        next_states.columns = shifted_cols_header
        working_frame = working_frame.assign(**next_states)
        last_row_label = working_frame.tail(1).index
        working_frame.drop(last_row_label, inplace=True)
        return working_frame

    def compute_row_delta_in_all_samples_frames(self, df_samples_list: typing.List) -> None:
        """Calls the method ``compute_row_delta_sigle_samples_frame`` on every dataframe present in the list
        ``df_samples_list`` and concatenates the results in the dataframe ``concatenated_samples``,
        with columns ordered as [Time | variables | shifted variables].

        :param df_samples_list: the dataframe's list to be processed and concatenated
        :type df_samples_list: List
        :raises RuntimeError: if ``self._sorter`` is empty or not initialized

        .. warning::
            The Dataframe sample_frame has to follow the column structure of this header:
            Header of sample_frame = [Time | Variable values]
            The class member self._sorter HAS to be properly INITIALIZED (See class members definition doc)
        .. note::
            After the call of this method the class member ``concatenated_samples`` will contain all processed
            and merged trajectories
        """
        if not self._sorter:
            raise RuntimeError("The class member self._sorter has to be INITIALIZED!")
        shifted_cols_header = [label + "S" for label in self._sorter]
        processed_frames = []
        for frame in df_samples_list:
            processed_frames.append(
                self.compute_row_delta_sigle_samples_frame(frame, self._sorter, shifted_cols_header))
        self._concatenated_samples = pd.concat(processed_frames)

        complete_header = self._sorter[:]
        complete_header.insert(0, 'Time')
        complete_header.extend(shifted_cols_header)
        self._concatenated_samples = self._concatenated_samples[complete_header]

    def build_list_of_samples_array(self, concatenated_sample: pd.DataFrame) -> typing.List:
        """Builds a List containing the first column as a numpy array, and the remaining
        columns as an integer matrix.

        :param concatenated_sample: the dataframe from which the time, and transitions matrix have to be extracted
            and converted
        :type concatenated_sample: pandas.Dataframe
        :return: the resulting list of numpy arrays
        :rtype: List
        """
        as_matrix = concatenated_sample.to_numpy()
        first_column = as_matrix[:, 0]
        remaining_columns = as_matrix[:, 1:].astype(int)
        return [first_column, remaining_columns]

    def clear_concatenated_frame(self) -> None:
        """Removes all rows in the dataframe concatenated_samples (no-op if it is not a DataFrame).
        """
        if not isinstance(self._concatenated_samples, pd.DataFrame):
            return
        self._concatenated_samples = self._concatenated_samples.iloc[0:0]

    @abstractmethod
    def dataset_id(self) -> object:
        """If the original dataset contains multiple dataset, this method returns a unique id to identify the current
        dataset
        """

    @property
    def concatenated_samples(self) -> pd.DataFrame:
        return self._concatenated_samples

    @property
    def variables(self) -> pd.DataFrame:
        return self._df_variables

    @property
    def structure(self) -> pd.DataFrame:
        return self._df_structure

    @property
    def sorter(self) -> typing.List:
        return self._sorter

    @property
    def file_path(self) -> str:
        return self._file_path
|
@ -1,58 +0,0 @@ |
|||||||
|
|
||||||
import typing |
|
||||||
|
|
||||||
from ..structure_graph.set_of_cims import SetOfCims |
|
||||||
|
|
||||||
|
|
||||||
class Cache:
    """This class acts as a cache of ``SetOfCims`` objects for a node.

    :_list_of_sets_of_parents: a list of ``Sets`` objects of the parents to which the cim in cache at SAME
        index is related
    :_actual_cache: a list of setOfCims objects
    """

    def __init__(self):
        """Constructor Method
        """
        self._list_of_sets_of_parents = []
        self._actual_cache = []

    def find(self, parents_comb: typing.Set):  # typing.Union[typing.Set, str]
        """Looks in the cache for the ``SetOfCims`` stored for the symbolic parents
        combination ``parents_comb``.

        :param parents_comb: the parents related to that ``SetOfCims``
        :type parents_comb: Set
        :return: A ``SetOfCims`` object if ``parents_comb`` is found in ``_list_of_sets_of_parents``.
            None otherwise.
        :rtype: SetOfCims
        """
        try:
            position = self._list_of_sets_of_parents.index(parents_comb)
        except ValueError:
            # Cache miss: this parents combination was never stored.
            return None
        return self._actual_cache[position]

    def put(self, parents_comb: typing.Set, socim: SetOfCims):
        """Stores the ``SetOfCims`` object and, at the same position, the related symbolic
        index ``parents_comb`` in ``_list_of_sets_of_parents``.

        :param parents_comb: the symbolic set index
        :type parents_comb: Set
        :param socim: the related SetOfCims object
        :type socim: SetOfCims
        """
        # The two lists are kept aligned: same position, same entry.
        self._list_of_sets_of_parents.append(parents_comb)
        self._actual_cache.append(socim)

    def clear(self):
        """Empties both ``_actual_cache`` and ``_list_of_sets_of_parents`` in place.
        """
        self._list_of_sets_of_parents.clear()
        self._actual_cache.clear()
|
@ -1,176 +0,0 @@ |
|||||||
import json |
|
||||||
import typing |
|
||||||
|
|
||||||
import pandas as pd |
|
||||||
|
|
||||||
|
|
||||||
from .abstract_importer import AbstractImporter |
|
||||||
|
|
||||||
|
|
||||||
class JsonImporter(AbstractImporter):
    """Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare
    the data in json extension.

    :param file_path: the path of the file that contains the data to be imported
    :type file_path: string
    :param samples_label: the reference key for the samples in the trajectories
    :type samples_label: string
    :param structure_label: the reference key for the structure of the network data
    :type structure_label: string
    :param variables_label: the reference key for the cardinalities of the nodes data
    :type variables_label: string
    :param time_key: the key used to identify the timestamps in each trajectory
    :type time_key: string
    :param variables_key: the key used to identify the names of the variables in the net
    :type variables_key: string
    :_array_indx: the index of the outer JsonArray to extract the data from
    :type _array_indx: int
    :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory
    :_raw_data: The raw contents of the json file to import
    :type _raw_data: List
    """

    def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
                 variables_key: str):
        """Constructor method

        .. note::
            This constructor calls also the method ``read_json_file()``, so after the construction of the object
            the class member ``_raw_data`` will contain the raw imported json data.
        """
        self._samples_label = samples_label
        self._structure_label = structure_label
        self._variables_label = variables_label
        self._time_key = time_key
        self._variables_key = variables_key
        self._df_samples_list = None
        self._array_indx = None
        super().__init__(file_path)
        self._raw_data = self.read_json_file()

    def import_data(self, indx: int) -> None:
        """Imports trajectories, structure and variables found at position ``indx`` of the raw data.

        The trajectories are processed into ``concatenated_samples`` and the
        intermediate per-trajectory dataframes are emptied afterwards.

        :param indx: the index of the outer JsonArray to extract the data from
        :type indx: int
        """
        self._array_indx = indx
        raw = self._raw_data
        self._df_samples_list = self.import_trajectories(raw)
        first_frame = self._df_samples_list[0]
        self._sorter = self.build_sorter(first_frame)
        self.compute_row_delta_in_all_samples_frames(self._df_samples_list)
        self.clear_data_frame_list()
        self._df_structure = self.import_structure(raw)
        self._df_variables = self.import_variables(raw)

    def import_trajectories(self, raw_data: typing.List) -> typing.List:
        """Imports the trajectories from the list of dicts ``raw_data``.

        :param raw_data: List of Dicts
        :type raw_data: List
        :return: List of dataframes containing all the trajectories
        :rtype: List
        """
        return self.normalize_trajectories(raw_data, self._array_indx, self._samples_label)

    def import_structure(self, raw_data: typing.List) -> pd.DataFrame:
        """Imports in a dataframe the data in the list raw_data at the key ``_structure_label``

        :param raw_data: List of Dicts
        :type raw_data: List
        :return: Dataframe containing the starting node and ending node of every arc of the network
        :rtype: pandas.Dataframe
        """
        return self.one_level_normalizing(raw_data, self._array_indx, self._structure_label)

    def import_variables(self, raw_data: typing.List) -> pd.DataFrame:
        """Imports the data in ``raw_data`` at the key ``_variables_label``.

        :param raw_data: List of Dicts
        :type raw_data: List
        :return: Dataframe containing the variables symbolic labels and their cardinalities
        :rtype: pandas.Dataframe
        """
        return self.one_level_normalizing(raw_data, self._array_indx, self._variables_label)

    def read_json_file(self) -> typing.List:
        """Reads the JSON file in the path ``self._file_path``.

        :return: The contents of the json file
        :rtype: List
        """
        with open(self._file_path) as json_file:
            return json.load(json_file)

    def one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame:
        """Extracts the one-level nested data in the list ``raw_data`` at the index ``indx`` at the key ``key``.

        :param raw_data: List of Dicts
        :type raw_data: List
        :param indx: The index of the array from which the data have to be extracted
        :type indx: int
        :param key: the key for the Dicts from which extract data
        :type key: string
        :return: A normalized dataframe
        :rtype: pandas.Dataframe
        """
        selected = raw_data[indx][key]
        return pd.DataFrame(selected)

    def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str) -> typing.List:
        """Extracts the trajectories in ``raw_data`` at the index ``indx`` at the key ``trajectories_key``.

        :param raw_data: List of Dicts
        :type raw_data: List
        :param indx: The index of the array from which the data have to be extracted
        :type indx: int
        :param trajectories_key: the key of the trajectories objects
        :type trajectories_key: string
        :return: A list of dataframes containing the trajectories
        :rtype: List
        """
        samples = raw_data[indx][trajectories_key]
        frames = []
        for sample in samples:
            frames.append(pd.DataFrame(sample))
        return frames

    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
        """Implements the abstract method build_sorter of the :class:`AbstractImporter` for this dataset:
        returns the column labels of ``sample_frame`` without the time key.
        """
        labels = list(sample_frame.columns.values)
        labels.remove(self._time_key)
        return labels

    def clear_data_frame_list(self) -> None:
        """Replaces every dataframe in the list ``_df_samples_list`` with an empty (zero-row) slice of itself.
        """
        for position, frame in enumerate(self._df_samples_list):
            self._df_samples_list[position] = frame.iloc[0:0]

    def dataset_id(self) -> object:
        """Returns the index of the outer JsonArray currently imported."""
        return self._array_indx

    def import_sampled_cims(self, raw_data: typing.List, indx: int, cims_key: str) -> typing.Dict:
        """Imports the synthetic CIMS in the dataset in a dictionary, using variables labels
        as keys for the set of CIMS of a particular node.

        :param raw_data: List of Dicts
        :type raw_data: List
        :param indx: The index of the array from which the data have to be extracted
        :type indx: int
        :param cims_key: the key where the json object cims are placed
        :type cims_key: string
        :return: a dictionary containing the sampled CIMS for all the variables in the net
        :rtype: Dictionary
        """
        cims_for_all_vars = {}
        for var in raw_data[indx][cims_key]:
            per_parents_comb = raw_data[indx][cims_key][var]
            # One numpy matrix per parents combination, in iteration order.
            cims_for_all_vars[var] = [pd.DataFrame(per_parents_comb[p_comb]).to_numpy()
                                      for p_comb in per_parents_comb]
        return cims_for_all_vars
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,65 +0,0 @@ |
|||||||
import json |
|
||||||
import typing |
|
||||||
|
|
||||||
import pandas as pd |
|
||||||
import numpy as np |
|
||||||
|
|
||||||
from .abstract_importer import AbstractImporter |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class SampleImporter(AbstractImporter):
    """Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare
    the data loaded directly by using DataFrame

    :param trajectory_list: the data that describes the trajectories
    :type trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List]
    :param variables: the data that describes the variables with name and cardinality
    :type variables: typing.Union[pd.DataFrame, np.ndarray, typing.List]
    :param prior_net_structure: the data of the real structure, if it exists
    :type prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List]

    :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory
    """

    def __init__(self,
                 trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None,
                 variables: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None,
                 prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None):
        """Constructor Method

        ``variables`` and ``prior_net_structure`` are converted to DataFrames
        when they are given as lists or numpy arrays.
        """
        if isinstance(variables, (list, np.ndarray)):
            variables = pd.DataFrame(variables)
        # Test ``prior_net_structure`` itself: the previous guard re-tested
        # ``variables`` (already a DataFrame at this point), so a list/ndarray
        # structure was never converted.
        if isinstance(prior_net_structure, (list, np.ndarray)):
            prior_net_structure = pd.DataFrame(prior_net_structure)

        super(SampleImporter, self).__init__(trajectory_list=trajectory_list,
                                             variables=variables,
                                             prior_net_structure=prior_net_structure)

    def import_data(self, header_column=None):
        """Sets the sorter and processes all the trajectories into ``concatenated_samples``.

        :param header_column: the variables labels to use as sorter; when None the sorter is built
            from the header of the first trajectory
        """
        if header_column is not None:
            self._sorter = header_column
        else:
            self._sorter = self.build_sorter(self._df_samples_list[0])

        samples_list = self._df_samples_list

        if isinstance(samples_list, np.ndarray):
            # NOTE(review): ``tolist`` yields nested lists, not DataFrames -- confirm
            # that callers passing an ndarray of trajectories are actually supported.
            samples_list = samples_list.tolist()

        self.compute_row_delta_in_all_samples_frames(samples_list)

    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
        """Implements the abstract method build_sorter of the :class:`AbstractImporter` in order to get the ordered variables list.

        The first column label (expected to be the time column) is discarded.
        """
        columns_header = list(sample_frame.columns.values)
        del columns_header[0]
        return columns_header

    def dataset_id(self) -> object:
        """No dataset id is defined for directly loaded samples: returns None."""
        pass
|
@ -1,8 +0,0 @@ |
|||||||
import PyCTBN.PyCTBN.estimators |
|
||||||
from PyCTBN.PyCTBN.estimators import * |
|
||||||
import PyCTBN.PyCTBN.optimizers |
|
||||||
from PyCTBN.PyCTBN.optimizers import * |
|
||||||
import PyCTBN.PyCTBN.structure_graph |
|
||||||
from PyCTBN.PyCTBN.structure_graph import * |
|
||||||
import PyCTBN.PyCTBN.utility |
|
||||||
from PyCTBN.PyCTBN.utility import * |
|
@ -1,5 +0,0 @@ |
|||||||
from .fam_score_calculator import FamScoreCalculator |
|
||||||
from .parameters_estimator import ParametersEstimator |
|
||||||
from .structure_estimator import StructureEstimator |
|
||||||
from .structure_constraint_based_estimator import StructureConstraintBasedEstimator |
|
||||||
from .structure_score_based_estimator import StructureScoreBasedEstimator |
|
@ -1,272 +0,0 @@ |
|||||||
|
|
||||||
import itertools |
|
||||||
import json |
|
||||||
import typing |
|
||||||
|
|
||||||
import networkx as nx |
|
||||||
import numpy as np |
|
||||||
from networkx.readwrite import json_graph |
|
||||||
|
|
||||||
from math import log |
|
||||||
|
|
||||||
from scipy.special import loggamma |
|
||||||
from random import choice |
|
||||||
|
|
||||||
from ..structure_graph.set_of_cims import SetOfCims |
|
||||||
from ..structure_graph.network_graph import NetworkGraph |
|
||||||
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix |
|
||||||
|
|
||||||
|
|
||||||
''' |
|
||||||
|
|
||||||
''' |
|
||||||
|
|
||||||
|
|
||||||
class FamScoreCalculator: |
|
||||||
""" |
|
||||||
Has the task of calculating the FamScore of a node by using a Bayesian score function |
|
||||||
""" |
|
||||||
|
|
||||||
def __init__(self):
    """Constructor Method; nothing is initialized on the instance."""
    #np.seterr('raise')
    return None
|
||||||
|
|
||||||
# region theta |
|
||||||
|
|
||||||
def marginal_likelihood_theta(self,
                              cims: ConditionalIntensityMatrix,
                              alpha_xu: float,
                              alpha_xxu: float):
    """
    Calculate the value of the marginal likelihood over theta of a node, as the
    sum of the contributions of each of its cims

    :param cims: np.array with all the node's cims
    :type cims: np.array
    :param alpha_xu: hyperparameter over the CTBN’s q parameters
    :type alpha_xu: float
    :param alpha_xxu: distributed hyperparameter over the CTBN’s theta parameters
    :type alpha_xxu: float

    :return: the value of the marginal likelihood over theta
    :rtype: float
    """
    per_cim_scores = []
    for cim in cims:
        per_cim_scores.append(
            self.variable_cim_xu_marginal_likelihood_theta(cim, alpha_xu, alpha_xxu))
    return np.sum(per_cim_scores)
|
||||||
|
|
||||||
def variable_cim_xu_marginal_likelihood_theta(self,
                                              cim: ConditionalIntensityMatrix,
                                              alpha_xu: float,
                                              alpha_xxu: float):
    """
    Calculate the value of the marginal likelihood over theta given a cim, as the
    sum of the contributions of every state of the cim

    :param cim: A conditional_intensity_matrix object with the sufficient statistics
    :type cim: class:'ConditionalIntensityMatrix'
    :param alpha_xu: hyperparameter over the CTBN’s q parameters
    :type alpha_xu: float
    :param alpha_xxu: distributed hyperparameter over the CTBN’s theta parameters
    :type alpha_xxu: float

    :return: the value of the marginal likelihood over theta
    :rtype: float
    """
    # the cim length is the number of residence times it stores
    states_count = len(cim._state_residence_times)

    # one contribution per state of the current cim
    per_state_scores = []
    for state in range(states_count):
        per_state_scores.append(
            self.single_cim_xu_marginal_likelihood_theta(state, cim, alpha_xu, alpha_xxu))
    return np.sum(per_state_scores)
|
||||||
|
|
||||||
def single_cim_xu_marginal_likelihood_theta(self,
                                            index: int,
                                            cim: ConditionalIntensityMatrix,
                                            alpha_xu: float,
                                            alpha_xxu: float):
    """
    Calculate the marginal likelihood over theta of the node when it assumes a specific value
    (the state ``index``) under a specific parents' assignment (the given ``cim``)

    :param index: the position of the state inside the cim
    :type index: int
    :param cim: A conditional_intensity_matrix object with the sufficient statistics
    :type cim: class:'ConditionalIntensityMatrix'
    :param alpha_xu: hyperparameter over the CTBN’s q parameters
    :type alpha_xu: float
    :param alpha_xxu: distributed hyperparameter over the CTBN’s theta parameters
    :type alpha_xxu: float

    :return: the value of the marginal likelihood over theta when the node assumes a specific value
    :rtype: float
    """
    other_states = list(range(len(cim._state_residence_times)))

    # remove the index because of the x != x^ condition in the summation
    other_states.remove(index)

    # uncomment for alpha xx not uniform
    #alpha_xxu = alpha_xu * cim.state_transition_matrix[index,index_x_first] / cim.state_transition_matrix[index, index])

    diagonal_term = loggamma(alpha_xu) - loggamma(alpha_xu + cim.state_transition_matrix[index, index])

    off_diagonal_terms = []
    for index_x_first in other_states:
        off_diagonal_terms.append(
            self.single_internal_cim_xxu_marginal_likelihood_theta(
                cim.state_transition_matrix[index, index_x_first],
                alpha_xxu))

    return diagonal_term + np.sum(off_diagonal_terms)
|
||||||
|
|
||||||
|
|
||||||
def single_internal_cim_xxu_marginal_likelihood_theta(self,
                                                      M_xxu_suff_stats: float,
                                                      alpha_xxu: float=1):
    """Calculate the second part of the marginal likelihood over theta formula:
    ``loggamma(alpha_xxu + M[xx'|u]) - loggamma(alpha_xxu)``

    :param M_xxu_suff_stats: value of the sufficient statistic M[xx'|u]
    :type M_xxu_suff_stats: float
    :param alpha_xxu: distributed hyperparameter over the CTBN’s theta parameters, default to 1
    :type alpha_xxu: float

    :return: the value of the marginal likelihood over theta when the node assumes a specific value
    :rtype: float
    """
    with_statistic = loggamma(alpha_xxu + M_xxu_suff_stats)
    without_statistic = loggamma(alpha_xxu)
    return with_statistic - without_statistic
|
||||||
|
|
||||||
# endregion |
|
||||||
|
|
||||||
# region q |
|
||||||
|
|
||||||
def marginal_likelihood_q(self,
                          cims: np.array,
                          tau_xu: float=0.1,
                          alpha_xu: float=1):
    """
    Calculate the value of the marginal likelihood over q of a node, as the
    sum of the contributions of each of its cims

    :param cims: np.array with all the node's cims
    :type cims: np.array
    :param tau_xu: hyperparameter over the CTBN’s q parameters, default to 0.1
    :type tau_xu: float
    :param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 1
    :type alpha_xu: float

    :return: the value of the marginal likelihood over q
    :rtype: float
    """
    per_cim_scores = []
    for cim in cims:
        per_cim_scores.append(self.variable_cim_xu_marginal_likelihood_q(cim, tau_xu, alpha_xu))
    return np.sum(per_cim_scores)
|
||||||
|
|
||||||
def variable_cim_xu_marginal_likelihood_q(self,
                                          cim: ConditionalIntensityMatrix,
                                          tau_xu: float=0.1,
                                          alpha_xu: float=1):
    """
    Calculate the value of the marginal likelihood over q given a cim, as the
    sum of the contributions of every state of the cim

    :param cim: A conditional_intensity_matrix object with the sufficient statistics
    :type cim: class:'ConditionalIntensityMatrix'
    :param tau_xu: hyperparameter over the CTBN’s q parameters, default to 0.1
    :type tau_xu: float
    :param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 1
    :type alpha_xu: float

    :return: the value of the marginal likelihood over q
    :rtype: float
    """
    # the cim length is the number of residence times it stores
    states_count = len(cim._state_residence_times)

    # each state contributes through its diagonal transition entry and its residence time
    per_state_scores = []
    for state in range(states_count):
        per_state_scores.append(
            self.single_cim_xu_marginal_likelihood_q(
                cim.state_transition_matrix[state, state],
                cim._state_residence_times[state],
                tau_xu,
                alpha_xu))
    return np.sum(per_state_scores)
|
||||||
|
|
||||||
|
|
||||||
def single_cim_xu_marginal_likelihood_q(self,
                                        M_xu_suff_stats: float,
                                        T_xu_suff_stats: float,
                                        tau_xu: float = 0.1,
                                        alpha_xu: float = 1):
    """
    Calculate the marginal likelihood on q of the node when it assumes a
    specific value and a specific parents' assignment.

    The returned quantity is
    ``loggamma(alpha + M + 1) + (alpha + 1) * log(tau)
    - loggamma(alpha + 1) - (alpha + M + 1) * log(tau + T)``.

    :param M_xu_suff_stats: value of the sufficient statistic M[x|u]
    :type M_xu_suff_stats: float
    :param T_xu_suff_stats: value of the sufficient statistic T[x|u]
    :type T_xu_suff_stats: float
    :param tau_xu: hyperparameter over the CTBN's q parameters
    :type tau_xu: float
    :param alpha_xu: hyperparameter over the CTBN's q parameters
    :type alpha_xu: float

    :return: the value of the marginal likelihood of the node when it assumes a specific value
    :rtype: float
    """
    updated_alpha = alpha_xu + M_xu_suff_stats + 1
    updated_tau = tau_xu + T_xu_suff_stats

    positive_part = loggamma(updated_alpha) + (log(tau_xu) * (alpha_xu + 1))
    negative_part = loggamma(alpha_xu + 1) + (log(updated_tau) * updated_alpha)
    return positive_part - negative_part
|
||||||
|
|
||||||
# end region |
|
||||||
|
|
||||||
def get_fam_score(self,
                  cims: np.array,
                  tau_xu: float = 0.1,
                  alpha_xu: float = 1):
    """
    Calculate the FamScore value of the node as the sum of the marginal
    likelihood over q and the marginal likelihood over theta.

    :param cims: np.array with all the node's cims
    :type cims: np.array
    :param tau_xu: hyperparameter over the CTBN's q parameters, default to 0.1
    :type tau_xu: float, optional
    :param alpha_xu: hyperparameter over the CTBN's q parameters, default to 1
    :type alpha_xu: float, optional

    :return: the FamScore value of the node
    :rtype: float
    """
    # alpha_xxu is alpha_xu split uniformly: it is divided by the length of the
    # first cim's residence-times vector minus one.
    states_count = len(cims[0]._state_residence_times)
    alpha_xxu = alpha_xu / (states_count - 1)

    q_term = self.marginal_likelihood_q(cims, tau_xu, alpha_xu)
    theta_term = self.marginal_likelihood_theta(cims, alpha_xu, alpha_xxu)
    return q_term + theta_term
|
@ -1,143 +0,0 @@ |
|||||||
import sys |
|
||||||
sys.path.append('../') |
|
||||||
import numpy as np |
|
||||||
|
|
||||||
from ..structure_graph.network_graph import NetworkGraph |
|
||||||
from ..structure_graph.set_of_cims import SetOfCims |
|
||||||
from ..structure_graph.trajectory import Trajectory |
|
||||||
|
|
||||||
|
|
||||||
class ParametersEstimator(object):
    """Has the task of computing the cims of particular node given the trajectories and the net structure
    in the graph ``_net_graph``.

    :param trajectories: the trajectories
    :type trajectories: Trajectory
    :param net_graph: the net structure
    :type net_graph: NetworkGraph
    :_single_set_of_cims: the set of cims object that will hold the cims of the node
    """

    def __init__(self, trajectories: Trajectory, net_graph: NetworkGraph):
        """Constructor Method
        """
        self._trajectories = trajectories
        self._net_graph = net_graph
        # Filled by ``fast_init``; ``compute_parameters_for_node`` requires it.
        self._single_set_of_cims = None

    def fast_init(self, node_id: str) -> None:
        """Initializes all the necessary structures for the parameters estimation for the node ``node_id``.

        :param node_id: the node label
        :type node_id: string
        """
        p_vals = self._net_graph._aggregated_info_about_nodes_parents[2]
        node_states_number = self._net_graph.get_states_number(node_id)
        self._single_set_of_cims = SetOfCims(node_id, p_vals, node_states_number, self._net_graph.p_combs)

    def compute_parameters_for_node(self, node_id: str) -> SetOfCims:
        """Compute the CIMS of the node identified by the label ``node_id``.

        ``fast_init`` must have been called first, since the residence times and
        transition matrices are read from ``_single_set_of_cims``.

        :param node_id: the node label
        :type node_id: string
        :return: A SetOfCims object filled with the computed CIMS
        :rtype: SetOfCims
        """
        node_indx = self._net_graph.get_node_indx(node_id)
        state_res_times = self._single_set_of_cims._state_residence_times
        transition_matrices = self._single_set_of_cims._transition_matrices
        # Both helpers fill the arrays in place.
        ParametersEstimator.compute_state_res_time_for_node(self._trajectories.times,
                                                            self._trajectories.trajectory,
                                                            self._net_graph.time_filtering,
                                                            self._net_graph.time_scalar_indexing_strucure,
                                                            state_res_times)
        ParametersEstimator.compute_state_transitions_for_a_node(node_indx, self._trajectories.complete_trajectory,
                                                                 self._net_graph.transition_filtering,
                                                                 self._net_graph.transition_scalar_indexing_structure,
                                                                 transition_matrices)
        self._single_set_of_cims.build_cims(state_res_times, transition_matrices)
        return self._single_set_of_cims

    @staticmethod
    def compute_state_res_time_for_node(times: np.ndarray, trajectory: np.ndarray,
                                        cols_filter: np.ndarray, scalar_indexes_struct: np.ndarray,
                                        T: np.ndarray) -> None:
        """Compute the state residence times for a node and fill the matrix ``T`` with the results

        Each trajectory row is mapped to a flat integer index by weighting the
        columns selected by ``cols_filter`` with
        ``scalar_indexes_struct / scalar_indexes_struct[0]``; the values in
        ``times`` are then summed per index.

        :param times: the times deltas vector
        :type times: numpy.array
        :param trajectory: the trajectory
        :type trajectory: numpy.ndArray
        :param cols_filter: the columns filtering structure
        :type cols_filter: numpy.array
        :param scalar_indexes_struct: the indexing structure
        :type scalar_indexes_struct: numpy.array
        :param T: the state residence times vectors, overwritten in place
        :type T: numpy.ndArray
        """
        # ``np.int`` was only an alias of the builtin ``int`` and no longer exists
        # in recent NumPy releases, so the builtin is used directly.
        flat_indexes = np.sum(trajectory[:, cols_filter] * scalar_indexes_struct / scalar_indexes_struct[0],
                              axis=1).astype(int)
        T[:] = np.bincount(flat_indexes, times,
                           minlength=scalar_indexes_struct[-1]).reshape(-1, T.shape[1])

    @staticmethod
    def compute_state_transitions_for_a_node(node_indx: int, trajectory: np.ndarray, cols_filter: np.ndarray,
                                             scalar_indexing: np.ndarray, M: np.ndarray) -> None:
        """Compute the state transitions counts for a node and fill the matrices ``M`` with the results.

        After counting, the diagonal of every matrix in ``M`` is replaced with
        the sum of the off-diagonal entries of its row.

        :param node_indx: the index of the node
        :type node_indx: int
        :param trajectory: the trajectory
        :type trajectory: numpy.ndArray
        :param cols_filter: the columns filtering structure
        :type cols_filter: numpy.array
        :param scalar_indexing: the indexing structure
        :type scalar_indexing: numpy.array
        :param M: the state transitions matrices, overwritten in place
        :type M: numpy.ndArray
        """
        # Flat positions of the diagonal entries of every matrix stacked in M.
        diag_indices = np.array([x * M.shape[1] + x % M.shape[1] for x in range(M.shape[0] * M.shape[1])],
                                dtype=np.int64)
        # Keep only the rows whose column (half width + node_indx) is non-negative.
        trj_tmp = trajectory[trajectory[:, int(trajectory.shape[1] / 2) + node_indx].astype(int) >= 0]
        M[:] = np.bincount(np.sum(trj_tmp[:, cols_filter] * scalar_indexing / scalar_indexing[0], axis=1).astype(int),
                           minlength=scalar_indexing[-1]).reshape(-1, M.shape[1], M.shape[2])
        # NOTE(review): the in-place diagonal update relies on ``ravel`` returning a
        # view, which NumPy only does for contiguous arrays - confirm M always is.
        M_raveled = M.ravel()
        M_raveled[diag_indices] = 0
        M_raveled[diag_indices] = np.sum(M, axis=2).ravel()

    def init_sets_cims_container(self):
        # NOTE(review): legacy method - ``acims`` and ``self.net_graph`` are not
        # defined anywhere in this class or in the visible imports, so calling it
        # fails. Kept unchanged only to preserve the public interface.
        self.sets_of_cims_struct = acims.SetsOfCimsContainer(self.net_graph.nodes,
                                                             self.net_graph.nodes_values,
                                                             self.net_graph.get_ordered_by_indx_parents_values_for_all_nodes(),
                                                             self.net_graph.p_combs)

    def compute_parameters(self):
        # NOTE(review): legacy method - it reads ``self.net_graph`` / ``self.sample_path``
        # (never set by ``__init__``) and passes six arguments to the five-parameter
        # ``compute_state_res_time_for_node``. Kept unchanged only to preserve the
        # public interface; use ``compute_parameters_for_node`` instead.
        for indx, aggr in enumerate(zip(self.net_graph.nodes, self.sets_of_cims_struct.sets_of_cims)):
            self.compute_state_res_time_for_node(self.net_graph.get_node_indx(aggr[0]), self.sample_path.trajectories.times,
                                                 self.sample_path.trajectories.trajectory,
                                                 self.net_graph.time_filtering[indx],
                                                 self.net_graph.time_scalar_indexing_strucure[indx],
                                                 aggr[1]._state_residence_times)
            self.compute_state_transitions_for_a_node(self.net_graph.get_node_indx(aggr[0]),
                                                      self.sample_path.trajectories.complete_trajectory,
                                                      self.net_graph.transition_filtering[indx],
                                                      self.net_graph.transition_scalar_indexing_structure[indx],
                                                      aggr[1]._transition_matrices)
            aggr[1].build_cims(aggr[1]._state_residence_times, aggr[1]._transition_matrices)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,238 +0,0 @@ |
|||||||
|
|
||||||
import itertools |
|
||||||
import json |
|
||||||
import typing |
|
||||||
|
|
||||||
import networkx as nx |
|
||||||
import numpy as np |
|
||||||
from networkx.readwrite import json_graph |
|
||||||
import os |
|
||||||
from scipy.stats import chi2 as chi2_dist |
|
||||||
from scipy.stats import f as f_dist |
|
||||||
from tqdm import tqdm |
|
||||||
|
|
||||||
from ..utility.cache import Cache |
|
||||||
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix |
|
||||||
from ..structure_graph.network_graph import NetworkGraph |
|
||||||
from .parameters_estimator import ParametersEstimator |
|
||||||
from .structure_estimator import StructureEstimator |
|
||||||
from ..structure_graph.sample_path import SamplePath |
|
||||||
from ..structure_graph.structure import Structure |
|
||||||
from ..optimizers.constraint_based_optimizer import ConstraintBasedOptimizer |
|
||||||
|
|
||||||
import concurrent.futures |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
import multiprocessing |
|
||||||
from multiprocessing import Pool |
|
||||||
|
|
||||||
|
|
||||||
class StructureConstraintBasedEstimator(StructureEstimator):
    """
    Has the task of estimating the network structure given the trajectories in samplepath by using a constraint-based approach.

    :param sample_path: the _sample_path object containing the trajectories and the real structure
    :type sample_path: SamplePath
    :param exp_test_alfa: the significance level for the exponential Hp test
    :type exp_test_alfa: float
    :param chi_test_alfa: the significance level for the chi Hp test
    :type chi_test_alfa: float
    :param known_edges: the prior known edges, forwarded to ``StructureEstimator``; default to None
    :type known_edges: List, optional
    :param thumb_threshold: threshold compared against the thumb value in ``independence_test``, default to 25
    :type thumb_threshold: int, optional
    :_nodes: the nodes labels
    :_nodes_vals: the nodes cardinalities
    :_nodes_indxs: the nodes indexes
    :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes``
    :_cache: the Cache object
    """

    def __init__(self, sample_path: SamplePath, exp_test_alfa: float, chi_test_alfa: float,
                 known_edges: typing.List = None, thumb_threshold: int = 25):
        # ``None`` replaces the former mutable ``[]`` default; the base class
        # treats both as "no known edges".
        super().__init__(sample_path, known_edges)
        self._exp_test_sign = exp_test_alfa
        self._chi_test_alfa = chi_test_alfa
        self._thumb_threshold = thumb_threshold
        self._cache = Cache()

    def _build_graph_with_test_parent(self, test_parent: str, test_child: str, parent_set: typing.List,
                                      tot_vars_count: int) -> NetworkGraph:
        """Build and fast-initialise the graph ``{parent_set U test_parent} -> test_child``."""
        complete_info = parent_set[:]
        complete_info.append(test_child)
        complete_info.append(test_parent)
        p_set = parent_set[:]
        p_set.insert(0, test_parent)
        bool_mask2 = np.isin(self._nodes, complete_info)
        l2 = list(self._nodes[bool_mask2])
        indxs2 = self._nodes_indxs[bool_mask2]
        vals2 = self._nodes_vals[bool_mask2]
        # The child label is wrapped in a list: passing the bare string to
        # ``itertools.product`` would pair every parent with each *character*
        # of the label, which is wrong for multi-character node names.
        eds2 = list(itertools.product(p_set, [test_child]))
        s2 = Structure(l2, indxs2, vals2, eds2, tot_vars_count)
        g2 = NetworkGraph(s2)
        g2.fast_init(test_child)
        return g2

    def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int,
                      tot_vars_count: int, parent_indx, child_indx) -> bool:
        """Performs a complete independence test on the directed graphs G1 = {test_child U parent_set}
        G2 = {G1 U test_parent} (added as an additional parent of the test_child).
        Generates all the necessary structures and datas to perform the tests.

        :param test_parent: the node label of the test parent
        :type test_parent: string
        :param test_child: the node label of the child
        :type test_child: string
        :param parent_set: the common parent set
        :type parent_set: List
        :param child_states_numb: the cardinality of the ``test_child``
        :type child_states_numb: int
        :param tot_vars_count: the total number of variables in the net
        :type tot_vars_count: int
        :param parent_indx: row index of the edge in ``_removable_edges_matrix``
        :param child_indx: column index of the edge in ``_removable_edges_matrix``
        :return: True iff test_child and test_parent are independent given the sep_set parent_set. False otherwise
        :rtype: bool
        """
        p_set = parent_set[:]

        parents = np.array(parent_set)
        parents = np.append(parents, test_parent)
        sorted_parents = self._nodes[np.isin(self._nodes, parents)]
        # Mask selecting every parent except the one under test.
        cims_filter = sorted_parents != test_parent

        p_set.insert(0, test_parent)
        # Stays None when the cims with the test parent come from the cache.
        g2 = None
        sofc2 = self._cache.find(set(p_set))

        if not sofc2:
            g2 = self._build_graph_with_test_parent(test_parent, test_child, parent_set, tot_vars_count)
            p2 = ParametersEstimator(self._sample_path.trajectories, g2)
            p2.fast_init(test_child)
            sofc2 = p2.compute_parameters_for_node(test_child)
            self._cache.put(set(p_set), sofc2)

        del p_set[0]
        sofc1 = self._cache.find(set(p_set))
        if not sofc1:
            if g2 is None:
                # sofc2 was a cache hit, so the graph was never built; without
                # this the ``remove_node`` call below would hit an unbound name.
                g2 = self._build_graph_with_test_parent(test_parent, test_child, parent_set, tot_vars_count)
            g2.remove_node(test_parent)
            g2.fast_init(test_child)
            p2 = ParametersEstimator(self._sample_path.trajectories, g2)
            p2.fast_init(test_child)
            sofc1 = p2.compute_parameters_for_node(test_child)
            self._cache.put(set(p_set), sofc1)

        # The thumb value is only computed (and only used) for children with
        # more than two states.
        thumb_value = 0.0
        if child_states_numb > 2:
            parent_val = self._sample_path.structure.get_states_number(test_parent)
            bool_mask_vals = np.isin(self._nodes, parent_set)
            parents_vals = self._nodes_vals[bool_mask_vals]
            thumb_value = self.compute_thumb_value(parent_val, child_states_numb, parents_vals)

        for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs):
            cond_cims = sofc2.filter_cims_with_mask(cims_filter, p_comb)
            for cim2 in cond_cims:
                if not self.independence_test(child_states_numb, cim1, cim2, thumb_value, parent_indx, child_indx):
                    return False
        return True

    def independence_test(self, child_states_numb: int, cim1: ConditionalIntensityMatrix,
                          cim2: ConditionalIntensityMatrix, thumb_value: float, parent_indx, child_indx) -> bool:
        """Compute the actual independence test using two cims.
        It is performed first the exponential test and if the null hypothesis is not rejected,
        it is performed also the chi_test.

        For children with more than two states a preliminary check is done: when
        the sum of the diagonal of ``cim1``'s transition matrix divided by
        ``thumb_value`` is below ``_thumb_threshold``, the edge is marked as not
        removable in ``_removable_edges_matrix`` and the test returns False.

        :param child_states_numb: the cardinality of the test child
        :type child_states_numb: int
        :param cim1: a cim belonging to the graph without test parent
        :type cim1: ConditionalIntensityMatrix
        :param cim2: a cim belonging to the graph with test parent
        :type cim2: ConditionalIntensityMatrix
        :param thumb_value: the value produced by ``compute_thumb_value``
        :type thumb_value: float
        :param parent_indx: row index of the edge in ``_removable_edges_matrix``
        :param child_indx: column index of the edge in ``_removable_edges_matrix``
        :return: True iff both tests do NOT reject the null hypothesis of independence. False otherwise.
        :rtype: bool
        """
        M1 = cim1.state_transition_matrix
        M2 = cim2.state_transition_matrix
        r1s = M1.diagonal()
        r2s = M2.diagonal()
        C1 = cim1.cim
        C2 = cim2.cim
        if child_states_numb > 2:
            if (np.sum(np.diagonal(M1)) / thumb_value) < self._thumb_threshold:
                self._removable_edges_matrix[parent_indx][child_indx] = False
                return False
        # Exponential test: two-sided F test on the ratio of the cim diagonals.
        F_stats = C2.diagonal() / C1.diagonal()
        exp_alfa = self._exp_test_sign
        for val in range(0, child_states_numb):
            if F_stats[val] < f_dist.ppf(exp_alfa / 2, r1s[val], r2s[val]) or \
                    F_stats[val] > f_dist.ppf(1 - exp_alfa / 2, r1s[val], r2s[val]):
                return False
        # Chi test on the off-diagonal transition counts.
        M1_no_diag = M1[~np.eye(M1.shape[0], dtype=bool)].reshape(M1.shape[0], -1)
        M2_no_diag = M2[~np.eye(M2.shape[0], dtype=bool)].reshape(
            M2.shape[0], -1)
        chi_2_quantile = chi2_dist.ppf(1 - self._chi_test_alfa, child_states_numb - 1)
        Ks = np.sqrt(r1s / r2s)
        Ls = np.sqrt(r2s / r1s)
        for val in range(0, child_states_numb):
            Chi = np.sum(np.power(Ks[val] * M2_no_diag[val] - Ls[val] * M1_no_diag[val], 2) /
                         (M1_no_diag[val] + M2_no_diag[val]))
            if Chi > chi_2_quantile:
                return False
        return True

    def compute_thumb_value(self, parent_val, child_val, parent_set_vals):
        """Compute the value to test against the thumb_threshold.

        The result is ``(child_val - 1) ** 2 * parent_val`` multiplied by every
        cardinality in ``parent_set_vals``.

        :param parent_val: test parent's variable cardinality
        :type parent_val: int
        :param child_val: test child's variable cardinality
        :type child_val: int
        :param parent_set_vals: the cardinalities of the nodes in the current sep-set
        :type parent_set_vals: List
        :return: the thumb value for the current independence test
        :rtype: int
        """
        df = (child_val - 1) ** 2
        df = df * parent_val
        for v in parent_set_vals:
            df = df * v
        return df

    def one_iteration_of_CTPC_algorithm(self, var_id: str, tot_vars_count: int) -> typing.List:
        """Performs an iteration of the CTPC algorithm using the node ``var_id`` as ``test_child``.

        :param var_id: the node label of the test child
        :type var_id: string
        :param tot_vars_count: the total number of variables in the net
        :type tot_vars_count: int
        :return: whatever ``ConstraintBasedOptimizer.optimize_structure`` returns for the node
        :rtype: List
        """
        optimizer_obj = ConstraintBasedOptimizer(
            node_id=var_id,
            structure_estimator=self,
            tot_vars_count=tot_vars_count)
        return optimizer_obj.optimize_structure()

    def ctpc_algorithm(self, disable_multiprocessing: bool = False):
        """Compute the CTPC algorithm over the entire net.

        :param disable_multiprocessing: when True the nodes are processed sequentially
            in the current process, default to False
        :type disable_multiprocessing: bool, optional
        :return: the set of edges obtained by chaining the per-node results
        :rtype: set
        """
        ctpc_algo = self.one_iteration_of_CTPC_algorithm
        total_vars_numb = self._sample_path.total_variables_count

        # Remove all the edges from the structure
        self._sample_path.structure.clean_structure_edges()

        # Estimate the best parents for each node
        if disable_multiprocessing:
            print("DISABILITATO")
            list_edges_partial = [ctpc_algo(n, total_vars_numb) for n in self._nodes]
        else:
            # One worker per CPU; every call gets the same total variables count.
            cpu_count = multiprocessing.cpu_count()
            total_vars_numb_array = [total_vars_numb] * len(self._nodes)
            with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor:
                list_edges_partial = list(executor.map(ctpc_algo,
                                                       self._nodes,
                                                       total_vars_numb_array))

        return set(itertools.chain.from_iterable(list_edges_partial))

    def estimate_structure(self, disable_multiprocessing: bool = False):
        """Estimate the structure by running ``ctpc_algorithm``.

        :param disable_multiprocessing: forwarded to ``ctpc_algorithm``, default to False
        :type disable_multiprocessing: bool, optional
        :return: the set of estimated edges
        :rtype: set
        """
        return self.ctpc_algorithm(disable_multiprocessing=disable_multiprocessing)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,187 +0,0 @@ |
|||||||
|
|
||||||
import itertools |
|
||||||
import json |
|
||||||
import typing |
|
||||||
|
|
||||||
import matplotlib.pyplot as plt |
|
||||||
import networkx as nx |
|
||||||
import numpy as np |
|
||||||
from networkx.readwrite import json_graph |
|
||||||
|
|
||||||
from abc import ABC |
|
||||||
|
|
||||||
import abc |
|
||||||
|
|
||||||
from ..utility.cache import Cache |
|
||||||
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix |
|
||||||
from ..structure_graph.network_graph import NetworkGraph |
|
||||||
from .parameters_estimator import ParametersEstimator |
|
||||||
from ..structure_graph.sample_path import SamplePath |
|
||||||
from ..structure_graph.structure import Structure |
|
||||||
|
|
||||||
|
|
||||||
class StructureEstimator(object):
    """Has the task of estimating the network structure given the trajectories in ``samplepath``.

    :param sample_path: the _sample_path object containing the trajectories and the real structure
    :type sample_path: SamplePath
    :param known_edges: the prior known edges, default to None
    :type known_edges: List, optional
    :_nodes: the nodes labels
    :_nodes_vals: the nodes cardinalities
    :_nodes_indxs: the nodes indexes
    :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes``
    """

    def __init__(self, sample_path: SamplePath, known_edges: typing.List = None):
        self._sample_path = sample_path
        self._nodes = np.array(self._sample_path.structure.nodes_labels)
        self._nodes_vals = self._sample_path.structure.nodes_values
        self._nodes_indxs = self._sample_path.structure.nodes_indexes
        self._removable_edges_matrix = self.build_removable_edges_matrix(known_edges)
        self._complete_graph = StructureEstimator.build_complete_graph(self._sample_path.structure.nodes_labels)

    def build_removable_edges_matrix(self, known_edges: typing.List):
        """Builds a boolean matrix which shows if an edge could be removed or not, based on the prior knowledge given.

        Every entry starts as True; the entry ``[i][j]`` of each known edge
        ``(label_i, label_j)`` is set to False.

        :param known_edges: the list of known edges, as pairs of nodes labels; may be None or empty
        :type known_edges: List
        :return: a boolean matrix
        :rtype: np.ndarray
        """
        tot_vars_count = self._sample_path.total_variables_count
        complete_adj_matrix = np.full((tot_vars_count, tot_vars_count), True)
        if known_edges:
            for edge in known_edges:
                i = self._sample_path.structure.get_node_indx(edge[0])
                j = self._sample_path.structure.get_node_indx(edge[1])
                complete_adj_matrix[i][j] = False
        return complete_adj_matrix

    @staticmethod
    def build_complete_graph(node_ids: typing.List) -> nx.DiGraph:
        """Builds a complete directed graph (no self loops) given the nodes labels in the list ``node_ids``:

        :param node_ids: the list of nodes labels
        :type node_ids: List
        :return: a complete Digraph Object
        :rtype: networkx.DiGraph
        """
        complete_graph = nx.DiGraph()
        complete_graph.add_nodes_from(node_ids)
        complete_graph.add_edges_from(itertools.permutations(node_ids, 2))
        return complete_graph

    @staticmethod
    def generate_possible_sub_sets_of_size(u: typing.List, size: int, parent_label: str):
        """Creates a list containing all possible subsets of the list ``u`` of size ``size``,
        that do not contain the node identified by ``parent_label``.

        :param u: the list of nodes
        :type u: List
        :param size: the size of the subsets
        :type size: int
        :param parent_label: the node to exclude in the subsets generation
        :type parent_label: string
        :raises ValueError: if ``parent_label`` is not in ``u``
        :return: an Iterator Object containing a list of lists
        :rtype: Iterator
        """
        # Work on a copy so the caller's list is left untouched.
        list_without_test_parent = u[:]
        list_without_test_parent.remove(parent_label)
        return map(list, itertools.combinations(list_without_test_parent, size))

    def save_results(self) -> None:
        """Save the estimated Structure to a .json file.
        The file name is ``results_`` followed by the input dataset name (without directory
        and extension) and the dataset id; it is opened relative to the current working directory.
        """
        res = json_graph.node_link_data(self._complete_graph)
        name = self._sample_path._importer.file_path.rsplit('/', 1)[-1]
        name = name.split('.', 1)[0]
        name += '_' + str(self._sample_path._importer.dataset_id())
        name += '.json'
        file_name = 'results_' + name
        with open(file_name, 'w') as f:
            json.dump(res, f)

    def remove_diagonal_elements(self, matrix):
        """Return a copy of the square ``matrix`` without its main diagonal.

        :param matrix: an ``m x m`` array
        :type matrix: numpy.ndArray
        :return: an ``m x (m - 1)`` array holding the off-diagonal entries row by row
        :rtype: numpy.ndArray
        """
        m = matrix.shape[0]
        strided = np.lib.stride_tricks.as_strided
        s0, s1 = matrix.strides
        # Starting one element in and stepping by a row plus one column skips
        # exactly the diagonal entries.
        return strided(matrix.ravel()[1:], shape=(m - 1, m), strides=(s0 + s1, s1)).reshape(m, -1)

    @abc.abstractmethod
    def estimate_structure(self) -> typing.List:
        """Abstract method to estimate the structure

        NOTE(review): the class does not derive from ``abc.ABC``, so the decorator
        does not prevent instantiation; subclasses are still expected to override it.

        :return: List of estimated edges
        :rtype: Typing.List
        """
        pass

    def adjacency_matrix(self) -> np.ndarray:
        """Converts the estimated structure ``_complete_graph`` to a boolean adjacency matrix representation.

        :return: The adjacency matrix of the graph ``_complete_graph``
        :rtype: numpy.ndArray
        """
        # ``nx.adj_matrix`` was only an alias and is gone from recent NetworkX
        # releases; ``adjacency_matrix`` is the supported spelling.
        return nx.adjacency_matrix(self._complete_graph).toarray().astype(bool)

    def spurious_edges(self) -> typing.List:
        """Return the spurious edges present in the estimated structure, if a prior net structure is present in
        ``_sample_path.structure``.

        NOTE(review): the value returned is the edge view of
        ``nx.difference(real_graph, _complete_graph)``, i.e. the edges of the prior
        structure that are absent from ``_complete_graph`` - confirm that this is
        the intended meaning of "spurious".

        :raises RuntimeError: if the sample path has no prior net structure
        :return: A list containing the spurious edges
        :rtype: List
        """
        if not self._sample_path.has_prior_net_structure:
            raise RuntimeError("Can not compute spurious edges with no prior net structure!")
        real_graph = nx.DiGraph()
        real_graph.add_nodes_from(self._sample_path.structure.nodes_labels)
        real_graph.add_edges_from(self._sample_path.structure.edges)
        return nx.difference(real_graph, self._complete_graph).edges

    def save_plot_estimated_structure_graph(self) -> None:
        """Plot the estimated structure in a graphical model style.
        Spurious edges are colored in red.

        The figure is saved as a ``.png`` named after the input dataset and the dataset id.
        """
        # Imported here because this module's import block does not provide ``os``.
        import os

        graph_to_draw = nx.DiGraph()
        spurious_edges = self.spurious_edges()
        non_spurious_edges = list(set(self._complete_graph.edges) - set(spurious_edges))
        print(non_spurious_edges)
        edges_colors = ['red' if edge in spurious_edges else 'black' for edge in self._complete_graph.edges]
        graph_to_draw.add_edges_from(spurious_edges)
        graph_to_draw.add_edges_from(non_spurious_edges)
        pos = nx.spring_layout(graph_to_draw, k=0.5*1/np.sqrt(len(graph_to_draw.nodes())), iterations=50, scale=10)
        options = {
            "node_size": 2000,
            "node_color": "white",
            "edgecolors": "black",
            'linewidths': 2,
            "with_labels": True,
            "font_size": 13,
            'connectionstyle': 'arc3, rad = 0.1',
            "arrowsize": 15,
            "arrowstyle": '<|-',
            "width": 1,
            "edge_color": edges_colors,
        }

        nx.draw(graph_to_draw, pos, **options)
        ax = plt.gca()
        ax.margins(0.20)
        plt.axis("off")
        name = self._sample_path._importer.file_path.rsplit('/', 1)[-1]
        name = name.split('.', 1)[0]
        name += '_' + str(self._sample_path._importer.dataset_id())
        name += '.png'
        plt.savefig(name)
        plt.clf()
        print("Estimated Structure Plot Saved At: ", os.path.abspath(name))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,244 +0,0 @@ |
|||||||
|
|
||||||
import itertools |
|
||||||
import json |
|
||||||
import typing |
|
||||||
|
|
||||||
import networkx as nx |
|
||||||
import numpy as np |
|
||||||
from networkx.readwrite import json_graph |
|
||||||
|
|
||||||
from random import choice |
|
||||||
|
|
||||||
import concurrent.futures |
|
||||||
|
|
||||||
import copy |
|
||||||
|
|
||||||
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix |
|
||||||
from ..structure_graph.network_graph import NetworkGraph |
|
||||||
from .parameters_estimator import ParametersEstimator |
|
||||||
from .structure_estimator import StructureEstimator |
|
||||||
from ..structure_graph.sample_path import SamplePath |
|
||||||
from ..structure_graph.structure import Structure |
|
||||||
from .fam_score_calculator import FamScoreCalculator |
|
||||||
from ..optimizers.hill_climbing_search import HillClimbing |
|
||||||
from ..optimizers.tabu_search import TabuSearch |
|
||||||
|
|
||||||
|
|
||||||
import multiprocessing |
|
||||||
from multiprocessing import Pool |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class StructureScoreBasedEstimator(StructureEstimator): |
|
||||||
""" |
|
||||||
Has the task of estimating the network structure given the trajectories in samplepath by |
|
||||||
using a score based approach. |
|
||||||
|
|
||||||
:param sample_path: the _sample_path object containing the trajectories and the real structure |
|
||||||
:type sample_path: SamplePath |
|
||||||
:param tau_xu: hyperparameter over the CTBN’s q parameters, default to 0.1 |
|
||||||
:type tau_xu: float, optional |
|
||||||
:param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 1 |
|
||||||
:type alpha_xu: float, optional |
|
||||||
:param known_edges: List of known edges, default to [] |
|
||||||
:type known_edges: List, optional |
|
||||||
|
|
||||||
""" |
|
||||||
|
|
||||||
def __init__(self, sample_path: SamplePath, tau_xu: float = 0.1, alpha_xu: float = 1,
             known_edges: typing.List = None):
    """Store the score hyperparameters and initialise the base estimator.

    :param sample_path: the _sample_path object containing the trajectories and the real structure
    :type sample_path: SamplePath
    :param tau_xu: hyperparameter over the CTBN's q parameters, default to 0.1
    :type tau_xu: float, optional
    :param alpha_xu: hyperparameter over the CTBN's q parameters, default to 1
    :type alpha_xu: float, optional
    :param known_edges: List of known edges, default to None (no known edges)
    :type known_edges: List, optional
    """
    # ``None`` replaces the former mutable ``[]`` default; the value is passed
    # straight to the base class constructor.
    super().__init__(sample_path, known_edges)
    self.tau_xu = tau_xu
    self.alpha_xu = alpha_xu
|
||||||
|
|
||||||
|
|
||||||
def estimate_structure(self, max_parents:int = None, iterations_number:int= 40, |
|
||||||
patience:int = None, tabu_length:int = None, tabu_rules_duration:int = None, |
|
||||||
optimizer: str = 'tabu',disable_multiprocessing:bool= False ): |
|
||||||
""" |
|
||||||
Compute the score-based algorithm to find the optimal structure |
|
||||||
|
|
||||||
:param max_parents: maximum number of parents for each variable. If None, disabled, default to None |
|
||||||
:type max_parents: int, optional |
|
||||||
:param iterations_number: maximum number of optimization algorithm's iteration, default to 40 |
|
||||||
:type iterations_number: int, optional |
|
||||||
:param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None |
|
||||||
:type patience: int, optional |
|
||||||
:param tabu_length: maximum lenght of the data structures used in the optimization process, default to None |
|
||||||
:type tabu_length: int, optional |
|
||||||
:param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None |
|
||||||
:type tabu_rules_duration: int, optional |
|
||||||
:param optimizer: name of the optimizer algorithm. Possible values: 'hill' (Hill climbing),'tabu' (tabu search), defualt to 'tabu' |
|
||||||
:type optimizer: string, optional |
|
||||||
:param disable_multiprocessing: true if you desire to disable the multiprocessing operations, default to False |
|
||||||
:type disable_multiprocessing: Boolean, optional |
|
||||||
""" |
|
||||||
'Save the true edges structure in tuples' |
|
||||||
true_edges = copy.deepcopy(self._sample_path.structure.edges) |
|
||||||
true_edges = set(map(tuple, true_edges)) |
|
||||||
|
|
||||||
'Remove all the edges from the structure' |
|
||||||
self._sample_path.structure.clean_structure_edges() |
|
||||||
|
|
||||||
estimate_parents = self.estimate_parents |
|
||||||
|
|
||||||
n_nodes= len(self._nodes) |
|
||||||
|
|
||||||
l_max_parents= [max_parents] * n_nodes |
|
||||||
l_iterations_number = [iterations_number] * n_nodes |
|
||||||
l_patience = [patience] * n_nodes |
|
||||||
l_tabu_length = [tabu_length] * n_nodes |
|
||||||
l_tabu_rules_duration = [tabu_rules_duration] * n_nodes |
|
||||||
l_optimizer = [optimizer] * n_nodes |
|
||||||
|
|
||||||
|
|
||||||
'get the number of CPU' |
|
||||||
cpu_count = multiprocessing.cpu_count() |
|
||||||
print(f"CPU COUNT: {cpu_count}") |
|
||||||
|
|
||||||
if disable_multiprocessing: |
|
||||||
cpu_count = 1 |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#with get_context("spawn").Pool(processes=cpu_count) as pool: |
|
||||||
#with multiprocessing.Pool(processes=cpu_count) as pool: |
|
||||||
|
|
||||||
'Estimate the best parents for each node' |
|
||||||
if disable_multiprocessing: |
|
||||||
list_edges_partial = [estimate_parents(n,max_parents,iterations_number,patience,tabu_length,tabu_rules_duration,optimizer) for n in self._nodes] |
|
||||||
else: |
|
||||||
with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor: |
|
||||||
list_edges_partial = executor.map(estimate_parents, |
|
||||||
self._nodes, |
|
||||||
l_max_parents, |
|
||||||
l_iterations_number, |
|
||||||
l_patience, |
|
||||||
l_tabu_length, |
|
||||||
l_tabu_rules_duration, |
|
||||||
l_optimizer) |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#list_edges_partial = p.map(estimate_parents, self._nodes) |
|
||||||
#list_edges_partial= estimate_parents('Q',max_parents,iterations_number,patience,tabu_length,tabu_rules_duration,optimizer) |
|
||||||
|
|
||||||
'Concatenate all the edges list' |
|
||||||
set_list_edges = set(itertools.chain.from_iterable(list_edges_partial)) |
|
||||||
|
|
||||||
#print('-------------------------') |
|
||||||
|
|
||||||
|
|
||||||
'calculate precision and recall' |
|
||||||
n_missing_edges = 0 |
|
||||||
n_added_fake_edges = 0 |
|
||||||
|
|
||||||
try: |
|
||||||
n_added_fake_edges = len(set_list_edges.difference(true_edges)) |
|
||||||
|
|
||||||
n_missing_edges = len(true_edges.difference(set_list_edges)) |
|
||||||
|
|
||||||
n_true_positive = len(true_edges) - n_missing_edges |
|
||||||
|
|
||||||
precision = n_true_positive / (n_true_positive + n_added_fake_edges) |
|
||||||
|
|
||||||
recall = n_true_positive / (n_true_positive + n_missing_edges) |
|
||||||
|
|
||||||
|
|
||||||
# print(f"n archi reali non trovati: {n_missing_edges}") |
|
||||||
# print(f"n archi non reali aggiunti: {n_added_fake_edges}") |
|
||||||
print(true_edges) |
|
||||||
print(set_list_edges) |
|
||||||
print(f"precision: {precision} ") |
|
||||||
print(f"recall: {recall} ") |
|
||||||
except Exception as e: |
|
||||||
print(f"errore: {e}") |
|
||||||
|
|
||||||
return set_list_edges |
|
||||||
|
|
||||||
|
|
||||||
def estimate_parents(self,node_id:str, max_parents:int = None, iterations_number:int= 40, |
|
||||||
patience:int = 10, tabu_length:int = None, tabu_rules_duration:int=5, |
|
||||||
optimizer:str = 'hill' ): |
|
||||||
""" |
|
||||||
Use the FamScore of a node in order to find the best parent nodes |
|
||||||
|
|
||||||
:param node_id: current node's id |
|
||||||
:type node_id: string |
|
||||||
:param max_parents: maximum number of parents for each variable. If None, disabled, default to None |
|
||||||
:type max_parents: int, optional |
|
||||||
:param iterations_number: maximum number of optimization algorithm's iteration, default to 40 |
|
||||||
:type iterations_number: int, optional |
|
||||||
:param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None |
|
||||||
:type patience: int, optional |
|
||||||
:param tabu_length: maximum lenght of the data structures used in the optimization process, default to None |
|
||||||
:type tabu_length: int, optional |
|
||||||
:param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None |
|
||||||
:type tabu_rules_duration: int, optional |
|
||||||
:param optimizer: name of the optimizer algorithm. Possible values: 'hill' (Hill climbing),'tabu' (tabu search), defualt to 'tabu' |
|
||||||
:type optimizer: string, optional |
|
||||||
|
|
||||||
:return: A list of the best edges for the currente node |
|
||||||
:rtype: List |
|
||||||
""" |
|
||||||
|
|
||||||
"choose the optimizer algotithm" |
|
||||||
if optimizer == 'tabu': |
|
||||||
optimizer = TabuSearch( |
|
||||||
node_id = node_id, |
|
||||||
structure_estimator = self, |
|
||||||
max_parents = max_parents, |
|
||||||
iterations_number = iterations_number, |
|
||||||
patience = patience, |
|
||||||
tabu_length = tabu_length, |
|
||||||
tabu_rules_duration = tabu_rules_duration) |
|
||||||
else: #if optimizer == 'hill': |
|
||||||
optimizer = HillClimbing( |
|
||||||
node_id = node_id, |
|
||||||
structure_estimator = self, |
|
||||||
max_parents = max_parents, |
|
||||||
iterations_number = iterations_number, |
|
||||||
patience = patience) |
|
||||||
|
|
||||||
"call the optmizer's function that calculates the current node's parents" |
|
||||||
return optimizer.optimize_structure() |
|
||||||
|
|
||||||
|
|
||||||
def get_score_from_graph(self, |
|
||||||
graph: NetworkGraph, |
|
||||||
node_id:str): |
|
||||||
""" |
|
||||||
Get the FamScore of a node |
|
||||||
|
|
||||||
:param node_id: current node's id |
|
||||||
:type node_id: string |
|
||||||
:param graph: current graph to be computed |
|
||||||
:type graph: class:'NetworkGraph' |
|
||||||
|
|
||||||
|
|
||||||
:return: The FamSCore for this graph structure |
|
||||||
:rtype: float |
|
||||||
""" |
|
||||||
|
|
||||||
'inizialize the graph for a single node' |
|
||||||
graph.fast_init(node_id) |
|
||||||
|
|
||||||
params_estimation = ParametersEstimator(self._sample_path.trajectories, graph) |
|
||||||
|
|
||||||
'Inizialize and compute parameters for node' |
|
||||||
params_estimation.fast_init(node_id) |
|
||||||
SoCims = params_estimation.compute_parameters_for_node(node_id) |
|
||||||
|
|
||||||
'calculate the FamScore for the node' |
|
||||||
fam_score_obj = FamScoreCalculator() |
|
||||||
|
|
||||||
score = fam_score_obj.get_fam_score(SoCims.actual_cims,tau_xu = self.tau_xu,alpha_xu=self.alpha_xu) |
|
||||||
|
|
||||||
#print(f" lo score per {node_id} risulta: {score} ") |
|
||||||
return score |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,4 +0,0 @@ |
|||||||
from .optimizer import Optimizer |
|
||||||
from .tabu_search import TabuSearch |
|
||||||
from .hill_climbing_search import HillClimbing |
|
||||||
from .constraint_based_optimizer import ConstraintBasedOptimizer |
|
@ -1,87 +0,0 @@ |
|||||||
|
|
||||||
import itertools |
|
||||||
import json |
|
||||||
import typing |
|
||||||
|
|
||||||
import networkx as nx |
|
||||||
import numpy as np |
|
||||||
|
|
||||||
from random import choice |
|
||||||
|
|
||||||
from abc import ABC |
|
||||||
|
|
||||||
import copy |
|
||||||
|
|
||||||
|
|
||||||
from .optimizer import Optimizer |
|
||||||
from ..estimators.structure_estimator import StructureEstimator |
|
||||||
from ..structure_graph.network_graph import NetworkGraph |
|
||||||
|
|
||||||
|
|
||||||
class ConstraintBasedOptimizer(Optimizer):
    """
    Optimizer class that implement a CTPC Algorithm

    :param node_id: current node's id
    :type node_id: string
    :param structure_estimator: a structure estimator object with the information about the net
    :type structure_estimator: class:'StructureEstimator'
    :param tot_vars_count: number of variables in the dataset
    :type tot_vars_count: int
    """

    def __init__(self,
                 node_id: str,
                 structure_estimator: StructureEstimator,
                 tot_vars_count: int
                 ):
        """
        Store the node, the estimator and the total number of variables.
        """
        super().__init__(node_id, structure_estimator)
        self.tot_vars_count = tot_vars_count

    def optimize_structure(self):
        """
        Run the CTPC optimization process for the node of this optimizer.

        Starts from a graph in which every other node is a parent of the node,
        then drops each parent edge for which ``complete_test`` succeeds on some
        subset of the remaining candidates, using growing subset sizes.
        The estimator's cache is cleared before returning.

        :return: the estimated structure for the node
        :rtype: List
        """
        print("##################TESTING VAR################", self.node_id)

        graph = NetworkGraph(self.structure_estimator._sample_path.structure)

        # Every node except the current one starts as a candidate parent
        candidates = [label for label in self.structure_estimator._sample_path.structure.nodes_labels
                      if label != self.node_id]
        for candidate in candidates:
            graph.add_edges([(candidate, self.node_id)])

        child_states_numb = self.structure_estimator._sample_path.structure.get_states_number(self.node_id)

        subset_size = 0
        while subset_size < len(candidates):
            position = 0
            while position < len(candidates):
                test_parent = candidates[position]
                i = self.structure_estimator._sample_path.structure.get_node_indx(test_parent)
                j = self.structure_estimator._sample_path.structure.get_node_indx(self.node_id)
                removed = False
                # Edges flagged as not removable are never tested
                if self.structure_estimator._removable_edges_matrix[i][j]:
                    subsets = StructureEstimator.generate_possible_sub_sets_of_size(
                        candidates, subset_size, test_parent)
                    # any() stops at the first subset for which the test succeeds
                    removed = any(
                        self.structure_estimator.complete_test(
                            test_parent, self.node_id, parents_set,
                            child_states_numb, self.tot_vars_count, i, j)
                        for parents_set in subsets)
                    if removed:
                        graph.remove_edges([(test_parent, self.node_id)])
                        candidates.remove(test_parent)
                # After a removal the next candidate slides into the same position
                if not removed:
                    position += 1
            subset_size += 1

        self.structure_estimator._cache.clear()
        return graph.edges
|
@ -1,135 +0,0 @@ |
|||||||
|
|
||||||
import itertools |
|
||||||
import json |
|
||||||
import typing |
|
||||||
|
|
||||||
import networkx as nx |
|
||||||
import numpy as np |
|
||||||
|
|
||||||
from random import choice |
|
||||||
|
|
||||||
from abc import ABC |
|
||||||
|
|
||||||
|
|
||||||
from .optimizer import Optimizer |
|
||||||
from ..estimators.structure_estimator import StructureEstimator |
|
||||||
from ..structure_graph.network_graph import NetworkGraph |
|
||||||
|
|
||||||
|
|
||||||
class HillClimbing(Optimizer):
    """
    Optimizer class that implement Hill Climbing Search

    :param node_id: current node's id
    :type node_id: string
    :param structure_estimator: a structure estimator object with the information about the net
    :type structure_estimator: class:'StructureEstimator'
    :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
    :type max_parents: int, optional
    :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
    :type iterations_number: int, optional
    :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None
    :type patience: int, optional
    """

    def __init__(self,
                 node_id: str,
                 structure_estimator: StructureEstimator,
                 max_parents: int = None,
                 iterations_number: int = 40,
                 patience: int = None
                 ):
        """
        Constructor
        """
        super().__init__(node_id, structure_estimator)
        self.max_parents = max_parents
        self.iterations_number = iterations_number
        self.patience = patience

    def optimize_structure(self) -> typing.List:
        """
        Compute Optimization process for a structure_estimator by using a Hill Climbing Algorithm

        At every iteration a random candidate parent is toggled (added if absent,
        removed if present); the move is kept only when it strictly improves the
        score returned by the estimator, otherwise it is undone.

        :return: the estimated structure for the node
        :rtype: List
        """
        # Create the graph for the single node
        graph = NetworkGraph(self.structure_estimator._sample_path.structure)

        # Get the index for the current node
        node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id)

        # Parents imposed by prior knowledge
        prior_parents = set()

        # Add the edges from prior knowledge: an entry that is not removable
        # is an edge that must be part of the starting structure.
        for i, row in enumerate(self.structure_estimator._removable_edges_matrix):
            if not row[node_index]:
                parent_id = self.structure_estimator._sample_path._structure.get_node_id(i)
                prior_parents.add(parent_id)
                graph.add_edges([(parent_id, self.node_id)])

        # Get all the possible parents
        other_nodes = [node for node in
                       self.structure_estimator._sample_path.structure.nodes_labels
                       if node != self.node_id and node not in prior_parents]

        # Without candidates there is nothing to search: random.choice would
        # raise IndexError on the empty list, so return the prior structure.
        if not other_nodes:
            print(f"finito variabile: {self.node_id}")
            return graph.edges

        actual_best_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)

        patience_count = 0
        for _ in range(self.iterations_number):
            # Choose a new random edge
            current_new_parent = choice(other_nodes)
            current_edge = (current_new_parent, self.node_id)
            added = False
            parent_removed = None

            if graph.has_edge(current_edge):
                graph.remove_edges([current_edge])
            else:
                # Check the max_parents constraint
                if self.max_parents is not None:
                    parents_list = graph.get_parents_by_id(self.node_id)
                    if len(parents_list) >= self.max_parents:
                        # NOTE(review): the evicted parent is drawn from all the
                        # current parents, which may include prior-knowledge
                        # edges - confirm this is intended.
                        parent_removed = (choice(parents_list), self.node_id)
                        graph.remove_edges([parent_removed])
                graph.add_edges([current_edge])
                added = True

            current_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)

            if current_score > actual_best_score:
                # Update the current best score
                actual_best_score = current_score
                patience_count = 0
            else:
                # Undo the last update
                if added:
                    graph.remove_edges([current_edge])
                    # If a parent was removed, add it again to the graph
                    if parent_removed is not None:
                        graph.add_edges([parent_removed])
                else:
                    graph.add_edges([current_edge])
                # Update the patience count
                patience_count += 1

            if self.patience is not None and patience_count > self.patience:
                break

        print(f"finito variabile: {self.node_id}")
        return graph.edges
|
@ -1,39 +0,0 @@ |
|||||||
|
|
||||||
import itertools |
|
||||||
import json |
|
||||||
import typing |
|
||||||
|
|
||||||
import networkx as nx |
|
||||||
import numpy as np |
|
||||||
|
|
||||||
import abc |
|
||||||
|
|
||||||
from ..estimators.structure_estimator import StructureEstimator |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Optimizer(abc.ABC):
    """
    Abstract base class shared by all the PyCTBN optimizers.

    :param node_id: the node label
    :type node_id: string
    :param structure_estimator: A structureEstimator Object to predict the structure
    :type structure_estimator: class:'StructureEstimator'
    """

    def __init__(self, node_id: str, structure_estimator: StructureEstimator):
        # Keep the estimator first, then the label of the node to optimize
        self.structure_estimator = structure_estimator
        self.node_id = node_id

    @abc.abstractmethod
    def optimize_structure(self) -> typing.List:
        """
        Run the optimization process for the structure_estimator.

        Concrete optimizers must override this method.

        :return: the estimated structure for the node
        :rtype: List
        """
|
@ -1,199 +0,0 @@ |
|||||||
|
|
||||||
import itertools |
|
||||||
import json |
|
||||||
import typing |
|
||||||
|
|
||||||
import networkx as nx |
|
||||||
import numpy as np |
|
||||||
|
|
||||||
from random import choice,sample |
|
||||||
|
|
||||||
from abc import ABC |
|
||||||
|
|
||||||
|
|
||||||
from .optimizer import Optimizer |
|
||||||
from ..estimators.structure_estimator import StructureEstimator |
|
||||||
from ..structure_graph.network_graph import NetworkGraph |
|
||||||
|
|
||||||
import queue |
|
||||||
|
|
||||||
|
|
||||||
class TabuSearch(Optimizer):
    """
    Optimizer class that implement Tabu Search

    :param node_id: current node's id
    :type node_id: string
    :param structure_estimator: a structure estimator object with the information about the net
    :type structure_estimator: class:'StructureEstimator'
    :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
    :type max_parents: int, optional
    :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
    :type iterations_number: int, optional
    :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None
    :type patience: int, optional
    :param tabu_length: maximum length of the data structures used in the optimization process.
        If None, the number of candidate parents is used, default to None
    :type tabu_length: int, optional
    :param tabu_rules_duration: number of iterations in which each rule keeps its value.
        If None, the number of candidate parents is used, default to None
    :type tabu_rules_duration: int, optional
    """

    def __init__(self,
                 node_id: str,
                 structure_estimator: StructureEstimator,
                 max_parents: int = None,
                 iterations_number: int = 40,
                 patience: int = None,
                 tabu_length: int = None,
                 tabu_rules_duration=None
                 ):
        """
        Constructor
        """
        super().__init__(node_id, structure_estimator)
        self.max_parents = max_parents
        self.iterations_number = iterations_number
        self.patience = patience
        self.tabu_length = tabu_length
        self.tabu_rules_duration = tabu_rules_duration

    def optimize_structure(self) -> typing.List:
        """
        Compute Optimization process for a structure_estimator by using a Tabu Search Algorithm

        At every iteration a candidate parent that is not in the tabu list is
        toggled (added if absent, removed if present); the move is kept only when
        it strictly improves the score, otherwise it is undone. The chosen node
        then enters the tabu list.

        :return: the estimated structure for the node
        :rtype: List
        """
        print(f"tabu search is processing the structure of {self.node_id}")

        # Create the graph for the single node
        graph = NetworkGraph(self.structure_estimator._sample_path.structure)

        # Get the index for the current node
        node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id)

        # Parents imposed by prior knowledge
        prior_parents = set()

        # Add the edges from prior knowledge: an entry that is not removable
        # is an edge that must be part of the starting structure.
        for i, row in enumerate(self.structure_estimator._removable_edges_matrix):
            if not row[node_index]:
                parent_id = self.structure_estimator._sample_path._structure.get_node_id(i)
                prior_parents.add(parent_id)
                graph.add_edges([(parent_id, self.node_id)])

        # Get all the possible parents
        other_nodes = {node for node in
                       self.structure_estimator._sample_path.structure.nodes_labels
                       if node != self.node_id and node not in prior_parents}

        # Without candidates there is nothing to search. The loop below would
        # otherwise block forever on an empty queue, so return the prior structure.
        if not other_nodes:
            print(f"finito variabile: {self.node_id}")
            return graph.edges

        # Calculate the score for the starting structure
        actual_best_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)

        # Initialize tabu_length and tabu_rules_duration if None
        if self.tabu_length is None:
            self.tabu_length = len(other_nodes)
        if self.tabu_rules_duration is None:
            # The original assigned to a misspelled attribute, leaving this None
            self.tabu_rules_duration = len(other_nodes)

        # Initialize the data structures: the set gives fast membership tests,
        # the queue remembers the insertion order.
        tabu_set = set()
        tabu_queue = queue.Queue()

        patience_count = 0
        tabu_count = 0
        for _ in range(self.iterations_number):
            current_possible_nodes = other_nodes.difference(tabu_set)

            # Choose a new random edge according to the tabu restriction
            if current_possible_nodes:
                # random.sample no longer accepts a set, so draw from a tuple
                current_new_parent = choice(tuple(current_possible_nodes))
            else:
                # Every candidate is tabu: release the oldest one and use it
                current_new_parent = tabu_queue.get()
                tabu_set.remove(current_new_parent)

            current_edge = (current_new_parent, self.node_id)
            added = False
            parent_removed = None

            if graph.has_edge(current_edge):
                graph.remove_edges([current_edge])
            else:
                # Check the max_parents constraint
                if self.max_parents is not None:
                    parents_list = graph.get_parents_by_id(self.node_id)
                    if len(parents_list) >= self.max_parents:
                        parent_removed = (choice(parents_list), self.node_id)
                        graph.remove_edges([parent_removed])
                graph.add_edges([current_edge])
                added = True

            current_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)

            if current_score > actual_best_score:
                # Update the current best score
                actual_best_score = current_score
                patience_count = 0
            else:
                # Undo the last update
                if added:
                    graph.remove_edges([current_edge])
                    # If a parent was removed, add it again to the graph
                    if parent_removed is not None:
                        graph.add_edges([parent_removed])
                else:
                    graph.add_edges([current_edge])
                # Update the patience count
                patience_count += 1

            # Make room in the tabu list when it is full; the emptiness check
            # avoids a blocking get() when tabu_length is 0.
            if tabu_queue.qsize() >= self.tabu_length and tabu_queue.qsize() > 0:
                current_removed = tabu_queue.get()
                tabu_set.remove(current_removed)

            # Add the node to the tabu list
            tabu_queue.put(current_new_parent)
            tabu_set.add(current_new_parent)

            tabu_count += 1

            # Every tabu_rules_duration steps remove an item from the tabu list
            if tabu_count % self.tabu_rules_duration == 0:
                if tabu_queue.qsize() > 0:
                    current_removed = tabu_queue.get()
                    tabu_set.remove(current_removed)
                tabu_count = 0

            if self.patience is not None and patience_count > self.patience:
                break

        print(f"finito variabile: {self.node_id}")
        return graph.edges
|
@ -1,6 +0,0 @@ |
|||||||
from .conditional_intensity_matrix import ConditionalIntensityMatrix |
|
||||||
from .network_graph import NetworkGraph |
|
||||||
from .sample_path import SamplePath |
|
||||||
from .set_of_cims import SetOfCims |
|
||||||
from .structure import Structure |
|
||||||
from .trajectory import Trajectory |
|
@ -1,42 +0,0 @@ |
|||||||
import numpy as np |
|
||||||
|
|
||||||
|
|
||||||
class ConditionalIntensityMatrix(object):
    """Abstracts the Conditional Intensity Matrix of a node as aggregation of the state residence
    times vector, the state transition matrix and the actual CIM matrix.

    :param state_residence_times: state residence times vector
    :type state_residence_times: numpy.ndarray
    :param state_transition_matrix: the transitions count matrix
    :type state_transition_matrix: numpy.ndarray
    :_cim: the actual cim of the node, initialized as a float64 copy of the transition matrix
    """

    def __init__(self, state_residence_times: np.ndarray, state_transition_matrix: np.ndarray):
        """Constructor Method
        """
        self._state_residence_times = state_residence_times
        self._state_transition_matrix = state_transition_matrix
        # astype returns a new array, so the transition counts are left untouched
        self._cim = state_transition_matrix.astype(np.float64)

    def compute_cim_coefficients(self) -> None:
        """Compute the coefficients of the matrix _cim.

        The diagonal of ``_cim`` is negated in place, then every entry is computed as
        ``(_cim[x, x'] + 1) / (T[x] + 1)`` where ``T`` is the residence times vector.
        The class member ``_cim`` will contain the computed cim
        """
        negated_diagonal = self._cim.diagonal() * -1
        np.fill_diagonal(self._cim, negated_diagonal)
        # Work on the transpose so that the division broadcasts over the rows of _cim
        shifted = self._cim.T + 1
        denominators = self._state_residence_times + 1
        self._cim = (shifted / denominators).T

    @property
    def state_residence_times(self) -> np.ndarray:
        return self._state_residence_times

    @property
    def state_transition_matrix(self) -> np.ndarray:
        return self._state_transition_matrix

    @property
    def cim(self) -> np.ndarray:
        return self._cim

    def __repr__(self):
        return 'CIM:\n' + str(self.cim)
|
||||||
|
|
@ -1,293 +0,0 @@ |
|||||||
|
|
||||||
import typing |
|
||||||
|
|
||||||
import networkx as nx |
|
||||||
import numpy as np |
|
||||||
|
|
||||||
from .structure import Structure |
|
||||||
|
|
||||||
|
|
||||||
class NetworkGraph(object): |
|
||||||
"""Abstracts the infos contained in the Structure class in the form of a directed graph. |
|
||||||
Has the task of creating all the necessary filtering and indexing structures for parameters estimation |
|
||||||
|
|
||||||
:param graph_struct: the ``Structure`` object from which infos about the net will be extracted |
|
||||||
:type graph_struct: Structure |
|
||||||
:_graph: directed graph |
|
||||||
:_aggregated_info_about_nodes_parents: a structure that contains all the necessary infos |
|
||||||
about every parents of the node of which all the indexing and filtering structures will be constructed. |
|
||||||
:_time_scalar_indexing_structure: the indexing structure for state res time estimation |
|
||||||
:_transition_scalar_indexing_structure: the indexing structure for transition computation |
|
||||||
:_time_filtering: the columns filtering structure used in the computation of the state res times |
|
||||||
:_transition_filtering: the columns filtering structure used in the computation of the transition |
|
||||||
from one state to another |
|
||||||
:_p_combs_structure: all the possible parents states combination for the node of interest |
|
||||||
""" |
|
||||||
|
|
||||||
def __init__(self, graph_struct: Structure): |
|
||||||
"""Constructor Method |
|
||||||
""" |
|
||||||
self._graph_struct = graph_struct |
|
||||||
self._graph = nx.DiGraph() |
|
||||||
self._aggregated_info_about_nodes_parents = None |
|
||||||
self._time_scalar_indexing_structure = None |
|
||||||
self._transition_scalar_indexing_structure = None |
|
||||||
self._time_filtering = None |
|
||||||
self._transition_filtering = None |
|
||||||
self._p_combs_structure = None |
|
||||||
|
|
||||||
def init_graph(self): |
|
||||||
self.add_nodes(self._nodes_labels) |
|
||||||
self.add_edges(self.graph_struct.edges) |
|
||||||
self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes() |
|
||||||
self._fancy_indexing = self.build_fancy_indexing_structure(0) |
|
||||||
self.build_scalar_indexing_structures() |
|
||||||
self.build_time_columns_filtering_structure() |
|
||||||
self.build_transition_columns_filtering_structure() |
|
||||||
self._p_combs_structure = self.build_p_combs_structure() |
|
||||||
|
|
||||||
def fast_init(self, node_id: str) -> None: |
|
||||||
"""Initializes all the necessary structures for parameters estimation of the node identified by the label |
|
||||||
node_id |
|
||||||
|
|
||||||
:param node_id: the label of the node |
|
||||||
:type node_id: string |
|
||||||
""" |
|
||||||
self.add_nodes(self._graph_struct.nodes_labels) |
|
||||||
self.add_edges(self._graph_struct.edges) |
|
||||||
self._aggregated_info_about_nodes_parents = self.get_ordered_by_indx_set_of_parents(node_id) |
|
||||||
p_indxs = self._aggregated_info_about_nodes_parents[1] |
|
||||||
p_vals = self._aggregated_info_about_nodes_parents[2] |
|
||||||
node_states = self.get_states_number(node_id) |
|
||||||
node_indx = self.get_node_indx(node_id) |
|
||||||
cols_number = self._graph_struct.total_variables_number |
|
||||||
self._time_scalar_indexing_structure = NetworkGraph.\ |
|
||||||
build_time_scalar_indexing_structure_for_a_node(node_states, p_vals) |
|
||||||
self._transition_scalar_indexing_structure = NetworkGraph.\ |
|
||||||
build_transition_scalar_indexing_structure_for_a_node(node_states, p_vals) |
|
||||||
self._time_filtering = NetworkGraph.build_time_columns_filtering_for_a_node(node_indx, p_indxs) |
|
||||||
self._transition_filtering = NetworkGraph.build_transition_filtering_for_a_node(node_indx, p_indxs, cols_number) |
|
||||||
self._p_combs_structure = NetworkGraph.build_p_comb_structure_for_a_node(p_vals) |
|
||||||
|
|
||||||
def add_nodes(self, list_of_nodes: typing.List) -> None:
    """Add the nodes in ``list_of_nodes`` to ``_graph``.

    Each node is stored with three attributes: ``indx`` (taken from
    ``_graph_struct.nodes_indexes``), ``val`` (taken from
    ``_graph_struct.nodes_values``) and ``pos_indx`` (its position in the
    iteration, starting from 0).

    :param list_of_nodes: the nodes to add to ``_graph``
    :type list_of_nodes: List
    """
    nodes_indxs = self._graph_struct.nodes_indexes
    nodes_vals = self._graph_struct.nodes_values
    # zip stops at the shortest sequence, exactly like the original manual loop.
    for pos, (label, node_indx, node_val) in enumerate(zip(list_of_nodes, nodes_indxs, nodes_vals)):
        self._graph.add_node(label, indx=node_indx, val=node_val, pos_indx=pos)
|
||||||
|
|
||||||
def has_edge(self, edge: tuple) -> bool:
    """Check whether the graph contains a specific edge.

    :param edge: a tuple that represents the edge; only its first two items are used
    :type edge: tuple
    :return: the answer of ``_graph.has_edge`` for the two endpoints
    :rtype: bool
    """
    first_endpoint, second_endpoint = edge[0], edge[1]
    return self._graph.has_edge(first_endpoint, second_endpoint)
|
||||||
|
|
||||||
def add_edges(self, list_of_edges: typing.List) -> None:
    """Insert into ``_graph`` every edge listed in ``list_of_edges``.

    :param list_of_edges: the list of tuples describing the edges to add
    :type list_of_edges: List
    """
    graph = self._graph
    graph.add_edges_from(list_of_edges)
|
||||||
|
|
||||||
def remove_node(self, node_id: str) -> None:
    """Remove the node ``node_id`` from the graph and from the structure object.

    Afterwards all the filtering/indexing structures are reset.

    :param node_id: the label of the node to remove
    :type node_id: string
    """
    # Same order as before: the graph first, then the structure.
    for holder in (self._graph, self._graph_struct):
        holder.remove_node(node_id)
    self.clear_indexing_filtering_structures()
|
||||||
|
|
||||||
def clear_indexing_filtering_structures(self) -> None:
    """Reset all the filtering/indexing structures to ``None``.
    """
    structures_to_reset = (
        '_aggregated_info_about_nodes_parents',
        '_time_scalar_indexing_structure',
        '_transition_scalar_indexing_structure',
        '_time_filtering',
        '_transition_filtering',
        '_p_combs_structure',
    )
    for attribute in structures_to_reset:
        setattr(self, attribute, None)
|
||||||
|
|
||||||
def get_ordered_by_indx_set_of_parents(self, node: str) -> typing.Tuple:
    """Collect the information about the parent set of ``node``.

    The parents are sorted by their position in ``_graph_struct.nodes_labels``;
    the result is the triple (parents_labels, parents_indexes, parents_cardinalities).

    :param node: the label of the node
    :type node: string
    :return: a tuple containing all the parent set infos
    :rtype: Tuple
    """
    unordered_parents = self.get_parents_by_id(node)
    # Position of every label in the structure, used as the sort key.
    position_of = {label: position
                   for position, label in enumerate(self._graph_struct.nodes_labels)}
    ordered_parents = sorted(unordered_parents, key=position_of.__getitem__)

    indx_of = self.get_node_indx
    parents_indexes = [indx_of(parent) for parent in ordered_parents]
    parents_cardinalities = [self.get_states_number(parent) for parent in ordered_parents]
    return ordered_parents, parents_indexes, parents_cardinalities
|
||||||
|
|
||||||
def remove_edges(self, list_of_edges: typing.List) -> None:
    """Delete from ``_graph`` every edge listed in ``list_of_edges``.

    :param list_of_edges: the edges to remove from the graph
    :type list_of_edges: List
    """
    graph = self._graph
    graph.remove_edges_from(list_of_edges)
|
||||||
|
|
||||||
@staticmethod
def build_time_scalar_indexing_structure_for_a_node(node_states: int,
                                                    parents_vals: typing.List) -> np.ndarray:
    """Builds an indexing structure for the computation of state residence times values.

    The result is the cumulative product of ``[node_states, *parents_vals]``.

    :param node_states: the node cardinality
    :type node_states: int
    :param parents_vals: the cardinalities of the node's parents
    :type parents_vals: List
    :return: The time indexing structure
    :rtype: numpy.ndArray
    """
    T_vector = np.append(np.array([node_states]), parents_vals)
    # The builtin ``int`` replaces the ``np.int`` alias, which NumPy 1.24 removed.
    # The cast is still needed: appending an empty parents list yields a float array.
    return T_vector.cumprod().astype(int)
|
||||||
|
|
||||||
@staticmethod
def build_transition_scalar_indexing_structure_for_a_node(node_states_number: int, parents_vals: typing.List) \
        -> np.ndarray:
    """Builds an indexing structure for the computation of state transitions values.

    The result is the cumulative product of
    ``[node_states_number, node_states_number, *parents_vals]``.

    :param node_states_number: the node cardinality
    :type node_states_number: int
    :param parents_vals: the cardinalities of the node's parents
    :type parents_vals: List
    :return: The transition indexing structure
    :rtype: numpy.ndArray
    """
    M_vector = np.append(np.array([node_states_number, node_states_number]), parents_vals)
    # The builtin ``int`` replaces the ``np.int`` alias, which NumPy 1.24 removed.
    # The cast is still needed: appending an empty parents list yields a float array.
    return M_vector.cumprod().astype(int)
|
||||||
|
|
||||||
@staticmethod
def build_time_columns_filtering_for_a_node(node_indx: int, p_indxs: typing.List) -> np.ndarray:
    """Builds the structure used to filter the dataset columns indicated by ``node_indx`` and ``p_indxs``.

    The result is the integer array ``[node_indx, *p_indxs]``.
    This structure will be used in the computation of the state res times.

    :param node_indx: the index of the node
    :type node_indx: int
    :param p_indxs: the indexes of the node's parents
    :type p_indxs: List
    :return: The filtering structure for times estimation
    :rtype: numpy.ndArray
    """
    # The builtin ``int`` replaces the ``np.int`` alias, which NumPy 1.24 removed.
    # The final cast keeps the result integer when ``p_indxs`` is empty.
    return np.append(np.array([node_indx], dtype=int), p_indxs).astype(int)
|
||||||
|
|
||||||
@staticmethod
def build_transition_filtering_for_a_node(node_indx: int, p_indxs: typing.List, nodes_number: int) \
        -> np.ndarray:
    """Builds the structure used to filter the dataset columns indicated by ``node_indx`` and ``p_indxs``.

    The result is the integer array ``[node_indx + nodes_number, node_indx, *p_indxs]``.
    This structure will be used in the computation of the state transitions values.

    :param node_indx: the index of the node
    :type node_indx: int
    :param p_indxs: the indexes of the node's parents
    :type p_indxs: List
    :param nodes_number: the total number of nodes in the dataset
    :type nodes_number: int
    :return: The filtering structure for transitions estimation
    :rtype: numpy.ndArray
    """
    # The builtin ``int`` replaces the ``np.int`` alias, which NumPy 1.24 removed.
    return np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=int)
|
||||||
|
|
||||||
@staticmethod
def build_p_comb_structure_for_a_node(parents_values: typing.List) -> np.ndarray:
    """Builds the grid of all the combinations of the parents states.

    Column ``k`` of the result ranges over ``range(parents_values[k])`` and the first
    column varies fastest. With no parents the result is an empty ``(1, 0)`` array.

    The grid is built with ``indexing='ij'``. The previous implementation used the
    default 'xy' indexing and then swapped the first two columns: this gives the same
    grid when all the cardinalities are equal, but it put the values of the second
    parent in the first column (and vice versa) when the cardinalities differ.

    :param parents_values: the cardinalities of the nodes
    :type parents_values: List
    :return: A numpy matrix containing a grid of the combinations
    :rtype: numpy.ndArray
    """
    parents_number = len(parents_values)
    if parents_number == 0:
        # The builtin ``int`` replaces the ``np.int`` alias, which NumPy 1.24 removed.
        return np.array([[]], dtype=int)
    states_per_parent = [list(range(val)) for val in parents_values]
    grids = np.meshgrid(*states_per_parent, indexing='ij')
    # After the transpose the last axis enumerates the parents, so each row of the
    # reshaped matrix is one combination, with the first parent varying fastest.
    return np.array(grids).T.reshape(-1, parents_number)
|
||||||
|
|
||||||
def get_parents_by_id(self, node_id) -> typing.List:
    """Return the labels of the parents of the node ``node_id``.

    The parents are the predecessors of ``node_id`` in ``_graph``.

    :param node_id: the node label
    :type node_id: string
    :return: a List of labels of the parents
    :rtype: List
    """
    predecessors = self._graph.predecessors(node_id)
    return [*predecessors]
|
||||||
|
|
||||||
def get_states_number(self, node_id) -> int:
    """Return the ``val`` attribute stored in ``_graph`` for the node ``node_id``.

    :param node_id: the node label
    :return: the value of the ``val`` node attribute
    """
    node_attributes = self._graph.nodes[node_id]
    return node_attributes['val']
|
||||||
|
|
||||||
def get_node_indx(self, node_id) -> int:
    """Return the ``indx`` attribute stored in ``_graph`` for the node ``node_id``.

    The attribute is read directly from the node, like the other accessors of this
    class, instead of building a dictionary of the ``indx`` attribute of every node
    for a single lookup.

    :param node_id: the node label
    :return: the value of the ``indx`` node attribute
    :raises KeyError: if the node is unknown or has no ``indx`` attribute
    """
    return self._graph.nodes[node_id]['indx']
|
||||||
|
|
||||||
def get_positional_node_indx(self, node_id) -> int:
    """Return the ``pos_indx`` attribute stored in ``_graph`` for the node ``node_id``.

    :param node_id: the node label
    :return: the value of the ``pos_indx`` node attribute
    """
    node_attributes = self._graph.nodes[node_id]
    return node_attributes['pos_indx']
|
||||||
|
|
||||||
@property
def nodes(self) -> typing.List:
    """The nodes labels held by ``_graph_struct``."""
    structure = self._graph_struct
    return structure.nodes_labels
|
||||||
|
|
||||||
@property
def edges(self) -> typing.List:
    """A new list with the edges currently contained in ``_graph``."""
    return [*self._graph.edges]
|
||||||
|
|
||||||
@property
def nodes_indexes(self) -> np.ndarray:
    """The nodes indexes held by ``_graph_struct``."""
    structure = self._graph_struct
    return structure.nodes_indexes
|
||||||
|
|
||||||
@property
def nodes_values(self) -> np.ndarray:
    """The nodes values held by ``_graph_struct``."""
    structure = self._graph_struct
    return structure.nodes_values
|
||||||
|
|
||||||
@property
def time_scalar_indexing_strucure(self) -> np.ndarray:
    """The current value of ``_time_scalar_indexing_structure``."""
    current = self._time_scalar_indexing_structure
    return current
|
||||||
|
|
||||||
@property
def time_filtering(self) -> np.ndarray:
    """The current value of ``_time_filtering``."""
    current = self._time_filtering
    return current
|
||||||
|
|
||||||
@property
def transition_scalar_indexing_structure(self) -> np.ndarray:
    """The current value of ``_transition_scalar_indexing_structure``."""
    current = self._transition_scalar_indexing_structure
    return current
|
||||||
|
|
||||||
@property
def transition_filtering(self) -> np.ndarray:
    """The current value of ``_transition_filtering``."""
    current = self._transition_filtering
    return current
|
||||||
|
|
||||||
@property
def p_combs(self) -> np.ndarray:
    """The current value of ``_p_combs_structure``."""
    current = self._p_combs_structure
    return current
|
@ -1,91 +0,0 @@ |
|||||||
|
|
||||||
|
|
||||||
import numpy as np |
|
||||||
import pandas as pd |
|
||||||
|
|
||||||
from .structure import Structure |
|
||||||
from .trajectory import Trajectory |
|
||||||
from ..utility.abstract_importer import AbstractImporter |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class SamplePath(object):
    """Aggregates all the information about the trajectories, the real structure of the sampled net and the
    variables cardinalities. Has the task of creating the ``Trajectory`` and ``Structure`` objects that will
    contain the mentioned data.

    :param importer: the Importer object which contains the imported and processed data
    :type importer: AbstractImporter
    :_trajectories: the ``Trajectory`` object that will contain all the concatenated trajectories
    :_structure: the ``Structure`` object that will contain all the structural infos about the net
    :_total_variables_count: the number of variables in the net
    """

    def __init__(self, importer: AbstractImporter):
        """Constructor Method

        :raises RuntimeError: if the variables frame or the concatenated samples of the importer are
            ``None``, or if the variables frame / a DataFrame or ndarray of samples is empty
        """
        self._importer = importer
        variables_frame = self._importer._df_variables
        samples = self._importer._concatenated_samples
        missing_data_message = 'The importer object has to contain the all processed data!'

        if variables_frame is None or samples is None:
            raise RuntimeError(missing_data_message)
        if variables_frame.empty:
            raise RuntimeError(missing_data_message)
        # Emptiness is only checked for the two container types handled here.
        if isinstance(samples, pd.DataFrame) and samples.empty:
            raise RuntimeError(missing_data_message)
        if isinstance(samples, np.ndarray) and samples.size == 0:
            raise RuntimeError(missing_data_message)

        self._trajectories = None
        self._structure = None
        self._total_variables_count = None

    def build_trajectories(self) -> None:
        """Builds the ``Trajectory`` object that will contain all the trajectories.

        Afterwards the concatenated frame held by ``_importer`` is cleared.
        """
        importer = self._importer
        columns = importer.build_list_of_samples_array(importer.concatenated_samples)
        # One more column than the number of variables is passed on as the original columns number.
        self._trajectories = Trajectory(columns, len(importer.sorter) + 1)
        importer.clear_concatenated_frame()

    def build_structure(self) -> None:
        """Builds the ``Structure`` object that aggregates all the infos about the net.

        :raises RuntimeError: if the importer's sorter differs from the labels in the first column of
            the variables frame
        """
        importer = self._importer
        if importer.sorter != importer.variables.iloc[:, 0].to_list():
            raise RuntimeError("The Dataset columns order have to match the order of labels in the variables Frame!")

        self._total_variables_count = len(importer.sorter)
        variables_frame = importer.variables
        labels = variables_frame.iloc[:, 0].to_list()
        indxs = variables_frame.index.to_numpy()
        vals = variables_frame.iloc[:, 1].to_numpy()

        # A missing or empty structure frame means that there are no edges.
        structure_frame = importer.structure
        if structure_frame is None or structure_frame.empty:
            edges = []
        else:
            edges = list(structure_frame.to_records(index=False))

        self._structure = Structure(labels, indxs, vals, edges, self._total_variables_count)

    def clear_memory(self):
        """Replaces the raw data held by the importer with an empty list."""
        self._importer._raw_data = []

    @property
    def trajectories(self) -> Trajectory:
        """The ``Trajectory`` object, ``None`` until ``build_trajectories`` is called."""
        return self._trajectories

    @property
    def structure(self) -> Structure:
        """The ``Structure`` object, ``None`` until ``build_structure`` is called."""
        return self._structure

    @property
    def total_variables_count(self) -> int:
        """The number of variables, ``None`` until ``build_structure`` is called."""
        return self._total_variables_count

    @property
    def has_prior_net_structure(self) -> bool:
        """``True`` when the built structure contains at least one edge."""
        return bool(self._structure.edges)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,97 +0,0 @@ |
|||||||
|
|
||||||
|
|
||||||
import typing |
|
||||||
|
|
||||||
import numpy as np |
|
||||||
|
|
||||||
from .conditional_intensity_matrix import ConditionalIntensityMatrix |
|
||||||
|
|
||||||
|
|
||||||
class SetOfCims(object):
    """Aggregates all the CIMS of the node identified by the label _node_id.

    :param node_id: the node label
    :type node_id: string
    :param parents_states_number: the cardinalities of the parents
    :type parents_states_number: List
    :param node_states_number: the cardinality of the node
    :type node_states_number: int
    :param p_combs: the p_comb structure bound to this node
    :type p_combs: numpy.ndArray
    :_state_residence_time: matrix containing all the state residence time vectors for the node
    :_transition_matrices: matrix containing all the transition matrices for the node
    :_actual_cims: the cims of the node
    """

    def __init__(self, node_id: str, parents_states_number: typing.List, node_states_number: int, p_combs: np.ndarray):
        """Constructor Method
        """
        self._node_id = node_id
        self._parents_states_number = parents_states_number
        self._node_states_number = node_states_number
        self._actual_cims = []
        self._state_residence_times = None
        self._transition_matrices = None
        self._p_combs = p_combs
        self.build_times_and_transitions_structures()

    def build_times_and_transitions_structures(self) -> None:
        """Initializes at the correct dimensions the state residence times matrix and the state transition matrices.

        Both structures have one entry per parents combination (a single entry when the node has no
        parents) and are filled with zeros.
        """
        node_states = self._node_states_number
        if not self._parents_states_number:
            combinations_number = 1
        else:
            combinations_number = np.prod(self._parents_states_number)
        # The builtin ``float``/``int`` replace the ``np.float``/``np.int`` aliases, which NumPy 1.24
        # removed; they select the same dtypes the aliases did.
        self._state_residence_times = np.zeros((combinations_number, node_states), dtype=float)
        self._transition_matrices = np.zeros((combinations_number, node_states, node_states), dtype=int)

    def build_cims(self, state_res_times: np.ndarray, transition_matrices: np.ndarray) -> None:
        """Build the ``ConditionalIntensityMatrix`` objects given the state residence times and transitions matrices.
        Compute the cim coefficients. The class member ``_actual_cims`` will contain the computed cims.

        After the cims are built ``_actual_cims`` becomes a numpy array and the residence times and
        transition matrices held by the object are released (set to ``None``).

        :param state_res_times: the state residence times matrix
        :type state_res_times: numpy.ndArray
        :param transition_matrices: the transition matrices
        :type transition_matrices: numpy.ndArray
        """
        for state_res_time_vector, transition_matrix in zip(state_res_times, transition_matrices):
            cim_to_add = ConditionalIntensityMatrix(state_res_time_vector, transition_matrix)
            cim_to_add.compute_cim_coefficients()
            self._actual_cims.append(cim_to_add)
        self._actual_cims = np.array(self._actual_cims)
        self._transition_matrices = None
        self._state_residence_times = None

    def filter_cims_with_mask(self, mask_arr: np.ndarray, comb: typing.List) -> np.ndarray:
        """Filter the cims contained in the array ``_actual_cims`` given the boolean mask ``mask_arr`` and the index
        ``comb``.

        When ``mask_arr`` has at most one element no filtering is done and all the cims are returned.

        :param mask_arr: the boolean mask that indicates which parent to consider
        :type mask_arr: numpy.array
        :param comb: the state/s of the filtered parents
        :type comb: numpy.array
        :return: Array of ``ConditionalIntensityMatrix`` objects
        :rtype: numpy.array
        """
        if mask_arr.size <= 1:
            return self._actual_cims
        # Rows of ``_p_combs`` whose masked columns are all equal to ``comb``.
        matching_rows = np.all(self._p_combs[:, mask_arr] == comb, axis=1)
        flat_indxs = np.argwhere(matching_rows).ravel()
        return self._actual_cims[flat_indxs]

    @property
    def actual_cims(self) -> np.ndarray:
        """The cims of the node."""
        return self._actual_cims

    @property
    def p_combs(self) -> np.ndarray:
        """The p_comb structure bound to this node."""
        return self._p_combs

    def get_cims_number(self):
        """Return the number of cims currently held in ``_actual_cims``."""
        return len(self._actual_cims)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,124 +0,0 @@ |
|||||||
|
|
||||||
import typing as ty |
|
||||||
|
|
||||||
import numpy as np |
|
||||||
|
|
||||||
|
|
||||||
class Structure(object):
    """Contains all the infos about the network structure (nodes labels, nodes cardinalities, edges, indexes)

    :param nodes_labels_list: the symbolic names of the variables
    :type nodes_labels_list: List
    :param nodes_indexes_arr: the indexes of the nodes
    :type nodes_indexes_arr: numpy.ndArray
    :param nodes_vals_arr: the cardinalities of the nodes
    :type nodes_vals_arr: numpy.ndArray
    :param edges_list: the edges of the network
    :type edges_list: List
    :param total_variables_number: the total number of variables in the dataset
    :type total_variables_number: int
    """

    def __init__(self, nodes_labels_list: ty.List, nodes_indexes_arr: np.ndarray, nodes_vals_arr: np.ndarray,
                 edges_list: ty.List, total_variables_number: int):
        """Constructor Method
        """
        self._nodes_labels_list = nodes_labels_list
        self._nodes_indexes_arr = nodes_indexes_arr
        self._nodes_vals_arr = nodes_vals_arr
        self._edges_list = edges_list
        self._total_variables_number = total_variables_number

    def remove_node(self, node_id: str) -> None:
        """Remove the node ``node_id`` from the labels, the indexes, the values and the edges.

        The class member ``_total_variables_number`` is left untouched, since it refers to the total
        number of variables in the dataset.

        :param node_id: the label of the node to remove
        :type node_id: string
        :raises ValueError: if ``node_id`` is not one of the nodes labels
        """
        node_positional_indx = self._nodes_labels_list.index(node_id)
        del self._nodes_labels_list[node_positional_indx]
        self._nodes_indexes_arr = np.delete(self._nodes_indexes_arr, node_positional_indx)
        self._nodes_vals_arr = np.delete(self._nodes_vals_arr, node_positional_indx)
        # Keep only the edges that do not touch the removed node.
        self._edges_list = [(from_node, to_node) for (from_node, to_node) in self._edges_list
                            if from_node != node_id and to_node != node_id]

    @property
    def edges(self) -> ty.List:
        """The edges of the network."""
        return self._edges_list

    @property
    def nodes_labels(self) -> ty.List:
        """The symbolic names of the variables."""
        return self._nodes_labels_list

    @property
    def nodes_indexes(self) -> np.ndarray:
        """The indexes of the nodes."""
        return self._nodes_indexes_arr

    @property
    def nodes_values(self) -> np.ndarray:
        """The cardinalities of the nodes."""
        return self._nodes_vals_arr

    @property
    def total_variables_number(self) -> int:
        """The total number of variables in the dataset."""
        return self._total_variables_number

    def get_node_id(self, node_indx: int) -> str:
        """Given the ``node_indx`` returns the node label.

        :param node_indx: the node index (used as position in the labels list)
        :type node_indx: int
        :return: the node label
        :rtype: string
        """
        return self._nodes_labels_list[node_indx]

    def clean_structure_edges(self):
        """Replace the edges list with a new empty list."""
        self._edges_list = list()

    def add_edge(self, edge: tuple):
        """Append ``edge`` to the edges list.

        :param edge: the edge to add
        :type edge: tuple
        """
        # The edge itself is stored; the previous code appended the builtin ``tuple`` type
        # and printed the whole list as a debugging leftover.
        self._edges_list.append(edge)

    def remove_edge(self, edge: tuple):
        """Remove ``edge`` from the edges list.

        :param edge: the edge to remove
        :type edge: tuple
        :raises ValueError: if ``edge`` is not in the edges list
        """
        # The edge itself is removed; the previous code tried to remove the builtin ``tuple`` type.
        self._edges_list.remove(edge)

    def contains_edge(self, edge: tuple) -> bool:
        """Tell whether ``edge`` is in the edges list.

        :param edge: the edge to look for
        :type edge: tuple
        :return: True if the edge is present, False otherwise
        :rtype: bool
        """
        return edge in self._edges_list

    def get_node_indx(self, node_id: str) -> int:
        """Given the node label ``node_id`` returns the node index.

        :param node_id: the node label
        :type node_id: string
        :return: the node index
        :rtype: int
        """
        pos_indx = self._nodes_labels_list.index(node_id)
        return self._nodes_indexes_arr[pos_indx]

    def get_positional_node_indx(self, node_id: str) -> int:
        """Given the node label ``node_id`` returns its position in the labels list.

        :param node_id: the node label
        :type node_id: string
        :return: the position of the label
        :rtype: int
        """
        return self._nodes_labels_list.index(node_id)

    def get_states_number(self, node: str) -> int:
        """Given the node label ``node`` returns the cardinality of the node.

        :param node: the node label
        :type node: string
        :return: the node cardinality
        :rtype: int
        """
        pos_indx = self._nodes_labels_list.index(node)
        return self._nodes_vals_arr[pos_indx]

    def __repr__(self):
        return "Variables:\n" + str(self._nodes_labels_list) + "\nValues:\n" + str(self._nodes_vals_arr) + \
               "\nEdges: \n" + str(self._edges_list)

    def __eq__(self, other):
        """Overrides the default implementation.

        Two structures are equal when they have the same set of labels, equal values and indexes
        arrays and equal edges lists.
        """
        if isinstance(other, Structure):
            return set(self._nodes_labels_list) == set(other._nodes_labels_list) and \
                   np.array_equal(self._nodes_vals_arr, other._nodes_vals_arr) and \
                   np.array_equal(self._nodes_indexes_arr, other._nodes_indexes_arr) and \
                   self._edges_list == other._edges_list
        return False
|
||||||
|
|
@ -1,45 +0,0 @@ |
|||||||
|
|
||||||
import typing |
|
||||||
|
|
||||||
import numpy as np |
|
||||||
|
|
||||||
|
|
||||||
class Trajectory(object):
    """Abstracts the infos about a complete set of trajectories, represented as a numpy array of doubles
    (the time deltas) and a numpy matrix of ints (the changes of states).

    :param list_of_columns: the list containing the times array and values matrix
    :type list_of_columns: List
    :param original_cols_number: total number of cols in the data
    :type original_cols_number: int
    :_actual_trajectory: the trajectory containing also the duplicated/shifted values
    :_times: the array containing the time deltas
    """

    def __init__(self, list_of_columns: typing.List, original_cols_number: int):
        """Constructor Method
        """
        times, values = list_of_columns[0], list_of_columns[1]
        self._times = times
        self._actual_trajectory = values
        self._original_cols_number = original_cols_number

    @property
    def trajectory(self) -> np.ndarray:
        """The first ``original_cols_number - 1`` columns of the complete trajectory."""
        last_column = self._original_cols_number - 1
        return self._actual_trajectory[:, :last_column]

    @property
    def complete_trajectory(self) -> np.ndarray:
        """The whole values matrix, shifted columns included."""
        return self._actual_trajectory

    @property
    def times(self):
        """The array containing the time deltas."""
        return self._times

    def size(self):
        """Return the number of rows of the complete trajectory."""
        rows_number = self._actual_trajectory.shape[0]
        return rows_number

    def __repr__(self):
        return (f"Complete Trajectory Rows: {self.size()}\n{self.complete_trajectory!r}"
                f"\nTimes Rows:{self.times.size}\n{self.times!r}")
|
||||||
|
|
||||||
|
|
@ -1,4 +0,0 @@ |
|||||||
from .abstract_importer import AbstractImporter |
|
||||||
from .cache import Cache |
|
||||||
from .json_importer import JsonImporter |
|
||||||
from .sample_importer import SampleImporter |
|
@ -1,164 +0,0 @@ |
|||||||
|
|
||||||
import typing |
|
||||||
from abc import ABC, abstractmethod |
|
||||||
|
|
||||||
import numpy as np |
|
||||||
import pandas as pd |
|
||||||
|
|
||||||
import copy |
|
||||||
|
|
||||||
#from sklearn.utils import resample |
|
||||||
|
|
||||||
|
|
||||||
class AbstractImporter(ABC): |
|
||||||
"""Abstract class that exposes all the necessary methods to process the trajectories and the net structure. |
|
||||||
|
|
||||||
:param file_path: the file path, or dataset name if you import already processed data |
|
||||||
:type file_path: str |
|
||||||
:param trajectory_list: Dataframe or numpy array containing the concatenation of all the processed trajectories |
|
||||||
:type trajectory_list: typing.Union[pandas.DataFrame, numpy.ndarray] |
|
||||||
:param variables: Dataframe containing the nodes labels and cardinalities |
|
||||||
:type variables: pandas.DataFrame |
|
||||||
:prior_net_structure: Dataframe containing the structure of the network (edges) |
|
||||||
:type prior_net_structure: pandas.DataFrame |
|
||||||
:_sorter: A list containing the variables labels in the SAME order as the columns in ``concatenated_samples`` |
|
||||||
|
|
||||||
.. warning:: |
|
||||||
The parameters ``variables`` and ``prior_net_structure`` HAVE to be properly constructed |
|
||||||
as Pandas Dataframes with the following structure: |
|
||||||
Header of _df_structure = [From_Node | To_Node] |
|
||||||
Header of _df_variables = [Variable_Label | Variable_Cardinality] |
|
||||||
See the tutorial on how to construct a correct ``concatenated_samples`` Dataframe/ndarray. |
|
||||||
|
|
||||||
.. note:: |
|
||||||
See :class:``JsonImporter`` for an example implementation |
|
||||||
|
|
||||||
""" |
|
||||||
|
|
||||||
def __init__(self, file_path: str = None, trajectory_list: typing.Union[pd.DataFrame, np.ndarray] = None, |
|
||||||
variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None): |
|
||||||
"""Constructor |
|
||||||
""" |
|
||||||
self._file_path = file_path |
|
||||||
self._df_samples_list = trajectory_list |
|
||||||
self._concatenated_samples = [] |
|
||||||
self._df_variables = variables |
|
||||||
self._df_structure = prior_net_structure |
|
||||||
self._sorter = None |
|
||||||
super().__init__() |
|
||||||
|
|
||||||
@abstractmethod |
|
||||||
def build_sorter(self, trajecory_header: object) -> typing.List: |
|
||||||
"""Initializes the ``_sorter`` class member from a trajectory dataframe, exctracting the header of the frame |
|
||||||
and keeping ONLY the variables symbolic labels, cutting out the time label in the header. |
|
||||||
|
|
||||||
:param trajecory_header: an object that will be used to define the header |
|
||||||
:type trajecory_header: object |
|
||||||
:return: A list containing the processed header. |
|
||||||
:rtype: List |
|
||||||
""" |
|
||||||
pass |
|
||||||
|
|
||||||
def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame, |
|
||||||
columns_header: typing.List, shifted_cols_header: typing.List) \ |
|
||||||
-> pd.DataFrame: |
|
||||||
"""Computes the difference between each value present in th time column. |
|
||||||
Copies and shift by one position up all the values present in the remaining columns. |
|
||||||
|
|
||||||
:param sample_frame: the traj to be processed |
|
||||||
:type sample_frame: pandas.Dataframe |
|
||||||
:param columns_header: the original header of sample_frame |
|
||||||
:type columns_header: List |
|
||||||
:param shifted_cols_header: a copy of columns_header with changed names of the contents |
|
||||||
:type shifted_cols_header: List |
|
||||||
:return: The processed dataframe |
|
||||||
:rtype: pandas.Dataframe |
|
||||||
|
|
||||||
.. warning:: |
|
||||||
the Dataframe ``sample_frame`` has to follow the column structure of this header: |
|
||||||
Header of sample_frame = [Time | Variable values] |
|
||||||
""" |
|
||||||
sample_frame = copy.deepcopy(sample_frame) |
|
||||||
sample_frame.iloc[:, 0] = sample_frame.iloc[:, 0].diff().shift(-1) |
|
||||||
shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32') |
|
||||||
shifted_cols.columns = shifted_cols_header |
|
||||||
sample_frame = sample_frame.assign(**shifted_cols) |
|
||||||
sample_frame.drop(sample_frame.tail(1).index, inplace=True) |
|
||||||
return sample_frame |
|
||||||
|
|
||||||
def compute_row_delta_in_all_samples_frames(self, df_samples_list: typing.List) -> None: |
|
||||||
"""Calls the method ``compute_row_delta_sigle_samples_frame`` on every dataframe present in the list |
|
||||||
``df_samples_list``. |
|
||||||
Concatenates the result in the dataframe ``concatanated_samples`` |
|
||||||
|
|
||||||
:param df_samples_list: the datframe's list to be processed and concatenated |
|
||||||
:type df_samples_list: List |
|
||||||
|
|
||||||
.. warning:: |
|
||||||
The Dataframe sample_frame has to follow the column structure of this header: |
|
||||||
Header of sample_frame = [Time | Variable values] |
|
||||||
The class member self._sorter HAS to be properly INITIALIZED (See class members definition doc) |
|
||||||
.. note:: |
|
||||||
After the call of this method the class member ``concatanated_samples`` will contain all processed |
|
||||||
and merged trajectories |
|
||||||
""" |
|
||||||
if not self._sorter: |
|
||||||
raise RuntimeError("The class member self._sorter has to be INITIALIZED!") |
|
||||||
shifted_cols_header = [s + "S" for s in self._sorter] |
|
||||||
compute_row_delta = self.compute_row_delta_sigle_samples_frame |
|
||||||
proc_samples_list = [compute_row_delta(sample, self._sorter, shifted_cols_header) |
|
||||||
for sample in df_samples_list] |
|
||||||
self._concatenated_samples = pd.concat(proc_samples_list) |
|
||||||
|
|
||||||
complete_header = self._sorter[:] |
|
||||||
complete_header.insert(0,'Time') |
|
||||||
complete_header.extend(shifted_cols_header) |
|
||||||
self._concatenated_samples = self._concatenated_samples[complete_header] |
|
||||||
|
|
||||||
def build_list_of_samples_array(self, concatenated_sample: pd.DataFrame) -> typing.List:
    """Split a processed dataframe into the delta times array and the transitions matrix.

    :param concatenated_sample: the dataframe from which the time, and transitions matrix have to be
        extracted and converted
    :type concatenated_sample: pandas.Dataframe
    :return: a two-element list: the first column as a numpy array, and all the remaining columns as a
        numpy matrix cast to ``int``
    :rtype: List
    """
    values = concatenated_sample.to_numpy()
    delta_times = values[:, 0]
    transitions = values[:, 1:].astype(int)
    return [delta_times, transitions]
|
||||||
|
|
||||||
def clear_concatenated_frame(self) -> None:
    """Removes all values in the dataframe concatenated_samples.

    When ``_concatenated_samples`` is a dataframe it is replaced by a zero-row slice of itself;
    any other value is left untouched.
    """
    samples = self._concatenated_samples
    if not isinstance(samples, pd.DataFrame):
        return
    self._concatenated_samples = samples.iloc[0:0]
|
||||||
|
|
||||||
@abstractmethod
def dataset_id(self) -> object:
    """Return an identifier for the current dataset.

    If the original dataset contains multiple datasets, implementations are expected to return a
    unique id that identifies the current one. This abstract definition has no behaviour of its own
    and returns ``None``.
    """
|
||||||
|
|
||||||
@property
def concatenated_samples(self) -> pd.DataFrame:
    """The value currently stored in ``_concatenated_samples`` (read-only accessor)."""
    return self._concatenated_samples
|
||||||
|
|
||||||
@property
def variables(self) -> pd.DataFrame:
    """The value currently stored in ``_df_variables`` (read-only accessor)."""
    return self._df_variables
|
||||||
|
|
||||||
@property
def structure(self) -> pd.DataFrame:
    """The value currently stored in ``_df_structure`` (read-only accessor)."""
    return self._df_structure
|
||||||
|
|
||||||
@property
def sorter(self) -> typing.List:
    """The value currently stored in ``_sorter`` (read-only accessor)."""
    return self._sorter
|
||||||
|
|
||||||
@property
def file_path(self) -> str:
    """The value currently stored in ``_file_path`` (read-only accessor)."""
    return self._file_path
|
@ -1,58 +0,0 @@ |
|||||||
|
|
||||||
import typing |
|
||||||
|
|
||||||
from ..structure_graph.set_of_cims import SetOfCims |
|
||||||
|
|
||||||
|
|
||||||
class Cache:
    """This class acts as a cache of ``SetOfCims`` objects for a node.

    The cache is kept in two parallel lists: the object at position ``i`` of ``_actual_cache`` is
    related to the parents combination at position ``i`` of ``_list_of_sets_of_parents``.

    :_list_of_sets_of_parents: a list of ``Sets`` objects of the parents to which the cim in cache at SAME
        index is related
    :_actual_cache: a list of setOfCims objects
    """

    def __init__(self):
        """Constructor Method: both lists start empty.
        """
        self._list_of_sets_of_parents = []
        self._actual_cache = []

    def find(self, parents_comb: typing.Set):  # typing.Union[typing.Set, str]
        """Look up the ``SetOfCims`` cached for the symbolic parents combination ``parents_comb``.

        :param parents_comb: the parents related to that ``SetOfCims``
        :type parents_comb: Set
        :return: the ``SetOfCims`` object stored for the first entry of ``_list_of_sets_of_parents``
            equal to ``parents_comb``. None if there is no such entry.
        :rtype: SetOfCims
        """
        try:
            position = self._list_of_sets_of_parents.index(parents_comb)
        except ValueError:
            # ``list.index`` signals a cache miss with ValueError.
            return None
        return self._actual_cache[position]

    def put(self, parents_comb: typing.Set, socim: SetOfCims):
        """Append the ``SetOfCims`` object to the cache and its symbolic index ``parents_comb`` to
        ``_list_of_sets_of_parents``.

        :param parents_comb: the symbolic set index
        :type parents_comb: Set
        :param socim: the related SetOfCims object
        :type socim: SetOfCims
        """
        self._list_of_sets_of_parents.append(parents_comb)
        self._actual_cache.append(socim)

    def clear(self):
        """Empty, in place, both ``_actual_cache`` and ``_list_of_sets_of_parents``.
        """
        self._list_of_sets_of_parents.clear()
        self._actual_cache.clear()
|
@ -1,176 +0,0 @@ |
|||||||
import json |
|
||||||
import typing |
|
||||||
|
|
||||||
import pandas as pd |
|
||||||
|
|
||||||
|
|
||||||
from .abstract_importer import AbstractImporter |
|
||||||
|
|
||||||
|
|
||||||
class JsonImporter(AbstractImporter):
    """Implements the abstract methods of AbstractImporter and adds all the necessary methods to process and prepare
    the data in json extension.

    :param file_path: the path of the file that contains the data to be imported
    :type file_path: string
    :param samples_label: the reference key for the samples in the trajectories
    :type samples_label: string
    :param structure_label: the reference key for the structure of the network data
    :type structure_label: string
    :param variables_label: the reference key for the cardinalities of the nodes data
    :type variables_label: string
    :param time_key: the key used to identify the timestamps in each trajectory
    :type time_key: string
    :param variables_key: the key used to identify the names of the variables in the net
    :type variables_key: string
    :_array_indx: the index of the outer JsonArray to extract the data from
    :type _array_indx: int
    :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory
    :_raw_data: The raw contents of the json file to import
    :type _raw_data: List
    """

    def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
                 variables_key: str):
        """Constructor method

        .. note::
            This constructor calls also the method ``read_json_file()``, so after the construction of the object
            the class member ``_raw_data`` will contain the raw imported json data.

        """
        self._samples_label = samples_label
        self._structure_label = structure_label
        self._variables_label = variables_label
        self._time_key = time_key
        self._variables_key = variables_key
        self._df_samples_list = None
        self._array_indx = None
        super().__init__(file_path)
        self._raw_data = self.read_json_file()

    def import_data(self, indx: int) -> None:
        """Implements the abstract method of :class:`AbstractImporter`.

        Imports the trajectories at position ``indx``, builds the sorter from the first trajectory,
        computes and concatenates the processed trajectories, empties the per-trajectory dataframes and
        finally imports the structure and the variables.

        :param indx: the index of the outer JsonArray to extract the data from
        :type indx: int
        """
        self._array_indx = indx
        self._df_samples_list = self.import_trajectories(self._raw_data)
        self._sorter = self.build_sorter(self._df_samples_list[0])
        self.compute_row_delta_in_all_samples_frames(self._df_samples_list)
        # The per-trajectory frames are emptied once they have been merged.
        self.clear_data_frame_list()
        self._df_structure = self.import_structure(self._raw_data)
        self._df_variables = self.import_variables(self._raw_data)

    def import_trajectories(self, raw_data: typing.List) -> typing.List:
        """Imports the trajectories from the list of dicts ``raw_data``.

        :param raw_data: List of Dicts
        :type raw_data: List
        :return: List of dataframes containing all the trajectories
        :rtype: List
        """
        return self.normalize_trajectories(raw_data, self._array_indx, self._samples_label)

    def import_structure(self, raw_data: typing.List) -> pd.DataFrame:
        """Imports in a dataframe the data in the list raw_data at the key ``_structure_label``

        :param raw_data: List of Dicts
        :type raw_data: List
        :return: Dataframe containing the starting node and ending node of every arc of the network
        :rtype: pandas.Dataframe
        """
        return self.one_level_normalizing(raw_data, self._array_indx, self._structure_label)

    def import_variables(self, raw_data: typing.List) -> pd.DataFrame:
        """Imports the data in ``raw_data`` at the key ``_variables_label``.

        :param raw_data: List of Dicts
        :type raw_data: List
        :return: Dataframe containing the variables symbolic labels and their cardinalities
        :rtype: pandas.Dataframe
        """
        return self.one_level_normalizing(raw_data, self._array_indx, self._variables_label)

    def read_json_file(self) -> typing.List:
        """Reads the JSON file in the path ``self._file_path``.

        The file is decoded explicitly as UTF-8 so the result does not depend on the locale's
        default encoding.

        :return: The contents of the json file
        :rtype: List
        """
        with open(self._file_path, encoding="utf-8") as f:
            return json.load(f)

    def one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame:
        """Extracts the one-level nested data in the list ``raw_data`` at the index ``indx`` at the key ``key``.

        :param raw_data: List of Dicts
        :type raw_data: List
        :param indx: The index of the array from which the data have to be extracted
        :type indx: int
        :param key: the key for the Dicts from which to extract data
        :type key: string
        :return: A normalized dataframe
        :rtype: pandas.Dataframe
        """
        return pd.DataFrame(raw_data[indx][key])

    def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str) -> typing.List:
        """
        Extracts the trajectories in ``raw_data`` at the index ``indx`` at the key ``trajectories_key``.

        :param raw_data: List of Dicts
        :type raw_data: List
        :param indx: The index of the array from which the data have to be extracted
        :type indx: int
        :param trajectories_key: the key of the trajectories objects
        :type trajectories_key: string
        :return: A list of dataframes containing the trajectories
        :rtype: List
        """
        return [pd.DataFrame(sample) for sample in raw_data[indx][trajectories_key]]

    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
        """Implements the abstract method build_sorter of the :class:`AbstractImporter` for this dataset.

        :param sample_frame: a trajectory dataframe
        :type sample_frame: pandas.Dataframe
        :return: the column labels of ``sample_frame`` without the ``_time_key`` label
        :rtype: List
        """
        columns_header = list(sample_frame.columns.values)
        columns_header.remove(self._time_key)
        return columns_header

    def clear_data_frame_list(self) -> None:
        """Removes all values present in the dataframes in the list ``_df_samples_list``.

        Every dataframe is replaced, in place in the list, by a zero-row slice of itself.
        Nothing is done when no trajectories have been imported yet (``_df_samples_list`` is None).
        """
        if self._df_samples_list is None:
            return
        # Slice assignment keeps the same list object, as index-by-index assignment would.
        self._df_samples_list[:] = [frame.iloc[0:0] for frame in self._df_samples_list]

    def dataset_id(self) -> object:
        """Returns the index of the outer JsonArray passed to the last ``import_data`` call
        (None if ``import_data`` has not been called yet).
        """
        return self._array_indx

    def import_sampled_cims(self, raw_data: typing.List, indx: int, cims_key: str) -> typing.Dict:
        """Imports the synthetic CIMS in the dataset in a dictionary, using variables labels
        as keys for the set of CIMS of a particular node.

        :param raw_data: List of Dicts
        :type raw_data: List
        :param indx: The index of the array from which the data have to be extracted
        :type indx: int
        :param cims_key: the key where the json object cims are placed
        :type cims_key: string
        :return: a dictionary containing the sampled CIMS for all the variables in the net
        :rtype: Dictionary
        """
        # Look the nested object up once instead of on every iteration.
        cims_by_variable = raw_data[indx][cims_key]
        return {
            var: [pd.DataFrame(cim).to_numpy() for cim in cims_by_parents_comb.values()]
            for var, cims_by_parents_comb in cims_by_variable.items()
        }
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,65 +0,0 @@ |
|||||||
import json |
|
||||||
import typing |
|
||||||
|
|
||||||
import pandas as pd |
|
||||||
import numpy as np |
|
||||||
|
|
||||||
from .abstract_importer import AbstractImporter |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class SampleImporter(AbstractImporter):
    """Implements the abstract methods of AbstractImporter and adds all the necessary methods to process and prepare
    the data loaded directly by using DataFrame

    :param trajectory_list: the data that describes the trajectories
    :type trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List]
    :param variables: the data that describes the variables with name and cardinality
    :type variables: typing.Union[pd.DataFrame, np.ndarray, typing.List]
    :param prior_net_structure: the data of the real structure, if it exists
    :type prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List]

    :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory
        (NOTE(review): presumably set by the ``AbstractImporter`` constructor from ``trajectory_list`` -- confirm)
    """

    def __init__(self,
                 trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None,
                 variables: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None,
                 prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None):
        """Constructor method

        If ``variables`` or ``prior_net_structure`` are given as a list or a numpy array, they are converted
        to DataFrame before being handed to the :class:`AbstractImporter` constructor.
        """
        if isinstance(variables, (list, np.ndarray)):
            variables = pd.DataFrame(variables)
        # Each argument is checked on its own: the structure must be converted regardless of
        # the type ``variables`` was given in.
        if isinstance(prior_net_structure, (list, np.ndarray)):
            prior_net_structure = pd.DataFrame(prior_net_structure)

        super().__init__(trajectory_list=trajectory_list,
                         variables=variables,
                         prior_net_structure=prior_net_structure)

    def import_data(self, header_column=None):
        """Sets the sorter and computes the concatenated, processed trajectories.

        :param header_column: the ordered variables labels to use as sorter; if None the sorter is built
            from the first dataframe in ``_df_samples_list``
        """
        if header_column is not None:
            self._sorter = header_column
        else:
            self._sorter = self.build_sorter(self._df_samples_list[0])

        samples_list = self._df_samples_list
        # A numpy array of trajectories is turned into a plain list before processing.
        if isinstance(samples_list, np.ndarray):
            samples_list = samples_list.tolist()

        self.compute_row_delta_in_all_samples_frames(samples_list)

    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
        """Implements the abstract method build_sorter of the :class:`AbstractImporter` in order to get the ordered variables list.

        :param sample_frame: a trajectory dataframe
        :type sample_frame: pandas.Dataframe
        :return: the column labels of ``sample_frame`` without the first one
        :rtype: List
        """
        columns_header = list(sample_frame.columns.values)
        del columns_header[0]
        return columns_header

    def dataset_id(self) -> object:
        """This importer defines no dataset identifier: always returns None.
        """
        return None
|
@ -1 +0,0 @@ |
|||||||
|
|
File diff suppressed because it is too large
Load Diff
Reference in new issue