diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3e16c78
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+__pycache__
+.vscode
+**/__pycache__
+**/data
+**/PyCTBN.egg-info
+**/dist
+**/results_data
+**/.scannerwork
\ No newline at end of file
diff --git a/CTBN_project_dominio.pdf b/CTBN_project_dominio.pdf
new file mode 100644
index 0000000..6bceea1
Binary files /dev/null and b/CTBN_project_dominio.pdf differ
diff --git a/PyCTBN/PyCTBN/__init__.py b/PyCTBN/PyCTBN/__init__.py
new file mode 100644
index 0000000..023c0f1
--- /dev/null
+++ b/PyCTBN/PyCTBN/__init__.py
@@ -0,0 +1,8 @@
+import PyCTBN.estimators
+from PyCTBN.estimators import *
+import PyCTBN.optimizers
+from PyCTBN.optimizers import *
+import PyCTBN.structure_graph
+from PyCTBN.structure_graph import *
+import PyCTBN.utility
+from PyCTBN.utility import *
\ No newline at end of file
diff --git a/PyCTBN/PyCTBN/estimators/__init__.py b/PyCTBN/PyCTBN/estimators/__init__.py
new file mode 100644
index 0000000..112086f
--- /dev/null
+++ b/PyCTBN/PyCTBN/estimators/__init__.py
@@ -0,0 +1,5 @@
+from .fam_score_calculator import FamScoreCalculator
+from .parameters_estimator import ParametersEstimator
+from .structure_estimator import StructureEstimator
+from .structure_constraint_based_estimator import StructureConstraintBasedEstimator
+from .structure_score_based_estimator import StructureScoreBasedEstimator
diff --git a/PyCTBN/PyCTBN/estimators/fam_score_calculator.py b/PyCTBN/PyCTBN/estimators/fam_score_calculator.py
new file mode 100644
index 0000000..5b0b591
--- /dev/null
+++ b/PyCTBN/PyCTBN/estimators/fam_score_calculator.py
@@ -0,0 +1,272 @@
+
+import itertools
+import json
+import typing
+
+import networkx as nx
+import numpy as np
+from networkx.readwrite import json_graph
+
+from math import log
+
+from scipy.special import loggamma
+from random import choice
+
+from ..structure_graph.set_of_cims import SetOfCims
+from ..structure_graph.network_graph import NetworkGraph
+from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
+
+
+'''
+
+'''
+
+
+class FamScoreCalculator:
+    """Compute the FamScore of a node by using a Bayesian score function.
+
+    The score is the sum of two log marginal likelihoods, one over the ``q``
+    parameters and one over the ``theta`` parameters. Both are evaluated on the
+    sufficient statistics read from each cim: ``_state_residence_times`` supplies
+    the ``T[x|u]`` values, the diagonal of ``state_transition_matrix`` supplies
+    ``M[x|u]`` and its off-diagonal entries supply ``M[xx'|u]``.
+    """
+
+    def __init__(self):
+        """Constructor Method; the calculator holds no state."""
+        pass
+
+    # region theta
+
+    def marginal_likelihood_theta(self,
+                                  cims: np.array,
+                                  alpha_xu: float,
+                                  alpha_xxu: float):
+        """Calculate the marginal likelihood over theta of a node, summed over all its cims.
+
+        :param cims: np.array with all the node's cims
+        :type cims: np.array
+        :param alpha_xu: hyperparameter over the CTBN's q parameters
+        :type alpha_xu: float
+        :param alpha_xxu: distributed hyperparameter over the CTBN's theta parameters
+        :type alpha_xxu: float
+
+        :return: the value of the marginal likelihood over theta
+        :rtype: float
+        """
+        per_cim_values = [
+            self.variable_cim_xu_marginal_likelihood_theta(cim, alpha_xu, alpha_xxu)
+            for cim in cims
+        ]
+        return np.sum(per_cim_values)
+
+    def variable_cim_xu_marginal_likelihood_theta(self,
+                                                  cim: ConditionalIntensityMatrix,
+                                                  alpha_xu: float,
+                                                  alpha_xxu: float):
+        """Calculate the marginal likelihood over theta given a single cim.
+
+        :param cim: A conditional_intensity_matrix object with the sufficient statistics
+        :type cim: class:'ConditionalIntensityMatrix'
+        :param alpha_xu: hyperparameter over the CTBN's q parameters
+        :type alpha_xu: float
+        :param alpha_xxu: distributed hyperparameter over the CTBN's theta parameters
+        :type alpha_xxu: float
+
+        :return: the value of the marginal likelihood over theta
+        :rtype: float
+        """
+        # One term per state of the node; the number of states is the cim length.
+        states_number = len(cim._state_residence_times)
+        per_state_values = [
+            self.single_cim_xu_marginal_likelihood_theta(index, cim, alpha_xu, alpha_xxu)
+            for index in range(states_number)
+        ]
+        return np.sum(per_state_values)
+
+    def single_cim_xu_marginal_likelihood_theta(self,
+                                                index: int,
+                                                cim: ConditionalIntensityMatrix,
+                                                alpha_xu: float,
+                                                alpha_xxu: float):
+        """Calculate the marginal likelihood over theta of the node when it assumes a
+        specific value and a specific parents' assignment.
+
+        :param index: index of the node state ``x`` the term is computed for
+        :type index: int
+        :param cim: A conditional_intensity_matrix object with the sufficient statistics
+        :type cim: class:'ConditionalIntensityMatrix'
+        :param alpha_xu: hyperparameter over the CTBN's q parameters
+        :type alpha_xu: float
+        :param alpha_xxu: distributed hyperparameter over the CTBN's theta parameters
+        :type alpha_xxu: float
+
+        :return: the value of the marginal likelihood over theta when the node assumes a specific value
+        :rtype: float
+        """
+        transitions = cim.state_transition_matrix
+
+        # The inner summation ranges over x' != x, so the state itself is dropped.
+        other_states = list(range(len(cim._state_residence_times)))
+        other_states.remove(index)
+
+        # alpha_xxu is taken as given (uniform); a non-uniform variant would rescale it
+        # here per destination state.
+        leaving_term = loggamma(alpha_xu) - loggamma(alpha_xu + transitions[index, index])
+        destinations_term = np.sum([
+            self.single_internal_cim_xxu_marginal_likelihood_theta(
+                transitions[index, index_x_first],
+                alpha_xxu)
+            for index_x_first in other_states
+        ])
+        return leaving_term + destinations_term
+
+    def single_internal_cim_xxu_marginal_likelihood_theta(self,
+                                                          M_xxu_suff_stats: float,
+                                                          alpha_xxu: float = 1):
+        """Calculate the second part of the marginal likelihood over theta formula.
+
+        :param M_xxu_suff_stats: value of the sufficient statistic M[xx'|u]
+        :type M_xxu_suff_stats: float
+        :param alpha_xxu: distributed hyperparameter over the CTBN's theta parameters, default to 1
+        :type alpha_xxu: float, optional
+
+        :return: ``loggamma(alpha_xxu + M[xx'|u]) - loggamma(alpha_xxu)``
+        :rtype: float
+        """
+        return loggamma(alpha_xxu + M_xxu_suff_stats) - loggamma(alpha_xxu)
+
+    # endregion
+
+    # region q
+
+    def marginal_likelihood_q(self,
+                              cims: np.array,
+                              tau_xu: float = 0.1,
+                              alpha_xu: float = 1):
+        """Calculate the marginal likelihood over q of a node, summed over all its cims.
+
+        :param cims: np.array with all the node's cims
+        :type cims: np.array
+        :param tau_xu: hyperparameter over the CTBN's q parameters, default to 0.1
+        :type tau_xu: float, optional
+        :param alpha_xu: hyperparameter over the CTBN's q parameters, default to 1
+        :type alpha_xu: float, optional
+
+        :return: the value of the marginal likelihood over q
+        :rtype: float
+        """
+        per_cim_values = [
+            self.variable_cim_xu_marginal_likelihood_q(cim, tau_xu, alpha_xu)
+            for cim in cims
+        ]
+        return np.sum(per_cim_values)
+
+    def variable_cim_xu_marginal_likelihood_q(self,
+                                              cim: ConditionalIntensityMatrix,
+                                              tau_xu: float = 0.1,
+                                              alpha_xu: float = 1):
+        """Calculate the marginal likelihood over q given a single cim.
+
+        :param cim: A conditional_intensity_matrix object with the sufficient statistics
+        :type cim: class:'ConditionalIntensityMatrix'
+        :param tau_xu: hyperparameter over the CTBN's q parameters, default to 0.1
+        :type tau_xu: float, optional
+        :param alpha_xu: hyperparameter over the CTBN's q parameters, default to 1
+        :type alpha_xu: float, optional
+
+        :return: the value of the marginal likelihood over q
+        :rtype: float
+        """
+        residence_times = cim._state_residence_times
+        transitions = cim.state_transition_matrix
+
+        # One term per state: M[x|u] is read from the diagonal, T[x|u] from the residence times.
+        per_state_values = [
+            self.single_cim_xu_marginal_likelihood_q(
+                transitions[index, index],
+                residence_times[index],
+                tau_xu,
+                alpha_xu)
+            for index in range(len(residence_times))
+        ]
+        return np.sum(per_state_values)
+
+    def single_cim_xu_marginal_likelihood_q(self,
+                                            M_xu_suff_stats: float,
+                                            T_xu_suff_stats: float,
+                                            tau_xu: float = 0.1,
+                                            alpha_xu: float = 1):
+        """Calculate the marginal likelihood over q of the node when it assumes a
+        specific value and a specific parents' assignment.
+
+        :param M_xu_suff_stats: value of the sufficient statistic M[x|u]
+        :type M_xu_suff_stats: float
+        :param T_xu_suff_stats: value of the sufficient statistic T[x|u]
+        :type T_xu_suff_stats: float
+        :param tau_xu: hyperparameter over the CTBN's q parameters, default to 0.1
+        :type tau_xu: float, optional
+        :param alpha_xu: hyperparameter over the CTBN's q parameters, default to 1
+        :type alpha_xu: float, optional
+
+        :return: the value of the marginal likelihood of the node when it assumes a specific value
+        :rtype: float
+        """
+        updated_count = alpha_xu + M_xu_suff_stats + 1
+        numerator = loggamma(updated_count) + log(tau_xu) * (alpha_xu + 1)
+        denominator = loggamma(alpha_xu + 1) + log(tau_xu + T_xu_suff_stats) * updated_count
+        return numerator - denominator
+
+    # endregion
+
+    def get_fam_score(self,
+                      cims: np.array,
+                      tau_xu: float = 0.1,
+                      alpha_xu: float = 1):
+        """Calculate the FamScore value of the node.
+
+        :param cims: np.array with all the node's cims
+        :type cims: np.array
+        :param tau_xu: hyperparameter over the CTBN's q parameters, default to 0.1
+        :type tau_xu: float, optional
+        :param alpha_xu: hyperparameter over the CTBN's q parameters, default to 1
+        :type alpha_xu: float, optional
+
+        :return: the FamScore value of the node
+        :rtype: float
+        """
+        # alpha_xxu spreads alpha_xu uniformly over the states the node can move to.
+        # A one-state node has no destination state: alpha_xxu is then never read by the
+        # theta term, so skip the division instead of raising ZeroDivisionError.
+        other_states_number = len(cims[0]._state_residence_times) - 1
+        if other_states_number > 0:
+            alpha_xxu = alpha_xu / other_states_number
+        else:
+            alpha_xxu = alpha_xu
+
+        q_score = self.marginal_likelihood_q(cims, tau_xu, alpha_xu)
+        theta_score = self.marginal_likelihood_theta(cims, alpha_xu, alpha_xxu)
+        return q_score + theta_score
diff --git a/PyCTBN/PyCTBN/estimators/parameters_estimator.py b/PyCTBN/PyCTBN/estimators/parameters_estimator.py
new file mode 100644
index 0000000..4754d58
--- /dev/null
+++ b/PyCTBN/PyCTBN/estimators/parameters_estimator.py
@@ -0,0 +1,143 @@
+import sys
+sys.path.append('../')
+import numpy as np
+
+from ..structure_graph.network_graph import NetworkGraph
+from ..structure_graph.set_of_cims import SetOfCims
+from ..structure_graph.trajectory import Trajectory
+
+
+class ParametersEstimator(object):
+    """Has the task of computing the cims of particular node given the trajectories and the net structure
+    in the graph ``_net_graph``.
+
+    :param trajectories: the trajectories
+    :type trajectories: Trajectory
+    :param net_graph: the net structure
+    :type net_graph: NetworkGraph
+    :_single_set_of_cims: the set of cims object that will hold the cims of the node
+    """
+
+    def __init__(self, trajectories: Trajectory, net_graph: NetworkGraph):
+        """Constructor Method
+        """
+        self._trajectories = trajectories
+        self._net_graph = net_graph
+        # Filled by ``fast_init``; ``compute_parameters_for_node`` reads it.
+        self._single_set_of_cims = None
+
+    def fast_init(self, node_id: str) -> None:
+        """Initializes all the necessary structures for the parameters estimation for the node ``node_id``.
+
+        :param node_id: the node label
+        :type node_id: string
+        """
+        p_vals = self._net_graph._aggregated_info_about_nodes_parents[2]
+        node_states_number = self._net_graph.get_states_number(node_id)
+        self._single_set_of_cims = SetOfCims(node_id, p_vals, node_states_number, self._net_graph.p_combs)
+
+    def compute_parameters_for_node(self, node_id: str) -> SetOfCims:
+        """Compute the CIMS of the node identified by the label ``node_id``.
+
+        ``fast_init`` must have been called first, since the buffers of
+        ``_single_set_of_cims`` are filled in place and then turned into cims.
+
+        :param node_id: the node label
+        :type node_id: string
+        :return: A SetOfCims object filled with the computed CIMS
+        :rtype: SetOfCims
+        """
+        node_indx = self._net_graph.get_node_indx(node_id)
+        state_res_times = self._single_set_of_cims._state_residence_times
+        transition_matrices = self._single_set_of_cims._transition_matrices
+        ParametersEstimator.compute_state_res_time_for_node(self._trajectories.times,
+                                                            self._trajectories.trajectory,
+                                                            self._net_graph.time_filtering,
+                                                            self._net_graph.time_scalar_indexing_strucure,
+                                                            state_res_times)
+        ParametersEstimator.compute_state_transitions_for_a_node(node_indx, self._trajectories.complete_trajectory,
+                                                                 self._net_graph.transition_filtering,
+                                                                 self._net_graph.transition_scalar_indexing_structure,
+                                                                 transition_matrices)
+        self._single_set_of_cims.build_cims(state_res_times, transition_matrices)
+        return self._single_set_of_cims
+
+    @staticmethod
+    def compute_state_res_time_for_node(times: np.ndarray, trajectory: np.ndarray,
+                                        cols_filter: np.ndarray, scalar_indexes_struct: np.ndarray,
+                                        T: np.ndarray) -> None:
+        """Compute the state residence times for a node and fill the matrix ``T`` with the results
+
+        :param times: the times deltas vector
+        :type times: numpy.array
+        :param trajectory: the trajectory
+        :type trajectory: numpy.ndArray
+        :param cols_filter: the columns filtering structure
+        :type cols_filter: numpy.array
+        :param scalar_indexes_struct: the indexing structure
+        :type scalar_indexes_struct: numpy.array
+        :param T: the state residence times vectors, overwritten in place
+        :type T: numpy.ndArray
+        """
+        # Collapse the selected columns of every row into one scalar bin index.
+        # ``int`` replaces the ``np.int`` alias, which NumPy removed in 1.24.
+        state_indexes = np.sum(
+            trajectory[:, cols_filter] * scalar_indexes_struct / scalar_indexes_struct[0],
+            axis=1).astype(int)
+        # Weighted bincount: accumulate the time deltas falling in each bin.
+        T[:] = np.bincount(state_indexes, times,
+                           minlength=scalar_indexes_struct[-1]).reshape(-1, T.shape[1])
+
+    @staticmethod
+    def compute_state_transitions_for_a_node(node_indx: int, trajectory: np.ndarray, cols_filter: np.ndarray,
+                                             scalar_indexing: np.ndarray, M: np.ndarray) -> None:
+        """Compute the state transitions counts for a node and fill the matrices ``M`` with the results.
+
+        :param node_indx: the index of the node
+        :type node_indx: int
+        :param trajectory: the trajectory
+        :type trajectory: numpy.ndArray
+        :param cols_filter: the columns filtering structure
+        :type cols_filter: numpy.array
+        :param scalar_indexing: the indexing structure
+        :type scalar_indexing: numpy.array
+        :param M: the state transitions matrices, overwritten in place
+        :type M: numpy.ndArray
+        """
+        # Keep only the rows whose entry in column ``shape[1] // 2 + node_indx`` is
+        # non-negative. NOTE(review): presumably the second half of the columns holds the
+        # next-state values, negative meaning "no transition" - confirm with the importer.
+        selector_column = trajectory.shape[1] // 2 + node_indx
+        trj_tmp = trajectory[trajectory[:, selector_column].astype(int) >= 0]
+        transition_indexes = np.sum(
+            trj_tmp[:, cols_filter] * scalar_indexing / scalar_indexing[0],
+            axis=1).astype(int)
+        M[:] = np.bincount(transition_indexes,
+                           minlength=scalar_indexing[-1]).reshape(-1, M.shape[1], M.shape[2])
+        # Each diagonal entry becomes the sum of the off-diagonal counts of its row.
+        # Indexing M directly works whether or not M is contiguous.
+        diagonal = np.arange(M.shape[1])
+        M[:, diagonal, diagonal] = 0
+        M[:, diagonal, diagonal] = np.sum(M, axis=2)
+
+    def init_sets_cims_container(self):
+        """Legacy initializer of a container of sets of cims for all the nodes.
+
+        NOTE(review): this method reads ``acims`` and ``self.net_graph``, neither of which is
+        defined by this module or by the constructor (which stores ``_net_graph``), so a call
+        appears to fail. Confirm whether it can be removed.
+        """
+        self.sets_of_cims_struct = acims.SetsOfCimsContainer(self.net_graph.nodes,
+                                                             self.net_graph.nodes_values,
+                                                             self.net_graph.get_ordered_by_indx_parents_values_for_all_nodes(),
+                                                             self.net_graph.p_combs)
+
+    def compute_parameters(self):
+        """Legacy routine computing the cims of every node in ``sets_of_cims_struct``.
+
+        NOTE(review): this method reads ``self.net_graph`` and ``self.sample_path``, which the
+        constructor never sets, and passes one more positional argument to
+        ``compute_state_res_time_for_node`` than that method accepts, so a call appears to
+        fail. Confirm whether it can be removed in favour of ``compute_parameters_for_node``.
+        """
+        for indx, aggr in enumerate(zip(self.net_graph.nodes, self.sets_of_cims_struct.sets_of_cims)):
+            self.compute_state_res_time_for_node(self.net_graph.get_node_indx(aggr[0]), self.sample_path.trajectories.times,
+                                                 self.sample_path.trajectories.trajectory,
+                                                 self.net_graph.time_filtering[indx],
+                                                 self.net_graph.time_scalar_indexing_strucure[indx],
+                                                 aggr[1]._state_residence_times)
+            self.compute_state_transitions_for_a_node(self.net_graph.get_node_indx(aggr[0]),
+                                                      self.sample_path.trajectories.complete_trajectory,
+                                                      self.net_graph.transition_filtering[indx],
+                                                      self.net_graph.transition_scalar_indexing_structure[indx],
+                                                      aggr[1]._transition_matrices)
+            aggr[1].build_cims(aggr[1]._state_residence_times, aggr[1]._transition_matrices)
+
+
+
+
+
+
+
+
diff --git a/PyCTBN/PyCTBN/estimators/structure_constraint_based_estimator.py b/PyCTBN/PyCTBN/estimators/structure_constraint_based_estimator.py
new file mode 100644
index 0000000..7d5721e
--- /dev/null
+++ b/PyCTBN/PyCTBN/estimators/structure_constraint_based_estimator.py
@@ -0,0 +1,238 @@
+
+import itertools
+import json
+import typing
+
+import networkx as nx
+import numpy as np
+from networkx.readwrite import json_graph
+import os
+from scipy.stats import chi2 as chi2_dist
+from scipy.stats import f as f_dist
+from tqdm import tqdm
+
+from ..utility.cache import Cache
+from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
+from ..structure_graph.network_graph import NetworkGraph
+from .parameters_estimator import ParametersEstimator
+from .structure_estimator import StructureEstimator
+from ..structure_graph.sample_path import SamplePath
+from ..structure_graph.structure import Structure
+from ..optimizers.constraint_based_optimizer import ConstraintBasedOptimizer
+
+import concurrent.futures
+
+
+
+import multiprocessing
+from multiprocessing import Pool
+
+
+class StructureConstraintBasedEstimator(StructureEstimator):
+    """
+    Has the task of estimating the network structure given the trajectories in samplepath by using a constraint-based approach.
+
+    :param sample_path: the _sample_path object containing the trajectories and the real structure
+    :type sample_path: SamplePath
+    :param exp_test_alfa: the significance level for the exponential Hp test
+    :type exp_test_alfa: float
+    :param chi_test_alfa: the significance level for the chi Hp test
+    :type chi_test_alfa: float
+    :param known_edges: the prior known edges, default to None (no known edge)
+    :type known_edges: List, optional
+    :param thumb_threshold: the threshold compared against the thumb value in ``independence_test``, default to 25
+    :type thumb_threshold: int, optional
+    :_nodes: the nodes labels
+    :_nodes_vals: the nodes cardinalities
+    :_nodes_indxs: the nodes indexes
+    :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes``
+    :_cache: the Cache object
+    """
+
+    def __init__(self, sample_path: SamplePath, exp_test_alfa: float, chi_test_alfa: float,
+                 known_edges: typing.List = None, thumb_threshold: int = 25):
+        """Constructor Method
+        """
+        # ``None`` replaces the former shared mutable default ``[]``; the parent
+        # constructor treats every falsy value as "no known edges".
+        super().__init__(sample_path, known_edges)
+        self._exp_test_sign = exp_test_alfa
+        self._chi_test_alfa = chi_test_alfa
+        self._thumb_threshold = thumb_threshold
+        self._cache = Cache()
+
+    def _build_test_graph(self, test_parent: str, test_child: str, parent_set: typing.List,
+                          tot_vars_count: int) -> NetworkGraph:
+        """Build the graph G2 in which ``test_parent`` and every node of ``parent_set`` point to ``test_child``."""
+        involved_nodes = list(parent_set) + [test_child, test_parent]
+        bool_mask = np.isin(self._nodes, involved_nodes)
+        labels = list(self._nodes[bool_mask])
+        indexes = self._nodes_indxs[bool_mask]
+        values = self._nodes_vals[bool_mask]
+        # Pair every parent with the whole child label. ``itertools.product(parents, test_child)``
+        # would iterate over the characters of the label and break multi-character names.
+        edges = [(parent, test_child) for parent in [test_parent] + list(parent_set)]
+        return NetworkGraph(Structure(labels, indexes, values, edges, tot_vars_count))
+
+    def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int,
+                      tot_vars_count: int, parent_indx, child_indx) -> bool:
+        """Performs a complete independence test on the directed graphs G1 = {test_child U parent_set}
+        G2 = {G1 U test_parent} (added as an additional parent of the test_child).
+        Generates all the necessary structures and datas to perform the tests.
+
+        :param test_parent: the node label of the test parent
+        :type test_parent: string
+        :param test_child: the node label of the child
+        :type test_child: string
+        :param parent_set: the common parent set
+        :type parent_set: List
+        :param child_states_numb: the cardinality of the ``test_child``
+        :type child_states_numb: int
+        :param tot_vars_count: the total number of variables in the net
+        :type tot_vars_count: int
+        :param parent_indx: row index written in ``_removable_edges_matrix`` by ``independence_test``
+        :param child_indx: column index written in ``_removable_edges_matrix`` by ``independence_test``
+        :return: True iff test_child and test_parent are independent given the sep_set parent_set. False otherwise
+        :rtype: bool
+        """
+        p_set = parent_set[:]
+
+        # Mask selecting, among the parents of G2 in ``_nodes`` order, those of the sep-set.
+        parents = np.append(np.array(parent_set), test_parent)
+        sorted_parents = self._nodes[np.isin(self._nodes, parents)]
+        cims_filter = sorted_parents != test_parent
+
+        # Cims of the child in G2 (sep-set plus test parent), computed once and cached.
+        p_set.insert(0, test_parent)
+        g2 = None
+        sofc2 = self._cache.find(set(p_set))
+        if not sofc2:
+            g2 = self._build_test_graph(test_parent, test_child, parent_set, tot_vars_count)
+            g2.fast_init(test_child)
+            p2 = ParametersEstimator(self._sample_path.trajectories, g2)
+            p2.fast_init(test_child)
+            sofc2 = p2.compute_parameters_for_node(test_child)
+            self._cache.put(set(p_set), sofc2)
+
+        # Cims of the child in G1 (sep-set only), obtained by dropping the test parent from G2.
+        del p_set[0]
+        sofc1 = self._cache.find(set(p_set))
+        if not sofc1:
+            if g2 is None:
+                # The G2 cims came from the cache, so the graph has not been built yet.
+                g2 = self._build_test_graph(test_parent, test_child, parent_set, tot_vars_count)
+            g2.remove_node(test_parent)
+            g2.fast_init(test_child)
+            p2 = ParametersEstimator(self._sample_path.trajectories, g2)
+            p2.fast_init(test_child)
+            sofc1 = p2.compute_parameters_for_node(test_child)
+            self._cache.put(set(p_set), sofc1)
+
+        # The thumb value is only consulted by ``independence_test`` for non-binary children.
+        thumb_value = 0.0
+        if child_states_numb > 2:
+            parent_val = self._sample_path.structure.get_states_number(test_parent)
+            bool_mask_vals = np.isin(self._nodes, parent_set)
+            parents_vals = self._nodes_vals[bool_mask_vals]
+            thumb_value = self.compute_thumb_value(parent_val, child_states_numb, parents_vals)
+
+        # Every cim of G1 is compared with all the cims of G2 sharing its sep-set assignment.
+        for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs):
+            cond_cims = sofc2.filter_cims_with_mask(cims_filter, p_comb)
+            for cim2 in cond_cims:
+                if not self.independence_test(child_states_numb, cim1, cim2, thumb_value, parent_indx, child_indx):
+                    return False
+        return True
+
+    def independence_test(self, child_states_numb: int, cim1: ConditionalIntensityMatrix,
+                          cim2: ConditionalIntensityMatrix, thumb_value: float, parent_indx, child_indx) -> bool:
+        """Compute the actual independence test using two cims.
+        It is performed first the exponential test and if the null hypothesis is not rejected,
+        it is performed also the chi_test.
+
+        :param child_states_numb: the cardinality of the test child
+        :type child_states_numb: int
+        :param cim1: a cim belonging to the graph without test parent
+        :type cim1: ConditionalIntensityMatrix
+        :param cim2: a cim belonging to the graph with test parent
+        :type cim2: ConditionalIntensityMatrix
+        :param thumb_value: the value computed by ``compute_thumb_value``; read only when ``child_states_numb > 2``
+        :type thumb_value: float
+        :param parent_indx: row of ``_removable_edges_matrix`` set to False when the thumb check fails
+        :param child_indx: column of ``_removable_edges_matrix`` set to False when the thumb check fails
+        :return: True iff both tests do NOT reject the null hypothesis of independence. False otherwise.
+        :rtype: bool
+        """
+        M1 = cim1.state_transition_matrix
+        M2 = cim2.state_transition_matrix
+        r1s = M1.diagonal()
+        r2s = M2.diagonal()
+        C1 = cim1.cim
+        C2 = cim2.cim
+
+        # Thumb check: when it fails the edge is marked as not removable and the test
+        # reports dependence.
+        if child_states_numb > 2 and (np.sum(np.diagonal(M1)) / thumb_value) < self._thumb_threshold:
+            self._removable_edges_matrix[parent_indx][child_indx] = False
+            return False
+
+        # Exponential test: two-sided F test on the ratio of the cims diagonals.
+        F_stats = C2.diagonal() / C1.diagonal()
+        exp_alfa = self._exp_test_sign
+        for val in range(child_states_numb):
+            lower_bound = f_dist.ppf(exp_alfa / 2, r1s[val], r2s[val])
+            upper_bound = f_dist.ppf(1 - exp_alfa / 2, r1s[val], r2s[val])
+            if F_stats[val] < lower_bound or F_stats[val] > upper_bound:
+                return False
+
+        # Chi test on the off-diagonal transition counts, one row per child state.
+        M1_no_diag = M1[~np.eye(M1.shape[0], dtype=bool)].reshape(M1.shape[0], -1)
+        M2_no_diag = M2[~np.eye(M2.shape[0], dtype=bool)].reshape(M2.shape[0], -1)
+        chi_2_quantile = chi2_dist.ppf(1 - self._chi_test_alfa, child_states_numb - 1)
+        Ks = np.sqrt(r1s / r2s)
+        Ls = np.sqrt(r2s / r1s)
+        for val in range(child_states_numb):
+            Chi = np.sum(np.power(Ks[val] * M2_no_diag[val] - Ls[val] * M1_no_diag[val], 2) /
+                         (M1_no_diag[val] + M2_no_diag[val]))
+            if Chi > chi_2_quantile:
+                return False
+        return True
+
+    def compute_thumb_value(self, parent_val, child_val, parent_set_vals):
+        """Compute the value to test against the thumb_threshold.
+
+        :param parent_val: test parent's variable cardinality
+        :type parent_val: int
+        :param child_val: test child's variable cardinality
+        :type child_val: int
+        :param parent_set_vals: the cardinalities of the nodes in the current sep-set
+        :type parent_set_vals: List
+        :return: ``(child_val - 1) ** 2 * parent_val`` times the product of ``parent_set_vals``
+        :rtype: int
+        """
+        df = ((child_val - 1) ** 2) * parent_val
+        for cardinality in parent_set_vals:
+            df = df * cardinality
+        return df
+
+    def one_iteration_of_CTPC_algorithm(self, var_id: str, tot_vars_count: int) -> typing.List:
+        """Performs an iteration of the CTPC algorithm using the node ``var_id`` as ``test_child``.
+
+        :param var_id: the node label of the test child
+        :type var_id: string
+        :param tot_vars_count: the total number of variables in the net
+        :type tot_vars_count: int
+        :return: the result of ``ConstraintBasedOptimizer.optimize_structure`` for the node
+        :rtype: List
+        """
+        optimizer_obj = ConstraintBasedOptimizer(
+            node_id=var_id,
+            structure_estimator=self,
+            tot_vars_count=tot_vars_count)
+        return optimizer_obj.optimize_structure()
+
+    def ctpc_algorithm(self, disable_multiprocessing: bool = False):
+        """Compute the CTPC algorithm over the entire net.
+
+        :param disable_multiprocessing: if True the nodes are processed sequentially in the
+            current process, otherwise one process per CPU is used; default to False
+        :type disable_multiprocessing: bool, optional
+        :return: the set obtained by chaining the per-node results
+        :rtype: set
+        """
+        ctpc_algo = self.one_iteration_of_CTPC_algorithm
+        total_vars_numb = self._sample_path.total_variables_count
+
+        # Remove all the edges from the structure
+        self._sample_path.structure.clean_structure_edges()
+
+        # Estimate the best parents for each node
+        if disable_multiprocessing:
+            print("DISABILITATO")
+            list_edges_partial = [ctpc_algo(n, total_vars_numb) for n in self._nodes]
+        else:
+            cpu_count = multiprocessing.cpu_count()
+            total_vars_numb_array = [total_vars_numb] * len(self._nodes)
+            with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor:
+                # Materialize the results while the pool is still open.
+                list_edges_partial = list(executor.map(ctpc_algo,
+                                                       self._nodes,
+                                                       total_vars_numb_array))
+
+        return set(itertools.chain.from_iterable(list_edges_partial))
+
+    def estimate_structure(self, disable_multiprocessing: bool = False):
+        """Estimate the structure by running ``ctpc_algorithm``.
+
+        :param disable_multiprocessing: forwarded to ``ctpc_algorithm``, default to False
+        :type disable_multiprocessing: bool, optional
+        :return: the value returned by ``ctpc_algorithm``
+        :rtype: set
+        """
+        return self.ctpc_algorithm(disable_multiprocessing=disable_multiprocessing)
+
+
+
+
diff --git a/PyCTBN/PyCTBN/estimators/structure_estimator.py b/PyCTBN/PyCTBN/estimators/structure_estimator.py
new file mode 100644
index 0000000..fbf8ea9
--- /dev/null
+++ b/PyCTBN/PyCTBN/estimators/structure_estimator.py
@@ -0,0 +1,187 @@
+
+import itertools
+import json
+import typing
+
+import matplotlib.pyplot as plt
+import networkx as nx
+import numpy as np
+from networkx.readwrite import json_graph
+
+from abc import ABC
+
+import abc
+
+from ..utility.cache import Cache
+from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
+from ..structure_graph.network_graph import NetworkGraph
+from .parameters_estimator import ParametersEstimator
+from ..structure_graph.sample_path import SamplePath
+from ..structure_graph.structure import Structure
+
+
+class StructureEstimator(object):
+    """Has the task of estimating the network structure given the trajectories in ``samplepath``.
+
+    :param sample_path: the _sample_path object containing the trajectories and the real structure
+    :type sample_path: SamplePath
+    :param known_edges: the prior known edges, default to None (no known edge)
+    :type known_edges: List, optional
+    :_nodes: the nodes labels
+    :_nodes_vals: the nodes cardinalities
+    :_nodes_indxs: the nodes indexes
+    :_removable_edges_matrix: boolean matrix, False where an edge must not be removed
+    :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes``
+    """
+
+    def __init__(self, sample_path: SamplePath, known_edges: typing.List = None):
+        """Constructor Method
+        """
+        self._sample_path = sample_path
+        self._nodes = np.array(self._sample_path.structure.nodes_labels)
+        self._nodes_vals = self._sample_path.structure.nodes_values
+        self._nodes_indxs = self._sample_path.structure.nodes_indexes
+        self._removable_edges_matrix = self.build_removable_edges_matrix(known_edges)
+        self._complete_graph = StructureEstimator.build_complete_graph(self._sample_path.structure.nodes_labels)
+
+    def build_removable_edges_matrix(self, known_edges: typing.List):
+        """Builds a boolean matrix who shows if a edge could be removed or not, based on prior knowledge given:
+
+        :param known_edges: the known edges, as pairs of nodes labels; None or empty for no prior knowledge
+        :type known_edges: List
+        :return: a boolean matrix, True everywhere except at ``[i][j]`` for every known edge ``(i, j)``
+        :rtype: np.ndarray
+        """
+        tot_vars_count = self._sample_path.total_variables_count
+        complete_adj_matrix = np.full((tot_vars_count, tot_vars_count), True)
+        structure = self._sample_path.structure
+        for edge in known_edges or []:
+            i = structure.get_node_indx(edge[0])
+            j = structure.get_node_indx(edge[1])
+            complete_adj_matrix[i][j] = False
+        return complete_adj_matrix
+
+    @staticmethod
+    def build_complete_graph(node_ids: typing.List) -> nx.DiGraph:
+        """Builds a complete directed graph (no self loops) given the nodes labels in the list ``node_ids``:
+
+        :param node_ids: the list of nodes labels
+        :type node_ids: List
+        :return: a complete Digraph Object
+        :rtype: networkx.DiGraph
+        """
+        complete_graph = nx.DiGraph()
+        complete_graph.add_nodes_from(node_ids)
+        # Every ordered pair of distinct nodes is an edge.
+        complete_graph.add_edges_from(itertools.permutations(node_ids, 2))
+        return complete_graph
+
+    @staticmethod
+    def generate_possible_sub_sets_of_size(u: typing.List, size: int, parent_label: str):
+        """Creates a list containing all possible subsets of the list ``u`` of size ``size``,
+        that do not contains a the node identified by ``parent_label``.
+
+        :param u: the list of nodes
+        :type u: List
+        :param size: the size of the subsets
+        :type size: int
+        :param parent_label: the node to exclude in the subsets generation
+        :type parent_label: string
+        :raises ValueError: if ``parent_label`` is not in ``u``
+        :return: an Iterator Object containing a list of lists
+        :rtype: Iterator
+        """
+        # Work on a copy so the caller's list is left untouched.
+        list_without_test_parent = u[:]
+        list_without_test_parent.remove(parent_label)
+        return map(list, itertools.combinations(list_without_test_parent, size))
+
+    def _results_base_name(self) -> str:
+        """Return the input file name without directory and extension, followed by ``_<dataset id>``."""
+        importer = self._sample_path._importer
+        name = importer.file_path.rsplit('/', 1)[-1]
+        name = name.split('.', 1)[0]
+        return name + '_' + str(importer.dataset_id())
+
+    def save_results(self) -> None:
+        """Save the estimated Structure to a .json file in the current working directory.
+        The file is named as the input dataset, prefixed by ``results_`` and followed by the dataset id.
+        """
+        res = json_graph.node_link_data(self._complete_graph)
+        file_name = 'results_' + self._results_base_name() + '.json'
+        with open(file_name, 'w') as f:
+            json.dump(res, f)
+
+    def remove_diagonal_elements(self, matrix):
+        """Return a copy of the square ``matrix`` without its main diagonal.
+
+        :param matrix: a square matrix of shape ``(m, m)``
+        :type matrix: numpy.ndArray
+        :return: a matrix of shape ``(m, m - 1)`` holding, row by row, the off-diagonal elements
+        :rtype: numpy.ndArray
+        """
+        m = matrix.shape[0]
+        # The boolean mask gives the same result as the former stride trick on
+        # C-contiguous input and also works when the input is not contiguous.
+        return matrix[~np.eye(m, dtype=bool)].reshape(m, -1)
+
+    # NOTE(review): the class does not derive from ``abc.ABC``, so this decorator does not
+    # prevent instantiation.
+    @abc.abstractmethod
+    def estimate_structure(self) -> typing.List:
+        """Abstract method to estimate the structure
+
+        :return: List of estimated edges
+        :rtype: Typing.List
+        """
+        pass
+
+    def adjacency_matrix(self) -> np.ndarray:
+        """Converts the estimated structure ``_complete_graph`` to a boolean adjacency matrix representation.
+
+        :return: The adjacency matrix of the graph ``_complete_graph``
+        :rtype: numpy.ndArray
+        """
+        # ``nx.adj_matrix`` was removed in NetworkX 3.0; ``adjacency_matrix`` is the
+        # function it aliased.
+        return nx.adjacency_matrix(self._complete_graph).toarray().astype(bool)
+
+    def spurious_edges(self) -> typing.List:
+        """Return the spurious edges present in the estimated structure, if a prior net structure is present in
+        ``_sample_path.structure``.
+
+        NOTE(review): the value computed is the difference "real graph minus ``_complete_graph``",
+        i.e. the edges of the prior structure that are absent from the estimate; confirm this
+        is the intended meaning of "spurious".
+
+        :raises RuntimeError: if the sample path has no prior net structure
+        :return: A list containing the spurious edges
+        :rtype: List
+        """
+        if not self._sample_path.has_prior_net_structure:
+            raise RuntimeError("Can not compute spurious edges with no prior net structure!")
+        real_graph = nx.DiGraph()
+        real_graph.add_nodes_from(self._sample_path.structure.nodes_labels)
+        real_graph.add_edges_from(self._sample_path.structure.edges)
+        return nx.difference(real_graph, self._complete_graph).edges
+
+    def save_plot_estimated_structure_graph(self) -> None:
+        """Plot the estimated structure in a graphical model style and save it as a .png file
+        in the current working directory.
+        The edges returned by ``spurious_edges`` are colored in red, the others in black.
+        """
+        # Local import: ``os`` is not among the module-level imports of this file.
+        import os
+
+        graph_to_draw = nx.DiGraph()
+        spurious_edges = list(self.spurious_edges())
+        spurious_lookup = set(spurious_edges)
+        non_spurious_edges = list(set(self._complete_graph.edges) - spurious_lookup)
+        print(non_spurious_edges)
+        graph_to_draw.add_edges_from(spurious_edges)
+        graph_to_draw.add_edges_from(non_spurious_edges)
+        # Colors must follow the edge order of the graph that is actually drawn.
+        edges_colors = ['red' if edge in spurious_lookup else 'black' for edge in graph_to_draw.edges]
+        pos = nx.spring_layout(graph_to_draw, k=0.5*1/np.sqrt(len(graph_to_draw.nodes())), iterations=50, scale=10)
+        options = {
+            "node_size": 2000,
+            "node_color": "white",
+            "edgecolors": "black",
+            'linewidths': 2,
+            "with_labels": True,
+            "font_size": 13,
+            'connectionstyle': 'arc3, rad = 0.1',
+            "arrowsize": 15,
+            "arrowstyle": '<|-',
+            "width": 1,
+            "edge_color": edges_colors,
+        }
+
+        nx.draw(graph_to_draw, pos, **options)
+        ax = plt.gca()
+        ax.margins(0.20)
+        plt.axis("off")
+        name = self._results_base_name() + '.png'
+        plt.savefig(name)
+        plt.clf()
+        print("Estimated Structure Plot Saved At: ", os.path.abspath(name))
+
+
+
+
+
diff --git a/PyCTBN/PyCTBN/estimators/structure_score_based_estimator.py b/PyCTBN/PyCTBN/estimators/structure_score_based_estimator.py
new file mode 100644
index 0000000..2903db3
--- /dev/null
+++ b/PyCTBN/PyCTBN/estimators/structure_score_based_estimator.py
@@ -0,0 +1,244 @@
+
+import itertools
+import json
+import typing
+
+import networkx as nx
+import numpy as np
+from networkx.readwrite import json_graph
+
+from random import choice
+
+import concurrent.futures
+
+import copy
+
+from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
+from ..structure_graph.network_graph import NetworkGraph
+from .parameters_estimator import ParametersEstimator
+from .structure_estimator import StructureEstimator
+from ..structure_graph.sample_path import SamplePath
+from ..structure_graph.structure import Structure
+from .fam_score_calculator import FamScoreCalculator
+from ..optimizers.hill_climbing_search import HillClimbing
+from ..optimizers.tabu_search import TabuSearch
+
+
+import multiprocessing
+from multiprocessing import Pool
+
+
+
+
+class StructureScoreBasedEstimator(StructureEstimator):
+    """
+    Has the task of estimating the network structure given the trajectories in samplepath by
+    using a score based approach.
+
+    :param sample_path: the _sample_path object containing the trajectories and the real structure
+    :type sample_path: SamplePath
+    :param tau_xu: hyperparameter over the CTBN’s q parameters, default to 0.1
+    :type tau_xu: float, optional
+    :param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 1
+    :type alpha_xu: float, optional
+    :param known_edges: List of known edges, default to None (no known edges)
+    :type known_edges: List, optional
+
+    """
+
+    def __init__(self, sample_path: SamplePath, tau_xu: float = 0.1, alpha_xu: float = 1, known_edges: typing.List = None):
+        # A list literal as default would be one object shared by all the calls: build a fresh one.
+        super().__init__(sample_path, [] if known_edges is None else known_edges)
+        self.tau_xu = tau_xu
+        self.alpha_xu = alpha_xu
+
+    def estimate_structure(self, max_parents:int = None, iterations_number:int= 40,
+                patience:int = None, tabu_length:int = None, tabu_rules_duration:int = None,
+                optimizer: str = 'tabu',disable_multiprocessing:bool= False ):
+        """
+        Compute the score-based algorithm to find the optimal structure
+
+        The edges of the sample path structure are saved as ground truth and then removed from it;
+        the parents of every node are searched independently (in a pool of processes unless
+        multiprocessing is disabled) and precision and recall of the result are printed.
+
+        :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
+        :type max_parents: int, optional
+        :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
+        :type iterations_number: int, optional
+        :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None
+        :type patience: int, optional
+        :param tabu_length: maximum length of the data structures used in the optimization process, default to None
+        :type tabu_length: int, optional
+        :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None
+        :type tabu_rules_duration: int, optional
+        :param optimizer: name of the optimizer algorithm. Possible values: 'hill' (Hill climbing),'tabu' (tabu search), default to 'tabu'
+        :type optimizer: string, optional
+        :param disable_multiprocessing: true if you desire to disable the multiprocessing operations, default to False
+        :type disable_multiprocessing: Boolean, optional
+
+        :return: the set of the estimated edges
+        :rtype: set
+        """
+        # Save the true edges structure in tuples
+        true_edges = copy.deepcopy(self._sample_path.structure.edges)
+        true_edges = set(map(tuple, true_edges))
+
+        # Remove all the edges from the structure
+        self._sample_path.structure.clean_structure_edges()
+
+        estimate_parents = self.estimate_parents
+        n_nodes = len(self._nodes)
+
+        l_max_parents = [max_parents] * n_nodes
+        l_iterations_number = [iterations_number] * n_nodes
+        l_patience = [patience] * n_nodes
+        l_tabu_length = [tabu_length] * n_nodes
+        l_tabu_rules_duration = [tabu_rules_duration] * n_nodes
+        l_optimizer = [optimizer] * n_nodes
+
+        # get the number of CPU
+        cpu_count = multiprocessing.cpu_count()
+        print(f"CPU COUNT: {cpu_count}")
+
+        if disable_multiprocessing:
+            cpu_count = 1
+
+        # Estimate the best parents for each node
+        if disable_multiprocessing:
+            list_edges_partial = [
+                estimate_parents(n, max_parents, iterations_number, patience,
+                                 tabu_length, tabu_rules_duration, optimizer)
+                for n in self._nodes]
+        else:
+            with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor:
+                # ``map`` zips the argument lists (one call per node); collect while the pool is alive.
+                list_edges_partial = list(executor.map(estimate_parents,
+                                                       self._nodes,
+                                                       l_max_parents,
+                                                       l_iterations_number,
+                                                       l_patience,
+                                                       l_tabu_length,
+                                                       l_tabu_rules_duration,
+                                                       l_optimizer))
+
+        # Concatenate all the edges list
+        set_list_edges = set(itertools.chain.from_iterable(list_edges_partial))
+
+        # Compare the estimate with the saved true edges and print the quality metrics
+        self._print_precision_recall(true_edges, set_list_edges)
+
+        return set_list_edges
+
+    @staticmethod
+    def _print_precision_recall(true_edges: typing.Set, estimated_edges: typing.Set) -> None:
+        """
+        Print the true and the estimated edges followed by precision and recall of the estimate.
+        Only an error message is printed when one of the two metrics is undefined.
+
+        :param true_edges: the edges of the real structure
+        :type true_edges: set
+        :param estimated_edges: the edges found by the structure search
+        :type estimated_edges: set
+        """
+        n_added_fake_edges = len(estimated_edges.difference(true_edges))
+        n_missing_edges = len(true_edges.difference(estimated_edges))
+        n_true_positive = len(true_edges) - n_missing_edges
+
+        try:
+            precision = n_true_positive / (n_true_positive + n_added_fake_edges)
+            recall = n_true_positive / (n_true_positive + n_missing_edges)
+        except ZeroDivisionError as e:
+            # No estimated edges (precision) or no true edges (recall): the ratio does not exist
+            print(f"errore: {e}")
+            return
+
+        print(true_edges)
+        print(estimated_edges)
+        print(f"precision: {precision} ")
+        print(f"recall: {recall} ")
+
+
+    def estimate_parents(self,node_id:str, max_parents:int = None, iterations_number:int= 40,
+                patience:int = 10, tabu_length:int = None, tabu_rules_duration:int=5,
+                optimizer:str = 'hill' ):
+        """
+        Use the FamScore of a node in order to find the best parent nodes
+
+        :param node_id: current node's id
+        :type node_id: string
+        :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
+        :type max_parents: int, optional
+        :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
+        :type iterations_number: int, optional
+        :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to 10
+        :type patience: int, optional
+        :param tabu_length: maximum length of the data structures used in the optimization process, default to None
+        :type tabu_length: int, optional
+        :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to 5
+        :type tabu_rules_duration: int, optional
+        :param optimizer: name of the optimizer algorithm: 'tabu' (tabu search), any other value selects Hill climbing, default to 'hill'
+        :type optimizer: string, optional
+
+        :return: A list of the best edges for the current node
+        :rtype: List
+        """
+
+        # choose the optimizer algorithm
+        if optimizer == 'tabu':
+            search = TabuSearch(
+                        node_id = node_id,
+                        structure_estimator = self,
+                        max_parents = max_parents,
+                        iterations_number = iterations_number,
+                        patience = patience,
+                        tabu_length = tabu_length,
+                        tabu_rules_duration = tabu_rules_duration)
+        else: #if optimizer == 'hill':
+            search = HillClimbing(
+                        node_id = node_id,
+                        structure_estimator = self,
+                        max_parents = max_parents,
+                        iterations_number = iterations_number,
+                        patience = patience)
+
+        # call the optimizer's function that calculates the current node's parents
+        return search.optimize_structure()
+
+
+    def get_score_from_graph(self,
+                             graph: NetworkGraph,
+                             node_id:str):
+        """
+        Get the FamScore of a node
+
+        The graph structures are initialized for ``node_id`` and the parameters of the node are
+        estimated on the trajectories of the sample path before computing the score.
+
+        :param graph: current graph to be computed
+        :type graph: class:'NetworkGraph'
+        :param node_id: current node's id
+        :type node_id: string
+
+        :return: The FamScore for this graph structure
+        :rtype: float
+        """
+        # initialize the graph for a single node
+        graph.fast_init(node_id)
+
+        params_estimation = ParametersEstimator(self._sample_path.trajectories, graph)
+
+        # Initialize and compute parameters for node
+        params_estimation.fast_init(node_id)
+        set_of_cims = params_estimation.compute_parameters_for_node(node_id)
+
+        # calculate the FamScore for the node
+        fam_score_obj = FamScoreCalculator()
+
+        score = fam_score_obj.get_fam_score(set_of_cims.actual_cims,
+                                            tau_xu=self.tau_xu, alpha_xu=self.alpha_xu)
+        return score
+
+
+
+
diff --git a/PyCTBN/PyCTBN/optimizers/__init__.py b/PyCTBN/PyCTBN/optimizers/__init__.py
new file mode 100644
index 0000000..4162bf1
--- /dev/null
+++ b/PyCTBN/PyCTBN/optimizers/__init__.py
@@ -0,0 +1,4 @@
+from .optimizer import Optimizer
+from .tabu_search import TabuSearch
+from .hill_climbing_search import HillClimbing
+from .constraint_based_optimizer import ConstraintBasedOptimizer
\ No newline at end of file
diff --git a/PyCTBN/PyCTBN/optimizers/constraint_based_optimizer.py b/PyCTBN/PyCTBN/optimizers/constraint_based_optimizer.py
new file mode 100644
index 0000000..65bc19c
--- /dev/null
+++ b/PyCTBN/PyCTBN/optimizers/constraint_based_optimizer.py
@@ -0,0 +1,87 @@
+
+import itertools
+import json
+import typing
+
+import networkx as nx
+import numpy as np
+
+from random import choice
+
+from abc import ABC
+
+import copy
+
+
+from .optimizer import Optimizer
+from ..estimators.structure_estimator import StructureEstimator
+from ..structure_graph.network_graph import NetworkGraph
+
+
+class ConstraintBasedOptimizer(Optimizer):
+    """
+    Optimizer that estimates the structure of a node with the CTPC Algorithm
+
+    :param node_id: current node's id
+    :type node_id: string
+    :param structure_estimator: a structure estimator object with the information about the net
+    :type structure_estimator: class:'StructureEstimator'
+    :param tot_vars_count: number of variables in the dataset
+    :type tot_vars_count: int
+    """
+
+    def __init__(self,
+                 node_id: str,
+                 structure_estimator: StructureEstimator,
+                 tot_vars_count: int
+                 ):
+        """
+        Store the node, the structure estimator and the number of variables in the dataset
+        """
+        super().__init__(node_id, structure_estimator)
+        self.tot_vars_count = tot_vars_count
+
+
+    def optimize_structure(self):
+        """
+        Compute Optimization process for a structure_estimator by using a CTPC Algorithm:
+        all the other nodes start as parents and the ones whose test succeeds are removed
+
+        :return: the estimated structure for the node
+        :rtype: List
+        """
+        print("##################TESTING VAR################", self.node_id)
+
+        estimator = self.structure_estimator
+        structure = estimator._sample_path.structure
+        graph = NetworkGraph(structure)
+
+        # Start from the complete parent set: every other node points to the tested node
+        candidates = [label for label in structure.nodes_labels if label != self.node_id]
+        graph.add_edges([(candidate, self.node_id) for candidate in candidates])
+
+        child_states_numb = structure.get_states_number(self.node_id)
+        # The size of the tested parent subsets grows by one at each outer pass
+        subset_size = 0
+        while subset_size < len(candidates):
+            position = 0
+            while position < len(candidates):
+                test_parent = candidates[position]
+                i = structure.get_node_indx(test_parent)
+                j = structure.get_node_indx(self.node_id)
+                removed = False
+                if estimator._removable_edges_matrix[i][j]:
+                    subsets = StructureEstimator.generate_possible_sub_sets_of_size(candidates, subset_size, test_parent)
+                    for parents_set in subsets:
+                        if estimator.complete_test(test_parent, self.node_id, parents_set,
+                                                   child_states_numb, self.tot_vars_count, i, j):
+                            graph.remove_edges([(test_parent, self.node_id)])
+                            candidates.remove(test_parent)
+                            removed = True
+                            break
+                # On removal the list shrinks, so the same position already holds the next candidate
+                if not removed:
+                    position += 1
+            subset_size += 1
+        estimator._cache.clear()
+        return graph.edges
\ No newline at end of file
diff --git a/PyCTBN/PyCTBN/optimizers/hill_climbing_search.py b/PyCTBN/PyCTBN/optimizers/hill_climbing_search.py
new file mode 100644
index 0000000..6783be0
--- /dev/null
+++ b/PyCTBN/PyCTBN/optimizers/hill_climbing_search.py
@@ -0,0 +1,135 @@
+
+import itertools
+import json
+import typing
+
+import networkx as nx
+import numpy as np
+
+from random import choice
+
+from abc import ABC
+
+
+from .optimizer import Optimizer
+from ..estimators.structure_estimator import StructureEstimator
+from ..structure_graph.network_graph import NetworkGraph
+
+
+class HillClimbing(Optimizer):
+    """
+    Optimizer class that implement Hill Climbing Search
+
+    At every iteration the edge from a randomly chosen candidate parent to the node is toggled
+    (added if missing, removed if present) and the change is kept only if it strictly improves
+    the score of the node, otherwise it is undone.
+
+    :param node_id: current node's id
+    :type node_id: string
+    :param structure_estimator: a structure estimator object with the information about the net
+    :type structure_estimator: class:'StructureEstimator'
+    :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
+    :type max_parents: int, optional
+    :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
+    :type iterations_number: int, optional
+    :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None
+    :type patience: int, optional
+    """
+
+    def __init__(self,
+                node_id:str,
+                structure_estimator: StructureEstimator,
+                max_parents:int = None,
+                iterations_number:int= 40,
+                patience:int = None
+                ):
+        """
+        Constructor
+        """
+        super().__init__(node_id, structure_estimator)
+        self.max_parents = max_parents
+        self.iterations_number = iterations_number
+        self.patience = patience
+
+
+    def optimize_structure(self) -> typing.List:
+        """
+        Compute Optimization process for a structure_estimator by using a Hill Climbing Algorithm
+
+        The known parents (non removable edges) form the starting structure; when there is no other
+        candidate parent the search is skipped and that structure is returned.
+
+        :return: the estimated structure for the node
+        :rtype: List
+        """
+        structure = self.structure_estimator._sample_path.structure
+        removable_edges = self.structure_estimator._removable_edges_matrix
+
+        # Create the graph for the single node
+        graph = NetworkGraph(structure)
+
+        # get the index for the current node
+        node_index = structure.get_node_indx(self.node_id)
+
+        # Parents given as prior knowledge: they start in the graph and are never drawn as candidates
+        prior_parents = set()
+        for i in range(len(removable_edges)):
+            if not removable_edges[i][node_index]:
+                parent_id = structure.get_node_id(i)
+                prior_parents.add(parent_id)
+                graph.add_edges([(parent_id, self.node_id)])
+
+        # get all the possible parents
+        other_nodes = [node for node in structure.nodes_labels
+                       if node != self.node_id and node not in prior_parents]
+
+        if not other_nodes:
+            # ``choice`` raises IndexError on an empty sequence: with no candidate there is nothing to search
+            print(f"finito variabile: {self.node_id}")
+            return graph.edges
+
+        actual_best_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)
+
+        patience_count = 0
+        for _ in range(self.iterations_number):
+            # choose a new random edge
+            current_new_parent = choice(other_nodes)
+            current_edge = (current_new_parent, self.node_id)
+            added = False
+            parent_removed = None
+
+            if graph.has_edge(current_edge):
+                graph.remove_edges([current_edge])
+            else:
+                # check the max_parents constraint
+                if self.max_parents is not None:
+                    parents_list = graph.get_parents_by_id(self.node_id)
+                    if len(parents_list) >= self.max_parents:
+                        # Make room for the new parent by dropping a random current one
+                        parent_removed = (choice(parents_list), self.node_id)
+                        graph.remove_edges([parent_removed])
+                graph.add_edges([current_edge])
+                added = True
+            current_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)
+
+            if current_score > actual_best_score:
+                # update current best score
+                actual_best_score = current_score
+                patience_count = 0
+            else:
+                # undo the last update
+                if added:
+                    graph.remove_edges([current_edge])
+                    # If a parent was removed, add it again to the graph
+                    if parent_removed is not None:
+                        graph.add_edges([parent_removed])
+                else:
+                    graph.add_edges([current_edge])
+                # update patience count
+                patience_count += 1
+
+            if self.patience is not None and patience_count > self.patience:
+                break
+
+        print(f"finito variabile: {self.node_id}")
+        return graph.edges
\ No newline at end of file
diff --git a/PyCTBN/PyCTBN/optimizers/optimizer.py b/PyCTBN/PyCTBN/optimizers/optimizer.py
new file mode 100644
index 0000000..36445c0
--- /dev/null
+++ b/PyCTBN/PyCTBN/optimizers/optimizer.py
@@ -0,0 +1,39 @@
+
+import itertools
+import json
+import typing
+
+import networkx as nx
+import numpy as np
+
+import abc
+
+from ..estimators.structure_estimator import StructureEstimator
+
+
+
+class Optimizer(abc.ABC):
+    """
+    Abstract base class of all the optimizers: it stores the node and the structure estimator
+
+    :param node_id: the node label
+    :type node_id: string
+    :param structure_estimator: A structureEstimator Object to predict the structure
+    :type structure_estimator: class:'StructureEstimator'
+
+    """
+
+    def __init__(self, node_id:str, structure_estimator: StructureEstimator):
+        # Both values are only stored here
+        self.node_id, self.structure_estimator = node_id, structure_estimator
+
+
+    @abc.abstractmethod
+    def optimize_structure(self) -> typing.List:
+        """
+        Run the optimization process; every concrete optimizer has to implement it
+
+        :return: the estimated structure for the node
+        :rtype: List
+        """
+        ...
diff --git a/PyCTBN/PyCTBN/optimizers/tabu_search.py b/PyCTBN/PyCTBN/optimizers/tabu_search.py
new file mode 100644
index 0000000..e15dd40
--- /dev/null
+++ b/PyCTBN/PyCTBN/optimizers/tabu_search.py
@@ -0,0 +1,199 @@
+
+import itertools
+import json
+import typing
+
+import networkx as nx
+import numpy as np
+
+from random import choice,sample
+
+from abc import ABC
+
+
+from .optimizer import Optimizer
+from ..estimators.structure_estimator import StructureEstimator
+from ..structure_graph.network_graph import NetworkGraph
+
+import queue
+
+
+class TabuSearch(Optimizer):
+    """
+    Optimizer class that implement Tabu Search
+
+    The search toggles one random candidate parent per iteration, as hill climbing does, but the
+    candidates tried most recently are tabu: they are not drawn again while others are available.
+
+    :param node_id: current node's id
+    :type node_id: string
+    :param structure_estimator: a structure estimator object with the information about the net
+    :type structure_estimator: class:'StructureEstimator'
+    :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
+    :type max_parents: int, optional
+    :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
+    :type iterations_number: int, optional
+    :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None
+    :type patience: int, optional
+    :param tabu_length: maximum length of the tabu list. If None, the number of candidate parents is used, default to None
+    :type tabu_length: int, optional
+    :param tabu_rules_duration: number of iterations after which the oldest tabu item is released. If None, the number of candidate parents is used, default to None
+    :type tabu_rules_duration: int, optional
+    """
+
+    def __init__(self,
+                node_id:str,
+                structure_estimator: StructureEstimator,
+                max_parents:int = None,
+                iterations_number:int= 40,
+                patience:int = None,
+                tabu_length:int = None,
+                tabu_rules_duration:int = None
+                ):
+        """
+        Constructor
+        """
+        super().__init__(node_id, structure_estimator)
+        self.max_parents = max_parents
+        self.iterations_number = iterations_number
+        self.patience = patience
+        self.tabu_length = tabu_length
+        self.tabu_rules_duration = tabu_rules_duration
+
+    def optimize_structure(self) -> typing.List:
+        """
+        Compute Optimization process for a structure_estimator by using a Tabu Search Algorithm
+
+        At each iteration a candidate parent that is not tabu is drawn at random (the oldest tabu
+        one when all of them are tabu) and its edge to the node is toggled; the change is kept only
+        if the score strictly improves, otherwise it is undone. The candidate then enters the tabu
+        list, which is bounded by ``tabu_length`` and loses its oldest item every
+        ``tabu_rules_duration`` iterations.
+
+        ``tabu_length`` and ``tabu_rules_duration`` left to None are set on the instance to the
+        number of candidate parents.
+
+        :return: the estimated structure for the node
+        :rtype: List
+        """
+        print(f"tabu search is processing the structure of {self.node_id}")
+
+        # Shortcuts to the objects read all along the search
+        structure = self.structure_estimator._sample_path.structure
+        removable_edges = self.structure_estimator._removable_edges_matrix
+
+        # Create the graph for the single node
+        graph = NetworkGraph(structure)
+
+        # get the index for the current node
+        node_index = structure.get_node_indx(self.node_id)
+
+        # Parents given as prior knowledge: they start in the graph and are never drawn as candidates
+        prior_parents = set()
+        for i in range(len(removable_edges)):
+            if not removable_edges[i][node_index]:
+                parent_id = structure.get_node_id(i)
+                prior_parents.add(parent_id)
+                graph.add_edges([(parent_id, self.node_id)])
+
+        # get all the possible parents
+        other_nodes = {node for node in structure.nodes_labels
+                       if node != self.node_id and node not in prior_parents}
+
+        if not other_nodes:
+            # With no candidate both the pool and the tabu list are empty and the blocking
+            # ``tabu_queue.get()`` of the loop would wait forever: there is nothing to search.
+            print(f"finito variabile: {self.node_id}")
+            return graph.edges
+
+        # calculate the score for the node with its starting parents
+        actual_best_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)
+
+        # initialize tabu_length and tabu_rules_duration if None
+        if self.tabu_length is None:
+            self.tabu_length = len(other_nodes)
+
+        if self.tabu_rules_duration is None:
+            # This used to assign a misspelled attribute, leaving None here and making the
+            # modulo of the loop raise TypeError.
+            self.tabu_rules_duration = len(other_nodes)
+
+        # initialize the data structures: the queue keeps the insertion order of the tabu nodes,
+        # the set answers the membership tests
+        tabu_set = set()
+        tabu_queue = queue.Queue()
+
+        patience_count = 0
+        tabu_count = 0
+        for _ in range(self.iterations_number):
+
+            current_possible_nodes = other_nodes.difference(tabu_set)
+
+            # choose a new random edge according to tabu restriction
+            if current_possible_nodes:
+                # ``random.sample`` rejects sets since Python 3.11: draw from a tuple instead
+                current_new_parent = choice(tuple(current_possible_nodes))
+            else:
+                # Every candidate is tabu: release the oldest one and use it
+                current_new_parent = tabu_queue.get()
+                tabu_set.remove(current_new_parent)
+
+            current_edge = (current_new_parent, self.node_id)
+            added = False
+            parent_removed = None
+
+            if graph.has_edge(current_edge):
+                graph.remove_edges([current_edge])
+            else:
+                # check the max_parents constraint
+                if self.max_parents is not None:
+                    parents_list = graph.get_parents_by_id(self.node_id)
+                    if len(parents_list) >= self.max_parents:
+                        # Make room for the new parent by dropping a random current one
+                        parent_removed = (choice(parents_list), self.node_id)
+                        graph.remove_edges([parent_removed])
+                graph.add_edges([current_edge])
+                added = True
+
+            current_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)
+
+            if current_score > actual_best_score:
+                # update current best score
+                actual_best_score = current_score
+                patience_count = 0
+            else:
+                # undo the last update
+                if added:
+                    graph.remove_edges([current_edge])
+                    # If a parent was removed, add it again to the graph
+                    if parent_removed is not None:
+                        graph.add_edges([parent_removed])
+                else:
+                    graph.add_edges([current_edge])
+                # update patience count
+                patience_count += 1
+
+            # Keep the tabu list within tabu_length, without ever blocking on an empty queue
+            if not tabu_queue.empty() and tabu_queue.qsize() >= self.tabu_length:
+                current_removed = tabu_queue.get()
+                tabu_set.remove(current_removed)
+            # Add the node on the tabu list
+            tabu_queue.put(current_new_parent)
+            tabu_set.add(current_new_parent)
+
+            # Count the iterations since the last periodic release
+            tabu_count += 1
+
+            # Every tabu_rules_duration step remove an item from the tabu list
+            if tabu_count % self.tabu_rules_duration == 0:
+                if tabu_queue.qsize() > 0:
+                    current_removed = tabu_queue.get()
+                    tabu_set.remove(current_removed)
+                tabu_count = 0
+
+            # Stop after too many consecutive iterations without improvement
+            if self.patience is not None and patience_count > self.patience:
+                break
+
+        print(f"finito variabile: {self.node_id}")
+        return graph.edges
\ No newline at end of file
diff --git a/PyCTBN/PyCTBN/structure_graph/__init__.py b/PyCTBN/PyCTBN/structure_graph/__init__.py
new file mode 100644
index 0000000..85f18a2
--- /dev/null
+++ b/PyCTBN/PyCTBN/structure_graph/__init__.py
@@ -0,0 +1,6 @@
+from .conditional_intensity_matrix import ConditionalIntensityMatrix
+from .network_graph import NetworkGraph
+from .sample_path import SamplePath
+from .set_of_cims import SetOfCims
+from .structure import Structure
+from .trajectory import Trajectory
\ No newline at end of file
diff --git a/PyCTBN/PyCTBN/structure_graph/conditional_intensity_matrix.py b/PyCTBN/PyCTBN/structure_graph/conditional_intensity_matrix.py
new file mode 100644
index 0000000..4abfdd0
--- /dev/null
+++ b/PyCTBN/PyCTBN/structure_graph/conditional_intensity_matrix.py
@@ -0,0 +1,42 @@
+import numpy as np
+
+
+class ConditionalIntensityMatrix(object):
+    """Groups what is known about the Conditional Intensity Matrix of a node: the state residence times
+    vector, the state transition matrix and the actual CIM matrix.
+
+    :param state_residence_times: state residence times vector
+    :type state_residence_times: numpy.array
+    :param state_transition_matrix: the transitions count matrix
+    :type state_transition_matrix: numpy.ndArray
+    :_cim: the actual cim of the node
+    """
+    def __init__(self, state_residence_times: np.array, state_transition_matrix: np.array):
+        """Store the two inputs and start ``_cim`` as a float64 copy of the transition matrix."""
+        self._state_residence_times = state_residence_times
+        self._state_transition_matrix = state_transition_matrix
+        self._cim = state_transition_matrix.astype(np.float64)
+
+    def compute_cim_coefficients(self) -> None:
+        """Compute the coefficients of ``_cim`` in place of the counts: the diagonal is negated, then
+        every entry becomes (entry + 1) / (T[x] + 1), T[x] being the residence time of its row."""
+        np.fill_diagonal(self._cim, -1 * self._cim.diagonal())
+        shifted_counts = self._cim.T + 1
+        shifted_times = self._state_residence_times + 1
+        self._cim = (shifted_counts / shifted_times).T
+
+    @property
+    def state_residence_times(self) -> np.ndarray:
+        return self._state_residence_times
+
+    @property
+    def state_transition_matrix(self) -> np.ndarray:
+        return self._state_transition_matrix
+
+    @property
+    def cim(self) -> np.ndarray:
+        return self._cim
+
+    def __repr__(self):
+        return f'CIM:\n{self.cim!s}'
+
diff --git a/PyCTBN/PyCTBN/structure_graph/network_graph.py b/PyCTBN/PyCTBN/structure_graph/network_graph.py
new file mode 100644
index 0000000..623981d
--- /dev/null
+++ b/PyCTBN/PyCTBN/structure_graph/network_graph.py
@@ -0,0 +1,293 @@
+
+import typing
+
+import networkx as nx
+import numpy as np
+
+from .structure import Structure
+
+
+class NetworkGraph(object):
+ """Abstracts the infos contained in the Structure class in the form of a directed graph.
+ Has the task of creating all the necessary filtering and indexing structures for parameters estimation
+
+ :param graph_struct: the ``Structure`` object from which infos about the net will be extracted
+ :type graph_struct: Structure
+ :_graph: directed graph
+ :_aggregated_info_about_nodes_parents: a structure that contains all the necessary infos
+ about every parents of the node of which all the indexing and filtering structures will be constructed.
+ :_time_scalar_indexing_structure: the indexing structure for state res time estimation
+ :_transition_scalar_indexing_structure: the indexing structure for transition computation
+ :_time_filtering: the columns filtering structure used in the computation of the state res times
+ :_transition_filtering: the columns filtering structure used in the computation of the transition
+ from one state to another
+ :_p_combs_structure: all the possible parents states combination for the node of interest
+ """
+
+    def __init__(self, graph_struct: Structure):
+        """Constructor Method: keeps ``graph_struct`` and creates an empty directed graph.
+
+        :param graph_struct: the ``Structure`` object from which infos about the net will be extracted
+        :type graph_struct: Structure
+        """
+        self._graph_struct, self._graph = graph_struct, nx.DiGraph()
+        # Indexing/filtering members stay unset until an initialization method builds them
+        self._aggregated_info_about_nodes_parents = self._time_scalar_indexing_structure = None
+        self._transition_scalar_indexing_structure = self._time_filtering = None
+        self._transition_filtering = self._p_combs_structure = None
+
+    def init_graph(self):  # whole-net initialization: graph first, then the indexing/filtering structures
+        self.add_nodes(self._graph_struct.nodes_labels)  # was ``self._nodes_labels``, which the constructor never sets
+        self.add_edges(self._graph_struct.edges)  # was ``self.graph_struct``; the member is ``_graph_struct``
+        self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes()  # NOTE(review): no leading underscore, unlike the constructor member - confirm
+        self._fancy_indexing = self.build_fancy_indexing_structure(0)
+        self.build_scalar_indexing_structures()
+        self.build_time_columns_filtering_structure()
+        self.build_transition_columns_filtering_structure()
+        self._p_combs_structure = self.build_p_combs_structure()
+
+    def fast_init(self, node_id: str) -> None:
+        """Builds the graph from the structure and then every indexing/filtering structure needed for
+        the parameters estimation of the node identified by the label ``node_id``
+
+        :param node_id: the label of the node
+        :type node_id: string
+        """
+        struct = self._graph_struct
+        self.add_nodes(struct.nodes_labels)
+        self.add_edges(struct.edges)
+        parents_info = self.get_ordered_by_indx_set_of_parents(node_id)
+        self._aggregated_info_about_nodes_parents = parents_info
+        _, p_indxs, p_vals = parents_info
+        node_states, node_indx = self.get_states_number(node_id), self.get_node_indx(node_id)
+        self._time_scalar_indexing_structure = \
+            NetworkGraph.build_time_scalar_indexing_structure_for_a_node(node_states, p_vals)
+        self._transition_scalar_indexing_structure = \
+            NetworkGraph.build_transition_scalar_indexing_structure_for_a_node(node_states, p_vals)
+        self._time_filtering = NetworkGraph.build_time_columns_filtering_for_a_node(node_indx, p_indxs)
+        self._transition_filtering = NetworkGraph.build_transition_filtering_for_a_node(
+            node_indx, p_indxs, struct.total_variables_number)
+        self._p_combs_structure = NetworkGraph.build_p_comb_structure_for_a_node(p_vals)
+
+    def add_nodes(self, list_of_nodes: typing.List) -> None:
+        """Adds to ``_graph`` the nodes contained in the list ``list_of_nodes``.
+        Each node is stored with its index, its cardinality and its position in the list
+        (node attributes ``indx``, ``val`` and ``pos_indx``).
+
+        :param list_of_nodes: the nodes to add to ``_graph``
+        :type list_of_nodes: List
+        """
+        struct = self._graph_struct
+        # Labels, indexes and cardinalities are paired positionally; zip stops at the shortest one
+        node_infos = zip(list_of_nodes, struct.nodes_indexes, struct.nodes_values)
+        for position, (label, node_indx, node_val) in enumerate(node_infos):
+            self._graph.add_node(label, indx=node_indx, val=node_val, pos_indx=position)
+
+    def has_edge(self,edge:tuple)-> bool:
+        """Check if the graph contains a specific edge
+
+        :param edge: a tuple that represents the edge; its first two items are the endpoints
+        :type edge: tuple
+        :return: the answer of ``_graph.has_edge`` for the two endpoints
+        :rtype: bool
+        """
+        source, target = edge[0], edge[1]
+        return self._graph.has_edge(source, target)
+
+    def add_edges(self, list_of_edges: typing.List) -> None:
+        """Adds to ``_graph`` every edge contained in the list ``list_of_edges``.
+
+        :param list_of_edges: the list of tuples that represent the edges to add
+        :type list_of_edges: List
+        """
+        self._graph.add_edges_from(list_of_edges)
+
+    def remove_node(self, node_id: str) -> None:
+        """Remove the node ``node_id`` from the graph and from the structure, then reset to None
+        all the filtering/indexing structures.
+        """
+        for container in (self._graph, self._graph_struct):
+            container.remove_node(node_id)
+        self.clear_indexing_filtering_structures()
+
+    def clear_indexing_filtering_structures(self) -> None:
+        """Reset to None all the filtering/indexing structures.
+        """
+        members = ('_aggregated_info_about_nodes_parents',
+                   '_time_scalar_indexing_structure', '_transition_scalar_indexing_structure',
+                   '_time_filtering', '_transition_filtering', '_p_combs_structure')
+        # Same members, in the same order, that the constructor leaves unset
+        for member in members:
+            setattr(self, member, None)
+
+    def get_ordered_by_indx_set_of_parents(self, node: str) -> typing.Tuple:
+        """Collects in one aggregated structure all the infos relative to the parent set of the node:
+        (parents_labels, parents_indexes, parents_cardinalities).
+
+        :param node: the label of the node
+        :type node: string
+        :return: a tuple containing all the parent set infos
+        :rtype: Tuple
+        """
+        # The parents are sorted by the position that their label has in the nodes labels of the
+        # structure, so the three returned lists are aligned on that order
+        label_position = {label: position for position, label in enumerate(self._graph_struct.nodes_labels)}
+        sorted_parents = sorted(self.get_parents_by_id(node), key=label_position.__getitem__)
+        # One index and one cardinality per sorted parent
+        p_indxes = list(map(self.get_node_indx, sorted_parents))
+        p_values = list(map(self.get_states_number, sorted_parents))
+        return sorted_parents, p_indxes, p_values
+
+    def remove_edges(self, list_of_edges: typing.List) -> None:
+        """Removes from ``_graph`` every edge contained in the list ``list_of_edges``.
+
+        :param list_of_edges: the edges to remove from the graph
+        :type list_of_edges: List
+        """
+        self._graph.remove_edges_from(list_of_edges)
+
+    @staticmethod
+    def build_time_scalar_indexing_structure_for_a_node(node_states: int,
+                                                        parents_vals: typing.List) -> np.ndarray:
+        """Builds an indexing structure for the computation of state residence times values:
+        the cumulative products of the node cardinality followed by the parents cardinalities.
+
+        :param node_states: the node cardinality
+        :type node_states: int
+        :param parents_vals: the cardinalities of the node's parents
+        :type parents_vals: List
+        :return: The time indexing structure
+        :rtype: numpy.ndArray
+        """
+        T_vector = np.append(np.array([node_states]), parents_vals)
+        # ``np.int`` (an alias of the builtin ``int``) was removed in NumPy 1.24
+        return T_vector.cumprod().astype(int)
+
+ @staticmethod
+ def build_transition_scalar_indexing_structure_for_a_node(node_states_number: int, parents_vals: typing.List) \
+         -> np.ndarray:
+     """Builds an indexing structure for the computation of state transitions values.
+
+     The structure is the cumulative product of ``[node_states_number, node_states_number, *parents_vals]``.
+
+     :param node_states_number: the node cardinality
+     :type node_states_number: int
+     :param parents_vals: the cardinalities of the node's parents
+     :type parents_vals: List
+     :return: The transition indexing structure
+     :rtype: numpy.ndArray
+     """
+     base = np.array([node_states_number, node_states_number])
+     # ``np.int`` was a plain alias of the builtin ``int`` and has been removed in NumPy 1.24.
+     return np.append(base, parents_vals).cumprod().astype(int)
+
+ @staticmethod
+ def build_time_columns_filtering_for_a_node(node_indx: int, p_indxs: typing.List) -> np.ndarray:
+     """
+     Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs``
+     in the dataset.
+     This structure will be used in the computation of the state res times.
+
+     :param node_indx: the index of the node
+     :type node_indx: int
+     :param p_indxs: the indexes of the node's parents
+     :type p_indxs: List
+     :return: The filtering structure for times estimation, i.e. ``[node_indx, *p_indxs]`` as integers
+     :rtype: numpy.ndArray
+     """
+     # ``np.int`` has been removed in NumPy 1.24; the builtin ``int`` is the same dtype.
+     # The final cast is needed because appending an empty list promotes the array to float.
+     return np.append(np.array([node_indx], dtype=int), p_indxs).astype(int)
+
+ @staticmethod
+ def build_transition_filtering_for_a_node(node_indx: int, p_indxs: typing.List, nodes_number: int) \
+         -> np.ndarray:
+     """Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs``
+     in the dataset.
+     This structure will be used in the computation of the state transitions values.
+
+     :param node_indx: the index of the node
+     :type node_indx: int
+     :param p_indxs: the indexes of the node's parents
+     :type p_indxs: List
+     :param nodes_number: the total number of nodes in the dataset
+     :type nodes_number: int
+     :return: The filtering structure for transitions estimation, i.e.
+         ``[node_indx + nodes_number, node_indx, *p_indxs]``
+     :rtype: numpy.ndArray
+     """
+     # ``np.int`` has been removed in NumPy 1.24; the builtin ``int`` is the same dtype.
+     return np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=int)
+
+ @staticmethod
+ def build_p_comb_structure_for_a_node(parents_values: typing.List) -> np.ndarray:
+     """
+     Builds the combinatorial structure that contains the combinations of all the values contained in
+     ``parents_values``.
+
+     Every row is one combination of states; column ``i`` holds the state of the ``i``-th entry of
+     ``parents_values`` and the first column varies fastest along the rows.
+
+     :param parents_values: the cardinalities of the nodes
+     :type parents_values: List
+     :return: A numpy matrix containing a grid of the combinations (shape ``(1, 0)`` for an empty input)
+     :rtype: numpy.ndArray
+     """
+     if len(parents_values) == 0:
+         # ``np.int`` has been removed in NumPy 1.24; the builtin ``int`` is the same dtype.
+         return np.array([[]], dtype=int)
+     states = [list(range(val)) for val in parents_values]
+     # 'ij' indexing keeps column i bound to entry i of ``parents_values``. The previous default ('xy')
+     # indexing followed by a swap of the first two columns produced the same rows only when the first
+     # two cardinalities were equal; otherwise the first column received the states of the second entry.
+     return np.array(np.meshgrid(*states, indexing='ij')).T.reshape(-1, len(parents_values))
+
+ def get_parents_by_id(self, node_id) -> typing.List:
+ """Returns a list of labels of the parents (the graph predecessors) of the node ``node_id``.
+
+ :param node_id: the node label
+ :type node_id: string
+ :return: a List of labels of the parents
+ :rtype: List
+ """
+ return list(self._graph.predecessors(node_id))
+
+ def get_states_number(self, node_id) -> int:
+ """Returns the value of the ``val`` attribute stored on the graph node ``node_id``.
+
+ :param node_id: the node label
+ :return: the content of the ``val`` node attribute (presumably the node cardinality -- confirm
+ against the code that populates the graph)
+ :rtype: int
+ """
+ return self._graph.nodes[node_id]['val']
+
+ def get_node_indx(self, node_id) -> int:
+     """Returns the value of the ``indx`` attribute stored on the graph node ``node_id``.
+
+     :param node_id: the node label
+     :type node_id: string
+     :return: the node index
+     :rtype: int
+     :raises KeyError: if the node is not in the graph or has no ``indx`` attribute
+     """
+     # Direct node lookup, like the sibling getters; ``nx.get_node_attributes`` would build a dict
+     # over every node of the graph on each call.
+     return self._graph.nodes[node_id]['indx']
+
+ def get_positional_node_indx(self, node_id) -> int:
+ """Returns the value of the ``pos_indx`` attribute stored on the graph node ``node_id``.
+
+ :param node_id: the node label
+ :return: the content of the ``pos_indx`` node attribute
+ :rtype: int
+ """
+ return self._graph.nodes[node_id]['pos_indx']
+
+ @property
+ def nodes(self) -> typing.List:
+ """The nodes labels list exposed by the underlying structure object (``nodes_labels``)."""
+ return self._graph_struct.nodes_labels
+
+ @property
+ def edges(self) -> typing.List:
+ """The edges currently in the graph, as a new list."""
+ return list(self._graph.edges)
+
+ @property
+ def nodes_indexes(self) -> np.ndarray:
+ """The nodes indexes exposed by the underlying structure object (``nodes_indexes``)."""
+ return self._graph_struct.nodes_indexes
+
+ @property
+ def nodes_values(self) -> np.ndarray:
+ """The nodes values exposed by the underlying structure object (``nodes_values``)."""
+ return self._graph_struct.nodes_values
+
+ @property
+ def time_scalar_indexing_strucure(self) -> np.ndarray:
+ """The ``_time_scalar_indexing_structure`` member; it is ``None`` after
+ ``clear_indexing_filtering_structures`` has been called.
+
+ NOTE: the property name is misspelled ("strucure"); it is kept as is for the existing callers.
+ """
+ return self._time_scalar_indexing_structure
+
+ @property
+ def time_filtering(self) -> np.ndarray:
+ """The ``_time_filtering`` member; it is ``None`` after ``clear_indexing_filtering_structures``
+ has been called.
+ """
+ return self._time_filtering
+
+ @property
+ def transition_scalar_indexing_structure(self) -> np.ndarray:
+ """The ``_transition_scalar_indexing_structure`` member; it is ``None`` after
+ ``clear_indexing_filtering_structures`` has been called.
+ """
+ return self._transition_scalar_indexing_structure
+
+ @property
+ def transition_filtering(self) -> np.ndarray:
+ """The ``_transition_filtering`` member; it is ``None`` after ``clear_indexing_filtering_structures``
+ has been called.
+ """
+ return self._transition_filtering
+
+ @property
+ def p_combs(self) -> np.ndarray:
+ """The ``_p_combs_structure`` member; it is ``None`` after ``clear_indexing_filtering_structures``
+ has been called.
+ """
+ return self._p_combs_structure
diff --git a/PyCTBN/PyCTBN/structure_graph/sample_path.py b/PyCTBN/PyCTBN/structure_graph/sample_path.py
new file mode 100644
index 0000000..80b51d9
--- /dev/null
+++ b/PyCTBN/PyCTBN/structure_graph/sample_path.py
@@ -0,0 +1,91 @@
+
+
+import numpy as np
+import pandas as pd
+
+from .structure import Structure
+from .trajectory import Trajectory
+from ..utility.abstract_importer import AbstractImporter
+
+
+
+class SamplePath(object):
+    """Aggregates all the information about the trajectories, the real structure of the sampled net and the
+    variables cardinalities. It is in charge of creating the ``Trajectory`` and ``Structure`` objects that
+    will contain the mentioned data.
+
+    :param importer: the Importer object which contains the imported and processed data
+    :type importer: AbstractImporter
+    :_trajectories: the ``Trajectory`` object that will contain all the concatenated trajectories
+    :_structure: the ``Structure`` Object that will contain all the structural infos about the net
+    :_total_variables_count: the number of variables in the net
+    """
+    def __init__(self, importer: AbstractImporter):
+        """Constructor Method
+
+        :raises RuntimeError: if the variables frame or the concatenated samples of the importer are
+            ``None`` or empty
+        """
+        self._importer = importer
+        if importer._df_variables is None or importer._concatenated_samples is None:
+            raise RuntimeError('The importer object has to contain the all processed data!')
+        samples = importer._concatenated_samples
+        if importer._df_variables.empty:
+            raise RuntimeError('The importer object has to contain the all processed data!')
+        # The concatenated samples may be either a DataFrame or a numpy array.
+        if isinstance(samples, pd.DataFrame) and samples.empty:
+            raise RuntimeError('The importer object has to contain the all processed data!')
+        if isinstance(samples, np.ndarray) and samples.size == 0:
+            raise RuntimeError('The importer object has to contain the all processed data!')
+        self._trajectories = None
+        self._structure = None
+        self._total_variables_count = None
+
+    def build_trajectories(self) -> None:
+        """Builds the ``Trajectory`` object from the concatenated samples of the importer, then asks the
+        importer to clear its concatenated frame.
+        """
+        importer = self._importer
+        samples_arrays = importer.build_list_of_samples_array(importer.concatenated_samples)
+        # One extra column on top of the variables listed in the sorter.
+        self._trajectories = Trajectory(samples_arrays, len(importer.sorter) + 1)
+        importer.clear_concatenated_frame()
+
+    def build_structure(self) -> None:
+        """
+        Builds the ``Structure`` object that aggregates all the infos about the net.
+
+        :raises RuntimeError: if the importer's sorter differs from the labels in the first column of the
+            variables frame
+        """
+        importer = self._importer
+        if importer.sorter != importer.variables.iloc[:, 0].to_list():
+            raise RuntimeError("The Dataset columns order have to match the order of labels in the variables Frame!")
+
+        self._total_variables_count = len(importer.sorter)
+        variables = importer.variables
+        labels = variables.iloc[:, 0].to_list()
+        indxs = variables.index.to_numpy()
+        vals = variables.iloc[:, 1].to_numpy()
+        prior_structure = importer.structure
+        # No prior structure available means no edges.
+        no_prior_structure = prior_structure is None or prior_structure.empty
+        edges = [] if no_prior_structure else list(prior_structure.to_records(index=False))
+        self._structure = Structure(labels, indxs, vals, edges, self._total_variables_count)
+
+    def clear_memory(self):
+        """Replaces the ``_raw_data`` member of the importer with an empty list.
+        """
+        self._importer._raw_data = []
+
+    @property
+    def trajectories(self) -> Trajectory:
+        return self._trajectories
+
+    @property
+    def structure(self) -> Structure:
+        return self._structure
+
+    @property
+    def total_variables_count(self) -> int:
+        return self._total_variables_count
+
+    @property
+    def has_prior_net_structure(self) -> bool:
+        """``True`` if the built structure contains at least one edge."""
+        return bool(self._structure.edges)
+
+
+
+
+
+
diff --git a/PyCTBN/PyCTBN/structure_graph/set_of_cims.py b/PyCTBN/PyCTBN/structure_graph/set_of_cims.py
new file mode 100644
index 0000000..81caff5
--- /dev/null
+++ b/PyCTBN/PyCTBN/structure_graph/set_of_cims.py
@@ -0,0 +1,97 @@
+
+
+import typing
+
+import numpy as np
+
+from .conditional_intensity_matrix import ConditionalIntensityMatrix
+
+
+class SetOfCims(object):
+    """Aggregates all the CIMS of the node identified by the label _node_id.
+
+    :param node_id: the node label
+    :type node_id: string
+    :param parents_states_number: the cardinalities of the parents
+    :type parents_states_number: List
+    :param node_states_number: the cardinality of the node
+    :type node_states_number: int
+    :param p_combs: the p_comb structure bound to this node
+    :type p_combs: numpy.ndArray
+    :_state_residence_time: matrix containing all the state residence time vectors for the node
+    :_transition_matrices: matrix containing all the transition matrices for the node
+    :_actual_cims: the cims of the node
+    """
+
+    def __init__(self, node_id: str, parents_states_number: typing.List, node_states_number: int, p_combs: np.ndarray):
+        """Constructor Method
+        """
+        self._node_id = node_id
+        self._parents_states_number = parents_states_number
+        self._node_states_number = node_states_number
+        self._actual_cims = []
+        self._state_residence_times = None
+        self._transition_matrices = None
+        self._p_combs = p_combs
+        self.build_times_and_transitions_structures()
+
+    def build_times_and_transitions_structures(self) -> None:
+        """Initializes at the correct dimensions the state residence times matrix and the state transition matrices.
+
+        Both structures are zero-filled and have one entry per combination of parents states (a single
+        entry when the node has no parents).
+        """
+        if not self._parents_states_number:
+            combinations_number = 1
+        else:
+            combinations_number = np.prod(self._parents_states_number)
+        states_number = self._node_states_number
+        # ``np.float`` / ``np.int`` were aliases of the builtins and have been removed in NumPy 1.24.
+        self._state_residence_times = np.zeros((combinations_number, states_number), dtype=float)
+        self._transition_matrices = np.zeros((combinations_number, states_number, states_number), dtype=int)
+
+    def build_cims(self, state_res_times: np.ndarray, transition_matrices: np.ndarray) -> None:
+        """Build the ``ConditionalIntensityMatrix`` objects given the state residence times and transitions matrices.
+        Compute the cim coefficients. The class member ``_actual_cims`` will contain the computed cims.
+        The residence times and transition matrices members are released (set to ``None``) afterwards.
+
+        :param state_res_times: the state residence times matrix
+        :type state_res_times: numpy.ndArray
+        :param transition_matrices: the transition matrices
+        :type transition_matrices: numpy.ndArray
+        """
+        for state_res_time_vector, transition_matrix in zip(state_res_times, transition_matrices):
+            cim_to_add = ConditionalIntensityMatrix(state_res_time_vector, transition_matrix)
+            cim_to_add.compute_cim_coefficients()
+            self._actual_cims.append(cim_to_add)
+        self._actual_cims = np.array(self._actual_cims)
+        self._transition_matrices = None
+        self._state_residence_times = None
+
+    def filter_cims_with_mask(self, mask_arr: np.ndarray, comb: typing.List) -> np.ndarray:
+        """Filter the cims contained in the array ``_actual_cims`` given the boolean mask ``mask_arr`` and the index
+        ``comb``.
+
+        :param mask_arr: the boolean mask that indicates which parent to consider
+        :type mask_arr: numpy.array
+        :param comb: the state/s of the filtered parents
+        :type comb: numpy.array
+        :return: Array of ``ConditionalIntensityMatrix`` objects
+        :rtype: numpy.array
+        """
+        # A mask with at most one element performs no filtering: all the cims are returned.
+        if mask_arr.size <= 1:
+            return self._actual_cims
+        # Keep the cims whose parents combination matches ``comb`` on the masked columns.
+        matching_rows = np.all(self._p_combs[:, mask_arr] == comb, axis=1)
+        flat_indxs = np.argwhere(matching_rows).ravel()
+        return self._actual_cims[flat_indxs]
+
+    @property
+    def actual_cims(self) -> np.ndarray:
+        return self._actual_cims
+
+    @property
+    def p_combs(self) -> np.ndarray:
+        return self._p_combs
+
+    def get_cims_number(self):
+        """Returns the number of cims currently stored in ``_actual_cims``."""
+        return len(self._actual_cims)
+
+
+
+
diff --git a/PyCTBN/PyCTBN/structure_graph/structure.py b/PyCTBN/PyCTBN/structure_graph/structure.py
new file mode 100644
index 0000000..a9d60cc
--- /dev/null
+++ b/PyCTBN/PyCTBN/structure_graph/structure.py
@@ -0,0 +1,124 @@
+
+import typing as ty
+
+import numpy as np
+
+
+class Structure(object):
+    """Contains all the infos about the network structure (nodes labels, nodes cardinalities, edges, indexes)
+
+    :param nodes_labels_list: the symbolic names of the variables
+    :type nodes_labels_list: List
+    :param nodes_indexes_arr: the indexes of the nodes
+    :type nodes_indexes_arr: numpy.ndArray
+    :param nodes_vals_arr: the cardinalities of the nodes
+    :type nodes_vals_arr: numpy.ndArray
+    :param edges_list: the edges of the network
+    :type edges_list: List
+    :param total_variables_number: the total number of variables in the dataset
+    :type total_variables_number: int
+    """
+
+    def __init__(self, nodes_labels_list: ty.List, nodes_indexes_arr: np.ndarray, nodes_vals_arr: np.ndarray,
+                 edges_list: ty.List, total_variables_number: int):
+        """Constructor Method
+        """
+        self._nodes_labels_list = nodes_labels_list
+        self._nodes_indexes_arr = nodes_indexes_arr
+        self._nodes_vals_arr = nodes_vals_arr
+        self._edges_list = edges_list
+        self._total_variables_number = total_variables_number
+
+    def remove_node(self, node_id: str) -> None:
+        """Remove the node ``node_id`` from all the class members, including every edge that touches it.
+        The class member ``_total_variables_number`` is left unchanged, since it refers to the total number
+        of variables in the dataset.
+
+        :param node_id: the label of the node to remove
+        :type node_id: string
+        :raises ValueError: if ``node_id`` is not among the nodes labels
+        """
+        node_positional_indx = self._nodes_labels_list.index(node_id)
+        del self._nodes_labels_list[node_positional_indx]
+        self._nodes_indexes_arr = np.delete(self._nodes_indexes_arr, node_positional_indx)
+        self._nodes_vals_arr = np.delete(self._nodes_vals_arr, node_positional_indx)
+        self._edges_list = [(from_node, to_node) for (from_node, to_node) in self._edges_list
+                            if from_node != node_id and to_node != node_id]
+
+    @property
+    def edges(self) -> ty.List:
+        return self._edges_list
+
+    @property
+    def nodes_labels(self) -> ty.List:
+        return self._nodes_labels_list
+
+    @property
+    def nodes_indexes(self) -> np.ndarray:
+        return self._nodes_indexes_arr
+
+    @property
+    def nodes_values(self) -> np.ndarray:
+        return self._nodes_vals_arr
+
+    @property
+    def total_variables_number(self) -> int:
+        return self._total_variables_number
+
+    def get_node_id(self, node_indx: int) -> str:
+        """Given the ``node_index`` returns the node label.
+
+        :param node_indx: the node index
+        :type node_indx: int
+        :return: the node label
+        :rtype: string
+        """
+        return self._nodes_labels_list[node_indx]
+
+    def clean_structure_edges(self):
+        """Replace the edges list with a new empty list."""
+        self._edges_list = list()
+
+    def add_edge(self, edge: tuple):
+        """Append ``edge`` to the edges list.
+
+        :param edge: the edge to add
+        :type edge: tuple
+        """
+        # The argument is stored (the previous code appended the builtin ``tuple`` type and printed the list).
+        self._edges_list.append(edge)
+
+    def remove_edge(self, edge: tuple):
+        """Remove ``edge`` from the edges list.
+
+        :param edge: the edge to remove
+        :type edge: tuple
+        :raises ValueError: if ``edge`` is not in the edges list
+        """
+        self._edges_list.remove(edge)
+
+    def contains_edge(self, edge: tuple) -> bool:
+        """Tell whether ``edge`` is in the edges list."""
+        return edge in self._edges_list
+
+    def get_node_indx(self, node_id: str) -> int:
+        """Given the node label ``node_id`` returns the node index.
+
+        :param node_id: the node label
+        :type node_id: string
+        :return: the node index
+        :rtype: int
+        """
+        pos_indx = self._nodes_labels_list.index(node_id)
+        return self._nodes_indexes_arr[pos_indx]
+
+    def get_positional_node_indx(self, node_id: str) -> int:
+        """Given the node label ``node_id`` returns its position in the nodes labels list."""
+        return self._nodes_labels_list.index(node_id)
+
+    def get_states_number(self, node: str) -> int:
+        """Given the node label ``node`` returns the cardinality of the node.
+
+        :param node: the node label
+        :type node: string
+        :return: the node cardinality
+        :rtype: int
+        """
+        pos_indx = self._nodes_labels_list.index(node)
+        return self._nodes_vals_arr[pos_indx]
+
+    def __repr__(self):
+        return "Variables:\n" + str(self._nodes_labels_list) + "\nValues:\n" + str(self._nodes_vals_arr) + \
+               "\nEdges: \n" + str(self._edges_list)
+
+    def __eq__(self, other):
+        """Overrides the default implementation: two structures are equal when they have the same set of
+        labels, equal values and indexes arrays and equal edges lists."""
+        if isinstance(other, Structure):
+            return set(self._nodes_labels_list) == set(other._nodes_labels_list) and \
+                   np.array_equal(self._nodes_vals_arr, other._nodes_vals_arr) and \
+                   np.array_equal(self._nodes_indexes_arr, other._nodes_indexes_arr) and \
+                   self._edges_list == other._edges_list
+
+        return False
+
diff --git a/PyCTBN/PyCTBN/structure_graph/trajectory.py b/PyCTBN/PyCTBN/structure_graph/trajectory.py
new file mode 100644
index 0000000..36899b3
--- /dev/null
+++ b/PyCTBN/PyCTBN/structure_graph/trajectory.py
@@ -0,0 +1,45 @@
+
+import typing
+
+import numpy as np
+
+
+class Trajectory(object):
+    """Abstracts the infos about a complete set of trajectories, represented as a numpy array of doubles
+    (the time deltas) and a numpy matrix of ints (the changes of states).
+
+    :param list_of_columns: the list containing the times array and values matrix
+    :type list_of_columns: List
+    :param original_cols_number: total number of cols in the data
+    :type original_cols_number: int
+    :_actual_trajectory: the trajectory containing also the duplicated/shifted values
+    :_times: the array containing the time deltas
+    """
+
+    def __init__(self, list_of_columns: typing.List, original_cols_number: int):
+        """Constructor Method
+        """
+        # First element: the times array; second element: the values matrix.
+        self._times = list_of_columns[0]
+        self._actual_trajectory = list_of_columns[1]
+        self._original_cols_number = original_cols_number
+
+    @property
+    def trajectory(self) -> np.ndarray:
+        """The first ``original_cols_number - 1`` columns of the complete trajectory."""
+        kept_columns = self._original_cols_number - 1
+        return self._actual_trajectory[:, :kept_columns]
+
+    @property
+    def complete_trajectory(self) -> np.ndarray:
+        return self._actual_trajectory
+
+    @property
+    def times(self):
+        return self._times
+
+    def size(self):
+        """Returns the number of rows of the complete trajectory."""
+        rows_number = self._actual_trajectory.shape[0]
+        return rows_number
+
+    def __repr__(self):
+        return "\n".join(["Complete Trajectory Rows: " + str(self.size()),
+                          self.complete_trajectory.__repr__(),
+                          "Times Rows:" + str(self.times.size),
+                          self.times.__repr__()])
+
+
diff --git a/PyCTBN/PyCTBN/utility/__init__.py b/PyCTBN/PyCTBN/utility/__init__.py
new file mode 100644
index 0000000..f79749c
--- /dev/null
+++ b/PyCTBN/PyCTBN/utility/__init__.py
@@ -0,0 +1,4 @@
+from .abstract_importer import AbstractImporter
+from .cache import Cache
+from .json_importer import JsonImporter
+from .sample_importer import SampleImporter
\ No newline at end of file
diff --git a/PyCTBN/PyCTBN/utility/abstract_importer.py b/PyCTBN/PyCTBN/utility/abstract_importer.py
new file mode 100644
index 0000000..1cad352
--- /dev/null
+++ b/PyCTBN/PyCTBN/utility/abstract_importer.py
@@ -0,0 +1,164 @@
+
+import typing
+from abc import ABC, abstractmethod
+
+import numpy as np
+import pandas as pd
+
+import copy
+
+#from sklearn.utils import resample
+
+
+class AbstractImporter(ABC):
+    """Abstract class that exposes all the necessary methods to process the trajectories and the net structure.
+
+    :param file_path: the file path, or dataset name if you import already processed data
+    :type file_path: str
+    :param trajectory_list: Dataframe or numpy array containing the concatenation of all the processed trajectories
+    :type trajectory_list: typing.Union[pandas.DataFrame, numpy.ndarray]
+    :param variables: Dataframe containing the nodes labels and cardinalities
+    :type variables: pandas.DataFrame
+    :prior_net_structure: Dataframe containing the structure of the network (edges)
+    :type prior_net_structure: pandas.DataFrame
+    :_sorter: A list containing the variables labels in the SAME order as the columns in ``concatenated_samples``
+
+    .. warning::
+        The parameters ``variables`` and ``prior_net_structure`` HAVE to be properly constructed
+        as Pandas Dataframes with the following structure:
+        Header of _df_structure = [From_Node | To_Node]
+        Header of _df_variables = [Variable_Label | Variable_Cardinality]
+        See the tutorial on how to construct a correct ``concatenated_samples`` Dataframe/ndarray.
+
+    .. note::
+        See :class:``JsonImporter`` for an example implementation
+
+    """
+
+    def __init__(self, file_path: str = None, trajectory_list: typing.Union[pd.DataFrame, np.ndarray] = None,
+                 variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None):
+        """Constructor
+        """
+        self._file_path = file_path
+        self._df_samples_list = trajectory_list
+        self._concatenated_samples = []
+        self._df_variables = variables
+        self._df_structure = prior_net_structure
+        self._sorter = None
+        super().__init__()
+
+    @abstractmethod
+    def build_sorter(self, trajecory_header: object) -> typing.List:
+        """Initializes the ``_sorter`` class member from a trajectory dataframe, extracting the header of the frame
+        and keeping ONLY the variables symbolic labels, cutting out the time label in the header.
+
+        :param trajecory_header: an object that will be used to define the header
+        :type trajecory_header: object
+        :return: A list containing the processed header.
+        :rtype: List
+        """
+        pass
+
+    def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame,
+                                              columns_header: typing.List, shifted_cols_header: typing.List) \
+            -> pd.DataFrame:
+        """Computes the difference between each value present in the time column.
+        Copies and shifts by one position up all the values present in the remaining columns.
+        The input frame is not modified: the work is done on a deep copy.
+
+        :param sample_frame: the traj to be processed
+        :type sample_frame: pandas.Dataframe
+        :param columns_header: the original header of sample_frame
+        :type columns_header: List
+        :param shifted_cols_header: a copy of columns_header with changed names of the contents
+        :type shifted_cols_header: List
+        :return: The processed dataframe
+        :rtype: pandas.Dataframe
+
+        .. warning::
+            the Dataframe ``sample_frame`` has to follow the column structure of this header:
+            Header of sample_frame = [Time | Variable values]
+        """
+        sample_frame = copy.deepcopy(sample_frame)
+        # Each row gets the time elapsed until the next row.
+        sample_frame.iloc[:, 0] = sample_frame.iloc[:, 0].diff().shift(-1)
+        # Each row gets, in the shifted columns, the values of the next row.
+        shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32')
+        shifted_cols.columns = shifted_cols_header
+        sample_frame = sample_frame.assign(**shifted_cols)
+        # The last row has no successor, so it is dropped.
+        sample_frame.drop(sample_frame.tail(1).index, inplace=True)
+        return sample_frame
+
+    def compute_row_delta_in_all_samples_frames(self, df_samples_list: typing.List) -> None:
+        """Calls the method ``compute_row_delta_sigle_samples_frame`` on every dataframe present in the list
+        ``df_samples_list``.
+        Concatenates the result in the dataframe ``concatenated_samples``
+
+        :param df_samples_list: the dataframe's list to be processed and concatenated
+        :type df_samples_list: List
+        :raises RuntimeError: if the class member ``_sorter`` has not been initialized
+
+        .. warning::
+            The Dataframe sample_frame has to follow the column structure of this header:
+            Header of sample_frame = [Time | Variable values]
+            The class member self._sorter HAS to be properly INITIALIZED (See class members definition doc)
+        .. note::
+            After the call of this method the class member ``concatenated_samples`` will contain all processed
+            and merged trajectories
+        """
+        if not self._sorter:
+            raise RuntimeError("The class member self._sorter has to be INITIALIZED!")
+        shifted_cols_header = [s + "S" for s in self._sorter]
+        compute_row_delta = self.compute_row_delta_sigle_samples_frame
+        proc_samples_list = [compute_row_delta(sample, self._sorter, shifted_cols_header)
+                             for sample in df_samples_list]
+        self._concatenated_samples = pd.concat(proc_samples_list)
+
+        # Columns order: time, variables, shifted variables.
+        # NOTE(review): the label of the time column is hard-coded to 'Time'.
+        complete_header = ['Time', *self._sorter, *shifted_cols_header]
+        self._concatenated_samples = self._concatenated_samples[complete_header]
+
+    def build_list_of_samples_array(self, concatenated_sample: pd.DataFrame) -> typing.List:
+        """Builds a List containing the delta times numpy array, and the complete transitions matrix
+
+        :param concatenated_sample: the dataframe/array from which the time, and transitions matrix have to be extracted
+            and converted
+        :type concatenated_sample: typing.Union[pandas.Dataframe, numpy.ndarray]
+        :return: the resulting list of numpy arrays: ``[first column, remaining columns cast to int]``
+        :rtype: List
+        """
+        # Dataframes are converted with ``to_numpy``; anything else (e.g. an ndarray, as the documentation
+        # allows) goes through ``np.asarray`` instead of failing on the missing method.
+        to_numpy = getattr(concatenated_sample, 'to_numpy', None)
+        if to_numpy is not None:
+            concatenated_array = to_numpy()
+        else:
+            concatenated_array = np.asarray(concatenated_sample)
+        return [concatenated_array[:, 0], concatenated_array[:, 1:].astype(int)]
+
+    def clear_concatenated_frame(self) -> None:
+        """Removes all values in the dataframe concatenated_samples (only when it is a DataFrame).
+        """
+        if isinstance(self._concatenated_samples, pd.DataFrame):
+            self._concatenated_samples = self._concatenated_samples.iloc[0:0]
+
+    @abstractmethod
+    def dataset_id(self) -> object:
+        """If the original dataset contains multiple datasets, this method returns a unique id to identify the
+        current dataset
+        """
+        pass
+
+    @property
+    def concatenated_samples(self) -> pd.DataFrame:
+        return self._concatenated_samples
+
+    @property
+    def variables(self) -> pd.DataFrame:
+        return self._df_variables
+
+    @property
+    def structure(self) -> pd.DataFrame:
+        return self._df_structure
+
+    @property
+    def sorter(self) -> typing.List:
+        return self._sorter
+
+    @property
+    def file_path(self) -> str:
+        return self._file_path
diff --git a/PyCTBN/PyCTBN/utility/cache.py b/PyCTBN/PyCTBN/utility/cache.py
new file mode 100644
index 0000000..8e0369b
--- /dev/null
+++ b/PyCTBN/PyCTBN/utility/cache.py
@@ -0,0 +1,58 @@
+
+import typing
+
+from ..structure_graph.set_of_cims import SetOfCims
+
+
+class Cache:
+    """This class acts as a cache of ``SetOfCims`` objects for a node.
+
+    :_list_of_sets_of_parents: a list of ``Sets`` objects of the parents to which the cim in cache at SAME
+        index is related
+    :_actual_cache: a list of setOfCims objects
+    """
+
+    def __init__(self):
+        """Constructor Method
+        """
+        # Two parallel lists: the element at position i of one is related to position i of the other.
+        self._list_of_sets_of_parents = []
+        self._actual_cache = []
+
+    def find(self, parents_comb: typing.Set):
+        """
+        Tries to find in cache given the symbolic parents combination ``parents_comb`` the ``SetOfCims``
+        related to that ``parents_comb``.
+
+        :param parents_comb: the parents related to that ``SetOfCims``
+        :type parents_comb: Set
+        :return: A ``SetOfCims`` object if the ``parents_comb`` index is found in ``_list_of_sets_of_parents``.
+            None otherwise.
+        :rtype: SetOfCims
+        """
+        try:
+            position = self._list_of_sets_of_parents.index(parents_comb)
+        except ValueError:
+            # Cache miss
+            return None
+        return self._actual_cache[position]
+
+    def put(self, parents_comb: typing.Set, socim: SetOfCims):
+        """Place in cache the ``SetOfCims`` object, and the related symbolic index ``parents_comb`` in
+        ``_list_of_sets_of_parents``.
+
+        :param parents_comb: the symbolic set index
+        :type parents_comb: Set
+        :param socim: the related SetOfCims object
+        :type socim: SetOfCims
+        """
+        self._list_of_sets_of_parents.append(parents_comb)
+        self._actual_cache.append(socim)
+
+    def clear(self):
+        """Clear the contents both of ``_actual_cache`` and ``_list_of_sets_of_parents``.
+        """
+        self._list_of_sets_of_parents.clear()
+        self._actual_cache.clear()
\ No newline at end of file
diff --git a/PyCTBN/PyCTBN/utility/json_importer.py b/PyCTBN/PyCTBN/utility/json_importer.py
new file mode 100644
index 0000000..edff212
--- /dev/null
+++ b/PyCTBN/PyCTBN/utility/json_importer.py
@@ -0,0 +1,176 @@
+import json
+import typing
+
+import pandas as pd
+
+
+from .abstract_importer import AbstractImporter
+
+
+class JsonImporter(AbstractImporter):
+    """Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare
+    the data in json extension.
+
+    :param file_path: the path of the file that contains the data to be imported
+    :type file_path: string
+    :param samples_label: the reference key for the samples in the trajectories
+    :type samples_label: string
+    :param structure_label: the reference key for the structure of the network data
+    :type structure_label: string
+    :param variables_label: the reference key for the cardinalities of the nodes data
+    :type variables_label: string
+    :param time_key: the key used to identify the timestamps in each trajectory
+    :type time_key: string
+    :param variables_key: the key used to identify the names of the variables in the net
+    :type variables_key: string
+    :_array_indx: the index of the outer JsonArray to extract the data from
+    :type _array_indx: int
+    :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory
+    :_raw_data: The raw contents of the json file to import
+    :type _raw_data: List
+    """
+
+    def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
+                 variables_key: str):
+        """Constructor method
+
+        .. note::
+            This constructor calls also the method ``read_json_file()``, so after the construction of the object
+            the class member ``_raw_data`` will contain the raw imported json data.
+
+        """
+        self._samples_label = samples_label
+        self._structure_label = structure_label
+        self._variables_label = variables_label
+        self._time_key = time_key
+        self._variables_key = variables_key
+        self._df_samples_list = None
+        self._array_indx = None
+        super(JsonImporter, self).__init__(file_path)
+        self._raw_data = self.read_json_file()
+
+    def import_data(self, indx: int) -> None:
+        """Imports and processes the dataset found at position ``indx`` of the raw data: trajectories
+        (with the row deltas computed and concatenated), net structure and variables.
+
+        :param indx: the index of the outer JsonArray to extract the data from
+        :type indx: int
+        """
+        self._array_indx = indx
+        self._df_samples_list = self.import_trajectories(self._raw_data)
+        self._sorter = self.build_sorter(self._df_samples_list[0])
+        self.compute_row_delta_in_all_samples_frames(self._df_samples_list)
+        # The single trajectories are not needed anymore once concatenated.
+        self.clear_data_frame_list()
+        self._df_structure = self.import_structure(self._raw_data)
+        self._df_variables = self.import_variables(self._raw_data)
+
+    def import_trajectories(self, raw_data: typing.List) -> typing.List:
+        """Imports the trajectories from the list of dicts ``raw_data``.
+
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :return: List of dataframes containing all the trajectories
+        :rtype: List
+        """
+        return self.normalize_trajectories(raw_data, self._array_indx, self._samples_label)
+
+    def import_structure(self, raw_data: typing.List) -> pd.DataFrame:
+        """Imports in a dataframe the data in the list raw_data at the key ``_structure_label``
+
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :return: Dataframe containing the starting node and ending node of every arc of the network
+        :rtype: pandas.Dataframe
+        """
+        return self.one_level_normalizing(raw_data, self._array_indx, self._structure_label)
+
+    def import_variables(self, raw_data: typing.List) -> pd.DataFrame:
+        """Imports the data in ``raw_data`` at the key ``_variables_label``.
+
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :return: Dataframe containing the variables symbolic labels and their cardinalities
+        :rtype: pandas.Dataframe
+        """
+        return self.one_level_normalizing(raw_data, self._array_indx, self._variables_label)
+
+    def read_json_file(self) -> typing.List:
+        """Reads the JSON file in the path ``self._file_path``.
+
+        :return: The contents of the json file
+        :rtype: List
+        """
+        # JSON text is UTF-8: do not depend on the locale default encoding of the platform.
+        with open(self._file_path, encoding='utf-8') as f:
+            return json.load(f)
+
+    def one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame:
+        """Extracts the one-level nested data in the list ``raw_data`` at the index ``indx`` at the key ``key``.
+
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :param indx: The index of the array from which the data have to be extracted
+        :type indx: int
+        :param key: the key for the Dicts from which extract data
+        :type key: string
+        :return: A normalized dataframe
+        :rtype: pandas.Dataframe
+        """
+        return pd.DataFrame(raw_data[indx][key])
+
+    def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str) -> typing.List:
+        """
+        Extracts the trajectories in ``raw_data`` at the index ``indx`` at the key ``trajectories_key``.
+
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :param indx: The index of the array from which the data have to be extracted
+        :type indx: int
+        :param trajectories_key: the key of the trajectories objects
+        :type trajectories_key: string
+        :return: A list of dataframes containing the trajectories
+        :rtype: List
+        """
+        return [pd.DataFrame(sample) for sample in raw_data[indx][trajectories_key]]
+
+    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
+        """Implements the abstract method build_sorter of the :class:`AbstractImporter` for this dataset:
+        the sorter is the header of ``sample_frame`` without the time key.
+        """
+        columns_header = list(sample_frame.columns.values)
+        columns_header.remove(self._time_key)
+        return columns_header
+
+    def clear_data_frame_list(self) -> None:
+        """Removes all values present in the dataframes in the list ``_df_samples_list``.
+        """
+        # The list is updated in place, every frame is replaced by its empty slice.
+        for indx, frame in enumerate(self._df_samples_list):
+            self._df_samples_list[indx] = frame.iloc[0:0]
+
+    def dataset_id(self) -> object:
+        """Implements the abstract method dataset_id: the id is the index of the imported JsonArray element."""
+        return self._array_indx
+
+    def import_sampled_cims(self, raw_data: typing.List, indx: int, cims_key: str) -> typing.Dict:
+        """Imports the synthetic CIMS in the dataset in a dictionary, using variables labels
+        as keys for the set of CIMS of a particular node.
+
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :param indx: The index of the array from which the data have to be extracted
+        :type indx: int
+        :param cims_key: the key where the json object cims are placed
+        :type cims_key: string
+        :return: a dictionary containing the sampled CIMS for all the variables in the net
+        :rtype: Dictionary
+        """
+        cims_for_all_vars = {}
+        for var, cims_by_p_comb in raw_data[indx][cims_key].items():
+            # One numpy matrix for every parents combination of the variable.
+            cims_for_all_vars[var] = [pd.DataFrame(cim).to_numpy() for cim in cims_by_p_comb.values()]
+        return cims_for_all_vars
+
+
+
diff --git a/PyCTBN/PyCTBN/utility/sample_importer.py b/PyCTBN/PyCTBN/utility/sample_importer.py
new file mode 100644
index 0000000..05073c8
--- /dev/null
+++ b/PyCTBN/PyCTBN/utility/sample_importer.py
@@ -0,0 +1,65 @@
+import json
+import typing
+
+import pandas as pd
+import numpy as np
+
+from .abstract_importer import AbstractImporter
+
+
+
+class SampleImporter(AbstractImporter):
+    """Implements the abstract methods of AbstractImporter and adds all the necessary methods to process and prepare
+    data that is supplied directly in memory (as DataFrames, arrays or lists)
+
+    :param trajectory_list: the data that describes the trajectories
+    :type trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List]
+    :param variables: the data that describes the variables with name and cardinality
+    :type variables: typing.Union[pd.DataFrame, np.ndarray, typing.List]
+    :param prior_net_structure: the data of the real structure, if it exists
+    :type prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List]
+
+    :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory
+    :_raw_data: The raw contents of the json file to import
+    :type _raw_data: List
+    """
+
+    def __init__(self,
+                 trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None,
+                 variables: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None,
+                 prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None):
+
+        # Lists and arrays are wrapped into DataFrames; any other value is forwarded unchanged.
+        if isinstance(variables, (list, np.ndarray)):
+            variables = pd.DataFrame(variables)
+        if isinstance(prior_net_structure, (list, np.ndarray)):  # was testing ``variables`` a second time
+            prior_net_structure = pd.DataFrame(prior_net_structure)
+
+        super(SampleImporter, self).__init__(trajectory_list=trajectory_list,
+                                             variables=variables,
+                                             prior_net_structure=prior_net_structure)
+
+    def import_data(self, header_column = None):
+        """Sets ``_sorter`` (``header_column`` if given, else ``build_sorter`` on the first frame) and computes the row deltas of the samples."""
+        if header_column is not None:
+            self._sorter = header_column
+        else:
+            self._sorter = self.build_sorter(self._df_samples_list[0])
+
+        samples_list = self._df_samples_list
+
+        if isinstance(samples_list, np.ndarray):
+            samples_list = samples_list.tolist()
+
+        self.compute_row_delta_in_all_samples_frames(samples_list)
+
+    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
+        """Implements the abstract method build_sorter of the :class:`AbstractImporter`: returns the column labels of
+        ``sample_frame`` without the first one (presumably the time column - TODO confirm)."""
+        columns_header = list(sample_frame.columns.values)
+        del columns_header[0]
+        return columns_header
+
+
+    def dataset_id(self) -> object:
+        pass  # nothing is returned here, so the identifier is ``None``
\ No newline at end of file
diff --git a/PyCTBN/basic_main.py b/PyCTBN/basic_main.py
new file mode 100644
index 0000000..b1288db
--- /dev/null
+++ b/PyCTBN/basic_main.py
@@ -0,0 +1,39 @@
+import glob
+import os
+
+import sys
+sys.path.append("./PyCTBN/")
+
+import structure_graph.network_graph as ng
+import structure_graph.sample_path as sp
+import structure_graph.set_of_cims as sofc
+import estimators.parameters_estimator as pe
+import utility.json_importer as ji
+
+
+def main():
+    """Estimates and prints the CIMs of one node, using the first json dataset found in ./data."""
+    read_files = glob.glob(os.path.join('./data', "*.json"))  # take all the json files in this dir
+    if not read_files:
+        raise FileNotFoundError("no json dataset found in ./data")
+    importer = ji.JsonImporter(read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
+    # Create a SamplePath object, then build the trajectories and the structural infos
+    s1 = sp.SamplePath(importer)
+    s1.build_trajectories()
+    s1.build_structure()
+    # From the Structure object build the graph
+    g = ng.NetworkGraph(s1.structure)
+    # Select the node whose parameters have to be estimated
+    node = g.nodes[1]
+    # Init the graph specifically for THIS node
+    g.fast_init(node)
+    # The estimator reads times/trajectory from the Trajectory object, so pass it rather than the SamplePath
+    p1 = pe.ParametersEstimator(s1.trajectories, g)
+    # Init the estimator specifically for THIS node
+    p1.fast_init(node)
+    # Compute the parameters: the estimated CIMs are inside the resulting SetOfCims object
+    sofc1 = p1.compute_parameters_for_node(node)
+    print(sofc1.actual_cims)
+
+if __name__ == "__main__":
+    main()
diff --git a/PyCTBN/build/lib/PyCTBN/__init__.py b/PyCTBN/build/lib/PyCTBN/__init__.py
new file mode 100644
index 0000000..023c0f1
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/__init__.py
@@ -0,0 +1,8 @@
+import PyCTBN.estimators
+from PyCTBN.estimators import *
+import PyCTBN.optimizers
+from PyCTBN.optimizers import *
+import PyCTBN.structure_graph
+from PyCTBN.structure_graph import *
+import PyCTBN.utility
+from PyCTBN.utility import *
\ No newline at end of file
diff --git a/PyCTBN/build/lib/PyCTBN/estimators/__init__.py b/PyCTBN/build/lib/PyCTBN/estimators/__init__.py
new file mode 100644
index 0000000..112086f
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/estimators/__init__.py
@@ -0,0 +1,5 @@
+from .fam_score_calculator import FamScoreCalculator
+from .parameters_estimator import ParametersEstimator
+from .structure_estimator import StructureEstimator
+from .structure_constraint_based_estimator import StructureConstraintBasedEstimator
+from .structure_score_based_estimator import StructureScoreBasedEstimator
diff --git a/PyCTBN/build/lib/PyCTBN/estimators/fam_score_calculator.py b/PyCTBN/build/lib/PyCTBN/estimators/fam_score_calculator.py
new file mode 100644
index 0000000..5b0b591
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/estimators/fam_score_calculator.py
@@ -0,0 +1,272 @@
+
+import itertools
+import json
+import typing
+
+import networkx as nx
+import numpy as np
+from networkx.readwrite import json_graph
+
+from math import log
+
+from scipy.special import loggamma
+from random import choice
+
+from ..structure_graph.set_of_cims import SetOfCims
+from ..structure_graph.network_graph import NetworkGraph
+from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
+
+
+'''
+
+'''
+
+
+class FamScoreCalculator:
+ """
+ Has the task of calculating the FamScore of a node by using a Bayesian score function
+ """
+
+    def __init__(self):
+        # No state is initialised here (a ``np.seterr('raise')`` call was left disabled at this point).
+        pass
+
+ # region theta
+
+    def marginal_likelihood_theta(self,
+                        cims: ConditionalIntensityMatrix,
+                        alpha_xu: float,
+                        alpha_xxu: float):
+        """
+        Calculate the value of the marginal likelihood over theta, summed over all the cims of the node
+
+        :param cims: np.array with all the node's cims
+        :type cims: np.array
+        :param alpha_xu: hyperparameter over the CTBN’s q parameters
+        :type alpha_xu: float
+        :param alpha_xxu: distributed hyperparameter over the CTBN’s theta parameters
+        :type alpha_xxu: float
+
+        :return: the value of the marginal likelihood over theta
+        :rtype: float
+        """
+        per_cim_values = [
+            self.variable_cim_xu_marginal_likelihood_theta(current_cim, alpha_xu, alpha_xxu)
+            for current_cim in cims
+        ]
+        return np.sum(per_cim_values)
+
+    def variable_cim_xu_marginal_likelihood_theta(self,
+                        cim: ConditionalIntensityMatrix,
+                        alpha_xu: float,
+                        alpha_xxu: float):
+        """
+        Calculate the value of the marginal likelihood over theta given a cim
+
+        :param cim: A conditional_intensity_matrix object with the sufficient statistics
+        :type cim: class:'ConditionalIntensityMatrix'
+        :param alpha_xu: hyperparameter over the CTBN’s q parameters
+        :type alpha_xu: float
+        :param alpha_xxu: distributed hyperparameter over the CTBN’s theta parameters
+        :type alpha_xxu: float
+
+        :return: the value of the marginal likelihood over theta
+        :rtype: float
+        """
+
+        # number of entries of the residence times vector of the cim
+        states_count = len(cim._state_residence_times)
+
+        # add up the contribution of every index of this cim
+        per_state_values = [
+            self.single_cim_xu_marginal_likelihood_theta(state_index,
+                                                         cim,
+                                                         alpha_xu,
+                                                         alpha_xxu)
+            for state_index in range(states_count)]
+        return np.sum(per_state_values)
+
+    def single_cim_xu_marginal_likelihood_theta(self,
+                        index: int,
+                        cim: ConditionalIntensityMatrix,
+                        alpha_xu: float,
+                        alpha_xxu: float):
+        """
+        Calculate the marginal likelihood over theta of the node when it assumes a specific value
+        and a specific parents' assignment
+
+        :param index: the row of the state transition matrix to evaluate
+        :param cim: A conditional_intensity_matrix object with the sufficient statistics
+        :type cim: class:'ConditionalIntensityMatrix'
+        :param alpha_xu: hyperparameter over the CTBN’s q parameters
+        :type alpha_xu: float
+        :param alpha_xxu: distributed hyperparameter over the CTBN’s theta parameters
+        :type alpha_xxu: float
+
+        :return: the value of the marginal likelihood over theta when the node assumes a specific value
+        :rtype: float
+        """
+
+        # every index except ``index``: the summation runs over x != x'
+        other_states = list(range(len(cim._state_residence_times)))
+        other_states.remove(index)
+
+        # for a non-uniform alpha_xxu use instead:
+        # alpha_xxu = alpha_xu * cim.state_transition_matrix[index,index_x_first] / cim.state_transition_matrix[index, index]
+        transitions = cim.state_transition_matrix
+        leading_term = loggamma(alpha_xu) - loggamma(alpha_xu + transitions[index, index])
+        summation_term = np.sum([
+            self.single_internal_cim_xxu_marginal_likelihood_theta(transitions[index, other_state], alpha_xxu)
+            for other_state in other_states])
+
+        return leading_term + summation_term
+
+
+    def single_internal_cim_xxu_marginal_likelihood_theta(self,
+                        M_xxu_suff_stats: float,
+                        alpha_xxu: float=1):
+        """Calculate the second part of the marginal likelihood over theta formula
+
+        :param M_xxu_suff_stats: value of the sufficient statistic M[xx'|u]
+        :type M_xxu_suff_stats: float
+        :param alpha_xxu: distributed hyperparameter over the CTBN’s theta parameters
+        :type alpha_xxu: float
+        :return: the value of the marginal likelihood over theta when the node assumes a specific value
+        :rtype: float
+        """
+        updated_alpha = alpha_xxu + M_xxu_suff_stats
+        return loggamma(updated_alpha) - loggamma(alpha_xxu)
+
+ # endregion
+
+ # region q
+
+    def marginal_likelihood_q(self,
+                        cims: np.array,
+                        tau_xu: float=0.1,
+                        alpha_xu: float=1):
+        """
+        Calculate the value of the marginal likelihood over q, summed over all the cims of the node
+
+        :param cims: np.array with all the node's cims
+        :type cims: np.array
+        :param tau_xu: hyperparameter over the CTBN’s q parameters
+        :type tau_xu: float
+        :param alpha_xu: hyperparameter over the CTBN’s q parameters
+        :type alpha_xu: float
+
+        :return: the value of the marginal likelihood over q
+        :rtype: float
+        """
+
+        per_cim_values = [self.variable_cim_xu_marginal_likelihood_q(current_cim, tau_xu, alpha_xu) for current_cim in cims]
+        return np.sum(per_cim_values)
+
+    def variable_cim_xu_marginal_likelihood_q(self,
+                        cim: ConditionalIntensityMatrix,
+                        tau_xu: float=0.1,
+                        alpha_xu: float=1):
+        """
+        Calculate the value of the marginal likelihood over q given a cim
+
+        :param cim: A conditional_intensity_matrix object with the sufficient statistics
+        :type cim: class:'ConditionalIntensityMatrix'
+        :param tau_xu: hyperparameter over the CTBN’s q parameters
+        :type tau_xu: float
+        :param alpha_xu: hyperparameter over the CTBN’s q parameters
+        :type alpha_xu: float
+
+
+        :return: the value of the marginal likelihood over q
+        :rtype: float
+        """
+
+        # number of entries of the residence times vector of the cim
+        states_count = len(cim._state_residence_times)
+
+        # add up the contribution of every index of this cim
+        per_state_values = [
+            self.single_cim_xu_marginal_likelihood_q(
+                cim.state_transition_matrix[state_index, state_index],
+                cim._state_residence_times[state_index],
+                tau_xu, alpha_xu)
+            for state_index in range(states_count)]
+        return np.sum(per_state_values)
+
+
+    def single_cim_xu_marginal_likelihood_q(self,
+                        M_xu_suff_stats: float,
+                        T_xu_suff_stats: float,
+                        tau_xu: float=0.1,
+                        alpha_xu: float=1):
+        """
+        Calculate the marginal likelihood over q of the node when it assumes a specific value
+        and a specific parents' assignment
+
+        The value returned is:
+            loggamma(alpha_xu + M + 1) + log(tau_xu) * (alpha_xu + 1)
+            - loggamma(alpha_xu + 1) - log(tau_xu + T) * (alpha_xu + M + 1)
+
+        :param M_xu_suff_stats: value of the sufficient statistic M[x|u]
+        :type M_xu_suff_stats: float
+        :param T_xu_suff_stats: value of the sufficient statistic T[x|u]
+        :type T_xu_suff_stats: float
+        :param tau_xu: hyperparameter over the CTBN’s q parameters
+        :type tau_xu: float
+        :param alpha_xu: hyperparameter over the CTBN’s q parameters
+        :type alpha_xu: float
+
+        :return: the value of the marginal likelihood of the node when it assumes a specific value
+        :rtype: float
+        """
+        # the alpha term without and with the observed M[x|u]
+        alpha_plain = alpha_xu + 1
+        alpha_updated = alpha_xu + M_xu_suff_stats + 1
+
+        # terms entering the result with a positive sign
+        positive_terms = loggamma(alpha_updated) + log(tau_xu) * alpha_plain
+
+        # terms entering the result with a negative sign
+        negative_terms = loggamma(alpha_plain) + log(tau_xu + T_xu_suff_stats) * alpha_updated
+
+        return positive_terms - negative_terms
+
+ # end region
+
+    def get_fam_score(self,
+                        cims: np.array,
+                        tau_xu: float=0.1,
+                        alpha_xu: float=1):
+        """
+        Calculate the FamScore value of the node
+
+
+        :param cims: np.array with all the node's cims
+        :type cims: np.array
+        :param tau_xu: hyperparameter over the CTBN’s q parameters, default to 0.1
+        :type tau_xu: float, optional
+        :param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 1
+        :type alpha_xu: float, optional
+
+
+        :return: the FamScore value of the node
+        :rtype: float
+        """
+        # alpha_xxu is obtained from alpha_xu as a uniform distribution over the (states - 1) other states;
+        # the number of states is read from the first cim
+        states_count = len(cims[0]._state_residence_times)
+        alpha_xxu = alpha_xu / (states_count - 1)
+
+        # marginal likelihood over q
+        q_score = self.marginal_likelihood_q(cims,
+                                             tau_xu,
+                                             alpha_xu)
+
+        # marginal likelihood over theta
+        theta_score = self.marginal_likelihood_theta(cims,
+                                                     alpha_xu,
+                                                     alpha_xxu)
+
+        # the FamScore is the sum of the two marginal likelihoods
+        fam_score = q_score + theta_score
+        return fam_score
diff --git a/PyCTBN/build/lib/PyCTBN/estimators/parameters_estimator.py b/PyCTBN/build/lib/PyCTBN/estimators/parameters_estimator.py
new file mode 100644
index 0000000..4754d58
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/estimators/parameters_estimator.py
@@ -0,0 +1,143 @@
+import sys
+sys.path.append('../')
+import numpy as np
+
+from ..structure_graph.network_graph import NetworkGraph
+from ..structure_graph.set_of_cims import SetOfCims
+from ..structure_graph.trajectory import Trajectory
+
+
+class ParametersEstimator(object):
+ """Has the task of computing the cims of particular node given the trajectories and the net structure
+ in the graph ``_net_graph``.
+
+ :param trajectories: the trajectories
+ :type trajectories: Trajectory
+ :param net_graph: the net structure
+ :type net_graph: NetworkGraph
+ :_single_set_of_cims: the set of cims object that will hold the cims of the node
+ """
+
+    def __init__(self, trajectories: Trajectory, net_graph: NetworkGraph):
+        """Stores the trajectories and the net structure; the set of cims is created later by ``fast_init``.
+        """
+        self._trajectories = trajectories
+        self._net_graph = net_graph
+        self._single_set_of_cims = None  # assigned by ``fast_init``
+
+    def fast_init(self, node_id: str) -> None:
+        """Creates the ``SetOfCims`` that will hold the parameters estimated for the node ``node_id``.
+
+        :param node_id: the node label
+        :type node_id: string
+        """
+        graph = self._net_graph
+        self._single_set_of_cims = SetOfCims(node_id, graph._aggregated_info_about_nodes_parents[2],
+                                             graph.get_states_number(node_id), graph.p_combs)
+
+    def compute_parameters_for_node(self, node_id: str) -> SetOfCims:
+        """Compute the CIMS of the node identified by the label ``node_id``.
+
+        :param node_id: the node label
+        :type node_id: string
+        :return: A SetOfCims object filled with the computed CIMS
+        :rtype: SetOfCims
+        """
+        graph, trajectories, cims_set = self._net_graph, self._trajectories, self._single_set_of_cims
+        node_indx = graph.get_node_indx(node_id)
+        residence_times = cims_set._state_residence_times
+        transitions = cims_set._transition_matrices
+        ParametersEstimator.compute_state_res_time_for_node(trajectories.times, trajectories.trajectory,
+                                                            graph.time_filtering,
+                                                            graph.time_scalar_indexing_strucure,
+                                                            residence_times)
+        ParametersEstimator.compute_state_transitions_for_a_node(node_indx, trajectories.complete_trajectory,
+                                                                 graph.transition_filtering,
+                                                                 graph.transition_scalar_indexing_structure,
+                                                                 transitions)
+        cims_set.build_cims(residence_times, transitions)
+        return cims_set
+
+    @staticmethod
+    def compute_state_res_time_for_node(times: np.ndarray, trajectory: np.ndarray,
+                                        cols_filter: np.ndarray, scalar_indexes_struct: np.ndarray,
+                                        T: np.ndarray) -> None:
+        """Compute the state residence times for a node and fill the matrix ``T`` with the results
+
+        :param times: the times deltas vector
+        :type times: numpy.array
+        :param trajectory: the trajectory
+        :type trajectory: numpy.ndArray
+        :param cols_filter: the columns filtering structure
+        :type cols_filter: numpy.array
+        :param scalar_indexes_struct: the indexing structure
+        :type scalar_indexes_struct: numpy.array
+        :param T: the state residence times vectors
+        :type T: numpy.ndArray
+        """
+        # One flat bin index per trajectory row. The builtin ``int`` replaces the ``np.int`` alias, which
+        # was deprecated in NumPy 1.20 and removed in NumPy 1.24.
+        bin_indexes = np.sum(trajectory[:, cols_filter] * scalar_indexes_struct / scalar_indexes_struct[0],
+                             axis=1).astype(int)
+        # Sum the time deltas that fall into each bin and write the result into ``T`` in place.
+        T[:] = np.bincount(bin_indexes, times, minlength=scalar_indexes_struct[-1]).reshape(-1, T.shape[1])
+
+    @staticmethod
+    def compute_state_transitions_for_a_node(node_indx: int, trajectory: np.ndarray, cols_filter: np.ndarray,
+                                             scalar_indexing: np.ndarray, M: np.ndarray) -> None:
+        """Compute the state transitions for a node and fill the matrices ``M`` with the results.
+
+        :param node_indx: the index of the node
+        :type node_indx: int
+        :param trajectory: the trajectory
+        :type trajectory: numpy.ndArray
+        :param cols_filter: the columns filtering structure
+        :type cols_filter: numpy.array
+        :param scalar_indexing: the indexing structure
+        :type scalar_indexing: numpy.array
+        :param M: the state transitions matrices
+        :type M: numpy.ndArray
+        """
+        diag_indices = np.array([x * M.shape[1] + x % M.shape[1] for x in range(M.shape[0] * M.shape[1])],
+                                dtype=np.int64)  # flat positions of the diagonal entries of every matrix in M
+        # keep the rows whose column (half the width + node_indx) is >= 0; builtin int replaces the removed np.int
+        trj_tmp = trajectory[trajectory[:, int(trajectory.shape[1] / 2) + node_indx].astype(int) >= 0]
+        M[:] = np.bincount(np.sum(trj_tmp[:, cols_filter] * scalar_indexing / scalar_indexing[0], axis=1).astype(int),
+                           minlength=scalar_indexing[-1]).reshape(-1, M.shape[1], M.shape[2])
+        M_raveled = M.ravel()  # must be a view of M for the writes below to reach M (true for a contiguous array)
+        M_raveled[diag_indices] = 0  # clear the diagonals first so they are not part of the row sums
+        M_raveled[diag_indices] = np.sum(M, axis=2).ravel()  # each diagonal entry becomes the sum of its row
+
+    def init_sets_cims_container(self):  # NOTE(review): ``acims`` is not imported and ``__init__`` sets ``_net_graph``, not ``net_graph`` - looks like legacy code, confirm
+        self.sets_of_cims_struct = acims.SetsOfCimsContainer(self.net_graph.nodes,
+                                                             self.net_graph.nodes_values,
+                                                             self.net_graph.get_ordered_by_indx_parents_values_for_all_nodes(),
+                                                             self.net_graph.p_combs)
+
+    def compute_parameters(self):
+        # Fills the residence times and the transition matrices of every set of cims, then builds its cims.
+        # NOTE(review): reads ``self.net_graph``, ``self.sample_path`` and ``self.sets_of_cims_struct``, which
+        # ``__init__`` does not set - looks like legacy code, confirm before relying on it.
+        for indx, aggr in enumerate(zip(self.net_graph.nodes, self.sets_of_cims_struct.sets_of_cims)):
+            # aggr[0] is an item of ``net_graph.nodes``, aggr[1] the matching item of ``sets_of_cims``
+            # NOTE(review): six arguments are passed below, the static method declares five - confirm
+            self.compute_state_res_time_for_node(self.net_graph.get_node_indx(aggr[0]), self.sample_path.trajectories.times,
+                                                 self.sample_path.trajectories.trajectory,
+                                                 self.net_graph.time_filtering[indx],
+                                                 self.net_graph.time_scalar_indexing_strucure[indx],
+                                                 aggr[1]._state_residence_times)
+            # same pairing, this time for the transition matrices (filled in place)
+            # using the complete trajectory and the transition filtering/indexing structures of position indx
+            self.compute_state_transitions_for_a_node(self.net_graph.get_node_indx(aggr[0]),
+                                                      self.sample_path.trajectories.complete_trajectory,
+                                                      self.net_graph.transition_filtering[indx],
+                                                      self.net_graph.transition_scalar_indexing_structure[indx],
+                                                      aggr[1]._transition_matrices)
+            aggr[1].build_cims(aggr[1]._state_residence_times, aggr[1]._transition_matrices)
+
+
+
+
+
+
+
+
diff --git a/PyCTBN/build/lib/PyCTBN/estimators/structure_constraint_based_estimator.py b/PyCTBN/build/lib/PyCTBN/estimators/structure_constraint_based_estimator.py
new file mode 100644
index 0000000..7d5721e
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/estimators/structure_constraint_based_estimator.py
@@ -0,0 +1,238 @@
+
+import itertools
+import json
+import typing
+
+import networkx as nx
+import numpy as np
+from networkx.readwrite import json_graph
+import os
+from scipy.stats import chi2 as chi2_dist
+from scipy.stats import f as f_dist
+from tqdm import tqdm
+
+from ..utility.cache import Cache
+from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
+from ..structure_graph.network_graph import NetworkGraph
+from .parameters_estimator import ParametersEstimator
+from .structure_estimator import StructureEstimator
+from ..structure_graph.sample_path import SamplePath
+from ..structure_graph.structure import Structure
+from ..optimizers.constraint_based_optimizer import ConstraintBasedOptimizer
+
+import concurrent.futures
+
+
+
+import multiprocessing
+from multiprocessing import Pool
+
+
+class StructureConstraintBasedEstimator(StructureEstimator):
+ """
+ Has the task of estimating the network structure given the trajectories in samplepath by using a constraint-based approach.
+
+ :param sample_path: the _sample_path object containing the trajectories and the real structure
+ :type sample_path: SamplePath
+ :param exp_test_alfa: the significance level for the exponential Hp test
+ :type exp_test_alfa: float
+ :param chi_test_alfa: the significance level for the chi Hp test
+ :type chi_test_alfa: float
+ :_nodes: the nodes labels
+ :_nodes_vals: the nodes cardinalities
+ :_nodes_indxs: the nodes indexes
+ :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes``
+ :_cache: the Cache object
+ """
+
+    def __init__(self, sample_path: SamplePath, exp_test_alfa: float, chi_test_alfa: float, known_edges: typing.List = None, thumb_threshold: int = 25):
+        super().__init__(sample_path, known_edges)  # ``None`` (no prior edges) replaces the shared mutable ``[]`` default
+        self._exp_test_sign = exp_test_alfa  # significance level of the exponential test
+        self._chi_test_alfa = chi_test_alfa  # significance level of the chi test
+        self._thumb_threshold = thumb_threshold  # threshold compared against in ``independence_test``
+        self._cache = Cache()
+
+    def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int,
+                      tot_vars_count: int, parent_indx, child_indx) -> bool:
+        """Performs a complete independence test on the directed graphs G1 = {test_child U parent_set}
+        G2 = {G1 U test_parent} (added as an additional parent of the test_child).
+        Generates all the necessary structures and data to perform the tests.
+
+        :param test_parent: the node label of the test parent
+        :type test_parent: string
+        :param test_child: the node label of the child
+        :type test_child: string
+        :param parent_set: the common parent set
+        :type parent_set: List
+        :param child_states_numb: the cardinality of the ``test_child``
+        :type child_states_numb: int
+        :param tot_vars_count: the total number of variables in the net
+        :type tot_vars_count: int
+        :param parent_indx: row of ``_removable_edges_matrix`` used for the test parent (forwarded to ``independence_test``)
+        :param child_indx: column of ``_removable_edges_matrix`` used for the test child (forwarded to ``independence_test``)
+        :return: True iff test_child and test_parent are independent given the sep_set parent_set. False otherwise
+        :rtype: bool
+        """
+        p_set = [test_parent, *parent_set]
+        parents = np.append(np.array(parent_set), test_parent)
+        sorted_parents = self._nodes[np.isin(self._nodes, parents)]
+        cims_filter = sorted_parents != test_parent
+
+        sofc2 = self._cache.find(set(p_set))
+        sofc1 = self._cache.find(set(parent_set))
+        if not sofc2 or not sofc1:
+            # G2 is needed whenever either set of cims is missing from the cache: building it only when
+            # sofc2 is missing would leave g2 undefined when just sofc1 has to be computed
+            complete_info = [*parent_set, test_child, test_parent]
+            bool_mask2 = np.isin(self._nodes, complete_info)
+            l2 = list(self._nodes[bool_mask2])
+            indxs2 = self._nodes_indxs[bool_mask2]
+            vals2 = self._nodes_vals[bool_mask2]
+            # explicit (parent, child) pairs: itertools.product would iterate over the characters of the child label
+            eds2 = [(parent, test_child) for parent in p_set]
+            s2 = Structure(l2, indxs2, vals2, eds2, tot_vars_count)
+            g2 = NetworkGraph(s2)
+
+        if not sofc2:
+            g2.fast_init(test_child)
+            p2 = ParametersEstimator(self._sample_path.trajectories, g2)
+            p2.fast_init(test_child)
+            sofc2 = p2.compute_parameters_for_node(test_child)
+            self._cache.put(set(p_set), sofc2)
+
+        if not sofc1:
+            g2.remove_node(test_parent)
+            g2.fast_init(test_child)
+            p2 = ParametersEstimator(self._sample_path.trajectories, g2)
+            p2.fast_init(test_child)
+            sofc1 = p2.compute_parameters_for_node(test_child)
+            self._cache.put(set(parent_set), sofc1)
+        thumb_value = 0.0
+        if child_states_numb > 2:
+            parent_val = self._sample_path.structure.get_states_number(test_parent)
+            bool_mask_vals = np.isin(self._nodes, parent_set)
+            parents_vals = self._nodes_vals[bool_mask_vals]
+            thumb_value = self.compute_thumb_value(parent_val, child_states_numb, parents_vals)
+        for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs):
+            cond_cims = sofc2.filter_cims_with_mask(cims_filter, p_comb)
+            for cim2 in cond_cims:
+                if not self.independence_test(child_states_numb, cim1, cim2, thumb_value, parent_indx, child_indx):
+                    return False
+        return True
+
+    def independence_test(self, child_states_numb: int, cim1: ConditionalIntensityMatrix,
+                          cim2: ConditionalIntensityMatrix, thumb_value: float, parent_indx, child_indx) -> bool:
+        """Compute the actual independence test using two cims.
+        The exponential test is performed first and, if the null hypothesis is not rejected,
+        the chi test is performed as well.
+
+        :param child_states_numb: the cardinality of the test child
+        :type child_states_numb: int
+        :param cim1: a cim belonging to the graph without test parent
+        :type cim1: ConditionalIntensityMatrix
+        :param cim2: a cim belonging to the graph with test parent
+        :type cim2: ConditionalIntensityMatrix
+        :return: True iff both tests do NOT reject the null hypothesis of independence. False otherwise.
+        :rtype: bool
+        """
+        M1 = cim1.state_transition_matrix
+        M2 = cim2.state_transition_matrix
+        r1s = M1.diagonal()  # diagonal of the first transition matrix
+        r2s = M2.diagonal()  # diagonal of the second transition matrix
+        C1 = cim1.cim
+        C2 = cim2.cim
+        if child_states_numb > 2:
+            # thumb_value is only used here: trace of M1 over thumb_value is compared with the threshold
+            if (np.sum(np.diagonal(M1)) / thumb_value) < self._thumb_threshold:
+                self._removable_edges_matrix[parent_indx][child_indx] = False  # flag the (parent, child) entry
+                return False
+        F_stats = C2.diagonal() / C1.diagonal()  # element-wise ratio of the two cim diagonals
+        exp_alfa = self._exp_test_sign
+        for val in range(0, child_states_numb):
+            if F_stats[val] < f_dist.ppf(exp_alfa / 2, r1s[val], r2s[val]) or \
+                    F_stats[val] > f_dist.ppf(1 - exp_alfa / 2, r1s[val], r2s[val]):
+                return False  # the ratio falls outside the two-sided F quantiles
+        M1_no_diag = M1[~np.eye(M1.shape[0], dtype=bool)].reshape(M1.shape[0], -1)  # M1 without its diagonal
+        M2_no_diag = M2[~np.eye(M2.shape[0], dtype=bool)].reshape(
+            M2.shape[0], -1)  # M2 without its diagonal
+        chi_2_quantile = chi2_dist.ppf(1 - self._chi_test_alfa, child_states_numb - 1)
+        Ks = np.sqrt(r1s / r2s)
+        Ls = np.sqrt(r2s / r1s)
+        for val in range(0, child_states_numb):
+            Chi = np.sum(np.power(Ks[val] * M2_no_diag[val] - Ls[val] *M1_no_diag[val], 2) /
+                         (M1_no_diag[val] + M2_no_diag[val]))
+            if Chi > chi_2_quantile:
+                return False  # the statistic of this row exceeds the chi-square quantile
+        return True
+
+    def compute_thumb_value(self, parent_val, child_val, parent_set_vals):
+        """Compute the value to test against the thumb_threshold.
+
+        :param parent_val: test parent's variable cardinality
+        :type parent_val: int
+        :param child_val: test child's variable cardinality
+        :type child_val: int
+        :param parent_set_vals: the cardinalities of the nodes in the current sep-set
+        :type parent_set_vals: List
+        :return: the thumb value for the current independence test
+        :rtype: int
+        """
+        thumb_value = (child_val - 1) ** 2 * parent_val
+        # scale by the cardinality of every node in the sep-set
+        for cardinality in parent_set_vals:
+            thumb_value = thumb_value * cardinality
+        return thumb_value
+
+    def one_iteration_of_CTPC_algorithm(self, var_id: str, tot_vars_count: int)-> typing.List:
+        """Performs an iteration of the CTPC algorithm using the node ``var_id`` as ``test_child``.
+
+        :param var_id: the node label of the test child
+        :type var_id: string
+        :return: whatever ``ConstraintBasedOptimizer.optimize_structure`` returns for this node
+        """
+        node_optimizer = ConstraintBasedOptimizer(node_id=var_id,
+                                                  structure_estimator=self,
+                                                  tot_vars_count=tot_vars_count)
+        return node_optimizer.optimize_structure()
+
+
+    def ctpc_algorithm(self,disable_multiprocessing:bool= False ):
+        """Compute the CTPC algorithm over the entire net.
+
+        All the edges of the structure are removed first; then one iteration of the algorithm is run
+        for every node, either sequentially or in a pool of processes.
+
+        :param disable_multiprocessing: if True the nodes are processed sequentially in the current process
+        :type disable_multiprocessing: bool
+        :return: the set of all the edges returned by the single iterations
+        :rtype: set
+        """
+        total_vars_numb = self._sample_path.total_variables_count
+        run_for_node = self.one_iteration_of_CTPC_algorithm
+
+        # get the number of CPU
+        workers = multiprocessing.cpu_count()
+
+        # Remove all the edges from the structure
+        self._sample_path.structure.clean_structure_edges()
+        # Estimate the best parents for each node
+        if disable_multiprocessing:
+            print("DISABILITATO")
+            edges_per_node = [run_for_node(node, total_vars_numb) for node in self._nodes]
+        else:
+            # every node is paired with the same total number of variables
+            repeated_total = [total_vars_numb] * len(self._nodes)
+            with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor:
+                # the iterator returned by map is consumed below, once the pool has been shut down
+                edges_per_node = executor.map(run_for_node,
+                                              self._nodes,
+                                              repeated_total)
+
+        return set(itertools.chain.from_iterable(edges_per_node))
+
+
+    def estimate_structure(self,disable_multiprocessing:bool=False):  # delegates to ``ctpc_algorithm`` and returns its result
+        return self.ctpc_algorithm(disable_multiprocessing=disable_multiprocessing)
+
+
+
+
diff --git a/PyCTBN/build/lib/PyCTBN/estimators/structure_estimator.py b/PyCTBN/build/lib/PyCTBN/estimators/structure_estimator.py
new file mode 100644
index 0000000..fbf8ea9
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/estimators/structure_estimator.py
@@ -0,0 +1,187 @@
+
+import itertools
+import json
+import typing
+
+import matplotlib.pyplot as plt
+import networkx as nx
+import numpy as np
+from networkx.readwrite import json_graph
+
+from abc import ABC
+
+import abc
+
+from ..utility.cache import Cache
+from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
+from ..structure_graph.network_graph import NetworkGraph
+from .parameters_estimator import ParametersEstimator
+from ..structure_graph.sample_path import SamplePath
+from ..structure_graph.structure import Structure
+
+
+class StructureEstimator(object):
+ """Has the task of estimating the network structure given the trajectories in ``samplepath``.
+
+ :param sample_path: the _sample_path object containing the trajectories and the real structure
+ :type sample_path: SamplePath
+ :_nodes: the nodes labels
+ :_nodes_vals: the nodes cardinalities
+ :_nodes_indxs: the nodes indexes
+ :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes``
+ """
+
+ def __init__(self, sample_path: SamplePath, known_edges: typing.List = None):
+ self._sample_path = sample_path
+ self._nodes = np.array(self._sample_path.structure.nodes_labels)
+ self._nodes_vals = self._sample_path.structure.nodes_values
+ self._nodes_indxs = self._sample_path.structure.nodes_indexes
+ self._removable_edges_matrix = self.build_removable_edges_matrix(known_edges)
+ self._complete_graph = StructureEstimator.build_complete_graph(self._sample_path.structure.nodes_labels)
+
+
+ def build_removable_edges_matrix(self, known_edges: typing.List):
+ """Builds a boolean matrix who shows if a edge could be removed or not, based on prior knowledge given:
+
+ :param known_edges: the list of nodes labels
+ :type known_edges: List
+ :return: a boolean matrix
+ :rtype: np.ndarray
+ """
+ tot_vars_count = self._sample_path.total_variables_count
+ complete_adj_matrix = np.full((tot_vars_count, tot_vars_count), True)
+ if known_edges:
+ for edge in known_edges:
+ i = self._sample_path.structure.get_node_indx(edge[0])
+ j = self._sample_path.structure.get_node_indx(edge[1])
+ complete_adj_matrix[i][j] = False
+ return complete_adj_matrix
+
+ @staticmethod
+ def build_complete_graph(node_ids: typing.List) -> nx.DiGraph:
+ """Builds a complete directed graph (no self loops) given the nodes labels in the list ``node_ids``:
+
+ :param node_ids: the list of nodes labels
+ :type node_ids: List
+ :return: a complete Digraph Object
+ :rtype: networkx.DiGraph
+ """
+ complete_graph = nx.DiGraph()
+ complete_graph.add_nodes_from(node_ids)
+ complete_graph.add_edges_from(itertools.permutations(node_ids, 2))
+ return complete_graph
+
+
+ @staticmethod
+ def generate_possible_sub_sets_of_size( u: typing.List, size: int, parent_label: str):
+ """Creates a list containing all possible subsets of the list ``u`` of size ``size``,
+ that do not contains a the node identified by ``parent_label``.
+
+ :param u: the list of nodes
+ :type u: List
+ :param size: the size of the subsets
+ :type size: int
+ :param parent_label: the node to exclude in the subsets generation
+ :type parent_label: string
+ :return: an Iterator Object containing a list of lists
+ :rtype: Iterator
+ """
+ list_without_test_parent = u[:]
+ list_without_test_parent.remove(parent_label)
+ return map(list, itertools.combinations(list_without_test_parent, size))
+
+ def save_results(self) -> None:
+ """Save the estimated Structure to a .json file in the path where the data are loaded from.
+ The file is named as the input dataset but the `results_` word is appended to the results file.
+ """
+ res = json_graph.node_link_data(self._complete_graph)
+ name = self._sample_path._importer.file_path.rsplit('/', 1)[-1]
+ name = name.split('.', 1)[0]
+ name += '_' + str(self._sample_path._importer.dataset_id())
+ name += '.json'
+ file_name = 'results_' + name
+ with open(file_name, 'w') as f:
+ json.dump(res, f)
+
+
+ def remove_diagonal_elements(self, matrix):
+ m = matrix.shape[0]
+ strided = np.lib.stride_tricks.as_strided
+ s0, s1 = matrix.strides
+ return strided(matrix.ravel()[1:], shape=(m - 1, m), strides=(s0 + s1, s1)).reshape(m, -1)
+
+
+ @abc.abstractmethod
+ def estimate_structure(self) -> typing.List:
+ """Abstract method to estimate the structure
+
+ :return: List of estimated edges
+ :rtype: Typing.List
+ """
+ pass
+
+
+ def adjacency_matrix(self) -> np.ndarray:
+ """Converts the estimated structure ``_complete_graph`` to a boolean adjacency matrix representation.
+
+ :return: The adjacency matrix of the graph ``_complete_graph``
+ :rtype: numpy.ndArray
+ """
+ return nx.adj_matrix(self._complete_graph).toarray().astype(bool)
+
+ def spurious_edges(self) -> typing.List:
+ """Return the spurious edges present in the estimated structure, if a prior net structure is present in
+ ``_sample_path.structure``.
+
+ :return: A list containing the spurious edges
+ :rtype: List
+ """
+ if not self._sample_path.has_prior_net_structure:
+ raise RuntimeError("Can not compute spurious edges with no prior net structure!")
+ real_graph = nx.DiGraph()
+ real_graph.add_nodes_from(self._sample_path.structure.nodes_labels)
+ real_graph.add_edges_from(self._sample_path.structure.edges)
+ return nx.difference(real_graph, self._complete_graph).edges
+
+ def save_plot_estimated_structure_graph(self) -> None:
+ """Plot the estimated structure in a graphical model style.
+ Spurious edges are colored in red.
+ """
+ graph_to_draw = nx.DiGraph()
+ spurious_edges = self.spurious_edges()
+ non_spurious_edges = list(set(self._complete_graph.edges) - set(spurious_edges))
+ print(non_spurious_edges)
+ edges_colors = ['red' if edge in spurious_edges else 'black' for edge in self._complete_graph.edges]
+ graph_to_draw.add_edges_from(spurious_edges)
+ graph_to_draw.add_edges_from(non_spurious_edges)
+ pos = nx.spring_layout(graph_to_draw, k=0.5*1/np.sqrt(len(graph_to_draw.nodes())), iterations=50,scale=10)
+ options = {
+ "node_size": 2000,
+ "node_color": "white",
+ "edgecolors": "black",
+ 'linewidths':2,
+ "with_labels":True,
+ "font_size":13,
+ 'connectionstyle': 'arc3, rad = 0.1',
+ "arrowsize": 15,
+ "arrowstyle": '<|-',
+ "width": 1,
+ "edge_color":edges_colors,
+ }
+
+ nx.draw(graph_to_draw, pos, **options)
+ ax = plt.gca()
+ ax.margins(0.20)
+ plt.axis("off")
+ name = self._sample_path._importer.file_path.rsplit('/', 1)[-1]
+ name = name.split('.', 1)[0]
+ name += '_' + str(self._sample_path._importer.dataset_id())
+ name += '.png'
+ plt.savefig(name)
+ plt.clf()
+ print("Estimated Structure Plot Saved At: ", os.path.abspath(name))
+
+
+
+
+
diff --git a/PyCTBN/build/lib/PyCTBN/estimators/structure_score_based_estimator.py b/PyCTBN/build/lib/PyCTBN/estimators/structure_score_based_estimator.py
new file mode 100644
index 0000000..2903db3
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/estimators/structure_score_based_estimator.py
@@ -0,0 +1,244 @@
+
+import itertools
+import json
+import typing
+
+import networkx as nx
+import numpy as np
+from networkx.readwrite import json_graph
+
+from random import choice
+
+import concurrent.futures
+
+import copy
+
+from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
+from ..structure_graph.network_graph import NetworkGraph
+from .parameters_estimator import ParametersEstimator
+from .structure_estimator import StructureEstimator
+from ..structure_graph.sample_path import SamplePath
+from ..structure_graph.structure import Structure
+from .fam_score_calculator import FamScoreCalculator
+from ..optimizers.hill_climbing_search import HillClimbing
+from ..optimizers.tabu_search import TabuSearch
+
+
+import multiprocessing
+from multiprocessing import Pool
+
+
+
+
+class StructureScoreBasedEstimator(StructureEstimator):
+ """
+ Has the task of estimating the network structure given the trajectories in samplepath by
+ using a score based approach.
+
+ :param sample_path: the _sample_path object containing the trajectories and the real structure
+ :type sample_path: SamplePath
+ :param tau_xu: hyperparameter over the CTBN’s q parameters, default to 0.1
+ :type tau_xu: float, optional
+ :param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 1
+ :type alpha_xu: float, optional
+ :param known_edges: List of known edges, default to []
+ :type known_edges: List, optional
+
+ """
+
+ def __init__(self, sample_path: SamplePath, tau_xu:int=0.1, alpha_xu:int = 1,known_edges: typing.List= []):
+ super().__init__(sample_path,known_edges)
+ self.tau_xu=tau_xu
+ self.alpha_xu=alpha_xu
+
+
+ def estimate_structure(self, max_parents:int = None, iterations_number:int= 40,
+ patience:int = None, tabu_length:int = None, tabu_rules_duration:int = None,
+ optimizer: str = 'tabu',disable_multiprocessing:bool= False ):
+ """
+ Compute the score-based algorithm to find the optimal structure
+
+ :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
+ :type max_parents: int, optional
+ :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
+ :type iterations_number: int, optional
+ :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None
+ :type patience: int, optional
+ :param tabu_length: maximum lenght of the data structures used in the optimization process, default to None
+ :type tabu_length: int, optional
+ :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None
+ :type tabu_rules_duration: int, optional
+ :param optimizer: name of the optimizer algorithm. Possible values: 'hill' (Hill climbing),'tabu' (tabu search), defualt to 'tabu'
+ :type optimizer: string, optional
+ :param disable_multiprocessing: true if you desire to disable the multiprocessing operations, default to False
+ :type disable_multiprocessing: Boolean, optional
+ """
+ 'Save the true edges structure in tuples'
+ true_edges = copy.deepcopy(self._sample_path.structure.edges)
+ true_edges = set(map(tuple, true_edges))
+
+ 'Remove all the edges from the structure'
+ self._sample_path.structure.clean_structure_edges()
+
+ estimate_parents = self.estimate_parents
+
+ n_nodes= len(self._nodes)
+
+ l_max_parents= [max_parents] * n_nodes
+ l_iterations_number = [iterations_number] * n_nodes
+ l_patience = [patience] * n_nodes
+ l_tabu_length = [tabu_length] * n_nodes
+ l_tabu_rules_duration = [tabu_rules_duration] * n_nodes
+ l_optimizer = [optimizer] * n_nodes
+
+
+ 'get the number of CPU'
+ cpu_count = multiprocessing.cpu_count()
+ print(f"CPU COUNT: {cpu_count}")
+
+ if disable_multiprocessing:
+ cpu_count = 1
+
+
+
+
+
+ #with get_context("spawn").Pool(processes=cpu_count) as pool:
+ #with multiprocessing.Pool(processes=cpu_count) as pool:
+
+ 'Estimate the best parents for each node'
+ if disable_multiprocessing:
+ list_edges_partial = [estimate_parents(n,max_parents,iterations_number,patience,tabu_length,tabu_rules_duration,optimizer) for n in self._nodes]
+ else:
+ with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor:
+ list_edges_partial = executor.map(estimate_parents,
+ self._nodes,
+ l_max_parents,
+ l_iterations_number,
+ l_patience,
+ l_tabu_length,
+ l_tabu_rules_duration,
+ l_optimizer)
+
+
+
+ #list_edges_partial = p.map(estimate_parents, self._nodes)
+ #list_edges_partial= estimate_parents('Q',max_parents,iterations_number,patience,tabu_length,tabu_rules_duration,optimizer)
+
+ 'Concatenate all the edges list'
+ set_list_edges = set(itertools.chain.from_iterable(list_edges_partial))
+
+ #print('-------------------------')
+
+
+ 'calculate precision and recall'
+ n_missing_edges = 0
+ n_added_fake_edges = 0
+
+ try:
+ n_added_fake_edges = len(set_list_edges.difference(true_edges))
+
+ n_missing_edges = len(true_edges.difference(set_list_edges))
+
+ n_true_positive = len(true_edges) - n_missing_edges
+
+ precision = n_true_positive / (n_true_positive + n_added_fake_edges)
+
+ recall = n_true_positive / (n_true_positive + n_missing_edges)
+
+
+ # print(f"n archi reali non trovati: {n_missing_edges}")
+ # print(f"n archi non reali aggiunti: {n_added_fake_edges}")
+ print(true_edges)
+ print(set_list_edges)
+ print(f"precision: {precision} ")
+ print(f"recall: {recall} ")
+ except Exception as e:
+ print(f"errore: {e}")
+
+ return set_list_edges
+
+
+ def estimate_parents(self,node_id:str, max_parents:int = None, iterations_number:int= 40,
+ patience:int = 10, tabu_length:int = None, tabu_rules_duration:int=5,
+ optimizer:str = 'hill' ):
+ """
+ Use the FamScore of a node in order to find the best parent nodes
+
+ :param node_id: current node's id
+ :type node_id: string
+ :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
+ :type max_parents: int, optional
+ :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
+ :type iterations_number: int, optional
+ :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None
+ :type patience: int, optional
+ :param tabu_length: maximum lenght of the data structures used in the optimization process, default to None
+ :type tabu_length: int, optional
+ :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None
+ :type tabu_rules_duration: int, optional
+ :param optimizer: name of the optimizer algorithm. Possible values: 'hill' (Hill climbing),'tabu' (tabu search), defualt to 'tabu'
+ :type optimizer: string, optional
+
+ :return: A list of the best edges for the currente node
+ :rtype: List
+ """
+
+ "choose the optimizer algotithm"
+ if optimizer == 'tabu':
+ optimizer = TabuSearch(
+ node_id = node_id,
+ structure_estimator = self,
+ max_parents = max_parents,
+ iterations_number = iterations_number,
+ patience = patience,
+ tabu_length = tabu_length,
+ tabu_rules_duration = tabu_rules_duration)
+ else: #if optimizer == 'hill':
+ optimizer = HillClimbing(
+ node_id = node_id,
+ structure_estimator = self,
+ max_parents = max_parents,
+ iterations_number = iterations_number,
+ patience = patience)
+
+ "call the optmizer's function that calculates the current node's parents"
+ return optimizer.optimize_structure()
+
+
+ def get_score_from_graph(self,
+ graph: NetworkGraph,
+ node_id:str):
+ """
+ Get the FamScore of a node
+
+ :param node_id: current node's id
+ :type node_id: string
+ :param graph: current graph to be computed
+ :type graph: class:'NetworkGraph'
+
+
+ :return: The FamSCore for this graph structure
+ :rtype: float
+ """
+
+ 'inizialize the graph for a single node'
+ graph.fast_init(node_id)
+
+ params_estimation = ParametersEstimator(self._sample_path.trajectories, graph)
+
+ 'Inizialize and compute parameters for node'
+ params_estimation.fast_init(node_id)
+ SoCims = params_estimation.compute_parameters_for_node(node_id)
+
+ 'calculate the FamScore for the node'
+ fam_score_obj = FamScoreCalculator()
+
+ score = fam_score_obj.get_fam_score(SoCims.actual_cims,tau_xu = self.tau_xu,alpha_xu=self.alpha_xu)
+
+ #print(f" lo score per {node_id} risulta: {score} ")
+ return score
+
+
+
+
diff --git a/PyCTBN/build/lib/PyCTBN/optimizers/__init__.py b/PyCTBN/build/lib/PyCTBN/optimizers/__init__.py
new file mode 100644
index 0000000..4162bf1
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/optimizers/__init__.py
@@ -0,0 +1,4 @@
+from .optimizer import Optimizer
+from .tabu_search import TabuSearch
+from .hill_climbing_search import HillClimbing
+from .constraint_based_optimizer import ConstraintBasedOptimizer
\ No newline at end of file
diff --git a/PyCTBN/build/lib/PyCTBN/optimizers/constraint_based_optimizer.py b/PyCTBN/build/lib/PyCTBN/optimizers/constraint_based_optimizer.py
new file mode 100644
index 0000000..65bc19c
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/optimizers/constraint_based_optimizer.py
@@ -0,0 +1,87 @@
+
+import itertools
+import json
+import typing
+
+import networkx as nx
+import numpy as np
+
+from random import choice
+
+from abc import ABC
+
+import copy
+
+
+from .optimizer import Optimizer
+from ..estimators.structure_estimator import StructureEstimator
+from ..structure_graph.network_graph import NetworkGraph
+
+
+class ConstraintBasedOptimizer(Optimizer):
+ """
+ Optimizer class that implement a CTPC Algorithm
+
+ :param node_id: current node's id
+ :type node_id: string
+ :param structure_estimator: a structure estimator object with the information about the net
+ :type structure_estimator: class:'StructureEstimator'
+ :param tot_vars_count: number of variables in the dataset
+ :type tot_vars_count: int
+ """
+ def __init__(self,
+ node_id:str,
+ structure_estimator: StructureEstimator,
+ tot_vars_count:int
+ ):
+ """
+ Constructor
+ """
+ super().__init__(node_id, structure_estimator)
+ self.tot_vars_count = tot_vars_count
+
+
+
+ def optimize_structure(self):
+ """
+ Compute Optimization process for a structure_estimator by using a CTPC Algorithm
+
+ :return: the estimated structure for the node
+ :rtype: List
+ """
+ print("##################TESTING VAR################", self.node_id)
+
+ graph = NetworkGraph(self.structure_estimator._sample_path.structure)
+
+ other_nodes = [node for node in self.structure_estimator._sample_path.structure.nodes_labels if node != self.node_id]
+
+ for possible_parent in other_nodes:
+ graph.add_edges([(possible_parent,self.node_id)])
+
+
+ u = other_nodes
+ #tests_parents_numb = len(u)
+ #complete_frame = self.complete_graph_frame
+ #test_frame = complete_frame.loc[complete_frame['To'].isin([self.node_id])]
+ child_states_numb = self.structure_estimator._sample_path.structure.get_states_number(self.node_id)
+ b = 0
+ while b < len(u):
+ parent_indx = 0
+ while parent_indx < len(u):
+ removed = False
+ test_parent = u[parent_indx]
+ i = self.structure_estimator._sample_path.structure.get_node_indx(test_parent)
+ j = self.structure_estimator._sample_path.structure.get_node_indx(self.node_id)
+ if self.structure_estimator._removable_edges_matrix[i][j]:
+ S = StructureEstimator.generate_possible_sub_sets_of_size(u, b, test_parent)
+ for parents_set in S:
+ if self.structure_estimator.complete_test(test_parent, self.node_id, parents_set, child_states_numb, self.tot_vars_count,i,j):
+ graph.remove_edges([(test_parent, self.node_id)])
+ u.remove(test_parent)
+ removed = True
+ break
+ if not removed:
+ parent_indx += 1
+ b += 1
+ self.structure_estimator._cache.clear()
+ return graph.edges
\ No newline at end of file
diff --git a/PyCTBN/build/lib/PyCTBN/optimizers/hill_climbing_search.py b/PyCTBN/build/lib/PyCTBN/optimizers/hill_climbing_search.py
new file mode 100644
index 0000000..6783be0
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/optimizers/hill_climbing_search.py
@@ -0,0 +1,135 @@
+
+import itertools
+import json
+import typing
+
+import networkx as nx
+import numpy as np
+
+from random import choice
+
+from abc import ABC
+
+
+from .optimizer import Optimizer
+from ..estimators.structure_estimator import StructureEstimator
+from ..structure_graph.network_graph import NetworkGraph
+
+
+class HillClimbing(Optimizer):
+ """
+ Optimizer class that implement Hill Climbing Search
+
+
+ :param node_id: current node's id
+ :type node_id: string
+ :param structure_estimator: a structure estimator object with the information about the net
+ :type structure_estimator: class:'StructureEstimator'
+ :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
+ :type max_parents: int, optional
+ :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
+ :type iterations_number: int, optional
+ :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None
+ :type patience: int, optional
+
+
+
+ """
+ def __init__(self,
+ node_id:str,
+ structure_estimator: StructureEstimator,
+ max_parents:int = None,
+ iterations_number:int= 40,
+ patience:int = None
+ ):
+ """
+ Constructor
+ """
+ super().__init__(node_id, structure_estimator)
+ self.max_parents = max_parents
+ self.iterations_number = iterations_number
+ self.patience = patience
+
+
+
+ def optimize_structure(self) -> typing.List:
+ """
+ Compute Optimization process for a structure_estimator by using a Hill Climbing Algorithm
+
+ :return: the estimated structure for the node
+ :rtype: List
+ """
+
+ #'Create the graph for the single node'
+ graph = NetworkGraph(self.structure_estimator._sample_path.structure)
+
+ 'get the index for the current node'
+ node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id)
+
+ 'list of prior edges'
+ prior_parents = set()
+
+ 'Add the edges from prior knowledge'
+ for i in range(len(self.structure_estimator._removable_edges_matrix)):
+ if not self.structure_estimator._removable_edges_matrix[i][node_index]:
+ parent_id= self.structure_estimator._sample_path._structure.get_node_id(i)
+ prior_parents.add(parent_id)
+
+ 'Add the node to the starting structure'
+ graph.add_edges([(parent_id, self.node_id)])
+
+
+
+ 'get all the possible parents'
+ other_nodes = [node for node in
+ self.structure_estimator._sample_path.structure.nodes_labels if
+ node != self.node_id and
+ not prior_parents.__contains__(node)]
+
+ actual_best_score = self.structure_estimator.get_score_from_graph(graph,self.node_id)
+
+ patince_count = 0
+ for i in range(self.iterations_number):
+ 'choose a new random edge'
+ current_new_parent = choice(other_nodes)
+ current_edge = (current_new_parent,self.node_id)
+ added = False
+ parent_removed = None
+
+
+ if graph.has_edge(current_edge):
+ graph.remove_edges([current_edge])
+ else:
+ 'check the max_parents constraint'
+ if self.max_parents is not None:
+ parents_list = graph.get_parents_by_id(self.node_id)
+ if len(parents_list) >= self.max_parents :
+ parent_removed = (choice(parents_list), self.node_id)
+ graph.remove_edges([parent_removed])
+ graph.add_edges([current_edge])
+ added = True
+ #print('**************************')
+ current_score = self.structure_estimator.get_score_from_graph(graph,self.node_id)
+
+
+ if current_score > actual_best_score:
+ 'update current best score'
+ actual_best_score = current_score
+ patince_count = 0
+ else:
+ 'undo the last update'
+ if added:
+ graph.remove_edges([current_edge])
+ 'If a parent was removed, add it again to the graph'
+ if parent_removed is not None:
+ graph.add_edges([parent_removed])
+ else:
+ graph.add_edges([current_edge])
+ 'update patience count'
+ patince_count += 1
+
+ if self.patience is not None and patince_count > self.patience:
+ break
+
+ print(f"finito variabile: {self.node_id}")
+ return graph.edges
\ No newline at end of file
diff --git a/PyCTBN/build/lib/PyCTBN/optimizers/optimizer.py b/PyCTBN/build/lib/PyCTBN/optimizers/optimizer.py
new file mode 100644
index 0000000..36445c0
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/optimizers/optimizer.py
@@ -0,0 +1,39 @@
+
+import itertools
+import json
+import typing
+
+import networkx as nx
+import numpy as np
+
+import abc
+
+from ..estimators.structure_estimator import StructureEstimator
+
+
+
+class Optimizer(abc.ABC):
+ """
+ Interface class for all the optimizer's child PyCTBN
+
+ :param node_id: the node label
+ :type node_id: string
+ :param structure_estimator: A structureEstimator Object to predict the structure
+ :type structure_estimator: class:'StructureEstimator'
+
+ """
+
+ def __init__(self, node_id:str, structure_estimator: StructureEstimator):
+ self.node_id = node_id
+ self.structure_estimator = structure_estimator
+
+
+ @abc.abstractmethod
+ def optimize_structure(self) -> typing.List:
+ """
+ Compute Optimization process for a structure_estimator
+
+ :return: the estimated structure for the node
+ :rtype: List
+ """
+ pass
diff --git a/PyCTBN/build/lib/PyCTBN/optimizers/tabu_search.py b/PyCTBN/build/lib/PyCTBN/optimizers/tabu_search.py
new file mode 100644
index 0000000..e15dd40
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/optimizers/tabu_search.py
@@ -0,0 +1,199 @@
+
+import itertools
+import json
+import typing
+
+import networkx as nx
+import numpy as np
+
+from random import choice,sample
+
+from abc import ABC
+
+
+from .optimizer import Optimizer
+from ..estimators.structure_estimator import StructureEstimator
+from ..structure_graph.network_graph import NetworkGraph
+
+import queue
+
+
+class TabuSearch(Optimizer):
+ """
+ Optimizer class that implement Tabu Search
+
+
+ :param node_id: current node's id
+ :type node_id: string
+ :param structure_estimator: a structure estimator object with the information about the net
+ :type structure_estimator: class:'StructureEstimator'
+ :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
+ :type max_parents: int, optional
+ :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
+ :type iterations_number: int, optional
+ :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None
+ :type patience: int, optional
+ :param tabu_length: maximum lenght of the data structures used in the optimization process, default to None
+ :type tabu_length: int, optional
+ :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None
+ :type tabu_rules_duration: int, optional
+
+
+ """
+ def __init__(self,
+ node_id:str,
+ structure_estimator: StructureEstimator,
+ max_parents:int = None,
+ iterations_number:int= 40,
+ patience:int = None,
+ tabu_length:int = None,
+ tabu_rules_duration = None
+ ):
+ """
+ Constructor
+ """
+ super().__init__(node_id, structure_estimator)
+ self.max_parents = max_parents
+ self.iterations_number = iterations_number
+ self.patience = patience
+ self.tabu_length = tabu_length
+ self.tabu_rules_duration = tabu_rules_duration
+
+
+ def optimize_structure(self) -> typing.List:
+ """
+ Compute Optimization process for a structure_estimator by using a Hill Climbing Algorithm
+
+ :return: the estimated structure for the node
+ :rtype: List
+ """
+ print(f"tabu search is processing the structure of {self.node_id}")
+
+ 'Create the graph for the single node'
+ graph = NetworkGraph(self.structure_estimator._sample_path.structure)
+
+ 'get the index for the current node'
+ node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id)
+
+ 'list of prior edges'
+ prior_parents = set()
+
+ 'Add the edges from prior knowledge'
+ for i in range(len(self.structure_estimator._removable_edges_matrix)):
+ if not self.structure_estimator._removable_edges_matrix[i][node_index]:
+ parent_id= self.structure_estimator._sample_path._structure.get_node_id(i)
+ prior_parents.add(parent_id)
+
+ 'Add the node to the starting structure'
+ graph.add_edges([(parent_id, self.node_id)])
+
+
+
+ 'get all the possible parents'
+ other_nodes = set([node for node in
+ self.structure_estimator._sample_path.structure.nodes_labels if
+ node != self.node_id and
+ not prior_parents.__contains__(node)])
+
+ 'calculate the score for the node without parents'
+ actual_best_score = self.structure_estimator.get_score_from_graph(graph,self.node_id)
+
+
+ 'initialize tabu_length and tabu_rules_duration if None'
+ if self.tabu_length is None:
+ self.tabu_length = len(other_nodes)
+
+ if self.tabu_rules_duration is None:
+ self.tabu_tabu_rules_durationength = len(other_nodes)
+
+ 'inizialize the data structures'
+ tabu_set = set()
+ tabu_queue = queue.Queue()
+
+ patince_count = 0
+ tabu_count = 0
+ for i in range(self.iterations_number):
+
+ current_possible_nodes = other_nodes.difference(tabu_set)
+
+ 'choose a new random edge according to tabu restiction'
+ if(len(current_possible_nodes) > 0):
+ current_new_parent = sample(current_possible_nodes,k=1)[0]
+ else:
+ current_new_parent = tabu_queue.get()
+ tabu_set.remove(current_new_parent)
+
+
+
+ current_edge = (current_new_parent,self.node_id)
+ added = False
+ parent_removed = None
+
+ if graph.has_edge(current_edge):
+ graph.remove_edges([current_edge])
+ else:
+ 'check the max_parents constraint'
+ if self.max_parents is not None:
+ parents_list = graph.get_parents_by_id(self.node_id)
+ if len(parents_list) >= self.max_parents :
+ parent_removed = (choice(parents_list), self.node_id)
+ graph.remove_edges([parent_removed])
+ graph.add_edges([current_edge])
+ added = True
+ #print('**************************')
+ current_score = self.structure_estimator.get_score_from_graph(graph,self.node_id)
+
+
+ # print("-------------------------------------------")
+ # print(f"Current new parent: {current_new_parent}")
+ # print(f"Current score: {current_score}")
+ # print(f"Current best score: {actual_best_score}")
+ # print(f"tabu list : {str(tabu_set)} length: {len(tabu_set)}")
+ # print(f"tabu queue : {str(tabu_queue)} length: {tabu_queue.qsize()}")
+ # print(f"graph edges: {graph.edges}")
+
+ # print("-------------------------------------------")
+ # input()
+ if current_score > actual_best_score:
+ 'update current best score'
+ actual_best_score = current_score
+ patince_count = 0
+ 'update tabu list'
+
+
+ else:
+ 'undo the last update'
+ if added:
+ graph.remove_edges([current_edge])
+ 'If a parent was removed, add it again to the graph'
+ if parent_removed is not None:
+ graph.add_edges([parent_removed])
+ else:
+ graph.add_edges([current_edge])
+ 'update patience count'
+ patince_count += 1
+
+
+ if tabu_queue.qsize() >= self.tabu_length:
+ current_removed = tabu_queue.get()
+ tabu_set.remove(current_removed)
+ 'Add the node on the tabu list'
+ tabu_queue.put(current_new_parent)
+ tabu_set.add(current_new_parent)
+
+ tabu_count += 1
+
+ 'Every tabu_rules_duration step remove an item from the tabu list '
+ if tabu_count % self.tabu_rules_duration == 0:
+ if tabu_queue.qsize() > 0:
+ current_removed = tabu_queue.get()
+ tabu_set.remove(current_removed)
+ tabu_count = 0
+ else:
+ tabu_count = 0
+
+ if self.patience is not None and patince_count > self.patience:
+ break
+
+ print(f"finito variabile: {self.node_id}")
+ return graph.edges
\ No newline at end of file
diff --git a/PyCTBN/build/lib/PyCTBN/structure_graph/__init__.py b/PyCTBN/build/lib/PyCTBN/structure_graph/__init__.py
new file mode 100644
index 0000000..85f18a2
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/structure_graph/__init__.py
@@ -0,0 +1,6 @@
+from .conditional_intensity_matrix import ConditionalIntensityMatrix
+from .network_graph import NetworkGraph
+from .sample_path import SamplePath
+from .set_of_cims import SetOfCims
+from .structure import Structure
+from .trajectory import Trajectory
\ No newline at end of file
diff --git a/PyCTBN/build/lib/PyCTBN/structure_graph/conditional_intensity_matrix.py b/PyCTBN/build/lib/PyCTBN/structure_graph/conditional_intensity_matrix.py
new file mode 100644
index 0000000..4abfdd0
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/structure_graph/conditional_intensity_matrix.py
@@ -0,0 +1,42 @@
+import numpy as np
+
+
+ class ConditionalIntensityMatrix(object):
+     """Conditional Intensity Matrix (CIM) of a node, kept together with the state residence times
+     vector and the state transitions count matrix it is computed from.
+
+     :param state_residence_times: state residence times vector
+     :type state_residence_times: numpy.ndarray
+     :param state_transition_matrix: the transitions count matrix
+     :type state_transition_matrix: numpy.ndarray
+     :_cim: the actual cim of the node; it is a float64 copy of ``state_transition_matrix`` until
+         ``compute_cim_coefficients`` is called
+     """
+
+     def __init__(self, state_residence_times: np.array, state_transition_matrix: np.array):
+         """Constructor Method
+         """
+         self._state_residence_times = state_residence_times
+         self._state_transition_matrix = state_transition_matrix
+         # astype() returns a copy, so the counts matrix is never modified through _cim
+         self._cim = self.state_transition_matrix.astype(np.float64)
+
+     def compute_cim_coefficients(self) -> None:
+         """Compute the coefficients of the matrix ``_cim``.
+
+         The diagonal of ``_cim`` is negated in place, then every entry is turned into
+         ``(_cim[x, x'] + 1) / (T[x] + 1)``, where ``T`` is the state residence times vector.
+         The class member ``_cim`` will contain the computed cim.
+
+         .. note::
+             Each call transforms the current content of ``_cim`` again, so the method is meant to be
+             called once per object.
+         """
+         negated_diagonal = -self._cim.diagonal()
+         np.fill_diagonal(self._cim, negated_diagonal)
+         smoothed_times = self._state_residence_times + 1
+         # the double transposition makes the division act row-wise: row x is divided by T[x] + 1
+         self._cim = ((self._cim.T + 1) / smoothed_times).T
+
+     @property
+     def state_residence_times(self) -> np.ndarray:
+         """The state residence times vector passed to the constructor."""
+         return self._state_residence_times
+
+     @property
+     def state_transition_matrix(self) -> np.ndarray:
+         """The transitions count matrix passed to the constructor."""
+         return self._state_transition_matrix
+
+     @property
+     def cim(self) -> np.ndarray:
+         """The current content of ``_cim``."""
+         return self._cim
+
+     def __repr__(self):
+         return f"CIM:\n{self.cim}"
+
diff --git a/PyCTBN/build/lib/PyCTBN/structure_graph/network_graph.py b/PyCTBN/build/lib/PyCTBN/structure_graph/network_graph.py
new file mode 100644
index 0000000..623981d
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/structure_graph/network_graph.py
@@ -0,0 +1,293 @@
+
+import typing
+
+import networkx as nx
+import numpy as np
+
+from .structure import Structure
+
+
+ class NetworkGraph(object):
+     """Abstracts the infos contained in the ``Structure`` class in the form of a directed graph.
+     Has the task of creating all the necessary filtering and indexing structures for parameters estimation.
+
+     :param graph_struct: the ``Structure`` object from which infos about the net will be extracted
+     :type graph_struct: Structure
+     :_graph: directed graph
+     :_aggregated_info_about_nodes_parents: the aggregated infos (labels, indexes, cardinalities) about the
+         parents of the node for which the indexing and filtering structures are built
+     :_time_scalar_indexing_structure: the indexing structure for state res time estimation
+     :_transition_scalar_indexing_structure: the indexing structure for transition computation
+     :_time_filtering: the columns filtering structure used in the computation of the state res times
+     :_transition_filtering: the columns filtering structure used in the computation of the transition
+         from one state to another
+     :_p_combs_structure: all the possible parents states combination for the node of interest
+     """
+
+     def __init__(self, graph_struct: Structure):
+         """Constructor Method
+         """
+         self._graph_struct = graph_struct
+         self._graph = nx.DiGraph()
+         self._aggregated_info_about_nodes_parents = None
+         self._time_scalar_indexing_structure = None
+         self._transition_scalar_indexing_structure = None
+         self._time_filtering = None
+         self._transition_filtering = None
+         self._p_combs_structure = None
+
+     def init_graph(self):
+         """Legacy initializer, kept unchanged for interface compatibility.
+
+         .. warning::
+             It relies on members (``_nodes_labels``, ``graph_struct``, ``get_ord_set_of_par_of_all_nodes``
+             and several ``build_*`` instance methods) that this class does not define, so calling it
+             raises ``AttributeError``. Use :meth:`fast_init` instead.
+         """
+         self.add_nodes(self._nodes_labels)
+         self.add_edges(self.graph_struct.edges)
+         self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes()
+         self._fancy_indexing = self.build_fancy_indexing_structure(0)
+         self.build_scalar_indexing_structures()
+         self.build_time_columns_filtering_structure()
+         self.build_transition_columns_filtering_structure()
+         self._p_combs_structure = self.build_p_combs_structure()
+
+     def fast_init(self, node_id: str) -> None:
+         """Initializes all the necessary structures for parameters estimation of the node identified by the label
+         ``node_id``.
+
+         :param node_id: the label of the node
+         :type node_id: string
+         """
+         self.add_nodes(self._graph_struct.nodes_labels)
+         self.add_edges(self._graph_struct.edges)
+         self._aggregated_info_about_nodes_parents = self.get_ordered_by_indx_set_of_parents(node_id)
+         # the aggregated tuple is (parents_labels, parents_indexes, parents_cardinalities)
+         p_indxs = self._aggregated_info_about_nodes_parents[1]
+         p_vals = self._aggregated_info_about_nodes_parents[2]
+         node_states = self.get_states_number(node_id)
+         node_indx = self.get_node_indx(node_id)
+         cols_number = self._graph_struct.total_variables_number
+         self._time_scalar_indexing_structure = \
+             NetworkGraph.build_time_scalar_indexing_structure_for_a_node(node_states, p_vals)
+         self._transition_scalar_indexing_structure = \
+             NetworkGraph.build_transition_scalar_indexing_structure_for_a_node(node_states, p_vals)
+         self._time_filtering = NetworkGraph.build_time_columns_filtering_for_a_node(node_indx, p_indxs)
+         self._transition_filtering = NetworkGraph.build_transition_filtering_for_a_node(node_indx, p_indxs, cols_number)
+         self._p_combs_structure = NetworkGraph.build_p_comb_structure_for_a_node(p_vals)
+
+     def add_nodes(self, list_of_nodes: typing.List) -> None:
+         """Adds the nodes contained in the list ``list_of_nodes`` to the ``_graph``.
+         Sets all the properties that identify a node (index, positional index, cardinality).
+
+         :param list_of_nodes: the nodes to add to ``_graph``
+         :type list_of_nodes: List
+         """
+         nodes_indxs = self._graph_struct.nodes_indexes
+         nodes_vals = self._graph_struct.nodes_values
+         # labels, indexes and cardinalities are paired positionally; ``pos`` is the position in the list
+         for pos, (node_label, node_indx, node_val) in enumerate(zip(list_of_nodes, nodes_indxs, nodes_vals)):
+             self._graph.add_node(node_label, indx=node_indx, val=node_val, pos_indx=pos)
+
+     def has_edge(self, edge: tuple) -> bool:
+         """Checks if the graph contains a specific edge.
+
+         :param edge: a tuple ``(from_node, to_node)`` that represents the edge
+         :type edge: tuple
+         :return: True if the edge is in the graph, False otherwise
+         :rtype: bool
+         """
+         return self._graph.has_edge(edge[0], edge[1])
+
+     def add_edges(self, list_of_edges: typing.List) -> None:
+         """Adds the edges contained in the list ``list_of_edges`` to the ``_graph``.
+
+         :param list_of_edges: the list of tuples representing the edges
+         :type list_of_edges: List
+         """
+         self._graph.add_edges_from(list_of_edges)
+
+     def remove_node(self, node_id: str) -> None:
+         """Removes the node ``node_id`` from ``_graph`` and from the underlying ``Structure``.
+         Resets all the filtering/indexing structures.
+
+         :param node_id: the label of the node to remove
+         :type node_id: string
+         """
+         self._graph.remove_node(node_id)
+         self._graph_struct.remove_node(node_id)
+         self.clear_indexing_filtering_structures()
+
+     def clear_indexing_filtering_structures(self) -> None:
+         """Resets all the filtering/indexing structures to ``None``.
+         """
+         self._aggregated_info_about_nodes_parents = None
+         self._time_scalar_indexing_structure = None
+         self._transition_scalar_indexing_structure = None
+         self._time_filtering = None
+         self._transition_filtering = None
+         self._p_combs_structure = None
+
+     def get_ordered_by_indx_set_of_parents(self, node: str) -> typing.Tuple:
+         """Builds the aggregated structure that holds all the infos relative to the parent set of the node, namely
+         (parents_labels, parents_indexes, parents_cardinalities).
+         The parents are sorted by their position in the nodes labels list of the ``Structure``.
+
+         :param node: the label of the node
+         :type node: string
+         :return: a tuple containing all the parent set infos
+         :rtype: Tuple
+         """
+         parents = self.get_parents_by_id(node)
+         position_of = {label: pos for pos, label in enumerate(self._graph_struct.nodes_labels)}
+         sorted_parents = sorted(parents, key=lambda label: position_of[label])
+         p_indxes = [self.get_node_indx(parent) for parent in sorted_parents]
+         p_values = [self.get_states_number(parent) for parent in sorted_parents]
+         return sorted_parents, p_indxes, p_values
+
+     def remove_edges(self, list_of_edges: typing.List) -> None:
+         """Removes from the graph the edges contained in the list ``list_of_edges``.
+
+         :param list_of_edges: the edges to remove from the graph
+         :type list_of_edges: List
+         """
+         self._graph.remove_edges_from(list_of_edges)
+
+     @staticmethod
+     def build_time_scalar_indexing_structure_for_a_node(node_states: int,
+                                                         parents_vals: typing.List) -> np.ndarray:
+         """Builds an indexing structure for the computation of state residence times values:
+         the cumulative product of ``[node_states, *parents_vals]``.
+
+         :param node_states: the node cardinality
+         :type node_states: int
+         :param parents_vals: the cardinalities of the node's parents
+         :type parents_vals: List
+         :return: The time indexing structure
+         :rtype: numpy.ndarray
+         """
+         T_vector = np.append(np.array([node_states]), parents_vals)
+         # builtin int replaces the np.int alias, which NumPy removed in version 1.24
+         return T_vector.cumprod().astype(int)
+
+     @staticmethod
+     def build_transition_scalar_indexing_structure_for_a_node(node_states_number: int, parents_vals: typing.List) \
+             -> np.ndarray:
+         """Builds an indexing structure for the computation of state transitions values:
+         the cumulative product of ``[node_states_number, node_states_number, *parents_vals]``.
+
+         :param node_states_number: the node cardinality
+         :type node_states_number: int
+         :param parents_vals: the cardinalities of the node's parents
+         :type parents_vals: List
+         :return: The transition indexing structure
+         :rtype: numpy.ndarray
+         """
+         M_vector = np.append(np.array([node_states_number, node_states_number]), parents_vals)
+         return M_vector.cumprod().astype(int)
+
+     @staticmethod
+     def build_time_columns_filtering_for_a_node(node_indx: int, p_indxs: typing.List) -> np.ndarray:
+         """Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs``
+         in the dataset.
+         This structure will be used in the computation of the state res times.
+
+         :param node_indx: the index of the node
+         :type node_indx: int
+         :param p_indxs: the indexes of the node's parents
+         :type p_indxs: List
+         :return: The filtering structure for times estimation, i.e. ``[node_indx, *p_indxs]``
+         :rtype: numpy.ndarray
+         """
+         # the final astype keeps an integer dtype even when ``p_indxs`` is empty
+         return np.append(np.array([node_indx], dtype=int), p_indxs).astype(int)
+
+     @staticmethod
+     def build_transition_filtering_for_a_node(node_indx: int, p_indxs: typing.List, nodes_number: int) \
+             -> np.ndarray:
+         """Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs``
+         in the dataset.
+         This structure will be used in the computation of the state transitions values.
+
+         :param node_indx: the index of the node
+         :type node_indx: int
+         :param p_indxs: the indexes of the node's parents
+         :type p_indxs: List
+         :param nodes_number: the total number of nodes in the dataset
+         :type nodes_number: int
+         :return: The filtering structure for transitions estimation, i.e.
+             ``[node_indx + nodes_number, node_indx, *p_indxs]``
+         :rtype: numpy.ndarray
+         """
+         return np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=int)
+
+     @staticmethod
+     def build_p_comb_structure_for_a_node(parents_values: typing.List) -> np.ndarray:
+         """Builds the combinatorial structure that contains the combinations of all the values contained in
+         ``parents_values``.
+
+         :param parents_values: the cardinalities of the nodes
+         :type parents_values: List
+         :return: A numpy matrix containing a grid of the combinations, one combination per row;
+             an empty matrix of shape (1, 0) if ``parents_values`` is empty
+         :rtype: numpy.ndarray
+         """
+         if len(parents_values) == 0:
+             return np.array([[]], dtype=int)
+         states_per_parent = [list(range(val)) for val in parents_values]
+         parents_comb = np.array(np.meshgrid(*states_per_parent)).T.reshape(-1, len(parents_values))
+         if len(parents_values) > 1:
+             # swap the first two columns of the grid produced by meshgrid
+             parents_comb[:, [0, 1]] = parents_comb[:, [1, 0]]
+         return parents_comb
+
+     def get_parents_by_id(self, node_id) -> typing.List:
+         """Returns a list of labels of the parents of the node ``node_id``
+
+         :param node_id: the node label
+         :type node_id: string
+         :return: a List of labels of the parents
+         :rtype: List
+         """
+         return list(self._graph.predecessors(node_id))
+
+     def get_states_number(self, node_id) -> int:
+         """Returns the ``val`` attribute (cardinality) stored in the graph for the node ``node_id``."""
+         return self._graph.nodes[node_id]['val']
+
+     def get_node_indx(self, node_id) -> int:
+         """Returns the ``indx`` attribute stored in the graph for the node ``node_id``."""
+         # direct attribute lookup, instead of building a dict of every node's index on each call
+         return self._graph.nodes[node_id]['indx']
+
+     def get_positional_node_indx(self, node_id) -> int:
+         """Returns the ``pos_indx`` attribute (positional index) stored in the graph for the node ``node_id``."""
+         return self._graph.nodes[node_id]['pos_indx']
+
+     @property
+     def nodes(self) -> typing.List:
+         return self._graph_struct.nodes_labels
+
+     @property
+     def edges(self) -> typing.List:
+         return list(self._graph.edges)
+
+     @property
+     def nodes_indexes(self) -> np.ndarray:
+         return self._graph_struct.nodes_indexes
+
+     @property
+     def nodes_values(self) -> np.ndarray:
+         return self._graph_struct.nodes_values
+
+     @property
+     def time_scalar_indexing_strucure(self) -> np.ndarray:
+         # misspelled name kept because it is part of the public interface
+         return self._time_scalar_indexing_structure
+
+     @property
+     def time_scalar_indexing_structure(self) -> np.ndarray:
+         """Correctly spelled alias of ``time_scalar_indexing_strucure``."""
+         return self._time_scalar_indexing_structure
+
+     @property
+     def time_filtering(self) -> np.ndarray:
+         return self._time_filtering
+
+     @property
+     def transition_scalar_indexing_structure(self) -> np.ndarray:
+         return self._transition_scalar_indexing_structure
+
+     @property
+     def transition_filtering(self) -> np.ndarray:
+         return self._transition_filtering
+
+     @property
+     def p_combs(self) -> np.ndarray:
+         return self._p_combs_structure
diff --git a/PyCTBN/build/lib/PyCTBN/structure_graph/sample_path.py b/PyCTBN/build/lib/PyCTBN/structure_graph/sample_path.py
new file mode 100644
index 0000000..80b51d9
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/structure_graph/sample_path.py
@@ -0,0 +1,91 @@
+
+
+import numpy as np
+import pandas as pd
+
+from .structure import Structure
+from .trajectory import Trajectory
+from ..utility.abstract_importer import AbstractImporter
+
+
+
+ class SamplePath(object):
+     """Aggregates all the information about the trajectories, the real structure of the sampled net and the
+     variables cardinalities. Has the task of creating the ``Trajectory`` and ``Structure`` objects that will
+     contain the mentioned data.
+
+     :param importer: the Importer object which contains the imported and processed data
+     :type importer: AbstractImporter
+     :_trajectories: the ``Trajectory`` object that will contain all the concatenated trajectories
+     :_structure: the ``Structure`` object that will contain all the structural infos about the net
+     :_total_variables_count: the number of variables in the net
+     :raises RuntimeError: if the importer has no variables frame or no concatenated samples, or if one of
+         them is an empty DataFrame / numpy array
+     """
+
+     def __init__(self, importer: AbstractImporter):
+         """Constructor Method
+         """
+         self._importer = importer
+         missing_data_msg = 'The importer object has to contain the all processed data!'
+         if self._importer._df_variables is None or self._importer._concatenated_samples is None:
+             raise RuntimeError(missing_data_msg)
+         if self._importer._df_variables.empty:
+             raise RuntimeError(missing_data_msg)
+         samples = self._importer._concatenated_samples
+         # only DataFrames and numpy arrays are checked for emptiness; other containers are accepted as they are
+         if isinstance(samples, pd.DataFrame) and samples.empty:
+             raise RuntimeError(missing_data_msg)
+         if isinstance(samples, np.ndarray) and samples.size == 0:
+             raise RuntimeError(missing_data_msg)
+         self._trajectories = None
+         self._structure = None
+         self._total_variables_count = None
+
+     def build_trajectories(self) -> None:
+         """Builds the ``Trajectory`` object that will contain all the trajectories.
+         Then asks the ``_importer`` object to clear its concatenated samples frame.
+         """
+         importer = self._importer
+         samples_arrays = importer.build_list_of_samples_array(importer.concatenated_samples)
+         # the extra column accounts for the time column that precedes the variables columns
+         self._trajectories = Trajectory(samples_arrays, len(importer.sorter) + 1)
+         importer.clear_concatenated_frame()
+
+     def build_structure(self) -> None:
+         """Builds the ``Structure`` object that aggregates all the infos about the net.
+
+         :raises RuntimeError: if the importer's sorter differs from the labels in the first column of the
+             variables frame
+         """
+         importer = self._importer
+         labels = importer.variables.iloc[:, 0].to_list()
+         if importer.sorter != labels:
+             raise RuntimeError("The Dataset columns order have to match the order of labels in the variables Frame!")
+
+         self._total_variables_count = len(importer.sorter)
+         indxs = importer.variables.index.to_numpy()
+         vals = importer.variables.iloc[:, 1].to_numpy()
+         prior_structure = importer.structure
+         if prior_structure is None or prior_structure.empty:
+             edges = []
+         else:
+             edges = list(prior_structure.to_records(index=False))
+         self._structure = Structure(labels, indxs, vals, edges, self._total_variables_count)
+
+     def clear_memory(self):
+         """Replaces the ``_raw_data`` member of the importer with an empty list."""
+         self._importer._raw_data = []
+
+     @property
+     def trajectories(self) -> Trajectory:
+         return self._trajectories
+
+     @property
+     def structure(self) -> Structure:
+         return self._structure
+
+     @property
+     def total_variables_count(self) -> int:
+         return self._total_variables_count
+
+     @property
+     def has_prior_net_structure(self) -> bool:
+         """True if the built ``Structure`` contains at least one edge."""
+         return bool(self._structure.edges)
+
+
+
+
+
+
diff --git a/PyCTBN/build/lib/PyCTBN/structure_graph/set_of_cims.py b/PyCTBN/build/lib/PyCTBN/structure_graph/set_of_cims.py
new file mode 100644
index 0000000..81caff5
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/structure_graph/set_of_cims.py
@@ -0,0 +1,97 @@
+
+
+import typing
+
+import numpy as np
+
+from .conditional_intensity_matrix import ConditionalIntensityMatrix
+
+
+ class SetOfCims(object):
+     """Aggregates all the CIMS of the node identified by the label ``_node_id``.
+
+     :param node_id: the node label
+     :type node_id: string
+     :param parents_states_number: the cardinalities of the parents
+     :type parents_states_number: List
+     :param node_states_number: the cardinality of the node
+     :type node_states_number: int
+     :param p_combs: the p_comb structure bound to this node
+     :type p_combs: numpy.ndarray
+     :_state_residence_times: matrix containing all the state residence time vectors for the node
+     :_transition_matrices: matrix containing all the transition matrices for the node
+     :_actual_cims: the cims of the node
+     """
+
+     def __init__(self, node_id: str, parents_states_number: typing.List, node_states_number: int, p_combs: np.ndarray):
+         """Constructor Method
+         """
+         self._node_id = node_id
+         self._parents_states_number = parents_states_number
+         self._node_states_number = node_states_number
+         self._actual_cims = []
+         self._state_residence_times = None
+         self._transition_matrices = None
+         self._p_combs = p_combs
+         self.build_times_and_transitions_structures()
+
+     def build_times_and_transitions_structures(self) -> None:
+         """Initializes at the correct dimensions the state residence times matrix and the state transition matrices.
+         Both are filled with zeros and have one row (one matrix) per combination of parents states,
+         or a single one when the node has no parents.
+         """
+         if self._parents_states_number:
+             combinations_number = int(np.prod(self._parents_states_number))
+         else:
+             combinations_number = 1
+         states = self._node_states_number
+         # builtin float/int replace the np.float/np.int aliases, which NumPy removed in version 1.24
+         self._state_residence_times = np.zeros((combinations_number, states), dtype=float)
+         self._transition_matrices = np.zeros((combinations_number, states, states), dtype=int)
+
+     def build_cims(self, state_res_times: np.ndarray, transition_matrices: np.ndarray) -> None:
+         """Builds the ``ConditionalIntensityMatrix`` objects given the state residence times and transitions matrices.
+         Computes the cim coefficients. The class member ``_actual_cims`` will contain the computed cims.
+         The members ``_transition_matrices`` and ``_state_residence_times`` are set to ``None`` afterwards.
+
+         :param state_res_times: the state residence times matrix
+         :type state_res_times: numpy.ndarray
+         :param transition_matrices: the transition matrices
+         :type transition_matrices: numpy.ndarray
+         """
+         # a fresh list is used because ``_actual_cims`` is a numpy array (no ``append``) after a previous call
+         computed_cims = []
+         for state_res_time_vector, transition_matrix in zip(state_res_times, transition_matrices):
+             cim_to_add = ConditionalIntensityMatrix(state_res_time_vector, transition_matrix)
+             cim_to_add.compute_cim_coefficients()
+             computed_cims.append(cim_to_add)
+         self._actual_cims = np.array(computed_cims)
+         self._transition_matrices = None
+         self._state_residence_times = None
+
+     def filter_cims_with_mask(self, mask_arr: np.ndarray, comb: typing.List) -> np.ndarray:
+         """Filters the cims contained in the array ``_actual_cims`` given the boolean mask ``mask_arr`` and the index
+         ``comb``. When ``mask_arr`` has at most one element all the cims are returned.
+
+         :param mask_arr: the boolean mask that indicates which parent to consider
+         :type mask_arr: numpy.array
+         :param comb: the state/s of the filtered parents
+         :type comb: numpy.array
+         :return: Array of ``ConditionalIntensityMatrix`` objects
+         :rtype: numpy.array
+         """
+         if mask_arr.size <= 1:
+             return self._actual_cims
+         # keep the rows of ``_p_combs`` whose masked columns are all equal to ``comb``
+         matching_rows = np.all(self._p_combs[:, mask_arr] == comb, axis=1)
+         flat_indxs = np.argwhere(matching_rows).ravel()
+         return self._actual_cims[flat_indxs]
+
+     @property
+     def actual_cims(self) -> np.ndarray:
+         return self._actual_cims
+
+     @property
+     def p_combs(self) -> np.ndarray:
+         return self._p_combs
+
+     def get_cims_number(self):
+         """Returns the number of cims currently stored in ``_actual_cims``."""
+         return len(self._actual_cims)
+
+
+
+
diff --git a/PyCTBN/build/lib/PyCTBN/structure_graph/structure.py b/PyCTBN/build/lib/PyCTBN/structure_graph/structure.py
new file mode 100644
index 0000000..a9d60cc
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/structure_graph/structure.py
@@ -0,0 +1,124 @@
+
+import typing as ty
+
+import numpy as np
+
+
+ class Structure(object):
+     """Contains all the infos about the network structure (nodes labels, nodes cardinalities, edges, indexes)
+
+     :param nodes_labels_list: the symbolic names of the variables
+     :type nodes_labels_list: List
+     :param nodes_indexes_arr: the indexes of the nodes
+     :type nodes_indexes_arr: numpy.ndarray
+     :param nodes_vals_arr: the cardinalities of the nodes
+     :type nodes_vals_arr: numpy.ndarray
+     :param edges_list: the edges of the network
+     :type edges_list: List
+     :param total_variables_number: the total number of variables in the dataset
+     :type total_variables_number: int
+     """
+
+     def __init__(self, nodes_labels_list: ty.List, nodes_indexes_arr: np.ndarray, nodes_vals_arr: np.ndarray,
+                  edges_list: ty.List, total_variables_number: int):
+         """Constructor Method
+         """
+         self._nodes_labels_list = nodes_labels_list
+         self._nodes_indexes_arr = nodes_indexes_arr
+         self._nodes_vals_arr = nodes_vals_arr
+         self._edges_list = edges_list
+         self._total_variables_number = total_variables_number
+
+     def remove_node(self, node_id: str) -> None:
+         """Removes the node ``node_id`` from all the class members (labels, indexes, cardinalities and every
+         edge that starts from or ends in the node).
+         The class member ``_total_variables_number`` is left untouched since it refers to the total number of
+         variables in the dataset.
+
+         :param node_id: the label of the node to remove
+         :type node_id: string
+         :raises ValueError: if ``node_id`` is not one of the nodes labels
+         """
+         node_positional_indx = self._nodes_labels_list.index(node_id)
+         del self._nodes_labels_list[node_positional_indx]
+         self._nodes_indexes_arr = np.delete(self._nodes_indexes_arr, node_positional_indx)
+         self._nodes_vals_arr = np.delete(self._nodes_vals_arr, node_positional_indx)
+         self._edges_list = [(from_node, to_node) for (from_node, to_node) in self._edges_list
+                             if from_node != node_id and to_node != node_id]
+
+     @property
+     def edges(self) -> ty.List:
+         return self._edges_list
+
+     @property
+     def nodes_labels(self) -> ty.List:
+         return self._nodes_labels_list
+
+     @property
+     def nodes_indexes(self) -> np.ndarray:
+         return self._nodes_indexes_arr
+
+     @property
+     def nodes_values(self) -> np.ndarray:
+         return self._nodes_vals_arr
+
+     @property
+     def total_variables_number(self) -> int:
+         return self._total_variables_number
+
+     def get_node_id(self, node_indx: int) -> str:
+         """Given the ``node_indx`` returns the node label.
+
+         :param node_indx: the position of the node in the labels list
+         :type node_indx: int
+         :return: the node label
+         :rtype: string
+         """
+         return self._nodes_labels_list[node_indx]
+
+     def clean_structure_edges(self):
+         """Replaces the edges list with a new empty list."""
+         self._edges_list = list()
+
+     def add_edge(self, edge: tuple):
+         """Appends ``edge`` to the edges list.
+
+         :param edge: the edge to add, as a tuple ``(from_node, to_node)``
+         :type edge: tuple
+         """
+         # the edge itself has to be stored, not the builtin ``tuple`` type
+         self._edges_list.append(edge)
+
+     def remove_edge(self, edge: tuple):
+         """Removes the first occurrence of ``edge`` from the edges list.
+
+         :param edge: the edge to remove, as a tuple ``(from_node, to_node)``
+         :type edge: tuple
+         :raises ValueError: if ``edge`` is not in the edges list
+         """
+         self._edges_list.remove(edge)
+
+     def contains_edge(self, edge: tuple) -> bool:
+         """Checks if ``edge`` is in the edges list.
+
+         :param edge: the edge to look for, as a tuple ``(from_node, to_node)``
+         :type edge: tuple
+         :return: True if the edge is present, False otherwise
+         :rtype: bool
+         """
+         return edge in self._edges_list
+
+     def get_node_indx(self, node_id: str) -> int:
+         """Given the node label ``node_id`` returns the node index.
+
+         :param node_id: the node label
+         :type node_id: string
+         :return: the node index
+         :rtype: int
+         :raises ValueError: if ``node_id`` is not one of the nodes labels
+         """
+         pos_indx = self._nodes_labels_list.index(node_id)
+         return self._nodes_indexes_arr[pos_indx]
+
+     def get_positional_node_indx(self, node_id: str) -> int:
+         """Returns the position of the label ``node_id`` in the nodes labels list."""
+         return self._nodes_labels_list.index(node_id)
+
+     def get_states_number(self, node: str) -> int:
+         """Given the node label ``node`` returns the cardinality of the node.
+
+         :param node: the node label
+         :type node: string
+         :return: the node cardinality
+         :rtype: int
+         :raises ValueError: if ``node`` is not one of the nodes labels
+         """
+         pos_indx = self._nodes_labels_list.index(node)
+         return self._nodes_vals_arr[pos_indx]
+
+     def __repr__(self):
+         return "Variables:\n" + str(self._nodes_labels_list) + "\nValues:\n" + str(self._nodes_vals_arr) + \
+                "\nEdges: \n" + str(self._edges_list)
+
+     def __eq__(self, other):
+         """Two structures are equal when they have the same set of labels, equal cardinalities and indexes
+         arrays and equal edges lists. Any object that is not a ``Structure`` compares unequal.
+         """
+         if isinstance(other, Structure):
+             return set(self._nodes_labels_list) == set(other._nodes_labels_list) and \
+                    np.array_equal(self._nodes_vals_arr, other._nodes_vals_arr) and \
+                    np.array_equal(self._nodes_indexes_arr, other._nodes_indexes_arr) and \
+                    self._edges_list == other._edges_list
+
+         return False
+
+     # instances are mutable and define __eq__, hence they are explicitly unhashable
+     # (this is also what Python does implicitly when only __eq__ is defined)
+     __hash__ = None
+
diff --git a/PyCTBN/build/lib/PyCTBN/structure_graph/trajectory.py b/PyCTBN/build/lib/PyCTBN/structure_graph/trajectory.py
new file mode 100644
index 0000000..36899b3
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/structure_graph/trajectory.py
@@ -0,0 +1,45 @@
+
+import typing
+
+import numpy as np
+
+
+ class Trajectory(object):
+     """Abstracts the infos about a complete set of trajectories, represented as a numpy array of doubles
+     (the time deltas) and a numpy matrix of ints (the changes of states).
+
+     :param list_of_columns: the list containing the times array (first item) and the values matrix (second item)
+     :type list_of_columns: List
+     :param original_cols_number: total number of cols in the data
+     :type original_cols_number: int
+     :_actual_trajectory: the trajectory containing also the duplicated/shifted values
+     :_times: the array containing the time deltas
+     """
+
+     def __init__(self, list_of_columns: typing.List, original_cols_number: int):
+         """Constructor Method
+         """
+         times, values = list_of_columns[0], list_of_columns[1]
+         self._times = times
+         self._actual_trajectory = values
+         self._original_cols_number = original_cols_number
+
+     @property
+     def trajectory(self) -> np.ndarray:
+         """The first ``original_cols_number - 1`` columns of the complete trajectory."""
+         kept_columns = self._original_cols_number - 1
+         return self._actual_trajectory[:, :kept_columns]
+
+     @property
+     def complete_trajectory(self) -> np.ndarray:
+         """The whole values matrix, shifted columns included."""
+         return self._actual_trajectory
+
+     @property
+     def times(self):
+         """The array of the time deltas."""
+         return self._times
+
+     def size(self):
+         """Returns the number of rows of the complete trajectory."""
+         return self._actual_trajectory.shape[0]
+
+     def __repr__(self):
+         return (f"Complete Trajectory Rows: {self.size()}\n{self.complete_trajectory!r}"
+                 f"\nTimes Rows:{self.times.size}\n{self.times!r}")
+
+
diff --git a/PyCTBN/build/lib/PyCTBN/utility/__init__.py b/PyCTBN/build/lib/PyCTBN/utility/__init__.py
new file mode 100644
index 0000000..f79749c
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/utility/__init__.py
@@ -0,0 +1,4 @@
+from .abstract_importer import AbstractImporter
+from .cache import Cache
+from .json_importer import JsonImporter
+from .sample_importer import SampleImporter
\ No newline at end of file
diff --git a/PyCTBN/build/lib/PyCTBN/utility/abstract_importer.py b/PyCTBN/build/lib/PyCTBN/utility/abstract_importer.py
new file mode 100644
index 0000000..1cad352
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/utility/abstract_importer.py
@@ -0,0 +1,164 @@
+
+import typing
+from abc import ABC, abstractmethod
+
+import numpy as np
+import pandas as pd
+
+import copy
+
+#from sklearn.utils import resample
+
+
+ class AbstractImporter(ABC):
+     """Abstract class that exposes all the necessary methods to process the trajectories and the net structure.
+
+     :param file_path: the file path, or dataset name if you import already processed data
+     :type file_path: str
+     :param trajectory_list: Dataframe or numpy array containing the concatenation of all the processed trajectories
+     :type trajectory_list: typing.Union[pandas.DataFrame, numpy.ndarray]
+     :param variables: Dataframe containing the nodes labels and cardinalities
+     :type variables: pandas.DataFrame
+     :param prior_net_structure: Dataframe containing the structure of the network (edges)
+     :type prior_net_structure: pandas.DataFrame
+     :_sorter: A list containing the variables labels in the SAME order as the columns in ``concatenated_samples``
+
+     .. warning::
+         The parameters ``variables`` and ``prior_net_structure`` HAVE to be properly constructed
+         as Pandas Dataframes with the following structure:
+         Header of _df_structure = [From_Node | To_Node]
+         Header of _df_variables = [Variable_Label | Variable_Cardinality]
+         See the tutorial on how to construct a correct ``concatenated_samples`` Dataframe/ndarray.
+
+     .. note::
+         See :class:``JsonImporter`` for an example implementation
+
+     """
+
+     def __init__(self, file_path: str = None, trajectory_list: typing.Union[pd.DataFrame, np.ndarray] = None,
+                  variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None):
+         """Constructor
+         """
+         self._file_path = file_path
+         self._df_samples_list = trajectory_list
+         self._concatenated_samples = []
+         self._df_variables = variables
+         self._df_structure = prior_net_structure
+         self._sorter = None
+         super().__init__()
+
+     @abstractmethod
+     def build_sorter(self, trajecory_header: object) -> typing.List:
+         """Initializes the ``_sorter`` class member from a trajectory dataframe, extracting the header of the frame
+         and keeping ONLY the variables symbolic labels, cutting out the time label in the header.
+
+         :param trajecory_header: an object that will be used to define the header
+         :type trajecory_header: object
+         :return: A list containing the processed header.
+         :rtype: List
+         """
+         pass
+
+     def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame,
+                                               columns_header: typing.List, shifted_cols_header: typing.List) \
+             -> pd.DataFrame:
+         """Computes the difference between each value present in the time column and the following one.
+         Copies and shifts by one position up all the values present in the remaining columns.
+         The last row, which has no following row, is dropped. The input frame is not modified.
+
+         :param sample_frame: the traj to be processed
+         :type sample_frame: pandas.Dataframe
+         :param columns_header: the original header of sample_frame
+         :type columns_header: List
+         :param shifted_cols_header: a copy of columns_header with changed names of the contents
+         :type shifted_cols_header: List
+         :return: The processed dataframe
+         :rtype: pandas.Dataframe
+
+         .. warning::
+             the Dataframe ``sample_frame`` has to follow the column structure of this header:
+             Header of sample_frame = [Time | Variable values]
+         """
+         frame = copy.deepcopy(sample_frame)
+         # row i receives time[i + 1] - time[i]
+         time_deltas = frame.iloc[:, 0].diff().shift(-1)
+         frame.iloc[:, 0] = time_deltas
+         # row i receives the states of row i + 1; the hole left in the last row is filled with 0
+         next_states = frame[columns_header].shift(-1).fillna(0).astype('int32')
+         next_states.columns = shifted_cols_header
+         frame = frame.assign(**next_states)
+         return frame.drop(frame.tail(1).index)
+
+     def compute_row_delta_in_all_samples_frames(self, df_samples_list: typing.List) -> None:
+         """Calls the method ``compute_row_delta_sigle_samples_frame`` on every dataframe present in the list
+         ``df_samples_list``.
+         Concatenates the result in the dataframe ``concatenated_samples``, whose columns are ordered as
+         ['Time', variables labels, shifted variables labels].
+
+         :param df_samples_list: the dataframe's list to be processed and concatenated
+         :type df_samples_list: List
+         :raises RuntimeError: if the class member ``_sorter`` is not initialized
+
+         .. warning::
+             The Dataframe sample_frame has to follow the column structure of this header:
+             Header of sample_frame = [Time | Variable values]
+             The class member self._sorter HAS to be properly INITIALIZED (See class members definition doc)
+         .. note::
+             After the call of this method the class member ``concatenated_samples`` will contain all processed
+             and merged trajectories
+         """
+         if not self._sorter:
+             raise RuntimeError("The class member self._sorter has to be INITIALIZED!")
+         # the shifted copy of every variable column is named after the variable, with an "S" appended
+         shifted_cols_header = [label + "S" for label in self._sorter]
+         processed_frames = [
+             self.compute_row_delta_sigle_samples_frame(sample, self._sorter, shifted_cols_header)
+             for sample in df_samples_list
+         ]
+         self._concatenated_samples = pd.concat(processed_frames)
+
+         complete_header = ['Time', *self._sorter, *shifted_cols_header]
+         self._concatenated_samples = self._concatenated_samples[complete_header]
+
+     def build_list_of_samples_array(self, concatenated_sample: pd.DataFrame) -> typing.List:
+         """Builds a List containing the delta times numpy array, and the complete transitions matrix.
+
+         :param concatenated_sample: the dataframe/array from which the time, and transitions matrix have to be extracted
+             and converted
+         :type concatenated_sample: pandas.Dataframe
+         :return: the resulting list of numpy arrays: the first column, and the remaining columns cast to int
+         :rtype: List
+         """
+         samples_array = concatenated_sample.to_numpy()
+         delta_times = samples_array[:, 0]
+         transitions = samples_array[:, 1:].astype(int)
+         return [delta_times, transitions]
+
+     def clear_concatenated_frame(self) -> None:
+         """Removes all values in the dataframe ``concatenated_samples``.
+         Nothing is done if ``concatenated_samples`` is not a DataFrame.
+         """
+         if not isinstance(self._concatenated_samples, pd.DataFrame):
+             return
+         # an empty slice keeps the columns and drops every row
+         self._concatenated_samples = self._concatenated_samples.iloc[0:0]
+
+     @abstractmethod
+     def dataset_id(self) -> object:
+         """If the original dataset contains multiple datasets, this method returns a unique id to identify the
+         current dataset
+         """
+         pass
+
+     @property
+     def concatenated_samples(self) -> pd.DataFrame:
+         return self._concatenated_samples
+
+     @property
+     def variables(self) -> pd.DataFrame:
+         return self._df_variables
+
+     @property
+     def structure(self) -> pd.DataFrame:
+         return self._df_structure
+
+     @property
+     def sorter(self) -> typing.List:
+         return self._sorter
+
+     @property
+     def file_path(self) -> str:
+         return self._file_path
diff --git a/PyCTBN/build/lib/PyCTBN/utility/cache.py b/PyCTBN/build/lib/PyCTBN/utility/cache.py
new file mode 100644
index 0000000..8e0369b
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/utility/cache.py
@@ -0,0 +1,58 @@
+
+import typing
+
+from ..structure_graph.set_of_cims import SetOfCims
+
+
+class Cache:
+    """This class acts as a cache of ``SetOfCims`` objects for a node.
+
+    :_list_of_sets_of_parents: a list of the parent sets; the entry at a given index is the key of the
+        ``SetOfCims`` stored at the SAME index in ``_actual_cache``
+    :_actual_cache: a list of ``SetOfCims`` objects
+    """
+
+    def __init__(self):
+        """Constructor Method
+        """
+        self._actual_cache = []
+        self._list_of_sets_of_parents = []
+
+    def find(self, parents_comb: typing.Set):
+        """Looks up the ``SetOfCims`` cached for the symbolic parents combination ``parents_comb``.
+
+        :param parents_comb: the parents related to the wanted ``SetOfCims``
+        :type parents_comb: Set
+        :return: the cached ``SetOfCims`` if ``parents_comb`` is found in ``_list_of_sets_of_parents``,
+            None otherwise.
+        :rtype: SetOfCims
+        """
+        try:
+            position = self._list_of_sets_of_parents.index(parents_comb)
+        except ValueError:
+            # ``list.index`` signals a missing key with ValueError: that is a cache miss
+            return None
+        return self._actual_cache[position]
+
+    def put(self, parents_comb: typing.Set, socim: SetOfCims):
+        """Stores ``socim`` in the cache under the symbolic index ``parents_comb``.
+
+        Key and value are appended to the two parallel lists, so they share the same position.
+
+        :param parents_comb: the symbolic set index
+        :type parents_comb: Set
+        :param socim: the related SetOfCims object
+        :type socim: SetOfCims
+        """
+        self._actual_cache.append(socim)
+        self._list_of_sets_of_parents.append(parents_comb)
+
+    def clear(self):
+        """Empties, in place, both ``_actual_cache`` and ``_list_of_sets_of_parents``.
+        """
+        self._list_of_sets_of_parents.clear()
+        self._actual_cache.clear()
\ No newline at end of file
diff --git a/PyCTBN/build/lib/PyCTBN/utility/json_importer.py b/PyCTBN/build/lib/PyCTBN/utility/json_importer.py
new file mode 100644
index 0000000..edff212
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/utility/json_importer.py
@@ -0,0 +1,176 @@
+import json
+import typing
+
+import pandas as pd
+
+
+from .abstract_importer import AbstractImporter
+
+
+class JsonImporter(AbstractImporter):
+    """Implements the abstract methods of AbstractImporter and adds all the necessary methods to process and prepare
+    the data in json extension.
+
+    :param file_path: the path of the file that contains the data to be imported
+    :type file_path: string
+    :param samples_label: the reference key for the samples in the trajectories
+    :type samples_label: string
+    :param structure_label: the reference key for the structure of the network data
+    :type structure_label: string
+    :param variables_label: the reference key for the cardinalities of the nodes data
+    :type variables_label: string
+    :param time_key: the key used to identify the timestamps in each trajectory
+    :type time_key: string
+    :param variables_key: the key used to identify the names of the variables in the net
+    :type variables_key: string
+    :_array_indx: the index of the outer JsonArray to extract the data from
+    :type _array_indx: int
+    :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory
+    :_raw_data: The raw contents of the json file to import
+    :type _raw_data: List
+    """
+
+    def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
+                 variables_key: str):
+        """Constructor method
+
+        .. note::
+            This constructor calls also the method ``read_json_file()``, so after the construction of the object
+            the class member ``_raw_data`` will contain the raw imported json data.
+
+        """
+        self._samples_label = samples_label
+        self._structure_label = structure_label
+        self._variables_label = variables_label
+        self._time_key = time_key
+        self._variables_key = variables_key
+        self._df_samples_list = None
+        self._array_indx = None
+        super(JsonImporter, self).__init__(file_path)
+        self._raw_data = self.read_json_file()
+
+    def import_data(self, indx: int) -> None:
+        """Implements the abstract method of :class:`AbstractImporter`.
+
+        Imports the trajectories, builds the sorter from the first trajectory, computes the row deltas,
+        empties the per-trajectory frames and finally imports the structure and the variables.
+
+        :param indx: the index of the outer JsonArray to extract the data from
+        :type indx: int
+        """
+        self._array_indx = indx
+        self._df_samples_list = self.import_trajectories(self._raw_data)
+        self._sorter = self.build_sorter(self._df_samples_list[0])
+        self.compute_row_delta_in_all_samples_frames(self._df_samples_list)
+        self.clear_data_frame_list()
+        self._df_structure = self.import_structure(self._raw_data)
+        self._df_variables = self.import_variables(self._raw_data)
+
+    def import_trajectories(self, raw_data: typing.List) -> typing.List:
+        """Imports the trajectories from the list of dicts ``raw_data``.
+
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :return: List of dataframes containing all the trajectories
+        :rtype: List
+        """
+        return self.normalize_trajectories(raw_data, self._array_indx, self._samples_label)
+
+    def import_structure(self, raw_data: typing.List) -> pd.DataFrame:
+        """Imports in a dataframe the data in the list raw_data at the key ``_structure_label``
+
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :return: Dataframe containing the starting node and the ending node of every arc of the network
+        :rtype: pandas.Dataframe
+        """
+        return self.one_level_normalizing(raw_data, self._array_indx, self._structure_label)
+
+    def import_variables(self, raw_data: typing.List) -> pd.DataFrame:
+        """Imports the data in ``raw_data`` at the key ``_variables_label``.
+
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :return: Dataframe containing the variables symbolic labels and their cardinalities
+        :rtype: pandas.Dataframe
+        """
+        return self.one_level_normalizing(raw_data, self._array_indx, self._variables_label)
+
+    def read_json_file(self) -> typing.List:
+        """Reads the JSON file in the path ``self._file_path``.
+
+        The file is decoded as UTF-8 explicitly, so the result does not depend on the platform's
+        default locale encoding.
+
+        :return: The contents of the json file
+        :rtype: List
+        """
+        with open(self._file_path, encoding='utf-8') as f:
+            return json.load(f)
+
+    def one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame:
+        """Extracts the one-level nested data in the list ``raw_data`` at the index ``indx`` at the key ``key``.
+
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :param indx: The index of the array from which the data have to be extracted
+        :type indx: int
+        :param key: the key for the Dicts from which to extract data
+        :type key: string
+        :return: A normalized dataframe
+        :rtype: pandas.Dataframe
+        """
+        return pd.DataFrame(raw_data[indx][key])
+
+    def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str) -> typing.List:
+        """
+        Extracts the trajectories in ``raw_data`` at the index ``indx`` at the key ``trajectories_key``.
+
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :param indx: The index of the array from which the data have to be extracted
+        :type indx: int
+        :param trajectories_key: the key of the trajectories objects
+        :type trajectories_key: string
+        :return: A list of dataframes containing the trajectories
+        :rtype: List
+        """
+        return [pd.DataFrame(sample) for sample in raw_data[indx][trajectories_key]]
+
+    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
+        """Implements the abstract method build_sorter of the :class:`AbstractImporter` for this dataset.
+
+        :param sample_frame: a trajectory dataframe
+        :type sample_frame: pandas.Dataframe
+        :return: the column labels of ``sample_frame`` without the time column ``_time_key``
+        :rtype: List
+        """
+        columns_header = list(sample_frame.columns.values)
+        columns_header.remove(self._time_key)
+        return columns_header
+
+    def clear_data_frame_list(self) -> None:
+        """Removes all values present in the dataframes in the list ``_df_samples_list``.
+
+        The list object is kept; each element is replaced by an empty frame with the same columns.
+        """
+        for indx, frame in enumerate(self._df_samples_list):
+            self._df_samples_list[indx] = frame.iloc[0:0]
+
+    def dataset_id(self) -> object:
+        """Returns the index of the outer JsonArray selected by the last ``import_data`` call
+        (``None`` before any import).
+        """
+        return self._array_indx
+
+    def import_sampled_cims(self, raw_data: typing.List, indx: int, cims_key: str) -> typing.Dict:
+        """Imports the synthetic CIMS in the dataset in a dictionary, using variables labels
+        as keys for the set of CIMS of a particular node.
+
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :param indx: The index of the array from which the data have to be extracted
+        :type indx: int
+        :param cims_key: the key where the json object cims are placed
+        :type cims_key: string
+        :return: a dictionary containing the sampled CIMS for all the variables in the net
+        :rtype: Dictionary
+        """
+        # resolve the nested lookup once instead of repeating it for every cim
+        sampled_cims = raw_data[indx][cims_key]
+        cims_for_all_vars = {}
+        for var, cims_by_p_comb in sampled_cims.items():
+            cims_for_all_vars[var] = [pd.DataFrame(cim).to_numpy() for cim in cims_by_p_comb.values()]
+        return cims_for_all_vars
+
+
+
diff --git a/PyCTBN/build/lib/PyCTBN/utility/sample_importer.py b/PyCTBN/build/lib/PyCTBN/utility/sample_importer.py
new file mode 100644
index 0000000..05073c8
--- /dev/null
+++ b/PyCTBN/build/lib/PyCTBN/utility/sample_importer.py
@@ -0,0 +1,65 @@
+import json
+import typing
+
+import pandas as pd
+import numpy as np
+
+from .abstract_importer import AbstractImporter
+
+
+
+class SampleImporter(AbstractImporter):
+    """Implements the abstract methods of AbstractImporter and adds all the necessary methods to process and prepare
+    the data loaded directly by using DataFrame
+
+    :param trajectory_list: the data that describes the trajectories
+    :type trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List]
+    :param variables: the data that describes the variables with name and cardinality
+    :type variables: typing.Union[pd.DataFrame, np.ndarray, typing.List]
+    :param prior_net_structure: the data of the real structure, if it exists
+    :type prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List]
+
+    :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory
+    """
+
+    def __init__(self,
+                 trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None,
+                 variables: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None,
+                 prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None):
+        """Constructor method
+
+        ``variables`` and ``prior_net_structure`` are wrapped in a DataFrame when given as a list or
+        a numpy array; every argument is then forwarded to the :class:`AbstractImporter` constructor.
+        """
+        if isinstance(variables, (list, np.ndarray)):
+            variables = pd.DataFrame(variables)
+        # each argument is tested on its own: the original tested ``variables`` twice, so the
+        # structure was never converted
+        if isinstance(prior_net_structure, (list, np.ndarray)):
+            prior_net_structure = pd.DataFrame(prior_net_structure)
+
+        super(SampleImporter, self).__init__(trajectory_list=trajectory_list,
+                                             variables=variables,
+                                             prior_net_structure=prior_net_structure)
+
+    def import_data(self, header_column=None):
+        """Sets the sorter and computes the row deltas for all the trajectories.
+
+        :param header_column: the ordered variables labels to use as sorter; when None the sorter is
+            built from the first trajectory with ``build_sorter``
+        :type header_column: List, optional
+        """
+        if header_column is not None:
+            self._sorter = header_column
+        else:
+            self._sorter = self.build_sorter(self._df_samples_list[0])
+
+        samples_list = self._df_samples_list
+
+        if isinstance(samples_list, np.ndarray):
+            samples_list = samples_list.tolist()
+
+        self.compute_row_delta_in_all_samples_frames(samples_list)
+
+    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
+        """Implements the abstract method build_sorter of the :class:`AbstractImporter` in order to get the ordered variables list.
+
+        :param sample_frame: a trajectory dataframe
+        :type sample_frame: pandas.Dataframe
+        :return: the column labels of ``sample_frame`` without the first one
+        :rtype: List
+        """
+        columns_header = list(sample_frame.columns.values)
+        del columns_header[0]
+        return columns_header
+
+    def dataset_id(self) -> object:
+        """Implements the abstract method of :class:`AbstractImporter`; this importer has no dataset id
+        and always returns None.
+        """
+        pass
\ No newline at end of file
diff --git a/PyCTBN/build/lib/classes/__init__.py b/PyCTBN/build/lib/classes/__init__.py
new file mode 100644
index 0000000..faff79c
--- /dev/null
+++ b/PyCTBN/build/lib/classes/__init__.py
@@ -0,0 +1,8 @@
+import PyCTBN.PyCTBN.estimators
+from PyCTBN.PyCTBN.estimators import *
+import PyCTBN.PyCTBN.optimizers
+from PyCTBN.PyCTBN.optimizers import *
+import PyCTBN.PyCTBN.structure_graph
+from PyCTBN.PyCTBN.structure_graph import *
+import PyCTBN.PyCTBN.utility
+from PyCTBN.PyCTBN.utility import *
\ No newline at end of file
diff --git a/PyCTBN/build/lib/classes/estimators/__init__.py b/PyCTBN/build/lib/classes/estimators/__init__.py
new file mode 100644
index 0000000..112086f
--- /dev/null
+++ b/PyCTBN/build/lib/classes/estimators/__init__.py
@@ -0,0 +1,5 @@
+from .fam_score_calculator import FamScoreCalculator
+from .parameters_estimator import ParametersEstimator
+from .structure_estimator import StructureEstimator
+from .structure_constraint_based_estimator import StructureConstraintBasedEstimator
+from .structure_score_based_estimator import StructureScoreBasedEstimator
diff --git a/PyCTBN/build/lib/classes/estimators/fam_score_calculator.py b/PyCTBN/build/lib/classes/estimators/fam_score_calculator.py
new file mode 100644
index 0000000..5b0b591
--- /dev/null
+++ b/PyCTBN/build/lib/classes/estimators/fam_score_calculator.py
@@ -0,0 +1,272 @@
+
+import itertools
+import json
+import typing
+
+import networkx as nx
+import numpy as np
+from networkx.readwrite import json_graph
+
+from math import log
+
+from scipy.special import loggamma
+from random import choice
+
+from ..structure_graph.set_of_cims import SetOfCims
+from ..structure_graph.network_graph import NetworkGraph
+from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
+
+
+'''
+
+'''
+
+
+class FamScoreCalculator:
+    """
+    Has the task of calculating the FamScore of a node by using a Bayesian score function
+    """
+
+    def __init__(self):
+        pass
+
+    # region theta
+
+    def marginal_likelihood_theta(self,
+                                  cims: ConditionalIntensityMatrix,
+                                  alpha_xu: float,
+                                  alpha_xxu: float):
+        """
+        Calculate the marginal likelihood over theta of a node, summing the contribution of each cim
+
+        :param cims: np.array with all the node's cims
+        :type cims: np.array
+        :param alpha_xu: hyperparameter over the CTBN’s q parameters
+        :type alpha_xu: float
+        :param alpha_xxu: distributed hyperparameter over the CTBN’s theta parameters
+        :type alpha_xxu: float
+
+        :return: the value of the marginal likelihood over theta
+        :rtype: float
+        """
+        per_cim_values = [
+            self.variable_cim_xu_marginal_likelihood_theta(cim, alpha_xu, alpha_xxu)
+            for cim in cims
+        ]
+        return np.sum(per_cim_values)
+
+    def variable_cim_xu_marginal_likelihood_theta(self,
+                                                  cim: ConditionalIntensityMatrix,
+                                                  alpha_xu: float,
+                                                  alpha_xxu: float):
+        """
+        Calculate the value of the marginal likelihood over theta given a cim
+
+        :param cim: A conditional_intensity_matrix object with the sufficient statistics
+        :type cim: class:'ConditionalIntensityMatrix'
+        :param alpha_xu: hyperparameter over the CTBN’s q parameters
+        :type alpha_xu: float
+        :param alpha_xxu: distributed hyperparameter over the CTBN’s theta parameters
+        :type alpha_xxu: float
+
+        :return: the value of the marginal likelihood over theta
+        :rtype: float
+        """
+        # one contribution for every state of the node
+        states_number = len(cim._state_residence_times)
+        per_state_values = [
+            self.single_cim_xu_marginal_likelihood_theta(state, cim, alpha_xu, alpha_xxu)
+            for state in range(states_number)
+        ]
+        return np.sum(per_state_values)
+
+    def single_cim_xu_marginal_likelihood_theta(self,
+                                                index: int,
+                                                cim: ConditionalIntensityMatrix,
+                                                alpha_xu: float,
+                                                alpha_xxu: float):
+        """
+        Calculate the marginal likelihood over theta of the node when it assumes a specific value
+        and a specific parents' assignment
+
+        :param index: the index of the state assumed by the node
+        :type index: int
+        :param cim: A conditional_intensity_matrix object with the sufficient statistics
+        :type cim: class:'ConditionalIntensityMatrix'
+        :param alpha_xu: hyperparameter over the CTBN’s q parameters
+        :type alpha_xu: float
+        :param alpha_xxu: distributed hyperparameter over the CTBN’s theta parameters
+        :type alpha_xxu: float
+
+        :return: the value of the marginal likelihood over theta when the node assumes a specific value
+        :rtype: float
+        """
+        transitions = cim.state_transition_matrix
+
+        # every state but ``index``, because of the x != x' condition in the summation
+        arrival_states = list(range(len(cim._state_residence_times)))
+        arrival_states.remove(index)
+
+        # for a non uniform alpha_xx, alpha_xxu would have to be recomputed here for every arrival state
+        diagonal_part = loggamma(alpha_xu) - loggamma(alpha_xu + transitions[index, index])
+        off_diagonal_part = np.sum([
+            self.single_internal_cim_xxu_marginal_likelihood_theta(transitions[index, arrival], alpha_xxu)
+            for arrival in arrival_states
+        ])
+        return diagonal_part + off_diagonal_part
+
+    def single_internal_cim_xxu_marginal_likelihood_theta(self,
+                                                          M_xxu_suff_stats: float,
+                                                          alpha_xxu: float=1):
+        """Calculate the second part of the marginal likelihood over theta formula
+
+        :param M_xxu_suff_stats: value of the sufficient statistic M[xx'|u]
+        :type M_xxu_suff_stats: float
+        :param alpha_xxu: distributed hyperparameter over the CTBN’s theta parameters, default to 1
+        :type alpha_xxu: float
+
+        :return: ``loggamma(alpha_xxu + M_xxu_suff_stats) - loggamma(alpha_xxu)``
+        :rtype: float
+        """
+        with_statistic = loggamma(alpha_xxu + M_xxu_suff_stats)
+        return with_statistic - loggamma(alpha_xxu)
+
+    # endregion
+
+    # region q
+
+    def marginal_likelihood_q(self,
+                              cims: np.array,
+                              tau_xu: float=0.1,
+                              alpha_xu: float=1):
+        """
+        Calculate the value of the marginal likelihood over q of a node, summing the contribution of each cim
+
+        :param cims: np.array with all the node's cims
+        :type cims: np.array
+        :param tau_xu: hyperparameter over the CTBN’s q parameters, default to 0.1
+        :type tau_xu: float
+        :param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 1
+        :type alpha_xu: float
+
+        :return: the value of the marginal likelihood over q
+        :rtype: float
+        """
+        per_cim_values = [
+            self.variable_cim_xu_marginal_likelihood_q(cim, tau_xu, alpha_xu)
+            for cim in cims
+        ]
+        return np.sum(per_cim_values)
+
+    def variable_cim_xu_marginal_likelihood_q(self,
+                                              cim: ConditionalIntensityMatrix,
+                                              tau_xu: float=0.1,
+                                              alpha_xu: float=1):
+        """
+        Calculate the value of the marginal likelihood over q given a cim
+
+        :param cim: A conditional_intensity_matrix object with the sufficient statistics
+        :type cim: class:'ConditionalIntensityMatrix'
+        :param tau_xu: hyperparameter over the CTBN’s q parameters, default to 0.1
+        :type tau_xu: float
+        :param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 1
+        :type alpha_xu: float
+
+        :return: the value of the marginal likelihood over q
+        :rtype: float
+        """
+        # one contribution for every state of the node
+        states_number = len(cim._state_residence_times)
+        per_state_values = [
+            self.single_cim_xu_marginal_likelihood_q(
+                cim.state_transition_matrix[state, state],
+                cim._state_residence_times[state],
+                tau_xu,
+                alpha_xu)
+            for state in range(states_number)
+        ]
+        return np.sum(per_state_values)
+
+    def single_cim_xu_marginal_likelihood_q(self,
+                                            M_xu_suff_stats: float,
+                                            T_xu_suff_stats: float,
+                                            tau_xu: float=0.1,
+                                            alpha_xu: float=1):
+        """
+        Calculate the marginal likelihood on q of the node when it assumes a specific value
+        and a specific parents' assignment
+
+        :param M_xu_suff_stats: value of the sufficient statistic M[x|u]
+        :type M_xu_suff_stats: float
+        :param T_xu_suff_stats: value of the sufficient statistic T[x|u]
+        :type T_xu_suff_stats: float
+        :param tau_xu: hyperparameter over the CTBN’s q parameters, default to 0.1
+        :type tau_xu: float
+        :param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 1
+        :type alpha_xu: float
+
+        :return: the value of the marginal likelihood of the node when it assumes a specific value
+        :rtype: float
+        """
+        first_group = loggamma(alpha_xu + M_xu_suff_stats + 1) + (log(tau_xu) * (alpha_xu + 1))
+        second_group = loggamma(alpha_xu + 1) + (
+            log(tau_xu + T_xu_suff_stats) * (alpha_xu + M_xu_suff_stats + 1))
+        return first_group - second_group
+
+    # end region
+
+    def get_fam_score(self,
+                      cims: np.array,
+                      tau_xu: float=0.1,
+                      alpha_xu: float=1):
+        """
+        Calculate the FamScore value of the node
+
+        :param cims: np.array with all the node's cims
+        :type cims: np.array
+        :param tau_xu: hyperparameter over the CTBN’s q parameters, default to 0.1
+        :type tau_xu: float, optional
+        :param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 1
+        :type alpha_xu: float, optional
+
+        :return: the FamScore value of the node
+        :rtype: float
+        """
+        # alpha_xxu as a uniform distribution: alpha_xu is split among the other states of the node
+        other_states_number = len(cims[0]._state_residence_times) - 1
+        alpha_xxu = alpha_xu / other_states_number
+
+        q_part = self.marginal_likelihood_q(cims, tau_xu, alpha_xu)
+        theta_part = self.marginal_likelihood_theta(cims, alpha_xu, alpha_xxu)
+        return q_part + theta_part
diff --git a/PyCTBN/build/lib/classes/estimators/parameters_estimator.py b/PyCTBN/build/lib/classes/estimators/parameters_estimator.py
new file mode 100644
index 0000000..4754d58
--- /dev/null
+++ b/PyCTBN/build/lib/classes/estimators/parameters_estimator.py
@@ -0,0 +1,143 @@
+import sys
+sys.path.append('../')
+import numpy as np
+
+from ..structure_graph.network_graph import NetworkGraph
+from ..structure_graph.set_of_cims import SetOfCims
+from ..structure_graph.trajectory import Trajectory
+
+
+class ParametersEstimator(object):
+    """Has the task of computing the cims of particular node given the trajectories and the net structure
+    in the graph ``_net_graph``.
+
+    :param trajectories: the trajectories
+    :type trajectories: Trajectory
+    :param net_graph: the net structure
+    :type net_graph: NetworkGraph
+    :_single_set_of_cims: the set of cims object that will hold the cims of the node
+    """
+
+    def __init__(self, trajectories: Trajectory, net_graph: NetworkGraph):
+        """Constructor Method
+        """
+        self._trajectories = trajectories
+        self._net_graph = net_graph
+        self._single_set_of_cims = None
+
+    def fast_init(self, node_id: str) -> None:
+        """Initializes all the necessary structures for the parameters estimation for the node ``node_id``.
+
+        :param node_id: the node label
+        :type node_id: string
+        """
+        p_vals = self._net_graph._aggregated_info_about_nodes_parents[2]
+        node_states_number = self._net_graph.get_states_number(node_id)
+        self._single_set_of_cims = SetOfCims(node_id, p_vals, node_states_number, self._net_graph.p_combs)
+
+    def compute_parameters_for_node(self, node_id: str) -> SetOfCims:
+        """Compute the CIMS of the node identified by the label ``node_id``.
+
+        ``fast_init`` must have been called before, since the results are written in
+        ``_single_set_of_cims``.
+
+        :param node_id: the node label
+        :type node_id: string
+        :return: A SetOfCims object filled with the computed CIMS
+        :rtype: SetOfCims
+        """
+        node_indx = self._net_graph.get_node_indx(node_id)
+        state_res_times = self._single_set_of_cims._state_residence_times
+        transition_matrices = self._single_set_of_cims._transition_matrices
+        ParametersEstimator.compute_state_res_time_for_node(self._trajectories.times,
+                                                            self._trajectories.trajectory,
+                                                            self._net_graph.time_filtering,
+                                                            self._net_graph.time_scalar_indexing_strucure,
+                                                            state_res_times)
+        ParametersEstimator.compute_state_transitions_for_a_node(node_indx, self._trajectories.complete_trajectory,
+                                                                 self._net_graph.transition_filtering,
+                                                                 self._net_graph.transition_scalar_indexing_structure,
+                                                                 transition_matrices)
+        self._single_set_of_cims.build_cims(state_res_times, transition_matrices)
+        return self._single_set_of_cims
+
+    @staticmethod
+    def compute_state_res_time_for_node(times: np.ndarray, trajectory: np.ndarray,
+                                        cols_filter: np.ndarray, scalar_indexes_struct: np.ndarray,
+                                        T: np.ndarray) -> None:
+        """Compute the state residence times for a node and fill the matrix ``T`` with the results
+
+        :param times: the times deltas vector
+        :type times: numpy.array
+        :param trajectory: the trajectory
+        :type trajectory: numpy.ndArray
+        :param cols_filter: the columns filtering structure
+        :type cols_filter: numpy.array
+        :param scalar_indexes_struct: the indexing structure
+        :type scalar_indexes_struct: numpy.array
+        :param T: the state residence times vectors
+        :type T: numpy.ndArray
+        """
+        # every row of the filtered trajectory is collapsed to a single integer bin index;
+        # the builtin ``int`` replaces the ``np.int`` alias, removed in NumPy 1.24
+        bin_indexes = np.sum(trajectory[:, cols_filter] * scalar_indexes_struct / scalar_indexes_struct[0],
+                             axis=1).astype(int)
+        # the time deltas are accumulated per bin, then shaped like the rows of T
+        T[:] = np.bincount(bin_indexes, times,
+                           minlength=scalar_indexes_struct[-1]).reshape(-1, T.shape[1])
+
+    @staticmethod
+    def compute_state_transitions_for_a_node(node_indx: int, trajectory: np.ndarray, cols_filter: np.ndarray,
+                                             scalar_indexing: np.ndarray, M: np.ndarray) -> None:
+        """Compute the state transitions for a node and fill the matrices ``M`` with the results.
+
+        :param node_indx: the index of the node
+        :type node_indx: int
+        :param trajectory: the trajectory
+        :type trajectory: numpy.ndArray
+        :param cols_filter: the columns filtering structure
+        :type cols_filter: numpy.array
+        :param scalar_indexing: the indexing structure
+        :type scalar_indexing: numpy.array
+        :param M: the state transitions matrices
+        :type M: numpy.ndArray
+        """
+        # flat positions, in the raveled M, of the diagonal entries of every matrix
+        diag_indices = np.array([x * M.shape[1] + x % M.shape[1] for x in range(M.shape[0] * M.shape[1])],
+                                dtype=np.int64)
+        # only the rows with a non negative value in the column ``shape[1] / 2 + node_indx`` are kept
+        # NOTE(review): presumably that column flags the transitions of this node - confirm
+        # the builtin ``int`` replaces the ``np.int`` alias, removed in NumPy 1.24
+        trj_tmp = trajectory[trajectory[:, int(trajectory.shape[1] / 2) + node_indx].astype(int) >= 0]
+        bin_indexes = np.sum(trj_tmp[:, cols_filter] * scalar_indexing / scalar_indexing[0], axis=1).astype(int)
+        M[:] = np.bincount(bin_indexes, minlength=scalar_indexing[-1]).reshape(-1, M.shape[1], M.shape[2])
+        M_raveled = M.ravel()
+        # the diagonals are zeroed first, then set to the sum of the other entries of their row
+        # (this relies on ``ravel`` returning a view of M - assumes M is contiguous, TODO confirm)
+        M_raveled[diag_indices] = 0
+        M_raveled[diag_indices] = np.sum(M, axis=2).ravel()
+
+    def init_sets_cims_container(self):
+        """Builds a ``SetsOfCimsContainer`` and stores it in ``sets_of_cims_struct``.
+
+        NOTE(review): this method reads ``acims`` and ``self.net_graph``, which are neither imported
+        nor set in this class as visible here (the constructor sets ``_net_graph``); it looks like
+        legacy code that would fail if called - confirm before use.
+        """
+        self.sets_of_cims_struct = acims.SetsOfCimsContainer(self.net_graph.nodes,
+                                                             self.net_graph.nodes_values,
+                                                             self.net_graph.get_ordered_by_indx_parents_values_for_all_nodes(),
+                                                             self.net_graph.p_combs)
+
+    def compute_parameters(self):
+        """Computes the cims of every node, filling the sets of cims in ``sets_of_cims_struct``.
+
+        NOTE(review): this method reads ``self.net_graph`` and ``self.sample_path``, which the
+        constructor does not set, and passes the node index as an extra first argument to
+        ``compute_state_res_time_for_node``, which does not accept it; it looks like legacy code
+        that would fail if called - confirm before use.
+        """
+        for indx, aggr in enumerate(zip(self.net_graph.nodes, self.sets_of_cims_struct.sets_of_cims)):
+            self.compute_state_res_time_for_node(self.net_graph.get_node_indx(aggr[0]), self.sample_path.trajectories.times,
+                                                 self.sample_path.trajectories.trajectory,
+                                                 self.net_graph.time_filtering[indx],
+                                                 self.net_graph.time_scalar_indexing_strucure[indx],
+                                                 aggr[1]._state_residence_times)
+            self.compute_state_transitions_for_a_node(self.net_graph.get_node_indx(aggr[0]),
+                                                      self.sample_path.trajectories.complete_trajectory,
+                                                      self.net_graph.transition_filtering[indx],
+                                                      self.net_graph.transition_scalar_indexing_structure[indx],
+                                                      aggr[1]._transition_matrices)
+            aggr[1].build_cims(aggr[1]._state_residence_times, aggr[1]._transition_matrices)
+
+
+
+
+
+
+
+
diff --git a/PyCTBN/build/lib/classes/estimators/structure_constraint_based_estimator.py b/PyCTBN/build/lib/classes/estimators/structure_constraint_based_estimator.py
new file mode 100644
index 0000000..7d5721e
--- /dev/null
+++ b/PyCTBN/build/lib/classes/estimators/structure_constraint_based_estimator.py
@@ -0,0 +1,238 @@
+
+import itertools
+import json
+import typing
+
+import networkx as nx
+import numpy as np
+from networkx.readwrite import json_graph
+import os
+from scipy.stats import chi2 as chi2_dist
+from scipy.stats import f as f_dist
+from tqdm import tqdm
+
+from ..utility.cache import Cache
+from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
+from ..structure_graph.network_graph import NetworkGraph
+from .parameters_estimator import ParametersEstimator
+from .structure_estimator import StructureEstimator
+from ..structure_graph.sample_path import SamplePath
+from ..structure_graph.structure import Structure
+from ..optimizers.constraint_based_optimizer import ConstraintBasedOptimizer
+
+import concurrent.futures
+
+
+
+import multiprocessing
+from multiprocessing import Pool
+
+
+class StructureConstraintBasedEstimator(StructureEstimator):
+ """
+ Has the task of estimating the network structure given the trajectories in samplepath by using a constraint-based approach.
+
+ :param sample_path: the _sample_path object containing the trajectories and the real structure
+ :type sample_path: SamplePath
+ :param exp_test_alfa: the significance level for the exponential Hp test
+ :type exp_test_alfa: float
+ :param chi_test_alfa: the significance level for the chi Hp test
+ :type chi_test_alfa: float
+ :_nodes: the nodes labels
+ :_nodes_vals: the nodes cardinalities
+ :_nodes_indxs: the nodes indexes
+ :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes``
+ :_cache: the Cache object
+ """
+
+    def __init__(self, sample_path: SamplePath, exp_test_alfa: float, chi_test_alfa: float,
+                 known_edges: typing.Optional[typing.List] = None, thumb_threshold: int = 25):
+        """Constructor Method
+
+        :param sample_path: the sample path object containing the trajectories and the real structure
+        :type sample_path: SamplePath
+        :param exp_test_alfa: the significance level for the exponential Hp test
+        :type exp_test_alfa: float
+        :param chi_test_alfa: the significance level for the chi Hp test
+        :type chi_test_alfa: float
+        :param known_edges: the edges forwarded to the :class:`StructureEstimator` constructor,
+            default to an empty list
+        :type known_edges: List, optional
+        :param thumb_threshold: the threshold compared with the thumb value in ``independence_test``,
+            default to 25
+        :type thumb_threshold: int, optional
+        """
+        # a new list for every call: a ``[]`` default would be one object shared by all the instances
+        if known_edges is None:
+            known_edges = []
+        super().__init__(sample_path, known_edges)
+        self._exp_test_sign = exp_test_alfa
+        self._chi_test_alfa = chi_test_alfa
+        self._thumb_threshold = thumb_threshold
+        self._cache = Cache()
+
+    def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int,
+                      tot_vars_count: int, parent_indx, child_indx) -> bool:
+        """Performs a complete independence test on the directed graphs G1 = {test_child U parent_set}
+        G2 = {G1 U test_parent} (added as an additional parent of the test_child).
+        Generates all the necessary structures and data to perform the tests.
+
+        The sets of cims of both graphs are looked up in the cache by parent set and are computed,
+        and cached, only on a miss.
+
+        :param test_parent: the node label of the test parent
+        :type test_parent: string
+        :param test_child: the node label of the child
+        :type test_child: string
+        :param parent_set: the common parent set
+        :type parent_set: List
+        :param child_states_numb: the cardinality of the ``test_child``
+        :type child_states_numb: int
+        :param tot_vars_count: the total number of variables in the net
+        :type tot_vars_count: int
+        :param parent_indx: forwarded to ``independence_test``, where it indexes the rows of
+            ``_removable_edges_matrix``
+        :param child_indx: forwarded to ``independence_test``, where it indexes the columns of
+            ``_removable_edges_matrix``
+        :return: True iff test_child and test_parent are independent given the sep_set parent_set. False otherwise
+        :rtype: bool
+        """
+        p_set = parent_set[:]
+        complete_info = parent_set[:]
+        complete_info.append(test_child)
+
+        parents = np.array(parent_set)
+        parents = np.append(parents, test_parent)
+        sorted_parents = self._nodes[np.isin(self._nodes, parents)]
+        cims_filter = sorted_parents != test_parent
+
+        p_set.insert(0, test_parent)
+        nodes_of_g2 = complete_info + [test_parent]
+        parents_of_g2 = p_set[:]
+
+        def build_g2():
+            # builds the graph G2 (the child, the sep-set and the test parent), initialized for the child
+            bool_mask2 = np.isin(self._nodes, nodes_of_g2)
+            l2 = list(self._nodes[bool_mask2])
+            indxs2 = self._nodes_indxs[bool_mask2]
+            vals2 = self._nodes_vals[bool_mask2]
+            # NOTE(review): product() iterates ``test_child`` character by character, so the edges are
+            # (parent, child) pairs only for single character labels - confirm the labels format
+            eds2 = list(itertools.product(parents_of_g2, test_child))
+            s2 = Structure(l2, indxs2, vals2, eds2, tot_vars_count)
+            graph = NetworkGraph(s2)
+            graph.fast_init(test_child)
+            return graph
+
+        # G2 is needed by both branches below, but only on a cache miss: it is built at most once.
+        # The original bound it only in the first branch, so a hit for G2 followed by a miss
+        # for G1 raised an UnboundLocalError.
+        g2 = None
+        sofc2 = self._cache.find(set(p_set))
+
+        if not sofc2:
+            g2 = build_g2()
+            p2 = ParametersEstimator(self._sample_path.trajectories, g2)
+            p2.fast_init(test_child)
+            sofc2 = p2.compute_parameters_for_node(test_child)
+            self._cache.put(set(p_set), sofc2)
+
+        del p_set[0]
+        sofc1 = self._cache.find(set(p_set))
+        if not sofc1:
+            if g2 is None:
+                g2 = build_g2()
+            # G1 is obtained from G2 by removing the test parent
+            g2.remove_node(test_parent)
+            g2.fast_init(test_child)
+            p2 = ParametersEstimator(self._sample_path.trajectories, g2)
+            p2.fast_init(test_child)
+            sofc1 = p2.compute_parameters_for_node(test_child)
+            self._cache.put(set(p_set), sofc1)
+        thumb_value = 0.0
+        if child_states_numb > 2:
+            parent_val = self._sample_path.structure.get_states_number(test_parent)
+            bool_mask_vals = np.isin(self._nodes, parent_set)
+            parents_vals = self._nodes_vals[bool_mask_vals]
+            thumb_value = self.compute_thumb_value(parent_val, child_states_numb, parents_vals)
+        # every cim of G1 is tested against the cims of G2 selected with the same sep-set combination
+        for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs):
+            cond_cims = sofc2.filter_cims_with_mask(cims_filter, p_comb)
+            for cim2 in cond_cims:
+                if not self.independence_test(child_states_numb, cim1, cim2, thumb_value, parent_indx, child_indx):
+                    return False
+        return True
+
+    def independence_test(self, child_states_numb: int, cim1: ConditionalIntensityMatrix,
+                          cim2: ConditionalIntensityMatrix, thumb_value: float, parent_indx, child_indx) -> bool:
+        """Compute the actual independence test using two cims.
+        The exponential test is performed first and, if the null hypothesis is not rejected,
+        the chi test is performed too.
+
+        When the child has more than two states and the sum of the diagonal of the transition matrix
+        of ``cim1`` divided by ``thumb_value`` is below ``_thumb_threshold``, the entry
+        ``[parent_indx][child_indx]`` of ``_removable_edges_matrix`` is set to False and the test
+        returns False without running the two tests.
+
+        :param child_states_numb: the cardinality of the test child
+        :type child_states_numb: int
+        :param cim1: a cim belonging to the graph without test parent
+        :type cim1: ConditionalIntensityMatrix
+        :param cim2: a cim belonging to the graph with test parent
+        :type cim2: ConditionalIntensityMatrix
+        :param thumb_value: the value to test against the thumb threshold
+        :type thumb_value: float
+        :param parent_indx: the row of ``_removable_edges_matrix`` related to the test parent
+        :param child_indx: the column of ``_removable_edges_matrix`` related to the test child
+        :return: True iff both tests do NOT reject the null hypothesis of independence. False otherwise.
+        :rtype: bool
+        """
+        trans_1 = cim1.state_transition_matrix
+        trans_2 = cim2.state_transition_matrix
+        r1s = trans_1.diagonal()
+        r2s = trans_2.diagonal()
+        rates_1 = cim1.cim
+        rates_2 = cim2.cim
+
+        # rule of thumb: the edge is marked as not removable and the test is not performed
+        if child_states_numb > 2 and (np.sum(np.diagonal(trans_1)) / thumb_value) < self._thumb_threshold:
+            self._removable_edges_matrix[parent_indx][child_indx] = False
+            return False
+
+        # exponential test: every ratio has to lie between the two quantiles of the F distribution
+        f_stats = rates_2.diagonal() / rates_1.diagonal()
+        half_alfa = self._exp_test_sign / 2
+        for state in range(child_states_numb):
+            if f_stats[state] < f_dist.ppf(half_alfa, r1s[state], r2s[state]):
+                return False
+            if f_stats[state] > f_dist.ppf(1 - half_alfa, r1s[state], r2s[state]):
+                return False
+
+        # chi test, on the transition matrices without their diagonals
+        size_1 = trans_1.shape[0]
+        size_2 = trans_2.shape[0]
+        off_diag_1 = trans_1[~np.eye(size_1, dtype=bool)].reshape(size_1, -1)
+        off_diag_2 = trans_2[~np.eye(size_2, dtype=bool)].reshape(size_2, -1)
+        chi_2_quantile = chi2_dist.ppf(1 - self._chi_test_alfa, child_states_numb - 1)
+        Ks = np.sqrt(r1s / r2s)
+        Ls = np.sqrt(r2s / r1s)
+        for state in range(child_states_numb):
+            weighted_diff = Ks[state] * off_diag_2[state] - Ls[state] * off_diag_1[state]
+            chi_stat = np.sum(np.power(weighted_diff, 2) / (off_diag_1[state] + off_diag_2[state]))
+            if chi_stat > chi_2_quantile:
+                return False
+        return True
+
+    def compute_thumb_value(self, parent_val, child_val, parent_set_vals):
+        """Compute the value to test against the thumb_threshold.
+
+        The result is ``(child_val - 1) ** 2 * parent_val`` multiplied by every
+        cardinality found in ``parent_set_vals``.
+
+        :param parent_val: test parent's variable cardinality
+        :type parent_val: int
+        :param child_val: test child's variable cardinality
+        :type child_val: int
+        :param parent_set_vals: the cardinalities of the nodes in the current sep-set
+        :type parent_set_vals: List
+        :return: the thumb value for the current independence test
+        :rtype: int
+        """
+        thumb = (child_val - 1) ** 2 * parent_val
+        for cardinality in parent_set_vals:
+            thumb = thumb * cardinality
+        return thumb
+
+    def one_iteration_of_CTPC_algorithm(self, var_id: str, tot_vars_count: int) -> typing.List:
+        """Run the CTPC algorithm for a single node, used as the ``test_child``.
+
+        :param var_id: the node label of the test child
+        :type var_id: string
+        :param tot_vars_count: value forwarded to the optimizer as ``tot_vars_count``
+        :type tot_vars_count: int
+        :return: the value returned by ``ConstraintBasedOptimizer.optimize_structure`` for ``var_id``
+        :rtype: List
+        """
+        return ConstraintBasedOptimizer(
+            node_id=var_id,
+            structure_estimator=self,
+            tot_vars_count=tot_vars_count,
+        ).optimize_structure()
+
+
+    def ctpc_algorithm(self, disable_multiprocessing: bool = False):
+        """Compute the CTPC algorithm over the entire net.
+
+        Every node in ``_nodes`` is processed by ``one_iteration_of_CTPC_algorithm``,
+        either in a process pool or sequentially in the calling process.
+
+        :param disable_multiprocessing: True to process the nodes sequentially, default to False
+        :type disable_multiprocessing: bool, optional
+        :return: the union of the edges returned for every node
+        :rtype: set
+        """
+        run_for_node = self.one_iteration_of_CTPC_algorithm
+        vars_count = self._sample_path.total_variables_count
+
+        # One worker per available CPU when running in parallel.
+        workers = multiprocessing.cpu_count()
+
+        # Remove all the edges from the structure before estimating.
+        self._sample_path.structure.clean_structure_edges()
+
+        # Estimate the best parents for each node.
+        if not disable_multiprocessing:
+            repeated_vars_count = [vars_count] * len(self._nodes)
+            with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor:
+                per_node_edges = executor.map(run_for_node, self._nodes, repeated_vars_count)
+        else:
+            print("DISABILITATO")
+            per_node_edges = [run_for_node(node, vars_count) for node in self._nodes]
+
+        return set(itertools.chain.from_iterable(per_node_edges))
+
+
+    def estimate_structure(self, disable_multiprocessing: bool = False):
+        """Estimate the structure by running the CTPC algorithm over the entire net.
+
+        :param disable_multiprocessing: forwarded unchanged to ``ctpc_algorithm``, default to False
+        :type disable_multiprocessing: bool, optional
+        :return: the value returned by ``ctpc_algorithm``
+        """
+        estimated_edges = self.ctpc_algorithm(disable_multiprocessing=disable_multiprocessing)
+        return estimated_edges
+
+
+
+
diff --git a/PyCTBN/build/lib/classes/estimators/structure_estimator.py b/PyCTBN/build/lib/classes/estimators/structure_estimator.py
new file mode 100644
index 0000000..fbf8ea9
--- /dev/null
+++ b/PyCTBN/build/lib/classes/estimators/structure_estimator.py
@@ -0,0 +1,187 @@
+
+import itertools
+import json
+import typing
+
+import matplotlib.pyplot as plt
+import networkx as nx
+import numpy as np
+from networkx.readwrite import json_graph
+
+from abc import ABC
+
+import abc
+
+from ..utility.cache import Cache
+from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
+from ..structure_graph.network_graph import NetworkGraph
+from .parameters_estimator import ParametersEstimator
+from ..structure_graph.sample_path import SamplePath
+from ..structure_graph.structure import Structure
+
+
+class StructureEstimator(object):
+    """Has the task of estimating the network structure given the trajectories in ``samplepath``.
+
+    :param sample_path: the _sample_path object containing the trajectories and the real structure
+    :type sample_path: SamplePath
+    :param known_edges: prior-knowledge edges, each one a pair of node labels, default to None
+    :type known_edges: List, optional
+    :_nodes: the nodes labels
+    :_nodes_vals: the nodes cardinalities
+    :_nodes_indxs: the nodes indexes
+    :_removable_edges_matrix: boolean matrix, False in the positions of the known edges
+    :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes``
+    """
+
+    def __init__(self, sample_path: SamplePath, known_edges: typing.List = None):
+        self._sample_path = sample_path
+        self._nodes = np.array(self._sample_path.structure.nodes_labels)
+        self._nodes_vals = self._sample_path.structure.nodes_values
+        self._nodes_indxs = self._sample_path.structure.nodes_indexes
+        self._removable_edges_matrix = self.build_removable_edges_matrix(known_edges)
+        self._complete_graph = StructureEstimator.build_complete_graph(self._sample_path.structure.nodes_labels)
+
+    def build_removable_edges_matrix(self, known_edges: typing.List):
+        """Builds a boolean matrix that tells whether an edge may be removed, based on the prior knowledge given.
+
+        Every entry starts as True; the entry ``[i][j]`` is set to False for each known edge, where
+        ``i`` and ``j`` are the indexes of the first and second label of the edge.
+
+        :param known_edges: the list of known edges (pairs of node labels); may be None or empty
+        :type known_edges: List
+        :return: a boolean matrix
+        :rtype: np.ndarray
+        """
+        tot_vars_count = self._sample_path.total_variables_count
+        complete_adj_matrix = np.full((tot_vars_count, tot_vars_count), True)
+        if known_edges:
+            for edge in known_edges:
+                i = self._sample_path.structure.get_node_indx(edge[0])
+                j = self._sample_path.structure.get_node_indx(edge[1])
+                complete_adj_matrix[i][j] = False
+        return complete_adj_matrix
+
+    @staticmethod
+    def build_complete_graph(node_ids: typing.List) -> nx.DiGraph:
+        """Builds a complete directed graph (no self loops) given the nodes labels in the list ``node_ids``:
+
+        :param node_ids: the list of nodes labels
+        :type node_ids: List
+        :return: a complete Digraph Object
+        :rtype: networkx.DiGraph
+        """
+        complete_graph = nx.DiGraph()
+        complete_graph.add_nodes_from(node_ids)
+        complete_graph.add_edges_from(itertools.permutations(node_ids, 2))
+        return complete_graph
+
+    @staticmethod
+    def generate_possible_sub_sets_of_size(u: typing.List, size: int, parent_label: str):
+        """Creates an iterator over all possible subsets of the list ``u`` of size ``size``,
+        that do not contain the node identified by ``parent_label``.
+
+        :param u: the list of nodes
+        :type u: List
+        :param size: the size of the subsets
+        :type size: int
+        :param parent_label: the node to exclude in the subsets generation
+        :type parent_label: string
+        :raises ValueError: if ``parent_label`` is not in ``u``
+        :return: an Iterator Object yielding lists
+        :rtype: Iterator
+        """
+        # Work on a copy so the caller's list is left untouched.
+        list_without_test_parent = u[:]
+        list_without_test_parent.remove(parent_label)
+        return map(list, itertools.combinations(list_without_test_parent, size))
+
+    def save_results(self) -> None:
+        """Save the estimated Structure to a .json file.
+
+        The file name is ``results_`` followed by the input dataset name (without extension) and the
+        dataset id; it is opened with a relative path, i.e. in the current working directory.
+        """
+        res = json_graph.node_link_data(self._complete_graph)
+        name = self._sample_path._importer.file_path.rsplit('/', 1)[-1]
+        name = name.split('.', 1)[0]
+        name += '_' + str(self._sample_path._importer.dataset_id())
+        name += '.json'
+        file_name = 'results_' + name
+        with open(file_name, 'w') as f:
+            json.dump(res, f)
+
+    def remove_diagonal_elements(self, matrix):
+        """Return ``matrix`` without its main diagonal, as an array with one column less.
+
+        :param matrix: the matrix to strip; assumed square and 2-D, with the memory layout the
+            stride arithmetic below expects -- TODO confirm against callers
+        :type matrix: numpy.ndarray
+        :return: an ``m x (m - 1)`` array
+        :rtype: numpy.ndarray
+        """
+        m = matrix.shape[0]
+        strided = np.lib.stride_tricks.as_strided
+        s0, s1 = matrix.strides
+        # Skipping the first element and stepping one row plus one column at a time makes
+        # every diagonal entry fall outside the strided window.
+        return strided(matrix.ravel()[1:], shape=(m - 1, m), strides=(s0 + s1, s1)).reshape(m, -1)
+
+    # NOTE: this class derives from ``object`` and not from ``abc.ABC``, so the decorator
+    # below is not enforced when the class is instantiated.
+    @abc.abstractmethod
+    def estimate_structure(self) -> typing.List:
+        """Abstract method to estimate the structure
+
+        :return: List of estimated edges
+        :rtype: Typing.List
+        """
+        pass
+
+    def adjacency_matrix(self) -> np.ndarray:
+        """Converts the estimated structure ``_complete_graph`` to a boolean adjacency matrix representation.
+
+        :return: The adjacency matrix of the graph ``_complete_graph``
+        :rtype: numpy.ndArray
+        """
+        # ``nx.adj_matrix`` was a deprecated alias and is gone in networkx 3.x;
+        # ``nx.adjacency_matrix`` is the name available in every release.
+        return nx.adjacency_matrix(self._complete_graph).toarray().astype(bool)
+
+    def spurious_edges(self) -> typing.List:
+        """Return the edges of the prior net structure in ``_sample_path.structure`` that are not
+        present in ``_complete_graph``.
+
+        NOTE(review): the difference is taken as ``real_graph - _complete_graph``, which reads as
+        "missing" rather than "spurious" edges -- confirm the intended direction before relying on it.
+
+        :raises RuntimeError: if no prior net structure is available
+        :return: A list containing the spurious edges
+        :rtype: List
+        """
+        if not self._sample_path.has_prior_net_structure:
+            raise RuntimeError("Can not compute spurious edges with no prior net structure!")
+        real_graph = nx.DiGraph()
+        real_graph.add_nodes_from(self._sample_path.structure.nodes_labels)
+        real_graph.add_edges_from(self._sample_path.structure.edges)
+        return nx.difference(real_graph, self._complete_graph).edges
+
+    def save_plot_estimated_structure_graph(self) -> None:
+        """Plot the estimated structure in a graphical model style and save it as a .png file.
+        Spurious edges are colored in red.
+
+        The file is named after the input dataset and the dataset id and is written with a
+        relative path, i.e. in the current working directory.
+        """
+        # Imported here because the module does not import ``os`` at file level;
+        # without it the final print raised NameError.
+        import os
+
+        graph_to_draw = nx.DiGraph()
+        spurious_edges = self.spurious_edges()
+        non_spurious_edges = list(set(self._complete_graph.edges) - set(spurious_edges))
+        print(non_spurious_edges)
+        # NOTE(review): colors follow the edge order of ``_complete_graph`` while the drawn graph
+        # is filled with the spurious edges first -- confirm the two orders are meant to match.
+        edges_colors = ['red' if edge in spurious_edges else 'black' for edge in self._complete_graph.edges]
+        graph_to_draw.add_edges_from(spurious_edges)
+        graph_to_draw.add_edges_from(non_spurious_edges)
+        pos = nx.spring_layout(graph_to_draw, k=0.5*1/np.sqrt(len(graph_to_draw.nodes())), iterations=50,scale=10)
+        options = {
+            "node_size": 2000,
+            "node_color": "white",
+            "edgecolors": "black",
+            'linewidths':2,
+            "with_labels":True,
+            "font_size":13,
+            'connectionstyle': 'arc3, rad = 0.1',
+            "arrowsize": 15,
+            "arrowstyle": '<|-',
+            "width": 1,
+            "edge_color":edges_colors,
+        }
+
+        nx.draw(graph_to_draw, pos, **options)
+        ax = plt.gca()
+        ax.margins(0.20)
+        plt.axis("off")
+        name = self._sample_path._importer.file_path.rsplit('/', 1)[-1]
+        name = name.split('.', 1)[0]
+        name += '_' + str(self._sample_path._importer.dataset_id())
+        name += '.png'
+        plt.savefig(name)
+        plt.clf()
+        print("Estimated Structure Plot Saved At: ", os.path.abspath(name))
+
+
+
+
+
diff --git a/PyCTBN/build/lib/classes/estimators/structure_score_based_estimator.py b/PyCTBN/build/lib/classes/estimators/structure_score_based_estimator.py
new file mode 100644
index 0000000..2903db3
--- /dev/null
+++ b/PyCTBN/build/lib/classes/estimators/structure_score_based_estimator.py
@@ -0,0 +1,244 @@
+
+import itertools
+import json
+import typing
+
+import networkx as nx
+import numpy as np
+from networkx.readwrite import json_graph
+
+from random import choice
+
+import concurrent.futures
+
+import copy
+
+from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
+from ..structure_graph.network_graph import NetworkGraph
+from .parameters_estimator import ParametersEstimator
+from .structure_estimator import StructureEstimator
+from ..structure_graph.sample_path import SamplePath
+from ..structure_graph.structure import Structure
+from .fam_score_calculator import FamScoreCalculator
+from ..optimizers.hill_climbing_search import HillClimbing
+from ..optimizers.tabu_search import TabuSearch
+
+
+import multiprocessing
+from multiprocessing import Pool
+
+
+
+
+class StructureScoreBasedEstimator(StructureEstimator):
+    """
+    Has the task of estimating the network structure given the trajectories in samplepath by
+    using a score based approach.
+
+    :param sample_path: the _sample_path object containing the trajectories and the real structure
+    :type sample_path: SamplePath
+    :param tau_xu: hyperparameter over the CTBN’s q parameters, default to 0.1
+    :type tau_xu: float, optional
+    :param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 1
+    :type alpha_xu: float, optional
+    :param known_edges: List of known edges, default to None (no known edges)
+    :type known_edges: List, optional
+
+    """
+
+    def __init__(self, sample_path: SamplePath, tau_xu: float = 0.1, alpha_xu: float = 1,
+                 known_edges: typing.List = None):
+        # ``None`` replaces the former mutable ``[]`` default; the parent class treats
+        # both as "no known edges".
+        super().__init__(sample_path, known_edges)
+        self.tau_xu = tau_xu
+        self.alpha_xu = alpha_xu
+
+    def estimate_structure(self, max_parents: int = None, iterations_number: int = 40,
+                           patience: int = None, tabu_length: int = None, tabu_rules_duration: int = None,
+                           optimizer: str = 'tabu', disable_multiprocessing: bool = False):
+        """
+        Compute the score-based algorithm to find the optimal structure
+
+        :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
+        :type max_parents: int, optional
+        :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
+        :type iterations_number: int, optional
+        :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None
+        :type patience: int, optional
+        :param tabu_length: maximum length of the data structures used in the optimization process, default to None
+        :type tabu_length: int, optional
+        :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None
+        :type tabu_rules_duration: int, optional
+        :param optimizer: name of the optimizer algorithm. Possible values: 'hill' (Hill climbing),'tabu' (tabu search), default to 'tabu'
+        :type optimizer: string, optional
+        :param disable_multiprocessing: true if you desire to disable the multiprocessing operations, default to False
+        :type disable_multiprocessing: Boolean, optional
+        :return: the set of the estimated edges
+        :rtype: set
+        """
+        # Save the true edges structure in tuples before the structure is emptied.
+        true_edges = copy.deepcopy(self._sample_path.structure.edges)
+        true_edges = set(map(tuple, true_edges))
+
+        # Remove all the edges from the structure.
+        self._sample_path.structure.clean_structure_edges()
+
+        estimate_parents = self.estimate_parents
+
+        n_nodes = len(self._nodes)
+
+        # ``executor.map`` needs one iterable per argument: repeat every setting once per node.
+        l_max_parents = [max_parents] * n_nodes
+        l_iterations_number = [iterations_number] * n_nodes
+        l_patience = [patience] * n_nodes
+        l_tabu_length = [tabu_length] * n_nodes
+        l_tabu_rules_duration = [tabu_rules_duration] * n_nodes
+        l_optimizer = [optimizer] * n_nodes
+
+        # Get the number of CPUs, used as the pool size.
+        cpu_count = multiprocessing.cpu_count()
+        print(f"CPU COUNT: {cpu_count}")
+
+        # Estimate the best parents for each node.
+        if disable_multiprocessing:
+            list_edges_partial = [estimate_parents(n, max_parents, iterations_number, patience, tabu_length,
+                                                   tabu_rules_duration, optimizer) for n in self._nodes]
+        else:
+            with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor:
+                list_edges_partial = executor.map(estimate_parents,
+                                                  self._nodes,
+                                                  l_max_parents,
+                                                  l_iterations_number,
+                                                  l_patience,
+                                                  l_tabu_length,
+                                                  l_tabu_rules_duration,
+                                                  l_optimizer)
+
+        # Concatenate all the edges lists.
+        set_list_edges = set(itertools.chain.from_iterable(list_edges_partial))
+
+        # Calculate precision and recall against the edges saved above.
+        n_added_fake_edges = len(set_list_edges.difference(true_edges))
+        n_missing_edges = len(true_edges.difference(set_list_edges))
+        n_true_positive = len(true_edges) - n_missing_edges
+
+        try:
+            precision = n_true_positive / (n_true_positive + n_added_fake_edges)
+            recall = n_true_positive / (n_true_positive + n_missing_edges)
+        except ZeroDivisionError as e:
+            # Only the two divisions can fail (empty true or estimated edge set);
+            # any other error is a real bug and must not be hidden.
+            print(f"errore: {e}")
+        else:
+            print(true_edges)
+            print(set_list_edges)
+            print(f"precision: {precision} ")
+            print(f"recall: {recall} ")
+
+        return set_list_edges
+
+    def estimate_parents(self, node_id: str, max_parents: int = None, iterations_number: int = 40,
+                         patience: int = 10, tabu_length: int = None, tabu_rules_duration: int = 5,
+                         optimizer: str = 'hill'):
+        """
+        Use the FamScore of a node in order to find the best parent nodes
+
+        :param node_id: current node's id
+        :type node_id: string
+        :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
+        :type max_parents: int, optional
+        :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
+        :type iterations_number: int, optional
+        :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to 10
+        :type patience: int, optional
+        :param tabu_length: maximum length of the data structures used in the optimization process, default to None
+        :type tabu_length: int, optional
+        :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to 5
+        :type tabu_rules_duration: int, optional
+        :param optimizer: name of the optimizer algorithm. 'tabu' selects tabu search, any other value
+            selects Hill climbing, default to 'hill'
+        :type optimizer: string, optional
+
+        :return: A list of the best edges for the current node
+        :rtype: List
+        """
+        # Choose the optimizer algorithm.
+        if optimizer == 'tabu':
+            search = TabuSearch(
+                node_id=node_id,
+                structure_estimator=self,
+                max_parents=max_parents,
+                iterations_number=iterations_number,
+                patience=patience,
+                tabu_length=tabu_length,
+                tabu_rules_duration=tabu_rules_duration)
+        else:
+            search = HillClimbing(
+                node_id=node_id,
+                structure_estimator=self,
+                max_parents=max_parents,
+                iterations_number=iterations_number,
+                patience=patience)
+
+        # Call the optimizer's function that calculates the current node's parents.
+        return search.optimize_structure()
+
+    def get_score_from_graph(self,
+                             graph: NetworkGraph,
+                             node_id: str):
+        """
+        Get the FamScore of a node
+
+        :param node_id: current node's id
+        :type node_id: string
+        :param graph: current graph to be computed
+        :type graph: class:'NetworkGraph'
+
+        :return: The FamScore for this graph structure
+        :rtype: float
+        """
+        # Initialize the graph for a single node.
+        graph.fast_init(node_id)
+
+        params_estimation = ParametersEstimator(self._sample_path.trajectories, graph)
+
+        # Initialize and compute the parameters for the node.
+        params_estimation.fast_init(node_id)
+        SoCims = params_estimation.compute_parameters_for_node(node_id)
+
+        # Calculate the FamScore for the node.
+        fam_score_obj = FamScoreCalculator()
+
+        score = fam_score_obj.get_fam_score(SoCims.actual_cims, tau_xu=self.tau_xu, alpha_xu=self.alpha_xu)
+
+        return score
+
+
+
+
diff --git a/PyCTBN/build/lib/classes/optimizers/__init__.py b/PyCTBN/build/lib/classes/optimizers/__init__.py
new file mode 100644
index 0000000..4162bf1
--- /dev/null
+++ b/PyCTBN/build/lib/classes/optimizers/__init__.py
@@ -0,0 +1,4 @@
+from .optimizer import Optimizer
+from .tabu_search import TabuSearch
+from .hill_climbing_search import HillClimbing
+from .constraint_based_optimizer import ConstraintBasedOptimizer
\ No newline at end of file
diff --git a/PyCTBN/build/lib/classes/optimizers/constraint_based_optimizer.py b/PyCTBN/build/lib/classes/optimizers/constraint_based_optimizer.py
new file mode 100644
index 0000000..65bc19c
--- /dev/null
+++ b/PyCTBN/build/lib/classes/optimizers/constraint_based_optimizer.py
@@ -0,0 +1,87 @@
+
+import itertools
+import json
+import typing
+
+import networkx as nx
+import numpy as np
+
+from random import choice
+
+from abc import ABC
+
+import copy
+
+
+from .optimizer import Optimizer
+from ..estimators.structure_estimator import StructureEstimator
+from ..structure_graph.network_graph import NetworkGraph
+
+
+class ConstraintBasedOptimizer(Optimizer):
+    """
+    Optimizer class that implements the CTPC Algorithm for a single node.
+
+    :param node_id: current node's id
+    :type node_id: string
+    :param structure_estimator: a structure estimator object with the information about the net
+    :type structure_estimator: class:'StructureEstimator'
+    :param tot_vars_count: number of variables in the dataset
+    :type tot_vars_count: int
+    """
+    def __init__(self,
+            node_id:str,
+            structure_estimator: StructureEstimator,
+            tot_vars_count:int
+            ):
+        """
+        Constructor
+        """
+        super().__init__(node_id, structure_estimator)
+        self.tot_vars_count = tot_vars_count
+
+
+
+    def optimize_structure(self):
+        """
+        Compute Optimization process for a structure_estimator by using a CTPC Algorithm
+
+        Starts from a graph where every other node is a parent of ``node_id`` and removes the
+        edge of each parent for which ``structure_estimator.complete_test`` returns True.
+        The estimator's ``_cache`` is cleared before returning.
+
+        :return: the estimated structure for the node
+        :rtype: List
+        """
+        print("##################TESTING VAR################", self.node_id)
+
+        graph = NetworkGraph(self.structure_estimator._sample_path.structure)
+
+        other_nodes = [node for node in self.structure_estimator._sample_path.structure.nodes_labels if node != self.node_id]
+
+        # Every other node starts as a candidate parent of the current node.
+        for possible_parent in other_nodes:
+            graph.add_edges([(possible_parent,self.node_id)])
+
+
+        # ``u`` is the same list object as ``other_nodes``: removals below shrink both.
+        u = other_nodes
+        child_states_numb = self.structure_estimator._sample_path.structure.get_states_number(self.node_id)
+        # ``b`` is the size of the subsets requested from generate_possible_sub_sets_of_size.
+        b = 0
+        while b < len(u):
+            parent_indx = 0
+            while parent_indx < len(u):
+                removed = False
+                test_parent = u[parent_indx]
+                i = self.structure_estimator._sample_path.structure.get_node_indx(test_parent)
+                j = self.structure_estimator._sample_path.structure.get_node_indx(self.node_id)
+                # Entries set to False in the matrix are never tested, so their edges are kept.
+                if self.structure_estimator._removable_edges_matrix[i][j]:
+                    S = StructureEstimator.generate_possible_sub_sets_of_size(u, b, test_parent)
+                    for parents_set in S:
+                        if self.structure_estimator.complete_test(test_parent, self.node_id, parents_set, child_states_numb, self.tot_vars_count,i,j):
+                            graph.remove_edges([(test_parent, self.node_id)])
+                            u.remove(test_parent)
+                            removed = True
+                            break
+                # After a removal the next candidate has shifted into ``parent_indx``,
+                # so the index only advances when nothing was removed.
+                if not removed:
+                    parent_indx += 1
+            b += 1
+        self.structure_estimator._cache.clear()
+        return graph.edges
\ No newline at end of file
diff --git a/PyCTBN/build/lib/classes/optimizers/hill_climbing_search.py b/PyCTBN/build/lib/classes/optimizers/hill_climbing_search.py
new file mode 100644
index 0000000..6783be0
--- /dev/null
+++ b/PyCTBN/build/lib/classes/optimizers/hill_climbing_search.py
@@ -0,0 +1,135 @@
+
+import itertools
+import json
+import typing
+
+import networkx as nx
+import numpy as np
+
+from random import choice
+
+from abc import ABC
+
+
+from .optimizer import Optimizer
+from ..estimators.structure_estimator import StructureEstimator
+from ..structure_graph.network_graph import NetworkGraph
+
+
+class HillClimbing(Optimizer):
+    """
+    Optimizer class that implement Hill Climbing Search
+
+
+    :param node_id: current node's id
+    :type node_id: string
+    :param structure_estimator: a structure estimator object with the information about the net
+    :type structure_estimator: class:'StructureEstimator'
+    :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
+    :type max_parents: int, optional
+    :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
+    :type iterations_number: int, optional
+    :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None
+    :type patience: int, optional
+
+    """
+    def __init__(self,
+                 node_id: str,
+                 structure_estimator: StructureEstimator,
+                 max_parents: int = None,
+                 iterations_number: int = 40,
+                 patience: int = None
+                 ):
+        """
+        Constructor
+        """
+        super().__init__(node_id, structure_estimator)
+        self.max_parents = max_parents
+        self.iterations_number = iterations_number
+        self.patience = patience
+
+    def optimize_structure(self) -> typing.List:
+        """
+        Compute Optimization process for a structure_estimator by using a Hill Climbing Algorithm
+
+        At every iteration the edge from a randomly chosen candidate parent is toggled; the
+        change is kept only when the score returned by ``get_score_from_graph`` improves.
+
+        :return: the estimated structure for the node
+        :rtype: List
+        """
+        # Create the graph for the single node.
+        graph = NetworkGraph(self.structure_estimator._sample_path.structure)
+
+        # Get the index for the current node.
+        node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id)
+
+        # Parents coming from prior knowledge: they are added up front and never proposed again.
+        prior_parents = set()
+        for i in range(len(self.structure_estimator._removable_edges_matrix)):
+            if not self.structure_estimator._removable_edges_matrix[i][node_index]:
+                parent_id = self.structure_estimator._sample_path._structure.get_node_id(i)
+                prior_parents.add(parent_id)
+
+                # Add the edge to the starting structure.
+                graph.add_edges([(parent_id, self.node_id)])
+
+        # Get all the possible parents.
+        other_nodes = [node for node in
+                       self.structure_estimator._sample_path.structure.nodes_labels if
+                       node != self.node_id and
+                       node not in prior_parents]
+
+        actual_best_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)
+
+        # ``choice`` raises IndexError on an empty list: with no candidate parent there is
+        # nothing to search, so the loop is skipped.
+        iterations = self.iterations_number if other_nodes else 0
+
+        patience_count = 0
+        for _ in range(iterations):
+            # Choose a new random edge.
+            current_new_parent = choice(other_nodes)
+            current_edge = (current_new_parent, self.node_id)
+            added = False
+            parent_removed = None
+
+            if graph.has_edge(current_edge):
+                graph.remove_edges([current_edge])
+            else:
+                # Check the max_parents constraint: make room by dropping a random parent.
+                if self.max_parents is not None:
+                    parents_list = graph.get_parents_by_id(self.node_id)
+                    if len(parents_list) >= self.max_parents:
+                        parent_removed = (choice(parents_list), self.node_id)
+                        graph.remove_edges([parent_removed])
+                graph.add_edges([current_edge])
+                added = True
+
+            current_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)
+
+            if current_score > actual_best_score:
+                # Update the current best score.
+                actual_best_score = current_score
+                patience_count = 0
+            else:
+                # Undo the last update.
+                if added:
+                    graph.remove_edges([current_edge])
+                    # If a parent was removed, add it again to the graph.
+                    if parent_removed is not None:
+                        graph.add_edges([parent_removed])
+                else:
+                    graph.add_edges([current_edge])
+                # Update the patience count.
+                patience_count += 1
+
+            if self.patience is not None and patience_count > self.patience:
+                break
+
+        print(f"finito variabile: {self.node_id}")
+        return graph.edges
\ No newline at end of file
diff --git a/PyCTBN/build/lib/classes/optimizers/optimizer.py b/PyCTBN/build/lib/classes/optimizers/optimizer.py
new file mode 100644
index 0000000..36445c0
--- /dev/null
+++ b/PyCTBN/build/lib/classes/optimizers/optimizer.py
@@ -0,0 +1,39 @@
+
+import itertools
+import json
+import typing
+
+import networkx as nx
+import numpy as np
+
+import abc
+
+from ..estimators.structure_estimator import StructureEstimator
+
+
+
+class Optimizer(abc.ABC):
+    """
+    Interface class shared by all the PyCTBN optimizers.
+
+    :param node_id: the node label
+    :type node_id: string
+    :param structure_estimator: A structureEstimator Object to predict the structure
+    :type structure_estimator: class:'StructureEstimator'
+
+    """
+
+    def __init__(self, node_id: str, structure_estimator: StructureEstimator):
+        self.node_id, self.structure_estimator = node_id, structure_estimator
+
+    @abc.abstractmethod
+    def optimize_structure(self) -> typing.List:
+        """
+        Compute Optimization process for a structure_estimator
+
+        :return: the estimated structure for the node
+        :rtype: List
+        """
diff --git a/PyCTBN/build/lib/classes/optimizers/tabu_search.py b/PyCTBN/build/lib/classes/optimizers/tabu_search.py
new file mode 100644
index 0000000..e15dd40
--- /dev/null
+++ b/PyCTBN/build/lib/classes/optimizers/tabu_search.py
@@ -0,0 +1,199 @@
+
+import itertools
+import json
+import typing
+
+import networkx as nx
+import numpy as np
+
+from random import choice,sample
+
+from abc import ABC
+
+
+from .optimizer import Optimizer
+from ..estimators.structure_estimator import StructureEstimator
+from ..structure_graph.network_graph import NetworkGraph
+
+import queue
+
+
+class TabuSearch(Optimizer):
+    """
+    Optimizer class that implement Tabu Search
+
+
+    :param node_id: current node's id
+    :type node_id: string
+    :param structure_estimator: a structure estimator object with the information about the net
+    :type structure_estimator: class:'StructureEstimator'
+    :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
+    :type max_parents: int, optional
+    :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
+    :type iterations_number: int, optional
+    :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None
+    :type patience: int, optional
+    :param tabu_length: maximum length of the data structures used in the optimization process.
+        If None, the number of candidate parents is used, default to None
+    :type tabu_length: int, optional
+    :param tabu_rules_duration: number of iterations in which each rule keeps its value.
+        If None, the number of candidate parents is used, default to None
+    :type tabu_rules_duration: int, optional
+
+    """
+    def __init__(self,
+                 node_id: str,
+                 structure_estimator: StructureEstimator,
+                 max_parents: int = None,
+                 iterations_number: int = 40,
+                 patience: int = None,
+                 tabu_length: int = None,
+                 tabu_rules_duration=None
+                 ):
+        """
+        Constructor
+        """
+        super().__init__(node_id, structure_estimator)
+        self.max_parents = max_parents
+        self.iterations_number = iterations_number
+        self.patience = patience
+        self.tabu_length = tabu_length
+        self.tabu_rules_duration = tabu_rules_duration
+
+    def optimize_structure(self) -> typing.List:
+        """
+        Compute Optimization process for a structure_estimator by using a Tabu Search Algorithm
+
+        At every iteration the edge from a randomly chosen non-tabu candidate parent is toggled;
+        the change is kept only when the score returned by ``get_score_from_graph`` improves.
+        The chosen parent then enters the tabu list.
+
+        :return: the estimated structure for the node
+        :rtype: List
+        """
+        print(f"tabu search is processing the structure of {self.node_id}")
+
+        # Create the graph for the single node.
+        graph = NetworkGraph(self.structure_estimator._sample_path.structure)
+
+        # Get the index for the current node.
+        node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id)
+
+        # Parents coming from prior knowledge: they are added up front and never proposed again.
+        prior_parents = set()
+        for i in range(len(self.structure_estimator._removable_edges_matrix)):
+            if not self.structure_estimator._removable_edges_matrix[i][node_index]:
+                parent_id = self.structure_estimator._sample_path._structure.get_node_id(i)
+                prior_parents.add(parent_id)
+
+                # Add the edge to the starting structure.
+                graph.add_edges([(parent_id, self.node_id)])
+
+        # Get all the possible parents.
+        other_nodes = {node for node in
+                       self.structure_estimator._sample_path.structure.nodes_labels if
+                       node != self.node_id and
+                       node not in prior_parents}
+
+        # Calculate the score of the starting structure.
+        actual_best_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)
+
+        # Initialize tabu_length and tabu_rules_duration if None.
+        if self.tabu_length is None:
+            self.tabu_length = len(other_nodes)
+
+        if self.tabu_rules_duration is None:
+            # The default used to be stored under a misspelled attribute, which left
+            # ``tabu_rules_duration`` as None and broke the modulo in the loop below.
+            self.tabu_rules_duration = len(other_nodes)
+
+        # Initialize the data structures: the queue keeps the insertion order,
+        # the set gives fast membership tests.
+        tabu_set = set()
+        tabu_queue = queue.Queue()
+
+        # With no candidate parent the loop would block forever on ``tabu_queue.get()``
+        # (empty queue), so it is skipped.
+        iterations = self.iterations_number if other_nodes else 0
+
+        patience_count = 0
+        tabu_count = 0
+        for _ in range(iterations):
+
+            current_possible_nodes = other_nodes.difference(tabu_set)
+
+            # Choose a new random edge according to the tabu restriction.
+            if current_possible_nodes:
+                # ``random.sample`` no longer accepts a set (TypeError since Python 3.11).
+                current_new_parent = choice(tuple(current_possible_nodes))
+            else:
+                # Every candidate is tabu: release the oldest one and use it.
+                current_new_parent = tabu_queue.get()
+                tabu_set.remove(current_new_parent)
+
+            current_edge = (current_new_parent, self.node_id)
+            added = False
+            parent_removed = None
+
+            if graph.has_edge(current_edge):
+                graph.remove_edges([current_edge])
+            else:
+                # Check the max_parents constraint: make room by dropping a random parent.
+                if self.max_parents is not None:
+                    parents_list = graph.get_parents_by_id(self.node_id)
+                    if len(parents_list) >= self.max_parents:
+                        parent_removed = (choice(parents_list), self.node_id)
+                        graph.remove_edges([parent_removed])
+                graph.add_edges([current_edge])
+                added = True
+
+            current_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)
+
+            if current_score > actual_best_score:
+                # Update the current best score.
+                actual_best_score = current_score
+                patience_count = 0
+            else:
+                # Undo the last update.
+                if added:
+                    graph.remove_edges([current_edge])
+                    # If a parent was removed, add it again to the graph.
+                    if parent_removed is not None:
+                        graph.add_edges([parent_removed])
+                else:
+                    graph.add_edges([current_edge])
+                # Update the patience count.
+                patience_count += 1
+
+            # Keep the tabu list within ``tabu_length`` before adding the new entry.
+            if tabu_queue.qsize() >= self.tabu_length:
+                current_removed = tabu_queue.get()
+                tabu_set.remove(current_removed)
+            # Add the node to the tabu list.
+            tabu_queue.put(current_new_parent)
+            tabu_set.add(current_new_parent)
+
+            tabu_count += 1
+
+            # Every tabu_rules_duration steps remove an item from the tabu list.
+            if tabu_count % self.tabu_rules_duration == 0:
+                if tabu_queue.qsize() > 0:
+                    current_removed = tabu_queue.get()
+                    tabu_set.remove(current_removed)
+                tabu_count = 0
+
+            if self.patience is not None and patience_count > self.patience:
+                break
+
+        print(f"finito variabile: {self.node_id}")
+        return graph.edges
\ No newline at end of file
diff --git a/PyCTBN/build/lib/classes/structure_graph/__init__.py b/PyCTBN/build/lib/classes/structure_graph/__init__.py
new file mode 100644
index 0000000..85f18a2
--- /dev/null
+++ b/PyCTBN/build/lib/classes/structure_graph/__init__.py
@@ -0,0 +1,6 @@
+from .conditional_intensity_matrix import ConditionalIntensityMatrix
+from .network_graph import NetworkGraph
+from .sample_path import SamplePath
+from .set_of_cims import SetOfCims
+from .structure import Structure
+from .trajectory import Trajectory
\ No newline at end of file
diff --git a/PyCTBN/build/lib/classes/structure_graph/conditional_intensity_matrix.py b/PyCTBN/build/lib/classes/structure_graph/conditional_intensity_matrix.py
new file mode 100644
index 0000000..4abfdd0
--- /dev/null
+++ b/PyCTBN/build/lib/classes/structure_graph/conditional_intensity_matrix.py
@@ -0,0 +1,42 @@
+import numpy as np
+
+
+class ConditionalIntensityMatrix(object):
+    """Conditional Intensity Matrix (CIM) of a node: aggregates the state residence times vector,
+    the state transition counts matrix and the CIM computed from them.
+
+    :param state_residence_times: state residence times vector
+    :type state_residence_times: numpy.ndarray
+    :param state_transition_matrix: the transitions count matrix
+    :type state_transition_matrix: numpy.ndarray
+    :_cim: the actual cim of the node
+    """
+    def __init__(self, state_residence_times: np.array, state_transition_matrix: np.array):
+        """Stores both inputs and seeds ``_cim`` with a float64 copy of the transition counts.
+        """
+        self._state_residence_times = state_residence_times
+        self._state_transition_matrix = state_transition_matrix
+        self._cim = state_transition_matrix.astype(np.float64)
+
+    def compute_cim_coefficients(self) -> None:
+        """Negates the diagonal of ``_cim``, then replaces every row x with (_cim[x, :] + 1) / (T[x] + 1),
+        where T is the state residence times vector. The class member ``_cim`` will contain the result.
+        """
+        np.fill_diagonal(self._cim, -self._cim.diagonal())
+        self._cim = ((self._cim.T + 1) / (self._state_residence_times + 1)).T
+
+    @property
+    def state_residence_times(self) -> np.ndarray:
+        return self._state_residence_times
+
+    @property
+    def state_transition_matrix(self) -> np.ndarray:
+        return self._state_transition_matrix
+
+    @property
+    def cim(self) -> np.ndarray:
+        return self._cim
+
+    def __repr__(self):
+        return f'CIM:\n{self.cim}'
+
diff --git a/PyCTBN/build/lib/classes/structure_graph/network_graph.py b/PyCTBN/build/lib/classes/structure_graph/network_graph.py
new file mode 100644
index 0000000..623981d
--- /dev/null
+++ b/PyCTBN/build/lib/classes/structure_graph/network_graph.py
@@ -0,0 +1,293 @@
+
+import typing
+
+import networkx as nx
+import numpy as np
+
+from .structure import Structure
+
+
+class NetworkGraph(object):
+    """Abstracts the infos contained in the Structure class in the form of a directed graph.
+    Has the task of creating all the necessary filtering and indexing structures for parameters estimation
+
+    :param graph_struct: the ``Structure`` object from which infos about the net will be extracted
+    :type graph_struct: Structure
+    :_graph: directed graph
+    :_aggregated_info_about_nodes_parents: a structure that contains all the necessary infos
+        about every parents of the node of which all the indexing and filtering structures will be constructed.
+    :_time_scalar_indexing_structure: the indexing structure for state res time estimation
+    :_transition_scalar_indexing_structure: the indexing structure for transition computation
+    :_time_filtering: the columns filtering structure used in the computation of the state res times
+    :_transition_filtering: the columns filtering structure used in the computation of the transition
+        from one state to another
+    :_p_combs_structure: all the possible parents states combination for the node of interest
+    """
+
+    def __init__(self, graph_struct: Structure):
+        """Constructor Method."""
+        self._graph_struct = graph_struct
+        self._graph = nx.DiGraph()
+        self._aggregated_info_about_nodes_parents = None
+        self._time_scalar_indexing_structure = None
+        self._transition_scalar_indexing_structure = None
+        self._time_filtering = None
+        self._transition_filtering = None
+        self._p_combs_structure = None
+
+    def init_graph(self):
+        # NOTE(review): legacy whole-graph initialisation. It relies on members (``_nodes_labels``,
+        # ``graph_struct``, ``build_fancy_indexing_structure``, ...) that this class does not define.
+        self.add_nodes(self._nodes_labels)
+        self.add_edges(self.graph_struct.edges)
+        self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes()
+        self._fancy_indexing = self.build_fancy_indexing_structure(0)
+        self.build_scalar_indexing_structures()
+        self.build_time_columns_filtering_structure()
+        self.build_transition_columns_filtering_structure()
+        self._p_combs_structure = self.build_p_combs_structure()
+
+    def fast_init(self, node_id: str) -> None:
+        """Initializes all the necessary structures for parameters estimation of the node identified by the label
+        node_id
+
+        :param node_id: the label of the node
+        :type node_id: string
+        """
+        self.add_nodes(self._graph_struct.nodes_labels)
+        self.add_edges(self._graph_struct.edges)
+        self._aggregated_info_about_nodes_parents = self.get_ordered_by_indx_set_of_parents(node_id)
+        p_indxs = self._aggregated_info_about_nodes_parents[1]
+        p_vals = self._aggregated_info_about_nodes_parents[2]
+        node_states = self.get_states_number(node_id)
+        node_indx = self.get_node_indx(node_id)
+        cols_number = self._graph_struct.total_variables_number
+        self._time_scalar_indexing_structure = NetworkGraph.\
+            build_time_scalar_indexing_structure_for_a_node(node_states, p_vals)
+        self._transition_scalar_indexing_structure = NetworkGraph.\
+            build_transition_scalar_indexing_structure_for_a_node(node_states, p_vals)
+        self._time_filtering = NetworkGraph.build_time_columns_filtering_for_a_node(node_indx, p_indxs)
+        self._transition_filtering = NetworkGraph.build_transition_filtering_for_a_node(node_indx, p_indxs, cols_number)
+        self._p_combs_structure = NetworkGraph.build_p_comb_structure_for_a_node(p_vals)
+
+    def add_nodes(self, list_of_nodes: typing.List) -> None:
+        """Adds the nodes to the ``_graph`` contained in the list of nodes ``list_of_nodes``.
+        Sets all the properties that identify a node (index, positional index, cardinality)
+
+        :param list_of_nodes: the nodes to add to ``_graph``
+        :type list_of_nodes: List
+        """
+        nodes_indxs = self._graph_struct.nodes_indexes
+        nodes_vals = self._graph_struct.nodes_values
+        # ``pos_indx`` is the position of the node inside ``list_of_nodes``.
+        node_infos = zip(list_of_nodes, nodes_indxs, nodes_vals)
+        for pos, (node_id, node_indx, node_val) in enumerate(node_infos):
+            self._graph.add_node(node_id, indx=node_indx, val=node_val, pos_indx=pos)
+
+    def has_edge(self, edge: tuple) -> bool:
+        """Checks if the graph contains a specific edge.
+
+        :param edge: a tuple ``(from_node, to_node)`` that represents the edge
+        :type edge: tuple
+        :return: True if the edge is in the graph, False otherwise
+        :rtype: bool
+        """
+        return self._graph.has_edge(edge[0], edge[1])
+
+    def add_edges(self, list_of_edges: typing.List) -> None:
+        """Add the edges to the ``_graph`` contained in the list ``list_of_edges``.
+
+        :param list_of_edges: the list of tuples containing the edges
+        :type list_of_edges: List
+        """
+        self._graph.add_edges_from(list_of_edges)
+
+    def remove_node(self, node_id: str) -> None:
+        """Remove the node ``node_id`` from all the class members.
+        Initialize all the filtering/indexing structures.
+        """
+        self._graph.remove_node(node_id)
+        self._graph_struct.remove_node(node_id)
+        self.clear_indexing_filtering_structures()
+
+    def clear_indexing_filtering_structures(self) -> None:
+        """Initialize all the filtering/indexing structures.
+        """
+        self._aggregated_info_about_nodes_parents = None
+        self._time_scalar_indexing_structure = None
+        self._transition_scalar_indexing_structure = None
+        self._time_filtering = None
+        self._transition_filtering = None
+        self._p_combs_structure = None
+
+    def get_ordered_by_indx_set_of_parents(self, node: str) -> typing.Tuple:
+        """Builds the aggregated structure that holds all the infos relative to the parent set of the node, namely
+        (parents_labels, parents_indexes, parents_cardinalities).
+
+        :param node: the label of the node
+        :type node: string
+        :return: a tuple containing all the parent set infos
+        :rtype: Tuple
+        """
+        parents = self.get_parents_by_id(node)
+        # Sort the parents by their position in the labels list, so that labels, indexes and
+        # cardinalities are aligned with each other and with the order of the variables.
+        label_pos = {label: pos for pos, label in enumerate(self._graph_struct.nodes_labels)}
+        sorted_parents = sorted(parents, key=label_pos.__getitem__)
+        p_indxes = [self.get_node_indx(parent) for parent in sorted_parents]
+        p_values = [self.get_states_number(parent) for parent in sorted_parents]
+        return sorted_parents, p_indxes, p_values
+
+    def remove_edges(self, list_of_edges: typing.List) -> None:
+        """Remove from the graph the edges contained in the list list_of_edges.
+
+        :param list_of_edges: The edges to remove from the graph
+        :type list_of_edges: List
+        """
+        self._graph.remove_edges_from(list_of_edges)
+
+    @staticmethod
+    def build_time_scalar_indexing_structure_for_a_node(node_states: int,
+                                                        parents_vals: typing.List) -> np.ndarray:
+        """Builds an indexing structure for the computation of state residence times values.
+
+        :param node_states: the node cardinality
+        :type node_states: int
+        :param parents_vals: the cardinalities of the node's parents
+        :type parents_vals: List
+        :return: The time indexing structure
+        :rtype: numpy.ndArray
+        """
+        T_vector = np.array([node_states])
+        T_vector = np.append(T_vector, parents_vals)
+        T_vector = T_vector.cumprod().astype(int)
+        return T_vector
+
+    @staticmethod
+    def build_transition_scalar_indexing_structure_for_a_node(node_states_number: int, parents_vals: typing.List) \
+            -> np.ndarray:
+        """Builds an indexing structure for the computation of state transitions values.
+
+        :param node_states_number: the node cardinality
+        :type node_states_number: int
+        :param parents_vals: the cardinalities of the node's parents
+        :type parents_vals: List
+        :return: The transition indexing structure
+        :rtype: numpy.ndArray
+        """
+        M_vector = np.array([node_states_number,
+                             node_states_number])
+        M_vector = np.append(M_vector, parents_vals)
+        M_vector = M_vector.cumprod().astype(int)
+        return M_vector
+
+    @staticmethod
+    def build_time_columns_filtering_for_a_node(node_indx: int, p_indxs: typing.List) -> np.ndarray:
+        """
+        Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs``
+        in the dataset.
+        This structure will be used in the computation of the state res times.
+        :param node_indx: the index of the node
+        :type node_indx: int
+        :param p_indxs: the indexes of the node's parents
+        :type p_indxs: List
+        :return: The filtering structure for times estimation
+        :rtype: numpy.ndArray
+        """
+        return np.append(np.array([node_indx], dtype=int), p_indxs).astype(int)
+
+    @staticmethod
+    def build_transition_filtering_for_a_node(node_indx: int, p_indxs: typing.List, nodes_number: int) \
+            -> np.ndarray:
+        """Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs``
+        in the dataset.
+        This structure will be used in the computation of the state transitions values.
+        :param node_indx: the index of the node
+        :type node_indx: int
+        :param p_indxs: the indexes of the node's parents
+        :type p_indxs: List
+        :param nodes_number: the total number of nodes in the dataset
+        :type nodes_number: int
+        :return: The filtering structure for transitions estimation
+        :rtype: numpy.ndArray
+        """
+        return np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=int)
+
+    @staticmethod
+    def build_p_comb_structure_for_a_node(parents_values: typing.List) -> np.ndarray:
+        """Builds the combinatorial structure that contains the combinations of all the values contained in
+        ``parents_values``.
+
+        Column ``k`` of the result holds the states of the k-th parent and the first parent is the fastest
+        varying one (presumably the ordering expected by the scalar indexing structures - TODO confirm).
+        Example: ``[2, 3]`` gives the rows ``[0, 0], [1, 0], [0, 1], [1, 1], [0, 2], [1, 2]``.
+        When ``parents_values`` is empty an empty matrix of shape (1, 0) is returned.
+
+        :param parents_values: the cardinalities of the nodes
+        :type parents_values: List
+        :return: A numpy matrix containing a grid of the combinations
+        :rtype: numpy.ndArray
+        """
+        if len(parents_values) > 0:
+            tmp = [list(range(val)) for val in parents_values]
+            # 'ij' indexing keeps column k bound to parent k even when the parents have different
+            # cardinalities; after the transpose the first parent is the fastest varying one.
+            parents_comb = np.array(np.meshgrid(*tmp, indexing='ij')).T.reshape(-1, len(parents_values))
+        else:
+            parents_comb = np.array([[]], dtype=int)
+        return parents_comb
+
+    def get_parents_by_id(self, node_id) -> typing.List:
+        """Returns a list of labels of the parents of the node ``node_id``
+
+        :param node_id: the node label
+        :type node_id: string
+        :return: a List of labels of the parents
+        :rtype: List
+        """
+        return list(self._graph.predecessors(node_id))
+
+    def get_states_number(self, node_id) -> int:
+        return self._graph.nodes[node_id]['val']
+
+    def get_node_indx(self, node_id) -> int:
+        return self._graph.nodes[node_id]['indx']
+
+    def get_positional_node_indx(self, node_id) -> int:
+        return self._graph.nodes[node_id]['pos_indx']
+
+    @property
+    def nodes(self) -> typing.List:
+        return self._graph_struct.nodes_labels
+
+    @property
+    def edges(self) -> typing.List:
+        return list(self._graph.edges)
+
+    @property
+    def nodes_indexes(self) -> np.ndarray:
+        return self._graph_struct.nodes_indexes
+
+    @property
+    def nodes_values(self) -> np.ndarray:
+        return self._graph_struct.nodes_values
+
+    @property
+    def time_scalar_indexing_strucure(self) -> np.ndarray:
+        return self._time_scalar_indexing_structure
+
+    @property
+    def time_filtering(self) -> np.ndarray:
+        return self._time_filtering
+
+    @property
+    def transition_scalar_indexing_structure(self) -> np.ndarray:
+        return self._transition_scalar_indexing_structure
+
+    @property
+    def transition_filtering(self) -> np.ndarray:
+        return self._transition_filtering
+
+    @property
+    def p_combs(self) -> np.ndarray:
+        return self._p_combs_structure
diff --git a/PyCTBN/build/lib/classes/structure_graph/sample_path.py b/PyCTBN/build/lib/classes/structure_graph/sample_path.py
new file mode 100644
index 0000000..80b51d9
--- /dev/null
+++ b/PyCTBN/build/lib/classes/structure_graph/sample_path.py
@@ -0,0 +1,91 @@
+
+
+import numpy as np
+import pandas as pd
+
+from .structure import Structure
+from .trajectory import Trajectory
+from ..utility.abstract_importer import AbstractImporter
+
+
+
+class SamplePath(object):
+    """Aggregates all the information about the trajectories, the real structure of the sampled net and the
+    variables cardinalities. Has the task of creating the objects ``Trajectory`` and ``Structure`` that will
+    contain the mentioned data.
+
+    :param importer: the Importer object which contains the imported and processed data
+    :type importer: AbstractImporter
+    :_trajectories: the ``Trajectory`` object that will contain all the concatenated trajectories
+    :_structure: the ``Structure`` Object that will contain all the structural infos about the net
+    :_total_variables_count: the number of variables in the net
+    """
+    def __init__(self, importer: AbstractImporter):
+        """Checks that the importer already holds the variables frame and non-empty samples.
+        """
+        self._importer = importer
+        variables = importer._df_variables
+        if variables is None or importer._concatenated_samples is None:
+            raise RuntimeError('The importer object has to contain the all processed data!')
+        samples = importer._concatenated_samples
+        if variables.empty:
+            raise RuntimeError('The importer object has to contain the all processed data!')
+        frame_is_empty = isinstance(samples, pd.DataFrame) and samples.empty
+        array_is_empty = isinstance(samples, np.ndarray) and samples.size == 0
+        if frame_is_empty or array_is_empty:
+            raise RuntimeError('The importer object has to contain the all processed data!')
+        self._trajectories = None
+        self._structure = None
+        self._total_variables_count = None
+
+    def build_trajectories(self) -> None:
+        """Builds the Trajectory object that will contain all the trajectories.
+        Afterwards asks the ``_importer`` Object to clear its concatenated frame.
+        """
+        importer = self._importer
+        columns_list = importer.build_list_of_samples_array(importer.concatenated_samples)
+        self._trajectories = Trajectory(columns_list, len(importer.sorter) + 1)
+        importer.clear_concatenated_frame()
+
+    def build_structure(self) -> None:
+        """
+        Builds the ``Structure`` object that aggregates all the infos about the net.
+        """
+        importer = self._importer
+        labels = importer.variables.iloc[:, 0].to_list()
+        if importer.sorter != labels:
+            raise RuntimeError("The Dataset columns order have to match the order of labels in the variables Frame!")
+
+        self._total_variables_count = len(importer.sorter)
+        indxs = importer.variables.index.to_numpy()
+        vals = importer.variables.iloc[:, 1].to_numpy()
+        # A missing or empty prior structure frame means that no edge is known.
+        prior_net = importer.structure
+        has_edges = prior_net is not None and not prior_net.empty
+        edges = list(prior_net.to_records(index=False)) if has_edges else []
+        self._structure = Structure(labels, indxs, vals, edges, self._total_variables_count)
+
+    def clear_memory(self):
+        self._importer._raw_data = []
+
+    @property
+    def trajectories(self) -> Trajectory:
+        return self._trajectories
+
+    @property
+    def structure(self) -> Structure:
+        return self._structure
+
+    @property
+    def total_variables_count(self) -> int:
+        return self._total_variables_count
+
+    @property
+    def has_prior_net_structure(self) -> bool:
+        return bool(self._structure.edges)
+
+
+
+
+
+
diff --git a/PyCTBN/build/lib/classes/structure_graph/set_of_cims.py b/PyCTBN/build/lib/classes/structure_graph/set_of_cims.py
new file mode 100644
index 0000000..81caff5
--- /dev/null
+++ b/PyCTBN/build/lib/classes/structure_graph/set_of_cims.py
@@ -0,0 +1,97 @@
+
+
+import typing
+
+import numpy as np
+
+from .conditional_intensity_matrix import ConditionalIntensityMatrix
+
+
+class SetOfCims(object):
+    """Aggregates all the CIMS of the node identified by the label _node_id.
+
+    :param node_id: the node label
+    :type node_id: string
+    :param parents_states_number: the cardinalities of the parents
+    :type parents_states_number: List
+    :param node_states_number: the cardinality of the node
+    :type node_states_number: int
+    :param p_combs: the p_comb structure bound to this node
+    :type p_combs: numpy.ndArray
+    :_state_residence_time: matrix containing all the state residence time vectors for the node
+    :_transition_matrices: matrix containing all the transition matrices for the node
+    :_actual_cims: the cims of the node
+    """
+
+    def __init__(self, node_id: str, parents_states_number: typing.List, node_states_number: int, p_combs: np.ndarray):
+        """Constructor Method
+        """
+        self._node_id = node_id
+        self._parents_states_number = parents_states_number
+        self._node_states_number = node_states_number
+        self._actual_cims = []
+        self._state_residence_times = None
+        self._transition_matrices = None
+        self._p_combs = p_combs
+        self.build_times_and_transitions_structures()
+
+    def build_times_and_transitions_structures(self) -> None:
+        """Initializes at the correct dimensions the state residence times matrix and the state transition matrices.
+        """
+        # With no parents there is a single (unconditioned) entry, otherwise one per parents states combination.
+        if not self._parents_states_number:
+            cims_number = 1
+        else:
+            cims_number = np.prod(self._parents_states_number)
+        states = self._node_states_number
+        self._state_residence_times = np.zeros((cims_number, states), dtype=float)
+        self._transition_matrices = np.zeros((cims_number, states, states), dtype=int)
+
+    def build_cims(self, state_res_times: np.ndarray, transition_matrices: np.ndarray) -> None:
+        """Build the ``ConditionalIntensityMatrix`` objects given the state residence times and transitions matrices.
+        Compute the cim coefficients. The class member ``_actual_cims`` will contain the computed cims.
+
+        :param state_res_times: the state residence times matrix
+        :type state_res_times: numpy.ndArray
+        :param transition_matrices: the transition matrices
+        :type transition_matrices: numpy.ndArray
+        """
+        for state_res_time_vector, transition_matrix in zip(state_res_times, transition_matrices):
+            cim_to_add = ConditionalIntensityMatrix(state_res_time_vector, transition_matrix)
+            cim_to_add.compute_cim_coefficients()
+            self._actual_cims.append(cim_to_add)
+        self._actual_cims = np.array(self._actual_cims)
+        self._transition_matrices = None
+        self._state_residence_times = None
+
+    def filter_cims_with_mask(self, mask_arr: np.ndarray, comb: typing.List) -> np.ndarray:
+        """Filter the cims contained in the array ``_actual_cims`` given the boolean mask ``mask_arr`` and the index
+        ``comb``. When ``mask_arr`` has at most one element all the cims are returned.
+
+        :param mask_arr: the boolean mask that indicates which parent to consider
+        :type mask_arr: numpy.array
+        :param comb: the state/s of the filtered parents
+        :type comb: numpy.array
+        :return: Array of ``ConditionalIntensityMatrix`` objects
+        :rtype: numpy.array
+        """
+        if mask_arr.size <= 1:
+            return self._actual_cims
+        else:
+            flat_indxs = np.argwhere(np.all(self._p_combs[:, mask_arr] == comb, axis=1)).ravel()
+            return self._actual_cims[flat_indxs]
+
+    @property
+    def actual_cims(self) -> np.ndarray:
+        return self._actual_cims
+
+    @property
+    def p_combs(self) -> np.ndarray:
+        return self._p_combs
+
+    def get_cims_number(self):
+        return len(self._actual_cims)
+
+
+
+
diff --git a/PyCTBN/build/lib/classes/structure_graph/structure.py b/PyCTBN/build/lib/classes/structure_graph/structure.py
new file mode 100644
index 0000000..a9d60cc
--- /dev/null
+++ b/PyCTBN/build/lib/classes/structure_graph/structure.py
@@ -0,0 +1,124 @@
+
+import typing as ty
+
+import numpy as np
+
+
+class Structure(object):
+    """Contains all the infos about the network structure (nodes labels, nodes cardinalities, edges, indexes)
+
+    :param nodes_labels_list: the symbolic names of the variables
+    :type nodes_labels_list: List
+    :param nodes_indexes_arr: the indexes of the nodes
+    :type nodes_indexes_arr: numpy.ndArray
+    :param nodes_vals_arr: the cardinalities of the nodes
+    :type nodes_vals_arr: numpy.ndArray
+    :param edges_list: the edges of the network
+    :type edges_list: List
+    :param total_variables_number: the total number of variables in the dataset
+    :type total_variables_number: int
+    """
+
+    def __init__(self, nodes_labels_list: ty.List, nodes_indexes_arr: np.ndarray, nodes_vals_arr: np.ndarray,
+                 edges_list: ty.List, total_variables_number: int):
+        """Constructor Method
+        """
+        self._nodes_labels_list = nodes_labels_list
+        self._nodes_indexes_arr = nodes_indexes_arr
+        self._nodes_vals_arr = nodes_vals_arr
+        self._edges_list = edges_list
+        self._total_variables_number = total_variables_number
+
+    def remove_node(self, node_id: str) -> None:
+        """Remove the node ``node_id`` from the labels, indexes, cardinalities and edges (every edge touching it).
+        The class member ``_total_variables_number`` is left untouched since it refers to the total number of
+        variables in the dataset.
+        """
+        node_positional_indx = self._nodes_labels_list.index(node_id)
+        del self._nodes_labels_list[node_positional_indx]
+        self._nodes_indexes_arr = np.delete(self._nodes_indexes_arr, node_positional_indx)
+        self._nodes_vals_arr = np.delete(self._nodes_vals_arr, node_positional_indx)
+        self._edges_list = [(from_node, to_node) for (from_node, to_node) in self._edges_list
+                            if from_node != node_id and to_node != node_id]
+
+    @property
+    def edges(self) -> ty.List:
+        return self._edges_list
+
+    @property
+    def nodes_labels(self) -> ty.List:
+        return self._nodes_labels_list
+
+    @property
+    def nodes_indexes(self) -> np.ndarray:
+        return self._nodes_indexes_arr
+
+    @property
+    def nodes_values(self) -> np.ndarray:
+        return self._nodes_vals_arr
+
+    @property
+    def total_variables_number(self) -> int:
+        return self._total_variables_number
+
+    def get_node_id(self, node_indx: int) -> str:
+        """Given the ``node_indx`` returns the node label.
+
+        :param node_indx: the node index
+        :type node_indx: int
+        :return: the node label
+        :rtype: string
+        """
+        return self._nodes_labels_list[node_indx]
+
+    def clean_structure_edges(self):
+        self._edges_list = list()
+
+    def add_edge(self, edge: tuple):
+        """Appends ``edge`` (a ``(from_node, to_node)`` tuple) to the edges list."""
+        self._edges_list.append(edge)
+
+    def remove_edge(self, edge: tuple):
+        self._edges_list.remove(edge)
+
+    def contains_edge(self, edge: tuple) -> bool:
+        return edge in self._edges_list
+
+    def get_node_indx(self, node_id: str) -> int:
+        """Given the node label ``node_id`` returns the node index.
+
+        :param node_id: the node label
+        :type node_id: string
+        :return: the node index
+        :rtype: int
+        """
+        pos_indx = self._nodes_labels_list.index(node_id)
+        return self._nodes_indexes_arr[pos_indx]
+
+    def get_positional_node_indx(self, node_id: str) -> int:
+        return self._nodes_labels_list.index(node_id)
+
+    def get_states_number(self, node: str) -> int:
+        """Given the node label ``node`` returns the cardinality of the node.
+
+        :param node: the node label
+        :type node: string
+        :return: the node cardinality
+        :rtype: int
+        """
+        pos_indx = self._nodes_labels_list.index(node)
+        return self._nodes_vals_arr[pos_indx]
+
+    def __repr__(self):
+        return "Variables:\n" + str(self._nodes_labels_list) + "\nValues:\n" + str(self._nodes_vals_arr) + \
+               "\nEdges: \n" + str(self._edges_list)
+
+    def __eq__(self, other):
+        """Overrides the default implementation"""
+        if isinstance(other, Structure):
+            return set(self._nodes_labels_list) == set(other._nodes_labels_list) and \
+                   np.array_equal(self._nodes_vals_arr, other._nodes_vals_arr) and \
+                   np.array_equal(self._nodes_indexes_arr, other._nodes_indexes_arr) and \
+                   self._edges_list == other._edges_list
+
+        return False
+
diff --git a/PyCTBN/build/lib/classes/structure_graph/trajectory.py b/PyCTBN/build/lib/classes/structure_graph/trajectory.py
new file mode 100644
index 0000000..36899b3
--- /dev/null
+++ b/PyCTBN/build/lib/classes/structure_graph/trajectory.py
@@ -0,0 +1,45 @@
+
+import typing
+
+import numpy as np
+
+
+class Trajectory(object):
+    """Holds a complete set of trajectories as two numpy structures: the array of the time deltas
+    and the matrix of the states (including the shifted columns).
+
+    :param list_of_columns: the list containing the times array and values matrix
+    :type list_of_columns: List
+    :param original_cols_number: total number of cols in the data
+    :type original_cols_number: int
+    :_actual_trajectory: the trajectory containing also the duplicated/shifted values
+    :_times: the array containing the time deltas
+    """
+
+    def __init__(self, list_of_columns: typing.List, original_cols_number: int):
+        """Unpacks the times (first item) and the values matrix (second item) of ``list_of_columns``.
+        """
+        self._times, self._actual_trajectory = list_of_columns[0], list_of_columns[1]
+        self._original_cols_number = original_cols_number
+
+    @property
+    def trajectory(self) -> np.ndarray:
+        variables_cols = self._original_cols_number - 1
+        return self._actual_trajectory[:, :variables_cols]
+
+    @property
+    def complete_trajectory(self) -> np.ndarray:
+        return self._actual_trajectory
+
+    @property
+    def times(self):
+        return self._times
+
+    def size(self):
+        return self._actual_trajectory.shape[0]
+
+    def __repr__(self):
+        return (f"Complete Trajectory Rows: {self.size()}\n{self.complete_trajectory!r}"
+                f"\nTimes Rows:{self.times.size}\n{self.times!r}")
+
+
diff --git a/PyCTBN/build/lib/classes/utility/__init__.py b/PyCTBN/build/lib/classes/utility/__init__.py
new file mode 100644
index 0000000..f79749c
--- /dev/null
+++ b/PyCTBN/build/lib/classes/utility/__init__.py
@@ -0,0 +1,4 @@
+from .abstract_importer import AbstractImporter
+from .cache import Cache
+from .json_importer import JsonImporter
+from .sample_importer import SampleImporter
\ No newline at end of file
diff --git a/PyCTBN/build/lib/classes/utility/abstract_importer.py b/PyCTBN/build/lib/classes/utility/abstract_importer.py
new file mode 100644
index 0000000..1cad352
--- /dev/null
+++ b/PyCTBN/build/lib/classes/utility/abstract_importer.py
@@ -0,0 +1,164 @@
+
+import typing
+from abc import ABC, abstractmethod
+
+import numpy as np
+import pandas as pd
+
+import copy
+
+#from sklearn.utils import resample
+
+
+class AbstractImporter(ABC):
+    """Abstract class that exposes all the necessary methods to process the trajectories and the net structure.
+
+    :param file_path: the file path, or dataset name if you import already processed data
+    :type file_path: str
+    :param trajectory_list: Dataframe or numpy array containing the concatenation of all the processed trajectories
+    :type trajectory_list: typing.Union[pandas.DataFrame, numpy.ndarray]
+    :param variables: Dataframe containing the nodes labels and cardinalities
+    :type variables: pandas.DataFrame
+    :prior_net_structure: Dataframe containing the structure of the network (edges)
+    :type prior_net_structure: pandas.DataFrame
+    :_sorter: A list containing the variables labels in the SAME order as the columns in ``concatenated_samples``
+
+    .. warning::
+        The parameters ``variables`` and ``prior_net_structure`` HAVE to be properly constructed
+        as Pandas Dataframes with the following structure:
+        Header of _df_structure = [From_Node | To_Node]
+        Header of _df_variables = [Variable_Label | Variable_Cardinality]
+        See the tutorial on how to construct a correct ``concatenated_samples`` Dataframe/ndarray.
+
+    .. note::
+        See :class:``JsonImporter`` for an example implementation
+
+    """
+
+    def __init__(self, file_path: str = None, trajectory_list: typing.Union[pd.DataFrame, np.ndarray] = None,
+                 variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None):
+        """Constructor
+        """
+        self._file_path = file_path
+        self._df_samples_list = trajectory_list
+        self._concatenated_samples = []
+        self._df_variables = variables
+        self._df_structure = prior_net_structure
+        self._sorter = None
+        super().__init__()
+
+    @abstractmethod
+    def build_sorter(self, trajecory_header: object) -> typing.List:
+        """Initializes the ``_sorter`` class member from a trajectory dataframe, extracting the header of the frame
+        and keeping ONLY the variables symbolic labels, cutting out the time label in the header.
+
+        :param trajecory_header: an object that will be used to define the header
+        :type trajecory_header: object
+        :return: A list containing the processed header.
+        :rtype: List
+        """
+        pass
+
+    def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame,
+                                              columns_header: typing.List, shifted_cols_header: typing.List) \
+            -> pd.DataFrame:
+        """Works on a deep copy of ``sample_frame``: replaces the first (time) column with the difference between
+        each value and the next one, adds a copy of the other columns shifted up by one row, drops the last row.
+
+        :param sample_frame: the traj to be processed
+        :type sample_frame: pandas.Dataframe
+        :param columns_header: the original header of sample_frame
+        :type columns_header: List
+        :param shifted_cols_header: a copy of columns_header with changed names of the contents
+        :type shifted_cols_header: List
+        :return: The processed dataframe
+        :rtype: pandas.Dataframe
+
+        .. warning::
+            the Dataframe ``sample_frame`` has to follow the column structure of this header:
+            Header of sample_frame = [Time | Variable values]
+        """
+        processed = copy.deepcopy(sample_frame)
+        processed.iloc[:, 0] = processed.iloc[:, 0].diff().shift(-1)
+        next_states = processed[columns_header].shift(-1).fillna(0).astype('int32')
+        next_states.columns = shifted_cols_header
+        processed = processed.assign(**next_states)
+        processed.drop(processed.tail(1).index, inplace=True)
+        return processed
+
+    def compute_row_delta_in_all_samples_frames(self, df_samples_list: typing.List) -> None:
+        """Calls the method ``compute_row_delta_sigle_samples_frame`` on every dataframe present in the list
+        ``df_samples_list``.
+        Concatenates the result in the dataframe ``concatenated_samples``
+
+        :param df_samples_list: the dataframe's list to be processed and concatenated
+        :type df_samples_list: List
+
+        .. warning::
+            The Dataframe sample_frame has to follow the column structure of this header:
+            Header of sample_frame = [Time | Variable values]
+            The class member self._sorter HAS to be properly INITIALIZED (See class members definition doc)
+        .. note::
+            After the call of this method the class member ``concatenated_samples`` will contain all processed
+            and merged trajectories
+        """
+        if not self._sorter:
+            raise RuntimeError("The class member self._sorter has to be INITIALIZED!")
+        shifted_cols_header = [label + "S" for label in self._sorter]
+        processed_frames = [
+            self.compute_row_delta_sigle_samples_frame(frame, self._sorter, shifted_cols_header)
+            for frame in df_samples_list
+        ]
+        self._concatenated_samples = pd.concat(processed_frames)
+
+        # Reorder the columns as: Time | variables | shifted variables
+        complete_header = ['Time', *self._sorter, *shifted_cols_header]
+        self._concatenated_samples = self._concatenated_samples[complete_header]
+
+    def build_list_of_samples_array(self, concatenated_sample: pd.DataFrame) -> typing.List:
+        """Builds a List containing the delta times numpy array, and the complete transitions matrix
+
+        :param concatenated_sample: the dataframe/array from which the time, and transitions matrix have to be extracted
+            and converted
+        :type concatenated_sample: pandas.Dataframe
+        :return: the resulting list of numpy arrays
+        :rtype: List
+        """
+
+        concatenated_array = concatenated_sample.to_numpy()
+        times = concatenated_array[:, 0]
+        transitions = concatenated_array[:, 1:].astype(int)
+        return [times, transitions]
+
+    def clear_concatenated_frame(self) -> None:
+        """Removes all values in the dataframe concatenated_samples.
+        """
+        if isinstance(self._concatenated_samples, pd.DataFrame):
+            self._concatenated_samples = self._concatenated_samples.iloc[:0]
+
+    @abstractmethod
+    def dataset_id(self) -> object:
+        """If the original dataset contains multiple dataset, this method returns a unique id to identify the current
+        dataset
+        """
+        pass
+
+    @property
+    def concatenated_samples(self) -> pd.DataFrame:
+        return self._concatenated_samples
+
+    @property
+    def variables(self) -> pd.DataFrame:
+        return self._df_variables
+
+    @property
+    def structure(self) -> pd.DataFrame:
+        return self._df_structure
+
+    @property
+    def sorter(self) -> typing.List:
+        return self._sorter
+
+    @property
+    def file_path(self) -> str:
+        return self._file_path
diff --git a/PyCTBN/build/lib/classes/utility/cache.py b/PyCTBN/build/lib/classes/utility/cache.py
new file mode 100644
index 0000000..8e0369b
--- /dev/null
+++ b/PyCTBN/build/lib/classes/utility/cache.py
@@ -0,0 +1,58 @@
+
+import typing
+
+from ..structure_graph.set_of_cims import SetOfCims
+
+
+class Cache:
+    """This class acts as a cache of ``SetOfCims`` objects for a node.
+
+    Keys and values are kept in two parallel lists: the entry at a given position of
+    ``_list_of_sets_of_parents`` identifies the object stored at the SAME position of ``_actual_cache``.
+
+    :_list_of_sets_of_parents: a list of ``Set`` objects of the parents used as lookup keys
+    :_actual_cache: a list of ``SetOfCims`` objects
+    """
+
+    def __init__(self):
+        """Constructor Method
+        """
+        self._actual_cache = []
+        self._list_of_sets_of_parents = []
+
+    def find(self, parents_comb: typing.Set):  # typing.Union[typing.Set, str]
+        """Look up the ``SetOfCims`` cached for the symbolic parents combination ``parents_comb``.
+
+        :param parents_comb: the parents related to the wanted ``SetOfCims``
+        :type parents_comb: Set
+        :return: the ``SetOfCims`` stored at the position where ``parents_comb`` is found in
+            ``_list_of_sets_of_parents``, ``None`` when it is not present.
+        :rtype: SetOfCims
+        """
+        try:
+            position = self._list_of_sets_of_parents.index(parents_comb)
+        except ValueError:
+            # ``list.index`` signals a cache miss with ValueError.
+            return None
+        return self._actual_cache[position]
+
+    def put(self, parents_comb: typing.Set, socim: SetOfCims):
+        """Store the ``SetOfCims`` object and its symbolic index ``parents_comb`` at the end of the cache.
+
+        :param parents_comb: the symbolic set index
+        :type parents_comb: Set
+        :param socim: the related SetOfCims object
+        :type socim: SetOfCims
+        """
+        self._list_of_sets_of_parents.append(parents_comb)
+        self._actual_cache.append(socim)
+
+    def clear(self):
+        """Empty, in place, both ``_actual_cache`` and ``_list_of_sets_of_parents``.
+        """
+        self._list_of_sets_of_parents.clear()
+        self._actual_cache.clear()
\ No newline at end of file
diff --git a/PyCTBN/build/lib/classes/utility/json_importer.py b/PyCTBN/build/lib/classes/utility/json_importer.py
new file mode 100644
index 0000000..edff212
--- /dev/null
+++ b/PyCTBN/build/lib/classes/utility/json_importer.py
@@ -0,0 +1,176 @@
+import json
+import typing
+
+import pandas as pd
+
+
+from .abstract_importer import AbstractImporter
+
+
+class JsonImporter(AbstractImporter):
+    """Implements the abstract methods of AbstractImporter and adds all the methods needed to process and prepare
+    data stored in a json file.
+
+    :param file_path: the path of the file that contains the data to be imported
+    :type file_path: string
+    :param samples_label: the reference key for the samples in the trajectories
+    :type samples_label: string
+    :param structure_label: the reference key for the structure of the network data
+    :type structure_label: string
+    :param variables_label: the reference key for the cardinalities of the nodes data
+    :type variables_label: string
+    :param time_key: the key used to identify the timestamps in each trajectory
+    :type time_key: string
+    :param variables_key: the key used to identify the names of the variables in the net
+    :type variables_key: string
+    :_array_indx: the index of the outer JsonArray to extract the data from
+    :type _array_indx: int
+    :_df_samples_list: a list of dataframes in which every dataframe contains a trajectory
+    :_raw_data: the raw contents of the json file to import
+    :type _raw_data: List
+    """
+
+    def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
+                 variables_key: str):
+        """Constructor method
+
+        .. note::
+            This constructor also calls the method ``read_json_file()``, so after the construction of the object
+            the class member ``_raw_data`` will contain the raw imported json data.
+
+        """
+        self._samples_label = samples_label
+        self._structure_label = structure_label
+        self._variables_label = variables_label
+        self._time_key = time_key
+        self._variables_key = variables_key
+        self._df_samples_list = None
+        self._array_indx = None
+        super().__init__(file_path)
+        self._raw_data = self.read_json_file()
+
+    def import_data(self, indx: int) -> None:
+        """Implements the abstract method of :class:`AbstractImporter`.
+
+        :param indx: the index of the outer JsonArray to extract the data from
+        :type indx: int
+        """
+        self._array_indx = indx
+        trajectories = self.import_trajectories(self._raw_data)
+        self._df_samples_list = trajectories
+        # The variables order is taken from the first trajectory.
+        self._sorter = self.build_sorter(trajectories[0])
+        self.compute_row_delta_in_all_samples_frames(trajectories)
+        # Once the deltas are computed the single trajectory frames are emptied.
+        self.clear_data_frame_list()
+        self._df_structure = self.import_structure(self._raw_data)
+        self._df_variables = self.import_variables(self._raw_data)
+
+    def import_trajectories(self, raw_data: typing.List) -> typing.List:
+        """Imports the trajectories from the list of dicts ``raw_data``.
+
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :return: List of dataframes containing all the trajectories
+        :rtype: List
+        """
+        array_indx, label = self._array_indx, self._samples_label
+        return self.normalize_trajectories(raw_data, array_indx, label)
+
+    def import_structure(self, raw_data: typing.List) -> pd.DataFrame:
+        """Imports in a dataframe the data in the list ``raw_data`` at the key ``_structure_label``.
+
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :return: Dataframe containing the starting node and the ending node of every arc of the network
+        :rtype: pandas.DataFrame
+        """
+        array_indx, label = self._array_indx, self._structure_label
+        return self.one_level_normalizing(raw_data, array_indx, label)
+
+    def import_variables(self, raw_data: typing.List) -> pd.DataFrame:
+        """Imports in a dataframe the data in the list ``raw_data`` at the key ``_variables_label``.
+
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :return: Dataframe containing the variables symbolic labels and their cardinalities
+        :rtype: pandas.DataFrame
+        """
+        array_indx, label = self._array_indx, self._variables_label
+        return self.one_level_normalizing(raw_data, array_indx, label)
+
+    def read_json_file(self) -> typing.List:
+        """Reads the JSON file at the path ``self._file_path``.
+
+        :return: The contents of the json file
+        :rtype: List
+        """
+        with open(self._file_path) as json_file:
+            return json.load(json_file)
+
+    def one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame:
+        """Extracts the one-level nested data in the list ``raw_data`` at the index ``indx`` at the key ``key``.
+
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :param indx: The index of the array from which the data have to be extracted
+        :type indx: int
+        :param key: the key of the Dicts from which the data are extracted
+        :type key: string
+        :return: A normalized dataframe
+        :rtype: pandas.DataFrame
+        """
+        section = raw_data[indx][key]
+        return pd.DataFrame(section)
+
+    def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str) -> typing.List:
+        """Extracts the trajectories in ``raw_data`` at the index ``indx`` at the key ``trajectories_key``.
+
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :param indx: The index of the array from which the data have to be extracted
+        :type indx: int
+        :param trajectories_key: the key of the trajectories objects
+        :type trajectories_key: string
+        :return: A list of dataframes containing the trajectories
+        :rtype: List
+        """
+        return [pd.DataFrame(trajectory) for trajectory in raw_data[indx][trajectories_key]]
+
+    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
+        """Implements the abstract method build_sorter of the :class:`AbstractImporter` for this dataset.
+
+        :param sample_frame: the trajectory dataframe whose columns are read
+        :type sample_frame: pandas.DataFrame
+        :return: the column labels of ``sample_frame`` without the ``_time_key`` label
+        :rtype: List
+        """
+        labels = list(sample_frame.columns.values)
+        # ``remove`` raises ValueError when the time column is missing from the frame.
+        labels.remove(self._time_key)
+        return labels
+
+    def clear_data_frame_list(self) -> None:
+        """Removes all values present in the dataframes in the list ``_df_samples_list``.
+        """
+        for position, frame in enumerate(self._df_samples_list):
+            # Replace each frame, in place in the list, with its row-less counterpart.
+            self._df_samples_list[position] = frame.iloc[0:0]
+
+    def dataset_id(self) -> object:
+        """Returns the index of the outer JsonArray currently selected, ``_array_indx``.
+        """
+        return self._array_indx
+
+    def import_sampled_cims(self, raw_data: typing.List, indx: int, cims_key: str) -> typing.Dict:
+        """Imports the synthetic CIMS in the dataset in a dictionary, using variables labels
+        as keys for the set of CIMS of a particular node.
+
+        :param raw_data: List of Dicts
+        :type raw_data: List
+        :param indx: The index of the array from which the data have to be extracted
+        :type indx: int
+        :param cims_key: the key where the json object cims are placed
+        :type cims_key: string
+        :return: a dictionary containing the sampled CIMS for all the variables in the net
+        :rtype: Dictionary
+        """
+        cims_section = raw_data[indx][cims_key]
+        cims_for_all_vars = {}
+        for var in cims_section:
+            # One numpy matrix for every parents combination listed under this variable.
+            cims_for_all_vars[var] = [pd.DataFrame(cims_section[var][p_comb]).to_numpy()
+                                      for p_comb in cims_section[var]]
+        return cims_for_all_vars
+
+
+
diff --git a/PyCTBN/build/lib/classes/utility/sample_importer.py b/PyCTBN/build/lib/classes/utility/sample_importer.py
new file mode 100644
index 0000000..05073c8
--- /dev/null
+++ b/PyCTBN/build/lib/classes/utility/sample_importer.py
@@ -0,0 +1,65 @@
+import json
+import typing
+
+import pandas as pd
+import numpy as np
+
+from .abstract_importer import AbstractImporter
+
+
+
+class SampleImporter(AbstractImporter):
+    """Implements the abstract methods of AbstractImporter and adds all the methods needed to process and prepare
+    data that are already loaded in memory (dataframes, numpy arrays or lists).
+
+    :param trajectory_list: the data that describes the trajectories
+    :type trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List]
+    :param variables: the data that describes the variables with name and cardinality
+    :type variables: typing.Union[pd.DataFrame, np.ndarray, typing.List]
+    :param prior_net_structure: the data of the real structure, if it exists
+    :type prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List]
+
+    :_df_samples_list: a list of dataframes in which every dataframe contains a trajectory
+    """
+
+    def __init__(self,
+                 trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None,
+                 variables: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None,
+                 prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None):
+        """Constructor method
+
+        ``variables`` and ``prior_net_structure`` are converted to ``pandas.DataFrame`` when they are given as
+        a list or as a numpy array; every other value (``None`` included) is forwarded unchanged.
+        """
+        if isinstance(variables, (list, np.ndarray)):
+            variables = pd.DataFrame(variables)
+        # Each argument is tested on its own: checking ``variables`` here would never convert
+        # ``prior_net_structure``, because ``variables`` is already a dataframe at this point.
+        if isinstance(prior_net_structure, (list, np.ndarray)):
+            prior_net_structure = pd.DataFrame(prior_net_structure)
+
+        super().__init__(trajectory_list=trajectory_list,
+                         variables=variables,
+                         prior_net_structure=prior_net_structure)
+
+    def import_data(self, header_column=None):
+        """Builds the sorter and computes the row deltas for all the trajectories.
+
+        :param header_column: the ordered list of the variables labels; when ``None`` the list is built from
+            the first trajectory through ``build_sorter``
+        :type header_column: List
+        """
+        if header_column is not None:
+            self._sorter = header_column
+        else:
+            self._sorter = self.build_sorter(self._df_samples_list[0])
+
+        samples_list = self._df_samples_list
+        if isinstance(samples_list, np.ndarray):
+            samples_list = samples_list.tolist()
+
+        self.compute_row_delta_in_all_samples_frames(samples_list)
+
+    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
+        """Implements the abstract method build_sorter of the :class:`AbstractImporter` in order to get the
+        ordered variables list.
+
+        :param sample_frame: the trajectory dataframe whose columns are read
+        :type sample_frame: pandas.DataFrame
+        :return: the column labels of ``sample_frame`` without the first one
+        :rtype: List
+        """
+        columns_header = list(sample_frame.columns.values)
+        # NOTE(review): the first column is presumably the time column -- confirm against the callers.
+        del columns_header[0]
+        return columns_header
+
+    def dataset_id(self) -> object:
+        """Implements the abstract method of :class:`AbstractImporter`; no id is defined for in-memory samples.
+
+        :return: always ``None``
+        """
+        return None
\ No newline at end of file
diff --git a/PyCTBN/build/lib/tests/__init__.py b/PyCTBN/build/lib/tests/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/PyCTBN/build/lib/tests/__init__.py
@@ -0,0 +1 @@
+
diff --git a/PyCTBN/setup.py b/PyCTBN/setup.py
new file mode 100644
index 0000000..56dd72f
--- /dev/null
+++ b/PyCTBN/setup.py
@@ -0,0 +1,20 @@
+from setuptools import setup, find_packages
+
+
+# Packaging notes:
+# * the author fields are single strings in the package metadata, so several people are comma-separated
+#   instead of being passed as lists;
+# * ``dependency_links`` was dropped: it is deprecated, pip ignores it, and the former entries pointed at
+#   repository home pages rather than at installable archives; ``install_requires`` is what gets resolved;
+# * ``tests.*`` is excluded together with ``tests`` so that no test sub-package ends up in the distribution.
+setup(name='PyCTBN',
+      version='1.0',
+      url='https://github.com/philipMartini/PyCTBN',
+      license='MIT',
+      author='Alessandro Bregoli, Filippo Martini, Luca Moretti',
+      author_email='a.bregoli1@campus.unimib.it, f.martini@campus.unimib.it, lucamoretti96@gmail.com',
+      description='A Continuous Time Bayesian Networks Library',
+      packages=find_packages('.', exclude=['tests', 'tests.*']),
+      install_requires=[
+          'numpy', 'pandas', 'networkx', 'scipy', 'matplotlib', 'tqdm'],
+      zip_safe=False,
+      python_requires='>=3.6')
diff --git a/PyCTBN/tests/__init__.py b/PyCTBN/tests/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/PyCTBN/tests/__init__.py
@@ -0,0 +1 @@
+
diff --git a/PyCTBN/tests/coverage.xml b/PyCTBN/tests/coverage.xml
new file mode 100644
index 0000000..094af83
--- /dev/null
+++ b/PyCTBN/tests/coverage.xml
@@ -0,0 +1,963 @@
+
+