From 788bdc73afe6ae31421d90d29da4391ef2b91cca Mon Sep 17 00:00:00 2001 From: Luca Moretti Date: Tue, 2 Mar 2021 14:45:35 +0100 Subject: [PATCH] Updated setup and added manifest --- PyCTBN/MANIFEST.in | 9 + PyCTBN/__init__.py | 2 + PyCTBN/build/lib/PyCTBN/__init__.py | 8 - .../build/lib/PyCTBN/estimators/__init__.py | 5 - .../PyCTBN/estimators/fam_score_calculator.py | 272 -- .../PyCTBN/estimators/parameters_estimator.py | 143 -- .../structure_constraint_based_estimator.py | 238 -- .../PyCTBN/estimators/structure_estimator.py | 187 -- .../structure_score_based_estimator.py | 244 -- .../build/lib/PyCTBN/optimizers/__init__.py | 4 - .../optimizers/constraint_based_optimizer.py | 87 - .../PyCTBN/optimizers/hill_climbing_search.py | 135 - .../build/lib/PyCTBN/optimizers/optimizer.py | 39 - .../lib/PyCTBN/optimizers/tabu_search.py | 199 -- .../lib/PyCTBN/structure_graph/__init__.py | 6 - .../conditional_intensity_matrix.py | 42 - .../PyCTBN/structure_graph/network_graph.py | 293 --- .../lib/PyCTBN/structure_graph/sample_path.py | 91 - .../lib/PyCTBN/structure_graph/set_of_cims.py | 97 - .../lib/PyCTBN/structure_graph/structure.py | 124 - .../lib/PyCTBN/structure_graph/trajectory.py | 45 - PyCTBN/build/lib/PyCTBN/utility/__init__.py | 4 - .../lib/PyCTBN/utility/abstract_importer.py | 164 -- PyCTBN/build/lib/PyCTBN/utility/cache.py | 58 - .../build/lib/PyCTBN/utility/json_importer.py | 176 -- .../lib/PyCTBN/utility/sample_importer.py | 65 - PyCTBN/build/lib/classes/__init__.py | 8 - .../build/lib/classes/estimators/__init__.py | 5 - .../estimators/fam_score_calculator.py | 272 -- .../estimators/parameters_estimator.py | 143 -- .../structure_constraint_based_estimator.py | 238 -- .../classes/estimators/structure_estimator.py | 187 -- .../structure_score_based_estimator.py | 244 -- .../build/lib/classes/optimizers/__init__.py | 4 - .../optimizers/constraint_based_optimizer.py | 87 - .../optimizers/hill_climbing_search.py | 135 - .../build/lib/classes/optimizers/optimizer.py | 39 - .../lib/classes/optimizers/tabu_search.py | 199 -- .../lib/classes/structure_graph/__init__.py | 6 - .../conditional_intensity_matrix.py | 42 - .../classes/structure_graph/network_graph.py | 293 --- .../classes/structure_graph/sample_path.py | 91 - .../classes/structure_graph/set_of_cims.py | 97 - .../lib/classes/structure_graph/structure.py | 124 - .../lib/classes/structure_graph/trajectory.py | 45 - PyCTBN/build/lib/classes/utility/__init__.py | 4 - .../lib/classes/utility/abstract_importer.py | 164 -- PyCTBN/build/lib/classes/utility/cache.py | 58 - .../lib/classes/utility/json_importer.py | 176 -- .../lib/classes/utility/sample_importer.py | 65 - PyCTBN/build/lib/tests/__init__.py | 1 - basic_main.py | 2 + coverage copy.xml | 2219 ----------------- setup.py | 4 +- 54 files changed, 16 insertions(+), 7673 deletions(-) create mode 100644 PyCTBN/MANIFEST.in delete mode 100644 PyCTBN/build/lib/PyCTBN/__init__.py delete mode 100644 PyCTBN/build/lib/PyCTBN/estimators/__init__.py delete mode 100644 PyCTBN/build/lib/PyCTBN/estimators/fam_score_calculator.py delete mode 100644 PyCTBN/build/lib/PyCTBN/estimators/parameters_estimator.py delete mode 100644 PyCTBN/build/lib/PyCTBN/estimators/structure_constraint_based_estimator.py delete mode 100644 PyCTBN/build/lib/PyCTBN/estimators/structure_estimator.py delete mode 100644 PyCTBN/build/lib/PyCTBN/estimators/structure_score_based_estimator.py delete mode 100644 PyCTBN/build/lib/PyCTBN/optimizers/__init__.py delete mode 100644 PyCTBN/build/lib/PyCTBN/optimizers/constraint_based_optimizer.py delete mode 100644 PyCTBN/build/lib/PyCTBN/optimizers/hill_climbing_search.py delete mode 100644 PyCTBN/build/lib/PyCTBN/optimizers/optimizer.py delete mode 100644 PyCTBN/build/lib/PyCTBN/optimizers/tabu_search.py delete mode 100644 PyCTBN/build/lib/PyCTBN/structure_graph/__init__.py delete mode 100644 PyCTBN/build/lib/PyCTBN/structure_graph/conditional_intensity_matrix.py delete mode 100644 PyCTBN/build/lib/PyCTBN/structure_graph/network_graph.py delete mode 100644 PyCTBN/build/lib/PyCTBN/structure_graph/sample_path.py delete mode 100644 PyCTBN/build/lib/PyCTBN/structure_graph/set_of_cims.py delete mode 100644 PyCTBN/build/lib/PyCTBN/structure_graph/structure.py delete mode 100644 PyCTBN/build/lib/PyCTBN/structure_graph/trajectory.py delete mode 100644 PyCTBN/build/lib/PyCTBN/utility/__init__.py delete mode 100644 PyCTBN/build/lib/PyCTBN/utility/abstract_importer.py delete mode 100644 PyCTBN/build/lib/PyCTBN/utility/cache.py delete mode 100644 PyCTBN/build/lib/PyCTBN/utility/json_importer.py delete mode 100644 PyCTBN/build/lib/PyCTBN/utility/sample_importer.py delete mode 100644 PyCTBN/build/lib/classes/__init__.py delete mode 100644 PyCTBN/build/lib/classes/estimators/__init__.py delete mode 100644 PyCTBN/build/lib/classes/estimators/fam_score_calculator.py delete mode 100644 PyCTBN/build/lib/classes/estimators/parameters_estimator.py delete mode 100644 PyCTBN/build/lib/classes/estimators/structure_constraint_based_estimator.py delete mode 100644 PyCTBN/build/lib/classes/estimators/structure_estimator.py delete mode 100644 PyCTBN/build/lib/classes/estimators/structure_score_based_estimator.py delete mode 100644 PyCTBN/build/lib/classes/optimizers/__init__.py delete mode 100644 PyCTBN/build/lib/classes/optimizers/constraint_based_optimizer.py delete mode 100644 PyCTBN/build/lib/classes/optimizers/hill_climbing_search.py delete mode 100644 PyCTBN/build/lib/classes/optimizers/optimizer.py delete mode 100644 PyCTBN/build/lib/classes/optimizers/tabu_search.py delete mode 100644 PyCTBN/build/lib/classes/structure_graph/__init__.py delete mode 100644 PyCTBN/build/lib/classes/structure_graph/conditional_intensity_matrix.py delete mode 100644 PyCTBN/build/lib/classes/structure_graph/network_graph.py delete mode 100644 PyCTBN/build/lib/classes/structure_graph/sample_path.py delete mode 100644 PyCTBN/build/lib/classes/structure_graph/set_of_cims.py delete mode 100644 PyCTBN/build/lib/classes/structure_graph/structure.py delete mode 100644 PyCTBN/build/lib/classes/structure_graph/trajectory.py delete mode 100644 PyCTBN/build/lib/classes/utility/__init__.py delete mode 100644 PyCTBN/build/lib/classes/utility/abstract_importer.py delete mode 100644 PyCTBN/build/lib/classes/utility/cache.py delete mode 100644 PyCTBN/build/lib/classes/utility/json_importer.py delete mode 100644 PyCTBN/build/lib/classes/utility/sample_importer.py delete mode 100644 PyCTBN/build/lib/tests/__init__.py delete mode 100644 coverage copy.xml diff --git a/PyCTBN/MANIFEST.in b/PyCTBN/MANIFEST.in new file mode 100644 index 0000000..3a0d37d --- /dev/null +++ b/PyCTBN/MANIFEST.in @@ -0,0 +1,9 @@ +include MANIFEST.in +include setup.py +include README.rst +prune PyCTBN/test_data +prune PyCTBN/tests +prune tests +prune test_data +prune *tests* +prune *test* \ No newline at end of file diff --git a/PyCTBN/__init__.py b/PyCTBN/__init__.py index e69de29..7adf0d7 100644 --- a/PyCTBN/__init__.py +++ b/PyCTBN/__init__.py @@ -0,0 +1,2 @@ +import PyCTBN.PyCTBN +from PyCTBN.PyCTBN import * diff --git a/PyCTBN/build/lib/PyCTBN/__init__.py b/PyCTBN/build/lib/PyCTBN/__init__.py deleted file mode 100644 index 023c0f1..0000000 --- a/PyCTBN/build/lib/PyCTBN/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -import PyCTBN.estimators -from PyCTBN.estimators import * -import PyCTBN.optimizers -from PyCTBN.optimizers import * -import PyCTBN.structure_graph -from PyCTBN.structure_graph import * -import PyCTBN.utility -from PyCTBN.utility import * \ No newline at end of file diff --git a/PyCTBN/build/lib/PyCTBN/estimators/__init__.py b/PyCTBN/build/lib/PyCTBN/estimators/__init__.py deleted file mode 100644 index 112086f..0000000 --- a/PyCTBN/build/lib/PyCTBN/estimators/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .fam_score_calculator import FamScoreCalculator -from .parameters_estimator import ParametersEstimator -from .structure_estimator import StructureEstimator -from .structure_constraint_based_estimator import StructureConstraintBasedEstimator -from .structure_score_based_estimator import StructureScoreBasedEstimator diff --git a/PyCTBN/build/lib/PyCTBN/estimators/fam_score_calculator.py b/PyCTBN/build/lib/PyCTBN/estimators/fam_score_calculator.py deleted file mode 100644 index 5b0b591..0000000 --- a/PyCTBN/build/lib/PyCTBN/estimators/fam_score_calculator.py +++ /dev/null @@ -1,272 +0,0 @@ - -import itertools -import json -import typing - -import networkx as nx -import numpy as np -from networkx.readwrite import json_graph - -from math import log - -from scipy.special import loggamma -from random import choice - -from ..structure_graph.set_of_cims import SetOfCims -from ..structure_graph.network_graph import NetworkGraph -from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix - - -''' - -''' - - -class FamScoreCalculator: - """ - Has the task of calculating the FamScore of a node by using a Bayesian score function - """ - - def __init__(self): - #np.seterr('raise') - pass - - # region theta - - def marginal_likelihood_theta(self, - cims: ConditionalIntensityMatrix, - alpha_xu: float, - alpha_xxu: float): - """ - Calculate the FamScore value of the node identified by the label node_id - - :param cims: np.array with all the node's cims - :type cims: np.array - :param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 0.1 - :type alpha_xu: float - :param alpha_xxu: distribuited hyperparameter over the CTBN’s theta parameters - :type alpha_xxu: float - - :return: the value of the marginal likelihood over theta - :rtype: float - """ - return np.sum( - [self.variable_cim_xu_marginal_likelihood_theta(cim, - alpha_xu, - alpha_xxu) - for cim in cims]) - - def variable_cim_xu_marginal_likelihood_theta(self, - cim: ConditionalIntensityMatrix, - alpha_xu: float, - alpha_xxu: float): - """ - Calculate the value of the marginal likelihood over theta given a cim - - :param cim: A conditional_intensity_matrix object with the sufficient statistics - :type cim: class:'ConditionalIntensityMatrix' - :param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 0.1 - :type alpha_xu: float - :param alpha_xxu: distribuited hyperparameter over the CTBN’s theta parameters - :type alpha_xxu: float - - :return: the value of the marginal likelihood over theta - :rtype: float - """ - - 'get cim length' - values = len(cim._state_residence_times) - - 'compute the marginal likelihood for the current cim' - return np.sum([ - self.single_cim_xu_marginal_likelihood_theta( - index, - cim, - alpha_xu, - alpha_xxu) - for index in range(values)]) - - def single_cim_xu_marginal_likelihood_theta(self, - index: int, - cim: ConditionalIntensityMatrix, - alpha_xu: float, - alpha_xxu: float): - """ - Calculate the marginal likelihood on q of the node when assumes a specif value - and a specif parents's assignment - - :param cim: A conditional_intensity_matrix object with the sufficient statistics - :type cim: class:'ConditionalIntensityMatrix' - :param alpha_xu: hyperparameter over the CTBN’s q parameters - :type alpha_xu: float - :param alpha_xxu: distribuited hyperparameter over the CTBN’s theta parameters - :type alpha_xxu: float - - :return: the value of the marginal likelihood over theta when the node assumes a specif value - :rtype: float - """ - - values = list(range(len(cim._state_residence_times))) - - 'remove the index because of the x != x^ condition in the summation ' - values.remove(index) - - 'uncomment for alpha xx not uniform' - #alpha_xxu = alpha_xu * cim.state_transition_matrix[index,index_x_first] / cim.state_transition_matrix[index, index]) - - return (loggamma(alpha_xu) - loggamma(alpha_xu + cim.state_transition_matrix[index, index])) \ - + \ - np.sum([self.single_internal_cim_xxu_marginal_likelihood_theta( - cim.state_transition_matrix[index,index_x_first], - alpha_xxu) - for index_x_first in values]) - - - def single_internal_cim_xxu_marginal_likelihood_theta(self, - M_xxu_suff_stats: float, - alpha_xxu: float=1): - """Calculate the second part of the marginal likelihood over theta formula - - :param M_xxu_suff_stats: value of the suffucient statistic M[xx'|u] - :type M_xxu_suff_stats: float - :param alpha_xxu: distribuited hyperparameter over the CTBN’s theta parameters - :type alpha_xxu: float - - :return: the value of the marginal likelihood over theta when the node assumes a specif value - :rtype: float - """ - return loggamma(alpha_xxu+M_xxu_suff_stats) - loggamma(alpha_xxu) - - # endregion - - # region q - - def marginal_likelihood_q(self, - cims: np.array, - tau_xu: float=0.1, - alpha_xu: float=1): - """ - Calculate the value of the marginal likelihood over q of the node identified by the label node_id - - :param cims: np.array with all the node's cims - :type cims: np.array - :param tau_xu: hyperparameter over the CTBN’s q parameters - :type tau_xu: float - :param alpha_xu: hyperparameter over the CTBN’s q parameters - :type alpha_xu: float - - - :return: the value of the marginal likelihood over q - :rtype: float - """ - - return np.sum([self.variable_cim_xu_marginal_likelihood_q(cim, tau_xu, alpha_xu) for cim in cims]) - - def variable_cim_xu_marginal_likelihood_q(self, - cim: ConditionalIntensityMatrix, - tau_xu: float=0.1, - alpha_xu: float=1): - """ - Calculate the value of the marginal likelihood over q given a cim - - :param cim: A conditional_intensity_matrix object with the sufficient statistics - :type cim: class:'ConditionalIntensityMatrix' - :param tau_xu: hyperparameter over the CTBN’s q parameters - :type tau_xu: float - :param alpha_xu: hyperparameter over the CTBN’s q parameters - :type alpha_xu: float - - - :return: the value of the marginal likelihood over q - :rtype: float - """ - - 'get cim length' - values=len(cim._state_residence_times) - - 'compute the marginal likelihood for the current cim' - return np.sum([ - self.single_cim_xu_marginal_likelihood_q( - cim.state_transition_matrix[index, index], - cim._state_residence_times[index], - tau_xu, - alpha_xu) - for index in range(values)]) - - - def single_cim_xu_marginal_likelihood_q(self, - M_xu_suff_stats: float, - T_xu_suff_stats: float, - tau_xu: float=0.1, - alpha_xu: float=1): - """ - Calculate the marginal likelihood on q of the node when assumes a specif value - and a specif parents's assignment - - :param M_xu_suff_stats: value of the suffucient statistic M[x|u] - :type M_xxu_suff_stats: float - :param T_xu_suff_stats: value of the suffucient statistic T[x|u] - :type T_xu_suff_stats: float - :param cim: A conditional_intensity_matrix object with the sufficient statistics - :type cim: class:'ConditionalIntensityMatrix' - :param tau_xu: hyperparameter over the CTBN’s q parameters - :type tau_xu: float - :param alpha_xu: hyperparameter over the CTBN’s q parameters - :type alpha_xu: float - - - :return: the value of the marginal likelihood of the node when assumes a specif value - :rtype: float - """ - return ( - loggamma(alpha_xu + M_xu_suff_stats + 1) + - (log(tau_xu) - * - (alpha_xu+1)) - ) \ - - \ - (loggamma(alpha_xu + 1)+( - log(tau_xu + T_xu_suff_stats) - * - (alpha_xu + M_xu_suff_stats + 1)) - ) - - # end region - - def get_fam_score(self, - cims: np.array, - tau_xu: float=0.1, - alpha_xu: float=1): - """ - Calculate the FamScore value of the node - - - :param cims: np.array with all the node's cims - :type cims: np.array - :param tau_xu: hyperparameter over the CTBN’s q parameters, default to 0.1 - :type tau_xu: float, optional - :param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 1 - :type alpha_xu: float, optional - - - :return: the FamScore value of the node - :rtype: float - """ - #print("------") - #print(self.marginal_likelihood_q(cims, - # tau_xu, - # alpha_xu)) - - #print(self.marginal_likelihood_theta(cims, - # alpha_xu, - # alpha_xxu)) - 'calculate alpha_xxu as a uniform distribution' - alpha_xxu = alpha_xu /(len(cims[0]._state_residence_times) - 1) - - return self.marginal_likelihood_q(cims, - tau_xu, - alpha_xu) \ - + \ - self.marginal_likelihood_theta(cims, - alpha_xu, - alpha_xxu) diff --git a/PyCTBN/build/lib/PyCTBN/estimators/parameters_estimator.py b/PyCTBN/build/lib/PyCTBN/estimators/parameters_estimator.py deleted file mode 100644 index 4754d58..0000000 --- a/PyCTBN/build/lib/PyCTBN/estimators/parameters_estimator.py +++ /dev/null @@ -1,143 +0,0 @@ -import sys -sys.path.append('../') -import numpy as np - -from ..structure_graph.network_graph import NetworkGraph -from ..structure_graph.set_of_cims import SetOfCims -from ..structure_graph.trajectory import Trajectory - - -class ParametersEstimator(object): - """Has the task of computing the cims of particular node given the trajectories and the net structure - in the graph ``_net_graph``. - - :param trajectories: the trajectories - :type trajectories: Trajectory - :param net_graph: the net structure - :type net_graph: NetworkGraph - :_single_set_of_cims: the set of cims object that will hold the cims of the node - """ - - def __init__(self, trajectories: Trajectory, net_graph: NetworkGraph): - """Constructor Method - """ - self._trajectories = trajectories - self._net_graph = net_graph - self._single_set_of_cims = None - - def fast_init(self, node_id: str) -> None: - """Initializes all the necessary structures for the parameters estimation for the node ``node_id``. - - :param node_id: the node label - :type node_id: string - """ - p_vals = self._net_graph._aggregated_info_about_nodes_parents[2] - node_states_number = self._net_graph.get_states_number(node_id) - self._single_set_of_cims = SetOfCims(node_id, p_vals, node_states_number, self._net_graph.p_combs) - - def compute_parameters_for_node(self, node_id: str) -> SetOfCims: - """Compute the CIMS of the node identified by the label ``node_id``. - - :param node_id: the node label - :type node_id: string - :return: A SetOfCims object filled with the computed CIMS - :rtype: SetOfCims - """ - node_indx = self._net_graph.get_node_indx(node_id) - state_res_times = self._single_set_of_cims._state_residence_times - transition_matrices = self._single_set_of_cims._transition_matrices - ParametersEstimator.compute_state_res_time_for_node(self._trajectories.times, - self._trajectories.trajectory, - self._net_graph.time_filtering, - self._net_graph.time_scalar_indexing_strucure, - state_res_times) - ParametersEstimator.compute_state_transitions_for_a_node(node_indx, self._trajectories.complete_trajectory, - self._net_graph.transition_filtering, - self._net_graph.transition_scalar_indexing_structure, - transition_matrices) - self._single_set_of_cims.build_cims(state_res_times, transition_matrices) - return self._single_set_of_cims - - @staticmethod - def compute_state_res_time_for_node(times: np.ndarray, trajectory: np.ndarray, - cols_filter: np.ndarray, scalar_indexes_struct: np.ndarray, - T: np.ndarray) -> None: - """Compute the state residence times for a node and fill the matrix ``T`` with the results - - :param node_indx: the index of the node - :type node_indx: int - :param times: the times deltas vector - :type times: numpy.array - :param trajectory: the trajectory - :type trajectory: numpy.ndArray - :param cols_filter: the columns filtering structure - :type cols_filter: numpy.array - :param scalar_indexes_struct: the indexing structure - :type scalar_indexes_struct: numpy.array - :param T: the state residence times vectors - :type T: numpy.ndArray - """ - T[:] = np.bincount(np.sum(trajectory[:, cols_filter] * scalar_indexes_struct / scalar_indexes_struct[0], axis=1) - .astype(np.int), \ - times, - minlength=scalar_indexes_struct[-1]).reshape(-1, T.shape[1]) - - @staticmethod - def compute_state_transitions_for_a_node(node_indx: int, trajectory: np.ndarray, cols_filter: np.ndarray, - scalar_indexing: np.ndarray, M: np.ndarray) -> None: - """Compute the state residence times for a node and fill the matrices ``M`` with the results. - - :param node_indx: the index of the node - :type node_indx: int - :param trajectory: the trajectory - :type trajectory: numpy.ndArray - :param cols_filter: the columns filtering structure - :type cols_filter: numpy.array - :param scalar_indexing: the indexing structure - :type scalar_indexing: numpy.array - :param M: the state transitions matrices - :type M: numpy.ndArray - """ - diag_indices = np.array([x * M.shape[1] + x % M.shape[1] for x in range(M.shape[0] * M.shape[1])], - dtype=np.int64) - trj_tmp = trajectory[trajectory[:, int(trajectory.shape[1] / 2) + node_indx].astype(np.int) >= 0] - M[:] = np.bincount(np.sum(trj_tmp[:, cols_filter] * scalar_indexing / scalar_indexing[0], axis=1).astype(np.int) - , minlength=scalar_indexing[-1]).reshape(-1, M.shape[1], M.shape[2]) - M_raveled = M.ravel() - M_raveled[diag_indices] = 0 - M_raveled[diag_indices] = np.sum(M, axis=2).ravel() - - def init_sets_cims_container(self): - self.sets_of_cims_struct = acims.SetsOfCimsContainer(self.net_graph.nodes, - self.net_graph.nodes_values, - self.net_graph.get_ordered_by_indx_parents_values_for_all_nodes(), - self.net_graph.p_combs) - - def compute_parameters(self): - #print(self.net_graph.get_nodes()) - #print(self.amalgamated_cims_struct.sets_of_cims) - #enumerate(zip(self.net_graph.get_nodes(), self.amalgamated_cims_struct.sets_of_cims)) - for indx, aggr in enumerate(zip(self.net_graph.nodes, self.sets_of_cims_struct.sets_of_cims)): - #print(self.net_graph.time_filtering[indx]) - #print(self.net_graph.time_scalar_indexing_strucure[indx]) - self.compute_state_res_time_for_node(self.net_graph.get_node_indx(aggr[0]), self.sample_path.trajectories.times, - self.sample_path.trajectories.trajectory, - self.net_graph.time_filtering[indx], - self.net_graph.time_scalar_indexing_strucure[indx], - aggr[1]._state_residence_times) - #print(self.net_graph.transition_filtering[indx]) - #print(self.net_graph.transition_scalar_indexing_structure[indx]) - self.compute_state_transitions_for_a_node(self.net_graph.get_node_indx(aggr[0]), - self.sample_path.trajectories.complete_trajectory, - self.net_graph.transition_filtering[indx], - self.net_graph.transition_scalar_indexing_structure[indx], - aggr[1]._transition_matrices) - aggr[1].build_cims(aggr[1]._state_residence_times, aggr[1]._transition_matrices) - - - - - - - - diff --git a/PyCTBN/build/lib/PyCTBN/estimators/structure_constraint_based_estimator.py b/PyCTBN/build/lib/PyCTBN/estimators/structure_constraint_based_estimator.py deleted file mode 100644 index 7d5721e..0000000 --- a/PyCTBN/build/lib/PyCTBN/estimators/structure_constraint_based_estimator.py +++ /dev/null @@ -1,238 +0,0 @@ - -import itertools -import json -import typing - -import networkx as nx -import numpy as np -from networkx.readwrite import json_graph -import os -from scipy.stats import chi2 as chi2_dist -from scipy.stats import f as f_dist -from tqdm import tqdm - -from ..utility.cache import Cache -from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix -from ..structure_graph.network_graph import NetworkGraph -from .parameters_estimator import ParametersEstimator -from .structure_estimator import StructureEstimator -from ..structure_graph.sample_path import SamplePath -from ..structure_graph.structure import Structure -from ..optimizers.constraint_based_optimizer import ConstraintBasedOptimizer - -import concurrent.futures - - - -import multiprocessing -from multiprocessing import Pool - - -class StructureConstraintBasedEstimator(StructureEstimator): - """ - Has the task of estimating the network structure given the trajectories in samplepath by using a constraint-based approach. - - :param sample_path: the _sample_path object containing the trajectories and the real structure - :type sample_path: SamplePath - :param exp_test_alfa: the significance level for the exponential Hp test - :type exp_test_alfa: float - :param chi_test_alfa: the significance level for the chi Hp test - :type chi_test_alfa: float - :_nodes: the nodes labels - :_nodes_vals: the nodes cardinalities - :_nodes_indxs: the nodes indexes - :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes`` - :_cache: the Cache object - """ - - def __init__(self, sample_path: SamplePath, exp_test_alfa: float, chi_test_alfa: float,known_edges: typing.List= [],thumb_threshold:int = 25): - super().__init__(sample_path,known_edges) - self._exp_test_sign = exp_test_alfa - self._chi_test_alfa = chi_test_alfa - self._thumb_threshold = thumb_threshold - self._cache = Cache() - - def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int, - tot_vars_count: int, parent_indx, child_indx) -> bool: - """Performs a complete independence test on the directed graphs G1 = {test_child U parent_set} - G2 = {G1 U test_parent} (added as an additional parent of the test_child). - Generates all the necessary structures and datas to perform the tests. - - :param test_parent: the node label of the test parent - :type test_parent: string - :param test_child: the node label of the child - :type test_child: string - :param parent_set: the common parent set - :type parent_set: List - :param child_states_numb: the cardinality of the ``test_child`` - :type child_states_numb: int - :param tot_vars_count: the total number of variables in the net - :type tot_vars_count: int - :return: True iff test_child and test_parent are independent given the sep_set parent_set. False otherwise - :rtype: bool - """ - p_set = parent_set[:] - complete_info = parent_set[:] - complete_info.append(test_child) - - parents = np.array(parent_set) - parents = np.append(parents, test_parent) - sorted_parents = self._nodes[np.isin(self._nodes, parents)] - cims_filter = sorted_parents != test_parent - - p_set.insert(0, test_parent) - sofc2 = self._cache.find(set(p_set)) - - if not sofc2: - complete_info.append(test_parent) - bool_mask2 = np.isin(self._nodes, complete_info) - l2 = list(self._nodes[bool_mask2]) - indxs2 = self._nodes_indxs[bool_mask2] - vals2 = self._nodes_vals[bool_mask2] - eds2 = list(itertools.product(p_set, test_child)) - s2 = Structure(l2, indxs2, vals2, eds2, tot_vars_count) - g2 = NetworkGraph(s2) - g2.fast_init(test_child) - p2 = ParametersEstimator(self._sample_path.trajectories, g2) - p2.fast_init(test_child) - sofc2 = p2.compute_parameters_for_node(test_child) - self._cache.put(set(p_set), sofc2) - - del p_set[0] - sofc1 = self._cache.find(set(p_set)) - if not sofc1: - g2.remove_node(test_parent) - g2.fast_init(test_child) - p2 = ParametersEstimator(self._sample_path.trajectories, g2) - p2.fast_init(test_child) - sofc1 = p2.compute_parameters_for_node(test_child) - self._cache.put(set(p_set), sofc1) - thumb_value = 0.0 - if child_states_numb > 2: - parent_val = self._sample_path.structure.get_states_number(test_parent) - bool_mask_vals = np.isin(self._nodes, parent_set) - parents_vals = self._nodes_vals[bool_mask_vals] - thumb_value = self.compute_thumb_value(parent_val, child_states_numb, parents_vals) - for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs): - cond_cims = sofc2.filter_cims_with_mask(cims_filter, p_comb) - for cim2 in cond_cims: - if not self.independence_test(child_states_numb, cim1, cim2, thumb_value, parent_indx, child_indx): - return False - return True - - def independence_test(self, child_states_numb: int, cim1: ConditionalIntensityMatrix, - cim2: ConditionalIntensityMatrix, thumb_value: float, parent_indx, child_indx) -> bool: - """Compute the actual independence test using two cims. - It is performed first the exponential test and if the null hypothesis is not rejected, - it is performed also the chi_test. - - :param child_states_numb: the cardinality of the test child - :type child_states_numb: int - :param cim1: a cim belonging to the graph without test parent - :type cim1: ConditionalIntensityMatrix - :param cim2: a cim belonging to the graph with test parent - :type cim2: ConditionalIntensityMatrix - :return: True iff both tests do NOT reject the null hypothesis of independence. False otherwise. - :rtype: bool - """ - M1 = cim1.state_transition_matrix - M2 = cim2.state_transition_matrix - r1s = M1.diagonal() - r2s = M2.diagonal() - C1 = cim1.cim - C2 = cim2.cim - if child_states_numb > 2: - if (np.sum(np.diagonal(M1)) / thumb_value) < self._thumb_threshold: - self._removable_edges_matrix[parent_indx][child_indx] = False - return False - F_stats = C2.diagonal() / C1.diagonal() - exp_alfa = self._exp_test_sign - for val in range(0, child_states_numb): - if F_stats[val] < f_dist.ppf(exp_alfa / 2, r1s[val], r2s[val]) or \ - F_stats[val] > f_dist.ppf(1 - exp_alfa / 2, r1s[val], r2s[val]): - return False - M1_no_diag = M1[~np.eye(M1.shape[0], dtype=bool)].reshape(M1.shape[0], -1) - M2_no_diag = M2[~np.eye(M2.shape[0], dtype=bool)].reshape( - M2.shape[0], -1) - chi_2_quantile = chi2_dist.ppf(1 - self._chi_test_alfa, child_states_numb - 1) - Ks = np.sqrt(r1s / r2s) - Ls = np.sqrt(r2s / r1s) - for val in range(0, child_states_numb): - Chi = np.sum(np.power(Ks[val] * M2_no_diag[val] - Ls[val] *M1_no_diag[val], 2) / - (M1_no_diag[val] + M2_no_diag[val])) - if Chi > chi_2_quantile: - return False - return True - - def compute_thumb_value(self, parent_val, child_val, parent_set_vals): - """Compute the value to test against the thumb_threshold. - - :param parent_val: test parent's variable cardinality - :type parent_val: int - :param child_val: test child's variable cardinality - :type child_val: int - :param parent_set_vals: the cardinalities of the nodes in the current sep-set - :type parent_set_vals: List - :return: the thumb value for the current independence test - :rtype: int - """ - df = (child_val - 1) ** 2 - df = df * parent_val - for v in parent_set_vals: - df = df * v - return df - - def one_iteration_of_CTPC_algorithm(self, var_id: str, tot_vars_count: int)-> typing.List: - """Performs an iteration of the CTPC algorithm using the node ``var_id`` as ``test_child``. - - :param var_id: the node label of the test child - :type var_id: string - """ - optimizer_obj = ConstraintBasedOptimizer( - node_id = var_id, - structure_estimator = self, - tot_vars_count = tot_vars_count) - return optimizer_obj.optimize_structure() - - - def ctpc_algorithm(self,disable_multiprocessing:bool= False ): - """Compute the CTPC algorithm over the entire net. - """ - ctpc_algo = self.one_iteration_of_CTPC_algorithm - total_vars_numb = self._sample_path.total_variables_count - - n_nodes= len(self._nodes) - - total_vars_numb_array = [total_vars_numb] * n_nodes - - 'get the number of CPU' - cpu_count = multiprocessing.cpu_count() - - - - 'Remove all the edges from the structure' - self._sample_path.structure.clean_structure_edges() - - 'Estimate the best parents for each node' - #with multiprocessing.Pool(processes=cpu_count) as pool: - #with get_context("spawn").Pool(processes=cpu_count) as pool: - if disable_multiprocessing: - print("DISABILITATO") - cpu_count = 1 - list_edges_partial = [ctpc_algo(n,total_vars_numb) for n in self._nodes] - else: - with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor: - list_edges_partial = executor.map(ctpc_algo, - self._nodes, - total_vars_numb_array) - #list_edges_partial = [ctpc_algo(n,total_vars_numb) for n in self._nodes] - - return set(itertools.chain.from_iterable(list_edges_partial)) - - - def estimate_structure(self,disable_multiprocessing:bool=False): - return self.ctpc_algorithm(disable_multiprocessing=disable_multiprocessing) - - - - diff --git a/PyCTBN/build/lib/PyCTBN/estimators/structure_estimator.py b/PyCTBN/build/lib/PyCTBN/estimators/structure_estimator.py deleted file mode 100644 index fbf8ea9..0000000 --- a/PyCTBN/build/lib/PyCTBN/estimators/structure_estimator.py +++ /dev/null @@ -1,187 +0,0 @@ - -import itertools -import json -import typing - -import matplotlib.pyplot as plt -import networkx as nx -import numpy as np -from networkx.readwrite import json_graph - -from abc import ABC - -import abc - -from ..utility.cache import Cache -from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix -from ..structure_graph.network_graph import NetworkGraph -from .parameters_estimator import ParametersEstimator -from ..structure_graph.sample_path import SamplePath -from ..structure_graph.structure import Structure - - -class StructureEstimator(object): - """Has the task of estimating the network structure given the trajectories in ``samplepath``. - - :param sample_path: the _sample_path object containing the trajectories and the real structure - :type sample_path: SamplePath - :_nodes: the nodes labels - :_nodes_vals: the nodes cardinalities - :_nodes_indxs: the nodes indexes - :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes`` - """ - - def __init__(self, sample_path: SamplePath, known_edges: typing.List = None): - self._sample_path = sample_path - self._nodes = np.array(self._sample_path.structure.nodes_labels) - self._nodes_vals = self._sample_path.structure.nodes_values - self._nodes_indxs = self._sample_path.structure.nodes_indexes - self._removable_edges_matrix = self.build_removable_edges_matrix(known_edges) - self._complete_graph = StructureEstimator.build_complete_graph(self._sample_path.structure.nodes_labels) - - - def build_removable_edges_matrix(self, known_edges: typing.List): - """Builds a boolean matrix who shows if a edge could be removed or not, based on prior knowledge given: - - :param known_edges: the list of nodes labels - :type known_edges: List - :return: a boolean matrix - :rtype: np.ndarray - """ - tot_vars_count = self._sample_path.total_variables_count - complete_adj_matrix = np.full((tot_vars_count, tot_vars_count), True) - if known_edges: - for edge in known_edges: - i = self._sample_path.structure.get_node_indx(edge[0]) - j = self._sample_path.structure.get_node_indx(edge[1]) - complete_adj_matrix[i][j] = False - return complete_adj_matrix - - @staticmethod - def build_complete_graph(node_ids: typing.List) -> nx.DiGraph: - """Builds a complete directed graph (no self loops) given the nodes labels in the list ``node_ids``: - - :param node_ids: the list of nodes labels - :type node_ids: List - :return: a complete Digraph Object - :rtype: networkx.DiGraph - """ - complete_graph = nx.DiGraph() - complete_graph.add_nodes_from(node_ids) - complete_graph.add_edges_from(itertools.permutations(node_ids, 2)) - return complete_graph - - - @staticmethod - def generate_possible_sub_sets_of_size( u: typing.List, size: int, parent_label: str): - """Creates a list containing all possible subsets of the list ``u`` of size ``size``, - that do not contains a the node identified by ``parent_label``. - - :param u: the list of nodes - :type u: List - :param size: the size of the subsets - :type size: int - :param parent_label: the node to exclude in the subsets generation - :type parent_label: string - :return: an Iterator Object containing a list of lists - :rtype: Iterator - """ - list_without_test_parent = u[:] - list_without_test_parent.remove(parent_label) - return map(list, itertools.combinations(list_without_test_parent, size)) - - def save_results(self) -> None: - """Save the estimated Structure to a .json file in the path where the data are loaded from. - The file is named as the input dataset but the `results_` word is appended to the results file. - """ - res = json_graph.node_link_data(self._complete_graph) - name = self._sample_path._importer.file_path.rsplit('/', 1)[-1] - name = name.split('.', 1)[0] - name += '_' + str(self._sample_path._importer.dataset_id()) - name += '.json' - file_name = 'results_' + name - with open(file_name, 'w') as f: - json.dump(res, f) - - - def remove_diagonal_elements(self, matrix): - m = matrix.shape[0] - strided = np.lib.stride_tricks.as_strided - s0, s1 = matrix.strides - return strided(matrix.ravel()[1:], shape=(m - 1, m), strides=(s0 + s1, s1)).reshape(m, -1) - - - @abc.abstractmethod - def estimate_structure(self) -> typing.List: - """Abstract method to estimate the structure - - :return: List of estimated edges - :rtype: Typing.List - """ - pass - - - def adjacency_matrix(self) -> np.ndarray: - """Converts the estimated structure ``_complete_graph`` to a boolean adjacency matrix representation. - - :return: The adjacency matrix of the graph ``_complete_graph`` - :rtype: numpy.ndArray - """ - return nx.adj_matrix(self._complete_graph).toarray().astype(bool) - - def spurious_edges(self) -> typing.List: - """Return the spurious edges present in the estimated structure, if a prior net structure is present in - ``_sample_path.structure``. - - :return: A list containing the spurious edges - :rtype: List - """ - if not self._sample_path.has_prior_net_structure: - raise RuntimeError("Can not compute spurious edges with no prior net structure!") - real_graph = nx.DiGraph() - real_graph.add_nodes_from(self._sample_path.structure.nodes_labels) - real_graph.add_edges_from(self._sample_path.structure.edges) - return nx.difference(real_graph, self._complete_graph).edges - - def save_plot_estimated_structure_graph(self) -> None: - """Plot the estimated structure in a graphical model style. - Spurious edges are colored in red. - """ - graph_to_draw = nx.DiGraph() - spurious_edges = self.spurious_edges() - non_spurious_edges = list(set(self._complete_graph.edges) - set(spurious_edges)) - print(non_spurious_edges) - edges_colors = ['red' if edge in spurious_edges else 'black' for edge in self._complete_graph.edges] - graph_to_draw.add_edges_from(spurious_edges) - graph_to_draw.add_edges_from(non_spurious_edges) - pos = nx.spring_layout(graph_to_draw, k=0.5*1/np.sqrt(len(graph_to_draw.nodes())), iterations=50,scale=10) - options = { - "node_size": 2000, - "node_color": "white", - "edgecolors": "black", - 'linewidths':2, - "with_labels":True, - "font_size":13, - 'connectionstyle': 'arc3, rad = 0.1', - "arrowsize": 15, - "arrowstyle": '<|-', - "width": 1, - "edge_color":edges_colors, - } - - nx.draw(graph_to_draw, pos, **options) - ax = plt.gca() - ax.margins(0.20) - plt.axis("off") - name = self._sample_path._importer.file_path.rsplit('/', 1)[-1] - name = name.split('.', 1)[0] - name += '_' + str(self._sample_path._importer.dataset_id()) - name += '.png' - plt.savefig(name) - plt.clf() - print("Estimated Structure Plot Saved At: ", os.path.abspath(name)) - - - - - diff --git a/PyCTBN/build/lib/PyCTBN/estimators/structure_score_based_estimator.py b/PyCTBN/build/lib/PyCTBN/estimators/structure_score_based_estimator.py deleted file mode 100644 index 2903db3..0000000 --- a/PyCTBN/build/lib/PyCTBN/estimators/structure_score_based_estimator.py +++ /dev/null @@ -1,244 +0,0 @@ - -import itertools -import json -import typing - -import networkx as nx -import numpy as np -from networkx.readwrite import json_graph - -from random import choice - -import concurrent.futures - -import copy - -from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix -from ..structure_graph.network_graph import NetworkGraph -from .parameters_estimator import ParametersEstimator -from .structure_estimator import StructureEstimator -from ..structure_graph.sample_path import SamplePath -from ..structure_graph.structure import Structure -from .fam_score_calculator import FamScoreCalculator -from ..optimizers.hill_climbing_search import HillClimbing -from ..optimizers.tabu_search import TabuSearch - - -import multiprocessing -from multiprocessing import Pool - - - - -class StructureScoreBasedEstimator(StructureEstimator): - """ - Has the task of estimating the network structure given the trajectories in samplepath by - using a score based approach. - - :param sample_path: the _sample_path object containing the trajectories and the real structure - :type sample_path: SamplePath - :param tau_xu: hyperparameter over the CTBN’s q parameters, default to 0.1 - :type tau_xu: float, optional - :param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 1 - :type alpha_xu: float, optional - :param known_edges: List of known edges, default to [] - :type known_edges: List, optional - - """ - - def __init__(self, sample_path: SamplePath, tau_xu:int=0.1, alpha_xu:int = 1,known_edges: typing.List= []): - super().__init__(sample_path,known_edges) - self.tau_xu=tau_xu - self.alpha_xu=alpha_xu - - - def estimate_structure(self, max_parents:int = None, iterations_number:int= 40, - patience:int = None, tabu_length:int = None, tabu_rules_duration:int = None, - optimizer: str = 'tabu',disable_multiprocessing:bool= False ): - """ - Compute the score-based algorithm to find the optimal structure - - :param max_parents: maximum number of parents for each variable. If None, disabled, default to None - :type max_parents: int, optional - :param iterations_number: maximum number of optimization algorithm's iteration, default to 40 - :type iterations_number: int, optional - :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None - :type patience: int, optional - :param tabu_length: maximum lenght of the data structures used in the optimization process, default to None - :type tabu_length: int, optional - :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None - :type tabu_rules_duration: int, optional - :param optimizer: name of the optimizer algorithm. Possible values: 'hill' (Hill climbing),'tabu' (tabu search), defualt to 'tabu' - :type optimizer: string, optional - :param disable_multiprocessing: true if you desire to disable the multiprocessing operations, default to False - :type disable_multiprocessing: Boolean, optional - """ - 'Save the true edges structure in tuples' - true_edges = copy.deepcopy(self._sample_path.structure.edges) - true_edges = set(map(tuple, true_edges)) - - 'Remove all the edges from the structure' - self._sample_path.structure.clean_structure_edges() - - estimate_parents = self.estimate_parents - - n_nodes= len(self._nodes) - - l_max_parents= [max_parents] * n_nodes - l_iterations_number = [iterations_number] * n_nodes - l_patience = [patience] * n_nodes - l_tabu_length = [tabu_length] * n_nodes - l_tabu_rules_duration = [tabu_rules_duration] * n_nodes - l_optimizer = [optimizer] * n_nodes - - - 'get the number of CPU' - cpu_count = multiprocessing.cpu_count() - print(f"CPU COUNT: {cpu_count}") - - if disable_multiprocessing: - cpu_count = 1 - - - - - - #with get_context("spawn").Pool(processes=cpu_count) as pool: - #with multiprocessing.Pool(processes=cpu_count) as pool: - - 'Estimate the best parents for each node' - if disable_multiprocessing: - list_edges_partial = [estimate_parents(n,max_parents,iterations_number,patience,tabu_length,tabu_rules_duration,optimizer) for n in self._nodes] - else: - with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor: - list_edges_partial = executor.map(estimate_parents, - self._nodes, - l_max_parents, - l_iterations_number, - l_patience, - l_tabu_length, - l_tabu_rules_duration, - l_optimizer) - - - - #list_edges_partial = p.map(estimate_parents, self._nodes) - #list_edges_partial= estimate_parents('Q',max_parents,iterations_number,patience,tabu_length,tabu_rules_duration,optimizer) - - 'Concatenate all the edges list' - set_list_edges = set(itertools.chain.from_iterable(list_edges_partial)) - - #print('-------------------------') - - - 'calculate precision and recall' - n_missing_edges = 0 - n_added_fake_edges = 0 - - try: - n_added_fake_edges = len(set_list_edges.difference(true_edges)) - - n_missing_edges = len(true_edges.difference(set_list_edges)) - - n_true_positive = len(true_edges) - n_missing_edges - - precision = n_true_positive / (n_true_positive + n_added_fake_edges) - - recall = n_true_positive / (n_true_positive + n_missing_edges) - - - # print(f"n archi reali non trovati: {n_missing_edges}") - # print(f"n archi non reali aggiunti: {n_added_fake_edges}") - print(true_edges) - print(set_list_edges) - print(f"precision: {precision} ") - print(f"recall: {recall} ") - except Exception as e: - print(f"errore: {e}") - - return set_list_edges - - - def estimate_parents(self,node_id:str, max_parents:int = None, iterations_number:int= 40, - patience:int = 10, tabu_length:int = None, tabu_rules_duration:int=5, - optimizer:str = 'hill' ): - """ - Use the FamScore of a node in order to find the best parent nodes - - :param node_id: current node's id - :type node_id: string - :param max_parents: maximum number of parents for each variable. If None, disabled, default to None - :type max_parents: int, optional - :param iterations_number: maximum number of optimization algorithm's iteration, default to 40 - :type iterations_number: int, optional - :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None - :type patience: int, optional - :param tabu_length: maximum lenght of the data structures used in the optimization process, default to None - :type tabu_length: int, optional - :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None - :type tabu_rules_duration: int, optional - :param optimizer: name of the optimizer algorithm. Possible values: 'hill' (Hill climbing),'tabu' (tabu search), defualt to 'tabu' - :type optimizer: string, optional - - :return: A list of the best edges for the currente node - :rtype: List - """ - - "choose the optimizer algotithm" - if optimizer == 'tabu': - optimizer = TabuSearch( - node_id = node_id, - structure_estimator = self, - max_parents = max_parents, - iterations_number = iterations_number, - patience = patience, - tabu_length = tabu_length, - tabu_rules_duration = tabu_rules_duration) - else: #if optimizer == 'hill': - optimizer = HillClimbing( - node_id = node_id, - structure_estimator = self, - max_parents = max_parents, - iterations_number = iterations_number, - patience = patience) - - "call the optmizer's function that calculates the current node's parents" - return optimizer.optimize_structure() - - - def get_score_from_graph(self, - graph: NetworkGraph, - node_id:str): - """ - Get the FamScore of a node - - :param node_id: current node's id - :type node_id: string - :param graph: current graph to be computed - :type graph: class:'NetworkGraph' - - - :return: The FamSCore for this graph structure - :rtype: float - """ - - 'inizialize the graph for a single node' - graph.fast_init(node_id) - - params_estimation = ParametersEstimator(self._sample_path.trajectories, graph) - - 'Inizialize and compute parameters for node' - params_estimation.fast_init(node_id) - SoCims = params_estimation.compute_parameters_for_node(node_id) - - 'calculate the FamScore for the node' - fam_score_obj = FamScoreCalculator() - - score = fam_score_obj.get_fam_score(SoCims.actual_cims,tau_xu = self.tau_xu,alpha_xu=self.alpha_xu) - - #print(f" lo score per {node_id} risulta: {score} ") - return score - - - - diff --git a/PyCTBN/build/lib/PyCTBN/optimizers/__init__.py b/PyCTBN/build/lib/PyCTBN/optimizers/__init__.py deleted file mode 100644 index 4162bf1..0000000 --- a/PyCTBN/build/lib/PyCTBN/optimizers/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .optimizer import Optimizer -from .tabu_search import TabuSearch -from .hill_climbing_search import HillClimbing -from .constraint_based_optimizer import ConstraintBasedOptimizer \ No newline at end of file diff --git a/PyCTBN/build/lib/PyCTBN/optimizers/constraint_based_optimizer.py b/PyCTBN/build/lib/PyCTBN/optimizers/constraint_based_optimizer.py deleted file mode 100644 index 65bc19c..0000000 --- a/PyCTBN/build/lib/PyCTBN/optimizers/constraint_based_optimizer.py +++ /dev/null @@ -1,87 +0,0 @@ - -import itertools -import json -import typing - -import networkx as nx -import numpy as np - -from random import choice - -from abc import ABC - -import copy - - -from .optimizer import Optimizer -from ..estimators.structure_estimator import StructureEstimator -from ..structure_graph.network_graph import NetworkGraph - - -class ConstraintBasedOptimizer(Optimizer): - """ - Optimizer class that implement a CTPC Algorithm - - :param node_id: current node's id - :type node_id: string - :param structure_estimator: a structure estimator object with the information about the net - :type structure_estimator: class:'StructureEstimator' - :param tot_vars_count: number of variables in the dataset - :type tot_vars_count: int - """ - def __init__(self, - node_id:str, - structure_estimator: StructureEstimator, - tot_vars_count:int - ): - """ - Constructor - """ - super().__init__(node_id, structure_estimator) - self.tot_vars_count = tot_vars_count - - - - def optimize_structure(self): - """ - Compute Optimization process for a structure_estimator by using a CTPC Algorithm - - :return: the estimated structure for the node - :rtype: List - """ - print("##################TESTING VAR################", self.node_id) - - graph = NetworkGraph(self.structure_estimator._sample_path.structure) - - other_nodes = [node for node in self.structure_estimator._sample_path.structure.nodes_labels if node != self.node_id] - - for possible_parent in other_nodes: - graph.add_edges([(possible_parent,self.node_id)]) - - - u = other_nodes - #tests_parents_numb = len(u) - #complete_frame = self.complete_graph_frame - #test_frame = complete_frame.loc[complete_frame['To'].isin([self.node_id])] - child_states_numb = self.structure_estimator._sample_path.structure.get_states_number(self.node_id) - b = 0 - while b < len(u): - parent_indx = 0 - while parent_indx < len(u): - removed = False - test_parent = u[parent_indx] - i = self.structure_estimator._sample_path.structure.get_node_indx(test_parent) - j = self.structure_estimator._sample_path.structure.get_node_indx(self.node_id) - if self.structure_estimator._removable_edges_matrix[i][j]: - S = StructureEstimator.generate_possible_sub_sets_of_size(u, b, test_parent) - for parents_set in S: - if self.structure_estimator.complete_test(test_parent, self.node_id, parents_set, child_states_numb, self.tot_vars_count,i,j): - graph.remove_edges([(test_parent, self.node_id)]) - u.remove(test_parent) - removed = True - break - if not removed: - parent_indx += 1 - b += 1 - self.structure_estimator._cache.clear() - return graph.edges \ No newline at end of file diff --git a/PyCTBN/build/lib/PyCTBN/optimizers/hill_climbing_search.py b/PyCTBN/build/lib/PyCTBN/optimizers/hill_climbing_search.py deleted file mode 100644 index 6783be0..0000000 --- a/PyCTBN/build/lib/PyCTBN/optimizers/hill_climbing_search.py +++ /dev/null @@ -1,135 +0,0 @@ - -import itertools -import json -import typing - -import networkx as nx -import numpy as np - -from random import choice - -from abc import ABC - - -from .optimizer import Optimizer -from ..estimators.structure_estimator import StructureEstimator -from ..structure_graph.network_graph import NetworkGraph - - -class HillClimbing(Optimizer): - """ - Optimizer class that implement Hill Climbing Search - - - :param node_id: current node's id - :type node_id: string - :param structure_estimator: a structure estimator object with the information about the net - :type structure_estimator: class:'StructureEstimator' - :param max_parents: maximum number of parents for each variable. If None, disabled, default to None - :type max_parents: int, optional - :param iterations_number: maximum number of optimization algorithm's iteration, default to 40 - :type iterations_number: int, optional - :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None - :type patience: int, optional - - - - """ - def __init__(self, - node_id:str, - structure_estimator: StructureEstimator, - max_parents:int = None, - iterations_number:int= 40, - patience:int = None - ): - """ - Constructor - """ - super().__init__(node_id, structure_estimator) - self.max_parents = max_parents - self.iterations_number = iterations_number - self.patience = patience - - - - def optimize_structure(self) -> typing.List: - """ - Compute Optimization process for a structure_estimator by using a Hill Climbing Algorithm - - :return: the estimated structure for the node - :rtype: List - """ - - #'Create the graph for the single node' - graph = NetworkGraph(self.structure_estimator._sample_path.structure) - - 'get the index for the current node' - node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id) - - 'list of prior edges' - prior_parents = set() - - 'Add the edges from prior knowledge' - for i in range(len(self.structure_estimator._removable_edges_matrix)): - if not self.structure_estimator._removable_edges_matrix[i][node_index]: - parent_id= self.structure_estimator._sample_path._structure.get_node_id(i) - prior_parents.add(parent_id) - - 'Add the node to the starting structure' - graph.add_edges([(parent_id, self.node_id)]) - - - - 'get all the possible parents' - other_nodes = [node for node in - self.structure_estimator._sample_path.structure.nodes_labels if - node != self.node_id and - not prior_parents.__contains__(node)] - - actual_best_score = self.structure_estimator.get_score_from_graph(graph,self.node_id) - - patince_count = 0 - for i in range(self.iterations_number): - 'choose a new random edge' - current_new_parent = choice(other_nodes) - current_edge = (current_new_parent,self.node_id) - added = False - parent_removed = None - - - if graph.has_edge(current_edge): - graph.remove_edges([current_edge]) - else: - 'check the max_parents constraint' - if self.max_parents is not None: - parents_list = graph.get_parents_by_id(self.node_id) - if len(parents_list) >= self.max_parents : - parent_removed = (choice(parents_list), self.node_id) - graph.remove_edges([parent_removed]) - graph.add_edges([current_edge]) - added = True - #print('**************************') - current_score = self.structure_estimator.get_score_from_graph(graph,self.node_id) - - - if current_score > actual_best_score: - 'update current best score' - actual_best_score = current_score - patince_count = 0 - else: - 'undo the last update' - if added: - graph.remove_edges([current_edge]) - 'If a parent was removed, add it again to the graph' - if parent_removed is not None: - graph.add_edges([parent_removed]) - else: - graph.add_edges([current_edge]) - 'update patience count' - patince_count += 1 - - if self.patience is not None and patince_count > self.patience: - break - - print(f"finito variabile: {self.node_id}") - return graph.edges \ No newline at end of file diff --git a/PyCTBN/build/lib/PyCTBN/optimizers/optimizer.py b/PyCTBN/build/lib/PyCTBN/optimizers/optimizer.py deleted file mode 100644 index 36445c0..0000000 --- a/PyCTBN/build/lib/PyCTBN/optimizers/optimizer.py +++ /dev/null @@ -1,39 +0,0 @@ - -import itertools -import json -import typing - -import networkx as nx -import numpy as np - -import abc - -from ..estimators.structure_estimator import StructureEstimator - - - -class Optimizer(abc.ABC): - """ - Interface class for all the optimizer's child PyCTBN - - :param node_id: the node label - :type node_id: string - :param structure_estimator: A structureEstimator Object to predict the structure - :type structure_estimator: class:'StructureEstimator' - - """ - - def __init__(self, node_id:str, structure_estimator: StructureEstimator): - self.node_id = node_id - self.structure_estimator = structure_estimator - - - @abc.abstractmethod - def optimize_structure(self) -> typing.List: - """ - Compute Optimization process for a structure_estimator - - :return: the estimated structure for the node - :rtype: List - """ - pass diff --git a/PyCTBN/build/lib/PyCTBN/optimizers/tabu_search.py b/PyCTBN/build/lib/PyCTBN/optimizers/tabu_search.py deleted file mode 100644 index e15dd40..0000000 --- a/PyCTBN/build/lib/PyCTBN/optimizers/tabu_search.py +++ /dev/null @@ -1,199 +0,0 @@ - -import itertools -import json -import typing - -import networkx as nx -import numpy as np - -from random import choice,sample - -from abc import ABC - - -from .optimizer import Optimizer -from ..estimators.structure_estimator import StructureEstimator -from ..structure_graph.network_graph import NetworkGraph - -import queue - - -class TabuSearch(Optimizer): - """ - Optimizer class that implement Tabu Search - - - :param node_id: current node's id - :type node_id: string - :param structure_estimator: a structure estimator object with the information about the net - :type structure_estimator: class:'StructureEstimator' - :param max_parents: maximum number of parents for each variable. If None, disabled, default to None - :type max_parents: int, optional - :param iterations_number: maximum number of optimization algorithm's iteration, default to 40 - :type iterations_number: int, optional - :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None - :type patience: int, optional - :param tabu_length: maximum lenght of the data structures used in the optimization process, default to None - :type tabu_length: int, optional - :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None - :type tabu_rules_duration: int, optional - - - """ - def __init__(self, - node_id:str, - structure_estimator: StructureEstimator, - max_parents:int = None, - iterations_number:int= 40, - patience:int = None, - tabu_length:int = None, - tabu_rules_duration = None - ): - """ - Constructor - """ - super().__init__(node_id, structure_estimator) - self.max_parents = max_parents - self.iterations_number = iterations_number - self.patience = patience - self.tabu_length = tabu_length - self.tabu_rules_duration = tabu_rules_duration - - - def optimize_structure(self) -> typing.List: - """ - Compute Optimization process for a structure_estimator by using a Hill Climbing Algorithm - - :return: the estimated structure for the node - :rtype: List - """ - print(f"tabu search is processing the structure of {self.node_id}") - - 'Create the graph for the single node' - graph = NetworkGraph(self.structure_estimator._sample_path.structure) - - 'get the index for the current node' - node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id) - - 'list of prior edges' - prior_parents = set() - - 'Add the edges from prior knowledge' - for i in range(len(self.structure_estimator._removable_edges_matrix)): - if not self.structure_estimator._removable_edges_matrix[i][node_index]: - parent_id= self.structure_estimator._sample_path._structure.get_node_id(i) - prior_parents.add(parent_id) - - 'Add the node to the starting structure' - graph.add_edges([(parent_id, self.node_id)]) - - - - 'get all the possible parents' - other_nodes = set([node for node in - self.structure_estimator._sample_path.structure.nodes_labels if - node != self.node_id and - not prior_parents.__contains__(node)]) - - 'calculate the score for the node without parents' - actual_best_score = self.structure_estimator.get_score_from_graph(graph,self.node_id) - - - 'initialize tabu_length and tabu_rules_duration if None' - if self.tabu_length is None: - self.tabu_length = len(other_nodes) - - if self.tabu_rules_duration is None: - self.tabu_tabu_rules_durationength = len(other_nodes) - - 'inizialize the data structures' - tabu_set = set() - tabu_queue = queue.Queue() - - patince_count = 0 - tabu_count = 0 - for i in range(self.iterations_number): - - current_possible_nodes = other_nodes.difference(tabu_set) - - 'choose a new random edge according to tabu restiction' - if(len(current_possible_nodes) > 0): - current_new_parent = sample(current_possible_nodes,k=1)[0] - else: - current_new_parent = tabu_queue.get() - tabu_set.remove(current_new_parent) - - - - current_edge = (current_new_parent,self.node_id) - added = False - parent_removed = None - - if graph.has_edge(current_edge): - graph.remove_edges([current_edge]) - else: - 'check the max_parents constraint' - if self.max_parents is not None: - parents_list = graph.get_parents_by_id(self.node_id) - if len(parents_list) >= self.max_parents : - parent_removed = (choice(parents_list), self.node_id) - graph.remove_edges([parent_removed]) - graph.add_edges([current_edge]) - added = True - #print('**************************') - current_score = self.structure_estimator.get_score_from_graph(graph,self.node_id) - - - # print("-------------------------------------------") - # print(f"Current new parent: {current_new_parent}") - # print(f"Current score: {current_score}") - # print(f"Current best score: {actual_best_score}") - # print(f"tabu list : {str(tabu_set)} length: {len(tabu_set)}") - # print(f"tabu queue : {str(tabu_queue)} length: {tabu_queue.qsize()}") - # print(f"graph edges: {graph.edges}") - - # print("-------------------------------------------") - # input() - if current_score > actual_best_score: - 'update current best score' - actual_best_score = current_score - patince_count = 0 - 'update tabu list' - - - else: - 'undo the last update' - if added: - graph.remove_edges([current_edge]) - 'If a parent was removed, add it again to the graph' - if parent_removed is not None: - graph.add_edges([parent_removed]) - else: - graph.add_edges([current_edge]) - 'update patience count' - patince_count += 1 - - - if tabu_queue.qsize() >= self.tabu_length: - current_removed = tabu_queue.get() - tabu_set.remove(current_removed) - 'Add the node on the tabu list' - tabu_queue.put(current_new_parent) - tabu_set.add(current_new_parent) - - tabu_count += 1 - - 'Every tabu_rules_duration step remove an item from the tabu list ' - if tabu_count % self.tabu_rules_duration == 0: - if tabu_queue.qsize() > 0: - current_removed = tabu_queue.get() - tabu_set.remove(current_removed) - tabu_count = 0 - else: - tabu_count = 0 - - if self.patience is not None and patince_count > self.patience: - break - - print(f"finito variabile: {self.node_id}") - return graph.edges \ No newline at end of file diff --git a/PyCTBN/build/lib/PyCTBN/structure_graph/__init__.py b/PyCTBN/build/lib/PyCTBN/structure_graph/__init__.py deleted file mode 100644 index 85f18a2..0000000 --- a/PyCTBN/build/lib/PyCTBN/structure_graph/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from .conditional_intensity_matrix import ConditionalIntensityMatrix -from .network_graph import NetworkGraph -from .sample_path import SamplePath -from .set_of_cims import SetOfCims -from .structure import Structure -from .trajectory import Trajectory \ No newline at end of file diff --git a/PyCTBN/build/lib/PyCTBN/structure_graph/conditional_intensity_matrix.py b/PyCTBN/build/lib/PyCTBN/structure_graph/conditional_intensity_matrix.py deleted file mode 100644 index 4abfdd0..0000000 --- a/PyCTBN/build/lib/PyCTBN/structure_graph/conditional_intensity_matrix.py +++ /dev/null @@ -1,42 +0,0 @@ -import numpy as np - - -class ConditionalIntensityMatrix(object): - """Abstracts the Conditional Intesity matrix of a node as aggregation of the state residence times vector - and state transition matrix and the actual CIM matrix. - - :param state_residence_times: state residence times vector - :type state_residence_times: numpy.array - :param state_transition_matrix: the transitions count matrix - :type state_transition_matrix: numpy.ndArray - :_cim: the actual cim of the node - """ - def __init__(self, state_residence_times: np.array, state_transition_matrix: np.array): - """Constructor Method - """ - self._state_residence_times = state_residence_times - self._state_transition_matrix = state_transition_matrix - self._cim = self.state_transition_matrix.astype(np.float64) - - def compute_cim_coefficients(self) -> None: - """Compute the coefficients of the matrix _cim by using the following equality q_xx' = M[x, x'] / T[x]. - The class member ``_cim`` will contain the computed cim - """ - np.fill_diagonal(self._cim, self._cim.diagonal() * -1) - self._cim = ((self._cim.T + 1) / (self._state_residence_times + 1)).T - - @property - def state_residence_times(self) -> np.ndarray: - return self._state_residence_times - - @property - def state_transition_matrix(self) -> np.ndarray: - return self._state_transition_matrix - - @property - def cim(self) -> np.ndarray: - return self._cim - - def __repr__(self): - return 'CIM:\n' + str(self.cim) - diff --git a/PyCTBN/build/lib/PyCTBN/structure_graph/network_graph.py b/PyCTBN/build/lib/PyCTBN/structure_graph/network_graph.py deleted file mode 100644 index 623981d..0000000 --- a/PyCTBN/build/lib/PyCTBN/structure_graph/network_graph.py +++ /dev/null @@ -1,293 +0,0 @@ - -import typing - -import networkx as nx -import numpy as np - -from .structure import Structure - - -class NetworkGraph(object): - """Abstracts the infos contained in the Structure class in the form of a directed graph. - Has the task of creating all the necessary filtering and indexing structures for parameters estimation - - :param graph_struct: the ``Structure`` object from which infos about the net will be extracted - :type graph_struct: Structure - :_graph: directed graph - :_aggregated_info_about_nodes_parents: a structure that contains all the necessary infos - about every parents of the node of which all the indexing and filtering structures will be constructed. - :_time_scalar_indexing_structure: the indexing structure for state res time estimation - :_transition_scalar_indexing_structure: the indexing structure for transition computation - :_time_filtering: the columns filtering structure used in the computation of the state res times - :_transition_filtering: the columns filtering structure used in the computation of the transition - from one state to another - :_p_combs_structure: all the possible parents states combination for the node of interest - """ - - def __init__(self, graph_struct: Structure): - """Constructor Method - """ - self._graph_struct = graph_struct - self._graph = nx.DiGraph() - self._aggregated_info_about_nodes_parents = None - self._time_scalar_indexing_structure = None - self._transition_scalar_indexing_structure = None - self._time_filtering = None - self._transition_filtering = None - self._p_combs_structure = None - - def init_graph(self): - self.add_nodes(self._nodes_labels) - self.add_edges(self.graph_struct.edges) - self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes() - self._fancy_indexing = self.build_fancy_indexing_structure(0) - self.build_scalar_indexing_structures() - self.build_time_columns_filtering_structure() - self.build_transition_columns_filtering_structure() - self._p_combs_structure = self.build_p_combs_structure() - - def fast_init(self, node_id: str) -> None: - """Initializes all the necessary structures for parameters estimation of the node identified by the label - node_id - - :param node_id: the label of the node - :type node_id: string - """ - self.add_nodes(self._graph_struct.nodes_labels) - self.add_edges(self._graph_struct.edges) - self._aggregated_info_about_nodes_parents = self.get_ordered_by_indx_set_of_parents(node_id) - p_indxs = self._aggregated_info_about_nodes_parents[1] - p_vals = self._aggregated_info_about_nodes_parents[2] - node_states = self.get_states_number(node_id) - node_indx = self.get_node_indx(node_id) - cols_number = self._graph_struct.total_variables_number - self._time_scalar_indexing_structure = NetworkGraph.\ - build_time_scalar_indexing_structure_for_a_node(node_states, p_vals) - self._transition_scalar_indexing_structure = NetworkGraph.\ - build_transition_scalar_indexing_structure_for_a_node(node_states, p_vals) - self._time_filtering = NetworkGraph.build_time_columns_filtering_for_a_node(node_indx, p_indxs) - self._transition_filtering = NetworkGraph.build_transition_filtering_for_a_node(node_indx, p_indxs, cols_number) - self._p_combs_structure = NetworkGraph.build_p_comb_structure_for_a_node(p_vals) - - def add_nodes(self, list_of_nodes: typing.List) -> None: - """Adds the nodes to the ``_graph`` contained in the list of nodes ``list_of_nodes``. - Sets all the properties that identify a nodes (index, positional index, cardinality) - - :param list_of_nodes: the nodes to add to ``_graph`` - :type list_of_nodes: List - """ - nodes_indxs = self._graph_struct.nodes_indexes - nodes_vals = self._graph_struct.nodes_values - pos = 0 - for id, node_indx, node_val in zip(list_of_nodes, nodes_indxs, nodes_vals): - self._graph.add_node(id, indx=node_indx, val=node_val, pos_indx=pos) - pos += 1 - - def has_edge(self,edge:tuple)-> bool: - """ - Check if the graph contains a specific edge - - Parameters: - edge: a tuple that rappresents the edge - Returns: - bool - """ - return self._graph.has_edge(edge[0],edge[1]) - - def add_edges(self, list_of_edges: typing.List) -> None: - """Add the edges to the ``_graph`` contained in the list ``list_of_edges``. - - :param list_of_edges: the list containing of tuples containing the edges - :type list_of_edges: List - """ - self._graph.add_edges_from(list_of_edges) - - def remove_node(self, node_id: str) -> None: - """Remove the node ``node_id`` from all the class members. - Initialize all the filtering/indexing structures. - """ - self._graph.remove_node(node_id) - self._graph_struct.remove_node(node_id) - self.clear_indexing_filtering_structures() - - def clear_indexing_filtering_structures(self) -> None: - """Initialize all the filtering/indexing structures. - """ - self._aggregated_info_about_nodes_parents = None - self._time_scalar_indexing_structure = None - self._transition_scalar_indexing_structure = None - self._time_filtering = None - self._transition_filtering = None - self._p_combs_structure = None - - def get_ordered_by_indx_set_of_parents(self, node: str) -> typing.Tuple: - """Builds the aggregated structure that holds all the infos relative to the parent set of the node, namely - (parents_labels, parents_indexes, parents_cardinalities). - - :param node: the label of the node - :type node: string - :return: a tuple containing all the parent set infos - :rtype: Tuple - """ - parents = self.get_parents_by_id(node) - nodes = self._graph_struct.nodes_labels - d = {v: i for i, v in enumerate(nodes)} - sorted_parents = sorted(parents, key=lambda v: d[v]) - get_node_indx = self.get_node_indx - p_indxes = [get_node_indx(node) for node in sorted_parents] - p_values = [self.get_states_number(node) for node in sorted_parents] - return sorted_parents, p_indxes, p_values - - def remove_edges(self, list_of_edges: typing.List) -> None: - """Remove the edges to the graph contained in the list list_of_edges. - - :param list_of_edges: The edges to remove from the graph - :type list_of_edges: List - """ - self._graph.remove_edges_from(list_of_edges) - - @staticmethod - def build_time_scalar_indexing_structure_for_a_node(node_states: int, - parents_vals: typing.List) -> np.ndarray: - """Builds an indexing structure for the computation of state residence times values. - - :param node_states: the node cardinality - :type node_states: int - :param parents_vals: the caridinalites of the node's parents - :type parents_vals: List - :return: The time indexing structure - :rtype: numpy.ndArray - """ - T_vector = np.array([node_states]) - T_vector = np.append(T_vector, parents_vals) - T_vector = T_vector.cumprod().astype(np.int) - return T_vector - - @staticmethod - def build_transition_scalar_indexing_structure_for_a_node(node_states_number: int, parents_vals: typing.List) \ - -> np.ndarray: - """Builds an indexing structure for the computation of state transitions values. - - :param node_states_number: the node cardinality - :type node_states_number: int - :param parents_vals: the caridinalites of the node's parents - :type parents_vals: List - :return: The transition indexing structure - :rtype: numpy.ndArray - """ - M_vector = np.array([node_states_number, - node_states_number]) - M_vector = np.append(M_vector, parents_vals) - M_vector = M_vector.cumprod().astype(np.int) - return M_vector - - @staticmethod - def build_time_columns_filtering_for_a_node(node_indx: int, p_indxs: typing.List) -> np.ndarray: - """ - Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs`` - in the dataset. - This structute will be used in the computation of the state res times. - :param node_indx: the index of the node - :type node_indx: int - :param p_indxs: the indexes of the node's parents - :type p_indxs: List - :return: The filtering structure for times estimation - :rtype: numpy.ndArray - """ - return np.append(np.array([node_indx], dtype=np.int), p_indxs).astype(np.int) - - @staticmethod - def build_transition_filtering_for_a_node(node_indx: int, p_indxs: typing.List, nodes_number: int) \ - -> np.ndarray: - """Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs`` - in the dataset. - This structure will be used in the computation of the state transitions values. - :param node_indx: the index of the node - :type node_indx: int - :param p_indxs: the indexes of the node's parents - :type p_indxs: List - :param nodes_number: the total number of nodes in the dataset - :type nodes_number: int - :return: The filtering structure for transitions estimation - :rtype: numpy.ndArray - """ - return np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int) - - @staticmethod - def build_p_comb_structure_for_a_node(parents_values: typing.List) -> np.ndarray: - """ - Builds the combinatorial structure that contains the combinations of all the values contained in - ``parents_values``. - - :param parents_values: the cardinalities of the nodes - :type parents_values: List - :return: A numpy matrix containing a grid of the combinations - :rtype: numpy.ndArray - """ - tmp = [] - for val in parents_values: - tmp.append([x for x in range(val)]) - if len(parents_values) > 0: - parents_comb = np.array(np.meshgrid(*tmp)).T.reshape(-1, len(parents_values)) - if len(parents_values) > 1: - tmp_comb = parents_comb[:, 1].copy() - parents_comb[:, 1] = parents_comb[:, 0].copy() - parents_comb[:, 0] = tmp_comb - else: - parents_comb = np.array([[]], dtype=np.int) - return parents_comb - - def get_parents_by_id(self, node_id) -> typing.List: - """Returns a list of labels of the parents of the node ``node_id`` - - :param node_id: the node label - :type node_id: string - :return: a List of labels of the parents - :rtype: List - """ - return list(self._graph.predecessors(node_id)) - - def get_states_number(self, node_id) -> int: - return self._graph.nodes[node_id]['val'] - - def get_node_indx(self, node_id) -> int: - return nx.get_node_attributes(self._graph, 'indx')[node_id] - - def get_positional_node_indx(self, node_id) -> int: - return self._graph.nodes[node_id]['pos_indx'] - - @property - def nodes(self) -> typing.List: - return self._graph_struct.nodes_labels - - @property - def edges(self) -> typing.List: - return list(self._graph.edges) - - @property - def nodes_indexes(self) -> np.ndarray: - return self._graph_struct.nodes_indexes - - @property - def nodes_values(self) -> np.ndarray: - return self._graph_struct.nodes_values - - @property - def time_scalar_indexing_strucure(self) -> np.ndarray: - return self._time_scalar_indexing_structure - - @property - def time_filtering(self) -> np.ndarray: - return self._time_filtering - - @property - def transition_scalar_indexing_structure(self) -> np.ndarray: - return self._transition_scalar_indexing_structure - - @property - def transition_filtering(self) -> np.ndarray: - return self._transition_filtering - - @property - def p_combs(self) -> np.ndarray: - return self._p_combs_structure diff --git a/PyCTBN/build/lib/PyCTBN/structure_graph/sample_path.py b/PyCTBN/build/lib/PyCTBN/structure_graph/sample_path.py deleted file mode 100644 index 80b51d9..0000000 --- a/PyCTBN/build/lib/PyCTBN/structure_graph/sample_path.py +++ /dev/null @@ -1,91 +0,0 @@ - - -import numpy as np -import pandas as pd - -from .structure import Structure -from .trajectory import Trajectory -from ..utility.abstract_importer import AbstractImporter - - - -class SamplePath(object): - """Aggregates all the informations about the trajectories, the real structure of the sampled net and variables - cardinalites. Has the task of creating the objects ``Trajectory`` and ``Structure`` that will - contain the mentioned data. - - :param importer: the Importer object which contains the imported and processed data - :type importer: AbstractImporter - :_trajectories: the ``Trajectory`` object that will contain all the concatenated trajectories - :_structure: the ``Structure`` Object that will contain all the structural infos about the net - :_total_variables_count: the number of variables in the net - """ - def __init__(self, importer: AbstractImporter): - """Constructor Method - """ - self._importer = importer - if self._importer._df_variables is None or self._importer._concatenated_samples is None: - raise RuntimeError('The importer object has to contain the all processed data!') - if self._importer._df_variables.empty: - raise RuntimeError('The importer object has to contain the all processed data!') - if isinstance(self._importer._concatenated_samples, pd.DataFrame): - if self._importer._concatenated_samples.empty: - raise RuntimeError('The importer object has to contain the all processed data!') - if isinstance(self._importer._concatenated_samples, np.ndarray): - if self._importer._concatenated_samples.size == 0: - raise RuntimeError('The importer object has to contain the all processed data!') - self._trajectories = None - self._structure = None - self._total_variables_count = None - - def build_trajectories(self) -> None: - """Builds the Trajectory object that will contain all the trajectories. - Clears all the unused dataframes in ``_importer`` Object - """ - self._trajectories = \ - Trajectory(self._importer.build_list_of_samples_array(self._importer.concatenated_samples), - len(self._importer.sorter) + 1) - self._importer.clear_concatenated_frame() - - def build_structure(self) -> None: - """ - Builds the ``Structure`` object that aggregates all the infos about the net. - """ - if self._importer.sorter != self._importer.variables.iloc[:, 0].to_list(): - raise RuntimeError("The Dataset columns order have to match the order of labels in the variables Frame!") - - self._total_variables_count = len(self._importer.sorter) - labels = self._importer.variables.iloc[:, 0].to_list() - indxs = self._importer.variables.index.to_numpy() - vals = self._importer.variables.iloc[:, 1].to_numpy() - if self._importer.structure is None or self._importer.structure.empty: - edges = [] - else: - edges = list(self._importer.structure.to_records(index=False)) - self._structure = Structure(labels, indxs, vals, edges, - self._total_variables_count) - - def clear_memory(self): - self._importer._raw_data = [] - - @property - def trajectories(self) -> Trajectory: - return self._trajectories - - @property - def structure(self) -> Structure: - return self._structure - - @property - def total_variables_count(self) -> int: - return self._total_variables_count - - @property - def has_prior_net_structure(self) -> bool: - return bool(self._structure.edges) - - - - - - diff --git a/PyCTBN/build/lib/PyCTBN/structure_graph/set_of_cims.py b/PyCTBN/build/lib/PyCTBN/structure_graph/set_of_cims.py deleted file mode 100644 index 81caff5..0000000 --- a/PyCTBN/build/lib/PyCTBN/structure_graph/set_of_cims.py +++ /dev/null @@ -1,97 +0,0 @@ - - -import typing - -import numpy as np - -from .conditional_intensity_matrix import ConditionalIntensityMatrix - - -class SetOfCims(object): - """Aggregates all the CIMS of the node identified by the label _node_id. - - :param node_id: the node label - :type node_ind: string - :param parents_states_number: the cardinalities of the parents - :type parents_states_number: List - :param node_states_number: the caridinality of the node - :type node_states_number: int - :param p_combs: the p_comb structure bound to this node - :type p_combs: numpy.ndArray - :_state_residence_time: matrix containing all the state residence time vectors for the node - :_transition_matrices: matrix containing all the transition matrices for the node - :_actual_cims: the cims of the node - """ - - def __init__(self, node_id: str, parents_states_number: typing.List, node_states_number: int, p_combs: np.ndarray): - """Constructor Method - """ - self._node_id = node_id - self._parents_states_number = parents_states_number - self._node_states_number = node_states_number - self._actual_cims = [] - self._state_residence_times = None - self._transition_matrices = None - self._p_combs = p_combs - self.build_times_and_transitions_structures() - - def build_times_and_transitions_structures(self) -> None: - """Initializes at the correct dimensions the state residence times matrix and the state transition matrices. - """ - if not self._parents_states_number: - self._state_residence_times = np.zeros((1, self._node_states_number), dtype=np.float) - self._transition_matrices = np.zeros((1, self._node_states_number, self._node_states_number), dtype=np.int) - else: - self._state_residence_times = \ - np.zeros((np.prod(self._parents_states_number), self._node_states_number), dtype=np.float) - self._transition_matrices = np.zeros([np.prod(self._parents_states_number), self._node_states_number, - self._node_states_number], dtype=np.int) - - def build_cims(self, state_res_times: np.ndarray, transition_matrices: np.ndarray) -> None: - """Build the ``ConditionalIntensityMatrix`` objects given the state residence times and transitions matrices. - Compute the cim coefficients.The class member ``_actual_cims`` will contain the computed cims. - - :param state_res_times: the state residence times matrix - :type state_res_times: numpy.ndArray - :param transition_matrices: the transition matrices - :type transition_matrices: numpy.ndArray - """ - for state_res_time_vector, transition_matrix in zip(state_res_times, transition_matrices): - cim_to_add = ConditionalIntensityMatrix(state_res_time_vector, transition_matrix) - cim_to_add.compute_cim_coefficients() - self._actual_cims.append(cim_to_add) - self._actual_cims = np.array(self._actual_cims) - self._transition_matrices = None - self._state_residence_times = None - - def filter_cims_with_mask(self, mask_arr: np.ndarray, comb: typing.List) -> np.ndarray: - """Filter the cims contained in the array ``_actual_cims`` given the boolean mask ``mask_arr`` and the index - ``comb``. - - :param mask_arr: the boolean mask that indicates which parent to consider - :type mask_arr: numpy.array - :param comb: the state/s of the filtered parents - :type comb: numpy.array - :return: Array of ``ConditionalIntensityMatrix`` objects - :rtype: numpy.array - """ - if mask_arr.size <= 1: - return self._actual_cims - else: - flat_indxs = np.argwhere(np.all(self._p_combs[:, mask_arr] == comb, axis=1)).ravel() - return self._actual_cims[flat_indxs] - - @property - def actual_cims(self) -> np.ndarray: - return self._actual_cims - - @property - def p_combs(self) -> np.ndarray: - return self._p_combs - - def get_cims_number(self): - return len(self._actual_cims) - - - - diff --git a/PyCTBN/build/lib/PyCTBN/structure_graph/structure.py b/PyCTBN/build/lib/PyCTBN/structure_graph/structure.py deleted file mode 100644 index a9d60cc..0000000 --- a/PyCTBN/build/lib/PyCTBN/structure_graph/structure.py +++ /dev/null @@ -1,124 +0,0 @@ - -import typing as ty - -import numpy as np - - -class Structure(object): - """Contains all the infos about the network structure(nodes labels, nodes caridinalites, edges, indexes) - - :param nodes_labels_list: the symbolic names of the variables - :type nodes_labels_list: List - :param nodes_indexes_arr: the indexes of the nodes - :type nodes_indexes_arr: numpy.ndArray - :param nodes_vals_arr: the cardinalites of the nodes - :type nodes_vals_arr: numpy.ndArray - :param edges_list: the edges of the network - :type edges_list: List - :param total_variables_number: the total number of variables in the dataset - :type total_variables_number: int - """ - - def __init__(self, nodes_labels_list: ty.List, nodes_indexes_arr: np.ndarray, nodes_vals_arr: np.ndarray, - edges_list: ty.List, total_variables_number: int): - """Constructor Method - """ - self._nodes_labels_list = nodes_labels_list - self._nodes_indexes_arr = nodes_indexes_arr - self._nodes_vals_arr = nodes_vals_arr - self._edges_list = edges_list - self._total_variables_number = total_variables_number - - def remove_node(self, node_id: str) -> None: - """Remove the node ``node_id`` from all the class members. - The class member ``_total_variables_number`` since it refers to the total number of variables in the dataset. - """ - node_positional_indx = self._nodes_labels_list.index(node_id) - del self._nodes_labels_list[node_positional_indx] - self._nodes_indexes_arr = np.delete(self._nodes_indexes_arr, node_positional_indx) - self._nodes_vals_arr = np.delete(self._nodes_vals_arr, node_positional_indx) - self._edges_list = [(from_node, to_node) for (from_node, to_node) in self._edges_list if (from_node != node_id - and to_node != node_id)] - - @property - def edges(self) -> ty.List: - return self._edges_list - - @property - def nodes_labels(self) -> ty.List: - return self._nodes_labels_list - - @property - def nodes_indexes(self) -> np.ndarray: - return self._nodes_indexes_arr - - @property - def nodes_values(self) -> np.ndarray: - return self._nodes_vals_arr - - @property - def total_variables_number(self) -> int: - return self._total_variables_number - - def get_node_id(self, node_indx: int) -> str: - """Given the ``node_index`` returns the node label. - - :param node_indx: the node index - :type node_indx: int - :return: the node label - :rtype: string - """ - return self._nodes_labels_list[node_indx] - - def clean_structure_edges(self): - self._edges_list = list() - - def add_edge(self,edge: tuple): - self._edges_list.append(tuple) - print(self._edges_list) - - def remove_edge(self,edge: tuple): - self._edges_list.remove(tuple) - - def contains_edge(self,edge:tuple) -> bool: - return edge in self._edges_list - - def get_node_indx(self, node_id: str) -> int: - """Given the ``node_index`` returns the node label. - - :param node_id: the node label - :type node_id: string - :return: the node index - :rtype: int - """ - pos_indx = self._nodes_labels_list.index(node_id) - return self._nodes_indexes_arr[pos_indx] - - def get_positional_node_indx(self, node_id: str) -> int: - return self._nodes_labels_list.index(node_id) - - def get_states_number(self, node: str) -> int: - """Given the node label ``node`` returns the cardinality of the node. - - :param node: the node label - :type node: string - :return: the node cardinality - :rtype: int - """ - pos_indx = self._nodes_labels_list.index(node) - return self._nodes_vals_arr[pos_indx] - - def __repr__(self): - return "Variables:\n" + str(self._nodes_labels_list) +"\nValues:\n"+ str(self._nodes_vals_arr) +\ - "\nEdges: \n" + str(self._edges_list) - - def __eq__(self, other): - """Overrides the default implementation""" - if isinstance(other, Structure): - return set(self._nodes_labels_list) == set(other._nodes_labels_list) and \ - np.array_equal(self._nodes_vals_arr, other._nodes_vals_arr) and \ - np.array_equal(self._nodes_indexes_arr, other._nodes_indexes_arr) and \ - self._edges_list == other._edges_list - - return False - diff --git a/PyCTBN/build/lib/PyCTBN/structure_graph/trajectory.py b/PyCTBN/build/lib/PyCTBN/structure_graph/trajectory.py deleted file mode 100644 index 36899b3..0000000 --- a/PyCTBN/build/lib/PyCTBN/structure_graph/trajectory.py +++ /dev/null @@ -1,45 +0,0 @@ - -import typing - -import numpy as np - - -class Trajectory(object): - """ Abstracts the infos about a complete set of trajectories, represented as a numpy array of doubles - (the time deltas) and a numpy matrix of ints (the changes of states). - - :param list_of_columns: the list containing the times array and values matrix - :type list_of_columns: List - :param original_cols_number: total number of cols in the data - :type original_cols_number: int - :_actual_trajectory: the trajectory containing also the duplicated/shifted values - :_times: the array containing the time deltas - """ - - def __init__(self, list_of_columns: typing.List, original_cols_number: int): - """Constructor Method - """ - self._times = list_of_columns[0] - self._actual_trajectory = list_of_columns[1] - self._original_cols_number = original_cols_number - - @property - def trajectory(self) -> np.ndarray: - return self._actual_trajectory[:, :self._original_cols_number - 1] - - @property - def complete_trajectory(self) -> np.ndarray: - return self._actual_trajectory - - @property - def times(self): - return self._times - - def size(self): - return self._actual_trajectory.shape[0] - - def __repr__(self): - return "Complete Trajectory Rows: " + str(self.size()) + "\n" + self.complete_trajectory.__repr__() + \ - "\nTimes Rows:" + str(self.times.size) + "\n" + self.times.__repr__() - - diff --git a/PyCTBN/build/lib/PyCTBN/utility/__init__.py b/PyCTBN/build/lib/PyCTBN/utility/__init__.py deleted file mode 100644 index f79749c..0000000 --- a/PyCTBN/build/lib/PyCTBN/utility/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .abstract_importer import AbstractImporter -from .cache import Cache -from .json_importer import JsonImporter -from .sample_importer import SampleImporter \ No newline at end of file diff --git a/PyCTBN/build/lib/PyCTBN/utility/abstract_importer.py b/PyCTBN/build/lib/PyCTBN/utility/abstract_importer.py deleted file mode 100644 index 1cad352..0000000 --- a/PyCTBN/build/lib/PyCTBN/utility/abstract_importer.py +++ /dev/null @@ -1,164 +0,0 @@ - -import typing -from abc import ABC, abstractmethod - -import numpy as np -import pandas as pd - -import copy - -#from sklearn.utils import resample - - -class AbstractImporter(ABC): - """Abstract class that exposes all the necessary methods to process the trajectories and the net structure. - - :param file_path: the file path, or dataset name if you import already processed data - :type file_path: str - :param trajectory_list: Dataframe or numpy array containing the concatenation of all the processed trajectories - :type trajectory_list: typing.Union[pandas.DataFrame, numpy.ndarray] - :param variables: Dataframe containing the nodes labels and cardinalities - :type variables: pandas.DataFrame - :prior_net_structure: Dataframe containing the structure of the network (edges) - :type prior_net_structure: pandas.DataFrame - :_sorter: A list containing the variables labels in the SAME order as the columns in ``concatenated_samples`` - - .. warning:: - The parameters ``variables`` and ``prior_net_structure`` HAVE to be properly constructed - as Pandas Dataframes with the following structure: - Header of _df_structure = [From_Node | To_Node] - Header of _df_variables = [Variable_Label | Variable_Cardinality] - See the tutorial on how to construct a correct ``concatenated_samples`` Dataframe/ndarray. - - .. note:: - See :class:``JsonImporter`` for an example implementation - - """ - - def __init__(self, file_path: str = None, trajectory_list: typing.Union[pd.DataFrame, np.ndarray] = None, - variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None): - """Constructor - """ - self._file_path = file_path - self._df_samples_list = trajectory_list - self._concatenated_samples = [] - self._df_variables = variables - self._df_structure = prior_net_structure - self._sorter = None - super().__init__() - - @abstractmethod - def build_sorter(self, trajecory_header: object) -> typing.List: - """Initializes the ``_sorter`` class member from a trajectory dataframe, exctracting the header of the frame - and keeping ONLY the variables symbolic labels, cutting out the time label in the header. - - :param trajecory_header: an object that will be used to define the header - :type trajecory_header: object - :return: A list containing the processed header. - :rtype: List - """ - pass - - def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame, - columns_header: typing.List, shifted_cols_header: typing.List) \ - -> pd.DataFrame: - """Computes the difference between each value present in th time column. - Copies and shift by one position up all the values present in the remaining columns. - - :param sample_frame: the traj to be processed - :type sample_frame: pandas.Dataframe - :param columns_header: the original header of sample_frame - :type columns_header: List - :param shifted_cols_header: a copy of columns_header with changed names of the contents - :type shifted_cols_header: List - :return: The processed dataframe - :rtype: pandas.Dataframe - - .. warning:: - the Dataframe ``sample_frame`` has to follow the column structure of this header: - Header of sample_frame = [Time | Variable values] - """ - sample_frame = copy.deepcopy(sample_frame) - sample_frame.iloc[:, 0] = sample_frame.iloc[:, 0].diff().shift(-1) - shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32') - shifted_cols.columns = shifted_cols_header - sample_frame = sample_frame.assign(**shifted_cols) - sample_frame.drop(sample_frame.tail(1).index, inplace=True) - return sample_frame - - def compute_row_delta_in_all_samples_frames(self, df_samples_list: typing.List) -> None: - """Calls the method ``compute_row_delta_sigle_samples_frame`` on every dataframe present in the list - ``df_samples_list``. - Concatenates the result in the dataframe ``concatanated_samples`` - - :param df_samples_list: the datframe's list to be processed and concatenated - :type df_samples_list: List - - .. warning:: - The Dataframe sample_frame has to follow the column structure of this header: - Header of sample_frame = [Time | Variable values] - The class member self._sorter HAS to be properly INITIALIZED (See class members definition doc) - .. note:: - After the call of this method the class member ``concatanated_samples`` will contain all processed - and merged trajectories - """ - if not self._sorter: - raise RuntimeError("The class member self._sorter has to be INITIALIZED!") - shifted_cols_header = [s + "S" for s in self._sorter] - compute_row_delta = self.compute_row_delta_sigle_samples_frame - proc_samples_list = [compute_row_delta(sample, self._sorter, shifted_cols_header) - for sample in df_samples_list] - self._concatenated_samples = pd.concat(proc_samples_list) - - complete_header = self._sorter[:] - complete_header.insert(0,'Time') - complete_header.extend(shifted_cols_header) - self._concatenated_samples = self._concatenated_samples[complete_header] - - def build_list_of_samples_array(self, concatenated_sample: pd.DataFrame) -> typing.List: - """Builds a List containing the the delta times numpy array, and the complete transitions matrix - - :param concatenated_sample: the dataframe/array from which the time, and transitions matrix have to be extracted - and converted - :type concatenated_sample: pandas.Dataframe - :return: the resulting list of numpy arrays - :rtype: List - """ - - concatenated_array = concatenated_sample.to_numpy() - columns_list = [concatenated_array[:, 0], concatenated_array[:, 1:].astype(int)] - - return columns_list - - def clear_concatenated_frame(self) -> None: - """Removes all values in the dataframe concatenated_samples. - """ - if isinstance(self._concatenated_samples, pd.DataFrame): - self._concatenated_samples = self._concatenated_samples.iloc[0:0] - - @abstractmethod - def dataset_id(self) -> object: - """If the original dataset contains multiple dataset, this method returns a unique id to identify the current - dataset - """ - pass - - @property - def concatenated_samples(self) -> pd.DataFrame: - return self._concatenated_samples - - @property - def variables(self) -> pd.DataFrame: - return self._df_variables - - @property - def structure(self) -> pd.DataFrame: - return self._df_structure - - @property - def sorter(self) -> typing.List: - return self._sorter - - @property - def file_path(self) -> str: - return self._file_path diff --git a/PyCTBN/build/lib/PyCTBN/utility/cache.py b/PyCTBN/build/lib/PyCTBN/utility/cache.py deleted file mode 100644 index 8e0369b..0000000 --- a/PyCTBN/build/lib/PyCTBN/utility/cache.py +++ /dev/null @@ -1,58 +0,0 @@ - -import typing - -from ..structure_graph.set_of_cims import SetOfCims - - -class Cache: - """This class acts as a cache of ``SetOfCims`` objects for a node. - - :__list_of_sets_of_parents: a list of ``Sets`` objects of the parents to which the cim in cache at SAME - index is related - :__actual_cache: a list of setOfCims objects - """ - - def __init__(self): - """Constructor Method - """ - self._list_of_sets_of_parents = [] - self._actual_cache = [] - - def find(self, parents_comb: typing.Set): #typing.Union[typing.Set, str] - """ - Tries to find in cache given the symbolic parents combination ``parents_comb`` the ``SetOfCims`` - related to that ``parents_comb``. - - :param parents_comb: the parents related to that ``SetOfCims`` - :type parents_comb: Set - :return: A ``SetOfCims`` object if the ``parents_comb`` index is found in ``__list_of_sets_of_parents``. - None otherwise. - :rtype: SetOfCims - """ - try: - #print("Cache State:", self.list_of_sets_of_indxs) - #print("Look For:", parents_comb) - result = self._actual_cache[self._list_of_sets_of_parents.index(parents_comb)] - #print("CACHE HIT!!!!", parents_comb) - return result - except ValueError: - return None - - def put(self, parents_comb: typing.Set, socim: SetOfCims): - """Place in cache the ``SetOfCims`` object, and the related symbolic index ``parents_comb`` in - ``__list_of_sets_of_parents``. - - :param parents_comb: the symbolic set index - :type parents_comb: Set - :param socim: the related SetOfCims object - :type socim: SetOfCims - """ - #print("Putting in cache:", parents_comb) - self._list_of_sets_of_parents.append(parents_comb) - self._actual_cache.append(socim) - - def clear(self): - """Clear the contents both of ``__actual_cache`` and ``__list_of_sets_of_parents``. - """ - del self._list_of_sets_of_parents[:] - del self._actual_cache[:] \ No newline at end of file diff --git a/PyCTBN/build/lib/PyCTBN/utility/json_importer.py b/PyCTBN/build/lib/PyCTBN/utility/json_importer.py deleted file mode 100644 index edff212..0000000 --- a/PyCTBN/build/lib/PyCTBN/utility/json_importer.py +++ /dev/null @@ -1,176 +0,0 @@ -import json -import typing - -import pandas as pd - - -from .abstract_importer import AbstractImporter - - -class JsonImporter(AbstractImporter): - """Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare - the data in json extension. - - :param file_path: the path of the file that contains tha data to be imported - :type file_path: string - :param samples_label: the reference key for the samples in the trajectories - :type samples_label: string - :param structure_label: the reference key for the structure of the network data - :type structure_label: string - :param variables_label: the reference key for the cardinalites of the nodes data - :type variables_label: string - :param time_key: the key used to identify the timestamps in each trajectory - :type time_key: string - :param variables_key: the key used to identify the names of the variables in the net - :type variables_key: string - :_array_indx: the index of the outer JsonArray to extract the data from - :type _array_indx: int - :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory - :_raw_data: The raw contents of the json file to import - :type _raw_data: List - """ - - def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str, - variables_key: str): - """Constructor method - - .. note:: - This constructor calls also the method ``read_json_file()``, so after the construction of the object - the class member ``_raw_data`` will contain the raw imported json data. - - """ - self._samples_label = samples_label - self._structure_label = structure_label - self._variables_label = variables_label - self._time_key = time_key - self._variables_key = variables_key - self._df_samples_list = None - self._array_indx = None - super(JsonImporter, self).__init__(file_path) - self._raw_data = self.read_json_file() - - def import_data(self, indx: int) -> None: - """Implements the abstract method of :class:`AbstractImporter`. - - :param indx: the index of the outer JsonArray to extract the data from - :type indx: int - """ - self._array_indx = indx - self._df_samples_list = self.import_trajectories(self._raw_data) - self._sorter = self.build_sorter(self._df_samples_list[0]) - self.compute_row_delta_in_all_samples_frames(self._df_samples_list) - self.clear_data_frame_list() - self._df_structure = self.import_structure(self._raw_data) - self._df_variables = self.import_variables(self._raw_data) - - def import_trajectories(self, raw_data: typing.List) -> typing.List: - """Imports the trajectories from the list of dicts ``raw_data``. - - :param raw_data: List of Dicts - :type raw_data: List - :return: List of dataframes containing all the trajectories - :rtype: List - """ - return self.normalize_trajectories(raw_data, self._array_indx, self._samples_label) - - def import_structure(self, raw_data: typing.List) -> pd.DataFrame: - """Imports in a dataframe the data in the list raw_data at the key ``_structure_label`` - - :param raw_data: List of Dicts - :type raw_data: List - :return: Dataframe containg the starting node a ending node of every arc of the network - :rtype: pandas.Dataframe - """ - return self.one_level_normalizing(raw_data, self._array_indx, self._structure_label) - - def import_variables(self, raw_data: typing.List) -> pd.DataFrame: - """Imports the data in ``raw_data`` at the key ``_variables_label``. - - :param raw_data: List of Dicts - :type raw_data: List - :return: Datframe containg the variables simbolic labels and their cardinalities - :rtype: pandas.Dataframe - """ - return self.one_level_normalizing(raw_data, self._array_indx, self._variables_label) - - def read_json_file(self) -> typing.List: - """Reads the JSON file in the path self.filePath. - - :return: The contents of the json file - :rtype: List - """ - with open(self._file_path) as f: - data = json.load(f) - return data - - def one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame: - """Extracts the one-level nested data in the list ``raw_data`` at the index ``indx`` at the key ``key``. - - :param raw_data: List of Dicts - :type raw_data: List - :param indx: The index of the array from which the data have to be extracted - :type indx: int - :param key: the key for the Dicts from which exctract data - :type key: string - :return: A normalized dataframe - :rtype: pandas.Datframe - """ - return pd.DataFrame(raw_data[indx][key]) - - def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str) -> typing.List: - """ - Extracts the trajectories in ``raw_data`` at the index ``index`` at the key ``trajectories key``. - - :param raw_data: List of Dicts - :type raw_data: List - :param indx: The index of the array from which the data have to be extracted - :type indx: int - :param trajectories_key: the key of the trajectories objects - :type trajectories_key: string - :return: A list of daframes containg the trajectories - :rtype: List - """ - dataframe = pd.DataFrame - smps = raw_data[indx][trajectories_key] - df_samples_list = [dataframe(sample) for sample in smps] - return df_samples_list - - def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List: - """Implements the abstract method build_sorter of the :class:`AbstractImporter` for this dataset. - """ - columns_header = list(sample_frame.columns.values) - columns_header.remove(self._time_key) - return columns_header - - def clear_data_frame_list(self) -> None: - """Removes all values present in the dataframes in the list ``_df_samples_list``. - """ - for indx in range(len(self._df_samples_list)): - self._df_samples_list[indx] = self._df_samples_list[indx].iloc[0:0] - - def dataset_id(self) -> object: - return self._array_indx - - def import_sampled_cims(self, raw_data: typing.List, indx: int, cims_key: str) -> typing.Dict: - """Imports the synthetic CIMS in the dataset in a dictionary, using variables labels - as keys for the set of CIMS of a particular node. - - :param raw_data: List of Dicts - :type raw_data: List - :param indx: The index of the array from which the data have to be extracted - :type indx: int - :param cims_key: the key where the json object cims are placed - :type cims_key: string - :return: a dictionary containing the sampled CIMS for all the variables in the net - :rtype: Dictionary - """ - cims_for_all_vars = {} - for var in raw_data[indx][cims_key]: - sampled_cims_list = [] - cims_for_all_vars[var] = sampled_cims_list - for p_comb in raw_data[indx][cims_key][var]: - cims_for_all_vars[var].append(pd.DataFrame(raw_data[indx][cims_key][var][p_comb]).to_numpy()) - return cims_for_all_vars - - - diff --git a/PyCTBN/build/lib/PyCTBN/utility/sample_importer.py b/PyCTBN/build/lib/PyCTBN/utility/sample_importer.py deleted file mode 100644 index 05073c8..0000000 --- a/PyCTBN/build/lib/PyCTBN/utility/sample_importer.py +++ /dev/null @@ -1,65 +0,0 @@ -import json -import typing - -import pandas as pd -import numpy as np - -from .abstract_importer import AbstractImporter - - - -class SampleImporter(AbstractImporter): - """Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare - the data loaded directly by using DataFrame - - :param trajectory_list: the data that describes the trajectories - :type trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List] - :param variables: the data that describes the variables with name and cardinality - :type variables: typing.Union[pd.DataFrame, np.ndarray, typing.List] - :param prior_net_structure: the data of the real structure, if it exists - :type prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List] - - :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory - :_raw_data: The raw contents of the json file to import - :type _raw_data: List - """ - - def __init__(self, - trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None, - variables: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None, - prior_net_structure: typing.Union[pd.DataFrame, np.ndarray,typing.List] = None): - - 'If the data are not DataFrame, it will be converted' - if isinstance(variables,list) or isinstance(variables,np.ndarray): - variables = pd.DataFrame(variables) - if isinstance(variables,list) or isinstance(variables,np.ndarray): - prior_net_structure=pd.DataFrame(prior_net_structure) - - super(SampleImporter, self).__init__(trajectory_list =trajectory_list, - variables= variables, - prior_net_structure=prior_net_structure) - - def import_data(self, header_column = None): - - if header_column is not None: - self._sorter = header_column - else: - self._sorter = self.build_sorter(self._df_samples_list[0]) - - samples_list= self._df_samples_list - - if isinstance(samples_list, np.ndarray): - samples_list = samples_list.tolist() - - self.compute_row_delta_in_all_samples_frames(samples_list) - - def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List: - """Implements the abstract method build_sorter of the :class:`AbstractImporter` in order to get the ordered variables list. - """ - columns_header = list(sample_frame.columns.values) - del columns_header[0] - return columns_header - - - def dataset_id(self) -> object: - pass \ No newline at end of file diff --git a/PyCTBN/build/lib/classes/__init__.py b/PyCTBN/build/lib/classes/__init__.py deleted file mode 100644 index faff79c..0000000 --- a/PyCTBN/build/lib/classes/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -import PyCTBN.PyCTBN.estimators -from PyCTBN.PyCTBN.estimators import * -import PyCTBN.PyCTBN.optimizers -from PyCTBN.PyCTBN.optimizers import * -import PyCTBN.PyCTBN.structure_graph -from PyCTBN.PyCTBN.structure_graph import * -import PyCTBN.PyCTBN.utility -from PyCTBN.PyCTBN.utility import * \ No newline at end of file diff --git a/PyCTBN/build/lib/classes/estimators/__init__.py b/PyCTBN/build/lib/classes/estimators/__init__.py deleted file mode 100644 index 112086f..0000000 --- a/PyCTBN/build/lib/classes/estimators/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .fam_score_calculator import FamScoreCalculator -from .parameters_estimator import ParametersEstimator -from .structure_estimator import StructureEstimator -from .structure_constraint_based_estimator import StructureConstraintBasedEstimator -from .structure_score_based_estimator import StructureScoreBasedEstimator diff --git a/PyCTBN/build/lib/classes/estimators/fam_score_calculator.py b/PyCTBN/build/lib/classes/estimators/fam_score_calculator.py deleted file mode 100644 index 5b0b591..0000000 --- a/PyCTBN/build/lib/classes/estimators/fam_score_calculator.py +++ /dev/null @@ -1,272 +0,0 @@ - -import itertools -import json -import typing - -import networkx as nx -import numpy as np -from networkx.readwrite import json_graph - -from math import log - -from scipy.special import loggamma -from random import choice - -from ..structure_graph.set_of_cims import SetOfCims -from ..structure_graph.network_graph import NetworkGraph -from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix - - -''' - -''' - - -class FamScoreCalculator: - """ - Has the task of calculating the FamScore of a node by using a Bayesian score function - """ - - def __init__(self): - #np.seterr('raise') - pass - - # region theta - - def marginal_likelihood_theta(self, - cims: ConditionalIntensityMatrix, - alpha_xu: float, - alpha_xxu: float): - """ - Calculate the FamScore value of the node identified by the label node_id - - :param cims: np.array with all the node's cims - :type cims: np.array - :param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 0.1 - :type alpha_xu: float - :param alpha_xxu: distribuited hyperparameter over the CTBN’s theta parameters - :type alpha_xxu: float - - :return: the value of the marginal likelihood over theta - :rtype: float - """ - return np.sum( - [self.variable_cim_xu_marginal_likelihood_theta(cim, - alpha_xu, - alpha_xxu) - for cim in cims]) - - def variable_cim_xu_marginal_likelihood_theta(self, - cim: ConditionalIntensityMatrix, - alpha_xu: float, - alpha_xxu: float): - """ - Calculate the value of the marginal likelihood over theta given a cim - - :param cim: A conditional_intensity_matrix object with the sufficient statistics - :type cim: class:'ConditionalIntensityMatrix' - :param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 0.1 - :type alpha_xu: float - :param alpha_xxu: distribuited hyperparameter over the CTBN’s theta parameters - :type alpha_xxu: float - - :return: the value of the marginal likelihood over theta - :rtype: float - """ - - 'get cim length' - values = len(cim._state_residence_times) - - 'compute the marginal likelihood for the current cim' - return np.sum([ - self.single_cim_xu_marginal_likelihood_theta( - index, - cim, - alpha_xu, - alpha_xxu) - for index in range(values)]) - - def single_cim_xu_marginal_likelihood_theta(self, - index: int, - cim: ConditionalIntensityMatrix, - alpha_xu: float, - alpha_xxu: float): - """ - Calculate the marginal likelihood on q of the node when assumes a specif value - and a specif parents's assignment - - :param cim: A conditional_intensity_matrix object with the sufficient statistics - :type cim: class:'ConditionalIntensityMatrix' - :param alpha_xu: hyperparameter over the CTBN’s q parameters - :type alpha_xu: float - :param alpha_xxu: distribuited hyperparameter over the CTBN’s theta parameters - :type alpha_xxu: float - - :return: the value of the marginal likelihood over theta when the node assumes a specif value - :rtype: float - """ - - values = list(range(len(cim._state_residence_times))) - - 'remove the index because of the x != x^ condition in the summation ' - values.remove(index) - - 'uncomment for alpha xx not uniform' - #alpha_xxu = alpha_xu * cim.state_transition_matrix[index,index_x_first] / cim.state_transition_matrix[index, index]) - - return (loggamma(alpha_xu) - loggamma(alpha_xu + cim.state_transition_matrix[index, index])) \ - + \ - np.sum([self.single_internal_cim_xxu_marginal_likelihood_theta( - cim.state_transition_matrix[index,index_x_first], - alpha_xxu) - for index_x_first in values]) - - - def single_internal_cim_xxu_marginal_likelihood_theta(self, - M_xxu_suff_stats: float, - alpha_xxu: float=1): - """Calculate the second part of the marginal likelihood over theta formula - - :param M_xxu_suff_stats: value of the suffucient statistic M[xx'|u] - :type M_xxu_suff_stats: float - :param alpha_xxu: distribuited hyperparameter over the CTBN’s theta parameters - :type alpha_xxu: float - - :return: the value of the marginal likelihood over theta when the node assumes a specif value - :rtype: float - """ - return loggamma(alpha_xxu+M_xxu_suff_stats) - loggamma(alpha_xxu) - - # endregion - - # region q - - def marginal_likelihood_q(self, - cims: np.array, - tau_xu: float=0.1, - alpha_xu: float=1): - """ - Calculate the value of the marginal likelihood over q of the node identified by the label node_id - - :param cims: np.array with all the node's cims - :type cims: np.array - :param tau_xu: hyperparameter over the CTBN’s q parameters - :type tau_xu: float - :param alpha_xu: hyperparameter over the CTBN’s q parameters - :type alpha_xu: float - - - :return: the value of the marginal likelihood over q - :rtype: float - """ - - return np.sum([self.variable_cim_xu_marginal_likelihood_q(cim, tau_xu, alpha_xu) for cim in cims]) - - def variable_cim_xu_marginal_likelihood_q(self, - cim: ConditionalIntensityMatrix, - tau_xu: float=0.1, - alpha_xu: float=1): - """ - Calculate the value of the marginal likelihood over q given a cim - - :param cim: A conditional_intensity_matrix object with the sufficient statistics - :type cim: class:'ConditionalIntensityMatrix' - :param tau_xu: hyperparameter over the CTBN’s q parameters - :type tau_xu: float - :param alpha_xu: hyperparameter over the CTBN’s q parameters - :type alpha_xu: float - - - :return: the value of the marginal likelihood over q - :rtype: float - """ - - 'get cim length' - values=len(cim._state_residence_times) - - 'compute the marginal likelihood for the current cim' - return np.sum([ - self.single_cim_xu_marginal_likelihood_q( - cim.state_transition_matrix[index, index], - cim._state_residence_times[index], - tau_xu, - alpha_xu) - for index in range(values)]) - - - def single_cim_xu_marginal_likelihood_q(self, - M_xu_suff_stats: float, - T_xu_suff_stats: float, - tau_xu: float=0.1, - alpha_xu: float=1): - """ - Calculate the marginal likelihood on q of the node when assumes a specif value - and a specif parents's assignment - - :param M_xu_suff_stats: value of the suffucient statistic M[x|u] - :type M_xxu_suff_stats: float - :param T_xu_suff_stats: value of the suffucient statistic T[x|u] - :type T_xu_suff_stats: float - :param cim: A conditional_intensity_matrix object with the sufficient statistics - :type cim: class:'ConditionalIntensityMatrix' - :param tau_xu: hyperparameter over the CTBN’s q parameters - :type tau_xu: float - :param alpha_xu: hyperparameter over the CTBN’s q parameters - :type alpha_xu: float - - - :return: the value of the marginal likelihood of the node when assumes a specif value - :rtype: float - """ - return ( - loggamma(alpha_xu + M_xu_suff_stats + 1) + - (log(tau_xu) - * - (alpha_xu+1)) - ) \ - - \ - (loggamma(alpha_xu + 1)+( - log(tau_xu + T_xu_suff_stats) - * - (alpha_xu + M_xu_suff_stats + 1)) - ) - - # end region - - def get_fam_score(self, - cims: np.array, - tau_xu: float=0.1, - alpha_xu: float=1): - """ - Calculate the FamScore value of the node - - - :param cims: np.array with all the node's cims - :type cims: np.array - :param tau_xu: hyperparameter over the CTBN’s q parameters, default to 0.1 - :type tau_xu: float, optional - :param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 1 - :type alpha_xu: float, optional - - - :return: the FamScore value of the node - :rtype: float - """ - #print("------") - #print(self.marginal_likelihood_q(cims, - # tau_xu, - # alpha_xu)) - - #print(self.marginal_likelihood_theta(cims, - # alpha_xu, - # alpha_xxu)) - 'calculate alpha_xxu as a uniform distribution' - alpha_xxu = alpha_xu /(len(cims[0]._state_residence_times) - 1) - - return self.marginal_likelihood_q(cims, - tau_xu, - alpha_xu) \ - + \ - self.marginal_likelihood_theta(cims, - alpha_xu, - alpha_xxu) diff --git a/PyCTBN/build/lib/classes/estimators/parameters_estimator.py b/PyCTBN/build/lib/classes/estimators/parameters_estimator.py deleted file mode 100644 index 4754d58..0000000 --- a/PyCTBN/build/lib/classes/estimators/parameters_estimator.py +++ /dev/null @@ -1,143 +0,0 @@ -import sys -sys.path.append('../') -import numpy as np - -from ..structure_graph.network_graph import NetworkGraph -from ..structure_graph.set_of_cims import SetOfCims -from ..structure_graph.trajectory import Trajectory - - -class ParametersEstimator(object): - """Has the task of computing the cims of particular node given the trajectories and the net structure - in the graph ``_net_graph``. - - :param trajectories: the trajectories - :type trajectories: Trajectory - :param net_graph: the net structure - :type net_graph: NetworkGraph - :_single_set_of_cims: the set of cims object that will hold the cims of the node - """ - - def __init__(self, trajectories: Trajectory, net_graph: NetworkGraph): - """Constructor Method - """ - self._trajectories = trajectories - self._net_graph = net_graph - self._single_set_of_cims = None - - def fast_init(self, node_id: str) -> None: - """Initializes all the necessary structures for the parameters estimation for the node ``node_id``. - - :param node_id: the node label - :type node_id: string - """ - p_vals = self._net_graph._aggregated_info_about_nodes_parents[2] - node_states_number = self._net_graph.get_states_number(node_id) - self._single_set_of_cims = SetOfCims(node_id, p_vals, node_states_number, self._net_graph.p_combs) - - def compute_parameters_for_node(self, node_id: str) -> SetOfCims: - """Compute the CIMS of the node identified by the label ``node_id``. - - :param node_id: the node label - :type node_id: string - :return: A SetOfCims object filled with the computed CIMS - :rtype: SetOfCims - """ - node_indx = self._net_graph.get_node_indx(node_id) - state_res_times = self._single_set_of_cims._state_residence_times - transition_matrices = self._single_set_of_cims._transition_matrices - ParametersEstimator.compute_state_res_time_for_node(self._trajectories.times, - self._trajectories.trajectory, - self._net_graph.time_filtering, - self._net_graph.time_scalar_indexing_strucure, - state_res_times) - ParametersEstimator.compute_state_transitions_for_a_node(node_indx, self._trajectories.complete_trajectory, - self._net_graph.transition_filtering, - self._net_graph.transition_scalar_indexing_structure, - transition_matrices) - self._single_set_of_cims.build_cims(state_res_times, transition_matrices) - return self._single_set_of_cims - - @staticmethod - def compute_state_res_time_for_node(times: np.ndarray, trajectory: np.ndarray, - cols_filter: np.ndarray, scalar_indexes_struct: np.ndarray, - T: np.ndarray) -> None: - """Compute the state residence times for a node and fill the matrix ``T`` with the results - - :param node_indx: the index of the node - :type node_indx: int - :param times: the times deltas vector - :type times: numpy.array - :param trajectory: the trajectory - :type trajectory: numpy.ndArray - :param cols_filter: the columns filtering structure - :type cols_filter: numpy.array - :param scalar_indexes_struct: the indexing structure - :type scalar_indexes_struct: numpy.array - :param T: the state residence times vectors - :type T: numpy.ndArray - """ - T[:] = np.bincount(np.sum(trajectory[:, cols_filter] * scalar_indexes_struct / scalar_indexes_struct[0], axis=1) - .astype(np.int), \ - times, - minlength=scalar_indexes_struct[-1]).reshape(-1, T.shape[1]) - - @staticmethod - def compute_state_transitions_for_a_node(node_indx: int, trajectory: np.ndarray, cols_filter: np.ndarray, - scalar_indexing: np.ndarray, M: np.ndarray) -> None: - """Compute the state residence times for a node and fill the matrices ``M`` with the results. - - :param node_indx: the index of the node - :type node_indx: int - :param trajectory: the trajectory - :type trajectory: numpy.ndArray - :param cols_filter: the columns filtering structure - :type cols_filter: numpy.array - :param scalar_indexing: the indexing structure - :type scalar_indexing: numpy.array - :param M: the state transitions matrices - :type M: numpy.ndArray - """ - diag_indices = np.array([x * M.shape[1] + x % M.shape[1] for x in range(M.shape[0] * M.shape[1])], - dtype=np.int64) - trj_tmp = trajectory[trajectory[:, int(trajectory.shape[1] / 2) + node_indx].astype(np.int) >= 0] - M[:] = np.bincount(np.sum(trj_tmp[:, cols_filter] * scalar_indexing / scalar_indexing[0], axis=1).astype(np.int) - , minlength=scalar_indexing[-1]).reshape(-1, M.shape[1], M.shape[2]) - M_raveled = M.ravel() - M_raveled[diag_indices] = 0 - M_raveled[diag_indices] = np.sum(M, axis=2).ravel() - - def init_sets_cims_container(self): - self.sets_of_cims_struct = acims.SetsOfCimsContainer(self.net_graph.nodes, - self.net_graph.nodes_values, - self.net_graph.get_ordered_by_indx_parents_values_for_all_nodes(), - self.net_graph.p_combs) - - def compute_parameters(self): - #print(self.net_graph.get_nodes()) - #print(self.amalgamated_cims_struct.sets_of_cims) - #enumerate(zip(self.net_graph.get_nodes(), self.amalgamated_cims_struct.sets_of_cims)) - for indx, aggr in enumerate(zip(self.net_graph.nodes, self.sets_of_cims_struct.sets_of_cims)): - #print(self.net_graph.time_filtering[indx]) - #print(self.net_graph.time_scalar_indexing_strucure[indx]) - self.compute_state_res_time_for_node(self.net_graph.get_node_indx(aggr[0]), self.sample_path.trajectories.times, - self.sample_path.trajectories.trajectory, - self.net_graph.time_filtering[indx], - self.net_graph.time_scalar_indexing_strucure[indx], - aggr[1]._state_residence_times) - #print(self.net_graph.transition_filtering[indx]) - #print(self.net_graph.transition_scalar_indexing_structure[indx]) - self.compute_state_transitions_for_a_node(self.net_graph.get_node_indx(aggr[0]), - self.sample_path.trajectories.complete_trajectory, - self.net_graph.transition_filtering[indx], - self.net_graph.transition_scalar_indexing_structure[indx], - aggr[1]._transition_matrices) - aggr[1].build_cims(aggr[1]._state_residence_times, aggr[1]._transition_matrices) - - - - - - - - diff --git a/PyCTBN/build/lib/classes/estimators/structure_constraint_based_estimator.py b/PyCTBN/build/lib/classes/estimators/structure_constraint_based_estimator.py deleted file mode 100644 index 7d5721e..0000000 --- a/PyCTBN/build/lib/classes/estimators/structure_constraint_based_estimator.py +++ /dev/null @@ -1,238 +0,0 @@ - -import itertools -import json -import typing - -import networkx as nx -import numpy as np -from networkx.readwrite import json_graph -import os -from scipy.stats import chi2 as chi2_dist -from scipy.stats import f as f_dist -from tqdm import tqdm - -from ..utility.cache import Cache -from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix -from ..structure_graph.network_graph import NetworkGraph -from .parameters_estimator import ParametersEstimator -from .structure_estimator import StructureEstimator -from ..structure_graph.sample_path import SamplePath -from ..structure_graph.structure import Structure -from ..optimizers.constraint_based_optimizer import ConstraintBasedOptimizer - -import concurrent.futures - - - -import multiprocessing -from multiprocessing import Pool - - -class StructureConstraintBasedEstimator(StructureEstimator): - """ - Has the task of estimating the network structure given the trajectories in samplepath by using a constraint-based approach. - - :param sample_path: the _sample_path object containing the trajectories and the real structure - :type sample_path: SamplePath - :param exp_test_alfa: the significance level for the exponential Hp test - :type exp_test_alfa: float - :param chi_test_alfa: the significance level for the chi Hp test - :type chi_test_alfa: float - :_nodes: the nodes labels - :_nodes_vals: the nodes cardinalities - :_nodes_indxs: the nodes indexes - :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes`` - :_cache: the Cache object - """ - - def __init__(self, sample_path: SamplePath, exp_test_alfa: float, chi_test_alfa: float,known_edges: typing.List= [],thumb_threshold:int = 25): - super().__init__(sample_path,known_edges) - self._exp_test_sign = exp_test_alfa - self._chi_test_alfa = chi_test_alfa - self._thumb_threshold = thumb_threshold - self._cache = Cache() - - def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int, - tot_vars_count: int, parent_indx, child_indx) -> bool: - """Performs a complete independence test on the directed graphs G1 = {test_child U parent_set} - G2 = {G1 U test_parent} (added as an additional parent of the test_child). - Generates all the necessary structures and datas to perform the tests. - - :param test_parent: the node label of the test parent - :type test_parent: string - :param test_child: the node label of the child - :type test_child: string - :param parent_set: the common parent set - :type parent_set: List - :param child_states_numb: the cardinality of the ``test_child`` - :type child_states_numb: int - :param tot_vars_count: the total number of variables in the net - :type tot_vars_count: int - :return: True iff test_child and test_parent are independent given the sep_set parent_set. False otherwise - :rtype: bool - """ - p_set = parent_set[:] - complete_info = parent_set[:] - complete_info.append(test_child) - - parents = np.array(parent_set) - parents = np.append(parents, test_parent) - sorted_parents = self._nodes[np.isin(self._nodes, parents)] - cims_filter = sorted_parents != test_parent - - p_set.insert(0, test_parent) - sofc2 = self._cache.find(set(p_set)) - - if not sofc2: - complete_info.append(test_parent) - bool_mask2 = np.isin(self._nodes, complete_info) - l2 = list(self._nodes[bool_mask2]) - indxs2 = self._nodes_indxs[bool_mask2] - vals2 = self._nodes_vals[bool_mask2] - eds2 = list(itertools.product(p_set, test_child)) - s2 = Structure(l2, indxs2, vals2, eds2, tot_vars_count) - g2 = NetworkGraph(s2) - g2.fast_init(test_child) - p2 = ParametersEstimator(self._sample_path.trajectories, g2) - p2.fast_init(test_child) - sofc2 = p2.compute_parameters_for_node(test_child) - self._cache.put(set(p_set), sofc2) - - del p_set[0] - sofc1 = self._cache.find(set(p_set)) - if not sofc1: - g2.remove_node(test_parent) - g2.fast_init(test_child) - p2 = ParametersEstimator(self._sample_path.trajectories, g2) - p2.fast_init(test_child) - sofc1 = p2.compute_parameters_for_node(test_child) - self._cache.put(set(p_set), sofc1) - thumb_value = 0.0 - if child_states_numb > 2: - parent_val = self._sample_path.structure.get_states_number(test_parent) - bool_mask_vals = np.isin(self._nodes, parent_set) - parents_vals = self._nodes_vals[bool_mask_vals] - thumb_value = self.compute_thumb_value(parent_val, child_states_numb, parents_vals) - for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs): - cond_cims = sofc2.filter_cims_with_mask(cims_filter, p_comb) - for cim2 in cond_cims: - if not self.independence_test(child_states_numb, cim1, cim2, thumb_value, parent_indx, child_indx): - return False - return True - - def independence_test(self, child_states_numb: int, cim1: ConditionalIntensityMatrix, - cim2: ConditionalIntensityMatrix, thumb_value: float, parent_indx, child_indx) -> bool: - """Compute the actual independence test using two cims. - It is performed first the exponential test and if the null hypothesis is not rejected, - it is performed also the chi_test. - - :param child_states_numb: the cardinality of the test child - :type child_states_numb: int - :param cim1: a cim belonging to the graph without test parent - :type cim1: ConditionalIntensityMatrix - :param cim2: a cim belonging to the graph with test parent - :type cim2: ConditionalIntensityMatrix - :return: True iff both tests do NOT reject the null hypothesis of independence. False otherwise. - :rtype: bool - """ - M1 = cim1.state_transition_matrix - M2 = cim2.state_transition_matrix - r1s = M1.diagonal() - r2s = M2.diagonal() - C1 = cim1.cim - C2 = cim2.cim - if child_states_numb > 2: - if (np.sum(np.diagonal(M1)) / thumb_value) < self._thumb_threshold: - self._removable_edges_matrix[parent_indx][child_indx] = False - return False - F_stats = C2.diagonal() / C1.diagonal() - exp_alfa = self._exp_test_sign - for val in range(0, child_states_numb): - if F_stats[val] < f_dist.ppf(exp_alfa / 2, r1s[val], r2s[val]) or \ - F_stats[val] > f_dist.ppf(1 - exp_alfa / 2, r1s[val], r2s[val]): - return False - M1_no_diag = M1[~np.eye(M1.shape[0], dtype=bool)].reshape(M1.shape[0], -1) - M2_no_diag = M2[~np.eye(M2.shape[0], dtype=bool)].reshape( - M2.shape[0], -1) - chi_2_quantile = chi2_dist.ppf(1 - self._chi_test_alfa, child_states_numb - 1) - Ks = np.sqrt(r1s / r2s) - Ls = np.sqrt(r2s / r1s) - for val in range(0, child_states_numb): - Chi = np.sum(np.power(Ks[val] * M2_no_diag[val] - Ls[val] *M1_no_diag[val], 2) / - (M1_no_diag[val] + M2_no_diag[val])) - if Chi > chi_2_quantile: - return False - return True - - def compute_thumb_value(self, parent_val, child_val, parent_set_vals): - """Compute the value to test against the thumb_threshold. - - :param parent_val: test parent's variable cardinality - :type parent_val: int - :param child_val: test child's variable cardinality - :type child_val: int - :param parent_set_vals: the cardinalities of the nodes in the current sep-set - :type parent_set_vals: List - :return: the thumb value for the current independence test - :rtype: int - """ - df = (child_val - 1) ** 2 - df = df * parent_val - for v in parent_set_vals: - df = df * v - return df - - def one_iteration_of_CTPC_algorithm(self, var_id: str, tot_vars_count: int)-> typing.List: - """Performs an iteration of the CTPC algorithm using the node ``var_id`` as ``test_child``. - - :param var_id: the node label of the test child - :type var_id: string - """ - optimizer_obj = ConstraintBasedOptimizer( - node_id = var_id, - structure_estimator = self, - tot_vars_count = tot_vars_count) - return optimizer_obj.optimize_structure() - - - def ctpc_algorithm(self,disable_multiprocessing:bool= False ): - """Compute the CTPC algorithm over the entire net. - """ - ctpc_algo = self.one_iteration_of_CTPC_algorithm - total_vars_numb = self._sample_path.total_variables_count - - n_nodes= len(self._nodes) - - total_vars_numb_array = [total_vars_numb] * n_nodes - - 'get the number of CPU' - cpu_count = multiprocessing.cpu_count() - - - - 'Remove all the edges from the structure' - self._sample_path.structure.clean_structure_edges() - - 'Estimate the best parents for each node' - #with multiprocessing.Pool(processes=cpu_count) as pool: - #with get_context("spawn").Pool(processes=cpu_count) as pool: - if disable_multiprocessing: - print("DISABILITATO") - cpu_count = 1 - list_edges_partial = [ctpc_algo(n,total_vars_numb) for n in self._nodes] - else: - with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor: - list_edges_partial = executor.map(ctpc_algo, - self._nodes, - total_vars_numb_array) - #list_edges_partial = [ctpc_algo(n,total_vars_numb) for n in self._nodes] - - return set(itertools.chain.from_iterable(list_edges_partial)) - - - def estimate_structure(self,disable_multiprocessing:bool=False): - return self.ctpc_algorithm(disable_multiprocessing=disable_multiprocessing) - - - - diff --git a/PyCTBN/build/lib/classes/estimators/structure_estimator.py b/PyCTBN/build/lib/classes/estimators/structure_estimator.py deleted file mode 100644 index fbf8ea9..0000000 --- a/PyCTBN/build/lib/classes/estimators/structure_estimator.py +++ /dev/null @@ -1,187 +0,0 @@ - -import itertools -import json -import typing - -import matplotlib.pyplot as plt -import networkx as nx -import numpy as np -from networkx.readwrite import json_graph - -from abc import ABC - -import abc - -from ..utility.cache import Cache -from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix -from ..structure_graph.network_graph import NetworkGraph -from .parameters_estimator import ParametersEstimator -from ..structure_graph.sample_path import SamplePath -from ..structure_graph.structure import Structure - - -class StructureEstimator(object): - """Has the task of estimating the network structure given the trajectories in ``samplepath``. - - :param sample_path: the _sample_path object containing the trajectories and the real structure - :type sample_path: SamplePath - :_nodes: the nodes labels - :_nodes_vals: the nodes cardinalities - :_nodes_indxs: the nodes indexes - :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes`` - """ - - def __init__(self, sample_path: SamplePath, known_edges: typing.List = None): - self._sample_path = sample_path - self._nodes = np.array(self._sample_path.structure.nodes_labels) - self._nodes_vals = self._sample_path.structure.nodes_values - self._nodes_indxs = self._sample_path.structure.nodes_indexes - self._removable_edges_matrix = self.build_removable_edges_matrix(known_edges) - self._complete_graph = StructureEstimator.build_complete_graph(self._sample_path.structure.nodes_labels) - - - def build_removable_edges_matrix(self, known_edges: typing.List): - """Builds a boolean matrix who shows if a edge could be removed or not, based on prior knowledge given: - - :param known_edges: the list of nodes labels - :type known_edges: List - :return: a boolean matrix - :rtype: np.ndarray - """ - tot_vars_count = self._sample_path.total_variables_count - complete_adj_matrix = np.full((tot_vars_count, tot_vars_count), True) - if known_edges: - for edge in known_edges: - i = self._sample_path.structure.get_node_indx(edge[0]) - j = self._sample_path.structure.get_node_indx(edge[1]) - complete_adj_matrix[i][j] = False - return complete_adj_matrix - - @staticmethod - def build_complete_graph(node_ids: typing.List) -> nx.DiGraph: - """Builds a complete directed graph (no self loops) given the nodes labels in the list ``node_ids``: - - :param node_ids: the list of nodes labels - :type node_ids: List - :return: a complete Digraph Object - :rtype: networkx.DiGraph - """ - complete_graph = nx.DiGraph() - complete_graph.add_nodes_from(node_ids) - complete_graph.add_edges_from(itertools.permutations(node_ids, 2)) - return complete_graph - - - @staticmethod - def generate_possible_sub_sets_of_size( u: typing.List, size: int, parent_label: str): - """Creates a list containing all possible subsets of the list ``u`` of size ``size``, - that do not contains a the node identified by ``parent_label``. - - :param u: the list of nodes - :type u: List - :param size: the size of the subsets - :type size: int - :param parent_label: the node to exclude in the subsets generation - :type parent_label: string - :return: an Iterator Object containing a list of lists - :rtype: Iterator - """ - list_without_test_parent = u[:] - list_without_test_parent.remove(parent_label) - return map(list, itertools.combinations(list_without_test_parent, size)) - - def save_results(self) -> None: - """Save the estimated Structure to a .json file in the path where the data are loaded from. - The file is named as the input dataset but the `results_` word is appended to the results file. - """ - res = json_graph.node_link_data(self._complete_graph) - name = self._sample_path._importer.file_path.rsplit('/', 1)[-1] - name = name.split('.', 1)[0] - name += '_' + str(self._sample_path._importer.dataset_id()) - name += '.json' - file_name = 'results_' + name - with open(file_name, 'w') as f: - json.dump(res, f) - - - def remove_diagonal_elements(self, matrix): - m = matrix.shape[0] - strided = np.lib.stride_tricks.as_strided - s0, s1 = matrix.strides - return strided(matrix.ravel()[1:], shape=(m - 1, m), strides=(s0 + s1, s1)).reshape(m, -1) - - - @abc.abstractmethod - def estimate_structure(self) -> typing.List: - """Abstract method to estimate the structure - - :return: List of estimated edges - :rtype: Typing.List - """ - pass - - - def adjacency_matrix(self) -> np.ndarray: - """Converts the estimated structure ``_complete_graph`` to a boolean adjacency matrix representation. - - :return: The adjacency matrix of the graph ``_complete_graph`` - :rtype: numpy.ndArray - """ - return nx.adj_matrix(self._complete_graph).toarray().astype(bool) - - def spurious_edges(self) -> typing.List: - """Return the spurious edges present in the estimated structure, if a prior net structure is present in - ``_sample_path.structure``. - - :return: A list containing the spurious edges - :rtype: List - """ - if not self._sample_path.has_prior_net_structure: - raise RuntimeError("Can not compute spurious edges with no prior net structure!") - real_graph = nx.DiGraph() - real_graph.add_nodes_from(self._sample_path.structure.nodes_labels) - real_graph.add_edges_from(self._sample_path.structure.edges) - return nx.difference(real_graph, self._complete_graph).edges - - def save_plot_estimated_structure_graph(self) -> None: - """Plot the estimated structure in a graphical model style. - Spurious edges are colored in red. - """ - graph_to_draw = nx.DiGraph() - spurious_edges = self.spurious_edges() - non_spurious_edges = list(set(self._complete_graph.edges) - set(spurious_edges)) - print(non_spurious_edges) - edges_colors = ['red' if edge in spurious_edges else 'black' for edge in self._complete_graph.edges] - graph_to_draw.add_edges_from(spurious_edges) - graph_to_draw.add_edges_from(non_spurious_edges) - pos = nx.spring_layout(graph_to_draw, k=0.5*1/np.sqrt(len(graph_to_draw.nodes())), iterations=50,scale=10) - options = { - "node_size": 2000, - "node_color": "white", - "edgecolors": "black", - 'linewidths':2, - "with_labels":True, - "font_size":13, - 'connectionstyle': 'arc3, rad = 0.1', - "arrowsize": 15, - "arrowstyle": '<|-', - "width": 1, - "edge_color":edges_colors, - } - - nx.draw(graph_to_draw, pos, **options) - ax = plt.gca() - ax.margins(0.20) - plt.axis("off") - name = self._sample_path._importer.file_path.rsplit('/', 1)[-1] - name = name.split('.', 1)[0] - name += '_' + str(self._sample_path._importer.dataset_id()) - name += '.png' - plt.savefig(name) - plt.clf() - print("Estimated Structure Plot Saved At: ", os.path.abspath(name)) - - - - - diff --git a/PyCTBN/build/lib/classes/estimators/structure_score_based_estimator.py b/PyCTBN/build/lib/classes/estimators/structure_score_based_estimator.py deleted file mode 100644 index 2903db3..0000000 --- a/PyCTBN/build/lib/classes/estimators/structure_score_based_estimator.py +++ /dev/null @@ -1,244 +0,0 @@ - -import itertools -import json -import typing - -import networkx as nx -import numpy as np -from networkx.readwrite import json_graph - -from random import choice - -import concurrent.futures - -import copy - -from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix -from ..structure_graph.network_graph import NetworkGraph -from .parameters_estimator import ParametersEstimator -from .structure_estimator import StructureEstimator -from ..structure_graph.sample_path import SamplePath -from ..structure_graph.structure import Structure -from .fam_score_calculator import FamScoreCalculator -from ..optimizers.hill_climbing_search import HillClimbing -from ..optimizers.tabu_search import TabuSearch - - -import multiprocessing -from multiprocessing import Pool - - - - -class StructureScoreBasedEstimator(StructureEstimator): - """ - Has the task of estimating the network structure given the trajectories in samplepath by - using a score based approach. - - :param sample_path: the _sample_path object containing the trajectories and the real structure - :type sample_path: SamplePath - :param tau_xu: hyperparameter over the CTBN’s q parameters, default to 0.1 - :type tau_xu: float, optional - :param alpha_xu: hyperparameter over the CTBN’s q parameters, default to 1 - :type alpha_xu: float, optional - :param known_edges: List of known edges, default to [] - :type known_edges: List, optional - - """ - - def __init__(self, sample_path: SamplePath, tau_xu:int=0.1, alpha_xu:int = 1,known_edges: typing.List= []): - super().__init__(sample_path,known_edges) - self.tau_xu=tau_xu - self.alpha_xu=alpha_xu - - - def estimate_structure(self, max_parents:int = None, iterations_number:int= 40, - patience:int = None, tabu_length:int = None, tabu_rules_duration:int = None, - optimizer: str = 'tabu',disable_multiprocessing:bool= False ): - """ - Compute the score-based algorithm to find the optimal structure - - :param max_parents: maximum number of parents for each variable. If None, disabled, default to None - :type max_parents: int, optional - :param iterations_number: maximum number of optimization algorithm's iteration, default to 40 - :type iterations_number: int, optional - :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None - :type patience: int, optional - :param tabu_length: maximum lenght of the data structures used in the optimization process, default to None - :type tabu_length: int, optional - :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None - :type tabu_rules_duration: int, optional - :param optimizer: name of the optimizer algorithm. Possible values: 'hill' (Hill climbing),'tabu' (tabu search), defualt to 'tabu' - :type optimizer: string, optional - :param disable_multiprocessing: true if you desire to disable the multiprocessing operations, default to False - :type disable_multiprocessing: Boolean, optional - """ - 'Save the true edges structure in tuples' - true_edges = copy.deepcopy(self._sample_path.structure.edges) - true_edges = set(map(tuple, true_edges)) - - 'Remove all the edges from the structure' - self._sample_path.structure.clean_structure_edges() - - estimate_parents = self.estimate_parents - - n_nodes= len(self._nodes) - - l_max_parents= [max_parents] * n_nodes - l_iterations_number = [iterations_number] * n_nodes - l_patience = [patience] * n_nodes - l_tabu_length = [tabu_length] * n_nodes - l_tabu_rules_duration = [tabu_rules_duration] * n_nodes - l_optimizer = [optimizer] * n_nodes - - - 'get the number of CPU' - cpu_count = multiprocessing.cpu_count() - print(f"CPU COUNT: {cpu_count}") - - if disable_multiprocessing: - cpu_count = 1 - - - - - - #with get_context("spawn").Pool(processes=cpu_count) as pool: - #with multiprocessing.Pool(processes=cpu_count) as pool: - - 'Estimate the best parents for each node' - if disable_multiprocessing: - list_edges_partial = [estimate_parents(n,max_parents,iterations_number,patience,tabu_length,tabu_rules_duration,optimizer) for n in self._nodes] - else: - with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor: - list_edges_partial = executor.map(estimate_parents, - self._nodes, - l_max_parents, - l_iterations_number, - l_patience, - l_tabu_length, - l_tabu_rules_duration, - l_optimizer) - - - - #list_edges_partial = p.map(estimate_parents, self._nodes) - #list_edges_partial= estimate_parents('Q',max_parents,iterations_number,patience,tabu_length,tabu_rules_duration,optimizer) - - 'Concatenate all the edges list' - set_list_edges = set(itertools.chain.from_iterable(list_edges_partial)) - - #print('-------------------------') - - - 'calculate precision and recall' - n_missing_edges = 0 - n_added_fake_edges = 0 - - try: - n_added_fake_edges = len(set_list_edges.difference(true_edges)) - - n_missing_edges = len(true_edges.difference(set_list_edges)) - - n_true_positive = len(true_edges) - n_missing_edges - - precision = n_true_positive / (n_true_positive + n_added_fake_edges) - - recall = n_true_positive / (n_true_positive + n_missing_edges) - - - # print(f"n archi reali non trovati: {n_missing_edges}") - # print(f"n archi non reali aggiunti: {n_added_fake_edges}") - print(true_edges) - print(set_list_edges) - print(f"precision: {precision} ") - print(f"recall: {recall} ") - except Exception as e: - print(f"errore: {e}") - - return set_list_edges - - - def estimate_parents(self,node_id:str, max_parents:int = None, iterations_number:int= 40, - patience:int = 10, tabu_length:int = None, tabu_rules_duration:int=5, - optimizer:str = 'hill' ): - """ - Use the FamScore of a node in order to find the best parent nodes - - :param node_id: current node's id - :type node_id: string - :param max_parents: maximum number of parents for each variable. If None, disabled, default to None - :type max_parents: int, optional - :param iterations_number: maximum number of optimization algorithm's iteration, default to 40 - :type iterations_number: int, optional - :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None - :type patience: int, optional - :param tabu_length: maximum lenght of the data structures used in the optimization process, default to None - :type tabu_length: int, optional - :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None - :type tabu_rules_duration: int, optional - :param optimizer: name of the optimizer algorithm. Possible values: 'hill' (Hill climbing),'tabu' (tabu search), defualt to 'tabu' - :type optimizer: string, optional - - :return: A list of the best edges for the currente node - :rtype: List - """ - - "choose the optimizer algotithm" - if optimizer == 'tabu': - optimizer = TabuSearch( - node_id = node_id, - structure_estimator = self, - max_parents = max_parents, - iterations_number = iterations_number, - patience = patience, - tabu_length = tabu_length, - tabu_rules_duration = tabu_rules_duration) - else: #if optimizer == 'hill': - optimizer = HillClimbing( - node_id = node_id, - structure_estimator = self, - max_parents = max_parents, - iterations_number = iterations_number, - patience = patience) - - "call the optmizer's function that calculates the current node's parents" - return optimizer.optimize_structure() - - - def get_score_from_graph(self, - graph: NetworkGraph, - node_id:str): - """ - Get the FamScore of a node - - :param node_id: current node's id - :type node_id: string - :param graph: current graph to be computed - :type graph: class:'NetworkGraph' - - - :return: The FamSCore for this graph structure - :rtype: float - """ - - 'inizialize the graph for a single node' - graph.fast_init(node_id) - - params_estimation = ParametersEstimator(self._sample_path.trajectories, graph) - - 'Inizialize and compute parameters for node' - params_estimation.fast_init(node_id) - SoCims = params_estimation.compute_parameters_for_node(node_id) - - 'calculate the FamScore for the node' - fam_score_obj = FamScoreCalculator() - - score = fam_score_obj.get_fam_score(SoCims.actual_cims,tau_xu = self.tau_xu,alpha_xu=self.alpha_xu) - - #print(f" lo score per {node_id} risulta: {score} ") - return score - - - - diff --git a/PyCTBN/build/lib/classes/optimizers/__init__.py b/PyCTBN/build/lib/classes/optimizers/__init__.py deleted file mode 100644 index 4162bf1..0000000 --- a/PyCTBN/build/lib/classes/optimizers/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .optimizer import Optimizer -from .tabu_search import TabuSearch -from .hill_climbing_search import HillClimbing -from .constraint_based_optimizer import ConstraintBasedOptimizer \ No newline at end of file diff --git a/PyCTBN/build/lib/classes/optimizers/constraint_based_optimizer.py b/PyCTBN/build/lib/classes/optimizers/constraint_based_optimizer.py deleted file mode 100644 index 65bc19c..0000000 --- a/PyCTBN/build/lib/classes/optimizers/constraint_based_optimizer.py +++ /dev/null @@ -1,87 +0,0 @@ - -import itertools -import json -import typing - -import networkx as nx -import numpy as np - -from random import choice - -from abc import ABC - -import copy - - -from .optimizer import Optimizer -from ..estimators.structure_estimator import StructureEstimator -from ..structure_graph.network_graph import NetworkGraph - - -class ConstraintBasedOptimizer(Optimizer): - """ - Optimizer class that implement a CTPC Algorithm - - :param node_id: current node's id - :type node_id: string - :param structure_estimator: a structure estimator object with the information about the net - :type structure_estimator: class:'StructureEstimator' - :param tot_vars_count: number of variables in the dataset - :type tot_vars_count: int - """ - def __init__(self, - node_id:str, - structure_estimator: StructureEstimator, - tot_vars_count:int - ): - """ - Constructor - """ - super().__init__(node_id, structure_estimator) - self.tot_vars_count = tot_vars_count - - - - def optimize_structure(self): - """ - Compute Optimization process for a structure_estimator by using a CTPC Algorithm - - :return: the estimated structure for the node - :rtype: List - """ - print("##################TESTING VAR################", self.node_id) - - graph = NetworkGraph(self.structure_estimator._sample_path.structure) - - other_nodes = [node for node in self.structure_estimator._sample_path.structure.nodes_labels if node != self.node_id] - - for possible_parent in other_nodes: - graph.add_edges([(possible_parent,self.node_id)]) - - - u = other_nodes - #tests_parents_numb = len(u) - #complete_frame = self.complete_graph_frame - #test_frame = complete_frame.loc[complete_frame['To'].isin([self.node_id])] - child_states_numb = self.structure_estimator._sample_path.structure.get_states_number(self.node_id) - b = 0 - while b < len(u): - parent_indx = 0 - while parent_indx < len(u): - removed = False - test_parent = u[parent_indx] - i = self.structure_estimator._sample_path.structure.get_node_indx(test_parent) - j = self.structure_estimator._sample_path.structure.get_node_indx(self.node_id) - if self.structure_estimator._removable_edges_matrix[i][j]: - S = StructureEstimator.generate_possible_sub_sets_of_size(u, b, test_parent) - for parents_set in S: - if self.structure_estimator.complete_test(test_parent, self.node_id, parents_set, child_states_numb, self.tot_vars_count,i,j): - graph.remove_edges([(test_parent, self.node_id)]) - u.remove(test_parent) - removed = True - break - if not removed: - parent_indx += 1 - b += 1 - self.structure_estimator._cache.clear() - return graph.edges \ No newline at end of file diff --git a/PyCTBN/build/lib/classes/optimizers/hill_climbing_search.py b/PyCTBN/build/lib/classes/optimizers/hill_climbing_search.py deleted file mode 100644 index 6783be0..0000000 --- a/PyCTBN/build/lib/classes/optimizers/hill_climbing_search.py +++ /dev/null @@ -1,135 +0,0 @@ - -import itertools -import json -import typing - -import networkx as nx -import numpy as np - -from random import choice - -from abc import ABC - - -from .optimizer import Optimizer -from ..estimators.structure_estimator import StructureEstimator -from ..structure_graph.network_graph import NetworkGraph - - -class HillClimbing(Optimizer): - """ - Optimizer class that implement Hill Climbing Search - - - :param node_id: current node's id - :type node_id: string - :param structure_estimator: a structure estimator object with the information about the net - :type structure_estimator: class:'StructureEstimator' - :param max_parents: maximum number of parents for each variable. If None, disabled, default to None - :type max_parents: int, optional - :param iterations_number: maximum number of optimization algorithm's iteration, default to 40 - :type iterations_number: int, optional - :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None - :type patience: int, optional - - - - """ - def __init__(self, - node_id:str, - structure_estimator: StructureEstimator, - max_parents:int = None, - iterations_number:int= 40, - patience:int = None - ): - """ - Constructor - """ - super().__init__(node_id, structure_estimator) - self.max_parents = max_parents - self.iterations_number = iterations_number - self.patience = patience - - - - def optimize_structure(self) -> typing.List: - """ - Compute Optimization process for a structure_estimator by using a Hill Climbing Algorithm - - :return: the estimated structure for the node - :rtype: List - """ - - #'Create the graph for the single node' - graph = NetworkGraph(self.structure_estimator._sample_path.structure) - - 'get the index for the current node' - node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id) - - 'list of prior edges' - prior_parents = set() - - 'Add the edges from prior knowledge' - for i in range(len(self.structure_estimator._removable_edges_matrix)): - if not self.structure_estimator._removable_edges_matrix[i][node_index]: - parent_id= self.structure_estimator._sample_path._structure.get_node_id(i) - prior_parents.add(parent_id) - - 'Add the node to the starting structure' - graph.add_edges([(parent_id, self.node_id)]) - - - - 'get all the possible parents' - other_nodes = [node for node in - self.structure_estimator._sample_path.structure.nodes_labels if - node != self.node_id and - not prior_parents.__contains__(node)] - - actual_best_score = self.structure_estimator.get_score_from_graph(graph,self.node_id) - - patince_count = 0 - for i in range(self.iterations_number): - 'choose a new random edge' - current_new_parent = choice(other_nodes) - current_edge = (current_new_parent,self.node_id) - added = False - parent_removed = None - - - if graph.has_edge(current_edge): - graph.remove_edges([current_edge]) - else: - 'check the max_parents constraint' - if self.max_parents is not None: - parents_list = graph.get_parents_by_id(self.node_id) - if len(parents_list) >= self.max_parents : - parent_removed = (choice(parents_list), self.node_id) - graph.remove_edges([parent_removed]) - graph.add_edges([current_edge]) - added = True - #print('**************************') - current_score = self.structure_estimator.get_score_from_graph(graph,self.node_id) - - - if current_score > actual_best_score: - 'update current best score' - actual_best_score = current_score - patince_count = 0 - else: - 'undo the last update' - if added: - graph.remove_edges([current_edge]) - 'If a parent was removed, add it again to the graph' - if parent_removed is not None: - graph.add_edges([parent_removed]) - else: - graph.add_edges([current_edge]) - 'update patience count' - patince_count += 1 - - if self.patience is not None and patince_count > self.patience: - break - - print(f"finito variabile: {self.node_id}") - return graph.edges \ No newline at end of file diff --git a/PyCTBN/build/lib/classes/optimizers/optimizer.py b/PyCTBN/build/lib/classes/optimizers/optimizer.py deleted file mode 100644 index 36445c0..0000000 --- a/PyCTBN/build/lib/classes/optimizers/optimizer.py +++ /dev/null @@ -1,39 +0,0 @@ - -import itertools -import json -import typing - -import networkx as nx -import numpy as np - -import abc - -from ..estimators.structure_estimator import StructureEstimator - - - -class Optimizer(abc.ABC): - """ - Interface class for all the optimizer's child PyCTBN - - :param node_id: the node label - :type node_id: string - :param structure_estimator: A structureEstimator Object to predict the structure - :type structure_estimator: class:'StructureEstimator' - - """ - - def __init__(self, node_id:str, structure_estimator: StructureEstimator): - self.node_id = node_id - self.structure_estimator = structure_estimator - - - @abc.abstractmethod - def optimize_structure(self) -> typing.List: - """ - Compute Optimization process for a structure_estimator - - :return: the estimated structure for the node - :rtype: List - """ - pass diff --git a/PyCTBN/build/lib/classes/optimizers/tabu_search.py b/PyCTBN/build/lib/classes/optimizers/tabu_search.py deleted file mode 100644 index e15dd40..0000000 --- a/PyCTBN/build/lib/classes/optimizers/tabu_search.py +++ /dev/null @@ -1,199 +0,0 @@ - -import itertools -import json -import typing - -import networkx as nx -import numpy as np - -from random import choice,sample - -from abc import ABC - - -from .optimizer import Optimizer -from ..estimators.structure_estimator import StructureEstimator -from ..structure_graph.network_graph import NetworkGraph - -import queue - - -class TabuSearch(Optimizer): - """ - Optimizer class that implement Tabu Search - - - :param node_id: current node's id - :type node_id: string - :param structure_estimator: a structure estimator object with the information about the net - :type structure_estimator: class:'StructureEstimator' - :param max_parents: maximum number of parents for each variable. If None, disabled, default to None - :type max_parents: int, optional - :param iterations_number: maximum number of optimization algorithm's iteration, default to 40 - :type iterations_number: int, optional - :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None - :type patience: int, optional - :param tabu_length: maximum lenght of the data structures used in the optimization process, default to None - :type tabu_length: int, optional - :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None - :type tabu_rules_duration: int, optional - - - """ - def __init__(self, - node_id:str, - structure_estimator: StructureEstimator, - max_parents:int = None, - iterations_number:int= 40, - patience:int = None, - tabu_length:int = None, - tabu_rules_duration = None - ): - """ - Constructor - """ - super().__init__(node_id, structure_estimator) - self.max_parents = max_parents - self.iterations_number = iterations_number - self.patience = patience - self.tabu_length = tabu_length - self.tabu_rules_duration = tabu_rules_duration - - - def optimize_structure(self) -> typing.List: - """ - Compute Optimization process for a structure_estimator by using a Hill Climbing Algorithm - - :return: the estimated structure for the node - :rtype: List - """ - print(f"tabu search is processing the structure of {self.node_id}") - - 'Create the graph for the single node' - graph = NetworkGraph(self.structure_estimator._sample_path.structure) - - 'get the index for the current node' - node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id) - - 'list of prior edges' - prior_parents = set() - - 'Add the edges from prior knowledge' - for i in range(len(self.structure_estimator._removable_edges_matrix)): - if not self.structure_estimator._removable_edges_matrix[i][node_index]: - parent_id= self.structure_estimator._sample_path._structure.get_node_id(i) - prior_parents.add(parent_id) - - 'Add the node to the starting structure' - graph.add_edges([(parent_id, self.node_id)]) - - - - 'get all the possible parents' - other_nodes = set([node for node in - self.structure_estimator._sample_path.structure.nodes_labels if - node != self.node_id and - not prior_parents.__contains__(node)]) - - 'calculate the score for the node without parents' - actual_best_score = self.structure_estimator.get_score_from_graph(graph,self.node_id) - - - 'initialize tabu_length and tabu_rules_duration if None' - if self.tabu_length is None: - self.tabu_length = len(other_nodes) - - if self.tabu_rules_duration is None: - self.tabu_tabu_rules_durationength = len(other_nodes) - - 'inizialize the data structures' - tabu_set = set() - tabu_queue = queue.Queue() - - patince_count = 0 - tabu_count = 0 - for i in range(self.iterations_number): - - current_possible_nodes = other_nodes.difference(tabu_set) - - 'choose a new random edge according to tabu restiction' - if(len(current_possible_nodes) > 0): - current_new_parent = sample(current_possible_nodes,k=1)[0] - else: - current_new_parent = tabu_queue.get() - tabu_set.remove(current_new_parent) - - - - current_edge = (current_new_parent,self.node_id) - added = False - parent_removed = None - - if graph.has_edge(current_edge): - graph.remove_edges([current_edge]) - else: - 'check the max_parents constraint' - if self.max_parents is not None: - parents_list = graph.get_parents_by_id(self.node_id) - if len(parents_list) >= self.max_parents : - parent_removed = (choice(parents_list), self.node_id) - graph.remove_edges([parent_removed]) - graph.add_edges([current_edge]) - added = True - #print('**************************') - current_score = self.structure_estimator.get_score_from_graph(graph,self.node_id) - - - # print("-------------------------------------------") - # print(f"Current new parent: {current_new_parent}") - # print(f"Current score: {current_score}") - # print(f"Current best score: {actual_best_score}") - # print(f"tabu list : {str(tabu_set)} length: {len(tabu_set)}") - # print(f"tabu queue : {str(tabu_queue)} length: {tabu_queue.qsize()}") - # print(f"graph edges: {graph.edges}") - - # print("-------------------------------------------") - # input() - if current_score > actual_best_score: - 'update current best score' - actual_best_score = current_score - patince_count = 0 - 'update tabu list' - - - else: - 'undo the last update' - if added: - graph.remove_edges([current_edge]) - 'If a parent was removed, add it again to the graph' - if parent_removed is not None: - graph.add_edges([parent_removed]) - else: - graph.add_edges([current_edge]) - 'update patience count' - patince_count += 1 - - - if tabu_queue.qsize() >= self.tabu_length: - current_removed = tabu_queue.get() - tabu_set.remove(current_removed) - 'Add the node on the tabu list' - tabu_queue.put(current_new_parent) - tabu_set.add(current_new_parent) - - tabu_count += 1 - - 'Every tabu_rules_duration step remove an item from the tabu list ' - if tabu_count % self.tabu_rules_duration == 0: - if tabu_queue.qsize() > 0: - current_removed = tabu_queue.get() - tabu_set.remove(current_removed) - tabu_count = 0 - else: - tabu_count = 0 - - if self.patience is not None and patince_count > self.patience: - break - - print(f"finito variabile: {self.node_id}") - return graph.edges \ No newline at end of file diff --git a/PyCTBN/build/lib/classes/structure_graph/__init__.py b/PyCTBN/build/lib/classes/structure_graph/__init__.py deleted file mode 100644 index 85f18a2..0000000 --- a/PyCTBN/build/lib/classes/structure_graph/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from .conditional_intensity_matrix import ConditionalIntensityMatrix -from .network_graph import NetworkGraph -from .sample_path import SamplePath -from .set_of_cims import SetOfCims -from .structure import Structure -from .trajectory import Trajectory \ No newline at end of file diff --git a/PyCTBN/build/lib/classes/structure_graph/conditional_intensity_matrix.py b/PyCTBN/build/lib/classes/structure_graph/conditional_intensity_matrix.py deleted file mode 100644 index 4abfdd0..0000000 --- a/PyCTBN/build/lib/classes/structure_graph/conditional_intensity_matrix.py +++ /dev/null @@ -1,42 +0,0 @@ -import numpy as np - - -class ConditionalIntensityMatrix(object): - """Abstracts the Conditional Intesity matrix of a node as aggregation of the state residence times vector - and state transition matrix and the actual CIM matrix. - - :param state_residence_times: state residence times vector - :type state_residence_times: numpy.array - :param state_transition_matrix: the transitions count matrix - :type state_transition_matrix: numpy.ndArray - :_cim: the actual cim of the node - """ - def __init__(self, state_residence_times: np.array, state_transition_matrix: np.array): - """Constructor Method - """ - self._state_residence_times = state_residence_times - self._state_transition_matrix = state_transition_matrix - self._cim = self.state_transition_matrix.astype(np.float64) - - def compute_cim_coefficients(self) -> None: - """Compute the coefficients of the matrix _cim by using the following equality q_xx' = M[x, x'] / T[x]. - The class member ``_cim`` will contain the computed cim - """ - np.fill_diagonal(self._cim, self._cim.diagonal() * -1) - self._cim = ((self._cim.T + 1) / (self._state_residence_times + 1)).T - - @property - def state_residence_times(self) -> np.ndarray: - return self._state_residence_times - - @property - def state_transition_matrix(self) -> np.ndarray: - return self._state_transition_matrix - - @property - def cim(self) -> np.ndarray: - return self._cim - - def __repr__(self): - return 'CIM:\n' + str(self.cim) - diff --git a/PyCTBN/build/lib/classes/structure_graph/network_graph.py b/PyCTBN/build/lib/classes/structure_graph/network_graph.py deleted file mode 100644 index 623981d..0000000 --- a/PyCTBN/build/lib/classes/structure_graph/network_graph.py +++ /dev/null @@ -1,293 +0,0 @@ - -import typing - -import networkx as nx -import numpy as np - -from .structure import Structure - - -class NetworkGraph(object): - """Abstracts the infos contained in the Structure class in the form of a directed graph. - Has the task of creating all the necessary filtering and indexing structures for parameters estimation - - :param graph_struct: the ``Structure`` object from which infos about the net will be extracted - :type graph_struct: Structure - :_graph: directed graph - :_aggregated_info_about_nodes_parents: a structure that contains all the necessary infos - about every parents of the node of which all the indexing and filtering structures will be constructed. - :_time_scalar_indexing_structure: the indexing structure for state res time estimation - :_transition_scalar_indexing_structure: the indexing structure for transition computation - :_time_filtering: the columns filtering structure used in the computation of the state res times - :_transition_filtering: the columns filtering structure used in the computation of the transition - from one state to another - :_p_combs_structure: all the possible parents states combination for the node of interest - """ - - def __init__(self, graph_struct: Structure): - """Constructor Method - """ - self._graph_struct = graph_struct - self._graph = nx.DiGraph() - self._aggregated_info_about_nodes_parents = None - self._time_scalar_indexing_structure = None - self._transition_scalar_indexing_structure = None - self._time_filtering = None - self._transition_filtering = None - self._p_combs_structure = None - - def init_graph(self): - self.add_nodes(self._nodes_labels) - self.add_edges(self.graph_struct.edges) - self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes() - self._fancy_indexing = self.build_fancy_indexing_structure(0) - self.build_scalar_indexing_structures() - self.build_time_columns_filtering_structure() - self.build_transition_columns_filtering_structure() - self._p_combs_structure = self.build_p_combs_structure() - - def fast_init(self, node_id: str) -> None: - """Initializes all the necessary structures for parameters estimation of the node identified by the label - node_id - - :param node_id: the label of the node - :type node_id: string - """ - self.add_nodes(self._graph_struct.nodes_labels) - self.add_edges(self._graph_struct.edges) - self._aggregated_info_about_nodes_parents = self.get_ordered_by_indx_set_of_parents(node_id) - p_indxs = self._aggregated_info_about_nodes_parents[1] - p_vals = self._aggregated_info_about_nodes_parents[2] - node_states = self.get_states_number(node_id) - node_indx = self.get_node_indx(node_id) - cols_number = self._graph_struct.total_variables_number - self._time_scalar_indexing_structure = NetworkGraph.\ - build_time_scalar_indexing_structure_for_a_node(node_states, p_vals) - self._transition_scalar_indexing_structure = NetworkGraph.\ - build_transition_scalar_indexing_structure_for_a_node(node_states, p_vals) - self._time_filtering = NetworkGraph.build_time_columns_filtering_for_a_node(node_indx, p_indxs) - self._transition_filtering = NetworkGraph.build_transition_filtering_for_a_node(node_indx, p_indxs, cols_number) - self._p_combs_structure = NetworkGraph.build_p_comb_structure_for_a_node(p_vals) - - def add_nodes(self, list_of_nodes: typing.List) -> None: - """Adds the nodes to the ``_graph`` contained in the list of nodes ``list_of_nodes``. - Sets all the properties that identify a nodes (index, positional index, cardinality) - - :param list_of_nodes: the nodes to add to ``_graph`` - :type list_of_nodes: List - """ - nodes_indxs = self._graph_struct.nodes_indexes - nodes_vals = self._graph_struct.nodes_values - pos = 0 - for id, node_indx, node_val in zip(list_of_nodes, nodes_indxs, nodes_vals): - self._graph.add_node(id, indx=node_indx, val=node_val, pos_indx=pos) - pos += 1 - - def has_edge(self,edge:tuple)-> bool: - """ - Check if the graph contains a specific edge - - Parameters: - edge: a tuple that rappresents the edge - Returns: - bool - """ - return self._graph.has_edge(edge[0],edge[1]) - - def add_edges(self, list_of_edges: typing.List) -> None: - """Add the edges to the ``_graph`` contained in the list ``list_of_edges``. - - :param list_of_edges: the list containing of tuples containing the edges - :type list_of_edges: List - """ - self._graph.add_edges_from(list_of_edges) - - def remove_node(self, node_id: str) -> None: - """Remove the node ``node_id`` from all the class members. - Initialize all the filtering/indexing structures. - """ - self._graph.remove_node(node_id) - self._graph_struct.remove_node(node_id) - self.clear_indexing_filtering_structures() - - def clear_indexing_filtering_structures(self) -> None: - """Initialize all the filtering/indexing structures. - """ - self._aggregated_info_about_nodes_parents = None - self._time_scalar_indexing_structure = None - self._transition_scalar_indexing_structure = None - self._time_filtering = None - self._transition_filtering = None - self._p_combs_structure = None - - def get_ordered_by_indx_set_of_parents(self, node: str) -> typing.Tuple: - """Builds the aggregated structure that holds all the infos relative to the parent set of the node, namely - (parents_labels, parents_indexes, parents_cardinalities). - - :param node: the label of the node - :type node: string - :return: a tuple containing all the parent set infos - :rtype: Tuple - """ - parents = self.get_parents_by_id(node) - nodes = self._graph_struct.nodes_labels - d = {v: i for i, v in enumerate(nodes)} - sorted_parents = sorted(parents, key=lambda v: d[v]) - get_node_indx = self.get_node_indx - p_indxes = [get_node_indx(node) for node in sorted_parents] - p_values = [self.get_states_number(node) for node in sorted_parents] - return sorted_parents, p_indxes, p_values - - def remove_edges(self, list_of_edges: typing.List) -> None: - """Remove the edges to the graph contained in the list list_of_edges. - - :param list_of_edges: The edges to remove from the graph - :type list_of_edges: List - """ - self._graph.remove_edges_from(list_of_edges) - - @staticmethod - def build_time_scalar_indexing_structure_for_a_node(node_states: int, - parents_vals: typing.List) -> np.ndarray: - """Builds an indexing structure for the computation of state residence times values. - - :param node_states: the node cardinality - :type node_states: int - :param parents_vals: the caridinalites of the node's parents - :type parents_vals: List - :return: The time indexing structure - :rtype: numpy.ndArray - """ - T_vector = np.array([node_states]) - T_vector = np.append(T_vector, parents_vals) - T_vector = T_vector.cumprod().astype(np.int) - return T_vector - - @staticmethod - def build_transition_scalar_indexing_structure_for_a_node(node_states_number: int, parents_vals: typing.List) \ - -> np.ndarray: - """Builds an indexing structure for the computation of state transitions values. - - :param node_states_number: the node cardinality - :type node_states_number: int - :param parents_vals: the caridinalites of the node's parents - :type parents_vals: List - :return: The transition indexing structure - :rtype: numpy.ndArray - """ - M_vector = np.array([node_states_number, - node_states_number]) - M_vector = np.append(M_vector, parents_vals) - M_vector = M_vector.cumprod().astype(np.int) - return M_vector - - @staticmethod - def build_time_columns_filtering_for_a_node(node_indx: int, p_indxs: typing.List) -> np.ndarray: - """ - Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs`` - in the dataset. - This structute will be used in the computation of the state res times. - :param node_indx: the index of the node - :type node_indx: int - :param p_indxs: the indexes of the node's parents - :type p_indxs: List - :return: The filtering structure for times estimation - :rtype: numpy.ndArray - """ - return np.append(np.array([node_indx], dtype=np.int), p_indxs).astype(np.int) - - @staticmethod - def build_transition_filtering_for_a_node(node_indx: int, p_indxs: typing.List, nodes_number: int) \ - -> np.ndarray: - """Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs`` - in the dataset. - This structure will be used in the computation of the state transitions values. - :param node_indx: the index of the node - :type node_indx: int - :param p_indxs: the indexes of the node's parents - :type p_indxs: List - :param nodes_number: the total number of nodes in the dataset - :type nodes_number: int - :return: The filtering structure for transitions estimation - :rtype: numpy.ndArray - """ - return np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=np.int) - - @staticmethod - def build_p_comb_structure_for_a_node(parents_values: typing.List) -> np.ndarray: - """ - Builds the combinatorial structure that contains the combinations of all the values contained in - ``parents_values``. - - :param parents_values: the cardinalities of the nodes - :type parents_values: List - :return: A numpy matrix containing a grid of the combinations - :rtype: numpy.ndArray - """ - tmp = [] - for val in parents_values: - tmp.append([x for x in range(val)]) - if len(parents_values) > 0: - parents_comb = np.array(np.meshgrid(*tmp)).T.reshape(-1, len(parents_values)) - if len(parents_values) > 1: - tmp_comb = parents_comb[:, 1].copy() - parents_comb[:, 1] = parents_comb[:, 0].copy() - parents_comb[:, 0] = tmp_comb - else: - parents_comb = np.array([[]], dtype=np.int) - return parents_comb - - def get_parents_by_id(self, node_id) -> typing.List: - """Returns a list of labels of the parents of the node ``node_id`` - - :param node_id: the node label - :type node_id: string - :return: a List of labels of the parents - :rtype: List - """ - return list(self._graph.predecessors(node_id)) - - def get_states_number(self, node_id) -> int: - return self._graph.nodes[node_id]['val'] - - def get_node_indx(self, node_id) -> int: - return nx.get_node_attributes(self._graph, 'indx')[node_id] - - def get_positional_node_indx(self, node_id) -> int: - return self._graph.nodes[node_id]['pos_indx'] - - @property - def nodes(self) -> typing.List: - return self._graph_struct.nodes_labels - - @property - def edges(self) -> typing.List: - return list(self._graph.edges) - - @property - def nodes_indexes(self) -> np.ndarray: - return self._graph_struct.nodes_indexes - - @property - def nodes_values(self) -> np.ndarray: - return self._graph_struct.nodes_values - - @property - def time_scalar_indexing_strucure(self) -> np.ndarray: - return self._time_scalar_indexing_structure - - @property - def time_filtering(self) -> np.ndarray: - return self._time_filtering - - @property - def transition_scalar_indexing_structure(self) -> np.ndarray: - return self._transition_scalar_indexing_structure - - @property - def transition_filtering(self) -> np.ndarray: - return self._transition_filtering - - @property - def p_combs(self) -> np.ndarray: - return self._p_combs_structure diff --git a/PyCTBN/build/lib/classes/structure_graph/sample_path.py b/PyCTBN/build/lib/classes/structure_graph/sample_path.py deleted file mode 100644 index 80b51d9..0000000 --- a/PyCTBN/build/lib/classes/structure_graph/sample_path.py +++ /dev/null @@ -1,91 +0,0 @@ - - -import numpy as np -import pandas as pd - -from .structure import Structure -from .trajectory import Trajectory -from ..utility.abstract_importer import AbstractImporter - - - -class SamplePath(object): - """Aggregates all the informations about the trajectories, the real structure of the sampled net and variables - cardinalites. Has the task of creating the objects ``Trajectory`` and ``Structure`` that will - contain the mentioned data. - - :param importer: the Importer object which contains the imported and processed data - :type importer: AbstractImporter - :_trajectories: the ``Trajectory`` object that will contain all the concatenated trajectories - :_structure: the ``Structure`` Object that will contain all the structural infos about the net - :_total_variables_count: the number of variables in the net - """ - def __init__(self, importer: AbstractImporter): - """Constructor Method - """ - self._importer = importer - if self._importer._df_variables is None or self._importer._concatenated_samples is None: - raise RuntimeError('The importer object has to contain the all processed data!') - if self._importer._df_variables.empty: - raise RuntimeError('The importer object has to contain the all processed data!') - if isinstance(self._importer._concatenated_samples, pd.DataFrame): - if self._importer._concatenated_samples.empty: - raise RuntimeError('The importer object has to contain the all processed data!') - if isinstance(self._importer._concatenated_samples, np.ndarray): - if self._importer._concatenated_samples.size == 0: - raise RuntimeError('The importer object has to contain the all processed data!') - self._trajectories = None - self._structure = None - self._total_variables_count = None - - def build_trajectories(self) -> None: - """Builds the Trajectory object that will contain all the trajectories. - Clears all the unused dataframes in ``_importer`` Object - """ - self._trajectories = \ - Trajectory(self._importer.build_list_of_samples_array(self._importer.concatenated_samples), - len(self._importer.sorter) + 1) - self._importer.clear_concatenated_frame() - - def build_structure(self) -> None: - """ - Builds the ``Structure`` object that aggregates all the infos about the net. - """ - if self._importer.sorter != self._importer.variables.iloc[:, 0].to_list(): - raise RuntimeError("The Dataset columns order have to match the order of labels in the variables Frame!") - - self._total_variables_count = len(self._importer.sorter) - labels = self._importer.variables.iloc[:, 0].to_list() - indxs = self._importer.variables.index.to_numpy() - vals = self._importer.variables.iloc[:, 1].to_numpy() - if self._importer.structure is None or self._importer.structure.empty: - edges = [] - else: - edges = list(self._importer.structure.to_records(index=False)) - self._structure = Structure(labels, indxs, vals, edges, - self._total_variables_count) - - def clear_memory(self): - self._importer._raw_data = [] - - @property - def trajectories(self) -> Trajectory: - return self._trajectories - - @property - def structure(self) -> Structure: - return self._structure - - @property - def total_variables_count(self) -> int: - return self._total_variables_count - - @property - def has_prior_net_structure(self) -> bool: - return bool(self._structure.edges) - - - - - - diff --git a/PyCTBN/build/lib/classes/structure_graph/set_of_cims.py b/PyCTBN/build/lib/classes/structure_graph/set_of_cims.py deleted file mode 100644 index 81caff5..0000000 --- a/PyCTBN/build/lib/classes/structure_graph/set_of_cims.py +++ /dev/null @@ -1,97 +0,0 @@ - - -import typing - -import numpy as np - -from .conditional_intensity_matrix import ConditionalIntensityMatrix - - -class SetOfCims(object): - """Aggregates all the CIMS of the node identified by the label _node_id. - - :param node_id: the node label - :type node_ind: string - :param parents_states_number: the cardinalities of the parents - :type parents_states_number: List - :param node_states_number: the caridinality of the node - :type node_states_number: int - :param p_combs: the p_comb structure bound to this node - :type p_combs: numpy.ndArray - :_state_residence_time: matrix containing all the state residence time vectors for the node - :_transition_matrices: matrix containing all the transition matrices for the node - :_actual_cims: the cims of the node - """ - - def __init__(self, node_id: str, parents_states_number: typing.List, node_states_number: int, p_combs: np.ndarray): - """Constructor Method - """ - self._node_id = node_id - self._parents_states_number = parents_states_number - self._node_states_number = node_states_number - self._actual_cims = [] - self._state_residence_times = None - self._transition_matrices = None - self._p_combs = p_combs - self.build_times_and_transitions_structures() - - def build_times_and_transitions_structures(self) -> None: - """Initializes at the correct dimensions the state residence times matrix and the state transition matrices. - """ - if not self._parents_states_number: - self._state_residence_times = np.zeros((1, self._node_states_number), dtype=np.float) - self._transition_matrices = np.zeros((1, self._node_states_number, self._node_states_number), dtype=np.int) - else: - self._state_residence_times = \ - np.zeros((np.prod(self._parents_states_number), self._node_states_number), dtype=np.float) - self._transition_matrices = np.zeros([np.prod(self._parents_states_number), self._node_states_number, - self._node_states_number], dtype=np.int) - - def build_cims(self, state_res_times: np.ndarray, transition_matrices: np.ndarray) -> None: - """Build the ``ConditionalIntensityMatrix`` objects given the state residence times and transitions matrices. - Compute the cim coefficients.The class member ``_actual_cims`` will contain the computed cims. - - :param state_res_times: the state residence times matrix - :type state_res_times: numpy.ndArray - :param transition_matrices: the transition matrices - :type transition_matrices: numpy.ndArray - """ - for state_res_time_vector, transition_matrix in zip(state_res_times, transition_matrices): - cim_to_add = ConditionalIntensityMatrix(state_res_time_vector, transition_matrix) - cim_to_add.compute_cim_coefficients() - self._actual_cims.append(cim_to_add) - self._actual_cims = np.array(self._actual_cims) - self._transition_matrices = None - self._state_residence_times = None - - def filter_cims_with_mask(self, mask_arr: np.ndarray, comb: typing.List) -> np.ndarray: - """Filter the cims contained in the array ``_actual_cims`` given the boolean mask ``mask_arr`` and the index - ``comb``. - - :param mask_arr: the boolean mask that indicates which parent to consider - :type mask_arr: numpy.array - :param comb: the state/s of the filtered parents - :type comb: numpy.array - :return: Array of ``ConditionalIntensityMatrix`` objects - :rtype: numpy.array - """ - if mask_arr.size <= 1: - return self._actual_cims - else: - flat_indxs = np.argwhere(np.all(self._p_combs[:, mask_arr] == comb, axis=1)).ravel() - return self._actual_cims[flat_indxs] - - @property - def actual_cims(self) -> np.ndarray: - return self._actual_cims - - @property - def p_combs(self) -> np.ndarray: - return self._p_combs - - def get_cims_number(self): - return len(self._actual_cims) - - - - diff --git a/PyCTBN/build/lib/classes/structure_graph/structure.py b/PyCTBN/build/lib/classes/structure_graph/structure.py deleted file mode 100644 index a9d60cc..0000000 --- a/PyCTBN/build/lib/classes/structure_graph/structure.py +++ /dev/null @@ -1,124 +0,0 @@ - -import typing as ty - -import numpy as np - - -class Structure(object): - """Contains all the infos about the network structure(nodes labels, nodes caridinalites, edges, indexes) - - :param nodes_labels_list: the symbolic names of the variables - :type nodes_labels_list: List - :param nodes_indexes_arr: the indexes of the nodes - :type nodes_indexes_arr: numpy.ndArray - :param nodes_vals_arr: the cardinalites of the nodes - :type nodes_vals_arr: numpy.ndArray - :param edges_list: the edges of the network - :type edges_list: List - :param total_variables_number: the total number of variables in the dataset - :type total_variables_number: int - """ - - def __init__(self, nodes_labels_list: ty.List, nodes_indexes_arr: np.ndarray, nodes_vals_arr: np.ndarray, - edges_list: ty.List, total_variables_number: int): - """Constructor Method - """ - self._nodes_labels_list = nodes_labels_list - self._nodes_indexes_arr = nodes_indexes_arr - self._nodes_vals_arr = nodes_vals_arr - self._edges_list = edges_list - self._total_variables_number = total_variables_number - - def remove_node(self, node_id: str) -> None: - """Remove the node ``node_id`` from all the class members. - The class member ``_total_variables_number`` since it refers to the total number of variables in the dataset. - """ - node_positional_indx = self._nodes_labels_list.index(node_id) - del self._nodes_labels_list[node_positional_indx] - self._nodes_indexes_arr = np.delete(self._nodes_indexes_arr, node_positional_indx) - self._nodes_vals_arr = np.delete(self._nodes_vals_arr, node_positional_indx) - self._edges_list = [(from_node, to_node) for (from_node, to_node) in self._edges_list if (from_node != node_id - and to_node != node_id)] - - @property - def edges(self) -> ty.List: - return self._edges_list - - @property - def nodes_labels(self) -> ty.List: - return self._nodes_labels_list - - @property - def nodes_indexes(self) -> np.ndarray: - return self._nodes_indexes_arr - - @property - def nodes_values(self) -> np.ndarray: - return self._nodes_vals_arr - - @property - def total_variables_number(self) -> int: - return self._total_variables_number - - def get_node_id(self, node_indx: int) -> str: - """Given the ``node_index`` returns the node label. - - :param node_indx: the node index - :type node_indx: int - :return: the node label - :rtype: string - """ - return self._nodes_labels_list[node_indx] - - def clean_structure_edges(self): - self._edges_list = list() - - def add_edge(self,edge: tuple): - self._edges_list.append(tuple) - print(self._edges_list) - - def remove_edge(self,edge: tuple): - self._edges_list.remove(tuple) - - def contains_edge(self,edge:tuple) -> bool: - return edge in self._edges_list - - def get_node_indx(self, node_id: str) -> int: - """Given the ``node_index`` returns the node label. - - :param node_id: the node label - :type node_id: string - :return: the node index - :rtype: int - """ - pos_indx = self._nodes_labels_list.index(node_id) - return self._nodes_indexes_arr[pos_indx] - - def get_positional_node_indx(self, node_id: str) -> int: - return self._nodes_labels_list.index(node_id) - - def get_states_number(self, node: str) -> int: - """Given the node label ``node`` returns the cardinality of the node. - - :param node: the node label - :type node: string - :return: the node cardinality - :rtype: int - """ - pos_indx = self._nodes_labels_list.index(node) - return self._nodes_vals_arr[pos_indx] - - def __repr__(self): - return "Variables:\n" + str(self._nodes_labels_list) +"\nValues:\n"+ str(self._nodes_vals_arr) +\ - "\nEdges: \n" + str(self._edges_list) - - def __eq__(self, other): - """Overrides the default implementation""" - if isinstance(other, Structure): - return set(self._nodes_labels_list) == set(other._nodes_labels_list) and \ - np.array_equal(self._nodes_vals_arr, other._nodes_vals_arr) and \ - np.array_equal(self._nodes_indexes_arr, other._nodes_indexes_arr) and \ - self._edges_list == other._edges_list - - return False - diff --git a/PyCTBN/build/lib/classes/structure_graph/trajectory.py b/PyCTBN/build/lib/classes/structure_graph/trajectory.py deleted file mode 100644 index 36899b3..0000000 --- a/PyCTBN/build/lib/classes/structure_graph/trajectory.py +++ /dev/null @@ -1,45 +0,0 @@ - -import typing - -import numpy as np - - -class Trajectory(object): - """ Abstracts the infos about a complete set of trajectories, represented as a numpy array of doubles - (the time deltas) and a numpy matrix of ints (the changes of states). - - :param list_of_columns: the list containing the times array and values matrix - :type list_of_columns: List - :param original_cols_number: total number of cols in the data - :type original_cols_number: int - :_actual_trajectory: the trajectory containing also the duplicated/shifted values - :_times: the array containing the time deltas - """ - - def __init__(self, list_of_columns: typing.List, original_cols_number: int): - """Constructor Method - """ - self._times = list_of_columns[0] - self._actual_trajectory = list_of_columns[1] - self._original_cols_number = original_cols_number - - @property - def trajectory(self) -> np.ndarray: - return self._actual_trajectory[:, :self._original_cols_number - 1] - - @property - def complete_trajectory(self) -> np.ndarray: - return self._actual_trajectory - - @property - def times(self): - return self._times - - def size(self): - return self._actual_trajectory.shape[0] - - def __repr__(self): - return "Complete Trajectory Rows: " + str(self.size()) + "\n" + self.complete_trajectory.__repr__() + \ - "\nTimes Rows:" + str(self.times.size) + "\n" + self.times.__repr__() - - diff --git a/PyCTBN/build/lib/classes/utility/__init__.py b/PyCTBN/build/lib/classes/utility/__init__.py deleted file mode 100644 index f79749c..0000000 --- a/PyCTBN/build/lib/classes/utility/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .abstract_importer import AbstractImporter -from .cache import Cache -from .json_importer import JsonImporter -from .sample_importer import SampleImporter \ No newline at end of file diff --git a/PyCTBN/build/lib/classes/utility/abstract_importer.py b/PyCTBN/build/lib/classes/utility/abstract_importer.py deleted file mode 100644 index 1cad352..0000000 --- a/PyCTBN/build/lib/classes/utility/abstract_importer.py +++ /dev/null @@ -1,164 +0,0 @@ - -import typing -from abc import ABC, abstractmethod - -import numpy as np -import pandas as pd - -import copy - -#from sklearn.utils import resample - - -class AbstractImporter(ABC): - """Abstract class that exposes all the necessary methods to process the trajectories and the net structure. - - :param file_path: the file path, or dataset name if you import already processed data - :type file_path: str - :param trajectory_list: Dataframe or numpy array containing the concatenation of all the processed trajectories - :type trajectory_list: typing.Union[pandas.DataFrame, numpy.ndarray] - :param variables: Dataframe containing the nodes labels and cardinalities - :type variables: pandas.DataFrame - :prior_net_structure: Dataframe containing the structure of the network (edges) - :type prior_net_structure: pandas.DataFrame - :_sorter: A list containing the variables labels in the SAME order as the columns in ``concatenated_samples`` - - .. warning:: - The parameters ``variables`` and ``prior_net_structure`` HAVE to be properly constructed - as Pandas Dataframes with the following structure: - Header of _df_structure = [From_Node | To_Node] - Header of _df_variables = [Variable_Label | Variable_Cardinality] - See the tutorial on how to construct a correct ``concatenated_samples`` Dataframe/ndarray. - - .. note:: - See :class:``JsonImporter`` for an example implementation - - """ - - def __init__(self, file_path: str = None, trajectory_list: typing.Union[pd.DataFrame, np.ndarray] = None, - variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None): - """Constructor - """ - self._file_path = file_path - self._df_samples_list = trajectory_list - self._concatenated_samples = [] - self._df_variables = variables - self._df_structure = prior_net_structure - self._sorter = None - super().__init__() - - @abstractmethod - def build_sorter(self, trajecory_header: object) -> typing.List: - """Initializes the ``_sorter`` class member from a trajectory dataframe, exctracting the header of the frame - and keeping ONLY the variables symbolic labels, cutting out the time label in the header. - - :param trajecory_header: an object that will be used to define the header - :type trajecory_header: object - :return: A list containing the processed header. - :rtype: List - """ - pass - - def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame, - columns_header: typing.List, shifted_cols_header: typing.List) \ - -> pd.DataFrame: - """Computes the difference between each value present in th time column. - Copies and shift by one position up all the values present in the remaining columns. - - :param sample_frame: the traj to be processed - :type sample_frame: pandas.Dataframe - :param columns_header: the original header of sample_frame - :type columns_header: List - :param shifted_cols_header: a copy of columns_header with changed names of the contents - :type shifted_cols_header: List - :return: The processed dataframe - :rtype: pandas.Dataframe - - .. warning:: - the Dataframe ``sample_frame`` has to follow the column structure of this header: - Header of sample_frame = [Time | Variable values] - """ - sample_frame = copy.deepcopy(sample_frame) - sample_frame.iloc[:, 0] = sample_frame.iloc[:, 0].diff().shift(-1) - shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32') - shifted_cols.columns = shifted_cols_header - sample_frame = sample_frame.assign(**shifted_cols) - sample_frame.drop(sample_frame.tail(1).index, inplace=True) - return sample_frame - - def compute_row_delta_in_all_samples_frames(self, df_samples_list: typing.List) -> None: - """Calls the method ``compute_row_delta_sigle_samples_frame`` on every dataframe present in the list - ``df_samples_list``. - Concatenates the result in the dataframe ``concatanated_samples`` - - :param df_samples_list: the datframe's list to be processed and concatenated - :type df_samples_list: List - - .. warning:: - The Dataframe sample_frame has to follow the column structure of this header: - Header of sample_frame = [Time | Variable values] - The class member self._sorter HAS to be properly INITIALIZED (See class members definition doc) - .. note:: - After the call of this method the class member ``concatanated_samples`` will contain all processed - and merged trajectories - """ - if not self._sorter: - raise RuntimeError("The class member self._sorter has to be INITIALIZED!") - shifted_cols_header = [s + "S" for s in self._sorter] - compute_row_delta = self.compute_row_delta_sigle_samples_frame - proc_samples_list = [compute_row_delta(sample, self._sorter, shifted_cols_header) - for sample in df_samples_list] - self._concatenated_samples = pd.concat(proc_samples_list) - - complete_header = self._sorter[:] - complete_header.insert(0,'Time') - complete_header.extend(shifted_cols_header) - self._concatenated_samples = self._concatenated_samples[complete_header] - - def build_list_of_samples_array(self, concatenated_sample: pd.DataFrame) -> typing.List: - """Builds a List containing the the delta times numpy array, and the complete transitions matrix - - :param concatenated_sample: the dataframe/array from which the time, and transitions matrix have to be extracted - and converted - :type concatenated_sample: pandas.Dataframe - :return: the resulting list of numpy arrays - :rtype: List - """ - - concatenated_array = concatenated_sample.to_numpy() - columns_list = [concatenated_array[:, 0], concatenated_array[:, 1:].astype(int)] - - return columns_list - - def clear_concatenated_frame(self) -> None: - """Removes all values in the dataframe concatenated_samples. - """ - if isinstance(self._concatenated_samples, pd.DataFrame): - self._concatenated_samples = self._concatenated_samples.iloc[0:0] - - @abstractmethod - def dataset_id(self) -> object: - """If the original dataset contains multiple dataset, this method returns a unique id to identify the current - dataset - """ - pass - - @property - def concatenated_samples(self) -> pd.DataFrame: - return self._concatenated_samples - - @property - def variables(self) -> pd.DataFrame: - return self._df_variables - - @property - def structure(self) -> pd.DataFrame: - return self._df_structure - - @property - def sorter(self) -> typing.List: - return self._sorter - - @property - def file_path(self) -> str: - return self._file_path diff --git a/PyCTBN/build/lib/classes/utility/cache.py b/PyCTBN/build/lib/classes/utility/cache.py deleted file mode 100644 index 8e0369b..0000000 --- a/PyCTBN/build/lib/classes/utility/cache.py +++ /dev/null @@ -1,58 +0,0 @@ - -import typing - -from ..structure_graph.set_of_cims import SetOfCims - - -class Cache: - """This class acts as a cache of ``SetOfCims`` objects for a node. - - :__list_of_sets_of_parents: a list of ``Sets`` objects of the parents to which the cim in cache at SAME - index is related - :__actual_cache: a list of setOfCims objects - """ - - def __init__(self): - """Constructor Method - """ - self._list_of_sets_of_parents = [] - self._actual_cache = [] - - def find(self, parents_comb: typing.Set): #typing.Union[typing.Set, str] - """ - Tries to find in cache given the symbolic parents combination ``parents_comb`` the ``SetOfCims`` - related to that ``parents_comb``. - - :param parents_comb: the parents related to that ``SetOfCims`` - :type parents_comb: Set - :return: A ``SetOfCims`` object if the ``parents_comb`` index is found in ``__list_of_sets_of_parents``. - None otherwise. - :rtype: SetOfCims - """ - try: - #print("Cache State:", self.list_of_sets_of_indxs) - #print("Look For:", parents_comb) - result = self._actual_cache[self._list_of_sets_of_parents.index(parents_comb)] - #print("CACHE HIT!!!!", parents_comb) - return result - except ValueError: - return None - - def put(self, parents_comb: typing.Set, socim: SetOfCims): - """Place in cache the ``SetOfCims`` object, and the related symbolic index ``parents_comb`` in - ``__list_of_sets_of_parents``. - - :param parents_comb: the symbolic set index - :type parents_comb: Set - :param socim: the related SetOfCims object - :type socim: SetOfCims - """ - #print("Putting in cache:", parents_comb) - self._list_of_sets_of_parents.append(parents_comb) - self._actual_cache.append(socim) - - def clear(self): - """Clear the contents both of ``__actual_cache`` and ``__list_of_sets_of_parents``. - """ - del self._list_of_sets_of_parents[:] - del self._actual_cache[:] \ No newline at end of file diff --git a/PyCTBN/build/lib/classes/utility/json_importer.py b/PyCTBN/build/lib/classes/utility/json_importer.py deleted file mode 100644 index edff212..0000000 --- a/PyCTBN/build/lib/classes/utility/json_importer.py +++ /dev/null @@ -1,176 +0,0 @@ -import json -import typing - -import pandas as pd - - -from .abstract_importer import AbstractImporter - - -class JsonImporter(AbstractImporter): - """Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare - the data in json extension. - - :param file_path: the path of the file that contains tha data to be imported - :type file_path: string - :param samples_label: the reference key for the samples in the trajectories - :type samples_label: string - :param structure_label: the reference key for the structure of the network data - :type structure_label: string - :param variables_label: the reference key for the cardinalites of the nodes data - :type variables_label: string - :param time_key: the key used to identify the timestamps in each trajectory - :type time_key: string - :param variables_key: the key used to identify the names of the variables in the net - :type variables_key: string - :_array_indx: the index of the outer JsonArray to extract the data from - :type _array_indx: int - :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory - :_raw_data: The raw contents of the json file to import - :type _raw_data: List - """ - - def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str, - variables_key: str): - """Constructor method - - .. note:: - This constructor calls also the method ``read_json_file()``, so after the construction of the object - the class member ``_raw_data`` will contain the raw imported json data. - - """ - self._samples_label = samples_label - self._structure_label = structure_label - self._variables_label = variables_label - self._time_key = time_key - self._variables_key = variables_key - self._df_samples_list = None - self._array_indx = None - super(JsonImporter, self).__init__(file_path) - self._raw_data = self.read_json_file() - - def import_data(self, indx: int) -> None: - """Implements the abstract method of :class:`AbstractImporter`. - - :param indx: the index of the outer JsonArray to extract the data from - :type indx: int - """ - self._array_indx = indx - self._df_samples_list = self.import_trajectories(self._raw_data) - self._sorter = self.build_sorter(self._df_samples_list[0]) - self.compute_row_delta_in_all_samples_frames(self._df_samples_list) - self.clear_data_frame_list() - self._df_structure = self.import_structure(self._raw_data) - self._df_variables = self.import_variables(self._raw_data) - - def import_trajectories(self, raw_data: typing.List) -> typing.List: - """Imports the trajectories from the list of dicts ``raw_data``. - - :param raw_data: List of Dicts - :type raw_data: List - :return: List of dataframes containing all the trajectories - :rtype: List - """ - return self.normalize_trajectories(raw_data, self._array_indx, self._samples_label) - - def import_structure(self, raw_data: typing.List) -> pd.DataFrame: - """Imports in a dataframe the data in the list raw_data at the key ``_structure_label`` - - :param raw_data: List of Dicts - :type raw_data: List - :return: Dataframe containg the starting node a ending node of every arc of the network - :rtype: pandas.Dataframe - """ - return self.one_level_normalizing(raw_data, self._array_indx, self._structure_label) - - def import_variables(self, raw_data: typing.List) -> pd.DataFrame: - """Imports the data in ``raw_data`` at the key ``_variables_label``. - - :param raw_data: List of Dicts - :type raw_data: List - :return: Datframe containg the variables simbolic labels and their cardinalities - :rtype: pandas.Dataframe - """ - return self.one_level_normalizing(raw_data, self._array_indx, self._variables_label) - - def read_json_file(self) -> typing.List: - """Reads the JSON file in the path self.filePath. - - :return: The contents of the json file - :rtype: List - """ - with open(self._file_path) as f: - data = json.load(f) - return data - - def one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame: - """Extracts the one-level nested data in the list ``raw_data`` at the index ``indx`` at the key ``key``. - - :param raw_data: List of Dicts - :type raw_data: List - :param indx: The index of the array from which the data have to be extracted - :type indx: int - :param key: the key for the Dicts from which exctract data - :type key: string - :return: A normalized dataframe - :rtype: pandas.Datframe - """ - return pd.DataFrame(raw_data[indx][key]) - - def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str) -> typing.List: - """ - Extracts the trajectories in ``raw_data`` at the index ``index`` at the key ``trajectories key``. - - :param raw_data: List of Dicts - :type raw_data: List - :param indx: The index of the array from which the data have to be extracted - :type indx: int - :param trajectories_key: the key of the trajectories objects - :type trajectories_key: string - :return: A list of daframes containg the trajectories - :rtype: List - """ - dataframe = pd.DataFrame - smps = raw_data[indx][trajectories_key] - df_samples_list = [dataframe(sample) for sample in smps] - return df_samples_list - - def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List: - """Implements the abstract method build_sorter of the :class:`AbstractImporter` for this dataset. - """ - columns_header = list(sample_frame.columns.values) - columns_header.remove(self._time_key) - return columns_header - - def clear_data_frame_list(self) -> None: - """Removes all values present in the dataframes in the list ``_df_samples_list``. - """ - for indx in range(len(self._df_samples_list)): - self._df_samples_list[indx] = self._df_samples_list[indx].iloc[0:0] - - def dataset_id(self) -> object: - return self._array_indx - - def import_sampled_cims(self, raw_data: typing.List, indx: int, cims_key: str) -> typing.Dict: - """Imports the synthetic CIMS in the dataset in a dictionary, using variables labels - as keys for the set of CIMS of a particular node. - - :param raw_data: List of Dicts - :type raw_data: List - :param indx: The index of the array from which the data have to be extracted - :type indx: int - :param cims_key: the key where the json object cims are placed - :type cims_key: string - :return: a dictionary containing the sampled CIMS for all the variables in the net - :rtype: Dictionary - """ - cims_for_all_vars = {} - for var in raw_data[indx][cims_key]: - sampled_cims_list = [] - cims_for_all_vars[var] = sampled_cims_list - for p_comb in raw_data[indx][cims_key][var]: - cims_for_all_vars[var].append(pd.DataFrame(raw_data[indx][cims_key][var][p_comb]).to_numpy()) - return cims_for_all_vars - - - diff --git a/PyCTBN/build/lib/classes/utility/sample_importer.py b/PyCTBN/build/lib/classes/utility/sample_importer.py deleted file mode 100644 index 05073c8..0000000 --- a/PyCTBN/build/lib/classes/utility/sample_importer.py +++ /dev/null @@ -1,65 +0,0 @@ -import json -import typing - -import pandas as pd -import numpy as np - -from .abstract_importer import AbstractImporter - - - -class SampleImporter(AbstractImporter): - """Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare - the data loaded directly by using DataFrame - - :param trajectory_list: the data that describes the trajectories - :type trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List] - :param variables: the data that describes the variables with name and cardinality - :type variables: typing.Union[pd.DataFrame, np.ndarray, typing.List] - :param prior_net_structure: the data of the real structure, if it exists - :type prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List] - - :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory - :_raw_data: The raw contents of the json file to import - :type _raw_data: List - """ - - def __init__(self, - trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None, - variables: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None, - prior_net_structure: typing.Union[pd.DataFrame, np.ndarray,typing.List] = None): - - 'If the data are not DataFrame, it will be converted' - if isinstance(variables,list) or isinstance(variables,np.ndarray): - variables = pd.DataFrame(variables) - if isinstance(variables,list) or isinstance(variables,np.ndarray): - prior_net_structure=pd.DataFrame(prior_net_structure) - - super(SampleImporter, self).__init__(trajectory_list =trajectory_list, - variables= variables, - prior_net_structure=prior_net_structure) - - def import_data(self, header_column = None): - - if header_column is not None: - self._sorter = header_column - else: - self._sorter = self.build_sorter(self._df_samples_list[0]) - - samples_list= self._df_samples_list - - if isinstance(samples_list, np.ndarray): - samples_list = samples_list.tolist() - - self.compute_row_delta_in_all_samples_frames(samples_list) - - def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List: - """Implements the abstract method build_sorter of the :class:`AbstractImporter` in order to get the ordered variables list. - """ - columns_header = list(sample_frame.columns.values) - del columns_header[0] - return columns_header - - - def dataset_id(self) -> object: - pass \ No newline at end of file diff --git a/PyCTBN/build/lib/tests/__init__.py b/PyCTBN/build/lib/tests/__init__.py deleted file mode 100644 index 8b13789..0000000 --- a/PyCTBN/build/lib/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/basic_main.py b/basic_main.py index b1288db..6559d38 100644 --- a/basic_main.py +++ b/basic_main.py @@ -37,3 +37,5 @@ def main(): if __name__ == "__main__": main() + + diff --git a/coverage copy.xml b/coverage copy.xml deleted file mode 100644 index 0d526b2..0000000 --- a/coverage copy.xml +++ /dev/null @@ -1,2219 +0,0 @@ - - - - - - C:\Users\lucam\Documents\Università\_Tesi\PyCTBN - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/setup.py b/setup.py index 56dd72f..7cad4ea 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,8 @@ setup(name='PyCTBN', author=['Alessandro Bregoli', 'Filippo Martini','Luca Moretti'], author_email=['a.bregoli1@campus.unimib.it', 'f.martini@campus.unimib.it','lucamoretti96@gmail.com'], description='A Continuous Time Bayesian Networks Library', - packages=find_packages('.', exclude=['tests']), + packages=find_packages(exclude=['*test*','test_data','tests','PyCTBN.tests','PyCTBN.test_data']), + exclude_package_data={'': ['*test*','test_data','tests','PyCTBN.tests','PyCTBN.test_data']}, #packages=['PyCTBN.PyCTBN'], install_requires=[ 'numpy', 'pandas', 'networkx', 'scipy', 'matplotlib', 'tqdm'], @@ -17,4 +18,5 @@ setup(name='PyCTBN', 'https://github.com/tqdm/tqdm'], #long_description=open('../README.md').read(), zip_safe=False, + include_package_data=True, python_requires='>=3.6')