1
0
Fork 0

Update Setup

master
Luca Moretti 4 years ago
parent 9eef55fde6
commit e216be304b
  1. 8
      .gitignore
  2. BIN
      CTBN_project_dominio.pdf
  3. 8
      PyCTBN/PyCTBN/__init__.py
  4. 5
      PyCTBN/PyCTBN/estimators/__init__.py
  5. 272
      PyCTBN/PyCTBN/estimators/fam_score_calculator.py
  6. 143
      PyCTBN/PyCTBN/estimators/parameters_estimator.py
  7. 238
      PyCTBN/PyCTBN/estimators/structure_constraint_based_estimator.py
  8. 187
      PyCTBN/PyCTBN/estimators/structure_estimator.py
  9. 244
      PyCTBN/PyCTBN/estimators/structure_score_based_estimator.py
  10. 4
      PyCTBN/PyCTBN/optimizers/__init__.py
  11. 87
      PyCTBN/PyCTBN/optimizers/constraint_based_optimizer.py
  12. 135
      PyCTBN/PyCTBN/optimizers/hill_climbing_search.py
  13. 39
      PyCTBN/PyCTBN/optimizers/optimizer.py
  14. 199
      PyCTBN/PyCTBN/optimizers/tabu_search.py
  15. 6
      PyCTBN/PyCTBN/structure_graph/__init__.py
  16. 42
      PyCTBN/PyCTBN/structure_graph/conditional_intensity_matrix.py
  17. 293
      PyCTBN/PyCTBN/structure_graph/network_graph.py
  18. 91
      PyCTBN/PyCTBN/structure_graph/sample_path.py
  19. 97
      PyCTBN/PyCTBN/structure_graph/set_of_cims.py
  20. 124
      PyCTBN/PyCTBN/structure_graph/structure.py
  21. 45
      PyCTBN/PyCTBN/structure_graph/trajectory.py
  22. 4
      PyCTBN/PyCTBN/utility/__init__.py
  23. 164
      PyCTBN/PyCTBN/utility/abstract_importer.py
  24. 58
      PyCTBN/PyCTBN/utility/cache.py
  25. 176
      PyCTBN/PyCTBN/utility/json_importer.py
  26. 65
      PyCTBN/PyCTBN/utility/sample_importer.py
  27. 39
      PyCTBN/basic_main.py
  28. 8
      PyCTBN/build/lib/PyCTBN/__init__.py
  29. 5
      PyCTBN/build/lib/PyCTBN/estimators/__init__.py
  30. 272
      PyCTBN/build/lib/PyCTBN/estimators/fam_score_calculator.py
  31. 143
      PyCTBN/build/lib/PyCTBN/estimators/parameters_estimator.py
  32. 238
      PyCTBN/build/lib/PyCTBN/estimators/structure_constraint_based_estimator.py
  33. 187
      PyCTBN/build/lib/PyCTBN/estimators/structure_estimator.py
  34. 244
      PyCTBN/build/lib/PyCTBN/estimators/structure_score_based_estimator.py
  35. 4
      PyCTBN/build/lib/PyCTBN/optimizers/__init__.py
  36. 87
      PyCTBN/build/lib/PyCTBN/optimizers/constraint_based_optimizer.py
  37. 135
      PyCTBN/build/lib/PyCTBN/optimizers/hill_climbing_search.py
  38. 39
      PyCTBN/build/lib/PyCTBN/optimizers/optimizer.py
  39. 199
      PyCTBN/build/lib/PyCTBN/optimizers/tabu_search.py
  40. 6
      PyCTBN/build/lib/PyCTBN/structure_graph/__init__.py
  41. 42
      PyCTBN/build/lib/PyCTBN/structure_graph/conditional_intensity_matrix.py
  42. 293
      PyCTBN/build/lib/PyCTBN/structure_graph/network_graph.py
  43. 91
      PyCTBN/build/lib/PyCTBN/structure_graph/sample_path.py
  44. 97
      PyCTBN/build/lib/PyCTBN/structure_graph/set_of_cims.py
  45. 124
      PyCTBN/build/lib/PyCTBN/structure_graph/structure.py
  46. 45
      PyCTBN/build/lib/PyCTBN/structure_graph/trajectory.py
  47. 4
      PyCTBN/build/lib/PyCTBN/utility/__init__.py
  48. 164
      PyCTBN/build/lib/PyCTBN/utility/abstract_importer.py
  49. 58
      PyCTBN/build/lib/PyCTBN/utility/cache.py
  50. 176
      PyCTBN/build/lib/PyCTBN/utility/json_importer.py
  51. 65
      PyCTBN/build/lib/PyCTBN/utility/sample_importer.py
  52. 8
      PyCTBN/build/lib/classes/__init__.py
  53. 5
      PyCTBN/build/lib/classes/estimators/__init__.py
  54. 272
      PyCTBN/build/lib/classes/estimators/fam_score_calculator.py
  55. 143
      PyCTBN/build/lib/classes/estimators/parameters_estimator.py
  56. 238
      PyCTBN/build/lib/classes/estimators/structure_constraint_based_estimator.py
  57. 187
      PyCTBN/build/lib/classes/estimators/structure_estimator.py
  58. 244
      PyCTBN/build/lib/classes/estimators/structure_score_based_estimator.py
  59. 4
      PyCTBN/build/lib/classes/optimizers/__init__.py
  60. 87
      PyCTBN/build/lib/classes/optimizers/constraint_based_optimizer.py
  61. 135
      PyCTBN/build/lib/classes/optimizers/hill_climbing_search.py
  62. 39
      PyCTBN/build/lib/classes/optimizers/optimizer.py
  63. 199
      PyCTBN/build/lib/classes/optimizers/tabu_search.py
  64. 6
      PyCTBN/build/lib/classes/structure_graph/__init__.py
  65. 42
      PyCTBN/build/lib/classes/structure_graph/conditional_intensity_matrix.py
  66. 293
      PyCTBN/build/lib/classes/structure_graph/network_graph.py
  67. 91
      PyCTBN/build/lib/classes/structure_graph/sample_path.py
  68. 97
      PyCTBN/build/lib/classes/structure_graph/set_of_cims.py
  69. 124
      PyCTBN/build/lib/classes/structure_graph/structure.py
  70. 45
      PyCTBN/build/lib/classes/structure_graph/trajectory.py
  71. 4
      PyCTBN/build/lib/classes/utility/__init__.py
  72. 164
      PyCTBN/build/lib/classes/utility/abstract_importer.py
  73. 58
      PyCTBN/build/lib/classes/utility/cache.py
  74. 176
      PyCTBN/build/lib/classes/utility/json_importer.py
  75. 65
      PyCTBN/build/lib/classes/utility/sample_importer.py
  76. 1
      PyCTBN/build/lib/tests/__init__.py
  77. 20
      PyCTBN/setup.py
  78. 1
      PyCTBN/tests/__init__.py
  79. 963
      PyCTBN/tests/coverage.xml
  80. 67
      PyCTBN/tests/estimators/test_parameters_estimator.py
  81. 64
      PyCTBN/tests/estimators/test_structure_constraint_based_estimator.py
  82. 59
      PyCTBN/tests/estimators/test_structure_constraint_based_estimator_server.py
  83. 82
      PyCTBN/tests/estimators/test_structure_score_based_estimator.py
  84. 79
      PyCTBN/tests/estimators/test_structure_score_based_estimator_server.py
  85. 54
      PyCTBN/tests/optimizers/test_hill_climbing_search.py
  86. 84
      PyCTBN/tests/optimizers/test_tabu_search.py
  87. 46
      PyCTBN/tests/structure_graph/test_cim.py
  88. 190
      PyCTBN/tests/structure_graph/test_networkgraph.py
  89. 72
      PyCTBN/tests/structure_graph/test_sample_path.py
  90. 133
      PyCTBN/tests/structure_graph/test_setofcims.py
  91. 81
      PyCTBN/tests/structure_graph/test_structure.py
  92. 27
      PyCTBN/tests/structure_graph/test_trajectory.py
  93. 57
      PyCTBN/tests/utility/test_cache.py
  94. 176
      PyCTBN/tests/utility/test_json_importer.py
  95. 80
      PyCTBN/tests/utility/test_sample_importer.py
  96. 2
      README.md
  97. 20
      docs/Makefile
  98. BIN
      docs/_build/doctrees/classes.doctree
  99. BIN
      docs/_build/doctrees/classes.estimators.doctree
  100. BIN
      docs/_build/doctrees/classes.optimizers.doctree
  101. Some files were not shown because too many files have changed in this diff Show More

8
.gitignore vendored

@ -0,0 +1,8 @@
__pycache__
.vscode
**/__pycache__
**/data
**/PyCTBN.egg-info
**/dist
**/results_data
**/.scannerwork

Binary file not shown.

@ -0,0 +1,8 @@
import PyCTBN.estimators
from PyCTBN.estimators import *
import PyCTBN.optimizers
from PyCTBN.optimizers import *
import PyCTBN.structure_graph
from PyCTBN.structure_graph import *
import PyCTBN.utility
from PyCTBN.utility import *

@ -0,0 +1,5 @@
from .fam_score_calculator import FamScoreCalculator
from .parameters_estimator import ParametersEstimator
from .structure_estimator import StructureEstimator
from .structure_constraint_based_estimator import StructureConstraintBasedEstimator
from .structure_score_based_estimator import StructureScoreBasedEstimator

@ -0,0 +1,272 @@
import itertools
import json
import typing
import networkx as nx
import numpy as np
from networkx.readwrite import json_graph
from math import log
from scipy.special import loggamma
from random import choice
from ..structure_graph.set_of_cims import SetOfCims
from ..structure_graph.network_graph import NetworkGraph
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
'''
'''
class FamScoreCalculator:
    """Calculates the FamScore of a node by using a Bayesian score function.

    The FamScore (see :meth:`get_fam_score`) is the sum of the marginal
    likelihood over the ``q`` parameters and the marginal likelihood over the
    ``theta`` parameters, each accumulated over all the cims of the node.

    Every cim object handed to this class must expose the attributes
    ``_state_residence_times`` and ``state_transition_matrix``.
    """

    def __init__(self):
        pass

    # region theta

    def marginal_likelihood_theta(self,
                                  cims: np.array,
                                  alpha_xu: float,
                                  alpha_xxu: float):
        """Calculate the marginal likelihood over theta, summed over all the cims of the node.

        :param cims: np.array with all the node's cims
        :type cims: np.array
        :param alpha_xu: hyperparameter over the CTBNs q parameters
        :type alpha_xu: float
        :param alpha_xxu: distributed hyperparameter over the CTBNs theta parameters
        :type alpha_xxu: float
        :return: the value of the marginal likelihood over theta
        :rtype: float
        """
        return np.sum(
            [self.variable_cim_xu_marginal_likelihood_theta(cim,
                                                            alpha_xu,
                                                            alpha_xxu)
             for cim in cims])

    def variable_cim_xu_marginal_likelihood_theta(self,
                                                  cim: "ConditionalIntensityMatrix",
                                                  alpha_xu: float,
                                                  alpha_xxu: float):
        """Calculate the value of the marginal likelihood over theta given a cim.

        :param cim: A conditional_intensity_matrix object with the sufficient statistics
        :type cim: class:'ConditionalIntensityMatrix'
        :param alpha_xu: hyperparameter over the CTBNs q parameters
        :type alpha_xu: float
        :param alpha_xxu: distributed hyperparameter over the CTBNs theta parameters
        :type alpha_xxu: float
        :return: the value of the marginal likelihood over theta
        :rtype: float
        """
        # one term per state of the node (the cim length)
        values = len(cim._state_residence_times)
        return np.sum([
            self.single_cim_xu_marginal_likelihood_theta(
                index,
                cim,
                alpha_xu,
                alpha_xxu)
            for index in range(values)])

    def single_cim_xu_marginal_likelihood_theta(self,
                                                index: int,
                                                cim: "ConditionalIntensityMatrix",
                                                alpha_xu: float,
                                                alpha_xxu: float):
        """Calculate the marginal likelihood over theta of the node when it assumes
        a specific value and a specific parents' assignment.

        :param index: index of the value (state) assumed by the node
        :type index: int
        :param cim: A conditional_intensity_matrix object with the sufficient statistics
        :type cim: class:'ConditionalIntensityMatrix'
        :param alpha_xu: hyperparameter over the CTBNs q parameters
        :type alpha_xu: float
        :param alpha_xxu: distributed hyperparameter over the CTBNs theta parameters
        :type alpha_xxu: float
        :return: the value of the marginal likelihood over theta when the node assumes a specific value
        :rtype: float
        """
        values = list(range(len(cim._state_residence_times)))
        # remove the index because of the x != x' condition in the summation
        values.remove(index)
        # alpha_xxu is applied uniformly to every destination state x'
        return (loggamma(alpha_xu) - loggamma(alpha_xu + cim.state_transition_matrix[index, index])) \
            + \
            np.sum([self.single_internal_cim_xxu_marginal_likelihood_theta(
                cim.state_transition_matrix[index, index_x_first],
                alpha_xxu)
                for index_x_first in values])

    def single_internal_cim_xxu_marginal_likelihood_theta(self,
                                                          M_xxu_suff_stats: float,
                                                          alpha_xxu: float = 1):
        """Calculate the second part of the marginal likelihood over theta formula.

        :param M_xxu_suff_stats: value of the sufficient statistic M[xx'|u]
        :type M_xxu_suff_stats: float
        :param alpha_xxu: distributed hyperparameter over the CTBNs theta parameters, default to 1
        :type alpha_xxu: float
        :return: ``loggamma(alpha_xxu + M[xx'|u]) - loggamma(alpha_xxu)``
        :rtype: float
        """
        return loggamma(alpha_xxu + M_xxu_suff_stats) - loggamma(alpha_xxu)

    # endregion

    # region q

    def marginal_likelihood_q(self,
                              cims: np.array,
                              tau_xu: float = 0.1,
                              alpha_xu: float = 1):
        """Calculate the marginal likelihood over q, summed over all the cims of the node.

        :param cims: np.array with all the node's cims
        :type cims: np.array
        :param tau_xu: hyperparameter over the CTBNs q parameters, default to 0.1
        :type tau_xu: float
        :param alpha_xu: hyperparameter over the CTBNs q parameters, default to 1
        :type alpha_xu: float
        :return: the value of the marginal likelihood over q
        :rtype: float
        """
        return np.sum([self.variable_cim_xu_marginal_likelihood_q(cim, tau_xu, alpha_xu) for cim in cims])

    def variable_cim_xu_marginal_likelihood_q(self,
                                              cim: "ConditionalIntensityMatrix",
                                              tau_xu: float = 0.1,
                                              alpha_xu: float = 1):
        """Calculate the value of the marginal likelihood over q given a cim.

        :param cim: A conditional_intensity_matrix object with the sufficient statistics
        :type cim: class:'ConditionalIntensityMatrix'
        :param tau_xu: hyperparameter over the CTBNs q parameters, default to 0.1
        :type tau_xu: float
        :param alpha_xu: hyperparameter over the CTBNs q parameters, default to 1
        :type alpha_xu: float
        :return: the value of the marginal likelihood over q
        :rtype: float
        """
        # one term per state of the node (the cim length)
        values = len(cim._state_residence_times)
        return np.sum([
            self.single_cim_xu_marginal_likelihood_q(
                cim.state_transition_matrix[index, index],
                cim._state_residence_times[index],
                tau_xu,
                alpha_xu)
            for index in range(values)])

    def single_cim_xu_marginal_likelihood_q(self,
                                            M_xu_suff_stats: float,
                                            T_xu_suff_stats: float,
                                            tau_xu: float = 0.1,
                                            alpha_xu: float = 1):
        """Calculate the marginal likelihood on q of the node when it assumes
        a specific value and a specific parents' assignment.

        :param M_xu_suff_stats: value of the sufficient statistic M[x|u]
        :type M_xu_suff_stats: float
        :param T_xu_suff_stats: value of the sufficient statistic T[x|u]
        :type T_xu_suff_stats: float
        :param tau_xu: hyperparameter over the CTBNs q parameters, default to 0.1
        :type tau_xu: float
        :param alpha_xu: hyperparameter over the CTBNs q parameters, default to 1
        :type alpha_xu: float
        :return: the value of the marginal likelihood of the node when it assumes a specific value
        :rtype: float
        """
        return (
            loggamma(alpha_xu + M_xu_suff_stats + 1) +
            (log(tau_xu)
             *
             (alpha_xu + 1))
        ) \
            - \
            (loggamma(alpha_xu + 1) + (
                log(tau_xu + T_xu_suff_stats)
                *
                (alpha_xu + M_xu_suff_stats + 1))
             )

    # endregion

    def get_fam_score(self,
                      cims: np.array,
                      tau_xu: float = 0.1,
                      alpha_xu: float = 1):
        """Calculate the FamScore value of the node.

        :param cims: np.array with all the node's cims
        :type cims: np.array
        :param tau_xu: hyperparameter over the CTBNs q parameters, default to 0.1
        :type tau_xu: float, optional
        :param alpha_xu: hyperparameter over the CTBNs q parameters, default to 1
        :type alpha_xu: float, optional
        :return: the FamScore value of the node
        :rtype: float
        """
        # alpha_xxu is alpha_xu spread uniformly over the (states - 1) destination states
        states_number = len(cims[0]._state_residence_times)
        if states_number > 1:
            alpha_xxu = alpha_xu / (states_number - 1)
        else:
            # A single-state node has no x -> x' transition, so alpha_xxu is never
            # read by the theta term; avoid the division by zero.
            alpha_xxu = alpha_xu
        return self.marginal_likelihood_q(cims,
                                          tau_xu,
                                          alpha_xu) \
            + \
            self.marginal_likelihood_theta(cims,
                                           alpha_xu,
                                           alpha_xxu)

@ -0,0 +1,143 @@
import sys
sys.path.append('../')
import numpy as np
from ..structure_graph.network_graph import NetworkGraph
from ..structure_graph.set_of_cims import SetOfCims
from ..structure_graph.trajectory import Trajectory
class ParametersEstimator(object):
    """Has the task of computing the cims of particular node given the trajectories and the net structure
    in the graph ``_net_graph``.

    :param trajectories: the trajectories
    :type trajectories: Trajectory
    :param net_graph: the net structure
    :type net_graph: NetworkGraph
    :_single_set_of_cims: the set of cims object that will hold the cims of the node
    """

    def __init__(self, trajectories: "Trajectory", net_graph: "NetworkGraph"):
        """Constructor Method
        """
        self._trajectories = trajectories
        self._net_graph = net_graph
        # Filled by fast_init(); compute_parameters_for_node() requires it to be set.
        self._single_set_of_cims = None

    def fast_init(self, node_id: str) -> None:
        """Initializes all the necessary structures for the parameters estimation for the node ``node_id``.

        :param node_id: the node label
        :type node_id: string
        """
        p_vals = self._net_graph._aggregated_info_about_nodes_parents[2]
        node_states_number = self._net_graph.get_states_number(node_id)
        self._single_set_of_cims = SetOfCims(node_id, p_vals, node_states_number, self._net_graph.p_combs)

    def compute_parameters_for_node(self, node_id: str) -> "SetOfCims":
        """Compute the CIMS of the node identified by the label ``node_id``.

        ``fast_init`` must have been called first, since this method reads
        ``_single_set_of_cims``.

        :param node_id: the node label
        :type node_id: string
        :return: A SetOfCims object filled with the computed CIMS
        :rtype: SetOfCims
        """
        node_indx = self._net_graph.get_node_indx(node_id)
        state_res_times = self._single_set_of_cims._state_residence_times
        transition_matrices = self._single_set_of_cims._transition_matrices
        # Both helpers fill the arrays passed as last argument in place.
        ParametersEstimator.compute_state_res_time_for_node(self._trajectories.times,
                                                            self._trajectories.trajectory,
                                                            self._net_graph.time_filtering,
                                                            self._net_graph.time_scalar_indexing_strucure,
                                                            state_res_times)
        ParametersEstimator.compute_state_transitions_for_a_node(node_indx, self._trajectories.complete_trajectory,
                                                                 self._net_graph.transition_filtering,
                                                                 self._net_graph.transition_scalar_indexing_structure,
                                                                 transition_matrices)
        self._single_set_of_cims.build_cims(state_res_times, transition_matrices)
        return self._single_set_of_cims

    @staticmethod
    def compute_state_res_time_for_node(times: np.ndarray, trajectory: np.ndarray,
                                        cols_filter: np.ndarray, scalar_indexes_struct: np.ndarray,
                                        T: np.ndarray) -> None:
        """Compute the state residence times for a node and fill the matrix ``T`` with the results.

        Every row of ``trajectory`` is turned into a single integer bin by weighting the
        columns selected by ``cols_filter`` with
        ``scalar_indexes_struct / scalar_indexes_struct[0]`` and summing them; the values in
        ``times`` are then accumulated per bin.

        :param times: the times deltas vector
        :type times: numpy.array
        :param trajectory: the trajectory
        :type trajectory: numpy.ndArray
        :param cols_filter: the columns filtering structure
        :type cols_filter: numpy.array
        :param scalar_indexes_struct: the indexing structure
        :type scalar_indexes_struct: numpy.array
        :param T: the state residence times vectors, overwritten in place
        :type T: numpy.ndArray
        """
        # ``np.int`` was removed from NumPy; the builtin ``int`` is the same dtype.
        bins = np.sum(trajectory[:, cols_filter] * scalar_indexes_struct / scalar_indexes_struct[0],
                      axis=1).astype(int)
        T[:] = np.bincount(bins,
                           times,
                           minlength=scalar_indexes_struct[-1]).reshape(-1, T.shape[1])

    @staticmethod
    def compute_state_transitions_for_a_node(node_indx: int, trajectory: np.ndarray, cols_filter: np.ndarray,
                                             scalar_indexing: np.ndarray, M: np.ndarray) -> None:
        """Compute the state transitions for a node and fill the matrices ``M`` with the results.

        Only the rows whose column ``trajectory.shape[1] / 2 + node_indx`` is non-negative are
        counted. After counting, the diagonal of every matrix in ``M`` is set to the sum of the
        other entries of its row.

        :param node_indx: the index of the node
        :type node_indx: int
        :param trajectory: the trajectory
        :type trajectory: numpy.ndArray
        :param cols_filter: the columns filtering structure
        :type cols_filter: numpy.array
        :param scalar_indexing: the indexing structure
        :type scalar_indexing: numpy.array
        :param M: the state transitions matrices, overwritten in place
        :type M: numpy.ndArray
        """
        # Flat positions of the diagonal entries of every matrix stacked in M.
        diag_indices = np.array([x * M.shape[1] + x % M.shape[1] for x in range(M.shape[0] * M.shape[1])],
                                dtype=np.int64)
        # ``np.int`` was removed from NumPy; the builtin ``int`` is the same dtype.
        trj_tmp = trajectory[trajectory[:, int(trajectory.shape[1] / 2) + node_indx].astype(int) >= 0]
        bins = np.sum(trj_tmp[:, cols_filter] * scalar_indexing / scalar_indexing[0], axis=1).astype(int)
        M[:] = np.bincount(bins, minlength=scalar_indexing[-1]).reshape(-1, M.shape[1], M.shape[2])
        M_raveled = M.ravel()
        # Zero the diagonal first so that the row sums below only count the off-diagonal entries.
        M_raveled[diag_indices] = 0
        M_raveled[diag_indices] = np.sum(M, axis=2).ravel()

    def init_sets_cims_container(self):
        """Build the container of the sets of cims of all the nodes.

        NOTE(review): this method references ``acims`` and ``self.net_graph``, neither of which
        is defined by the visible imports or by ``__init__`` (which sets ``_net_graph``);
        it looks like legacy code -- confirm before calling it.
        """
        self.sets_of_cims_struct = acims.SetsOfCimsContainer(self.net_graph.nodes,
                                                             self.net_graph.nodes_values,
                                                             self.net_graph.get_ordered_by_indx_parents_values_for_all_nodes(),
                                                             self.net_graph.p_combs)

    def compute_parameters(self):
        """Compute the cims of every node of the net.

        NOTE(review): this method reads ``self.net_graph``, ``self.sample_path`` and
        ``self.sets_of_cims_struct``, which ``__init__`` does not set, and it passes six
        arguments to ``compute_state_res_time_for_node``, which accepts five; it looks like
        legacy code -- confirm before calling it.
        """
        for indx, aggr in enumerate(zip(self.net_graph.nodes, self.sets_of_cims_struct.sets_of_cims)):
            self.compute_state_res_time_for_node(self.net_graph.get_node_indx(aggr[0]), self.sample_path.trajectories.times,
                                                 self.sample_path.trajectories.trajectory,
                                                 self.net_graph.time_filtering[indx],
                                                 self.net_graph.time_scalar_indexing_strucure[indx],
                                                 aggr[1]._state_residence_times)
            self.compute_state_transitions_for_a_node(self.net_graph.get_node_indx(aggr[0]),
                                                      self.sample_path.trajectories.complete_trajectory,
                                                      self.net_graph.transition_filtering[indx],
                                                      self.net_graph.transition_scalar_indexing_structure[indx],
                                                      aggr[1]._transition_matrices)
            aggr[1].build_cims(aggr[1]._state_residence_times, aggr[1]._transition_matrices)

@ -0,0 +1,238 @@
import itertools
import json
import typing
import networkx as nx
import numpy as np
from networkx.readwrite import json_graph
import os
from scipy.stats import chi2 as chi2_dist
from scipy.stats import f as f_dist
from tqdm import tqdm
from ..utility.cache import Cache
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
from ..structure_graph.network_graph import NetworkGraph
from .parameters_estimator import ParametersEstimator
from .structure_estimator import StructureEstimator
from ..structure_graph.sample_path import SamplePath
from ..structure_graph.structure import Structure
from ..optimizers.constraint_based_optimizer import ConstraintBasedOptimizer
import concurrent.futures
import multiprocessing
from multiprocessing import Pool
class StructureConstraintBasedEstimator(StructureEstimator):
    """
    Has the task of estimating the network structure given the trajectories in samplepath by using a constraint-based approach.

    :param sample_path: the _sample_path object containing the trajectories and the real structure
    :type sample_path: SamplePath
    :param exp_test_alfa: the significance level for the exponential Hp test
    :type exp_test_alfa: float
    :param chi_test_alfa: the significance level for the chi Hp test
    :type chi_test_alfa: float
    :param known_edges: prior knowledge edges forwarded to ``StructureEstimator``, default to None
    :type known_edges: List, optional
    :param thumb_threshold: threshold compared in ``independence_test`` against the ratio between
        the sum of the diagonal of the first transition matrix and the thumb value, default to 25
    :type thumb_threshold: int, optional
    :_nodes: the nodes labels
    :_nodes_vals: the nodes cardinalities
    :_nodes_indxs: the nodes indexes
    :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes``
    :_cache: the Cache object
    """

    def __init__(self, sample_path: SamplePath, exp_test_alfa: float, chi_test_alfa: float,
                 known_edges: typing.List = None, thumb_threshold: int = 25):
        # ``None`` replaces the former shared mutable default ``[]``; the base class
        # only checks the truthiness of ``known_edges``.
        super().__init__(sample_path, known_edges)
        self._exp_test_sign = exp_test_alfa
        self._chi_test_alfa = chi_test_alfa
        self._thumb_threshold = thumb_threshold
        self._cache = Cache()

    def _build_test_graph(self, parents: typing.List, test_child: str, tot_vars_count: int) -> NetworkGraph:
        """Build the NetworkGraph made of ``test_child`` and ``parents``, with one edge from each parent to the child."""
        bool_mask = np.isin(self._nodes, list(parents) + [test_child])
        labels = list(self._nodes[bool_mask])
        indxs = self._nodes_indxs[bool_mask]
        vals = self._nodes_vals[bool_mask]
        # Pair every parent with the whole child label: itertools.product(parents, test_child)
        # would iterate over the characters of a multi-character label.
        edges = [(parent, test_child) for parent in parents]
        return NetworkGraph(Structure(labels, indxs, vals, edges, tot_vars_count))

    def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int,
                      tot_vars_count: int, parent_indx, child_indx) -> bool:
        """Performs a complete independence test on the directed graphs G1 = {test_child U parent_set}
        G2 = {G1 U test_parent} (added as an additional parent of the test_child).
        Generates all the necessary structures and datas to perform the tests.

        :param test_parent: the node label of the test parent
        :type test_parent: string
        :param test_child: the node label of the child
        :type test_child: string
        :param parent_set: the common parent set
        :type parent_set: List
        :param child_states_numb: the cardinality of the ``test_child``
        :type child_states_numb: int
        :param tot_vars_count: the total number of variables in the net
        :type tot_vars_count: int
        :param parent_indx: row index forwarded to ``independence_test``
        :param child_indx: column index forwarded to ``independence_test``
        :return: True iff test_child and test_parent are independent given the sep_set parent_set. False otherwise
        :rtype: bool
        """
        p_set = parent_set[:]

        parents = np.array(parent_set)
        parents = np.append(parents, test_parent)
        sorted_parents = self._nodes[np.isin(self._nodes, parents)]
        cims_filter = sorted_parents != test_parent

        # Cims of the child with parents = parent_set + test_parent (graph G2).
        p_set.insert(0, test_parent)
        g2 = None
        sofc2 = self._cache.find(set(p_set))
        if not sofc2:
            g2 = self._build_test_graph(p_set, test_child, tot_vars_count)
            g2.fast_init(test_child)
            p2 = ParametersEstimator(self._sample_path.trajectories, g2)
            p2.fast_init(test_child)
            sofc2 = p2.compute_parameters_for_node(test_child)
            self._cache.put(set(p_set), sofc2)

        # Cims of the child with parents = parent_set only (graph G1).
        del p_set[0]
        sofc1 = self._cache.find(set(p_set))
        if not sofc1:
            if g2 is None:
                # sofc2 came from the cache, so no graph has been built yet:
                # build G1 directly instead of using an unbound ``g2``.
                g2 = self._build_test_graph(p_set, test_child, tot_vars_count)
            else:
                g2.remove_node(test_parent)
            g2.fast_init(test_child)
            p2 = ParametersEstimator(self._sample_path.trajectories, g2)
            p2.fast_init(test_child)
            sofc1 = p2.compute_parameters_for_node(test_child)
            self._cache.put(set(p_set), sofc1)

        # The thumb value is only read by independence_test when the child has more than two states.
        thumb_value = 0.0
        if child_states_numb > 2:
            parent_val = self._sample_path.structure.get_states_number(test_parent)
            bool_mask_vals = np.isin(self._nodes, parent_set)
            parents_vals = self._nodes_vals[bool_mask_vals]
            thumb_value = self.compute_thumb_value(parent_val, child_states_numb, parents_vals)

        for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs):
            cond_cims = sofc2.filter_cims_with_mask(cims_filter, p_comb)
            for cim2 in cond_cims:
                if not self.independence_test(child_states_numb, cim1, cim2, thumb_value, parent_indx, child_indx):
                    return False
        return True

    def independence_test(self, child_states_numb: int, cim1: ConditionalIntensityMatrix,
                          cim2: ConditionalIntensityMatrix, thumb_value: float, parent_indx, child_indx) -> bool:
        """Compute the actual independence test using two cims.
        It is performed first the exponential test and if the null hypothesis is not rejected,
        it is performed also the chi_test.

        :param child_states_numb: the cardinality of the test child
        :type child_states_numb: int
        :param cim1: a cim belonging to the graph without test parent
        :type cim1: ConditionalIntensityMatrix
        :param cim2: a cim belonging to the graph with test parent
        :type cim2: ConditionalIntensityMatrix
        :param thumb_value: the value returned by ``compute_thumb_value``; only read when
            ``child_states_numb`` is greater than 2
        :type thumb_value: float
        :param parent_indx: row of ``_removable_edges_matrix`` set to False when the thumb check fails
        :param child_indx: column of ``_removable_edges_matrix`` set to False when the thumb check fails
        :return: True iff both tests do NOT reject the null hypothesis of independence. False otherwise.
        :rtype: bool
        """
        M1 = cim1.state_transition_matrix
        M2 = cim2.state_transition_matrix
        r1s = M1.diagonal()
        r2s = M2.diagonal()
        C1 = cim1.cim
        C2 = cim2.cim
        if child_states_numb > 2:
            if (np.sum(np.diagonal(M1)) / thumb_value) < self._thumb_threshold:
                # Below the threshold: mark the edge as not removable and stop testing.
                self._removable_edges_matrix[parent_indx][child_indx] = False
                return False

        # Exponential test: two-sided check of the diagonal ratios against the F distribution.
        F_stats = C2.diagonal() / C1.diagonal()
        exp_alfa = self._exp_test_sign
        for val in range(0, child_states_numb):
            if F_stats[val] < f_dist.ppf(exp_alfa / 2, r1s[val], r2s[val]) or \
                    F_stats[val] > f_dist.ppf(1 - exp_alfa / 2, r1s[val], r2s[val]):
                return False

        # Chi test on the off-diagonal entries of the two transition matrices.
        M1_no_diag = M1[~np.eye(M1.shape[0], dtype=bool)].reshape(M1.shape[0], -1)
        M2_no_diag = M2[~np.eye(M2.shape[0], dtype=bool)].reshape(
            M2.shape[0], -1)
        chi_2_quantile = chi2_dist.ppf(1 - self._chi_test_alfa, child_states_numb - 1)
        Ks = np.sqrt(r1s / r2s)
        Ls = np.sqrt(r2s / r1s)
        for val in range(0, child_states_numb):
            Chi = np.sum(np.power(Ks[val] * M2_no_diag[val] - Ls[val] * M1_no_diag[val], 2) /
                         (M1_no_diag[val] + M2_no_diag[val]))
            if Chi > chi_2_quantile:
                return False
        return True

    def compute_thumb_value(self, parent_val, child_val, parent_set_vals):
        """Compute the value to test against the thumb_threshold.

        :param parent_val: test parent's variable cardinality
        :type parent_val: int
        :param child_val: test child's variable cardinality
        :type child_val: int
        :param parent_set_vals: the cardinalities of the nodes in the current sep-set
        :type parent_set_vals: List
        :return: ``(child_val - 1) ** 2 * parent_val`` multiplied by every value in ``parent_set_vals``
        :rtype: int
        """
        df = (child_val - 1) ** 2
        df = df * parent_val
        for v in parent_set_vals:
            df = df * v
        return df

    def one_iteration_of_CTPC_algorithm(self, var_id: str, tot_vars_count: int) -> typing.List:
        """Performs an iteration of the CTPC algorithm using the node ``var_id`` as ``test_child``.

        :param var_id: the node label of the test child
        :type var_id: string
        :param tot_vars_count: the total number of variables in the net
        :type tot_vars_count: int
        :return: the result of ``ConstraintBasedOptimizer.optimize_structure`` for the node
        :rtype: List
        """
        optimizer_obj = ConstraintBasedOptimizer(
            node_id=var_id,
            structure_estimator=self,
            tot_vars_count=tot_vars_count)
        return optimizer_obj.optimize_structure()

    def ctpc_algorithm(self, disable_multiprocessing: bool = False):
        """Compute the CTPC algorithm over the entire net.

        :param disable_multiprocessing: when True the nodes are processed sequentially in the
            current process, otherwise in a process pool with one worker per CPU; default to False
        :type disable_multiprocessing: bool, optional
        :return: the set of edges collected from the per-node iterations
        :rtype: set
        """
        ctpc_algo = self.one_iteration_of_CTPC_algorithm
        total_vars_numb = self._sample_path.total_variables_count

        # Remove all the edges from the structure
        self._sample_path.structure.clean_structure_edges()

        # Estimate the best parents for each node
        if disable_multiprocessing:
            print("DISABILITATO")
            list_edges_partial = [ctpc_algo(n, total_vars_numb) for n in self._nodes]
        else:
            cpu_count = multiprocessing.cpu_count()
            total_vars_numb_array = [total_vars_numb] * len(self._nodes)
            with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor:
                list_edges_partial = executor.map(ctpc_algo,
                                                  self._nodes,
                                                  total_vars_numb_array)

        return set(itertools.chain.from_iterable(list_edges_partial))

    def estimate_structure(self, disable_multiprocessing: bool = False):
        """Estimate the structure by running ``ctpc_algorithm``.

        :param disable_multiprocessing: forwarded to ``ctpc_algorithm``, default to False
        :type disable_multiprocessing: bool, optional
        :return: the set of estimated edges
        :rtype: set
        """
        return self.ctpc_algorithm(disable_multiprocessing=disable_multiprocessing)

@ -0,0 +1,187 @@
import itertools
import json
import typing
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
from networkx.readwrite import json_graph
from abc import ABC
import abc
from ..utility.cache import Cache
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
from ..structure_graph.network_graph import NetworkGraph
from .parameters_estimator import ParametersEstimator
from ..structure_graph.sample_path import SamplePath
from ..structure_graph.structure import Structure
class StructureEstimator(object):
"""Has the task of estimating the network structure given the trajectories in ``samplepath``.
:param sample_path: the _sample_path object containing the trajectories and the real structure
:type sample_path: SamplePath
:_nodes: the nodes labels
:_nodes_vals: the nodes cardinalities
:_nodes_indxs: the nodes indexes
:_complete_graph: the complete directed graph built using the nodes labels in ``_nodes``
"""
def __init__(self, sample_path: SamplePath, known_edges: typing.List = None):
    """Store the sample path and derive the node information from its structure.

    :param sample_path: the object holding the trajectories and the structure
    :type sample_path: SamplePath
    :param known_edges: edges forwarded to ``build_removable_edges_matrix``, default to None
    :type known_edges: List, optional
    """
    self._sample_path = sample_path
    structure = self._sample_path.structure
    # Labels are kept as an array; values and indexes are stored as given by the structure.
    self._nodes = np.array(structure.nodes_labels)
    self._nodes_vals = structure.nodes_values
    self._nodes_indxs = structure.nodes_indexes
    self._removable_edges_matrix = self.build_removable_edges_matrix(known_edges)
    self._complete_graph = StructureEstimator.build_complete_graph(structure.nodes_labels)
def build_removable_edges_matrix(self, known_edges: typing.List):
    """Build the boolean matrix telling which edges may be removed, given the prior knowledge.

    Every entry starts as True; the entry ``[i][j]`` of each known edge ``(i, j)`` is set to False.

    :param known_edges: the edges given as prior knowledge, each one as a pair of nodes labels
    :type known_edges: List
    :return: a boolean matrix
    :rtype: np.ndarray
    """
    side = self._sample_path.total_variables_count
    removable = np.full((side, side), True)
    if not known_edges:
        # No prior knowledge: every edge stays removable.
        return removable
    for known_edge in known_edges:
        row = self._sample_path.structure.get_node_indx(known_edge[0])
        col = self._sample_path.structure.get_node_indx(known_edge[1])
        removable[row][col] = False
    return removable
@staticmethod
def build_complete_graph(node_ids: typing.List) -> nx.DiGraph:
    """Create the complete directed graph (without self loops) over the labels in ``node_ids``.

    :param node_ids: the list of nodes labels
    :type node_ids: List
    :return: a complete Digraph Object
    :rtype: networkx.DiGraph
    """
    graph = nx.DiGraph()
    graph.add_nodes_from(node_ids)
    # Every ordered pair of labels taken from two different positions becomes an edge.
    ordered_pairs = itertools.permutations(node_ids, 2)
    graph.add_edges_from(ordered_pairs)
    return graph
@staticmethod
def generate_possible_sub_sets_of_size(u: typing.List, size: int, parent_label: str):
    """Lazily produce every subset of ``u`` with ``size`` elements that does not
    contain the node ``parent_label``.

    ``u`` itself is left untouched: the label is removed from a copy.

    :param u: the list of nodes
    :type u: List
    :param size: the size of the subsets
    :type size: int
    :param parent_label: the node to exclude in the subsets generation
    :type parent_label: string
    :return: an Iterator Object yielding the subsets as lists
    :rtype: Iterator
    """
    candidates = u[:]
    candidates.remove(parent_label)
    subsets = itertools.combinations(candidates, size)
    return map(list, subsets)
def save_results(self) -> None:
    """Dump the estimated structure ``_complete_graph`` to a .json file in node-link format.

    The file is named ``results_<dataset>_<dataset id>.json``, where ``<dataset>`` is the
    importer ``file_path`` without its directories (split on ``/``) and without everything that
    follows its first dot. The name is relative, so the file is created in the current working
    directory.
    """
    graph_payload = json_graph.node_link_data(self._complete_graph)
    importer = self._sample_path._importer
    dataset_stem = importer.file_path.rsplit('/', 1)[-1].split('.', 1)[0]
    file_name = ''.join(['results_', dataset_stem, '_', str(importer.dataset_id()), '.json'])
    with open(file_name, 'w') as out_file:
        json.dump(graph_payload, out_file)
def remove_diagonal_elements(self, matrix):
    """Return a copy of the square ``matrix`` without the elements of its main diagonal.

    Row ``r`` of the result holds the elements of row ``r`` of ``matrix`` in their original
    order, skipping ``matrix[r, r]``. The selection is done with a boolean mask, so the result
    does not depend on the memory layout of ``matrix`` (C order, Fortran order, transposed views).

    :param matrix: a square two dimensional array of shape ``(m, m)``
    :type matrix: numpy.ndarray
    :return: a new array of shape ``(m, m - 1)``
    :rtype: numpy.ndarray
    """
    m = matrix.shape[0]
    # True everywhere except on the main diagonal
    off_diagonal = ~np.eye(m, dtype=bool)
    return matrix[off_diagonal].reshape(m, -1)
@abc.abstractmethod
def estimate_structure(self) -> typing.List:
    """Estimate the structure of the network; every concrete estimator has to implement it.

    :return: List of estimated edges
    :rtype: Typing.List
    """
def adjacency_matrix(self) -> np.ndarray:
    """Converts the estimated structure ``_complete_graph`` to a boolean adjacency matrix representation.

    :return: The adjacency matrix of the graph ``_complete_graph``
    :rtype: numpy.ndArray
    """
    # ``nx.adj_matrix`` was only a deprecated alias (removed in NetworkX 3.0) of this function
    return nx.adjacency_matrix(self._complete_graph).toarray().astype(bool)
def spurious_edges(self) -> typing.List:
    """Return the spurious edges present in the estimated structure, if a prior net structure is present in
    ``_sample_path.structure``.

    An edge is spurious when it belongs to the estimated graph ``_complete_graph`` but not to the
    prior (real) structure.

    :raises RuntimeError: if ``_sample_path`` has no prior net structure
    :return: A list containing the spurious edges
    :rtype: List
    """
    if not self._sample_path.has_prior_net_structure:
        raise RuntimeError("Can not compute spurious edges with no prior net structure!")
    real_graph = nx.DiGraph()
    real_graph.add_nodes_from(self._sample_path.structure.nodes_labels)
    real_graph.add_edges_from(self._sample_path.structure.edges)
    # nx.difference(G, H) keeps the edges of G that are missing in H: the estimated graph has
    # to be the first operand, otherwise the *missing* edges would be returned instead.
    return list(nx.difference(self._complete_graph, real_graph).edges)
def save_plot_estimated_structure_graph(self) -> None:
    """Plot the estimated structure in a graphical model style and save it as a .png file.

    Spurious edges are colored in red, all the other edges in black. The image is named
    ``<dataset>_<dataset id>.png`` (``<dataset>`` is the importer ``file_path`` without its
    directories and without everything that follows its first dot) and it is written in the
    current working directory.

    :raises RuntimeError: propagated from ``spurious_edges`` when no prior net structure is present
    """
    spurious_edges = set(self.spurious_edges())
    non_spurious_edges = set(self._complete_graph.edges) - spurious_edges
    graph_to_draw = nx.DiGraph()
    # keep also the nodes without any edge, so every variable appears in the plot
    graph_to_draw.add_nodes_from(self._complete_graph.nodes)
    graph_to_draw.add_edges_from(spurious_edges)
    graph_to_draw.add_edges_from(non_spurious_edges)
    # the colors must follow the edge order of the graph that is actually drawn
    edges_colors = ['red' if edge in spurious_edges else 'black' for edge in graph_to_draw.edges]
    # at least 1 to avoid a division by zero when the graph is empty
    nodes_count = max(len(graph_to_draw.nodes()), 1)
    pos = nx.spring_layout(graph_to_draw, k=0.5*1/np.sqrt(nodes_count), iterations=50, scale=10)
    options = {
        "node_size": 2000,
        "node_color": "white",
        "edgecolors": "black",
        'linewidths': 2,
        "with_labels": True,
        "font_size": 13,
        'connectionstyle': 'arc3, rad = 0.1',
        "arrowsize": 15,
        # NOTE(review): '<|-' puts the arrow head at the start of the patch - confirm that the
        # arrows are rendered with the intended direction.
        "arrowstyle": '<|-',
        "width": 1,
        "edge_color": edges_colors,
    }
    nx.draw(graph_to_draw, pos, **options)
    ax = plt.gca()
    ax.margins(0.20)
    plt.axis("off")
    name = self._sample_path._importer.file_path.rsplit('/', 1)[-1]
    name = name.split('.', 1)[0]
    name += '_' + str(self._sample_path._importer.dataset_id())
    name += '.png'
    plt.savefig(name)
    # clear the current figure so that a following plot starts from an empty canvas
    plt.clf()
    print("Estimated Structure Plot Saved At: ", os.path.abspath(name))

@ -0,0 +1,244 @@
import itertools
import json
import typing
import networkx as nx
import numpy as np
from networkx.readwrite import json_graph
from random import choice
import concurrent.futures
import copy
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
from ..structure_graph.network_graph import NetworkGraph
from .parameters_estimator import ParametersEstimator
from .structure_estimator import StructureEstimator
from ..structure_graph.sample_path import SamplePath
from ..structure_graph.structure import Structure
from .fam_score_calculator import FamScoreCalculator
from ..optimizers.hill_climbing_search import HillClimbing
from ..optimizers.tabu_search import TabuSearch
import multiprocessing
from multiprocessing import Pool
class StructureScoreBasedEstimator(StructureEstimator):
    """
    Has the task of estimating the network structure given the trajectories in samplepath by
    using a score based approach.

    :param sample_path: the _sample_path object containing the trajectories and the real structure
    :type sample_path: SamplePath
    :param tau_xu: hyperparameter over the CTBNs q parameters, default to 0.1
    :type tau_xu: float, optional
    :param alpha_xu: hyperparameter over the CTBNs q parameters, default to 1
    :type alpha_xu: float, optional
    :param known_edges: List of known edges, default to None (no known edges)
    :type known_edges: List, optional
    """

    def __init__(self, sample_path: SamplePath, tau_xu: float = 0.1, alpha_xu: float = 1,
                 known_edges: typing.Optional[typing.List] = None):
        # a fresh list for every instance: a mutable default argument would be shared by all of them
        super().__init__(sample_path, [] if known_edges is None else known_edges)
        self.tau_xu = tau_xu
        self.alpha_xu = alpha_xu

    def estimate_structure(self, max_parents: int = None, iterations_number: int = 40,
                           patience: int = None, tabu_length: int = None, tabu_rules_duration: int = None,
                           optimizer: str = 'tabu', disable_multiprocessing: bool = False):
        """
        Compute the score-based algorithm to find the optimal structure.

        The edges of ``_sample_path.structure`` are removed before the search starts; a copy of
        them is kept only to print precision and recall of the estimated edges.

        :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
        :type max_parents: int, optional
        :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
        :type iterations_number: int, optional
        :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None
        :type patience: int, optional
        :param tabu_length: maximum length of the data structures used in the optimization process, default to None
        :type tabu_length: int, optional
        :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None
        :type tabu_rules_duration: int, optional
        :param optimizer: name of the optimizer algorithm. Possible values: 'hill' (Hill climbing),'tabu' (tabu search), default to 'tabu'
        :type optimizer: string, optional
        :param disable_multiprocessing: true if you desire to disable the multiprocessing operations, default to False
        :type disable_multiprocessing: Boolean, optional
        :return: the set of the estimated edges
        :rtype: set
        """
        # Save the true edges structure in tuples
        true_edges = copy.deepcopy(self._sample_path.structure.edges)
        true_edges = set(map(tuple, true_edges))

        # Remove all the edges from the structure
        self._sample_path.structure.clean_structure_edges()

        estimate_parents = self.estimate_parents

        # get the number of CPU
        cpu_count = multiprocessing.cpu_count()
        print(f"CPU COUNT: {cpu_count}")

        # Estimate the best parents for each node
        if disable_multiprocessing:
            list_edges_partial = [
                estimate_parents(n, max_parents, iterations_number, patience,
                                 tabu_length, tabu_rules_duration, optimizer)
                for n in self._nodes
            ]
        else:
            with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor:
                # the same settings are repeated for every node; map stops with the nodes
                list_edges_partial = list(executor.map(estimate_parents,
                                                       self._nodes,
                                                       itertools.repeat(max_parents),
                                                       itertools.repeat(iterations_number),
                                                       itertools.repeat(patience),
                                                       itertools.repeat(tabu_length),
                                                       itertools.repeat(tabu_rules_duration),
                                                       itertools.repeat(optimizer)))

        # Concatenate all the edges list
        set_list_edges = set(itertools.chain.from_iterable(list_edges_partial))

        # calculate precision and recall
        n_added_fake_edges = len(set_list_edges.difference(true_edges))
        n_missing_edges = len(true_edges.difference(set_list_edges))
        n_true_positive = len(true_edges) - n_missing_edges
        n_estimated_edges = n_true_positive + n_added_fake_edges
        n_real_edges = n_true_positive + n_missing_edges
        # an empty denominator (no estimated edges / no real edges) gives 0.0 instead of an error
        precision = n_true_positive / n_estimated_edges if n_estimated_edges else 0.0
        recall = n_true_positive / n_real_edges if n_real_edges else 0.0

        print(true_edges)
        print(set_list_edges)
        print(f"precision: {precision} ")
        print(f"recall: {recall} ")

        return set_list_edges

    def estimate_parents(self, node_id: str, max_parents: int = None, iterations_number: int = 40,
                         patience: int = 10, tabu_length: int = None, tabu_rules_duration: int = 5,
                         optimizer: str = 'hill'):
        """
        Use the FamScore of a node in order to find the best parent nodes

        :param node_id: current node's id
        :type node_id: string
        :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
        :type max_parents: int, optional
        :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
        :type iterations_number: int, optional
        :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to 10
        :type patience: int, optional
        :param tabu_length: maximum length of the data structures used in the optimization process, default to None
        :type tabu_length: int, optional
        :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to 5
        :type tabu_rules_duration: int, optional
        :param optimizer: name of the optimizer algorithm. 'tabu' selects the tabu search, every other
            value selects the Hill climbing, default to 'hill'
        :type optimizer: string, optional
        :return: A list of the best edges for the current node
        :rtype: List
        """
        # choose the optimizer algorithm
        if optimizer == 'tabu':
            optimizer = TabuSearch(
                node_id=node_id,
                structure_estimator=self,
                max_parents=max_parents,
                iterations_number=iterations_number,
                patience=patience,
                tabu_length=tabu_length,
                tabu_rules_duration=tabu_rules_duration)
        else:  # 'hill' and any other value
            optimizer = HillClimbing(
                node_id=node_id,
                structure_estimator=self,
                max_parents=max_parents,
                iterations_number=iterations_number,
                patience=patience)

        # call the optimizer's function that calculates the current node's parents
        return optimizer.optimize_structure()

    def get_score_from_graph(self,
                             graph: NetworkGraph,
                             node_id: str):
        """
        Get the FamScore of a node

        :param node_id: current node's id
        :type node_id: string
        :param graph: current graph to be computed
        :type graph: class:'NetworkGraph'
        :return: The FamSCore for this graph structure
        :rtype: float
        """
        # initialize the graph for a single node
        graph.fast_init(node_id)

        params_estimation = ParametersEstimator(self._sample_path.trajectories, graph)

        # Initialize and compute parameters for node
        params_estimation.fast_init(node_id)
        SoCims = params_estimation.compute_parameters_for_node(node_id)

        # calculate the FamScore for the node
        fam_score_obj = FamScoreCalculator()
        score = fam_score_obj.get_fam_score(SoCims.actual_cims, tau_xu=self.tau_xu, alpha_xu=self.alpha_xu)

        return score

@ -0,0 +1,4 @@
from .optimizer import Optimizer
from .tabu_search import TabuSearch
from .hill_climbing_search import HillClimbing
from .constraint_based_optimizer import ConstraintBasedOptimizer

@ -0,0 +1,87 @@
import itertools
import json
import typing
import networkx as nx
import numpy as np
from random import choice
from abc import ABC
import copy
from .optimizer import Optimizer
from ..estimators.structure_estimator import StructureEstimator
from ..structure_graph.network_graph import NetworkGraph
class ConstraintBasedOptimizer(Optimizer):
    """
    Optimizer that estimates the parents of a node with the CTPC algorithm.

    :param node_id: current node's id
    :type node_id: string
    :param structure_estimator: a structure estimator object with the information about the net
    :type structure_estimator: class:'StructureEstimator'
    :param tot_vars_count: number of variables in the dataset
    :type tot_vars_count: int
    """

    def __init__(self,
                 node_id: str,
                 structure_estimator: StructureEstimator,
                 tot_vars_count: int
                 ):
        """
        Constructor
        """
        super().__init__(node_id, structure_estimator)
        self.tot_vars_count = tot_vars_count

    def optimize_structure(self):
        """
        Run the CTPC procedure for the node ``node_id``.

        Every other node starts as a parent of ``node_id``. For growing sizes of the separating
        set, each removable candidate is tested with ``complete_test`` of the estimator; as soon
        as one test succeeds the edge from the candidate is dropped. The cache of the estimator
        is cleared at the end.

        :return: the estimated structure for the node
        :rtype: List
        """
        print("##################TESTING VAR################", self.node_id)
        estimator = self.structure_estimator
        structure = estimator._sample_path.structure

        graph = NetworkGraph(structure)
        candidate_parents = [label for label in structure.nodes_labels if label != self.node_id]
        graph.add_edges([(label, self.node_id) for label in candidate_parents])

        child_states_numb = structure.get_states_number(self.node_id)

        subset_size = 0
        while subset_size < len(candidate_parents):
            position = 0
            while position < len(candidate_parents):
                test_parent = candidate_parents[position]
                i = structure.get_node_indx(test_parent)
                j = structure.get_node_indx(self.node_id)
                edge_dropped = False
                if estimator._removable_edges_matrix[i][j]:
                    separating_sets = StructureEstimator.generate_possible_sub_sets_of_size(
                        candidate_parents, subset_size, test_parent)
                    # stops at the first separating set for which the test succeeds
                    edge_dropped = any(
                        estimator.complete_test(test_parent, self.node_id, parents_set,
                                                child_states_numb, self.tot_vars_count, i, j)
                        for parents_set in separating_sets)
                if edge_dropped:
                    graph.remove_edges([(test_parent, self.node_id)])
                    # the list shrinks, so the same position now holds the next candidate
                    candidate_parents.remove(test_parent)
                else:
                    position += 1
            subset_size += 1
        estimator._cache.clear()
        return graph.edges

@ -0,0 +1,135 @@
import itertools
import json
import typing
import networkx as nx
import numpy as np
from random import choice
from abc import ABC
from .optimizer import Optimizer
from ..estimators.structure_estimator import StructureEstimator
from ..structure_graph.network_graph import NetworkGraph
class HillClimbing(Optimizer):
    """
    Optimizer class that implement Hill Climbing Search

    :param node_id: current node's id
    :type node_id: string
    :param structure_estimator: a structure estimator object with the information about the net
    :type structure_estimator: class:'StructureEstimator'
    :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
    :type max_parents: int, optional
    :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
    :type iterations_number: int, optional
    :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None
    :type patience: int, optional
    """

    def __init__(self,
                 node_id: str,
                 structure_estimator: StructureEstimator,
                 max_parents: int = None,
                 iterations_number: int = 40,
                 patience: int = None
                 ):
        """
        Constructor
        """
        super().__init__(node_id, structure_estimator)
        self.max_parents = max_parents
        self.iterations_number = iterations_number
        self.patience = patience

    def optimize_structure(self) -> typing.List:
        """
        Compute Optimization process for a structure_estimator by using a Hill Climbing Algorithm.

        At every iteration a random candidate parent is toggled (added if missing, removed if
        present); the move is kept only when the score of the node improves, otherwise it is undone.
        Parents coming from prior knowledge are added at the start and are never chosen as candidates.

        :return: the estimated structure for the node
        :rtype: List
        """
        # Create the graph for the single node
        graph = NetworkGraph(self.structure_estimator._sample_path.structure)

        # get the index for the current node
        node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id)

        # set of the parents given as prior knowledge
        prior_parents = set()

        # Add the edges from prior knowledge
        removable_edges = self.structure_estimator._removable_edges_matrix
        for i in range(len(removable_edges)):
            if not removable_edges[i][node_index]:
                parent_id = self.structure_estimator._sample_path._structure.get_node_id(i)
                prior_parents.add(parent_id)

                # Add the edge to the starting structure
                graph.add_edges([(parent_id, self.node_id)])

        # get all the possible parents
        other_nodes = [node for node in
                       self.structure_estimator._sample_path.structure.nodes_labels if
                       node != self.node_id and
                       node not in prior_parents]

        actual_best_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)

        patience_count = 0
        # without candidates there is nothing to search (choice() would fail on an empty list)
        iterations = self.iterations_number if other_nodes else 0
        for _ in range(iterations):
            # choose a new random edge
            current_new_parent = choice(other_nodes)
            current_edge = (current_new_parent, self.node_id)
            added = False
            parent_removed = None

            if graph.has_edge(current_edge):
                graph.remove_edges([current_edge])
            else:
                # check the max_parents constraint: when the limit is reached a random parent
                # is swapped out to make room for the new one
                # NOTE(review): with max_parents == 0 the parents list is empty and choice()
                # raises IndexError - confirm that max_parents is always >= 1 when given.
                if self.max_parents is not None:
                    parents_list = graph.get_parents_by_id(self.node_id)
                    if len(parents_list) >= self.max_parents:
                        parent_removed = (choice(parents_list), self.node_id)
                        graph.remove_edges([parent_removed])
                graph.add_edges([current_edge])
                added = True

            current_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)

            if current_score > actual_best_score:
                # update current best score
                actual_best_score = current_score
                patience_count = 0
            else:
                # undo the last update
                if added:
                    graph.remove_edges([current_edge])
                    # If a parent was removed, add it again to the graph
                    if parent_removed is not None:
                        graph.add_edges([parent_removed])
                else:
                    graph.add_edges([current_edge])
                # update patience count
                patience_count += 1

            if self.patience is not None and patience_count > self.patience:
                break

        print(f"finito variabile: {self.node_id}")
        return graph.edges

@ -0,0 +1,39 @@
import itertools
import json
import typing
import networkx as nx
import numpy as np
import abc
from ..estimators.structure_estimator import StructureEstimator
class Optimizer(abc.ABC):
    """
    Abstract base class shared by all the optimizers: it only stores the node to process and
    the estimator that holds the information about the net.

    :param node_id: the node label
    :type node_id: string
    :param structure_estimator: A structureEstimator Object to predict the structure
    :type structure_estimator: class:'StructureEstimator'
    """

    def __init__(self, node_id: str, structure_estimator: StructureEstimator):
        self.node_id = node_id
        self.structure_estimator = structure_estimator

    @abc.abstractmethod
    def optimize_structure(self) -> typing.List:
        """
        Run the optimization process for ``structure_estimator``; implemented by the subclasses.

        :return: the estimated structure for the node
        :rtype: List
        """

@ -0,0 +1,199 @@
import itertools
import json
import typing
import networkx as nx
import numpy as np
from random import choice,sample
from abc import ABC
from .optimizer import Optimizer
from ..estimators.structure_estimator import StructureEstimator
from ..structure_graph.network_graph import NetworkGraph
import queue
class TabuSearch(Optimizer):
    """
    Optimizer class that implement Tabu Search

    :param node_id: current node's id
    :type node_id: string
    :param structure_estimator: a structure estimator object with the information about the net
    :type structure_estimator: class:'StructureEstimator'
    :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
    :type max_parents: int, optional
    :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
    :type iterations_number: int, optional
    :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None
    :type patience: int, optional
    :param tabu_length: maximum length of the data structures used in the optimization process.
        If None it is set to the number of candidate parents, default to None
    :type tabu_length: int, optional
    :param tabu_rules_duration: number of iterations in which each rule keeps its value.
        If None it is set to the number of candidate parents, default to None
    :type tabu_rules_duration: int, optional
    """

    def __init__(self,
                 node_id: str,
                 structure_estimator: StructureEstimator,
                 max_parents: int = None,
                 iterations_number: int = 40,
                 patience: int = None,
                 tabu_length: int = None,
                 tabu_rules_duration=None
                 ):
        """
        Constructor
        """
        super().__init__(node_id, structure_estimator)
        self.max_parents = max_parents
        self.iterations_number = iterations_number
        self.patience = patience
        self.tabu_length = tabu_length
        self.tabu_rules_duration = tabu_rules_duration

    def optimize_structure(self) -> typing.List:
        """
        Compute Optimization process for a structure_estimator by using a Tabu Search Algorithm.

        At every iteration a random candidate parent that is not in the tabu list is toggled
        (added if missing, removed if present); the move is kept only when the score of the node
        improves, otherwise it is undone. The tried node then enters the tabu list.

        :return: the estimated structure for the node
        :rtype: List
        """
        print(f"tabu search is processing the structure of {self.node_id}")

        # Create the graph for the single node
        graph = NetworkGraph(self.structure_estimator._sample_path.structure)

        # get the index for the current node
        node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id)

        # set of the parents given as prior knowledge
        prior_parents = set()

        # Add the edges from prior knowledge
        removable_edges = self.structure_estimator._removable_edges_matrix
        for i in range(len(removable_edges)):
            if not removable_edges[i][node_index]:
                parent_id = self.structure_estimator._sample_path._structure.get_node_id(i)
                prior_parents.add(parent_id)

                # Add the edge to the starting structure
                graph.add_edges([(parent_id, self.node_id)])

        # get all the possible parents
        other_nodes = {node for node in
                       self.structure_estimator._sample_path.structure.nodes_labels if
                       node != self.node_id and
                       node not in prior_parents}

        # calculate the score for the node with only the prior parents
        actual_best_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)

        # initialize tabu_length and tabu_rules_duration if None
        if self.tabu_length is None:
            self.tabu_length = len(other_nodes)

        if self.tabu_rules_duration is None:
            self.tabu_rules_duration = len(other_nodes)

        # initialize the data structures
        tabu_set = set()
        tabu_queue = queue.Queue()

        patience_count = 0
        tabu_count = 0
        # without candidates there is nothing to search: the blocking get() on the empty
        # queue would otherwise never return
        iterations = self.iterations_number if other_nodes else 0
        for _ in range(iterations):

            current_possible_nodes = other_nodes.difference(tabu_set)

            # choose a new random edge according to tabu restriction
            if len(current_possible_nodes) > 0:
                # random.sample no longer accepts a set (Python 3.11), so pick from a sequence
                current_new_parent = choice(tuple(current_possible_nodes))
            else:
                # every candidate is tabu: release the oldest one and use it
                current_new_parent = tabu_queue.get()
                tabu_set.remove(current_new_parent)

            current_edge = (current_new_parent, self.node_id)
            added = False
            parent_removed = None

            if graph.has_edge(current_edge):
                graph.remove_edges([current_edge])
            else:
                # check the max_parents constraint: when the limit is reached a random parent
                # is swapped out to make room for the new one
                if self.max_parents is not None:
                    parents_list = graph.get_parents_by_id(self.node_id)
                    if len(parents_list) >= self.max_parents:
                        parent_removed = (choice(parents_list), self.node_id)
                        graph.remove_edges([parent_removed])
                graph.add_edges([current_edge])
                added = True

            current_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)

            if current_score > actual_best_score:
                # update current best score
                actual_best_score = current_score
                patience_count = 0
            else:
                # undo the last update
                if added:
                    graph.remove_edges([current_edge])
                    # If a parent was removed, add it again to the graph
                    if parent_removed is not None:
                        graph.add_edges([parent_removed])
                else:
                    graph.add_edges([current_edge])
                # update patience count
                patience_count += 1

            # make room in the tabu list when it is full (never block on an empty queue)
            if not tabu_queue.empty() and tabu_queue.qsize() >= self.tabu_length:
                current_removed = tabu_queue.get()
                tabu_set.remove(current_removed)
            # Add the node on the tabu list
            tabu_queue.put(current_new_parent)
            tabu_set.add(current_new_parent)

            tabu_count += 1

            # Every tabu_rules_duration step remove an item from the tabu list
            if tabu_count % self.tabu_rules_duration == 0:
                if tabu_queue.qsize() > 0:
                    current_removed = tabu_queue.get()
                    tabu_set.remove(current_removed)
                tabu_count = 0

            if self.patience is not None and patience_count > self.patience:
                break

        print(f"finito variabile: {self.node_id}")
        return graph.edges

@ -0,0 +1,6 @@
from .conditional_intensity_matrix import ConditionalIntensityMatrix
from .network_graph import NetworkGraph
from .sample_path import SamplePath
from .set_of_cims import SetOfCims
from .structure import Structure
from .trajectory import Trajectory

@ -0,0 +1,42 @@
import numpy as np
class ConditionalIntensityMatrix(object):
    """Conditional Intensity Matrix of a node: it groups the state residence times vector,
    the state transition matrix and the cim computed from them.

    :param state_residence_times: state residence times vector
    :type state_residence_times: numpy.array
    :param state_transition_matrix: the transitions count matrix
    :type state_transition_matrix: numpy.ndArray
    :_cim: the actual cim of the node; until ``compute_cim_coefficients`` is called it is a
        float64 copy of the transitions count matrix
    """

    def __init__(self, state_residence_times: np.array, state_transition_matrix: np.array):
        """Constructor Method
        """
        self._state_residence_times = state_residence_times
        self._state_transition_matrix = state_transition_matrix
        # astype returns a new array: the counts given by the caller are never modified
        self._cim = self._state_transition_matrix.astype(np.float64)

    def compute_cim_coefficients(self) -> None:
        """Turn the content of ``_cim`` into the cim coefficients.

        The diagonal of ``_cim`` is negated, then every element is incremented by one and divided
        by the residence time of its row incremented by one. The transformation is applied to the
        current content of ``_cim``, so every call transforms it again.
        """
        negated_diagonal = self._cim.diagonal() * -1
        np.fill_diagonal(self._cim, negated_diagonal)
        # work on the transpose so that the residence times line up with the rows of the cim
        incremented_counts = self._cim.T + 1
        incremented_times = self._state_residence_times + 1
        self._cim = (incremented_counts / incremented_times).T

    @property
    def state_residence_times(self) -> np.ndarray:
        return self._state_residence_times

    @property
    def state_transition_matrix(self) -> np.ndarray:
        return self._state_transition_matrix

    @property
    def cim(self) -> np.ndarray:
        return self._cim

    def __repr__(self):
        return ''.join(['CIM:\n', str(self.cim)])

@ -0,0 +1,293 @@
import typing
import networkx as nx
import numpy as np
from .structure import Structure
class NetworkGraph(object):
"""Abstracts the infos contained in the Structure class in the form of a directed graph.
Has the task of creating all the necessary filtering and indexing structures for parameters estimation
:param graph_struct: the ``Structure`` object from which infos about the net will be extracted
:type graph_struct: Structure
:_graph: directed graph
:_aggregated_info_about_nodes_parents: a structure that contains all the necessary infos
about every parents of the node of which all the indexing and filtering structures will be constructed.
:_time_scalar_indexing_structure: the indexing structure for state res time estimation
:_transition_scalar_indexing_structure: the indexing structure for transition computation
:_time_filtering: the columns filtering structure used in the computation of the state res times
:_transition_filtering: the columns filtering structure used in the computation of the transition
from one state to another
:_p_combs_structure: all the possible parents states combination for the node of interest
"""
def __init__(self, graph_struct: Structure):
    """Store the structure and create an empty directed graph; all the indexing and
    filtering structures start as None.
    """
    self._graph_struct = graph_struct
    self._graph = nx.DiGraph()
    # informations about the parents of the node of interest
    self._aggregated_info_about_nodes_parents = None
    self._p_combs_structure = None
    # scalar indexing structures
    self._time_scalar_indexing_structure = self._transition_scalar_indexing_structure = None
    # columns filtering structures
    self._time_filtering = self._transition_filtering = None
def init_graph(self):
    """Add nodes and edges to the graph and build the indexing/filtering structures for all the nodes.

    NOTE(review): this method reads ``self._nodes_labels`` and ``self.graph_struct`` and writes
    ``self.aggregated_info_about_nodes_parents`` (no leading underscore); none of them is set by
    the constructor, and the builder methods called here are not defined in the visible part of
    the class. It looks like a leftover of a previous version - confirm before using it
    (``fast_init`` is the single node alternative).
    """
    self.add_nodes(self._nodes_labels)
    self.add_edges(self.graph_struct.edges)
    self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes()
    self._fancy_indexing = self.build_fancy_indexing_structure(0)
    self.build_scalar_indexing_structures()
    self.build_time_columns_filtering_structure()
    self.build_transition_columns_filtering_structure()
    self._p_combs_structure = self.build_p_combs_structure()
def fast_init(self, node_id: str) -> None:
    """Build only the structures needed to estimate the parameters of the node ``node_id``.

    Nodes and edges of the structure are (re)added to the graph, then the parents infos, the
    scalar indexing structures, the columns filtering structures and the parents states
    combinations of the node are computed and stored in the class members.

    :param node_id: the label of the node
    :type node_id: string
    """
    structure = self._graph_struct
    self.add_nodes(structure.nodes_labels)
    self.add_edges(structure.edges)

    parents_info = self.get_ordered_by_indx_set_of_parents(node_id)
    self._aggregated_info_about_nodes_parents = parents_info
    # parents_info is (parents labels, parents indexes, parents cardinalities)
    p_indxs, p_vals = parents_info[1], parents_info[2]

    node_states = self.get_states_number(node_id)
    node_indx = self.get_node_indx(node_id)
    cols_number = structure.total_variables_number

    self._time_scalar_indexing_structure = \
        NetworkGraph.build_time_scalar_indexing_structure_for_a_node(node_states, p_vals)
    self._transition_scalar_indexing_structure = \
        NetworkGraph.build_transition_scalar_indexing_structure_for_a_node(node_states, p_vals)
    self._time_filtering = \
        NetworkGraph.build_time_columns_filtering_for_a_node(node_indx, p_indxs)
    self._transition_filtering = \
        NetworkGraph.build_transition_filtering_for_a_node(node_indx, p_indxs, cols_number)
    self._p_combs_structure = NetworkGraph.build_p_comb_structure_for_a_node(p_vals)
def add_nodes(self, list_of_nodes: typing.List) -> None:
    """Add every node of ``list_of_nodes`` to ``_graph``.

    Each node receives three attributes: ``indx`` and ``val``, taken in order from the nodes
    indexes and the nodes values of the structure, and ``pos_indx``, its position in the list.

    :param list_of_nodes: the nodes to add to ``_graph``
    :type list_of_nodes: List
    """
    structure_infos = zip(list_of_nodes,
                          self._graph_struct.nodes_indexes,
                          self._graph_struct.nodes_values)
    for position, (label, node_indx, node_val) in enumerate(structure_infos):
        self._graph.add_node(label, indx=node_indx, val=node_val, pos_indx=position)
def has_edge(self, edge: tuple) -> bool:
    """Tell whether ``_graph`` contains the edge ``edge``.

    :param edge: the edge to look for, as a (source, target) couple
    :type edge: tuple
    :return: the answer of ``_graph.has_edge`` for the two ends of the edge
    :rtype: bool
    """
    source, target = edge[0], edge[1]
    return self._graph.has_edge(source, target)
def add_edges(self, list_of_edges: typing.List) -> None:
    """Insert in ``_graph`` all the edges listed in ``list_of_edges``.

    :param list_of_edges: the edges to add, each one as a couple of node labels
    :type list_of_edges: List
    """
    graph = self._graph
    graph.add_edges_from(list_of_edges)
def remove_node(self, node_id: str) -> None:
    """Delete the node ``node_id`` from ``_graph`` and from the structure, then reset all the
    filtering/indexing structures.

    :param node_id: the label of the node to remove
    :type node_id: string
    """
    for container in (self._graph, self._graph_struct):
        container.remove_node(node_id)
    self.clear_indexing_filtering_structures()
def clear_indexing_filtering_structures(self) -> None:
    """Reset to None the parents infos and all the filtering/indexing structures.
    """
    for member in ('_aggregated_info_about_nodes_parents',
                   '_time_scalar_indexing_structure',
                   '_transition_scalar_indexing_structure',
                   '_time_filtering',
                   '_transition_filtering',
                   '_p_combs_structure'):
        setattr(self, member, None)
def get_ordered_by_indx_set_of_parents(self, node: str) -> typing.Tuple:
    """Collect the infos about the parent set of ``node``, namely
    (parents_labels, parents_indexes, parents_cardinalities).

    The parents are sorted by the position of their label in the nodes labels of the structure;
    the three lists share that order.

    :param node: the label of the node
    :type node: string
    :return: a tuple containing all the parent set infos
    :rtype: Tuple
    """
    parents = self.get_parents_by_id(node)
    label_position = {label: position
                      for position, label in enumerate(self._graph_struct.nodes_labels)}
    sorted_parents = sorted(parents, key=label_position.__getitem__)
    p_indxes = list(map(self.get_node_indx, sorted_parents))
    p_values = list(map(self.get_states_number, sorted_parents))
    return sorted_parents, p_indxes, p_values
def remove_edges(self, list_of_edges: typing.List) -> None:
    """Delete from ``_graph`` all the edges listed in ``list_of_edges``.

    :param list_of_edges: The edges to remove from the graph
    :type list_of_edges: List
    """
    graph = self._graph
    graph.remove_edges_from(list_of_edges)
@staticmethod
def build_time_scalar_indexing_structure_for_a_node(node_states: int,
parents_vals: typing.List) -> np.ndarray:
"""Builds an indexing structure for the computation of state residence times values.
:param node_states: the node cardinality
:type node_states: int
:param parents_vals: the caridinalites of the node's parents
:type parents_vals: List
:return: The time indexing structure
:rtype: numpy.ndArray
"""
T_vector = np.array([node_states])
T_vector = np.append(T_vector, parents_vals)
T_vector = T_vector.cumprod().astype(np.int)
return T_vector
@staticmethod
def build_transition_scalar_indexing_structure_for_a_node(node_states_number: int, parents_vals: typing.List) \
        -> np.ndarray:
    """Builds an indexing structure for the computation of state transitions values.

    The result is the cumulative product of ``[node_states_number, node_states_number, *parents_vals]``.

    :param node_states_number: the node cardinality
    :type node_states_number: int
    :param parents_vals: the cardinalities of the node's parents
    :type parents_vals: List
    :return: The transition indexing structure
    :rtype: numpy.ndArray
    """
    M_vector = np.append(np.array([node_states_number, node_states_number]), parents_vals)
    # ``np.int`` was only an alias of the builtin ``int`` and has been removed from NumPy (1.24).
    return M_vector.cumprod().astype(int)
@staticmethod
def build_time_columns_filtering_for_a_node(node_indx: int, p_indxs: typing.List) -> np.ndarray:
    """
    Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs``
    in the dataset.
    This structure will be used in the computation of the state res times.

    :param node_indx: the index of the node
    :type node_indx: int
    :param p_indxs: the indexes of the node's parents
    :type p_indxs: List
    :return: The filtering structure for times estimation
    :rtype: numpy.ndArray
    """
    # ``np.int`` (alias of the builtin ``int``) has been removed from NumPy (1.24).
    # The final cast is needed because appending an empty list promotes the array to float.
    return np.append(np.array([node_indx], dtype=int), p_indxs).astype(int)
@staticmethod
def build_transition_filtering_for_a_node(node_indx: int, p_indxs: typing.List, nodes_number: int) \
        -> np.ndarray:
    """Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs``
    in the dataset.
    This structure will be used in the computation of the state transitions values.

    The first entry is ``node_indx + nodes_number``, followed by ``node_indx`` and then the parents indexes.

    :param node_indx: the index of the node
    :type node_indx: int
    :param p_indxs: the indexes of the node's parents
    :type p_indxs: List
    :param nodes_number: the total number of nodes in the dataset
    :type nodes_number: int
    :return: The filtering structure for transitions estimation
    :rtype: numpy.ndArray
    """
    # ``np.int`` (alias of the builtin ``int``) has been removed from NumPy (1.24).
    return np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=int)
@staticmethod
def build_p_comb_structure_for_a_node(parents_values: typing.List) -> np.ndarray:
    """
    Builds the combinatorial structure that contains the combinations of all the values contained in
    ``parents_values``.

    Column ``i`` of the result only holds values in ``range(parents_values[i])``; the first column varies
    fastest along the rows, the last one slowest.

    :param parents_values: the cardinalities of the nodes
    :type parents_values: List
    :return: A numpy matrix containing a grid of the combinations, one combination per row.
        With no parents an empty matrix of shape (1, 0) is returned.
    :rtype: numpy.ndArray
    """
    parents_number = len(parents_values)
    if parents_number == 0:
        # ``np.int`` (alias of the builtin ``int``) has been removed from NumPy (1.24).
        return np.array([[]], dtype=int)
    states_ranges = [np.arange(cardinality) for cardinality in parents_values]
    # Matrix ('ij') indexing keeps every axis bound to its own parent. The previous default ('xy')
    # indexing followed by a manual swap of the first two columns gave the same rows only when the
    # first two cardinalities were equal; otherwise column 0 received the states of the second parent.
    grids = np.meshgrid(*states_ranges, indexing='ij')
    return np.array(grids).T.reshape(-1, parents_number)
def get_parents_by_id(self, node_id) -> typing.List:
    """Collects in a list the labels of the parents (graph predecessors) of the node ``node_id``.

    :param node_id: the node label
    :type node_id: string
    :return: a List of labels of the parents
    :rtype: List
    """
    predecessors = self._graph.predecessors(node_id)
    return [parent_label for parent_label in predecessors]
def get_states_number(self, node_id) -> int:
    """Reads the ``'val'`` attribute stored in the graph for the node ``node_id``.

    :param node_id: the node label
    :return: the value of the node's ``'val'`` attribute
    :rtype: int
    """
    node_attributes = self._graph.nodes[node_id]
    return node_attributes['val']
def get_node_indx(self, node_id) -> int:
    """Reads the ``'indx'`` attribute stored in the graph for the node ``node_id``.

    :param node_id: the node label
    :return: the value of the node's ``'indx'`` attribute
    :rtype: int
    :raises KeyError: if the node is unknown or has no ``'indx'`` attribute
    """
    # Direct lookup of the node's attribute dict, consistent with the sibling getters.
    # It avoids building a {node: indx} dict for the whole graph on every call.
    return self._graph.nodes[node_id]['indx']
def get_positional_node_indx(self, node_id) -> int:
    """Reads the ``'pos_indx'`` attribute stored in the graph for the node ``node_id``.

    :param node_id: the node label
    :return: the value of the node's ``'pos_indx'`` attribute
    :rtype: int
    """
    node_attributes = self._graph.nodes[node_id]
    return node_attributes['pos_indx']
@property
def nodes(self) -> typing.List:
    """The nodes labels held by the underlying structure object."""
    structure = self._graph_struct
    return structure.nodes_labels

@property
def edges(self) -> typing.List:
    """A new list built from the edges of the underlying graph."""
    return [*self._graph.edges]

@property
def nodes_indexes(self) -> np.ndarray:
    """The nodes indexes held by the underlying structure object."""
    structure = self._graph_struct
    return structure.nodes_indexes

@property
def nodes_values(self) -> np.ndarray:
    """The nodes values held by the underlying structure object."""
    structure = self._graph_struct
    return structure.nodes_values

@property
def time_scalar_indexing_strucure(self) -> np.ndarray:
    """The currently stored time scalar indexing structure."""
    return self._time_scalar_indexing_structure

@property
def time_filtering(self) -> np.ndarray:
    """The currently stored time filtering structure."""
    return self._time_filtering

@property
def transition_scalar_indexing_structure(self) -> np.ndarray:
    """The currently stored transition scalar indexing structure."""
    return self._transition_scalar_indexing_structure

@property
def transition_filtering(self) -> np.ndarray:
    """The currently stored transition filtering structure."""
    return self._transition_filtering

@property
def p_combs(self) -> np.ndarray:
    """The currently stored parents combinations structure."""
    return self._p_combs_structure

@ -0,0 +1,91 @@
import numpy as np
import pandas as pd
from .structure import Structure
from .trajectory import Trajectory
from ..utility.abstract_importer import AbstractImporter
class SamplePath(object):
    """Aggregates all the informations about the trajectories, the real structure of the sampled net and variables
    cardinalities. Has the task of creating the objects ``Trajectory`` and ``Structure`` that will
    contain the mentioned data.

    :param importer: the Importer object which contains the imported and processed data
    :type importer: AbstractImporter
    :_trajectories: the ``Trajectory`` object that will contain all the concatenated trajectories
    :_structure: the ``Structure`` Object that will contain all the structural infos about the net
    :_total_variables_count: the number of variables in the net
    """

    def __init__(self, importer: AbstractImporter):
        """Constructor Method

        :raises RuntimeError: if the importer's variables frame or concatenated samples are missing or empty
        """
        self._importer = importer
        variables_frame = self._importer._df_variables
        samples = self._importer._concatenated_samples
        if variables_frame is None or samples is None:
            raise RuntimeError('The importer object has to contain the all processed data!')
        if variables_frame.empty:
            raise RuntimeError('The importer object has to contain the all processed data!')
        # The samples may be either a DataFrame or a numpy array: each has its own emptiness test.
        if isinstance(samples, pd.DataFrame) and samples.empty:
            raise RuntimeError('The importer object has to contain the all processed data!')
        if isinstance(samples, np.ndarray) and samples.size == 0:
            raise RuntimeError('The importer object has to contain the all processed data!')
        self._trajectories = None
        self._structure = None
        self._total_variables_count = None

    def build_trajectories(self) -> None:
        """Builds the Trajectory object that will contain all the trajectories, then asks the importer
        to clear its concatenated samples frame.
        """
        columns_list = self._importer.build_list_of_samples_array(self._importer.concatenated_samples)
        # One extra column on top of the variables listed in the sorter.
        original_cols_number = len(self._importer.sorter) + 1
        self._trajectories = Trajectory(columns_list, original_cols_number)
        self._importer.clear_concatenated_frame()

    def build_structure(self) -> None:
        """
        Builds the ``Structure`` object that aggregates all the infos about the net.

        :raises RuntimeError: if the importer's sorter differs from the labels in the variables frame
        """
        variables_frame = self._importer.variables
        if self._importer.sorter != variables_frame.iloc[:, 0].to_list():
            raise RuntimeError("The Dataset columns order have to match the order of labels in the variables Frame!")
        self._total_variables_count = len(self._importer.sorter)
        labels = variables_frame.iloc[:, 0].to_list()
        indxs = variables_frame.index.to_numpy()
        vals = variables_frame.iloc[:, 1].to_numpy()
        prior_structure = self._importer.structure
        # Without a (non empty) prior structure the net starts with no edges.
        no_prior_structure = prior_structure is None or prior_structure.empty
        edges = [] if no_prior_structure else list(prior_structure.to_records(index=False))
        self._structure = Structure(labels, indxs, vals, edges, self._total_variables_count)

    def clear_memory(self):
        """Replaces the importer's ``_raw_data`` with an empty list."""
        self._importer._raw_data = []

    @property
    def trajectories(self) -> Trajectory:
        """The ``Trajectory`` object, ``None`` until ``build_trajectories`` is called."""
        return self._trajectories

    @property
    def structure(self) -> Structure:
        """The ``Structure`` object, ``None`` until ``build_structure`` is called."""
        return self._structure

    @property
    def total_variables_count(self) -> int:
        """The number of variables, ``None`` until ``build_structure`` is called."""
        return self._total_variables_count

    @property
    def has_prior_net_structure(self) -> bool:
        """``True`` when the built structure contains at least one edge."""
        return bool(self._structure.edges)

@ -0,0 +1,97 @@
import typing
import numpy as np
from .conditional_intensity_matrix import ConditionalIntensityMatrix
class SetOfCims(object):
    """Aggregates all the CIMS of the node identified by the label _node_id.

    :param node_id: the node label
    :type node_id: string
    :param parents_states_number: the cardinalities of the parents
    :type parents_states_number: List
    :param node_states_number: the cardinality of the node
    :type node_states_number: int
    :param p_combs: the p_comb structure bound to this node
    :type p_combs: numpy.ndArray
    :_state_residence_times: matrix containing all the state residence time vectors for the node
    :_transition_matrices: matrix containing all the transition matrices for the node
    :_actual_cims: the cims of the node
    """

    def __init__(self, node_id: str, parents_states_number: typing.List, node_states_number: int, p_combs: np.ndarray):
        """Constructor Method
        """
        self._node_id = node_id
        self._parents_states_number = parents_states_number
        self._node_states_number = node_states_number
        self._actual_cims = []
        self._state_residence_times = None
        self._transition_matrices = None
        self._p_combs = p_combs
        self.build_times_and_transitions_structures()

    def build_times_and_transitions_structures(self) -> None:
        """Initializes at the correct dimensions the state residence times matrix and the state transition matrices.

        One row/matrix is allocated for every combination of the parents states (a single one when the node
        has no parents).
        """
        if not self._parents_states_number:
            combinations_number = 1
        else:
            combinations_number = np.prod(self._parents_states_number)
        states_number = self._node_states_number
        # ``np.float``/``np.int`` were aliases of the builtins and have been removed from NumPy (1.24);
        # the builtins select the same dtypes on every NumPy version.
        self._state_residence_times = np.zeros((combinations_number, states_number), dtype=float)
        self._transition_matrices = np.zeros((combinations_number, states_number, states_number), dtype=int)

    def build_cims(self, state_res_times: np.ndarray, transition_matrices: np.ndarray) -> None:
        """Build the ``ConditionalIntensityMatrix`` objects given the state residence times and transitions matrices.
        Compute the cim coefficients. The class member ``_actual_cims`` will contain the computed cims.
        The members ``_transition_matrices`` and ``_state_residence_times`` are reset to ``None`` afterwards.

        :param state_res_times: the state residence times matrix
        :type state_res_times: numpy.ndArray
        :param transition_matrices: the transition matrices
        :type transition_matrices: numpy.ndArray
        """
        for state_res_time_vector, transition_matrix in zip(state_res_times, transition_matrices):
            cim_to_add = ConditionalIntensityMatrix(state_res_time_vector, transition_matrix)
            cim_to_add.compute_cim_coefficients()
            self._actual_cims.append(cim_to_add)
        # Converted to an array so that the cims can be selected with arrays of indexes.
        self._actual_cims = np.array(self._actual_cims)
        self._transition_matrices = None
        self._state_residence_times = None

    def filter_cims_with_mask(self, mask_arr: np.ndarray, comb: typing.List) -> np.ndarray:
        """Filter the cims contained in the array ``_actual_cims`` given the boolean mask ``mask_arr`` and the index
        ``comb``.

        :param mask_arr: the boolean mask that indicates which parent to consider
        :type mask_arr: numpy.array
        :param comb: the state/s of the filtered parents
        :type comb: numpy.array
        :return: Array of ``ConditionalIntensityMatrix`` objects; all the cims when the mask has at most
            one element
        :rtype: numpy.array
        """
        if mask_arr.size <= 1:
            return self._actual_cims
        # Rows of ``_p_combs`` whose masked columns match ``comb`` give the positions of the cims to keep.
        matching_rows = np.all(self._p_combs[:, mask_arr] == comb, axis=1)
        flat_indxs = np.argwhere(matching_rows).ravel()
        return self._actual_cims[flat_indxs]

    @property
    def actual_cims(self) -> np.ndarray:
        return self._actual_cims

    @property
    def p_combs(self) -> np.ndarray:
        return self._p_combs

    def get_cims_number(self):
        """Returns the number of cims currently stored in ``_actual_cims``."""
        return len(self._actual_cims)

@ -0,0 +1,124 @@
import typing as ty
import numpy as np
class Structure(object):
    """Contains all the infos about the network structure (nodes labels, nodes cardinalities, edges, indexes)

    :param nodes_labels_list: the symbolic names of the variables
    :type nodes_labels_list: List
    :param nodes_indexes_arr: the indexes of the nodes
    :type nodes_indexes_arr: numpy.ndArray
    :param nodes_vals_arr: the cardinalities of the nodes
    :type nodes_vals_arr: numpy.ndArray
    :param edges_list: the edges of the network
    :type edges_list: List
    :param total_variables_number: the total number of variables in the dataset
    :type total_variables_number: int
    """

    def __init__(self, nodes_labels_list: ty.List, nodes_indexes_arr: np.ndarray, nodes_vals_arr: np.ndarray,
                 edges_list: ty.List, total_variables_number: int):
        """Constructor Method
        """
        self._nodes_labels_list = nodes_labels_list
        self._nodes_indexes_arr = nodes_indexes_arr
        self._nodes_vals_arr = nodes_vals_arr
        self._edges_list = edges_list
        self._total_variables_number = total_variables_number

    def remove_node(self, node_id: str) -> None:
        """Remove the node ``node_id`` from all the class members, including every edge that touches it.
        The class member ``_total_variables_number`` is left untouched since it refers to the total number of
        variables in the dataset.

        :param node_id: the label of the node to remove
        :type node_id: string
        :raises ValueError: if ``node_id`` is not among the nodes labels
        """
        node_positional_indx = self._nodes_labels_list.index(node_id)
        del self._nodes_labels_list[node_positional_indx]
        self._nodes_indexes_arr = np.delete(self._nodes_indexes_arr, node_positional_indx)
        self._nodes_vals_arr = np.delete(self._nodes_vals_arr, node_positional_indx)
        self._edges_list = [(from_node, to_node) for (from_node, to_node) in self._edges_list
                            if node_id not in (from_node, to_node)]

    @property
    def edges(self) -> ty.List:
        return self._edges_list

    @property
    def nodes_labels(self) -> ty.List:
        return self._nodes_labels_list

    @property
    def nodes_indexes(self) -> np.ndarray:
        return self._nodes_indexes_arr

    @property
    def nodes_values(self) -> np.ndarray:
        return self._nodes_vals_arr

    @property
    def total_variables_number(self) -> int:
        return self._total_variables_number

    def get_node_id(self, node_indx: int) -> str:
        """Given the ``node_indx`` returns the node label.

        :param node_indx: the node index
        :type node_indx: int
        :return: the node label
        :rtype: string
        """
        return self._nodes_labels_list[node_indx]

    def clean_structure_edges(self):
        """Replaces the edges list with a new empty list."""
        self._edges_list = list()

    def add_edge(self, edge: tuple):
        """Appends ``edge`` to the edges list.

        :param edge: the edge to add
        :type edge: tuple
        """
        # The edge itself has to be stored, not the builtin ``tuple`` type.
        self._edges_list.append(edge)

    def remove_edge(self, edge: tuple):
        """Removes the first occurrence of ``edge`` from the edges list.

        :param edge: the edge to remove
        :type edge: tuple
        :raises ValueError: if ``edge`` is not in the edges list
        """
        self._edges_list.remove(edge)

    def contains_edge(self, edge: tuple) -> bool:
        """Tells whether ``edge`` is in the edges list.

        :param edge: the edge to look for
        :type edge: tuple
        :rtype: bool
        """
        return edge in self._edges_list

    def get_node_indx(self, node_id: str) -> int:
        """Given the node label ``node_id`` returns the node index.

        :param node_id: the node label
        :type node_id: string
        :return: the node index
        :rtype: int
        """
        pos_indx = self._nodes_labels_list.index(node_id)
        return self._nodes_indexes_arr[pos_indx]

    def get_positional_node_indx(self, node_id: str) -> int:
        """Returns the position of ``node_id`` in the list of nodes labels."""
        return self._nodes_labels_list.index(node_id)

    def get_states_number(self, node: str) -> int:
        """Given the node label ``node`` returns the cardinality of the node.

        :param node: the node label
        :type node: string
        :return: the node cardinality
        :rtype: int
        """
        pos_indx = self._nodes_labels_list.index(node)
        return self._nodes_vals_arr[pos_indx]

    def __repr__(self):
        return "Variables:\n" + str(self._nodes_labels_list) + "\nValues:\n" + str(self._nodes_vals_arr) + \
               "\nEdges: \n" + str(self._edges_list)

    def __eq__(self, other):
        """Overrides the default implementation.
        Labels are compared as sets; values, indexes and edges are compared in order.
        """
        if isinstance(other, Structure):
            return set(self._nodes_labels_list) == set(other._nodes_labels_list) and \
                   np.array_equal(self._nodes_vals_arr, other._nodes_vals_arr) and \
                   np.array_equal(self._nodes_indexes_arr, other._nodes_indexes_arr) and \
                   self._edges_list == other._edges_list
        return False

@ -0,0 +1,45 @@
import typing
import numpy as np
class Trajectory(object):
    """Abstracts the infos about a complete set of trajectories, represented as a numpy array of doubles
    (the time deltas) and a numpy matrix of ints (the changes of states).

    :param list_of_columns: the list containing the times array and values matrix
    :type list_of_columns: List
    :param original_cols_number: total number of cols in the data
    :type original_cols_number: int
    :_actual_trajectory: the trajectory containing also the duplicated/shifted values
    :_times: the array containing the time deltas
    """

    def __init__(self, list_of_columns: typing.List, original_cols_number: int):
        """Constructor Method
        """
        times, complete_trajectory = list_of_columns[0], list_of_columns[1]
        self._times = times
        self._actual_trajectory = complete_trajectory
        self._original_cols_number = original_cols_number

    @property
    def trajectory(self) -> np.ndarray:
        """The first ``original_cols_number - 1`` columns of the complete trajectory."""
        kept_columns = self._original_cols_number - 1
        return self._actual_trajectory[:, :kept_columns]

    @property
    def complete_trajectory(self) -> np.ndarray:
        """The whole trajectory matrix, as passed to the constructor."""
        return self._actual_trajectory

    @property
    def times(self):
        """The times array, as passed to the constructor."""
        return self._times

    def size(self):
        """Returns the number of rows of the complete trajectory."""
        rows_number = self._actual_trajectory.shape[0]
        return rows_number

    def __repr__(self):
        pieces = ["Complete Trajectory Rows: ", str(self.size()), "\n", self.complete_trajectory.__repr__(),
                  "\nTimes Rows:", str(self.times.size), "\n", self.times.__repr__()]
        return "".join(pieces)

@ -0,0 +1,4 @@
from .abstract_importer import AbstractImporter
from .cache import Cache
from .json_importer import JsonImporter
from .sample_importer import SampleImporter

@ -0,0 +1,164 @@
import typing
from abc import ABC, abstractmethod
import numpy as np
import pandas as pd
import copy
#from sklearn.utils import resample
class AbstractImporter(ABC):
    """Abstract class that exposes all the necessary methods to process the trajectories and the net structure.

    :param file_path: the file path, or dataset name if you import already processed data
    :type file_path: str
    :param trajectory_list: Dataframe or numpy array containing the concatenation of all the processed trajectories
    :type trajectory_list: typing.Union[pandas.DataFrame, numpy.ndarray]
    :param variables: Dataframe containing the nodes labels and cardinalities
    :type variables: pandas.DataFrame
    :prior_net_structure: Dataframe containing the structure of the network (edges)
    :type prior_net_structure: pandas.DataFrame
    :_sorter: A list containing the variables labels in the SAME order as the columns in ``concatenated_samples``

    .. warning::
        The parameters ``variables`` and ``prior_net_structure`` HAVE to be properly constructed
        as Pandas Dataframes with the following structure:
        Header of _df_structure = [From_Node | To_Node]
        Header of _df_variables = [Variable_Label | Variable_Cardinality]
        See the tutorial on how to construct a correct ``concatenated_samples`` Dataframe/ndarray.

    .. note::
        See :class:``JsonImporter`` for an example implementation
    """

    def __init__(self, file_path: str = None, trajectory_list: typing.Union[pd.DataFrame, np.ndarray] = None,
                 variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None):
        """Constructor
        """
        self._file_path = file_path
        self._df_samples_list = trajectory_list
        self._concatenated_samples = []
        self._df_variables = variables
        self._df_structure = prior_net_structure
        self._sorter = None
        super().__init__()

    @abstractmethod
    def build_sorter(self, trajecory_header: object) -> typing.List:
        """Initializes the ``_sorter`` class member from a trajectory dataframe, extracting the header of the frame
        and keeping ONLY the variables symbolic labels, cutting out the time label in the header.

        :param trajecory_header: an object that will be used to define the header
        :type trajecory_header: object
        :return: A list containing the processed header.
        :rtype: List
        """
        pass

    def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame,
                                              columns_header: typing.List, shifted_cols_header: typing.List) \
            -> pd.DataFrame:
        """Computes the difference between each value present in the time column.
        Copies and shifts by one position up all the values present in the remaining columns.
        The last row, which has no successor, is dropped. The input frame is not modified.

        :param sample_frame: the traj to be processed
        :type sample_frame: pandas.Dataframe
        :param columns_header: the original header of sample_frame
        :type columns_header: List
        :param shifted_cols_header: a copy of columns_header with changed names of the contents
        :type shifted_cols_header: List
        :return: The processed dataframe
        :rtype: pandas.Dataframe

        .. warning::
            the Dataframe ``sample_frame`` has to follow the column structure of this header:
            Header of sample_frame = [Time | Variable values]
        """
        # Work on a copy so that the caller's frame is left untouched.
        sample_frame = copy.deepcopy(sample_frame)
        # Row i receives time[i + 1] - time[i].
        sample_frame.iloc[:, 0] = sample_frame.iloc[:, 0].diff().shift(-1)
        # Row i receives the values of row i + 1; the hole left in the last row is filled with 0.
        shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32')
        shifted_cols.columns = shifted_cols_header
        sample_frame = sample_frame.assign(**shifted_cols)
        sample_frame.drop(sample_frame.tail(1).index, inplace=True)
        return sample_frame

    def compute_row_delta_in_all_samples_frames(self, df_samples_list: typing.List) -> None:
        """Calls the method ``compute_row_delta_sigle_samples_frame`` on every dataframe present in the list
        ``df_samples_list``.
        Concatenates the result in the dataframe ``concatenated_samples``

        :param df_samples_list: the dataframe's list to be processed and concatenated
        :type df_samples_list: List
        :raises RuntimeError: if the class member ``_sorter`` is not initialized

        .. warning::
            The Dataframe sample_frame has to follow the column structure of this header:
            Header of sample_frame = [Time | Variable values]
            The class member self._sorter HAS to be properly INITIALIZED (See class members definition doc)

        .. note::
            After the call of this method the class member ``concatenated_samples`` will contain all processed
            and merged trajectories
        """
        if not self._sorter:
            raise RuntimeError("The class member self._sorter has to be INITIALIZED!")
        shifted_cols_header = [s + "S" for s in self._sorter]
        compute_row_delta = self.compute_row_delta_sigle_samples_frame
        proc_samples_list = [compute_row_delta(sample, self._sorter, shifted_cols_header)
                             for sample in df_samples_list]
        self._concatenated_samples = pd.concat(proc_samples_list)
        # Final columns order: Time, the variables, the shifted variables.
        complete_header = ['Time', *self._sorter, *shifted_cols_header]
        self._concatenated_samples = self._concatenated_samples[complete_header]

    def build_list_of_samples_array(self, concatenated_sample: pd.DataFrame) -> typing.List:
        """Builds a List containing the delta times numpy array, and the complete transitions matrix

        :param concatenated_sample: the dataframe/array from which the time, and transitions matrix have to be
            extracted and converted
        :type concatenated_sample: typing.Union[pandas.DataFrame, numpy.ndarray]
        :return: the resulting list of numpy arrays: the first column, and the remaining columns cast to int
        :rtype: List
        """
        # Dataframes are converted through ``to_numpy``; anything else (e.g. an already built numpy array)
        # is taken as array data instead of failing on the missing method.
        to_numpy = getattr(concatenated_sample, 'to_numpy', None)
        concatenated_array = to_numpy() if callable(to_numpy) else np.asarray(concatenated_sample)
        columns_list = [concatenated_array[:, 0], concatenated_array[:, 1:].astype(int)]
        return columns_list

    def clear_concatenated_frame(self) -> None:
        """Removes all values in the dataframe concatenated_samples.
        Nothing is done when ``concatenated_samples`` is not a dataframe.
        """
        if isinstance(self._concatenated_samples, pd.DataFrame):
            self._concatenated_samples = self._concatenated_samples.iloc[0:0]

    @abstractmethod
    def dataset_id(self) -> object:
        """If the original dataset contains multiple dataset, this method returns a unique id to identify the current
        dataset
        """
        pass

    @property
    def concatenated_samples(self) -> pd.DataFrame:
        return self._concatenated_samples

    @property
    def variables(self) -> pd.DataFrame:
        return self._df_variables

    @property
    def structure(self) -> pd.DataFrame:
        return self._df_structure

    @property
    def sorter(self) -> typing.List:
        return self._sorter

    @property
    def file_path(self) -> str:
        return self._file_path

@ -0,0 +1,58 @@
import typing
from ..structure_graph.set_of_cims import SetOfCims
class Cache:
    """This class acts as a cache of ``SetOfCims`` objects for a node.

    :_list_of_sets_of_parents: a list of ``Sets`` objects of the parents to which the cim in cache at SAME
        index is related
    :_actual_cache: a list of setOfCims objects
    """

    def __init__(self):
        """Constructor Method
        """
        self._list_of_sets_of_parents = []
        self._actual_cache = []

    def find(self, parents_comb: typing.Set):  # typing.Union[typing.Set, str]
        """
        Looks in cache for the ``SetOfCims`` related to the symbolic parents combination ``parents_comb``.

        :param parents_comb: the parents related to that ``SetOfCims``
        :type parents_comb: Set
        :return: A ``SetOfCims`` object if the ``parents_comb`` index is found in ``_list_of_sets_of_parents``.
            None otherwise.
        :rtype: SetOfCims
        """
        try:
            position = self._list_of_sets_of_parents.index(parents_comb)
        except ValueError:
            # Cache miss: this combination was never stored.
            return None
        else:
            return self._actual_cache[position]

    def put(self, parents_comb: typing.Set, socim: SetOfCims):
        """Place in cache the ``SetOfCims`` object, and the related symbolic index ``parents_comb`` in
        ``_list_of_sets_of_parents``.

        :param parents_comb: the symbolic set index
        :type parents_comb: Set
        :param socim: the related SetOfCims object
        :type socim: SetOfCims
        """
        # The two lists grow together, so a key and its value always share the same position.
        self._list_of_sets_of_parents.append(parents_comb)
        self._actual_cache.append(socim)

    def clear(self):
        """Clear in place the contents both of ``_actual_cache`` and ``_list_of_sets_of_parents``.
        """
        for container in (self._list_of_sets_of_parents, self._actual_cache):
            del container[:]

@ -0,0 +1,176 @@
import json
import typing
import pandas as pd
from .abstract_importer import AbstractImporter
class JsonImporter(AbstractImporter):
    """Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare
    the data in json extension.

    :param file_path: the path of the file that contains the data to be imported
    :type file_path: string
    :param samples_label: the reference key for the samples in the trajectories
    :type samples_label: string
    :param structure_label: the reference key for the structure of the network data
    :type structure_label: string
    :param variables_label: the reference key for the cardinalities of the nodes data
    :type variables_label: string
    :param time_key: the key used to identify the timestamps in each trajectory
    :type time_key: string
    :param variables_key: the key used to identify the names of the variables in the net
    :type variables_key: string
    :_array_indx: the index of the outer JsonArray to extract the data from
    :type _array_indx: int
    :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory
    :_raw_data: The raw contents of the json file to import
    :type _raw_data: List
    """

    def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
                 variables_key: str):
        """Constructor method

        .. note::
            This constructor calls also the method ``read_json_file()``, so after the construction of the object
            the class member ``_raw_data`` will contain the raw imported json data.
        """
        self._samples_label, self._structure_label, self._variables_label = \
            samples_label, structure_label, variables_label
        self._time_key, self._variables_key = time_key, variables_key
        self._df_samples_list = None
        self._array_indx = None
        super(JsonImporter, self).__init__(file_path)
        self._raw_data = self.read_json_file()

    def import_data(self, indx: int) -> None:
        """Imports trajectories, structure and variables found at position ``indx`` of the raw data.
        The trajectories are processed into the concatenated samples frame and the single frames are emptied.

        :param indx: the index of the outer JsonArray to extract the data from
        :type indx: int
        """
        self._array_indx = indx
        raw_data = self._raw_data
        self._df_samples_list = self.import_trajectories(raw_data)
        first_trajectory = self._df_samples_list[0]
        self._sorter = self.build_sorter(first_trajectory)
        self.compute_row_delta_in_all_samples_frames(self._df_samples_list)
        self.clear_data_frame_list()
        self._df_structure = self.import_structure(raw_data)
        self._df_variables = self.import_variables(raw_data)

    def import_trajectories(self, raw_data: typing.List) -> typing.List:
        """Imports the trajectories from the list of dicts ``raw_data``.

        :param raw_data: List of Dicts
        :type raw_data: List
        :return: List of dataframes containing all the trajectories
        :rtype: List
        """
        trajectories = self.normalize_trajectories(raw_data, self._array_indx, self._samples_label)
        return trajectories

    def import_structure(self, raw_data: typing.List) -> pd.DataFrame:
        """Imports in a dataframe the data in the list raw_data at the key ``_structure_label``

        :param raw_data: List of Dicts
        :type raw_data: List
        :return: Dataframe containing the starting node and ending node of every arc of the network
        :rtype: pandas.Dataframe
        """
        structure_frame = self.one_level_normalizing(raw_data, self._array_indx, self._structure_label)
        return structure_frame

    def import_variables(self, raw_data: typing.List) -> pd.DataFrame:
        """Imports the data in ``raw_data`` at the key ``_variables_label``.

        :param raw_data: List of Dicts
        :type raw_data: List
        :return: Dataframe containing the variables symbolic labels and their cardinalities
        :rtype: pandas.Dataframe
        """
        variables_frame = self.one_level_normalizing(raw_data, self._array_indx, self._variables_label)
        return variables_frame

    def read_json_file(self) -> typing.List:
        """Reads the JSON file in the path ``self._file_path``.

        :return: The contents of the json file
        :rtype: List
        """
        with open(self._file_path) as json_file:
            return json.load(json_file)

    def one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame:
        """Extracts the one-level nested data in the list ``raw_data`` at the index ``indx`` at the key ``key``.

        :param raw_data: List of Dicts
        :type raw_data: List
        :param indx: The index of the array from which the data have to be extracted
        :type indx: int
        :param key: the key for the Dicts from which extract data
        :type key: string
        :return: A normalized dataframe
        :rtype: pandas.Dataframe
        """
        selected_data = raw_data[indx][key]
        return pd.DataFrame(selected_data)

    def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str) -> typing.List:
        """
        Extracts the trajectories in ``raw_data`` at the index ``indx`` at the key ``trajectories_key``.

        :param raw_data: List of Dicts
        :type raw_data: List
        :param indx: The index of the array from which the data have to be extracted
        :type indx: int
        :param trajectories_key: the key of the trajectories objects
        :type trajectories_key: string
        :return: A list of dataframes containing the trajectories
        :rtype: List
        """
        samples = raw_data[indx][trajectories_key]
        # One dataframe for each sampled trajectory.
        return list(map(pd.DataFrame, samples))

    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
        """Implements the abstract method build_sorter of the :class:`AbstractImporter` for this dataset:
        the header of ``sample_frame`` without the time key.
        """
        variables_labels = list(sample_frame.columns.values)
        variables_labels.remove(self._time_key)
        return variables_labels

    def clear_data_frame_list(self) -> None:
        """Removes all values present in the dataframes in the list ``_df_samples_list``.
        """
        for position, frame in enumerate(self._df_samples_list):
            # An empty slice keeps the columns but drops every row.
            self._df_samples_list[position] = frame.iloc[0:0]

    def dataset_id(self) -> object:
        """Returns the index of the outer JsonArray the data have been imported from."""
        return self._array_indx

    def import_sampled_cims(self, raw_data: typing.List, indx: int, cims_key: str) -> typing.Dict:
        """Imports the synthetic CIMS in the dataset in a dictionary, using variables labels
        as keys for the set of CIMS of a particular node.

        :param raw_data: List of Dicts
        :type raw_data: List
        :param indx: The index of the array from which the data have to be extracted
        :type indx: int
        :param cims_key: the key where the json object cims are placed
        :type cims_key: string
        :return: a dictionary containing the sampled CIMS for all the variables in the net
        :rtype: Dictionary
        """
        cims_for_all_vars = {}
        for var in raw_data[indx][cims_key]:
            cims_of_var = raw_data[indx][cims_key][var]
            cims_for_all_vars[var] = [pd.DataFrame(cims_of_var[p_comb]).to_numpy() for p_comb in cims_of_var]
        return cims_for_all_vars

@ -0,0 +1,65 @@
import json
import typing
import pandas as pd
import numpy as np
from .abstract_importer import AbstractImporter
class SampleImporter(AbstractImporter):
    """Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare
    the data loaded directly by using DataFrame

    :param trajectory_list: the data that describes the trajectories
    :type trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List]
    :param variables: the data that describes the variables with name and cardinality
    :type variables: typing.Union[pd.DataFrame, np.ndarray, typing.List]
    :param prior_net_structure: the data of the real structure, if it exists
    :type prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List]
    :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory
    """

    def __init__(self,
                 trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None,
                 variables: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None,
                 prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None):
        """If ``variables`` or ``prior_net_structure`` are lists or numpy arrays, they are converted
        to DataFrames.
        """
        if isinstance(variables, (list, np.ndarray)):
            variables = pd.DataFrame(variables)
        # Each argument is checked on its own type: the structure used to be converted only
        # depending on the type of ``variables``, which at this point is already a DataFrame.
        if isinstance(prior_net_structure, (list, np.ndarray)):
            prior_net_structure = pd.DataFrame(prior_net_structure)
        super(SampleImporter, self).__init__(trajectory_list=trajectory_list,
                                             variables=variables,
                                             prior_net_structure=prior_net_structure)

    def import_data(self, header_column=None):
        """Initializes the sorter and processes the trajectories into the concatenated samples frame.

        :param header_column: the variables labels to use as sorter; when ``None`` the sorter is built
            from the header of the first trajectory
        :type header_column: List
        """
        if header_column is not None:
            self._sorter = header_column
        else:
            self._sorter = self.build_sorter(self._df_samples_list[0])
        samples_list = self._df_samples_list
        if isinstance(samples_list, np.ndarray):
            samples_list = samples_list.tolist()
        self.compute_row_delta_in_all_samples_frames(samples_list)

    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
        """Implements the abstract method build_sorter of the :class:`AbstractImporter` in order to get the
        ordered variables list: the header of ``sample_frame`` without its first column.
        """
        columns_header = list(sample_frame.columns.values)
        del columns_header[0]
        return columns_header

    def dataset_id(self) -> object:
        """No dataset id is defined for this importer: always returns ``None``."""
        pass

@ -0,0 +1,39 @@
import glob
import os
import sys
sys.path.append("./PyCTBN/")
import structure_graph.network_graph as ng
import structure_graph.sample_path as sp
import structure_graph.set_of_cims as sofc
import estimators.parameters_estimator as pe
import utility.json_importer as ji
def main():
    """Minimal usage example: estimates and prints the CIMs of one node of the network
    described by a JSON file found in ``./data``.

    :raises FileNotFoundError: if ``./data`` contains no ``*.json`` file
    """
    # Take all json files in this dir
    read_files = glob.glob(os.path.join('./data', "*.json"))
    if not read_files:
        # Fail with an explicit message instead of an IndexError on read_files[0].
        raise FileNotFoundError("No .json dataset found in './data'")

    # Import data
    importer = ji.JsonImporter(read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
    # Create a SamplePath object
    s1 = sp.SamplePath(importer)
    # Build the trajectories and the structural information
    s1.build_trajectories()
    s1.build_structure()
    # From the Structure object build the graph
    g = ng.NetworkGraph(s1.structure)
    # Select the node whose parameters have to be estimated
    node = g.nodes[1]
    # Init the graph specifically for THIS node
    g.fast_init(node)
    # Use the SamplePath and the graph to create a ParametersEstimator object
    p1 = pe.ParametersEstimator(s1, g)
    # Init the estimator specifically for THIS node
    p1.fast_init(node)
    # Compute the parameters
    sofc1 = p1.compute_parameters_for_node(node)
    # The estimated CIMs are inside the resulting SetOfCims object
    print(sofc1.actual_cims)


if __name__ == "__main__":
    main()

@ -0,0 +1,8 @@
import PyCTBN.estimators
from PyCTBN.estimators import *
import PyCTBN.optimizers
from PyCTBN.optimizers import *
import PyCTBN.structure_graph
from PyCTBN.structure_graph import *
import PyCTBN.utility
from PyCTBN.utility import *

@ -0,0 +1,5 @@
from .fam_score_calculator import FamScoreCalculator
from .parameters_estimator import ParametersEstimator
from .structure_estimator import StructureEstimator
from .structure_constraint_based_estimator import StructureConstraintBasedEstimator
from .structure_score_based_estimator import StructureScoreBasedEstimator

@ -0,0 +1,272 @@
import itertools
import json
import typing
import networkx as nx
import numpy as np
from networkx.readwrite import json_graph
from math import log
from scipy.special import loggamma
from random import choice
from ..structure_graph.set_of_cims import SetOfCims
from ..structure_graph.network_graph import NetworkGraph
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
'''
'''
class FamScoreCalculator:
    """Computes the FamScore of a node with a Bayesian score function, starting from
    the sufficient statistics stored in the node's conditional intensity matrices.
    """

    def __init__(self):
        pass

    # region theta

    def marginal_likelihood_theta(self,
                                  cims: ConditionalIntensityMatrix,
                                  alpha_xu: float,
                                  alpha_xxu: float):
        """Sums the marginal likelihood over theta of every cim of the node.

        :param cims: the node's cims; the argument is iterated, so despite the
            annotation it is a collection of cim objects
        :type cims: np.array
        :param alpha_xu: hyperparameter over the CTBNs q parameters
        :type alpha_xu: float
        :param alpha_xxu: distributed hyperparameter over the CTBNs theta parameters
        :type alpha_xxu: float
        :return: the value of the marginal likelihood over theta
        :rtype: float
        """
        contributions = []
        for current_cim in cims:
            contributions.append(
                self.variable_cim_xu_marginal_likelihood_theta(current_cim, alpha_xu, alpha_xxu))
        return np.sum(contributions)

    def variable_cim_xu_marginal_likelihood_theta(self,
                                                  cim: ConditionalIntensityMatrix,
                                                  alpha_xu: float,
                                                  alpha_xxu: float):
        """Sums, over every state of the node, the marginal likelihood over theta of one cim.

        :param cim: A conditional_intensity_matrix object with the sufficient statistics
        :type cim: class:'ConditionalIntensityMatrix'
        :param alpha_xu: hyperparameter over the CTBNs q parameters
        :type alpha_xu: float
        :param alpha_xxu: distributed hyperparameter over the CTBNs theta parameters
        :type alpha_xxu: float
        :return: the value of the marginal likelihood over theta
        :rtype: float
        """
        # One term per state: the number of states is the length of the residence times.
        states_count = len(cim._state_residence_times)
        contributions = []
        for state in range(states_count):
            contributions.append(
                self.single_cim_xu_marginal_likelihood_theta(state, cim, alpha_xu, alpha_xxu))
        return np.sum(contributions)

    def single_cim_xu_marginal_likelihood_theta(self,
                                                index: int,
                                                cim: ConditionalIntensityMatrix,
                                                alpha_xu: float,
                                                alpha_xxu: float):
        """Marginal likelihood over theta for one state of the node and one assignment
        of its parents.

        :param index: the position of the current state inside the cim
        :type index: int
        :param cim: A conditional_intensity_matrix object with the sufficient statistics
        :type cim: class:'ConditionalIntensityMatrix'
        :param alpha_xu: hyperparameter over the CTBNs q parameters
        :type alpha_xu: float
        :param alpha_xxu: distributed hyperparameter over the CTBNs theta parameters
        :type alpha_xxu: float
        :return: the value of the marginal likelihood over theta when the node assumes a specific value
        :rtype: float
        """
        other_states = list(range(len(cim._state_residence_times)))
        # The summation runs over x' != x, so the current state is dropped.
        other_states.remove(index)

        diagonal_term = loggamma(alpha_xu) - loggamma(alpha_xu + cim.state_transition_matrix[index, index])
        off_diagonal_terms = []
        for destination in other_states:
            off_diagonal_terms.append(
                self.single_internal_cim_xxu_marginal_likelihood_theta(
                    cim.state_transition_matrix[index, destination],
                    alpha_xxu))
        return diagonal_term + np.sum(off_diagonal_terms)

    def single_internal_cim_xxu_marginal_likelihood_theta(self,
                                                          M_xxu_suff_stats: float,
                                                          alpha_xxu: float=1):
        """Second part of the marginal likelihood over theta formula.

        :param M_xxu_suff_stats: value of the sufficient statistic M[xx'|u]
        :type M_xxu_suff_stats: float
        :param alpha_xxu: distributed hyperparameter over the CTBNs theta parameters, default to 1
        :type alpha_xxu: float
        :return: ``loggamma(alpha_xxu + M[xx'|u]) - loggamma(alpha_xxu)``
        :rtype: float
        """
        with_statistics = loggamma(alpha_xxu + M_xxu_suff_stats)
        without_statistics = loggamma(alpha_xxu)
        return with_statistics - without_statistics

    # endregion

    # region q

    def marginal_likelihood_q(self,
                              cims: np.array,
                              tau_xu: float=0.1,
                              alpha_xu: float=1):
        """Sums the marginal likelihood over q of every cim of the node.

        :param cims: np.array with all the node's cims
        :type cims: np.array
        :param tau_xu: hyperparameter over the CTBNs q parameters, default to 0.1
        :type tau_xu: float
        :param alpha_xu: hyperparameter over the CTBNs q parameters, default to 1
        :type alpha_xu: float
        :return: the value of the marginal likelihood over q
        :rtype: float
        """
        contributions = []
        for current_cim in cims:
            contributions.append(
                self.variable_cim_xu_marginal_likelihood_q(current_cim, tau_xu, alpha_xu))
        return np.sum(contributions)

    def variable_cim_xu_marginal_likelihood_q(self,
                                              cim: ConditionalIntensityMatrix,
                                              tau_xu: float=0.1,
                                              alpha_xu: float=1):
        """Sums, over every state of the node, the marginal likelihood over q of one cim.

        :param cim: A conditional_intensity_matrix object with the sufficient statistics
        :type cim: class:'ConditionalIntensityMatrix'
        :param tau_xu: hyperparameter over the CTBNs q parameters, default to 0.1
        :type tau_xu: float
        :param alpha_xu: hyperparameter over the CTBNs q parameters, default to 1
        :type alpha_xu: float
        :return: the value of the marginal likelihood over q
        :rtype: float
        """
        states_count = len(cim._state_residence_times)
        contributions = []
        for state in range(states_count):
            # M[x|u] is read on the diagonal of the transition matrix, T[x|u] from the
            # residence times.
            contributions.append(
                self.single_cim_xu_marginal_likelihood_q(
                    cim.state_transition_matrix[state, state],
                    cim._state_residence_times[state],
                    tau_xu,
                    alpha_xu))
        return np.sum(contributions)

    def single_cim_xu_marginal_likelihood_q(self,
                                            M_xu_suff_stats: float,
                                            T_xu_suff_stats: float,
                                            tau_xu: float=0.1,
                                            alpha_xu: float=1):
        """Marginal likelihood over q for one state of the node and one assignment of
        its parents.

        :param M_xu_suff_stats: value of the sufficient statistic M[x|u]
        :type M_xu_suff_stats: float
        :param T_xu_suff_stats: value of the sufficient statistic T[x|u]
        :type T_xu_suff_stats: float
        :param tau_xu: hyperparameter over the CTBNs q parameters, default to 0.1
        :type tau_xu: float
        :param alpha_xu: hyperparameter over the CTBNs q parameters, default to 1
        :type alpha_xu: float
        :return: the value of the marginal likelihood of the node when it assumes a specific value
        :rtype: float
        """
        positive_part = loggamma(alpha_xu + M_xu_suff_stats + 1) + (log(tau_xu) * (alpha_xu + 1))
        negative_part = loggamma(alpha_xu + 1) + (
            log(tau_xu + T_xu_suff_stats) * (alpha_xu + M_xu_suff_stats + 1))
        return positive_part - negative_part

    # endregion

    def get_fam_score(self,
                      cims: np.array,
                      tau_xu: float=0.1,
                      alpha_xu: float=1):
        """Calculates the FamScore value of the node as the sum of its marginal
        likelihood over q and its marginal likelihood over theta.

        :param cims: np.array with all the node's cims
        :type cims: np.array
        :param tau_xu: hyperparameter over the CTBNs q parameters, default to 0.1
        :type tau_xu: float, optional
        :param alpha_xu: hyperparameter over the CTBNs q parameters, default to 1
        :type alpha_xu: float, optional
        :return: the FamScore value of the node
        :rtype: float
        """
        # alpha_xxu is spread uniformly over the (states - 1) possible destinations;
        # the number of states is taken from the first cim.
        alpha_xxu = alpha_xu / (len(cims[0]._state_residence_times) - 1)

        q_score = self.marginal_likelihood_q(cims, tau_xu, alpha_xu)
        theta_score = self.marginal_likelihood_theta(cims, alpha_xu, alpha_xxu)
        return q_score + theta_score

@ -0,0 +1,143 @@
import sys
sys.path.append('../')
import numpy as np
from ..structure_graph.network_graph import NetworkGraph
from ..structure_graph.set_of_cims import SetOfCims
from ..structure_graph.trajectory import Trajectory
class ParametersEstimator(object):
    """Has the task of computing the cims of particular node given the trajectories and the net structure
    in the graph ``_net_graph``.

    :param trajectories: the trajectories
    :type trajectories: Trajectory
    :param net_graph: the net structure
    :type net_graph: NetworkGraph
    :_single_set_of_cims: the set of cims object that will hold the cims of the node
    """

    def __init__(self, trajectories: Trajectory, net_graph: NetworkGraph):
        """Constructor Method
        """
        self._trajectories = trajectories
        self._net_graph = net_graph
        self._single_set_of_cims = None

    def fast_init(self, node_id: str) -> None:
        """Initializes all the necessary structures for the parameters estimation for the node ``node_id``.

        :param node_id: the node label
        :type node_id: string
        """
        # Third entry of the aggregated parents info (presumably the parents
        # cardinalities -- confirm against NetworkGraph).
        p_vals = self._net_graph._aggregated_info_about_nodes_parents[2]
        node_states_number = self._net_graph.get_states_number(node_id)
        self._single_set_of_cims = SetOfCims(node_id, p_vals, node_states_number, self._net_graph.p_combs)

    def compute_parameters_for_node(self, node_id: str) -> SetOfCims:
        """Compute the CIMS of the node identified by the label ``node_id``.
        :meth:`fast_init` must have been called before, since the buffers filled here
        belong to ``_single_set_of_cims``.

        :param node_id: the node label
        :type node_id: string
        :return: A SetOfCims object filled with the computed CIMS
        :rtype: SetOfCims
        """
        node_indx = self._net_graph.get_node_indx(node_id)
        state_res_times = self._single_set_of_cims._state_residence_times
        transition_matrices = self._single_set_of_cims._transition_matrices
        # Both helpers fill the buffers in place.
        ParametersEstimator.compute_state_res_time_for_node(self._trajectories.times,
                                                            self._trajectories.trajectory,
                                                            self._net_graph.time_filtering,
                                                            self._net_graph.time_scalar_indexing_strucure,
                                                            state_res_times)
        ParametersEstimator.compute_state_transitions_for_a_node(node_indx, self._trajectories.complete_trajectory,
                                                                 self._net_graph.transition_filtering,
                                                                 self._net_graph.transition_scalar_indexing_structure,
                                                                 transition_matrices)
        self._single_set_of_cims.build_cims(state_res_times, transition_matrices)
        return self._single_set_of_cims

    @staticmethod
    def compute_state_res_time_for_node(times: np.ndarray, trajectory: np.ndarray,
                                        cols_filter: np.ndarray, scalar_indexes_struct: np.ndarray,
                                        T: np.ndarray) -> None:
        """Compute the state residence times for a node and fill the matrix ``T`` with the results

        :param times: the times deltas vector
        :type times: numpy.array
        :param trajectory: the trajectory
        :type trajectory: numpy.ndArray
        :param cols_filter: the columns filtering structure
        :type cols_filter: numpy.array
        :param scalar_indexes_struct: the indexing structure
        :type scalar_indexes_struct: numpy.array
        :param T: the state residence times vectors
        :type T: numpy.ndArray
        """
        # Every row is mapped to a scalar bin index; ``times`` is then accumulated per bin.
        # ``np.int`` was removed in NumPy 1.24: the builtin ``int`` is the same dtype.
        T[:] = np.bincount(np.sum(trajectory[:, cols_filter] * scalar_indexes_struct / scalar_indexes_struct[0], axis=1)
                           .astype(int),
                           times,
                           minlength=scalar_indexes_struct[-1]).reshape(-1, T.shape[1])

    @staticmethod
    def compute_state_transitions_for_a_node(node_indx: int, trajectory: np.ndarray, cols_filter: np.ndarray,
                                             scalar_indexing: np.ndarray, M: np.ndarray) -> None:
        """Compute the state transitions counts for a node and fill the matrices ``M`` with the results.

        :param node_indx: the index of the node
        :type node_indx: int
        :param trajectory: the trajectory
        :type trajectory: numpy.ndArray
        :param cols_filter: the columns filtering structure
        :type cols_filter: numpy.array
        :param scalar_indexing: the indexing structure
        :type scalar_indexing: numpy.array
        :param M: the state transitions matrices
        :type M: numpy.ndArray
        """
        # Flat positions of the diagonal entries of every matrix stored in M.
        diag_indices = np.array([x * M.shape[1] + x % M.shape[1] for x in range(M.shape[0] * M.shape[1])],
                                dtype=np.int64)
        # Keep only the rows whose column (half width + node_indx) is non negative.
        trj_tmp = trajectory[trajectory[:, int(trajectory.shape[1] / 2) + node_indx].astype(int) >= 0]
        M[:] = np.bincount(np.sum(trj_tmp[:, cols_filter] * scalar_indexing / scalar_indexing[0], axis=1).astype(int),
                           minlength=scalar_indexing[-1]).reshape(-1, M.shape[1], M.shape[2])
        M_raveled = M.ravel()
        # Clear the diagonals first, so that the row sums written back only count
        # the off-diagonal entries.
        M_raveled[diag_indices] = 0
        M_raveled[diag_indices] = np.sum(M, axis=2).ravel()

    def init_sets_cims_container(self):
        # NOTE(review): legacy method. ``acims`` is not imported in the visible module and
        # ``self.net_graph`` is never assigned by this class (the attribute is
        # ``_net_graph``), so a call fails with a NameError -- confirm whether it can be
        # removed.
        self.sets_of_cims_struct = acims.SetsOfCimsContainer(self.net_graph.nodes,
                                                             self.net_graph.nodes_values,
                                                             self.net_graph.get_ordered_by_indx_parents_values_for_all_nodes(),
                                                             self.net_graph.p_combs)

    def compute_parameters(self):
        # NOTE(review): legacy method. It reads ``self.net_graph``, ``self.sample_path`` and
        # ``self.sets_of_cims_struct``, none of which is set by ``__init__``, and it passes
        # six arguments to ``compute_state_res_time_for_node``, which accepts five --
        # confirm whether it can be removed.
        for indx, aggr in enumerate(zip(self.net_graph.nodes, self.sets_of_cims_struct.sets_of_cims)):
            self.compute_state_res_time_for_node(self.net_graph.get_node_indx(aggr[0]), self.sample_path.trajectories.times,
                                                 self.sample_path.trajectories.trajectory,
                                                 self.net_graph.time_filtering[indx],
                                                 self.net_graph.time_scalar_indexing_strucure[indx],
                                                 aggr[1]._state_residence_times)
            self.compute_state_transitions_for_a_node(self.net_graph.get_node_indx(aggr[0]),
                                                      self.sample_path.trajectories.complete_trajectory,
                                                      self.net_graph.transition_filtering[indx],
                                                      self.net_graph.transition_scalar_indexing_structure[indx],
                                                      aggr[1]._transition_matrices)
            aggr[1].build_cims(aggr[1]._state_residence_times, aggr[1]._transition_matrices)

@ -0,0 +1,238 @@
import itertools
import json
import typing
import networkx as nx
import numpy as np
from networkx.readwrite import json_graph
import os
from scipy.stats import chi2 as chi2_dist
from scipy.stats import f as f_dist
from tqdm import tqdm
from ..utility.cache import Cache
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
from ..structure_graph.network_graph import NetworkGraph
from .parameters_estimator import ParametersEstimator
from .structure_estimator import StructureEstimator
from ..structure_graph.sample_path import SamplePath
from ..structure_graph.structure import Structure
from ..optimizers.constraint_based_optimizer import ConstraintBasedOptimizer
import concurrent.futures
import multiprocessing
from multiprocessing import Pool
class StructureConstraintBasedEstimator(StructureEstimator):
    """
    Has the task of estimating the network structure given the trajectories in samplepath by using a constraint-based approach.

    :param sample_path: the _sample_path object containing the trajectories and the real structure
    :type sample_path: SamplePath
    :param exp_test_alfa: the significance level for the exponential Hp test
    :type exp_test_alfa: float
    :param chi_test_alfa: the significance level for the chi Hp test
    :type chi_test_alfa: float
    :param known_edges: the list of prior known edges, default to None (no known edges)
    :type known_edges: List, optional
    :param thumb_threshold: the threshold compared with the thumb value in the independence test, default to 25
    :type thumb_threshold: int, optional
    :_nodes: the nodes labels
    :_nodes_vals: the nodes cardinalities
    :_nodes_indxs: the nodes indexes
    :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes``
    :_cache: the Cache object
    """

    def __init__(self, sample_path: SamplePath, exp_test_alfa: float, chi_test_alfa: float,
                 known_edges: typing.List = None, thumb_threshold: int = 25):
        # ``None`` replaces the former mutable ``[]`` default; the parent class only
        # checks the truthiness of ``known_edges``, so both mean "no known edges".
        super().__init__(sample_path, known_edges)
        self._exp_test_sign = exp_test_alfa
        self._chi_test_alfa = chi_test_alfa
        self._thumb_threshold = thumb_threshold
        self._cache = Cache()

    def _build_graph_with_test_parent(self, test_parent: str, test_child: str, parent_set: typing.List,
                                      tot_vars_count: int) -> NetworkGraph:
        """Builds the graph G2 = {test_child U parent_set U test_parent}, in which every
        parent points to ``test_child``, already initialized for ``test_child``."""
        involved_nodes = parent_set[:]
        involved_nodes.append(test_child)
        involved_nodes.append(test_parent)
        bool_mask2 = np.isin(self._nodes, involved_nodes)
        l2 = list(self._nodes[bool_mask2])
        indxs2 = self._nodes_indxs[bool_mask2]
        vals2 = self._nodes_vals[bool_mask2]
        # The child is wrapped in a list: passing the bare label to ``product`` iterates
        # over its characters and yields wrong edges for multi-character labels.
        eds2 = list(itertools.product([test_parent] + parent_set, [test_child]))
        s2 = Structure(l2, indxs2, vals2, eds2, tot_vars_count)
        g2 = NetworkGraph(s2)
        g2.fast_init(test_child)
        return g2

    def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int,
                      tot_vars_count: int, parent_indx, child_indx) -> bool:
        """Performs a complete independence test on the directed graphs G1 = {test_child U parent_set}
        G2 = {G1 U test_parent} (added as an additional parent of the test_child).
        Generates all the necessary structures and datas to perform the tests.

        :param test_parent: the node label of the test parent
        :type test_parent: string
        :param test_child: the node label of the child
        :type test_child: string
        :param parent_set: the common parent set
        :type parent_set: List
        :param child_states_numb: the cardinality of the ``test_child``
        :type child_states_numb: int
        :param tot_vars_count: the total number of variables in the net
        :type tot_vars_count: int
        :param parent_indx: row of ``_removable_edges_matrix`` updated when the thumb rule fails
        :param child_indx: column of ``_removable_edges_matrix`` updated when the thumb rule fails
        :return: True iff test_child and test_parent are independent given the sep_set parent_set. False otherwise
        :rtype: bool
        """
        p_set = parent_set[:]
        parents = np.array(parent_set)
        parents = np.append(parents, test_parent)
        sorted_parents = self._nodes[np.isin(self._nodes, parents)]
        # Mask selecting, among the ordered parents of G2, everything but the test parent.
        cims_filter = sorted_parents != test_parent

        p_set.insert(0, test_parent)
        g2 = None
        sofc2 = self._cache.find(set(p_set))
        if not sofc2:
            g2 = self._build_graph_with_test_parent(test_parent, test_child, parent_set, tot_vars_count)
            p2 = ParametersEstimator(self._sample_path.trajectories, g2)
            p2.fast_init(test_child)
            sofc2 = p2.compute_parameters_for_node(test_child)
            self._cache.put(set(p_set), sofc2)

        del p_set[0]
        sofc1 = self._cache.find(set(p_set))
        if not sofc1:
            if g2 is None:
                # sofc2 came from the cache, so G2 was never built in this call: build it
                # now instead of failing on an unbound ``g2``.
                g2 = self._build_graph_with_test_parent(test_parent, test_child, parent_set, tot_vars_count)
            # G1 is obtained from G2 by dropping the test parent.
            g2.remove_node(test_parent)
            g2.fast_init(test_child)
            p2 = ParametersEstimator(self._sample_path.trajectories, g2)
            p2.fast_init(test_child)
            sofc1 = p2.compute_parameters_for_node(test_child)
            self._cache.put(set(p_set), sofc1)

        thumb_value = 0.0
        if child_states_numb > 2:
            parent_val = self._sample_path.structure.get_states_number(test_parent)
            bool_mask_vals = np.isin(self._nodes, parent_set)
            parents_vals = self._nodes_vals[bool_mask_vals]
            thumb_value = self.compute_thumb_value(parent_val, child_states_numb, parents_vals)

        # Every cim of G1 is compared with the cims of G2 that share its parents assignment.
        for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs):
            cond_cims = sofc2.filter_cims_with_mask(cims_filter, p_comb)
            for cim2 in cond_cims:
                if not self.independence_test(child_states_numb, cim1, cim2, thumb_value, parent_indx, child_indx):
                    return False
        return True

    def independence_test(self, child_states_numb: int, cim1: ConditionalIntensityMatrix,
                          cim2: ConditionalIntensityMatrix, thumb_value: float, parent_indx, child_indx) -> bool:
        """Compute the actual independence test using two cims.
        It is performed first the exponential test and if the null hypothesis is not rejected,
        it is performed also the chi_test.

        :param child_states_numb: the cardinality of the test child
        :type child_states_numb: int
        :param cim1: a cim belonging to the graph without test parent
        :type cim1: ConditionalIntensityMatrix
        :param cim2: a cim belonging to the graph with test parent
        :type cim2: ConditionalIntensityMatrix
        :param thumb_value: the value divided into the transitions count and compared with ``_thumb_threshold``
        :type thumb_value: float
        :param parent_indx: row of ``_removable_edges_matrix`` set to False when the thumb rule fails
        :param child_indx: column of ``_removable_edges_matrix`` set to False when the thumb rule fails
        :return: True iff both tests do NOT reject the null hypothesis of independence. False otherwise.
        :rtype: bool
        """
        M1 = cim1.state_transition_matrix
        M2 = cim2.state_transition_matrix
        r1s = M1.diagonal()
        r2s = M2.diagonal()
        C1 = cim1.cim
        C2 = cim2.cim

        if child_states_numb > 2:
            # Thumb rule: below the threshold the edge is marked as not removable and
            # the test reports dependence.
            if (np.sum(np.diagonal(M1)) / thumb_value) < self._thumb_threshold:
                self._removable_edges_matrix[parent_indx][child_indx] = False
                return False

        # Exponential test: two-sided F test on the ratio of the cims diagonals.
        F_stats = C2.diagonal() / C1.diagonal()
        exp_alfa = self._exp_test_sign
        for val in range(0, child_states_numb):
            if F_stats[val] < f_dist.ppf(exp_alfa / 2, r1s[val], r2s[val]) or \
                    F_stats[val] > f_dist.ppf(1 - exp_alfa / 2, r1s[val], r2s[val]):
                return False

        # Chi test on the off-diagonal transition counts.
        M1_no_diag = M1[~np.eye(M1.shape[0], dtype=bool)].reshape(M1.shape[0], -1)
        M2_no_diag = M2[~np.eye(M2.shape[0], dtype=bool)].reshape(M2.shape[0], -1)
        chi_2_quantile = chi2_dist.ppf(1 - self._chi_test_alfa, child_states_numb - 1)
        Ks = np.sqrt(r1s / r2s)
        Ls = np.sqrt(r2s / r1s)
        for val in range(0, child_states_numb):
            Chi = np.sum(np.power(Ks[val] * M2_no_diag[val] - Ls[val] * M1_no_diag[val], 2) /
                         (M1_no_diag[val] + M2_no_diag[val]))
            if Chi > chi_2_quantile:
                return False
        return True

    def compute_thumb_value(self, parent_val, child_val, parent_set_vals):
        """Compute the value to test against the thumb_threshold.

        :param parent_val: test parent's variable cardinality
        :type parent_val: int
        :param child_val: test child's variable cardinality
        :type child_val: int
        :param parent_set_vals: the cardinalities of the nodes in the current sep-set
        :type parent_set_vals: List
        :return: the thumb value for the current independence test
        :rtype: int
        """
        df = (child_val - 1) ** 2
        df = df * parent_val
        for v in parent_set_vals:
            df = df * v
        return df

    def one_iteration_of_CTPC_algorithm(self, var_id: str, tot_vars_count: int) -> typing.List:
        """Performs an iteration of the CTPC algorithm using the node ``var_id`` as ``test_child``.

        :param var_id: the node label of the test child
        :type var_id: string
        :param tot_vars_count: the total number of variables in the net
        :type tot_vars_count: int
        :return: the result of ``ConstraintBasedOptimizer.optimize_structure`` for ``var_id``
        :rtype: List
        """
        optimizer_obj = ConstraintBasedOptimizer(
            node_id=var_id,
            structure_estimator=self,
            tot_vars_count=tot_vars_count)
        return optimizer_obj.optimize_structure()

    def ctpc_algorithm(self, disable_multiprocessing: bool = False):
        """Compute the CTPC algorithm over the entire net.

        :param disable_multiprocessing: True to process the nodes sequentially in the
            current process, default to False
        :type disable_multiprocessing: bool, optional
        :return: the set of the edges returned for every node
        :rtype: set
        """
        ctpc_algo = self.one_iteration_of_CTPC_algorithm
        total_vars_numb = self._sample_path.total_variables_count

        # Remove all the edges from the structure
        self._sample_path.structure.clean_structure_edges()

        # Estimate the best parents for each node
        if disable_multiprocessing:
            print("DISABILITATO")
            list_edges_partial = [ctpc_algo(n, total_vars_numb) for n in self._nodes]
        else:
            total_vars_numb_array = [total_vars_numb] * len(self._nodes)
            cpu_count = multiprocessing.cpu_count()
            with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor:
                # The lazy iterator returned by ``map`` is consumed while the pool is alive.
                list_edges_partial = list(executor.map(ctpc_algo,
                                                       self._nodes,
                                                       total_vars_numb_array))

        return set(itertools.chain.from_iterable(list_edges_partial))

    def estimate_structure(self, disable_multiprocessing: bool = False):
        """Estimates the structure by running :meth:`ctpc_algorithm`.

        :param disable_multiprocessing: forwarded to :meth:`ctpc_algorithm`, default to False
        :type disable_multiprocessing: bool, optional
        :return: the set of the estimated edges
        :rtype: set
        """
        return self.ctpc_algorithm(disable_multiprocessing=disable_multiprocessing)

@ -0,0 +1,187 @@
import itertools
import json
import typing
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
from networkx.readwrite import json_graph
from abc import ABC
import abc
from ..utility.cache import Cache
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
from ..structure_graph.network_graph import NetworkGraph
from .parameters_estimator import ParametersEstimator
from ..structure_graph.sample_path import SamplePath
from ..structure_graph.structure import Structure
class StructureEstimator(object):
    """Has the task of estimating the network structure given the trajectories in ``samplepath``.

    :param sample_path: the _sample_path object containing the trajectories and the real structure
    :type sample_path: SamplePath
    :param known_edges: the list of prior known edges, default to None
    :type known_edges: List, optional
    :_nodes: the nodes labels
    :_nodes_vals: the nodes cardinalities
    :_nodes_indxs: the nodes indexes
    :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes``
    """

    def __init__(self, sample_path: SamplePath, known_edges: typing.List = None):
        self._sample_path = sample_path
        self._nodes = np.array(self._sample_path.structure.nodes_labels)
        self._nodes_vals = self._sample_path.structure.nodes_values
        self._nodes_indxs = self._sample_path.structure.nodes_indexes
        self._removable_edges_matrix = self.build_removable_edges_matrix(known_edges)
        self._complete_graph = StructureEstimator.build_complete_graph(self._sample_path.structure.nodes_labels)

    def build_removable_edges_matrix(self, known_edges: typing.List):
        """Builds a boolean matrix that tells whether an edge can be removed or not, based on the
        prior knowledge given: every entry is True except those of the known edges.

        :param known_edges: the list of known edges, each one a (parent label, child label) pair
        :type known_edges: List
        :return: a boolean matrix
        :rtype: np.ndarray
        """
        tot_vars_count = self._sample_path.total_variables_count
        complete_adj_matrix = np.full((tot_vars_count, tot_vars_count), True)
        if known_edges:
            for edge in known_edges:
                i = self._sample_path.structure.get_node_indx(edge[0])
                j = self._sample_path.structure.get_node_indx(edge[1])
                complete_adj_matrix[i][j] = False
        return complete_adj_matrix

    @staticmethod
    def build_complete_graph(node_ids: typing.List) -> nx.DiGraph:
        """Builds a complete directed graph (no self loops) given the nodes labels in the list ``node_ids``:

        :param node_ids: the list of nodes labels
        :type node_ids: List
        :return: a complete Digraph Object
        :rtype: networkx.DiGraph
        """
        complete_graph = nx.DiGraph()
        complete_graph.add_nodes_from(node_ids)
        complete_graph.add_edges_from(itertools.permutations(node_ids, 2))
        return complete_graph

    @staticmethod
    def generate_possible_sub_sets_of_size(u: typing.List, size: int, parent_label: str):
        """Creates a list containing all possible subsets of the list ``u`` of size ``size``,
        that do not contain the node identified by ``parent_label``.

        :param u: the list of nodes
        :type u: List
        :param size: the size of the subsets
        :type size: int
        :param parent_label: the node to exclude in the subsets generation
        :type parent_label: string
        :return: an Iterator Object containing a list of lists
        :rtype: Iterator
        """
        list_without_test_parent = u[:]
        list_without_test_parent.remove(parent_label)
        return map(list, itertools.combinations(list_without_test_parent, size))

    def save_results(self) -> None:
        """Save the estimated Structure to a .json file named
        ``results_<dataset name>_<dataset id>.json``. The file is opened by its bare name.
        """
        res = json_graph.node_link_data(self._complete_graph)
        name = self._sample_path._importer.file_path.rsplit('/', 1)[-1]
        name = name.split('.', 1)[0]
        name += '_' + str(self._sample_path._importer.dataset_id())
        name += '.json'
        file_name = 'results_' + name
        with open(file_name, 'w') as f:
            json.dump(res, f)

    def remove_diagonal_elements(self, matrix):
        """Returns the elements of ``matrix`` without its main diagonal, arranged as an
        (m, m - 1) array (assumes ``matrix`` is a square 2-D array -- TODO confirm).
        """
        m = matrix.shape[0]
        strided = np.lib.stride_tricks.as_strided
        s0, s1 = matrix.strides
        return strided(matrix.ravel()[1:], shape=(m - 1, m), strides=(s0 + s1, s1)).reshape(m, -1)

    # NOTE(review): the class does not derive from ABC, so this decorator does not
    # prevent the instantiation of the class.
    @abc.abstractmethod
    def estimate_structure(self) -> typing.List:
        """Abstract method to estimate the structure

        :return: List of estimated edges
        :rtype: Typing.List
        """
        pass

    def adjacency_matrix(self) -> np.ndarray:
        """Converts the estimated structure ``_complete_graph`` to a boolean adjacency matrix representation.

        :return: The adjacency matrix of the graph ``_complete_graph``
        :rtype: numpy.ndArray
        """
        # ``nx.adj_matrix`` was a deprecated alias removed in networkx 3.0.
        return nx.adjacency_matrix(self._complete_graph).toarray().astype(bool)

    def spurious_edges(self) -> typing.List:
        """Return the spurious edges present in the estimated structure, if a prior net structure is present in
        ``_sample_path.structure``.

        :raises RuntimeError: if the sample path has no prior net structure
        :return: A list containing the spurious edges
        :rtype: List
        """
        if not self._sample_path.has_prior_net_structure:
            raise RuntimeError("Can not compute spurious edges with no prior net structure!")
        real_graph = nx.DiGraph()
        real_graph.add_nodes_from(self._sample_path.structure.nodes_labels)
        real_graph.add_edges_from(self._sample_path.structure.edges)
        # NOTE(review): this difference keeps the edges of the real graph that are absent
        # from ``_complete_graph``; "spurious" suggests the opposite direction -- confirm.
        return nx.difference(real_graph, self._complete_graph).edges

    def save_plot_estimated_structure_graph(self) -> None:
        """Plot the estimated structure in a graphical model style and save it as a .png file.
        The edges returned by :meth:`spurious_edges` are colored in red.
        """
        # ``os`` is not imported at module level in this file.
        import os

        graph_to_draw = nx.DiGraph()
        spurious_edges = self.spurious_edges()
        spurious_set = set(spurious_edges)
        non_spurious_edges = list(set(self._complete_graph.edges) - spurious_set)
        print(non_spurious_edges)
        graph_to_draw.add_edges_from(spurious_edges)
        graph_to_draw.add_edges_from(non_spurious_edges)
        # One color per edge of the graph that is actually drawn, in its own edge order
        # (the colors were previously computed on ``_complete_graph``).
        edges_colors = ['red' if edge in spurious_set else 'black' for edge in graph_to_draw.edges]
        pos = nx.spring_layout(graph_to_draw, k=0.5*1/np.sqrt(len(graph_to_draw.nodes())), iterations=50, scale=10)

        options = {
            "node_size": 2000,
            "node_color": "white",
            "edgecolors": "black",
            'linewidths': 2,
            "with_labels": True,
            "font_size": 13,
            'connectionstyle': 'arc3, rad = 0.1',
            "arrowsize": 15,
            "arrowstyle": '<|-',
            "width": 1,
            "edge_color": edges_colors,
        }

        nx.draw(graph_to_draw, pos, **options)
        ax = plt.gca()
        ax.margins(0.20)
        plt.axis("off")
        name = self._sample_path._importer.file_path.rsplit('/', 1)[-1]
        name = name.split('.', 1)[0]
        name += '_' + str(self._sample_path._importer.dataset_id())
        name += '.png'
        plt.savefig(name)
        plt.clf()
        print("Estimated Structure Plot Saved At: ", os.path.abspath(name))

@ -0,0 +1,244 @@
import itertools
import json
import typing
import networkx as nx
import numpy as np
from networkx.readwrite import json_graph
from random import choice
import concurrent.futures
import copy
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
from ..structure_graph.network_graph import NetworkGraph
from .parameters_estimator import ParametersEstimator
from .structure_estimator import StructureEstimator
from ..structure_graph.sample_path import SamplePath
from ..structure_graph.structure import Structure
from .fam_score_calculator import FamScoreCalculator
from ..optimizers.hill_climbing_search import HillClimbing
from ..optimizers.tabu_search import TabuSearch
import multiprocessing
from multiprocessing import Pool
class StructureScoreBasedEstimator(StructureEstimator):
    """Estimate the network structure from the trajectories held by a sample path,
    using a score-based approach.

    :param sample_path: the sample path object containing the trajectories and the real structure
    :type sample_path: SamplePath
    :param tau_xu: hyperparameter over the CTBN's q parameters, default to 0.1
    :type tau_xu: float, optional
    :param alpha_xu: hyperparameter over the CTBN's q parameters, default to 1
    :type alpha_xu: float, optional
    :param known_edges: list of known edges; ``None`` (the default) is treated as an empty list
    :type known_edges: List, optional
    """

    def __init__(self, sample_path: SamplePath, tau_xu: float = 0.1, alpha_xu: float = 1,
                 known_edges: typing.Optional[typing.List] = None):
        """Constructor Method
        """
        # Build a fresh list per call: a ``[]`` default is a single object shared by
        # every instance created without an explicit ``known_edges``.
        if known_edges is None:
            known_edges = []
        super().__init__(sample_path, known_edges)
        self.tau_xu = tau_xu
        self.alpha_xu = alpha_xu

    def estimate_structure(self, max_parents: int = None, iterations_number: int = 40,
                           patience: int = None, tabu_length: int = None,
                           tabu_rules_duration: int = None, optimizer: str = 'tabu',
                           disable_multiprocessing: bool = False):
        """Run the score-based algorithm to find the optimal structure.

        The edges currently stored in the sample path structure are saved as the
        reference ("true") edges and then removed from the structure before the search.
        Precision and recall of the estimated edges against the reference edges are printed.

        :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
        :type max_parents: int, optional
        :param iterations_number: maximum number of iterations of the optimization algorithm, default to 40
        :type iterations_number: int, optional
        :param patience: number of iterations without any improvement before stopping the search.
            If None, disabled, default to None
        :type patience: int, optional
        :param tabu_length: maximum length of the data structures used in the optimization process, default to None
        :type tabu_length: int, optional
        :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None
        :type tabu_rules_duration: int, optional
        :param optimizer: name of the optimizer algorithm. Possible values: 'hill' (Hill climbing),
            'tabu' (tabu search), default to 'tabu'
        :type optimizer: string, optional
        :param disable_multiprocessing: true to run the per-node searches sequentially in this process,
            default to False
        :type disable_multiprocessing: Boolean, optional
        :return: the set of estimated edges
        :rtype: set
        """
        # Save the true edges as tuples before they are wiped from the structure.
        true_edges = copy.deepcopy(self._sample_path.structure.edges)
        true_edges = set(map(tuple, true_edges))

        # Remove all the edges from the structure.
        self._sample_path.structure.clean_structure_edges()

        # Get the number of CPUs.
        cpu_count = multiprocessing.cpu_count()
        print(f"CPU COUNT: {cpu_count}")

        # Estimate the best parents for each node.
        if disable_multiprocessing:
            list_edges_partial = [
                self.estimate_parents(node, max_parents, iterations_number, patience,
                                      tabu_length, tabu_rules_duration, optimizer)
                for node in self._nodes
            ]
        else:
            n_nodes = len(self._nodes)
            with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor:
                # ``map`` zips its iterables, so every option is repeated once per node.
                # The results are collected while the executor is still open.
                list_edges_partial = list(executor.map(
                    self.estimate_parents,
                    self._nodes,
                    [max_parents] * n_nodes,
                    [iterations_number] * n_nodes,
                    [patience] * n_nodes,
                    [tabu_length] * n_nodes,
                    [tabu_rules_duration] * n_nodes,
                    [optimizer] * n_nodes))

        # Concatenate all the edge lists.
        set_list_edges = set(itertools.chain.from_iterable(list_edges_partial))

        # Calculate precision and recall.
        n_added_fake_edges = len(set_list_edges.difference(true_edges))
        n_missing_edges = len(true_edges.difference(set_list_edges))
        n_true_positive = len(true_edges) - n_missing_edges
        try:
            precision = n_true_positive / (n_true_positive + n_added_fake_edges)
            recall = n_true_positive / (n_true_positive + n_missing_edges)
        except ZeroDivisionError as e:
            # No estimated edges (precision) or no true edges (recall): the metric is undefined.
            print(f"errore: {e}")
        else:
            print(true_edges)
            print(set_list_edges)
            print(f"precision: {precision} ")
            print(f"recall: {recall} ")

        return set_list_edges

    def estimate_parents(self, node_id: str, max_parents: int = None, iterations_number: int = 40,
                         patience: int = 10, tabu_length: int = None, tabu_rules_duration: int = 5,
                         optimizer: str = 'hill'):
        """Use the FamScore of a node in order to find the best parent nodes.

        :param node_id: current node's id
        :type node_id: string
        :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
        :type max_parents: int, optional
        :param iterations_number: maximum number of iterations of the optimization algorithm, default to 40
        :type iterations_number: int, optional
        :param patience: number of iterations without any improvement before stopping the search.
            If None, disabled, default to 10
        :type patience: int, optional
        :param tabu_length: maximum length of the data structures used in the optimization process, default to None
        :type tabu_length: int, optional
        :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to 5
        :type tabu_rules_duration: int, optional
        :param optimizer: name of the optimizer algorithm. 'tabu' selects tabu search; any other value
            selects Hill climbing. Default to 'hill'
        :type optimizer: string, optional
        :return: A list of the best edges for the current node
        :rtype: List
        """
        # Choose the optimizer algorithm.
        if optimizer == 'tabu':
            optimizer = TabuSearch(
                node_id=node_id,
                structure_estimator=self,
                max_parents=max_parents,
                iterations_number=iterations_number,
                patience=patience,
                tabu_length=tabu_length,
                tabu_rules_duration=tabu_rules_duration)
        else:  # if optimizer == 'hill':
            optimizer = HillClimbing(
                node_id=node_id,
                structure_estimator=self,
                max_parents=max_parents,
                iterations_number=iterations_number,
                patience=patience)

        # Call the optimizer's function that calculates the current node's parents.
        return optimizer.optimize_structure()

    def get_score_from_graph(self,
                             graph: NetworkGraph,
                             node_id: str):
        """Get the FamScore of a node.

        :param node_id: current node's id
        :type node_id: string
        :param graph: current graph to be computed
        :type graph: class:'NetworkGraph'
        :return: The FamScore for this graph structure
        :rtype: float
        """
        # Initialize the graph for a single node.
        graph.fast_init(node_id)

        params_estimation = ParametersEstimator(self._sample_path.trajectories, graph)

        # Initialize and compute the parameters for the node.
        params_estimation.fast_init(node_id)
        set_of_cims = params_estimation.compute_parameters_for_node(node_id)

        # Calculate the FamScore for the node.
        fam_score_obj = FamScoreCalculator()
        score = fam_score_obj.get_fam_score(set_of_cims.actual_cims,
                                            tau_xu=self.tau_xu,
                                            alpha_xu=self.alpha_xu)
        return score

@ -0,0 +1,4 @@
from .optimizer import Optimizer
from .tabu_search import TabuSearch
from .hill_climbing_search import HillClimbing
from .constraint_based_optimizer import ConstraintBasedOptimizer

@ -0,0 +1,87 @@
import itertools
import json
import typing
import networkx as nx
import numpy as np
from random import choice
from abc import ABC
import copy
from .optimizer import Optimizer
from ..estimators.structure_estimator import StructureEstimator
from ..structure_graph.network_graph import NetworkGraph
class ConstraintBasedOptimizer(Optimizer):
    """Optimizer that implements the CTPC algorithm.

    :param node_id: current node's id
    :type node_id: string
    :param structure_estimator: a structure estimator object with the information about the net
    :type structure_estimator: class:'StructureEstimator'
    :param tot_vars_count: number of variables in the dataset
    :type tot_vars_count: int
    """

    def __init__(self,
                 node_id: str,
                 structure_estimator: StructureEstimator,
                 tot_vars_count: int
                 ):
        """Constructor
        """
        super().__init__(node_id, structure_estimator)
        self.tot_vars_count = tot_vars_count

    def optimize_structure(self):
        """Run the CTPC optimization process for the node of this optimizer.

        Starts from a graph where every other node is a parent of the node, then, for
        growing conditioning-set sizes, drops each removable parent edge for which
        ``complete_test`` of the structure estimator returns a truthy value.
        The estimator's cache is cleared before returning.

        :return: the estimated structure for the node
        :rtype: List
        """
        print("##################TESTING VAR################", self.node_id)

        estimator = self.structure_estimator
        structure = estimator._sample_path.structure
        graph = NetworkGraph(structure)

        # Candidate parents: every node except the one under test, all connected at the start.
        candidates = [label for label in structure.nodes_labels if label != self.node_id]
        for candidate in candidates:
            graph.add_edges([(candidate, self.node_id)])

        child_states_numb = structure.get_states_number(self.node_id)
        child_indx = structure.get_node_indx(self.node_id)

        subset_size = 0
        while subset_size < len(candidates):
            position = 0
            while position < len(candidates):
                test_parent = candidates[position]
                parent_indx = structure.get_node_indx(test_parent)

                edge_rejected = False
                # Edges flagged as not removable (prior knowledge) are never tested.
                if estimator._removable_edges_matrix[parent_indx][child_indx]:
                    conditioning_sets = StructureEstimator.generate_possible_sub_sets_of_size(
                        candidates, subset_size, test_parent)
                    # ``any`` stops at the first conditioning set for which the test succeeds.
                    edge_rejected = any(
                        estimator.complete_test(test_parent, self.node_id, parents_set,
                                                child_states_numb, self.tot_vars_count,
                                                parent_indx, child_indx)
                        for parents_set in conditioning_sets)

                if edge_rejected:
                    graph.remove_edges([(test_parent, self.node_id)])
                    # The list shrinks, so the same position now holds the next candidate.
                    candidates.remove(test_parent)
                else:
                    position += 1
            subset_size += 1

        estimator._cache.clear()
        return graph.edges

@ -0,0 +1,135 @@
import itertools
import json
import typing
import networkx as nx
import numpy as np
from random import choice
from abc import ABC
from .optimizer import Optimizer
from ..estimators.structure_estimator import StructureEstimator
from ..structure_graph.network_graph import NetworkGraph
class HillClimbing(Optimizer):
    """Optimizer class that implements Hill Climbing Search.

    :param node_id: current node's id
    :type node_id: string
    :param structure_estimator: a structure estimator object with the information about the net
    :type structure_estimator: class:'StructureEstimator'
    :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
    :type max_parents: int, optional
    :param iterations_number: maximum number of iterations of the optimization algorithm, default to 40
    :type iterations_number: int, optional
    :param patience: number of iterations without any improvement before stopping the search.
        If None, disabled, default to None
    :type patience: int, optional
    """

    def __init__(self,
                 node_id: str,
                 structure_estimator: StructureEstimator,
                 max_parents: int = None,
                 iterations_number: int = 40,
                 patience: int = None
                 ):
        """Constructor
        """
        super().__init__(node_id, structure_estimator)
        self.max_parents = max_parents
        self.iterations_number = iterations_number
        self.patience = patience

    def optimize_structure(self) -> typing.List:
        """Run the optimization process for the node by using a Hill Climbing algorithm.

        At each iteration a random candidate edge towards the node is toggled (added if
        absent, removed if present); the change is kept only when the score returned by
        the structure estimator strictly improves, otherwise it is undone.

        :return: the estimated structure for the node
        :rtype: List
        """
        estimator = self.structure_estimator

        # Create the graph for the single node.
        graph = NetworkGraph(estimator._sample_path.structure)

        # Get the index of the current node.
        node_index = estimator._sample_path._structure.get_node_indx(self.node_id)

        # Parents imposed by prior knowledge (edges flagged as not removable).
        prior_parents = set()
        for i in range(len(estimator._removable_edges_matrix)):
            if not estimator._removable_edges_matrix[i][node_index]:
                parent_id = estimator._sample_path._structure.get_node_id(i)
                prior_parents.add(parent_id)
                # Add the known edge to the starting structure.
                graph.add_edges([(parent_id, self.node_id)])

        # All the possible parents: every node but the current one and its known parents.
        other_nodes = [node for node in estimator._sample_path.structure.nodes_labels
                       if node != self.node_id and node not in prior_parents]

        if not other_nodes:
            # Nothing to search over (single-node network, or every other node is already a
            # known parent). ``random.choice`` would raise IndexError on the empty list.
            print(f"finito variabile: {self.node_id}")
            return graph.edges

        actual_best_score = estimator.get_score_from_graph(graph, self.node_id)

        patience_count = 0
        for _ in range(self.iterations_number):
            # Choose a new random edge.
            current_new_parent = choice(other_nodes)
            current_edge = (current_new_parent, self.node_id)
            added = False
            parent_removed = None

            if graph.has_edge(current_edge):
                graph.remove_edges([current_edge])
            else:
                # Check the max_parents constraint: make room by dropping a random parent.
                # NOTE(review): the dropped parent is drawn from all current parents, which
                # may include a known (prior) parent - confirm this is intended.
                if self.max_parents is not None:
                    parents_list = graph.get_parents_by_id(self.node_id)
                    if len(parents_list) >= self.max_parents:
                        parent_removed = (choice(parents_list), self.node_id)
                        graph.remove_edges([parent_removed])
                graph.add_edges([current_edge])
                added = True

            current_score = estimator.get_score_from_graph(graph, self.node_id)

            if current_score > actual_best_score:
                # Update the current best score.
                actual_best_score = current_score
                patience_count = 0
            else:
                # Undo the last update.
                if added:
                    graph.remove_edges([current_edge])
                    # If a parent was removed to make room, add it back to the graph.
                    if parent_removed is not None:
                        graph.add_edges([parent_removed])
                else:
                    graph.add_edges([current_edge])
                # Update the patience count.
                patience_count += 1

            if self.patience is not None and patience_count > self.patience:
                break

        print(f"finito variabile: {self.node_id}")
        return graph.edges

@ -0,0 +1,39 @@
import itertools
import json
import typing
import networkx as nx
import numpy as np
import abc
from ..estimators.structure_estimator import StructureEstimator
class Optimizer(abc.ABC):
    """Abstract base class shared by all the PyCTBN optimizers.

    :param node_id: the node label
    :type node_id: string
    :param structure_estimator: A structureEstimator Object to predict the structure
    :type structure_estimator: class:'StructureEstimator'
    """

    def __init__(self, node_id: str, structure_estimator: StructureEstimator):
        """Store the node label and the structure estimator the optimizer works on.
        """
        self.structure_estimator = structure_estimator
        self.node_id = node_id

    @abc.abstractmethod
    def optimize_structure(self) -> typing.List:
        """Run the optimization process for the structure estimator.

        Concrete optimizers must override this method.

        :return: the estimated structure for the node
        :rtype: List
        """

@ -0,0 +1,199 @@
import itertools
import json
import typing
import networkx as nx
import numpy as np
from random import choice,sample
from abc import ABC
from .optimizer import Optimizer
from ..estimators.structure_estimator import StructureEstimator
from ..structure_graph.network_graph import NetworkGraph
import queue
class TabuSearch(Optimizer):
    """Optimizer class that implements Tabu Search.

    :param node_id: current node's id
    :type node_id: string
    :param structure_estimator: a structure estimator object with the information about the net
    :type structure_estimator: class:'StructureEstimator'
    :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
    :type max_parents: int, optional
    :param iterations_number: maximum number of iterations of the optimization algorithm, default to 40
    :type iterations_number: int, optional
    :param patience: number of iterations without any improvement before stopping the search.
        If None, disabled, default to None
    :type patience: int, optional
    :param tabu_length: maximum length of the data structures used in the optimization process.
        If None, the number of candidate parents is used, default to None
    :type tabu_length: int, optional
    :param tabu_rules_duration: number of iterations in which each rule keeps its value.
        If None, the number of candidate parents is used, default to None
    :type tabu_rules_duration: int, optional
    """

    def __init__(self,
                 node_id: str,
                 structure_estimator: StructureEstimator,
                 max_parents: int = None,
                 iterations_number: int = 40,
                 patience: int = None,
                 tabu_length: int = None,
                 tabu_rules_duration=None
                 ):
        """Constructor
        """
        super().__init__(node_id, structure_estimator)
        self.max_parents = max_parents
        self.iterations_number = iterations_number
        self.patience = patience
        self.tabu_length = tabu_length
        self.tabu_rules_duration = tabu_rules_duration

    def optimize_structure(self) -> typing.List:
        """Run the optimization process for the node by using a Tabu Search algorithm.

        At each iteration a random non-tabu candidate edge towards the node is toggled
        (added if absent, removed if present); the change is kept only when the score
        returned by the structure estimator strictly improves, otherwise it is undone.
        The candidate just tried is then put on the tabu list.

        :return: the estimated structure for the node
        :rtype: List
        """
        print(f"tabu search is processing the structure of {self.node_id}")

        estimator = self.structure_estimator

        # Create the graph for the single node.
        graph = NetworkGraph(estimator._sample_path.structure)

        # Get the index of the current node.
        node_index = estimator._sample_path._structure.get_node_indx(self.node_id)

        # Parents imposed by prior knowledge (edges flagged as not removable).
        prior_parents = set()
        for i in range(len(estimator._removable_edges_matrix)):
            if not estimator._removable_edges_matrix[i][node_index]:
                parent_id = estimator._sample_path._structure.get_node_id(i)
                prior_parents.add(parent_id)
                # Add the known edge to the starting structure.
                graph.add_edges([(parent_id, self.node_id)])

        # All the possible parents: every node but the current one and its known parents.
        other_nodes = {node for node in estimator._sample_path.structure.nodes_labels
                       if node != self.node_id and node not in prior_parents}

        if not other_nodes:
            # Nothing to search over. Without this guard the loop would call ``get()`` on
            # an empty queue, which blocks forever.
            print(f"finito variabile: {self.node_id}")
            return graph.edges

        # Calculate the score for the starting structure.
        actual_best_score = estimator.get_score_from_graph(graph, self.node_id)

        # Initialize tabu_length and tabu_rules_duration if None.
        if self.tabu_length is None:
            self.tabu_length = len(other_nodes)
        if self.tabu_rules_duration is None:
            # Fixed: the value used to be stored under a misspelled attribute, leaving
            # ``tabu_rules_duration`` as None and breaking the modulo below.
            self.tabu_rules_duration = len(other_nodes)

        # Initialize the data structures: the set gives fast membership tests,
        # the queue keeps the insertion order for eviction.
        tabu_set = set()
        tabu_queue = queue.Queue()

        patience_count = 0
        tabu_count = 0
        for _ in range(self.iterations_number):
            current_possible_nodes = other_nodes.difference(tabu_set)

            # Choose a new random parent according to the tabu restriction.
            if current_possible_nodes:
                # ``random.sample`` no longer accepts a set (Python 3.11+); draw from a tuple.
                current_new_parent = choice(tuple(current_possible_nodes))
            else:
                # Every candidate is tabu: release the oldest one and use it.
                current_new_parent = tabu_queue.get()
                tabu_set.remove(current_new_parent)

            current_edge = (current_new_parent, self.node_id)
            added = False
            parent_removed = None

            if graph.has_edge(current_edge):
                graph.remove_edges([current_edge])
            else:
                # Check the max_parents constraint: make room by dropping a random parent.
                if self.max_parents is not None:
                    parents_list = graph.get_parents_by_id(self.node_id)
                    if len(parents_list) >= self.max_parents:
                        parent_removed = (choice(parents_list), self.node_id)
                        graph.remove_edges([parent_removed])
                graph.add_edges([current_edge])
                added = True

            current_score = estimator.get_score_from_graph(graph, self.node_id)

            if current_score > actual_best_score:
                # Update the current best score.
                actual_best_score = current_score
                patience_count = 0
            else:
                # Undo the last update.
                if added:
                    graph.remove_edges([current_edge])
                    # If a parent was removed to make room, add it back to the graph.
                    if parent_removed is not None:
                        graph.add_edges([parent_removed])
                else:
                    graph.add_edges([current_edge])
                # Update the patience count.
                patience_count += 1

            # Keep the tabu list within tabu_length. The emptiness check prevents a
            # blocking ``get()`` when tabu_length is 0.
            if not tabu_queue.empty() and tabu_queue.qsize() >= self.tabu_length:
                current_removed = tabu_queue.get()
                tabu_set.remove(current_removed)

            # Add the node to the tabu list.
            tabu_queue.put(current_new_parent)
            tabu_set.add(current_new_parent)

            tabu_count += 1

            # Every tabu_rules_duration steps remove an item from the tabu list.
            if tabu_count % self.tabu_rules_duration == 0:
                if tabu_queue.qsize() > 0:
                    current_removed = tabu_queue.get()
                    tabu_set.remove(current_removed)
                tabu_count = 0

            if self.patience is not None and patience_count > self.patience:
                break

        print(f"finito variabile: {self.node_id}")
        return graph.edges

@ -0,0 +1,6 @@
from .conditional_intensity_matrix import ConditionalIntensityMatrix
from .network_graph import NetworkGraph
from .sample_path import SamplePath
from .set_of_cims import SetOfCims
from .structure import Structure
from .trajectory import Trajectory

@ -0,0 +1,42 @@
import numpy as np
class ConditionalIntensityMatrix(object):
    """Abstracts the Conditional Intensity Matrix of a node as the aggregation of the state
    residence times vector, the state transition matrix and the actual CIM matrix.

    :param state_residence_times: state residence times vector
    :type state_residence_times: numpy.array
    :param state_transition_matrix: the transitions count matrix
    :type state_transition_matrix: numpy.ndArray
    :_cim: the actual cim of the node
    """

    def __init__(self, state_residence_times: np.array, state_transition_matrix: np.array):
        """Constructor Method
        """
        self._state_residence_times = state_residence_times
        self._state_transition_matrix = state_transition_matrix
        # Work on a float64 copy so the transition counts themselves are left untouched.
        self._cim = state_transition_matrix.astype(np.float64)

    def compute_cim_coefficients(self) -> None:
        """Compute the coefficients of the matrix ``_cim``.

        The diagonal of ``_cim`` is negated, then every entry is replaced by
        ``(entry + 1) / (T[x] + 1)``, where ``T[x]`` is the residence time of the row's state.
        The class member ``_cim`` will contain the computed cim.
        """
        negated_diagonal = -self._cim.diagonal()
        np.fill_diagonal(self._cim, negated_diagonal)
        # Transposing lets the residence-times vector broadcast along the rows of ``_cim``.
        numerators = self._cim.T + 1
        denominators = self._state_residence_times + 1
        self._cim = (numerators / denominators).T

    @property
    def state_residence_times(self) -> np.ndarray:
        """The state residence times vector given at construction."""
        return self._state_residence_times

    @property
    def state_transition_matrix(self) -> np.ndarray:
        """The transitions count matrix given at construction."""
        return self._state_transition_matrix

    @property
    def cim(self) -> np.ndarray:
        """The current content of the ``_cim`` matrix."""
        return self._cim

    def __repr__(self):
        return f'CIM:\n{self.cim!s}'

@ -0,0 +1,293 @@
import typing
import networkx as nx
import numpy as np
from .structure import Structure
class NetworkGraph(object):
    """Abstracts the infos contained in the Structure class in the form of a directed graph.
    Has the task of creating all the necessary filtering and indexing structures for parameters estimation

    :param graph_struct: the ``Structure`` object from which infos about the net will be extracted
    :type graph_struct: Structure
    :_graph: directed graph
    :_aggregated_info_about_nodes_parents: a structure that contains all the necessary infos
        about every parents of the node of which all the indexing and filtering structures will be constructed.
    :_time_scalar_indexing_structure: the indexing structure for state res time estimation
    :_transition_scalar_indexing_structure: the indexing structure for transition computation
    :_time_filtering: the columns filtering structure used in the computation of the state res times
    :_transition_filtering: the columns filtering structure used in the computation of the transition
        from one state to another
    :_p_combs_structure: all the possible parents states combination for the node of interest
    """

    def __init__(self, graph_struct: "Structure"):
        """Constructor Method
        """
        self._graph_struct = graph_struct
        self._graph = nx.DiGraph()
        self._aggregated_info_about_nodes_parents = None
        self._time_scalar_indexing_structure = None
        self._transition_scalar_indexing_structure = None
        self._time_filtering = None
        self._transition_filtering = None
        self._p_combs_structure = None

    def init_graph(self):
        # NOTE(review): this method references attributes and methods (``_nodes_labels``,
        # ``graph_struct``, ``get_ord_set_of_par_of_all_nodes``, ``build_fancy_indexing_structure``,
        # ``build_scalar_indexing_structures``, ...) that this class does not define, so it
        # looks like calling it raises AttributeError. ``fast_init`` appears to be the
        # supported entry point - confirm before relying on or removing this method.
        self.add_nodes(self._nodes_labels)
        self.add_edges(self.graph_struct.edges)
        self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes()
        self._fancy_indexing = self.build_fancy_indexing_structure(0)
        self.build_scalar_indexing_structures()
        self.build_time_columns_filtering_structure()
        self.build_transition_columns_filtering_structure()
        self._p_combs_structure = self.build_p_combs_structure()

    def fast_init(self, node_id: str) -> None:
        """Initializes all the necessary structures for parameters estimation of the node identified by the label
        node_id

        :param node_id: the label of the node
        :type node_id: string
        """
        self.add_nodes(self._graph_struct.nodes_labels)
        self.add_edges(self._graph_struct.edges)
        self._aggregated_info_about_nodes_parents = self.get_ordered_by_indx_set_of_parents(node_id)
        # The aggregated tuple is (parents_labels, parents_indexes, parents_cardinalities).
        p_indxs = self._aggregated_info_about_nodes_parents[1]
        p_vals = self._aggregated_info_about_nodes_parents[2]
        node_states = self.get_states_number(node_id)
        node_indx = self.get_node_indx(node_id)
        cols_number = self._graph_struct.total_variables_number
        self._time_scalar_indexing_structure = NetworkGraph.\
            build_time_scalar_indexing_structure_for_a_node(node_states, p_vals)
        self._transition_scalar_indexing_structure = NetworkGraph.\
            build_transition_scalar_indexing_structure_for_a_node(node_states, p_vals)
        self._time_filtering = NetworkGraph.build_time_columns_filtering_for_a_node(node_indx, p_indxs)
        self._transition_filtering = NetworkGraph.build_transition_filtering_for_a_node(node_indx, p_indxs, cols_number)
        self._p_combs_structure = NetworkGraph.build_p_comb_structure_for_a_node(p_vals)

    def add_nodes(self, list_of_nodes: typing.List) -> None:
        """Adds the nodes to the ``_graph`` contained in the list of nodes ``list_of_nodes``.
        Sets all the properties that identify a nodes (index, positional index, cardinality)

        :param list_of_nodes: the nodes to add to ``_graph``
        :type list_of_nodes: List
        """
        nodes_indxs = self._graph_struct.nodes_indexes
        nodes_vals = self._graph_struct.nodes_values
        # ``pos`` is the position of the node in ``list_of_nodes``.
        for pos, (node_label, node_indx, node_val) in enumerate(zip(list_of_nodes, nodes_indxs, nodes_vals)):
            self._graph.add_node(node_label, indx=node_indx, val=node_val, pos_indx=pos)

    def has_edge(self, edge: tuple) -> bool:
        """Check if the graph contains a specific edge

        :param edge: a tuple that represents the edge, as (source, destination)
        :type edge: tuple
        :return: True if the edge is in the graph
        :rtype: bool
        """
        return self._graph.has_edge(edge[0], edge[1])

    def add_edges(self, list_of_edges: typing.List) -> None:
        """Add the edges to the ``_graph`` contained in the list ``list_of_edges``.

        :param list_of_edges: the list containing of tuples containing the edges
        :type list_of_edges: List
        """
        self._graph.add_edges_from(list_of_edges)

    def remove_node(self, node_id: str) -> None:
        """Remove the node ``node_id`` from all the class members.
        Initialize all the filtering/indexing structures.
        """
        self._graph.remove_node(node_id)
        self._graph_struct.remove_node(node_id)
        self.clear_indexing_filtering_structures()

    def clear_indexing_filtering_structures(self) -> None:
        """Initialize all the filtering/indexing structures.
        """
        self._aggregated_info_about_nodes_parents = None
        self._time_scalar_indexing_structure = None
        self._transition_scalar_indexing_structure = None
        self._time_filtering = None
        self._transition_filtering = None
        self._p_combs_structure = None

    def get_ordered_by_indx_set_of_parents(self, node: str) -> typing.Tuple:
        """Builds the aggregated structure that holds all the infos relative to the parent set of the node, namely
        (parents_labels, parents_indexes, parents_cardinalities).

        :param node: the label of the node
        :type node: string
        :return: a tuple containing all the parent set infos
        :rtype: Tuple
        """
        parents = self.get_parents_by_id(node)
        # Parents are ordered by the position of their label in the structure's labels.
        label_position = {label: position
                          for position, label in enumerate(self._graph_struct.nodes_labels)}
        sorted_parents = sorted(parents, key=lambda label: label_position[label])
        p_indxes = [self.get_node_indx(parent) for parent in sorted_parents]
        p_values = [self.get_states_number(parent) for parent in sorted_parents]
        return sorted_parents, p_indxes, p_values

    def remove_edges(self, list_of_edges: typing.List) -> None:
        """Remove the edges to the graph contained in the list list_of_edges.

        :param list_of_edges: The edges to remove from the graph
        :type list_of_edges: List
        """
        self._graph.remove_edges_from(list_of_edges)

    @staticmethod
    def build_time_scalar_indexing_structure_for_a_node(node_states: int,
                                                        parents_vals: typing.List) -> np.ndarray:
        """Builds an indexing structure for the computation of state residence times values.

        :param node_states: the node cardinality
        :type node_states: int
        :param parents_vals: the cardinalities of the node's parents
        :type parents_vals: List
        :return: The time indexing structure
        :rtype: numpy.ndArray
        """
        T_vector = np.array([node_states])
        T_vector = np.append(T_vector, parents_vals)
        # Builtin ``int`` replaces the ``np.int`` alias, which was removed in NumPy 1.24.
        T_vector = T_vector.cumprod().astype(int)
        return T_vector

    @staticmethod
    def build_transition_scalar_indexing_structure_for_a_node(node_states_number: int, parents_vals: typing.List) \
            -> np.ndarray:
        """Builds an indexing structure for the computation of state transitions values.

        :param node_states_number: the node cardinality
        :type node_states_number: int
        :param parents_vals: the cardinalities of the node's parents
        :type parents_vals: List
        :return: The transition indexing structure
        :rtype: numpy.ndArray
        """
        M_vector = np.array([node_states_number,
                             node_states_number])
        M_vector = np.append(M_vector, parents_vals)
        M_vector = M_vector.cumprod().astype(int)
        return M_vector

    @staticmethod
    def build_time_columns_filtering_for_a_node(node_indx: int, p_indxs: typing.List) -> np.ndarray:
        """
        Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs``
        in the dataset.
        This structure will be used in the computation of the state res times.

        :param node_indx: the index of the node
        :type node_indx: int
        :param p_indxs: the indexes of the node's parents
        :type p_indxs: List
        :return: The filtering structure for times estimation
        :rtype: numpy.ndArray
        """
        # The final cast is needed because appending an empty list yields a float array.
        return np.append(np.array([node_indx], dtype=int), p_indxs).astype(int)

    @staticmethod
    def build_transition_filtering_for_a_node(node_indx: int, p_indxs: typing.List, nodes_number: int) \
            -> np.ndarray:
        """Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs``
        in the dataset.
        This structure will be used in the computation of the state transitions values.

        :param node_indx: the index of the node
        :type node_indx: int
        :param p_indxs: the indexes of the node's parents
        :type p_indxs: List
        :param nodes_number: the total number of nodes in the dataset
        :type nodes_number: int
        :return: The filtering structure for transitions estimation
        :rtype: numpy.ndArray
        """
        return np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=int)

    @staticmethod
    def build_p_comb_structure_for_a_node(parents_values: typing.List) -> np.ndarray:
        """
        Builds the combinatorial structure that contains the combinations of all the values contained in
        ``parents_values``.

        :param parents_values: the cardinalities of the nodes
        :type parents_values: List
        :return: A numpy matrix containing a grid of the combinations
        :rtype: numpy.ndArray
        """
        tmp = []
        for val in parents_values:
            tmp.append([x for x in range(val)])
        if len(parents_values) > 0:
            parents_comb = np.array(np.meshgrid(*tmp)).T.reshape(-1, len(parents_values))
            if len(parents_values) > 1:
                # Swap the first two columns of the grid produced by meshgrid.
                tmp_comb = parents_comb[:, 1].copy()
                parents_comb[:, 1] = parents_comb[:, 0].copy()
                parents_comb[:, 0] = tmp_comb
        else:
            # No parents: a single empty combination.
            parents_comb = np.array([[]], dtype=int)
        return parents_comb

    def get_parents_by_id(self, node_id) -> typing.List:
        """Returns a list of labels of the parents of the node ``node_id``

        :param node_id: the node label
        :type node_id: string
        :return: a List of labels of the parents
        :rtype: List
        """
        return list(self._graph.predecessors(node_id))

    def get_states_number(self, node_id) -> int:
        """Returns the ``val`` attribute (cardinality) stored on the node ``node_id``."""
        return self._graph.nodes[node_id]['val']

    def get_node_indx(self, node_id) -> int:
        """Returns the ``indx`` attribute stored on the node ``node_id``."""
        # Direct lookup instead of building a dict of every node's index on each call.
        return self._graph.nodes[node_id]['indx']

    def get_positional_node_indx(self, node_id) -> int:
        """Returns the ``pos_indx`` attribute stored on the node ``node_id``."""
        return self._graph.nodes[node_id]['pos_indx']

    @property
    def nodes(self) -> typing.List:
        return self._graph_struct.nodes_labels

    @property
    def edges(self) -> typing.List:
        return list(self._graph.edges)

    @property
    def nodes_indexes(self) -> np.ndarray:
        return self._graph_struct.nodes_indexes

    @property
    def nodes_values(self) -> np.ndarray:
        return self._graph_struct.nodes_values

    @property
    def time_scalar_indexing_strucure(self) -> np.ndarray:
        # The misspelled name is kept as is: it is part of the public interface.
        return self._time_scalar_indexing_structure

    @property
    def time_filtering(self) -> np.ndarray:
        return self._time_filtering

    @property
    def transition_scalar_indexing_structure(self) -> np.ndarray:
        return self._transition_scalar_indexing_structure

    @property
    def transition_filtering(self) -> np.ndarray:
        return self._transition_filtering

    @property
    def p_combs(self) -> np.ndarray:
        return self._p_combs_structure

@ -0,0 +1,91 @@
import numpy as np
import pandas as pd
from .structure import Structure
from .trajectory import Trajectory
from ..utility.abstract_importer import AbstractImporter
class SamplePath(object):
    """Gathers the trajectories, the known structure of the sampled net and the variables
    cardinalities. It is in charge of creating the ``Trajectory`` and ``Structure`` objects
    that hold those data.

    :param importer: the Importer object which contains the imported and processed data
    :type importer: AbstractImporter
    :_trajectories: the ``Trajectory`` object that will contain all the concatenated trajectories
    :_structure: the ``Structure`` object that will contain all the structural infos about the net
    :_total_variables_count: the number of variables in the net
    """

    def __init__(self, importer: AbstractImporter):
        """Constructor Method

        :raises RuntimeError: if the importer variables or concatenated samples are ``None``,
            or if the variables frame, a samples DataFrame or a samples ndarray is empty
        """
        self._importer = importer
        variables_frame = self._importer._df_variables
        if variables_frame is None:
            raise RuntimeError('The importer object has to contain the all processed data!')
        samples = self._importer._concatenated_samples
        if samples is None:
            raise RuntimeError('The importer object has to contain the all processed data!')
        if variables_frame.empty:
            raise RuntimeError('The importer object has to contain the all processed data!')
        # Emptiness is only checked for the two container types that are recognised here.
        if isinstance(samples, pd.DataFrame) and samples.empty:
            raise RuntimeError('The importer object has to contain the all processed data!')
        if isinstance(samples, np.ndarray) and samples.size == 0:
            raise RuntimeError('The importer object has to contain the all processed data!')
        self._trajectories = None
        self._structure = None
        self._total_variables_count = None

    def build_trajectories(self) -> None:
        """Creates the ``Trajectory`` object from the importer's concatenated samples, then asks
        the importer to clear its concatenated frame.
        """
        importer = self._importer
        samples_arrays = importer.build_list_of_samples_array(importer.concatenated_samples)
        # One extra column on top of the variables listed in the sorter.
        columns_count = len(importer.sorter) + 1
        self._trajectories = Trajectory(samples_arrays, columns_count)
        importer.clear_concatenated_frame()

    def build_structure(self) -> None:
        """Creates the ``Structure`` object that aggregates all the infos about the net.

        :raises RuntimeError: if the importer sorter differs from the labels listed in the
            first column of the variables frame
        """
        importer = self._importer
        if importer.sorter != importer.variables.iloc[:, 0].to_list():
            raise RuntimeError("The Dataset columns order have to match the order of labels in the variables Frame!")
        self._total_variables_count = len(importer.sorter)
        labels = importer.variables.iloc[:, 0].to_list()
        indxs = importer.variables.index.to_numpy()
        vals = importer.variables.iloc[:, 1].to_numpy()
        # A missing or empty structure frame means no prior edges.
        has_no_edges = importer.structure is None or importer.structure.empty
        edges = [] if has_no_edges else list(importer.structure.to_records(index=False))
        self._structure = Structure(labels, indxs, vals, edges, self._total_variables_count)

    def clear_memory(self):
        """Replaces the importer's ``_raw_data`` member with an empty list."""
        self._importer._raw_data = []

    @property
    def trajectories(self) -> Trajectory:
        return self._trajectories

    @property
    def structure(self) -> Structure:
        return self._structure

    @property
    def total_variables_count(self) -> int:
        return self._total_variables_count

    @property
    def has_prior_net_structure(self) -> bool:
        """``True`` when the built structure lists at least one edge."""
        return bool(self._structure.edges)

@ -0,0 +1,97 @@
import typing
import numpy as np
from .conditional_intensity_matrix import ConditionalIntensityMatrix
class SetOfCims(object):
    """Aggregates all the CIMS of the node identified by the label _node_id.

    :param node_id: the node label
    :type node_id: string
    :param parents_states_number: the cardinalities of the parents
    :type parents_states_number: List
    :param node_states_number: the cardinality of the node
    :type node_states_number: int
    :param p_combs: the p_comb structure bound to this node
    :type p_combs: numpy.ndArray
    :_state_residence_times: matrix containing all the state residence time vectors for the node
    :_transition_matrices: matrix containing all the transition matrices for the node
    :_actual_cims: the cims of the node
    """

    def __init__(self, node_id: str, parents_states_number: typing.List, node_states_number: int, p_combs: np.ndarray):
        """Constructor Method. Also allocates the zero-filled residence times and transition matrices.
        """
        self._node_id = node_id
        self._parents_states_number = parents_states_number
        self._node_states_number = node_states_number
        self._actual_cims = []
        self._state_residence_times = None
        self._transition_matrices = None
        self._p_combs = p_combs
        self.build_times_and_transitions_structures()

    def build_times_and_transitions_structures(self) -> None:
        """Initializes at the correct dimensions the state residence times matrix and the state transition matrices.

        With no parents a single row/matrix is allocated; otherwise one row/matrix is allocated
        for each combination of the parents states (the product of the parents cardinalities).
        """
        # The builtin ``float``/``int`` are used as dtypes: the ``np.float``/``np.int`` aliases
        # were deprecated in NumPy 1.20 and removed in NumPy 1.24.
        if not self._parents_states_number:
            self._state_residence_times = np.zeros((1, self._node_states_number), dtype=float)
            self._transition_matrices = np.zeros((1, self._node_states_number, self._node_states_number), dtype=int)
        else:
            combinations_number = np.prod(self._parents_states_number)
            self._state_residence_times = \
                np.zeros((combinations_number, self._node_states_number), dtype=float)
            self._transition_matrices = np.zeros([combinations_number, self._node_states_number,
                                                  self._node_states_number], dtype=int)

    def build_cims(self, state_res_times: np.ndarray, transition_matrices: np.ndarray) -> None:
        """Build the ``ConditionalIntensityMatrix`` objects given the state residence times and transitions matrices.
        Compute the cim coefficients. The class member ``_actual_cims`` will contain the computed cims.

        :param state_res_times: the state residence times matrix
        :type state_res_times: numpy.ndArray
        :param transition_matrices: the transition matrices
        :type transition_matrices: numpy.ndArray
        """
        for state_res_time_vector, transition_matrix in zip(state_res_times, transition_matrices):
            cim_to_add = ConditionalIntensityMatrix(state_res_time_vector, transition_matrix)
            cim_to_add.compute_cim_coefficients()
            self._actual_cims.append(cim_to_add)
        self._actual_cims = np.array(self._actual_cims)
        # The sufficient statistics buffers are dropped once the cims have been built.
        self._transition_matrices = None
        self._state_residence_times = None

    def filter_cims_with_mask(self, mask_arr: np.ndarray, comb: typing.List) -> np.ndarray:
        """Filter the cims contained in the array ``_actual_cims`` given the boolean mask ``mask_arr`` and the index
        ``comb``. When ``mask_arr`` holds at most one element all the cims are returned unfiltered.

        :param mask_arr: the boolean mask that indicates which parent to consider
        :type mask_arr: numpy.array
        :param comb: the state/s of the filtered parents
        :type comb: numpy.array
        :return: Array of ``ConditionalIntensityMatrix`` objects
        :rtype: numpy.array
        """
        if mask_arr.size <= 1:
            return self._actual_cims
        else:
            # Rows of ``_p_combs`` whose masked columns all equal ``comb`` select the cims to keep.
            flat_indxs = np.argwhere(np.all(self._p_combs[:, mask_arr] == comb, axis=1)).ravel()
            return self._actual_cims[flat_indxs]

    @property
    def actual_cims(self) -> np.ndarray:
        return self._actual_cims

    @property
    def p_combs(self) -> np.ndarray:
        return self._p_combs

    def get_cims_number(self):
        """Returns the number of elements currently stored in ``_actual_cims``."""
        return len(self._actual_cims)

@ -0,0 +1,124 @@
import typing as ty
import numpy as np
class Structure(object):
    """Contains all the infos about the network structure (nodes labels, nodes cardinalities, edges, indexes)

    :param nodes_labels_list: the symbolic names of the variables
    :type nodes_labels_list: List
    :param nodes_indexes_arr: the indexes of the nodes
    :type nodes_indexes_arr: numpy.ndArray
    :param nodes_vals_arr: the cardinalities of the nodes
    :type nodes_vals_arr: numpy.ndArray
    :param edges_list: the edges of the network
    :type edges_list: List
    :param total_variables_number: the total number of variables in the dataset
    :type total_variables_number: int
    """

    def __init__(self, nodes_labels_list: ty.List, nodes_indexes_arr: np.ndarray, nodes_vals_arr: np.ndarray,
                 edges_list: ty.List, total_variables_number: int):
        """Constructor Method
        """
        self._nodes_labels_list = nodes_labels_list
        self._nodes_indexes_arr = nodes_indexes_arr
        self._nodes_vals_arr = nodes_vals_arr
        self._edges_list = edges_list
        self._total_variables_number = total_variables_number

    def remove_node(self, node_id: str) -> None:
        """Remove the node ``node_id`` from the labels, the indexes, the values and from every edge
        that starts from or ends in it.
        The class member ``_total_variables_number`` is left untouched.

        :param node_id: the label of the node to remove
        :type node_id: string
        :raises ValueError: if ``node_id`` is not one of the nodes labels
        """
        node_positional_indx = self._nodes_labels_list.index(node_id)
        del self._nodes_labels_list[node_positional_indx]
        self._nodes_indexes_arr = np.delete(self._nodes_indexes_arr, node_positional_indx)
        self._nodes_vals_arr = np.delete(self._nodes_vals_arr, node_positional_indx)
        self._edges_list = [(from_node, to_node) for (from_node, to_node) in self._edges_list
                            if (from_node != node_id and to_node != node_id)]

    @property
    def edges(self) -> ty.List:
        return self._edges_list

    @property
    def nodes_labels(self) -> ty.List:
        return self._nodes_labels_list

    @property
    def nodes_indexes(self) -> np.ndarray:
        return self._nodes_indexes_arr

    @property
    def nodes_values(self) -> np.ndarray:
        return self._nodes_vals_arr

    @property
    def total_variables_number(self) -> int:
        return self._total_variables_number

    def get_node_id(self, node_indx: int) -> str:
        """Given the ``node_index`` returns the node label.

        :param node_indx: the node index
        :type node_indx: int
        :return: the node label
        :rtype: string
        """
        return self._nodes_labels_list[node_indx]

    def clean_structure_edges(self):
        """Replaces the edges list with a new empty list."""
        self._edges_list = []

    def add_edge(self, edge: tuple):
        """Appends ``edge`` to the edges list.

        :param edge: the edge to add
        :type edge: tuple
        """
        # The edge itself is stored (the builtin ``tuple`` type used to be appended by mistake).
        self._edges_list.append(edge)

    def remove_edge(self, edge: tuple):
        """Removes the first occurrence of ``edge`` from the edges list.

        :param edge: the edge to remove
        :type edge: tuple
        :raises ValueError: if ``edge`` is not in the edges list
        """
        self._edges_list.remove(edge)

    def contains_edge(self, edge: tuple) -> bool:
        """Tells whether ``edge`` is in the edges list.

        :param edge: the edge to look for
        :type edge: tuple
        :return: True if the edge is present, False otherwise
        :rtype: bool
        """
        return edge in self._edges_list

    def get_node_indx(self, node_id: str) -> int:
        """Given the node label ``node_id`` returns the node index.

        :param node_id: the node label
        :type node_id: string
        :return: the node index
        :rtype: int
        """
        pos_indx = self._nodes_labels_list.index(node_id)
        return self._nodes_indexes_arr[pos_indx]

    def get_positional_node_indx(self, node_id: str) -> int:
        """Returns the position of ``node_id`` inside the nodes labels list.

        :param node_id: the node label
        :type node_id: string
        :return: the position of the label in the labels list
        :rtype: int
        """
        return self._nodes_labels_list.index(node_id)

    def get_states_number(self, node: str) -> int:
        """Given the node label ``node`` returns the cardinality of the node.

        :param node: the node label
        :type node: string
        :return: the node cardinality
        :rtype: int
        """
        pos_indx = self._nodes_labels_list.index(node)
        return self._nodes_vals_arr[pos_indx]

    def __repr__(self):
        return "Variables:\n" + str(self._nodes_labels_list) + "\nValues:\n" + str(self._nodes_vals_arr) + \
               "\nEdges: \n" + str(self._edges_list)

    def __eq__(self, other):
        """Overrides the default implementation: labels are compared as sets, values and indexes
        element-wise, edges as lists."""
        if isinstance(other, Structure):
            return set(self._nodes_labels_list) == set(other._nodes_labels_list) and \
                   np.array_equal(self._nodes_vals_arr, other._nodes_vals_arr) and \
                   np.array_equal(self._nodes_indexes_arr, other._nodes_indexes_arr) and \
                   self._edges_list == other._edges_list
        return False

@ -0,0 +1,45 @@
import typing
import numpy as np
class Trajectory(object):
    """Wraps a complete set of trajectories: an array of times and a matrix of values.

    :param list_of_columns: the list containing the times array and values matrix
    :type list_of_columns: List
    :param original_cols_number: total number of cols in the data
    :type original_cols_number: int
    :_actual_trajectory: the trajectory containing also the duplicated/shifted values
    :_times: the array containing the time deltas
    """

    def __init__(self, list_of_columns: typing.List, original_cols_number: int):
        """Constructor Method: element 0 of ``list_of_columns`` is kept as the times,
        element 1 as the complete trajectory.
        """
        times_column = list_of_columns[0]
        values_matrix = list_of_columns[1]
        self._times = times_column
        self._actual_trajectory = values_matrix
        self._original_cols_number = original_cols_number

    @property
    def trajectory(self) -> np.ndarray:
        """The first ``original_cols_number - 1`` columns of the complete trajectory."""
        last_column = self._original_cols_number - 1
        return self._actual_trajectory[:, :last_column]

    @property
    def complete_trajectory(self) -> np.ndarray:
        return self._actual_trajectory

    @property
    def times(self):
        return self._times

    def size(self):
        """Number of rows of the complete trajectory."""
        rows_count = self._actual_trajectory.shape[0]
        return rows_count

    def __repr__(self):
        pieces = [
            "Complete Trajectory Rows: ", str(self.size()), "\n", self.complete_trajectory.__repr__(),
            "\nTimes Rows:", str(self.times.size), "\n", self.times.__repr__(),
        ]
        return "".join(pieces)

@ -0,0 +1,4 @@
from .abstract_importer import AbstractImporter
from .cache import Cache
from .json_importer import JsonImporter
from .sample_importer import SampleImporter

@ -0,0 +1,164 @@
import typing
from abc import ABC, abstractmethod
import numpy as np
import pandas as pd
import copy
#from sklearn.utils import resample
class AbstractImporter(ABC):
    """Abstract class that exposes all the necessary methods to process the trajectories and the net structure.

    :param file_path: the file path, or dataset name if you import already processed data
    :type file_path: str
    :param trajectory_list: Dataframe or numpy array containing the concatenation of all the processed trajectories
    :type trajectory_list: typing.Union[pandas.DataFrame, numpy.ndarray]
    :param variables: Dataframe containing the nodes labels and cardinalities
    :type variables: pandas.DataFrame
    :prior_net_structure: Dataframe containing the structure of the network (edges)
    :type prior_net_structure: pandas.DataFrame
    :_sorter: A list containing the variables labels in the SAME order as the columns in ``concatenated_samples``

    .. warning::
        The parameters ``variables`` and ``prior_net_structure`` HAVE to be properly constructed
        as Pandas Dataframes with the following structure:
        Header of _df_structure = [From_Node | To_Node]
        Header of _df_variables = [Variable_Label | Variable_Cardinality]
        See the tutorial on how to construct a correct ``concatenated_samples`` Dataframe/ndarray.

    .. note::
        See :class:``JsonImporter`` for an example implementation
    """

    def __init__(self, file_path: str = None, trajectory_list: typing.Union[pd.DataFrame, np.ndarray] = None,
                 variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None):
        """Constructor
        """
        self._file_path = file_path
        self._df_samples_list = trajectory_list
        self._concatenated_samples = []
        self._df_variables = variables
        self._df_structure = prior_net_structure
        self._sorter = None
        super().__init__()

    @abstractmethod
    def build_sorter(self, trajecory_header: object) -> typing.List:
        """Initializes the ``_sorter`` class member from a trajectory dataframe, extracting the header of the frame
        and keeping ONLY the variables symbolic labels, cutting out the time label in the header.

        :param trajecory_header: an object that will be used to define the header
        :type trajecory_header: object
        :return: A list containing the processed header.
        :rtype: List
        """
        pass

    def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame,
                                              columns_header: typing.List, shifted_cols_header: typing.List) \
            -> pd.DataFrame:
        """Computes the difference between each value present in the time column.
        Copies and shifts by one position up all the values present in the remaining columns.
        The input frame is not modified: the processing is done on a deep copy.

        :param sample_frame: the traj to be processed
        :type sample_frame: pandas.Dataframe
        :param columns_header: the original header of sample_frame
        :type columns_header: List
        :param shifted_cols_header: a copy of columns_header with changed names of the contents
        :type shifted_cols_header: List
        :return: The processed dataframe
        :rtype: pandas.Dataframe

        .. warning::
            the Dataframe ``sample_frame`` has to follow the column structure of this header:
            Header of sample_frame = [Time | Variable values]
        """
        sample_frame = sample_frame.copy(deep=True)
        # Each row receives the delta between the next timestamp and its own one.
        sample_frame.iloc[:, 0] = sample_frame.iloc[:, 0].diff().shift(-1)
        shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32')
        shifted_cols.columns = shifted_cols_header
        sample_frame = sample_frame.assign(**shifted_cols)
        # The last row has no successor, so it carries no delta and no shifted values.
        sample_frame.drop(sample_frame.tail(1).index, inplace=True)
        return sample_frame

    def compute_row_delta_in_all_samples_frames(self, df_samples_list: typing.List) -> None:
        """Calls the method ``compute_row_delta_sigle_samples_frame`` on every dataframe present in the list
        ``df_samples_list``.
        Concatenates the result in the dataframe ``concatenated_samples``, with the columns ordered as
        [time column | variables in ``_sorter`` order | shifted variables].

        :param df_samples_list: the dataframe's list to be processed and concatenated
        :type df_samples_list: List
        :raises RuntimeError: if the class member ``_sorter`` is not initialized

        .. warning::
            The Dataframe sample_frame has to follow the column structure of this header:
            Header of sample_frame = [Time | Variable values]
            The class member self._sorter HAS to be properly INITIALIZED (See class members definition doc)

        .. note::
            After the call of this method the class member ``concatenated_samples`` will contain all processed
            and merged trajectories
        """
        if not self._sorter:
            raise RuntimeError("The class member self._sorter has to be INITIALIZED!")
        shifted_cols_header = [s + "S" for s in self._sorter]
        compute_row_delta = self.compute_row_delta_sigle_samples_frame
        proc_samples_list = [compute_row_delta(sample, self._sorter, shifted_cols_header)
                             for sample in df_samples_list]
        self._concatenated_samples = pd.concat(proc_samples_list)
        # The time column is handled by position (first column) everywhere else, so its label is
        # read from the data rather than assumed to be 'Time'.
        time_label = self._concatenated_samples.columns[0]
        complete_header = [time_label] + list(self._sorter) + shifted_cols_header
        self._concatenated_samples = self._concatenated_samples[complete_header]

    def build_list_of_samples_array(self, concatenated_sample: pd.DataFrame) -> typing.List:
        """Builds a List containing the delta times numpy array, and the complete transitions matrix

        :param concatenated_sample: the dataframe/array from which the time, and transitions matrix have to be extracted
            and converted
        :type concatenated_sample: pandas.Dataframe
        :return: the resulting list of numpy arrays
        :rtype: List
        """
        concatenated_array = concatenated_sample.to_numpy()
        # Column 0 holds the times; every other column is cast to int.
        columns_list = [concatenated_array[:, 0], concatenated_array[:, 1:].astype(int)]
        return columns_list

    def clear_concatenated_frame(self) -> None:
        """Removes all values in the dataframe concatenated_samples (only when it is a DataFrame).
        """
        if isinstance(self._concatenated_samples, pd.DataFrame):
            self._concatenated_samples = self._concatenated_samples.iloc[0:0]

    @abstractmethod
    def dataset_id(self) -> object:
        """If the original dataset contains multiple dataset, this method returns a unique id to identify the current
        dataset
        """
        pass

    @property
    def concatenated_samples(self) -> pd.DataFrame:
        return self._concatenated_samples

    @property
    def variables(self) -> pd.DataFrame:
        return self._df_variables

    @property
    def structure(self) -> pd.DataFrame:
        return self._df_structure

    @property
    def sorter(self) -> typing.List:
        return self._sorter

    @property
    def file_path(self) -> str:
        return self._file_path

@ -0,0 +1,58 @@
import typing
from ..structure_graph.set_of_cims import SetOfCims
class Cache:
    """Stores ``SetOfCims`` objects next to the parents set each of them was computed for.

    :_list_of_sets_of_parents: a list of ``Sets`` objects of the parents to which the cim in cache at SAME
        index is related
    :_actual_cache: a list of setOfCims objects
    """

    def __init__(self):
        """Constructor Method: both parallel lists start empty.
        """
        self._list_of_sets_of_parents = []
        self._actual_cache = []

    def find(self, parents_comb: typing.Set):  # typing.Union[typing.Set, str]
        """Looks up the ``SetOfCims`` stored for the symbolic parents combination ``parents_comb``.

        :param parents_comb: the parents related to that ``SetOfCims``
        :type parents_comb: Set
        :return: A ``SetOfCims`` object if ``parents_comb`` is found in ``_list_of_sets_of_parents``.
            None otherwise.
        :rtype: SetOfCims
        """
        try:
            position = self._list_of_sets_of_parents.index(parents_comb)
        except ValueError:
            # ``list.index`` signals a cache miss with ValueError.
            return None
        return self._actual_cache[position]

    def put(self, parents_comb: typing.Set, socim: SetOfCims):
        """Stores ``socim`` together with its symbolic index ``parents_comb``; both are appended at
        the same position of their respective lists.

        :param parents_comb: the symbolic set index
        :type parents_comb: Set
        :param socim: the related SetOfCims object
        :type socim: SetOfCims
        """
        self._list_of_sets_of_parents.append(parents_comb)
        self._actual_cache.append(socim)

    def clear(self):
        """Empties, in place, both ``_actual_cache`` and ``_list_of_sets_of_parents``.
        """
        self._list_of_sets_of_parents.clear()
        self._actual_cache.clear()

@ -0,0 +1,176 @@
import json
import typing
import pandas as pd
from .abstract_importer import AbstractImporter
class JsonImporter(AbstractImporter):
    """Concrete importer for datasets stored in a json file: implements the abstract methods of
    AbstractImporter and adds the methods needed to read and prepare the json data.

    :param file_path: the path of the file that contains the data to be imported
    :type file_path: string
    :param samples_label: the reference key for the samples in the trajectories
    :type samples_label: string
    :param structure_label: the reference key for the structure of the network data
    :type structure_label: string
    :param variables_label: the reference key for the cardinalities of the nodes data
    :type variables_label: string
    :param time_key: the key used to identify the timestamps in each trajectory
    :type time_key: string
    :param variables_key: the key used to identify the names of the variables in the net
    :type variables_key: string
    :_array_indx: the index of the outer JsonArray to extract the data from
    :type _array_indx: int
    :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory
    :_raw_data: The raw contents of the json file to import
    :type _raw_data: List
    """

    def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
                 variables_key: str):
        """Constructor method

        .. note::
            This constructor calls also the method ``read_json_file()``, so after the construction of the object
            the class member ``_raw_data`` will contain the raw imported json data.
        """
        self._samples_label = samples_label
        self._structure_label = structure_label
        self._variables_label = variables_label
        self._time_key = time_key
        self._variables_key = variables_key
        self._df_samples_list = None
        self._array_indx = None
        super().__init__(file_path)
        # The file path is stored by the parent constructor, so the file is read afterwards.
        self._raw_data = self.read_json_file()

    def import_data(self, indx: int) -> None:
        """Imports trajectories, structure and variables of the dataset at position ``indx``.

        :param indx: the index of the outer JsonArray to extract the data from
        :type indx: int
        """
        self._array_indx = indx
        raw = self._raw_data
        self._df_samples_list = self.import_trajectories(raw)
        first_trajectory = self._df_samples_list[0]
        self._sorter = self.build_sorter(first_trajectory)
        self.compute_row_delta_in_all_samples_frames(self._df_samples_list)
        # Once concatenated, the single trajectory frames are emptied.
        self.clear_data_frame_list()
        self._df_structure = self.import_structure(raw)
        self._df_variables = self.import_variables(raw)

    def import_trajectories(self, raw_data: typing.List) -> typing.List:
        """Imports the trajectories from the list of dicts ``raw_data``.

        :param raw_data: List of Dicts
        :type raw_data: List
        :return: List of dataframes containing all the trajectories
        :rtype: List
        """
        return self.normalize_trajectories(raw_data, self._array_indx, self._samples_label)

    def import_structure(self, raw_data: typing.List) -> pd.DataFrame:
        """Imports in a dataframe the data in the list raw_data at the key ``_structure_label``

        :param raw_data: List of Dicts
        :type raw_data: List
        :return: Dataframe containing the starting node and ending node of every arc of the network
        :rtype: pandas.Dataframe
        """
        return self.one_level_normalizing(raw_data, self._array_indx, self._structure_label)

    def import_variables(self, raw_data: typing.List) -> pd.DataFrame:
        """Imports the data in ``raw_data`` at the key ``_variables_label``.

        :param raw_data: List of Dicts
        :type raw_data: List
        :return: Dataframe containing the variables symbolic labels and their cardinalities
        :rtype: pandas.Dataframe
        """
        return self.one_level_normalizing(raw_data, self._array_indx, self._variables_label)

    def read_json_file(self) -> typing.List:
        """Reads the JSON file found at the path stored in ``_file_path``.

        :return: The contents of the json file
        :rtype: List
        """
        with open(self._file_path) as json_file:
            return json.load(json_file)

    def one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame:
        """Extracts the one-level nested data in the list ``raw_data`` at the index ``indx`` at the key ``key``.

        :param raw_data: List of Dicts
        :type raw_data: List
        :param indx: The index of the array from which the data have to be extracted
        :type indx: int
        :param key: the key for the Dicts from which extract data
        :type key: string
        :return: A normalized dataframe
        :rtype: pandas.Dataframe
        """
        selected_data = raw_data[indx][key]
        return pd.DataFrame(selected_data)

    def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str) -> typing.List:
        """Extracts the trajectories in ``raw_data`` at the index ``indx`` at the key ``trajectories_key``.

        :param raw_data: List of Dicts
        :type raw_data: List
        :param indx: The index of the array from which the data have to be extracted
        :type indx: int
        :param trajectories_key: the key of the trajectories objects
        :type trajectories_key: string
        :return: A list of dataframes containing the trajectories
        :rtype: List
        """
        raw_trajectories = raw_data[indx][trajectories_key]
        return list(map(pd.DataFrame, raw_trajectories))

    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
        """Implements the abstract method build_sorter of the :class:`AbstractImporter` for this dataset:
        the header of ``sample_frame`` without the ``_time_key`` label.
        """
        variables_labels = list(sample_frame.columns.values)
        variables_labels.remove(self._time_key)
        return variables_labels

    def clear_data_frame_list(self) -> None:
        """Removes all values present in the dataframes in the list ``_df_samples_list``.
        """
        # Slice assignment keeps the very same list object, replacing each frame with an empty one.
        self._df_samples_list[:] = [frame.iloc[0:0] for frame in self._df_samples_list]

    def dataset_id(self) -> object:
        """Returns the index of the outer array the data were imported from."""
        return self._array_indx

    def import_sampled_cims(self, raw_data: typing.List, indx: int, cims_key: str) -> typing.Dict:
        """Imports the synthetic CIMS in the dataset in a dictionary, using variables labels
        as keys for the set of CIMS of a particular node.

        :param raw_data: List of Dicts
        :type raw_data: List
        :param indx: The index of the array from which the data have to be extracted
        :type indx: int
        :param cims_key: the key where the json object cims are placed
        :type cims_key: string
        :return: a dictionary containing the sampled CIMS for all the variables in the net
        :rtype: Dictionary
        """
        cims_section = raw_data[indx][cims_key]
        return {
            var: [pd.DataFrame(cims_section[var][p_comb]).to_numpy() for p_comb in cims_section[var]]
            for var in cims_section
        }

@ -0,0 +1,65 @@
import json
import typing
import pandas as pd
import numpy as np
from .abstract_importer import AbstractImporter
class SampleImporter(AbstractImporter):
    """Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare
    the data loaded directly by using DataFrame

    :param trajectory_list: the data that describes the trajectories
    :type trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List]
    :param variables: the data that describes the variables with name and cardinality
    :type variables: typing.Union[pd.DataFrame, np.ndarray, typing.List]
    :param prior_net_structure: the data of the real structure, if it exists
    :type prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List]
    :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory
    """

    def __init__(self,
                 trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None,
                 variables: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None,
                 prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None):
        """Constructor: ``variables`` and ``prior_net_structure`` are converted to DataFrames
        when they are given as lists or numpy arrays.
        """
        if isinstance(variables, (list, np.ndarray)):
            variables = pd.DataFrame(variables)
        # Each argument is checked on its own type: the prior structure conversion must not
        # depend on the type of ``variables``.
        if isinstance(prior_net_structure, (list, np.ndarray)):
            prior_net_structure = pd.DataFrame(prior_net_structure)

        super(SampleImporter, self).__init__(trajectory_list=trajectory_list,
                                             variables=variables,
                                             prior_net_structure=prior_net_structure)

    def import_data(self, header_column=None):
        """Initializes the ``_sorter`` and computes the concatenated samples.

        :param header_column: the variables labels to use as sorter; when ``None`` the sorter is
            built from the first trajectory in ``_df_samples_list``
        :type header_column: List
        """
        if header_column is not None:
            self._sorter = header_column
        else:
            self._sorter = self.build_sorter(self._df_samples_list[0])

        samples_list = self._df_samples_list

        if isinstance(samples_list, np.ndarray):
            samples_list = samples_list.tolist()

        self.compute_row_delta_in_all_samples_frames(samples_list)

    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
        """Implements the abstract method build_sorter of the :class:`AbstractImporter` in order to get the ordered
        variables list: the header of ``sample_frame`` without its first column.
        """
        columns_header = list(sample_frame.columns.values)
        del columns_header[0]
        return columns_header

    def dataset_id(self) -> object:
        """No identifier is defined for this importer: always returns ``None``."""
        return None

@ -0,0 +1,8 @@
import PyCTBN.PyCTBN.estimators
from PyCTBN.PyCTBN.estimators import *
import PyCTBN.PyCTBN.optimizers
from PyCTBN.PyCTBN.optimizers import *
import PyCTBN.PyCTBN.structure_graph
from PyCTBN.PyCTBN.structure_graph import *
import PyCTBN.PyCTBN.utility
from PyCTBN.PyCTBN.utility import *

@ -0,0 +1,5 @@
from .fam_score_calculator import FamScoreCalculator
from .parameters_estimator import ParametersEstimator
from .structure_estimator import StructureEstimator
from .structure_constraint_based_estimator import StructureConstraintBasedEstimator
from .structure_score_based_estimator import StructureScoreBasedEstimator

@ -0,0 +1,272 @@
import itertools
import json
import typing
import networkx as nx
import numpy as np
from networkx.readwrite import json_graph
from math import log
from scipy.special import loggamma
from random import choice
from ..structure_graph.set_of_cims import SetOfCims
from ..structure_graph.network_graph import NetworkGraph
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
'''
'''
class FamScoreCalculator:
"""
Has the task of calculating the FamScore of a node by using a Bayesian score function
"""
def __init__(self):
    """Constructor Method: no member is initialized."""
    # np.seterr('raise')
    return None
# region theta
def marginal_likelihood_theta(self,
                              cims: ConditionalIntensityMatrix,
                              alpha_xu: float,
                              alpha_xxu: float):
    """
    Sums, over every cim in ``cims``, the marginal likelihood over theta computed by
    ``variable_cim_xu_marginal_likelihood_theta``.

    :param cims: np.array with all the node's cims
    :type cims: np.array
    :param alpha_xu: hyperparameter over the CTBNs q parameters
    :type alpha_xu: float
    :param alpha_xxu: distributed hyperparameter over the CTBNs theta parameters
    :type alpha_xxu: float
    :return: the value of the marginal likelihood over theta
    :rtype: float
    """
    per_cim_likelihoods = []
    for cim in cims:
        per_cim_likelihoods.append(
            self.variable_cim_xu_marginal_likelihood_theta(cim, alpha_xu, alpha_xxu))
    return np.sum(per_cim_likelihoods)
def variable_cim_xu_marginal_likelihood_theta(self,
                                              cim: ConditionalIntensityMatrix,
                                              alpha_xu: float,
                                              alpha_xxu: float):
    """
    Sums the marginal likelihood over theta of every state index of the given cim.

    :param cim: A conditional_intensity_matrix object with the sufficient statistics
    :type cim: class:'ConditionalIntensityMatrix'
    :param alpha_xu: hyperparameter over the CTBNs q parameters
    :type alpha_xu: float
    :param alpha_xxu: distributed hyperparameter over the CTBNs theta parameters
    :type alpha_xxu: float
    :return: the value of the marginal likelihood over theta
    :rtype: float
    """
    # One contribution for each entry of the cim state residence times.
    states_count = len(cim._state_residence_times)
    contributions = []
    for state_index in range(states_count):
        contributions.append(
            self.single_cim_xu_marginal_likelihood_theta(state_index, cim, alpha_xu, alpha_xxu))
    return np.sum(contributions)
def single_cim_xu_marginal_likelihood_theta(self,
                                            index: int,
                                            cim: ConditionalIntensityMatrix,
                                            alpha_xu: float,
                                            alpha_xxu: float):
    """
    Calculate the marginal likelihood over theta of the node when it assumes a specific value
    (the state ``index``) and a specific parents' assignment.

    :param index: the index of the state assumed by the node
    :type index: int
    :param cim: A conditional_intensity_matrix object with the sufficient statistics
    :type cim: class:'ConditionalIntensityMatrix'
    :param alpha_xu: hyperparameter over the CTBNs q parameters
    :type alpha_xu: float
    :param alpha_xxu: distributed hyperparameter over the CTBNs theta parameters
    :type alpha_xxu: float
    :return: the value of the marginal likelihood over theta when the node assumes a specific value
    :rtype: float
    """
    other_states = [*range(len(cim._state_residence_times))]
    # the state itself is excluded because of the x != x^ condition in the summation
    other_states.remove(index)

    # uncomment for alpha xx not uniform
    #alpha_xxu = alpha_xu * cim.state_transition_matrix[index,index_x_first] / cim.state_transition_matrix[index, index])

    leaving_term = loggamma(alpha_xu) - loggamma(alpha_xu + cim.state_transition_matrix[index, index])
    transitions_terms = []
    for index_x_first in other_states:
        transitions_terms.append(
            self.single_internal_cim_xxu_marginal_likelihood_theta(
                cim.state_transition_matrix[index, index_x_first],
                alpha_xxu))
    return leaving_term + np.sum(transitions_terms)
def single_internal_cim_xxu_marginal_likelihood_theta(self,
                                                      M_xxu_suff_stats: float,
                                                      alpha_xxu: float=1):
    """Calculate the second part of the marginal likelihood over theta formula:
    ``loggamma(alpha_xxu + M_xxu_suff_stats) - loggamma(alpha_xxu)``.

    :param M_xxu_suff_stats: value of the sufficient statistic M[xx'|u]
    :type M_xxu_suff_stats: float
    :param alpha_xxu: distributed hyperparameter over the CTBNs theta parameters
    :type alpha_xxu: float
    :return: the value of the marginal likelihood over theta when the node assumes a specific value
    :rtype: float
    """
    posterior_term = loggamma(alpha_xxu + M_xxu_suff_stats)
    prior_term = loggamma(alpha_xxu)
    return posterior_term - prior_term
# endregion
# region q
def marginal_likelihood_q(self,
        cims: np.array,
        tau_xu: float=0.1,
        alpha_xu: float=1):
    """
    Calculate the marginal likelihood over q of a node by summing the
    contribution of each of its cims.

    :param cims: np.array with all the node's cims
    :type cims: np.array
    :param tau_xu: hyperparameter over the CTBNs q parameters
    :type tau_xu: float
    :param alpha_xu: hyperparameter over the CTBNs q parameters
    :type alpha_xu: float
    :return: the value of the marginal likelihood over q
    :rtype: float
    """
    per_cim_scores = []
    for cim in cims:
        per_cim_scores.append(
            self.variable_cim_xu_marginal_likelihood_q(cim, tau_xu, alpha_xu))
    return np.sum(per_cim_scores)
def variable_cim_xu_marginal_likelihood_q(self,
        cim: ConditionalIntensityMatrix,
        tau_xu: float=0.1,
        alpha_xu: float=1):
    """
    Calculate the marginal likelihood over q for a single cim by summing the
    contribution of every state of the node.

    :param cim: A conditional_intensity_matrix object with the sufficient statistics
    :type cim: class:'ConditionalIntensityMatrix'
    :param tau_xu: hyperparameter over the CTBNs q parameters
    :type tau_xu: float
    :param alpha_xu: hyperparameter over the CTBNs q parameters
    :type alpha_xu: float
    :return: the value of the marginal likelihood over q
    :rtype: float
    """
    # one entry of the residence-times vector per state of the node
    n_states = len(cim._state_residence_times)

    state_scores = []
    for state in range(n_states):
        # diagonal count and residence time of the current state
        state_scores.append(
            self.single_cim_xu_marginal_likelihood_q(
                cim.state_transition_matrix[state, state],
                cim._state_residence_times[state],
                tau_xu,
                alpha_xu))
    return np.sum(state_scores)
def single_cim_xu_marginal_likelihood_q(self,
        M_xu_suff_stats: float,
        T_xu_suff_stats: float,
        tau_xu: float=0.1,
        alpha_xu: float=1):
    """
    Calculate the marginal likelihood over q for one state of the node under
    a specific parents' assignment, given its sufficient statistics.

    :param M_xu_suff_stats: value of the sufficient statistic M[x|u]
    :type M_xu_suff_stats: float
    :param T_xu_suff_stats: value of the sufficient statistic T[x|u]
    :type T_xu_suff_stats: float
    :param tau_xu: hyperparameter over the CTBNs q parameters
    :type tau_xu: float
    :param alpha_xu: hyperparameter over the CTBNs q parameters
    :type alpha_xu: float
    :return: the value of the marginal likelihood of the node when it assumes a specific value
    :rtype: float
    """
    # part that is added: posterior gamma term plus the prior tau term
    positive_part = loggamma(alpha_xu + M_xu_suff_stats + 1) + (log(tau_xu) * (alpha_xu + 1))
    # part that is subtracted: prior gamma term plus the posterior tau term
    negative_part = loggamma(alpha_xu + 1) + (
        log(tau_xu + T_xu_suff_stats) * (alpha_xu + M_xu_suff_stats + 1))
    return positive_part - negative_part
# endregion
def get_fam_score(self,
        cims: np.array,
        tau_xu: float=0.1,
        alpha_xu: float=1):
    """
    Calculate the FamScore value of the node as the sum of the marginal
    likelihood over q and the marginal likelihood over theta.

    :param cims: np.array with all the node's cims
    :type cims: np.array
    :param tau_xu: hyperparameter over the CTBNs q parameters, default to 0.1
    :type tau_xu: float, optional
    :param alpha_xu: hyperparameter over the CTBNs q parameters, default to 1
    :type alpha_xu: float, optional
    :return: the FamScore value of the node
    :rtype: float
    """
    # alpha_xxu is spread uniformly over the (number of states - 1) possible destinations
    n_states = len(cims[0]._state_residence_times)
    alpha_xxu = alpha_xu / (n_states - 1)

    q_score = self.marginal_likelihood_q(cims, tau_xu, alpha_xu)
    theta_score = self.marginal_likelihood_theta(cims, alpha_xu, alpha_xxu)
    return q_score + theta_score

@ -0,0 +1,143 @@
import sys
sys.path.append('../')
import numpy as np
from ..structure_graph.network_graph import NetworkGraph
from ..structure_graph.set_of_cims import SetOfCims
from ..structure_graph.trajectory import Trajectory
class ParametersEstimator(object):
    """Has the task of computing the cims of particular node given the trajectories and the net structure
    in the graph ``_net_graph``.

    :param trajectories: the trajectories
    :type trajectories: Trajectory
    :param net_graph: the net structure
    :type net_graph: NetworkGraph
    :_single_set_of_cims: the set of cims object that will hold the cims of the node
    """

    def __init__(self, trajectories: Trajectory, net_graph: NetworkGraph):
        """Constructor Method
        """
        self._trajectories = trajectories
        self._net_graph = net_graph
        self._single_set_of_cims = None

    def fast_init(self, node_id: str) -> None:
        """Initializes all the necessary structures for the parameters estimation for the node ``node_id``.

        :param node_id: the node label
        :type node_id: string
        """
        p_vals = self._net_graph._aggregated_info_about_nodes_parents[2]
        node_states_number = self._net_graph.get_states_number(node_id)
        self._single_set_of_cims = SetOfCims(node_id, p_vals, node_states_number, self._net_graph.p_combs)

    def compute_parameters_for_node(self, node_id: str) -> SetOfCims:
        """Compute the CIMS of the node identified by the label ``node_id``.
        ``fast_init`` must have been called before, since the buffers of ``_single_set_of_cims`` are
        filled in place here.

        :param node_id: the node label
        :type node_id: string
        :return: A SetOfCims object filled with the computed CIMS
        :rtype: SetOfCims
        """
        node_indx = self._net_graph.get_node_indx(node_id)
        state_res_times = self._single_set_of_cims._state_residence_times
        transition_matrices = self._single_set_of_cims._transition_matrices
        ParametersEstimator.compute_state_res_time_for_node(self._trajectories.times,
                                                            self._trajectories.trajectory,
                                                            self._net_graph.time_filtering,
                                                            self._net_graph.time_scalar_indexing_strucure,
                                                            state_res_times)
        ParametersEstimator.compute_state_transitions_for_a_node(node_indx, self._trajectories.complete_trajectory,
                                                                 self._net_graph.transition_filtering,
                                                                 self._net_graph.transition_scalar_indexing_structure,
                                                                 transition_matrices)
        self._single_set_of_cims.build_cims(state_res_times, transition_matrices)
        return self._single_set_of_cims

    @staticmethod
    def compute_state_res_time_for_node(times: np.ndarray, trajectory: np.ndarray,
                                        cols_filter: np.ndarray, scalar_indexes_struct: np.ndarray,
                                        T: np.ndarray) -> None:
        """Compute the state residence times for a node and fill the matrix ``T`` with the results

        :param times: the times deltas vector
        :type times: numpy.array
        :param trajectory: the trajectory
        :type trajectory: numpy.ndArray
        :param cols_filter: the columns filtering structure
        :type cols_filter: numpy.array
        :param scalar_indexes_struct: the indexing structure
        :type scalar_indexes_struct: numpy.array
        :param T: the state residence times vectors
        :type T: numpy.ndArray
        """
        # Each row of the filtered trajectory is collapsed into a single scalar bin index; the time
        # deltas are then accumulated per bin. The builtin ``int`` replaces the ``np.int`` alias,
        # which no longer exists in recent NumPy releases.
        bin_indexes = np.sum(trajectory[:, cols_filter] * scalar_indexes_struct / scalar_indexes_struct[0],
                             axis=1).astype(int)
        T[:] = np.bincount(bin_indexes, times,
                           minlength=scalar_indexes_struct[-1]).reshape(-1, T.shape[1])

    @staticmethod
    def compute_state_transitions_for_a_node(node_indx: int, trajectory: np.ndarray, cols_filter: np.ndarray,
                                             scalar_indexing: np.ndarray, M: np.ndarray) -> None:
        """Compute the state transitions counts for a node and fill the matrices ``M`` with the results.

        :param node_indx: the index of the node
        :type node_indx: int
        :param trajectory: the trajectory
        :type trajectory: numpy.ndArray
        :param cols_filter: the columns filtering structure
        :type cols_filter: numpy.array
        :param scalar_indexing: the indexing structure
        :type scalar_indexing: numpy.array
        :param M: the state transitions matrices
        :type M: numpy.ndArray
        """
        # flat positions of the diagonal entries of every matrix stacked in M
        diag_indices = np.array([x * M.shape[1] + x % M.shape[1] for x in range(M.shape[0] * M.shape[1])],
                                dtype=np.int64)
        # keep only the rows whose value in the second-half column of this node is non-negative
        trj_tmp = trajectory[trajectory[:, int(trajectory.shape[1] / 2) + node_indx].astype(int) >= 0]
        bin_indexes = np.sum(trj_tmp[:, cols_filter] * scalar_indexing / scalar_indexing[0], axis=1).astype(int)
        M[:] = np.bincount(bin_indexes, minlength=scalar_indexing[-1]).reshape(-1, M.shape[1], M.shape[2])
        M_raveled = M.ravel()
        # the diagonal is first cleared and then set to the sum of the off-diagonal counts of its row
        M_raveled[diag_indices] = 0
        M_raveled[diag_indices] = np.sum(M, axis=2).ravel()

    def init_sets_cims_container(self):
        """Legacy helper that builds a container with the sets of cims of all the nodes.

        NOTE(review): this method uses ``acims`` and ``self.net_graph``; neither is defined by this
        class nor imported in the visible part of the module, so it looks like a leftover of an older
        API -- confirm before relying on it.
        """
        self.sets_of_cims_struct = acims.SetsOfCimsContainer(self.net_graph.nodes,
                                                             self.net_graph.nodes_values,
                                                             self.net_graph.get_ordered_by_indx_parents_values_for_all_nodes(),
                                                             self.net_graph.p_combs)

    def compute_parameters(self):
        """Legacy helper that computes the cims of every node of the net.

        NOTE(review): this method reads ``self.net_graph`` and ``self.sample_path`` (not set by
        ``__init__``) and passes six positional arguments to ``compute_state_res_time_for_node``,
        which accepts five -- it looks like a leftover of an older API, confirm before relying on it.
        """
        for indx, aggr in enumerate(zip(self.net_graph.nodes, self.sets_of_cims_struct.sets_of_cims)):
            self.compute_state_res_time_for_node(self.net_graph.get_node_indx(aggr[0]), self.sample_path.trajectories.times,
                                                 self.sample_path.trajectories.trajectory,
                                                 self.net_graph.time_filtering[indx],
                                                 self.net_graph.time_scalar_indexing_strucure[indx],
                                                 aggr[1]._state_residence_times)
            self.compute_state_transitions_for_a_node(self.net_graph.get_node_indx(aggr[0]),
                                                      self.sample_path.trajectories.complete_trajectory,
                                                      self.net_graph.transition_filtering[indx],
                                                      self.net_graph.transition_scalar_indexing_structure[indx],
                                                      aggr[1]._transition_matrices)
            aggr[1].build_cims(aggr[1]._state_residence_times, aggr[1]._transition_matrices)

@ -0,0 +1,238 @@
import itertools
import json
import typing
import networkx as nx
import numpy as np
from networkx.readwrite import json_graph
import os
from scipy.stats import chi2 as chi2_dist
from scipy.stats import f as f_dist
from tqdm import tqdm
from ..utility.cache import Cache
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
from ..structure_graph.network_graph import NetworkGraph
from .parameters_estimator import ParametersEstimator
from .structure_estimator import StructureEstimator
from ..structure_graph.sample_path import SamplePath
from ..structure_graph.structure import Structure
from ..optimizers.constraint_based_optimizer import ConstraintBasedOptimizer
import concurrent.futures
import multiprocessing
from multiprocessing import Pool
class StructureConstraintBasedEstimator(StructureEstimator):
    """
    Has the task of estimating the network structure given the trajectories in samplepath by using a constraint-based approach.

    :param sample_path: the _sample_path object containing the trajectories and the real structure
    :type sample_path: SamplePath
    :param exp_test_alfa: the significance level for the exponential Hp test
    :type exp_test_alfa: float
    :param chi_test_alfa: the significance level for the chi Hp test
    :type chi_test_alfa: float
    :param known_edges: edges passed to the base class as prior knowledge, default to []
    :type known_edges: List, optional
    :param thumb_threshold: threshold compared against the thumb value in ``independence_test``, default to 25
    :type thumb_threshold: int, optional
    :_nodes: the nodes labels
    :_nodes_vals: the nodes cardinalities
    :_nodes_indxs: the nodes indexes
    :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes``
    :_cache: the Cache object
    """

    def __init__(self, sample_path: SamplePath, exp_test_alfa: float, chi_test_alfa: float,known_edges: typing.List= [],thumb_threshold:int = 25):
        super().__init__(sample_path,known_edges)
        self._exp_test_sign = exp_test_alfa
        self._chi_test_alfa = chi_test_alfa
        self._thumb_threshold = thumb_threshold
        self._cache = Cache()

    def _build_test_graph(self, graph_nodes: typing.List, parents: typing.List, test_child: str,
                          tot_vars_count: int) -> NetworkGraph:
        """Builds the NetworkGraph restricted to ``graph_nodes`` with one edge from every parent to ``test_child``.

        :param graph_nodes: labels of the nodes to keep (parents and child)
        :type graph_nodes: List
        :param parents: labels of the parents of ``test_child``
        :type parents: List
        :param test_child: the node label of the child
        :type test_child: string
        :param tot_vars_count: the total number of variables in the net
        :type tot_vars_count: int
        :return: the graph built on the restricted structure
        :rtype: NetworkGraph
        """
        bool_mask = np.isin(self._nodes, graph_nodes)
        labels = list(self._nodes[bool_mask])
        indxs = self._nodes_indxs[bool_mask]
        vals = self._nodes_vals[bool_mask]
        # the child is wrapped in a list: product() over a bare string would iterate on its characters
        edges = list(itertools.product(parents, [test_child]))
        return NetworkGraph(Structure(labels, indxs, vals, edges, tot_vars_count))

    def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int,
                      tot_vars_count: int, parent_indx, child_indx) -> bool:
        """Performs a complete independence test on the directed graphs G1 = {test_child U parent_set}
        G2 = {G1 U test_parent} (added as an additional parent of the test_child).
        Generates all the necessary structures and datas to perform the tests.

        :param test_parent: the node label of the test parent
        :type test_parent: string
        :param test_child: the node label of the child
        :type test_child: string
        :param parent_set: the common parent set
        :type parent_set: List
        :param child_states_numb: the cardinality of the ``test_child``
        :type child_states_numb: int
        :param tot_vars_count: the total number of variables in the net
        :type tot_vars_count: int
        :param parent_indx: index of ``test_parent``, used to address ``_removable_edges_matrix``
        :param child_indx: index of ``test_child``, used to address ``_removable_edges_matrix``
        :return: True iff test_child and test_parent are independent given the sep_set parent_set. False otherwise
        :rtype: bool
        """
        p_set = parent_set[:]
        complete_info = parent_set[:]
        complete_info.append(test_child)

        parents = np.array(parent_set)
        parents = np.append(parents, test_parent)
        sorted_parents = self._nodes[np.isin(self._nodes, parents)]
        cims_filter = sorted_parents != test_parent

        # cims of the child with parents = parent_set U {test_parent}
        p_set.insert(0, test_parent)
        g2 = None
        sofc2 = self._cache.find(set(p_set))
        if not sofc2:
            g2 = self._build_test_graph(complete_info + [test_parent], p_set, test_child, tot_vars_count)
            g2.fast_init(test_child)
            p2 = ParametersEstimator(self._sample_path.trajectories, g2)
            p2.fast_init(test_child)
            sofc2 = p2.compute_parameters_for_node(test_child)
            self._cache.put(set(p_set), sofc2)

        # cims of the child with parents = parent_set only
        del p_set[0]
        sofc1 = self._cache.find(set(p_set))
        if not sofc1:
            if g2 is None:
                # sofc2 came from the cache, so no graph exists yet: build the smaller one directly
                # NOTE(review): assumes a graph built without test_parent is equivalent to removing
                # test_parent from the larger graph -- confirm against NetworkGraph.remove_node
                g1 = self._build_test_graph(complete_info, p_set, test_child, tot_vars_count)
            else:
                g2.remove_node(test_parent)
                g1 = g2
            g1.fast_init(test_child)
            p1 = ParametersEstimator(self._sample_path.trajectories, g1)
            p1.fast_init(test_child)
            sofc1 = p1.compute_parameters_for_node(test_child)
            self._cache.put(set(p_set), sofc1)

        thumb_value = 0.0
        if child_states_numb > 2:
            parent_val = self._sample_path.structure.get_states_number(test_parent)
            bool_mask_vals = np.isin(self._nodes, parent_set)
            parents_vals = self._nodes_vals[bool_mask_vals]
            thumb_value = self.compute_thumb_value(parent_val, child_states_numb, parents_vals)

        # every cim of G1 is compared with the cims of G2 sharing the same parents' combination
        for cim1, p_comb in zip(sofc1.actual_cims, sofc1.p_combs):
            cond_cims = sofc2.filter_cims_with_mask(cims_filter, p_comb)
            for cim2 in cond_cims:
                if not self.independence_test(child_states_numb, cim1, cim2, thumb_value, parent_indx, child_indx):
                    return False
        return True

    def independence_test(self, child_states_numb: int, cim1: ConditionalIntensityMatrix,
                          cim2: ConditionalIntensityMatrix, thumb_value: float, parent_indx, child_indx) -> bool:
        """Compute the actual independence test using two cims.
        It is performed first the exponential test and if the null hypothesis is not rejected,
        it is performed also the chi_test.
        When the child has more than two states and the thumb check fails, the edge
        (parent_indx, child_indx) is marked as not removable in ``_removable_edges_matrix``.

        :param child_states_numb: the cardinality of the test child
        :type child_states_numb: int
        :param cim1: a cim belonging to the graph without test parent
        :type cim1: ConditionalIntensityMatrix
        :param cim2: a cim belonging to the graph with test parent
        :type cim2: ConditionalIntensityMatrix
        :param thumb_value: the value computed by ``compute_thumb_value`` for the current test
        :type thumb_value: float
        :param parent_indx: index of the test parent in ``_removable_edges_matrix``
        :param child_indx: index of the test child in ``_removable_edges_matrix``
        :return: True iff both tests do NOT reject the null hypothesis of independence. False otherwise.
        :rtype: bool
        """
        M1 = cim1.state_transition_matrix
        M2 = cim2.state_transition_matrix
        r1s = M1.diagonal()
        r2s = M2.diagonal()
        C1 = cim1.cim
        C2 = cim2.cim

        if child_states_numb > 2:
            if (np.sum(np.diagonal(M1)) / thumb_value) < self._thumb_threshold:
                self._removable_edges_matrix[parent_indx][child_indx] = False
                return False

        # exponential test: two-sided F test on the ratio of the diagonals of the two cims
        F_stats = C2.diagonal() / C1.diagonal()
        exp_alfa = self._exp_test_sign
        for val in range(0, child_states_numb):
            if F_stats[val] < f_dist.ppf(exp_alfa / 2, r1s[val], r2s[val]) or \
                    F_stats[val] > f_dist.ppf(1 - exp_alfa / 2, r1s[val], r2s[val]):
                return False

        # chi test on the off-diagonal transition counts
        M1_no_diag = M1[~np.eye(M1.shape[0], dtype=bool)].reshape(M1.shape[0], -1)
        M2_no_diag = M2[~np.eye(M2.shape[0], dtype=bool)].reshape(
            M2.shape[0], -1)
        chi_2_quantile = chi2_dist.ppf(1 - self._chi_test_alfa, child_states_numb - 1)
        Ks = np.sqrt(r1s / r2s)
        Ls = np.sqrt(r2s / r1s)
        for val in range(0, child_states_numb):
            Chi = np.sum(np.power(Ks[val] * M2_no_diag[val] - Ls[val] *M1_no_diag[val], 2) /
                         (M1_no_diag[val] + M2_no_diag[val]))
            if Chi > chi_2_quantile:
                return False
        return True

    def compute_thumb_value(self, parent_val, child_val, parent_set_vals):
        """Compute the value to test against the thumb_threshold.

        :param parent_val: test parent's variable cardinality
        :type parent_val: int
        :param child_val: test child's variable cardinality
        :type child_val: int
        :param parent_set_vals: the cardinalities of the nodes in the current sep-set
        :type parent_set_vals: List
        :return: the thumb value for the current independence test
        :rtype: int
        """
        df = (child_val - 1) ** 2
        df = df * parent_val
        for v in parent_set_vals:
            df = df * v
        return df

    def one_iteration_of_CTPC_algorithm(self, var_id: str, tot_vars_count: int)-> typing.List:
        """Performs an iteration of the CTPC algorithm using the node ``var_id`` as ``test_child``.

        :param var_id: the node label of the test child
        :type var_id: string
        :param tot_vars_count: the total number of variables in the net
        :type tot_vars_count: int
        :return: the result of ``ConstraintBasedOptimizer.optimize_structure`` for the node
        :rtype: List
        """
        optimizer_obj = ConstraintBasedOptimizer(
            node_id = var_id,
            structure_estimator = self,
            tot_vars_count = tot_vars_count)
        return optimizer_obj.optimize_structure()

    def ctpc_algorithm(self,disable_multiprocessing:bool= False ):
        """Compute the CTPC algorithm over the entire net.

        :param disable_multiprocessing: True to process the nodes sequentially in the current process,
            default to False
        :type disable_multiprocessing: bool, optional
        :return: the set of the edges estimated for all the nodes
        :rtype: set
        """
        ctpc_algo = self.one_iteration_of_CTPC_algorithm
        total_vars_numb = self._sample_path.total_variables_count

        # Remove all the edges from the structure
        self._sample_path.structure.clean_structure_edges()

        # Estimate the best parents for each node
        if disable_multiprocessing:
            print("DISABILITATO")
            list_edges_partial = [ctpc_algo(n,total_vars_numb) for n in self._nodes]
        else:
            # the number of CPUs is only needed when the process pool is actually used
            cpu_count = multiprocessing.cpu_count()
            total_vars_numb_array = [total_vars_numb] * len(self._nodes)
            with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor:
                list_edges_partial = executor.map(ctpc_algo,
                                                  self._nodes,
                                                  total_vars_numb_array)

        return set(itertools.chain.from_iterable(list_edges_partial))

    def estimate_structure(self,disable_multiprocessing:bool=False):
        """Estimate the structure by running the CTPC algorithm.

        :param disable_multiprocessing: True to disable the multiprocessing operations, default to False
        :type disable_multiprocessing: bool, optional
        :return: the set of estimated edges
        :rtype: set
        """
        return self.ctpc_algorithm(disable_multiprocessing=disable_multiprocessing)

@ -0,0 +1,187 @@
import itertools
import json
import typing
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
from networkx.readwrite import json_graph
from abc import ABC
import abc
from ..utility.cache import Cache
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
from ..structure_graph.network_graph import NetworkGraph
from .parameters_estimator import ParametersEstimator
from ..structure_graph.sample_path import SamplePath
from ..structure_graph.structure import Structure
class StructureEstimator(object):
    """Has the task of estimating the network structure given the trajectories in ``samplepath``.

    :param sample_path: the _sample_path object containing the trajectories and the real structure
    :type sample_path: SamplePath
    :param known_edges: edges given as prior knowledge; they are marked as not removable, default to None
    :type known_edges: List, optional
    :_nodes: the nodes labels
    :_nodes_vals: the nodes cardinalities
    :_nodes_indxs: the nodes indexes
    :_complete_graph: the complete directed graph built using the nodes labels in ``_nodes``
    """

    def __init__(self, sample_path: SamplePath, known_edges: typing.List = None):
        self._sample_path = sample_path
        self._nodes = np.array(self._sample_path.structure.nodes_labels)
        self._nodes_vals = self._sample_path.structure.nodes_values
        self._nodes_indxs = self._sample_path.structure.nodes_indexes
        self._removable_edges_matrix = self.build_removable_edges_matrix(known_edges)
        self._complete_graph = StructureEstimator.build_complete_graph(self._sample_path.structure.nodes_labels)

    def build_removable_edges_matrix(self, known_edges: typing.List):
        """Builds a boolean matrix who shows if a edge could be removed or not, based on prior knowledge given:

        :param known_edges: the list of known edges; each edge is indexed as (parent label, child label)
        :type known_edges: List
        :return: a boolean matrix, False in position [i][j] for every known edge i -> j, True elsewhere
        :rtype: np.ndarray
        """
        tot_vars_count = self._sample_path.total_variables_count
        complete_adj_matrix = np.full((tot_vars_count, tot_vars_count), True)

        if known_edges:
            for edge in known_edges:
                i = self._sample_path.structure.get_node_indx(edge[0])
                j = self._sample_path.structure.get_node_indx(edge[1])
                complete_adj_matrix[i][j] = False

        return complete_adj_matrix

    @staticmethod
    def build_complete_graph(node_ids: typing.List) -> nx.DiGraph:
        """Builds a complete directed graph (no self loops) given the nodes labels in the list ``node_ids``:

        :param node_ids: the list of nodes labels
        :type node_ids: List
        :return: a complete Digraph Object
        :rtype: networkx.DiGraph
        """
        complete_graph = nx.DiGraph()
        complete_graph.add_nodes_from(node_ids)
        complete_graph.add_edges_from(itertools.permutations(node_ids, 2))
        return complete_graph

    @staticmethod
    def generate_possible_sub_sets_of_size( u: typing.List, size: int, parent_label: str):
        """Creates a list containing all possible subsets of the list ``u`` of size ``size``,
        that do not contains a the node identified by ``parent_label``.
        The input list ``u`` is not modified.

        :param u: the list of nodes
        :type u: List
        :param size: the size of the subsets
        :type size: int
        :param parent_label: the node to exclude in the subsets generation
        :type parent_label: string
        :return: an Iterator Object containing a list of lists
        :rtype: Iterator
        """
        list_without_test_parent = u[:]
        list_without_test_parent.remove(parent_label)
        return map(list, itertools.combinations(list_without_test_parent, size))

    def save_results(self) -> None:
        """Save the estimated Structure to a .json file in the current working directory.
        The file is named as the input dataset (plus the dataset id) with the `results_` prefix.
        """
        res = json_graph.node_link_data(self._complete_graph)
        name = self._sample_path._importer.file_path.rsplit('/', 1)[-1]
        name = name.split('.', 1)[0]
        name += '_' + str(self._sample_path._importer.dataset_id())
        name += '.json'
        file_name = 'results_' + name
        with open(file_name, 'w') as f:
            json.dump(res, f)

    def remove_diagonal_elements(self, matrix):
        """Returns the square ``matrix`` without its diagonal, reshaped to m rows.

        :param matrix: a square two-dimensional array
        :type matrix: numpy.ndArray
        :return: an array with m rows holding the off-diagonal elements of every row
        :rtype: numpy.ndArray
        """
        m = matrix.shape[0]
        strided = np.lib.stride_tricks.as_strided
        s0, s1 = matrix.strides
        # skipping the first element and striding by (row + column) jumps over each diagonal entry
        return strided(matrix.ravel()[1:], shape=(m - 1, m), strides=(s0 + s1, s1)).reshape(m, -1)

    @abc.abstractmethod
    def estimate_structure(self) -> typing.List:
        """Abstract method to estimate the structure

        :return: List of estimated edges
        :rtype: Typing.List
        """
        pass

    def adjacency_matrix(self) -> np.ndarray:
        """Converts the estimated structure ``_complete_graph`` to a boolean adjacency matrix representation.

        :return: The adjacency matrix of the graph ``_complete_graph``
        :rtype: numpy.ndArray
        """
        # ``nx.adj_matrix`` is no longer available in recent NetworkX releases
        return nx.adjacency_matrix(self._complete_graph).toarray().astype(bool)

    def spurious_edges(self) -> typing.List:
        """Return the edges of the prior net structure in ``_sample_path.structure`` that are not
        present in ``_complete_graph``.

        NOTE(review): the name suggests "edges estimated but not real", while the difference is
        computed as real minus estimated -- confirm which direction is intended.

        :raises RuntimeError: if no prior net structure is available
        :return: A list containing the spurious edges
        :rtype: List
        """
        if not self._sample_path.has_prior_net_structure:
            raise RuntimeError("Can not compute spurious edges with no prior net structure!")
        real_graph = nx.DiGraph()
        real_graph.add_nodes_from(self._sample_path.structure.nodes_labels)
        real_graph.add_edges_from(self._sample_path.structure.edges)
        return nx.difference(real_graph, self._complete_graph).edges

    def save_plot_estimated_structure_graph(self) -> None:
        """Plot the estimated structure in a graphical model style and save it as a .png file.
        Spurious edges are colored in red.
        """
        # local import: ``os`` is not among the imports of this module
        import os

        graph_to_draw = nx.DiGraph()

        spurious_edges = self.spurious_edges()
        non_spurious_edges = list(set(self._complete_graph.edges) - set(spurious_edges))

        print(non_spurious_edges)

        graph_to_draw.add_edges_from(spurious_edges)
        graph_to_draw.add_edges_from(non_spurious_edges)
        # one color per edge of the graph that is actually drawn, in its own edge order
        edges_colors = ['red' if edge in spurious_edges else 'black' for edge in graph_to_draw.edges]
        pos = nx.spring_layout(graph_to_draw, k=0.5*1/np.sqrt(len(graph_to_draw.nodes())), iterations=50,scale=10)

        options = {
            "node_size": 2000,
            "node_color": "white",
            "edgecolors": "black",
            'linewidths':2,
            "with_labels":True,
            "font_size":13,
            'connectionstyle': 'arc3, rad = 0.1',
            "arrowsize": 15,
            "arrowstyle": '<|-',
            "width": 1,
            "edge_color":edges_colors,
        }

        nx.draw(graph_to_draw, pos, **options)
        ax = plt.gca()
        ax.margins(0.20)
        plt.axis("off")
        name = self._sample_path._importer.file_path.rsplit('/', 1)[-1]
        name = name.split('.', 1)[0]
        name += '_' + str(self._sample_path._importer.dataset_id())
        name += '.png'
        plt.savefig(name)
        plt.clf()
        print("Estimated Structure Plot Saved At: ", os.path.abspath(name))

@ -0,0 +1,244 @@
import itertools
import json
import typing
import networkx as nx
import numpy as np
from networkx.readwrite import json_graph
from random import choice
import concurrent.futures
import copy
from ..structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
from ..structure_graph.network_graph import NetworkGraph
from .parameters_estimator import ParametersEstimator
from .structure_estimator import StructureEstimator
from ..structure_graph.sample_path import SamplePath
from ..structure_graph.structure import Structure
from .fam_score_calculator import FamScoreCalculator
from ..optimizers.hill_climbing_search import HillClimbing
from ..optimizers.tabu_search import TabuSearch
import multiprocessing
from multiprocessing import Pool
class StructureScoreBasedEstimator(StructureEstimator):
    """
    Estimates the network structure from the trajectories in samplepath with a
    score based approach (FamScore maximisation for every node).

    :param sample_path: the _sample_path object containing the trajectories and the real structure
    :type sample_path: SamplePath
    :param tau_xu: hyperparameter over the CTBNs q parameters, default to 0.1
    :type tau_xu: float, optional
    :param alpha_xu: hyperparameter over the CTBNs q parameters, default to 1
    :type alpha_xu: float, optional
    :param known_edges: List of known edges, default to []
    :type known_edges: List, optional
    """

    def __init__(self, sample_path: SamplePath, tau_xu:int=0.1, alpha_xu:int = 1,known_edges: typing.List= []):
        super().__init__(sample_path, known_edges)
        self.tau_xu = tau_xu
        self.alpha_xu = alpha_xu

    def estimate_structure(self, max_parents:int = None, iterations_number:int= 40,
                           patience:int = None, tabu_length:int = None, tabu_rules_duration:int = None,
                           optimizer: str = 'tabu',disable_multiprocessing:bool= False ):
        """
        Run the score-based search for every node and collect the resulting edges.
        Precision and recall against the edges stored in the sample path are printed.

        :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
        :type max_parents: int, optional
        :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
        :type iterations_number: int, optional
        :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to None
        :type patience: int, optional
        :param tabu_length: maximum length of the data structures used in the optimization process, default to None
        :type tabu_length: int, optional
        :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to None
        :type tabu_rules_duration: int, optional
        :param optimizer: name of the optimizer algorithm. Possible values: 'hill' (Hill climbing),'tabu' (tabu search), default to 'tabu'
        :type optimizer: string, optional
        :param disable_multiprocessing: true if you desire to disable the multiprocessing operations, default to False
        :type disable_multiprocessing: Boolean, optional
        :return: the set of the estimated edges
        :rtype: set
        """
        structure = self._sample_path.structure

        # keep a copy of the true edges (as tuples) before wiping them from the structure
        true_edges = set(map(tuple, copy.deepcopy(structure.edges)))
        structure.clean_structure_edges()

        estimate_parents = self.estimate_parents
        search_args = (max_parents, iterations_number, patience,
                       tabu_length, tabu_rules_duration, optimizer)

        cpu_count = multiprocessing.cpu_count()
        print(f"CPU COUNT: {cpu_count}")

        # best parents of every node, either sequentially or on a pool of processes
        if disable_multiprocessing:
            list_edges_partial = [estimate_parents(n, *search_args) for n in self._nodes]
        else:
            n_nodes = len(self._nodes)
            # every search argument is replicated once per node, as executor.map expects
            replicated_args = [[arg] * n_nodes for arg in search_args]
            with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_count) as executor:
                list_edges_partial = executor.map(estimate_parents, self._nodes, *replicated_args)

        # merge the per-node edge lists
        set_list_edges = set(itertools.chain.from_iterable(list_edges_partial))

        # precision and recall of the estimate
        try:
            n_added_fake_edges = len(set_list_edges - true_edges)
            n_missing_edges = len(true_edges - set_list_edges)
            n_true_positive = len(true_edges) - n_missing_edges
            precision = n_true_positive / (n_true_positive + n_added_fake_edges)
            recall = n_true_positive / (n_true_positive + n_missing_edges)
            print(true_edges)
            print(set_list_edges)
            print(f"precision: {precision} ")
            print(f"recall: {recall} ")
        except Exception as e:
            print(f"errore: {e}")

        return set_list_edges

    def estimate_parents(self,node_id:str, max_parents:int = None, iterations_number:int= 40,
                         patience:int = 10, tabu_length:int = None, tabu_rules_duration:int=5,
                         optimizer:str = 'hill' ):
        """
        Search the best parent nodes of ``node_id`` according to the FamScore.

        :param node_id: current node's id
        :type node_id: string
        :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
        :type max_parents: int, optional
        :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
        :type iterations_number: int, optional
        :param patience: number of iteration without any improvement before to stop the search.If None, disabled, default to 10
        :type patience: int, optional
        :param tabu_length: maximum length of the data structures used in the optimization process, default to None
        :type tabu_length: int, optional
        :param tabu_rules_duration: number of iterations in which each rule keeps its value, default to 5
        :type tabu_rules_duration: int, optional
        :param optimizer: name of the optimizer algorithm: 'tabu' selects the tabu search, any other value
            the hill climbing, default to 'hill'
        :type optimizer: string, optional
        :return: A list of the best edges for the current node
        :rtype: List
        """
        if optimizer != 'tabu':
            # hill climbing is the fallback for every value different from 'tabu'
            search = HillClimbing(
                node_id = node_id,
                structure_estimator = self,
                max_parents = max_parents,
                iterations_number = iterations_number,
                patience = patience)
        else:
            search = TabuSearch(
                node_id = node_id,
                structure_estimator = self,
                max_parents = max_parents,
                iterations_number = iterations_number,
                patience = patience,
                tabu_length = tabu_length,
                tabu_rules_duration = tabu_rules_duration)

        # the optimizer computes the parents of the current node
        return search.optimize_structure()

    def get_score_from_graph(self,
                             graph: NetworkGraph,
                             node_id:str):
        """
        Compute the FamScore of ``node_id`` for the structure described by ``graph``.

        :param graph: current graph to be computed
        :type graph: class:'NetworkGraph'
        :param node_id: current node's id
        :type node_id: string
        :return: The FamScore for this graph structure
        :rtype: float
        """
        # prepare the graph and the estimator for the single node
        graph.fast_init(node_id)
        estimator = ParametersEstimator(self._sample_path.trajectories, graph)
        estimator.fast_init(node_id)

        # cims of the node, then their FamScore
        node_cims = estimator.compute_parameters_for_node(node_id)
        return FamScoreCalculator().get_fam_score(node_cims.actual_cims,
                                                  tau_xu = self.tau_xu,
                                                  alpha_xu=self.alpha_xu)

@ -0,0 +1,4 @@
from .optimizer import Optimizer
from .tabu_search import TabuSearch
from .hill_climbing_search import HillClimbing
from .constraint_based_optimizer import ConstraintBasedOptimizer

@ -0,0 +1,87 @@
import itertools
import json
import typing
import networkx as nx
import numpy as np
from random import choice
from abc import ABC
import copy
from .optimizer import Optimizer
from ..estimators.structure_estimator import StructureEstimator
from ..structure_graph.network_graph import NetworkGraph
class ConstraintBasedOptimizer(Optimizer):
    """
    Optimizer that estimates the parent set of a node with a CTPC algorithm.

    :param node_id: current node's id
    :type node_id: string
    :param structure_estimator: a structure estimator object with the information about the net
    :type structure_estimator: class:'StructureEstimator'
    :param tot_vars_count: number of variables in the dataset
    :type tot_vars_count: int
    """

    def __init__(self,
                 node_id:str,
                 structure_estimator: StructureEstimator,
                 tot_vars_count:int
                 ):
        """
        Constructor
        """
        super().__init__(node_id, structure_estimator)
        self.tot_vars_count = tot_vars_count

    def optimize_structure(self):
        """
        Run the CTPC procedure for ``node_id``: start from the graph in which every
        other node is a parent and drop each edge for which ``complete_test`` succeeds
        for some conditioning set of the current size.

        :return: the estimated structure for the node
        :rtype: List
        """
        print("##################TESTING VAR################", self.node_id)
        estimator = self.structure_estimator
        structure = estimator._sample_path.structure
        graph = NetworkGraph(structure)

        # Every other node starts as a candidate parent of the current node.
        candidates = [label for label in structure.nodes_labels if label != self.node_id]
        graph.add_edges([(label, self.node_id) for label in candidates])

        child_states_numb = structure.get_states_number(self.node_id)
        child_indx = structure.get_node_indx(self.node_id)

        # subset_size is the size of the conditioning sets tested in this sweep.
        subset_size = 0
        while subset_size < len(candidates):
            position = 0
            while position < len(candidates):
                test_parent = candidates[position]
                parent_indx = structure.get_node_indx(test_parent)
                edge_dropped = False
                # Edges flagged as not removable are never tested.
                if estimator._removable_edges_matrix[parent_indx][child_indx]:
                    conditioning_sets = StructureEstimator.generate_possible_sub_sets_of_size(
                        candidates, subset_size, test_parent)
                    # Stops at the first conditioning set for which the test succeeds.
                    edge_dropped = any(
                        estimator.complete_test(test_parent, self.node_id, parents_set,
                                                child_states_numb, self.tot_vars_count,
                                                parent_indx, child_indx)
                        for parents_set in conditioning_sets)
                if edge_dropped:
                    graph.remove_edges([(test_parent, self.node_id)])
                    # The list shrinks, so the same position now holds the next candidate.
                    candidates.remove(test_parent)
                else:
                    position += 1
            subset_size += 1

        estimator._cache.clear()
        return graph.edges

@ -0,0 +1,135 @@
import itertools
import json
import typing
import networkx as nx
import numpy as np
from random import choice
from abc import ABC
from .optimizer import Optimizer
from ..estimators.structure_estimator import StructureEstimator
from ..structure_graph.network_graph import NetworkGraph
class HillClimbing(Optimizer):
    """
    Optimizer class that implements Hill Climbing Search

    :param node_id: current node's id
    :type node_id: string
    :param structure_estimator: a structure estimator object with the information about the net
    :type structure_estimator: class:'StructureEstimator'
    :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
    :type max_parents: int, optional
    :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
    :type iterations_number: int, optional
    :param patience: number of iterations without any improvement before stopping the search. If None, disabled, default to None
    :type patience: int, optional
    """

    def __init__(self,
                 node_id:str,
                 structure_estimator: StructureEstimator,
                 max_parents:int = None,
                 iterations_number:int= 40,
                 patience:int = None
                 ):
        """
        Constructor
        """
        super().__init__(node_id, structure_estimator)
        self.max_parents = max_parents
        self.iterations_number = iterations_number
        self.patience = patience

    def optimize_structure(self) -> typing.List:
        """
        Compute Optimization process for a structure_estimator by using a Hill Climbing Algorithm.
        At every iteration a random candidate edge is toggled (added if absent, removed if present)
        and the change is kept only when the node's score strictly improves.

        :return: the estimated structure for the node
        :rtype: List
        """
        # Create the graph for the single node
        graph = NetworkGraph(self.structure_estimator._sample_path.structure)

        # Index of the current node (column of the removable-edges matrix to inspect)
        node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id)

        # Parents imposed by prior knowledge: their edges are marked as not removable
        prior_parents = set()
        removable_edges = self.structure_estimator._removable_edges_matrix
        for i in range(len(removable_edges)):
            if not removable_edges[i][node_index]:
                parent_id = self.structure_estimator._sample_path._structure.get_node_id(i)
                prior_parents.add(parent_id)
                # Add the prior edge to the starting structure
                graph.add_edges([(parent_id, self.node_id)])

        # All the possible (non prior) parents
        other_nodes = [node for node in
                       self.structure_estimator._sample_path.structure.nodes_labels
                       if node != self.node_id and node not in prior_parents]

        actual_best_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)

        patience_count = 0
        # With no candidate parent there is nothing to explore: random.choice would
        # raise IndexError on the empty list, so the search loop is skipped entirely.
        iterations = self.iterations_number if other_nodes else 0
        for _ in range(iterations):
            # Choose a new random edge
            current_new_parent = choice(other_nodes)
            current_edge = (current_new_parent, self.node_id)
            added = False
            parent_removed = None

            if graph.has_edge(current_edge):
                graph.remove_edges([current_edge])
            else:
                # Check the max_parents constraint: make room by dropping a random parent
                if self.max_parents is not None:
                    parents_list = graph.get_parents_by_id(self.node_id)
                    if len(parents_list) >= self.max_parents:
                        # NOTE(review): the dropped parent is drawn from all current parents,
                        # prior-knowledge ones included -- confirm this is intended.
                        parent_removed = (choice(parents_list), self.node_id)
                        graph.remove_edges([parent_removed])
                graph.add_edges([current_edge])
                added = True

            current_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)

            if current_score > actual_best_score:
                # Keep the move and update the current best score
                actual_best_score = current_score
                patience_count = 0
            else:
                # Undo the last update
                if added:
                    graph.remove_edges([current_edge])
                    # If a parent was removed to make room, add it again to the graph
                    if parent_removed is not None:
                        graph.add_edges([parent_removed])
                else:
                    graph.add_edges([current_edge])
                # Update patience count
                patience_count += 1

            if self.patience is not None and patience_count > self.patience:
                break

        print(f"finito variabile: {self.node_id}")
        return graph.edges

@ -0,0 +1,39 @@
import itertools
import json
import typing
import networkx as nx
import numpy as np
import abc
from ..estimators.structure_estimator import StructureEstimator
class Optimizer(abc.ABC):
    """
    Abstract base class shared by all the PyCTBN optimizers.

    :param node_id: the node label
    :type node_id: string
    :param structure_estimator: A structureEstimator Object to predict the structure
    :type structure_estimator: class:'StructureEstimator'
    """

    def __init__(self, node_id:str, structure_estimator: StructureEstimator):
        """
        Store the node label and the structure estimator used by the concrete optimizer.
        """
        self.node_id = node_id
        self.structure_estimator = structure_estimator

    @abc.abstractmethod
    def optimize_structure(self) -> typing.List:
        """
        Compute Optimization process for a structure_estimator.
        Must be implemented by every concrete optimizer.

        :return: the estimated structure for the node
        :rtype: List
        """
        pass

@ -0,0 +1,199 @@
import itertools
import json
import typing
import networkx as nx
import numpy as np
from random import choice,sample
from abc import ABC
from .optimizer import Optimizer
from ..estimators.structure_estimator import StructureEstimator
from ..structure_graph.network_graph import NetworkGraph
import queue
class TabuSearch(Optimizer):
    """
    Optimizer class that implements Tabu Search

    :param node_id: current node's id
    :type node_id: string
    :param structure_estimator: a structure estimator object with the information about the net
    :type structure_estimator: class:'StructureEstimator'
    :param max_parents: maximum number of parents for each variable. If None, disabled, default to None
    :type max_parents: int, optional
    :param iterations_number: maximum number of optimization algorithm's iteration, default to 40
    :type iterations_number: int, optional
    :param patience: number of iterations without any improvement before stopping the search. If None, disabled, default to None
    :type patience: int, optional
    :param tabu_length: maximum length of the data structures used in the optimization process.
        If None, the number of candidate parents is used, default to None
    :type tabu_length: int, optional
    :param tabu_rules_duration: number of iterations in which each rule keeps its value.
        If None, the number of candidate parents is used, default to None
    :type tabu_rules_duration: int, optional
    """

    def __init__(self,
                 node_id:str,
                 structure_estimator: StructureEstimator,
                 max_parents:int = None,
                 iterations_number:int= 40,
                 patience:int = None,
                 tabu_length:int = None,
                 tabu_rules_duration = None
                 ):
        """
        Constructor
        """
        super().__init__(node_id, structure_estimator)
        self.max_parents = max_parents
        self.iterations_number = iterations_number
        self.patience = patience
        self.tabu_length = tabu_length
        self.tabu_rules_duration = tabu_rules_duration

    def optimize_structure(self) -> typing.List:
        """
        Compute Optimization process for a structure_estimator by using a Tabu Search Algorithm.
        At every iteration a candidate edge that is not in the tabu list is toggled; the change is
        kept only when the node's score strictly improves, and the candidate then enters the tabu list.

        :return: the estimated structure for the node
        :rtype: List
        """
        print(f"tabu search is processing the structure of {self.node_id}")

        # Create the graph for the single node
        graph = NetworkGraph(self.structure_estimator._sample_path.structure)

        # Index of the current node (column of the removable-edges matrix to inspect)
        node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id)

        # Parents imposed by prior knowledge: their edges are marked as not removable
        prior_parents = set()
        removable_edges = self.structure_estimator._removable_edges_matrix
        for i in range(len(removable_edges)):
            if not removable_edges[i][node_index]:
                parent_id = self.structure_estimator._sample_path._structure.get_node_id(i)
                prior_parents.add(parent_id)
                # Add the prior edge to the starting structure
                graph.add_edges([(parent_id, self.node_id)])

        # All the possible (non prior) parents
        other_nodes = {node for node in
                       self.structure_estimator._sample_path.structure.nodes_labels
                       if node != self.node_id and node not in prior_parents}

        # Score of the starting structure
        actual_best_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)

        # Initialize tabu_length and tabu_rules_duration if None
        if self.tabu_length is None:
            self.tabu_length = len(other_nodes)
        if self.tabu_rules_duration is None:
            # The original assigned to a misspelled attribute, leaving this one None and
            # making the modulo below fail with a TypeError.
            self.tabu_rules_duration = len(other_nodes)

        # Tabu list: the set gives fast membership, the queue keeps the insertion order
        tabu_set = set()
        tabu_queue = queue.Queue()

        patience_count = 0
        tabu_count = 0

        # With no candidate parent there is nothing to explore; entering the loop would
        # block forever on the empty queue, so the search is skipped entirely.
        iterations = self.iterations_number if other_nodes else 0
        for _ in range(iterations):
            current_possible_nodes = other_nodes.difference(tabu_set)

            # Choose a new random edge according to the tabu restriction
            if len(current_possible_nodes) > 0:
                # random.sample no longer accepts a set (TypeError since Python 3.11),
                # so the population is materialized as a tuple first.
                current_new_parent = sample(tuple(current_possible_nodes), k=1)[0]
            else:
                # Every candidate is tabu: release the oldest one and use it
                current_new_parent = tabu_queue.get()
                tabu_set.remove(current_new_parent)

            current_edge = (current_new_parent, self.node_id)
            added = False
            parent_removed = None

            if graph.has_edge(current_edge):
                graph.remove_edges([current_edge])
            else:
                # Check the max_parents constraint: make room by dropping a random parent
                if self.max_parents is not None:
                    parents_list = graph.get_parents_by_id(self.node_id)
                    if len(parents_list) >= self.max_parents:
                        # NOTE(review): the dropped parent is drawn from all current parents,
                        # prior-knowledge ones included -- confirm this is intended.
                        parent_removed = (choice(parents_list), self.node_id)
                        graph.remove_edges([parent_removed])
                graph.add_edges([current_edge])
                added = True

            current_score = self.structure_estimator.get_score_from_graph(graph, self.node_id)

            if current_score > actual_best_score:
                # Keep the move and update the current best score
                actual_best_score = current_score
                patience_count = 0
            else:
                # Undo the last update
                if added:
                    graph.remove_edges([current_edge])
                    # If a parent was removed to make room, add it again to the graph
                    if parent_removed is not None:
                        graph.add_edges([parent_removed])
                else:
                    graph.add_edges([current_edge])
                # Update patience count
                patience_count += 1

            # Keep the tabu list within tabu_length; the emptiness check avoids a
            # blocking get() on an empty queue when tabu_length is 0.
            if tabu_queue.qsize() > 0 and tabu_queue.qsize() >= self.tabu_length:
                current_removed = tabu_queue.get()
                tabu_set.remove(current_removed)

            # Add the node to the tabu list
            tabu_queue.put(current_new_parent)
            tabu_set.add(current_new_parent)

            tabu_count += 1

            # Every tabu_rules_duration steps release the oldest item of the tabu list
            if tabu_count % self.tabu_rules_duration == 0:
                if tabu_queue.qsize() > 0:
                    current_removed = tabu_queue.get()
                    tabu_set.remove(current_removed)
                tabu_count = 0

            if self.patience is not None and patience_count > self.patience:
                break

        print(f"finito variabile: {self.node_id}")
        return graph.edges

@ -0,0 +1,6 @@
from .conditional_intensity_matrix import ConditionalIntensityMatrix
from .network_graph import NetworkGraph
from .sample_path import SamplePath
from .set_of_cims import SetOfCims
from .structure import Structure
from .trajectory import Trajectory

@ -0,0 +1,42 @@
import numpy as np
class ConditionalIntensityMatrix(object):
    """Conditional Intensity Matrix of a node: bundles the state residence times vector,
    the state transition matrix and the CIM computed from them.

    :param state_residence_times: state residence times vector
    :type state_residence_times: numpy.array
    :param state_transition_matrix: the transitions count matrix
    :type state_transition_matrix: numpy.ndArray
    :_cim: the actual cim of the node
    """

    def __init__(self, state_residence_times: np.array, state_transition_matrix: np.array):
        """Constructor Method
        """
        self._state_residence_times = state_residence_times
        self._state_transition_matrix = state_transition_matrix
        # Float copy of the transition counts; it is turned into the CIM later on.
        self._cim = self.state_transition_matrix.astype(np.float64)

    def compute_cim_coefficients(self) -> None:
        """Turn ``_cim`` into the matrix of coefficients: the diagonal is negated, then
        every entry of row ``x`` becomes ``(entry + 1) / (T[x] + 1)``.
        The class member ``_cim`` will contain the computed cim
        """
        negated_diagonal = self._cim.diagonal() * -1
        np.fill_diagonal(self._cim, negated_diagonal)
        # Working on the transpose lets the residence times broadcast along the rows of the CIM.
        shifted_counts = self._cim.T + 1
        shifted_times = self._state_residence_times + 1
        self._cim = (shifted_counts / shifted_times).T

    @property
    def state_residence_times(self) -> np.ndarray:
        """The state residence times vector given at construction."""
        return self._state_residence_times

    @property
    def state_transition_matrix(self) -> np.ndarray:
        """The transition count matrix given at construction."""
        return self._state_transition_matrix

    @property
    def cim(self) -> np.ndarray:
        """The current content of ``_cim``."""
        return self._cim

    def __repr__(self):
        return f'CIM:\n{self.cim!s}'

@ -0,0 +1,293 @@
import typing
import networkx as nx
import numpy as np
from .structure import Structure
class NetworkGraph(object):
    """Abstracts the infos contained in the Structure class in the form of a directed graph.
    Has the task of creating all the necessary filtering and indexing structures for parameters estimation

    :param graph_struct: the ``Structure`` object from which infos about the net will be extracted
    :type graph_struct: Structure
    :_graph: directed graph
    :_aggregated_info_about_nodes_parents: a structure that contains all the necessary infos
        about every parents of the node of which all the indexing and filtering structures will be constructed.
    :_time_scalar_indexing_structure: the indexing structure for state res time estimation
    :_transition_scalar_indexing_structure: the indexing structure for transition computation
    :_time_filtering: the columns filtering structure used in the computation of the state res times
    :_transition_filtering: the columns filtering structure used in the computation of the transition
        from one state to another
    :_p_combs_structure: all the possible parents states combination for the node of interest
    """

    def __init__(self, graph_struct: Structure):
        """Constructor Method
        """
        self._graph_struct = graph_struct
        self._graph = nx.DiGraph()
        self._aggregated_info_about_nodes_parents = None
        self._time_scalar_indexing_structure = None
        self._transition_scalar_indexing_structure = None
        self._time_filtering = None
        self._transition_filtering = None
        self._p_combs_structure = None

    def init_graph(self):
        # NOTE(review): this method reads ``self._nodes_labels`` and ``self.graph_struct`` and calls
        # builder methods that are not defined anywhere in this class, so it looks like legacy code
        # that cannot run as written -- confirm before relying on it; ``fast_init`` is the
        # initializer used by the estimators.
        self.add_nodes(self._nodes_labels)
        self.add_edges(self.graph_struct.edges)
        self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes()
        self._fancy_indexing = self.build_fancy_indexing_structure(0)
        self.build_scalar_indexing_structures()
        self.build_time_columns_filtering_structure()
        self.build_transition_columns_filtering_structure()
        self._p_combs_structure = self.build_p_combs_structure()

    def fast_init(self, node_id: str) -> None:
        """Initializes all the necessary structures for parameters estimation of the node identified by the label
        node_id

        :param node_id: the label of the node
        :type node_id: string
        """
        self.add_nodes(self._graph_struct.nodes_labels)
        self.add_edges(self._graph_struct.edges)
        self._aggregated_info_about_nodes_parents = self.get_ordered_by_indx_set_of_parents(node_id)
        p_indxs = self._aggregated_info_about_nodes_parents[1]
        p_vals = self._aggregated_info_about_nodes_parents[2]
        node_states = self.get_states_number(node_id)
        node_indx = self.get_node_indx(node_id)
        cols_number = self._graph_struct.total_variables_number
        self._time_scalar_indexing_structure = NetworkGraph.\
            build_time_scalar_indexing_structure_for_a_node(node_states, p_vals)
        self._transition_scalar_indexing_structure = NetworkGraph.\
            build_transition_scalar_indexing_structure_for_a_node(node_states, p_vals)
        self._time_filtering = NetworkGraph.build_time_columns_filtering_for_a_node(node_indx, p_indxs)
        self._transition_filtering = NetworkGraph.build_transition_filtering_for_a_node(node_indx, p_indxs, cols_number)
        self._p_combs_structure = NetworkGraph.build_p_comb_structure_for_a_node(p_vals)

    def add_nodes(self, list_of_nodes: typing.List) -> None:
        """Adds the nodes to the ``_graph`` contained in the list of nodes ``list_of_nodes``.
        Sets all the properties that identify a nodes (index, positional index, cardinality)

        :param list_of_nodes: the nodes to add to ``_graph``
        :type list_of_nodes: List
        """
        nodes_indxs = self._graph_struct.nodes_indexes
        nodes_vals = self._graph_struct.nodes_values
        # ``pos`` is the position of the node in ``list_of_nodes``.
        for pos, (node_id, node_indx, node_val) in enumerate(zip(list_of_nodes, nodes_indxs, nodes_vals)):
            self._graph.add_node(node_id, indx=node_indx, val=node_val, pos_indx=pos)

    def has_edge(self, edge: tuple) -> bool:
        """Check if the graph contains a specific edge

        :param edge: a tuple (from_node, to_node) that represents the edge
        :type edge: tuple
        :return: True if the edge is in the graph, False otherwise
        :rtype: bool
        """
        return self._graph.has_edge(edge[0], edge[1])

    def add_edges(self, list_of_edges: typing.List) -> None:
        """Add the edges to the ``_graph`` contained in the list ``list_of_edges``.

        :param list_of_edges: the list containing of tuples containing the edges
        :type list_of_edges: List
        """
        self._graph.add_edges_from(list_of_edges)

    def remove_node(self, node_id: str) -> None:
        """Remove the node ``node_id`` from all the class members.
        Initialize all the filtering/indexing structures.
        """
        self._graph.remove_node(node_id)
        self._graph_struct.remove_node(node_id)
        self.clear_indexing_filtering_structures()

    def clear_indexing_filtering_structures(self) -> None:
        """Initialize all the filtering/indexing structures.
        """
        self._aggregated_info_about_nodes_parents = None
        self._time_scalar_indexing_structure = None
        self._transition_scalar_indexing_structure = None
        self._time_filtering = None
        self._transition_filtering = None
        self._p_combs_structure = None

    def get_ordered_by_indx_set_of_parents(self, node: str) -> typing.Tuple:
        """Builds the aggregated structure that holds all the infos relative to the parent set of the node, namely
        (parents_labels, parents_indexes, parents_cardinalities).

        :param node: the label of the node
        :type node: string
        :return: a tuple containing all the parent set infos
        :rtype: Tuple
        """
        parents = self.get_parents_by_id(node)
        # Parents are sorted by their position in the structure's list of labels.
        label_positions = {label: pos for pos, label in enumerate(self._graph_struct.nodes_labels)}
        sorted_parents = sorted(parents, key=lambda label: label_positions[label])
        p_indxes = [self.get_node_indx(parent) for parent in sorted_parents]
        p_values = [self.get_states_number(parent) for parent in sorted_parents]
        return sorted_parents, p_indxes, p_values

    def remove_edges(self, list_of_edges: typing.List) -> None:
        """Remove the edges to the graph contained in the list list_of_edges.

        :param list_of_edges: The edges to remove from the graph
        :type list_of_edges: List
        """
        self._graph.remove_edges_from(list_of_edges)

    @staticmethod
    def build_time_scalar_indexing_structure_for_a_node(node_states: int,
                                                        parents_vals: typing.List) -> np.ndarray:
        """Builds an indexing structure for the computation of state residence times values.

        :param node_states: the node cardinality
        :type node_states: int
        :param parents_vals: the cardinalities of the node's parents
        :type parents_vals: List
        :return: The time indexing structure
        :rtype: numpy.ndArray
        """
        T_vector = np.array([node_states])
        T_vector = np.append(T_vector, parents_vals)
        # ``np.int`` was removed from NumPy; the builtin ``int`` is what it aliased.
        T_vector = T_vector.cumprod().astype(int)
        return T_vector

    @staticmethod
    def build_transition_scalar_indexing_structure_for_a_node(node_states_number: int, parents_vals: typing.List) \
            -> np.ndarray:
        """Builds an indexing structure for the computation of state transitions values.

        :param node_states_number: the node cardinality
        :type node_states_number: int
        :param parents_vals: the cardinalities of the node's parents
        :type parents_vals: List
        :return: The transition indexing structure
        :rtype: numpy.ndArray
        """
        M_vector = np.array([node_states_number,
                             node_states_number])
        M_vector = np.append(M_vector, parents_vals)
        M_vector = M_vector.cumprod().astype(int)
        return M_vector

    @staticmethod
    def build_time_columns_filtering_for_a_node(node_indx: int, p_indxs: typing.List) -> np.ndarray:
        """
        Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs``
        in the dataset.
        This structure will be used in the computation of the state res times.

        :param node_indx: the index of the node
        :type node_indx: int
        :param p_indxs: the indexes of the node's parents
        :type p_indxs: List
        :return: The filtering structure for times estimation
        :rtype: numpy.ndArray
        """
        # The final cast is needed because appending an empty list promotes the array to float.
        return np.append(np.array([node_indx], dtype=int), p_indxs).astype(int)

    @staticmethod
    def build_transition_filtering_for_a_node(node_indx: int, p_indxs: typing.List, nodes_number: int) \
            -> np.ndarray:
        """Builds the necessary structure to filter the desired columns indicated by ``node_indx`` and ``p_indxs``
        in the dataset.
        This structure will be used in the computation of the state transitions values.

        :param node_indx: the index of the node
        :type node_indx: int
        :param p_indxs: the indexes of the node's parents
        :type p_indxs: List
        :param nodes_number: the total number of nodes in the dataset
        :type nodes_number: int
        :return: The filtering structure for transitions estimation
        :rtype: numpy.ndArray
        """
        return np.array([node_indx + nodes_number, node_indx, *p_indxs], dtype=int)

    @staticmethod
    def build_p_comb_structure_for_a_node(parents_values: typing.List) -> np.ndarray:
        """
        Builds the combinatorial structure that contains the combinations of all the values contained in
        ``parents_values``.

        :param parents_values: the cardinalities of the nodes
        :type parents_values: List
        :return: A numpy matrix containing a grid of the combinations
        :rtype: numpy.ndArray
        """
        if len(parents_values) > 0:
            states_per_parent = [list(range(val)) for val in parents_values]
            parents_comb = np.array(np.meshgrid(*states_per_parent)).T.reshape(-1, len(parents_values))
            if len(parents_values) > 1:
                # Swap the first two columns of the grid.
                # NOTE(review): presumably compensates for meshgrid's default 'xy' indexing -- confirm.
                tmp_comb = parents_comb[:, 1].copy()
                parents_comb[:, 1] = parents_comb[:, 0].copy()
                parents_comb[:, 0] = tmp_comb
        else:
            parents_comb = np.array([[]], dtype=int)
        return parents_comb

    def get_parents_by_id(self, node_id) -> typing.List:
        """Returns a list of labels of the parents of the node ``node_id``

        :param node_id: the node label
        :type node_id: string
        :return: a List of labels of the parents
        :rtype: List
        """
        return list(self._graph.predecessors(node_id))

    def get_states_number(self, node_id) -> int:
        """Returns the ``val`` attribute (cardinality) stored on the graph node ``node_id``."""
        return self._graph.nodes[node_id]['val']

    def get_node_indx(self, node_id) -> int:
        """Returns the ``indx`` attribute stored on the graph node ``node_id``."""
        # Direct attribute lookup: no need to build the attribute dict of every node at each call.
        return self._graph.nodes[node_id]['indx']

    def get_positional_node_indx(self, node_id) -> int:
        """Returns the ``pos_indx`` attribute stored on the graph node ``node_id``."""
        return self._graph.nodes[node_id]['pos_indx']

    @property
    def nodes(self) -> typing.List:
        return self._graph_struct.nodes_labels

    @property
    def edges(self) -> typing.List:
        return list(self._graph.edges)

    @property
    def nodes_indexes(self) -> np.ndarray:
        return self._graph_struct.nodes_indexes

    @property
    def nodes_values(self) -> np.ndarray:
        return self._graph_struct.nodes_values

    @property
    def time_scalar_indexing_strucure(self) -> np.ndarray:
        # The misspelled name is part of the public interface and is kept for the callers.
        return self._time_scalar_indexing_structure

    @property
    def time_filtering(self) -> np.ndarray:
        return self._time_filtering

    @property
    def transition_scalar_indexing_structure(self) -> np.ndarray:
        return self._transition_scalar_indexing_structure

    @property
    def transition_filtering(self) -> np.ndarray:
        return self._transition_filtering

    @property
    def p_combs(self) -> np.ndarray:
        return self._p_combs_structure

@ -0,0 +1,91 @@
import numpy as np
import pandas as pd
from .structure import Structure
from .trajectory import Trajectory
from ..utility.abstract_importer import AbstractImporter
class SamplePath(object):
    """Aggregates all the information about the trajectories, the real structure of the sampled net and
    the variables cardinalities. Has the task of creating the ``Trajectory`` and ``Structure`` objects that
    will contain the mentioned data.

    :param importer: the Importer object which contains the imported and processed data
    :type importer: AbstractImporter
    :_trajectories: the ``Trajectory`` object that will contain all the concatenated trajectories
    :_structure: the ``Structure`` Object that will contain all the structural infos about the net
    :_total_variables_count: the number of variables in the net
    """

    def __init__(self, importer: AbstractImporter):
        """Constructor Method

        :raises RuntimeError: if the importer's variables frame or concatenated samples are missing or empty
        """
        self._importer = importer
        missing_data_msg = 'The importer object has to contain the all processed data!'
        if self._importer._df_variables is None or self._importer._concatenated_samples is None:
            raise RuntimeError(missing_data_msg)
        if self._importer._df_variables.empty:
            raise RuntimeError(missing_data_msg)
        # The samples may be either a DataFrame or a numpy array: check emptiness accordingly.
        samples = self._importer._concatenated_samples
        empty_frame = isinstance(samples, pd.DataFrame) and samples.empty
        empty_array = isinstance(samples, np.ndarray) and samples.size == 0
        if empty_frame or empty_array:
            raise RuntimeError(missing_data_msg)
        self._trajectories = None
        self._structure = None
        self._total_variables_count = None

    def build_trajectories(self) -> None:
        """Builds the Trajectory object that will contain all the trajectories.
        Clears all the unused dataframes in ``_importer`` Object
        """
        samples_arrays = self._importer.build_list_of_samples_array(self._importer.concatenated_samples)
        columns_count = len(self._importer.sorter) + 1
        self._trajectories = Trajectory(samples_arrays, columns_count)
        self._importer.clear_concatenated_frame()

    def build_structure(self) -> None:
        """
        Builds the ``Structure`` object that aggregates all the infos about the net.

        :raises RuntimeError: if the importer's ``sorter`` differs from the labels in the first column
            of the variables frame
        """
        variables = self._importer.variables
        sorter = self._importer.sorter
        if sorter != variables.iloc[:, 0].to_list():
            raise RuntimeError("The Dataset columns order have to match the order of labels in the variables Frame!")

        self._total_variables_count = len(sorter)
        labels = variables.iloc[:, 0].to_list()
        indxs = variables.index.to_numpy()
        vals = variables.iloc[:, 1].to_numpy()

        # A missing or empty structure frame means that no edges are known.
        structure_frame = self._importer.structure
        has_edges = structure_frame is not None and not structure_frame.empty
        edges = list(structure_frame.to_records(index=False)) if has_edges else []

        self._structure = Structure(labels, indxs, vals, edges,
                                    self._total_variables_count)

    def clear_memory(self):
        """Drops the raw data held by the importer."""
        self._importer._raw_data = []

    @property
    def trajectories(self) -> Trajectory:
        return self._trajectories

    @property
    def structure(self) -> Structure:
        return self._structure

    @property
    def total_variables_count(self) -> int:
        return self._total_variables_count

    @property
    def has_prior_net_structure(self) -> bool:
        """True when the built structure has at least one edge."""
        return bool(self._structure.edges)

@ -0,0 +1,97 @@
import typing
import numpy as np
from .conditional_intensity_matrix import ConditionalIntensityMatrix
class SetOfCims(object):
    """Aggregates all the CIMS of the node identified by the label _node_id.

    :param node_id: the node label
    :type node_ind: string
    :param parents_states_number: the cardinalities of the parents
    :type parents_states_number: List
    :param node_states_number: the cardinality of the node
    :type node_states_number: int
    :param p_combs: the p_comb structure bound to this node
    :type p_combs: numpy.ndArray
    :_state_residence_time: matrix containing all the state residence time vectors for the node
    :_transition_matrices: matrix containing all the transition matrices for the node
    :_actual_cims: the cims of the node
    """

    def __init__(self, node_id: str, parents_states_number: typing.List, node_states_number: int, p_combs: np.ndarray):
        """Constructor Method
        """
        self._node_id = node_id
        self._parents_states_number = parents_states_number
        self._node_states_number = node_states_number
        self._actual_cims = []
        self._state_residence_times = None
        self._transition_matrices = None
        self._p_combs = p_combs
        self.build_times_and_transitions_structures()

    def build_times_and_transitions_structures(self) -> None:
        """Initializes at the correct dimensions the state residence times matrix and the state transition matrices.
        There is one row (one matrix) per combination of the parents' states, a single one when
        the node has no parents.
        """
        parents = self._parents_states_number
        if parents is None or len(parents) == 0:
            combinations_number = 1
        else:
            combinations_number = int(np.prod(parents))
        states = self._node_states_number
        # ``np.float`` / ``np.int`` were removed from NumPy; the builtins are what they aliased.
        self._state_residence_times = np.zeros((combinations_number, states), dtype=float)
        self._transition_matrices = np.zeros((combinations_number, states, states), dtype=int)

    def build_cims(self, state_res_times: np.ndarray, transition_matrices: np.ndarray) -> None:
        """Build the ``ConditionalIntensityMatrix`` objects given the state residence times and transitions matrices.
        Compute the cim coefficients. The class member ``_actual_cims`` will contain the computed cims,
        while the residence times and transition matrices members are released.

        :param state_res_times: the state residence times matrix
        :type state_res_times: numpy.ndArray
        :param transition_matrices: the transition matrices
        :type transition_matrices: numpy.ndArray
        """
        # Built in a local list so the method still works if ``_actual_cims`` is already an array.
        cims = []
        for state_res_time_vector, transition_matrix in zip(state_res_times, transition_matrices):
            cim_to_add = ConditionalIntensityMatrix(state_res_time_vector, transition_matrix)
            cim_to_add.compute_cim_coefficients()
            cims.append(cim_to_add)
        self._actual_cims = np.array(cims)
        self._transition_matrices = None
        self._state_residence_times = None

    def filter_cims_with_mask(self, mask_arr: np.ndarray, comb: typing.List) -> np.ndarray:
        """Filter the cims contained in the array ``_actual_cims`` given the boolean mask ``mask_arr`` and the index
        ``comb``.

        :param mask_arr: the boolean mask that indicates which parent to consider
        :type mask_arr: numpy.array
        :param comb: the state/s of the filtered parents
        :type comb: numpy.array
        :return: Array of ``ConditionalIntensityMatrix`` objects
        :rtype: numpy.array
        """
        if mask_arr.size <= 1:
            # With at most one mask entry every cim is returned unfiltered.
            return self._actual_cims
        # Rows of ``_p_combs`` whose masked columns all equal ``comb``.
        flat_indxs = np.argwhere(np.all(self._p_combs[:, mask_arr] == comb, axis=1)).ravel()
        return self._actual_cims[flat_indxs]

    @property
    def actual_cims(self) -> np.ndarray:
        return self._actual_cims

    @property
    def p_combs(self) -> np.ndarray:
        return self._p_combs

    def get_cims_number(self):
        """Returns the number of cims currently stored in ``_actual_cims``."""
        return len(self._actual_cims)

@ -0,0 +1,124 @@
import typing as ty
import numpy as np
class Structure(object):
"""Contains all the infos about the network structure(nodes labels, nodes caridinalites, edges, indexes)
:param nodes_labels_list: the symbolic names of the variables
:type nodes_labels_list: List
:param nodes_indexes_arr: the indexes of the nodes
:type nodes_indexes_arr: numpy.ndArray
:param nodes_vals_arr: the cardinalites of the nodes
:type nodes_vals_arr: numpy.ndArray
:param edges_list: the edges of the network
:type edges_list: List
:param total_variables_number: the total number of variables in the dataset
:type total_variables_number: int
"""
def __init__(self, nodes_labels_list: ty.List, nodes_indexes_arr: np.ndarray, nodes_vals_arr: np.ndarray,
             edges_list: ty.List, total_variables_number: int):
    """Constructor Method: stores the given labels, indexes, cardinalities, edges and
    total number of variables as they are (no copy is made).
    """
    self._nodes_labels_list = nodes_labels_list
    self._nodes_indexes_arr = nodes_indexes_arr
    self._nodes_vals_arr = nodes_vals_arr
    self._edges_list = edges_list
    self._total_variables_number = total_variables_number
def remove_node(self, node_id: str) -> None:
"""Remove the node ``node_id`` from all the class members.
The class member ``_total_variables_number`` since it refers to the total number of variables in the dataset.
"""
node_positional_indx = self._nodes_labels_list.index(node_id)
del self._nodes_labels_list[node_positional_indx]
self._nodes_indexes_arr = np.delete(self._nodes_indexes_arr, node_positional_indx)
self._nodes_vals_arr = np.delete(self._nodes_vals_arr, node_positional_indx)
self._edges_list = [(from_node, to_node) for (from_node, to_node) in self._edges_list if (from_node != node_id
and to_node != node_id)]
@property
def edges(self) -> ty.List:
return self._edges_list
@property
def nodes_labels(self) -> ty.List:
return self._nodes_labels_list
@property
def nodes_indexes(self) -> np.ndarray:
return self._nodes_indexes_arr
@property
def nodes_values(self) -> np.ndarray:
return self._nodes_vals_arr
@property
def total_variables_number(self) -> int:
return self._total_variables_number
def get_node_id(self, node_indx: int) -> str:
"""Given the ``node_index`` returns the node label.
:param node_indx: the node index
:type node_indx: int
:return: the node label
:rtype: string
"""
return self._nodes_labels_list[node_indx]
def clean_structure_edges(self):
self._edges_list = list()
def add_edge(self,edge: tuple):
self._edges_list.append(tuple)
print(self._edges_list)
def remove_edge(self,edge: tuple):
self._edges_list.remove(tuple)
def contains_edge(self,edge:tuple) -> bool:
return edge in self._edges_list
def get_node_indx(self, node_id: str) -> int:
"""Given the ``node_index`` returns the node label.
:param node_id: the node label
:type node_id: string
:return: the node index
:rtype: int
"""
pos_indx = self._nodes_labels_list.index(node_id)
return self._nodes_indexes_arr[pos_indx]
def get_positional_node_indx(self, node_id: str) -> int:
return self._nodes_labels_list.index(node_id)
def get_states_number(self, node: str) -> int:
"""Given the node label ``node`` returns the cardinality of the node.
:param node: the node label
:type node: string
:return: the node cardinality
:rtype: int
"""
pos_indx = self._nodes_labels_list.index(node)
return self._nodes_vals_arr[pos_indx]
def __repr__(self):
return "Variables:\n" + str(self._nodes_labels_list) +"\nValues:\n"+ str(self._nodes_vals_arr) +\
"\nEdges: \n" + str(self._edges_list)
def __eq__(self, other):
"""Overrides the default implementation"""
if isinstance(other, Structure):
return set(self._nodes_labels_list) == set(other._nodes_labels_list) and \
np.array_equal(self._nodes_vals_arr, other._nodes_vals_arr) and \
np.array_equal(self._nodes_indexes_arr, other._nodes_indexes_arr) and \
self._edges_list == other._edges_list
return False

@ -0,0 +1,45 @@
import typing
import numpy as np
class Trajectory(object):
    """Abstracts the infos about a complete set of trajectories, represented as a numpy array of doubles
    (the time deltas) and a numpy matrix of ints (the changes of states).

    :param list_of_columns: the list containing the times array and values matrix
    :type list_of_columns: List
    :param original_cols_number: total number of cols in the data
    :type original_cols_number: int
    :_actual_trajectory: the trajectory containing also the duplicated/shifted values
    :_times: the array containing the time deltas
    """

    def __init__(self, list_of_columns: typing.List, original_cols_number: int):
        """Constructor Method
        """
        times_column, values_matrix = list_of_columns[0], list_of_columns[1]
        self._times = times_column
        self._actual_trajectory = values_matrix
        self._original_cols_number = original_cols_number

    @property
    def trajectory(self) -> np.ndarray:
        """The first ``original_cols_number - 1`` columns of the complete trajectory."""
        kept_columns = self._original_cols_number - 1
        return self._actual_trajectory[:, :kept_columns]

    @property
    def complete_trajectory(self) -> np.ndarray:
        """The whole values matrix, as received by the constructor."""
        return self._actual_trajectory

    @property
    def times(self):
        """The times array, as received by the constructor."""
        return self._times

    def size(self):
        """Number of rows of the complete trajectory."""
        rows_number = self._actual_trajectory.shape[0]
        return rows_number

    def __repr__(self):
        return (f"Complete Trajectory Rows: {self.size()}\n{self.complete_trajectory!r}"
                f"\nTimes Rows:{self.times.size}\n{self.times!r}")

@ -0,0 +1,4 @@
from .abstract_importer import AbstractImporter
from .cache import Cache
from .json_importer import JsonImporter
from .sample_importer import SampleImporter

@ -0,0 +1,164 @@
import typing
from abc import ABC, abstractmethod
import numpy as np
import pandas as pd
import copy
#from sklearn.utils import resample
class AbstractImporter(ABC):
    """Abstract class that exposes all the necessary methods to process the trajectories and the net structure.

    :param file_path: the file path, or dataset name if you import already processed data
    :type file_path: str
    :param trajectory_list: Dataframe or numpy array containing the concatenation of all the processed trajectories
    :type trajectory_list: typing.Union[pandas.DataFrame, numpy.ndarray]
    :param variables: Dataframe containing the nodes labels and cardinalities
    :type variables: pandas.DataFrame
    :prior_net_structure: Dataframe containing the structure of the network (edges)
    :type prior_net_structure: pandas.DataFrame
    :_sorter: A list containing the variables labels in the SAME order as the columns in ``concatenated_samples``

    .. warning::
        The parameters ``variables`` and ``prior_net_structure`` HAVE to be properly constructed
        as Pandas Dataframes with the following structure:
        Header of _df_structure = [From_Node | To_Node]
        Header of _df_variables = [Variable_Label | Variable_Cardinality]
        See the tutorial on how to construct a correct ``concatenated_samples`` Dataframe/ndarray.

    .. note::
        See :class:``JsonImporter`` for an example implementation
    """

    def __init__(self, file_path: str = None, trajectory_list: typing.Union[pd.DataFrame, np.ndarray] = None,
                 variables: pd.DataFrame = None, prior_net_structure: pd.DataFrame = None):
        """Constructor
        """
        self._file_path = file_path
        self._df_samples_list = trajectory_list
        self._concatenated_samples = []
        self._df_variables = variables
        self._df_structure = prior_net_structure
        self._sorter = None
        super().__init__()

    @abstractmethod
    def build_sorter(self, trajecory_header: object) -> typing.List:
        """Initializes the ``_sorter`` class member from a trajectory dataframe, extracting the header of the frame
        and keeping ONLY the variables symbolic labels, cutting out the time label in the header.

        :param trajecory_header: an object that will be used to define the header
        :type trajecory_header: object
        :return: A list containing the processed header.
        :rtype: List
        """
        pass

    def compute_row_delta_sigle_samples_frame(self, sample_frame: pd.DataFrame,
                                              columns_header: typing.List, shifted_cols_header: typing.List) \
            -> pd.DataFrame:
        """Computes the difference between each value present in the time column.
        Copies and shifts by one position up all the values present in the remaining columns.

        :param sample_frame: the traj to be processed
        :type sample_frame: pandas.Dataframe
        :param columns_header: the original header of sample_frame
        :type columns_header: List
        :param shifted_cols_header: a copy of columns_header with changed names of the contents
        :type shifted_cols_header: List
        :return: The processed dataframe
        :rtype: pandas.Dataframe

        .. warning::
            the Dataframe ``sample_frame`` has to follow the column structure of this header:
            Header of sample_frame = [Time | Variable values]
        """
        # Work on a copy so the caller's frame is never modified.
        sample_frame = copy.deepcopy(sample_frame)
        # Row i of the first column becomes time[i + 1] - time[i].
        sample_frame.iloc[:, 0] = sample_frame.iloc[:, 0].diff().shift(-1)
        # Row i of the shifted columns holds the values of row i + 1 (0 in the last row).
        shifted_cols = sample_frame[columns_header].shift(-1).fillna(0).astype('int32')
        shifted_cols.columns = shifted_cols_header
        sample_frame = sample_frame.assign(**shifted_cols)
        # The last row has no successor, hence no valid delta: drop it.
        sample_frame.drop(sample_frame.tail(1).index, inplace=True)
        return sample_frame

    def compute_row_delta_in_all_samples_frames(self, df_samples_list: typing.List) -> None:
        """Calls the method ``compute_row_delta_sigle_samples_frame`` on every dataframe present in the list
        ``df_samples_list``.
        Concatenates the result in the dataframe ``concatenated_samples``

        :param df_samples_list: the dataframe's list to be processed and concatenated
        :type df_samples_list: List
        :raises RuntimeError: if the class member ``_sorter`` is empty or not set

        .. warning::
            The Dataframe sample_frame has to follow the column structure of this header:
            Header of sample_frame = [Time | Variable values]
            The class member self._sorter HAS to be properly INITIALIZED (See class members definition doc)

        .. note::
            After the call of this method the class member ``concatenated_samples`` will contain all processed
            and merged trajectories
        """
        if not self._sorter:
            raise RuntimeError("The class member self._sorter has to be INITIALIZED!")
        shifted_cols_header = [s + "S" for s in self._sorter]
        compute_row_delta = self.compute_row_delta_sigle_samples_frame
        proc_samples_list = [compute_row_delta(sample, self._sorter, shifted_cols_header)
                             for sample in df_samples_list]
        self._concatenated_samples = pd.concat(proc_samples_list)
        # The time column is conventionally labelled 'Time'; when no such column exists fall back to the
        # first column, which is the one treated as the time column when the deltas are computed.
        frame_columns = self._concatenated_samples.columns
        time_label = 'Time' if 'Time' in frame_columns else frame_columns[0]
        complete_header = self._sorter[:]
        complete_header.insert(0, time_label)
        complete_header.extend(shifted_cols_header)
        self._concatenated_samples = self._concatenated_samples[complete_header]

    def build_list_of_samples_array(self, concatenated_sample: pd.DataFrame) -> typing.List:
        """Builds a List containing the delta times numpy array, and the complete transitions matrix

        :param concatenated_sample: the dataframe/array from which the time, and transitions matrix have to be extracted
            and converted
        :type concatenated_sample: pandas.Dataframe
        :return: the resulting list of numpy arrays
        :rtype: List
        """
        concatenated_array = concatenated_sample.to_numpy()
        # First column -> times, every other column -> transitions matrix cast to int.
        columns_list = [concatenated_array[:, 0], concatenated_array[:, 1:].astype(int)]
        return columns_list

    def clear_concatenated_frame(self) -> None:
        """Removes all values in the dataframe concatenated_samples.
        """
        if isinstance(self._concatenated_samples, pd.DataFrame):
            self._concatenated_samples = self._concatenated_samples.iloc[0:0]

    @abstractmethod
    def dataset_id(self) -> object:
        """If the original dataset contains multiple datasets, this method returns a unique id to identify the current
        dataset
        """
        pass

    @property
    def concatenated_samples(self) -> pd.DataFrame:
        return self._concatenated_samples

    @property
    def variables(self) -> pd.DataFrame:
        return self._df_variables

    @property
    def structure(self) -> pd.DataFrame:
        return self._df_structure

    @property
    def sorter(self) -> typing.List:
        return self._sorter

    @property
    def file_path(self) -> str:
        return self._file_path

@ -0,0 +1,58 @@
import typing
from ..structure_graph.set_of_cims import SetOfCims
class Cache:
    """This class acts as a cache of ``SetOfCims`` objects for a node.

    :_list_of_sets_of_parents: a list of ``Sets`` objects of the parents to which the cim in cache at SAME
        index is related
    :_actual_cache: a list of setOfCims objects
    """

    def __init__(self):
        """Constructor Method
        """
        self._list_of_sets_of_parents = []
        self._actual_cache = []

    def find(self, parents_comb: typing.Set):
        """Looks up in cache the ``SetOfCims`` stored for the symbolic parents combination ``parents_comb``.

        :param parents_comb: the parents related to that ``SetOfCims``
        :type parents_comb: Set
        :return: A ``SetOfCims`` object if ``parents_comb`` is found in ``_list_of_sets_of_parents``.
            None otherwise.
        :rtype: SetOfCims
        """
        try:
            slot = self._list_of_sets_of_parents.index(parents_comb)
        except ValueError:
            # ``list.index`` signals a miss with ValueError.
            return None
        else:
            return self._actual_cache[slot]

    def put(self, parents_comb: typing.Set, socim: SetOfCims):
        """Stores in cache the ``SetOfCims`` object ``socim``, and the related symbolic index ``parents_comb`` in
        ``_list_of_sets_of_parents``, both at the same position.

        :param parents_comb: the symbolic set index
        :type parents_comb: Set
        :param socim: the related SetOfCims object
        :type socim: SetOfCims
        """
        self._list_of_sets_of_parents.append(parents_comb)
        self._actual_cache.append(socim)

    def clear(self):
        """Empties in place both ``_actual_cache`` and ``_list_of_sets_of_parents``.
        """
        self._list_of_sets_of_parents.clear()
        self._actual_cache.clear()

@ -0,0 +1,176 @@
import json
import typing
import pandas as pd
from .abstract_importer import AbstractImporter
class JsonImporter(AbstractImporter):
    """Concrete :class:`AbstractImporter` that reads and prepares data stored in a json file.

    :param file_path: the path of the file that contains the data to be imported
    :type file_path: string
    :param samples_label: the reference key for the samples in the trajectories
    :type samples_label: string
    :param structure_label: the reference key for the structure of the network data
    :type structure_label: string
    :param variables_label: the reference key for the cardinalities of the nodes data
    :type variables_label: string
    :param time_key: the key used to identify the timestamps in each trajectory
    :type time_key: string
    :param variables_key: the key used to identify the names of the variables in the net
    :type variables_key: string
    :_array_indx: the index of the outer JsonArray to extract the data from
    :type _array_indx: int
    :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory
    :_raw_data: The raw contents of the json file to import
    :type _raw_data: List
    """

    def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
                 variables_key: str):
        """Constructor method

        .. note::
            This constructor calls also the method ``read_json_file()``, so after the construction of the object
            the class member ``_raw_data`` will contain the raw imported json data.
        """
        self._samples_label = samples_label
        self._structure_label = structure_label
        self._variables_label = variables_label
        self._time_key = time_key
        self._variables_key = variables_key
        self._df_samples_list = None
        self._array_indx = None
        super().__init__(file_path)
        self._raw_data = self.read_json_file()

    def import_data(self, indx: int) -> None:
        """Imports trajectories, structure and variables found at position ``indx`` of the raw data.

        The trajectories are processed into ``concatenated_samples`` and the list of the single
        trajectory dataframes is emptied afterwards.

        :param indx: the index of the outer JsonArray to extract the data from
        :type indx: int
        """
        self._array_indx = indx
        raw = self._raw_data
        self._df_samples_list = self.import_trajectories(raw)
        first_frame = self._df_samples_list[0]
        self._sorter = self.build_sorter(first_frame)
        self.compute_row_delta_in_all_samples_frames(self._df_samples_list)
        self.clear_data_frame_list()
        self._df_structure = self.import_structure(raw)
        self._df_variables = self.import_variables(raw)

    def import_trajectories(self, raw_data: typing.List) -> typing.List:
        """Imports the trajectories from the list of dicts ``raw_data``.

        :param raw_data: List of Dicts
        :type raw_data: List
        :return: List of dataframes containing all the trajectories
        :rtype: List
        """
        return self.normalize_trajectories(raw_data, self._array_indx, self._samples_label)

    def import_structure(self, raw_data: typing.List) -> pd.DataFrame:
        """Imports in a dataframe the data in the list raw_data at the key ``_structure_label``

        :param raw_data: List of Dicts
        :type raw_data: List
        :return: Dataframe containing the starting node and ending node of every arc of the network
        :rtype: pandas.Dataframe
        """
        return self.one_level_normalizing(raw_data, self._array_indx, self._structure_label)

    def import_variables(self, raw_data: typing.List) -> pd.DataFrame:
        """Imports the data in ``raw_data`` at the key ``_variables_label``.

        :param raw_data: List of Dicts
        :type raw_data: List
        :return: Dataframe containing the variables symbolic labels and their cardinalities
        :rtype: pandas.Dataframe
        """
        return self.one_level_normalizing(raw_data, self._array_indx, self._variables_label)

    def read_json_file(self) -> typing.List:
        """Reads the JSON file in the path ``self._file_path``.

        :return: The contents of the json file
        :rtype: List
        """
        with open(self._file_path) as json_file:
            return json.load(json_file)

    def one_level_normalizing(self, raw_data: typing.List, indx: int, key: str) -> pd.DataFrame:
        """Extracts the one-level nested data in the list ``raw_data`` at the index ``indx`` at the key ``key``.

        :param raw_data: List of Dicts
        :type raw_data: List
        :param indx: The index of the array from which the data have to be extracted
        :type indx: int
        :param key: the key for the Dicts from which extract data
        :type key: string
        :return: A normalized dataframe
        :rtype: pandas.Dataframe
        """
        selected_entry = raw_data[indx][key]
        return pd.DataFrame(selected_entry)

    def normalize_trajectories(self, raw_data: typing.List, indx: int, trajectories_key: str) -> typing.List:
        """Extracts the trajectories in ``raw_data`` at the index ``indx`` at the key ``trajectories_key``.

        :param raw_data: List of Dicts
        :type raw_data: List
        :param indx: The index of the array from which the data have to be extracted
        :type indx: int
        :param trajectories_key: the key of the trajectories objects
        :type trajectories_key: string
        :return: A list of dataframes containing the trajectories
        :rtype: List
        """
        return [pd.DataFrame(trajectory) for trajectory in raw_data[indx][trajectories_key]]

    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
        """Implements the abstract method build_sorter of the :class:`AbstractImporter` for this dataset:
        the header of ``sample_frame`` without the ``_time_key`` label.
        """
        variables_labels = list(sample_frame.columns.values)
        variables_labels.remove(self._time_key)
        return variables_labels

    def clear_data_frame_list(self) -> None:
        """Removes all values present in the dataframes in the list ``_df_samples_list``.
        """
        for position, frame in enumerate(self._df_samples_list):
            self._df_samples_list[position] = frame.iloc[0:0]

    def dataset_id(self) -> object:
        """Returns the index of the outer JsonArray the data were extracted from."""
        return self._array_indx

    def import_sampled_cims(self, raw_data: typing.List, indx: int, cims_key: str) -> typing.Dict:
        """Imports the synthetic CIMS in the dataset in a dictionary, using variables labels
        as keys for the set of CIMS of a particular node.

        :param raw_data: List of Dicts
        :type raw_data: List
        :param indx: The index of the array from which the data have to be extracted
        :type indx: int
        :param cims_key: the key where the json object cims are placed
        :type cims_key: string
        :return: a dictionary containing the sampled CIMS for all the variables in the net
        :rtype: Dictionary
        """
        all_nodes_cims = raw_data[indx][cims_key]
        return {
            var: [pd.DataFrame(all_nodes_cims[var][p_comb]).to_numpy() for p_comb in all_nodes_cims[var]]
            for var in all_nodes_cims
        }

@ -0,0 +1,65 @@
import json
import typing
import pandas as pd
import numpy as np
from .abstract_importer import AbstractImporter
class SampleImporter(AbstractImporter):
    """Implements the abstract methods of AbstractImporter and adds all the necessary methods to process and prepare
    the data loaded directly by using DataFrame

    :param trajectory_list: the data that describes the trajectories
    :type trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List]
    :param variables: the data that describes the variables with name and cardinality
    :type variables: typing.Union[pd.DataFrame, np.ndarray, typing.List]
    :param prior_net_structure: the data of the real structure, if it exists
    :type prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List]
    :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory
    """

    def __init__(self,
                 trajectory_list: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None,
                 variables: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None,
                 prior_net_structure: typing.Union[pd.DataFrame, np.ndarray, typing.List] = None):
        """If ``variables`` or ``prior_net_structure`` are given as list or numpy array, they are converted
        to DataFrame before being handed to the parent constructor; ``trajectory_list`` is passed as is.
        """
        if isinstance(variables, (list, np.ndarray)):
            variables = pd.DataFrame(variables)
        # Each argument must be checked on its own: testing ``variables`` here again would never
        # convert the structure, because ``variables`` is already a DataFrame at this point.
        if isinstance(prior_net_structure, (list, np.ndarray)):
            prior_net_structure = pd.DataFrame(prior_net_structure)

        super(SampleImporter, self).__init__(trajectory_list=trajectory_list,
                                             variables=variables,
                                             prior_net_structure=prior_net_structure)

    def import_data(self, header_column=None):
        """Sets the ``_sorter`` member and processes the trajectories into ``concatenated_samples``.

        :param header_column: the variables labels to use as sorter; when None the sorter is built
            from the first element of ``_df_samples_list`` through ``build_sorter``
        """
        if header_column is not None:
            self._sorter = header_column
        else:
            self._sorter = self.build_sorter(self._df_samples_list[0])

        samples_list = self._df_samples_list

        if isinstance(samples_list, np.ndarray):
            samples_list = samples_list.tolist()

        self.compute_row_delta_in_all_samples_frames(samples_list)

    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
        """Implements the abstract method build_sorter of the :class:`AbstractImporter` in order to get the ordered
        variables list: the header of ``sample_frame`` without its first label.
        """
        columns_header = list(sample_frame.columns.values)
        del columns_header[0]
        return columns_header

    def dataset_id(self) -> object:
        """Implements the abstract method of :class:`AbstractImporter`; no id is defined here, so None is returned."""
        pass

@ -0,0 +1,20 @@
from setuptools import setup, find_packages
# Package metadata and build configuration for PyCTBN.
setup(name='PyCTBN',
      version='1.0',
      url='https://github.com/philipMartini/PyCTBN',
      license='MIT',
      # setuptools expects plain strings for these two fields (not lists);
      # several people are listed comma-separated.
      author='Alessandro Bregoli, Filippo Martini, Luca Moretti',
      author_email='a.bregoli1@campus.unimib.it, f.martini@campus.unimib.it, lucamoretti96@gmail.com',
      description='A Continuous Time Bayesian Networks Library',
      packages=find_packages('.', exclude=['tests']),
      # All requirements are resolved by name from the package index; the former ``dependency_links``
      # entries pointed at repository home pages and are not honoured by pip.
      install_requires=[
          'numpy', 'pandas', 'networkx', 'scipy', 'matplotlib', 'tqdm'],
      zip_safe=False,
      python_requires='>=3.6')

@ -0,0 +1,963 @@
<?xml version="1.0" ?>
<coverage version="5.2" timestamp="1597406229874" lines-valid="891" lines-covered="638" line-rate="0.716" branches-covered="0" branches-valid="0" branch-rate="0" complexity="0">
<!-- Generated by coverage.py: https://coverage.readthedocs.io -->
<!-- Based on https://raw.githubusercontent.com/cobertura/web/master/htdocs/xml/coverage-04.dtd -->
<sources>
<source></source>
</sources>
<packages>
<package name="." line-rate="1" branch-rate="0" complexity="0">
<PyCTBN>
<class name="test_json_importer.py" filename="test_json_importer.py" complexity="0" line-rate="1" branch-rate="0">
<methods/>
<lines>
<line number="1" hits="1"/>
<line number="2" hits="1"/>
<line number="3" hits="1"/>
<line number="4" hits="1"/>
<line number="5" hits="1"/>
<line number="6" hits="1"/>
<line number="7" hits="1"/>
<line number="8" hits="1"/>
<line number="10" hits="1"/>
<line number="12" hits="1"/>
<line number="16" hits="1"/>
<line number="18" hits="1"/>
<line number="19" hits="1"/>
<line number="20" hits="1"/>
<line number="22" hits="1"/>
<line number="23" hits="1"/>
<line number="24" hits="1"/>
<line number="25" hits="1"/>
<line number="26" hits="1"/>
<line number="27" hits="1"/>
<line number="28" hits="1"/>
<line number="29" hits="1"/>
<line number="30" hits="1"/>
<line number="31" hits="1"/>
<line number="32" hits="1"/>
<line number="33" hits="1"/>
<line number="34" hits="1"/>
<line number="36" hits="1"/>
<line number="37" hits="1"/>
<line number="38" hits="1"/>
<line number="39" hits="1"/>
<line number="40" hits="1"/>
<line number="41" hits="1"/>
<line number="42" hits="1"/>
<line number="43" hits="1"/>
<line number="44" hits="1"/>
<line number="45" hits="1"/>
<line number="47" hits="1"/>
<line number="48" hits="1"/>
<line number="49" hits="1"/>
<line number="50" hits="1"/>
<line number="51" hits="1"/>
<line number="53" hits="1"/>
<line number="54" hits="1"/>
<line number="55" hits="1"/>
<line number="57" hits="1"/>
<line number="58" hits="1"/>
<line number="59" hits="1"/>
<line number="61" hits="1"/>
<line number="62" hits="1"/>
<line number="63" hits="1"/>
<line number="64" hits="1"/>
<line number="66" hits="1"/>
<line number="67" hits="1"/>
<line number="68" hits="1"/>
<line number="69" hits="1"/>
<line number="71" hits="1"/>
<line number="72" hits="1"/>
<line number="73" hits="1"/>
<line number="74" hits="1"/>
<line number="75" hits="1"/>
<line number="76" hits="1"/>
<line number="77" hits="1"/>
<line number="78" hits="1"/>
<line number="80" hits="1"/>
<line number="82" hits="1"/>
<line number="84" hits="1"/>
<line number="85" hits="1"/>
<line number="86" hits="1"/>
<line number="87" hits="1"/>
<line number="88" hits="1"/>
<line number="89" hits="1"/>
<line number="90" hits="1"/>
<line number="92" hits="1"/>
<line number="93" hits="1"/>
<line number="94" hits="1"/>
<line number="95" hits="1"/>
<line number="96" hits="1"/>
<line number="97" hits="1"/>
<line number="98" hits="1"/>
<line number="99" hits="1"/>
<line number="101" hits="1"/>
<line number="102" hits="1"/>
<line number="103" hits="1"/>
<line number="104" hits="1"/>
<line number="105" hits="1"/>
<line number="107" hits="1"/>
<line number="108" hits="1"/>
<line number="109" hits="1"/>
<line number="110" hits="1"/>
<line number="111" hits="1"/>
<line number="112" hits="1"/>
<line number="113" hits="1"/>
<line number="114" hits="1"/>
<line number="115" hits="1"/>
<line number="116" hits="1"/>
<line number="117" hits="1"/>
<line number="118" hits="1"/>
<line number="119" hits="1"/>
<line number="120" hits="1"/>
<line number="121" hits="1"/>
<line number="122" hits="1"/>
<line number="124" hits="1"/>
<line number="125" hits="1"/>
<line number="126" hits="1"/>
<line number="127" hits="1"/>
<line number="128" hits="1"/>
<line number="129" hits="1"/>
<line number="131" hits="1"/>
<line number="132" hits="1"/>
<line number="133" hits="1"/>
<line number="134" hits="1"/>
<line number="136" hits="1"/>
<line number="138" hits="1"/>
<line number="139" hits="1"/>
<line number="140" hits="1"/>
<line number="141" hits="1"/>
<line number="142" hits="1"/>
<line number="143" hits="1"/>
<line number="145" hits="1"/>
<line number="146" hits="1"/>
<line number="147" hits="1"/>
<line number="154" hits="1"/>
<line number="156" hits="1"/>
<line number="157" hits="1"/>
<line number="158" hits="1"/>
<line number="160" hits="1"/>
<line number="161" hits="1"/>
<line number="162" hits="1"/>
<line number="163" hits="1"/>
<line number="164" hits="1"/>
<line number="166" hits="1"/>
<line number="169" hits="1"/>
<line number="170" hits="1"/>
</lines>
</class>
<class name="test_sample_path.py" filename="test_sample_path.py" complexity="0" line-rate="1" branch-rate="0">
<methods/>
<lines>
<line number="1" hits="1"/>
<line number="2" hits="1"/>
<line number="3" hits="1"/>
<line number="4" hits="1"/>
<line number="5" hits="1"/>
<line number="6" hits="1"/>
<line number="7" hits="1"/>
<line number="8" hits="1"/>
<line number="9" hits="1"/>
<line number="12" hits="1"/>
<line number="14" hits="1"/>
<line number="15" hits="1"/>
<line number="16" hits="1"/>
<line number="17" hits="1"/>
<line number="19" hits="1"/>
<line number="20" hits="1"/>
<line number="21" hits="1"/>
<line number="22" hits="1"/>
<line number="23" hits="1"/>
<line number="24" hits="1"/>
<line number="25" hits="1"/>
<line number="26" hits="1"/>
<line number="27" hits="1"/>
<line number="28" hits="1"/>
<line number="29" hits="1"/>
<line number="30" hits="1"/>
<line number="33" hits="1"/>
<line number="34" hits="1"/>
</lines>
</class>
<class name="test_trajectory.py" filename="test_trajectory.py" complexity="0" line-rate="1" branch-rate="0">
<methods/>
<lines>
<line number="1" hits="1"/>
<line number="2" hits="1"/>
<line number="3" hits="1"/>
<line number="4" hits="1"/>
<line number="6" hits="1"/>
<line number="9" hits="1"/>
<line number="11" hits="1"/>
<line number="12" hits="1"/>
<line number="13" hits="1"/>
<line number="14" hits="1"/>
<line number="15" hits="1"/>
<line number="16" hits="1"/>
<line number="17" hits="1"/>
<line number="18" hits="1"/>
<line number="20" hits="1"/>
<line number="21" hits="1"/>
<line number="22" hits="1"/>
<line number="24" hits="1"/>
<line number="25" hits="1"/>
<line number="26" hits="1"/>
<line number="27" hits="1"/>
<line number="28" hits="1"/>
<line number="30" hits="1"/>
<line number="31" hits="1"/>
<line number="32" hits="1"/>
<line number="33" hits="1"/>
<line number="35" hits="1"/>
<line number="36" hits="1"/>
<line number="37" hits="1"/>
<line number="38" hits="1"/>
<line number="40" hits="1"/>
<line number="41" hits="1"/>
<line number="42" hits="1"/>
<line number="43" hits="1"/>
<line number="46" hits="1"/>
<line number="47" hits="1"/>
</lines>
</class>
</PyCTBN>
</package>
<package name=".Users.Zalum.Desktop.Tesi.CTBN_Project.PyCTBN.PyCTBN" line-rate="0.9059" branch-rate="0" complexity="0">
<PyCTBN>
<class name="abstract_importer.py" filename="/Users/Zalum/Desktop/Tesi/CTBN_Project/PyCTBN/PyCTBN/abstract_importer.py" complexity="0" line-rate="0.8182" branch-rate="0">
<methods/>
<lines>
<line number="1" hits="1"/>
<line number="4" hits="1"/>
<line number="12" hits="1"/>
<line number="13" hits="1"/>
<line number="14" hits="1"/>
<line number="16" hits="1"/>
<line number="17" hits="1"/>
<line number="18" hits="0"/>
<line number="20" hits="1"/>
<line number="21" hits="1"/>
<line number="22" hits="0"/>
</lines>
</class>
<class name="abstract_sample_path.py" filename="/Users/Zalum/Desktop/Tesi/CTBN_Project/PyCTBN/PyCTBN/abstract_sample_path.py" complexity="0" line-rate="0.8571" branch-rate="0">
<methods/>
<lines>
<line number="1" hits="1"/>
<line number="2" hits="1"/>
<line number="5" hits="1"/>
<line number="7" hits="1"/>
<line number="8" hits="1"/>
<line number="9" hits="1"/>
<line number="10" hits="1"/>
<line number="11" hits="1"/>
<line number="13" hits="1"/>
<line number="14" hits="1"/>
<line number="25" hits="0"/>
<line number="27" hits="1"/>
<line number="28" hits="1"/>
<line number="37" hits="0"/>
</lines>
</class>
<class name="json_importer.py" filename="/Users/Zalum/Desktop/Tesi/CTBN_Project/PyCTBN/PyCTBN/json_importer.py" complexity="0" line-rate="1" branch-rate="0">
<methods/>
<lines>
<line number="2" hits="1"/>
<line number="3" hits="1"/>
<line number="4" hits="1"/>
<line number="5" hits="1"/>
<line number="8" hits="1"/>
<line number="30" hits="1"/>
<line number="32" hits="1"/>
<line number="33" hits="1"/>
<line number="34" hits="1"/>
<line number="35" hits="1"/>
<line number="36" hits="1"/>
<line number="37" hits="1"/>
<line number="38" hits="1"/>
<line number="39" hits="1"/>
<line number="40" hits="1"/>
<line number="41" hits="1"/>
<line number="42" hits="1"/>
<line number="44" hits="1"/>
<line number="52" hits="1"/>
<line number="53" hits="1"/>
<line number="54" hits="1"/>
<line number="55" hits="1"/>
<line number="56" hits="1"/>
<line number="57" hits="1"/>
<line number="59" hits="1"/>
<line number="67" hits="1"/>
<line number="69" hits="1"/>
<line number="78" hits="1"/>
<line number="81" hits="1"/>
<line number="92" hits="1"/>
<line number="96" hits="1"/>
<line number="97" hits="1"/>
<line number="98" hits="1"/>
<line number="99" hits="1"/>
<line number="100" hits="1"/>
<line number="102" hits="1"/>
<line number="116" hits="1"/>
<line number="117" hits="1"/>
<line number="118" hits="1"/>
<line number="122" hits="1"/>
<line number="134" hits="1"/>
<line number="136" hits="1"/>
<line number="149" hits="1"/>
<line number="150" hits="1"/>
<line number="151" hits="1"/>
<line number="152" hits="1"/>
<line number="153" hits="1"/>
<line number="154" hits="1"/>
<line number="156" hits="1"/>
<line number="171" hits="1"/>
<line number="172" hits="1"/>
<line number="174" hits="1"/>
<line number="175" hits="1"/>
<line number="176" hits="1"/>
<line number="177" hits="1"/>
<line number="179" hits="1"/>
<line number="189" hits="1"/>
<line number="190" hits="1"/>
<line number="191" hits="1"/>
<line number="193" hits="1"/>
<line number="194" hits="1"/>
<line number="195" hits="1"/>
<line number="196" hits="1"/>
<line number="198" hits="1"/>
<line number="201" hits="1"/>
<line number="209" hits="1"/>
<line number="212" hits="1"/>
<line number="214" hits="1"/>
<line number="222" hits="1"/>
<line number="224" hits="1"/>
<line number="228" hits="1"/>
<line number="229" hits="1"/>
<line number="231" hits="1"/>
<line number="232" hits="1"/>
<line number="233" hits="1"/>
<line number="234" hits="1"/>
<line number="235" hits="1"/>
<line number="236" hits="1"/>
<line number="237" hits="1"/>
<line number="238" hits="1"/>
<line number="240" hits="1"/>
<line number="241" hits="1"/>
<line number="242" hits="1"/>
<line number="244" hits="1"/>
<line number="245" hits="1"/>
<line number="246" hits="1"/>
<line number="248" hits="1"/>
<line number="249" hits="1"/>
<line number="250" hits="1"/>
</lines>
</class>
<class name="sample_path.py" filename="/Users/Zalum/Desktop/Tesi/CTBN_Project/PyCTBN/PyCTBN/sample_path.py" complexity="0" line-rate="0.963" branch-rate="0">
<methods/>
<lines>
<line number="1" hits="1"/>
<line number="2" hits="1"/>
<line number="3" hits="1"/>
<line number="4" hits="1"/>
<line number="7" hits="1"/>
<line number="22" hits="1"/>
<line number="24" hits="1"/>
<line number="27" hits="1"/>
<line number="29" hits="1"/>
<line number="39" hits="1"/>
<line number="40" hits="1"/>
<line number="44" hits="1"/>
<line number="46" hits="1"/>
<line number="54" hits="1"/>
<line number="55" hits="1"/>
<line number="57" hits="1"/>
<line number="58" hits="1"/>
<line number="59" hits="1"/>
<line number="60" hits="1"/>
<line number="63" hits="1"/>
<line number="64" hits="1"/>
<line number="65" hits="1"/>
<line number="67" hits="1"/>
<line number="68" hits="1"/>
<line number="69" hits="1"/>
<line number="71" hits="1"/>
<line number="72" hits="0"/>
</lines>
</class>
<class name="structure.py" filename="/Users/Zalum/Desktop/Tesi/CTBN_Project/PyCTBN/PyCTBN/structure.py" complexity="0" line-rate="0.65" branch-rate="0">
<methods/>
<lines>
<line number="1" hits="1"/>
<line number="2" hits="1"/>
<line number="5" hits="1"/>
<line number="16" hits="1"/>
<line number="18" hits="1"/>
<line number="19" hits="1"/>
<line number="20" hits="1"/>
<line number="21" hits="1"/>
<line number="22" hits="1"/>
<line number="24" hits="1"/>
<line number="25" hits="1"/>
<line number="28" hits="0"/>
<line number="30" hits="1"/>
<line number="31" hits="1"/>
<line number="32" hits="0"/>
<line number="34" hits="1"/>
<line number="35" hits="1"/>
<line number="36" hits="0"/>
<line number="38" hits="1"/>
<line number="39" hits="1"/>
<line number="40" hits="0"/>
<line number="42" hits="1"/>
<line number="43" hits="1"/>
<line number="44" hits="0"/>
<line number="46" hits="1"/>
<line number="47" hits="0"/>
<line number="49" hits="1"/>
<line number="50" hits="0"/>
<line number="51" hits="0"/>
<line number="53" hits="1"/>
<line number="54" hits="0"/>
<line number="56" hits="1"/>
<line number="57" hits="0"/>
<line number="58" hits="0"/>
<line number="60" hits="1"/>
<line number="61" hits="1"/>
<line number="64" hits="1"/>
<line number="66" hits="0"/>
<line number="67" hits="0"/>
<line number="72" hits="0"/>
</lines>
</class>
<class name="trajectory.py" filename="/Users/Zalum/Desktop/Tesi/CTBN_Project/PyCTBN/PyCTBN/trajectory.py" complexity="0" line-rate="1" branch-rate="0">
<methods/>
<lines>
<line number="2" hits="1"/>
<line number="5" hits="1"/>
<line number="17" hits="1"/>
<line number="18" hits="1"/>
<line number="19" hits="1"/>
<line number="20" hits="1"/>
<line number="21" hits="1"/>
<line number="22" hits="1"/>
<line number="24" hits="1"/>
<line number="25" hits="1"/>
<line number="32" hits="1"/>
<line number="34" hits="1"/>
<line number="35" hits="1"/>
<line number="42" hits="1"/>
<line number="44" hits="1"/>
<line number="45" hits="1"/>
<line number="46" hits="1"/>
<line number="48" hits="1"/>
<line number="49" hits="1"/>
<line number="51" hits="1"/>
<line number="52" hits="1"/>
</lines>
</class>
</PyCTBN>
</package>
<package name=".Users.Zalum.Library.Python.3.8.lib.python.site-packages" line-rate="0.5234" branch-rate="0" complexity="0">
<PyCTBN>
<class name="six.py" filename="/Users/Zalum/Library/Python/3.8/lib/python/site-packages/six.py" complexity="0" line-rate="0.5234" branch-rate="0">
<methods/>
<lines>
<line number="21" hits="1"/>
<line number="23" hits="1"/>
<line number="25" hits="1"/>
<line number="26" hits="1"/>
<line number="27" hits="1"/>
<line number="28" hits="1"/>
<line number="29" hits="1"/>
<line number="31" hits="1"/>
<line number="32" hits="1"/>
<line number="36" hits="1"/>
<line number="37" hits="1"/>
<line number="38" hits="1"/>
<line number="40" hits="1"/>
<line number="41" hits="1"/>
<line number="42" hits="1"/>
<line number="43" hits="1"/>
<line number="44" hits="1"/>
<line number="45" hits="1"/>
<line number="47" hits="1"/>
<line number="49" hits="0"/>
<line number="50" hits="0"/>
<line number="51" hits="0"/>
<line number="52" hits="0"/>
<line number="53" hits="0"/>
<line number="55" hits="0"/>
<line number="57" hits="0"/>
<line number="60" hits="0"/>
<line number="62" hits="0"/>
<line number="63" hits="0"/>
<line number="64" hits="0"/>
<line number="65" hits="0"/>
<line number="66" hits="0"/>
<line number="68" hits="0"/>
<line number="71" hits="0"/>
<line number="72" hits="0"/>
<line number="75" hits="1"/>
<line number="77" hits="1"/>
<line number="80" hits="1"/>
<line number="82" hits="1"/>
<line number="83" hits="1"/>
<line number="86" hits="1"/>
<line number="88" hits="1"/>
<line number="89" hits="1"/>
<line number="91" hits="1"/>
<line number="92" hits="1"/>
<line number="93" hits="1"/>
<line number="94" hits="1"/>
<line number="97" hits="1"/>
<line number="98" hits="0"/>
<line number="99" hits="0"/>
<line number="100" hits="1"/>
<line number="103" hits="1"/>
<line number="105" hits="1"/>
<line number="106" hits="1"/>
<line number="107" hits="1"/>
<line number="108" hits="1"/>
<line number="109" hits="1"/>
<line number="110" hits="1"/>
<line number="112" hits="0"/>
<line number="114" hits="1"/>
<line number="115" hits="1"/>
<line number="117" hits="1"/>
<line number="118" hits="0"/>
<line number="119" hits="0"/>
<line number="120" hits="0"/>
<line number="121" hits="0"/>
<line number="124" hits="1"/>
<line number="126" hits="1"/>
<line number="127" hits="1"/>
<line number="128" hits="1"/>
<line number="130" hits="1"/>
<line number="131" hits="0"/>
<line number="132" hits="0"/>
<line number="133" hits="0"/>
<line number="136" hits="1"/>
<line number="139" hits="1"/>
<line number="141" hits="1"/>
<line number="142" hits="1"/>
<line number="143" hits="1"/>
<line number="144" hits="1"/>
<line number="145" hits="0"/>
<line number="146" hits="1"/>
<line number="147" hits="1"/>
<line number="148" hits="1"/>
<line number="149" hits="1"/>
<line number="151" hits="1"/>
<line number="152" hits="1"/>
<line number="154" hits="0"/>
<line number="155" hits="0"/>
<line number="156" hits="0"/>
<line number="157" hits="0"/>
<line number="159" hits="1"/>
<line number="160" hits="0"/>
<line number="161" hits="0"/>
<line number="164" hits="1"/>
<line number="173" hits="1"/>
<line number="174" hits="1"/>
<line number="175" hits="1"/>
<line number="177" hits="1"/>
<line number="178" hits="1"/>
<line number="179" hits="1"/>
<line number="181" hits="1"/>
<line number="182" hits="1"/>
<line number="184" hits="1"/>
<line number="185" hits="1"/>
<line number="186" hits="1"/>
<line number="187" hits="1"/>
<line number="189" hits="1"/>
<line number="190" hits="1"/>
<line number="191" hits="1"/>
<line number="192" hits="0"/>
<line number="193" hits="0"/>
<line number="195" hits="1"/>
<line number="196" hits="1"/>
<line number="198" hits="1"/>
<line number="199" hits="1"/>
<line number="200" hits="1"/>
<line number="201" hits="1"/>
<line number="202" hits="1"/>
<line number="203" hits="0"/>
<line number="205" hits="1"/>
<line number="206" hits="1"/>
<line number="207" hits="1"/>
<line number="209" hits="1"/>
<line number="216" hits="1"/>
<line number="218" hits="1"/>
<line number="222" hits="0"/>
<line number="223" hits="0"/>
<line number="224" hits="1"/>
<line number="226" hits="1"/>
<line number="229" hits="1"/>
<line number="232" hits="1"/>
<line number="235" hits="1"/>
<line number="307" hits="1"/>
<line number="308" hits="0"/>
<line number="312" hits="1"/>
<line number="313" hits="1"/>
<line number="314" hits="1"/>
<line number="315" hits="1"/>
<line number="316" hits="1"/>
<line number="318" hits="1"/>
<line number="320" hits="1"/>
<line number="321" hits="1"/>
<line number="324" hits="1"/>
<line number="329" hits="1"/>
<line number="356" hits="1"/>
<line number="357" hits="1"/>
<line number="358" hits="1"/>
<line number="360" hits="1"/>
<line number="362" hits="1"/>
<line number="366" hits="1"/>
<line number="371" hits="1"/>
<line number="376" hits="1"/>
<line number="377" hits="1"/>
<line number="378" hits="1"/>
<line number="380" hits="1"/>
<line number="382" hits="1"/>
<line number="386" hits="1"/>
<line number="391" hits="1"/>
<line number="428" hits="1"/>
<line number="429" hits="1"/>
<line number="430" hits="1"/>
<line number="432" hits="1"/>
<line number="434" hits="1"/>
<line number="438" hits="1"/>
<line number="443" hits="1"/>
<line number="449" hits="1"/>
<line number="450" hits="1"/>
<line number="451" hits="1"/>
<line number="453" hits="1"/>
<line number="455" hits="1"/>
<line number="459" hits="1"/>
<line number="464" hits="1"/>
<line number="467" hits="1"/>
<line number="468" hits="1"/>
<line number="469" hits="1"/>
<line number="471" hits="1"/>
<line number="473" hits="1"/>
<line number="477" hits="1"/>
<line number="480" hits="1"/>
<line number="481" hits="1"/>
<line number="482" hits="1"/>
<line number="483" hits="1"/>
<line number="484" hits="1"/>
<line number="485" hits="1"/>
<line number="487" hits="1"/>
<line number="488" hits="0"/>
<line number="490" hits="1"/>
<line number="494" hits="1"/>
<line number="496" hits="0"/>
<line number="499" hits="1"/>
<line number="501" hits="0"/>
<line number="502" hits="0"/>
<line number="503" hits="0"/>
<line number="504" hits="0"/>
<line number="505" hits="0"/>
<line number="506" hits="0"/>
<line number="507" hits="0"/>
<line number="510" hits="1"/>
<line number="511" hits="1"/>
<line number="512" hits="1"/>
<line number="514" hits="1"/>
<line number="515" hits="1"/>
<line number="516" hits="1"/>
<line number="517" hits="1"/>
<line number="519" hits="0"/>
<line number="520" hits="0"/>
<line number="522" hits="0"/>
<line number="523" hits="0"/>
<line number="524" hits="0"/>
<line number="525" hits="0"/>
<line number="528" hits="1"/>
<line number="529" hits="1"/>
<line number="530" hits="0"/>
<line number="531" hits="0"/>
<line number="532" hits="0"/>
<line number="533" hits="1"/>
<line number="536" hits="1"/>
<line number="537" hits="1"/>
<line number="538" hits="0"/>
<line number="539" hits="0"/>
<line number="540" hits="0"/>
<line number="543" hits="1"/>
<line number="544" hits="1"/>
<line number="545" hits="0"/>
<line number="547" hits="1"/>
<line number="549" hits="1"/>
<line number="550" hits="0"/>
<line number="552" hits="1"/>
<line number="554" hits="0"/>
<line number="555" hits="0"/>
<line number="557" hits="0"/>
<line number="558" hits="0"/>
<line number="560" hits="0"/>
<line number="561" hits="0"/>
<line number="563" hits="0"/>
<line number="565" hits="0"/>
<line number="566" hits="0"/>
<line number="568" hits="0"/>
<line number="569" hits="1"/>
<line number="573" hits="1"/>
<line number="574" hits="1"/>
<line number="575" hits="1"/>
<line number="576" hits="1"/>
<line number="577" hits="1"/>
<line number="578" hits="1"/>
<line number="581" hits="1"/>
<line number="582" hits="1"/>
<line number="583" hits="0"/>
<line number="585" hits="1"/>
<line number="586" hits="0"/>
<line number="588" hits="1"/>
<line number="589" hits="0"/>
<line number="591" hits="1"/>
<line number="592" hits="0"/>
<line number="594" hits="1"/>
<line number="596" hits="1"/>
<line number="598" hits="1"/>
<line number="600" hits="0"/>
<line number="601" hits="0"/>
<line number="603" hits="0"/>
<line number="604" hits="0"/>
<line number="606" hits="0"/>
<line number="607" hits="0"/>
<line number="609" hits="0"/>
<line number="610" hits="0"/>
<line number="612" hits="0"/>
<line number="614" hits="0"/>
<line number="616" hits="0"/>
<line number="618" hits="1"/>
<line number="619" hits="1"/>
<line number="620" hits="1"/>
<line number="622" hits="1"/>
<line number="626" hits="1"/>
<line number="627" hits="1"/>
<line number="628" hits="0"/>
<line number="630" hits="1"/>
<line number="631" hits="0"/>
<line number="632" hits="1"/>
<line number="633" hits="1"/>
<line number="634" hits="1"/>
<line number="635" hits="1"/>
<line number="636" hits="1"/>
<line number="637" hits="1"/>
<line number="638" hits="1"/>
<line number="639" hits="1"/>
<line number="640" hits="1"/>
<line number="641" hits="1"/>
<line number="642" hits="1"/>
<line number="643" hits="1"/>
<line number="644" hits="1"/>
<line number="645" hits="0"/>
<line number="646" hits="0"/>
<line number="647" hits="0"/>
<line number="649" hits="1"/>
<line number="650" hits="1"/>
<line number="651" hits="1"/>
<line number="653" hits="0"/>
<line number="654" hits="0"/>
<line number="657" hits="0"/>
<line number="658" hits="0"/>
<line number="659" hits="0"/>
<line number="660" hits="0"/>
<line number="662" hits="0"/>
<line number="663" hits="0"/>
<line number="665" hits="0"/>
<line number="666" hits="0"/>
<line number="667" hits="0"/>
<line number="668" hits="0"/>
<line number="669" hits="0"/>
<line number="670" hits="0"/>
<line number="671" hits="0"/>
<line number="672" hits="0"/>
<line number="673" hits="0"/>
<line number="674" hits="1"/>
<line number="675" hits="1"/>
<line number="678" hits="1"/>
<line number="679" hits="0"/>
<line number="682" hits="1"/>
<line number="683" hits="0"/>
<line number="686" hits="1"/>
<line number="687" hits="0"/>
<line number="690" hits="1"/>
<line number="691" hits="0"/>
<line number="694" hits="1"/>
<line number="695" hits="1"/>
<line number="697" hits="1"/>
<line number="698" hits="0"/>
<line number="699" hits="0"/>
<line number="700" hits="0"/>
<line number="701" hits="0"/>
<line number="702" hits="0"/>
<line number="703" hits="0"/>
<line number="705" hits="0"/>
<line number="706" hits="0"/>
<line number="709" hits="0"/>
<line number="711" hits="0"/>
<line number="712" hits="0"/>
<line number="713" hits="0"/>
<line number="714" hits="0"/>
<line number="715" hits="0"/>
<line number="716" hits="0"/>
<line number="717" hits="0"/>
<line number="718" hits="0"/>
<line number="719" hits="0"/>
<line number="721" hits="0"/>
<line number="729" hits="1"/>
<line number="730" hits="1"/>
<line number="737" hits="0"/>
<line number="738" hits="0"/>
<line number="741" hits="1"/>
<line number="742" hits="1"/>
<line number="743" hits="0"/>
<line number="745" hits="0"/>
<line number="746" hits="0"/>
<line number="747" hits="0"/>
<line number="749" hits="0"/>
<line number="750" hits="0"/>
<line number="751" hits="0"/>
<line number="753" hits="0"/>
<line number="756" hits="0"/>
<line number="757" hits="0"/>
<line number="758" hits="0"/>
<line number="759" hits="0"/>
<line number="760" hits="0"/>
<line number="761" hits="0"/>
<line number="762" hits="0"/>
<line number="763" hits="0"/>
<line number="764" hits="0"/>
<line number="765" hits="0"/>
<line number="766" hits="0"/>
<line number="767" hits="0"/>
<line number="768" hits="0"/>
<line number="769" hits="0"/>
<line number="770" hits="0"/>
<line number="771" hits="0"/>
<line number="772" hits="0"/>
<line number="773" hits="0"/>
<line number="774" hits="0"/>
<line number="775" hits="0"/>
<line number="776" hits="0"/>
<line number="777" hits="0"/>
<line number="778" hits="0"/>
<line number="779" hits="0"/>
<line number="780" hits="0"/>
<line number="781" hits="0"/>
<line number="782" hits="0"/>
<line number="783" hits="0"/>
<line number="785" hits="0"/>
<line number="786" hits="0"/>
<line number="787" hits="0"/>
<line number="788" hits="0"/>
<line number="789" hits="0"/>
<line number="790" hits="0"/>
<line number="791" hits="0"/>
<line number="792" hits="0"/>
<line number="793" hits="0"/>
<line number="794" hits="0"/>
<line number="795" hits="0"/>
<line number="796" hits="1"/>
<line number="797" hits="0"/>
<line number="799" hits="0"/>
<line number="800" hits="0"/>
<line number="801" hits="0"/>
<line number="802" hits="0"/>
<line number="803" hits="0"/>
<line number="804" hits="0"/>
<line number="806" hits="1"/>
<line number="808" hits="1"/>
<line number="814" hits="0"/>
<line number="817" hits="0"/>
<line number="818" hits="0"/>
<line number="819" hits="0"/>
<line number="820" hits="0"/>
<line number="821" hits="0"/>
<line number="823" hits="0"/>
<line number="824" hits="0"/>
<line number="825" hits="0"/>
<line number="826" hits="0"/>
<line number="827" hits="0"/>
<line number="828" hits="0"/>
<line number="830" hits="0"/>
<line number="832" hits="0"/>
<line number="834" hits="0"/>
<line number="837" hits="1"/>
<line number="840" hits="1"/>
<line number="845" hits="1"/>
<line number="847" hits="1"/>
<line number="848" hits="1"/>
<line number="851" hits="1"/>
<line number="852" hits="1"/>
<line number="853" hits="0"/>
<line number="855" hits="0"/>
<line number="856" hits="1"/>
<line number="858" hits="1"/>
<line number="859" hits="1"/>
<line number="860" hits="1"/>
<line number="861" hits="1"/>
<line number="864" hits="1"/>
<line number="866" hits="1"/>
<line number="867" hits="1"/>
<line number="868" hits="1"/>
<line number="869" hits="1"/>
<line number="870" hits="0"/>
<line number="871" hits="0"/>
<line number="872" hits="0"/>
<line number="873" hits="0"/>
<line number="874" hits="1"/>
<line number="875" hits="1"/>
<line number="876" hits="1"/>
<line number="877" hits="1"/>
<line number="878" hits="1"/>
<line number="879" hits="1"/>
<line number="882" hits="1"/>
<line number="893" hits="0"/>
<line number="894" hits="0"/>
<line number="895" hits="0"/>
<line number="896" hits="0"/>
<line number="898" hits="0"/>
<line number="901" hits="1"/>
<line number="912" hits="0"/>
<line number="913" hits="0"/>
<line number="914" hits="0"/>
<line number="915" hits="0"/>
<line number="916" hits="0"/>
<line number="917" hits="0"/>
<line number="918" hits="0"/>
<line number="921" hits="1"/>
<line number="932" hits="0"/>
<line number="933" hits="0"/>
<line number="934" hits="0"/>
<line number="935" hits="0"/>
<line number="937" hits="0"/>
<line number="940" hits="1"/>
<line number="948" hits="0"/>
<line number="949" hits="0"/>
<line number="950" hits="0"/>
<line number="953" hits="0"/>
<line number="954" hits="0"/>
<line number="955" hits="0"/>
<line number="961" hits="1"/>
<line number="962" hits="1"/>
<line number="963" hits="1"/>
<line number="964" hits="1"/>
<line number="968" hits="1"/>
<line number="969" hits="1"/>
<line number="974" hits="1"/>
<line number="976" hits="0"/>
<line number="977" hits="0"/>
<line number="978" hits="1"/>
<line number="980" hits="1"/>
</lines>
</class>
</PyCTBN>
</package>
</packages>
</coverage>

@ -0,0 +1,67 @@
import unittest
import numpy as np
import glob
import os
from ...PyCTBN.structure_graph.network_graph import NetworkGraph
from ...PyCTBN.structure_graph.sample_path import SamplePath
from ...PyCTBN.structure_graph.set_of_cims import SetOfCims
from ...PyCTBN.estimators.parameters_estimator import ParametersEstimator
from ...PyCTBN.utility.json_importer import JsonImporter
class TestParametersEstimatior(unittest.TestCase):
@classmethod
def setUpClass(cls) -> None:
cls.read_files = glob.glob(os.path.join('./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json', "*.json"))
cls.array_indx = 0
cls.importer = JsonImporter('./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json', 'samples', 'dyn.str', 'variables', 'Time', 'Name')
cls.importer.import_data(cls.array_indx)
cls.s1 = SamplePath(cls.importer)
cls.s1.build_trajectories()
cls.s1.build_structure()
print(cls.s1.structure.edges)
print(cls.s1.structure.nodes_values)
def test_fast_init(self):
for node in self.s1.structure.nodes_labels:
g = NetworkGraph(self.s1.structure)
g.fast_init(node)
p1 = ParametersEstimator(self.s1.trajectories, g)
self.assertEqual(p1._trajectories, self.s1.trajectories)
self.assertEqual(p1._net_graph, g)
self.assertIsNone(p1._single_set_of_cims)
p1.fast_init(node)
self.assertIsInstance(p1._single_set_of_cims, SetOfCims)
def test_compute_parameters_for_node(self):
for indx, node in enumerate(self.s1.structure.nodes_labels):
print(node)
g = NetworkGraph(self.s1.structure)
g.fast_init(node)
p1 = ParametersEstimator(self.s1.trajectories, g)
p1.fast_init(node)
sofc1 = p1.compute_parameters_for_node(node)
sampled_cims = self.aux_import_sampled_cims('dyn.cims')
sc = list(sampled_cims.values())
self.equality_of_cims_of_node(sc[indx], sofc1._actual_cims)
def equality_of_cims_of_node(self, sampled_cims, estimated_cims):
self.assertEqual(len(sampled_cims), len(estimated_cims))
for c1, c2 in zip(sampled_cims, estimated_cims):
self.cim_equality_test(c1, c2.cim)
def cim_equality_test(self, cim1, cim2):
for r1, r2 in zip(cim1, cim2):
self.assertTrue(np.all(np.isclose(r1, r2, 1e01)))
def aux_import_sampled_cims(self, cims_label):
i1 = JsonImporter('./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json', '', '', '', '', '')
raw_data = i1.read_json_file()
return i1.import_sampled_cims(raw_data, self.array_indx, cims_label)
if __name__ == '__main__':
unittest.main()

@ -0,0 +1,64 @@
import glob
import math
import os
import unittest
import networkx as nx
import numpy as np
import psutil
from line_profiler import LineProfiler
import json
import pandas as pd
from ...PyCTBN.structure_graph.sample_path import SamplePath
from ...PyCTBN.estimators.structure_constraint_based_estimator import StructureConstraintBasedEstimator
from ...PyCTBN.utility.sample_importer import SampleImporter
import copy
class TestStructureConstraintBasedEstimator(unittest.TestCase):
    """Constraint-based structure learning on data supplied through ``SampleImporter``."""

    @classmethod
    def setUpClass(cls):
        """Load the first network of the JSON test file into data frames and build the sample path."""
        with open("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json") as handle:
            first_network = json.load(handle)[0]

        cls.importer = SampleImporter(
            trajectory_list=[pd.DataFrame(sample) for sample in first_network["samples"]],
            variables=pd.DataFrame(first_network["variables"]),
            prior_net_structure=pd.DataFrame(first_network["dyn.str"]),
        )
        cls.importer.import_data()

        sample_path = SamplePath(cls.importer)
        sample_path.build_trajectories()
        sample_path.build_structure()
        cls.s1 = sample_path

    def test_structure(self):
        """The estimated edge set must equal the edge set stored with the data."""
        expected = {tuple(edge) for edge in copy.deepcopy(self.s1.structure.edges)}
        estimator = StructureConstraintBasedEstimator(self.s1, 0.1, 0.1)
        learned = estimator.estimate_structure(disable_multiprocessing=False)
        self.assertEqual(learned, expected)


if __name__ == '__main__':
    unittest.main()

@ -0,0 +1,59 @@
import glob
import math
import os
import unittest
import networkx as nx
import numpy as np
import psutil
from line_profiler import LineProfiler
from ...PyCTBN.utility.cache import Cache
from ...PyCTBN.structure_graph.sample_path import SamplePath
from ...PyCTBN.estimators.structure_constraint_based_estimator import StructureConstraintBasedEstimator
from ...PyCTBN.utility.json_importer import JsonImporter
from multiprocessing import set_start_method
import copy
class TestStructureConstraintBasedEstimator(unittest.TestCase):
@classmethod
def setUpClass(cls):
pass
def test_structure(self):
#cls.read_files = glob.glob(os.path.join('../../data', "*.json"))
self.importer = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
self.s1 = SamplePath(self.importer)
self.s1.build_trajectories()
self.s1.build_structure()
true_edges = copy.deepcopy(self.s1.structure.edges)
true_edges = set(map(tuple, true_edges))
se1 = StructureConstraintBasedEstimator(self.s1,0.1,0.1)
edges = se1.estimate_structure(disable_multiprocessing=False)
self.importer = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
self.s1 = SamplePath(self.importer)
self.s1.build_trajectories()
self.s1.build_structure()
true_edges = copy.deepcopy(self.s1.structure.edges)
true_edges = set(map(tuple, true_edges))
se1 = StructureConstraintBasedEstimator(self.s1,0.1,0.1)
edges = se1.estimate_structure(disable_multiprocessing=True)
self.assertEqual(edges, true_edges)
if __name__ == '__main__':
unittest.main()

@ -0,0 +1,82 @@
import sys
sys.path.append("../../PyCTBN/")
import glob
import math
import os
import unittest
import networkx as nx
import numpy as np
import psutil
from line_profiler import LineProfiler
import copy
from ...PyCTBN.utility.cache import Cache
from ...PyCTBN.structure_graph.sample_path import SamplePath
from ...PyCTBN.estimators.structure_score_based_estimator import StructureScoreBasedEstimator
from ...PyCTBN.utility.json_importer import JsonImporter
from ...PyCTBN.utility.sample_importer import SampleImporter
import json
import pandas as pd
class TestStructureScoreBasedEstimator(unittest.TestCase):
    """Score-based structure learning (hill climbing, one known edge) on ``SampleImporter`` data."""

    @classmethod
    def setUpClass(cls):
        """Load the first network of the JSON test file into data frames and build the sample path."""
        with open("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json") as handle:
            first_network = json.load(handle)[0]

        cls.importer = SampleImporter(
            trajectory_list=[pd.DataFrame(sample) for sample in first_network["samples"]],
            variables=pd.DataFrame(first_network["variables"]),
            prior_net_structure=pd.DataFrame(first_network["dyn.str"]),
        )
        cls.importer.import_data()

        sample_path = SamplePath(cls.importer)
        sample_path.build_trajectories()
        sample_path.build_structure()
        cls.s1 = sample_path

    def test_structure(self):
        """The estimated edge set must equal the edge set stored with the data."""
        expected = {tuple(edge) for edge in copy.deepcopy(self.s1.structure.edges)}
        estimator = StructureScoreBasedEstimator(self.s1, known_edges=[('X', 'Q')])
        search_settings = {
            'max_parents': None,
            'iterations_number': 100,
            'patience': 35,
            'tabu_length': 15,
            'tabu_rules_duration': 15,
            'optimizer': 'hill',
            'disable_multiprocessing': True,
        }
        learned = estimator.estimate_structure(**search_settings)
        self.assertEqual(learned, expected)


if __name__ == '__main__':
    unittest.main()

@ -0,0 +1,79 @@
import glob
import math
import os
import unittest
import networkx as nx
import numpy as np
import psutil
from line_profiler import LineProfiler
import copy
from ...PyCTBN.utility.cache import Cache
from ...PyCTBN.structure_graph.sample_path import SamplePath
from ...PyCTBN.estimators.structure_score_based_estimator import StructureScoreBasedEstimator
from ...PyCTBN.utility.json_importer import JsonImporter
class TestStructureScoreBasedEstimator(unittest.TestCase):
@classmethod
def setUpClass(cls):
pass
def test_structure(self):
#cls.read_files = glob.glob(os.path.join('../../data', "*.json"))
self.importer = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
self.s1 = SamplePath(self.importer)
self.s1.build_trajectories()
self.s1.build_structure()
true_edges = copy.deepcopy(self.s1.structure.edges)
true_edges = set(map(tuple, true_edges))
se1 = StructureScoreBasedEstimator(self.s1)
edges = se1.estimate_structure(
max_parents = None,
iterations_number = 100,
patience = 35,
tabu_length = 15,
tabu_rules_duration = 15,
optimizer = 'tabu',
disable_multiprocessing=False
)
self.importer = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
self.s1 = SamplePath(self.importer)
self.s1.build_trajectories()
self.s1.build_structure()
true_edges = copy.deepcopy(self.s1.structure.edges)
true_edges = set(map(tuple, true_edges))
se1 = StructureScoreBasedEstimator(self.s1)
edges = se1.estimate_structure(
max_parents = None,
iterations_number = 100,
patience = 35,
tabu_length = 15,
tabu_rules_duration = 15,
optimizer = 'tabu',
disable_multiprocessing=True
)
self.assertEqual(edges, true_edges)
if __name__ == '__main__':
unittest.main()

@ -0,0 +1,54 @@
import glob
import math
import os
import unittest
import networkx as nx
import numpy as np
import psutil
from line_profiler import LineProfiler
import copy
from ...PyCTBN.structure_graph.sample_path import SamplePath
from ...PyCTBN.estimators.structure_score_based_estimator import StructureScoreBasedEstimator
from ...PyCTBN.utility.json_importer import JsonImporter
class TestHillClimbingSearch(unittest.TestCase):
    """Score-based structure learning with the hill-climbing optimizer."""

    @classmethod
    def setUpClass(cls):
        """Import network 0 of the JSON test file and build its trajectories and structure."""
        cls.importer = JsonImporter(
            "./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json",
            'samples', 'dyn.str', 'variables', 'Time', 'Name',
        )
        cls.importer.import_data(0)

        sample_path = SamplePath(cls.importer)
        sample_path.build_trajectories()
        sample_path.build_structure()
        cls.s1 = sample_path

    def test_structure(self):
        """The estimated edge set must equal the edge set stored with the data."""
        expected = {tuple(edge) for edge in copy.deepcopy(self.s1.structure.edges)}
        estimator = StructureScoreBasedEstimator(self.s1)
        search_settings = {
            'max_parents': None,
            'iterations_number': 40,
            'patience': None,
            'optimizer': 'hill',
        }
        learned = estimator.estimate_structure(**search_settings)
        self.assertEqual(learned, expected)


if __name__ == '__main__':
    unittest.main()

@ -0,0 +1,84 @@
import sys
sys.path.append("../../PyCTBN/")
import glob
import math
import os
import unittest
import networkx as nx
import numpy as np
import pandas as pd
import psutil
from line_profiler import LineProfiler
import copy
import json
import utility.cache as ch
import structure_graph.sample_path as sp
import estimators.structure_score_based_estimator as se
import utility.json_importer as ji
import utility.sample_importer as si
class TestTabuSearch(unittest.TestCase):
    """Score-based structure learning with the tabu-search optimizer on ``SampleImporter`` data."""

    @classmethod
    def setUpClass(cls):
        """Load the first network of the JSON test file into data frames and build the sample path."""
        with open("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json") as handle:
            first_network = json.load(handle)[0]

        cls.importer = si.SampleImporter(
            trajectory_list=[pd.DataFrame(sample) for sample in first_network["samples"]],
            variables=pd.DataFrame(first_network["variables"]),
            prior_net_structure=pd.DataFrame(first_network["dyn.str"]),
        )
        cls.importer.import_data()

        sample_path = sp.SamplePath(cls.importer)
        sample_path.build_trajectories()
        sample_path.build_structure()
        cls.s1 = sample_path

    def test_structure(self):
        """The estimated edge set must equal the edge set stored with the data."""
        expected = {tuple(edge) for edge in copy.deepcopy(self.s1.structure.edges)}
        estimator = se.StructureScoreBasedEstimator(self.s1)
        search_settings = {
            'max_parents': None,
            'iterations_number': 100,
            'patience': 20,
            'tabu_length': 10,
            'tabu_rules_duration': 10,
            'optimizer': 'tabu',
            'disable_multiprocessing': False,
        }
        learned = estimator.estimate_structure(**search_settings)
        self.assertEqual(learned, expected)


if __name__ == '__main__':
    unittest.main()

@ -0,0 +1,46 @@
import unittest
import numpy as np
from ...PyCTBN.structure_graph.conditional_intensity_matrix import ConditionalIntensityMatrix
class TestConditionalIntensityMatrix(unittest.TestCase):
@classmethod
def setUpClass(cls) -> None:
cls.state_res_times = np.random.rand(1, 3)[0]
cls.state_res_times = cls.state_res_times * 1000
cls.state_transition_matrix = np.random.randint(1, 10000, (3, 3))
for i in range(0, len(cls.state_res_times)):
cls.state_transition_matrix[i, i] = 0
cls.state_transition_matrix[i, i] = np.sum(cls.state_transition_matrix[i])
def test_init(self):
c1 = ConditionalIntensityMatrix(self.state_res_times, self.state_transition_matrix)
self.assertTrue(np.array_equal(self.state_res_times, c1.state_residence_times))
self.assertTrue(np.array_equal(self.state_transition_matrix, c1.state_transition_matrix))
self.assertEqual(c1.cim.dtype, np.float)
self.assertEqual(self.state_transition_matrix.shape, c1.cim.shape)
def test_compute_cim_coefficients(self):
c1 = ConditionalIntensityMatrix(self.state_res_times, self.state_transition_matrix)
c2 = self.state_transition_matrix.astype(np.float)
np.fill_diagonal(c2, c2.diagonal() * -1)
for i in range(0, len(self.state_res_times)):
for j in range(0, len(self.state_res_times)):
c2[i, j] = (c2[i, j] + 1) / (self.state_res_times[i] + 1)
c1.compute_cim_coefficients()
for i in range(0, len(c1.state_residence_times)):
self.assertTrue(np.isclose(np.sum(c1.cim[i]), 0.0, 1e-02, 1e-01))
for i in range(0, len(self.state_res_times)):
for j in range(0, len(self.state_res_times)):
self.assertTrue(np.isclose(c1.cim[i, j], c2[i, j], 1e-02, 1e-01))
def test_repr(self):
c1 = ConditionalIntensityMatrix(self.state_res_times, self.state_transition_matrix)
print(c1)
if __name__ == '__main__':
unittest.main()

@ -0,0 +1,190 @@
import unittest
import glob
import os
import networkx as nx
import numpy as np
import itertools
from ...PyCTBN.structure_graph.sample_path import SamplePath
from ...PyCTBN.structure_graph.network_graph import NetworkGraph
from ...PyCTBN.utility.json_importer import JsonImporter
class TestNetworkGraph(unittest.TestCase):
    """Tests for NetworkGraph, driven by the structure of an imported sample path."""

    @classmethod
    def setUpClass(cls):
        """Import the first JSON dataset found and build its trajectories and structure."""
        cls.read_files = glob.glob(os.path.join('./PyCTBN/test_data', "*.json"))
        cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
        cls.importer.import_data(0)
        cls.s1 = SamplePath(cls.importer)
        cls.s1.build_trajectories()
        cls.s1.build_structure()

    def _graph_with_nodes_and_edges(self):
        """Return a NetworkGraph that already holds every node and edge of the structure."""
        g1 = NetworkGraph(self.s1.structure)
        g1.add_nodes(self.s1.structure.nodes_labels)
        g1.add_edges(self.s1.structure.edges)
        return g1

    def test_init(self):
        """A fresh graph wraps the structure and leaves every derived structure unset."""
        g1 = NetworkGraph(self.s1.structure)
        self.assertEqual(self.s1.structure, g1._graph_struct)
        self.assertIsInstance(g1._graph, nx.DiGraph)
        self.assertIsNone(g1.time_scalar_indexing_strucure)
        self.assertIsNone(g1.transition_scalar_indexing_structure)
        self.assertIsNone(g1.transition_filtering)
        self.assertIsNone(g1.p_combs)

    def test_add_nodes(self):
        """add_nodes inserts the labels in the order given by the structure."""
        g1 = NetworkGraph(self.s1.structure)
        g1.add_nodes(self.s1.structure.nodes_labels)
        for n1, n2 in zip(g1.nodes, self.s1.structure.nodes_labels):
            self.assertEqual(n1, n2)

    def test_add_edges(self):
        """Every edge of the structure is present in the graph after add_edges."""
        g1 = NetworkGraph(self.s1.structure)
        g1.add_edges(self.s1.structure.edges)
        for e in self.s1.structure.edges:
            self.assertIn(tuple(e), g1.edges)

    def test_fast_init(self):
        """fast_init fills every per-node helper structure with the expected type."""
        g1 = NetworkGraph(self.s1.structure)
        for node in self.s1.structure.nodes_labels:
            g1.fast_init(node)
            self.assertIsNotNone(g1._graph.nodes)
            self.assertIsNotNone(g1._graph.edges)
            self.assertIsInstance(g1._time_scalar_indexing_structure, np.ndarray)
            self.assertIsInstance(g1._transition_scalar_indexing_structure, np.ndarray)
            self.assertIsInstance(g1._time_filtering, np.ndarray)
            self.assertIsInstance(g1._transition_filtering, np.ndarray)
            self.assertIsInstance(g1._p_combs_structure, np.ndarray)
            self.assertIsInstance(g1._aggregated_info_about_nodes_parents, tuple)

    def test_get_ordered_by_indx_set_of_parents(self):
        """Parents come back sorted by index, with matching indexes and state counts."""
        g1 = self._graph_with_nodes_and_edges()
        for node in self.s1.structure.nodes_labels:
            # aggr_info is used below as (parent labels, parent indexes, parent state counts).
            aggr_info = g1.get_ordered_by_indx_set_of_parents(node)
            for indx in range(len(aggr_info[0]) - 1):
                self.assertLess(g1.get_node_indx(aggr_info[0][indx]),
                                g1.get_node_indx(aggr_info[0][indx + 1]))
            for par, par_indx in zip(aggr_info[0], aggr_info[1]):
                self.assertEqual(g1.get_node_indx(par), par_indx)
            for par, par_val in zip(aggr_info[0], aggr_info[2]):
                self.assertEqual(g1._graph_struct.get_states_number(par), par_val)

    def test_build_time_scalar_indexing_structure_for_a_node(self):
        """Run the time scalar indexing check for every node of the structure."""
        g1 = self._graph_with_nodes_and_edges()
        for node in self.s1.structure.nodes_labels:
            aggr_info = g1.get_ordered_by_indx_set_of_parents(node)
            self.aux_build_time_scalar_indexing_structure_for_a_node(g1, node, aggr_info[1],
                                                                     aggr_info[0], aggr_info[2])

    def aux_build_time_scalar_indexing_structure_for_a_node(self, graph, node_id, parents_indxs, parents_labels,
                                                            parents_vals):
        """Compare the time scalar indexing with the cumulative product of the state counts.

        The expected vector is built from the node's own state count followed by
        the state counts of its parents.
        """
        node_states = graph.get_states_number(node_id)
        time_scalar_indexing = NetworkGraph.build_time_scalar_indexing_structure_for_a_node(node_states,
                                                                                           parents_vals)
        self.assertEqual(len(time_scalar_indexing), len(parents_indxs) + 1)
        merged_list = parents_labels[:]
        merged_list.insert(0, node_id)
        vals_list = [graph.get_states_number(node) for node in merged_list]
        t_vec = np.array(vals_list).cumprod()
        self.assertTrue(np.array_equal(time_scalar_indexing, t_vec))

    def test_build_transition_scalar_indexing_structure_for_a_node(self):
        """Run the transition scalar indexing check for every node of the structure."""
        g1 = self._graph_with_nodes_and_edges()
        for node in self.s1.structure.nodes_labels:
            aggr_info = g1.get_ordered_by_indx_set_of_parents(node)
            self.aux_build_transition_scalar_indexing_structure_for_a_node(g1, node, aggr_info[1],
                                                                           aggr_info[0], aggr_info[2])

    def aux_build_transition_scalar_indexing_structure_for_a_node(self, graph, node_id, parents_indxs, parents_labels,
                                                                  parents_values):
        """Compare the transition scalar indexing with the expected cumulative product.

        The expected vector counts the node's own state count twice, then the
        state counts of its parents.
        """
        node_states = graph.get_states_number(node_id)
        transition_scalar_indexing = graph.build_transition_scalar_indexing_structure_for_a_node(node_states,
                                                                                                 parents_values)
        self.assertEqual(len(transition_scalar_indexing), len(parents_indxs) + 2)
        merged_list = parents_labels[:]
        merged_list.insert(0, node_id)
        merged_list.insert(0, node_id)
        # A separate loop name avoids shadowing the node_id parameter.
        vals_list = [graph.get_states_number(label) for label in merged_list]
        # cumprod() without an axis flattens the (1, n) array before multiplying.
        m_vec = np.array([vals_list]).cumprod()
        self.assertTrue(np.array_equal(transition_scalar_indexing, m_vec))

    def test_build_time_columns_filtering_structure_for_a_node(self):
        """Run the time columns filtering check for every node of the structure."""
        g1 = self._graph_with_nodes_and_edges()
        for node in self.s1.structure.nodes_labels:
            aggr_info = g1.get_ordered_by_indx_set_of_parents(node)
            self.aux_build_time_columns_filtering_structure_for_a_node(g1, node, aggr_info[1])

    def aux_build_time_columns_filtering_structure_for_a_node(self, graph, node_id, p_indxs):
        """The time filter must be the node index followed by its parents' indexes."""
        node_indx = graph.get_node_indx(node_id)
        single_filter = [node_indx]
        single_filter.extend(p_indxs)
        # The builder is called once; the original discarded the result of an extra call.
        built_filter = graph.build_time_columns_filtering_for_a_node(node_indx, p_indxs)
        self.assertTrue(np.array_equal(built_filter, np.array(single_filter)))

    def test_build_transition_columns_filtering_structure(self):
        """Run the transition columns filtering check for every node of the structure."""
        g1 = self._graph_with_nodes_and_edges()
        for node in self.s1.structure.nodes_labels:
            aggr_info = g1.get_ordered_by_indx_set_of_parents(node)
            # Must call the transition helper; it used to call the time helper by mistake.
            self.aux_build_transition_columns_filtering_structure(g1, node, aggr_info[1])

    def aux_build_transition_columns_filtering_structure(self, graph, node_id, p_indxs):
        """The transition filter must be [node index + variable count, node index, *parents]."""
        single_filter = []
        single_filter.append(graph.get_node_indx(node_id) + graph._graph_struct.total_variables_number)
        single_filter.append(graph.get_node_indx(node_id))
        single_filter.extend(p_indxs)
        self.assertTrue(np.array_equal(graph.build_transition_filtering_for_a_node(graph.get_node_indx(node_id),
                                                                                   p_indxs),
                                       np.array(single_filter)))

    def test_build_p_combs_structure(self):
        """Run the parent combinations check for every node of the structure."""
        g1 = self._graph_with_nodes_and_edges()
        for node in self.s1.structure.nodes_labels:
            aggr_info = g1.get_ordered_by_indx_set_of_parents(node)
            self.aux_build_p_combs_structure(g1, aggr_info[2])

    def aux_build_p_combs_structure(self, graph, p_vals):
        """Check the tuples built from the parents' state values against p_combs."""
        p_combs = graph.build_p_comb_structure_for_a_node(p_vals)
        p_possible_vals = []
        for val in p_vals:
            p_possible_vals.extend(range(val))
        comb_struct = set(itertools.product(p_possible_vals, repeat=len(p_vals)))
        # NOTE(review): `in` on an ndarray is an element-wise any(), so this does not
        # prove that each tuple is present as a whole row -- consider a row-wise check.
        for comb in comb_struct:
            self.assertIn(np.array(comb), p_combs)

    def test_get_parents_by_id(self):
        """get_parents_by_id agrees with the predecessors reported by the wrapped graph."""
        g1 = self._graph_with_nodes_and_edges()
        for node in g1.nodes:
            self.assertListEqual(g1.get_parents_by_id(node), list(g1._graph.predecessors(node)))

    def test_get_states_number(self):
        """get_states_number returns the value paired with each node in nodes_values."""
        g1 = self._graph_with_nodes_and_edges()
        for node, val in zip(g1.nodes, g1.nodes_values):
            self.assertEqual(val, g1.get_states_number(node))

    def test_get_node_indx(self):
        """get_node_indx returns the index paired with each node in nodes_indexes."""
        g1 = self._graph_with_nodes_and_edges()
        for node, indx in zip(g1.nodes, g1.nodes_indexes):
            self.assertEqual(indx, g1.get_node_indx(node))


if __name__ == '__main__':
    unittest.main()

@ -0,0 +1,72 @@
import unittest
import glob
import os
import random
from ...PyCTBN.utility.json_importer import JsonImporter
from ...PyCTBN.structure_graph.sample_path import SamplePath
from ...PyCTBN.structure_graph.trajectory import Trajectory
from ...PyCTBN.structure_graph.structure import Structure
class TestSamplePath(unittest.TestCase):
    """Tests for SamplePath construction and its build_* steps."""

    @classmethod
    def setUpClass(cls) -> None:
        """Collect the JSON datasets available in the test data folder."""
        pattern = os.path.join('./PyCTBN/test_data', "*.json")
        cls.read_files = glob.glob(pattern)

    def _first_file_importer(self, load=True):
        """Return a JsonImporter on the first dataset, optionally after import_data(0)."""
        json_importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
        if load:
            json_importer.import_data(0)
        return json_importer

    def test_init_not_initialized_importer(self):
        """An importer that never imported data is rejected with RuntimeError."""
        json_importer = self._first_file_importer(load=False)
        with self.assertRaises(RuntimeError):
            SamplePath(json_importer)

    def test_init_not_filled_dataframse(self):
        """An importer whose concatenated frame was cleared is rejected with RuntimeError."""
        json_importer = self._first_file_importer()
        json_importer.clear_concatenated_frame()
        with self.assertRaises(RuntimeError):
            SamplePath(json_importer)

    def test_init(self):
        """A new SamplePath has no trajectories, structure or variable count yet."""
        sample_path = SamplePath(self._first_file_importer())
        self.assertIsNone(sample_path.trajectories)
        self.assertIsNone(sample_path.structure)
        self.assertFalse(sample_path._importer.concatenated_samples.empty)
        self.assertIsNone(sample_path._total_variables_count)

    def test_build_trajectories(self):
        """build_trajectories produces a Trajectory instance."""
        sample_path = SamplePath(self._first_file_importer())
        sample_path.build_trajectories()
        self.assertIsInstance(sample_path.trajectories, Trajectory)

    def test_build_structure(self):
        """build_structure produces a Structure and a count equal to the sorter length."""
        sample_path = SamplePath(self._first_file_importer())
        sample_path.build_structure()
        self.assertIsInstance(sample_path.structure, Structure)
        self.assertEqual(sample_path._total_variables_count, len(sample_path._importer.sorter))

    def test_build_structure_bad_sorter(self):
        """Shuffling the importer's sorter makes build_structure raise RuntimeError."""
        json_importer = self._first_file_importer()
        sample_path = SamplePath(json_importer)
        random.shuffle(json_importer._sorter)
        with self.assertRaises(RuntimeError):
            sample_path.build_structure()

    def test_build_saplepath_no_prior_net_structure(self):
        """Without a prior structure frame the built structure has no edges."""
        json_importer = self._first_file_importer()
        json_importer._df_structure = None
        sample_path = SamplePath(json_importer)
        sample_path.build_trajectories()
        sample_path.build_structure()
        self.assertFalse(sample_path.structure.edges)


if __name__ == '__main__':
    unittest.main()

@ -0,0 +1,133 @@
import unittest
import numpy as np
import itertools
from ...PyCTBN.structure_graph.set_of_cims import SetOfCims
class TestSetOfCims(unittest.TestCase):
@classmethod
def setUpClass(cls) -> None:
cls.node_id = 'X'
cls.possible_cardinalities = [2, 3]
cls.possible_states = [[0,1], [0, 1, 2]]
cls.node_states_number = range(2, 4)
def test_init(self):
# empty parent set
for sn in self.node_states_number:
p_combs = self.build_p_comb_structure_for_a_node([])
self.aux_test_init(self.node_id, [], sn, p_combs)
# one parent
for sn in self.node_states_number:
for p in itertools.product(self.possible_cardinalities, repeat=1):
p_combs = self.build_p_comb_structure_for_a_node(list(p))
self.aux_test_init(self.node_id, list(p), sn, p_combs)
#two parents
for sn in self.node_states_number:
for p in itertools.product(self.possible_cardinalities, repeat=2):
p_combs = self.build_p_comb_structure_for_a_node(list(p))
self.aux_test_init(self.node_id, list(p), sn, p_combs)
def test_build_cims(self):
# empty parent set
for sn in self.node_states_number:
p_combs = self.build_p_comb_structure_for_a_node([])
self.aux_test_build_cims(self.node_id, [], sn, p_combs)
# one parent
for sn in self.node_states_number:
for p in itertools.product(self.possible_cardinalities, repeat=1):
p_combs = self.build_p_comb_structure_for_a_node(list(p))
self.aux_test_build_cims(self.node_id, list(p), sn, p_combs)
#two parents
for sn in self.node_states_number:
for p in itertools.product(self.possible_cardinalities, repeat=2):
p_combs = self.build_p_comb_structure_for_a_node(list(p))
self.aux_test_build_cims(self.node_id, list(p), sn, p_combs)
def test_filter_cims_with_mask(self):
p_combs = self.build_p_comb_structure_for_a_node(self.possible_cardinalities)
sofc1 = SetOfCims('X', self.possible_cardinalities, 3, p_combs)
state_res_times_list = []
transition_matrices_list = []
for i in range(len(p_combs)):
state_res_times = np.random.rand(1, 3)[0]
state_res_times = state_res_times * 1000
state_transition_matrix = np.random.randint(1, 10000, (3, 3))
state_res_times_list.append(state_res_times)
transition_matrices_list.append(state_transition_matrix)
sofc1.build_cims(np.array(state_res_times_list), np.array(transition_matrices_list))
for length_of_mask in range(3):
for mask in list(itertools.permutations([True, False],r=length_of_mask)):
m = np.array(mask)
for parent_value in range(self.possible_cardinalities[0]):
cims = sofc1.filter_cims_with_mask(m, [parent_value])
if length_of_mask == 0 or length_of_mask == 1:
self.assertTrue(np.array_equal(sofc1._actual_cims, cims))
else:
indxs = self.another_filtering_method(p_combs, m, [parent_value])
self.assertTrue(np.array_equal(cims, sofc1._actual_cims[indxs]))
def aux_test_build_cims(self, node_id, p_values, node_states, p_combs):
state_res_times_list = []
transition_matrices_list = []
so1 = SetOfCims(node_id, p_values, node_states, p_combs)
for i in range(len(p_combs)):
state_res_times = np.random.rand(1, node_states)[0]
state_res_times = state_res_times * 1000
state_transition_matrix = np.random.randint(1, 10000, (node_states, node_states))
state_res_times_list.append(state_res_times)
transition_matrices_list.append(state_transition_matrix)
so1.build_cims(np.array(state_res_times_list), np.array(transition_matrices_list))
self.assertEqual(len(state_res_times_list), so1.get_cims_number())
self.assertIsInstance(so1._actual_cims, np.ndarray)
self.assertIsNone(so1._transition_matrices)
self.assertIsNone(so1._state_residence_times)
def aux_test_init(self, node_id, parents_states_number, node_states_number, p_combs):
sofcims = SetOfCims(node_id, parents_states_number, node_states_number, p_combs)
self.assertEqual(sofcims._node_id, node_id)
self.assertTrue(np.array_equal(sofcims._p_combs, p_combs))
self.assertTrue(np.array_equal(sofcims._parents_states_number, parents_states_number))
self.assertEqual(sofcims._node_states_number, node_states_number)
self.assertFalse(sofcims._actual_cims)
self.assertEqual(sofcims._state_residence_times.shape[0], np.prod(np.array(parents_states_number)))
self.assertEqual(len(sofcims._state_residence_times[0]), node_states_number)
self.assertEqual(sofcims._transition_matrices.shape[0], np.prod(np.array(parents_states_number)))
self.assertEqual(len(sofcims._transition_matrices[0][0]), node_states_number)
def build_p_comb_structure_for_a_node(self, parents_values):
"""
Builds the combinatory structure that contains the combinations of all the values contained in parents_values.
Parameters:
parents_values: the cardinalities of the nodes
Returns:
a numpy matrix containing a grid of the combinations
"""
tmp = []
for val in parents_values:
tmp.append([x for x in range(val)])
if len(parents_values) > 0:
parents_comb = np.array(np.meshgrid(*tmp)).T.reshape(-1, len(parents_values))
if len(parents_values) > 1:
tmp_comb = parents_comb[:, 1].copy()
parents_comb[:, 1] = parents_comb[:, 0].copy()
parents_comb[:, 0] = tmp_comb
else:
parents_comb = np.array([[]], dtype=np.int)
return parents_comb
def another_filtering_method(self,p_combs, mask, parent_value):
masked_combs = p_combs[:, mask]
indxs = []
for indx, val in enumerate(masked_combs):
if val == parent_value:
indxs.append(indx)
return np.array(indxs)
if __name__ == '__main__':
unittest.main()

@ -0,0 +1,81 @@
import unittest
import numpy as np
from ...PyCTBN.structure_graph.structure import Structure
class TestStructure(unittest.TestCase):
    """Tests for the Structure container of labels, indexes, cardinalities and edges."""

    @classmethod
    def setUpClass(cls):
        """Define a three-variable network shared by all tests."""
        cls.labels = ['X', 'Y', 'Z']
        cls.indxs = np.array([0, 1, 2])
        cls.vals = np.array([3, 3, 3])
        cls.edges = [('X', 'Z'), ('Y', 'Z'), ('Z', 'Y')]
        cls.vars_numb = len(cls.labels)

    def _structure_without_y(self):
        """Return (labels, indexes, values, Structure) for the network with 'Y' removed.

        np.delete returns a new array instead of modifying its argument, so its
        result is kept; the previous code discarded it and left three indexes
        and values paired with only two labels.
        """
        l2 = self.labels[:]
        l2.remove('Y')
        i2 = np.delete(self.indxs, 1)
        v2 = np.delete(self.vals, 1)
        e2 = [('X', 'Z')]
        n2 = self.vars_numb - 1
        return l2, i2, v2, Structure(l2, i2, v2, e2, n2)

    def test_init(self):
        """The constructor exposes every argument through the matching property."""
        s1 = Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb)
        self.assertListEqual(self.labels, s1.nodes_labels)
        self.assertIsInstance(s1.nodes_indexes, np.ndarray)
        self.assertTrue(np.array_equal(self.indxs, s1.nodes_indexes))
        self.assertIsInstance(s1.nodes_values, np.ndarray)
        self.assertTrue(np.array_equal(self.vals, s1.nodes_values))
        self.assertListEqual(self.edges, s1.edges)
        self.assertEqual(self.vars_numb, s1.total_variables_number)

    def test_get_node_id(self):
        """get_node_id maps each index back to its label."""
        s1 = Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb)
        for indx, var in enumerate(self.labels):
            self.assertEqual(var, s1.get_node_id(indx))

    def test_get_node_indx(self):
        """get_node_indx returns the original index of a label after 'Y' is removed."""
        l2, i2, _, s1 = self._structure_without_y()
        for indx, var in zip(i2, l2):
            self.assertEqual(indx, s1.get_node_indx(var))

    def test_get_positional_node_indx(self):
        """get_positional_node_indx returns the position of a label in nodes_labels."""
        _, _, _, s1 = self._structure_without_y()
        for indx, var in enumerate(s1.nodes_labels):
            self.assertEqual(indx, s1.get_positional_node_indx(var))

    def test_get_states_number(self):
        """get_states_number returns the cardinality paired with each label."""
        l2, _, v2, s1 = self._structure_without_y()
        for val, node in zip(v2, l2):
            self.assertEqual(val, s1.get_states_number(node))

    def test_equality(self):
        """Two structures built from the same arguments compare equal."""
        s1 = Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb)
        s2 = Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb)
        self.assertEqual(s1, s2)

    def test_repr(self):
        """Printing the object must not raise."""
        s1 = Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb)
        print(s1)


if __name__ == '__main__':
    unittest.main()

@ -0,0 +1,27 @@
import glob
import os
import unittest

import numpy as np

from ...PyCTBN.structure_graph.trajectory import Trajectory
from ...PyCTBN.utility.json_importer import JsonImporter
class TestTrajectory(unittest.TestCase):
    """Tests for Trajectory built from the samples of an imported JSON dataset."""

    @classmethod
    def setUpClass(cls) -> None:
        """Import the first JSON dataset found in the test data folder."""
        # Same folder as the other test modules; './test_data' was the odd one out.
        cls.read_files = glob.glob(os.path.join('./PyCTBN/test_data', "*.json"))
        cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
        cls.importer.import_data(0)

    def test_init(self):
        """The Trajectory views match the corresponding columns of the concatenated samples."""
        samples = self.importer.concatenated_samples
        cols_number = len(self.importer.sorter) + 1
        t1 = Trajectory(self.importer.build_list_of_samples_array(samples), cols_number)
        # Column 0 is compared with the times, the remaining columns with the trajectories.
        self.assertTrue(np.array_equal(samples.iloc[:, 0].to_numpy(), t1.times))
        self.assertTrue(np.array_equal(samples.iloc[:, 1:].to_numpy(), t1.complete_trajectory))
        self.assertTrue(np.array_equal(samples.iloc[:, 1: cols_number], t1.trajectory))
        self.assertEqual(cols_number, t1._original_cols_number)


if __name__ == '__main__':
    unittest.main()

@ -0,0 +1,57 @@
import unittest
import numpy as np
from ...PyCTBN.utility.cache import Cache
from ...PyCTBN.structure_graph.set_of_cims import SetOfCims
class TestCache(unittest.TestCase):
    """Tests for the Cache that pairs parent sets with SetOfCims objects."""

    def _cache_with_one_entry(self):
        """Return (cache, parent set, SetOfCims) after one put, checking both list lengths."""
        cache = Cache()
        parents = {'X', 'Y'}
        cims = SetOfCims('Z', [], 3, np.array([]))
        cache.put(parents, cims)
        self.assertEqual(1, len(cache._actual_cache))
        self.assertEqual(1, len(cache._list_of_sets_of_parents))
        return cache, parents, cims

    def test_init(self):
        """A new cache holds no parent sets and no cached objects."""
        cache = Cache()
        self.assertFalse(cache._list_of_sets_of_parents)
        self.assertFalse(cache._actual_cache)

    def test_put(self):
        """Each put appends one parent set and one SetOfCims, in insertion order."""
        cache, _, first_cims = self._cache_with_one_entry()
        self.assertEqual(first_cims, cache._actual_cache[0])
        second_parents = {'X'}
        second_cims = SetOfCims('Z', [], 3, np.array([]))
        cache.put(second_parents, second_cims)
        self.assertEqual(2, len(cache._actual_cache))
        self.assertEqual(2, len(cache._list_of_sets_of_parents))
        self.assertEqual(second_cims, cache._actual_cache[1])

    def test_find(self):
        """find returns the stored object for an equal parent set and None otherwise."""
        cache, parents, cims = self._cache_with_one_entry()
        self.assertIsInstance(cache.find(parents), SetOfCims)
        self.assertEqual(cims, cache.find(parents))
        # The same members written in the other order must hit the same entry.
        reordered = {'Y', 'X'}
        self.assertIsInstance(cache.find(reordered), SetOfCims)
        self.assertEqual(cims, cache.find(reordered))
        self.assertIsNone(cache.find({'X'}))

    def test_clear(self):
        """clear empties both internal lists."""
        cache, _, _ = self._cache_with_one_entry()
        cache.clear()
        self.assertFalse(cache._list_of_sets_of_parents)
        self.assertFalse(cache._actual_cache)


if __name__ == '__main__':
    unittest.main()

@ -0,0 +1,176 @@
import unittest
import os
import glob
import numpy as np
import pandas as pd
from ...PyCTBN.utility.json_importer import JsonImporter
import json
class TestJsonImporter(unittest.TestCase):
@classmethod
def setUpClass(cls) -> None:
cls.read_files = glob.glob(os.path.join('./PyCTBN/test_data', "*.json"))
def test_init(self):
j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
self.assertEqual(j1._samples_label, 'samples')
self.assertEqual(j1._structure_label, 'dyn.str')
self.assertEqual(j1._variables_label, 'variables')
self.assertEqual(j1._time_key, 'Time')
self.assertEqual(j1._variables_key, 'Name')
self.assertEqual(j1._file_path, "./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json")
self.assertIsNone(j1._df_samples_list)
self.assertIsNone(j1.variables)
self.assertIsNone(j1.structure)
self.assertEqual(j1.concatenated_samples,[])
self.assertIsNone(j1.sorter)
self.assertIsNone(j1._array_indx)
self.assertIsInstance(j1._raw_data, list)
def test_read_json_file_found(self):
data_set = {"key1": [1, 2, 3], "key2": [4, 5, 6]}
with open('data.json', 'w') as f:
json.dump(data_set, f)
path = os.getcwd()
path = path + '/data.json'
j1 = JsonImporter(path, '', '', '', '', '')
self.assertTrue(self.ordered(data_set) == self.ordered(j1._raw_data))
os.remove('data.json')
def test_read_json_file_not_found(self):
path = os.getcwd()
path = path + '/data.json'
self.assertRaises(FileNotFoundError, JsonImporter, path, '', '', '', '', '')
def test_build_sorter(self):
j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
df_samples_list = j1.normalize_trajectories(j1._raw_data, 0, j1._samples_label)
sorter = j1.build_sorter(df_samples_list[0])
self.assertListEqual(sorter, list(df_samples_list[0].columns.values)[1:])
def test_normalize_trajectories(self):
j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
df_samples_list = j1.normalize_trajectories(j1._raw_data, 0, j1._samples_label)
self.assertEqual(len(df_samples_list), len(j1._raw_data[0][j1._samples_label]))
def test_normalize_trajectories_wrong_indx(self):
j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
self.assertRaises(IndexError, j1.normalize_trajectories, j1._raw_data, 474, j1._samples_label)
def test_normalize_trajectories_wrong_key(self):
j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'sample', 'dyn.str', 'variables', 'Time', 'Name')
self.assertRaises(KeyError, j1.normalize_trajectories, j1._raw_data, 0, j1._samples_label)
def test_compute_row_delta_single_samples_frame(self):
j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
j1._array_indx = 0
j1._df_samples_list = j1.import_trajectories(j1._raw_data)
sample_frame = j1._df_samples_list[0]
original_copy = sample_frame.copy()
columns_header = list(sample_frame.columns.values)
shifted_cols_header = [s + "S" for s in columns_header[1:]]
new_sample_frame = j1.compute_row_delta_sigle_samples_frame(sample_frame, columns_header[1:],
shifted_cols_header)
self.assertEqual(len(list(sample_frame.columns.values)) + len(shifted_cols_header),
len(list(new_sample_frame.columns.values)))
self.assertEqual(sample_frame.shape[0] - 1, new_sample_frame.shape[0])
for indx, row in new_sample_frame.iterrows():
self.assertAlmostEqual(row['Time'],
original_copy.iloc[indx + 1]['Time'] - original_copy.iloc[indx]['Time'])
for indx, row in new_sample_frame.iterrows():
np.array_equal(np.array(row[columns_header[1:]],dtype=int),
np.array(original_copy.iloc[indx][columns_header[1:]],dtype=int))
np.array_equal(np.array(row[shifted_cols_header], dtype=int),
np.array(original_copy.iloc[indx + 1][columns_header[1:]], dtype=int))
def test_compute_row_delta_in_all_frames(self):
j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
j1._array_indx = 0
j1._df_samples_list = j1.import_trajectories(j1._raw_data)
j1._sorter = j1.build_sorter(j1._df_samples_list[0])
j1.compute_row_delta_in_all_samples_frames(j1._df_samples_list)
self.assertEqual(list(j1._df_samples_list[0].columns.values),
list(j1.concatenated_samples.columns.values)[:len(list(j1._df_samples_list[0].columns.values))])
self.assertEqual(list(j1.concatenated_samples.columns.values)[0], j1._time_key)
def test_compute_row_delta_in_all_frames_not_init_sorter(self):
j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
j1._array_indx = 0
j1._df_samples_list = j1.import_trajectories(j1._raw_data)
self.assertRaises(RuntimeError, j1.compute_row_delta_in_all_samples_frames, j1._df_samples_list)
def test_clear_data_frame_list(self):
j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
j1._array_indx = 0
j1._df_samples_list = j1.import_trajectories(j1._raw_data)
j1._sorter = j1.build_sorter(j1._df_samples_list[0])
j1.compute_row_delta_in_all_samples_frames(j1._df_samples_list)
j1.clear_data_frame_list()
for df in j1._df_samples_list:
self.assertTrue(df.empty)
def test_clear_concatenated_frame(self):
j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
j1.import_data(0)
j1.clear_concatenated_frame()
self.assertTrue(j1.concatenated_samples.empty)
def test_import_variables(self):
j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
sorter = ['X', 'Y', 'Z']
raw_data = [{'variables':{"Name": ['X', 'Y', 'Z'], "value": [3, 3, 3]}}]
j1._array_indx = 0
df_var = j1.import_variables(raw_data)
self.assertEqual(list(df_var[j1._variables_key]), sorter)
def test_import_structure(self):
j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
raw_data = [{"dyn.str":[{"From":"X","To":"Z"},{"From":"Y","To":"Z"},{"From":"Z","To":"Y"}]}]
j1._array_indx = 0
df_struct = j1.import_structure(raw_data)
self.assertIsInstance(df_struct, pd.DataFrame)
def test_import_sampled_cims(self):
j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
raw_data = j1.read_json_file()
j1._array_indx = 0
j1._df_samples_list = j1.import_trajectories(raw_data)
j1._sorter = j1.build_sorter(j1._df_samples_list[0])
cims = j1.import_sampled_cims(raw_data, 0, 'dyn.cims')
self.assertEqual(list(cims.keys()), j1.sorter)
def test_dataset_id(self):
j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
array_indx = 0
j1.import_data(array_indx)
self.assertEqual(array_indx, j1.dataset_id())
def test_file_path(self):
j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
self.assertEqual(j1.file_path, "./PyCTBN/data/networks_and_trajectories_binary_data_01_3.json")
def test_import_data(self):
j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
j1.import_data(0)
self.assertEqual(list(j1.variables[j1._variables_key]),
list(j1.concatenated_samples.columns.values[1:len(j1.variables[j1._variables_key]) + 1]))
print(j1.variables)
print(j1.structure)
print(j1.concatenated_samples)
def ordered(self, obj):
if isinstance(obj, dict):
return sorted((k, self.ordered(v)) for k, v in obj.items())
if isinstance(obj, list):
return sorted(self.ordered(x) for x in obj)
else:
return obj
if __name__ == '__main__':
unittest.main()

@ -0,0 +1,80 @@
import unittest
import os
import glob
import numpy as np
import pandas as pd
from ...PyCTBN.utility.sample_importer import SampleImporter
from ...PyCTBN.structure_graph.sample_path import SamplePath
import json
class TestSampleImporter(unittest.TestCase):
@classmethod
def setUpClass(cls) -> None:
with open("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json") as f:
raw_data = json.load(f)
trajectory_list_raw= raw_data[0]["samples"]
cls.trajectory_list = [pd.DataFrame(sample) for sample in trajectory_list_raw]
cls.variables= pd.DataFrame(raw_data[0]["variables"])
cls.prior_net_structure = pd.DataFrame(raw_data[0]["dyn.str"])
def test_init(self):
sample_importer = SampleImporter(
trajectory_list=self.trajectory_list,
variables=self.variables,
prior_net_structure=self.prior_net_structure
)
sample_importer.import_data()
s1 = SamplePath(sample_importer)
s1.build_trajectories()
s1.build_structure()
s1.clear_memory()
self.assertEqual(len(s1._importer._df_samples_list), 300)
self.assertIsInstance(s1._importer._df_samples_list,list)
self.assertIsInstance(s1._importer._df_samples_list[0],pd.DataFrame)
self.assertEqual(len(s1._importer._df_variables), 3)
self.assertIsInstance(s1._importer._df_variables,pd.DataFrame)
self.assertEqual(len(s1._importer._df_structure), 2)
self.assertIsInstance(s1._importer._df_structure,pd.DataFrame)
def test_order(self):
sample_importer = SampleImporter(
trajectory_list=self.trajectory_list,
variables=self.variables,
prior_net_structure=self.prior_net_structure
)
sample_importer.import_data()
s1 = SamplePath(sample_importer)
s1.build_trajectories()
s1.build_structure()
s1.clear_memory()
for count,var in enumerate(s1._importer._df_samples_list[0].columns[1:]):
self.assertEqual(s1._importer._sorter[count],var)
def ordered(self, obj):
if isinstance(obj, dict):
return sorted((k, self.ordered(v)) for k, v in obj.items())
if isinstance(obj, list):
return sorted(self.ordered(x) for x in obj)
else:
return obj
if __name__ == '__main__':
unittest.main()

@ -1 +1 @@
# PyCTBN
# CTBN_Project

@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
# Recipe lines must begin with a TAB character, otherwise make aborts with
# "missing separator".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

Binary file not shown.

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More