1
0
Fork 0

Merge branch 'Fully_Documented_version'

master
Luca Moretti 4 years ago
commit e52d98b32e
  1. BIN
      .coverage
  2. 3
      .coveragerc
  3. 3
      .gitignore
  4. BIN
      CTBN_Diagramma_Dominio.pdf
  5. 9
      PyCTBN/MANIFEST.in
  6. 16
      PyCTBN/PyCTBN/__init__.py
  7. 9
      PyCTBN/PyCTBN/estimators/fam_score_calculator.py
  8. 26
      PyCTBN/PyCTBN/estimators/parameters_estimator.py
  9. 11
      PyCTBN/PyCTBN/estimators/structure_constraint_based_estimator.py
  10. 86
      PyCTBN/PyCTBN/estimators/structure_estimator.py
  11. 20
      PyCTBN/PyCTBN/estimators/structure_score_based_estimator.py
  12. 3
      PyCTBN/PyCTBN/optimizers/constraint_based_optimizer.py
  13. 2
      PyCTBN/PyCTBN/optimizers/optimizer.py
  14. 15
      PyCTBN/PyCTBN/optimizers/tabu_search.py
  15. 18
      PyCTBN/PyCTBN/structure_graph/network_graph.py
  16. 17
      PyCTBN/PyCTBN/structure_graph/sample_path.py
  17. 6
      PyCTBN/PyCTBN/structure_graph/structure.py
  18. 1
      PyCTBN/PyCTBN/utility/abstract_importer.py
  19. 4
      PyCTBN/PyCTBN/utility/cache.py
  20. 9
      PyCTBN/PyCTBN/utility/sample_importer.py
  21. 2
      PyCTBN/__init__.py
  22. BIN
      PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json
  23. BIN
      PyCTBN/test_data/networks_and_trajectories_binary_data_02_10_1.json
  24. BIN
      PyCTBN/test_data/networks_and_trajectories_ternary_data_01_6_1.json
  25. 0
      PyCTBN/tests/estimators/__init__.py
  26. 118
      PyCTBN/tests/estimators/test_structure_constraint_based_estimator.py
  27. 118
      PyCTBN/tests/estimators/test_structure_estimator.py
  28. 171
      PyCTBN/tests/estimators/test_structure_score_based_estimator.py
  29. 0
      PyCTBN/tests/optimizers/__init__.py
  30. 66
      PyCTBN/tests/optimizers/test_hill_climbing_search.py
  31. 83
      PyCTBN/tests/optimizers/test_tabu_search.py
  32. 0
      PyCTBN/tests/structure_graph/__init__.py
  33. 13
      PyCTBN/tests/structure_graph/test_sample_path.py
  34. 10
      PyCTBN/tests/structure_graph/test_structure.py
  35. 5
      PyCTBN/tests/structure_graph/test_trajectory.py
  36. 0
      PyCTBN/tests/utility/__init__.py
  37. 2
      PyCTBN/tests/utility/test_json_importer.py
  38. 4
      PyCTBN/tests/utility/test_sample_importer.py
  39. 1
      README.md
  40. 125
      README.rst
  41. 41
      basic_main.py
  42. 1207
      coverage.xml
  43. 70
      docs/make.bat
  44. 22
      setup.py

Binary file not shown.

@ -0,0 +1,3 @@
[run]
omit =
*/tests/*

3
.gitignore vendored

@ -5,4 +5,5 @@ __pycache__
**/PyCTBN.egg-info **/PyCTBN.egg-info
**/dist **/dist
**/results_data **/results_data
**/.scannerwork **/.scannerwork
**/build

Binary file not shown.

@ -0,0 +1,9 @@
include MANIFEST.in
include setup.py
include README.rst
prune PyCTBN/test_data
prune PyCTBN/tests
prune tests
prune test_data
prune *tests*
prune *test*

@ -1,8 +1,8 @@
import PyCTBN.estimators import PyCTBN.PyCTBN.estimators
from PyCTBN.estimators import * from PyCTBN.PyCTBN.estimators import *
import PyCTBN.optimizers import PyCTBN.PyCTBN.optimizers
from PyCTBN.optimizers import * from PyCTBN.PyCTBN.optimizers import *
import PyCTBN.structure_graph import PyCTBN.PyCTBN.structure_graph
from PyCTBN.structure_graph import * from PyCTBN.PyCTBN.structure_graph import *
import PyCTBN.utility import PyCTBN.PyCTBN.utility
from PyCTBN.utility import * from PyCTBN.PyCTBN.utility import *

@ -252,14 +252,7 @@ class FamScoreCalculator:
:return: the FamScore value of the node :return: the FamScore value of the node
:rtype: float :rtype: float
""" """
#print("------")
#print(self.marginal_likelihood_q(cims,
# tau_xu,
# alpha_xu))
#print(self.marginal_likelihood_theta(cims,
# alpha_xu,
# alpha_xxu))
'calculate alpha_xxu as a uniform distribution' 'calculate alpha_xxu as a uniform distribution'
alpha_xxu = alpha_xu /(len(cims[0]._state_residence_times) - 1) alpha_xxu = alpha_xu /(len(cims[0]._state_residence_times) - 1)

@ -107,32 +107,6 @@ class ParametersEstimator(object):
M_raveled[diag_indices] = 0 M_raveled[diag_indices] = 0
M_raveled[diag_indices] = np.sum(M, axis=2).ravel() M_raveled[diag_indices] = np.sum(M, axis=2).ravel()
def init_sets_cims_container(self):
self.sets_of_cims_struct = acims.SetsOfCimsContainer(self.net_graph.nodes,
self.net_graph.nodes_values,
self.net_graph.get_ordered_by_indx_parents_values_for_all_nodes(),
self.net_graph.p_combs)
def compute_parameters(self):
#print(self.net_graph.get_nodes())
#print(self.amalgamated_cims_struct.sets_of_cims)
#enumerate(zip(self.net_graph.get_nodes(), self.amalgamated_cims_struct.sets_of_cims))
for indx, aggr in enumerate(zip(self.net_graph.nodes, self.sets_of_cims_struct.sets_of_cims)):
#print(self.net_graph.time_filtering[indx])
#print(self.net_graph.time_scalar_indexing_strucure[indx])
self.compute_state_res_time_for_node(self.net_graph.get_node_indx(aggr[0]), self.sample_path.trajectories.times,
self.sample_path.trajectories.trajectory,
self.net_graph.time_filtering[indx],
self.net_graph.time_scalar_indexing_strucure[indx],
aggr[1]._state_residence_times)
#print(self.net_graph.transition_filtering[indx])
#print(self.net_graph.transition_scalar_indexing_structure[indx])
self.compute_state_transitions_for_a_node(self.net_graph.get_node_indx(aggr[0]),
self.sample_path.trajectories.complete_trajectory,
self.net_graph.transition_filtering[indx],
self.net_graph.transition_scalar_indexing_structure[indx],
aggr[1]._transition_matrices)
aggr[1].build_cims(aggr[1]._state_residence_times, aggr[1]._transition_matrices)

@ -141,8 +141,7 @@ class StructureConstraintBasedEstimator(StructureEstimator):
r2s = M2.diagonal() r2s = M2.diagonal()
C1 = cim1.cim C1 = cim1.cim
C2 = cim2.cim C2 = cim2.cim
if child_states_numb > 2: if child_states_numb > 2 and (np.sum(np.diagonal(M1)) / thumb_value) < self._thumb_threshold:
if (np.sum(np.diagonal(M1)) / thumb_value) < self._thumb_threshold:
self._removable_edges_matrix[parent_indx][child_indx] = False self._removable_edges_matrix[parent_indx][child_indx] = False
return False return False
F_stats = C2.diagonal() / C1.diagonal() F_stats = C2.diagonal() / C1.diagonal()
@ -225,9 +224,13 @@ class StructureConstraintBasedEstimator(StructureEstimator):
list_edges_partial = executor.map(ctpc_algo, list_edges_partial = executor.map(ctpc_algo,
self._nodes, self._nodes,
total_vars_numb_array) total_vars_numb_array)
#list_edges_partial = [ctpc_algo(n,total_vars_numb) for n in self._nodes]
'Update the graph'
edges = set(itertools.chain.from_iterable(list_edges_partial))
self._complete_graph = nx.DiGraph()
self._complete_graph.add_edges_from(edges)
return set(itertools.chain.from_iterable(list_edges_partial)) return edges
def estimate_structure(self,disable_multiprocessing:bool=False): def estimate_structure(self,disable_multiprocessing:bool=False):

@ -9,7 +9,7 @@ import numpy as np
from networkx.readwrite import json_graph from networkx.readwrite import json_graph
from abc import ABC from abc import ABC
import os
import abc import abc
from ..utility.cache import Cache from ..utility.cache import Cache
@ -104,11 +104,11 @@ class StructureEstimator(object):
json.dump(res, f) json.dump(res, f)
def remove_diagonal_elements(self, matrix): #def remove_diagonal_elements(self, matrix):
m = matrix.shape[0] # m = matrix.shape[0]
strided = np.lib.stride_tricks.as_strided # strided = np.lib.stride_tricks.as_strided
s0, s1 = matrix.strides # s0, s1 = matrix.strides
return strided(matrix.ravel()[1:], shape=(m - 1, m), strides=(s0 + s1, s1)).reshape(m, -1) # return strided(matrix.ravel()[1:], shape=(m - 1, m), strides=(s0 + s1, s1)).reshape(m, -1)
@abc.abstractmethod @abc.abstractmethod
@ -137,49 +137,47 @@ class StructureEstimator(object):
:rtype: List :rtype: List
""" """
if not self._sample_path.has_prior_net_structure: if not self._sample_path.has_prior_net_structure:
raise RuntimeError("Can not compute spurious edges with no prior net structure!") return []
real_graph = nx.DiGraph() real_graph = nx.DiGraph()
real_graph.add_nodes_from(self._sample_path.structure.nodes_labels) real_graph.add_nodes_from(self._sample_path.structure.nodes_labels)
real_graph.add_edges_from(self._sample_path.structure.edges) real_graph.add_edges_from(self._sample_path.structure.edges)
return nx.difference(real_graph, self._complete_graph).edges return nx.difference(real_graph, self._complete_graph).edges
def save_plot_estimated_structure_graph(self) -> None: def save_plot_estimated_structure_graph(self, file_path: str) -> None:
"""Plot the estimated structure in a graphical model style. """Plot the estimated structure in a graphical model style, use .png extension.
Spurious edges are colored in red. Spurious edges are colored in red if a prior structure is present.
"""
graph_to_draw = nx.DiGraph() :param file_path: path to save the file to
spurious_edges = self.spurious_edges() :type: string
non_spurious_edges = list(set(self._complete_graph.edges) - set(spurious_edges)) """
print(non_spurious_edges) graph_to_draw = nx.DiGraph()
edges_colors = ['red' if edge in spurious_edges else 'black' for edge in self._complete_graph.edges] spurious_edges = self.spurious_edges()
graph_to_draw.add_edges_from(spurious_edges) non_spurious_edges = list(set(self._complete_graph.edges) - set(spurious_edges))
graph_to_draw.add_edges_from(non_spurious_edges) edges_colors = ['red' if edge in spurious_edges else 'black' for edge in self._complete_graph.edges]
pos = nx.spring_layout(graph_to_draw, k=0.5*1/np.sqrt(len(graph_to_draw.nodes())), iterations=50,scale=10) graph_to_draw.add_edges_from(spurious_edges)
options = { graph_to_draw.add_edges_from(non_spurious_edges)
"node_size": 2000, pos = nx.spring_layout(graph_to_draw, k=0.5*1/np.sqrt(len(graph_to_draw.nodes())), iterations=50,scale=10)
"node_color": "white", options = {
"edgecolors": "black", "node_size": 2000,
'linewidths':2, "node_color": "white",
"with_labels":True, "edgecolors": "black",
"font_size":13, 'linewidths':2,
'connectionstyle': 'arc3, rad = 0.1', "with_labels":True,
"arrowsize": 15, "font_size":13,
"arrowstyle": '<|-', 'connectionstyle': 'arc3, rad = 0.1',
"width": 1, "arrowsize": 15,
"edge_color":edges_colors, "arrowstyle": '<|-',
} "width": 1,
"edge_color":edges_colors,
nx.draw(graph_to_draw, pos, **options) }
ax = plt.gca()
ax.margins(0.20) nx.draw(graph_to_draw, pos, **options)
plt.axis("off") ax = plt.gca()
name = self._sample_path._importer.file_path.rsplit('/', 1)[-1] ax.margins(0.20)
name = name.split('.', 1)[0] plt.axis("off")
name += '_' + str(self._sample_path._importer.dataset_id()) plt.savefig(file_path)
name += '.png' plt.clf()
plt.savefig(name) print("Estimated Structure Plot Saved At: ", os.path.abspath(file_path))
plt.clf()
print("Estimated Structure Plot Saved At: ", os.path.abspath(name))

@ -121,15 +121,9 @@ class StructureScoreBasedEstimator(StructureEstimator):
l_optimizer) l_optimizer)
#list_edges_partial = p.map(estimate_parents, self._nodes)
#list_edges_partial= estimate_parents('Q',max_parents,iterations_number,patience,tabu_length,tabu_rules_duration,optimizer)
'Concatenate all the edges list' 'Concatenate all the edges list'
set_list_edges = set(itertools.chain.from_iterable(list_edges_partial)) set_list_edges = set(itertools.chain.from_iterable(list_edges_partial))
#print('-------------------------')
'calculate precision and recall' 'calculate precision and recall'
n_missing_edges = 0 n_missing_edges = 0
@ -145,10 +139,7 @@ class StructureScoreBasedEstimator(StructureEstimator):
precision = n_true_positive / (n_true_positive + n_added_fake_edges) precision = n_true_positive / (n_true_positive + n_added_fake_edges)
recall = n_true_positive / (n_true_positive + n_missing_edges) recall = n_true_positive / (n_true_positive + n_missing_edges)
# print(f"n archi reali non trovati: {n_missing_edges}")
# print(f"n archi non reali aggiunti: {n_added_fake_edges}")
print(true_edges) print(true_edges)
print(set_list_edges) print(set_list_edges)
print(f"precision: {precision} ") print(f"precision: {precision} ")
@ -156,6 +147,11 @@ class StructureScoreBasedEstimator(StructureEstimator):
except Exception as e: except Exception as e:
print(f"errore: {e}") print(f"errore: {e}")
'Update the graph'
self._complete_graph = nx.DiGraph()
self._complete_graph.add_edges_from(set_list_edges)
return set_list_edges return set_list_edges
@ -238,7 +234,3 @@ class StructureScoreBasedEstimator(StructureEstimator):
#print(f" lo score per {node_id} risulta: {score} ") #print(f" lo score per {node_id} risulta: {score} ")
return score return score

@ -60,9 +60,6 @@ class ConstraintBasedOptimizer(Optimizer):
u = other_nodes u = other_nodes
#tests_parents_numb = len(u)
#complete_frame = self.complete_graph_frame
#test_frame = complete_frame.loc[complete_frame['To'].isin([self.node_id])]
child_states_numb = self.structure_estimator._sample_path.structure.get_states_number(self.node_id) child_states_numb = self.structure_estimator._sample_path.structure.get_states_number(self.node_id)
b = 0 b = 0
while b < len(u): while b < len(u):

@ -36,4 +36,4 @@ class Optimizer(abc.ABC):
:return: the estimated structure for the node :return: the estimated structure for the node
:rtype: List :rtype: List
""" """
pass pass

@ -104,7 +104,7 @@ class TabuSearch(Optimizer):
self.tabu_length = len(other_nodes) self.tabu_length = len(other_nodes)
if self.tabu_rules_duration is None: if self.tabu_rules_duration is None:
self.tabu_tabu_rules_durationength = len(other_nodes) self.tabu_rules_duration = len(other_nodes)
'inizialize the data structures' 'inizialize the data structures'
tabu_set = set() tabu_set = set()
@ -140,20 +140,9 @@ class TabuSearch(Optimizer):
graph.remove_edges([parent_removed]) graph.remove_edges([parent_removed])
graph.add_edges([current_edge]) graph.add_edges([current_edge])
added = True added = True
#print('**************************')
current_score = self.structure_estimator.get_score_from_graph(graph,self.node_id) current_score = self.structure_estimator.get_score_from_graph(graph,self.node_id)
# print("-------------------------------------------")
# print(f"Current new parent: {current_new_parent}")
# print(f"Current score: {current_score}")
# print(f"Current best score: {actual_best_score}")
# print(f"tabu list : {str(tabu_set)} length: {len(tabu_set)}")
# print(f"tabu queue : {str(tabu_queue)} length: {tabu_queue.qsize()}")
# print(f"graph edges: {graph.edges}")
# print("-------------------------------------------")
# input()
if current_score > actual_best_score: if current_score > actual_best_score:
'update current best score' 'update current best score'
actual_best_score = current_score actual_best_score = current_score

@ -36,15 +36,15 @@ class NetworkGraph(object):
self._transition_filtering = None self._transition_filtering = None
self._p_combs_structure = None self._p_combs_structure = None
def init_graph(self): #def init_graph(self):
self.add_nodes(self._nodes_labels) # self.add_nodes(self._nodes_labels)
self.add_edges(self.graph_struct.edges) # self.add_edges(self.graph_struct.edges)
self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes() # self.aggregated_info_about_nodes_parents = self.get_ord_set_of_par_of_all_nodes()
self._fancy_indexing = self.build_fancy_indexing_structure(0) # self._fancy_indexing = self.build_fancy_indexing_structure(0)
self.build_scalar_indexing_structures() # self.build_scalar_indexing_structures()
self.build_time_columns_filtering_structure() # self.build_time_columns_filtering_structure()
self.build_transition_columns_filtering_structure() # self.build_transition_columns_filtering_structure()
self._p_combs_structure = self.build_p_combs_structure() # self._p_combs_structure = self.build_p_combs_structure()
def fast_init(self, node_id: str) -> None: def fast_init(self, node_id: str) -> None:
"""Initializes all the necessary structures for parameters estimation of the node identified by the label """Initializes all the necessary structures for parameters estimation of the node identified by the label

@ -8,6 +8,7 @@ from .trajectory import Trajectory
from ..utility.abstract_importer import AbstractImporter from ..utility.abstract_importer import AbstractImporter
MESSAGE_HAS_TO_CONTAIN_EXCEPTION = 'The importer object has to contain the all processed data!'
class SamplePath(object): class SamplePath(object):
"""Aggregates all the informations about the trajectories, the real structure of the sampled net and variables """Aggregates all the informations about the trajectories, the real structure of the sampled net and variables
@ -25,15 +26,15 @@ class SamplePath(object):
""" """
self._importer = importer self._importer = importer
if self._importer._df_variables is None or self._importer._concatenated_samples is None: if self._importer._df_variables is None or self._importer._concatenated_samples is None:
raise RuntimeError('The importer object has to contain the all processed data!') raise RuntimeError()
if self._importer._df_variables.empty: if self._importer._df_variables.empty:
raise RuntimeError('The importer object has to contain the all processed data!') raise RuntimeError(MESSAGE_HAS_TO_CONTAIN_EXCEPTION)
if isinstance(self._importer._concatenated_samples, pd.DataFrame): if isinstance(self._importer._concatenated_samples, pd.DataFrame) and\
if self._importer._concatenated_samples.empty: self._importer._concatenated_samples.empty:
raise RuntimeError('The importer object has to contain the all processed data!') raise RuntimeError(MESSAGE_HAS_TO_CONTAIN_EXCEPTION)
if isinstance(self._importer._concatenated_samples, np.ndarray): if isinstance(self._importer._concatenated_samples, np.ndarray) and\
if self._importer._concatenated_samples.size == 0: self._importer._concatenated_samples.size == 0:
raise RuntimeError('The importer object has to contain the all processed data!') raise RuntimeError(MESSAGE_HAS_TO_CONTAIN_EXCEPTION)
self._trajectories = None self._trajectories = None
self._structure = None self._structure = None
self._total_variables_count = None self._total_variables_count = None

@ -74,11 +74,11 @@ class Structure(object):
self._edges_list = list() self._edges_list = list()
def add_edge(self,edge: tuple): def add_edge(self,edge: tuple):
self._edges_list.append(tuple) self._edges_list.append(edge)
print(self._edges_list)
def remove_edge(self,edge: tuple): def remove_edge(self,edge: tuple):
self._edges_list.remove(tuple) self._edges_list.remove(edge)
def contains_edge(self,edge:tuple) -> bool: def contains_edge(self,edge:tuple) -> bool:
return edge in self._edges_list return edge in self._edges_list

@ -7,7 +7,6 @@ import pandas as pd
import copy import copy
#from sklearn.utils import resample
class AbstractImporter(ABC): class AbstractImporter(ABC):

@ -30,10 +30,7 @@ class Cache:
:rtype: SetOfCims :rtype: SetOfCims
""" """
try: try:
#print("Cache State:", self.list_of_sets_of_indxs)
#print("Look For:", parents_comb)
result = self._actual_cache[self._list_of_sets_of_parents.index(parents_comb)] result = self._actual_cache[self._list_of_sets_of_parents.index(parents_comb)]
#print("CACHE HIT!!!!", parents_comb)
return result return result
except ValueError: except ValueError:
return None return None
@ -47,7 +44,6 @@ class Cache:
:param socim: the related SetOfCims object :param socim: the related SetOfCims object
:type socim: SetOfCims :type socim: SetOfCims
""" """
#print("Putting in cache:", parents_comb)
self._list_of_sets_of_parents.append(parents_comb) self._list_of_sets_of_parents.append(parents_comb)
self._actual_cache.append(socim) self._actual_cache.append(socim)

@ -32,7 +32,7 @@ class SampleImporter(AbstractImporter):
'If the data are not DataFrame, it will be converted' 'If the data are not DataFrame, it will be converted'
if isinstance(variables,list) or isinstance(variables,np.ndarray): if isinstance(variables,list) or isinstance(variables,np.ndarray):
variables = pd.DataFrame(variables) variables = pd.DataFrame(variables)
if isinstance(variables,list) or isinstance(variables,np.ndarray): if isinstance(prior_net_structure,list) or isinstance(prior_net_structure,np.ndarray):
prior_net_structure=pd.DataFrame(prior_net_structure) prior_net_structure=pd.DataFrame(prior_net_structure)
super(SampleImporter, self).__init__(trajectory_list =trajectory_list, super(SampleImporter, self).__init__(trajectory_list =trajectory_list,
@ -48,9 +48,6 @@ class SampleImporter(AbstractImporter):
samples_list= self._df_samples_list samples_list= self._df_samples_list
if isinstance(samples_list, np.ndarray):
samples_list = samples_list.tolist()
self.compute_row_delta_in_all_samples_frames(samples_list) self.compute_row_delta_in_all_samples_frames(samples_list)
def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List: def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
@ -61,5 +58,5 @@ class SampleImporter(AbstractImporter):
return columns_header return columns_header
def dataset_id(self) -> object: def dataset_id(self) -> str:
pass return str("")

@ -0,0 +1,2 @@
import PyCTBN.PyCTBN
from PyCTBN.PyCTBN import *

@ -50,15 +50,127 @@ class TestStructureConstraintBasedEstimator(unittest.TestCase):
cls.s1.build_trajectories() cls.s1.build_trajectories()
cls.s1.build_structure() cls.s1.build_structure()
def test_structure(self): def test_structure_1(self):
true_edges = copy.deepcopy(self.s1.structure.edges) true_edges = copy.deepcopy(self.s1.structure.edges)
true_edges = set(map(tuple, true_edges)) true_edges = set(map(tuple, true_edges))
se1 = StructureConstraintBasedEstimator(self.s1,0.1,0.1) se1 = StructureConstraintBasedEstimator(self.s1,0.1,0.1)
edges = se1.estimate_structure(disable_multiprocessing=False) edges = se1.estimate_structure(True)
self.assertFalse(se1.spurious_edges())
self.assertEqual(edges, true_edges) self.assertEqual(edges, true_edges)
def test_structure_2(self):
    """Check precision and recall of the constraint based estimator.

    The data are read from
    ``networks_and_trajectories_binary_data_02_10_1.json``, wrapped in a
    ``SampleImporter`` and turned into a ``SamplePath``. The structure is
    then estimated with multiprocessing disabled and compared against the
    prior net structure: both precision and recall have to be >= 0.75.
    """
    with open("./PyCTBN/test_data/networks_and_trajectories_binary_data_02_10_1.json",
              encoding="utf-8") as f:
        raw_data = json.load(f)

    trajectory_list = [pd.DataFrame(sample) for sample in raw_data["samples"]]
    variables = pd.DataFrame(raw_data["variables"])
    prior_net_structure = pd.DataFrame(raw_data["dyn.str"])

    self.importer = SampleImporter(
        trajectory_list=trajectory_list,
        variables=variables,
        prior_net_structure=prior_net_structure
    )
    self.importer.import_data()

    self.s1 = SamplePath(self.importer)
    self.s1.build_trajectories()
    self.s1.build_structure()

    # Edges are converted to tuples so that they are hashable and comparable
    # with the set returned by the estimator.
    true_edges = set(map(tuple, copy.deepcopy(self.s1.structure.edges)))

    se1 = StructureConstraintBasedEstimator(self.s1, 0.1, 0.1)
    edges = se1.estimate_structure(disable_multiprocessing=True)

    # Both sets are used as denominators below: fail with a clear message
    # instead of erroring out with a ZeroDivisionError.
    self.assertTrue(true_edges, "the prior net structure contains no edges")
    self.assertTrue(edges, "the estimator returned no edges")

    # Calculate precision and recall
    n_added_fake_edges = len(edges.difference(true_edges))
    n_missing_edges = len(true_edges.difference(edges))
    n_true_positive = len(true_edges) - n_missing_edges

    precision = n_true_positive / (n_true_positive + n_added_fake_edges)
    recall = n_true_positive / (n_true_positive + n_missing_edges)

    self.assertGreaterEqual(precision, 0.75)
    self.assertGreaterEqual(recall, 0.75)
def test_structure_3(self):
    """Check precision and recall of the constraint based estimator.

    The data are read from
    ``networks_and_trajectories_ternary_data_01_6_1.json``, wrapped in a
    ``SampleImporter`` and turned into a ``SamplePath``. The structure is
    then estimated with multiprocessing disabled and compared against the
    prior net structure: both precision and recall have to be >= 0.75.
    """
    with open("./PyCTBN/test_data/networks_and_trajectories_ternary_data_01_6_1.json",
              encoding="utf-8") as f:
        raw_data = json.load(f)

    trajectory_list = [pd.DataFrame(sample) for sample in raw_data["samples"]]
    variables = pd.DataFrame(raw_data["variables"])
    prior_net_structure = pd.DataFrame(raw_data["dyn.str"])

    self.importer = SampleImporter(
        trajectory_list=trajectory_list,
        variables=variables,
        prior_net_structure=prior_net_structure
    )
    self.importer.import_data()

    self.s1 = SamplePath(self.importer)
    self.s1.build_trajectories()
    self.s1.build_structure()

    # Edges are converted to tuples so that they are hashable and comparable
    # with the set returned by the estimator.
    true_edges = set(map(tuple, copy.deepcopy(self.s1.structure.edges)))

    se1 = StructureConstraintBasedEstimator(self.s1, 0.1, 0.1)
    edges = se1.estimate_structure(disable_multiprocessing=True)

    # Both sets are used as denominators below: fail with a clear message
    # instead of erroring out with a ZeroDivisionError.
    self.assertTrue(true_edges, "the prior net structure contains no edges")
    self.assertTrue(edges, "the estimator returned no edges")

    # Calculate precision and recall
    n_added_fake_edges = len(edges.difference(true_edges))
    n_missing_edges = len(true_edges.difference(edges))
    n_true_positive = len(true_edges) - n_missing_edges

    precision = n_true_positive / (n_true_positive + n_added_fake_edges)
    recall = n_true_positive / (n_true_positive + n_missing_edges)

    self.assertGreaterEqual(precision, 0.75)
    self.assertGreaterEqual(recall, 0.75)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

@ -0,0 +1,118 @@
import glob
import math
import os
import unittest
import json
import networkx as nx
import numpy as np
import timeit
from ...PyCTBN.utility.cache import Cache
from ...PyCTBN.structure_graph.sample_path import SamplePath
from ...PyCTBN.estimators.structure_constraint_based_estimator import StructureConstraintBasedEstimator
from ...PyCTBN.utility.json_importer import JsonImporter
class TestStructureEstimator(unittest.TestCase):
    """Unit tests for the behaviour ``StructureConstraintBasedEstimator``
    exposes through the estimator interface (graph building, results
    saving, adjacency matrix and plotting).

    All the tests share the ``SamplePath`` built once in ``setUpClass``.
    """

    @classmethod
    def setUpClass(cls):
        """Import the first dataset of the binary test file and build the
        shared ``SamplePath`` (trajectories and structure).
        """
        cls.read_files = glob.glob(os.path.join('./PyCTBN/test_data', "*.json"))
        cls.importer = JsonImporter('./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json',
                                    'samples', 'dyn.str', 'variables', 'Time', 'Name')
        cls.importer.import_data(0)
        cls.s1 = SamplePath(cls.importer)
        cls.s1.build_trajectories()
        cls.s1.build_structure()

    @staticmethod
    def _remove_if_exists(file_path):
        """Delete ``file_path`` if present; used as a test cleanup so that
        artifacts are removed even when an assertion fails.
        """
        if os.path.exists(file_path):
            os.remove(file_path)

    def test_init(self):
        """The constructor has to store the sample path, the nodes data,
        the test thresholds and create the graph and the cache.
        """
        exp_alfa = 0.1
        chi_alfa = 0.1
        se1 = StructureConstraintBasedEstimator(self.s1, exp_alfa, chi_alfa)
        self.assertEqual(self.s1, se1._sample_path)
        self.assertTrue(np.array_equal(se1._nodes, np.array(self.s1.structure.nodes_labels)))
        self.assertTrue(np.array_equal(se1._nodes_indxs, self.s1.structure.nodes_indexes))
        self.assertTrue(np.array_equal(se1._nodes_vals, self.s1.structure.nodes_values))
        self.assertEqual(se1._exp_test_sign, exp_alfa)
        self.assertEqual(se1._chi_test_alfa, chi_alfa)
        self.assertIsInstance(se1._complete_graph, nx.DiGraph)
        self.assertIsInstance(se1._cache, Cache)

    def test_build_complete_graph(self):
        """The complete graph has to contain every ordered pair of distinct
        nodes, i.e. n * (n - 1) edges and no self loops.
        """
        exp_alfa = 0.1
        chi_alfa = 0.1
        nodes_labels = self.s1.structure.nodes_labels
        nodes_numb = len(nodes_labels)
        se1 = StructureConstraintBasedEstimator(self.s1, exp_alfa, chi_alfa)
        cg = se1.build_complete_graph(nodes_labels)
        self.assertEqual(len(cg.edges), nodes_numb * (nodes_numb - 1))
        for node in nodes_labels:
            # Work on a copy: the labels list belongs to the shared structure.
            no_self_loops = nodes_labels[:]
            no_self_loops.remove(node)
            for n2 in no_self_loops:
                self.assertIn((node, n2), cg.edges)

    def test_build_removable_edges_matrix(self):
        """Edges passed as known edges have to be flagged as not removable."""
        exp_alfa = 0.1
        chi_alfa = 0.1
        known_edges = self.s1.structure.edges[0:2]
        se1 = StructureConstraintBasedEstimator(self.s1, exp_alfa, chi_alfa, known_edges)
        for edge in known_edges:
            i = self.s1.structure.get_node_indx(edge[0])
            j = self.s1.structure.get_node_indx(edge[1])
            self.assertFalse(se1._removable_edges_matrix[i][j])

    def test_generate_possible_sub_sets_of_size(self):
        """For every node and every size b, the generated subsets have to be
        C(n - 1, b) in number and must never contain the node itself.
        """
        nodes_labels = self.s1.structure.nodes_labels
        nodes_numb = len(nodes_labels)
        for node in nodes_labels:
            for b in range(nodes_numb):
                # The result is generated twice because the first one is
                # consumed by ``list`` when counting its elements.
                sets = StructureConstraintBasedEstimator.generate_possible_sub_sets_of_size(
                    nodes_labels, b, node)
                sets2 = StructureConstraintBasedEstimator.generate_possible_sub_sets_of_size(
                    nodes_labels, b, node)
                self.assertEqual(len(list(sets)),
                                 math.floor(math.factorial(nodes_numb - 1) /
                                            (math.factorial(b) * math.factorial(nodes_numb - 1 - b))))
                for sset in sets2:
                    self.assertNotIn(node, sset)

    def test_time(self):
        """Time a single run of the CTPC algorithm and check that every edge
        of the prior structure is present in the estimated graph.
        """
        known_edges = []
        se1 = StructureConstraintBasedEstimator(self.s1, 0.1, 0.1, known_edges, 25)
        exec_time = timeit.timeit(se1.ctpc_algorithm, number=1)
        print("Execution Time: ", exec_time)
        for ed in self.s1.structure.edges:
            self.assertIn(tuple(ed), se1._complete_graph.edges)

    def test_save_results(self):
        """The graph saved by ``save_results`` has to contain the same edges
        as the estimated graph once loaded back from the json file.
        """
        se1 = StructureConstraintBasedEstimator(self.s1, 0.1, 0.1)
        se1.ctpc_algorithm()
        se1.save_results()
        # Rebuild the file name from the importer file name and dataset id.
        name = self.s1._importer.file_path.rsplit('/', 1)[-1]
        name = name.split('.', 1)[0]
        name += '_' + str(self.s1._importer.dataset_id())
        name += '.json'
        file_name = 'results_' + name
        # Registered before any assertion so the file never outlives the test.
        self.addCleanup(self._remove_if_exists, file_name)
        with open(file_name) as f:
            js_graph = json.load(f)
        result_graph = nx.json_graph.node_link_graph(js_graph)
        self.assertFalse(nx.difference(se1._complete_graph, result_graph).edges)

    def test_adjacency_matrix(self):
        """``adjacency_matrix`` has to match the boolean adjacency matrix
        networkx computes for the estimated graph.
        """
        se1 = StructureConstraintBasedEstimator(self.s1, 0.1, 0.1)
        se1.ctpc_algorithm()
        # ``nx.adj_matrix`` has been removed in networkx 3.0,
        # ``nx.adjacency_matrix`` is available in every version.
        adj_matrix = nx.adjacency_matrix(se1._complete_graph).toarray().astype(bool)
        self.assertTrue(np.array_equal(adj_matrix, se1.adjacency_matrix()))

    def test_save_plot_estimated_graph(self):
        """The plot of the estimated structure has to be written to the
        requested path.
        """
        plot_path = './networks_and_trajectories_ternary_data_3.png'
        se1 = StructureConstraintBasedEstimator(self.s1, 0.1, 0.1)
        se1.estimate_structure(disable_multiprocessing=True)
        self.addCleanup(self._remove_if_exists, plot_path)
        se1.save_plot_estimated_structure_graph(plot_path)
        self.assertTrue(os.path.isfile(plot_path))
# Run the whole test module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

@ -1,5 +1,4 @@
import sys
sys.path.append("../../PyCTBN/")
import glob import glob
import math import math
import os import os
@ -54,14 +53,38 @@ class TestStructureScoreBasedEstimator(unittest.TestCase):
cls.s1.build_trajectories() cls.s1.build_trajectories()
cls.s1.build_structure() cls.s1.build_structure()
def test_structure_monoprocesso(self):
with open("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json") as f:
raw_data = json.load(f)
trajectory_list_raw= raw_data[0]["samples"]
trajectory_list = [pd.DataFrame(sample) for sample in trajectory_list_raw]
variables= pd.DataFrame(raw_data[0]["variables"])
prior_net_structure = pd.DataFrame(raw_data[0]["dyn.str"])
def test_structure(self): self.importer = SampleImporter(
trajectory_list=trajectory_list,
variables=variables,
prior_net_structure=prior_net_structure
)
self.importer.import_data()
#cls.s1 = sp.SamplePath(cls.importer)
#cls.traj = cls.s1.concatenated_samples
# print(len(cls.traj))
self.s1 = SamplePath(self.importer)
self.s1.build_trajectories()
self.s1.build_structure()
true_edges = copy.deepcopy(self.s1.structure.edges) true_edges = copy.deepcopy(self.s1.structure.edges)
true_edges = set(map(tuple, true_edges)) true_edges = set(map(tuple, true_edges))
se1 = StructureScoreBasedEstimator(self.s1,known_edges = [('X','Q')]) se1 = StructureScoreBasedEstimator(self.s1)
edges = se1.estimate_structure( edges = se1.estimate_structure(
max_parents = None, max_parents = None,
iterations_number = 100, iterations_number = 100,
@ -74,7 +97,147 @@ class TestStructureScoreBasedEstimator(unittest.TestCase):
self.assertEqual(edges, true_edges) self.assertEqual(edges, true_edges)
def test_structure_1(self):
    """The hill climbing estimation, run with multiprocessing enabled, has
    to return exactly the edges of the prior net structure.
    """
    search_settings = {
        "max_parents": None,
        "iterations_number": 100,
        "patience": 35,
        "tabu_length": 15,
        "tabu_rules_duration": 15,
        "optimizer": 'hill',
        "disable_multiprocessing": False,
    }
    # Tuples make the edges hashable, so they can be compared as a set.
    expected_edges = {tuple(edge) for edge in copy.deepcopy(self.s1.structure.edges)}

    estimator = StructureScoreBasedEstimator(self.s1)
    estimated_edges = estimator.estimate_structure(**search_settings)

    self.assertEqual(estimated_edges, expected_edges)
def test_structure_2(self):
    """Check precision and recall of the score based estimator.

    The data are read from
    ``networks_and_trajectories_binary_data_02_10_1.json``, wrapped in a
    ``SampleImporter`` and turned into a ``SamplePath``. The structure is
    then estimated with the hill climbing optimizer and multiprocessing
    disabled, and compared against the prior net structure: both precision
    and recall have to be >= 0.75.
    """
    with open("./PyCTBN/test_data/networks_and_trajectories_binary_data_02_10_1.json",
              encoding="utf-8") as f:
        raw_data = json.load(f)

    trajectory_list = [pd.DataFrame(sample) for sample in raw_data["samples"]]
    variables = pd.DataFrame(raw_data["variables"])
    prior_net_structure = pd.DataFrame(raw_data["dyn.str"])

    self.importer = SampleImporter(
        trajectory_list=trajectory_list,
        variables=variables,
        prior_net_structure=prior_net_structure
    )
    self.importer.import_data()

    self.s1 = SamplePath(self.importer)
    self.s1.build_trajectories()
    self.s1.build_structure()

    # Edges are converted to tuples so that they are hashable and comparable
    # with the set returned by the estimator.
    true_edges = set(map(tuple, copy.deepcopy(self.s1.structure.edges)))

    se1 = StructureScoreBasedEstimator(self.s1)
    edges = se1.estimate_structure(
        max_parents=None,
        iterations_number=100,
        patience=35,
        tabu_length=15,
        tabu_rules_duration=15,
        optimizer='hill',
        disable_multiprocessing=True
    )

    # Both sets are used as denominators below: fail with a clear message
    # instead of erroring out with a ZeroDivisionError.
    self.assertTrue(true_edges, "the prior net structure contains no edges")
    self.assertTrue(edges, "the estimator returned no edges")

    # Calculate precision and recall
    n_added_fake_edges = len(edges.difference(true_edges))
    n_missing_edges = len(true_edges.difference(edges))
    n_true_positive = len(true_edges) - n_missing_edges

    precision = n_true_positive / (n_true_positive + n_added_fake_edges)
    recall = n_true_positive / (n_true_positive + n_missing_edges)

    self.assertGreaterEqual(precision, 0.75)
    self.assertGreaterEqual(recall, 0.75)
def test_structure_3(self):
    """Check hill climbing with known edges on networks_and_trajectories_ternary_data_01_6_1.json.

    The first two edges of the built structure are handed to the estimator as
    known edges; the estimate must reach at least 0.75 precision and recall.
    """
    with open("./PyCTBN/test_data/networks_and_trajectories_ternary_data_01_6_1.json") as f:
        raw_data = json.load(f)

    trajectory_list = [pd.DataFrame(sample) for sample in raw_data["samples"]]
    # Variables and prior structure are passed as loaded from JSON, without
    # wrapping them in DataFrames.
    variables = raw_data["variables"]
    prior_net_structure = raw_data["dyn.str"]

    self.importer = SampleImporter(
        trajectory_list=trajectory_list,
        variables=variables,
        prior_net_structure=prior_net_structure
    )
    self.importer.import_data()

    self.s1 = SamplePath(self.importer)
    self.s1.build_trajectories()
    self.s1.build_structure()

    true_edges = set(map(tuple, copy.deepcopy(self.s1.structure.edges)))

    known_edges = self.s1.structure.edges[0:2]
    se1 = StructureScoreBasedEstimator(self.s1, known_edges=known_edges)
    edges = se1.estimate_structure(
        max_parents=4,
        iterations_number=100,
        patience=35,
        tabu_length=15,
        tabu_rules_duration=15,
        optimizer='hill',
        disable_multiprocessing=True
    )

    # Precision and recall of the estimated edge set against the true one.
    n_added_fake_edges = len(edges.difference(true_edges))
    n_missing_edges = len(true_edges.difference(edges))
    n_true_positive = len(true_edges) - n_missing_edges
    n_estimated = n_true_positive + n_added_fake_edges
    n_expected = n_true_positive + n_missing_edges
    # A zero denominator (empty estimate or empty true structure) is scored
    # 0.0 so the test fails on the assertions below instead of erroring out
    # with ZeroDivisionError.
    precision = n_true_positive / n_estimated if n_estimated else 0.0
    recall = n_true_positive / n_expected if n_expected else 0.0

    self.assertGreaterEqual(precision, 0.75)
    self.assertGreaterEqual(recall, 0.75)
if __name__ == '__main__': if __name__ == '__main__':

@ -9,11 +9,14 @@ import numpy as np
import psutil import psutil
from line_profiler import LineProfiler from line_profiler import LineProfiler
import copy import copy
import json
import pandas as pd
from ...PyCTBN.structure_graph.sample_path import SamplePath from ...PyCTBN.structure_graph.sample_path import SamplePath
from ...PyCTBN.estimators.structure_score_based_estimator import StructureScoreBasedEstimator from ...PyCTBN.estimators.structure_score_based_estimator import StructureScoreBasedEstimator
from ...PyCTBN.utility.json_importer import JsonImporter from ...PyCTBN.utility.json_importer import JsonImporter
from ...PyCTBN.utility.sample_importer import SampleImporter
@ -38,16 +41,75 @@ class TestHillClimbingSearch(unittest.TestCase):
se1 = StructureScoreBasedEstimator(self.s1) se1 = StructureScoreBasedEstimator(self.s1)
edges = se1.estimate_structure( edges = se1.estimate_structure(
max_parents = None, max_parents = 2,
iterations_number = 40, iterations_number = 40,
patience = None, patience = None,
optimizer = 'hill' optimizer = 'hill',
disable_multiprocessing=True
) )
self.assertEqual(edges, true_edges) self.assertEqual(edges, true_edges)
def test_structure_3(self):
    """Check hill climbing with known edges on networks_and_trajectories_ternary_data_01_6_1.json.

    The first two edges of the built structure are handed to the estimator as
    known edges; the estimate must reach at least 0.75 precision and recall.
    """
    with open("./PyCTBN/test_data/networks_and_trajectories_ternary_data_01_6_1.json") as f:
        raw_data = json.load(f)

    trajectory_list = [pd.DataFrame(sample) for sample in raw_data["samples"]]
    # Variables and prior structure are passed as loaded from JSON, without
    # wrapping them in DataFrames.
    variables = raw_data["variables"]
    prior_net_structure = raw_data["dyn.str"]

    self.importer = SampleImporter(
        trajectory_list=trajectory_list,
        variables=variables,
        prior_net_structure=prior_net_structure
    )
    self.importer.import_data()

    self.s1 = SamplePath(self.importer)
    self.s1.build_trajectories()
    self.s1.build_structure()

    true_edges = set(map(tuple, copy.deepcopy(self.s1.structure.edges)))

    known_edges = self.s1.structure.edges[0:2]
    se1 = StructureScoreBasedEstimator(self.s1, known_edges=known_edges)
    edges = se1.estimate_structure(
        max_parents=3,
        iterations_number=100,
        patience=40,
        optimizer='hill',
        disable_multiprocessing=True
    )

    # Precision and recall of the estimated edge set against the true one.
    n_added_fake_edges = len(edges.difference(true_edges))
    n_missing_edges = len(true_edges.difference(edges))
    n_true_positive = len(true_edges) - n_missing_edges
    n_estimated = n_true_positive + n_added_fake_edges
    n_expected = n_true_positive + n_missing_edges
    # A zero denominator (empty estimate or empty true structure) is scored
    # 0.0 so the test fails on the assertions below instead of erroring out
    # with ZeroDivisionError.
    precision = n_true_positive / n_estimated if n_estimated else 0.0
    recall = n_true_positive / n_expected if n_expected else 0.0

    self.assertGreaterEqual(precision, 0.75)
    self.assertGreaterEqual(recall, 0.75)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

@ -13,14 +13,11 @@ from line_profiler import LineProfiler
import copy import copy
import json import json
import utility.cache as ch from ...PyCTBN.utility.cache import Cache
import structure_graph.sample_path as sp from ...PyCTBN.structure_graph.sample_path import SamplePath
import estimators.structure_score_based_estimator as se from ...PyCTBN.estimators.structure_score_based_estimator import StructureScoreBasedEstimator
import utility.json_importer as ji from ...PyCTBN.utility.json_importer import JsonImporter
import utility.sample_importer as si from ...PyCTBN.utility.sample_importer import SampleImporter
class TestTabuSearch(unittest.TestCase): class TestTabuSearch(unittest.TestCase):
@ -40,7 +37,7 @@ class TestTabuSearch(unittest.TestCase):
prior_net_structure = pd.DataFrame(raw_data[0]["dyn.str"]) prior_net_structure = pd.DataFrame(raw_data[0]["dyn.str"])
cls.importer = si.SampleImporter( cls.importer = SampleImporter(
trajectory_list=trajectory_list, trajectory_list=trajectory_list,
variables=variables, variables=variables,
prior_net_structure=prior_net_structure prior_net_structure=prior_net_structure
@ -52,7 +49,7 @@ class TestTabuSearch(unittest.TestCase):
#cls.traj = cls.s1.concatenated_samples #cls.traj = cls.s1.concatenated_samples
# print(len(cls.traj)) # print(len(cls.traj))
cls.s1 = sp.SamplePath(cls.importer) cls.s1 = SamplePath(cls.importer)
cls.s1.build_trajectories() cls.s1.build_trajectories()
cls.s1.build_structure() cls.s1.build_structure()
#cls.s1.clear_memory() #cls.s1.clear_memory()
@ -63,21 +60,79 @@ class TestTabuSearch(unittest.TestCase):
true_edges = copy.deepcopy(self.s1.structure.edges) true_edges = copy.deepcopy(self.s1.structure.edges)
true_edges = set(map(tuple, true_edges)) true_edges = set(map(tuple, true_edges))
se1 = se.StructureScoreBasedEstimator(self.s1) se1 = StructureScoreBasedEstimator(self.s1)
edges = se1.estimate_structure( edges = se1.estimate_structure(
max_parents = None, max_parents = None,
iterations_number = 100, iterations_number = 100,
patience = 20, patience = 20,
tabu_length = 10,
tabu_rules_duration = 10,
optimizer = 'tabu', optimizer = 'tabu',
disable_multiprocessing=False disable_multiprocessing=True
) )
self.assertEqual(edges, true_edges) self.assertEqual(edges, true_edges)
def test_structure_3(self):
    """Check tabu search with known edges on networks_and_trajectories_ternary_data_01_6_1.json.

    The first two edges of the built structure are handed to the estimator as
    known edges; the estimate must reach at least 0.75 precision and recall.
    """
    with open("./PyCTBN/test_data/networks_and_trajectories_ternary_data_01_6_1.json") as f:
        raw_data = json.load(f)

    trajectory_list = [pd.DataFrame(sample) for sample in raw_data["samples"]]
    # Variables and prior structure are passed as loaded from JSON, without
    # wrapping them in DataFrames.
    variables = raw_data["variables"]
    prior_net_structure = raw_data["dyn.str"]

    self.importer = SampleImporter(
        trajectory_list=trajectory_list,
        variables=variables,
        prior_net_structure=prior_net_structure
    )
    self.importer.import_data()

    self.s1 = SamplePath(self.importer)
    self.s1.build_trajectories()
    self.s1.build_structure()

    true_edges = set(map(tuple, copy.deepcopy(self.s1.structure.edges)))

    known_edges = self.s1.structure.edges[0:2]
    se1 = StructureScoreBasedEstimator(self.s1, known_edges=known_edges)
    edges = se1.estimate_structure(
        max_parents=4,
        iterations_number=100,
        patience=40,
        tabu_length=3,
        tabu_rules_duration=3,
        optimizer='tabu',
        disable_multiprocessing=True
    )

    # Precision and recall of the estimated edge set against the true one.
    n_added_fake_edges = len(edges.difference(true_edges))
    n_missing_edges = len(true_edges.difference(edges))
    n_true_positive = len(true_edges) - n_missing_edges
    n_estimated = n_true_positive + n_added_fake_edges
    n_expected = n_true_positive + n_missing_edges
    # A zero denominator (empty estimate or empty true structure) is scored
    # 0.0 so the test fails on the assertions below instead of erroring out
    # with ZeroDivisionError.
    precision = n_true_positive / n_estimated if n_estimated else 0.0
    recall = n_true_positive / n_expected if n_expected else 0.0

    self.assertGreaterEqual(precision, 0.75)
    self.assertGreaterEqual(recall, 0.75)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

@ -54,7 +54,8 @@ class TestSamplePath(unittest.TestCase):
importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
importer.import_data(0) importer.import_data(0)
s1 = SamplePath(importer) s1 = SamplePath(importer)
random.shuffle(importer._sorter) importer._sorter[0],importer._sorter[1]= importer._sorter[1],importer._sorter[0]
self.assertRaises(RuntimeError, s1.build_structure) self.assertRaises(RuntimeError, s1.build_structure)
def test_build_saplepath_no_prior_net_structure(self): def test_build_saplepath_no_prior_net_structure(self):
@ -66,7 +67,17 @@ class TestSamplePath(unittest.TestCase):
s1.build_structure() s1.build_structure()
self.assertFalse(s1.structure.edges) self.assertFalse(s1.structure.edges)
def test_buid_samplepath_no_variables(self):
    """Building a SamplePath must raise RuntimeError when the importer's _df_variables is None."""
    data_importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
    data_importer.import_data(0)
    # Wipe the variables frame after the import to simulate missing data.
    data_importer._df_variables = None
    with self.assertRaises(RuntimeError):
        SamplePath(data_importer)
def test_buid_samplepath_no_concatenated_samples(self):
    """Building a SamplePath must raise RuntimeError when the importer's _concatenated_samples is None."""
    data_importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
    data_importer.import_data(0)
    # Wipe the concatenated samples after the import to simulate missing data.
    data_importer._concatenated_samples = None
    with self.assertRaises(RuntimeError):
        SamplePath(data_importer)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

@ -28,6 +28,15 @@ class TestStructure(unittest.TestCase):
for indx, var in enumerate(self.labels): for indx, var in enumerate(self.labels):
self.assertEqual(var, s1.get_node_id(indx)) self.assertEqual(var, s1.get_node_id(indx))
def test_edges_operations(self):
    """Exercise contains_edge, add_edge and remove_edge on a Structure."""
    structure = Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb)
    existing_edge = ('X', 'Z')
    reversed_edge = ('Z', 'X')

    self.assertTrue(structure.contains_edge(existing_edge))

    # Adding the reversed edge makes it visible...
    structure.add_edge(reversed_edge)
    self.assertTrue(structure.contains_edge(reversed_edge))

    # ...and removing it makes it disappear again.
    structure.remove_edge(reversed_edge)
    self.assertFalse(structure.contains_edge(reversed_edge))
def test_get_node_indx(self): def test_get_node_indx(self):
l2 = self.labels[:] l2 = self.labels[:]
l2.remove('Y') l2.remove('Y')
@ -71,6 +80,7 @@ class TestStructure(unittest.TestCase):
s1 = Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb) s1 = Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb)
s2 = Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb) s2 = Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb)
self.assertEqual(s1, s2) self.assertEqual(s1, s2)
self.assertNotEqual(s1,4)
def test_repr(self): def test_repr(self):
s1 = Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb) s1 = Structure(self.labels, self.indxs, self.vals, self.edges, self.vars_numb)

@ -2,6 +2,7 @@
import unittest import unittest
import numpy as np import numpy as np
import glob import glob
import os
from ...PyCTBN.structure_graph.trajectory import Trajectory from ...PyCTBN.structure_graph.trajectory import Trajectory
from ...PyCTBN.utility.json_importer import JsonImporter from ...PyCTBN.utility.json_importer import JsonImporter
@ -10,7 +11,7 @@ class TestTrajectory(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls) -> None: def setUpClass(cls) -> None:
cls.read_files = glob.glob(os.path.join('./test_data', "*.json")) cls.read_files = glob.glob(os.path.join('./PyCTBN/test_data', "*.json"))
cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name') cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
cls.importer.import_data(0) cls.importer.import_data(0)
@ -22,6 +23,8 @@ class TestTrajectory(unittest.TestCase):
self.assertTrue(np.array_equal(self.importer.concatenated_samples.iloc[:, 1: len(self.importer.sorter) + 1], t1.trajectory)) self.assertTrue(np.array_equal(self.importer.concatenated_samples.iloc[:, 1: len(self.importer.sorter) + 1], t1.trajectory))
self.assertEqual(len(self.importer.sorter) + 1, t1._original_cols_number) self.assertEqual(len(self.importer.sorter) + 1, t1._original_cols_number)
self.assertEqual(self.importer.concatenated_samples.iloc[:,1:].to_numpy().shape[0], t1.size())
print(t1)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

@ -152,7 +152,7 @@ class TestJsonImporter(unittest.TestCase):
def test_file_path(self): def test_file_path(self):
j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
self.assertEqual(j1.file_path, "./PyCTBN/data/networks_and_trajectories_binary_data_01_3.json") self.assertEqual(j1.file_path, "./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json")
def test_import_data(self): def test_import_data(self):
j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') j1 = JsonImporter("./PyCTBN/test_data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')

@ -33,13 +33,15 @@ class TestSampleImporter(unittest.TestCase):
prior_net_structure=self.prior_net_structure prior_net_structure=self.prior_net_structure
) )
sample_importer.import_data() sample_importer.import_data(['X','Y','Z'])
s1 = SamplePath(sample_importer) s1 = SamplePath(sample_importer)
s1.build_trajectories() s1.build_trajectories()
s1.build_structure() s1.build_structure()
s1.clear_memory() s1.clear_memory()
data_id= sample_importer.dataset_id()
self.assertEqual(data_id,"")
self.assertEqual(len(s1._importer._df_samples_list), 300) self.assertEqual(len(s1._importer._df_samples_list), 300)
self.assertIsInstance(s1._importer._df_samples_list,list) self.assertIsInstance(s1._importer._df_samples_list,list)
self.assertIsInstance(s1._importer._df_samples_list[0],pd.DataFrame) self.assertIsInstance(s1._importer._df_samples_list[0],pd.DataFrame)

@ -1 +0,0 @@
# CTBN_Project

@ -0,0 +1,125 @@
PyCTBN
======
A Continuous Time Bayesian Networks Library
Installation/Usage
*******************
Download the release in .tar.gz or .whl format and simply use pip install to install it::
$ pip install PyCTBN-1.0.tar.gz
Documentation
*************
Please refer to https://philipmartini.github.io/PyCTBN/ for the full project documentation.
Implementing your own data importer
***********************************
.. code-block:: python
"""This example demonstrates the implementation of a simple data importer that extends the
AbstractImporter class to import data in csv format.
The network under examination has three ternary nodes and no prior net structure.
"""

import typing

import pandas as pd

from PyCTBN import AbstractImporter


class CSVImporter(AbstractImporter):
    """Importer that reads a single trajectory from a csv file."""

    def __init__(self, file_path):
        """Store the csv path; the samples are loaded later by import_data."""
        self._df_samples_list = None
        super(CSVImporter, self).__init__(file_path)

    def import_data(self):
        """Read the csv file, then build the sorter, the variables frame and the row deltas."""
        self.read_csv_file()
        self._sorter = self.build_sorter(self._df_samples_list[0])
        self.import_variables()
        self.compute_row_delta_in_all_samples_frames(self._df_samples_list)

    def read_csv_file(self):
        """Load the csv file, drop its first column and keep it as a one-element samples list."""
        df = pd.read_csv(self._file_path)
        df.drop(df.columns[[0]], axis=1, inplace=True)
        self._df_samples_list = [df]

    def import_variables(self):
        """Build the variables frame, giving every variable in the sorter the value 3."""
        # every node of this example network is ternary
        values_list = [3 for var in self._sorter]
        # initialize dict of lists
        data = {'Name': self._sorter, 'Value': values_list}
        # Create the pandas DataFrame
        self._df_variables = pd.DataFrame(data)

    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
        """Return the column names of sample_frame, skipping the first column."""
        return list(sample_frame.columns)[1:]

    def dataset_id(self) -> object:
        """No dataset identifier is defined for this example (returns None)."""
        pass
Parameters Estimation Example
*****************************
.. code-block:: python
import glob
import os

from PyCTBN import JsonImporter
from PyCTBN import SamplePath
from PyCTBN import NetworkGraph
from PyCTBN import ParametersEstimator


def main():
    """Estimate and print the CIMs of one node from the first json file found in ./data."""
    # Take all json files in this dir
    read_files = glob.glob(os.path.join('./data', "*.json"))
    # Import the data
    importer = JsonImporter(read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
    importer.import_data(0)
    # Create a SamplePath object passing an already filled AbstractImporter object
    s1 = SamplePath(importer)
    # Build the trajectories and the structural information
    s1.build_trajectories()
    s1.build_structure()
    print(s1.structure.edges)
    print(s1.structure.nodes_values)
    # From the Structure object build the graph
    g = NetworkGraph(s1.structure)
    # Select the node whose parameters you want to estimate
    node = g.nodes[2]
    print("Node", node)
    # Init the graph specifically for THIS node
    g.fast_init(node)
    # Use the trajectories and the graph to create a ParametersEstimator object
    p1 = ParametersEstimator(s1.trajectories, g)
    # Init the parameters estimator specifically for THIS node
    p1.fast_init(node)
    # Compute the parameters
    sofc1 = p1.compute_parameters_for_node(node)
    # The estimated CIMs are inside the resulting SetOfCims object
    print(sofc1.actual_cims)
Structure Estimation Example
****************************
.. code-block:: python
import glob
import os

from PyCTBN import JsonImporter
from PyCTBN import SamplePath
from PyCTBN import StructureEstimator


def structure_estimation_example():
    """Run the ctpc algorithm on the first json file found in ./data and save the results."""
    # read the json files in ./data path
    read_files = glob.glob(os.path.join('./data', "*.json"))
    # initialize a JsonImporter object for the first file
    importer = JsonImporter(read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
    # import the data at index 0 of the outer json array
    importer.import_data(0)
    # construct a SamplePath object passing a filled AbstractImporter
    s1 = SamplePath(importer)
    # build the trajectories
    s1.build_trajectories()
    # build the real structure
    s1.build_structure()
    # construct a StructureEstimator object
    se1 = StructureEstimator(s1, 0.1, 0.1)
    # call the ctpc algorithm
    se1.ctpc_algorithm()
    # the adjacency matrix of the estimated structure
    print(se1.adjacency_matrix())
    # save results to a json file
    se1.save_results()

@ -0,0 +1,41 @@
import glob
import os
import sys
sys.path.append("./PyCTBN/")
import structure_graph.network_graph as ng
import structure_graph.sample_path as sp
import structure_graph.set_of_cims as sofc
import estimators.parameters_estimator as pe
import utility.json_importer as ji
def main():
    """Estimate and print the CIMs of one node from the first json file found in ./data.

    Raises:
        FileNotFoundError: if ./data contains no json file.
    """
    # Take all json files in this dir
    read_files = glob.glob(os.path.join('./data', "*.json"))
    if not read_files:
        # Fail with an explicit message instead of an IndexError on read_files[0].
        raise FileNotFoundError("No .json file found in ./data")
    # Import the data; the importer must be filled before SamplePath is built from it.
    importer = ji.JsonImporter(read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
    importer.import_data(0)
    # Create a SamplePath object
    s1 = sp.SamplePath(importer)
    # Build the trajectories and the structural information
    s1.build_trajectories()
    s1.build_structure()
    # From the Structure object build the graph
    g = ng.NetworkGraph(s1.structure)
    # Select the node whose parameters you want to estimate
    node = g.nodes[1]
    # Init the graph specifically for THIS node
    g.fast_init(node)
    # Use the trajectories and the graph to create a ParametersEstimator object,
    # as the README parameters-estimation example does.
    p1 = pe.ParametersEstimator(s1.trajectories, g)
    # Init the parameters estimator specifically for THIS node
    p1.fast_init(node)
    # Compute the parameters
    sofc1 = p1.compute_parameters_for_node(node)
    # The estimated CIMs are inside the resulting SetOfCims object
    print(sofc1.actual_cims)


if __name__ == "__main__":
    main()

File diff suppressed because it is too large Load Diff

@ -1,35 +1,35 @@
@ECHO OFF @ECHO OFF
pushd %~dp0 pushd %~dp0
REM Command file for Sphinx documentation REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" ( if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build set SPHINXBUILD=sphinx-build
) )
set SOURCEDIR=. set SOURCEDIR=.
set BUILDDIR=_build set BUILDDIR=_build
if "%1" == "" goto help if "%1" == "" goto help
%SPHINXBUILD% >NUL 2>NUL %SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 ( if errorlevel 9009 (
echo. echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH. echo.may add the Sphinx directory to PATH.
echo. echo.
echo.If you don't have Sphinx installed, grab it from echo.If you don't have Sphinx installed, grab it from
echo.http://sphinx-doc.org/ echo.http://sphinx-doc.org/
exit /b 1 exit /b 1
) )
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end goto end
:help :help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end :end
popd popd

@ -0,0 +1,22 @@
from setuptools import setup, find_packages
# Distribution metadata and packaging options for PyCTBN.
setup(name='PyCTBN',
      version='1.0',
      url='https://github.com/philipMartini/PyCTBN',
      license='MIT',
      # Author and Author-email are single string metadata fields, so several
      # people are listed as one comma-separated string rather than a list.
      author='Alessandro Bregoli, Filippo Martini, Luca Moretti',
      author_email='a.bregoli1@campus.unimib.it, f.martini@campus.unimib.it, lucamoretti96@gmail.com',
      description='A Continuous Time Bayesian Networks Library',
      # Keep tests and test data out of the built distribution.
      packages=find_packages(exclude=['*test*','test_data','tests','PyCTBN.tests','PyCTBN.test_data']),
      exclude_package_data={'': ['*test*','test_data','tests','PyCTBN.tests','PyCTBN.test_data']},
      #packages=['PyCTBN.PyCTBN'],
      install_requires=[
          'numpy', 'pandas', 'networkx', 'scipy', 'matplotlib', 'tqdm'],
      # NOTE(review): dependency_links is deprecated in setuptools and, as far
      # as I know, ignored by current pip -- confirm and consider removing it.
      dependency_links=['https://github.com/numpy/numpy', 'https://github.com/pandas-dev/pandas',
                        'https://github.com/networkx/networkx', 'https://github.com/scipy/scipy',
                        'https://github.com/tqdm/tqdm'],
      #long_description=open('../README.md').read(),
      zip_safe=False,
      include_package_data=True,
      python_requires='>=3.6')