1
0
Fork 0

Added prior knowledge

master
Luca Moretti 4 years ago
parent 4220c81970
commit 2d476188b5
  1. 71
      main_package/classes/estimators/structure_constraint_based_estimator.py
  2. 141
      main_package/classes/estimators/structure_estimator.py
  3. 10
      main_package/classes/estimators/structure_score_based_estimator.py
  4. 6
      main_package/classes/optimizers/constraint_based_optimizer.py
  5. 25
      main_package/classes/optimizers/hill_climbing_search.py
  6. 24
      main_package/classes/optimizers/tabu_search.py
  7. 33
      main_package/tests/estimators/test_structure_constraint_based_estimator.py
  8. 35
      main_package/tests/estimators/test_structure_score_based_estimator.py
  9. 13
      main_package/tests/results/results.csv

@ -36,28 +36,30 @@ class StructureConstraintBasedEstimator(se.StructureEstimator):
:chi_test_alfa: the significance level for the chi Hp test
"""
def __init__(self, sample_path: sp.SamplePath, exp_test_alfa: float, chi_test_alfa: float,
             known_edges: typing.List = None):
    """Create a constraint based structure estimator.

    :param sample_path: the sample path forwarded to the base ``StructureEstimator``
    :type sample_path: SamplePath
    :param exp_test_alfa: the significance level for the exponential test (stored as ``exp_test_sign``)
    :type exp_test_alfa: float
    :param chi_test_alfa: the significance level for the chi Hp test
    :type chi_test_alfa: float
    :param known_edges: the prior knowledge edges forwarded to the base estimator,
        ``None`` (default) when there is no prior knowledge
    :type known_edges: List
    """
    # ``None`` replaces the former mutable ``[]`` default, which would be shared by every call.
    # The base class only iterates ``known_edges`` when it is truthy, so ``None`` and ``[]``
    # are handled in the same way.
    super().__init__(sample_path, known_edges)
    self.exp_test_sign = exp_test_alfa
    self.chi_test_alfa = chi_test_alfa
def complete_test(self, test_parent: str, test_child: str, parent_set: typing.List, child_states_numb: int,
tot_vars_count: int):
"""
Permorms a complete independence test on the directed graphs G1 = test_child U parent_set
G2 = G1 U test_parent (added as an additional parent of the test_child).
"""Performs a complete independence test on the directed graphs G1 = {test_child U parent_set}
G2 = {G1 U test_parent} (added as an additional parent of the test_child).
Generates all the necessary structures and data to perform the tests.
Parameters:
test_parent: the node label of the test parent
test_child: the node label of the child
parent_set: the common parent set
child_states_numb: the cardinality of the test_child
tot_vars_count_ the total number of variables in the net
Returns:
True iff test_child and test_parent are independent given the sep_set parent_set
False otherwise
:param test_parent: the node label of the test parent
:type test_parent: string
:param test_child: the node label of the child
:type test_child: string
:param parent_set: the common parent set
:type parent_set: List
:param child_states_numb: the cardinality of the ``test_child``
:type child_states_numb: int
:param tot_vars_count: the total number of variables in the net
:type tot_vars_count: int
:return: True iff test_child and test_parent are independent given the sep_set parent_set. False otherwise
:rtype: bool
"""
#print("Test Parent:", test_parent)
#print("Sep Set", parent_set)
@ -92,7 +94,7 @@ class StructureConstraintBasedEstimator(se.StructureEstimator):
s1 = st.Structure(l1, indxs1, vals1, eds1, tot_vars_count)
g1 = ng.NetworkGraph(s1)
g1.fast_init(test_child)
p1 = pe.ParametersEstimator(self.sample_path, g1)
p1 = pe.ParametersEstimator(self._sample_path, g1)
p1.fast_init(test_child)
sofc1 = p1.compute_parameters_for_node(test_child)
#if not p_set:
@ -125,7 +127,7 @@ class StructureConstraintBasedEstimator(se.StructureEstimator):
s2 = st.Structure(l2, indxs2, vals2, eds2, tot_vars_count)
g2 = ng.NetworkGraph(s2)
g2.fast_init(test_child)
p2 = pe.ParametersEstimator(self.sample_path, g2)
p2 = pe.ParametersEstimator(self._sample_path, g2)
p2.fast_init(test_child)
sofc2 = p2.compute_parameters_for_node(test_child)
self.cache.put(set(p_set), sofc2)
@ -146,19 +148,18 @@ class StructureConstraintBasedEstimator(se.StructureEstimator):
def independence_test(self, child_states_numb: int, cim1: condim.ConditionalIntensityMatrix,
cim2: condim.ConditionalIntensityMatrix):
"""
Compute the actual independence test using two cims.
"""Compute the actual independence test using two cims.
It is performed first the exponential test and if the null hypothesis is not rejected,
it is permormed also the chi_test.
Parameters:
child_states_numb: the cardinality of the test child
cim1: a cim belonging to the graph without test parent
cim2: a cim belonging to the graph with test parent
Returns:
True iff both tests do NOT reject the null hypothesis of indipendence
False otherwise
it is performed also the chi_test.
:param child_states_numb: the cardinality of the test child
:type child_states_numb: int
:param cim1: a cim belonging to the graph without test parent
:type cim1: ConditionalIntensityMatrix
:param cim2: a cim belonging to the graph with test parent
:type cim2: ConditionalIntensityMatrix
:return: True iff both tests do NOT reject the null hypothesis of independence. False otherwise.
:rtype: bool
"""
M1 = cim1.state_transition_matrix
M2 = cim2.state_transition_matrix
@ -202,14 +203,10 @@ class StructureConstraintBasedEstimator(se.StructureEstimator):
return True
def one_iteration_of_CTPC_algorithm(self, var_id: str, tot_vars_count: int):
"""
Performs an iteration of the CTPC algorithm using the node var_id as test_child.
"""Performs an iteration of the CTPC algorithm using the node ``var_id`` as ``test_child``.
Parameters:
var_id: the node label of the test child
tot_vars_count: the number of nodes in the net
Returns:
void
:param var_id: the node label of the test child
:type var_id: string
"""
optimizer_obj = optimizer.ConstraintBasedOptimizer(
node_id = var_id,
@ -227,7 +224,7 @@ class StructureConstraintBasedEstimator(se.StructureEstimator):
void
"""
ctpc_algo = self.one_iteration_of_CTPC_algorithm
total_vars_numb = self.sample_path.total_variables_count
total_vars_numb = self._sample_path.total_variables_count
n_nodes= len(self.nodes)
@ -239,7 +236,7 @@ class StructureConstraintBasedEstimator(se.StructureEstimator):
'Remove all the edges from the structure'
self.sample_path.structure.clean_structure_edges()
self._sample_path.structure.clean_structure_edges()
'Estimate the best parents for each node'
#with multiprocessing.Pool(processes=cpu_count) as pool:

@ -33,22 +33,39 @@ class StructureEstimator(ABC):
:cache: the cache object
"""
def __init__(self, sample_path: sp.SamplePath, known_edges: typing.List = None):
    """Store the sample path and derive the helper structures used during the estimation.

    :param sample_path: the sample path object; its ``structure`` provides labels, values and indexes of the nodes
    :type sample_path: SamplePath
    :param known_edges: the prior knowledge edges, passed to ``build_removable_edges_matrix``
    :type known_edges: List
    """
    self._sample_path = sample_path
    structure = self._sample_path.structure
    self.nodes = np.array(structure.nodes_labels)
    self.nodes_vals = structure.nodes_values
    self.nodes_indxs = structure.nodes_indexes
    # The matrix is built before the complete graph, as in the original statement order.
    self._removable_edges_matrix = self.build_removable_edges_matrix(known_edges)
    self.complete_graph = self.build_complete_graph(structure.nodes_labels)
    self.cache = ch.Cache()
def build_complete_graph(self, node_ids: typing.List):
def build_removable_edges_matrix(self, known_edges: typing.List):
    """Builds a boolean matrix that tells whether an edge can be removed or not, based on the given prior knowledge.

    Every entry starts as ``True``; the entry ``[i][j]`` is set to ``False`` for each known edge whose
    first element is the label of the node with index ``i`` and whose second element is the label of
    the node with index ``j``.

    :param known_edges: the list of known edges, each one indexable as ``(label, label)``; may be ``None`` or empty
    :type known_edges: List
    :return: a square boolean matrix with ``total_variables_count`` rows and columns
    :rtype: np.ndarray
    """
    structure_size = self._sample_path.total_variables_count
    removable = np.ones((structure_size, structure_size), dtype=bool)
    # A falsy ``known_edges`` (``None`` or an empty list) leaves the matrix entirely ``True``.
    for known_edge in known_edges or ():
        row = self._sample_path.structure.get_node_indx(known_edge[0])
        col = self._sample_path.structure.get_node_indx(known_edge[1])
        removable[row, col] = False
    return removable
def build_complete_graph(self, node_ids: typing.List):
"""Builds a complete directed graph (no self loops) given the nodes labels in the list ``node_ids``:
Parameters:
node_ids: the list of nodes labels
Returns:
a complete Digraph Object
:param node_ids: the list of nodes labels
:type node_ids: List
:return: a complete Digraph Object
:rtype: networkx.DiGraph
"""
complete_graph = nx.DiGraph()
complete_graph.add_nodes_from(node_ids)
@ -57,33 +74,28 @@ class StructureEstimator(ABC):
def generate_possible_sub_sets_of_size(self, u: typing.List, size: int, parent_label: str):
    """Creates an iterator over all the possible subsets of the list ``u`` of size ``size``,
    that do not contain the node identified by ``parent_label``.

    The input list is not modified: the label is removed from a copy of ``u``.

    :param u: the list of nodes
    :type u: List
    :param size: the size of the subsets
    :type size: int
    :param parent_label: the node to exclude in the subsets generation
    :type parent_label: string
    :return: an Iterator Object yielding each subset as a list
    :rtype: Iterator
    """
    candidates = u.copy()
    # Only the first occurrence is dropped; a missing label raises ``ValueError``.
    candidates.remove(parent_label)
    subsets = itertools.combinations(candidates, size)
    return map(list, subsets)
def save_results(self):
"""
Save the estimated Structure to a .json file
Parameters:
void
Returns:
void
"""Save the estimated Structure to a .json file in the path where the data are loaded from.
The file is named after the input dataset, with the ``results_`` prefix prepended to its name.
"""
res = json_graph.node_link_data(self.complete_graph)
name = self.sample_path.importer.file_path.rsplit('/',1)[-1]
name = self._sample_path.importer.file_path.rsplit('/',1)[-1]
#print(name)
name = '../results_' + name
with open(name, 'w+') as f:
@ -99,14 +111,71 @@ class StructureEstimator(ABC):
@abc.abstractmethod
def estimate_structure(self) -> typing.List:
    """Abstract method to estimate the structure; concrete estimators must override it.

    :return: List of estimated edges
    :rtype: typing.List
    """
def adjacency_matrix(self) -> np.ndarray:
    """Converts the estimated structure ``complete_graph`` to a boolean adjacency matrix representation.

    :return: The adjacency matrix of the graph ``complete_graph``
    :rtype: numpy.ndarray
    """
    # ``__init__`` stores the graph as ``complete_graph``; no ``_complete_graph`` attribute is ever
    # assigned in this class, so the previous attribute name raised ``AttributeError``.
    # ``nx.adjacency_matrix`` is the canonical name of the deprecated ``nx.adj_matrix`` alias.
    return nx.adjacency_matrix(self.complete_graph).toarray().astype(bool)
def spurious_edges(self) -> typing.List:
    """Return the spurious edges present in the estimated structure, if a prior net structure is present in
    ``_sample_path.structure``.

    An edge is spurious when it belongs to the estimated graph ``complete_graph`` but not to the
    prior (real) net structure.

    :raises RuntimeError: if the sample path has no prior net structure
    :return: A list containing the spurious edges
    :rtype: List
    """
    if not self._sample_path.has_prior_net_structure:
        raise RuntimeError("Can not compute spurious edges with no prior net structure!")
    real_graph = nx.DiGraph()
    real_graph.add_nodes_from(self._sample_path.structure.nodes_labels)
    real_graph.add_edges_from(self._sample_path.structure.edges)
    # ``nx.difference(G, H)`` keeps the edges of ``G`` that are not in ``H``: the estimated graph
    # must come first, otherwise the result is the set of *missing* edges instead of the spurious ones.
    # ``__init__`` stores the estimated graph as ``complete_graph`` (no ``_complete_graph`` exists).
    # NOTE(review): ``nx.difference`` requires both graphs to have the same node set - confirm that
    # every estimator keeps all the node labels in ``complete_graph``.
    return list(nx.difference(self.complete_graph, real_graph).edges)
def save_plot_estimated_structure_graph(self) -> None:
    """Plot the estimated structure in a graphical model style and save the picture as a ``.png`` file.

    The edges returned by ``spurious_edges`` are colored in red, all the other ones in black.
    The file is saved with a relative name (so in the current working directory): the dataset file
    name up to its first dot, followed by ``_<dataset_id>.png``.

    :raises RuntimeError: raised by ``spurious_edges`` when no prior net structure is available
    """
    # ``__init__`` stores the estimated graph as ``complete_graph`` (no ``_complete_graph`` exists).
    estimated_graph = self.complete_graph
    spurious_edges = set(self.spurious_edges())
    non_spurious_edges = set(estimated_graph.edges) - spurious_edges
    graph_to_draw = nx.DiGraph()
    graph_to_draw.add_edges_from(spurious_edges)
    graph_to_draw.add_edges_from(non_spurious_edges)
    # The colors must follow the edge order of the graph that is actually drawn: its insertion
    # order differs from the one of the estimated graph, so a list built by iterating the
    # estimated graph would paint the wrong edges.
    edges_colors = ['red' if edge in spurious_edges else 'black' for edge in graph_to_draw.edges]
    # NOTE(review): only nodes touched by an edge are drawn, and a graph with no edges makes the
    # ``k`` computation divide by zero - confirm whether isolated nodes should be added as well.
    pos = nx.spring_layout(graph_to_draw, k=0.5*1/np.sqrt(len(graph_to_draw.nodes())), iterations=50, scale=10)
    options = {
        "node_size": 2000,
        "node_color": "white",
        "edgecolors": "black",
        'linewidths': 2,
        "with_labels": True,
        "font_size": 13,
        'connectionstyle': 'arc3, rad = 0.1',
        "arrowsize": 15,
        # NOTE(review): '<|-' looks like it puts the arrow head on the source side of the edge - confirm it is intended.
        "arrowstyle": '<|-',
        "width": 1,
        "edge_color": edges_colors,
    }
    nx.draw(graph_to_draw, pos, **options)
    ax = plt.gca()
    ax.margins(0.20)
    plt.axis("off")
    name = self._sample_path._importer.file_path.rsplit('/', 1)[-1]
    name = name.split('.', 1)[0]
    name += '_' + str(self._sample_path._importer.dataset_id())
    name += '.png'
    plt.savefig(name)
    # Clear the current figure so that a later plot does not draw on top of this one.
    plt.clf()
    print("Estimated Structure Plot Saved At: ", os.path.abspath(name))

@ -44,8 +44,8 @@ class StructureScoreBasedEstimator(se.StructureEstimator):
"""
def __init__(self, sample_path: sp.SamplePath, tau_xu: float = 0.1, alpha_xu: int = 1,
             known_edges: typing.List = None):
    """Create a score based structure estimator.

    :param sample_path: the sample path forwarded to the base ``StructureEstimator``
    :type sample_path: SamplePath
    :param tau_xu: value stored as ``tau_xu``, default to 0.1 (presumably a hyperparameter of the
        score - see the class docstring)
    :type tau_xu: float
    :param alpha_xu: value stored as ``alpha_xu``, default to 1 (presumably a hyperparameter of the
        score - see the class docstring)
    :type alpha_xu: int
    :param known_edges: the prior knowledge edges forwarded to the base estimator,
        ``None`` (default) when there is no prior knowledge
    :type known_edges: List
    """
    # ``None`` replaces the former mutable ``[]`` default, which would be shared by every call.
    # The base class only iterates ``known_edges`` when it is truthy, so ``None`` and ``[]``
    # are handled in the same way.
    super().__init__(sample_path, known_edges)
    self.tau_xu = tau_xu
    self.alpha_xu = alpha_xu
@ -70,11 +70,11 @@ class StructureScoreBasedEstimator(se.StructureEstimator):
"""
'Save the true edges structure in tuples'
true_edges = copy.deepcopy(self.sample_path.structure.edges)
true_edges = copy.deepcopy(self._sample_path.structure.edges)
true_edges = set(map(tuple, true_edges))
'Remove all the edges from the structure'
self.sample_path.structure.clean_structure_edges()
self._sample_path.structure.clean_structure_edges()
estimate_parents = self.estimate_parents
@ -208,7 +208,7 @@ class StructureScoreBasedEstimator(se.StructureEstimator):
'inizialize the graph for a single node'
graph.fast_init(node_id)
params_estimation = pe.ParametersEstimator(self.sample_path, graph)
params_estimation = pe.ParametersEstimator(self._sample_path, graph)
'Inizialize and compute parameters for node'
params_estimation.fast_init(node_id)

@ -51,9 +51,9 @@ class ConstraintBasedOptimizer(Optimizer):
"""
print("##################TESTING VAR################", self.node_id)
graph = ng.NetworkGraph(self.structure_estimator.sample_path.structure)
graph = ng.NetworkGraph(self.structure_estimator._sample_path.structure)
other_nodes = [node for node in self.structure_estimator.sample_path.structure.nodes_labels if node != self.node_id]
other_nodes = [node for node in self.structure_estimator._sample_path.structure.nodes_labels if node != self.node_id]
for possible_parent in other_nodes:
graph.add_edges([(possible_parent,self.node_id)])
@ -63,7 +63,7 @@ class ConstraintBasedOptimizer(Optimizer):
#tests_parents_numb = len(u)
#complete_frame = self.complete_graph_frame
#test_frame = complete_frame.loc[complete_frame['To'].isin([self.node_id])]
child_states_numb = self.structure_estimator.sample_path.structure.get_states_number(self.node_id)
child_states_numb = self.structure_estimator._sample_path.structure.get_states_number(self.node_id)
b = 0
while b < len(u):
parent_indx = 0

@ -61,10 +61,31 @@ class HillClimbing(Optimizer):
"""
#'Create the graph for the single node'
graph = ng.NetworkGraph(self.structure_estimator.sample_path.structure)
graph = ng.NetworkGraph(self.structure_estimator._sample_path.structure)
'get the index for the current node'
node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id)
'list of prior edges'
prior_parents = set()
'Add the edges from prior knowledge'
for i in range(len(self.structure_estimator._removable_edges_matrix)):
if not self.structure_estimator._removable_edges_matrix[i][node_index]:
parent_id= self.structure_estimator._sample_path._structure.get_node_id(i)
prior_parents.add(parent_id)
'Add the node to the starting structure'
graph.add_edges([(parent_id, self.node_id)])
'get all the possible parents'
other_nodes = [node for node in
self.structure_estimator._sample_path.structure.nodes_labels if
node != self.node_id and
not prior_parents.__contains__(node)]
other_nodes = [node for node in self.structure_estimator.sample_path.structure.nodes_labels if node != self.node_id]
actual_best_score = self.structure_estimator.get_score_from_graph(graph,self.node_id)
patince_count = 0

@ -68,10 +68,30 @@ class TabuSearch(Optimizer):
print(f"tabu search is processing the structure of {self.node_id}")
'Create the graph for the single node'
graph = ng.NetworkGraph(self.structure_estimator.sample_path.structure)
graph = ng.NetworkGraph(self.structure_estimator._sample_path.structure)
'get the index for the current node'
node_index = self.structure_estimator._sample_path._structure.get_node_indx(self.node_id)
'list of prior edges'
prior_parents = set()
'Add the edges from prior knowledge'
for i in range(len(self.structure_estimator._removable_edges_matrix)):
if not self.structure_estimator._removable_edges_matrix[i][node_index]:
parent_id= self.structure_estimator._sample_path._structure.get_node_id(i)
prior_parents.add(parent_id)
'Add the node to the starting structure'
graph.add_edges([(parent_id, self.node_id)])
'get all the possible parents'
other_nodes = set([node for node in self.structure_estimator.sample_path.structure.nodes_labels if node != self.node_id])
other_nodes = set([node for node in
self.structure_estimator._sample_path.structure.nodes_labels if
node != self.node_id and
not prior_parents.__contains__(node)])
'calculate the score for the node without parents'
actual_best_score = self.structure_estimator.get_score_from_graph(graph,self.node_id)

@ -10,12 +10,13 @@ import numpy as np
import psutil
from line_profiler import LineProfiler
import json
import pandas as pd
import utility.cache as ch
import structure_graph.sample_path as sp
import estimators.structure_constraint_based_estimator as se
import utility.json_importer as ji
from multiprocessing import set_start_method
import utility.sample_importer as si
import copy
@ -23,8 +24,29 @@ import copy
class TestStructureConstraintBasedEstimator(unittest.TestCase):
@classmethod
def setUpClass(cls):
#cls.read_files = glob.glob(os.path.join('../../data', "*.json"))
cls.importer = ji.JsonImporter("../../data/networks_and_trajectories_ternary_data_15.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name',1)
with open("../../data/networks_and_trajectories_ternary_data_3.json") as f:
raw_data = json.load(f)
trajectory_list_raw= raw_data[0]["samples"]
trajectory_list = [pd.DataFrame(sample) for sample in trajectory_list_raw]
variables= pd.DataFrame(raw_data[0]["variables"])
prior_net_structure = pd.DataFrame(raw_data[0]["dyn.str"])
cls.importer = si.SampleImporter(
trajectory_list=trajectory_list,
variables=variables,
prior_net_structure=prior_net_structure
)
cls.importer.import_data()
#cls.s1 = sp.SamplePath(cls.importer)
#cls.traj = cls.s1.concatenated_samples
# print(len(cls.traj))
cls.s1 = sp.SamplePath(cls.importer)
cls.s1.build_trajectories()
cls.s1.build_structure()
@ -33,7 +55,6 @@ class TestStructureConstraintBasedEstimator(unittest.TestCase):
true_edges = copy.deepcopy(self.s1.structure.edges)
true_edges = set(map(tuple, true_edges))
set_start_method("spawn")
se1 = se.StructureConstraintBasedEstimator(self.s1,0.1,0.1)
edges = se1.estimate_structure(disable_multiprocessing=False)

@ -14,7 +14,11 @@ import copy
import utility.cache as ch
import structure_graph.sample_path as sp
import estimators.structure_score_based_estimator as se
import utility.json_importer as ji
import utility.sample_importer as si
import json
import pandas as pd
@ -22,8 +26,29 @@ class TestStructureScoreBasedEstimator(unittest.TestCase):
@classmethod
def setUpClass(cls):
#cls.read_files = glob.glob(os.path.join('../../data', "*.json"))
cls.importer = ji.JsonImporter("../../data/networks_and_trajectories_binary_data_15.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
with open("../../data/networks_and_trajectories_binary_data_01_6.json") as f:
raw_data = json.load(f)
trajectory_list_raw= raw_data[0]["samples"]
trajectory_list = [pd.DataFrame(sample) for sample in trajectory_list_raw]
variables= pd.DataFrame(raw_data[0]["variables"])
prior_net_structure = pd.DataFrame(raw_data[0]["dyn.str"])
cls.importer = si.SampleImporter(
trajectory_list=trajectory_list,
variables=variables,
prior_net_structure=prior_net_structure
)
cls.importer.import_data()
#cls.s1 = sp.SamplePath(cls.importer)
#cls.traj = cls.s1.concatenated_samples
# print(len(cls.traj))
cls.s1 = sp.SamplePath(cls.importer)
cls.s1.build_trajectories()
cls.s1.build_structure()
@ -35,14 +60,14 @@ class TestStructureScoreBasedEstimator(unittest.TestCase):
true_edges = set(map(tuple, true_edges))
se1 = se.StructureScoreBasedEstimator(self.s1)
se1 = se.StructureScoreBasedEstimator(self.s1,known_edges = [('X','Q')])
edges = se1.estimate_structure(
max_parents = None,
iterations_number = 100,
patience = 35,
tabu_length = 15,
tabu_rules_duration = 15,
optimizer = 'tabu',
optimizer = 'hill',
disable_multiprocessing=True
)

@ -1 +1,14 @@
Time,Type,Variables,Density_Network,Cardinality,Index,F1,Precision,Recall
4.19,
4.078,
4.368,
6.024,
8.198,
10.586,
10.589,
10.447,
10.516,
8.335,
11.243,
11.78,
1 Time Time,Type,Variables,Density_Network,Cardinality,Index,F1,Precision,Recall Type Variables Density_Network Cardinality Index F1 Precision Recall
2 4.19,
3 4.078,
4 4.368,
5 6.024,
6 8.198,
7 10.586,
8 10.589,
9 10.447,
10 10.516,
11 8.335,
12 11.243,
13 11.78,
14