
Added a first implementation of tabu search and created the optimizer interface

master
Luca Moretti 4 years ago
parent 96a93957e6
commit 3a80bd3ce9
  1. main_package/classes/estimators/fam_score_calculator.py (2 lines changed)
  2. main_package/classes/estimators/structure_score_based_estimator.py (138 lines changed)
  3. main_package/classes/optimizers/hill_climbing_search.py (112 lines changed)
  4. main_package/classes/optimizers/optimizer.py (39 lines changed)
  5. main_package/classes/optimizers/tabu_search.py (161 lines changed)
  6. main_package/tests/optimizers/test_hill_climbing_search.py (52 lines changed)
  7. main_package/tests/optimizers/test_tabu_search.py (54 lines changed)
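
For context, a minimal sketch of how the extended estimate_structure call is meant to be used after this commit. It mirrors the two tests added below; the data file name and importer arguments are taken from those tests, while the path prefix and the node set depend on where the script is run and on the dataset.

import sys
sys.path.append("main_package/classes/")

import structure_graph.sample_path as sp
import estimators.structure_score_based_estimator as se
import utility.json_importer as ji

# build the sample path exactly as the tests do (the data path is dataset-specific)
importer = ji.JsonImporter("main_package/data/networks_and_trajectories_binary_data_01_3.json",
                           'samples', 'dyn.str', 'variables', 'Time', 'Name')
s1 = sp.SamplePath(importer)
s1.build_trajectories()
s1.build_structure()

estimator = se.StructureScoreBasedEstimator(s1)

# optimizer='hill' keeps the previous behaviour; 'tabu' uses the new TabuSearch class
edges = estimator.estimate_structure(max_parents=None,
                                     iterations_number=100,
                                     patience=None,
                                     tabu_length=15,
                                     tabu_rules_duration=15,
                                     optimizer='tabu')
print(edges)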

@@ -110,7 +110,7 @@ class FamScoreCalculator:
+ \
np.sum([self.single_internal_cim_xxu_marginal_likelihood_theta(
cim.state_transition_matrix[index,index_x_first],
alpha_xxu)
alpha_xu * cim.state_transition_matrix[index,index_x_first] / cim.state_transition_matrix[index, index])
for index_x_first in values])

@@ -19,6 +19,8 @@ import estimators.structure_estimator as se
import structure_graph.sample_path as sp
import structure_graph.structure as st
import estimators.fam_score_calculator as fam_score
import optimizers.hill_climbing_search as hill
import optimizers.tabu_search as tabu
from utility.decorators import timing
@@ -48,7 +50,9 @@ class StructureScoreBasedEstimator(se.StructureEstimator):
@timing
def estimate_structure(self, max_parents:int = None, iterations_number:int= 40, patience:int = None ):
def estimate_structure(self, max_parents:int = None, iterations_number:int= 40,
patience:int = None, tabu_length:int = None, tabu_rules_duration:int = 5,
optimizer: str = 'hill' ):
"""
Run the score-based algorithm to find the optimal structure
@@ -74,6 +78,9 @@ class StructureScoreBasedEstimator(se.StructureEstimator):
l_max_parents= [max_parents] * n_nodes
l_iterations_number = [iterations_number] * n_nodes
l_patience = [patience] * n_nodes
l_tabu_length = [tabu_length] * n_nodes
l_tabu_rules_duration = [tabu_rules_duration] * n_nodes
l_optimizer = [optimizer] * n_nodes
'get the number of CPUs'
@@ -83,10 +90,17 @@ class StructureScoreBasedEstimator(se.StructureEstimator):
'Estimate the best parents for each node'
with multiprocessing.Pool(processes=cpu_count) as pool:
list_edges_partial = pool.starmap(estimate_parents, zip(self.nodes,l_max_parents,l_iterations_number,l_patience))
#list_edges_partial = [estimate_parents(n,max_parents,iterations_number,patience) for n in self.nodes]
list_edges_partial = pool.starmap(estimate_parents, zip(
self.nodes,
l_max_parents,
l_iterations_number,
l_patience,
l_tabu_length,
l_tabu_rules_duration,
l_optimizer))
# list_edges_partial = [estimate_parents(n,max_parents,iterations_number,patience,tabu_length,tabu_rules_duration,optimizer) for n in self.nodes]
#list_edges_partial = p.map(estimate_parents, self.nodes)
#list_edges_partial= estimate_parents('Y',max_parents,iterations_number,patience)
#list_edges_partial= estimate_parents('Q',max_parents,iterations_number,patience,tabu_length,tabu_rules_duration,optimizer)
'Concatenate all the edges list'
set_list_edges = set(itertools.chain.from_iterable(list_edges_partial))
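
As an aside, the pool.starmap(..., zip(...)) call above works because zip turns the parallel per-node lists into one complete argument tuple per node. A tiny self-contained illustration of the pattern, with a toy function standing in for estimate_parents:

import multiprocessing

def toy_estimate_parents(node, max_parents, iterations_number, patience):
    # stand-in for the real per-node estimation
    return (node, max_parents, iterations_number, patience)

if __name__ == '__main__':
    nodes = ['X', 'Y', 'Z']
    n = len(nodes)
    # one tuple per node: (node, max_parents, iterations_number, patience)
    args = zip(nodes, [None] * n, [40] * n, [None] * n)
    with multiprocessing.Pool(processes=2) as pool:
        results = pool.starmap(toy_estimate_parents, args)
    print(results)
    # [('X', None, 40, None), ('Y', None, 40, None), ('Z', None, 40, None)]
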
@@ -98,39 +112,33 @@ class StructureScoreBasedEstimator(se.StructureEstimator):
n_missing_edges = 0
n_added_fake_edges = 0
try:
n_added_fake_edges = len(set_list_edges.difference(true_edges))
n_missing_edges = len(true_edges.difference(set_list_edges))
n_true_positive = len(true_edges) - n_missing_edges
precision = n_true_positive / (n_true_positive + n_added_fake_edges)
recall = n_true_positive / (n_true_positive + n_missing_edges)
# for estimate_edge in list_edges:
# if not estimate_edge in true_edges:
# n_added_fake_edges += 1
# for true_edge in true_edges:
# if not true_edge in list_edges:
# n_missing_edges += 1
# print(true_edge)
# print(f"real edges not found: {n_missing_edges}")
# print(f"spurious edges added: {n_added_fake_edges}")
print(true_edges)
print(set_list_edges)
print(f"precision: {precision}")
print(f"recall: {recall}")
except Exception as e:
print(f"error: {e}")
return set_list_edges
def estimate_parents(self,node_id:str, max_parents:int = None, iterations_number:int= 40, patience:int = 10 ):
def estimate_parents(self,node_id:str, max_parents:int = None, iterations_number:int= 40,
patience:int = 10, tabu_length:int = None, tabu_rules_duration:int=5,
optimizer:str = 'hill' ):
"""
Use the FamScore of a node in order to find the best parent nodes
Parameters:
@@ -138,61 +146,33 @@ class StructureScoreBasedEstimator(se.StructureEstimator):
max_parents: maximum number of parents for each variable. If None, disabled
iterations_number: maximum number of iterations of the optimization algorithm
patience: number of iterations without any improvement before stopping the search. If None, disabled
tabu_length: maximum length of the data structures used in the optimization process
tabu_rules_duration: number of iterations for which each tabu rule stays in effect
optimizer: name of the optimizer algorithm. Possible values: 'hill' (hill climbing), 'tabu' (tabu search)
Returns:
A list of the best edges for the current node
"""
'Create the graph for the single node'
graph = ng.NetworkGraph(self.sample_path.structure)
other_nodes = [node for node in self.sample_path.structure.nodes_labels if node != node_id]
actual_best_score = self.get_score_from_graph(graph,node_id)
patince_count = 0
for i in range(iterations_number):
'choose a new random edge'
current_new_parent = choice(other_nodes)
current_edge = (current_new_parent,node_id)
added = False
parent_removed = None
if graph.has_edge(current_edge):
graph.remove_edges([current_edge])
else:
'check the max_parents constraint'
if max_parents is not None:
parents_list = graph.get_parents_by_id(node_id)
if len(parents_list) >= max_parents :
parent_removed = (choice(parents_list), node_id)
graph.remove_edges([parent_removed])
graph.add_edges([current_edge])
added = True
#print('**************************')
current_score = self.get_score_from_graph(graph,node_id)
if current_score > actual_best_score:
'update current best score'
actual_best_score = current_score
patince_count = 0
else:
'undo the last update'
if added:
graph.remove_edges([current_edge])
'If a parent was removed, add it again to the graph'
if parent_removed is not None:
graph.add_edges([parent_removed])
else:
graph.add_edges([current_edge])
'update patience count'
patince_count += 1
if patience is not None and patince_count > patience:
break
print(f"finito variabile: {node_id}")
return graph.edges
"choose the optimizer algotithm"
if optimizer == 'tabu':
optimizer = tabu.TabuSearch(
node_id = node_id,
structure_estimator = self,
max_parents = max_parents,
iterations_number = iterations_number,
patience = patience,
tabu_length = tabu_length,
tabu_rules_duration = tabu_rules_duration)
else: #if optimizer == 'hill':
optimizer = hill.HillClimbing(
node_id = node_id,
structure_estimator = self,
max_parents = max_parents,
iterations_number = iterations_number,
patience = patience)
"call the optmizer's function that calculates the current node's parents"
return optimizer.optimize_structure()
def get_score_from_graph(self,graph: ng.NetworkGraph,node_id:str):

@@ -0,0 +1,112 @@
import sys
sys.path.append('../')
import itertools
import json
import typing
import networkx as nx
import numpy as np
from networkx.readwrite import json_graph
from random import choice
from abc import ABC
from optimizers.optimizer import Optimizer
from estimators import structure_estimator as se
import structure_graph.network_graph as ng
class HillClimbing(Optimizer):
"""
Optimizer class that implements Hill Climbing search
"""
def __init__(self,
node_id:str,
structure_estimator: se.StructureEstimator,
max_parents:int = None,
iterations_number:int= 40,
patience:int = None
):
"""
Initialize the optimizer for a structure_estimator
Parameters:
max_parents: maximum number of parents for each variable. If None, disabled
iterations_number: maximum number of iterations of the optimization algorithm
patience: number of iterations without any improvement before stopping the search. If None, disabled
"""
super().__init__(node_id, structure_estimator)
self.max_parents = max_parents
self.iterations_number = iterations_number
self.patience = patience
def optimize_structure(self) -> typing.List:
"""
Run the optimization process for the structure_estimator
Returns:
the list of the best edges estimated for the node
"""
#'Create the graph for the single node'
graph = ng.NetworkGraph(self.structure_estimator.sample_path.structure)
other_nodes = [node for node in self.structure_estimator.sample_path.structure.nodes_labels if node != self.node_id]
actual_best_score = self.structure_estimator.get_score_from_graph(graph,self.node_id)
patince_count = 0
for i in range(self.iterations_number):
'choose a new random edge'
current_new_parent = choice(other_nodes)
current_edge = (current_new_parent,self.node_id)
added = False
parent_removed = None
if graph.has_edge(current_edge):
graph.remove_edges([current_edge])
else:
'check the max_parents constraint'
if self.max_parents is not None:
parents_list = graph.get_parents_by_id(self.node_id)
if len(parents_list) >= self.max_parents :
parent_removed = (choice(parents_list), self.node_id)
graph.remove_edges([parent_removed])
graph.add_edges([current_edge])
added = True
#print('**************************')
current_score = self.structure_estimator.get_score_from_graph(graph,self.node_id)
if current_score > actual_best_score:
'update current best score'
actual_best_score = current_score
patince_count = 0
else:
'undo the last update'
if added:
graph.remove_edges([current_edge])
'If a parent was removed, add it again to the graph'
if parent_removed is not None:
graph.add_edges([parent_removed])
else:
graph.add_edges([current_edge])
'update patience count'
patince_count += 1
if self.patience is not None and patince_count > self.patience:
break
print(f"finito variabile: {self.node_id}")
return graph.edges
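
For reference, a sketch of driving the new optimizer class directly instead of going through estimate_parents. It assumes an estimator whose SamplePath has already been built, as in the tests at the end of this commit; 'X' is a placeholder node name.

import optimizers.hill_climbing_search as hill

# `estimator` is assumed to be an already-built StructureScoreBasedEstimator;
# 'X' must be one of the node labels in the loaded dataset.
optimizer = hill.HillClimbing(node_id='X',
                              structure_estimator=estimator,
                              max_parents=None,
                              iterations_number=40,
                              patience=10)
best_edges = optimizer.optimize_structure()
print(best_edges)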

@@ -0,0 +1,39 @@
import sys
sys.path.append('../')
import itertools
import json
import typing
import networkx as nx
import numpy as np
from networkx.readwrite import json_graph
import abc
from estimators import structure_estimator as se
class Optimizer(abc.ABC):
"""
Interface class for all optimizer subclasses
"""
def __init__(self, node_id:str, structure_estimator: se.StructureEstimator):
self.node_id = node_id
self.structure_estimator = structure_estimator
@abc.abstractmethod
def optimize_structure(self) -> typing.List:
"""
Run the optimization process for the structure_estimator
Returns:
the list of the best edges estimated for the node
"""
pass
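
To show what the new interface asks of an implementation, here is a deliberately trivial, hypothetical subclass: it reuses the base constructor for node_id and structure_estimator and implements optimize_structure, but never modifies the graph.

import typing
from optimizers.optimizer import Optimizer
import structure_graph.network_graph as ng

class NoOpOptimizer(Optimizer):
    """Illustrative only: builds the node's graph and returns its edges unchanged."""

    def optimize_structure(self) -> typing.List:
        # build the graph exactly as the real optimizers do, then stop
        graph = ng.NetworkGraph(self.structure_estimator.sample_path.structure)
        return graph.edges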

@@ -0,0 +1,161 @@
import sys
sys.path.append('../')
import itertools
import json
import typing
import networkx as nx
import numpy as np
from networkx.readwrite import json_graph
from random import choice,sample
from abc import ABC
from optimizers.optimizer import Optimizer
from estimators import structure_estimator as se
import structure_graph.network_graph as ng
import queue
class TabuSearch(Optimizer):
"""
Optimizer class that implements Tabu Search
"""
def __init__(self,
node_id:str,
structure_estimator: se.StructureEstimator,
max_parents:int = None,
iterations_number:int= 40,
patience:int = None,
tabu_length:int = 0,
tabu_rules_duration = 5
):
"""
Initialize the optimizer for a structure_estimator
Parameters:
max_parents: maximum number of parents for each variable. If None, disabled
iterations_number: maximum number of iterations of the optimization algorithm
patience: number of iterations without any improvement before stopping the search. If None, disabled
tabu_length: maximum length of the data structures used in the optimization process
tabu_rules_duration: number of iterations for which each tabu rule stays in effect
"""
super().__init__(node_id, structure_estimator)
self.max_parents = max_parents
self.iterations_number = iterations_number
self.patience = patience
self.tabu_length = tabu_length
self.tabu_rules_duration = tabu_rules_duration
def optimize_structure(self) -> typing.List:
"""
Run the optimization process for the structure_estimator
Returns:
the list of the best edges estimated for the node
"""
print(f"tabu search is processing the structure of {self.node_id}")
#'Create the graph for the single node'
graph = ng.NetworkGraph(self.structure_estimator.sample_path.structure)
other_nodes = set([node for node in self.structure_estimator.sample_path.structure.nodes_labels if node != self.node_id])
actual_best_score = self.structure_estimator.get_score_from_graph(graph,self.node_id)
tabu_set = set()
tabu_queue = queue.Queue()
patince_count = 0
tabu_count = 0
for i in range(self.iterations_number):
current_possible_nodes = other_nodes.difference(tabu_set)
'choose a new random edge according to the tabu restrictions'
if(len(current_possible_nodes) > 0):
current_new_parent = sample(current_possible_nodes,k=1)[0]
else:
current_new_parent = tabu_queue.get()
tabu_set.remove(current_new_parent)
current_edge = (current_new_parent,self.node_id)
added = False
parent_removed = None
if graph.has_edge(current_edge):
graph.remove_edges([current_edge])
else:
'check the max_parents constraint'
if self.max_parents is not None:
parents_list = graph.get_parents_by_id(self.node_id)
if len(parents_list) >= self.max_parents :
parent_removed = (choice(parents_list), self.node_id)
graph.remove_edges([parent_removed])
graph.add_edges([current_edge])
added = True
#print('**************************')
current_score = self.structure_estimator.get_score_from_graph(graph,self.node_id)
# print("-------------------------------------------")
# print(f"Current new parent: {current_new_parent}")
# print(f"Current score: {current_score}")
# print(f"Current best score: {actual_best_score}")
# print(f"tabu list : {str(tabu_set)} length: {len(tabu_set)}")
# print(f"tabu queue : {str(tabu_queue)} length: {tabu_queue.qsize()}")
# print(f"graph edges: {graph.edges}")
# print("-------------------------------------------")
# input()
if current_score > actual_best_score:
'update current best score'
actual_best_score = current_score
patince_count = 0
'update tabu list'
else:
'undo the last update'
if added:
graph.remove_edges([current_edge])
'If a parent was removed, add it again to the graph'
if parent_removed is not None:
graph.add_edges([parent_removed])
else:
graph.add_edges([current_edge])
'update patience count'
patince_count += 1
tabu_count += 1
if tabu_queue.qsize() >= self.tabu_length:
current_removed = tabu_queue.get()
tabu_set.remove(current_removed)
'Add the node on the tabu list'
tabu_queue.put(current_new_parent)
tabu_set.add(current_new_parent)
if tabu_count % self.tabu_rules_duration == 0:
if tabu_queue.qsize() > 0:
current_removed = tabu_queue.get()
tabu_set.remove(current_removed)
tabu_count = 0
else:
tabu_count = 0
if self.patience is not None and patince_count > self.patience:
break
print(f"finito variabile: {self.node_id}")
return graph.edges
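
To make the two new parameters concrete: tabu_length bounds how many candidate parents can be forbidden at the same time, and tabu_rules_duration lets the oldest ban expire after that many rejected moves. A standalone sketch of just this bookkeeping (no graph, no scoring), using the same queue-plus-set scheme as the loop above:

import queue

tabu_length = 3
tabu_rules_duration = 2

tabu_queue = queue.Queue()   # remembers insertion order, so the oldest ban is evicted first
tabu_set = set()             # fast membership test when picking the next candidate parent
tabu_count = 0

def forbid(parent):
    global tabu_count
    # evict the oldest ban if the structures are full
    if tabu_queue.qsize() >= tabu_length:
        tabu_set.remove(tabu_queue.get())
    tabu_queue.put(parent)
    tabu_set.add(parent)
    tabu_count += 1
    # every tabu_rules_duration rejected moves, the oldest ban expires
    if tabu_count % tabu_rules_duration == 0 and tabu_queue.qsize() > 0:
        tabu_set.remove(tabu_queue.get())
        tabu_count = 0

for rejected_parent in ['A', 'B', 'C', 'D', 'E']:
    forbid(rejected_parent)
    print(rejected_parent, '->', sorted(tabu_set))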

@@ -0,0 +1,52 @@
import sys
sys.path.append("../../classes/")
import glob
import math
import os
import unittest
import networkx as nx
import numpy as np
import psutil
from line_profiler import LineProfiler
import copy
import utility.cache as ch
import structure_graph.sample_path as sp
import estimators.structure_score_based_estimator as se
import utility.json_importer as ji
class TestHillClimbingSearch(unittest.TestCase):
@classmethod
def setUpClass(cls):
#cls.read_files = glob.glob(os.path.join('../../data', "*.json"))
cls.importer = ji.JsonImporter("../../data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
cls.s1 = sp.SamplePath(cls.importer)
cls.s1.build_trajectories()
cls.s1.build_structure()
def test_structure(self):
true_edges = copy.deepcopy(self.s1.structure.edges)
true_edges = set(map(tuple, true_edges))
se1 = se.StructureScoreBasedEstimator(self.s1)
edges = se1.estimate_structure(
max_parents = None,
iterations_number = 40,
patience = None,
optimizer = 'hill'
)
self.assertEqual(edges, true_edges)
if __name__ == '__main__':
unittest.main()

@@ -0,0 +1,54 @@
import sys
sys.path.append("../../classes/")
import glob
import math
import os
import unittest
import networkx as nx
import numpy as np
import psutil
from line_profiler import LineProfiler
import copy
import utility.cache as ch
import structure_graph.sample_path as sp
import estimators.structure_score_based_estimator as se
import utility.json_importer as ji
class TestTabuSearch(unittest.TestCase):
@classmethod
def setUpClass(cls):
#cls.read_files = glob.glob(os.path.join('../../data', "*.json"))
cls.importer = ji.JsonImporter("../../data/networks_and_trajectories_ternary_data_04_10.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
cls.s1 = sp.SamplePath(cls.importer)
cls.s1.build_trajectories()
cls.s1.build_structure()
def test_structure(self):
true_edges = copy.deepcopy(self.s1.structure.edges)
true_edges = set(map(tuple, true_edges))
se1 = se.StructureScoreBasedEstimator(self.s1)
edges = se1.estimate_structure(
max_parents = None,
iterations_number = 100,
patience = None,
tabu_length = 15,
tabu_rules_duration = 15,
optimizer = 'tabu'
)
self.assertEqual(edges, true_edges)
if __name__ == '__main__':
unittest.main()