From 3a80bd3ce937276b088e0b1f1549056d1eae8460 Mon Sep 17 00:00:00 2001 From: Luca Moretti Date: Tue, 3 Nov 2020 17:33:59 +0100 Subject: [PATCH] Added a first implementation of tabu search and created the optimizer interface --- .../estimators/fam_score_calculator.py | 2 +- .../structure_score_based_estimator.py | 138 +++++++-------- .../optimizers/hill_climbing_search.py | 112 ++++++++++++ main_package/classes/optimizers/optimizer.py | 39 +++++ .../classes/optimizers/tabu_search.py | 161 ++++++++++++++++++ .../optimizers/test_hill_climbing_search.py | 52 ++++++ .../tests/optimizers/test_tabu_search.py | 54 ++++++ 7 files changed, 478 insertions(+), 80 deletions(-) create mode 100644 main_package/classes/optimizers/hill_climbing_search.py create mode 100644 main_package/classes/optimizers/optimizer.py create mode 100644 main_package/classes/optimizers/tabu_search.py create mode 100644 main_package/tests/optimizers/test_hill_climbing_search.py create mode 100644 main_package/tests/optimizers/test_tabu_search.py diff --git a/main_package/classes/estimators/fam_score_calculator.py b/main_package/classes/estimators/fam_score_calculator.py index 5ac59c3..b764924 100644 --- a/main_package/classes/estimators/fam_score_calculator.py +++ b/main_package/classes/estimators/fam_score_calculator.py @@ -110,7 +110,7 @@ class FamScoreCalculator: + \ np.sum([self.single_internal_cim_xxu_marginal_likelihood_theta( cim.state_transition_matrix[index,index_x_first], - alpha_xxu) + alpha_xu * cim.state_transition_matrix[index,index_x_first] / cim.state_transition_matrix[index, index]) for index_x_first in values]) diff --git a/main_package/classes/estimators/structure_score_based_estimator.py b/main_package/classes/estimators/structure_score_based_estimator.py index d84112e..9c9ceec 100644 --- a/main_package/classes/estimators/structure_score_based_estimator.py +++ b/main_package/classes/estimators/structure_score_based_estimator.py @@ -19,6 +19,8 @@ import estimators.structure_estimator as se import structure_graph.sample_path as sp import structure_graph.structure as st import estimators.fam_score_calculator as fam_score +import optimizers.hill_climbing_search as hill +import optimizers.tabu_search as tabu from utility.decorators import timing @@ -48,7 +50,9 @@ class StructureScoreBasedEstimator(se.StructureEstimator): @timing - def estimate_structure(self, max_parents:int = None, iterations_number:int= 40, patience:int = None ): + def estimate_structure(self, max_parents:int = None, iterations_number:int= 40, + patience:int = None, tabu_length:int = None, tabu_rules_duration:int = 5, + optimizer: str = 'hill' ): """ Compute the score-based algorithm to find the optimal structure @@ -74,6 +78,9 @@ class StructureScoreBasedEstimator(se.StructureEstimator): l_max_parents= [max_parents] * n_nodes l_iterations_number = [iterations_number] * n_nodes l_patience = [patience] * n_nodes + l_tabu_length = [tabu_length] * n_nodes + l_tabu_rules_duration = [tabu_rules_duration] * n_nodes + l_optimizer = [optimizer] * n_nodes 'get the number of CPU' @@ -83,10 +90,17 @@ class StructureScoreBasedEstimator(se.StructureEstimator): 'Estimate the best parents for each node' with multiprocessing.Pool(processes=cpu_count) as pool: - list_edges_partial = pool.starmap(estimate_parents, zip(self.nodes,l_max_parents,l_iterations_number,l_patience)) - #list_edges_partial = [estimate_parents(n,max_parents,iterations_number,patience) for n in self.nodes] + list_edges_partial = pool.starmap(estimate_parents, zip( + self.nodes, + l_max_parents, + l_iterations_number, + l_patience, + l_tabu_length, + l_tabu_rules_duration, + l_optimizer)) + # list_edges_partial = [estimate_parents(n,max_parents,iterations_number,patience,tabu_length,tabu_rules_duration,optimizer) for n in self.nodes] #list_edges_partial = p.map(estimate_parents, self.nodes) - #list_edges_partial= estimate_parents('Y',max_parents,iterations_number,patience) + #list_edges_partial= estimate_parents('Q',max_parents,iterations_number,patience,tabu_length,tabu_rules_duration,optimizer) 'Concatenate all the edges list' set_list_edges = set(itertools.chain.from_iterable(list_edges_partial)) @@ -98,39 +112,33 @@ class StructureScoreBasedEstimator(se.StructureEstimator): n_missing_edges = 0 n_added_fake_edges = 0 + try: + n_added_fake_edges = len(set_list_edges.difference(true_edges)) - n_added_fake_edges = len(set_list_edges.difference(true_edges)) + n_missing_edges = len(true_edges.difference(set_list_edges)) - n_missing_edges = len(true_edges.difference(set_list_edges)) + n_true_positive = len(true_edges) - n_missing_edges - n_true_positive = len(true_edges) - n_missing_edges + precision = n_true_positive / (n_true_positive + n_added_fake_edges) - precision = n_true_positive / (n_true_positive + n_added_fake_edges) + recall = n_true_positive / (n_true_positive + n_missing_edges) - recall = n_true_positive / (n_true_positive + n_missing_edges) - - # for estimate_edge in list_edges: - # if not estimate_edge in true_edges: - # n_added_fake_edges += 1 - - # for true_edge in true_edges: - # if not true_edge in list_edges: - # n_missing_edges += 1 - # print(true_edge) - - - # print(f"n archi reali non trovati: {n_missing_edges}") - # print(f"n archi non reali aggiunti: {n_added_fake_edges}") - print(true_edges) - print(set_list_edges) - print(f"precision: {precision} ") - print(f"recall: {recall} ") + # print(f"n archi reali non trovati: {n_missing_edges}") + # print(f"n archi non reali aggiunti: {n_added_fake_edges}") + print(true_edges) + print(set_list_edges) + print(f"precision: {precision} ") + print(f"recall: {recall} ") + except Exception as e: + print(f"errore: {e}") return set_list_edges - def estimate_parents(self,node_id:str, max_parents:int = None, iterations_number:int= 40, patience:int = 10 ): + def estimate_parents(self,node_id:str, max_parents:int = None, iterations_number:int= 40, + patience:int = 10, tabu_length:int = None, tabu_rules_duration:int=5, + optimizer:str = 'hill' ): """ Use the FamScore of a node in order to find the best parent nodes Parameters: @@ -138,61 +146,33 @@ class StructureScoreBasedEstimator(se.StructureEstimator): max_parents: maximum number of parents for each variable. If None, disabled iterations_number: maximum number of optimization algorithm's iteration patience: number of iteration without any improvement before to stop the search.If None, disabled + tabu_length: maximum lenght of the data structures used in the optimization process + tabu_rules_duration: number of iterations in which each rule keeps its value + optimzer: name of the optimizer algorithm. Possible values: 'hill' (Hill climbing),'tabu' (tabu search) Returns: A list of the best edges for the currente node """ - - 'Create the graph for the single node' - graph = ng.NetworkGraph(self.sample_path.structure) - - other_nodes = [node for node in self.sample_path.structure.nodes_labels if node != node_id] - actual_best_score = self.get_score_from_graph(graph,node_id) - - patince_count = 0 - for i in range(iterations_number): - 'choose a new random edge' - current_new_parent = choice(other_nodes) - current_edge = (current_new_parent,node_id) - added = False - parent_removed = None - - - if graph.has_edge(current_edge): - graph.remove_edges([current_edge]) - else: - 'check the max_parents constraint' - if max_parents is not None: - parents_list = graph.get_parents_by_id(node_id) - if len(parents_list) >= max_parents : - parent_removed = (choice(parents_list), node_id) - graph.remove_edges([parent_removed]) - graph.add_edges([current_edge]) - added = True - #print('**************************') - current_score = self.get_score_from_graph(graph,node_id) - - - if current_score > actual_best_score: - 'update current best score' - actual_best_score = current_score - patince_count = 0 - else: - 'undo the last update' - if added: - graph.remove_edges([current_edge]) - 'If a parent was removed, add it again to the graph' - if parent_removed is not None: - graph.add_edges([parent_removed]) - else: - graph.add_edges([current_edge]) - 'update patience count' - patince_count += 1 - - if patience is not None and patince_count > patience: - break - - print(f"finito variabile: {node_id}") - return graph.edges + + "choose the optimizer algotithm" + if optimizer == 'tabu': + optimizer = tabu.TabuSearch( + node_id = node_id, + structure_estimator = self, + max_parents = max_parents, + iterations_number = iterations_number, + patience = patience, + tabu_length = tabu_length, + tabu_rules_duration = tabu_rules_duration) + else: #if optimizer == 'hill': + optimizer = hill.HillClimbing( + node_id = node_id, + structure_estimator = self, + max_parents = max_parents, + iterations_number = iterations_number, + patience = patience) + + "call the optmizer's function that calculates the current node's parents" + return optimizer.optimize_structure() def get_score_from_graph(self,graph: ng.NetworkGraph,node_id:str): diff --git a/main_package/classes/optimizers/hill_climbing_search.py b/main_package/classes/optimizers/hill_climbing_search.py new file mode 100644 index 0000000..6e1cdc3 --- /dev/null +++ b/main_package/classes/optimizers/hill_climbing_search.py @@ -0,0 +1,112 @@ +import sys +sys.path.append('../') +import itertools +import json +import typing + +import networkx as nx +import numpy as np +from networkx.readwrite import json_graph + +from random import choice + +from abc import ABC + + +from optimizers.optimizer import Optimizer +from estimators import structure_estimator as se +import structure_graph.network_graph as ng + + +class HillClimbing(Optimizer): + """ + Optimizer class that implement Hill Climbing Search + + """ + def __init__(self, + node_id:str, + structure_estimator: se.StructureEstimator, + max_parents:int = None, + iterations_number:int= 40, + patience:int = None + ): + """ + Compute Optimization process for a structure_estimator + + Parameters: + max_parents: maximum number of parents for each variable. If None, disabled + iterations_number: maximum number of optimization algorithm's iteration + patience: number of iteration without any improvement before to stop the search.If None, disabled + + + """ + super().__init__(node_id, structure_estimator) + self.max_parents = max_parents + self.iterations_number = iterations_number + self.patience = patience + + + + def optimize_structure(self) -> typing.List: + """ + Compute Optimization process for a structure_estimator + + Parameters: + + Returns: + the estimated structure for the node + + """ + + #'Create the graph for the single node' + graph = ng.NetworkGraph(self.structure_estimator.sample_path.structure) + + + other_nodes = [node for node in self.structure_estimator.sample_path.structure.nodes_labels if node != self.node_id] + actual_best_score = self.structure_estimator.get_score_from_graph(graph,self.node_id) + + patince_count = 0 + for i in range(self.iterations_number): + 'choose a new random edge' + current_new_parent = choice(other_nodes) + current_edge = (current_new_parent,self.node_id) + added = False + parent_removed = None + + + if graph.has_edge(current_edge): + graph.remove_edges([current_edge]) + else: + 'check the max_parents constraint' + if self.max_parents is not None: + parents_list = graph.get_parents_by_id(self.node_id) + if len(parents_list) >= self.max_parents : + parent_removed = (choice(parents_list), self.node_id) + graph.remove_edges([parent_removed]) + graph.add_edges([current_edge]) + added = True + #print('**************************') + current_score = self.structure_estimator.get_score_from_graph(graph,self.node_id) + + + if current_score > actual_best_score: + 'update current best score' + actual_best_score = current_score + patince_count = 0 + else: + 'undo the last update' + if added: + graph.remove_edges([current_edge]) + 'If a parent was removed, add it again to the graph' + if parent_removed is not None: + graph.add_edges([parent_removed]) + else: + graph.add_edges([current_edge]) + 'update patience count' + patince_count += 1 + + if self.patience is not None and patince_count > self.patience: + break + + print(f"finito variabile: {self.node_id}") + return graph.edges \ No newline at end of file diff --git a/main_package/classes/optimizers/optimizer.py b/main_package/classes/optimizers/optimizer.py new file mode 100644 index 0000000..c586d2e --- /dev/null +++ b/main_package/classes/optimizers/optimizer.py @@ -0,0 +1,39 @@ +import sys +sys.path.append('../') +import itertools +import json +import typing + +import networkx as nx +import numpy as np +from networkx.readwrite import json_graph + +import abc + +from estimators import structure_estimator as se + + + +class Optimizer(abc.ABC): + """ + Interface class for all the optimizer's child classes + + """ + + def __init__(self, node_id:str, structure_estimator: se.StructureEstimator): + self.node_id = node_id + self.structure_estimator = structure_estimator + + + @abc.abstractmethod + def optimize_structure(self) -> typing.List: + """ + Compute Optimization process for a structure_estimator + + Parameters: + + Returns: + the estimated structure for the node + + """ + pass diff --git a/main_package/classes/optimizers/tabu_search.py b/main_package/classes/optimizers/tabu_search.py new file mode 100644 index 0000000..fdce6a2 --- /dev/null +++ b/main_package/classes/optimizers/tabu_search.py @@ -0,0 +1,161 @@ +import sys +sys.path.append('../') +import itertools +import json +import typing + +import networkx as nx +import numpy as np +from networkx.readwrite import json_graph + +from random import choice,sample + +from abc import ABC + + +from optimizers.optimizer import Optimizer +from estimators import structure_estimator as se +import structure_graph.network_graph as ng + +import queue + + +class TabuSearch(Optimizer): + """ + Optimizer class that implement Hill Climbing Search + + """ + def __init__(self, + node_id:str, + structure_estimator: se.StructureEstimator, + max_parents:int = None, + iterations_number:int= 40, + patience:int = None, + tabu_length:int = 0, + tabu_rules_duration = 5 + ): + """ + Compute Optimization process for a structure_estimator + + Parameters: + max_parents: maximum number of parents for each variable. If None, disabled + iterations_number: maximum number of optimization algorithm's iteration + patience: number of iteration without any improvement before to stop the search.If None, disabled + tabu_length: maximum lenght of the data structures used in the optimization process + tabu_rules_duration: number of iterations in which each rule keeps its value + + """ + super().__init__(node_id, structure_estimator) + self.max_parents = max_parents + self.iterations_number = iterations_number + self.patience = patience + self.tabu_length = tabu_length + self.tabu_rules_duration = tabu_rules_duration + + + def optimize_structure(self) -> typing.List: + """ + Compute Optimization process for a structure_estimator + + Parameters: + + Returns: + the estimated structure for the node + + """ + print(f"tabu search is processing the structure of {self.node_id}") + #'Create the graph for the single node' + graph = ng.NetworkGraph(self.structure_estimator.sample_path.structure) + + + other_nodes = set([node for node in self.structure_estimator.sample_path.structure.nodes_labels if node != self.node_id]) + actual_best_score = self.structure_estimator.get_score_from_graph(graph,self.node_id) + + tabu_set = set() + tabu_queue = queue.Queue() + + patince_count = 0 + tabu_count = 0 + for i in range(self.iterations_number): + + current_possible_nodes = other_nodes.difference(tabu_set) + + 'choose a new random edge according to tabu restiction' + if(len(current_possible_nodes) > 0): + current_new_parent = sample(current_possible_nodes,k=1)[0] + else: + current_new_parent = tabu_queue.get() + tabu_set.remove(current_new_parent) + + + + current_edge = (current_new_parent,self.node_id) + added = False + parent_removed = None + + if graph.has_edge(current_edge): + graph.remove_edges([current_edge]) + else: + 'check the max_parents constraint' + if self.max_parents is not None: + parents_list = graph.get_parents_by_id(self.node_id) + if len(parents_list) >= self.max_parents : + parent_removed = (choice(parents_list), self.node_id) + graph.remove_edges([parent_removed]) + graph.add_edges([current_edge]) + added = True + #print('**************************') + current_score = self.structure_estimator.get_score_from_graph(graph,self.node_id) + + + # print("-------------------------------------------") + # print(f"Current new parent: {current_new_parent}") + # print(f"Current score: {current_score}") + # print(f"Current best score: {actual_best_score}") + # print(f"tabu list : {str(tabu_set)} length: {len(tabu_set)}") + # print(f"tabu queue : {str(tabu_queue)} length: {tabu_queue.qsize()}") + # print(f"graph edges: {graph.edges}") + + # print("-------------------------------------------") + # input() + if current_score > actual_best_score: + 'update current best score' + actual_best_score = current_score + patince_count = 0 + 'update tabu list' + + + else: + 'undo the last update' + if added: + graph.remove_edges([current_edge]) + 'If a parent was removed, add it again to the graph' + if parent_removed is not None: + graph.add_edges([parent_removed]) + else: + graph.add_edges([current_edge]) + 'update patience count' + patince_count += 1 + tabu_count += 1 + + if tabu_queue.qsize() >= self.tabu_length: + current_removed = tabu_queue.get() + tabu_set.remove(current_removed) + 'Add the node on the tabu list' + tabu_queue.put(current_new_parent) + tabu_set.add(current_new_parent) + + + if tabu_count % self.tabu_rules_duration == 0: + if tabu_queue.qsize() > 0: + current_removed = tabu_queue.get() + tabu_set.remove(current_removed) + tabu_count = 0 + else: + tabu_count = 0 + + if self.patience is not None and patince_count > self.patience: + break + + print(f"finito variabile: {self.node_id}") + return graph.edges \ No newline at end of file diff --git a/main_package/tests/optimizers/test_hill_climbing_search.py b/main_package/tests/optimizers/test_hill_climbing_search.py new file mode 100644 index 0000000..dfc87ef --- /dev/null +++ b/main_package/tests/optimizers/test_hill_climbing_search.py @@ -0,0 +1,52 @@ +import sys +sys.path.append("../../classes/") +import glob +import math +import os +import unittest + +import networkx as nx +import numpy as np +import psutil +from line_profiler import LineProfiler +import copy + +import utility.cache as ch +import structure_graph.sample_path as sp +import estimators.structure_score_based_estimator as se +import utility.json_importer as ji + + + +class TestHillClimbingSearch(unittest.TestCase): + + @classmethod + def setUpClass(cls): + #cls.read_files = glob.glob(os.path.join('../../data', "*.json")) + cls.importer = ji.JsonImporter("../../data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') + cls.s1 = sp.SamplePath(cls.importer) + cls.s1.build_trajectories() + cls.s1.build_structure() + + + + def test_structure(self): + true_edges = copy.deepcopy(self.s1.structure.edges) + true_edges = set(map(tuple, true_edges)) + + se1 = se.StructureScoreBasedEstimator(self.s1) + edges = se1.estimate_structure( + max_parents = None, + iterations_number = 40, + patience = None, + optimizer = 'hill' + ) + + + self.assertEqual(edges, true_edges) + + + +if __name__ == '__main__': + unittest.main() + diff --git a/main_package/tests/optimizers/test_tabu_search.py b/main_package/tests/optimizers/test_tabu_search.py new file mode 100644 index 0000000..4deb126 --- /dev/null +++ b/main_package/tests/optimizers/test_tabu_search.py @@ -0,0 +1,54 @@ +import sys +sys.path.append("../../classes/") +import glob +import math +import os +import unittest + +import networkx as nx +import numpy as np +import psutil +from line_profiler import LineProfiler +import copy + +import utility.cache as ch +import structure_graph.sample_path as sp +import estimators.structure_score_based_estimator as se +import utility.json_importer as ji + + + +class TestTabuSearch(unittest.TestCase): + + @classmethod + def setUpClass(cls): + #cls.read_files = glob.glob(os.path.join('../../data', "*.json")) + cls.importer = ji.JsonImporter("../../data/networks_and_trajectories_ternary_data_04_10.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name') + cls.s1 = sp.SamplePath(cls.importer) + cls.s1.build_trajectories() + cls.s1.build_structure() + + + + def test_structure(self): + true_edges = copy.deepcopy(self.s1.structure.edges) + true_edges = set(map(tuple, true_edges)) + + se1 = se.StructureScoreBasedEstimator(self.s1) + edges = se1.estimate_structure( + max_parents = None, + iterations_number = 100, + patience = None, + tabu_length = 15, + tabu_rules_duration = 15, + optimizer = 'tabu' + ) + + + self.assertEqual(edges, true_edges) + + + +if __name__ == '__main__': + unittest.main() +