
Added a first implementation of tabu search and created the optimizer interface

master
Luca Moretti 4 years ago
parent 96a93957e6
commit 3a80bd3ce9
  1. main_package/classes/estimators/fam_score_calculator.py (2 lines changed)
  2. main_package/classes/estimators/structure_score_based_estimator.py (138 lines changed)
  3. main_package/classes/optimizers/hill_climbing_search.py (112 lines changed)
  4. main_package/classes/optimizers/optimizer.py (39 lines changed)
  5. main_package/classes/optimizers/tabu_search.py (161 lines changed)
  6. main_package/tests/optimizers/test_hill_climbing_search.py (52 lines changed)
  7. main_package/tests/optimizers/test_tabu_search.py (54 lines changed)
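
For context, a minimal sketch of how the extended estimate_structure call is meant to be used after this commit. It mirrors the two tests added below; the data file name and importer arguments are taken from those tests, while the path prefix and the node set depend on where the script is run and on the dataset.

import sys
sys.path.append("main_package/classes/")

import structure_graph.sample_path as sp
import estimators.structure_score_based_estimator as se
import utility.json_importer as ji

# build the sample path exactly as the tests do (the data path is dataset-specific)
importer = ji.JsonImporter("main_package/data/networks_and_trajectories_binary_data_01_3.json",
                           'samples', 'dyn.str', 'variables', 'Time', 'Name')
s1 = sp.SamplePath(importer)
s1.build_trajectories()
s1.build_structure()

estimator = se.StructureScoreBasedEstimator(s1)

# optimizer='hill' keeps the previous behaviour; 'tabu' uses the new TabuSearch class
edges = estimator.estimate_structure(max_parents=None,
                                     iterations_number=100,
                                     patience=None,
                                     tabu_length=15,
                                     tabu_rules_duration=15,
                                     optimizer='tabu')
print(edges)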

@@ -110,7 +110,7 @@ class FamScoreCalculator:
+ \
np.sum([self.single_internal_cim_xxu_marginal_likelihood_theta(
cim.state_transition_matrix[index,index_x_first],
alpha_xxu)
alpha_xu * cim.state_transition_matrix[index,index_x_first] / cim.state_transition_matrix[index, index])
for index_x_first in values])

@@ -19,6 +19,8 @@ import estimators.structure_estimator as se
import structure_graph.sample_path as sp
import structure_graph.structure as st
import estimators.fam_score_calculator as fam_score
import optimizers.hill_climbing_search as hill
import optimizers.tabu_search as tabu
from utility.decorators import timing
@@ -48,7 +50,9 @@ class StructureScoreBasedEstimator(se.StructureEstimator):
@timing
def estimate_structure(self, max_parents:int = None, iterations_number:int= 40, patience:int = None ):
def estimate_structure(self, max_parents:int = None, iterations_number:int= 40,
patience:int = None, tabu_length:int = None, tabu_rules_duration:int = 5,
optimizer: str = 'hill' ):
"""
Run the score-based algorithm to find the optimal structure
@@ -74,6 +78,9 @@ class StructureScoreBasedEstimator(se.StructureEstimator):
l_max_parents= [max_parents] * n_nodes
l_iterations_number = [iterations_number] * n_nodes
l_patience = [patience] * n_nodes
l_tabu_length = [tabu_length] * n_nodes
l_tabu_rules_duration = [tabu_rules_duration] * n_nodes
l_optimizer = [optimizer] * n_nodes
'get the number of CPUs'
@@ -83,10 +90,17 @@ class StructureScoreBasedEstimator(se.StructureEstimator):
'Estimate the best parents for each node'
with multiprocessing.Pool(processes=cpu_count) as pool:
list_edges_partial = pool.starmap(estimate_parents, zip(self.nodes,l_max_parents,l_iterations_number,l_patience))
#list_edges_partial = [estimate_parents(n,max_parents,iterations_number,patience) for n in self.nodes]
list_edges_partial = pool.starmap(estimate_parents, zip(
self.nodes,
l_max_parents,
l_iterations_number,
l_patience,
l_tabu_length,
l_tabu_rules_duration,
l_optimizer))
# list_edges_partial = [estimate_parents(n,max_parents,iterations_number,patience,tabu_length,tabu_rules_duration,optimizer) for n in self.nodes]
#list_edges_partial = p.map(estimate_parents, self.nodes)
#list_edges_partial= estimate_parents('Y',max_parents,iterations_number,patience)
#list_edges_partial= estimate_parents('Q',max_parents,iterations_number,patience,tabu_length,tabu_rules_duration,optimizer)
'Concatenate all the edges list'
set_list_edges = set(itertools.chain.from_iterable(list_edges_partial))
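
As an aside, the pool.starmap(..., zip(...)) call above works because zip turns the parallel per-node lists into one complete argument tuple per node. A tiny self-contained illustration of the pattern, with a toy function standing in for estimate_parents:

import multiprocessing

def toy_estimate_parents(node, max_parents, iterations_number, patience):
    # stand-in for the real per-node estimation
    return (node, max_parents, iterations_number, patience)

if __name__ == '__main__':
    nodes = ['X', 'Y', 'Z']
    n = len(nodes)
    # one tuple per node: (node, max_parents, iterations_number, patience)
    args = zip(nodes, [None] * n, [40] * n, [None] * n)
    with multiprocessing.Pool(processes=2) as pool:
        results = pool.starmap(toy_estimate_parents, args)
    print(results)
    # [('X', None, 40, None), ('Y', None, 40, None), ('Z', None, 40, None)]
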
@@ -98,39 +112,33 @@ class StructureScoreBasedEstimator(se.StructureEstimator):
n_missing_edges = 0
n_added_fake_edges = 0
try:
n_added_fake_edges = len(set_list_edges.difference(true_edges))
n_missing_edges = len(true_edges.difference(set_list_edges))
n_true_positive = len(true_edges) - n_missing_edges
precision = n_true_positive / (n_true_positive + n_added_fake_edges)
recall = n_true_positive / (n_true_positive + n_missing_edges)
# for estimate_edge in list_edges:
# if not estimate_edge in true_edges:
# n_added_fake_edges += 1
# for true_edge in true_edges:
# if not true_edge in list_edges:
# n_missing_edges += 1
# print(true_edge)
# print(f"real edges not found: {n_missing_edges}")
# print(f"spurious edges added: {n_added_fake_edges}")
print(true_edges)
print(set_list_edges)
print(f"precision: {precision}")
print(f"recall: {recall}")
except Exception as e:
print(f"error: {e}")
return set_list_edges
def estimate_parents(self,node_id:str, max_parents:int = None, iterations_number:int= 40, patience:int = 10 ):
def estimate_parents(self,node_id:str, max_parents:int = None, iterations_number:int= 40,
patience:int = 10, tabu_length:int = None, tabu_rules_duration:int=5,
optimizer:str = 'hill' ):
"""
Use the FamScore of a node in order to find the best parent nodes
Parameters:
@@ -138,61 +146,33 @@ class StructureScoreBasedEstimator(se.StructureEstimator):
max_parents: maximum number of parents for each variable. If None, disabled
iterations_number: maximum number of iterations of the optimization algorithm
patience: number of iterations without any improvement before stopping the search. If None, disabled
tabu_length: maximum length of the data structures used in the optimization process
tabu_rules_duration: number of iterations for which each tabu rule stays in effect
optimizer: name of the optimizer algorithm. Possible values: 'hill' (hill climbing), 'tabu' (tabu search)
Returns:
A list of the best edges for the current node
"""
'Create the graph for the single node'
graph = ng.NetworkGraph(self.sample_path.structure)
other_nodes = [node for node in self.sample_path.structure.nodes_labels if node != node_id]
actual_best_score = self.get_score_from_graph(graph,node_id)
patince_count = 0
for i in range(iterations_number):
'choose a new random edge'
current_new_parent = choice(other_nodes)
current_edge = (current_new_parent,node_id)
added = False
parent_removed = None
if graph.has_edge(current_edge):
graph.remove_edges([current_edge])
else:
'check the max_parents constraint'
if max_parents is not None:
parents_list = graph.get_parents_by_id(node_id)
if len(parents_list) >= max_parents :
parent_removed = (choice(parents_list), node_id)
graph.remove_edges([parent_removed])
graph.add_edges([current_edge])
added = True
#print('**************************')
current_score = self.get_score_from_graph(graph,node_id)
if current_score > actual_best_score:
'update current best score'
actual_best_score = current_score
patince_count = 0
else:
'undo the last update'
if added:
graph.remove_edges([current_edge])
'If a parent was removed, add it again to the graph'
if parent_removed is not None:
graph.add_edges([parent_removed])
else:
graph.add_edges([current_edge])
'update patience count'
patince_count += 1
if patience is not None and patince_count > patience:
break
print(f"finito variabile: {node_id}")
return graph.edges
"choose the optimizer algotithm"
if optimizer == 'tabu':
optimizer = tabu.TabuSearch(
node_id = node_id,
structure_estimator = self,
max_parents = max_parents,
iterations_number = iterations_number,
patience = patience,
tabu_length = tabu_length,
tabu_rules_duration = tabu_rules_duration)
else: #if optimizer == 'hill':
optimizer = hill.HillClimbing(
node_id = node_id,
structure_estimator = self,
max_parents = max_parents,
iterations_number = iterations_number,
patience = patience)
"call the optmizer's function that calculates the current node's parents"
return optimizer.optimize_structure()
def get_score_from_graph(self,graph: ng.NetworkGraph,node_id:str):

@@ -0,0 +1,112 @@
import sys
sys.path.append('../')
import itertools
import json
import typing
import networkx as nx
import numpy as np
from networkx.readwrite import json_graph
from random import choice
from abc import ABC
from optimizers.optimizer import Optimizer
from estimators import structure_estimator as se
import structure_graph.network_graph as ng
class HillClimbing(Optimizer):
"""
Optimizer class that implements Hill Climbing search
"""
def __init__(self,
node_id:str,
structure_estimator: se.StructureEstimator,
max_parents:int = None,
iterations_number:int= 40,
patience:int = None
):
"""
Initialize the optimizer for a structure_estimator
Parameters:
max_parents: maximum number of parents for each variable. If None, disabled
iterations_number: maximum number of iterations of the optimization algorithm
patience: number of iterations without any improvement before stopping the search. If None, disabled
"""
super().__init__(node_id, structure_estimator)
self.max_parents = max_parents
self.iterations_number = iterations_number
self.patience = patience
def optimize_structure(self) -> typing.List:
"""
Run the optimization process for the structure_estimator
Returns:
the list of the best edges estimated for the node
"""
#'Create the graph for the single node'
graph = ng.NetworkGraph(self.structure_estimator.sample_path.structure)
other_nodes = [node for node in self.structure_estimator.sample_path.structure.nodes_labels if node != self.node_id]
actual_best_score = self.structure_estimator.get_score_from_graph(graph,self.node_id)
patince_count = 0
for i in range(self.iterations_number):
'choose a new random edge'
current_new_parent = choice(other_nodes)
current_edge = (current_new_parent,self.node_id)
added = False
parent_removed = None
if graph.has_edge(current_edge):
graph.remove_edges([current_edge])
else:
'check the max_parents constraint'
if self.max_parents is not None:
parents_list = graph.get_parents_by_id(self.node_id)
if len(parents_list) >= self.max_parents :
parent_removed = (choice(parents_list), self.node_id)
graph.remove_edges([parent_removed])
graph.add_edges([current_edge])
added = True
#print('**************************')
current_score = self.structure_estimator.get_score_from_graph(graph,self.node_id)
if current_score > actual_best_score:
'update current best score'
actual_best_score = current_score
patince_count = 0
else:
'undo the last update'
if added:
graph.remove_edges([current_edge])
'If a parent was removed, add it again to the graph'
if parent_removed is not None:
graph.add_edges([parent_removed])
else:
graph.add_edges([current_edge])
'update patience count'
patince_count += 1
if self.patience is not None and patince_count > self.patience:
break
print(f"finito variabile: {self.node_id}")
return graph.edges
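
For reference, a sketch of driving the new optimizer class directly instead of going through estimate_parents. It assumes an estimator whose SamplePath has already been built, as in the tests at the end of this commit; 'X' is a placeholder node name.

import optimizers.hill_climbing_search as hill

# `estimator` is assumed to be an already-built StructureScoreBasedEstimator;
# 'X' must be one of the node labels in the loaded dataset.
optimizer = hill.HillClimbing(node_id='X',
                              structure_estimator=estimator,
                              max_parents=None,
                              iterations_number=40,
                              patience=10)
best_edges = optimizer.optimize_structure()
print(best_edges)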

@@ -0,0 +1,39 @@
import sys
sys.path.append('../')
import itertools
import json
import typing
import networkx as nx
import numpy as np
from networkx.readwrite import json_graph
import abc
from estimators import structure_estimator as se
class Optimizer(abc.ABC):
"""
Interface class for all optimizer subclasses
"""
def __init__(self, node_id:str, structure_estimator: se.StructureEstimator):
self.node_id = node_id
self.structure_estimator = structure_estimator
@abc.abstractmethod
def optimize_structure(self) -> typing.List:
"""
Run the optimization process for the structure_estimator
Returns:
the list of the best edges estimated for the node
"""
pass
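
To show what the new interface asks of an implementation, here is a deliberately trivial, hypothetical subclass: it reuses the base constructor for node_id and structure_estimator and implements optimize_structure, but never modifies the graph.

import typing
from optimizers.optimizer import Optimizer
import structure_graph.network_graph as ng

class NoOpOptimizer(Optimizer):
    """Illustrative only: builds the node's graph and returns its edges unchanged."""

    def optimize_structure(self) -> typing.List:
        # build the graph exactly as the real optimizers do, then stop
        graph = ng.NetworkGraph(self.structure_estimator.sample_path.structure)
        return graph.edges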

@@ -0,0 +1,161 @@
import sys
sys.path.append('../')
import itertools
import json
import typing
import networkx as nx
import numpy as np
from networkx.readwrite import json_graph
from random import choice,sample
from abc import ABC
from optimizers.optimizer import Optimizer
from estimators import structure_estimator as se
import structure_graph.network_graph as ng
import queue
class TabuSearch(Optimizer):
"""
Optimizer class that implements Tabu Search
"""
def __init__(self,
node_id:str,
structure_estimator: se.StructureEstimator,
max_parents:int = None,
iterations_number:int= 40,
patience:int = None,
tabu_length:int = 0,
tabu_rules_duration = 5
):
"""
Initialize the optimizer for a structure_estimator
Parameters:
max_parents: maximum number of parents for each variable. If None, disabled
iterations_number: maximum number of iterations of the optimization algorithm
patience: number of iterations without any improvement before stopping the search. If None, disabled
tabu_length: maximum length of the data structures used in the optimization process
tabu_rules_duration: number of iterations for which each tabu rule stays in effect
"""
super().__init__(node_id, structure_estimator)
self.max_parents = max_parents
self.iterations_number = iterations_number
self.patience = patience
self.tabu_length = tabu_length
self.tabu_rules_duration = tabu_rules_duration
def optimize_structure(self) -> typing.List:
"""
Run the optimization process for the structure_estimator
Returns:
the list of the best edges estimated for the node
"""
print(f"tabu search is processing the structure of {self.node_id}")
#'Create the graph for the single node'
graph = ng.NetworkGraph(self.structure_estimator.sample_path.structure)
other_nodes = set([node for node in self.structure_estimator.sample_path.structure.nodes_labels if node != self.node_id])
actual_best_score = self.structure_estimator.get_score_from_graph(graph,self.node_id)
tabu_set = set()
tabu_queue = queue.Queue()
patince_count = 0
tabu_count = 0
for i in range(self.iterations_number):
current_possible_nodes = other_nodes.difference(tabu_set)
'choose a new random edge according to the tabu restrictions'
if(len(current_possible_nodes) > 0):
current_new_parent = sample(current_possible_nodes,k=1)[0]
else:
current_new_parent = tabu_queue.get()
tabu_set.remove(current_new_parent)
current_edge = (current_new_parent,self.node_id)
added = False
parent_removed = None
if graph.has_edge(current_edge):
graph.remove_edges([current_edge])
else:
'check the max_parents constraint'
if self.max_parents is not None:
parents_list = graph.get_parents_by_id(self.node_id)
if len(parents_list) >= self.max_parents :
parent_removed = (choice(parents_list), self.node_id)
graph.remove_edges([parent_removed])
graph.add_edges([current_edge])
added = True
#print('**************************')
current_score = self.structure_estimator.get_score_from_graph(graph,self.node_id)
# print("-------------------------------------------")
# print(f"Current new parent: {current_new_parent}")
# print(f"Current score: {current_score}")
# print(f"Current best score: {actual_best_score}")
# print(f"tabu list : {str(tabu_set)} length: {len(tabu_set)}")
# print(f"tabu queue : {str(tabu_queue)} length: {tabu_queue.qsize()}")
# print(f"graph edges: {graph.edges}")
# print("-------------------------------------------")
# input()
if current_score > actual_best_score:
'update current best score'
actual_best_score = current_score
patince_count = 0
'update tabu list'
else:
'undo the last update'
if added:
graph.remove_edges([current_edge])
'If a parent was removed, add it again to the graph'
if parent_removed is not None:
graph.add_edges([parent_removed])
else:
graph.add_edges([current_edge])
'update patience count'
patince_count += 1
tabu_count += 1
if tabu_queue.qsize() >= self.tabu_length:
current_removed = tabu_queue.get()
tabu_set.remove(current_removed)
'Add the node on the tabu list'
tabu_queue.put(current_new_parent)
tabu_set.add(current_new_parent)
if tabu_count % self.tabu_rules_duration == 0:
if tabu_queue.qsize() > 0:
current_removed = tabu_queue.get()
tabu_set.remove(current_removed)
tabu_count = 0
else:
tabu_count = 0
if self.patience is not None and patince_count > self.patience:
break
print(f"finito variabile: {self.node_id}")
return graph.edges
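
To make the two new parameters concrete: tabu_length bounds how many candidate parents can be forbidden at the same time, and tabu_rules_duration lets the oldest ban expire after that many rejected moves. A standalone sketch of just this bookkeeping (no graph, no scoring), using the same queue-plus-set scheme as the loop above:

import queue

tabu_length = 3
tabu_rules_duration = 2

tabu_queue = queue.Queue()   # remembers insertion order, so the oldest ban is evicted first
tabu_set = set()             # fast membership test when picking the next candidate parent
tabu_count = 0

def forbid(parent):
    global tabu_count
    # evict the oldest ban if the structures are full
    if tabu_queue.qsize() >= tabu_length:
        tabu_set.remove(tabu_queue.get())
    tabu_queue.put(parent)
    tabu_set.add(parent)
    tabu_count += 1
    # every tabu_rules_duration rejected moves, the oldest ban expires
    if tabu_count % tabu_rules_duration == 0 and tabu_queue.qsize() > 0:
        tabu_set.remove(tabu_queue.get())
        tabu_count = 0

for rejected_parent in ['A', 'B', 'C', 'D', 'E']:
    forbid(rejected_parent)
    print(rejected_parent, '->', sorted(tabu_set))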

@@ -0,0 +1,52 @@
import sys
sys.path.append("../../classes/")
import glob
import math
import os
import unittest
import networkx as nx
import numpy as np
import psutil
from line_profiler import LineProfiler
import copy
import utility.cache as ch
import structure_graph.sample_path as sp
import estimators.structure_score_based_estimator as se
import utility.json_importer as ji
class TestHillClimbingSearch(unittest.TestCase):
@classmethod
def setUpClass(cls):
#cls.read_files = glob.glob(os.path.join('../../data', "*.json"))
cls.importer = ji.JsonImporter("../../data/networks_and_trajectories_binary_data_01_3.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
cls.s1 = sp.SamplePath(cls.importer)
cls.s1.build_trajectories()
cls.s1.build_structure()
def test_structure(self):
true_edges = copy.deepcopy(self.s1.structure.edges)
true_edges = set(map(tuple, true_edges))
se1 = se.StructureScoreBasedEstimator(self.s1)
edges = se1.estimate_structure(
max_parents = None,
iterations_number = 40,
patience = None,
optimizer = 'hill'
)
self.assertEqual(edges, true_edges)
if __name__ == '__main__':
unittest.main()

@@ -0,0 +1,54 @@
import sys
sys.path.append("../../classes/")
import glob
import math
import os
import unittest
import networkx as nx
import numpy as np
import psutil
from line_profiler import LineProfiler
import copy
import utility.cache as ch
import structure_graph.sample_path as sp
import estimators.structure_score_based_estimator as se
import utility.json_importer as ji
class TestTabuSearch(unittest.TestCase):
@classmethod
def setUpClass(cls):
#cls.read_files = glob.glob(os.path.join('../../data', "*.json"))
cls.importer = ji.JsonImporter("../../data/networks_and_trajectories_ternary_data_04_10.json", 'samples', 'dyn.str', 'variables', 'Time', 'Name')
cls.s1 = sp.SamplePath(cls.importer)
cls.s1.build_trajectories()
cls.s1.build_structure()
def test_structure(self):
true_edges = copy.deepcopy(self.s1.structure.edges)
true_edges = set(map(tuple, true_edges))
se1 = se.StructureScoreBasedEstimator(self.s1)
edges = se1.estimate_structure(
max_parents = None,
iterations_number = 100,
patience = None,
tabu_length = 15,
tabu_rules_duration = 15,
optimizer = 'tabu'
)
self.assertEqual(edges, true_edges)
if __name__ == '__main__':
unittest.main()