From 46ab383b07efff06a20ed0ec568411a1c2b88c14 Mon Sep 17 00:00:00 2001 From: Luca Moretti Date: Fri, 9 Oct 2020 16:58:40 +0200 Subject: [PATCH] Added max_parets and patience costraints --- .../structure_score_based_estimator.py | 76 ++++++++++++++++--- .../test_structure_score_based_estimator.py | 6 +- 2 files changed, 72 insertions(+), 10 deletions(-) diff --git a/main_package/classes/structure_score_based_estimator.py b/main_package/classes/structure_score_based_estimator.py index 4a29ebe..008a30c 100644 --- a/main_package/classes/structure_score_based_estimator.py +++ b/main_package/classes/structure_score_based_estimator.py @@ -19,6 +19,11 @@ import structure as st import fam_score_calculator as fam_score +import multiprocessing +from multiprocessing import Pool + + + ''' #TODO: Insert maximum number of parents #TODO: Insert maximum number of iteration or other exit criterions @@ -64,37 +69,71 @@ class StructureScoreBasedEstimator: - def estimate_structure(self): + def estimate_structure(self, max_parents:int = None, iterations_number:int= 40, patience:int = None ): """ Compute the score-based algorithm to find the optimal structure Parameters: - + max_parents: maximum number of parents for each variable. If None, disabled + iterations_number: maximum number of optimization algorithm's iteration + patience: number of iteration without any improvement before to stop the search.If None, disabled Returns: void """ + 'Save the true edges structure in tuples' + true_edges = copy.deepcopy(self.sample_path.structure.edges) + true_edges = list(map(tuple, true_edges)) + 'Remove all the edges from the structure' - print( self.sample_path.structure.edges) - print( type(self.sample_path.structure.edges)) - print( type(self.sample_path.structure.edges[0])) self.sample_path.structure.clean_structure_edges() estimate_parents = self.estimate_parents - 'Estimate the best parents for each node' - list_edges_partial = [estimate_parents(n) for n in self.nodes] + + n_nodes= len(self.nodes) + l_max_parents= [max_parents] * n_nodes + l_iterations_number = [iterations_number] * n_nodes + l_patience = [patience] * n_nodes + + + 'Estimate the best parents for each node' + with multiprocessing.Pool(processes=4) as pool: + list_edges_partial = pool.starmap(estimate_parents, zip(self.nodes,l_max_parents,l_iterations_number,l_patience)) + #list_edges_partial = [estimate_parents(n) for n in self.nodes] + #list_edges_partial = p.map(estimate_parents, self.nodes) + 'Concatenate all the edges list' list_edges = list(itertools.chain.from_iterable(list_edges_partial)) print('-------------------------') + + 'TODO: Pensare a un modo migliore -- set difference sembra non funzionare ' + n_missing_edges = 0 + n_added_fake_edges = 0 + + for estimate_edge in list_edges: + if not estimate_edge in true_edges: + n_added_fake_edges += 1 + + for true_edge in true_edges: + if not true_edge in list_edges: + n_missing_edges += 1 + + + print(f"n archi reali non trovati: {n_missing_edges}") + print(f"n archi non reali aggiunti: {n_added_fake_edges}") + print(true_edges) print(list_edges) - def estimate_parents(self,node_id:str): + def estimate_parents(self,node_id:str, max_parents:int = None, iterations_number:int= 40, patience:int = 10 ): """ Use the FamScore of a node in order to find the best parent nodes Parameters: node_id: current node's id + max_parents: maximum number of parents for each variable. If None, disabled + iterations_number: maximum number of optimization algorithm's iteration + patience: number of iteration without any improvement before to stop the search.If None, disabled Returns: A list of the best edges for the currente node """ @@ -105,15 +144,24 @@ class StructureScoreBasedEstimator: other_nodes = [node for node in self.sample_path.structure.nodes_labels if node != node_id] actual_best_score = self.get_score_from_structure(graph,node_id) - for i in range(40): + patince_count = 0 + for i in range(iterations_number): 'choose a new random edge' current_new_parent = choice(other_nodes) current_edge = (current_new_parent,node_id) added = False + parent_removed = None + if graph.has_edge(current_edge): graph.remove_edges([current_edge]) else: + 'check the max_parents constraint' + if max_parents is not None: + parents_list = graph.get_parents_by_id(node_id) + if len(parents_list) >= max_parents : + parent_removed = (choice(parents_list), node_id) + graph.remove_edges([parent_removed]) graph.add_edges([current_edge]) added = True @@ -122,13 +170,23 @@ class StructureScoreBasedEstimator: if current_score > actual_best_score: 'update current best score' actual_best_score = current_score + patince_count = 0 else: 'undo the last update' if added: graph.remove_edges([current_edge]) + 'If a parent was removed, add it again to the graph' + if parent_removed is not None: + graph.add_edges([parent_removed]) else: graph.add_edges([current_edge]) + 'update patience count' + patince_count += 1 + + if patience is not None and patince_count > patience: + break + print(f"finito variabile: {node_id}") return graph.edges diff --git a/main_package/tests/test_structure_score_based_estimator.py b/main_package/tests/test_structure_score_based_estimator.py index 1081348..a4b602e 100644 --- a/main_package/tests/test_structure_score_based_estimator.py +++ b/main_package/tests/test_structure_score_based_estimator.py @@ -31,7 +31,11 @@ class TestStructureScoreBasedEstimator(unittest.TestCase): def test_esecuzione(self): se1 = se.StructureScoreBasedEstimator(self.s1) - se1.estimate_structure() + se1.estimate_structure( + max_parents = 5, + iterations_number = 80, + patience = None + ) if __name__ == '__main__': unittest.main()