From b0f9dd4bc93ed96a8e4f5a968c159e731e37d1fe Mon Sep 17 00:00:00 2001 From: Luca Moretti Date: Thu, 8 Oct 2020 18:13:49 +0200 Subject: [PATCH] Added a naive hill climbing --- main_package/classes/fam_score_calculator.py | 5 +- main_package/classes/network_graph.py | 11 +++ main_package/classes/structure.py | 13 +++ .../structure_score_based_estimator.py | 92 ++++++++++++------- 4 files changed, 85 insertions(+), 36 deletions(-) diff --git a/main_package/classes/fam_score_calculator.py b/main_package/classes/fam_score_calculator.py index 9df8f31..79638f2 100644 --- a/main_package/classes/fam_score_calculator.py +++ b/main_package/classes/fam_score_calculator.py @@ -71,7 +71,6 @@ class FamScoreCalculator: 'get cim length' values = len(cim.state_residence_times) - print(f"transition time: {cim.state_transition_matrix}") 'compute the marginal likelihood for the current cim' return np.sum([ self.single_cim_xu_marginal_likelihood_theta( @@ -100,10 +99,9 @@ class FamScoreCalculator: values = list(range(len(cim.state_residence_times))) + 'remove the index because of the x != x^ condition in the summation ' values.remove(index) - print(values) - return (loggamma(alpha_xu) - loggamma(alpha_xu + cim.state_transition_matrix[index, index])) \ + \ np.sum([self.single_internal_cim_xxu_marginal_likelihood_theta( @@ -187,7 +185,6 @@ class FamScoreCalculator: Returns: the marginal likelihood of the node when assumes a specif value """ - print(f"M[x|u]: {M_xu_suff_stats} T[x|u]: {T_xu_suff_stats}") return ( loggamma(alpha_xu + M_xu_suff_stats + 1) + (log(tau_xu) diff --git a/main_package/classes/network_graph.py b/main_package/classes/network_graph.py index 0c9f05f..7ed2063 100644 --- a/main_package/classes/network_graph.py +++ b/main_package/classes/network_graph.py @@ -89,6 +89,17 @@ class NetworkGraph: self.graph.add_node(id, indx=node_indx, val=node_val, pos_indx=pos) pos += 1 + def has_edge(self,edge:tuple)-> bool: + """ + Check if the graph contains a specific edge + + 
Parameters: + edge: a tuple that represents the edge + Returns: + bool + """ + return self.graph.has_edge(edge[0],edge[1]) + def add_edges(self, list_of_edges: typing.List): """ Add the edges to the graph contained in the list list_of_edges. diff --git a/main_package/classes/structure.py b/main_package/classes/structure.py index 3dee1c9..010c639 100644 --- a/main_package/classes/structure.py +++ b/main_package/classes/structure.py @@ -47,6 +47,19 @@ class Structure: def get_node_id(self, node_indx: int) -> str: return self._nodes_labels_list[node_indx] + def clean_structure_edges(self): + self._edges_list = list() + + def add_edge(self,edge: tuple): + self._edges_list.append(edge) + print(self._edges_list) + + def remove_edge(self,edge: tuple): + self._edges_list.remove(edge) + + def contains_edge(self,edge:tuple) -> bool: + return edge in self._edges_list + def get_node_indx(self, node_id: str) -> int: pos_indx = self._nodes_labels_list.index(node_id) return self._nodes_indexes_arr[pos_indx] diff --git a/main_package/classes/structure_score_based_estimator.py b/main_package/classes/structure_score_based_estimator.py index a149f36..4a29ebe 100644 --- a/main_package/classes/structure_score_based_estimator.py +++ b/main_package/classes/structure_score_based_estimator.py @@ -9,6 +9,7 @@ from networkx.readwrite import json_graph from random import choice +import copy import cache as ch import conditional_intensity_matrix as condim import network_graph as ng @@ -20,8 +21,9 @@ import fam_score_calculator as fam_score ''' #TODO: Insert maximum number of parents -#TODO: Evaluate if it's better to start from a complete or an empty graph +#TODO: Insert maximum number of iteration or other exit criterions #TODO: Create a parent class StructureEstimator and Two Subclasses (Score-Based and Constraint-Based) +#TODO: Evaluate if it could be better to change list_edges to set for improve the performance ''' class StructureScoreBasedEstimator: @@ -67,70 +69,96 @@ class 
StructureScoreBasedEstimator: Compute the score-based algorithm to find the optimal structure Parameters: - node_id: the label of the node + Returns: void """ + 'Remove all the edges from the structure' + print( self.sample_path.structure.edges) + print( type(self.sample_path.structure.edges)) + print( type(self.sample_path.structure.edges[0])) + self.sample_path.structure.clean_structure_edges() + estimate_parents = self.estimate_parents 'Estimate the best parents for each node' - #[estimate_parents(n) for n in self.nodes] - estimate_parents('X') + list_edges_partial = [estimate_parents(n) for n in self.nodes] + + 'Concatenate all the edges list' + list_edges = list(itertools.chain.from_iterable(list_edges_partial)) + + print('-------------------------') + print(list_edges) def estimate_parents(self,node_id:str): """ Use the FamScore of a node in order to find the best parent nodes Parameters: - void + node_id: current node's id Returns: - void + A list of the best edges for the current node """ + 'Create the graph for the single node' graph = ng.NetworkGraph(self.sample_path.structure) - 'inizialize the graph for a single node' - graph.graph_struct._edges_list=[] + other_nodes = [node for node in self.sample_path.structure.nodes_labels if node != node_id] + actual_best_score = self.get_score_from_structure(graph,node_id) - graph.fast_init(node_id) + for i in range(40): + 'choose a new random edge' + current_new_parent = choice(other_nodes) + current_edge = (current_new_parent,node_id) + added = False + + if graph.has_edge(current_edge): + graph.remove_edges([current_edge]) + else: + graph.add_edges([current_edge]) + added = True + + current_score = self.get_score_from_structure(graph,node_id) + + if current_score > actual_best_score: + 'update current best score' + actual_best_score = current_score + else: + 'undo the last update' + if added: + graph.remove_edges([current_edge]) + else: + graph.add_edges([current_edge]) + + return graph.edges - 
#graph.graph.remove_edge('Z','X') - graph.add_edges([['Z','X']]) - #graph.add_edges([['X','Z']]) + + def get_score_from_structure(self,graph: ng.NetworkGraph,node_id:str): + """ + Compute the FamScore of a node for the given graph structure + Parameters: + node_id: current node's id + graph: current graph to be computed + Returns: + The FamScore for this structure + """ + 'inizialize the graph for a single node' graph.fast_init(node_id) - + params_estimation = pe.ParametersEstimator(self.sample_path, graph) 'Inizialize and compute parameters for node' params_estimation.fast_init(node_id) SoCims = params_estimation.compute_parameters_for_node(node_id) - print(f"il numero di cims è : {len(SoCims.actual_cims)}") - 'calculate the FamScore for the node' fam_score_obj = fam_score.FamScoreCalculator() score = fam_score_obj.get_fam_score(SoCims.actual_cims) - print(f" lo score per {node_id} risulta: {score} ") + #print(f" lo score per {node_id} risulta: {score} ") return score - - '''mask = np.array([True,True]) - - cims = SoCims.filter_cims_with_mask(mask,[1,1]) - - # print(f"-----{len(SoCims.transition_matrices)}-------") - print(f"{cims[0].state_transition_matrix}") - - cims = SoCims.filter_cims_with_mask(mask,[0,0]) - - print(f"---parents {len(parents)}---------") - print(f"{cims[0].state_transition_matrix}") - ''' - - - def generate_possible_sub_sets_of_size(self, u: typing.List, size: int, parent_label: str):