Added FamScore function, added remove_edges function in network graph

4 years ago · 75c689ff42
parent 0112859665
commit 75c689ff42
4 changed files with 177 additions and 54 deletions
--- a/main_package/classes/fam_score_calculator.py
+++ b/main_package/classes/fam_score_calculator.py
@ -9,45 +9,130 @@ from networkx.readwrite import json_graph
 from math import log
-from scipy.special import gamma
+from scipy.special import loggamma
 from random import choice
-import set_of_cims as soCims 
+import set_of_cims as soCims
 import network_graph as net_graph
 import conditional_intensity_matrix as cim_class
 '''
 TODO: Parlare dell'idea di ciclare sulle cim senza filtrare
-TODO: Parlare di gamma in scipy e math(overflow) 
+TODO: Parlare del problema con gamma in scipy e math(overflow)
-TODO: Problema warning overflow
+TODO: Problema warning overflow durante l'esecuzione
 '''
 class FamScoreCalculator:
    """
-    Has the task of calculate the FamScore of a node
+    Has the task of calculating the FamScore of a node
    """
    def __init__(self):
        np.seterr('raise')
        pass
-    def marginal_likelihood_theta(self, 
+    # region theta
-                            node_id: str, 
+
-                            set_of_cims: soCims.SetOfCims,
+    def marginal_likelihood_theta(self,
-                            graph:net_graph.NetworkGraph):
+                        cims: cim_class.ConditionalIntensityMatrix,
                        alpha_xu: float = 1,
                        alpha_xxu: float = 1):
        """
-       calculate the value of the marginal likelihood over theta of the node identified by the label node_id
+        calculate the value of the marginal likelihood over theta of a node from its cims
        Parameters:
-            node_id: the label of the node
+            cims: np.array with all the node's cims,
            alpha_xu: hyperparameter over the CTBN’s q parameters
            alpha_xxu: hyperparameter over the CTBN’s theta parameters
        Returns:
            the value of the marginal likelihood over theta
        """
        return np.sum(
                        [self.variable_cim_xu_marginal_likelihood_theta(cim,
                                                                    alpha_xu,
                                                                    alpha_xxu)
                        for cim in cims])
    def variable_cim_xu_marginal_likelihood_theta(self,
                        cim: cim_class.ConditionalIntensityMatrix,
                        alpha_xu: float = 1,
                        alpha_xxu: float = 1):
        """
        calculate the value of the marginal likelihood over theta given a cim
        Parameters:
            cim: A conditional_intensity_matrix object with the sufficient statistics,
            alpha_xu: hyperparameter over the CTBN’s q parameters
            alpha_xxu: hyperparameter over the CTBN’s theta parameters
        Returns:
            the value of the marginal likelihood over theta
        """
-        return 2
+
        'get cim length'
        values = len(cim.state_residence_times)
        print(f"transition time: {cim.state_transition_matrix}")
        'compute the marginal likelihood for the current cim'
        return np.sum([
                    self.single_cim_xu_marginal_likelihood_theta(
                                                index,
                                                cim,
                                                alpha_xu,
                                                alpha_xxu)
                    for index in range(values)])
    def single_cim_xu_marginal_likelihood_theta(self,
                    index: int,
                    cim: "cim_class.ConditionalIntensityMatrix",
                    alpha_xu: float = 1,
                    alpha_xxu: float = 1):
        """
        calculate the log marginal likelihood over theta of the node when it
        assumes a specific value and a specific parents' assignment
        Parameters:
            index: current x instance's index (row of the state transition matrix)
            cim: A conditional_intensity_matrix object with the sufficient statistics,
            alpha_xu: hyperparameter over the CTBN's q parameters
            alpha_xxu: hyperparameter over the CTBN's theta parameters
        Returns:
            loggamma(alpha_xu) - loggamma(alpha_xu + M[index, index]) plus the sum,
            over every other state x', of the xx'u term computed from M[index, x']
        """
        transitions = cim.state_transition_matrix
        # Every state except the current one; the number of states is taken
        # from the length of the residence-time vector.
        other_states = [state
                        for state in range(len(cim.state_residence_times))
                        if state != index]
        # NOTE(review): the diagonal entry is presumably M[x|u], the total
        # number of transitions leaving state x - confirm against the estimator.
        leaving_term = loggamma(alpha_xu) - loggamma(alpha_xu + transitions[index, index])
        return leaving_term \
                + \
                np.sum([self.single_internal_cim_xxu_marginal_likelihood_theta(
                                                                        transitions[index, index_x_first],
                                                                        alpha_xxu)
                for index_x_first in other_states])
    def single_internal_cim_xxu_marginal_likelihood_theta(self,
                                                M_xxu_suff_stats: float,
                                                alpha_xxu: float=1):
        """
        calculate the xx'u term of the marginal likelihood over theta formula,
        expressed as a difference of log-gamma values
        Parameters:
            M_xxu_suff_stats: value of the sufficient statistic M[xx'|u]
            alpha_xxu: hyperparameter over the CTBN's theta parameters
        Returns:
            loggamma(alpha_xxu + M_xxu_suff_stats) - loggamma(alpha_xxu)
        """
        with_statistics = loggamma(alpha_xxu + M_xxu_suff_stats)
        hyperparameter_only = loggamma(alpha_xxu)
        return with_statistics - hyperparameter_only
    # endregion
    # region q
    def marginal_likelihood_q(self,
                        cims: np.array,
-                        tau_xu:float = 1,
+                        tau_xu: float=1,
-                        alpha_xu:float = 1):
+                        alpha_xu: float=1):
        """
        calculate the value of the marginal likelihood over q of the node identified by the label node_id
        Parameters:
@ -57,51 +142,72 @@ class FamScoreCalculator:
        Returns:
            the value of the marginal likelihood over q
        """
-        return np.prod([self.variable_cim_xu_marginal_likelihood_q(cim,tau_xu,alpha_xu) for cim in cims])
+        return np.sum([self.variable_cim_xu_marginal_likelihood_q(cim, tau_xu, alpha_xu) for cim in cims])
-    
+
    def variable_cim_xu_marginal_likelihood_q(self,
-                        cim:cim_class.ConditionalIntensityMatrix,
+                        cim: cim_class.ConditionalIntensityMatrix,
-                        tau_xu:float = 1,
+                        tau_xu: float=1,
-                        alpha_xu:float = 1):
+                        alpha_xu: float=1):
        """
        calculate the value of the marginal likelihood over q given a cim
        Parameters:
            cim: A conditional_intensity_matrix object with the sufficient statistics,
            tau_xu: hyperparameter over the CTBN’s q parameters
            alpha_xu: hyperparameter over the CTBN’s q parameters
        Returns:
            the value of the marginal likelihood over q
        """
        'get cim length'
-        values=len(cim.state_residence_times) 
+        values=len(cim.state_residence_times)
        'compute the marginal likelihood for the current cim'
-        return np.prod([
+        return np.sum([
                    self.single_cim_xu_marginal_likelihood_q(
-                                                cim.state_transition_matrix[index,index],
+                                                cim.state_transition_matrix[index, index],
                                                cim.state_residence_times[index],
                                                tau_xu,
                                                alpha_xu)
-                    for index in range(values)])    
+                    for index in range(values)])
    def single_cim_xu_marginal_likelihood_q(self,
-                        M_suff_stats:float,
+                        M_xu_suff_stats: float,
-                        T_suff_stats:float,
+                        T_xu_suff_stats: float,
-                        tau_xu:float = 1,
+                        tau_xu: float=1,
-                        alpha_xu:float = 1):
+                        alpha_xu: float=1):
        """
-        calculate the marginal likelihood of the node when assumes a specif value
+        calculate the marginal likelihood on q of the node when it assumes a specific value
-        and a specif parents's assignment 
+        and a specific parents' assignment
        Parameters:
-            cims: np.array with all the node's cims,
+            M_xu_suff_stats: value of the sufficient statistic M[x|u]
            T_xu_suff_stats: value of the sufficient statistic T[x|u]
            tau_xu: hyperparameter over the CTBN’s q parameters
            alpha_xu: hyperparameter over the CTBN’s q parameters
        Returns:
            the marginal likelihood of the node when it assumes a specific value
        """
-        print(M_suff_stats)
+        print(f"M[x|u]: {M_xu_suff_stats} T[x|u]: {T_xu_suff_stats}")
-        return  (gamma(alpha_xu + M_suff_stats + 1)* (tau_xu**(alpha_xu+1))) \
+        return (
-                / \
+                loggamma(alpha_xu + M_xu_suff_stats + 1) + 
-                (gamma(alpha_xu + 1)*((tau_xu + T_suff_stats)**(alpha_xu + M_suff_stats + 1)))
+                                                        (log(tau_xu)
                                                        *
                                                        (alpha_xu+1))
                ) \
                - \
                (loggamma(alpha_xu + 1)+(
                                    log(tau_xu + T_xu_suff_stats) 
                                    *
                                    (alpha_xu + M_xu_suff_stats + 1))
                )
    # endregion
    def get_fam_score(self,
                cims: np.array,
-                tau_xu:float = 1,
+                tau_xu: float=1,
-                alpha_xu:float = 1,
+                alpha_xu: float=1,
-                alpha_xxu:float = 1):
+                alpha_xxu: float=1):
        """
        calculate the FamScore value of the node identified by the label node_id
        Parameters:
@ -112,10 +218,10 @@ class FamScoreCalculator:
        Returns:
            the FamScore value of the node
        """
-        return log(
+        return self.marginal_likelihood_q(cims,
-                    self.marginal_likelihood_q(cims,tau_xu,alpha_xu)
+                                    tau_xu,
-                ) \
+                                    alpha_xu) \
-                + \
+               + \
-                log(
+               self.marginal_likelihood_theta(cims, 
-                    self.marginal_likelihood_theta(cims,tau_xu,alpha_xu,alpha_xxu)
+                                        alpha_xu,
-                    )
+                                        alpha_xxu)
--- a/main_package/classes/network_graph.py
+++ b/main_package/classes/network_graph.py
@ -100,6 +100,17 @@ class NetworkGraph:
        """
        self.graph.add_edges_from(list_of_edges)
    def remove_edges(self, list_of_edges: typing.List):
        """
        Remove from the graph the edges contained in the list list_of_edges,
        delegating to the remove_edges_from method of self.graph.
        Parameters:
            list_of_edges: the edges to remove
        Returns:
            void
        """
        wrapped_graph = self.graph
        wrapped_graph.remove_edges_from(list_of_edges)
    def get_ordered_by_indx_set_of_parents(self, node: str):
        """
        Builds the aggregated structure that holds all the infos relative to the parent set of the node, namely
--- a/main_package/classes/structure_score_based_estimator.py
+++ b/main_package/classes/structure_score_based_estimator.py
@ -80,35 +80,42 @@ class StructureScoreBasedEstimator:
    def estimate_parents(self,node_id:str):
        """
        Use the FamScore of a node in order to find the best parent nodes
        Parameters:
            void
        Returns:
            void
        """
        'Create the graph for the single node'
        graph = ng.NetworkGraph(self.sample_path.structure)
        'inizialize the graph for a single node'
        graph.fast_init(node_id)
        graph.graph_struct._edges_list=[]
        graph.fast_init(node_id) 
        #graph.graph.remove_edge('Z','X')
        graph.add_edges([['Z','X']])
        #graph.add_edges([['X','Z']])
        graph.fast_init(node_id) 
        params_estimation = pe.ParametersEstimator(self.sample_path, graph)
        'Inizialize and compute parameters for node'
        params_estimation.fast_init(node_id)
        SoCims = params_estimation.compute_parameters_for_node(node_id)
-        'Get the node\'s parents list'
+        print(f"il numero di cims è : {len(SoCims.actual_cims)}")
        parents = graph.get_parents_by_id(node_id)
        values = graph.get_states_number(parents[0])
        print(f" actual_cims {len(SoCims.actual_cims)} padri {len(parents)} ")
        'calculate the FamScore for the node'
        fam_score_obj = fam_score.FamScoreCalculator()
        score = fam_score_obj.get_fam_score(SoCims.actual_cims)
        print(f" lo score per {node_id} risulta: {score} ")
        return score 
        '''mask = np.array([True,True])
        cims = SoCims.filter_cims_with_mask(mask,[1,1])
--- a/main_package/tests/test_structure_score_based_estimator.py
+++ b/main_package/tests/test_structure_score_based_estimator.py
@ -29,7 +29,6 @@ class TestStructureScoreBasedEstimator(unittest.TestCase):
    def test_esecuzione(self):
        se1 = se.StructureScoreBasedEstimator(self.s1)
        se1.estimate_structure()