# Source code for purestochastic.common.losses

import tensorflow as tf
import numpy as np

@tf.function
def gaussian_negative_log_likelihood(y, prediction):
    r"""Gaussian negative log likelihood loss function.

    The loss function computes the gaussian negative log likelihood between the ground truth values
    :math:`y=(y_1, \ldots, y_n)` and the predictions which are the mean and the variance of a gaussian
    distribution, stacked along the last axis of ``prediction`` :

        * :math:`\hat{\mu} = \text{prediction}[ : , \cdots , :  , 0]`

        * :math:`\hat{\sigma}^2 = \text{prediction}[ : , \cdots , : , 1]`

    Mathematically, the loss function is defined as :

    .. math::
        \mathcal{L}(y, \hat{\mu}, \hat{\sigma}^2) = \frac{1}{2n} \displaystyle\sum_{i=1}^{n} \Bigg[\ln(\hat{\sigma}^2(x_i,\theta))+\frac{(y_i-\hat{\mu}(x_i,\theta))^2}{\hat{\sigma}^2(x_i,\theta)} \Bigg] + \frac{1}{2}\log(2 \pi)

    The mean is taken over every element of ``y`` (batch dimension included), so the
    result is a single scalar whatever the rank of the inputs.

    Parameters
    ----------
    y : tf.Tensor or np.ndarray
        Ground truth values. They are cast to the dtype of ``prediction`` before
        the loss is computed.
    prediction : tf.Tensor or np.ndarray
        The predicted values : mean and variance stacked along the last axis
        (which must therefore have size 2). The variance is used as-is : it is
        passed to a logarithm and used as a divisor, so it is expected to be
        strictly positive.

    Returns
    -------
    tf.Tensor
        Scalar value of the Gaussian negative log likelihood, with the same
        floating dtype as ``prediction``.

    Input shape
    -----------
        ``y`` : (N+2)-D tensor with shape: ``[batch_size, d_0, ...,  d_N]``.

        ``prediction`` : (N+3)-D tensor with shape: ``[batch_size, d_0, ..., d_N, 2]``.

    Output shape
    ------------
        Scalar (0-D tensor).

    Note
    ----
    The likelihood is a product of density functions and so the values can be between :math:`[0, \infty]`.
    The negative log likelihood can thus be negative if the likelihood is greater than 1. Therefore, don't
    worry if your loss function is negative, it's often the case.

    """

    # Separate the mean and the variance (stacked along the last axis)
    mean, var = tf.unstack(prediction, axis=-1)

    # Align the targets with the prediction dtype : TensorFlow does not promote
    # dtypes implicitly, so e.g. integer or float64 targets combined with
    # float32 predictions would otherwise fail in the subtraction below.
    y = tf.cast(y, mean.dtype)

    # Data-dependent part of the Gaussian Negative Log Likelihood
    log_var = tf.math.log(var)
    squared_error = tf.math.pow(tf.math.subtract(mean, y), 2)
    data_term = tf.math.reduce_mean(log_var + tf.math.divide(squared_error, var)) / 2

    # Keep the constant as a plain Python float : it then adopts the dtype of
    # ``data_term`` when added, whereas ``tf.math.log(2*np.pi)`` would always be
    # a float32 tensor and break on float64 inputs (the NumPy default).
    constant_term = 0.5 * float(np.log(2 * np.pi))

    return data_term + constant_term