
#
# High-dimensional Gaussian log-pdf.
#
# This file is part of PINTS (https://github.com/pints-team/pints/) which is
# released under the BSD 3-clause license. See accompanying LICENSE.md for
# copyright notice and full license details.
#
import numpy as np
import scipy.stats

from . import ToyLogPDF


class HighDimensionalGaussianLogPDF(ToyLogPDF):
    """
    High-dimensional zero-mean multivariate Gaussian log pdf, with
    off-diagonal correlations.

    Specifically, the covariance matrix Sigma is constructed so that diagonal
    elements are integers: Sigma_i,i = i, and off-diagonal elements are
    Sigma_i,j = rho * sqrt(i) * sqrt(j).

    Extends :class:`pints.toy.ToyLogPDF`.

    Parameters
    ----------
    dimension : int
        Dimension of the multivariate Gaussian distribution (must exceed 1).
    rho : float
        The correlation between pairs of parameter dimensions. Note that this
        must be between ``-1 / (dimension - 1)`` and ``1`` so that the
        covariance matrix is positive semi-definite.
    """
    def __init__(self, dimension=20, rho=0.5):
        self._n_parameters = int(dimension)
        if self._n_parameters <= 1:
            raise ValueError('Dimension must exceed 1.')
        rho = float(rho)

        # Bounds must satisfy:
        # https://stats.stackexchange.com/questions/72790/
        if rho > 1.0:
            raise ValueError('rho must be between -1 / (dims - 1) and 1.')
        if rho < -float(1.0 / (self._n_parameters - 1)):
            raise ValueError('rho must be between -1 / (dims - 1) and 1.')
        self._rho = rho

        # Construct mean array
        self._mean = np.zeros(self._n_parameters)

        # Construct covariance matrix: start from a column of 1..n, repeat it
        # across all columns, take square roots, and multiply element-wise by
        # the transpose to get rho * sqrt(i) * sqrt(j); then overwrite the
        # diagonal with the integers 1..n.
        cov = np.arange(1, 1 + self._n_parameters).reshape(
            (self._n_parameters, 1))
        cov = cov.repeat(self._n_parameters, axis=1)
        cov = np.sqrt(cov)
        cov = self._rho * cov * cov.T
        np.fill_diagonal(cov, 1 + np.arange(self._n_parameters))
        self._cov = cov
        self._cov_inv = np.linalg.inv(cov)

        # Construct scipy 'random variable'
        self._var = scipy.stats.multivariate_normal(self._mean, self._cov)

    def __call__(self, x):
        return self._var.logpdf(x)
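    # Worked example of the covariance construction above (illustrative
    # comment only, not part of the original module): with dimension=3 and
    # rho=0.5, __init__ builds
    #
    #     Sigma = [[1.000, 0.707, 0.866],
    #              [0.707, 2.000, 1.225],
    #              [0.866, 1.225, 3.000]]
    #
    # i.e. Sigma_i,i = i and Sigma_i,j = 0.5 * sqrt(i) * sqrt(j), with
    # 1-indexed i and j.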
    def distance(self, samples):
        """
        Returns approximate Kullback-Leibler divergence between samples
        and underlying distribution.

        See :meth:`pints.toy.ToyLogPDF.distance()`.
        """
        return self.kl_divergence(samples)
    def evaluateS1(self, x):
        """ See :meth:`pints.LogPDF.evaluateS1()`. """
        L = self.__call__(x)
        self._x_minus_mu = x - self._mean
        # Derivative wrt x: see https://stats.stackexchange.com/questions/27436/how-to-take-derivative-of-multivariate-normal-density  # noqa
        dL = -np.matmul(self._cov_inv, self._x_minus_mu)
        return L, dL
    def kl_divergence(self, samples):
        """
        Returns approximate Kullback-Leibler divergence between samples
        and underlying distribution.

        The returned value is (near) zero for perfect sampling, and then
        increases as the error gets larger.

        See: https://en.wikipedia.org/wiki/Kullback-Leibler_divergence
        """
        # Check size of input
        if not len(samples.shape) == 2:
            raise ValueError('Given samples list must be n x n_parameters.')
        if samples.shape[1] != self._n_parameters:
            raise ValueError(
                'Given samples must have length ' +
                str(self._n_parameters))

        # Calculate the Kullback-Leibler divergence between the given samples
        # and the multivariate normal distribution underlying this log-pdf.
        # From Wikipedia:
        #
        #   k = dimension of distribution
        #   dkl = 0.5 * (
        #       trace(s1^-1 * s0)
        #       + (m1 - m0)^T * s1^-1 * (m1 - m0)
        #       + log(det(s1) / det(s0))
        #       - k)
        #
        # Here s1 is this distribution's covariance matrix and m1 its (zero)
        # mean, while s0 and m0 are estimated from the given samples.
        y = np.array(samples, copy=True)
        m0 = np.mean(y, axis=0)
        s0 = np.cov(y.T)
        s1 = self._cov
        m1 = self._mean
        s1_inv = np.linalg.inv(s1)
        return 0.5 * (
            np.trace(np.matmul(s1_inv, s0))
            + np.matmul(np.matmul(m1 - m0, s1_inv), m1 - m0)
            - np.log(np.linalg.det(s0))
            + np.log(np.linalg.det(s1))
            - self._n_parameters)
    def n_parameters(self):
        """ See :meth:`pints.LogPDF.n_parameters()`. """
        return self._n_parameters
    def rho(self):
        """ Returns ``rho`` (the correlation between dimensions). """
        return self._rho
    def sample(self, n_samples):
        """ See :meth:`pints.toy.ToyLogPDF.sample()`. """
        n_samples = int(n_samples)
        if n_samples < 1:
            raise ValueError(
                'Number of samples must be greater than or equal to 1.')
        return self._var.rvs(n_samples)
    def suggested_bounds(self):
        """ See :meth:`pints.toy.ToyLogPDF.suggested_bounds()`. """
        # The maximum variance in any one dimension is n_parameters, so use
        # 3 times the square root of this as the bounds' magnitude
        magnitude = 3 * np.sqrt(self.n_parameters())
        bounds = np.tile([-magnitude, magnitude], (self.n_parameters(), 1))
        return np.transpose(bounds).tolist()
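

if __name__ == '__main__':
    # Minimal usage sketch (not part of the original module). Because this
    # file uses a relative import, run it as a module, e.g.
    # ``python -m pints.toy._high_dimensional_gaussian``.
    log_pdf = HighDimensionalGaussianLogPDF(dimension=5, rho=0.5)

    # Log-density and its gradient at the mean (the gradient is zero there,
    # since dL/dx = -Sigma^-1 (x - mu))
    L, dL = log_pdf.evaluateS1(np.zeros(5))
    print('log-pdf at mean:', L)
    print('gradient at mean:', dL)

    # The KL divergence of a large exact sample should be close to zero
    samples = log_pdf.sample(100000)
    print('KL divergence:', log_pdf.kl_divergence(samples))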