
#
# High-dimensional Gaussian log-pdf.
#
# This file is part of PINTS (https://github.com/pints-team/pints/) which is
# released under the BSD 3-clause license. See accompanying LICENSE.md for
# copyright notice and full license details.
#
import numpy as np
import scipy.stats

from . import ToyLogPDF


class HighDimensionalGaussianLogPDF(ToyLogPDF):
    """
    High-dimensional zero-mean multivariate Gaussian log pdf, with
    off-diagonal correlations.

    Specifically, the covariance matrix Sigma is constructed so that diagonal
    elements are integers: Sigma_i,i = i, and off-diagonal elements are
    Sigma_i,j = rho * sqrt(i) * sqrt(j).

    Extends :class:`pints.toy.ToyLogPDF`.

    Parameters
    ----------
    dimension : int
        Dimension of the multivariate Gaussian distribution (must exceed 1).
    rho : float
        The correlation between pairs of parameter dimensions. Note that this
        must be between ``-1 / (dimension - 1)`` and ``1`` so that the
        covariance matrix is positive semi-definite.
    """
    def __init__(self, dimension=20, rho=0.5):
        self._n_parameters = int(dimension)
        if self._n_parameters <= 1:
            raise ValueError('Dimension must exceed 1.')
        rho = float(rho)

        # Bounds must satisfy:
        # https://stats.stackexchange.com/questions/72790/
        if rho > 1.0:
            raise ValueError('rho must be between -1 / (dims - 1) and 1.')
        if rho < -float(1.0 / (self._n_parameters - 1)):
            raise ValueError('rho must be between -1 / (dims - 1) and 1.')
        self._rho = rho

        # Construct mean array
        self._mean = np.zeros(self._n_parameters)

        # Construct covariance matrix: start from a column of 1..n, repeat it
        # across all columns, take square roots, and multiply element-wise by
        # the transpose to get rho * sqrt(i) * sqrt(j); then overwrite the
        # diagonal with the integers 1..n.
        cov = np.arange(1, 1 + self._n_parameters).reshape(
            (self._n_parameters, 1))
        cov = cov.repeat(self._n_parameters, axis=1)
        cov = np.sqrt(cov)
        cov = self._rho * cov * cov.T
        np.fill_diagonal(cov, 1 + np.arange(self._n_parameters))
        self._cov = cov
        self._cov_inv = np.linalg.inv(cov)

        # Construct scipy 'random variable'
        self._var = scipy.stats.multivariate_normal(self._mean, self._cov)

    def __call__(self, x):
        return self._var.logpdf(x)
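    # Worked example of the covariance construction above (illustrative
    # comment only, not part of the original module): with dimension=3 and
    # rho=0.5, __init__ builds
    #
    #     Sigma = [[1.000, 0.707, 0.866],
    #              [0.707, 2.000, 1.225],
    #              [0.866, 1.225, 3.000]]
    #
    # i.e. Sigma_i,i = i and Sigma_i,j = 0.5 * sqrt(i) * sqrt(j), with
    # 1-indexed i and j.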
    def distance(self, samples):
        """
        Returns approximate Kullback-Leibler divergence between samples
        and underlying distribution.

        See :meth:`pints.toy.ToyLogPDF.distance()`.
        """
        return self.kl_divergence(samples)
    def evaluateS1(self, x):
        """ See :meth:`pints.LogPDF.evaluateS1()`. """
        L = self.__call__(x)
        self._x_minus_mu = x - self._mean
        # Derivative wrt x: see https://stats.stackexchange.com/questions/27436/how-to-take-derivative-of-multivariate-normal-density  # noqa
        dL = -np.matmul(self._cov_inv, self._x_minus_mu)
        return L, dL
    def kl_divergence(self, samples):
        """
        Returns approximate Kullback-Leibler divergence between samples
        and underlying distribution.

        The returned value is (near) zero for perfect sampling, and then
        increases as the error gets larger.

        See: https://en.wikipedia.org/wiki/Kullback-Leibler_divergence
        """
        # Check size of input
        if not len(samples.shape) == 2:
            raise ValueError('Given samples list must be n x n_parameters.')
        if samples.shape[1] != self._n_parameters:
            raise ValueError(
                'Given samples must have length ' +
                str(self._n_parameters))

        # Calculate the Kullback-Leibler divergence between the given samples
        # and the multivariate normal distribution underlying this log-pdf.
        # From Wikipedia:
        #
        #   k = dimension of distribution
        #   dkl = 0.5 * (
        #       trace(s1^-1 * s0)
        #       + (m1 - m0)^T * s1^-1 * (m1 - m0)
        #       + log(det(s1) / det(s0))
        #       - k)
        #
        # Here s1 is this distribution's covariance matrix and m1 its (zero)
        # mean, while s0 and m0 are estimated from the given samples.
        y = np.array(samples, copy=True)
        m0 = np.mean(y, axis=0)
        s0 = np.cov(y.T)
        s1 = self._cov
        m1 = self._mean
        s1_inv = np.linalg.inv(s1)
        return 0.5 * (
            np.trace(np.matmul(s1_inv, s0))
            + np.matmul(np.matmul(m1 - m0, s1_inv), m1 - m0)
            - np.log(np.linalg.det(s0))
            + np.log(np.linalg.det(s1))
            - self._n_parameters)
    def n_parameters(self):
        """ See :meth:`pints.LogPDF.n_parameters()`. """
        return self._n_parameters
    def rho(self):
        """ Returns ``rho`` (the correlation between dimensions). """
        return self._rho
    def sample(self, n_samples):
        """ See :meth:`pints.toy.ToyLogPDF.sample()`. """
        n_samples = int(n_samples)
        if n_samples < 1:
            raise ValueError(
                'Number of samples must be greater than or equal to 1.')
        return self._var.rvs(n_samples)
    def suggested_bounds(self):
        """ See :meth:`pints.toy.ToyLogPDF.suggested_bounds()`. """
        # The maximum variance in any one dimension is n_parameters, so use
        # 3 times the square root of this as the bounds' magnitude
        magnitude = 3 * np.sqrt(self.n_parameters())
        bounds = np.tile([-magnitude, magnitude], (self.n_parameters(), 1))
        return np.transpose(bounds).tolist()
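

if __name__ == '__main__':
    # Minimal usage sketch (not part of the original module). Because this
    # file uses a relative import, run it as a module, e.g.
    # ``python -m pints.toy._high_dimensional_gaussian``.
    log_pdf = HighDimensionalGaussianLogPDF(dimension=5, rho=0.5)

    # Log-density and its gradient at the mean (the gradient is zero there,
    # since dL/dx = -Sigma^-1 (x - mu))
    L, dL = log_pdf.evaluateS1(np.zeros(5))
    print('log-pdf at mean:', L)
    print('gradient at mean:', dL)

    # The KL divergence of a large exact sample should be close to zero
    samples = log_pdf.sample(100000)
    print('KL divergence:', log_pdf.kl_divergence(samples))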