Source code for pints._mcmc._slice_rank_shrinking

#
# Slice Sampling - Covariance Adaptive: Rank Shrinking
#
# This file is part of PINTS (https://github.com/pints-team/pints/) which is
# released under the BSD 3-clause license. See accompanying LICENSE.md for
# copyright notice and full license details.
#
import pints
import numpy as np


[docs]class SliceRankShrinkingMCMC(pints.SingleChainMCMC):
    r"""
    Implements Covariance-Adaptive slice sampling by "rank shrinking",
    as introduced in [1]_ with pseudocode given in Fig. 5.

    This is an adaptive multivariate method which uses additional points,
    called "crumbs", and rejected proposals to guide the selection of samples.

    It generates samples by sampling uniformly from the volume underneath the
    posterior (:math:`f`). It does so by introducing an auxiliary variable
    (:math:`y`) that guide the path of a Markov chain.

    Sampling follows:

    1. Calculate the pdf (:math:`f(x_0)`) of the current sample
    :math:`(x_0)`.
    2. Draw a real value (:math:`y`) uniformly from :math:`(0, f(x0))`,
    defining a horizontal "slice": :math:`S = {x: y < f(x)}`. Note that
    :math:`x_0` is always within :math:`S`.
    3. Draw the first crumb (:math:`c_1`) from a Gaussian distribution with
    mean :math:`x_0` and precision matrix :math:`W_1`.
    4. Draw a new point (:math:`x_1`) from a Gaussian distribution with mean
    :math:`c_1` and precision matrix :math:`W_2`.

    New crumbs are drawn until a new proposal is accepted. In particular,
    after sampling :math:`k` crumbs from Gaussian distributions with mean
    :math:`x0` and precision matrices :math:`(W_1, ..., W_k)`, the distribution
    for the kth proposal sample is:

    .. math::
        x_k \sim Normal(\bar{c}_k, \Lambda^{-1}_k)

    where:

       :math:`\Lambda_k = W_1 + ... + W_k`
       :math:`\bar{c}_k = \Lambda^{-1}_k * (W_1 * c_1 + ... + W_k * c_k)`

    This method aims to conveniently modify the (k+1)th proposal distribution
    to increase the likelihood of sampling an acceptable point. It does so by
    calculating the gradient (:math:`g(f(x))`) of the unnormalised posterior
    (:math:`f(x)`) at the last rejected point (:math:`x_k`). It then sets the
    conditional variance of the (k + 1)th proposal distribution in the
    direction of the gradient :math:`g(f(x_k))` to 0. This is reasonable in
    that the gradient at a proposal probably points in a direction where the
    variance is small, so it is more efficient to move in a different
    direction.

    To avoid floating-point underflow, we implement the suggestion advanced
    in [2]_ pp.712. We use the log pdf of the un-normalised posterior
    (:math:`\text{log} f(x)`) instead of :math:`f(x)`. In doing so, we use an
    auxiliary variable :math:`z = log(y) - \epsilon`, where
    :math:`\epsilon \sim \text{exp}(1)` and define the slice as
    :math:`S = {x : z < log f(x)}`.

    Extends :class:`SingleChainMCMC`.

    References
    ----------
    .. [1] "Covariance-Adaptive Slice Sampling", 2010, M Thompson and RM Neal,
           Technical Report No. 1002, Department of Statistics, University of
           Toronto
    .. [2] "Slice sampling", 2003, Neal, R.M., The annals of statistics, 31(3),
           pp.705-767. https://doi.org/10.1214/aos/1056562461
    """

    def __init__(self, x0, sigma0=None):
        super(SliceRankShrinkingMCMC, self).__init__(x0, sigma0)

        # Set initial state
        self._x0 = np.asarray(x0, dtype=float)
        self._running = False
        self._ready_for_tell = False
        self._current = None
        self._current_log_y = None
        self._proposed = None

        # Standard deviation of initial crumb
        self._sigma_c = 1

        # Matrix of orthonormal columns of directions in which the conditional
        # variance is zero
        self._J = np.zeros((self._n_parameters, 0), float)

        # Number of crumbs
        self._k = 0

        # Mean of unprojected proposal distribution
        self._c_bar = 0

    # Function returning the component of vector v orthogonal to the
    # columns of J
    def _p(self, J, v):
        if not J.any():
            return np.array(v, copy=True)
        else:
            return np.array(v - np.dot(J, np.dot(J.transpose(), v)), copy=True)

[docs]    def ask(self):
        """ See :meth:`SingleChainMCMC.ask()`. """

        # Check ask/tell pattern
        if self._ready_for_tell:
            raise RuntimeError('Ask() called when expecting call to tell().')

        # Initialise on first call
        if not self._running:
            self._running = True

        # Very first iteration
        if self._current is None:

            # Ask for the log pdf of x0
            self._ready_for_tell = True
            return np.array(self._x0, copy=True)

        # Increase crumbs count
        self._k += 1

        # Sample crumb
        mean = np.zeros(self._n_parameters)
        cov = np.identity(self._n_parameters)
        z = np.random.multivariate_normal(mean, cov)
        c = self._sigma_c * z

        # Mean of proposal distribution
        self._c_bar = ((self._k - 1) * self._c_bar + c) / self._k

        # Sample trial point
        z = np.random.multivariate_normal(mean, cov)
        self._proposed = self._current + self._p(
            self._J, self._c_bar + self._sigma_c / np.sqrt(self._k) * z)

        # Send trial point for checks
        self._ready_for_tell = True
        return np.copy(self._proposed)

[docs]    def current_slice_height(self):
        """
        Returns the height of the current slice.
        """
        return self._current_log_y

[docs]    def name(self):
        """ See :meth:`pints.MCMCSampler.name()`. """
        return 'Slice Sampling - Covariance-Adaptive: Rank Shrinking.'

[docs]    def needs_sensitivities(self):
        """ See :meth:`pints.MCMCSampler.needs_sensitivities()`. """
        return True

[docs]    def n_hyper_parameters(self):
        """ See :meth:`TunableMethod.n_hyper_parameters()`. """
        return 1

[docs]    def set_hyper_parameters(self, x):
        """
        The hyper-parameter vector is ``[sigma_c]``.
        See :meth:`TunableMethod.set_hyper_parameters()`.
        """
        self.set_sigma_c(x[0])

[docs]    def set_sigma_c(self, sigma_c):
        """
        Sets standard deviation of initial crumb distribution.
        """
        sigma_c = float(sigma_c)
        if sigma_c < 0:
            raise ValueError(
                'Inital crumb standard deviation must be positive.')
        self._sigma_c = sigma_c

[docs]    def sigma_c(self):
        """
        Returns standard deviation of initial crumb distribution.
        """
        return self._sigma_c

[docs]    def tell(self, reply):
        """ See :meth:`pints.SingleChainMCMC.tell()`. """

        # Check ask/tell pattern
        if not self._ready_for_tell:
            raise RuntimeError('Tell called before proposal was set.')
        self._ready_for_tell = False

        # Unpack reply
        fx, grad = reply

        # Check reply, copy gradient
        fx = float(fx)
        grad = pints.vector(grad)
        assert(grad.shape == (self._n_parameters, ))

        # Very first call
        if self._current is None:
            # Check first point is somewhere sensible
            if not np.isfinite(fx):
                raise ValueError(
                    'Initial point for MCMC must have finite logpdf.')

            # Set current sample, log pdf of current sample and initialise
            # proposed sample for next iteration
            self._current = np.array(self._x0, copy=True)
            self._proposed = np.array(self._current, copy=True)

            # Sample height of the slice log_y for next iteration
            e = np.random.exponential(1)
            self._current_log_y = fx - e

            # Return first point in chain, which is x0
            # Note: `grad` is not stored in this iteration, so can return
            return np.copy(self._current), (fx, grad), True

        # Acceptance check
        if fx >= self._current_log_y:
            # The accepted sample becomes the new current sample
            self._current = np.copy(self._proposed)

            # Sample new log_y used to define the next slice
            e = np.random.exponential(1)
            self._current_log_y = fx - e

            # Reset parameters
            self._J = np.zeros((self._n_parameters, 0), float)
            self._k = 0
            self._c_bar = 0

            # Return accepted sample
            # Note: `grad` is not stored in this iteration, so can return
            return np.copy(self._current), (fx, grad), True

        # If proposal is reject, shrink rank of the next proposal distribution
        # by adding new orthonormal column to ``J``. This will represent a new
        # direction in which the conditional covariance of the proposal
        # distribution will be 0.
        else:
            # If J has less non-zero columns than``number of
            # parameters - 1``, shrink rank by adding new orthonormal
            # column

            if self._J.shape[1] < self._n_parameters - 1:
                # Gradient projection
                g_star = self._p(self._J, grad)

                # To prevent meaningless adaptations, we only perform this
                # operation when the angle between the gradient and its
                # projection into the nullspace of J is less than 60 degrees.
                if np.dot(g_star.transpose(), grad) > (
                        .5 * np.linalg.norm(g_star) * np.linalg.norm(grad)):
                    new_column = np.array(g_star / np.linalg.norm(g_star))
                    self._J = np.column_stack([self._J, new_column])
            return None
Source code for pints._mcmc._slice_rank_shrinking

Pints

Navigation

Related Topics