Source code for bootstrap_stat.significance

# pyre-unsafe
"""Achieved significance level and power analysis methods."""

from typing import Any

import numpy as np
import numpy.typing as npt
import scipy.stats as ss

from bootstrap_stat._utils import (
    ArrayLike,
    JackknifeValues,
    RNGLike,
    Statistic,
    _apply_rng,
    _bca_acceleration,
)
from bootstrap_stat.distributions import EmpiricalDistribution
from bootstrap_stat.sampling import bootstrap_samples, jackknife_values



[docs]
def bootstrap_asl(
    dist: EmpiricalDistribution,
    stat: Statistic,
    x: ArrayLike | tuple[ArrayLike, ...] | None,
    B: int = 1000,
    size: int | tuple[int, ...] | None = None,
    return_samples: bool = False,
    theta_star: npt.NDArray[np.float64] | None = None,
    theta_hat: float | None = None,
    two_sided: bool = False,
    num_threads: int = 1,
    rng: RNGLike = None,
) -> float | tuple[float, npt.NDArray[np.float64]]:
    """Achieved Significance Level, general bootstrap method

    Parameters
    ----------
     dist : EmpiricalDistribution
        The empirical distribution.
     stat : function
        The test statistic.
     x : array_like or pandas DataFrame or tuple
        The data. It isn't used for anything, so the caller can pass
        None if the data are not available, but it is here for
        consistency across asl routines.
     B : int, optional
        Number of bootstrap samples. Defaults to 1000.
     size : int or tuple of ints, optional
        Size to pass for generating samples from the alternative
        distribution. Defaults to None.
     return_samples : boolean, optional
        If True, return the bootstrapped statistic or jackknife
        values. Defaults to False.
     theta_star : array_like, optional
        Bootstrapped statistic values. Can be passed if they have
        already been calculated, which will speed this up
        considerably.
     theta_hat : float, optional
        Observed statistic. Can be passed if it has already been
        calculated, which will speed this up slightly.
     two_sided : boolean, optional
        If True, computes a two-sided significance value. If False
        (default), only a one-sided value is returned. Support for
        two-sided tests is *experimental*. Use with caution!
     num_threads : int, optional
        Number of threads to use for multicore processing. Defaults to
        1, meaning all calculations will be done in a single
        thread. Set to -1 to use all available cores.

    Returns
    -------
     asl : float
       Achieved significance level, the probability of an outcome at
       least as extreme as that actually observed under the null
       hypothesis; aka the p-value.
     theta_star : ndarray
        Array of bootstrapped statistic values. Only returned if
        `return_samples` is True.

    See Also
    --------
    percentile_asl : Percentile method for achieved significance level.
    bcanon_asl : BCa-corrected achieved significance level.
    bootstrap_power : Bootstrap estimate of statistical power.

    Notes
    -----
    The bootstrap ASL estimates the p-value by sampling from a null
    distribution---an empirical distribution for which the null
    hypothesis is true---and counting the fraction of bootstrap
    statistics at least as extreme as the observed value. The caller
    is responsible for constructing the null distribution. For
    example, if the null hypothesis is that a population mean equals
    zero, ``dist`` should be built from a shifted copy of the data.
    The argument ``x`` is accepted but not used; it is present for
    consistency with :func:`percentile_asl` and :func:`bcanon_asl`.
    See [ET93]_ (S16.3) for details.

    """
    asl = 0

    if theta_hat is None:
        theta_hat = stat(x)

    if theta_star is None:
        theta_star = bootstrap_samples(
            dist, stat, B, size=size, num_threads=num_threads, rng=rng
        )

    if two_sided:
        asl = (abs(theta_star) >= abs(theta_hat)).sum()
    else:
        asl = (theta_star >= theta_hat).sum()

    asl /= len(theta_star)
    if return_samples:
        return asl, theta_star
    else:
        return asl




[docs]
def percentile_asl(
    dist: EmpiricalDistribution,
    stat: Statistic,
    x: ArrayLike | tuple[ArrayLike, ...],
    theta_0: float = 0,
    B: int = 1000,
    size: int | tuple[int, ...] | None = None,
    return_samples: bool = False,
    theta_star: npt.NDArray[np.float64] | None = None,
    theta_hat: float | None = None,
    two_sided: bool = False,
    num_threads: int = 1,
    rng: RNGLike = None,
) -> float | tuple[float, npt.NDArray[np.float64]]:
    r"""Achieved Significance Level, percentile method

    Parameters
    ----------
     dist : EmpiricalDistribution
        The empirical distribution.
     stat : function
        The test statistic.
     x : array_like or pandas DataFrame or tuple
        The data, used to calculate the observed value of the
        statistic if `theta_hat` is not passed.
     theta_0 : float, optional
        The mean of the test statistic under the null
        hypothesis. Defaults to 0.
     B : int, optional
        Number of bootstrap samples.
     size : int or tuple of ints, optional
        Size to pass for generating samples from the alternative
        distribution. Defaults to None.
     return_samples : boolean, optional
        If True, return the bootstrapped statistic or jackknife
        values. Defaults to False.
     theta_star : array_like, optional
        Bootstrapped statistic values. Can be passed if they have
        already been calculated, which will speed this up
        considerably.
     theta_hat : float, optional
        Observed statistic. Can be passed if it has already been
        calculated, which will speed this up slightly.
     two_sided : boolean, optional
        If True, computes a two-sided significance value. If False
        (default), only a one-sided value is returned. Support for
        two-sided tests is *experimental*. Use with caution!
     num_threads : int, optional
        Number of threads to use for multicore processing. Defaults to
        1, meaning all calculations will be done in a single
        thread. Set to -1 to use all available cores.

    Returns
    -------
     asl : float
       Achieved significance level, the probability of an outcome at
       least as extreme as that actually observed under the null
       hypothesis; aka the p-value.
     theta_star : ndarray
        Array of bootstrapped statistic values. Only returned if
        `return_samples` is True.

    See Also
    --------
    bootstrap_asl : General bootstrap achieved significance level.
    bcanon_asl : BCa-corrected achieved significance level.
    bootstrap_power : Bootstrap estimate of statistical power.

    Notes
    -----
    Under the null hypothesis, the value of the statistic is
    :math:`\theta_0`. Suppose :math:`\hat{\theta} > \theta_0`. Let
    :math:`\theta_{\mathrm{lo}}`, :math:`\theta_{\mathrm{hi}}` be the
    endpoints of a :math:`100(1 - \alpha)\%` confidence interval on
    :math:`\theta`. Suppose :math:`\alpha` is such that
    :math:`\theta_{\mathrm{lo}} = \theta_0`. Then :math:`\alpha` is
    the achieved significance level.

    For the percentile interval, this is simply the fraction of
    bootstrap samples that are "on the other side" of :math:`\theta_0`
    from :math:`\hat{\theta}`.

    """
    if theta_hat is None:
        theta_hat = stat(x)

    if theta_hat == theta_0:
        return 1.0

    if theta_star is None:
        theta_star = bootstrap_samples(
            dist, stat, B, size=size, num_threads=num_threads, rng=rng
        )

    if theta_hat > theta_0:
        b = (theta_star < theta_0).sum()
    else:
        b = (theta_star > theta_0).sum()

    asl = b / len(theta_star)
    if two_sided:
        asl *= 2

    if return_samples:
        return asl, theta_star
    else:
        return asl




[docs]
def bcanon_asl(
    dist: EmpiricalDistribution,
    stat: Statistic,
    x: ArrayLike | tuple[ArrayLike, ...],
    theta_0: float = 0,
    B: int = 1000,
    size: int | tuple[int, ...] | None = None,
    return_samples: bool = False,
    theta_star: npt.NDArray[np.float64] | None = None,
    theta_hat: float | None = None,
    jv: JackknifeValues | None = None,
    two_sided: bool = False,
    num_threads: int = 1,
    rng: RNGLike = None,
) -> float | tuple[float, npt.NDArray[np.float64], JackknifeValues | None]:
    r"""Achieved Significance Level, bcanon method

    Parameters
    ----------
     dist : EmpiricalDistribution
        The empirical distribution.
     stat : function
        The test statistic.
     x : array_like or pandas DataFrame or tuple
        The data, used to evaluate the observed statistic and compute
        jackknife values.
     theta_0 : float, optional
        The mean of the test statistic under the null
        hypothesis. Defaults to 0.
     B : int, optional
        Number of bootstrap samples.
     size : int or tuple of ints, optional
        Size to pass for generating samples from the alternative
        distribution. Defaults to None.
     return_samples : boolean, optional
        If True, return the bootstrapped statistic or jackknife
        values. Defaults to False.
     theta_star : array_like, optional
        Bootstrapped statistic values. Can be passed if they have
        already been calculated, which will speed this up
        considerably.
     theta_hat : float, optional
        Observed statistic. Can be passed if it has already been
        calculated, which will speed this up slightly.
     jv : array_like, optional
        Jackknife values. Can be passed if they have already been
        calculated, which will speed this up considerably.
     two_sided : boolean, optional
        If True, computes a two-sided significance value. If False
        (default), only a one-sided value is returned. Support for
        two-sided tests is *experimental*. Use with caution!
     num_threads : int, optional
        Number of threads to use for multicore processing. Defaults to
        1, meaning all calculations will be done in a single
        thread. Set to -1 to use all available cores.

    Returns
    -------
     asl : float
       Achieved significance level, the probability of an outcome at
       least as extreme as that actually observed under the null
       hypothesis; aka the p-value.
     theta_star : ndarray
        Array of bootstrapped statistic values. Only returned if
        `return_samples` is True.
     jv : ndarray
        Jackknife values. Only returned if `return_samples` is True.

    See Also
    --------
    bootstrap_asl : General bootstrap achieved significance level.
    percentile_asl : Percentile method for achieved significance level.
    bcanon_interval : Related BCa confidence interval.
    bootstrap_power : Bootstrap estimate of statistical power.

    Notes
    -----
    The BCa-corrected ASL applies the same bias-correction and
    acceleration adjustments used in :func:`bcanon_interval` to the
    percentile-based ASL from :func:`percentile_asl`. Where
    :func:`percentile_asl` simply counts bootstrap statistics on the
    far side of ``theta_0``, this function converts that count to a
    normal-quantile, shifts it by the bias-correction term
    :math:`\hat{z}_0`, and further adjusts by the acceleration
    :math:`\hat{a}`, which accounts for skewness in the sampling
    distribution of the statistic. The result is more accurate than
    the percentile ASL when the statistic is not symmetrically
    distributed. See [ET93]_ (S14.3) for details.

    """
    if theta_hat is None:
        theta_hat = stat(x)

    a0, theta_star = percentile_asl(
        dist,
        stat,
        x,
        theta_0=theta_0,
        B=B,
        size=size,
        return_samples=True,
        theta_star=theta_star,
        theta_hat=theta_hat,
        two_sided=False,
        num_threads=num_threads,
        rng=rng,
    )
    if a0 == 0 or a0 == 1:
        if return_samples:
            return a0, theta_star, None
        else:
            return a0

    if theta_hat > theta_0:
        zb = (theta_star < theta_hat).sum()
    else:
        zb = (theta_star > theta_hat).sum()

    z0_hat = ss.norm.ppf(zb / len(theta_star))

    if jv is None:
        if dist.is_multi_sample:
            jv = [
                jackknife_values(x, stat, num_threads=num_threads, sample=i)
                for i in range(len(x))
            ]
            jv = (*jv,)
        else:
            jv = jackknife_values(x, stat, num_threads=num_threads)

    a_hat = _bca_acceleration(jv)
    w0 = ss.norm.ppf(a0)
    t = (w0 - z0_hat) / (1 + a_hat * (w0 - z0_hat)) - z0_hat
    asl = ss.norm.cdf(t)

    if two_sided:
        # To-do: generalize as follows.
        # What we really want is stat with the samples
        # interchanged. In many cases of practically interest, that is
        # simply -stat, as shown below. But that's not always the case
        # and we can be more general by allowing the user to pass a
        # transform mapping stat to interchange stat. Need to think
        # this through more.
        other_asl = bcanon_asl(
            dist,
            lambda z: -stat(z),
            x,
            theta_hat=-theta_hat,
            theta_0=-theta_0,
            theta_star=-theta_star,
            two_sided=False,
            num_threads=num_threads,
        )
        asl += other_asl

    if return_samples:
        return asl, theta_star, jv
    else:
        return asl




[docs]
def bootstrap_power(
    alt_dist: EmpiricalDistribution,
    null_dist: type[EmpiricalDistribution],
    stat: Statistic,
    asl: Statistic = bootstrap_asl,
    alpha: float = 0.05,
    size: int | tuple[int, ...] | None = None,
    P: int = 100,
    rng: RNGLike = None,
    **kwargs: Any,
) -> float:
    """Bootstrap Power

    Parameters
    ----------
     alt_dist : EmpiricalDistribution
        Distribution under the alternative hypothesis.
     null_dist : class
        Class corresponding to the null distribution. See Notes.
     stat : function
        Function that computes the test statistic.
     asl : function, optional
        Function that computes an achieved significance
        level. Defaults to bootstrap_asl.
     alpha : float, optional
        Desired Type-I error rate. Defaults to 0.05.
     size : int or tuple of ints, optional
        Size to pass for generating samples from the alternative
        distribution. Defaults to None.
     P : int, optional
        Number of Monte Carlo simulations to run for the purposes of
        calculating power. Defaults to 100.
     kwargs : optional
        Other keyword arguments to pass to `asl`, such as the number
        of bootstrap samples to use.

    Returns
    -------
     pwr : float
        The fraction of Monte Carlo simulations in which the null
        hypothesis was rejected.

    See Also
    --------
    bootstrap_asl : General bootstrap achieved significance level.
    bcanon_asl : BCa-corrected achieved significance level.

    Notes
    -----
    Perhaps the most confusing aspect of this function is that there
    are two distribution passed as input, and they are of a different
    form. The `alt_dist` should be passed as an instance of an
    :class:`EmpiricalDistribution` or a subclass thereof. We use this
    parameter to generate samples from that distribution. We then need
    to generate an :class:`EmpiricalDistribution` from that sample,
    for which we
    need the *class* corresponding to the null distribution, not an
    instance thereof. I recognize this is confusing!

    """
    if rng is not None:
        _apply_rng(alt_dist, rng)

    # Spawn one rng per Monte Carlo trial so each sim_dist has its own
    # independent stream while remaining reproducible from `rng`.
    sim_rngs = alt_dist._rng.spawn(P)

    rejections = 0
    for i in range(P):
        sample = alt_dist.sample(size=size)
        sim_dist = null_dist(sample)
        _apply_rng(sim_dist, sim_rngs[i])
        a = asl(sim_dist, stat, sample, **kwargs)
        if a <= alpha:
            rejections += 1
    return rejections / P