# pyre-unsafe
"""Achieved significance level and power analysis methods."""
from typing import Any
import numpy as np
import numpy.typing as npt
import scipy.stats as ss
from bootstrap_stat._utils import (
ArrayLike,
JackknifeValues,
RNGLike,
Statistic,
_apply_rng,
_bca_acceleration,
)
from bootstrap_stat.distributions import EmpiricalDistribution
from bootstrap_stat.sampling import bootstrap_samples, jackknife_values
[docs]
def bootstrap_asl(
dist: EmpiricalDistribution,
stat: Statistic,
x: ArrayLike | tuple[ArrayLike, ...] | None,
B: int = 1000,
size: int | tuple[int, ...] | None = None,
return_samples: bool = False,
theta_star: npt.NDArray[np.float64] | None = None,
theta_hat: float | None = None,
two_sided: bool = False,
num_threads: int = 1,
rng: RNGLike = None,
) -> float | tuple[float, npt.NDArray[np.float64]]:
"""Achieved Significance Level, general bootstrap method
Parameters
----------
dist : EmpiricalDistribution
The empirical distribution.
stat : function
The test statistic.
x : array_like or pandas DataFrame or tuple
The data. It isn't used for anything, so the caller can pass
None if the data are not available, but it is here for
consistency across asl routines.
B : int, optional
Number of bootstrap samples. Defaults to 1000.
size : int or tuple of ints, optional
Size to pass for generating samples from the alternative
distribution. Defaults to None.
return_samples : boolean, optional
If True, return the bootstrapped statistic or jackknife
values. Defaults to False.
theta_star : array_like, optional
Bootstrapped statistic values. Can be passed if they have
already been calculated, which will speed this up
considerably.
theta_hat : float, optional
Observed statistic. Can be passed if it has already been
calculated, which will speed this up slightly.
two_sided : boolean, optional
If True, computes a two-sided significance value. If False
(default), only a one-sided value is returned. Support for
two-sided tests is *experimental*. Use with caution!
num_threads : int, optional
Number of threads to use for multicore processing. Defaults to
1, meaning all calculations will be done in a single
thread. Set to -1 to use all available cores.
Returns
-------
asl : float
Achieved significance level, the probability of an outcome at
least as extreme as that actually observed under the null
hypothesis; aka the p-value.
theta_star : ndarray
Array of bootstrapped statistic values. Only returned if
`return_samples` is True.
See Also
--------
percentile_asl : Percentile method for achieved significance level.
bcanon_asl : BCa-corrected achieved significance level.
bootstrap_power : Bootstrap estimate of statistical power.
Notes
-----
The bootstrap ASL estimates the p-value by sampling from a null
distribution---an empirical distribution for which the null
hypothesis is true---and counting the fraction of bootstrap
statistics at least as extreme as the observed value. The caller
is responsible for constructing the null distribution. For
example, if the null hypothesis is that a population mean equals
zero, ``dist`` should be built from a shifted copy of the data.
The argument ``x`` is accepted but not used; it is present for
consistency with :func:`percentile_asl` and :func:`bcanon_asl`.
See [ET93]_ (S16.3) for details.
"""
asl = 0
if theta_hat is None:
theta_hat = stat(x)
if theta_star is None:
theta_star = bootstrap_samples(
dist, stat, B, size=size, num_threads=num_threads, rng=rng
)
if two_sided:
asl = (abs(theta_star) >= abs(theta_hat)).sum()
else:
asl = (theta_star >= theta_hat).sum()
asl /= len(theta_star)
if return_samples:
return asl, theta_star
else:
return asl
[docs]
def percentile_asl(
dist: EmpiricalDistribution,
stat: Statistic,
x: ArrayLike | tuple[ArrayLike, ...],
theta_0: float = 0,
B: int = 1000,
size: int | tuple[int, ...] | None = None,
return_samples: bool = False,
theta_star: npt.NDArray[np.float64] | None = None,
theta_hat: float | None = None,
two_sided: bool = False,
num_threads: int = 1,
rng: RNGLike = None,
) -> float | tuple[float, npt.NDArray[np.float64]]:
r"""Achieved Significance Level, percentile method
Parameters
----------
dist : EmpiricalDistribution
The empirical distribution.
stat : function
The test statistic.
x : array_like or pandas DataFrame or tuple
The data, used to calculate the observed value of the
statistic if `theta_hat` is not passed.
theta_0 : float, optional
The mean of the test statistic under the null
hypothesis. Defaults to 0.
B : int, optional
Number of bootstrap samples.
size : int or tuple of ints, optional
Size to pass for generating samples from the alternative
distribution. Defaults to None.
return_samples : boolean, optional
If True, return the bootstrapped statistic or jackknife
values. Defaults to False.
theta_star : array_like, optional
Bootstrapped statistic values. Can be passed if they have
already been calculated, which will speed this up
considerably.
theta_hat : float, optional
Observed statistic. Can be passed if it has already been
calculated, which will speed this up slightly.
two_sided : boolean, optional
If True, computes a two-sided significance value. If False
(default), only a one-sided value is returned. Support for
two-sided tests is *experimental*. Use with caution!
num_threads : int, optional
Number of threads to use for multicore processing. Defaults to
1, meaning all calculations will be done in a single
thread. Set to -1 to use all available cores.
Returns
-------
asl : float
Achieved significance level, the probability of an outcome at
least as extreme as that actually observed under the null
hypothesis; aka the p-value.
theta_star : ndarray
Array of bootstrapped statistic values. Only returned if
`return_samples` is True.
See Also
--------
bootstrap_asl : General bootstrap achieved significance level.
bcanon_asl : BCa-corrected achieved significance level.
bootstrap_power : Bootstrap estimate of statistical power.
Notes
-----
Under the null hypothesis, the value of the statistic is
:math:`\theta_0`. Suppose :math:`\hat{\theta} > \theta_0`. Let
:math:`\theta_{\mathrm{lo}}`, :math:`\theta_{\mathrm{hi}}` be the
endpoints of a :math:`100(1 - \alpha)\%` confidence interval on
:math:`\theta`. Suppose :math:`\alpha` is such that
:math:`\theta_{\mathrm{lo}} = \theta_0`. Then :math:`\alpha` is
the achieved significance level.
For the percentile interval, this is simply the fraction of
bootstrap samples that are "on the other side" of :math:`\theta_0`
from :math:`\hat{\theta}`.
"""
if theta_hat is None:
theta_hat = stat(x)
if theta_hat == theta_0:
return 1.0
if theta_star is None:
theta_star = bootstrap_samples(
dist, stat, B, size=size, num_threads=num_threads, rng=rng
)
if theta_hat > theta_0:
b = (theta_star < theta_0).sum()
else:
b = (theta_star > theta_0).sum()
asl = b / len(theta_star)
if two_sided:
asl *= 2
if return_samples:
return asl, theta_star
else:
return asl
[docs]
def bcanon_asl(
dist: EmpiricalDistribution,
stat: Statistic,
x: ArrayLike | tuple[ArrayLike, ...],
theta_0: float = 0,
B: int = 1000,
size: int | tuple[int, ...] | None = None,
return_samples: bool = False,
theta_star: npt.NDArray[np.float64] | None = None,
theta_hat: float | None = None,
jv: JackknifeValues | None = None,
two_sided: bool = False,
num_threads: int = 1,
rng: RNGLike = None,
) -> float | tuple[float, npt.NDArray[np.float64], JackknifeValues | None]:
r"""Achieved Significance Level, bcanon method
Parameters
----------
dist : EmpiricalDistribution
The empirical distribution.
stat : function
The test statistic.
x : array_like or pandas DataFrame or tuple
The data, used to evaluate the observed statistic and compute
jackknife values.
theta_0 : float, optional
The mean of the test statistic under the null
hypothesis. Defaults to 0.
B : int, optional
Number of bootstrap samples.
size : int or tuple of ints, optional
Size to pass for generating samples from the alternative
distribution. Defaults to None.
return_samples : boolean, optional
If True, return the bootstrapped statistic or jackknife
values. Defaults to False.
theta_star : array_like, optional
Bootstrapped statistic values. Can be passed if they have
already been calculated, which will speed this up
considerably.
theta_hat : float, optional
Observed statistic. Can be passed if it has already been
calculated, which will speed this up slightly.
jv : array_like, optional
Jackknife values. Can be passed if they have already been
calculated, which will speed this up considerably.
two_sided : boolean, optional
If True, computes a two-sided significance value. If False
(default), only a one-sided value is returned. Support for
two-sided tests is *experimental*. Use with caution!
num_threads : int, optional
Number of threads to use for multicore processing. Defaults to
1, meaning all calculations will be done in a single
thread. Set to -1 to use all available cores.
Returns
-------
asl : float
Achieved significance level, the probability of an outcome at
least as extreme as that actually observed under the null
hypothesis; aka the p-value.
theta_star : ndarray
Array of bootstrapped statistic values. Only returned if
`return_samples` is True.
jv : ndarray
Jackknife values. Only returned if `return_samples` is True.
See Also
--------
bootstrap_asl : General bootstrap achieved significance level.
percentile_asl : Percentile method for achieved significance level.
bcanon_interval : Related BCa confidence interval.
bootstrap_power : Bootstrap estimate of statistical power.
Notes
-----
The BCa-corrected ASL applies the same bias-correction and
acceleration adjustments used in :func:`bcanon_interval` to the
percentile-based ASL from :func:`percentile_asl`. Where
:func:`percentile_asl` simply counts bootstrap statistics on the
far side of ``theta_0``, this function converts that count to a
normal-quantile, shifts it by the bias-correction term
:math:`\hat{z}_0`, and further adjusts by the acceleration
:math:`\hat{a}`, which accounts for skewness in the sampling
distribution of the statistic. The result is more accurate than
the percentile ASL when the statistic is not symmetrically
distributed. See [ET93]_ (S14.3) for details.
"""
if theta_hat is None:
theta_hat = stat(x)
a0, theta_star = percentile_asl(
dist,
stat,
x,
theta_0=theta_0,
B=B,
size=size,
return_samples=True,
theta_star=theta_star,
theta_hat=theta_hat,
two_sided=False,
num_threads=num_threads,
rng=rng,
)
if a0 == 0 or a0 == 1:
if return_samples:
return a0, theta_star, None
else:
return a0
if theta_hat > theta_0:
zb = (theta_star < theta_hat).sum()
else:
zb = (theta_star > theta_hat).sum()
z0_hat = ss.norm.ppf(zb / len(theta_star))
if jv is None:
if dist.is_multi_sample:
jv = [
jackknife_values(x, stat, num_threads=num_threads, sample=i)
for i in range(len(x))
]
jv = (*jv,)
else:
jv = jackknife_values(x, stat, num_threads=num_threads)
a_hat = _bca_acceleration(jv)
w0 = ss.norm.ppf(a0)
t = (w0 - z0_hat) / (1 + a_hat * (w0 - z0_hat)) - z0_hat
asl = ss.norm.cdf(t)
if two_sided:
# To-do: generalize as follows.
# What we really want is stat with the samples
# interchanged. In many cases of practically interest, that is
# simply -stat, as shown below. But that's not always the case
# and we can be more general by allowing the user to pass a
# transform mapping stat to interchange stat. Need to think
# this through more.
other_asl = bcanon_asl(
dist,
lambda z: -stat(z),
x,
theta_hat=-theta_hat,
theta_0=-theta_0,
theta_star=-theta_star,
two_sided=False,
num_threads=num_threads,
)
asl += other_asl
if return_samples:
return asl, theta_star, jv
else:
return asl
[docs]
def bootstrap_power(
alt_dist: EmpiricalDistribution,
null_dist: type[EmpiricalDistribution],
stat: Statistic,
asl: Statistic = bootstrap_asl,
alpha: float = 0.05,
size: int | tuple[int, ...] | None = None,
P: int = 100,
rng: RNGLike = None,
**kwargs: Any,
) -> float:
"""Bootstrap Power
Parameters
----------
alt_dist : EmpiricalDistribution
Distribution under the alternative hypothesis.
null_dist : class
Class corresponding to the null distribution. See Notes.
stat : function
Function that computes the test statistic.
asl : function, optional
Function that computes an achieved significance
level. Defaults to bootstrap_asl.
alpha : float, optional
Desired Type-I error rate. Defaults to 0.05.
size : int or tuple of ints, optional
Size to pass for generating samples from the alternative
distribution. Defaults to None.
P : int, optional
Number of Monte Carlo simulations to run for the purposes of
calculating power. Defaults to 100.
kwargs : optional
Other keyword arguments to pass to `asl`, such as the number
of bootstrap samples to use.
Returns
-------
pwr : float
The fraction of Monte Carlo simulations in which the null
hypothesis was rejected.
See Also
--------
bootstrap_asl : General bootstrap achieved significance level.
bcanon_asl : BCa-corrected achieved significance level.
Notes
-----
Perhaps the most confusing aspect of this function is that there
are two distribution passed as input, and they are of a different
form. The `alt_dist` should be passed as an instance of an
:class:`EmpiricalDistribution` or a subclass thereof. We use this
parameter to generate samples from that distribution. We then need
to generate an :class:`EmpiricalDistribution` from that sample,
for which we
need the *class* corresponding to the null distribution, not an
instance thereof. I recognize this is confusing!
"""
if rng is not None:
_apply_rng(alt_dist, rng)
# Spawn one rng per Monte Carlo trial so each sim_dist has its own
# independent stream while remaining reproducible from `rng`.
sim_rngs = alt_dist._rng.spawn(P)
rejections = 0
for i in range(P):
sample = alt_dist.sample(size=size)
sim_dist = null_dist(sample)
_apply_rng(sim_dist, sim_rngs[i])
a = asl(sim_dist, stat, sample, **kwargs)
if a <= alpha:
rejections += 1
return rejections / P