Source code for bootstrap_stat.confidence

# pyre-unsafe
"""Confidence interval methods."""

import multiprocessing as mp
import warnings
from collections.abc import Callable
from typing import Any

import numpy as np
import numpy.typing as npt
import scipy.optimize as optimize
import scipy.stats as ss
from pathos.multiprocessing import ProcessPool as Pool

from bootstrap_stat._utils import (
    ArrayLike,
    JackknifeValues,
    RNGLike,
    Statistic,
    _adjust_percentiles,
    _apply_rng,
    _bca_acceleration,
    _influence_components,
    _percentile,
    _spawn_rngs,
    loess,
)
from bootstrap_stat.distributions import EmpiricalDistribution
from bootstrap_stat.sampling import bootstrap_samples, jackknife_values
from bootstrap_stat.standard_error import standard_error



[docs]
def t_interval(
    dist: EmpiricalDistribution,
    stat: Statistic,
    theta_hat: float,
    stabilize_variance: bool = False,
    se_hat: float | None = None,
    fast_std_err: Callable[[ArrayLike], float] | None = None,
    alpha: float = 0.05,
    Binner: int = 25,
    Bouter: int = 1000,
    Bvar: int = 100,
    size: int | tuple[int, ...] | None = None,
    empirical_distribution: type[EmpiricalDistribution] = EmpiricalDistribution,
    return_samples: bool = False,
    theta_star: npt.NDArray[np.float64] | None = None,
    se_star: npt.NDArray[np.float64] | None = None,
    z_star: npt.NDArray[np.float64] | None = None,
    num_threads: int = 1,
    rng: RNGLike = None,
) -> (
    tuple[float, float]
    | tuple[float, float, npt.NDArray[np.float64], npt.NDArray[np.float64]]
    | tuple[
        float,
        float,
        npt.NDArray[np.float64],
        npt.NDArray[np.float64],
        npt.NDArray[np.float64],
    ]
):
    r"""Bootstrap-t Intervals

    Parameters
    ----------
     dist : EmpiricalDistribution
        The empirical distribution.
     stat : function
        The statistic for which we wish to calculate a confidence
        interval.
     theta_hat : float
        The observed statistic.
     stabilize_variance : boolean, optional
        If True, use the variance stabilization technique. Defaults to
        False.
     se_hat : float or None, optional
        The standard error of the observed data. If None (default),
        will be calculated using the nonrobust bootstrap estimate of
        standard error, using the default number of iterations. The
        user may wish to use a nondefault number of bootstrap
        iterations to calculate this, or a robust variant. If so, the
        user should calculate this externally and pass it to this
        function.
     fast_std_err : function or None, optional
        To speed this up, the user may specify a fast function for
        computing the standard error of a bootstrap sample. If not
        specified, we will use the nonrobust bootstrap estimate of
        standard error, using the default number of iterations. This
        can also be used to specify a nondefault bootstrap
        methodology, such as a robust version. See Examples for some
        examples.
     alpha : float, optional
        Number controlling the size of the interval. That is, this
        function will return a :math:`100(1 - 2\alpha)\%` confidence
        interval. Defaults to 0.05, corresponding to a 90% confidence
        interval.
     Binner : int, optional
        Number of bootstrap samples for calculating standard
        error. Defaults to 25.
     Bouter : int, optional
        Number of bootstrap samples for calculating
        percentiles. Defaults to 1000.
     Bvar : int, optional
        Number of bootstrap samples used to estimate the relationship
        between the statistic and the standard error for use with
        variance stabilization. Defaults to 100.
     size : int or tuple of ints, optional
        Size to pass for generating samples from the distribution.
        Defaults to None, indicating the samples will be the same size
        as the original dataset.
     empirical_distribution : class, optional
        Class to be used to generate an empirical distribution.
        Defaults to the regular EmpiricalDistribution, but any class
        that implements a sample method will work. For example, the
        MultiSampleEmpiricalDistribution can be used. This can be used
        to accommodate more exotic applications of the Bootstrap.
     return_samples : boolean, optional
        If True, return the bootstrapped statistic values. Defaults to False.
     theta_star : array_like, optional
        Bootstrapped statistic values. Can be passed if they have
        already been calculated, which will speed this up
        considerably.
     se_star : array_like, optional
        Bootstrapped statistic standard errors. Can be passed if they
        have already been calculated, which will speed this up
        considerably.
     z_star : array_like, optional
        Bootstrapped pivot values. Can be passed if they have already
        been calculated, which will speed this up considerably.
     num_threads : int, optional
        Number of threads to use for multicore processing. Defaults to
        1, meaning all calculations will be done in a single
        thread. Set to -1 to use all available cores.

    Returns
    -------
     ci_low, ci_high : float
        Lower and upper bounds on a :math:`100(1 - 2\alpha)\%` confidence
        interval on theta.
     theta_star : ndarray
        Array of bootstrapped statistic values. Only returned if
        `return_samples` is True.
     se_star : ndarray
        Array of bootstrapped statistic standard errors. Only returned
        if `return_samples` is True.
     z_star : ndarray
        Array of bootstrapped pivot values. Only returned if
        `return_samples` is True and `stabilize_variance` is False.

    See Also
    --------
    bcanon_interval : BCa confidence interval (recommended default).
    percentile_interval : Simple percentile interval.
    standard_error : Bootstrap standard error, used internally.

    Notes
    -----
    The bootstrap-t interval forms a pivotal quantity
    :math:`z^* = (\hat{\theta}^* - \hat{\theta}) / \widehat{\mathrm{se}}^*`
    for each bootstrap sample, where :math:`\widehat{\mathrm{se}}^*` is
    an estimate of the standard error computed from the bootstrap sample
    itself (the inner bootstrap). The :math:`\alpha` and
    :math:`1 - \alpha` quantiles of :math:`z^*` are then inverted to
    produce the interval. See [ET93]_ (S12.5) for details.

    This interval achieves second-order coverage accuracy,
    :math:`O(n^{-1})`, matching BCa. Its main drawback is computational
    cost: each outer bootstrap sample requires an inner bootstrap to
    estimate the standard error. The variance-stabilization option
    (``stabilize_variance=True``) reparameterizes to a scale where the
    standard error is approximately constant, which can improve
    stability when the statistic has strongly varying variance.

    Examples
    --------
    >>> x = np.random.randn(100)
    >>> dist = EmpiricalDistribution(x)
    >>> def statistic(x): return np.mean(x)
    >>> theta_hat = statistic(x)
    >>> ci_low, ci_high = t_interval(dist, statistic, theta_hat)  # doctest: +SKIP

    >>> se_hat = standard_error(dist, statistic, robustness=0.95, B=2000)
    >>> ci_low, ci_high = t_interval(dist, statistic, theta_hat, se_hat=se_hat)  # doctest: +SKIP

    >>> def fast_std_err(x): return np.sqrt(np.var(x, ddof=1) / len(x))
    >>> ci_low, ci_high = t_interval(dist, statistic, theta_hat,  # doctest: +SKIP
    ...                              fast_std_err=fast_std_err)

    >>> def fast_std_err(x):
    ...    dist = EmpiricalDistribution(x)
    ...    return standard_error(dist, statistic, robustness=0.95, B=2000)
    >>> ci_low, ci_high = t_interval(dist, statistic, theta_hat,  # doctest: +SKIP
    ...                              fast_std_err=fast_std_err)

    """
    if rng is not None:
        _apply_rng(dist, rng)

    if se_hat is None and not stabilize_variance:
        se_hat = standard_error(dist, stat, size=size, num_threads=num_threads)

    if stabilize_variance:
        B = Bvar
    else:
        B = Bouter

    # This function runs in two modes: with or without variance
    # stabilization. In addition, the user has the option of passing
    # theta_star and se_star to this function, in which case these are
    # always interpreted as the basis for calculating the
    # interval. The variance-stabilizing transformation is always
    # calculated separately. Doing so enables us to calculate one set
    # of bootstrap statistics and recycle them for percentile
    # intervals, standardized intervals, BCa intervals, t-intervals
    # without stabilization, and t-intervals with stabilization. Of
    # course, if we aren't comparing various techniques this ability
    # to recycle the same bootstrap statistics is irrelevant.
    if stabilize_variance or (
        z_star is None and (theta_star is None or se_star is None)
    ):
        statistics = {"theta_star": stat}
        if fast_std_err is not None:
            statistics["se_star"] = fast_std_err
        else:
            # Share the outer rng with the inner empirical distribution so
            # the nested bootstrap is reproducible from a single seed.
            outer_rng = dist._rng

            def _inner_se(x):
                inner_dist = empirical_distribution(x)
                inner_dist._rng = outer_rng
                if getattr(inner_dist, "is_multi_sample", False):
                    for child in inner_dist.dists:
                        child._rng = outer_rng
                return standard_error(inner_dist, stat, B=Binner)

            statistics["se_star"] = _inner_se

        boot_stats = bootstrap_samples(
            dist, statistics, B, size=size, num_threads=num_threads
        )  # rng already applied to dist above

        if stabilize_variance:
            # These values are used to estimate the
            # variance-stabilizing transformation, not to calculate
            # the confidence interval.
            var_theta_star = boot_stats["theta_star"]
            var_se_star = boot_stats["se_star"]
        else:
            theta_star = boot_stats["theta_star"]
            se_star = boot_stats["se_star"]

    if not stabilize_variance and z_star is None:
        z_star = np.empty((len(theta_star),))
        z_star = (theta_star - theta_hat) / se_star

    if not stabilize_variance:
        p = _percentile(z_star, [alpha, 1 - alpha])

        ci_low = theta_hat - p[1] * se_hat
        ci_high = theta_hat - p[0] * se_hat
    else:

        def one_over_s(u):
            """1 / s(u)

            s(u) is the estimated standard error of the statistic as a
            function of its mean value. We use a smoother (loess) to
            estimate it.

            """
            alpha = 0.3
            return 1.0 / loess(u, var_theta_star, var_se_star, alpha)

        theta_star_min = min(var_theta_star)
        theta_star_max = max(var_theta_star)
        theta_star_mid = 0.5 * (theta_star_min + theta_star_max)
        dx = (theta_star_max - theta_star_min) / 50.0

        def g(x, a=theta_star_mid):
            """Integral of 1/s

            Uses the trapezoid rule to compute the definite integral
            of 1 / s(u) from `a` to `x`, with `a` defaulting to the
            midrange of the observed statistic values. Using this
            default led to considerable speedups since when `x` is
            close to `a`, we need fewer evaluations of s.

            """
            if a == x:
                return 0.0
            elif a > x:
                return -g(a, x)

            # Hereafter we assume a < x
            integral = 0
            one_over_s_a = one_over_s(a)
            while a < x:
                if a + dx < x:
                    b = a + dx
                else:
                    b = x

                one_over_s_b = one_over_s(b)
                integral += (b - a) * (one_over_s_a + one_over_s_b)
                a = b
                one_over_s_a = one_over_s_b
            return 0.5 * integral

        def new_stat(x):
            """Reexpress the statistic in the transformed space"""
            return g(stat(x))

        # If theta_star was passed to this function, transform it to
        # the variance-stablized scale and pass it along.
        g_theta_hat = g(theta_hat)
        if theta_star is not None:
            # We should be able to speed this up using multicore, but
            # when I try to pickle g I get a maximum recursion depth
            # error.
            z_star = np.array([g(t_i) - g_theta_hat for t_i in theta_star])
        else:
            # Any z_star passed to this function would be meaningless.
            z_star = None

        # For some reason, multithreading doesn't work in this
        # call. And, since we are using 1 as the fast_std_err, this is
        # really fast anyway, so nothing to speed up.
        gci_low, gci_high = t_interval(
            dist,
            new_stat,
            g_theta_hat,
            stabilize_variance=False,
            se_hat=1,
            fast_std_err=lambda x: 1,
            alpha=alpha,
            Binner=Binner,
            Bouter=Bouter,
            size=size,
            empirical_distribution=empirical_distribution,
            z_star=z_star,
            num_threads=1,
        )

        try:
            ci_low = optimize.root_scalar(
                lambda x: g(x) - gci_low,
                method="bisect",
                bracket=(theta_star_min, theta_star_max),
            ).root

            ci_high = optimize.root_scalar(
                lambda x: g(x) - gci_high,
                method="bisect",
                bracket=(theta_star_min, theta_star_max),
            ).root
        except ValueError:
            warnings.warn(
                "Confidence limit outside values used to estimate variance-"
                "stabilizing transformation. Try specifying a higher `Bvar` "
                "when calling this function."
            )
            raise

    if return_samples:
        if stabilize_variance:
            return ci_low, ci_high, theta_star, se_star
        else:
            return ci_low, ci_high, theta_star, se_star, z_star
    else:
        return ci_low, ci_high




[docs]
def percentile_interval(
    dist: EmpiricalDistribution,
    stat: Statistic,
    alpha: float = 0.05,
    B: int = 1000,
    size: int | tuple[int, ...] | None = None,
    return_samples: bool = False,
    theta_star: npt.NDArray[np.float64] | None = None,
    num_threads: int = 1,
    rng: RNGLike = None,
) -> tuple[float, float] | tuple[float, float, npt.NDArray[np.float64]]:
    r"""Percentile Intervals

    Parameters
    ----------
     dist : EmpiricalDistribution
        The empirical distribution.
     stat : function
        The statistic.
     alpha : float, optional
        Number controlling the size of the interval. That is, this
        function will return a :math:`100(1 - 2\alpha)\%` confidence
        interval. Defaults to 0.05, corresponding to a 90% confidence
        interval.
     B : int, optional
        Number of bootstrap samples. Defaults to 1000.
     size : int or tuple of ints, optional
        Size to pass for generating samples from the distribution.
        Defaults to None, indicating the samples will be the same size
        as the original dataset.
     return_samples : boolean, optional
        If True, return the bootstrapped statistic values. Defaults to False.
     theta_star : array_like, optional
        Bootstrapped statistic values. Can be passed if they have
        already been calculated, which will speed this up
        considerably.
     num_threads : int, optional
        Number of threads to use for multicore processing. Defaults to
        1, meaning all calculations will be done in a single
        thread. Set to -1 to use all available cores.

    Returns
    -------
     ci_low, ci_high : float
        Lower and upper bounds on a :math:`100(1 - 2\alpha)\%` confidence
        interval on theta.
     theta_star : ndarray
        Array of bootstrapped statistic values. Only returned if
        `return_samples` is True.

    See Also
    --------
    bcanon_interval : BCa confidence interval (recommended default).
    t_interval : Bootstrap-t interval.
    abcnon_interval : Analytical BCa approximation, no bootstrap
        required.
    calibrate_interval : Coverage-calibrated interval.

    Notes
    -----
    The percentile interval reads off the :math:`\alpha` and
    :math:`1 - \alpha` quantiles of the bootstrap distribution of
    :math:`\hat{\theta}^*` directly. It is transformation-respecting:
    if :math:`\phi = g(\theta)` for a monotone increasing :math:`g`,
    the interval on :math:`\phi` is exactly
    :math:`[g(\theta_\mathrm{lo}), g(\theta_\mathrm{hi})]`. See
    [ET93]_ (S13) for details.

    The coverage error is :math:`O(n^{-1/2})`. When bias or skewness
    in the bootstrap distribution is substantial,
    :func:`bcanon_interval` corrects for both and achieves
    :math:`O(n^{-1})` coverage error; it should generally be preferred.

    """
    if theta_star is None:
        theta_star = bootstrap_samples(
            dist, stat, B, size=size, num_threads=num_threads, rng=rng
        )

    p = _percentile(theta_star, [alpha, 1 - alpha])
    if return_samples:
        return p[0], p[1], theta_star
    else:
        return p[0], p[1]




[docs]
def bcanon_interval(
    dist: EmpiricalDistribution,
    stat: Statistic,
    x: ArrayLike | tuple[ArrayLike, ...],
    alpha: float = 0.05,
    B: int = 1000,
    size: int | tuple[int, ...] | None = None,
    return_samples: bool = False,
    theta_star: npt.NDArray[np.float64] | None = None,
    theta_hat: float | None = None,
    jv: JackknifeValues | None = None,
    num_threads: int = 1,
    rng: RNGLike = None,
) -> (
    tuple[float, float] | tuple[float, float, npt.NDArray[np.float64], JackknifeValues]
):
    r"""BCa Confidence Intervals

    Parameters
    ----------
     dist : EmpiricalDistribution
        The empirical distribution.
     stat : function
        The statistic.
     x : array_like or pandas DataFrame or tuple
        The data, used to evaluate the observed statistic and compute
        jackknife values.
     alpha : float, optional
        Number controlling the size of the interval. That is, this
        function will return a :math:`100(1 - 2\alpha)\%` confidence
        interval. Defaults to 0.05, corresponding to a 90% confidence
        interval.
     B : int, optional
        Number of bootstrap samples. Defaults to 1000.
     size : int or tuple of ints, optional
        Size to pass for generating samples from the distribution.
        Defaults to None, indicating the samples will be the same size
        as the original dataset.
     return_samples : boolean, optional
        If True, return the bootstrapped statistic values. Defaults to False.
     theta_star : array_like, optional
        Bootstrapped statistic values. Can be passed if they have
        already been calculated, which will speed this up
        considerably.
     theta_hat : float, optional
        Observed statistic. Can be passed if it has already been
        calculated, which will speed this up slightly.
     jv : array_like, optional
        Jackknife values. Can be passed if they have already been
        calculated, which will speed this up considerably.
     num_threads : int, optional
        Number of threads to use for multicore processing. Defaults to
        1, meaning all calculations will be done in a single
        thread. Set to -1 to use all available cores.

    Returns
    -------
     ci_low, ci_high : float
        Lower and upper bounds on a :math:`100(1 - 2\alpha)\%` confidence
        interval on theta.
     theta_star : ndarray
        Array of bootstrapped statistic values. Only returned if
        `return_samples` is True.
     jv : ndarray
        Jackknife values. Only returned if `return_samples` is True.

    See Also
    --------
    percentile_interval : Simple percentile interval.
    abcnon_interval : Analytical BCa approximation, no bootstrap
        required.
    t_interval : Bootstrap-t interval.
    calibrate_interval : Coverage-calibrated interval.

    Notes
    -----
    The BCa (bias-corrected and accelerated) interval adjusts the
    percentile endpoints using two quantities estimated from the data.
    The bias-correction :math:`\hat{z}_0` accounts for median bias in
    the bootstrap distribution: it is the normal quantile of the
    fraction of bootstrap replicates below :math:`\hat{\theta}`. The
    acceleration :math:`\hat{a}` accounts for the rate at which the
    standard error of the statistic changes with :math:`\theta`, and is
    estimated from the jackknife values. See [ET93]_ (S14.3) for
    details.

    When :math:`\hat{z}_0 = 0` and :math:`\hat{a} = 0`, BCa reduces to
    the percentile interval. In general BCa achieves second-order
    accurate coverage (:math:`O(n^{-1})` error) versus first-order
    (:math:`O(n^{-1/2})`) for the percentile interval, and is the
    recommended default for most applications.

    """
    # The observed value of the statistic.
    if theta_hat is None:
        theta_hat = stat(x)

    if theta_star is None:
        theta_star = bootstrap_samples(
            dist, stat, B, size=size, num_threads=num_threads, rng=rng
        )

    zb = (theta_star < theta_hat).sum()
    z0_hat = ss.norm.ppf(zb / len(theta_star))

    if jv is None:
        if dist.is_multi_sample:
            jv = [
                jackknife_values(x, stat, sample=i, num_threads=num_threads)
                for i in range(len(x))
            ]
            jv = (*jv,)
        else:
            jv = jackknife_values(x, stat, num_threads=num_threads)

    a_hat = _bca_acceleration(jv)
    alpha1, alpha2 = _adjust_percentiles(alpha, a_hat, z0_hat)

    p = _percentile(theta_star, [alpha1, alpha2])
    if return_samples:
        return p[0], p[1], theta_star, jv
    else:
        return p[0], p[1]




[docs]
def abcnon_interval(
    x: ArrayLike,
    stat: Callable[[Any, npt.NDArray[np.float64]], float],
    alpha: float | list[float] = 0.05,
    eps: float = 0.001,
    influence_components: npt.NDArray[np.float64] | None = None,
    second_derivatives: npt.NDArray[np.float64] | None = None,
    return_influence_components: bool = False,
    num_threads: int = 1,
) -> tuple[float, ...]:
    """ABC Confidence Intervals

    Parameters
    ----------
     x : array_like or pandas DataFrame
        The data, used to evaluate the observed statistic and compute
        influence components.
     stat : function
        The statistic. Should take `x` and a resampling vector as
        input. See Notes.
     alpha : float or array of floats, optional
        Number controlling the size of the interval. That is, this
        function will return a :math:`100(1 - 2\\alpha)\\%` confidence
        interval. Defaults to 0.05, corresponding to a 90% confidence
        interval. Alternatively, the user can pass an array of floats
        in (0, 1). In that case, the return value will be a tuple of
        confidence points. See Notes.
     influence_components : array_like, optional
        Influence components. See Notes.
     second_derivatives : array_like, optional
        Vector of second derivatives. See Notes.
     return_influence_components : boolean, optional
        Specifies whether to return the influence components and
        second derivatives. Defaults to False.
     num_threads : int, optional
        Number of threads to use for multicore processing. Defaults to
        1, meaning all calculations will be done in a single
        thread. Set to -1 to use all available cores.

    Returns
    -------
     ci_low, ci_high : float
        Lower and upper bounds on a :math:`100(1 - 2\\alpha)\\%` confidence
        interval on theta.
     influence_components : array_like
        Vector of influence components. Only returned if
        `return_influence_components` is True.
     second_derivatives : array_like
        Vector of second_derivatives. Only returned if
        `return_influence_components` is True.

    See Also
    --------
    bcanon_interval : BCa interval using bootstrap samples.
    percentile_interval : Simple percentile interval.
    calibrate_interval : Coverage-calibrated interval using ABC
        internally.

    Notes
    -----
    Approximate Bootstrap Confidence (ABC) intervals require the
    statistic to be expressed in resampling form. See [ET93]_ (S14.4)
    for details. It only applies to statistics which are smooth
    functions of the data. A notable example where ABC does not apply
    is the sample median.

    ABC works in terms of the derivatives and second derivatives of
    the statistic with respect to the data. In some cases, analytical
    forms are possible. In that case, the caller may wish to calculate
    them externally to this function and pass them in. The default
    behavior is to calculate them using a finite difference method.

    When we want to compute multiple confidence points, we can reuse
    many of the calculations. For example, if we want to compute a 90%
    confidence interval *and* a 99% confidence interval, we would
    specify `alpha` = [0.005, 0.05, 0.95, 0.995] and read off the
    appropriate return values. This would be much faster than multiple
    calls to this function. (The recurring cost is 1 call to `stat`
    for each additional point specified.) This in turn facilitates
    using these intervals for achieved significance levels,
    effectively by inverting the interval having endpoint 0. See
    :func:`bootstrap_asl` and :func:`bcanon_asl` for how this might be
    done.

    """
    n = len(x)
    n2 = n * n
    eps /= n
    p0 = np.ones((n,)) / n
    t0 = stat(x, p0)

    if influence_components is None or second_derivatives is None:
        influence_components, second_derivatives = _influence_components(
            x, stat, order=2, eps=eps, num_threads=num_threads
        )

    sum_inf_squared = np.sum(influence_components**2)
    sigma_hat = np.sqrt(sum_inf_squared) / n
    a_hat = np.sum(influence_components**3) / (6 * sum_inf_squared**1.5)

    delta_hat = influence_components / (n2 * sigma_hat)
    c_q = (stat(x, p0 + eps * delta_hat) - 2 * t0 + stat(x, p0 - eps * delta_hat)) / (
        2 * sigma_hat * eps * eps
    )

    b_hat = np.sum(second_derivatives) / (2 * n * n)
    gamma_hat = b_hat / sigma_hat - c_q
    z0_hat = ss.norm.ppf(2 * ss.norm.cdf(a_hat) * ss.norm.cdf(-gamma_hat))

    try:
        for a in alpha:
            break
    except TypeError:
        alpha = [alpha, 1 - alpha]

    conf_points = []
    for a in alpha:
        w = z0_hat + ss.norm.ppf(a)
        lmbda = w / ((1 - a_hat * w) ** 2)
        conf_points.append(stat(x, p0 + lmbda * delta_hat))

    if return_influence_components:
        conf_points.extend([influence_components, second_derivatives])

    return (*conf_points,)




[docs]
def calibrate_interval(
    dist: EmpiricalDistribution,
    stat: Callable[[Any, npt.NDArray[np.float64]], float],
    x: ArrayLike | tuple[ArrayLike, ...],
    theta_hat: float,
    alpha: float = 0.05,
    B: int = 1000,
    return_confidence_points: bool = False,
    num_threads: int = 1,
    rng: RNGLike = None,
) -> tuple[float, float] | tuple[float, float, float, float]:
    """Calibrated confidence interval

    Parameters
    ----------
     dist : EmpiricalDistribution
        The empirical distribution.
     stat : function
        The statistic.
     x : array_like or pandas DataFrame or tuple
        The data.
     theta_hat : float
        Observed statistic.
     alpha : float, optional
        Number controlling the size of the interval. That is, this
        function will return a :math:`100(1 - 2\\alpha)\\%` confidence
        interval. Defaults to 0.05, corresponding to a 90% confidence
        interval.
     B : int, optional
        Number of bootstrap samples. Defaults to 1000.
     return_confidence_points : boolean, optional
        If True, returns the estimated confidence points have the
        desired coverage. Defaults to False.
     num_threads : int, optional
        Number of threads to use for multicore processing. Defaults to
        1, meaning all calculations will be done in a single
        thread. Set to -1 to use all available cores.

    Returns
    -------
     ci_low, ci_high : float
        Lower and upper bounds on a :math:`100(1 - 2\\alpha)\\%` confidence
        interval on theta.
     lmbda_low, lmbda_high : float
        The estimated confidence points having the desired
        coverage. For example, if the interval has the nominal
        coverage, then lmbda_low would be `alpha` and lmbda_high would
        be 1 - `alpha`.

    See Also
    --------
    abcnon_interval : ABC interval used internally.
    bcanon_interval : BCa interval, often preferred.

    Notes
    -----
    While we can in principle calibrate any type of confidence
    interval, in most instances that results in a "double
    bootstrap". For this reason, only :func:`abcnon_interval` intervals
    are currently supported. Moreover, from my limited experience calibration seems
    pretty finicky. I would consider this function to be illustrative
    but experimental. See [ET93]_ (S18) for details.

    We compute the observed coverage for a range of points around the
    nominal `alpha` and 1-`alpha`. Then we fit a smoother (loess) to
    these data and invert to find the confidence point, lmbda, having
    the desired coverage.

    """

    def logit(p):
        return np.log(p / (1 - p))

    def inv_logit(p):
        return 1.0 / (1.0 + np.exp(-p))

    logit_alpha = logit(alpha)
    logit_lmbdas = np.linspace(logit_alpha - 4, -1.0, num=25)
    logit_lmbdas = np.concatenate((logit_lmbdas, np.flip(-logit_lmbdas)))
    lmbdas = inv_logit(logit_lmbdas)

    if rng is not None:
        _apply_rng(dist, rng)

    def _calc_p_hat(dist, stat, batch_size, lmbdas, theta_hat, worker_rng):
        _apply_rng(dist, worker_rng)

        p_hat = np.zeros((len(lmbdas),))
        for i in range(batch_size):
            x_star = dist.sample()
            theta_lambda = abcnon_interval(x_star, stat, alpha=lmbdas)
            for j, theta_lambda_b in enumerate(theta_lambda):
                if theta_hat <= theta_lambda_b:
                    p_hat[j] += 1

        return p_hat

    if num_threads == 1:
        p_hat = _calc_p_hat(dist, stat, B, lmbdas, theta_hat, dist._rng)
    else:
        if num_threads == -1:
            num_threads = mp.cpu_count()

        pool = Pool(num_threads)
        try:
            pool.restart()
        except AssertionError:
            pass

        results = []
        batch_size = B // num_threads
        extra = B % num_threads
        batch_sizes = [batch_size] * num_threads
        for i in range(extra):
            batch_sizes[i] += 1

        worker_rngs = _spawn_rngs(dist._rng, num_threads)
        for i, worker_rng in enumerate(worker_rngs):
            r = pool.apipe(
                _calc_p_hat,
                dist,
                stat,
                batch_sizes[i],
                lmbdas,
                theta_hat,
                worker_rng,
            )
            results.append(r)

        p_hat = np.zeros((len(lmbdas),))
        for res in results:
            p_hat += res.get()

        pool.close()
        pool.join()

    p_hat /= B

    def g(lmbda):
        zz = loess(logit(lmbda), logit_lmbdas, logit(p_hat), 0.3)
        return inv_logit(zz)

    lmbda_low = optimize.root_scalar(
        lambda x: g(x) - alpha, method="bisect", bracket=(1e-6, 0.5)
    ).root

    lmbda_high = optimize.root_scalar(
        lambda x: g(x) - (1 - alpha), method="bisect", bracket=(0.5, 1 - 1e-6)
    ).root

    ci_low, ci_high = abcnon_interval(x, stat, alpha=[lmbda_low, lmbda_high])

    if return_confidence_points:
        return ci_low, ci_high, lmbda_low, lmbda_high
    else:
        return ci_low, ci_high