remove venv
This commit is contained in:
parent 056387013d
commit 0680c7594e
13999 changed files with 0 additions and 2895688 deletions
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,356 +0,0 @@
import pickle

import numpy as np
import numpy.testing as npt
from numpy.testing import assert_allclose, assert_equal
from pytest import raises as assert_raises

import numpy.ma.testutils as ma_npt

from scipy._lib._util import (
    getfullargspec_no_self as _getfullargspec, np_long
)
from scipy._lib._array_api_no_0d import xp_assert_equal
from scipy import stats


def check_named_results(res, attributes, ma=False, xp=None):
    for i, attr in enumerate(attributes):
        if ma:
            ma_npt.assert_equal(res[i], getattr(res, attr))
        elif xp is not None:
            xp_assert_equal(res[i], getattr(res, attr))
        else:
            npt.assert_equal(res[i], getattr(res, attr))


def check_normalization(distfn, args, distname):
    norm_moment = distfn.moment(0, *args)
    npt.assert_allclose(norm_moment, 1.0)

    if distname == "rv_histogram_instance":
        atol, rtol = 1e-5, 0
    else:
        atol, rtol = 1e-7, 1e-7

    normalization_expect = distfn.expect(lambda x: 1, args=args)
    npt.assert_allclose(normalization_expect, 1.0, atol=atol, rtol=rtol,
                        err_msg=distname, verbose=True)

    _a, _b = distfn.support(*args)
    normalization_cdf = distfn.cdf(_b, *args)
    npt.assert_allclose(normalization_cdf, 1.0)


def check_moment(distfn, arg, m, v, msg):
    m1 = distfn.moment(1, *arg)
    m2 = distfn.moment(2, *arg)
    if not np.isinf(m):
        npt.assert_almost_equal(m1, m, decimal=10,
                                err_msg=msg + ' - 1st moment')
    else:  # or np.isnan(m1),
        npt.assert_(np.isinf(m1),
                    msg + f' - 1st moment -infinite, m1={str(m1)}')

    if not np.isinf(v):
        npt.assert_almost_equal(m2 - m1 * m1, v, decimal=10,
                                err_msg=msg + ' - 2ndt moment')
    else:  # or np.isnan(m2),
        npt.assert_(np.isinf(m2), msg + f' - 2nd moment -infinite, {m2=}')


def check_mean_expect(distfn, arg, m, msg):
    if np.isfinite(m):
        m1 = distfn.expect(lambda x: x, arg)
        npt.assert_almost_equal(m1, m, decimal=5,
                                err_msg=msg + ' - 1st moment (expect)')


def check_var_expect(distfn, arg, m, v, msg):
    dist_looser_tolerances = {"rv_histogram_instance" , "ksone"}
    kwargs = {'rtol': 5e-6} if msg in dist_looser_tolerances else {}
    if np.isfinite(v):
        m2 = distfn.expect(lambda x: x*x, arg)
        npt.assert_allclose(m2, v + m*m, **kwargs)


def check_skew_expect(distfn, arg, m, v, s, msg):
    if np.isfinite(s):
        m3e = distfn.expect(lambda x: np.power(x-m, 3), arg)
        npt.assert_almost_equal(m3e, s * np.power(v, 1.5),
                                decimal=5, err_msg=msg + ' - skew')
    else:
        npt.assert_(np.isnan(s))


def check_kurt_expect(distfn, arg, m, v, k, msg):
    if np.isfinite(k):
        m4e = distfn.expect(lambda x: np.power(x-m, 4), arg)
        npt.assert_allclose(m4e, (k + 3.) * np.power(v, 2),
                            atol=1e-5, rtol=1e-5,
                            err_msg=msg + ' - kurtosis')
    elif not np.isposinf(k):
        npt.assert_(np.isnan(k))


def check_munp_expect(dist, args, msg):
    # If _munp is overridden, test a higher moment. (Before gh-18634, some
    # distributions had issues with moments 5 and higher.)
    if dist._munp.__func__ != stats.rv_continuous._munp:
        res = dist.moment(5, *args)  # shouldn't raise an error
        ref = dist.expect(lambda x: x ** 5, args, lb=-np.inf, ub=np.inf)
        if not np.isfinite(res):  # could be valid; automated test can't know
            return
        # loose tolerance, mostly to see whether _munp returns *something*
        assert_allclose(res, ref, atol=1e-10, rtol=1e-4,
                        err_msg=msg + ' - higher moment / _munp')


def check_entropy(distfn, arg, msg):
    ent = distfn.entropy(*arg)
    npt.assert_(not np.isnan(ent), msg + 'test Entropy is nan')


def check_private_entropy(distfn, args, superclass):
    # compare a generic _entropy with the distribution-specific implementation
    npt.assert_allclose(distfn._entropy(*args),
                        superclass._entropy(distfn, *args))


def check_entropy_vect_scale(distfn, arg):
    # check 2-d
    sc = np.asarray([[1, 2], [3, 4]])
    v_ent = distfn.entropy(*arg, scale=sc)
    s_ent = [distfn.entropy(*arg, scale=s) for s in sc.ravel()]
    s_ent = np.asarray(s_ent).reshape(v_ent.shape)
    assert_allclose(v_ent, s_ent, atol=1e-14)

    # check invalid value, check cast
    sc = [1, 2, -3]
    v_ent = distfn.entropy(*arg, scale=sc)
    s_ent = [distfn.entropy(*arg, scale=s) for s in sc]
    s_ent = np.asarray(s_ent).reshape(v_ent.shape)
    assert_allclose(v_ent, s_ent, atol=1e-14)


def check_edge_support(distfn, args):
    # Make sure that x=self.a and self.b are handled correctly.
    x = distfn.support(*args)
    if isinstance(distfn, stats.rv_discrete):
        x = x[0]-1, x[1]

    npt.assert_equal(distfn.cdf(x, *args), [0.0, 1.0])
    npt.assert_equal(distfn.sf(x, *args), [1.0, 0.0])

    if distfn.name not in ('skellam', 'dlaplace'):
        # with a = -inf, log(0) generates warnings
        npt.assert_equal(distfn.logcdf(x, *args), [-np.inf, 0.0])
        npt.assert_equal(distfn.logsf(x, *args), [0.0, -np.inf])

    npt.assert_equal(distfn.ppf([0.0, 1.0], *args), x)
    npt.assert_equal(distfn.isf([0.0, 1.0], *args), x[::-1])

    # out-of-bounds for isf & ppf
    npt.assert_(np.isnan(distfn.isf([-1, 2], *args)).all())
    npt.assert_(np.isnan(distfn.ppf([-1, 2], *args)).all())


def check_named_args(distfn, x, shape_args, defaults, meths):
    ## Check calling w/ named arguments.

    # check consistency of shapes, numargs and _parse signature
    signature = _getfullargspec(distfn._parse_args)
    npt.assert_(signature.varargs is None)
    npt.assert_(signature.varkw is None)
    npt.assert_(not signature.kwonlyargs)
    npt.assert_(list(signature.defaults) == list(defaults))

    shape_argnames = signature.args[:-len(defaults)]  # a, b, loc=0, scale=1
    if distfn.shapes:
        shapes_ = distfn.shapes.replace(',', ' ').split()
    else:
        shapes_ = ''
    npt.assert_(len(shapes_) == distfn.numargs)
    npt.assert_(len(shapes_) == len(shape_argnames))

    # check calling w/ named arguments
    shape_args = list(shape_args)

    vals = [meth(x, *shape_args) for meth in meths]
    npt.assert_(np.all(np.isfinite(vals)))

    names, a, k = shape_argnames[:], shape_args[:], {}
    while names:
        k.update({names.pop(): a.pop()})
        v = [meth(x, *a, **k) for meth in meths]
        npt.assert_array_equal(vals, v)
        if 'n' not in k.keys():
            # `n` is first parameter of moment(), so can't be used as named arg
            npt.assert_equal(distfn.moment(1, *a, **k),
                             distfn.moment(1, *shape_args))

    # unknown arguments should not go through:
    k.update({'kaboom': 42})
    assert_raises(TypeError, distfn.cdf, x, **k)


def check_random_state_property(distfn, args):
    # check the random_state attribute of a distribution *instance*

    # This test fiddles with distfn.random_state. This breaks other tests,
    # hence need to save it and then restore.
    rndm = distfn.random_state

    # baseline: this relies on the global state
    np.random.seed(1234)
    distfn.random_state = None
    r0 = distfn.rvs(*args, size=8)

    # use an explicit instance-level random_state
    distfn.random_state = 1234
    r1 = distfn.rvs(*args, size=8)
    npt.assert_equal(r0, r1)

    distfn.random_state = np.random.RandomState(1234)
    r2 = distfn.rvs(*args, size=8)
    npt.assert_equal(r0, r2)

    # check that np.random.Generator can be used (numpy >= 1.17)
    if hasattr(np.random, 'default_rng'):
        # obtain a np.random.Generator object
        rng = np.random.default_rng(1234)
        distfn.rvs(*args, size=1, random_state=rng)

    # can override the instance-level random_state for an individual .rvs call
    distfn.random_state = 2
    orig_state = distfn.random_state.get_state()

    r3 = distfn.rvs(*args, size=8, random_state=np.random.RandomState(1234))
    npt.assert_equal(r0, r3)

    # ... and that does not alter the instance-level random_state!
    npt.assert_equal(distfn.random_state.get_state(), orig_state)

    # finally, restore the random_state
    distfn.random_state = rndm


def check_meth_dtype(distfn, arg, meths):
    q0 = [0.25, 0.5, 0.75]
    x0 = distfn.ppf(q0, *arg)
    x_cast = [x0.astype(tp) for tp in (np_long, np.float16, np.float32,
                                       np.float64)]

    for x in x_cast:
        # casting may have clipped the values, exclude those
        distfn._argcheck(*arg)
        x = x[(distfn.a < x) & (x < distfn.b)]
        for meth in meths:
            val = meth(x, *arg)
            npt.assert_(val.dtype == np.float64)


def check_ppf_dtype(distfn, arg):
    q0 = np.asarray([0.25, 0.5, 0.75])
    q_cast = [q0.astype(tp) for tp in (np.float16, np.float32, np.float64)]
    for q in q_cast:
        for meth in [distfn.ppf, distfn.isf]:
            val = meth(q, *arg)
            npt.assert_(val.dtype == np.float64)


def check_cmplx_deriv(distfn, arg):
    # Distributions allow complex arguments.
    def deriv(f, x, *arg):
        x = np.asarray(x)
        h = 1e-10
        return (f(x + h*1j, *arg)/h).imag

    x0 = distfn.ppf([0.25, 0.51, 0.75], *arg)
    x_cast = [x0.astype(tp) for tp in (np_long, np.float16, np.float32,
                                       np.float64)]

    for x in x_cast:
        # casting may have clipped the values, exclude those
        distfn._argcheck(*arg)
        x = x[(distfn.a < x) & (x < distfn.b)]

        pdf, cdf, sf = distfn.pdf(x, *arg), distfn.cdf(x, *arg), distfn.sf(x, *arg)
        assert_allclose(deriv(distfn.cdf, x, *arg), pdf, rtol=1e-5)
        assert_allclose(deriv(distfn.logcdf, x, *arg), pdf/cdf, rtol=1e-5)

        assert_allclose(deriv(distfn.sf, x, *arg), -pdf, rtol=1e-5)
        assert_allclose(deriv(distfn.logsf, x, *arg), -pdf/sf, rtol=1e-5)

        assert_allclose(deriv(distfn.logpdf, x, *arg),
                        deriv(distfn.pdf, x, *arg) / distfn.pdf(x, *arg),
                        rtol=1e-5)


def check_pickling(distfn, args):
    # check that a distribution instance pickles and unpickles
    # pay special attention to the random_state property

    # save the random_state (restore later)
    rndm = distfn.random_state

    # check unfrozen
    distfn.random_state = 1234
    distfn.rvs(*args, size=8)
    s = pickle.dumps(distfn)
    r0 = distfn.rvs(*args, size=8)

    unpickled = pickle.loads(s)
    r1 = unpickled.rvs(*args, size=8)
    npt.assert_equal(r0, r1)

    # also smoke test some methods
    medians = [distfn.ppf(0.5, *args), unpickled.ppf(0.5, *args)]
    npt.assert_equal(medians[0], medians[1])
    npt.assert_equal(distfn.cdf(medians[0], *args),
                     unpickled.cdf(medians[1], *args))

    # check frozen pickling/unpickling with rvs
    frozen_dist = distfn(*args)
    pkl = pickle.dumps(frozen_dist)
    unpickled = pickle.loads(pkl)

    r0 = frozen_dist.rvs(size=8)
    r1 = unpickled.rvs(size=8)
    npt.assert_equal(r0, r1)

    # check pickling/unpickling of .fit method
    if hasattr(distfn, "fit"):
        fit_function = distfn.fit
        pickled_fit_function = pickle.dumps(fit_function)
        unpickled_fit_function = pickle.loads(pickled_fit_function)
        assert fit_function.__name__ == unpickled_fit_function.__name__ == "fit"

    # restore the random_state
    distfn.random_state = rndm


def check_freezing(distfn, args):
    # regression test for gh-11089: freezing a distribution fails
    # if loc and/or scale are specified
    if isinstance(distfn, stats.rv_continuous):
        locscale = {'loc': 1, 'scale': 2}
    else:
        locscale = {'loc': 1}

    rv = distfn(*args, **locscale)
    assert rv.a == distfn(*args).a
    assert rv.b == distfn(*args).b


def check_rvs_broadcast(distfunc, distname, allargs, shape, shape_only, otype):
    rng = np.random.RandomState(123)
    sample = distfunc.rvs(*allargs, random_state=rng)
    assert_equal(sample.shape, shape, f"{distname}: rvs failed to broadcast")
    if not shape_only:
        rvs = np.vectorize(
            lambda *allargs: distfunc.rvs(*allargs, random_state=rng),
            otypes=otype)
        rng = np.random.RandomState(123)
        expected = rvs(*allargs)
        assert_allclose(sample, expected, rtol=1e-13)
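The helpers above are generic over SciPy distributions: each takes a distribution object plus its shape parameters and asserts one property. As a minimal usage sketch (not part of the removed file; it assumes SciPy is installed and the functions above are importable), driving a few of the checks with the gamma distribution:

from scipy import stats

dist, args, name = stats.gamma, (2.5,), 'gamma'
# mean, variance, skewness, excess kurtosis used by the moment checks below
m, v, s, k = dist.stats(*args, moments='mvsk')

check_normalization(dist, args, name)
check_moment(dist, args, m, v, name)
check_mean_expect(dist, args, m, name)
check_var_expect(dist, args, m, v, name)
check_skew_expect(dist, args, m, v, s, name)
check_kurt_expect(dist, args, m, v, k, name)
check_edge_support(dist, args)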
Binary file not shown.
Binary file not shown.
@@ -1,171 +0,0 @@
import math
import numpy as np
from scipy import special
from scipy.stats._qmc import primes_from_2_to


def _primes(n):
    # Defined to facilitate comparison between translation and source
    # In Matlab, primes(10.5) -> first four primes, primes(11.5) -> first five
    return primes_from_2_to(math.ceil(n))


def _gaminv(a, b):
    # Defined to facilitate comparison between translation and source
    # Matlab's `gaminv` is like `special.gammaincinv` but args are reversed
    return special.gammaincinv(b, a)


def _qsimvtv(m, nu, sigma, a, b, rng):
    """Estimates the multivariate t CDF using randomized QMC

    Parameters
    ----------
    m : int
        The number of points
    nu : float
        Degrees of freedom
    sigma : ndarray
        A 2D positive semidefinite covariance matrix
    a : ndarray
        Lower integration limits
    b : ndarray
        Upper integration limits.
    rng : Generator
        Pseudorandom number generator

    Returns
    -------
    p : float
        The estimated CDF.
    e : float
        An absolute error estimate.

    """
    # _qsimvtv is a Python translation of the Matlab function qsimvtv,
    # semicolons and all.
    #
    # This function uses an algorithm given in the paper
    # "Comparison of Methods for the Numerical Computation of
    # Multivariate t Probabilities", in
    # J. of Computational and Graphical Stat., 11(2002), pp. 950-971, by
    # Alan Genz and Frank Bretz
    #
    # The primary references for the numerical integration are
    # "On a Number-Theoretical Integration Method"
    # H. Niederreiter, Aequationes Mathematicae, 8(1972), pp. 304-11.
    # and
    # "Randomization of Number Theoretic Methods for Multiple Integration"
    # R. Cranley & T.N.L. Patterson, SIAM J Numer Anal, 13(1976), pp. 904-14.
    #
    # Alan Genz is the author of this function and following Matlab functions.
    # Alan Genz, WSU Math, PO Box 643113, Pullman, WA 99164-3113
    # Email : alangenz@wsu.edu
    #
    # Copyright (C) 2013, Alan Genz, All rights reserved.
    #
    # Redistribution and use in source and binary forms, with or without
    # modification, are permitted provided the following conditions are met:
    # 1. Redistributions of source code must retain the above copyright
    #    notice, this list of conditions and the following disclaimer.
    # 2. Redistributions in binary form must reproduce the above copyright
    #    notice, this list of conditions and the following disclaimer in
    #    the documentation and/or other materials provided with the
    #    distribution.
    # 3. The contributor name(s) may not be used to endorse or promote
    #    products derived from this software without specific prior
    #    written permission.
    # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
    # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
    # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
    # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
    # OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
    # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
    # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF USE
    # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    # Initialization
    sn = max(1, math.sqrt(nu)); ch, az, bz = _chlrps(sigma, a/sn, b/sn)
    n = len(sigma); N = 10; P = math.ceil(m/N); on = np.ones(P); p = 0; e = 0
    ps = np.sqrt(_primes(5*n*math.log(n+4)/4)); q = ps[:, np.newaxis]  # Richtmyer gens.

    # Randomization loop for ns samples
    c = None; dc = None
    for S in range(N):
        vp = on.copy(); s = np.zeros((n, P))
        for i in range(n):
            x = np.abs(2*np.mod(q[i]*np.arange(1, P+1) + rng.random(), 1)-1)  # periodizing transform
            if i == 0:
                r = on
                if nu > 0:
                    r = np.sqrt(2*_gaminv(x, nu/2))
            else:
                y = _Phinv(c + x*dc)
                s[i:] += ch[i:, i-1:i] * y
            si = s[i, :]; c = on.copy(); ai = az[i]*r - si; d = on.copy(); bi = bz[i]*r - si
            c[ai <= -9] = 0; tl = abs(ai) < 9; c[tl] = _Phi(ai[tl])
            d[bi <= -9] = 0; tl = abs(bi) < 9; d[tl] = _Phi(bi[tl])
            dc = d - c; vp = vp * dc
        d = (np.mean(vp) - p)/(S + 1); p = p + d; e = (S - 1)*e/(S + 1) + d**2
    e = math.sqrt(e)  # error estimate is 3 times std error with N samples.
    return p, e


# Standard statistical normal distribution functions
def _Phi(z):
    return special.ndtr(z)


def _Phinv(p):
    return special.ndtri(p)


def _chlrps(R, a, b):
    """
    Computes permuted and scaled lower Cholesky factor c for R which may be
    singular, also permuting and scaling integration limit vectors a and b.
    """
    ep = 1e-10  # singularity tolerance
    eps = np.finfo(R.dtype).eps

    n = len(R); c = R.copy(); ap = a.copy(); bp = b.copy(); d = np.sqrt(np.maximum(np.diag(c), 0))
    for i in range(n):
        if d[i] > 0:
            c[:, i] /= d[i]; c[i, :] /= d[i]
            ap[i] /= d[i]; bp[i] /= d[i]
    y = np.zeros((n, 1)); sqtp = math.sqrt(2*math.pi)

    for k in range(n):
        im = k; ckk = 0; dem = 1; s = 0
        for i in range(k, n):
            if c[i, i] > eps:
                cii = math.sqrt(max(c[i, i], 0))
                if i > 0: s = c[i, :k] @ y[:k]
                ai = (ap[i]-s)/cii; bi = (bp[i]-s)/cii; de = _Phi(bi)-_Phi(ai)
                if de <= dem:
                    ckk = cii; dem = de; am = ai; bm = bi; im = i
        if im > k:
            ap[[im, k]] = ap[[k, im]]; bp[[im, k]] = bp[[k, im]]; c[im, im] = c[k, k]
            t = c[im, :k].copy(); c[im, :k] = c[k, :k]; c[k, :k] = t
            t = c[im+1:, im].copy(); c[im+1:, im] = c[im+1:, k]; c[im+1:, k] = t
            t = c[k+1:im, k].copy(); c[k+1:im, k] = c[im, k+1:im].T; c[im, k+1:im] = t.T
        if ckk > ep*(k+1):
            c[k, k] = ckk; c[k, k+1:] = 0
            for i in range(k+1, n):
                c[i, k] = c[i, k]/ckk; c[i, k+1:i+1] = c[i, k+1:i+1] - c[i, k]*c[k+1:i+1, k].T
            if abs(dem) > ep:
                y[k] = (np.exp(-am**2/2) - np.exp(-bm**2/2)) / (sqtp*dem)
            else:
                y[k] = (am + bm) / 2
                if am < -10:
                    y[k] = bm
                elif bm > 10:
                    y[k] = am
            c[k, :k+1] /= ckk; ap[k] /= ckk; bp[k] /= ckk
        else:
            c[k:, k] = 0; y[k] = (ap[k] + bp[k])/2
        pass
    return c, ap, bp
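As a rough usage sketch for the translation above (not part of the removed file), estimating P(X <= b) for a trivariate t distribution with 5 degrees of freedom; the lower limits are set to -inf so the result is a CDF value, and e is the absolute error estimate described in the docstring:

import numpy as np

rng = np.random.default_rng(123)
sigma = np.array([[1.0, 0.5, 0.3],
                  [0.5, 1.0, 0.4],
                  [0.3, 0.4, 1.0]])
a = np.full(3, -np.inf)        # lower integration limits
b = np.array([1.0, 2.0, 0.5])  # upper integration limits

p, e = _qsimvtv(20000, 5, sigma, a, b, rng)
print(p, e)

For a cross-check, scipy.stats.multivariate_t exposes a public cdf method (SciPy >= 1.10) that evaluates the same kind of integral.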
@@ -1,607 +0,0 @@
# DO NOT EDIT THIS FILE!
# This file was generated by the R script
# generate_fisher_exact_results_from_r.R
# The script was run with R version 3.6.2 (2019-12-12) at 2020-11-09 06:16:09


from collections import namedtuple
import numpy as np


Inf = np.inf

Parameters = namedtuple('Parameters',
                        ['table', 'confidence_level', 'alternative'])
RResults = namedtuple('RResults',
                      ['pvalue', 'conditional_odds_ratio',
                       'conditional_odds_ratio_ci'])
data = [
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1300759363430016,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0.04035202926536294,
|
||||
2.662846672960251))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.02301413756522116,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0.004668988338943325,
|
||||
0.895792956493601))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1973244147157191,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0.4153910882532168,
|
||||
259.2593661129417))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.09580440012477633,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0.08056337526385809,
|
||||
1.22704788545557))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.2697004098849359,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0.1176691231650079,
|
||||
1.787463657995973))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1973244147157192,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0.003857141267422399,
|
||||
2.407369893767229))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.06126482213438735,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.451643573543705))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.04761904761904762,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(1.024822256141754,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
39.00054996869288))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.04761904761904761,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(1.024822256141754,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
39.00054996869287))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=2.005657880389071e-122,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(349.2595113327733,
|
||||
3630.382605689872))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=5.728437460831947e-44,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(152.4166024390096,
|
||||
1425.700792178893))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.95,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.574111858126088,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0.8520462587912048,
|
||||
1.340148950273938))),
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1300759363430016,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0.02502345007115455,
|
||||
6.304424772117853))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.02301413756522116,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0.001923034001462487,
|
||||
1.53670836950172))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1973244147157191,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0.2397970951413721,
|
||||
1291.342011095509))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.09580440012477633,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0.05127576113762925,
|
||||
1.717176678806983))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.2697004098849359,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0.07498546954483619,
|
||||
2.506969905199901))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.1973244147157192,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0.0007743881879531337,
|
||||
4.170192301163831))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.06126482213438735,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
2.642491011905582))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.04761904761904762,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0.496935393325443,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
198.019801980198))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.04761904761904761,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0.496935393325443,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
198.019801980198))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=2.005657880389071e-122,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(270.0334165523604,
|
||||
5461.333333326708))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=5.728437460831947e-44,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(116.7944750275836,
|
||||
1931.995993191814))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.99,
|
||||
alternative='two.sided'),
|
||||
RResults(pvalue=0.574111858126088,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0.7949398282935892,
|
||||
1.436229679394333))),
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1300759363430016,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.797867027270803))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.0185217259520665,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
0.6785254803404526))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.9782608695652173,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
127.8497388102893))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.05625775074399956,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.032332939718425))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1808979350599346,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.502407513296985))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1652173913043479,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.820421051562392))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.0565217391304348,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.06224603077045))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.5,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
19.00192394479939))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.4999999999999999,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
19.00192394479939))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
3045.460216525746))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1186.440170942579))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.95,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.7416227010368963,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.293551891610822))),
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1300759363430016,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
4.375946050832565))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.0185217259520665,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.235282118191202))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.9782608695652173,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
657.2063583945989))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.05625775074399956,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.498867660683128))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1808979350599346,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
2.186159386716762))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.1652173913043479,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
3.335351451901569))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.0565217391304348,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
2.075407697450433))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.5,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
99.00009507969122))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.4999999999999999,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
99.00009507969123))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
4503.078257659934))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1811.766127544222))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.99,
|
||||
alternative='less'),
|
||||
RResults(pvalue=0.7416227010368963,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
1.396522811516685))),
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.979790445314723,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0.05119649909830196,
|
||||
Inf))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9990149169715733,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0.007163749169069961,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.1652173913043478,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0.5493234651081089,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9849086665340765,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0.1003538933958604,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9330176609214881,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0.146507416280863,
|
||||
Inf))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9782608695652174,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0.007821681994077808,
|
||||
Inf))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.02380952380952382,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(1.487678929918272,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.0238095238095238,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(1.487678929918272,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=2.005657880388915e-122,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(397.784359748113,
|
||||
Inf))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=5.728437460831983e-44,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(174.7148056880929,
|
||||
Inf))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.95,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.2959825901308897,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0.8828406663967776,
|
||||
Inf))),
|
||||
(Parameters(table=[[100, 2], [1000, 5]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.979790445314723,
|
||||
conditional_odds_ratio=0.25055839934223,
|
||||
conditional_odds_ratio_ci=(0.03045407081240429,
|
||||
Inf))),
|
||||
(Parameters(table=[[2, 7], [8, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9990149169715733,
|
||||
conditional_odds_ratio=0.0858623513573622,
|
||||
conditional_odds_ratio_ci=(0.002768053063547901,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 1], [10, 10]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.1652173913043478,
|
||||
conditional_odds_ratio=4.725646047336587,
|
||||
conditional_odds_ratio_ci=(0.2998184792279909,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 15], [20, 20]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9849086665340765,
|
||||
conditional_odds_ratio=0.3394396617440851,
|
||||
conditional_odds_ratio_ci=(0.06180414342643172,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 16], [16, 25]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9330176609214881,
|
||||
conditional_odds_ratio=0.4937791394540491,
|
||||
conditional_odds_ratio_ci=(0.09037094010066403,
|
||||
Inf))),
|
||||
(Parameters(table=[[10, 5], [10, 1]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.9782608695652174,
|
||||
conditional_odds_ratio=0.2116112781158479,
|
||||
conditional_odds_ratio_ci=(0.001521592095430679,
|
||||
Inf))),
|
||||
(Parameters(table=[[10, 5], [10, 0]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 0], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.02380952380952382,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0.6661157890359722,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 5], [1, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[5, 1], [0, 4]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.0238095238095238,
|
||||
conditional_odds_ratio=Inf,
|
||||
conditional_odds_ratio_ci=(0.6661157890359725,
|
||||
Inf))),
|
||||
(Parameters(table=[[0, 1], [3, 2]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=1,
|
||||
conditional_odds_ratio=0,
|
||||
conditional_odds_ratio_ci=(0,
|
||||
Inf))),
|
||||
(Parameters(table=[[200, 7], [8, 300]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=2.005657880388915e-122,
|
||||
conditional_odds_ratio=977.7866978606228,
|
||||
conditional_odds_ratio_ci=(297.9619252357688,
|
||||
Inf))),
|
||||
(Parameters(table=[[28, 21], [6, 1957]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=5.728437460831983e-44,
|
||||
conditional_odds_ratio=425.2403028434684,
|
||||
conditional_odds_ratio_ci=(130.3213490295859,
|
||||
Inf))),
|
||||
(Parameters(table=[[190, 800], [200, 900]],
|
||||
confidence_level=0.99,
|
||||
alternative='greater'),
|
||||
RResults(pvalue=0.2959825901308897,
|
||||
conditional_odds_ratio=1.068697577856801,
|
||||
conditional_odds_ratio_ci=(0.8176272148267533,
|
||||
Inf))),
|
||||
]
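Each entry above pairs a 2x2 table and alternative with the p-value and conditional odds ratio reported by R's fisher.test. A hedged sketch (not from the removed file) of how the p-values might be checked against SciPy; note the spelling difference between R's 'two.sided' and SciPy's 'two-sided', and that scipy.stats.fisher_exact reports the sample odds ratio rather than R's conditional maximum-likelihood estimate, so only the p-value is compared here (the tolerance is illustrative):

import numpy as np
from scipy import stats

alternatives = {'two.sided': 'two-sided', 'less': 'less', 'greater': 'greater'}

for params, r_result in data:
    _, pvalue = stats.fisher_exact(params.table,
                                   alternative=alternatives[params.alternative])
    np.testing.assert_allclose(pvalue, r_result.pvalue, rtol=1e-7)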
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,108 +0,0 @@
NIST/ITL StRD
|
||||
Dataset Name: AtmWtAg (AtmWtAg.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 108)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Powell, L.J., Murphy, T.J. and Gramlich, J.W. (1982).
|
||||
"The Absolute Isotopic Abundance & Atomic Weight
|
||||
of a Reference Sample of Silver".
|
||||
NBS Journal of Research, 87, pp. 9-19.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
2 Treatments
|
||||
24 Replicates/Cell
|
||||
48 Observations
|
||||
7 Constant Leading Digits
|
||||
Average Level of Difficulty
|
||||
Observed Data
|
||||
|
||||
|
||||
Model: 3 Parameters (mu, tau_1, tau_2)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
|
||||
Between Instrument 1 3.63834187500000E-09 3.63834187500000E-09 1.59467335677930E+01
|
||||
Within Instrument 46 1.04951729166667E-08 2.28155932971014E-10
|
||||
|
||||
Certified R-Squared 2.57426544538321E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.51048314446410E-05
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Instrument AgWt
|
||||
1 107.8681568
|
||||
1 107.8681465
|
||||
1 107.8681572
|
||||
1 107.8681785
|
||||
1 107.8681446
|
||||
1 107.8681903
|
||||
1 107.8681526
|
||||
1 107.8681494
|
||||
1 107.8681616
|
||||
1 107.8681587
|
||||
1 107.8681519
|
||||
1 107.8681486
|
||||
1 107.8681419
|
||||
1 107.8681569
|
||||
1 107.8681508
|
||||
1 107.8681672
|
||||
1 107.8681385
|
||||
1 107.8681518
|
||||
1 107.8681662
|
||||
1 107.8681424
|
||||
1 107.8681360
|
||||
1 107.8681333
|
||||
1 107.8681610
|
||||
1 107.8681477
|
||||
2 107.8681079
|
||||
2 107.8681344
|
||||
2 107.8681513
|
||||
2 107.8681197
|
||||
2 107.8681604
|
||||
2 107.8681385
|
||||
2 107.8681642
|
||||
2 107.8681365
|
||||
2 107.8681151
|
||||
2 107.8681082
|
||||
2 107.8681517
|
||||
2 107.8681448
|
||||
2 107.8681198
|
||||
2 107.8681482
|
||||
2 107.8681334
|
||||
2 107.8681609
|
||||
2 107.8681101
|
||||
2 107.8681512
|
||||
2 107.8681469
|
||||
2 107.8681360
|
||||
2 107.8681254
|
||||
2 107.8681261
|
||||
2 107.8681450
|
||||
2 107.8681368
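The certified values above define a one-way ANOVA benchmark (F = 1.59467335677930E+01 on 1 and 46 degrees of freedom). A brief sketch (not part of the dataset) of reproducing the F statistic with scipy.stats.f_oneway, with the 48 observations transcribed from the Data block:

import numpy as np
from scipy import stats

# AgWt readings for Instrument 1 and Instrument 2, copied from above
inst1 = np.array([107.8681568, 107.8681465, 107.8681572, 107.8681785,
                  107.8681446, 107.8681903, 107.8681526, 107.8681494,
                  107.8681616, 107.8681587, 107.8681519, 107.8681486,
                  107.8681419, 107.8681569, 107.8681508, 107.8681672,
                  107.8681385, 107.8681518, 107.8681662, 107.8681424,
                  107.8681360, 107.8681333, 107.8681610, 107.8681477])
inst2 = np.array([107.8681079, 107.8681344, 107.8681513, 107.8681197,
                  107.8681604, 107.8681385, 107.8681642, 107.8681365,
                  107.8681151, 107.8681082, 107.8681517, 107.8681448,
                  107.8681198, 107.8681482, 107.8681334, 107.8681609,
                  107.8681101, 107.8681512, 107.8681469, 107.8681360,
                  107.8681254, 107.8681261, 107.8681450, 107.8681368])

F, p = stats.f_oneway(inst1, inst2)
print(F)  # certified F statistic: 1.59467335677930E+01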
@@ -1,85 +0,0 @@
NIST/ITL StRD
|
||||
Dataset Name: SiRstv (SiRstv.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 85)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Ehrstein, James and Croarkin, M. Carroll.
|
||||
Unpublished NIST dataset.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
5 Treatments
|
||||
5 Replicates/Cell
|
||||
25 Observations
|
||||
3 Constant Leading Digits
|
||||
Lower Level of Difficulty
|
||||
Observed Data
|
||||
|
||||
|
||||
Model: 6 Parameters (mu,tau_1, ... , tau_5)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
Between Instrument 4 5.11462616000000E-02 1.27865654000000E-02 1.18046237440255E+00
|
||||
Within Instrument 20 2.16636560000000E-01 1.08318280000000E-02
|
||||
|
||||
Certified R-Squared 1.90999039051129E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.04076068334656E-01
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Instrument Resistance
|
||||
1 196.3052
|
||||
1 196.1240
|
||||
1 196.1890
|
||||
1 196.2569
|
||||
1 196.3403
|
||||
2 196.3042
|
||||
2 196.3825
|
||||
2 196.1669
|
||||
2 196.3257
|
||||
2 196.0422
|
||||
3 196.1303
|
||||
3 196.2005
|
||||
3 196.2889
|
||||
3 196.0343
|
||||
3 196.1811
|
||||
4 196.2795
|
||||
4 196.1748
|
||||
4 196.1494
|
||||
4 196.1485
|
||||
4 195.9885
|
||||
5 196.2119
|
||||
5 196.1051
|
||||
5 196.1850
|
||||
5 196.0052
|
||||
5 196.2090
|
||||
@@ -1,249 +0,0 @@
NIST/ITL StRD
|
||||
Dataset Name: SmLs01 (SmLs01.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 249)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Simon, Stephen D. and Lesage, James P. (1989).
|
||||
"Assessing the Accuracy of ANOVA Calculations in
|
||||
Statistical Software".
|
||||
Computational Statistics & Data Analysis, 8, pp. 325-332.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
9 Treatments
|
||||
21 Replicates/Cell
|
||||
189 Observations
|
||||
1 Constant Leading Digit
|
||||
Lower Level of Difficulty
|
||||
Generated Data
|
||||
|
||||
|
||||
Model: 10 Parameters (mu,tau_1, ... , tau_9)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
Between Treatment 8 1.68000000000000E+00 2.10000000000000E-01 2.10000000000000E+01
|
||||
Within Treatment 180 1.80000000000000E+00 1.00000000000000E-02
|
||||
|
||||
Certified R-Squared 4.82758620689655E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.00000000000000E-01
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Treatment Response
|
||||
1 1.4
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
2 1.3
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
3 1.5
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
4 1.3
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
5 1.5
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
6 1.3
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
7 1.5
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
8 1.3
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
9 1.5
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,249 +0,0 @@
NIST/ITL StRD
|
||||
Dataset Name: SmLs04 (SmLs04.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 249)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Simon, Stephen D. and Lesage, James P. (1989).
|
||||
"Assessing the Accuracy of ANOVA Calculations in
|
||||
Statistical Software".
|
||||
Computational Statistics & Data Analysis, 8, pp. 325-332.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
9 Treatments
|
||||
21 Replicates/Cell
|
||||
189 Observations
|
||||
7 Constant Leading Digits
|
||||
Average Level of Difficulty
|
||||
Generated Data
|
||||
|
||||
|
||||
Model: 10 Parameters (mu,tau_1, ... , tau_9)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
Between Treatment 8 1.68000000000000E+00 2.10000000000000E-01 2.10000000000000E+01
|
||||
Within Treatment 180 1.80000000000000E+00 1.00000000000000E-02
|
||||
|
||||
Certified R-Squared 4.82758620689655E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.00000000000000E-01
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Treatment Response
|
||||
1 1000000.4
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
2 1000000.3
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
3 1000000.5
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
4 1000000.3
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
5 1000000.5
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
6 1000000.3
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
7 1000000.5
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
8 1000000.3
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
9 1000000.5
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,249 +0,0 @@
NIST/ITL StRD
|
||||
Dataset Name: SmLs07 (SmLs07.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 249)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Simon, Stephen D. and Lesage, James P. (1989).
|
||||
"Assessing the Accuracy of ANOVA Calculations in
|
||||
Statistical Software".
|
||||
Computational Statistics & Data Analysis, 8, pp. 325-332.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
9 Treatments
|
||||
21 Replicates/Cell
|
||||
189 Observations
|
||||
13 Constant Leading Digits
|
||||
Higher Level of Difficulty
|
||||
Generated Data
|
||||
|
||||
|
||||
Model: 10 Parameters (mu,tau_1, ... , tau_9)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
Between Treatment 8 1.68000000000000E+00 2.10000000000000E-01 2.10000000000000E+01
|
||||
Within Treatment 180 1.80000000000000E+00 1.00000000000000E-02
|
||||
|
||||
Certified R-Squared 4.82758620689655E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.00000000000000E-01
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Treatment Response
|
||||
1 1000000000000.4
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
2 1000000000000.3
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
3 1000000000000.5
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
4 1000000000000.3
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
5 1000000000000.5
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
6 1000000000000.3
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
7 1000000000000.5
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
8 1000000000000.3
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
9 1000000000000.5
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
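The certified ANOVA quantities above (between/within sums of squares, F statistic 2.1E+01, R-squared 4.8276E-01) can be spot-checked with scipy.stats.f_oneway once the data block is parsed. A minimal sketch, assuming the listing has been saved locally as SmLs07.dat (hypothetical path) with the data starting at line 61, as stated in the header; because the responses share 13 constant leading digits, this doubles as a quick probe of numerical stability:

import numpy as np
from scipy.stats import f_oneway

# Data block starts at line 61 of the file; columns are treatment, response.
treatment, response = np.loadtxt("SmLs07.dat", skiprows=60, unpack=True)
groups = [response[treatment == k] for k in np.unique(treatment)]

f_stat, p_value = f_oneway(*groups)
print(f_stat)  # expected to be close to the certified F statistic, 21.0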
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,97 +0,0 @@
NIST/ITL StRD
Dataset Name:  Norris (Norris.dat)

File Format:   ASCII
               Certified Values (lines 31 to 46)
               Data             (lines 61 to 96)

Procedure:     Linear Least Squares Regression

Reference:     Norris, J., NIST.
               Calibration of Ozone Monitors.

Data:          1 Response Variable (y)
               1 Predictor Variable (x)
               36 Observations
               Lower Level of Difficulty
               Observed Data

Model:         Linear Class
               2 Parameters (B0, B1)

               y = B0 + B1*x + e

Certified Regression Statistics

                                          Standard Deviation
  Parameter     Estimate                  of Estimate

  B0           -0.262323073774029         0.232818234301152
  B1            1.00211681802045          0.429796848199937E-03

  Residual
  Standard Deviation   0.884796396144373

  R-Squared            0.999993745883712

Certified Analysis of Variance Table

  Source of    Degrees of    Sums of             Mean
  Variation    Freedom       Squares             Squares             F Statistic

  Regression   1             4255954.13232369    4255954.13232369    5436385.54079785
  Residual     34            26.6173985294224    0.782864662630069

Data: y x
0.1 0.2
338.8 337.4
118.1 118.2
888.0 884.6
9.2 10.1
228.1 226.5
668.5 666.3
998.5 996.3
449.1 448.6
778.9 777.0
559.2 558.2
0.3 0.4
0.1 0.6
778.1 775.5
668.8 666.9
339.3 338.0
448.9 447.5
10.8 11.6
557.7 556.0
228.3 228.1
998.0 995.8
888.8 887.6
119.6 120.2
0.3 0.3
0.6 0.3
557.6 556.8
339.3 339.1
888.0 887.2
998.5 999.0
778.9 779.0
10.2 11.1
117.6 118.3
228.9 229.2
668.4 669.1
449.2 448.9
0.2 0.5
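As a cross-check on the certified regression statistics, scipy.stats.linregress recovers B0, B1 and R-squared from the 36 observations. A minimal sketch, assuming the listing has been saved locally as Norris.dat (hypothetical path) with the data block starting at line 61; note that the columns are ordered y then x:

import numpy as np
from scipy.stats import linregress

# Data block starts at line 61 of the file; column order is y, then x.
y, x = np.loadtxt("Norris.dat", skiprows=60, unpack=True)
res = linregress(x, y)

print(res.intercept)   # certified B0: -0.262323073774029
print(res.slope)       # certified B1:  1.00211681802045
print(res.rvalue**2)   # certified R-Squared: 0.999993745883712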
Binary file not shown.
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -1,568 +0,0 @@
|
|||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
from scipy.stats import (binned_statistic, binned_statistic_2d,
|
||||
binned_statistic_dd)
|
||||
from scipy._lib._util import check_random_state
|
||||
|
||||
from .common_tests import check_named_results
|
||||
|
||||
|
||||
class TestBinnedStatistic:
|
||||
|
||||
@classmethod
|
||||
def setup_class(cls):
|
||||
rng = check_random_state(9865)
|
||||
cls.x = rng.uniform(size=100)
|
||||
cls.y = rng.uniform(size=100)
|
||||
cls.v = rng.uniform(size=100)
|
||||
cls.X = rng.uniform(size=(100, 3))
|
||||
cls.w = rng.uniform(size=100)
|
||||
cls.u = rng.uniform(size=100) + 1e6
|
||||
|
||||
def test_1d_count(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
count1, edges1, bc = binned_statistic(x, v, 'count', bins=10)
|
||||
count2, edges2 = np.histogram(x, bins=10)
|
||||
|
||||
assert_allclose(count1, count2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_gh5927(self):
|
||||
# smoke test for gh5927 - binned_statistic was using `is` for string
|
||||
# comparison
|
||||
x = self.x
|
||||
v = self.v
|
||||
statistics = ['mean', 'median', 'count', 'sum']
|
||||
for statistic in statistics:
|
||||
binned_statistic(x, v, statistic, bins=10)
|
||||
|
||||
def test_big_number_std(self):
|
||||
# tests for numerical stability of std calculation
|
||||
# see issue gh-10126 for more
|
||||
x = self.x
|
||||
u = self.u
|
||||
stat1, edges1, bc = binned_statistic(x, u, 'std', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, u, np.std, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
|
||||
def test_empty_bins_std(self):
|
||||
# tests that std returns gives nan for empty bins
|
||||
x = self.x
|
||||
u = self.u
|
||||
print(binned_statistic(x, u, 'count', bins=1000))
|
||||
stat1, edges1, bc = binned_statistic(x, u, 'std', bins=1000)
|
||||
stat2, edges2, bc = binned_statistic(x, u, np.std, bins=1000)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
|
||||
def test_non_finite_inputs_and_int_bins(self):
|
||||
# if either `values` or `sample` contain np.inf or np.nan throw
|
||||
# see issue gh-9010 for more
|
||||
x = self.x
|
||||
u = self.u
|
||||
orig = u[0]
|
||||
u[0] = np.inf
|
||||
assert_raises(ValueError, binned_statistic, u, x, 'std', bins=10)
|
||||
# need to test for non-python specific ints, e.g. np.int8, np.int64
|
||||
assert_raises(ValueError, binned_statistic, u, x, 'std',
|
||||
bins=np.int64(10))
|
||||
u[0] = np.nan
|
||||
assert_raises(ValueError, binned_statistic, u, x, 'count', bins=10)
|
||||
# replace original value, u belongs the class
|
||||
u[0] = orig
|
||||
|
||||
def test_1d_result_attributes(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
res = binned_statistic(x, v, 'count', bins=10)
|
||||
attributes = ('statistic', 'bin_edges', 'binnumber')
|
||||
check_named_results(res, attributes)
|
||||
|
||||
def test_1d_sum(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
sum1, edges1, bc = binned_statistic(x, v, 'sum', bins=10)
|
||||
sum2, edges2 = np.histogram(x, bins=10, weights=v)
|
||||
|
||||
assert_allclose(sum1, sum2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_mean(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic(x, v, 'mean', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, v, np.mean, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_std(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic(x, v, 'std', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, v, np.std, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_min(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic(x, v, 'min', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, v, np.min, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_max(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic(x, v, 'max', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, v, np.max, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_median(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic(x, v, 'median', bins=10)
|
||||
stat2, edges2, bc = binned_statistic(x, v, np.median, bins=10)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_1d_bincode(self):
|
||||
x = self.x[:20]
|
||||
v = self.v[:20]
|
||||
|
||||
count1, edges1, bc = binned_statistic(x, v, 'count', bins=3)
|
||||
bc2 = np.array([3, 2, 1, 3, 2, 3, 3, 3, 3, 1, 1, 3, 3, 1, 2, 3, 1,
|
||||
1, 2, 1])
|
||||
|
||||
bcount = [(bc == i).sum() for i in np.unique(bc)]
|
||||
|
||||
assert_allclose(bc, bc2)
|
||||
assert_allclose(bcount, count1)
|
||||
|
||||
def test_1d_range_keyword(self):
|
||||
# Regression test for gh-3063, range can be (min, max) or [(min, max)]
|
||||
np.random.seed(9865)
|
||||
x = np.arange(30)
|
||||
data = np.random.random(30)
|
||||
|
||||
mean, bins, _ = binned_statistic(x[:15], data[:15])
|
||||
mean_range, bins_range, _ = binned_statistic(x, data, range=[(0, 14)])
|
||||
mean_range2, bins_range2, _ = binned_statistic(x, data, range=(0, 14))
|
||||
|
||||
assert_allclose(mean, mean_range)
|
||||
assert_allclose(bins, bins_range)
|
||||
assert_allclose(mean, mean_range2)
|
||||
assert_allclose(bins, bins_range2)
|
||||
|
||||
def test_1d_multi_values(self):
|
||||
x = self.x
|
||||
v = self.v
|
||||
w = self.w
|
||||
|
||||
stat1v, edges1v, bc1v = binned_statistic(x, v, 'mean', bins=10)
|
||||
stat1w, edges1w, bc1w = binned_statistic(x, w, 'mean', bins=10)
|
||||
stat2, edges2, bc2 = binned_statistic(x, [v, w], 'mean', bins=10)
|
||||
|
||||
assert_allclose(stat2[0], stat1v)
|
||||
assert_allclose(stat2[1], stat1w)
|
||||
assert_allclose(edges1v, edges2)
|
||||
assert_allclose(bc1v, bc2)
|
||||
|
||||
def test_2d_count(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
count1, binx1, biny1, bc = binned_statistic_2d(
|
||||
x, y, v, 'count', bins=5)
|
||||
count2, binx2, biny2 = np.histogram2d(x, y, bins=5)
|
||||
|
||||
assert_allclose(count1, count2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_result_attributes(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
res = binned_statistic_2d(x, y, v, 'count', bins=5)
|
||||
attributes = ('statistic', 'x_edge', 'y_edge', 'binnumber')
|
||||
check_named_results(res, attributes)
|
||||
|
||||
def test_2d_sum(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
sum1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'sum', bins=5)
|
||||
sum2, binx2, biny2 = np.histogram2d(x, y, bins=5, weights=v)
|
||||
|
||||
assert_allclose(sum1, sum2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_mean(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'mean', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.mean, bins=5)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_mean_unicode(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(
|
||||
x, y, v, 'mean', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.mean, bins=5)
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_std(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'std', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.std, bins=5)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_min(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'min', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.min, bins=5)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_max(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'max', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.max, bins=5)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_median(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat1, binx1, biny1, bc = binned_statistic_2d(
|
||||
x, y, v, 'median', bins=5)
|
||||
stat2, binx2, biny2, bc = binned_statistic_2d(
|
||||
x, y, v, np.median, bins=5)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(binx1, binx2)
|
||||
assert_allclose(biny1, biny2)
|
||||
|
||||
def test_2d_bincode(self):
|
||||
x = self.x[:20]
|
||||
y = self.y[:20]
|
||||
v = self.v[:20]
|
||||
|
||||
count1, binx1, biny1, bc = binned_statistic_2d(
|
||||
x, y, v, 'count', bins=3)
|
||||
bc2 = np.array([17, 11, 6, 16, 11, 17, 18, 17, 17, 7, 6, 18, 16,
|
||||
6, 11, 16, 6, 6, 11, 8])
|
||||
|
||||
bcount = [(bc == i).sum() for i in np.unique(bc)]
|
||||
|
||||
assert_allclose(bc, bc2)
|
||||
count1adj = count1[count1.nonzero()]
|
||||
assert_allclose(bcount, count1adj)
|
||||
|
||||
def test_2d_multi_values(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
w = self.w
|
||||
|
||||
stat1v, binx1v, biny1v, bc1v = binned_statistic_2d(
|
||||
x, y, v, 'mean', bins=8)
|
||||
stat1w, binx1w, biny1w, bc1w = binned_statistic_2d(
|
||||
x, y, w, 'mean', bins=8)
|
||||
stat2, binx2, biny2, bc2 = binned_statistic_2d(
|
||||
x, y, [v, w], 'mean', bins=8)
|
||||
|
||||
assert_allclose(stat2[0], stat1v)
|
||||
assert_allclose(stat2[1], stat1w)
|
||||
assert_allclose(binx1v, binx2)
|
||||
assert_allclose(biny1w, biny2)
|
||||
assert_allclose(bc1v, bc2)
|
||||
|
||||
def test_2d_binnumbers_unraveled(self):
|
||||
x = self.x
|
||||
y = self.y
|
||||
v = self.v
|
||||
|
||||
stat, edgesx, bcx = binned_statistic(x, v, 'mean', bins=20)
|
||||
stat, edgesy, bcy = binned_statistic(y, v, 'mean', bins=10)
|
||||
|
||||
stat2, edgesx2, edgesy2, bc2 = binned_statistic_2d(
|
||||
x, y, v, 'mean', bins=(20, 10), expand_binnumbers=True)
|
||||
|
||||
bcx3 = np.searchsorted(edgesx, x, side='right')
|
||||
bcy3 = np.searchsorted(edgesy, y, side='right')
|
||||
|
||||
# `numpy.searchsorted` is non-inclusive on right-edge, compensate
|
||||
bcx3[x == x.max()] -= 1
|
||||
bcy3[y == y.max()] -= 1
|
||||
|
||||
assert_allclose(bcx, bc2[0])
|
||||
assert_allclose(bcy, bc2[1])
|
||||
assert_allclose(bcx3, bc2[0])
|
||||
assert_allclose(bcy3, bc2[1])
|
||||
|
||||
def test_dd_count(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3)
|
||||
count2, edges2 = np.histogramdd(X, bins=3)
|
||||
|
||||
assert_allclose(count1, count2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_result_attributes(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
res = binned_statistic_dd(X, v, 'count', bins=3)
|
||||
attributes = ('statistic', 'bin_edges', 'binnumber')
|
||||
check_named_results(res, attributes)
|
||||
|
||||
def test_dd_sum(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
sum1, edges1, bc = binned_statistic_dd(X, v, 'sum', bins=3)
|
||||
sum2, edges2 = np.histogramdd(X, bins=3, weights=v)
|
||||
sum3, edges3, bc = binned_statistic_dd(X, v, np.sum, bins=3)
|
||||
|
||||
assert_allclose(sum1, sum2)
|
||||
assert_allclose(edges1, edges2)
|
||||
assert_allclose(sum1, sum3)
|
||||
assert_allclose(edges1, edges3)
|
||||
|
||||
def test_dd_mean(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic_dd(X, v, 'mean', bins=3)
|
||||
stat2, edges2, bc = binned_statistic_dd(X, v, np.mean, bins=3)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_std(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic_dd(X, v, 'std', bins=3)
|
||||
stat2, edges2, bc = binned_statistic_dd(X, v, np.std, bins=3)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_min(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic_dd(X, v, 'min', bins=3)
|
||||
stat2, edges2, bc = binned_statistic_dd(X, v, np.min, bins=3)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_max(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic_dd(X, v, 'max', bins=3)
|
||||
stat2, edges2, bc = binned_statistic_dd(X, v, np.max, bins=3)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_median(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat1, edges1, bc = binned_statistic_dd(X, v, 'median', bins=3)
|
||||
stat2, edges2, bc = binned_statistic_dd(X, v, np.median, bins=3)
|
||||
|
||||
assert_allclose(stat1, stat2)
|
||||
assert_allclose(edges1, edges2)
|
||||
|
||||
def test_dd_bincode(self):
|
||||
X = self.X[:20]
|
||||
v = self.v[:20]
|
||||
|
||||
count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3)
|
||||
bc2 = np.array([63, 33, 86, 83, 88, 67, 57, 33, 42, 41, 82, 83, 92,
|
||||
32, 36, 91, 43, 87, 81, 81])
|
||||
|
||||
bcount = [(bc == i).sum() for i in np.unique(bc)]
|
||||
|
||||
assert_allclose(bc, bc2)
|
||||
count1adj = count1[count1.nonzero()]
|
||||
assert_allclose(bcount, count1adj)
|
||||
|
||||
def test_dd_multi_values(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
w = self.w
|
||||
|
||||
for stat in ["count", "sum", "mean", "std", "min", "max", "median",
|
||||
np.std]:
|
||||
stat1v, edges1v, bc1v = binned_statistic_dd(X, v, stat, bins=8)
|
||||
stat1w, edges1w, bc1w = binned_statistic_dd(X, w, stat, bins=8)
|
||||
stat2, edges2, bc2 = binned_statistic_dd(X, [v, w], stat, bins=8)
|
||||
assert_allclose(stat2[0], stat1v)
|
||||
assert_allclose(stat2[1], stat1w)
|
||||
assert_allclose(edges1v, edges2)
|
||||
assert_allclose(edges1w, edges2)
|
||||
assert_allclose(bc1v, bc2)
|
||||
|
||||
def test_dd_binnumbers_unraveled(self):
|
||||
X = self.X
|
||||
v = self.v
|
||||
|
||||
stat, edgesx, bcx = binned_statistic(X[:, 0], v, 'mean', bins=15)
|
||||
stat, edgesy, bcy = binned_statistic(X[:, 1], v, 'mean', bins=20)
|
||||
stat, edgesz, bcz = binned_statistic(X[:, 2], v, 'mean', bins=10)
|
||||
|
||||
stat2, edges2, bc2 = binned_statistic_dd(
|
||||
X, v, 'mean', bins=(15, 20, 10), expand_binnumbers=True)
|
||||
|
||||
assert_allclose(bcx, bc2[0])
|
||||
assert_allclose(bcy, bc2[1])
|
||||
assert_allclose(bcz, bc2[2])
|
||||
|
||||
def test_dd_binned_statistic_result(self):
|
||||
# NOTE: tests the reuse of bin_edges from previous call
|
||||
x = np.random.random((10000, 3))
|
||||
v = np.random.random(10000)
|
||||
bins = np.linspace(0, 1, 10)
|
||||
bins = (bins, bins, bins)
|
||||
|
||||
result = binned_statistic_dd(x, v, 'mean', bins=bins)
|
||||
stat = result.statistic
|
||||
|
||||
result = binned_statistic_dd(x, v, 'mean',
|
||||
binned_statistic_result=result)
|
||||
stat2 = result.statistic
|
||||
|
||||
assert_allclose(stat, stat2)
|
||||
|
||||
def test_dd_zero_dedges(self):
|
||||
x = np.random.random((10000, 3))
|
||||
v = np.random.random(10000)
|
||||
bins = np.linspace(0, 1, 10)
|
||||
bins = np.append(bins, 1)
|
||||
bins = (bins, bins, bins)
|
||||
with assert_raises(ValueError, match='difference is numerically 0'):
|
||||
binned_statistic_dd(x, v, 'mean', bins=bins)
|
||||
|
||||
def test_dd_range_errors(self):
|
||||
# Test that descriptive exceptions are raised as appropriate for bad
|
||||
# values of the `range` argument. (See gh-12996)
|
||||
with assert_raises(ValueError,
|
||||
match='In range, start must be <= stop'):
|
||||
binned_statistic_dd([self.y], self.v,
|
||||
range=[[1, 0]])
|
||||
with assert_raises(
|
||||
ValueError,
|
||||
match='In dimension 1 of range, start must be <= stop'):
|
||||
binned_statistic_dd([self.x, self.y], self.v,
|
||||
range=[[1, 0], [0, 1]])
|
||||
with assert_raises(
|
||||
ValueError,
|
||||
match='In dimension 2 of range, start must be <= stop'):
|
||||
binned_statistic_dd([self.x, self.y], self.v,
|
||||
range=[[0, 1], [1, 0]])
|
||||
with assert_raises(
|
||||
ValueError,
|
||||
match='range given for 1 dimensions; 2 required'):
|
||||
binned_statistic_dd([self.x, self.y], self.v,
|
||||
range=[[0, 1]])
|
||||
|
||||
def test_binned_statistic_float32(self):
|
||||
X = np.array([0, 0.42358226], dtype=np.float32)
|
||||
stat, _, _ = binned_statistic(X, None, 'count', bins=5)
|
||||
assert_allclose(stat, np.array([1, 0, 0, 0, 1], dtype=np.float64))
|
||||
|
||||
def test_gh14332(self):
|
||||
# Test the wrong output when the `sample` is close to bin edge
|
||||
x = []
|
||||
size = 20
|
||||
for i in range(size):
|
||||
x += [1-0.1**i]
|
||||
|
||||
bins = np.linspace(0,1,11)
|
||||
sum1, edges1, bc = binned_statistic_dd(x, np.ones(len(x)),
|
||||
bins=[bins], statistic='sum')
|
||||
sum2, edges2 = np.histogram(x, bins=bins)
|
||||
|
||||
assert_allclose(sum1, sum2)
|
||||
assert_allclose(edges1[0], edges2)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.float64, np.complex128])
|
||||
@pytest.mark.parametrize("statistic", [np.mean, np.median, np.sum, np.std,
|
||||
np.min, np.max, 'count',
|
||||
lambda x: (x**2).sum(),
|
||||
lambda x: (x**2).sum() * 1j])
|
||||
def test_dd_all(self, dtype, statistic):
|
||||
def ref_statistic(x):
|
||||
return len(x) if statistic == 'count' else statistic(x)
|
||||
|
||||
rng = np.random.default_rng(3704743126639371)
|
||||
n = 10
|
||||
x = rng.random(size=n)
|
||||
i = x >= 0.5
|
||||
v = rng.random(size=n)
|
||||
if dtype is np.complex128:
|
||||
v = v + rng.random(size=n)*1j
|
||||
|
||||
stat, _, _ = binned_statistic_dd(x, v, statistic, bins=2)
|
||||
ref = np.array([ref_statistic(v[~i]), ref_statistic(v[i])])
|
||||
assert_allclose(stat, ref)
|
||||
assert stat.dtype == np.result_type(ref.dtype, np.float64)
|
||||
|
|
@@ -1,152 +0,0 @@
# Tests for the CensoredData class.

import pytest
import numpy as np
from numpy.testing import assert_equal, assert_array_equal
from scipy.stats import CensoredData


class TestCensoredData:

    def test_basic(self):
        uncensored = [1]
        left = [0]
        right = [2, 5]
        interval = [[2, 3]]
        data = CensoredData(uncensored, left=left, right=right,
                            interval=interval)
        assert_equal(data._uncensored, uncensored)
        assert_equal(data._left, left)
        assert_equal(data._right, right)
        assert_equal(data._interval, interval)

        udata = data._uncensor()
        assert_equal(udata, np.concatenate((uncensored, left, right,
                                            np.mean(interval, axis=1))))

    def test_right_censored(self):
        x = np.array([0, 3, 2.5])
        is_censored = np.array([0, 1, 0], dtype=bool)
        data = CensoredData.right_censored(x, is_censored)
        assert_equal(data._uncensored, x[~is_censored])
        assert_equal(data._right, x[is_censored])
        assert_equal(data._left, [])
        assert_equal(data._interval, np.empty((0, 2)))

    def test_left_censored(self):
        x = np.array([0, 3, 2.5])
        is_censored = np.array([0, 1, 0], dtype=bool)
        data = CensoredData.left_censored(x, is_censored)
        assert_equal(data._uncensored, x[~is_censored])
        assert_equal(data._left, x[is_censored])
        assert_equal(data._right, [])
        assert_equal(data._interval, np.empty((0, 2)))

    def test_interval_censored_basic(self):
        a = [0.5, 2.0, 3.0, 5.5]
        b = [1.0, 2.5, 3.5, 7.0]
        data = CensoredData.interval_censored(low=a, high=b)
        assert_array_equal(data._interval, np.array(list(zip(a, b))))
        assert data._uncensored.shape == (0,)
        assert data._left.shape == (0,)
        assert data._right.shape == (0,)

    def test_interval_censored_mixed(self):
        # This is actually a mix of uncensored, left-censored, right-censored
        # and interval-censored data.  Check that when the `interval_censored`
        # class method is used, the data is correctly separated into the
        # appropriate arrays.
        a = [0.5, -np.inf, -13.0, 2.0, 1.0, 10.0, -1.0]
        b = [0.5, 2500.0, np.inf, 3.0, 1.0, 11.0, np.inf]
        data = CensoredData.interval_censored(low=a, high=b)
        assert_array_equal(data._interval, [[2.0, 3.0], [10.0, 11.0]])
        assert_array_equal(data._uncensored, [0.5, 1.0])
        assert_array_equal(data._left, [2500.0])
        assert_array_equal(data._right, [-13.0, -1.0])

    def test_interval_to_other_types(self):
        # The interval parameter can represent uncensored and
        # left- or right-censored data.  Test the conversion of such
        # an example to the canonical form in which the different
        # types have been split into the separate arrays.
        interval = np.array([[0, 1],        # interval-censored
                             [2, 2],        # not censored
                             [3, 3],        # not censored
                             [9, np.inf],   # right-censored
                             [8, np.inf],   # right-censored
                             [-np.inf, 0],  # left-censored
                             [1, 2]])       # interval-censored
        data = CensoredData(interval=interval)
        assert_equal(data._uncensored, [2, 3])
        assert_equal(data._left, [0])
        assert_equal(data._right, [9, 8])
        assert_equal(data._interval, [[0, 1], [1, 2]])

    def test_empty_arrays(self):
        data = CensoredData(uncensored=[], left=[], right=[], interval=[])
        assert data._uncensored.shape == (0,)
        assert data._left.shape == (0,)
        assert data._right.shape == (0,)
        assert data._interval.shape == (0, 2)
        assert len(data) == 0

    def test_invalid_constructor_args(self):
        with pytest.raises(ValueError, match='must be a one-dimensional'):
            CensoredData(uncensored=[[1, 2, 3]])
        with pytest.raises(ValueError, match='must be a one-dimensional'):
            CensoredData(left=[[1, 2, 3]])
        with pytest.raises(ValueError, match='must be a one-dimensional'):
            CensoredData(right=[[1, 2, 3]])
        with pytest.raises(ValueError, match='must be a two-dimensional'):
            CensoredData(interval=[[1, 2, 3]])

        with pytest.raises(ValueError, match='must not contain nan'):
            CensoredData(uncensored=[1, np.nan, 2])
        with pytest.raises(ValueError, match='must not contain nan'):
            CensoredData(left=[1, np.nan, 2])
        with pytest.raises(ValueError, match='must not contain nan'):
            CensoredData(right=[1, np.nan, 2])
        with pytest.raises(ValueError, match='must not contain nan'):
            CensoredData(interval=[[1, np.nan], [2, 3]])

        with pytest.raises(ValueError,
                           match='both values must not be infinite'):
            CensoredData(interval=[[1, 3], [2, 9], [np.inf, np.inf]])

        with pytest.raises(ValueError,
                           match='left value must not exceed the right'):
            CensoredData(interval=[[1, 0], [2, 2]])

    @pytest.mark.parametrize('func', [CensoredData.left_censored,
                                      CensoredData.right_censored])
    def test_invalid_left_right_censored_args(self, func):
        with pytest.raises(ValueError,
                           match='`x` must be one-dimensional'):
            func([[1, 2, 3]], [0, 1, 1])
        with pytest.raises(ValueError,
                           match='`censored` must be one-dimensional'):
            func([1, 2, 3], [[0, 1, 1]])
        with pytest.raises(ValueError, match='`x` must not contain'):
            func([1, 2, np.nan], [0, 1, 1])
        with pytest.raises(ValueError, match='must have the same length'):
            func([1, 2, 3], [0, 0, 1, 1])

    def test_invalid_censored_args(self):
        with pytest.raises(ValueError,
                           match='`low` must be a one-dimensional'):
            CensoredData.interval_censored(low=[[3]], high=[4, 5])
        with pytest.raises(ValueError,
                           match='`high` must be a one-dimensional'):
            CensoredData.interval_censored(low=[3], high=[[4, 5]])
        with pytest.raises(ValueError, match='`low` must not contain'):
            CensoredData.interval_censored([1, 2, np.nan], [0, 1, 1])
        with pytest.raises(ValueError, match='must have the same length'):
            CensoredData.interval_censored([1, 2, 3], [0, 0, 1, 1])

    def test_count_censored(self):
        x = [1, 2, 3]
        # data1 has no censored data.
        data1 = CensoredData(x)
        assert data1.num_censored() == 0
        data2 = CensoredData(uncensored=[2.5], left=[10], interval=[[0, 1]])
        assert data2.num_censored() == 2
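For context, the CensoredData objects exercised above are what the distribution fit methods consume in the censored-data fitting tests later in this diff. A minimal usage sketch with illustrative made-up numbers, not taken from any file in this commit:

import numpy as np
from scipy.stats import CensoredData, weibull_min

# Ten observed failure times plus three units still running at t = 50
# (right-censored observations).
observed = np.array([12., 18., 21., 25., 30., 33., 37., 41., 44., 48.])
data = CensoredData(uncensored=observed, right=[50., 50., 50.])

# Maximum-likelihood fit that accounts for the censoring.
c, loc, scale = weibull_min.fit(data, floc=0)
print(c, scale)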
@ -1,294 +0,0 @@
|
|||
import numpy as np
|
||||
from numpy.testing import (assert_equal, assert_array_equal,
|
||||
assert_array_almost_equal, assert_approx_equal,
|
||||
assert_allclose)
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
from scipy import stats
|
||||
from scipy.special import xlogy
|
||||
from scipy.stats.contingency import (margins, expected_freq,
|
||||
chi2_contingency, association)
|
||||
|
||||
|
||||
def test_margins():
|
||||
a = np.array([1])
|
||||
m = margins(a)
|
||||
assert_equal(len(m), 1)
|
||||
m0 = m[0]
|
||||
assert_array_equal(m0, np.array([1]))
|
||||
|
||||
a = np.array([[1]])
|
||||
m0, m1 = margins(a)
|
||||
expected0 = np.array([[1]])
|
||||
expected1 = np.array([[1]])
|
||||
assert_array_equal(m0, expected0)
|
||||
assert_array_equal(m1, expected1)
|
||||
|
||||
a = np.arange(12).reshape(2, 6)
|
||||
m0, m1 = margins(a)
|
||||
expected0 = np.array([[15], [51]])
|
||||
expected1 = np.array([[6, 8, 10, 12, 14, 16]])
|
||||
assert_array_equal(m0, expected0)
|
||||
assert_array_equal(m1, expected1)
|
||||
|
||||
a = np.arange(24).reshape(2, 3, 4)
|
||||
m0, m1, m2 = margins(a)
|
||||
expected0 = np.array([[[66]], [[210]]])
|
||||
expected1 = np.array([[[60], [92], [124]]])
|
||||
expected2 = np.array([[[60, 66, 72, 78]]])
|
||||
assert_array_equal(m0, expected0)
|
||||
assert_array_equal(m1, expected1)
|
||||
assert_array_equal(m2, expected2)
|
||||
|
||||
|
||||
def test_expected_freq():
|
||||
assert_array_equal(expected_freq([1]), np.array([1.0]))
|
||||
|
||||
observed = np.array([[[2, 0], [0, 2]], [[0, 2], [2, 0]], [[1, 1], [1, 1]]])
|
||||
e = expected_freq(observed)
|
||||
assert_array_equal(e, np.ones_like(observed))
|
||||
|
||||
observed = np.array([[10, 10, 20], [20, 20, 20]])
|
||||
e = expected_freq(observed)
|
||||
correct = np.array([[12., 12., 16.], [18., 18., 24.]])
|
||||
assert_array_almost_equal(e, correct)
|
||||
|
||||
|
||||
class TestChi2Contingency:
|
||||
def test_chi2_contingency_trivial(self):
|
||||
# Some very simple tests for chi2_contingency.
|
||||
|
||||
# A trivial case
|
||||
obs = np.array([[1, 2], [1, 2]])
|
||||
chi2, p, dof, expected = chi2_contingency(obs, correction=False)
|
||||
assert_equal(chi2, 0.0)
|
||||
assert_equal(p, 1.0)
|
||||
assert_equal(dof, 1)
|
||||
assert_array_equal(obs, expected)
|
||||
|
||||
# A *really* trivial case: 1-D data.
|
||||
obs = np.array([1, 2, 3])
|
||||
chi2, p, dof, expected = chi2_contingency(obs, correction=False)
|
||||
assert_equal(chi2, 0.0)
|
||||
assert_equal(p, 1.0)
|
||||
assert_equal(dof, 0)
|
||||
assert_array_equal(obs, expected)
|
||||
|
||||
def test_chi2_contingency_R(self):
|
||||
# Some test cases that were computed independently, using R.
|
||||
|
||||
# Rcode = \
|
||||
# """
|
||||
# # Data vector.
|
||||
# data <- c(
|
||||
# 12, 34, 23, 4, 47, 11,
|
||||
# 35, 31, 11, 34, 10, 18,
|
||||
# 12, 32, 9, 18, 13, 19,
|
||||
# 12, 12, 14, 9, 33, 25
|
||||
# )
|
||||
#
|
||||
# # Create factor tags:r=rows, c=columns, t=tiers
|
||||
# r <- factor(gl(4, 2*3, 2*3*4, labels=c("r1", "r2", "r3", "r4")))
|
||||
# c <- factor(gl(3, 1, 2*3*4, labels=c("c1", "c2", "c3")))
|
||||
# t <- factor(gl(2, 3, 2*3*4, labels=c("t1", "t2")))
|
||||
#
|
||||
# # 3-way Chi squared test of independence
|
||||
# s = summary(xtabs(data~r+c+t))
|
||||
# print(s)
|
||||
# """
|
||||
# Routput = \
|
||||
# """
|
||||
# Call: xtabs(formula = data ~ r + c + t)
|
||||
# Number of cases in table: 478
|
||||
# Number of factors: 3
|
||||
# Test for independence of all factors:
|
||||
# Chisq = 102.17, df = 17, p-value = 3.514e-14
|
||||
# """
|
||||
obs = np.array(
|
||||
[[[12, 34, 23],
|
||||
[35, 31, 11],
|
||||
[12, 32, 9],
|
||||
[12, 12, 14]],
|
||||
[[4, 47, 11],
|
||||
[34, 10, 18],
|
||||
[18, 13, 19],
|
||||
[9, 33, 25]]])
|
||||
chi2, p, dof, expected = chi2_contingency(obs)
|
||||
assert_approx_equal(chi2, 102.17, significant=5)
|
||||
assert_approx_equal(p, 3.514e-14, significant=4)
|
||||
assert_equal(dof, 17)
|
||||
|
||||
# Rcode = \
|
||||
# """
|
||||
# # Data vector.
|
||||
# data <- c(
|
||||
# #
|
||||
# 12, 17,
|
||||
# 11, 16,
|
||||
# #
|
||||
# 11, 12,
|
||||
# 15, 16,
|
||||
# #
|
||||
# 23, 15,
|
||||
# 30, 22,
|
||||
# #
|
||||
# 14, 17,
|
||||
# 15, 16
|
||||
# )
|
||||
#
|
||||
# # Create factor tags:r=rows, c=columns, d=depths(?), t=tiers
|
||||
# r <- factor(gl(2, 2, 2*2*2*2, labels=c("r1", "r2")))
|
||||
# c <- factor(gl(2, 1, 2*2*2*2, labels=c("c1", "c2")))
|
||||
# d <- factor(gl(2, 4, 2*2*2*2, labels=c("d1", "d2")))
|
||||
# t <- factor(gl(2, 8, 2*2*2*2, labels=c("t1", "t2")))
|
||||
#
|
||||
# # 4-way Chi squared test of independence
|
||||
# s = summary(xtabs(data~r+c+d+t))
|
||||
# print(s)
|
||||
# """
|
||||
# Routput = \
|
||||
# """
|
||||
# Call: xtabs(formula = data ~ r + c + d + t)
|
||||
# Number of cases in table: 262
|
||||
# Number of factors: 4
|
||||
# Test for independence of all factors:
|
||||
# Chisq = 8.758, df = 11, p-value = 0.6442
|
||||
# """
|
||||
obs = np.array(
|
||||
[[[[12, 17],
|
||||
[11, 16]],
|
||||
[[11, 12],
|
||||
[15, 16]]],
|
||||
[[[23, 15],
|
||||
[30, 22]],
|
||||
[[14, 17],
|
||||
[15, 16]]]])
|
||||
chi2, p, dof, expected = chi2_contingency(obs)
|
||||
assert_approx_equal(chi2, 8.758, significant=4)
|
||||
assert_approx_equal(p, 0.6442, significant=4)
|
||||
assert_equal(dof, 11)
|
||||
|
||||
def test_chi2_contingency_g(self):
|
||||
c = np.array([[15, 60], [15, 90]])
|
||||
g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood',
|
||||
correction=False)
|
||||
assert_allclose(g, 2*xlogy(c, c/e).sum())
|
||||
|
||||
g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood',
|
||||
correction=True)
|
||||
c_corr = c + np.array([[-0.5, 0.5], [0.5, -0.5]])
|
||||
assert_allclose(g, 2*xlogy(c_corr, c_corr/e).sum())
|
||||
|
||||
c = np.array([[10, 12, 10], [12, 10, 10]])
|
||||
g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood')
|
||||
assert_allclose(g, 2*xlogy(c, c/e).sum())
|
||||
|
||||
def test_chi2_contingency_bad_args(self):
|
||||
# Test that "bad" inputs raise a ValueError.
|
||||
|
||||
# Negative value in the array of observed frequencies.
|
||||
obs = np.array([[-1, 10], [1, 2]])
|
||||
assert_raises(ValueError, chi2_contingency, obs)
|
||||
|
||||
# The zeros in this will result in zeros in the array
|
||||
# of expected frequencies.
|
||||
obs = np.array([[0, 1], [0, 1]])
|
||||
assert_raises(ValueError, chi2_contingency, obs)
|
||||
|
||||
# A degenerate case: `observed` has size 0.
|
||||
obs = np.empty((0, 8))
|
||||
assert_raises(ValueError, chi2_contingency, obs)
|
||||
|
||||
def test_chi2_contingency_yates_gh13875(self):
|
||||
# Magnitude of Yates' continuity correction should not exceed difference
|
||||
# between expected and observed value of the statistic; see gh-13875
|
||||
observed = np.array([[1573, 3], [4, 0]])
|
||||
p = chi2_contingency(observed)[1]
|
||||
assert_allclose(p, 1, rtol=1e-12)
|
||||
|
||||
@pytest.mark.parametrize("correction", [False, True])
|
||||
def test_result(self, correction):
|
||||
obs = np.array([[1, 2], [1, 2]])
|
||||
res = chi2_contingency(obs, correction=correction)
|
||||
assert_equal((res.statistic, res.pvalue, res.dof, res.expected_freq), res)
|
||||
|
||||
@pytest.mark.slow
|
||||
def test_exact_permutation(self):
|
||||
table = np.arange(4).reshape(2, 2)
|
||||
ref_statistic = chi2_contingency(table, correction=False).statistic
|
||||
ref_pvalue = stats.fisher_exact(table).pvalue
|
||||
method = stats.PermutationMethod(n_resamples=50000)
|
||||
res = chi2_contingency(table, correction=False, method=method)
|
||||
assert_equal(res.statistic, ref_statistic)
|
||||
assert_allclose(res.pvalue, ref_pvalue, rtol=1e-15)
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.parametrize('method', (stats.PermutationMethod,
|
||||
stats.MonteCarloMethod))
|
||||
def test_resampling_randomized(self, method):
|
||||
rng = np.random.default_rng(2592340925)
|
||||
# need to have big sum for asymptotic approximation to be good
|
||||
rows = [300, 1000, 800]
|
||||
cols = [200, 400, 800, 700]
|
||||
table = stats.random_table(rows, cols, seed=rng).rvs()
|
||||
res = chi2_contingency(table, correction=False, method=method(rng=rng))
|
||||
ref = chi2_contingency(table, correction=False)
|
||||
assert_equal(res.statistic, ref.statistic)
|
||||
assert_allclose(res.pvalue, ref.pvalue, atol=5e-3)
|
||||
assert_equal(res.dof, np.nan)
|
||||
assert_equal(res.expected_freq, ref.expected_freq)
|
||||
|
||||
def test_resampling_invalid_args(self):
|
||||
table = np.arange(8).reshape(2, 2, 2)
|
||||
|
||||
method = stats.PermutationMethod()
|
||||
message = "Use of `method` is only compatible with two-way tables."
|
||||
with pytest.raises(ValueError, match=message):
|
||||
chi2_contingency(table, correction=False, method=method)
|
||||
|
||||
table = np.arange(4).reshape(2, 2)
|
||||
|
||||
method = stats.PermutationMethod()
|
||||
message = "`correction=True` is not compatible with..."
|
||||
with pytest.raises(ValueError, match=message):
|
||||
chi2_contingency(table, method=method)
|
||||
|
||||
method = stats.MonteCarloMethod()
|
||||
message = "`lambda_=2` is not compatible with..."
|
||||
with pytest.raises(ValueError, match=message):
|
||||
chi2_contingency(table, correction=False, lambda_=2, method=method)
|
||||
|
||||
method = 'herring'
|
||||
message = "`method='herring'` not recognized; if provided, `method`..."
|
||||
with pytest.raises(ValueError, match=message):
|
||||
chi2_contingency(table, correction=False, method=method)
|
||||
|
||||
method = stats.MonteCarloMethod(rvs=stats.norm.rvs)
|
||||
message = "If the `method` argument of `chi2_contingency` is..."
|
||||
with pytest.raises(ValueError, match=message):
|
||||
chi2_contingency(table, correction=False, method=method)
|
||||
|
||||
|
||||
def test_bad_association_args():
|
||||
# Invalid Test Statistic
|
||||
assert_raises(ValueError, association, [[1, 2], [3, 4]], "X")
|
||||
# Invalid array shape
|
||||
assert_raises(ValueError, association, [[[1, 2]], [[3, 4]]], "cramer")
|
||||
# chi2_contingency exception
|
||||
assert_raises(ValueError, association, [[-1, 10], [1, 2]], 'cramer')
|
||||
# Invalid Array Item Data Type
|
||||
assert_raises(ValueError, association,
|
||||
np.array([[1, 2], ["dd", 4]], dtype=object), 'cramer')
|
||||
|
||||
|
||||
@pytest.mark.parametrize('stat, expected',
|
||||
[('cramer', 0.09222412010290792),
|
||||
('tschuprow', 0.0775509319944633),
|
||||
('pearson', 0.12932925727138758)])
|
||||
def test_assoc(stat, expected):
|
||||
# 2d Array
|
||||
obs1 = np.array([[12, 13, 14, 15, 16],
|
||||
[17, 16, 18, 19, 11],
|
||||
[9, 15, 14, 12, 11]])
|
||||
a = association(observed=obs1, method=stat)
|
||||
assert_allclose(a, expected)
|
||||
|
|
@ -1,173 +0,0 @@
|
|||
import math
|
||||
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
from scipy._lib._array_api import array_namespace
|
||||
from scipy._lib._array_api_no_0d import xp_assert_close, xp_assert_less, xp_assert_equal
|
||||
from scipy.stats._continued_fraction import _continued_fraction
|
||||
|
||||
|
||||
@pytest.mark.skip_xp_backends('array_api_strict', reason='No fancy indexing assignment')
|
||||
@pytest.mark.skip_xp_backends('jax.numpy', reason="Don't support mutation")
|
||||
# dask doesn't like lines like this
|
||||
# n = int(xp.real(xp_ravel(n))[0])
|
||||
# (at some point in here the shape becomes nan)
|
||||
@pytest.mark.skip_xp_backends('dask.array', reason="dask has issues with the shapes")
|
||||
class TestContinuedFraction:
|
||||
rng = np.random.default_rng(5895448232066142650)
|
||||
p = rng.uniform(1, 10, size=10)
|
||||
|
||||
def a1(self, n, x=1.5):
|
||||
if n == 0:
|
||||
y = 0*x
|
||||
elif n == 1:
|
||||
y = x
|
||||
else:
|
||||
y = -x**2
|
||||
if np.isscalar(y) and np.__version__ < "2.0":
|
||||
y = np.full_like(x, y) # preserve dtype pre NEP 50
|
||||
return y
|
||||
|
||||
def b1(self, n, x=1.5):
|
||||
if n == 0:
|
||||
y = 0*x
|
||||
else:
|
||||
one = x/x # gets array of correct type, dtype, and shape
|
||||
y = one * (2*n - 1)
|
||||
if np.isscalar(y) and np.__version__ < "2.0":
|
||||
y = np.full_like(x, y) # preserve dtype pre NEP 50
|
||||
return y
|
||||
|
||||
def log_a1(self, n, x):
|
||||
xp = array_namespace(x)
|
||||
if n == 0:
|
||||
y = xp.full_like(x, -xp.asarray(math.inf, dtype=x.dtype))
|
||||
elif n == 1:
|
||||
y = xp.log(x)
|
||||
else:
|
||||
y = 2 * xp.log(x) + math.pi * 1j
|
||||
return y
|
||||
|
||||
def log_b1(self, n, x):
|
||||
xp = array_namespace(x)
|
||||
if n == 0:
|
||||
y = xp.full_like(x, -xp.asarray(math.inf, dtype=x.dtype))
|
||||
else:
|
||||
one = x - x # gets array of correct type, dtype, and shape
|
||||
y = one + math.log(2 * n - 1)
|
||||
return y
|
||||
|
||||
def test_input_validation(self, xp):
|
||||
a1 = self.a1
|
||||
b1 = self.b1
|
||||
|
||||
message = '`a` and `b` must be callable.'
|
||||
with pytest.raises(ValueError, match=message):
|
||||
_continued_fraction(1, b1)
|
||||
with pytest.raises(ValueError, match=message):
|
||||
_continued_fraction(a1, 1)
|
||||
|
||||
message = r'`eps` and `tiny` must be \(or represent the logarithm of\)...'
|
||||
with pytest.raises(ValueError, match=message):
|
||||
_continued_fraction(a1, b1, tolerances={'eps': -10})
|
||||
with pytest.raises(ValueError, match=message):
|
||||
_continued_fraction(a1, b1, tolerances={'eps': np.nan})
|
||||
with pytest.raises(ValueError, match=message):
|
||||
_continued_fraction(a1, b1, tolerances={'eps': 1+1j}, log=True)
|
||||
with pytest.raises(ValueError, match=message):
|
||||
_continued_fraction(a1, b1, tolerances={'tiny': 0})
|
||||
with pytest.raises(ValueError, match=message):
|
||||
_continued_fraction(a1, b1, tolerances={'tiny': np.inf})
|
||||
with pytest.raises(ValueError, match=message):
|
||||
_continued_fraction(a1, b1, tolerances={'tiny': np.inf}, log=True)
|
||||
# this should not raise
|
||||
kwargs = dict(args=xp.asarray(1.5+0j), log=True, maxiter=0)
|
||||
_continued_fraction(a1, b1, tolerances={'eps': -10}, **kwargs)
|
||||
_continued_fraction(a1, b1, tolerances={'tiny': -10}, **kwargs)
|
||||
|
||||
message = '`maxiter` must be a non-negative integer.'
|
||||
with pytest.raises(ValueError, match=message):
|
||||
_continued_fraction(a1, b1, maxiter=-1)
|
||||
|
||||
message = '`log` must be boolean.'
|
||||
with pytest.raises(ValueError, match=message):
|
||||
_continued_fraction(a1, b1, log=2)
|
||||
|
||||
@pytest.mark.parametrize('dtype', ['float32', 'float64', 'complex64', 'complex128'])
|
||||
@pytest.mark.parametrize('shape', [(), (1,), (3,), (3, 2)])
|
||||
def test_basic(self, shape, dtype, xp):
|
||||
np_dtype = getattr(np, dtype)
|
||||
xp_dtype = getattr(xp, dtype)
|
||||
rng = np.random.default_rng(2435908729190400)
|
||||
|
||||
x = rng.random(shape).astype(np_dtype)
|
||||
x = x + rng.random(shape).astype(np_dtype)*1j if dtype.startswith('c') else x
|
||||
x = xp.asarray(x, dtype=xp_dtype)
|
||||
|
||||
res = _continued_fraction(self.a1, self.b1, args=(x,))
|
||||
ref = xp.tan(x)
|
||||
xp_assert_close(res.f, ref)
|
||||
|
||||
@pytest.mark.skip_xp_backends('torch', reason='pytorch/pytorch#136063')
|
||||
@pytest.mark.parametrize('dtype', ['float32', 'float64'])
|
||||
@pytest.mark.parametrize('shape', [(), (1,), (3,), (3, 2)])
|
||||
def test_log(self, shape, dtype, xp):
|
||||
if (np.__version__ < "2") and (dtype == 'float32'):
|
||||
pytest.skip("Scalar dtypes only respected after NEP 50.")
|
||||
np_dtype = getattr(np, dtype)
|
||||
rng = np.random.default_rng(2435908729190400)
|
||||
x = rng.random(shape).astype(np_dtype)
|
||||
x = xp.asarray(x)
|
||||
|
||||
res = _continued_fraction(self.log_a1, self.log_b1, args=(x + 0j,), log=True)
|
||||
ref = xp.tan(x)
|
||||
xp_assert_close(xp.exp(xp.real(res.f)), ref)
|
||||
|
||||
def test_maxiter(self, xp):
|
||||
rng = np.random.default_rng(2435908729190400)
|
||||
x = xp.asarray(rng.random(), dtype=xp.float64)
|
||||
ref = xp.tan(x)
|
||||
|
||||
res1 = _continued_fraction(self.a1, self.b1, args=(x,), maxiter=3)
|
||||
assert res1.nit == 3
|
||||
|
||||
res2 = _continued_fraction(self.a1, self.b1, args=(x,), maxiter=6)
|
||||
assert res2.nit == 6
|
||||
|
||||
xp_assert_less(xp.abs(res2.f - ref), xp.abs(res1.f - ref))
|
||||
|
||||
def test_eps(self, xp):
|
||||
x = xp.asarray(1.5, dtype=xp.float64) # x = 1.5 is the default defined above
|
||||
ref = xp.tan(x)
|
||||
res1 = _continued_fraction(self.a1, self.b1, args=(x,),
|
||||
tolerances={'eps': 1e-6})
|
||||
res2 = _continued_fraction(self.a1, self.b1, args=(x,))
|
||||
xp_assert_less(res1.nit, res2.nit)
|
||||
xp_assert_less(xp.abs(res2.f - ref), xp.abs(res1.f - ref))
|
||||
|
||||
def test_feval(self, xp):
|
||||
def a(n, x):
|
||||
a.nfev += 1
|
||||
return n * x
|
||||
|
||||
def b(n, x):
|
||||
b.nfev += 1
|
||||
return n * x
|
||||
|
||||
a.nfev, b.nfev = 0, 0
|
||||
|
||||
res = _continued_fraction(a, b, args=(xp.asarray(1.),))
|
||||
assert res.nfev == a.nfev == b.nfev == res.nit + 1
|
||||
|
||||
def test_status(self, xp):
|
||||
x = xp.asarray([1, 10, np.nan], dtype=xp.float64)
|
||||
res = _continued_fraction(self.a1, self.b1, args=(x,), maxiter=15)
|
||||
xp_assert_equal(res.success, xp.asarray([True, False, False]))
|
||||
xp_assert_equal(res.status, xp.asarray([0, -2, -3], dtype=xp.int32))
|
||||
|
||||
def test_special_cases(self, xp):
|
||||
one = xp.asarray(1)
|
||||
res = _continued_fraction(lambda x: one, lambda x: one, maxiter=0)
|
||||
xp_assert_close(res.f, xp.asarray(1.))
|
||||
assert res.nit == res.nfev - 1 == 0
|
||||
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -1,683 +0,0 @@
|
|||
# Tests for fitting specific distributions to censored data.
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from scipy.optimize import fmin
|
||||
from scipy.stats import (CensoredData, beta, cauchy, chi2, expon, gamma,
|
||||
gumbel_l, gumbel_r, invgauss, invweibull, laplace,
|
||||
logistic, lognorm, nct, ncx2, norm, weibull_max,
|
||||
weibull_min)
|
||||
|
||||
|
||||
# In some tests, we'll use this optimizer for improved accuracy.
|
||||
def optimizer(func, x0, args=(), disp=0):
|
||||
return fmin(func, x0, args=args, disp=disp, xtol=1e-12, ftol=1e-12)
|
||||
|
||||
|
||||
def test_beta():
|
||||
"""
|
||||
Test fitting beta shape parameters to interval-censored data.
|
||||
|
||||
Calculation in R:
|
||||
|
||||
> library(fitdistrplus)
|
||||
> data <- data.frame(left=c(0.10, 0.50, 0.75, 0.80),
|
||||
+ right=c(0.20, 0.55, 0.90, 0.95))
|
||||
> result = fitdistcens(data, 'beta', control=list(reltol=1e-14))
|
||||
|
||||
> result
|
||||
Fitting of the distribution ' beta ' on censored data by maximum likelihood
|
||||
Parameters:
|
||||
estimate
|
||||
shape1 1.419941
|
||||
shape2 1.027066
|
||||
> result$sd
|
||||
shape1 shape2
|
||||
0.9914177 0.6866565
|
||||
"""
|
||||
data = CensoredData(interval=[[0.10, 0.20],
|
||||
[0.50, 0.55],
|
||||
[0.75, 0.90],
|
||||
[0.80, 0.95]])
|
||||
|
||||
# For this test, fit only the shape parameters; loc and scale are fixed.
|
||||
a, b, loc, scale = beta.fit(data, floc=0, fscale=1, optimizer=optimizer)
|
||||
|
||||
assert_allclose(a, 1.419941, rtol=5e-6)
|
||||
assert_allclose(b, 1.027066, rtol=5e-6)
|
||||
assert loc == 0
|
||||
assert scale == 1
|
||||
|
||||
|
||||
def test_cauchy_right_censored():
|
||||
"""
|
||||
Test fitting the Cauchy distribution to right-censored data.
|
||||
|
||||
Calculation in R, with two values not censored [1, 10] and
|
||||
one right-censored value [30].
|
||||
|
||||
> library(fitdistrplus)
|
||||
> data <- data.frame(left=c(1, 10, 30), right=c(1, 10, NA))
|
||||
> result = fitdistcens(data, 'cauchy', control=list(reltol=1e-14))
|
||||
> result
|
||||
Fitting of the distribution ' cauchy ' on censored data by maximum
|
||||
likelihood
|
||||
Parameters:
|
||||
estimate
|
||||
location 7.100001
|
||||
scale 7.455866
|
||||
"""
|
||||
data = CensoredData(uncensored=[1, 10], right=[30])
|
||||
loc, scale = cauchy.fit(data, optimizer=optimizer)
|
||||
assert_allclose(loc, 7.10001, rtol=5e-6)
|
||||
assert_allclose(scale, 7.455866, rtol=5e-6)
|
||||
|
||||
|
||||
def test_cauchy_mixed():
|
||||
"""
|
||||
Test fitting the Cauchy distribution to data with mixed censoring.
|
||||
|
||||
Calculation in R, with:
|
||||
* two values not censored [1, 10],
|
||||
* one left-censored [1],
|
||||
* one right-censored [30], and
|
||||
* one interval-censored [[4, 8]].
|
||||
|
||||
> library(fitdistrplus)
|
||||
> data <- data.frame(left=c(NA, 1, 4, 10, 30), right=c(1, 1, 8, 10, NA))
|
||||
> result = fitdistcens(data, 'cauchy', control=list(reltol=1e-14))
|
||||
> result
|
||||
Fitting of the distribution ' cauchy ' on censored data by maximum
|
||||
likelihood
|
||||
Parameters:
|
||||
estimate
|
||||
location 4.605150
|
||||
scale 5.900852
|
||||
"""
|
||||
data = CensoredData(uncensored=[1, 10], left=[1], right=[30],
|
||||
interval=[[4, 8]])
|
||||
loc, scale = cauchy.fit(data, optimizer=optimizer)
|
||||
assert_allclose(loc, 4.605150, rtol=5e-6)
|
||||
assert_allclose(scale, 5.900852, rtol=5e-6)
|
||||
|
||||
|
||||
def test_chi2_mixed():
|
||||
"""
|
||||
Test fitting just the shape parameter (df) of chi2 to mixed data.
|
||||
|
||||
Calculation in R, with:
|
||||
* two values not censored [1, 10],
|
||||
* one left-censored [1],
|
||||
* one right-censored [30], and
|
||||
* one interval-censored [[4, 8]].
|
||||
|
||||
> library(fitdistrplus)
|
||||
> data <- data.frame(left=c(NA, 1, 4, 10, 30), right=c(1, 1, 8, 10, NA))
|
||||
> result = fitdistcens(data, 'chisq', control=list(reltol=1e-14))
|
||||
> result
|
||||
Fitting of the distribution ' chisq ' on censored data by maximum
|
||||
likelihood
|
||||
Parameters:
|
||||
estimate
|
||||
df 5.060329
|
||||
"""
|
||||
data = CensoredData(uncensored=[1, 10], left=[1], right=[30],
|
||||
interval=[[4, 8]])
|
||||
df, loc, scale = chi2.fit(data, floc=0, fscale=1, optimizer=optimizer)
|
||||
assert_allclose(df, 5.060329, rtol=5e-6)
|
||||
assert loc == 0
|
||||
assert scale == 1
|
||||
|
||||
|
||||
def test_expon_right_censored():
|
||||
"""
|
||||
For the exponential distribution with loc=0, the exact solution for
|
||||
fitting n uncensored points x[0]...x[n-1] and m right-censored points
|
||||
x[n]..x[n+m-1] is
|
||||
|
||||
scale = sum(x)/n
|
||||
|
||||
That is, divide the sum of all the values (not censored and
|
||||
right-censored) by the number of uncensored values. (See, for example,
|
||||
https://en.wikipedia.org/wiki/Censoring_(statistics)#Likelihood.)
|
||||
|
||||
The second derivative of the log-likelihood function is
|
||||
|
||||
n/scale**2 - 2*sum(x)/scale**3
|
||||
|
||||
from which the estimate of the standard error can be computed.
|
||||
|
||||
-----
|
||||
|
||||
Calculation in R, for reference only. The R results are not
|
||||
used in the test.
|
||||
|
||||
> library(fitdistrplus)
|
||||
> dexps <- function(x, scale) {
|
||||
+ return(dexp(x, 1/scale))
|
||||
+ }
|
||||
> pexps <- function(q, scale) {
|
||||
+ return(pexp(q, 1/scale))
|
||||
+ }
|
||||
> left <- c(1, 2.5, 3, 6, 7.5, 10, 12, 12, 14.5, 15,
|
||||
+ 16, 16, 20, 20, 21, 22)
|
||||
> right <- c(1, 2.5, 3, 6, 7.5, 10, 12, 12, 14.5, 15,
|
||||
+ NA, NA, NA, NA, NA, NA)
|
||||
> result = fitdistcens(data, 'exps', start=list(scale=mean(data$left)),
|
||||
+ control=list(reltol=1e-14))
|
||||
> result
|
||||
Fitting of the distribution ' exps ' on censored data by maximum likelihood
|
||||
Parameters:
|
||||
estimate
|
||||
scale 19.85
|
||||
> result$sd
|
||||
scale
|
||||
6.277119
|
||||
"""
|
||||
# This data has 10 uncensored values and 6 right-censored values.
|
||||
obs = [1, 2.5, 3, 6, 7.5, 10, 12, 12, 14.5, 15, 16, 16, 20, 20, 21, 22]
|
||||
cens = [False]*10 + [True]*6
|
||||
data = CensoredData.right_censored(obs, cens)
|
||||
|
||||
loc, scale = expon.fit(data, floc=0, optimizer=optimizer)
|
||||
|
||||
assert loc == 0
|
||||
# Use the analytical solution to compute the expected value. This
|
||||
# is the sum of the observed values divided by the number of uncensored
|
||||
# values.
|
||||
n = len(data) - data.num_censored()
|
||||
total = data._uncensored.sum() + data._right.sum()
|
||||
expected = total / n
|
||||
assert_allclose(scale, expected, 1e-8)
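# A minimal sketch (not part of the original test file) verifying the
# closed-form result quoted in the docstring above: the MLE of the scale is
# sum(x)/n, and the standard error follows from the observed information
# -(n/scale**2 - 2*sum(x)/scale**3) evaluated at the MLE.
def _expon_right_censored_mle_sketch():
    import numpy as np
    obs = np.array([1, 2.5, 3, 6, 7.5, 10, 12, 12, 14.5, 15,
                    16, 16, 20, 20, 21, 22])
    n_uncensored = 10  # the first 10 values are uncensored
    scale_mle = obs.sum() / n_uncensored
    info = -(n_uncensored/scale_mle**2 - 2*obs.sum()/scale_mle**3)
    se = 1 / np.sqrt(info)
    return scale_mle, se  # approximately (19.85, 6.277), matching the R output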
|
||||
|
||||
|
||||
def test_gamma_right_censored():
|
||||
"""
|
||||
Fit gamma shape and scale to data with one right-censored value.
|
||||
|
||||
Calculation in R:
|
||||
|
||||
> library(fitdistrplus)
|
||||
> data <- data.frame(left=c(2.5, 2.9, 3.8, 9.1, 9.3, 12.0, 23.0, 25.0),
|
||||
+ right=c(2.5, 2.9, 3.8, 9.1, 9.3, 12.0, 23.0, NA))
|
||||
> result = fitdistcens(data, 'gamma', start=list(shape=1, scale=10),
|
||||
+ control=list(reltol=1e-13))
|
||||
> result
|
||||
Fitting of the distribution ' gamma ' on censored data by maximum
|
||||
likelihood
|
||||
Parameters:
|
||||
estimate
|
||||
shape 1.447623
|
||||
scale 8.360197
|
||||
> result$sd
|
||||
shape scale
|
||||
0.7053086 5.1016531
|
||||
"""
|
||||
# The last value is right-censored.
|
||||
x = CensoredData.right_censored([2.5, 2.9, 3.8, 9.1, 9.3, 12.0, 23.0,
|
||||
25.0],
|
||||
[0]*7 + [1])
|
||||
|
||||
a, loc, scale = gamma.fit(x, floc=0, optimizer=optimizer)
|
||||
|
||||
assert_allclose(a, 1.447623, rtol=5e-6)
|
||||
assert loc == 0
|
||||
assert_allclose(scale, 8.360197, rtol=5e-6)
|
||||
|
||||
|
||||
def test_gumbel():
|
||||
"""
|
||||
Fit gumbel_l and gumbel_r to censored data.
|
||||
|
||||
This R calculation should match gumbel_r.
|
||||
|
||||
> library(evd)
|
||||
> library(fitdistrplus)
|
||||
> data = data.frame(left=c(0, 2, 3, 9, 10, 10),
|
||||
+ right=c(1, 2, 3, 9, NA, NA))
|
||||
> result = fitdistcens(data, 'gumbel',
|
||||
+ control=list(reltol=1e-14),
|
||||
+ start=list(loc=4, scale=5))
|
||||
> result
|
||||
Fitting of the distribution ' gumbel ' on censored data by maximum
|
||||
likelihood
|
||||
Parameters:
|
||||
estimate
|
||||
loc 4.487853
|
||||
scale 4.843640
|
||||
"""
|
||||
# First value is interval-censored. Last two are right-censored.
|
||||
uncensored = np.array([2, 3, 9])
|
||||
right = np.array([10, 10])
|
||||
interval = np.array([[0, 1]])
|
||||
data = CensoredData(uncensored, right=right, interval=interval)
|
||||
loc, scale = gumbel_r.fit(data, optimizer=optimizer)
|
||||
assert_allclose(loc, 4.487853, rtol=5e-6)
|
||||
assert_allclose(scale, 4.843640, rtol=5e-6)
|
||||
|
||||
# Negate the data and reverse the intervals, and test with gumbel_l.
|
||||
data2 = CensoredData(-uncensored, left=-right,
|
||||
interval=-interval[:, ::-1])
|
||||
# Fitting gumbel_l to data2 should give the same result as above, but
|
||||
# with loc negated.
|
||||
loc2, scale2 = gumbel_l.fit(data2, optimizer=optimizer)
|
||||
assert_allclose(loc2, -4.487853, rtol=5e-6)
|
||||
assert_allclose(scale2, 4.843640, rtol=5e-6)
|
||||
|
||||
|
||||
def test_invgauss():
|
||||
"""
|
||||
Fit just the shape parameter of invgauss to data with one value
|
||||
left-censored and one value right-censored.
|
||||
|
||||
Calculation in R; using a fixed dispersion parameter amounts to fixing
|
||||
the scale to be 1.
|
||||
|
||||
> library(statmod)
|
||||
> library(fitdistrplus)
|
||||
> left <- c(NA, 0.4813096, 0.5571880, 0.5132463, 0.3801414, 0.5904386,
|
||||
+ 0.4822340, 0.3478597, 3, 0.7191797, 1.5810902, 0.4442299)
|
||||
> right <- c(0.15, 0.4813096, 0.5571880, 0.5132463, 0.3801414, 0.5904386,
|
||||
+ 0.4822340, 0.3478597, NA, 0.7191797, 1.5810902, 0.4442299)
|
||||
> data <- data.frame(left=left, right=right)
|
||||
> result = fitdistcens(data, 'invgauss', control=list(reltol=1e-12),
|
||||
+ fix.arg=list(dispersion=1), start=list(mean=3))
|
||||
> result
|
||||
Fitting of the distribution ' invgauss ' on censored data by maximum
|
||||
likelihood
|
||||
Parameters:
|
||||
estimate
|
||||
mean 0.853469
|
||||
Fixed parameters:
|
||||
value
|
||||
dispersion 1
|
||||
> result$sd
|
||||
mean
|
||||
0.247636
|
||||
|
||||
Here's the R calculation with the dispersion as a free parameter to
|
||||
be fit.
|
||||
|
||||
> result = fitdistcens(data, 'invgauss', control=list(reltol=1e-12),
|
||||
+ start=list(mean=3, dispersion=1))
|
||||
> result
|
||||
Fitting of the distribution ' invgauss ' on censored data by maximum
|
||||
likelihood
|
||||
Parameters:
|
||||
estimate
|
||||
mean 0.8699819
|
||||
dispersion 1.2261362
|
||||
|
||||
The parametrization of the inverse Gaussian distribution in the
|
||||
`statmod` package is not the same as in SciPy (see
|
||||
https://arxiv.org/abs/1603.06687
|
||||
for details). The translation from R to SciPy is
|
||||
|
||||
scale = 1/dispersion
|
||||
mu = mean * dispersion
|
||||
|
||||
> 1/result$estimate['dispersion'] # 1/dispersion
|
||||
dispersion
|
||||
0.8155701
|
||||
> result$estimate['mean'] * result$estimate['dispersion']
|
||||
mean
|
||||
1.066716
|
||||
|
||||
Those last two values are the SciPy scale and shape parameters.
|
||||
"""
|
||||
# One point is left-censored, and one is right-censored.
|
||||
x = [0.4813096, 0.5571880, 0.5132463, 0.3801414,
|
||||
0.5904386, 0.4822340, 0.3478597, 0.7191797,
|
||||
1.5810902, 0.4442299]
|
||||
data = CensoredData(uncensored=x, left=[0.15], right=[3])
|
||||
|
||||
# Fit only the shape parameter.
|
||||
mu, loc, scale = invgauss.fit(data, floc=0, fscale=1, optimizer=optimizer)
|
||||
|
||||
assert_allclose(mu, 0.853469, rtol=5e-5)
|
||||
assert loc == 0
|
||||
assert scale == 1
|
||||
|
||||
# Fit the shape and scale.
|
||||
mu, loc, scale = invgauss.fit(data, floc=0, optimizer=optimizer)
|
||||
|
||||
assert_allclose(mu, 1.066716, rtol=5e-5)
|
||||
assert loc == 0
|
||||
assert_allclose(scale, 0.8155701, rtol=5e-5)
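# A minimal sketch (not part of the original test file) of the parameter
# translation described in the docstring above: statmod's (mean, dispersion)
# pair maps to SciPy's invgauss (mu, scale) via scale = 1/dispersion and
# mu = mean*dispersion.
def _statmod_to_scipy_invgauss(mean, dispersion):
    return mean * dispersion, 1.0 / dispersion  # (mu, scale)

# For the free-dispersion R fit quoted above, (0.8699819, 1.2261362) translates
# to mu ~ 1.066716 and scale ~ 0.8155701, the values asserted here.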
|
||||
|
||||
|
||||
def test_invweibull():
|
||||
"""
|
||||
Fit invweibull to censored data.
|
||||
|
||||
Here is the calculation in R. The 'frechet' distribution from the evd
|
||||
package matches SciPy's invweibull distribution. The `loc` parameter
|
||||
is fixed at 0.
|
||||
|
||||
> library(evd)
|
||||
> library(fitdistrplus)
|
||||
> data = data.frame(left=c(0, 2, 3, 9, 10, 10),
|
||||
+ right=c(1, 2, 3, 9, NA, NA))
|
||||
> result = fitdistcens(data, 'frechet',
|
||||
+ control=list(reltol=1e-14),
|
||||
+ start=list(loc=4, scale=5))
|
||||
> result
|
||||
Fitting of the distribution ' frechet ' on censored data by maximum
|
||||
likelihood
|
||||
Parameters:
|
||||
estimate
|
||||
scale 2.7902200
|
||||
shape 0.6379845
|
||||
Fixed parameters:
|
||||
value
|
||||
loc 0
|
||||
"""
|
||||
# In the R data, the first value is interval-censored, and the last
|
||||
# two are right-censored. The rest are not censored.
|
||||
data = CensoredData(uncensored=[2, 3, 9], right=[10, 10],
|
||||
interval=[[0, 1]])
|
||||
c, loc, scale = invweibull.fit(data, floc=0, optimizer=optimizer)
|
||||
assert_allclose(c, 0.6379845, rtol=5e-6)
|
||||
assert loc == 0
|
||||
assert_allclose(scale, 2.7902200, rtol=5e-6)
|
||||
|
||||
|
||||
def test_laplace():
|
||||
"""
|
||||
Fit the Laplace distribution to left- and right-censored data.
|
||||
|
||||
Calculation in R:
|
||||
|
||||
> library(fitdistrplus)
|
||||
> dlaplace <- function(x, location=0, scale=1) {
|
||||
+ return(0.5*exp(-abs((x - location)/scale))/scale)
|
||||
+ }
|
||||
> plaplace <- function(q, location=0, scale=1) {
|
||||
+ z <- (q - location)/scale
|
||||
+ s <- sign(z)
|
||||
+ f <- -s*0.5*exp(-abs(z)) + (s+1)/2
|
||||
+ return(f)
|
||||
+ }
|
||||
> left <- c(NA, -41.564, 50.0, 15.7384, 50.0, 10.0452, -2.0684,
|
||||
+ -19.5399, 50.0, 9.0005, 27.1227, 4.3113, -3.7372,
|
||||
+ 25.3111, 14.7987, 34.0887, 50.0, 42.8496, 18.5862,
|
||||
+ 32.8921, 9.0448, -27.4591, NA, 19.5083, -9.7199)
|
||||
> right <- c(-50.0, -41.564, NA, 15.7384, NA, 10.0452, -2.0684,
|
||||
+ -19.5399, NA, 9.0005, 27.1227, 4.3113, -3.7372,
|
||||
+ 25.3111, 14.7987, 34.0887, NA, 42.8496, 18.5862,
|
||||
+ 32.8921, 9.0448, -27.4591, -50.0, 19.5083, -9.7199)
|
||||
> data <- data.frame(left=left, right=right)
|
||||
> result <- fitdistcens(data, 'laplace', start=list(location=10, scale=10),
|
||||
+ control=list(reltol=1e-13))
|
||||
> result
|
||||
Fitting of the distribution ' laplace ' on censored data by maximum
|
||||
likelihood
|
||||
Parameters:
|
||||
estimate
|
||||
location 14.79870
|
||||
scale 30.93601
|
||||
> result$sd
|
||||
location scale
|
||||
0.1758864 7.0972125
|
||||
"""
|
||||
# The value -50 is left-censored, and the value 50 is right-censored.
|
||||
obs = np.array([-50.0, -41.564, 50.0, 15.7384, 50.0, 10.0452, -2.0684,
|
||||
-19.5399, 50.0, 9.0005, 27.1227, 4.3113, -3.7372,
|
||||
25.3111, 14.7987, 34.0887, 50.0, 42.8496, 18.5862,
|
||||
32.8921, 9.0448, -27.4591, -50.0, 19.5083, -9.7199])
|
||||
x = obs[(obs != -50.0) & (obs != 50)]
|
||||
left = obs[obs == -50.0]
|
||||
right = obs[obs == 50.0]
|
||||
data = CensoredData(uncensored=x, left=left, right=right)
|
||||
loc, scale = laplace.fit(data, loc=10, scale=10, optimizer=optimizer)
|
||||
assert_allclose(loc, 14.79870, rtol=5e-6)
|
||||
assert_allclose(scale, 30.93601, rtol=5e-6)
|
||||
|
||||
|
||||
def test_logistic():
|
||||
"""
|
||||
Fit the logistic distribution to left-censored data.
|
||||
|
||||
Calculation in R:
|
||||
> library(fitdistrplus)
|
||||
> left = c(13.5401, 37.4235, 11.906 , 13.998 , NA , 0.4023, NA ,
|
||||
+ 10.9044, 21.0629, 9.6985, NA , 12.9016, 39.164 , 34.6396,
|
||||
+ NA , 20.3665, 16.5889, 18.0952, 45.3818, 35.3306, 8.4949,
|
||||
+ 3.4041, NA , 7.2828, 37.1265, 6.5969, 17.6868, 17.4977,
|
||||
+ 16.3391, 36.0541)
|
||||
> right = c(13.5401, 37.4235, 11.906 , 13.998 , 0. , 0.4023, 0. ,
|
||||
+ 10.9044, 21.0629, 9.6985, 0. , 12.9016, 39.164 , 34.6396,
|
||||
+ 0. , 20.3665, 16.5889, 18.0952, 45.3818, 35.3306, 8.4949,
|
||||
+ 3.4041, 0. , 7.2828, 37.1265, 6.5969, 17.6868, 17.4977,
|
||||
+ 16.3391, 36.0541)
|
||||
> data = data.frame(left=left, right=right)
|
||||
> result = fitdistcens(data, 'logis', control=list(reltol=1e-14))
|
||||
> result
|
||||
Fitting of the distribution ' logis ' on censored data by maximum
|
||||
likelihood
|
||||
Parameters:
|
||||
estimate
|
||||
location 14.633459
|
||||
scale 9.232736
|
||||
> result$sd
|
||||
location scale
|
||||
2.931505 1.546879
|
||||
"""
|
||||
# Values that are zero are left-censored; the true values are less than 0.
|
||||
x = np.array([13.5401, 37.4235, 11.906, 13.998, 0.0, 0.4023, 0.0, 10.9044,
|
||||
21.0629, 9.6985, 0.0, 12.9016, 39.164, 34.6396, 0.0, 20.3665,
|
||||
16.5889, 18.0952, 45.3818, 35.3306, 8.4949, 3.4041, 0.0,
|
||||
7.2828, 37.1265, 6.5969, 17.6868, 17.4977, 16.3391,
|
||||
36.0541])
|
||||
data = CensoredData.left_censored(x, censored=(x == 0))
|
||||
loc, scale = logistic.fit(data, optimizer=optimizer)
|
||||
assert_allclose(loc, 14.633459, rtol=5e-7)
|
||||
assert_allclose(scale, 9.232736, rtol=5e-6)
|
||||
|
||||
|
||||
def test_lognorm():
|
||||
"""
|
||||
Ref: https://math.montana.edu/jobo/st528/documents/relc.pdf
|
||||
|
||||
The data is the locomotive control time to failure example that starts
|
||||
on page 8 (that's the 8th page in the PDF; the page number shown in
|
||||
the text is 270).
|
||||
The document includes SAS output for the data.
|
||||
"""
|
||||
# These are the uncensored measurements. There are also 59 right-censored
|
||||
# measurements where the lower bound is 135.
|
||||
miles_to_fail = [22.5, 37.5, 46.0, 48.5, 51.5, 53.0, 54.5, 57.5, 66.5,
|
||||
68.0, 69.5, 76.5, 77.0, 78.5, 80.0, 81.5, 82.0, 83.0,
|
||||
84.0, 91.5, 93.5, 102.5, 107.0, 108.5, 112.5, 113.5,
|
||||
116.0, 117.0, 118.5, 119.0, 120.0, 122.5, 123.0, 127.5,
|
||||
131.0, 132.5, 134.0]
|
||||
|
||||
data = CensoredData.right_censored(miles_to_fail + [135]*59,
|
||||
[0]*len(miles_to_fail) + [1]*59)
|
||||
sigma, loc, scale = lognorm.fit(data, floc=0)
|
||||
|
||||
assert loc == 0
|
||||
# Convert the lognorm parameters to the mu and sigma of the underlying
|
||||
# normal distribution.
|
||||
mu = np.log(scale)
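# The lognorm shape parameter returned by fit() is already the sigma of the
# underlying normal distribution, so only the scale needs converting.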
|
||||
# The expected results are from the 17th page of the PDF document
|
||||
# (labeled page 279), in the SAS output on the right side of the page.
|
||||
assert_allclose(mu, 5.1169, rtol=5e-4)
|
||||
assert_allclose(sigma, 0.7055, rtol=5e-3)
|
||||
|
||||
|
||||
def test_nct():
|
||||
"""
|
||||
Test fitting the noncentral t distribution to censored data.
|
||||
|
||||
Calculation in R:
|
||||
|
||||
> library(fitdistrplus)
|
||||
> data <- data.frame(left=c(1, 2, 3, 5, 8, 10, 25, 25),
|
||||
+ right=c(1, 2, 3, 5, 8, 10, NA, NA))
|
||||
> result = fitdistcens(data, 't', control=list(reltol=1e-14),
|
||||
+ start=list(df=1, ncp=2))
|
||||
> result
|
||||
Fitting of the distribution ' t ' on censored data by maximum likelihood
|
||||
Parameters:
|
||||
estimate
|
||||
df 0.5432336
|
||||
ncp 2.8893565
|
||||
|
||||
"""
|
||||
data = CensoredData.right_censored([1, 2, 3, 5, 8, 10, 25, 25],
|
||||
[0, 0, 0, 0, 0, 0, 1, 1])
|
||||
# Fit just the shape parameters df and nc; loc and scale are fixed.
|
||||
with np.errstate(over='ignore'): # remove context when gh-14901 is closed
|
||||
df, nc, loc, scale = nct.fit(data, floc=0, fscale=1,
|
||||
optimizer=optimizer)
|
||||
assert_allclose(df, 0.5432336, rtol=5e-6)
|
||||
assert_allclose(nc, 2.8893565, rtol=5e-6)
|
||||
assert loc == 0
|
||||
assert scale == 1
|
||||
|
||||
|
||||
def test_ncx2():
|
||||
"""
|
||||
Test fitting the shape parameters (df, ncp) of ncx2 to mixed data.
|
||||
|
||||
Calculation in R, with
|
||||
* 5 not censored values [2.7, 0.2, 6.5, 0.4, 0.1],
|
||||
* 1 interval-censored value [[0.6, 1.0]], and
|
||||
* 2 right-censored values [8, 8].
|
||||
|
||||
> library(fitdistrplus)
|
||||
> data <- data.frame(left=c(2.7, 0.2, 6.5, 0.4, 0.1, 0.6, 8, 8),
|
||||
+ right=c(2.7, 0.2, 6.5, 0.4, 0.1, 1.0, NA, NA))
|
||||
> result = fitdistcens(data, 'chisq', control=list(reltol=1e-14),
|
||||
+ start=list(df=1, ncp=2))
|
||||
> result
|
||||
Fitting of the distribution ' chisq ' on censored data by maximum
|
||||
likelihood
|
||||
Parameters:
|
||||
estimate
|
||||
df 1.052871
|
||||
ncp 2.362934
|
||||
"""
|
||||
data = CensoredData(uncensored=[2.7, 0.2, 6.5, 0.4, 0.1], right=[8, 8],
|
||||
interval=[[0.6, 1.0]])
|
||||
with np.errstate(over='ignore'): # remove context when gh-14901 is closed
|
||||
df, ncp, loc, scale = ncx2.fit(data, floc=0, fscale=1,
|
||||
optimizer=optimizer)
|
||||
assert_allclose(df, 1.052871, rtol=5e-6)
|
||||
assert_allclose(ncp, 2.362934, rtol=5e-6)
|
||||
assert loc == 0
|
||||
assert scale == 1
|
||||
|
||||
|
||||
def test_norm():
|
||||
"""
|
||||
Test fitting the normal distribution to interval-censored data.
|
||||
|
||||
Calculation in R:
|
||||
|
||||
> library(fitdistrplus)
|
||||
> data <- data.frame(left=c(0.10, 0.50, 0.75, 0.80),
|
||||
+ right=c(0.20, 0.55, 0.90, 0.95))
|
||||
> result = fitdistcens(data, 'norm', control=list(reltol=1e-14))
|
||||
|
||||
> result
|
||||
Fitting of the distribution ' norm ' on censored data by maximum likelihood
|
||||
Parameters:
|
||||
estimate
|
||||
mean 0.5919990
|
||||
sd 0.2868042
|
||||
> result$sd
|
||||
mean sd
|
||||
0.1444432 0.1029451
|
||||
"""
|
||||
data = CensoredData(interval=[[0.10, 0.20],
|
||||
[0.50, 0.55],
|
||||
[0.75, 0.90],
|
||||
[0.80, 0.95]])
|
||||
|
||||
loc, scale = norm.fit(data, optimizer=optimizer)
|
||||
|
||||
assert_allclose(loc, 0.5919990, rtol=5e-6)
|
||||
assert_allclose(scale, 0.2868042, rtol=5e-6)
|
||||
|
||||
|
||||
def test_weibull_censored1():
|
||||
# Ref: http://www.ams.sunysb.edu/~zhu/ams588/Lecture_3_likelihood.pdf
|
||||
|
||||
# Survival times; '*' indicates right-censored.
|
||||
s = "3,5,6*,8,10*,11*,15,20*,22,23,27*,29,32,35,40,26,28,33*,21,24*"
|
||||
|
||||
times, cens = zip(*[(float(t[0]), len(t) == 2)
|
||||
for t in [w.split('*') for w in s.split(',')]])
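# Each comma-separated token is split on '*': "6*" yields ['6', ''] (length 2,
# so censored is True), while "3" yields ['3'] (length 1, so censored is False).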
|
||||
data = CensoredData.right_censored(times, cens)
|
||||
|
||||
c, loc, scale = weibull_min.fit(data, floc=0)
|
||||
|
||||
# Expected values are from the reference.
|
||||
assert_allclose(c, 2.149, rtol=1e-3)
|
||||
assert loc == 0
|
||||
assert_allclose(scale, 28.99, rtol=1e-3)
|
||||
|
||||
# Flip the sign of the data, and make the censored values
|
||||
# left-censored. We should get the same parameters when we fit
|
||||
# weibull_max to the flipped data.
|
||||
data2 = CensoredData.left_censored(-np.array(times), cens)
|
||||
|
||||
c2, loc2, scale2 = weibull_max.fit(data2, floc=0)
|
||||
|
||||
assert_allclose(c2, 2.149, rtol=1e-3)
|
||||
assert loc2 == 0
|
||||
assert_allclose(scale2, 28.99, rtol=1e-3)
|
||||
|
||||
|
||||
def test_weibull_min_sas1():
|
||||
# Data and SAS results from
|
||||
# https://support.sas.com/documentation/cdl/en/qcug/63922/HTML/default/
|
||||
# viewer.htm#qcug_reliability_sect004.htm
|
||||
|
||||
text = """
|
||||
450 0 460 1 1150 0 1150 0 1560 1
|
||||
1600 0 1660 1 1850 1 1850 1 1850 1
|
||||
1850 1 1850 1 2030 1 2030 1 2030 1
|
||||
2070 0 2070 0 2080 0 2200 1 3000 1
|
||||
3000 1 3000 1 3000 1 3100 0 3200 1
|
||||
3450 0 3750 1 3750 1 4150 1 4150 1
|
||||
4150 1 4150 1 4300 1 4300 1 4300 1
|
||||
4300 1 4600 0 4850 1 4850 1 4850 1
|
||||
4850 1 5000 1 5000 1 5000 1 6100 1
|
||||
6100 0 6100 1 6100 1 6300 1 6450 1
|
||||
6450 1 6700 1 7450 1 7800 1 7800 1
|
||||
8100 1 8100 1 8200 1 8500 1 8500 1
|
||||
8500 1 8750 1 8750 0 8750 1 9400 1
|
||||
9900 1 10100 1 10100 1 10100 1 11500 1
|
||||
"""
|
||||
|
||||
life, cens = np.array([int(w) for w in text.split()]).reshape(-1, 2).T
|
||||
life = life/1000.0
|
||||
|
||||
data = CensoredData.right_censored(life, cens)
|
||||
|
||||
c, loc, scale = weibull_min.fit(data, floc=0, optimizer=optimizer)
|
||||
assert_allclose(c, 1.0584, rtol=1e-4)
|
||||
assert_allclose(scale, 26.2968, rtol=1e-5)
|
||||
assert loc == 0
|
||||
|
||||
|
||||
def test_weibull_min_sas2():
|
||||
# http://support.sas.com/documentation/cdl/en/ormpug/67517/HTML/default/
|
||||
# viewer.htm#ormpug_nlpsolver_examples06.htm
|
||||
|
||||
# The last two values are right-censored.
|
||||
days = np.array([143, 164, 188, 188, 190, 192, 206, 209, 213, 216, 220,
|
||||
227, 230, 234, 246, 265, 304, 216, 244])
|
||||
|
||||
data = CensoredData.right_censored(days, [0]*(len(days) - 2) + [1]*2)
|
||||
|
||||
c, loc, scale = weibull_min.fit(data, 1, loc=100, scale=100,
|
||||
optimizer=optimizer)
|
||||
|
||||
assert_allclose(c, 2.7112, rtol=5e-4)
|
||||
assert_allclose(loc, 122.03, rtol=5e-4)
|
||||
assert_allclose(scale, 108.37, rtol=5e-4)
|
||||
|
|
@ -1,80 +0,0 @@
|
|||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from scipy import stats
|
||||
from scipy.stats._axis_nan_policy import SmallSampleWarning
|
||||
|
||||
|
||||
class TestChatterjeeXi:
|
||||
@pytest.mark.parametrize('case', [
|
||||
dict(y_cont=True, statistic=-0.303030303030303, pvalue=0.9351329808526656),
|
||||
dict(y_cont=False, statistic=0.07407407407407396, pvalue=0.3709859367123997)])
|
||||
def test_against_R_XICOR(self, case):
|
||||
# Test against R package XICOR, e.g.
|
||||
# library(XICOR)
|
||||
# options(digits=16)
|
||||
# x = c(0.11027287231363914, 0.8154770102474279, 0.7073943466920335,
|
||||
# 0.6651317324378386, 0.6905752850115503, 0.06115250587536558,
|
||||
# 0.5209906494474178, 0.3155763519785274, 0.18405731803625924,
|
||||
# 0.8613557911541495)
|
||||
# y = c(0.8402081904493103, 0.5946972833914318, 0.23481606164114155,
|
||||
# 0.49754786197715384, 0.9146460831206026, 0.5848057749217579,
|
||||
# 0.7620801065573549, 0.31410063302647495, 0.7935620302236199,
|
||||
# 0.5423085761365468)
|
||||
# xicor(x, y, ties=FALSE, pvalue=TRUE)
|
||||
|
||||
rng = np.random.default_rng(25982435982346983)
|
||||
x = rng.random(size=10)
|
||||
|
||||
y = (rng.random(size=10) if case['y_cont']
|
||||
else rng.integers(0, 5, size=10))
|
||||
res = stats.chatterjeexi(x, y, y_continuous=case['y_cont'])
|
||||
|
||||
assert_allclose(res.statistic, case['statistic'])
|
||||
assert_allclose(res.pvalue, case['pvalue'])
|
||||
|
||||
@pytest.mark.parametrize('y_continuous', (False, True))
|
||||
def test_permutation_asymptotic(self, y_continuous):
|
||||
# XICOR doesn't seem to perform the permutation test as advertised, so
|
||||
# compare the result of a permutation test against an asymptotic test.
|
||||
rng = np.random.default_rng(2524579827426)
|
||||
n = np.floor(rng.uniform(100, 150)).astype(int)
|
||||
shape = (2, n)
|
||||
x = rng.random(size=shape)
|
||||
y = (rng.random(size=shape) if y_continuous
|
||||
else rng.integers(0, 10, size=shape))
|
||||
method = stats.PermutationMethod(rng=rng)
|
||||
res = stats.chatterjeexi(x, y, method=method,
|
||||
y_continuous=y_continuous, axis=-1)
|
||||
ref = stats.chatterjeexi(x, y, y_continuous=y_continuous, axis=-1)
|
||||
np.testing.assert_allclose(res.statistic, ref.statistic, rtol=1e-15)
|
||||
np.testing.assert_allclose(res.pvalue, ref.pvalue, rtol=2e-2)
|
||||
|
||||
def test_input_validation(self):
|
||||
rng = np.random.default_rng(25932435798274926)
|
||||
x, y = rng.random(size=(2, 10))
|
||||
|
||||
message = 'Array shapes are incompatible for broadcasting.'
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.chatterjeexi(x, y[:-1])
|
||||
|
||||
message = '...axis 10 is out of bounds for array...'
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.chatterjeexi(x, y, axis=10)
|
||||
|
||||
message = '`y_continuous` must be boolean.'
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.chatterjeexi(x, y, y_continuous='a herring')
|
||||
|
||||
message = "`method` must be 'asymptotic' or"
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.chatterjeexi(x, y, method='ekki ekii')
|
||||
|
||||
def test_special_cases(self):
|
||||
message = 'One or more sample arguments is too small...'
|
||||
with pytest.warns(SmallSampleWarning, match=message):
|
||||
res = stats.chatterjeexi([1], [2])
|
||||
|
||||
assert np.isnan(res.statistic)
|
||||
assert np.isnan(res.pvalue)
|
||||
|
|
@ -1,115 +0,0 @@
|
|||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_array_equal, assert_equal
|
||||
from scipy.stats.contingency import crosstab
|
||||
|
||||
|
||||
@pytest.mark.parametrize('sparse', [False, True])
|
||||
def test_crosstab_basic(sparse):
|
||||
a = [0, 0, 9, 9, 0, 0, 9]
|
||||
b = [2, 1, 3, 1, 2, 3, 3]
|
||||
expected_avals = [0, 9]
|
||||
expected_bvals = [1, 2, 3]
|
||||
expected_count = np.array([[1, 2, 1],
|
||||
[1, 0, 2]])
|
||||
(avals, bvals), count = crosstab(a, b, sparse=sparse)
|
||||
assert_array_equal(avals, expected_avals)
|
||||
assert_array_equal(bvals, expected_bvals)
|
||||
if sparse:
|
||||
assert_array_equal(count.toarray(), expected_count)
|
||||
else:
|
||||
assert_array_equal(count, expected_count)
|
||||
|
||||
|
||||
def test_crosstab_basic_1d():
|
||||
# Verify that a single input sequence works as expected.
|
||||
x = [1, 2, 3, 1, 2, 3, 3]
|
||||
expected_xvals = [1, 2, 3]
|
||||
expected_count = np.array([2, 2, 3])
|
||||
(xvals,), count = crosstab(x)
|
||||
assert_array_equal(xvals, expected_xvals)
|
||||
assert_array_equal(count, expected_count)
|
||||
|
||||
|
||||
def test_crosstab_basic_3d():
|
||||
# Verify the function for three input sequences.
|
||||
a = 'a'
|
||||
b = 'b'
|
||||
x = [0, 0, 9, 9, 0, 0, 9, 9]
|
||||
y = [a, a, a, a, b, b, b, a]
|
||||
z = [1, 2, 3, 1, 2, 3, 3, 1]
|
||||
expected_xvals = [0, 9]
|
||||
expected_yvals = [a, b]
|
||||
expected_zvals = [1, 2, 3]
|
||||
expected_count = np.array([[[1, 1, 0],
|
||||
[0, 1, 1]],
|
||||
[[2, 0, 1],
|
||||
[0, 0, 1]]])
|
||||
(xvals, yvals, zvals), count = crosstab(x, y, z)
|
||||
assert_array_equal(xvals, expected_xvals)
|
||||
assert_array_equal(yvals, expected_yvals)
|
||||
assert_array_equal(zvals, expected_zvals)
|
||||
assert_array_equal(count, expected_count)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('sparse', [False, True])
|
||||
def test_crosstab_levels(sparse):
|
||||
a = [0, 0, 9, 9, 0, 0, 9]
|
||||
b = [1, 2, 3, 1, 2, 3, 3]
|
||||
expected_avals = [0, 9]
|
||||
expected_bvals = [0, 1, 2, 3]
|
||||
expected_count = np.array([[0, 1, 2, 1],
|
||||
[0, 1, 0, 2]])
|
||||
(avals, bvals), count = crosstab(a, b, levels=[None, [0, 1, 2, 3]],
|
||||
sparse=sparse)
|
||||
assert_array_equal(avals, expected_avals)
|
||||
assert_array_equal(bvals, expected_bvals)
|
||||
if sparse:
|
||||
assert_array_equal(count.toarray(), expected_count)
|
||||
else:
|
||||
assert_array_equal(count, expected_count)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('sparse', [False, True])
|
||||
def test_crosstab_extra_levels(sparse):
|
||||
# The pair of values (-1, 3) will be ignored, because we explicitly
|
||||
# request the counted `a` values to be [0, 9].
|
||||
a = [0, 0, 9, 9, 0, 0, 9, -1]
|
||||
b = [1, 2, 3, 1, 2, 3, 3, 3]
|
||||
expected_avals = [0, 9]
|
||||
expected_bvals = [0, 1, 2, 3]
|
||||
expected_count = np.array([[0, 1, 2, 1],
|
||||
[0, 1, 0, 2]])
|
||||
(avals, bvals), count = crosstab(a, b, levels=[[0, 9], [0, 1, 2, 3]],
|
||||
sparse=sparse)
|
||||
assert_array_equal(avals, expected_avals)
|
||||
assert_array_equal(bvals, expected_bvals)
|
||||
if sparse:
|
||||
assert_array_equal(count.toarray(), expected_count)
|
||||
else:
|
||||
assert_array_equal(count, expected_count)
|
||||
|
||||
|
||||
def test_validation_at_least_one():
|
||||
with pytest.raises(TypeError, match='At least one'):
|
||||
crosstab()
|
||||
|
||||
|
||||
def test_validation_same_lengths():
|
||||
with pytest.raises(ValueError, match='must have the same length'):
|
||||
crosstab([1, 2], [1, 2, 3, 4])
|
||||
|
||||
|
||||
def test_validation_sparse_only_two_args():
|
||||
with pytest.raises(ValueError, match='only two input sequences'):
|
||||
crosstab([0, 1, 1], [8, 8, 9], [1, 3, 3], sparse=True)
|
||||
|
||||
|
||||
def test_validation_len_levels_matches_args():
|
||||
with pytest.raises(ValueError, match='number of input sequences'):
|
||||
crosstab([0, 1, 1], [8, 8, 9], levels=([0, 1, 2, 3],))
|
||||
|
||||
|
||||
def test_result():
|
||||
res = crosstab([0, 1], [1, 2])
|
||||
assert_equal((res.elements, res.count), res)
|
||||
|
|
@ -1,580 +0,0 @@
|
|||
import numpy.testing as npt
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from scipy import stats
|
||||
from .common_tests import (check_normalization, check_moment,
|
||||
check_mean_expect,
|
||||
check_var_expect, check_skew_expect,
|
||||
check_kurt_expect, check_entropy,
|
||||
check_private_entropy, check_edge_support,
|
||||
check_named_args, check_random_state_property,
|
||||
check_pickling, check_rvs_broadcast,
|
||||
check_freezing,)
|
||||
from scipy.stats._distr_params import distdiscrete, invdistdiscrete
|
||||
from scipy.stats._distn_infrastructure import rv_discrete_frozen
|
||||
|
||||
vals = ([1, 2, 3, 4], [0.1, 0.2, 0.3, 0.4])
|
||||
distdiscrete += [[stats.rv_discrete(values=vals), ()]]
|
||||
|
||||
# For these distributions, test_discrete_basic only runs with test mode full
|
||||
distslow = {'zipfian', 'nhypergeom'}
|
||||
|
||||
# Override number of ULPs adjustment for `check_cdf_ppf`
|
||||
roundtrip_cdf_ppf_exceptions = {'nbinom': 30}
|
||||
|
||||
def cases_test_discrete_basic():
|
||||
seen = set()
|
||||
for distname, arg in distdiscrete:
|
||||
if distname in distslow:
|
||||
yield pytest.param(distname, arg, distname, marks=pytest.mark.slow)
|
||||
else:
|
||||
yield distname, arg, distname not in seen
|
||||
seen.add(distname)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname,arg,first_case', cases_test_discrete_basic())
|
||||
def test_discrete_basic(distname, arg, first_case, num_parallel_threads):
|
||||
if (isinstance(distname, str) and distname.startswith('nchypergeom')
|
||||
and num_parallel_threads > 1):
|
||||
pytest.skip(reason='nchypergeom has a global random generator')
|
||||
|
||||
try:
|
||||
distfn = getattr(stats, distname)
|
||||
except TypeError:
|
||||
distfn = distname
|
||||
distname = 'sample distribution'
|
||||
rng = np.random.RandomState(9765456)
|
||||
rvs = distfn.rvs(*arg, size=2000, random_state=rng)
|
||||
supp = np.unique(rvs)
|
||||
m, v = distfn.stats(*arg)
|
||||
check_cdf_ppf(distfn, arg, supp, distname + ' cdf_ppf')
|
||||
|
||||
check_pmf_cdf(distfn, arg, distname)
|
||||
check_oth(distfn, arg, supp, distname + ' oth')
|
||||
check_edge_support(distfn, arg)
|
||||
|
||||
alpha = 0.01
|
||||
check_discrete_chisquare(distfn, arg, rvs, alpha,
|
||||
distname + ' chisquare')
|
||||
|
||||
if first_case:
|
||||
locscale_defaults = (0,)
|
||||
meths = [distfn.pmf, distfn.logpmf, distfn.cdf, distfn.logcdf,
|
||||
distfn.logsf]
|
||||
# make sure arguments are within support
|
||||
# for some distributions, this needs to be overridden
|
||||
spec_k = {'randint': 11, 'hypergeom': 4, 'bernoulli': 0,
|
||||
'nchypergeom_wallenius': 6}
|
||||
k = spec_k.get(distname, 1)
|
||||
check_named_args(distfn, k, arg, locscale_defaults, meths)
|
||||
if distname != 'sample distribution':
|
||||
check_scale_docstring(distfn)
|
||||
if num_parallel_threads == 1:
|
||||
check_random_state_property(distfn, arg)
|
||||
if distname not in {'poisson_binom'}: # can't be pickled
|
||||
check_pickling(distfn, arg)
|
||||
check_freezing(distfn, arg)
|
||||
|
||||
# Entropy
|
||||
check_entropy(distfn, arg, distname)
|
||||
if distfn.__class__._entropy != stats.rv_discrete._entropy:
|
||||
check_private_entropy(distfn, arg, stats.rv_discrete)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname,arg', distdiscrete)
|
||||
def test_moments(distname, arg):
|
||||
try:
|
||||
distfn = getattr(stats, distname)
|
||||
except TypeError:
|
||||
distfn = distname
|
||||
distname = 'sample distribution'
|
||||
m, v, s, k = distfn.stats(*arg, moments='mvsk')
|
||||
check_normalization(distfn, arg, distname)
|
||||
|
||||
# compare `stats` and `moment` methods
|
||||
check_moment(distfn, arg, m, v, distname)
|
||||
check_mean_expect(distfn, arg, m, distname)
|
||||
check_var_expect(distfn, arg, m, v, distname)
|
||||
check_skew_expect(distfn, arg, m, v, s, distname)
|
||||
with np.testing.suppress_warnings() as sup:
|
||||
if distname in ['zipf', 'betanbinom']:
|
||||
sup.filter(RuntimeWarning)
|
||||
check_kurt_expect(distfn, arg, m, v, k, distname)
|
||||
|
||||
# frozen distr moments
|
||||
check_moment_frozen(distfn, arg, m, 1)
|
||||
check_moment_frozen(distfn, arg, v+m*m, 2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dist,shape_args', distdiscrete)
|
||||
def test_rvs_broadcast(dist, shape_args):
|
||||
# If shape_only is True, it means the _rvs method of the
|
||||
# distribution uses more than one random number to generate a random
|
||||
# variate. That means the result of using rvs with broadcasting or
|
||||
# with a nontrivial size will not necessarily be the same as using the
|
||||
# numpy.vectorize'd version of rvs(), so we can only compare the shapes
|
||||
# of the results, not the values.
|
||||
# Whether or not a distribution is in the following list is an
|
||||
# implementation detail of the distribution, not a requirement. If
|
||||
# the implementation of the rvs() method of a distribution changes, this
|
||||
# test might also have to be changed.
|
||||
shape_only = dist in ['betabinom', 'betanbinom', 'skellam', 'yulesimon',
|
||||
'dlaplace', 'nchypergeom_fisher',
|
||||
'nchypergeom_wallenius', 'poisson_binom']
|
||||
try:
|
||||
distfunc = getattr(stats, dist)
|
||||
except TypeError:
|
||||
distfunc = dist
|
||||
dist = f'rv_discrete(values=({dist.xk!r}, {dist.pk!r}))'
|
||||
loc = np.zeros(2)
|
||||
nargs = distfunc.numargs
|
||||
allargs = []
|
||||
bshape = []
|
||||
|
||||
if dist == 'poisson_binom':
|
||||
# normal rules apply except the last axis of `p` is ignored
|
||||
p = np.full((3, 1, 10), 0.5)
|
||||
allargs = (p, loc)
|
||||
bshape = (3, 2)
|
||||
check_rvs_broadcast(distfunc, dist, allargs,
|
||||
bshape, shape_only, [np.dtype(int)])
|
||||
return
|
||||
|
||||
# Generate shape parameter arguments...
|
||||
for k in range(nargs):
|
||||
shp = (k + 3,) + (1,)*(k + 1)
|
||||
param_val = shape_args[k]
|
||||
allargs.append(np.full(shp, param_val))
|
||||
bshape.insert(0, shp[0])
|
||||
allargs.append(loc)
|
||||
bshape.append(loc.size)
|
||||
# bshape holds the expected shape when loc, scale, and the shape
|
||||
# parameters are all broadcast together.
|
||||
check_rvs_broadcast(
|
||||
distfunc, dist, allargs, bshape, shape_only, [np.dtype(int)]
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dist,args', distdiscrete)
|
||||
def test_ppf_with_loc(dist, args):
|
||||
try:
|
||||
distfn = getattr(stats, dist)
|
||||
except TypeError:
|
||||
distfn = dist
|
||||
# check with a negative, zero, and positive relocation.
|
||||
np.random.seed(1942349)
|
||||
re_locs = [np.random.randint(-10, -1), 0, np.random.randint(1, 10)]
|
||||
_a, _b = distfn.support(*args)
|
||||
for loc in re_locs:
|
||||
npt.assert_array_equal(
|
||||
[_a-1+loc, _b+loc],
|
||||
[distfn.ppf(0.0, *args, loc=loc), distfn.ppf(1.0, *args, loc=loc)]
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dist, args', distdiscrete)
|
||||
def test_isf_with_loc(dist, args):
|
||||
try:
|
||||
distfn = getattr(stats, dist)
|
||||
except TypeError:
|
||||
distfn = dist
|
||||
# check with a negative, zero, and positive relocation.
|
||||
np.random.seed(1942349)
|
||||
re_locs = [np.random.randint(-10, -1), 0, np.random.randint(1, 10)]
|
||||
_a, _b = distfn.support(*args)
|
||||
for loc in re_locs:
|
||||
expected = _b + loc, _a - 1 + loc
|
||||
res = distfn.isf(0., *args, loc=loc), distfn.isf(1., *args, loc=loc)
|
||||
npt.assert_array_equal(expected, res)
|
||||
# test broadcasting behaviour
|
||||
re_locs = [np.random.randint(-10, -1, size=(5, 3)),
|
||||
np.zeros((5, 3)),
|
||||
np.random.randint(1, 10, size=(5, 3))]
|
||||
_a, _b = distfn.support(*args)
|
||||
for loc in re_locs:
|
||||
expected = _b + loc, _a - 1 + loc
|
||||
res = distfn.isf(0., *args, loc=loc), distfn.isf(1., *args, loc=loc)
|
||||
npt.assert_array_equal(expected, res)
|
||||
|
||||
|
||||
def check_cdf_ppf(distfn, arg, supp, msg):
|
||||
# supp is assumed to be an array of integers in the support of distfn
|
||||
# (but not necessarily all the integers in the support).
|
||||
# This test assumes that the PMF of any value in the support of the
|
||||
# distribution is greater than 1e-8.
|
||||
|
||||
# cdf is a step function, and ppf(q) = min{k : cdf(k) >= q, k integer}
|
||||
cdf_supp = distfn.cdf(supp, *arg)
|
||||
# In very rare cases, the finite precision calculation of ppf(cdf(supp))
|
||||
# can produce an array in which an element is off by one. We nudge the
|
||||
# CDF values down by a few ULPs to help avoid this.
|
||||
n_ulps = roundtrip_cdf_ppf_exceptions.get(distfn.name, 15)
|
||||
cdf_supp0 = cdf_supp - n_ulps*np.spacing(cdf_supp)
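# np.spacing(x) is the distance from x to the next larger representable
# float, so this shifts each CDF value down by roughly n_ulps ULPs.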
|
||||
npt.assert_array_equal(distfn.ppf(cdf_supp0, *arg),
|
||||
supp, msg + '-roundtrip')
|
||||
# Repeat the same calculation, but with the CDF values decreased by 1e-8.
|
||||
npt.assert_array_equal(distfn.ppf(distfn.cdf(supp, *arg) - 1e-8, *arg),
|
||||
supp, msg + '-roundtrip')
|
||||
|
||||
if not hasattr(distfn, 'xk'):
|
||||
_a, _b = distfn.support(*arg)
|
||||
supp1 = supp[supp < _b]
|
||||
npt.assert_array_equal(distfn.ppf(distfn.cdf(supp1, *arg) + 1e-8, *arg),
|
||||
supp1 + distfn.inc, msg + ' ppf-cdf-next')
|
||||
|
||||
|
||||
def check_pmf_cdf(distfn, arg, distname):
|
||||
if hasattr(distfn, 'xk'):
|
||||
index = distfn.xk
|
||||
else:
|
||||
startind = int(distfn.ppf(0.01, *arg) - 1)
|
||||
index = list(range(startind, startind + 10))
|
||||
cdfs = distfn.cdf(index, *arg)
|
||||
pmfs_cum = distfn.pmf(index, *arg).cumsum()
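# Compare increments rather than absolute values: `index` need not start at
# the lower end of the support, so only the CDF differences and cumulative
# PMF differences (both anchored at the first index) are comparable.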
|
||||
|
||||
atol, rtol = 1e-10, 1e-10
|
||||
if distname == 'skellam': # ncx2 accuracy
|
||||
atol, rtol = 1e-5, 1e-5
|
||||
npt.assert_allclose(cdfs - cdfs[0], pmfs_cum - pmfs_cum[0],
|
||||
atol=atol, rtol=rtol)
|
||||
|
||||
# also check that pmf at non-integral k is zero
|
||||
k = np.asarray(index)
|
||||
k_shifted = k[:-1] + np.diff(k)/2
|
||||
npt.assert_equal(distfn.pmf(k_shifted, *arg), 0)
|
||||
|
||||
# also check frozen distributions, and the case loc != 0
|
||||
loc = 0.5
|
||||
dist = distfn(loc=loc, *arg)
|
||||
npt.assert_allclose(dist.pmf(k[1:] + loc), np.diff(dist.cdf(k + loc)))
|
||||
npt.assert_equal(dist.pmf(k_shifted + loc), 0)
|
||||
|
||||
|
||||
def check_moment_frozen(distfn, arg, m, k):
|
||||
npt.assert_allclose(distfn(*arg).moment(k), m,
|
||||
atol=1e-10, rtol=1e-10)
|
||||
|
||||
|
||||
def check_oth(distfn, arg, supp, msg):
|
||||
# checking other methods of distfn
|
||||
npt.assert_allclose(distfn.sf(supp, *arg), 1. - distfn.cdf(supp, *arg),
|
||||
atol=1e-10, rtol=1e-10)
|
||||
|
||||
q = np.linspace(0.01, 0.99, 20)
|
||||
npt.assert_allclose(distfn.isf(q, *arg), distfn.ppf(1. - q, *arg),
|
||||
atol=1e-10, rtol=1e-10)
|
||||
|
||||
median_sf = distfn.isf(0.5, *arg)
|
||||
npt.assert_(distfn.sf(median_sf - 1, *arg) > 0.5)
|
||||
npt.assert_(distfn.cdf(median_sf + 1, *arg) > 0.5)
|
||||
|
||||
|
||||
def check_discrete_chisquare(distfn, arg, rvs, alpha, msg):
|
||||
"""Perform chisquare test for random sample of a discrete distribution
|
||||
|
||||
Parameters
|
||||
----------
|
||||
distfn : distribution object
the discrete distribution to be tested
|
||||
arg : sequence
|
||||
parameters of distribution
|
||||
rvs : array_like
random sample drawn from the distribution
alpha : float
significance level, threshold for the p-value
msg : str
label used in the assertion message
|
||||
|
||||
Notes
-----
Does not return a value; the chi-square goodness-of-fit test is
performed with an assertion that the p-value exceeds `alpha`.
|
||||
|
||||
"""
|
||||
wsupp = 0.05
|
||||
|
||||
# construct intervals with minimum mass `wsupp`.
|
||||
# intervals are left-half-open as in a cdf difference
|
||||
_a, _b = distfn.support(*arg)
|
||||
lo = int(max(_a, -1000))
|
||||
high = int(min(_b, 1000)) + 1
|
||||
distsupport = range(lo, high)
|
||||
last = 0
|
||||
distsupp = [lo]
|
||||
distmass = []
|
||||
for ii in distsupport:
|
||||
current = distfn.cdf(ii, *arg)
|
||||
if current - last >= wsupp - 1e-14:
|
||||
distsupp.append(ii)
|
||||
distmass.append(current - last)
|
||||
last = current
|
||||
if current > (1 - wsupp):
|
||||
break
|
||||
if distsupp[-1] < _b:
|
||||
distsupp.append(_b)
|
||||
distmass.append(1 - last)
|
||||
distsupp = np.array(distsupp)
|
||||
distmass = np.array(distmass)
|
||||
|
||||
# convert intervals to right-half-open as required by histogram
|
||||
histsupp = distsupp + 1e-8
|
||||
histsupp[0] = _a
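# Shifting each integer edge up by a small amount places every integer k
# strictly inside its bin, so np.histogram's half-open bins reproduce the
# CDF-difference masses computed above.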
|
||||
|
||||
# find sample frequencies and perform chisquare test
|
||||
freq, hsupp = np.histogram(rvs, histsupp)
|
||||
chis, pval = stats.chisquare(np.array(freq), len(rvs)*distmass)
|
||||
|
||||
npt.assert_(
|
||||
pval > alpha,
|
||||
f'chisquare - test for {msg} at arg = {str(arg)} with pval = {str(pval)}'
|
||||
)
|
||||
|
||||
|
||||
def check_scale_docstring(distfn):
|
||||
if distfn.__doc__ is not None:
|
||||
# Docstrings can be stripped if interpreter is run with -OO
|
||||
npt.assert_('scale' not in distfn.__doc__)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('method', ['pmf', 'logpmf', 'cdf', 'logcdf',
|
||||
'sf', 'logsf', 'ppf', 'isf'])
|
||||
@pytest.mark.parametrize('distname, args', distdiscrete)
|
||||
def test_methods_with_lists(method, distname, args):
|
||||
# Test that the discrete distributions can accept Python lists
|
||||
# as arguments.
|
||||
try:
|
||||
dist = getattr(stats, distname)
|
||||
except TypeError:
|
||||
return
|
||||
dist_method = getattr(dist, method)
|
||||
if method in ['ppf', 'isf']:
|
||||
z = [0.1, 0.2]
|
||||
else:
|
||||
z = [0, 1]
|
||||
p2 = [[p]*2 for p in args]
|
||||
loc = [0, 1]
|
||||
result = dist_method(z, *p2, loc=loc)
|
||||
npt.assert_allclose(result,
|
||||
[dist_method(*v) for v in zip(z, *p2, loc)],
|
||||
rtol=1e-15, atol=1e-15)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname, args', invdistdiscrete)
|
||||
def test_cdf_gh13280_regression(distname, args):
|
||||
# Test for nan output when shape parameters are invalid
|
||||
dist = getattr(stats, distname)
|
||||
x = np.arange(-2, 15)
|
||||
vals = dist.cdf(x, *args)
|
||||
expected = np.nan
|
||||
npt.assert_equal(vals, expected)
|
||||
|
||||
|
||||
def cases_test_discrete_integer_shapes():
|
||||
# distribution parameters that are only allowed to be integral when
# fitting, but are allowed to be real as input to the PMF, CDF, etc.
|
||||
integrality_exceptions = {'nbinom': {'n'}, 'betanbinom': {'n'}}
|
||||
|
||||
seen = set()
|
||||
for distname, shapes in distdiscrete:
|
||||
if distname in seen:
|
||||
continue
|
||||
seen.add(distname)
|
||||
|
||||
try:
|
||||
dist = getattr(stats, distname)
|
||||
except TypeError:
|
||||
continue
|
||||
|
||||
shape_info = dist._shape_info()
|
||||
|
||||
for i, shape in enumerate(shape_info):
|
||||
if (shape.name in integrality_exceptions.get(distname, set()) or
|
||||
not shape.integrality):
|
||||
continue
|
||||
|
||||
yield distname, shape.name, shapes
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname, shapename, shapes',
|
||||
cases_test_discrete_integer_shapes())
|
||||
def test_integer_shapes(distname, shapename, shapes):
|
||||
dist = getattr(stats, distname)
|
||||
shape_info = dist._shape_info()
|
||||
shape_names = [shape.name for shape in shape_info]
|
||||
i = shape_names.index(shapename) # this element of params must be integral
|
||||
|
||||
shapes_copy = list(shapes)
|
||||
|
||||
valid_shape = shapes[i]
|
||||
invalid_shape = valid_shape - 0.5 # arbitrary non-integral value
|
||||
new_valid_shape = valid_shape - 1
|
||||
shapes_copy[i] = [[valid_shape], [invalid_shape], [new_valid_shape]]
|
||||
|
||||
a, b = dist.support(*shapes)
|
||||
x = np.round(np.linspace(a, b, 5))
|
||||
|
||||
pmf = dist.pmf(x, *shapes_copy)
|
||||
assert not np.any(np.isnan(pmf[0, :]))
|
||||
assert np.all(np.isnan(pmf[1, :]))
|
||||
assert not np.any(np.isnan(pmf[2, :]))
|
||||
|
||||
|
||||
@pytest.mark.parallel_threads(1)
|
||||
def test_frozen_attributes():
|
||||
# gh-14827 reported that all frozen distributions had both pmf and pdf
|
||||
# attributes; continuous should have pdf and discrete should have pmf.
|
||||
message = "'rv_discrete_frozen' object has no attribute"
|
||||
with pytest.raises(AttributeError, match=message):
|
||||
stats.binom(10, 0.5).pdf
|
||||
with pytest.raises(AttributeError, match=message):
|
||||
stats.binom(10, 0.5).logpdf
|
||||
stats.binom.pdf = "herring"
|
||||
frozen_binom = stats.binom(10, 0.5)
|
||||
assert isinstance(frozen_binom, rv_discrete_frozen)
|
||||
delattr(stats.binom, 'pdf')
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname, shapes', distdiscrete)
|
||||
def test_interval(distname, shapes):
|
||||
# gh-11026 reported that `interval` returns incorrect values when
|
||||
# `confidence=1`. The values were not incorrect, but it was not intuitive
|
||||
# that the left end of the interval should extend beyond the support of the
|
||||
# distribution. Confirm that this is the behavior for all distributions.
|
||||
if isinstance(distname, str):
|
||||
dist = getattr(stats, distname)
|
||||
else:
|
||||
dist = distname
|
||||
a, b = dist.support(*shapes)
|
||||
npt.assert_equal(dist.ppf([0, 1], *shapes), (a-1, b))
|
||||
npt.assert_equal(dist.isf([1, 0], *shapes), (a-1, b))
|
||||
npt.assert_equal(dist.interval(1, *shapes), (a-1, b))
|
||||
|
||||
|
||||
@pytest.mark.xfail_on_32bit("Sensible to machine precision")
|
||||
def test_rv_sample():
|
||||
# Thoroughly test rv_sample and check that gh-3758 is resolved
|
||||
|
||||
# Generate a random discrete distribution
|
||||
rng = np.random.default_rng(98430143469)
|
||||
xk = np.sort(rng.random(10) * 10)
|
||||
pk = rng.random(10)
|
||||
pk /= np.sum(pk)
|
||||
dist = stats.rv_discrete(values=(xk, pk))
|
||||
|
||||
# Generate points to the left and right of xk
|
||||
xk_left = (np.array([0] + xk[:-1].tolist()) + xk)/2
|
||||
xk_right = (np.array(xk[1:].tolist() + [xk[-1]+1]) + xk)/2
|
||||
|
||||
# Generate points to the left and right of cdf
|
||||
cdf2 = np.cumsum(pk)
|
||||
cdf2_left = (np.array([0] + cdf2[:-1].tolist()) + cdf2)/2
|
||||
cdf2_right = (np.array(cdf2[1:].tolist() + [1]) + cdf2)/2
|
||||
|
||||
# support - leftmost and rightmost xk
|
||||
a, b = dist.support()
|
||||
assert_allclose(a, xk[0])
|
||||
assert_allclose(b, xk[-1])
|
||||
|
||||
# pmf - supported only on the xk
|
||||
assert_allclose(dist.pmf(xk), pk)
|
||||
assert_allclose(dist.pmf(xk_right), 0)
|
||||
assert_allclose(dist.pmf(xk_left), 0)
|
||||
|
||||
# logpmf is log of the pmf; log(0) = -np.inf
|
||||
with np.errstate(divide='ignore'):
|
||||
assert_allclose(dist.logpmf(xk), np.log(pk))
|
||||
assert_allclose(dist.logpmf(xk_right), -np.inf)
|
||||
assert_allclose(dist.logpmf(xk_left), -np.inf)
|
||||
|
||||
# cdf - the cumulative sum of the pmf
|
||||
assert_allclose(dist.cdf(xk), cdf2)
|
||||
assert_allclose(dist.cdf(xk_right), cdf2)
|
||||
assert_allclose(dist.cdf(xk_left), [0]+cdf2[:-1].tolist())
|
||||
|
||||
with np.errstate(divide='ignore'):
|
||||
assert_allclose(dist.logcdf(xk), np.log(dist.cdf(xk)),
|
||||
atol=1e-15)
|
||||
assert_allclose(dist.logcdf(xk_right), np.log(dist.cdf(xk_right)),
|
||||
atol=1e-15)
|
||||
assert_allclose(dist.logcdf(xk_left), np.log(dist.cdf(xk_left)),
|
||||
atol=1e-15)
|
||||
|
||||
# sf is 1-cdf
|
||||
assert_allclose(dist.sf(xk), 1-dist.cdf(xk))
|
||||
assert_allclose(dist.sf(xk_right), 1-dist.cdf(xk_right))
|
||||
assert_allclose(dist.sf(xk_left), 1-dist.cdf(xk_left))
|
||||
|
||||
with np.errstate(divide='ignore'):
|
||||
assert_allclose(dist.logsf(xk), np.log(dist.sf(xk)),
|
||||
atol=1e-15)
|
||||
assert_allclose(dist.logsf(xk_right), np.log(dist.sf(xk_right)),
|
||||
atol=1e-15)
|
||||
assert_allclose(dist.logsf(xk_left), np.log(dist.sf(xk_left)),
|
||||
atol=1e-15)
|
||||
|
||||
# ppf
|
||||
assert_allclose(dist.ppf(cdf2), xk)
|
||||
assert_allclose(dist.ppf(cdf2_left), xk)
|
||||
assert_allclose(dist.ppf(cdf2_right)[:-1], xk[1:])
|
||||
assert_allclose(dist.ppf(0), a - 1)
|
||||
assert_allclose(dist.ppf(1), b)
|
||||
|
||||
# isf
|
||||
sf2 = dist.sf(xk)
|
||||
assert_allclose(dist.isf(sf2), xk)
|
||||
assert_allclose(dist.isf(1-cdf2_left), dist.ppf(cdf2_left))
|
||||
assert_allclose(dist.isf(1-cdf2_right), dist.ppf(cdf2_right))
|
||||
assert_allclose(dist.isf(0), b)
|
||||
assert_allclose(dist.isf(1), a - 1)
|
||||
|
||||
# interval is (ppf(alpha/2), isf(alpha/2))
|
||||
ps = np.linspace(0.01, 0.99, 10)
|
||||
int2 = dist.ppf(ps/2), dist.isf(ps/2)
|
||||
assert_allclose(dist.interval(1-ps), int2)
|
||||
assert_allclose(dist.interval(0), dist.median())
|
||||
assert_allclose(dist.interval(1), (a-1, b))
|
||||
|
||||
# median is simply ppf(0.5)
|
||||
med2 = dist.ppf(0.5)
|
||||
assert_allclose(dist.median(), med2)
|
||||
|
||||
# all four stats (mean, var, skew, and kurtosis) from the definitions
|
||||
mean2 = np.sum(xk*pk)
|
||||
var2 = np.sum((xk - mean2)**2 * pk)
|
||||
skew2 = np.sum((xk - mean2)**3 * pk) / var2**(3/2)
|
||||
kurt2 = np.sum((xk - mean2)**4 * pk) / var2**2 - 3
|
||||
assert_allclose(dist.mean(), mean2)
|
||||
assert_allclose(dist.std(), np.sqrt(var2))
|
||||
assert_allclose(dist.var(), var2)
|
||||
assert_allclose(dist.stats(moments='mvsk'), (mean2, var2, skew2, kurt2))
|
||||
|
||||
# noncentral moment against definition
|
||||
mom3 = np.sum((xk**3) * pk)
|
||||
assert_allclose(dist.moment(3), mom3)
|
||||
|
||||
# expect - check against moments
|
||||
assert_allclose(dist.expect(lambda x: 1), 1)
|
||||
assert_allclose(dist.expect(), mean2)
|
||||
assert_allclose(dist.expect(lambda x: x**3), mom3)
|
||||
|
||||
# entropy is the negative of the expected value of log(p)
|
||||
with np.errstate(divide='ignore'):
|
||||
assert_allclose(-dist.expect(lambda x: dist.logpmf(x)), dist.entropy())
|
||||
|
||||
# RVS is just ppf of uniform random variates
|
||||
rng = np.random.default_rng(98430143469)
|
||||
rvs = dist.rvs(size=100, random_state=rng)
|
||||
rng = np.random.default_rng(98430143469)
|
||||
rvs0 = dist.ppf(rng.random(size=100))
|
||||
assert_allclose(rvs, rvs0)
|
||||
|
||||
def test__pmf_float_input():
|
||||
# gh-21272
|
||||
# test that `rvs()` can be computed when `_pmf` requires float input
|
||||
|
||||
class rv_exponential(stats.rv_discrete):
|
||||
def _pmf(self, i):
|
||||
return (2/3)*3**(1 - i)
|
||||
|
||||
rv = rv_exponential(a=0.0, b=float('inf'))
|
||||
rvs = rv.rvs(random_state=42) # should not crash due to integer input to `_pmf`
|
||||
assert_allclose(rvs, 0)
|
||||
|
|
@ -1,700 +0,0 @@
|
|||
import pytest
|
||||
import itertools
|
||||
|
||||
from scipy import stats
|
||||
from scipy.stats import (betabinom, betanbinom, hypergeom, nhypergeom,
|
||||
bernoulli, boltzmann, skellam, zipf, zipfian, binom,
|
||||
nbinom, nchypergeom_fisher, nchypergeom_wallenius,
|
||||
randint, poisson_binom)
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import (
|
||||
assert_almost_equal, assert_equal, assert_allclose, suppress_warnings
|
||||
)
|
||||
from scipy.special import binom as special_binom
|
||||
from scipy.optimize import root_scalar
|
||||
from scipy.integrate import quad
|
||||
|
||||
|
||||
# The expected values were computed with Wolfram Alpha, using
|
||||
# the expression CDF[HypergeometricDistribution[N, n, M], k].
|
||||
@pytest.mark.parametrize('k, M, n, N, expected, rtol',
|
||||
[(3, 10, 4, 5,
|
||||
0.9761904761904762, 1e-15),
|
||||
(107, 10000, 3000, 215,
|
||||
0.9999999997226765, 1e-15),
|
||||
(10, 10000, 3000, 215,
|
||||
2.681682217692179e-21, 5e-11)])
|
||||
def test_hypergeom_cdf(k, M, n, N, expected, rtol):
|
||||
p = hypergeom.cdf(k, M, n, N)
|
||||
assert_allclose(p, expected, rtol=rtol)
|
||||
|
||||
|
||||
# The expected values were computed with Wolfram Alpha, using
|
||||
# the expression SurvivalFunction[HypergeometricDistribution[N, n, M], k].
|
||||
@pytest.mark.parametrize('k, M, n, N, expected, rtol',
|
||||
[(25, 10000, 3000, 215,
|
||||
0.9999999999052958, 1e-15),
|
||||
(125, 10000, 3000, 215,
|
||||
1.4416781705752128e-18, 5e-11)])
|
||||
def test_hypergeom_sf(k, M, n, N, expected, rtol):
|
||||
p = hypergeom.sf(k, M, n, N)
|
||||
assert_allclose(p, expected, rtol=rtol)
|
||||
|
||||
|
||||
def test_hypergeom_logpmf():
|
||||
# symmetries test
|
||||
# f(k,N,K,n) = f(n-k,N,N-K,n) = f(K-k,N,K,N-n) = f(k,N,n,K)
|
||||
k = 5
|
||||
N = 50
|
||||
K = 10
|
||||
n = 5
|
||||
logpmf1 = hypergeom.logpmf(k, N, K, n)
|
||||
logpmf2 = hypergeom.logpmf(n - k, N, N - K, n)
|
||||
logpmf3 = hypergeom.logpmf(K - k, N, K, N - n)
|
||||
logpmf4 = hypergeom.logpmf(k, N, n, K)
|
||||
assert_almost_equal(logpmf1, logpmf2, decimal=12)
|
||||
assert_almost_equal(logpmf1, logpmf3, decimal=12)
|
||||
assert_almost_equal(logpmf1, logpmf4, decimal=12)
|
||||
|
||||
# test related distribution
|
||||
# Bernoulli distribution if n = 1
|
||||
k = 1
|
||||
N = 10
|
||||
K = 7
|
||||
n = 1
|
||||
hypergeom_logpmf = hypergeom.logpmf(k, N, K, n)
|
||||
bernoulli_logpmf = bernoulli.logpmf(k, K/N)
|
||||
assert_almost_equal(hypergeom_logpmf, bernoulli_logpmf, decimal=12)
|
||||
|
||||
|
||||
def test_nhypergeom_pmf():
|
||||
# test with hypergeom
|
||||
M, n, r = 45, 13, 8
|
||||
k = 6
|
||||
NHG = nhypergeom.pmf(k, M, n, r)
|
||||
HG = hypergeom.pmf(k, M, n, k+r-1) * (M - n - (r-1)) / (M - (k+r-1))
|
||||
assert_allclose(HG, NHG, rtol=1e-10)
|
||||
|
||||
|
||||
def test_nhypergeom_pmfcdf():
|
||||
# test pmf and cdf with arbitrary values.
|
||||
M = 8
|
||||
n = 3
|
||||
r = 4
|
||||
support = np.arange(n+1)
|
||||
pmf = nhypergeom.pmf(support, M, n, r)
|
||||
cdf = nhypergeom.cdf(support, M, n, r)
|
||||
assert_allclose(pmf, [1/14, 3/14, 5/14, 5/14], rtol=1e-13)
|
||||
assert_allclose(cdf, [1/14, 4/14, 9/14, 1.0], rtol=1e-13)
|
||||
|
||||
|
||||
def test_nhypergeom_r0():
|
||||
# test with `r = 0`.
|
||||
M = 10
|
||||
n = 3
|
||||
r = 0
|
||||
pmf = nhypergeom.pmf([[0, 1, 2, 0], [1, 2, 0, 3]], M, n, r)
|
||||
assert_allclose(pmf, [[1, 0, 0, 1], [0, 0, 1, 0]], rtol=1e-13)
|
||||
|
||||
|
||||
def test_nhypergeom_rvs_shape():
|
||||
# Check that when given a size with more dimensions than the
|
||||
# dimensions of the broadcast parameters, rvs returns an array
|
||||
# with the correct shape.
|
||||
x = nhypergeom.rvs(22, [7, 8, 9], [[12], [13]], size=(5, 1, 2, 3))
|
||||
assert x.shape == (5, 1, 2, 3)
|
||||
|
||||
|
||||
def test_nhypergeom_accuracy():
|
||||
# Check that nhypergeom.rvs post-gh-13431 gives the same values as
|
||||
# inverse transform sampling
|
||||
rng = np.random.RandomState(0)
|
||||
x = nhypergeom.rvs(22, 7, 11, size=100, random_state=rng)
|
||||
rng = np.random.RandomState(0)
|
||||
p = rng.uniform(size=100)
|
||||
y = nhypergeom.ppf(p, 22, 7, 11)
|
||||
assert_equal(x, y)
|
||||
|
||||
|
||||
def test_boltzmann_upper_bound():
|
||||
k = np.arange(-3, 5)
|
||||
|
||||
N = 1
|
||||
p = boltzmann.pmf(k, 0.123, N)
|
||||
expected = k == 0
|
||||
assert_equal(p, expected)
|
||||
|
||||
lam = np.log(2)
|
||||
N = 3
|
||||
p = boltzmann.pmf(k, lam, N)
|
||||
expected = [0, 0, 0, 4/7, 2/7, 1/7, 0, 0]
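# With lam = log(2), the unnormalized weights exp(-lam*k) for k = 0, 1, 2
# are 1, 1/2, 1/4; dividing by their sum 7/4 gives 4/7, 2/7, 1/7.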
|
||||
assert_allclose(p, expected, rtol=1e-13)
|
||||
|
||||
c = boltzmann.cdf(k, lam, N)
|
||||
expected = [0, 0, 0, 4/7, 6/7, 1, 1, 1]
|
||||
assert_allclose(c, expected, rtol=1e-13)
|
||||
|
||||
|
||||
def test_betabinom_a_and_b_unity():
|
||||
# test limiting case that betabinom(n, 1, 1) is a discrete uniform
|
||||
# distribution from 0 to n
|
||||
n = 20
|
||||
k = np.arange(n + 1)
|
||||
p = betabinom(n, 1, 1).pmf(k)
|
||||
expected = np.repeat(1 / (n + 1), n + 1)
|
||||
assert_almost_equal(p, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dtypes', itertools.product(*[(int, float)]*3))
|
||||
def test_betabinom_stats_a_and_b_integers_gh18026(dtypes):
|
||||
# gh-18026 reported that `betabinom` kurtosis calculation fails when some
|
||||
# parameters are integers. Check that this is resolved.
|
||||
n_type, a_type, b_type = dtypes
|
||||
n, a, b = n_type(10), a_type(2), b_type(3)
|
||||
assert_allclose(betabinom.stats(n, a, b, moments='k'), -0.6904761904761907)
|
||||
|
||||
|
||||
def test_betabinom_bernoulli():
|
||||
# test limiting case that betabinom(1, a, b) = bernoulli(a / (a + b))
|
||||
a = 2.3
|
||||
b = 0.63
|
||||
k = np.arange(2)
|
||||
p = betabinom(1, a, b).pmf(k)
|
||||
expected = bernoulli(a / (a + b)).pmf(k)
|
||||
assert_almost_equal(p, expected)
|
||||
|
||||
|
||||
def test_issue_10317():
|
||||
alpha, n, p = 0.9, 10, 1
|
||||
assert_equal(nbinom.interval(confidence=alpha, n=n, p=p), (0, 0))
|
||||
|
||||
|
||||
def test_issue_11134():
|
||||
alpha, n, p = 0.95, 10, 0
|
||||
assert_equal(binom.interval(confidence=alpha, n=n, p=p), (0, 0))
|
||||
|
||||
|
||||
def test_issue_7406():
|
||||
np.random.seed(0)
|
||||
assert_equal(binom.ppf(np.random.rand(10), 0, 0.5), 0)
|
||||
|
||||
# Also check that endpoints (q=0, q=1) are correct
|
||||
assert_equal(binom.ppf(0, 0, 0.5), -1)
|
||||
assert_equal(binom.ppf(1, 0, 0.5), 0)
|
||||
|
||||
|
||||
def test_issue_5122():
|
||||
p = 0
|
||||
n = np.random.randint(100, size=10)
|
||||
|
||||
x = 0
|
||||
ppf = binom.ppf(x, n, p)
|
||||
assert_equal(ppf, -1)
|
||||
|
||||
x = np.linspace(0.01, 0.99, 10)
|
||||
ppf = binom.ppf(x, n, p)
|
||||
assert_equal(ppf, 0)
|
||||
|
||||
x = 1
|
||||
ppf = binom.ppf(x, n, p)
|
||||
assert_equal(ppf, n)
|
||||
|
||||
|
||||
def test_issue_1603():
|
||||
assert_equal(binom(1000, np.logspace(-3, -100)).ppf(0.01), 0)
|
||||
|
||||
|
||||
def test_issue_5503():
|
||||
p = 0.5
|
||||
x = np.logspace(3, 14, 12)
|
||||
assert_allclose(binom.cdf(x, 2*x, p), 0.5, atol=1e-2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('x, n, p, cdf_desired', [
|
||||
(300, 1000, 3/10, 0.51559351981411995636),
|
||||
(3000, 10000, 3/10, 0.50493298381929698016),
|
||||
(30000, 100000, 3/10, 0.50156000591726422864),
|
||||
(300000, 1000000, 3/10, 0.50049331906666960038),
|
||||
(3000000, 10000000, 3/10, 0.50015600124585261196),
|
||||
(30000000, 100000000, 3/10, 0.50004933192735230102),
|
||||
(30010000, 100000000, 3/10, 0.98545384016570790717),
|
||||
(29990000, 100000000, 3/10, 0.01455017177985268670),
|
||||
(29950000, 100000000, 3/10, 5.02250963487432024943e-28),
|
||||
])
|
||||
def test_issue_5503pt2(x, n, p, cdf_desired):
|
||||
assert_allclose(binom.cdf(x, n, p), cdf_desired)
|
||||
|
||||
|
||||
def test_issue_5503pt3():
|
||||
# From Wolfram Alpha: CDF[BinomialDistribution[1e12, 1e-12], 2]
|
||||
assert_allclose(binom.cdf(2, 10**12, 10**-12), 0.91969860292869777384)
|
||||
|
||||
|
||||
def test_issue_6682():
|
||||
# Reference value from R:
|
||||
# options(digits=16)
|
||||
# print(pnbinom(250, 50, 32/63, lower.tail=FALSE))
|
||||
assert_allclose(nbinom.sf(250, 50, 32./63.), 1.460458510976452e-35)
|
||||
|
||||
|
||||
def test_issue_19747():
|
||||
# test that negative k does not raise an error in nbinom.logcdf
|
||||
result = nbinom.logcdf([5, -1, 1], 5, 0.5)
|
||||
reference = [-0.47313352, -np.inf, -2.21297293]
|
||||
assert_allclose(result, reference)
|
||||
|
||||
|
||||
def test_boost_divide_by_zero_issue_15101():
|
||||
n = 1000
|
||||
p = 0.01
|
||||
k = 996
|
||||
assert_allclose(binom.pmf(k, n, p), 0.0)
|
||||
|
||||
|
||||
def test_skellam_gh11474():
|
||||
# test issue reported in gh-11474 caused by `cdfchn`
|
||||
mu = [1, 10, 100, 1000, 5000, 5050, 5100, 5250, 6000]
|
||||
cdf = skellam.cdf(0, mu, mu)
|
||||
# generated in R
|
||||
# library(skellam)
|
||||
# options(digits = 16)
|
||||
# mu = c(1, 10, 100, 1000, 5000, 5050, 5100, 5250, 6000)
|
||||
# pskellam(0, mu, mu, TRUE)
|
||||
cdf_expected = [0.6542541612768356, 0.5448901559424127, 0.5141135799745580,
|
||||
0.5044605891382528, 0.5019947363350450, 0.5019848365953181,
|
||||
0.5019750827993392, 0.5019466621805060, 0.5018209330219539]
|
||||
assert_allclose(cdf, cdf_expected)
|
||||
|
||||
|
||||
class TestZipfian:
|
||||
def test_zipfian_asymptotic(self):
|
||||
        # test limiting case that zipfian(a, n) -> zipf(a) as n -> oo
|
||||
a = 6.5
|
||||
N = 10000000
|
||||
k = np.arange(1, 21)
|
||||
assert_allclose(zipfian.pmf(k, a, N), zipf.pmf(k, a))
|
||||
assert_allclose(zipfian.cdf(k, a, N), zipf.cdf(k, a))
|
||||
assert_allclose(zipfian.sf(k, a, N), zipf.sf(k, a))
|
||||
assert_allclose(zipfian.stats(a, N, moments='msvk'),
|
||||
zipf.stats(a, moments='msvk'))
|
||||
|
||||
def test_zipfian_continuity(self):
|
||||
# test that zipfian(0.999999, n) ~ zipfian(1.000001, n)
|
||||
# (a = 1 switches between methods of calculating harmonic sum)
|
||||
alt1, agt1 = 0.99999999, 1.00000001
|
||||
N = 30
|
||||
k = np.arange(1, N + 1)
|
||||
assert_allclose(zipfian.pmf(k, alt1, N), zipfian.pmf(k, agt1, N),
|
||||
rtol=5e-7)
|
||||
assert_allclose(zipfian.cdf(k, alt1, N), zipfian.cdf(k, agt1, N),
|
||||
rtol=5e-7)
|
||||
assert_allclose(zipfian.sf(k, alt1, N), zipfian.sf(k, agt1, N),
|
||||
rtol=5e-7)
|
||||
assert_allclose(zipfian.stats(alt1, N, moments='msvk'),
|
||||
zipfian.stats(agt1, N, moments='msvk'), rtol=5e-7)
|
||||
|
||||
def test_zipfian_R(self):
|
||||
# test against R VGAM package
|
||||
# library(VGAM)
|
||||
# k <- c(13, 16, 1, 4, 4, 8, 10, 19, 5, 7)
|
||||
# a <- c(1.56712977, 3.72656295, 5.77665117, 9.12168729, 5.79977172,
|
||||
# 4.92784796, 9.36078764, 4.3739616 , 7.48171872, 4.6824154)
|
||||
# n <- c(70, 80, 48, 65, 83, 89, 50, 30, 20, 20)
|
||||
# pmf <- dzipf(k, N = n, shape = a)
|
||||
# cdf <- pzipf(k, N = n, shape = a)
|
||||
# print(pmf)
|
||||
# print(cdf)
|
||||
rng = np.random.RandomState(0)
|
||||
k = rng.randint(1, 20, size=10)
|
||||
a = rng.rand(10)*10 + 1
|
||||
n = rng.randint(1, 100, size=10)
|
||||
pmf = [8.076972e-03, 2.950214e-05, 9.799333e-01, 3.216601e-06,
|
||||
3.158895e-04, 3.412497e-05, 4.350472e-10, 2.405773e-06,
|
||||
5.860662e-06, 1.053948e-04]
|
||||
cdf = [0.8964133, 0.9998666, 0.9799333, 0.9999995, 0.9998584,
|
||||
0.9999458, 1.0000000, 0.9999920, 0.9999977, 0.9998498]
|
||||
# skip the first point; zipUC is not accurate for low a, n
|
||||
assert_allclose(zipfian.pmf(k, a, n)[1:], pmf[1:], rtol=1e-6)
|
||||
assert_allclose(zipfian.cdf(k, a, n)[1:], cdf[1:], rtol=5e-5)
|
||||
|
||||
rng = np.random.RandomState(0)
|
||||
naive_tests = np.vstack((np.logspace(-2, 1, 10),
|
||||
rng.randint(2, 40, 10))).T
|
||||
|
||||
@pytest.mark.parametrize("a, n", naive_tests)
|
||||
def test_zipfian_naive(self, a, n):
|
||||
# test against bare-bones implementation
|
||||
|
||||
@np.vectorize
|
||||
def Hns(n, s):
|
||||
"""Naive implementation of harmonic sum"""
|
||||
return (1/np.arange(1, n+1)**s).sum()
|
||||
|
||||
@np.vectorize
|
||||
def pzip(k, a, n):
|
||||
"""Naive implementation of zipfian pmf"""
|
||||
if k < 1 or k > n:
|
||||
return 0.
|
||||
else:
|
||||
return 1 / k**a / Hns(n, a)
|
||||
|
||||
k = np.arange(n+1)
|
||||
pmf = pzip(k, a, n)
|
||||
cdf = np.cumsum(pmf)
|
||||
mean = np.average(k, weights=pmf)
|
||||
var = np.average((k - mean)**2, weights=pmf)
|
||||
std = var**0.5
|
||||
skew = np.average(((k-mean)/std)**3, weights=pmf)
|
||||
kurtosis = np.average(((k-mean)/std)**4, weights=pmf) - 3
|
||||
assert_allclose(zipfian.pmf(k, a, n), pmf)
|
||||
assert_allclose(zipfian.cdf(k, a, n), cdf)
|
||||
assert_allclose(zipfian.stats(a, n, moments="mvsk"),
|
||||
[mean, var, skew, kurtosis])
|
||||
|
||||
def test_pmf_integer_k(self):
|
||||
k = np.arange(0, 1000)
|
||||
k_int32 = k.astype(np.int32)
|
||||
dist = zipfian(111, 22)
|
||||
pmf = dist.pmf(k)
|
||||
pmf_k_int32 = dist.pmf(k_int32)
|
||||
assert_equal(pmf, pmf_k_int32)
|
||||
|
||||
|
||||
class TestNCH:
|
||||
np.random.seed(2) # seeds 0 and 1 had some xl = xu; randint failed
|
||||
shape = (2, 4, 3)
|
||||
max_m = 100
|
||||
m1 = np.random.randint(1, max_m, size=shape) # red balls
|
||||
m2 = np.random.randint(1, max_m, size=shape) # white balls
|
||||
N = m1 + m2 # total balls
|
||||
n = randint.rvs(0, N, size=N.shape) # number of draws
|
||||
xl = np.maximum(0, n-m2) # lower bound of support
|
||||
xu = np.minimum(n, m1) # upper bound of support
|
||||
x = randint.rvs(xl, xu, size=xl.shape)
|
||||
odds = np.random.rand(*x.shape)*2
|
||||
|
||||
# test output is more readable when function names (strings) are passed
|
||||
@pytest.mark.parametrize('dist_name',
|
||||
['nchypergeom_fisher', 'nchypergeom_wallenius'])
|
||||
def test_nch_hypergeom(self, dist_name):
|
||||
# Both noncentral hypergeometric distributions reduce to the
|
||||
# hypergeometric distribution when odds = 1
|
||||
dists = {'nchypergeom_fisher': nchypergeom_fisher,
|
||||
'nchypergeom_wallenius': nchypergeom_wallenius}
|
||||
dist = dists[dist_name]
|
||||
x, N, m1, n = self.x, self.N, self.m1, self.n
|
||||
assert_allclose(dist.pmf(x, N, m1, n, odds=1),
|
||||
hypergeom.pmf(x, N, m1, n))
|
||||
|
||||
def test_nchypergeom_fisher_naive(self):
|
||||
# test against a very simple implementation
|
||||
x, N, m1, n, odds = self.x, self.N, self.m1, self.n, self.odds
|
||||
|
||||
@np.vectorize
|
||||
def pmf_mean_var(x, N, m1, n, w):
|
||||
# simple implementation of nchypergeom_fisher pmf
|
||||
m2 = N - m1
|
||||
xl = np.maximum(0, n-m2)
|
||||
xu = np.minimum(n, m1)
|
||||
|
||||
def f(x):
|
||||
t1 = special_binom(m1, x)
|
||||
t2 = special_binom(m2, n - x)
|
||||
return t1 * t2 * w**x
|
||||
|
||||
def P(k):
|
||||
return sum(f(y)*y**k for y in range(xl, xu + 1))
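            # P(k) is the k-th raw moment of the unnormalized weights f(y):
            # P0 is the normalizing constant, so P1/P0 and P2/P0 below are the
            # first two raw moments used for the mean and variance.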
|
||||
|
||||
P0 = P(0)
|
||||
P1 = P(1)
|
||||
P2 = P(2)
|
||||
pmf = f(x) / P0
|
||||
mean = P1 / P0
|
||||
var = P2 / P0 - (P1 / P0)**2
|
||||
return pmf, mean, var
|
||||
|
||||
pmf, mean, var = pmf_mean_var(x, N, m1, n, odds)
|
||||
assert_allclose(nchypergeom_fisher.pmf(x, N, m1, n, odds), pmf)
|
||||
assert_allclose(nchypergeom_fisher.stats(N, m1, n, odds, moments='m'),
|
||||
mean)
|
||||
assert_allclose(nchypergeom_fisher.stats(N, m1, n, odds, moments='v'),
|
||||
var)
|
||||
|
||||
def test_nchypergeom_wallenius_naive(self):
|
||||
# test against a very simple implementation
|
||||
|
||||
rng = np.random.RandomState(2)
|
||||
shape = (2, 4, 3)
|
||||
max_m = 100
|
||||
m1 = rng.randint(1, max_m, size=shape)
|
||||
m2 = rng.randint(1, max_m, size=shape)
|
||||
N = m1 + m2
|
||||
n = randint.rvs(0, N, size=N.shape, random_state=rng)
|
||||
xl = np.maximum(0, n-m2)
|
||||
xu = np.minimum(n, m1)
|
||||
x = randint.rvs(xl, xu, size=xl.shape, random_state=rng)
|
||||
w = rng.rand(*x.shape)*2
|
||||
|
||||
def support(N, m1, n, w):
|
||||
m2 = N - m1
|
||||
xl = np.maximum(0, n-m2)
|
||||
xu = np.minimum(n, m1)
|
||||
return xl, xu
|
||||
|
||||
@np.vectorize
|
||||
def mean(N, m1, n, w):
|
||||
m2 = N - m1
|
||||
xl, xu = support(N, m1, n, w)
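            # The mean u of Wallenius' distribution solves the implicit
            # equation u/m1 = 1 - (1 - (n - u)/m2)**w, i.e. it is the root of
            # `fun` found below.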
|
||||
|
||||
def fun(u):
|
||||
return u/m1 + (1 - (n-u)/m2)**w - 1
|
||||
|
||||
return root_scalar(fun, bracket=(xl, xu)).root
|
||||
|
||||
with suppress_warnings() as sup:
|
||||
sup.filter(RuntimeWarning,
|
||||
message="invalid value encountered in mean")
|
||||
assert_allclose(nchypergeom_wallenius.mean(N, m1, n, w),
|
||||
mean(N, m1, n, w), rtol=2e-2)
|
||||
|
||||
@np.vectorize
|
||||
def variance(N, m1, n, w):
|
||||
m2 = N - m1
|
||||
u = mean(N, m1, n, w)
|
||||
a = u * (m1 - u)
|
||||
b = (n-u)*(u + m2 - n)
|
||||
return N*a*b / ((N-1) * (m1*b + m2*a))
|
||||
|
||||
with suppress_warnings() as sup:
|
||||
sup.filter(RuntimeWarning,
|
||||
message="invalid value encountered in mean")
|
||||
assert_allclose(
|
||||
nchypergeom_wallenius.stats(N, m1, n, w, moments='v'),
|
||||
variance(N, m1, n, w),
|
||||
rtol=5e-2
|
||||
)
|
||||
|
||||
@np.vectorize
|
||||
def pmf(x, N, m1, n, w):
|
||||
m2 = N - m1
|
||||
xl, xu = support(N, m1, n, w)
|
||||
|
||||
def integrand(t):
|
||||
D = w*(m1 - x) + (m2 - (n-x))
|
||||
res = (1-t**(w/D))**x * (1-t**(1/D))**(n-x)
|
||||
return res
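            # Together with f below this implements the integral form of the
            # Wallenius pmf:
            #     pmf(x) = binom(m1, x) * binom(m2, n - x)
            #              * int_0^1 (1 - t**(w/D))**x * (1 - t**(1/D))**(n-x) dt
            # with D = w*(m1 - x) + (m2 - (n - x)).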
|
||||
|
||||
def f(x):
|
||||
t1 = special_binom(m1, x)
|
||||
t2 = special_binom(m2, n - x)
|
||||
the_integral = quad(integrand, 0, 1,
|
||||
epsrel=1e-16, epsabs=1e-16)
|
||||
return t1 * t2 * the_integral[0]
|
||||
|
||||
return f(x)
|
||||
|
||||
pmf0 = pmf(x, N, m1, n, w)
|
||||
pmf1 = nchypergeom_wallenius.pmf(x, N, m1, n, w)
|
||||
|
||||
atol, rtol = 1e-6, 1e-6
|
||||
i = np.abs(pmf1 - pmf0) < atol + rtol*np.abs(pmf0)
|
||||
assert i.sum() > np.prod(shape) / 2 # works at least half the time
|
||||
|
||||
# for those that fail, discredit the naive implementation
|
||||
for N, m1, n, w in zip(N[~i], m1[~i], n[~i], w[~i]):
|
||||
# get the support
|
||||
m2 = N - m1
|
||||
xl, xu = support(N, m1, n, w)
|
||||
x = np.arange(xl, xu + 1)
|
||||
|
||||
# calculate sum of pmf over the support
|
||||
# the naive implementation is very wrong in these cases
|
||||
assert pmf(x, N, m1, n, w).sum() < .5
|
||||
assert_allclose(nchypergeom_wallenius.pmf(x, N, m1, n, w).sum(), 1)
|
||||
|
||||
def test_wallenius_against_mpmath(self):
|
||||
# precompute data with mpmath since naive implementation above
|
||||
# is not reliable. See source code in gh-13330.
|
||||
M = 50
|
||||
n = 30
|
||||
N = 20
|
||||
odds = 2.25
|
||||
# Expected results, computed with mpmath.
|
||||
sup = np.arange(21)
|
||||
pmf = np.array([3.699003068656875e-20,
|
||||
5.89398584245431e-17,
|
||||
2.1594437742911123e-14,
|
||||
3.221458044649955e-12,
|
||||
2.4658279241205077e-10,
|
||||
1.0965862603981212e-08,
|
||||
3.057890479665704e-07,
|
||||
5.622818831643761e-06,
|
||||
7.056482841531681e-05,
|
||||
0.000618899425358671,
|
||||
0.003854172932571669,
|
||||
0.01720592676256026,
|
||||
0.05528844897093792,
|
||||
0.12772363313574242,
|
||||
0.21065898367825722,
|
||||
0.24465958845359234,
|
||||
0.1955114898110033,
|
||||
0.10355390084949237,
|
||||
0.03414490375225675,
|
||||
0.006231989845775931,
|
||||
0.0004715577304677075])
|
||||
mean = 14.808018384813426
|
||||
var = 2.6085975877923717
|
||||
|
||||
# nchypergeom_wallenius.pmf returns 0 for pmf(0) and pmf(1), and pmf(2)
|
||||
# has only three digits of accuracy (~ 2.1511e-14).
|
||||
assert_allclose(nchypergeom_wallenius.pmf(sup, M, n, N, odds), pmf,
|
||||
rtol=1e-13, atol=1e-13)
|
||||
assert_allclose(nchypergeom_wallenius.mean(M, n, N, odds),
|
||||
mean, rtol=1e-13)
|
||||
assert_allclose(nchypergeom_wallenius.var(M, n, N, odds),
|
||||
var, rtol=1e-11)
|
||||
|
||||
@pytest.mark.parametrize('dist_name',
|
||||
['nchypergeom_fisher', 'nchypergeom_wallenius'])
|
||||
def test_rvs_shape(self, dist_name):
|
||||
# Check that when given a size with more dimensions than the
|
||||
# dimensions of the broadcast parameters, rvs returns an array
|
||||
# with the correct shape.
|
||||
dists = {'nchypergeom_fisher': nchypergeom_fisher,
|
||||
'nchypergeom_wallenius': nchypergeom_wallenius}
|
||||
dist = dists[dist_name]
|
||||
x = dist.rvs(50, 30, [[10], [20]], [0.5, 1.0, 2.0], size=(5, 1, 2, 3))
|
||||
assert x.shape == (5, 1, 2, 3)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("mu, q, expected",
|
||||
[[10, 120, -1.240089881791596e-38],
|
||||
[1500, 0, -86.61466680572661]])
|
||||
def test_nbinom_11465(mu, q, expected):
|
||||
# test nbinom.logcdf at extreme tails
|
||||
size = 20
|
||||
n, p = size, size/(size+mu)
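    # R's pnbinom is parameterized by (size, mu); SciPy's nbinom uses (n, p)
    # with n = size and p = size / (size + mu), as above.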
|
||||
# In R:
|
||||
# options(digits=16)
|
||||
# pnbinom(mu=10, size=20, q=120, log.p=TRUE)
|
||||
assert_allclose(nbinom.logcdf(q, n, p), expected)
|
||||
|
||||
|
||||
def test_gh_17146():
|
||||
# Check that discrete distributions return PMF of zero at non-integral x.
|
||||
# See gh-17146.
|
||||
x = np.linspace(0, 1, 11)
|
||||
p = 0.8
|
||||
pmf = bernoulli(p).pmf(x)
|
||||
i = (x % 1 == 0)
|
||||
assert_allclose(pmf[-1], p)
|
||||
assert_allclose(pmf[0], 1-p)
|
||||
assert_equal(pmf[~i], 0)
|
||||
|
||||
|
||||
class TestBetaNBinom:
|
||||
@pytest.mark.parametrize('x, n, a, b, ref',
|
||||
[[5, 5e6, 5, 20, 1.1520944824139114e-107],
|
||||
[100, 50, 5, 20, 0.002855762954310226],
|
||||
[10000, 1000, 5, 20, 1.9648515726019154e-05]])
|
||||
def test_betanbinom_pmf(self, x, n, a, b, ref):
|
||||
# test that PMF stays accurate in the distribution tails
|
||||
# reference values computed with mpmath
|
||||
# from mpmath import mp
|
||||
# mp.dps = 500
|
||||
# def betanbinom_pmf(k, n, a, b):
|
||||
# k = mp.mpf(k)
|
||||
# a = mp.mpf(a)
|
||||
# b = mp.mpf(b)
|
||||
# n = mp.mpf(n)
|
||||
# return float(mp.binomial(n + k - mp.one, k)
|
||||
# * mp.beta(a + n, b + k) / mp.beta(a, b))
|
||||
assert_allclose(betanbinom.pmf(x, n, a, b), ref, rtol=1e-10)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('n, a, b, ref',
|
||||
[[10000, 5000, 50, 0.12841520515722202],
|
||||
[10, 9, 9, 7.9224400871459695],
|
||||
[100, 1000, 10, 1.5849602176622748]])
|
||||
def test_betanbinom_kurtosis(self, n, a, b, ref):
|
||||
# reference values were computed via mpmath
|
||||
# from mpmath import mp
|
||||
# def kurtosis_betanegbinom(n, a, b):
|
||||
# n = mp.mpf(n)
|
||||
# a = mp.mpf(a)
|
||||
# b = mp.mpf(b)
|
||||
# four = mp.mpf(4.)
|
||||
# mean = n * b / (a - mp.one)
|
||||
# var = (n * b * (n + a - 1.) * (a + b - 1.)
|
||||
# / ((a - 2.) * (a - 1.)**2.))
|
||||
# def f(k):
|
||||
# return (mp.binomial(n + k - mp.one, k)
|
||||
# * mp.beta(a + n, b + k) / mp.beta(a, b)
|
||||
# * (k - mean)**four)
|
||||
# fourth_moment = mp.nsum(f, [0, mp.inf])
|
||||
# return float(fourth_moment/var**2 - 3.)
|
||||
assert_allclose(betanbinom.stats(n, a, b, moments="k"),
|
||||
ref, rtol=3e-15)
|
||||
|
||||
|
||||
class TestZipf:
|
||||
def test_gh20692(self):
|
||||
# test that int32 data for k generates same output as double
|
||||
k = np.arange(0, 1000)
|
||||
k_int32 = k.astype(np.int32)
|
||||
dist = zipf(9)
|
||||
pmf = dist.pmf(k)
|
||||
pmf_k_int32 = dist.pmf(k_int32)
|
||||
assert_equal(pmf, pmf_k_int32)
|
||||
|
||||
|
||||
def test_gh20048():
|
||||
# gh-20048 reported an infinite loop in _drv2_ppfsingle
|
||||
# check that the one identified is resolved
|
||||
class test_dist_gen(stats.rv_discrete):
|
||||
def _cdf(self, k):
|
||||
return min(k / 100, 0.99)
|
||||
|
||||
test_dist = test_dist_gen(b=np.inf)
|
||||
|
||||
message = "Arguments that bracket..."
|
||||
with pytest.raises(RuntimeError, match=message):
|
||||
test_dist.ppf(0.999)
|
||||
|
||||
|
||||
class TestPoissonBinomial:
|
||||
def test_pmf(self):
|
||||
# Test pmf against R `poisbinom` to confirm that this is indeed the Poisson
|
||||
# binomial distribution. Consistency of other methods and all other behavior
|
||||
# should be covered by generic tests. (If not, please add a generic test.)
|
||||
# Like many other distributions, no special attempt is made to be more
|
||||
# accurate than the usual formulas provide, so we use default tolerances.
|
||||
#
|
||||
# library(poisbinom)
|
||||
# options(digits=16)
|
||||
# k = c(0, 1, 2, 3, 4)
|
||||
# p = c(0.9480654803913988, 0.052428488100509374,
|
||||
# 0.25863527358887417, 0.057764076043633206)
|
||||
# dpoisbinom(k, p)
|
||||
rng = np.random.default_rng(259823598254)
|
||||
n = rng.integers(10) # 4
|
||||
k = np.arange(n + 1)
|
||||
p = rng.random(n) # [0.9480654803913988, 0.052428488100509374,
|
||||
# 0.25863527358887417, 0.057764076043633206]
|
||||
res = poisson_binom.pmf(k, p)
|
||||
ref = [0.0343763443678060318, 0.6435428452689714307, 0.2936345519235536994,
|
||||
0.0277036647503902354, 0.0007425936892786034]
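        # A cheap cross-check of the reference (sketch, not executed here):
        # the Poisson binomial pmf is the convolution of the Bernoulli(p_i)
        # pmfs, e.g.
        #     ref2 = np.array([1.0])
        #     for pi in p:
        #         ref2 = np.convolve(ref2, [1 - pi, pi])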
|
||||
assert_allclose(res, ref)
|
||||
|
||||
|
||||
class TestRandInt:
|
||||
def test_gh19759(self):
|
||||
# test zero PMF values within the support reported by gh-19759
|
||||
a = -354
|
||||
max_range = abs(a)
|
||||
all_b_1 = [a + 2 ** 31 + i for i in range(max_range)]
|
||||
res = randint.pmf(325, a, all_b_1)
|
||||
assert (res > 0).all()
|
||||
ref = 1 / (np.asarray(all_b_1, dtype=np.float64) - a)
|
||||
assert_allclose(res, ref)
|
||||
File diff suppressed because it is too large
|
|
@ -1,322 +0,0 @@
|
|||
import math
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
|
||||
import numpy as np
|
||||
|
||||
from scipy import stats
|
||||
from scipy.stats import norm, expon # type: ignore[attr-defined]
|
||||
from scipy._lib._array_api_no_0d import (xp_assert_close, xp_assert_equal,
|
||||
xp_assert_less)
|
||||
|
||||
skip_xp_backends = pytest.mark.skip_xp_backends
|
||||
|
||||
@pytest.mark.skip_xp_backends("dask.array", reason="boolean index assignment")
|
||||
class TestEntropy:
|
||||
def test_entropy_positive(self, xp):
|
||||
# See ticket #497
|
||||
pk = xp.asarray([0.5, 0.2, 0.3])
|
||||
qk = xp.asarray([0.1, 0.25, 0.65])
|
||||
eself = stats.entropy(pk, pk)
|
||||
edouble = stats.entropy(pk, qk)
|
||||
xp_assert_equal(eself, xp.asarray(0.))
|
||||
xp_assert_less(-edouble, xp.asarray(0.))
|
||||
|
||||
def test_entropy_base(self, xp):
|
||||
pk = xp.ones(16)
|
||||
S = stats.entropy(pk, base=2.)
|
||||
xp_assert_less(xp.abs(S - 4.), xp.asarray(1.e-5))
|
||||
|
||||
qk = xp.ones(16)
|
||||
qk = xp.where(xp.arange(16) < 8, 2., qk)
|
||||
S = stats.entropy(pk, qk)
|
||||
S2 = stats.entropy(pk, qk, base=2.)
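        # Changing the logarithm base only rescales the result, so S / S2
        # should equal log(2).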
|
||||
xp_assert_less(xp.abs(S/S2 - math.log(2.)), xp.asarray(1.e-5))
|
||||
|
||||
def test_entropy_zero(self, xp):
|
||||
# Test for PR-479
|
||||
x = xp.asarray([0., 1., 2.])
|
||||
xp_assert_close(stats.entropy(x),
|
||||
xp.asarray(0.63651416829481278))
|
||||
|
||||
def test_entropy_2d(self, xp):
|
||||
pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
|
||||
qk = xp.asarray([[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]])
|
||||
xp_assert_close(stats.entropy(pk, qk),
|
||||
xp.asarray([0.1933259, 0.18609809]))
|
||||
|
||||
def test_entropy_2d_zero(self, xp):
|
||||
pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
|
||||
qk = xp.asarray([[0.0, 0.1], [0.3, 0.6], [0.5, 0.3]])
|
||||
xp_assert_close(stats.entropy(pk, qk),
|
||||
xp.asarray([xp.inf, 0.18609809]))
|
||||
|
||||
pk = xp.asarray([[0.0, 0.2], [0.6, 0.3], [0.3, 0.5]])
|
||||
xp_assert_close(stats.entropy(pk, qk),
|
||||
xp.asarray([0.17403988, 0.18609809]))
|
||||
|
||||
def test_entropy_base_2d_nondefault_axis(self, xp):
|
||||
pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
|
||||
xp_assert_close(stats.entropy(pk, axis=1),
|
||||
xp.asarray([0.63651417, 0.63651417, 0.66156324]))
|
||||
|
||||
def test_entropy_2d_nondefault_axis(self, xp):
|
||||
pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
|
||||
qk = xp.asarray([[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]])
|
||||
xp_assert_close(stats.entropy(pk, qk, axis=1),
|
||||
xp.asarray([0.23104906, 0.23104906, 0.12770641]))
|
||||
|
||||
def test_entropy_raises_value_error(self, xp):
|
||||
pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
|
||||
qk = xp.asarray([[0.1, 0.2], [0.6, 0.3]])
|
||||
message = "Array shapes are incompatible for broadcasting."
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.entropy(pk, qk)
|
||||
|
||||
def test_base_entropy_with_axis_0_is_equal_to_default(self, xp):
|
||||
pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
|
||||
xp_assert_close(stats.entropy(pk, axis=0),
|
||||
stats.entropy(pk))
|
||||
|
||||
def test_entropy_with_axis_0_is_equal_to_default(self, xp):
|
||||
pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
|
||||
qk = xp.asarray([[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]])
|
||||
xp_assert_close(stats.entropy(pk, qk, axis=0),
|
||||
stats.entropy(pk, qk))
|
||||
|
||||
def test_base_entropy_transposed(self, xp):
|
||||
pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
|
||||
xp_assert_close(stats.entropy(pk.T),
|
||||
stats.entropy(pk, axis=1))
|
||||
|
||||
def test_entropy_transposed(self, xp):
|
||||
pk = xp.asarray([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
|
||||
qk = xp.asarray([[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]])
|
||||
xp_assert_close(stats.entropy(pk.T, qk.T),
|
||||
stats.entropy(pk, qk, axis=1))
|
||||
|
||||
def test_entropy_broadcasting(self, xp):
|
||||
rng = np.random.default_rng(74187315492831452)
|
||||
x = xp.asarray(rng.random(3))
|
||||
y = xp.asarray(rng.random((2, 1)))
|
||||
res = stats.entropy(x, y, axis=-1)
|
||||
xp_assert_equal(res[0], stats.entropy(x, y[0, ...]))
|
||||
xp_assert_equal(res[1], stats.entropy(x, y[1, ...]))
|
||||
|
||||
def test_entropy_shape_mismatch(self, xp):
|
||||
x = xp.ones((10, 1, 12))
|
||||
y = xp.ones((11, 2))
|
||||
message = "Array shapes are incompatible for broadcasting."
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.entropy(x, y)
|
||||
|
||||
def test_input_validation(self, xp):
|
||||
x = xp.ones(10)
|
||||
message = "`base` must be a positive number."
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.entropy(x, base=-2)
|
||||
|
||||
|
||||
@pytest.mark.skip_xp_backends("dask.array", reason="boolean index assignment")
|
||||
class TestDifferentialEntropy:
|
||||
"""
|
||||
Vasicek results are compared with the R package vsgoftest.
|
||||
|
||||
# library(vsgoftest)
|
||||
#
|
||||
# samp <- c(<values>)
|
||||
# entropy.estimate(x = samp, window = <window_length>)
|
||||
|
||||
"""
|
||||
|
||||
def test_differential_entropy_vasicek(self, xp):
|
||||
|
||||
random_state = np.random.RandomState(0)
|
||||
values = random_state.standard_normal(100)
|
||||
values = xp.asarray(values.tolist())
|
||||
|
||||
entropy = stats.differential_entropy(values, method='vasicek')
|
||||
xp_assert_close(entropy, xp.asarray(1.342551187000946))
|
||||
|
||||
entropy = stats.differential_entropy(values, window_length=1,
|
||||
method='vasicek')
|
||||
xp_assert_close(entropy, xp.asarray(1.122044177725947))
|
||||
|
||||
entropy = stats.differential_entropy(values, window_length=8,
|
||||
method='vasicek')
|
||||
xp_assert_close(entropy, xp.asarray(1.349401487550325))
|
||||
|
||||
def test_differential_entropy_vasicek_2d_nondefault_axis(self, xp):
|
||||
random_state = np.random.RandomState(0)
|
||||
values = random_state.standard_normal((3, 100))
|
||||
values = xp.asarray(values.tolist())
|
||||
|
||||
entropy = stats.differential_entropy(values, axis=1, method='vasicek')
|
||||
ref = xp.asarray([1.342551187000946, 1.341825903922332, 1.293774601883585])
|
||||
xp_assert_close(entropy, ref)
|
||||
|
||||
entropy = stats.differential_entropy(values, axis=1, window_length=1,
|
||||
method='vasicek')
|
||||
ref = xp.asarray([1.122044177725947, 1.10294413850758, 1.129615790292772])
|
||||
xp_assert_close(entropy, ref)
|
||||
|
||||
entropy = stats.differential_entropy(values, axis=1, window_length=8,
|
||||
method='vasicek')
|
||||
ref = xp.asarray([1.349401487550325, 1.338514126301301, 1.292331889365405])
|
||||
xp_assert_close(entropy, ref)
|
||||
|
||||
|
||||
def test_differential_entropy_raises_value_error(self, xp):
|
||||
random_state = np.random.RandomState(0)
|
||||
values = random_state.standard_normal((3, 100))
|
||||
values = xp.asarray(values.tolist())
|
||||
|
||||
error_str = (
|
||||
r"Window length \({window_length}\) must be positive and less "
|
||||
r"than half the sample size \({sample_size}\)."
|
||||
)
|
||||
|
||||
sample_size = values.shape[1]
|
||||
|
||||
for window_length in {-1, 0, sample_size//2, sample_size}:
|
||||
|
||||
formatted_error_str = error_str.format(
|
||||
window_length=window_length,
|
||||
sample_size=sample_size,
|
||||
)
|
||||
|
||||
with assert_raises(ValueError, match=formatted_error_str):
|
||||
stats.differential_entropy(
|
||||
values,
|
||||
window_length=window_length,
|
||||
axis=1,
|
||||
)
|
||||
|
||||
def test_base_differential_entropy_with_axis_0_is_equal_to_default(self, xp):
|
||||
random_state = np.random.RandomState(0)
|
||||
values = random_state.standard_normal((100, 3))
|
||||
values = xp.asarray(values.tolist())
|
||||
|
||||
entropy = stats.differential_entropy(values, axis=0)
|
||||
default_entropy = stats.differential_entropy(values)
|
||||
xp_assert_close(entropy, default_entropy)
|
||||
|
||||
def test_base_differential_entropy_transposed(self, xp):
|
||||
random_state = np.random.RandomState(0)
|
||||
values = random_state.standard_normal((3, 100))
|
||||
values = xp.asarray(values.tolist())
|
||||
|
||||
xp_assert_close(
|
||||
stats.differential_entropy(values.T),
|
||||
stats.differential_entropy(values, axis=1),
|
||||
)
|
||||
|
||||
def test_input_validation(self, xp):
|
||||
x = np.random.rand(10)
|
||||
x = xp.asarray(x.tolist())
|
||||
|
||||
message = "`base` must be a positive number or `None`."
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.differential_entropy(x, base=-2)
|
||||
|
||||
message = "`method` must be one of..."
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.differential_entropy(x, method='ekki-ekki')
|
||||
|
||||
@pytest.mark.parametrize('method', [
|
||||
'vasicek',
|
||||
'van es',
|
||||
'ebrahimi',
|
||||
pytest.param(
|
||||
'correa',
|
||||
marks=skip_xp_backends("array_api_strict",
|
||||
reason="Needs fancy indexing.")
|
||||
)
|
||||
])
|
||||
def test_consistency(self, method, xp):
|
||||
# test that method is a consistent estimator
|
||||
n = 10000 if method == 'correa' else 1000000
|
||||
rvs = stats.norm.rvs(size=n, random_state=0)
|
||||
rvs = xp.asarray(rvs.tolist())
|
||||
expected = xp.asarray(float(stats.norm.entropy()))
|
||||
res = stats.differential_entropy(rvs, method=method)
|
||||
xp_assert_close(res, expected, rtol=0.005)
|
||||
|
||||
# values from differential_entropy reference [6], table 1, n=50, m=7
|
||||
norm_rmse_std_cases = { # method: (RMSE, STD)
|
||||
'vasicek': (0.198, 0.109),
|
||||
'van es': (0.212, 0.110),
|
||||
'correa': (0.135, 0.112),
|
||||
'ebrahimi': (0.128, 0.109)
|
||||
}
|
||||
|
||||
# values from differential_entropy reference [6], table 2, n=50, m=7
|
||||
expon_rmse_std_cases = { # method: (RMSE, STD)
|
||||
'vasicek': (0.194, 0.148),
|
||||
'van es': (0.179, 0.149),
|
||||
'correa': (0.155, 0.152),
|
||||
'ebrahimi': (0.151, 0.148)
|
||||
}
|
||||
|
||||
rmse_std_cases = {norm: norm_rmse_std_cases,
|
||||
expon: expon_rmse_std_cases}
|
||||
|
||||
@pytest.mark.parametrize('method', [
|
||||
'vasicek',
|
||||
'van es',
|
||||
'ebrahimi',
|
||||
pytest.param(
|
||||
'correa',
|
||||
marks=skip_xp_backends("array_api_strict",
|
||||
reason="Needs fancy indexing.")
|
||||
)
|
||||
])
|
||||
@pytest.mark.parametrize('dist', [norm, expon])
|
||||
def test_rmse_std(self, method, dist, xp):
|
||||
# test that RMSE and standard deviation of estimators matches values
|
||||
# given in differential_entropy reference [6]. Incidentally, also
|
||||
# tests vectorization.
|
||||
reps, n, m = 10000, 50, 7
|
||||
expected = self.rmse_std_cases[dist][method]
|
||||
rmse_expected, std_expected = xp.asarray(expected[0]), xp.asarray(expected[1])
|
||||
rvs = dist.rvs(size=(reps, n), random_state=0)
|
||||
rvs = xp.asarray(rvs.tolist())
|
||||
true_entropy = xp.asarray(float(dist.entropy()))
|
||||
res = stats.differential_entropy(rvs, window_length=m,
|
||||
method=method, axis=-1)
|
||||
xp_assert_close(xp.sqrt(xp.mean((res - true_entropy)**2)),
|
||||
rmse_expected, atol=0.005)
|
||||
xp_assert_close(xp.std(res, correction=0), std_expected, atol=0.002)
|
||||
|
||||
@pytest.mark.parametrize('n, method', [
|
||||
(8, 'van es'),
|
||||
(12, 'ebrahimi'),
|
||||
(1001, 'vasicek')
|
||||
])
|
||||
def test_method_auto(self, n, method, xp):
|
||||
rvs = stats.norm.rvs(size=(n,), random_state=0)
|
||||
rvs = xp.asarray(rvs.tolist())
|
||||
res1 = stats.differential_entropy(rvs)
|
||||
res2 = stats.differential_entropy(rvs, method=method)
|
||||
xp_assert_equal(res1, res2)
|
||||
|
||||
@pytest.mark.parametrize('method', [
|
||||
"vasicek",
|
||||
"van es",
|
||||
pytest.param(
|
||||
"correa",
|
||||
marks=skip_xp_backends("array_api_strict", reason="Needs fancy indexing.")
|
||||
),
|
||||
"ebrahimi"
|
||||
])
|
||||
@pytest.mark.parametrize('dtype', [None, 'float32', 'float64'])
|
||||
def test_dtypes_gh21192(self, xp, method, dtype):
|
||||
# gh-21192 noted a change in the output of method='ebrahimi'
|
||||
# with integer input. Check that the output is consistent regardless
|
||||
# of input dtype.
|
||||
x = [1, 1, 2, 3, 3, 4, 5, 5, 6, 7, 8, 9, 10, 11]
|
||||
dtype_in = getattr(xp, str(dtype), None)
|
||||
dtype_out = getattr(xp, str(dtype), xp.asarray(1.).dtype)
|
||||
res = stats.differential_entropy(xp.asarray(x, dtype=dtype_in), method=method)
|
||||
ref = stats.differential_entropy(xp.asarray(x, dtype=xp.float64), method=method)
|
||||
xp_assert_close(res, xp.asarray(ref, dtype=dtype_out)[()])
|
||||
|
|
@ -1,435 +0,0 @@
|
|||
import pytest
|
||||
import warnings
|
||||
import numpy as np
|
||||
from numpy.testing import (assert_array_equal, assert_allclose,
|
||||
suppress_warnings)
|
||||
from copy import deepcopy
|
||||
from scipy.stats.sampling import FastGeneratorInversion
|
||||
from scipy import stats
|
||||
from scipy._lib._testutils import IS_MUSL
|
||||
|
||||
|
||||
def test_bad_args():
|
||||
# loc and scale must be scalar
|
||||
with pytest.raises(ValueError, match="loc must be scalar"):
|
||||
FastGeneratorInversion(stats.norm(loc=(1.2, 1.3)))
|
||||
with pytest.raises(ValueError, match="scale must be scalar"):
|
||||
FastGeneratorInversion(stats.norm(scale=[1.5, 5.7]))
|
||||
|
||||
with pytest.raises(ValueError, match="'test' cannot be used to seed"):
|
||||
FastGeneratorInversion(stats.norm(), random_state="test")
|
||||
|
||||
msg = "Each of the 1 shape parameters must be a scalar"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
FastGeneratorInversion(stats.gamma([1.3, 2.5]))
|
||||
|
||||
with pytest.raises(ValueError, match="`dist` must be a frozen"):
|
||||
FastGeneratorInversion("xy")
|
||||
|
||||
with pytest.raises(ValueError, match="Distribution 'truncnorm' is not"):
|
||||
FastGeneratorInversion(stats.truncnorm(1.3, 4.5))
|
||||
|
||||
|
||||
def test_random_state():
|
||||
# fixed seed
|
||||
gen = FastGeneratorInversion(stats.norm(), random_state=68734509)
|
||||
x1 = gen.rvs(size=10)
|
||||
gen.random_state = 68734509
|
||||
x2 = gen.rvs(size=10)
|
||||
assert_array_equal(x1, x2)
|
||||
|
||||
# Generator
|
||||
urng = np.random.default_rng(20375857)
|
||||
gen = FastGeneratorInversion(stats.norm(), random_state=urng)
|
||||
x1 = gen.rvs(size=10)
|
||||
gen.random_state = np.random.default_rng(20375857)
|
||||
x2 = gen.rvs(size=10)
|
||||
assert_array_equal(x1, x2)
|
||||
|
||||
# RandomState
|
||||
urng = np.random.RandomState(2364)
|
||||
gen = FastGeneratorInversion(stats.norm(), random_state=urng)
|
||||
x1 = gen.rvs(size=10)
|
||||
gen.random_state = np.random.RandomState(2364)
|
||||
x2 = gen.rvs(size=10)
|
||||
assert_array_equal(x1, x2)
|
||||
|
||||
# if evaluate_error is called, it must not interfere with the random_state
|
||||
# used by rvs
|
||||
gen = FastGeneratorInversion(stats.norm(), random_state=68734509)
|
||||
x1 = gen.rvs(size=10)
|
||||
_ = gen.evaluate_error(size=5) # this will generate 5 uniform rvs
|
||||
x2 = gen.rvs(size=10)
|
||||
gen.random_state = 68734509
|
||||
x3 = gen.rvs(size=20)
|
||||
assert_array_equal(x2, x3[10:])
|
||||
|
||||
|
||||
dists_with_params = [
|
||||
("alpha", (3.5,)),
|
||||
("anglit", ()),
|
||||
("argus", (3.5,)),
|
||||
("argus", (5.1,)),
|
||||
("beta", (1.5, 0.9)),
|
||||
("cosine", ()),
|
||||
("betaprime", (2.5, 3.3)),
|
||||
("bradford", (1.2,)),
|
||||
("burr", (1.3, 2.4)),
|
||||
("burr12", (0.7, 1.2)),
|
||||
("cauchy", ()),
|
||||
("chi2", (3.5,)),
|
||||
("chi", (4.5,)),
|
||||
("crystalball", (0.7, 1.2)),
|
||||
("expon", ()),
|
||||
("gamma", (1.5,)),
|
||||
("gennorm", (2.7,)),
|
||||
("gumbel_l", ()),
|
||||
("gumbel_r", ()),
|
||||
("hypsecant", ()),
|
||||
("invgauss", (3.1,)),
|
||||
("invweibull", (1.5,)),
|
||||
("laplace", ()),
|
||||
("logistic", ()),
|
||||
("maxwell", ()),
|
||||
("moyal", ()),
|
||||
("norm", ()),
|
||||
("pareto", (1.3,)),
|
||||
("powerlaw", (7.6,)),
|
||||
("rayleigh", ()),
|
||||
("semicircular", ()),
|
||||
("t", (5.7,)),
|
||||
("wald", ()),
|
||||
("weibull_max", (2.4,)),
|
||||
("weibull_min", (1.2,)),
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(("distname, args"), dists_with_params)
|
||||
def test_rvs_and_ppf(distname, args):
|
||||
# check sample against rvs generated by rv_continuous
|
||||
urng = np.random.default_rng(9807324628097097)
|
||||
rng1 = getattr(stats, distname)(*args)
|
||||
rvs1 = rng1.rvs(size=500, random_state=urng)
|
||||
rng2 = FastGeneratorInversion(rng1, random_state=urng)
|
||||
rvs2 = rng2.rvs(size=500)
|
||||
assert stats.cramervonmises_2samp(rvs1, rvs2).pvalue > 0.01
|
||||
|
||||
# check ppf
|
||||
q = [0.001, 0.1, 0.5, 0.9, 0.999]
|
||||
assert_allclose(rng1.ppf(q), rng2.ppf(q), atol=1e-10)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(("distname, args"), dists_with_params)
|
||||
def test_u_error(distname, args):
|
||||
# check sample against rvs generated by rv_continuous
|
||||
dist = getattr(stats, distname)(*args)
|
||||
with suppress_warnings() as sup:
|
||||
# filter the warnings thrown by UNU.RAN
|
||||
sup.filter(RuntimeWarning)
|
||||
rng = FastGeneratorInversion(dist)
|
||||
u_error, x_error = rng.evaluate_error(
|
||||
size=10_000, random_state=9807324628097097, x_error=False
|
||||
)
|
||||
assert u_error <= 1e-10
|
||||
|
||||
|
||||
@pytest.mark.xslow
|
||||
@pytest.mark.xfail(reason="geninvgauss CDF is not accurate")
|
||||
def test_geninvgauss_uerror():
|
||||
dist = stats.geninvgauss(3.2, 1.5)
|
||||
rng = FastGeneratorInversion(dist)
|
||||
err = rng.evaluate_error(size=10_000, random_state=67982)
|
||||
assert err[0] < 1e-10
|
||||
|
||||
|
||||
# TODO: add more distributions
|
||||
@pytest.mark.skipif(IS_MUSL, reason="Hits RecursionError, see gh-23172")
|
||||
@pytest.mark.fail_slow(5)
|
||||
@pytest.mark.parametrize(("distname, args"), [("beta", (0.11, 0.11))])
|
||||
def test_error_extreme_params(distname, args):
|
||||
# take extreme parameters where u-error might not be below the tolerance
|
||||
# due to limitations of floating point arithmetic
|
||||
with suppress_warnings() as sup:
|
||||
# filter the warnings thrown by UNU.RAN for such extreme parameters
|
||||
sup.filter(RuntimeWarning)
|
||||
dist = getattr(stats, distname)(*args)
|
||||
rng = FastGeneratorInversion(dist)
|
||||
u_error, x_error = rng.evaluate_error(
|
||||
size=10_000, random_state=980732462809709732623, x_error=True
|
||||
)
|
||||
if u_error >= 2.5 * 1e-10:
|
||||
assert x_error < 1e-9
|
||||
|
||||
|
||||
def test_evaluate_error_inputs():
|
||||
gen = FastGeneratorInversion(stats.norm())
|
||||
with pytest.raises(ValueError, match="size must be an integer"):
|
||||
gen.evaluate_error(size=3.5)
|
||||
with pytest.raises(ValueError, match="size must be an integer"):
|
||||
gen.evaluate_error(size=(3, 3))
|
||||
|
||||
|
||||
def test_rvs_ppf_loc_scale():
|
||||
loc, scale = 3.5, 2.3
|
||||
dist = stats.norm(loc=loc, scale=scale)
|
||||
rng = FastGeneratorInversion(dist, random_state=1234)
|
||||
r = rng.rvs(size=1000)
|
||||
r_rescaled = (r - loc) / scale
|
||||
assert stats.cramervonmises(r_rescaled, "norm").pvalue > 0.01
|
||||
q = [0.001, 0.1, 0.5, 0.9, 0.999]
|
||||
assert_allclose(rng._ppf(q), rng.ppf(q), atol=1e-10)
|
||||
|
||||
|
||||
def test_domain():
|
||||
# only a basic check that the domain argument is passed to the
|
||||
# UNU.RAN generators
|
||||
rng = FastGeneratorInversion(stats.norm(), domain=(-1, 1))
|
||||
r = rng.rvs(size=100)
|
||||
assert -1 <= r.min() < r.max() <= 1
|
||||
|
||||
# if loc and scale are used, new domain is loc + scale*domain
|
||||
loc, scale = 3.5, 1.3
|
||||
dist = stats.norm(loc=loc, scale=scale)
|
||||
rng = FastGeneratorInversion(dist, domain=(-1.5, 2))
|
||||
r = rng.rvs(size=100)
|
||||
lb, ub = loc - scale * 1.5, loc + scale * 2
|
||||
assert lb <= r.min() < r.max() <= ub
|
||||
|
||||
|
||||
@pytest.mark.parametrize(("distname, args, expected"),
|
||||
[("beta", (3.5, 2.5), (0, 1)),
|
||||
("norm", (), (-np.inf, np.inf))])
|
||||
def test_support(distname, args, expected):
|
||||
# test that the support is updated if truncation and loc/scale are applied
|
||||
# use beta distribution since it is a transformed betaprime distribution,
|
||||
# so it is important that the correct support is considered
|
||||
# (i.e., the support of beta is (0,1), while betaprime is (0, inf))
|
||||
dist = getattr(stats, distname)(*args)
|
||||
rng = FastGeneratorInversion(dist)
|
||||
assert_array_equal(rng.support(), expected)
|
||||
rng.loc = 1
|
||||
rng.scale = 2
|
||||
assert_array_equal(rng.support(), 1 + 2*np.array(expected))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(("distname, args"),
|
||||
[("beta", (3.5, 2.5)), ("norm", ())])
|
||||
def test_support_truncation(distname, args):
|
||||
# similar test for truncation
|
||||
dist = getattr(stats, distname)(*args)
|
||||
rng = FastGeneratorInversion(dist, domain=(0.5, 0.7))
|
||||
assert_array_equal(rng.support(), (0.5, 0.7))
|
||||
rng.loc = 1
|
||||
rng.scale = 2
|
||||
assert_array_equal(rng.support(), (1 + 2 * 0.5, 1 + 2 * 0.7))
|
||||
|
||||
|
||||
def test_domain_shift_truncation():
|
||||
    # The center of the norm distribution is zero; it should be shifted to
|
||||
    # the left endpoint of the domain. If this were not the case, PINV in
|
||||
    # UNU.RAN would raise a warning, as the center is not inside the domain.
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("error")
|
||||
rng = FastGeneratorInversion(stats.norm(), domain=(1, 2))
|
||||
r = rng.rvs(size=100)
|
||||
assert 1 <= r.min() < r.max() <= 2
|
||||
|
||||
|
||||
def test_non_rvs_methods_with_domain():
|
||||
# as a first step, compare truncated normal against stats.truncnorm
|
||||
rng = FastGeneratorInversion(stats.norm(), domain=(2.3, 3.2))
|
||||
trunc_norm = stats.truncnorm(2.3, 3.2)
|
||||
# take values that are inside and outside the domain
|
||||
x = (2.0, 2.4, 3.0, 3.4)
|
||||
p = (0.01, 0.5, 0.99)
|
||||
assert_allclose(rng._cdf(x), trunc_norm.cdf(x))
|
||||
assert_allclose(rng._ppf(p), trunc_norm.ppf(p))
|
||||
loc, scale = 2, 3
|
||||
rng.loc = 2
|
||||
rng.scale = 3
|
||||
trunc_norm = stats.truncnorm(2.3, 3.2, loc=loc, scale=scale)
|
||||
x = np.array(x) * scale + loc
|
||||
assert_allclose(rng._cdf(x), trunc_norm.cdf(x))
|
||||
assert_allclose(rng._ppf(p), trunc_norm.ppf(p))
|
||||
|
||||
# do another sanity check with beta distribution
|
||||
# in that case, it is important to use the correct domain since beta
|
||||
# is a transformation of betaprime which has a different support
|
||||
rng = FastGeneratorInversion(stats.beta(2.5, 3.5), domain=(0.3, 0.7))
|
||||
rng.loc = 2
|
||||
rng.scale = 2.5
|
||||
    # the support is (2.75, 3.75), i.e. (2 + 2.5 * 0.3, 2 + 2.5 * 0.7)
|
||||
assert_array_equal(rng.support(), (2.75, 3.75))
|
||||
x = np.array([2.74, 2.76, 3.74, 3.76])
|
||||
    # the cdf must be 0 below and 1 above the domain
|
||||
y_cdf = rng._cdf(x)
|
||||
assert_array_equal((y_cdf[0], y_cdf[3]), (0, 1))
|
||||
assert np.min(y_cdf[1:3]) > 0
|
||||
# ppf needs to map 0 and 1 to the boundaries
|
||||
assert_allclose(rng._ppf(y_cdf), (2.75, 2.76, 3.74, 3.75))
|
||||
|
||||
|
||||
def test_non_rvs_methods_without_domain():
|
||||
norm_dist = stats.norm()
|
||||
rng = FastGeneratorInversion(norm_dist)
|
||||
x = np.linspace(-3, 3, num=10)
|
||||
p = (0.01, 0.5, 0.99)
|
||||
assert_allclose(rng._cdf(x), norm_dist.cdf(x))
|
||||
assert_allclose(rng._ppf(p), norm_dist.ppf(p))
|
||||
loc, scale = 0.5, 1.3
|
||||
rng.loc = loc
|
||||
rng.scale = scale
|
||||
norm_dist = stats.norm(loc=loc, scale=scale)
|
||||
assert_allclose(rng._cdf(x), norm_dist.cdf(x))
|
||||
assert_allclose(rng._ppf(p), norm_dist.ppf(p))
|
||||
|
||||
@pytest.mark.parametrize(("domain, x"),
|
||||
[(None, 0.5),
|
||||
((0, 1), 0.5),
|
||||
((0, 1), 1.5)])
|
||||
def test_scalar_inputs(domain, x):
|
||||
""" pdf, cdf etc should map scalar values to scalars. check with and
|
||||
w/o domain since domain impacts pdf, cdf etc
|
||||
Take x inside and outside of domain """
|
||||
rng = FastGeneratorInversion(stats.norm(), domain=domain)
|
||||
assert np.isscalar(rng._cdf(x))
|
||||
assert np.isscalar(rng._ppf(0.5))
|
||||
|
||||
|
||||
def test_domain_argus_large_chi():
|
||||
# for large chi, the Gamma distribution is used and the domain has to be
|
||||
# transformed. this is a test to ensure that the transformation works
|
||||
chi, lb, ub = 5.5, 0.25, 0.75
|
||||
rng = FastGeneratorInversion(stats.argus(chi), domain=(lb, ub))
|
||||
rng.random_state = 4574
|
||||
r = rng.rvs(size=500)
|
||||
assert lb <= r.min() < r.max() <= ub
|
||||
# perform goodness of fit test with conditional cdf
|
||||
cdf = stats.argus(chi).cdf
|
||||
prob = cdf(ub) - cdf(lb)
|
||||
assert stats.cramervonmises(r, lambda x: cdf(x) / prob).pvalue > 0.05
|
||||
|
||||
|
||||
def test_setting_loc_scale():
|
||||
rng = FastGeneratorInversion(stats.norm(), random_state=765765864)
|
||||
r1 = rng.rvs(size=1000)
|
||||
rng.loc = 3.0
|
||||
rng.scale = 2.5
|
||||
r2 = rng.rvs(1000)
|
||||
# rescaled r2 should be again standard normal
|
||||
assert stats.cramervonmises_2samp(r1, (r2 - 3) / 2.5).pvalue > 0.05
|
||||
# reset values to default loc=0, scale=1
|
||||
rng.loc = 0
|
||||
rng.scale = 1
|
||||
r2 = rng.rvs(1000)
|
||||
assert stats.cramervonmises_2samp(r1, r2).pvalue > 0.05
|
||||
|
||||
|
||||
def test_ignore_shape_range():
|
||||
msg = "No generator is defined for the shape parameters"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
rng = FastGeneratorInversion(stats.t(0.03))
|
||||
rng = FastGeneratorInversion(stats.t(0.03), ignore_shape_range=True)
|
||||
# we can ignore the recommended range of shape parameters
|
||||
# but u-error can be expected to be too large in that case
|
||||
u_err, _ = rng.evaluate_error(size=1000, random_state=234)
|
||||
assert u_err >= 1e-6
|
||||
|
||||
@pytest.mark.xfail_on_32bit(
|
||||
"NumericalInversePolynomial.qrvs fails for Win 32-bit"
|
||||
)
|
||||
class TestQRVS:
|
||||
def test_input_validation(self):
|
||||
gen = FastGeneratorInversion(stats.norm())
|
||||
|
||||
match = "`qmc_engine` must be an instance of..."
|
||||
with pytest.raises(ValueError, match=match):
|
||||
gen.qrvs(qmc_engine=0)
|
||||
|
||||
match = "`d` must be consistent with dimension of `qmc_engine`."
|
||||
with pytest.raises(ValueError, match=match):
|
||||
gen.qrvs(d=3, qmc_engine=stats.qmc.Halton(2))
|
||||
|
||||
qrngs = [None, stats.qmc.Sobol(1, seed=0), stats.qmc.Halton(3, seed=0)]
|
||||
# `size=None` should not add anything to the shape, `size=1` should
|
||||
sizes = [
|
||||
(None, tuple()),
|
||||
(1, (1,)),
|
||||
(4, (4,)),
|
||||
((4,), (4,)),
|
||||
((2, 4), (2, 4)),
|
||||
]
|
||||
# Neither `d=None` nor `d=1` should add anything to the shape
|
||||
ds = [(None, tuple()), (1, tuple()), (3, (3,))]
|
||||
|
||||
@pytest.mark.parametrize("qrng", qrngs)
|
||||
@pytest.mark.parametrize("size_in, size_out", sizes)
|
||||
@pytest.mark.parametrize("d_in, d_out", ds)
|
||||
def test_QRVS_shape_consistency(self, qrng, size_in, size_out,
|
||||
d_in, d_out):
|
||||
gen = FastGeneratorInversion(stats.norm())
|
||||
|
||||
# If d and qrng.d are inconsistent, an error is raised
|
||||
if d_in is not None and qrng is not None and qrng.d != d_in:
|
||||
match = "`d` must be consistent with dimension of `qmc_engine`."
|
||||
with pytest.raises(ValueError, match=match):
|
||||
gen.qrvs(size_in, d=d_in, qmc_engine=qrng)
|
||||
return
|
||||
|
||||
# Sometimes d is really determined by qrng
|
||||
if d_in is None and qrng is not None and qrng.d != 1:
|
||||
d_out = (qrng.d,)
|
||||
|
||||
shape_expected = size_out + d_out
|
||||
|
||||
qrng2 = deepcopy(qrng)
|
||||
qrvs = gen.qrvs(size=size_in, d=d_in, qmc_engine=qrng)
|
||||
if size_in is not None:
|
||||
assert qrvs.shape == shape_expected
|
||||
|
||||
if qrng2 is not None:
|
||||
uniform = qrng2.random(np.prod(size_in) or 1)
|
||||
qrvs2 = stats.norm.ppf(uniform).reshape(shape_expected)
|
||||
assert_allclose(qrvs, qrvs2, atol=1e-12)
|
||||
|
||||
def test_QRVS_size_tuple(self):
|
||||
# QMCEngine samples are always of shape (n, d). When `size` is a tuple,
|
||||
# we set `n = prod(size)` in the call to qmc_engine.random, transform
|
||||
# the sample, and reshape it to the final dimensions. When we reshape,
|
||||
# we need to be careful, because the _columns_ of the sample returned
|
||||
# by a QMCEngine are "independent"-ish, but the elements within the
|
||||
# columns are not. We need to make sure that this doesn't get mixed up
|
||||
# by reshaping: qrvs[..., i] should remain "independent"-ish of
|
||||
# qrvs[..., i+1], but the elements within qrvs[..., i] should be
|
||||
# transformed from the same low-discrepancy sequence.
|
||||
|
||||
gen = FastGeneratorInversion(stats.norm())
|
||||
|
||||
size = (3, 4)
|
||||
d = 5
|
||||
qrng = stats.qmc.Halton(d, seed=0)
|
||||
qrng2 = stats.qmc.Halton(d, seed=0)
|
||||
|
||||
uniform = qrng2.random(np.prod(size))
|
||||
|
||||
qrvs = gen.qrvs(size=size, d=d, qmc_engine=qrng)
|
||||
qrvs2 = stats.norm.ppf(uniform)
|
||||
|
||||
for i in range(d):
|
||||
sample = qrvs[..., i]
|
||||
sample2 = qrvs2[:, i].reshape(size)
|
||||
assert_allclose(sample, sample2, atol=1e-12)
|
||||
|
||||
|
||||
def test_burr_overflow():
|
||||
# this case leads to an overflow error if math.exp is used
|
||||
# in the definition of the burr pdf instead of np.exp
|
||||
# a direct implementation of the PDF as x**(-c-1) / (1+x**(-c))**(d+1)
|
||||
# also leads to an overflow error in the setup
|
||||
args = (1.89128135, 0.30195177)
|
||||
with suppress_warnings() as sup:
|
||||
# filter potential overflow warning
|
||||
sup.filter(RuntimeWarning)
|
||||
gen = FastGeneratorInversion(stats.burr(*args))
|
||||
u_error, _ = gen.evaluate_error(random_state=4326)
|
||||
assert u_error <= 1e-10
|
||||
File diff suppressed because it is too large
File diff suppressed because it is too large
|
|
@ -1,676 +0,0 @@
|
|||
from scipy import stats, linalg, integrate
|
||||
import numpy as np
|
||||
from numpy.testing import (assert_almost_equal, assert_, assert_equal,
|
||||
assert_array_almost_equal,
|
||||
assert_array_almost_equal_nulp, assert_allclose)
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
|
||||
|
||||
def test_kde_1d():
|
||||
    # some basic tests comparing to normal distribution
|
||||
rng = np.random.default_rng(8765678)
|
||||
n_basesample = 500
|
||||
xn = rng.normal(0, 1, n_basesample)
|
||||
xnmean = xn.mean()
|
||||
xnstd = xn.std(ddof=1)
|
||||
|
||||
# get kde for original sample
|
||||
gkde = stats.gaussian_kde(xn)
|
||||
|
||||
# evaluate the density function for the kde for some points
|
||||
xx = np.asarray([0.1, 0.5, 0.9])
|
||||
loc, scale = gkde.dataset, np.sqrt(gkde.covariance)
|
||||
assert_allclose(
|
||||
gkde(xx),
|
||||
stats.norm.pdf(xx[:, None], loc=loc, scale=scale).sum(axis=-1) / gkde.n,
|
||||
rtol=5e-14
|
||||
)
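    # The explicit sum above spells out the KDE definition: the average of
    # Gaussian pdfs centred at the data points, with covariance
    # gkde.covariance.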
|
||||
|
||||
xs = np.linspace(-7, 7, 501)
|
||||
kdepdf = gkde.evaluate(xs)
|
||||
normpdf = stats.norm.pdf(xs, loc=xnmean, scale=xnstd)
|
||||
intervall = xs[1] - xs[0]
|
||||
|
||||
assert_(np.sum((kdepdf - normpdf)**2)*intervall < 0.01)
|
||||
prob1 = gkde.integrate_box_1d(xnmean, np.inf)
|
||||
prob2 = gkde.integrate_box_1d(-np.inf, xnmean)
|
||||
assert_almost_equal(prob1, 0.5, decimal=1)
|
||||
assert_almost_equal(prob2, 0.5, decimal=1)
|
||||
assert_almost_equal(gkde.integrate_box(xnmean, np.inf), prob1, decimal=13)
|
||||
assert_almost_equal(gkde.integrate_box(-np.inf, xnmean), prob2, decimal=13)
|
||||
|
||||
assert_almost_equal(gkde.integrate_kde(gkde),
|
||||
(kdepdf**2).sum()*intervall, decimal=2)
|
||||
assert_almost_equal(gkde.integrate_gaussian(xnmean, xnstd**2),
|
||||
(kdepdf*normpdf).sum()*intervall, decimal=2)
|
||||
|
||||
|
||||
def test_kde_1d_weighted():
|
||||
    # some basic tests comparing to normal distribution
|
||||
rng = np.random.default_rng(8765678)
|
||||
n_basesample = 500
|
||||
xn = rng.normal(0, 1, n_basesample)
|
||||
wn = rng.random(n_basesample)
|
||||
xnmean = np.average(xn, weights=wn)
|
||||
xnstd = np.sqrt(np.average((xn-xnmean)**2, weights=wn))
|
||||
|
||||
# get kde for original sample
|
||||
gkde = stats.gaussian_kde(xn, weights=wn)
|
||||
|
||||
# evaluate the density function for the kde for some points
|
||||
xx = np.asarray([0.1, 0.5, 0.9])
|
||||
loc, scale = gkde.dataset, np.sqrt(gkde.covariance)
|
||||
|
||||
pdf = stats.norm.pdf
|
||||
assert_allclose(
|
||||
gkde(xx),
|
||||
np.sum(pdf(xx[:, None], loc=loc, scale=scale) * gkde.weights, axis=-1),
|
||||
rtol=5e-14
|
||||
)
|
||||
|
||||
xs = np.linspace(-7, 7, 501)
|
||||
kdepdf = gkde.evaluate(xs)
|
||||
normpdf = stats.norm.pdf(xs, loc=xnmean, scale=xnstd)
|
||||
intervall = xs[1] - xs[0]
|
||||
|
||||
assert_(np.sum((kdepdf - normpdf)**2)*intervall < 0.01)
|
||||
prob1 = gkde.integrate_box_1d(xnmean, np.inf)
|
||||
prob2 = gkde.integrate_box_1d(-np.inf, xnmean)
|
||||
assert_almost_equal(prob1, 0.5, decimal=1)
|
||||
assert_almost_equal(prob2, 0.5, decimal=1)
|
||||
assert_almost_equal(gkde.integrate_box(xnmean, np.inf), prob1, decimal=13)
|
||||
assert_almost_equal(gkde.integrate_box(-np.inf, xnmean), prob2, decimal=13)
|
||||
|
||||
assert_almost_equal(gkde.integrate_kde(gkde),
|
||||
(kdepdf**2).sum()*intervall, decimal=2)
|
||||
assert_almost_equal(gkde.integrate_gaussian(xnmean, xnstd**2),
|
||||
(kdepdf*normpdf).sum()*intervall, decimal=2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("n_basesample",
|
||||
[
|
||||
20,
|
||||
pytest.param(500, marks=[pytest.mark.xslow])
|
||||
]
|
||||
)
|
||||
def test_kde_2d(n_basesample):
|
||||
    # some basic tests comparing to normal distribution
|
||||
rng = np.random.default_rng(8765678)
|
||||
|
||||
mean = np.array([1.0, 3.0])
|
||||
covariance = np.array([[1.0, 2.0], [2.0, 6.0]])
|
||||
|
||||
    # Need transpose (shape (2, n_basesample)) for kde
|
||||
xn = rng.multivariate_normal(mean, covariance, size=n_basesample).T
|
||||
|
||||
# get kde for original sample
|
||||
gkde = stats.gaussian_kde(xn)
|
||||
|
||||
# evaluate vs multivariate normal, using the KDE definition
|
||||
xx = np.asarray([[1, 2], [3, 4], [5, 6]])
|
||||
arg = xx[:, None, :] - gkde.dataset.T
|
||||
pdf = stats.multivariate_normal.pdf
|
||||
assert_allclose(
|
||||
gkde(xx.T),
|
||||
pdf(arg, cov=gkde.covariance).sum(axis=-1) / gkde.n,
|
||||
rtol=5e-14
|
||||
)
|
||||
|
||||
# ... and cdf
|
||||
cdf = stats.multivariate_normal.cdf
|
||||
lo, hi = [-1, -2], [0, 0]
|
||||
lo_, hi_ = lo - gkde.dataset.T, hi - gkde.dataset.T
|
||||
assert_allclose(
|
||||
gkde.integrate_box(lo, hi, rng=rng),
|
||||
cdf(hi_, lower_limit=lo_, cov=gkde.covariance, rng=rng).sum(axis=-1) / gkde.n,
|
||||
rtol=5e-7
|
||||
)
|
||||
|
||||
# evaluate the density function for the kde for some points
|
||||
x, y = np.mgrid[-7:7:500j, -7:7:500j]
|
||||
grid_coords = np.vstack([x.ravel(), y.ravel()])
|
||||
kdepdf = gkde.evaluate(grid_coords)
|
||||
kdepdf = kdepdf.reshape(500, 500)
|
||||
|
||||
normpdf = stats.multivariate_normal.pdf(np.dstack([x, y]),
|
||||
mean=mean, cov=covariance)
|
||||
intervall = y.ravel()[1] - y.ravel()[0]
|
||||
|
||||
assert_(np.sum((kdepdf - normpdf)**2) * (intervall**2) < 0.01)
|
||||
|
||||
small = -1e100
|
||||
large = 1e100
|
||||
prob1 = gkde.integrate_box([small, mean[1]], [large, large], rng=rng)
|
||||
prob2 = gkde.integrate_box([small, small], [large, mean[1]], rng=rng)
|
||||
|
||||
assert_almost_equal(prob1, 0.5, decimal=1)
|
||||
assert_almost_equal(prob2, 0.5, decimal=1)
|
||||
assert_almost_equal(gkde.integrate_kde(gkde),
|
||||
(kdepdf**2).sum()*(intervall**2), decimal=2)
|
||||
assert_almost_equal(gkde.integrate_gaussian(mean, covariance),
|
||||
(kdepdf*normpdf).sum()*(intervall**2), decimal=2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("n_basesample",
|
||||
[
|
||||
20,
|
||||
pytest.param(500, marks=[pytest.mark.xslow])
|
||||
]
|
||||
)
|
||||
def test_kde_2d_weighted(n_basesample):
|
||||
    # some basic tests comparing to normal distribution
|
||||
rng = np.random.RandomState(8765678)
|
||||
|
||||
mean = np.array([1.0, 3.0])
|
||||
covariance = np.array([[1.0, 2.0], [2.0, 6.0]])
|
||||
|
||||
    # Need transpose (shape (2, n_basesample)) for kde
|
||||
xn = rng.multivariate_normal(mean, covariance, size=n_basesample).T
|
||||
wn = rng.rand(n_basesample)
|
||||
|
||||
# get kde for original sample
|
||||
gkde = stats.gaussian_kde(xn, weights=wn)
|
||||
|
||||
|
||||
# evaluate vs multivariate normal, using the kde definition
|
||||
    xx = np.asarray([[1, 2], [3, 4], [5, 6]])
    arg = xx[:, None, :] - gkde.dataset.T
    pdf = stats.multivariate_normal.pdf
    assert_allclose(
        gkde(xx.T),
        np.sum(pdf(arg, cov=gkde.covariance) * gkde.weights, axis=-1),
        rtol=5e-14
    )

    # ... and cdf
    cdf = stats.multivariate_normal.cdf
    lo, hi = [-1, -2], [0, 0]
    lo_, hi_ = lo - gkde.dataset.T, hi - gkde.dataset.T
    assert_allclose(
        gkde.integrate_box(lo, hi, rng=rng),
        np.sum(cdf(hi_, lower_limit=lo_, cov=gkde.covariance, rng=rng) *
               gkde.weights, axis=-1),
        rtol=5e-6
    )

    # evaluate the density function for the kde for some points
    x, y = np.mgrid[-7:7:500j, -7:7:500j]
    grid_coords = np.vstack([x.ravel(), y.ravel()])
    kdepdf = gkde.evaluate(grid_coords)
    kdepdf = kdepdf.reshape(500, 500)

    normpdf = stats.multivariate_normal.pdf(np.dstack([x, y]),
                                            mean=mean, cov=covariance)
    intervall = y.ravel()[1] - y.ravel()[0]

    assert_(np.sum((kdepdf - normpdf)**2) * (intervall**2) < 0.01)

    small = -1e100
    large = 1e100
    prob1 = gkde.integrate_box([small, mean[1]], [large, large], rng=rng)
    prob2 = gkde.integrate_box([small, small], [large, mean[1]], rng=rng)

    assert_almost_equal(prob1, 0.5, decimal=1)
    assert_almost_equal(prob2, 0.5, decimal=1)
    assert_almost_equal(gkde.integrate_kde(gkde),
                        (kdepdf**2).sum()*(intervall**2), decimal=2)
    assert_almost_equal(gkde.integrate_gaussian(mean, covariance),
                        (kdepdf*normpdf).sum()*(intervall**2), decimal=2)


def test_kde_bandwidth_method():
    def scotts_factor(kde_obj):
        """Same as default, just check that it works."""
        return np.power(kde_obj.n, -1./(kde_obj.d+4))

    rng = np.random.default_rng(8765678)
    n_basesample = 50
    xn = rng.normal(0, 1, n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn)
    # Supply a callable
    gkde2 = stats.gaussian_kde(xn, bw_method=scotts_factor)
    # Supply a scalar
    gkde3 = stats.gaussian_kde(xn, bw_method=gkde.factor)

    xs = np.linspace(-7, 7, 51)
    kdepdf = gkde.evaluate(xs)
    kdepdf2 = gkde2.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf2)
    kdepdf3 = gkde3.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf3)

    assert_raises(ValueError, stats.gaussian_kde, xn, bw_method='wrongstring')


def test_kde_bandwidth_method_weighted():
    def scotts_factor(kde_obj):
        """Same as default, just check that it works."""
        return np.power(kde_obj.neff, -1./(kde_obj.d+4))

    rng = np.random.default_rng(8765678)
    n_basesample = 50
    xn = rng.normal(0, 1, n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn)
    # Supply a callable
    gkde2 = stats.gaussian_kde(xn, bw_method=scotts_factor)
    # Supply a scalar
    gkde3 = stats.gaussian_kde(xn, bw_method=gkde.factor)

    xs = np.linspace(-7, 7, 51)
    kdepdf = gkde.evaluate(xs)
    kdepdf2 = gkde2.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf2)
    kdepdf3 = gkde3.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf3)

    assert_raises(ValueError, stats.gaussian_kde, xn, bw_method='wrongstring')


# Subclasses that should stay working (extracted from various sources).
# Unfortunately the earlier design of gaussian_kde made it necessary for users
# to create these kinds of subclasses, or call _compute_covariance() directly.
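# (Newer code can instead pass `bw_method` to `gaussian_kde` or call
# `set_bandwidth`, as exercised elsewhere in this file.)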

class _kde_subclass1(stats.gaussian_kde):
    def __init__(self, dataset):
        self.dataset = np.atleast_2d(dataset)
        self.d, self.n = self.dataset.shape
        self.covariance_factor = self.scotts_factor
        self._compute_covariance()


class _kde_subclass2(stats.gaussian_kde):
    def __init__(self, dataset):
        self.covariance_factor = self.scotts_factor
        super().__init__(dataset)


class _kde_subclass4(stats.gaussian_kde):
    def covariance_factor(self):
        return 0.5 * self.silverman_factor()


def test_gaussian_kde_subclassing():
    x1 = np.array([-7, -5, 1, 4, 5], dtype=float)
    xs = np.linspace(-10, 10, num=50)

    # gaussian_kde itself
    kde = stats.gaussian_kde(x1)
    ys = kde(xs)

    # subclass 1
    kde1 = _kde_subclass1(x1)
    y1 = kde1(xs)
    assert_array_almost_equal_nulp(ys, y1, nulp=10)

    # subclass 2
    kde2 = _kde_subclass2(x1)
    y2 = kde2(xs)
    assert_array_almost_equal_nulp(ys, y2, nulp=10)

    # subclass 3 was removed because we have no obligation to maintain support
    # for user invocation of private methods

    # subclass 4
    kde4 = _kde_subclass4(x1)
    y4 = kde4(x1)
    y_expected = [0.06292987, 0.06346938, 0.05860291, 0.08657652, 0.07904017]

    assert_array_almost_equal(y_expected, y4, decimal=6)

    # Not a subclass, but check for use of _compute_covariance()
    kde5 = kde
    kde5.covariance_factor = lambda: kde.factor
    kde5._compute_covariance()
    y5 = kde5(xs)
    assert_array_almost_equal_nulp(ys, y5, nulp=10)


def test_gaussian_kde_covariance_caching():
    x1 = np.array([-7, -5, 1, 4, 5], dtype=float)
    xs = np.linspace(-10, 10, num=5)
    # These expected values are from scipy 0.10, before some changes to
    # gaussian_kde. They were not compared with any external reference.
    y_expected = [0.02463386, 0.04689208, 0.05395444, 0.05337754, 0.01664475]

    # Set the bandwidth, then reset it to the default.
    kde = stats.gaussian_kde(x1)
    kde.set_bandwidth(bw_method=0.5)
    kde.set_bandwidth(bw_method='scott')
    y2 = kde(xs)

    assert_array_almost_equal(y_expected, y2, decimal=7)


def test_gaussian_kde_monkeypatch():
    """Ugly, but people may rely on this. See scipy pull request 123,
    specifically the linked ML thread "Width of the Gaussian in stats.kde".
    If it is necessary to break this later on, that is to be discussed on ML.
    """
    x1 = np.array([-7, -5, 1, 4, 5], dtype=float)
    xs = np.linspace(-10, 10, num=50)

    # The old monkeypatched version to get at Silverman's Rule.
    kde = stats.gaussian_kde(x1)
    kde.covariance_factor = kde.silverman_factor
    kde._compute_covariance()
    y1 = kde(xs)

    # The new saner version.
    kde2 = stats.gaussian_kde(x1, bw_method='silverman')
    y2 = kde2(xs)

    assert_array_almost_equal_nulp(y1, y2, nulp=10)


def test_kde_integer_input():
    """Regression test for #1181."""
    x1 = np.arange(5)
    kde = stats.gaussian_kde(x1)
    y_expected = [0.13480721, 0.18222869, 0.19514935, 0.18222869, 0.13480721]
    assert_array_almost_equal(kde(x1), y_expected, decimal=6)


_ftypes = ['float32', 'float64', 'float96', 'float128', 'int32', 'int64']


@pytest.mark.parametrize("bw_type", _ftypes + ["scott", "silverman"])
@pytest.mark.parametrize("dtype", _ftypes)
def test_kde_output_dtype(dtype, bw_type):
    # Check whether the datatypes are available
    dtype = getattr(np, dtype, None)

    if bw_type in ["scott", "silverman"]:
        bw = bw_type
    else:
        bw_type = getattr(np, bw_type, None)
        bw = bw_type(3) if bw_type else None

    if any(dt is None for dt in [dtype, bw]):
        pytest.skip()

    weights = np.arange(5, dtype=dtype)
    dataset = np.arange(5, dtype=dtype)
    k = stats.gaussian_kde(dataset, bw_method=bw, weights=weights)
    points = np.arange(5, dtype=dtype)
    result = k(points)
    # weights are always cast to float64
    assert result.dtype == np.result_type(dataset, points, np.float64(weights),
                                          k.factor)


def test_pdf_logpdf_validation():
    rng = np.random.default_rng(64202298293133848336925499069837723291)
    xn = rng.standard_normal((2, 10))
    gkde = stats.gaussian_kde(xn)
    xs = rng.standard_normal((3, 10))

    msg = "points have dimension 3, dataset has dimension 2"
    with pytest.raises(ValueError, match=msg):
        gkde.logpdf(xs)


def test_pdf_logpdf():
    rng = np.random.default_rng(1)
    n_basesample = 50
    xn = rng.normal(0, 1, n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn)

    xs = np.linspace(-15, 12, 25)
    pdf = gkde.evaluate(xs)
    pdf2 = gkde.pdf(xs)
    assert_almost_equal(pdf, pdf2, decimal=12)

    logpdf = np.log(pdf)
    logpdf2 = gkde.logpdf(xs)
    assert_almost_equal(logpdf, logpdf2, decimal=12)

    # There are more points than data
    gkde = stats.gaussian_kde(xs)
    pdf = np.log(gkde.evaluate(xn))
    pdf2 = gkde.logpdf(xn)
    assert_almost_equal(pdf, pdf2, decimal=12)


def test_pdf_logpdf_weighted():
    rng = np.random.default_rng(1)
    n_basesample = 50
    xn = rng.normal(0, 1, n_basesample)
    wn = rng.random(n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn, weights=wn)

    xs = np.linspace(-15, 12, 25)
    pdf = gkde.evaluate(xs)
    pdf2 = gkde.pdf(xs)
    assert_almost_equal(pdf, pdf2, decimal=12)

    logpdf = np.log(pdf)
    logpdf2 = gkde.logpdf(xs)
    assert_almost_equal(logpdf, logpdf2, decimal=12)

    # There are more points than data
    gkde = stats.gaussian_kde(xs, weights=np.random.rand(len(xs)))
    pdf = np.log(gkde.evaluate(xn))
    pdf2 = gkde.logpdf(xn)
    assert_almost_equal(pdf, pdf2, decimal=12)


def test_marginal_1_axis():
    rng = np.random.default_rng(6111799263660870475)
    n_data = 50
    n_dim = 10
    dataset = rng.normal(size=(n_dim, n_data))
    points = rng.normal(size=(n_dim, 3))

    dimensions = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])  # dimensions to keep

    kde = stats.gaussian_kde(dataset)
    marginal = kde.marginal(dimensions)
    pdf = marginal.pdf(points[dimensions])

    def marginal_pdf_single(point):
        def f(x):
            x = np.concatenate(([x], point[dimensions]))
            return kde.pdf(x)[0]
        return integrate.quad(f, -np.inf, np.inf)[0]

    def marginal_pdf(points):
        return np.apply_along_axis(marginal_pdf_single, axis=0, arr=points)

    ref = marginal_pdf(points)

    assert_allclose(pdf, ref, rtol=1e-6)


@pytest.mark.xslow
def test_marginal_2_axis():
    rng = np.random.default_rng(6111799263660870475)
    n_data = 30
    n_dim = 4
    dataset = rng.normal(size=(n_dim, n_data))
    points = rng.normal(size=(n_dim, 3))

    dimensions = np.array([1, 3])  # dimensions to keep

    kde = stats.gaussian_kde(dataset)
    marginal = kde.marginal(dimensions)
    pdf = marginal.pdf(points[dimensions])

    def marginal_pdf(points):
        def marginal_pdf_single(point):
            def f(y, x):
                w, z = point[dimensions]
                x = np.array([x, w, y, z])
                return kde.pdf(x)[0]
            return integrate.dblquad(f, -np.inf, np.inf, -np.inf, np.inf)[0]

        return np.apply_along_axis(marginal_pdf_single, axis=0, arr=points)

    ref = marginal_pdf(points)

    assert_allclose(pdf, ref, rtol=1e-6)


def test_marginal_iv():
    # test input validation
    rng = np.random.default_rng(6111799263660870475)
    n_data = 30
    n_dim = 4
    dataset = rng.normal(size=(n_dim, n_data))
    points = rng.normal(size=(n_dim, 3))

    kde = stats.gaussian_kde(dataset)

    # check that positive and negative indices are equivalent
    dimensions1 = [-1, 1]
    marginal1 = kde.marginal(dimensions1)
    pdf1 = marginal1.pdf(points[dimensions1])

    dimensions2 = [3, -3]
    marginal2 = kde.marginal(dimensions2)
    pdf2 = marginal2.pdf(points[dimensions2])

    assert_equal(pdf1, pdf2)

    # IV for non-integer dimensions
    message = "Elements of `dimensions` must be integers..."
    with pytest.raises(ValueError, match=message):
        kde.marginal([1, 2.5])

    # IV for uniqueness
    message = "All elements of `dimensions` must be unique."
    with pytest.raises(ValueError, match=message):
        kde.marginal([1, 2, 2])

    # IV for out-of-bounds dimensions
message = (r"Dimensions \[-5 6\] are invalid for a distribution in 4...")
|
||||
with pytest.raises(ValueError, match=message):
|
||||
kde.marginal([1, -5, 6])
|
||||
|
||||
|
||||
@pytest.mark.xslow
|
||||
def test_logpdf_overflow():
|
||||
# regression test for gh-12988; testing against linalg instability for
|
||||
# very high dimensionality kde
|
||||
rng = np.random.default_rng(1)
|
||||
n_dimensions = 2500
|
||||
n_samples = 5000
|
||||
xn = np.array([rng.normal(0, 1, n_samples) + (n) for n in range(
|
||||
0, n_dimensions)])
|
||||
|
||||
# Default
|
||||
gkde = stats.gaussian_kde(xn)
|
||||
|
||||
logpdf = gkde.logpdf(np.arange(0, n_dimensions))
|
||||
np.testing.assert_equal(np.isneginf(logpdf[0]), False)
|
||||
np.testing.assert_equal(np.isnan(logpdf[0]), False)
|
||||
|
||||
|
||||
def test_weights_intact():
|
||||
# regression test for gh-9709: weights are not modified
|
||||
rng = np.random.default_rng(12345)
|
||||
vals = rng.lognormal(size=100)
|
||||
weights = rng.choice([1.0, 10.0, 100], size=vals.size)
|
||||
orig_weights = weights.copy()
|
||||
|
||||
stats.gaussian_kde(np.log10(vals), weights=weights)
|
||||
assert_allclose(weights, orig_weights, atol=1e-14, rtol=1e-14)
|
||||
|
||||
|
||||
def test_weights_integer():
|
||||
# integer weights are OK, cf gh-9709 (comment)
|
||||
values = [0.2, 13.5, 21.0, 75.0, 99.0]
|
||||
weights = [1, 2, 4, 8, 16] # a list of integers
|
||||
pdf_i = stats.gaussian_kde(values, weights=weights)
|
||||
pdf_f = stats.gaussian_kde(values, weights=np.float64(weights))
|
||||
|
||||
xn = [0.3, 11, 88]
|
||||
assert_allclose(pdf_i.evaluate(xn),
|
||||
pdf_f.evaluate(xn), atol=1e-14, rtol=1e-14)
|
||||
|
||||
|
||||
def test_seed():
|
||||
# Test the seed option of the resample method
|
||||
def test_seed_sub(gkde_trail):
|
||||
n_sample = 200
|
||||
# The results should be different without using seed
|
||||
samp1 = gkde_trail.resample(n_sample)
|
||||
samp2 = gkde_trail.resample(n_sample)
|
||||
assert_raises(
|
||||
AssertionError, assert_allclose, samp1, samp2, atol=1e-13
|
||||
)
|
||||
# Use integer seed
|
||||
seed = 831
|
||||
samp1 = gkde_trail.resample(n_sample, seed=seed)
|
||||
samp2 = gkde_trail.resample(n_sample, seed=seed)
|
||||
assert_allclose(samp1, samp2, atol=1e-13)
|
||||
# Use RandomState
|
||||
rstate1 = np.random.RandomState(seed=138)
|
||||
samp1 = gkde_trail.resample(n_sample, seed=rstate1)
|
||||
rstate2 = np.random.RandomState(seed=138)
|
||||
samp2 = gkde_trail.resample(n_sample, seed=rstate2)
|
||||
assert_allclose(samp1, samp2, atol=1e-13)
|
||||
|
||||
# check that np.random.Generator can be used (numpy >= 1.17)
|
||||
if hasattr(np.random, 'default_rng'):
|
||||
# obtain a np.random.Generator object
|
||||
rng = np.random.default_rng(1234)
|
||||
gkde_trail.resample(n_sample, seed=rng)
|
||||
|
||||
rng = np.random.default_rng(8765678)
|
||||
n_basesample = 500
|
||||
wn = rng.random(n_basesample)
|
||||
# Test 1D case
|
||||
xn_1d = rng.normal(0, 1, n_basesample)
|
||||
|
||||
gkde_1d = stats.gaussian_kde(xn_1d)
|
||||
test_seed_sub(gkde_1d)
|
||||
gkde_1d_weighted = stats.gaussian_kde(xn_1d, weights=wn)
|
||||
test_seed_sub(gkde_1d_weighted)
|
||||
|
||||
# Test 2D case
|
||||
mean = np.array([1.0, 3.0])
|
||||
covariance = np.array([[1.0, 2.0], [2.0, 6.0]])
|
||||
xn_2d = rng.multivariate_normal(mean, covariance, size=n_basesample).T
|
||||
|
||||
gkde_2d = stats.gaussian_kde(xn_2d)
|
||||
test_seed_sub(gkde_2d)
|
||||
gkde_2d_weighted = stats.gaussian_kde(xn_2d, weights=wn)
|
||||
test_seed_sub(gkde_2d_weighted)
|
||||
|
||||
|
||||
def test_singular_data_covariance_gh10205():
|
||||
# When the data lie in a lower-dimensional subspace and this causes
|
||||
    # an exception, check that the error message is informative.
    rng = np.random.default_rng(2321583144339784787)
    mu = np.array([1, 10, 20])
    sigma = np.array([[4, 10, 0], [10, 25, 0], [0, 0, 100]])
    data = rng.multivariate_normal(mu, sigma, 1000)
    try:  # doesn't raise any error on some platforms, and that's OK
        stats.gaussian_kde(data.T)
    except linalg.LinAlgError:
        msg = "The data appears to lie in a lower-dimensional subspace..."
        with assert_raises(linalg.LinAlgError, match=msg):
            stats.gaussian_kde(data.T)


def test_fewer_points_than_dimensions_gh17436():
    # When the number of points is fewer than the number of dimensions, the
    # covariance matrix would be singular, and the exception tested in
    # test_singular_data_covariance_gh10205 would occur. However, sometimes
    # this occurs when the user passes in the transpose of what `gaussian_kde`
    # expects. This can result in a huge covariance matrix, so bail early.
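    # (`gaussian_kde` expects `dataset` with shape (# of dims, # of data);
    # `rvs` below has shape (5, 3), i.e. 5 draws from a 3-d normal, so passing
    # it as-is looks like a 5-d dataset with only 3 points.)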
    rng = np.random.default_rng(2046127537594925772)
    rvs = rng.multivariate_normal(np.zeros(3), np.eye(3), size=5)
    message = "Number of dimensions is greater than number of samples..."
    with pytest.raises(ValueError, match=message):
        stats.gaussian_kde(rvs)

@ -1,289 +0,0 @@
import pytest
import numpy as np
from scipy import stats

from scipy._lib._array_api import xp_assert_close, xp_assert_equal
from scipy.stats._stats_py import _xp_mean, _xp_var, _length_nonmasked
from scipy.stats._axis_nan_policy import _axis_nan_policy_factory


marray = pytest.importorskip('marray')
skip_backend = pytest.mark.skip_xp_backends


def get_arrays(n_arrays, *, dtype='float64', xp=np, shape=(7, 8), seed=84912165484321):
    mxp = marray._get_namespace(xp)
    rng = np.random.default_rng(seed)

    datas, masks = [], []
    for i in range(n_arrays):
        data = rng.random(size=shape)
        if dtype.startswith('complex'):
            data = 10*data * 10j*rng.standard_normal(size=shape)
        data = data.astype(dtype)
        datas.append(data)
        mask = rng.random(size=shape) > 0.75
        masks.append(mask)

    marrays = []
    nan_arrays = []
    for array, mask in zip(datas, masks):
        marrays.append(mxp.asarray(array, mask=mask))
        nan_array = array.copy()
        nan_array[mask] = xp.nan
        nan_arrays.append(nan_array)

    return mxp, marrays, nan_arrays

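# Each test below computes a statistic on the MArray inputs (data + mask) and
# compares it against the same statistic computed on plain ndarrays in which
# the masked entries are replaced by NaN and `nan_policy='omit'` is passed;
# the two paths should agree on the unmasked data.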

@skip_backend('dask.array', reason='Arrays need `device` attribute: dask/dask#11711')
@skip_backend('jax.numpy', reason="JAX doesn't allow item assignment.")
@skip_backend('torch', reason="marray#99")
@pytest.mark.parametrize('fun, kwargs', [(stats.gmean, {}),
                                         (stats.hmean, {}),
                                         (stats.pmean, {'p': 2})])
@pytest.mark.parametrize('axis', [0, 1])
def test_xmean(fun, kwargs, axis, xp):
    mxp, marrays, narrays = get_arrays(2, xp=xp)
    res = fun(marrays[0], weights=marrays[1], axis=axis, **kwargs)
    ref = fun(narrays[0], weights=narrays[1], nan_policy='omit', axis=axis, **kwargs)
    xp_assert_close(res.data, xp.asarray(ref))


@skip_backend('dask.array', reason='Arrays need `device` attribute: dask/dask#11711')
@skip_backend('jax.numpy', reason="JAX doesn't allow item assignment.")
@skip_backend('torch', reason="marray#99")
@pytest.mark.parametrize('axis', [0, 1, None])
@pytest.mark.parametrize('keepdims', [False, True])
def test_xp_mean(axis, keepdims, xp):
    mxp, marrays, narrays = get_arrays(2, xp=xp)
    kwargs = dict(axis=axis, keepdims=keepdims)
    res = _xp_mean(marrays[0], weights=marrays[1], **kwargs)
    ref = _xp_mean(narrays[0], weights=narrays[1], nan_policy='omit', **kwargs)
    xp_assert_close(res.data, xp.asarray(ref))


@skip_backend('dask.array', reason='Arrays need `device` attribute: dask/dask#11711')
@skip_backend('jax.numpy', reason="JAX doesn't allow item assignment.")
@skip_backend('torch', reason="array-api-compat#242")
@pytest.mark.parametrize('fun, kwargs',
                         [(stats.moment, {'order': 2}),
                          (stats.skew, {}),
                          (stats.skew, {'bias': False}),
                          (stats.kurtosis, {}),
                          (stats.kurtosis, {'bias': False}),
                          (stats.sem, {}),
                          (stats.kstat, {'n': 1}),
                          (stats.kstat, {'n': 2}),
                          (stats.kstat, {'n': 3}),
                          (stats.kstat, {'n': 4}),
                          (stats.kstatvar, {'n': 1}),
                          (stats.kstatvar, {'n': 2}),
                          (stats.circmean, {}),
                          (stats.circvar, {}),
                          (stats.circstd, {}),
                          (_xp_var, {}),
                          (stats.tmean, {'limits': (0.1, 0.9)}),
                          (stats.tvar, {'limits': (0.1, 0.9)}),
                          (stats.tmin, {'lowerlimit': 0.5}),
                          (stats.tmax, {'upperlimit': 0.5}),
                          (stats.tstd, {'limits': (0.1, 0.9)}),
                          (stats.tsem, {'limits': (0.1, 0.9)}),
                          ])
@pytest.mark.parametrize('axis', [0, 1, None])
def test_several(fun, kwargs, axis, xp):
    mxp, marrays, narrays = get_arrays(1, xp=xp)
    kwargs = dict(axis=axis) | kwargs
    res = fun(marrays[0], **kwargs)
    ref = fun(narrays[0], nan_policy='omit', **kwargs)
    xp_assert_close(res.data, xp.asarray(ref))


@skip_backend('dask.array', reason='Arrays need `device` attribute: dask/dask#11711')
@skip_backend('jax.numpy', reason="JAX doesn't allow item assignment.")
@skip_backend('torch', reason="array-api-compat#242")
@pytest.mark.parametrize('axis', [0, 1])
@pytest.mark.parametrize('kwargs', [{}])
def test_describe(axis, kwargs, xp):
    mxp, marrays, narrays = get_arrays(1, xp=xp)
    kwargs = dict(axis=axis) | kwargs
    res = stats.describe(marrays[0], **kwargs)
    ref = stats.describe(narrays[0], nan_policy='omit', **kwargs)
    xp_assert_close(res.nobs.data, xp.asarray(ref.nobs))
    xp_assert_close(res.minmax[0].data, xp.asarray(ref.minmax[0].data))
    xp_assert_close(res.minmax[1].data, xp.asarray(ref.minmax[1].data))
    xp_assert_close(res.variance.data, xp.asarray(ref.variance.data))
    xp_assert_close(res.skewness.data, xp.asarray(ref.skewness.data))
    xp_assert_close(res.kurtosis.data, xp.asarray(ref.kurtosis.data))


@skip_backend('dask.array', reason='Arrays need `device` attribute: dask/dask#11711')
@skip_backend('jax.numpy', reason="JAX doesn't allow item assignment.")
@skip_backend('torch', reason="array-api-compat#242")
@pytest.mark.parametrize('fun', [stats.zscore, stats.gzscore, stats.zmap])
@pytest.mark.parametrize('axis', [0, 1, None])
def test_zscore(fun, axis, xp):
    mxp, marrays, narrays = (get_arrays(2, xp=xp) if fun == stats.zmap
                             else get_arrays(1, xp=xp))
    res = fun(*marrays, axis=axis)
    ref = xp.asarray(fun(*narrays, nan_policy='omit', axis=axis))
    xp_assert_close(res.data[~res.mask], ref[~xp.isnan(ref)])
    xp_assert_equal(res.mask, marrays[0].mask)


@skip_backend('dask.array', reason='Arrays need `device` attribute: dask/dask#11711')
@skip_backend('jax.numpy', reason="JAX doesn't allow item assignment.")
@skip_backend('torch', reason="array-api-compat#242")
@skip_backend('cupy', reason="special functions won't work")
@pytest.mark.parametrize('f_name', ['ttest_1samp', 'ttest_rel', 'ttest_ind'])
@pytest.mark.parametrize('axis', [0, 1, None])
def test_ttest(f_name, axis, xp):
    f = getattr(stats, f_name)
    mxp, marrays, narrays = get_arrays(2, xp=xp)
    if f_name == 'ttest_1samp':
        marrays[1] = mxp.mean(marrays[1], axis=axis, keepdims=axis is not None)
        narrays[1] = np.nanmean(narrays[1], axis=axis, keepdims=axis is not None)
    res = f(*marrays, axis=axis)
    ref = f(*narrays, nan_policy='omit', axis=axis)
    xp_assert_close(res.statistic.data, xp.asarray(ref.statistic))
    xp_assert_close(res.pvalue.data, xp.asarray(ref.pvalue))
    res_ci = res.confidence_interval()
    ref_ci = ref.confidence_interval()
    xp_assert_close(res_ci.low.data, xp.asarray(ref_ci.low))
    xp_assert_close(res_ci.high.data, xp.asarray(ref_ci.high))


@skip_backend('dask.array', reason='Arrays need `device` attribute: dask/dask#11711')
@skip_backend('jax.numpy', reason="JAX doesn't allow item assignment.")
@skip_backend('torch', reason="array-api-compat#242")
@skip_backend('cupy', reason="special functions won't work")
@pytest.mark.filterwarnings("ignore::scipy.stats._axis_nan_policy.SmallSampleWarning")
@pytest.mark.parametrize('f_name', ['skewtest', 'kurtosistest',
                                    'normaltest', 'jarque_bera'])
@pytest.mark.parametrize('axis', [0, 1, None])
def test_normality_tests(f_name, axis, xp):
    f = getattr(stats, f_name)
    mxp, marrays, narrays = get_arrays(1, xp=xp, shape=(10, 11))

    res = f(*marrays, axis=axis)
    ref = f(*narrays, nan_policy='omit', axis=axis)

    xp_assert_close(res.statistic.data, xp.asarray(ref.statistic))
    xp_assert_close(res.pvalue.data, xp.asarray(ref.pvalue))


def pd_nsamples(kwargs):
    return 2 if kwargs.get('f_exp', None) is not None else 1


@_axis_nan_policy_factory(lambda *args: tuple(args), paired=True, n_samples=pd_nsamples)
def power_divergence_ref(f_obs, f_exp=None, *, ddof, lambda_, axis=0):
    return stats.power_divergence(f_obs, f_exp, axis=axis, ddof=ddof, lambda_=lambda_)


@skip_backend('dask.array', reason='Arrays need `device` attribute: dask/dask#11711')
@skip_backend('jax.numpy', reason="JAX doesn't allow item assignment.")
@skip_backend('torch', reason="array-api-compat#242")
@skip_backend('cupy', reason="special functions won't work")
@pytest.mark.parametrize('lambda_', ['pearson', 'log-likelihood', 'freeman-tukey',
                                     'mod-log-likelihood', 'neyman', 'cressie-read',
                                     'chisquare'])
@pytest.mark.parametrize('ddof', [0, 1])
@pytest.mark.parametrize('axis', [0, 1, None])
def test_power_divergence_chisquare(lambda_, ddof, axis, xp):
    mxp, marrays, narrays = get_arrays(2, xp=xp, shape=(5, 6))

    kwargs = dict(axis=axis, ddof=ddof)
    if lambda_ == 'chisquare':
        lambda_ = "pearson"
        def f(*args, **kwargs):
            return stats.chisquare(*args, **kwargs)
    else:
        def f(*args, **kwargs):
            return stats.power_divergence(*args, lambda_=lambda_, **kwargs)

    # test 1-arg
    res = f(marrays[0], **kwargs)
    ref = power_divergence_ref(narrays[0], nan_policy='omit', lambda_=lambda_, **kwargs)

    xp_assert_close(res.statistic.data, xp.asarray(ref[0]))
    xp_assert_close(res.pvalue.data, xp.asarray(ref[1]))

    # test 2-arg
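    # (The rescaling below appears to keep the totals of `f_obs` and `f_exp`
    # equal on the jointly unmasked entries, which `chisquare` /
    # `power_divergence` check for.)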
    common_mask = np.isnan(narrays[0]) | np.isnan(narrays[1])
    normalize = (np.nansum(narrays[1] * ~common_mask, axis=axis, keepdims=True)
                 / np.nansum(narrays[0] * ~common_mask, axis=axis, keepdims=True))
    marrays[0] *= xp.asarray(normalize)
    narrays[0] *= normalize

    res = f(*marrays, **kwargs)
    ref = power_divergence_ref(*narrays, nan_policy='omit', lambda_=lambda_, **kwargs)

    xp_assert_close(res.statistic.data, xp.asarray(ref[0]))
    xp_assert_close(res.pvalue.data, xp.asarray(ref[1]))


@skip_backend('dask.array', reason='Arrays need `device` attribute: dask/dask#11711')
@skip_backend('jax.numpy', reason="JAX doesn't allow item assignment.")
@skip_backend('torch', reason="array-api-compat#242")
@skip_backend('cupy', reason="special functions won't work")
@pytest.mark.parametrize('method', ['fisher', 'pearson', 'mudholkar_george',
                                    'tippett', 'stouffer'])
@pytest.mark.parametrize('axis', [0, 1, None])
def test_combine_pvalues(method, axis, xp):
    mxp, marrays, narrays = get_arrays(2, xp=xp, shape=(10, 11))

    kwargs = dict(method=method, axis=axis)
    res = stats.combine_pvalues(marrays[0], **kwargs)
    ref = stats.combine_pvalues(narrays[0], nan_policy='omit', **kwargs)

    xp_assert_close(res.statistic.data, xp.asarray(ref.statistic))
    xp_assert_close(res.pvalue.data, xp.asarray(ref.pvalue))

    if method != 'stouffer':
        return

    res = stats.combine_pvalues(marrays[0], weights=marrays[1], **kwargs)
    ref = stats.combine_pvalues(narrays[0], weights=narrays[1],
                                nan_policy='omit', **kwargs)

    xp_assert_close(res.statistic.data, xp.asarray(ref.statistic))
    xp_assert_close(res.pvalue.data, xp.asarray(ref.pvalue))


@skip_backend('dask.array', reason='Arrays need `device` attribute: dask/dask#11711')
@skip_backend('jax.numpy', reason="JAX doesn't allow item assignment.")
@skip_backend('torch', reason="array-api-compat#242")
@skip_backend('cupy', reason="special functions won't work")
def test_ttest_ind_from_stats(xp):
    shape = (10, 11)
    mxp, marrays, narrays = get_arrays(6, xp=xp, shape=shape)
    mask = np.astype(np.sum(np.stack([np.isnan(arg) for arg in narrays]), axis=0), bool)
    narrays = [arg[~mask] for arg in narrays]
    marrays[2], marrays[5] = marrays[2] * 100, marrays[5] * 100
    narrays[2], narrays[5] = narrays[2] * 100, narrays[5] * 100

    res = stats.ttest_ind_from_stats(*marrays)
    ref = stats.ttest_ind_from_stats(*narrays)

    mask = xp.asarray(mask)
    assert xp.any(mask) and xp.any(~mask)
    xp_assert_close(res.statistic.data[~mask], xp.asarray(ref.statistic))
    xp_assert_close(res.pvalue.data[~mask], xp.asarray(ref.pvalue))
    xp_assert_close(res.statistic.mask, mask)
    xp_assert_close(res.pvalue.mask, mask)
    assert res.statistic.shape == shape
    assert res.pvalue.shape == shape


def test_length_nonmasked_marray_iterable_axis_raises():
    xp = marray._get_namespace(np)

    data = [[1.0, 2.0], [3.0, 4.0]]
    mask = [[False, False], [True, False]]
    marr = xp.asarray(data, mask=mask)

    # Axis tuples are not currently supported for MArray input.
    # This test can be removed after support is added.
    with pytest.raises(NotImplementedError,
                       match="`axis` must be an integer or None for use with `MArray`"):
        _length_nonmasked(marr, axis=(0, 1), xp=xp)

@ -1,217 +0,0 @@
import pytest
from pytest import raises as assert_raises, warns as assert_warns

import numpy as np
from numpy.testing import assert_approx_equal, assert_allclose, assert_equal

from scipy.spatial.distance import cdist
from scipy import stats


class TestMGCErrorWarnings:
    """ Tests errors and warnings derived from MGC.
    """
    def test_error_notndarray(self):
        # raises error if x or y is not a ndarray
        x = np.arange(20)
        y = [5] * 20
        assert_raises(ValueError, stats.multiscale_graphcorr, x, y)
        assert_raises(ValueError, stats.multiscale_graphcorr, y, x)

    def test_error_shape(self):
        # raises error if number of samples different (n)
        x = np.arange(100).reshape(25, 4)
        y = x.reshape(10, 10)
        assert_raises(ValueError, stats.multiscale_graphcorr, x, y)

    def test_error_lowsamples(self):
        # raises error if samples are low (< 3)
        x = np.arange(3)
        y = np.arange(3)
        assert_raises(ValueError, stats.multiscale_graphcorr, x, y)

    def test_error_nans(self):
        # raises error if inputs contain NaNs
        x = np.arange(20, dtype=float)
        x[0] = np.nan
        assert_raises(ValueError, stats.multiscale_graphcorr, x, x)

        y = np.arange(20)
        assert_raises(ValueError, stats.multiscale_graphcorr, x, y)

    def test_error_wrongdisttype(self):
        # raises error if metric is not a function
        x = np.arange(20)
        compute_distance = 0
        assert_raises(ValueError, stats.multiscale_graphcorr, x, x,
                      compute_distance=compute_distance)

    @pytest.mark.parametrize("reps", [
        -1,   # reps is negative
        '1',  # reps is not integer
    ])
    def test_error_reps(self, reps):
        # raises error if reps is negative or not an integer
        x = np.arange(20)
        assert_raises(ValueError, stats.multiscale_graphcorr, x, x, reps=reps)

    def test_warns_reps(self):
        # raises warning when reps is less than 1000
        x = np.arange(20)
        reps = 100
        assert_warns(RuntimeWarning, stats.multiscale_graphcorr, x, x, reps=reps)

    def test_error_infty(self):
        # raises error if input contains infinities
        x = np.arange(20)
        y = np.ones(20) * np.inf
        assert_raises(ValueError, stats.multiscale_graphcorr, x, y)


class TestMGCStat:
    """ Test validity of MGC test statistic
    """
    def _simulations(self, samps=100, dims=1, sim_type=""):
        # linear simulation
        if sim_type == "linear":
            x = np.random.uniform(-1, 1, size=(samps, 1))
            y = x + 0.3 * np.random.random_sample(size=(x.size, 1))

        # spiral simulation
        elif sim_type == "nonlinear":
            unif = np.array(np.random.uniform(0, 5, size=(samps, 1)))
            x = unif * np.cos(np.pi * unif)
            y = (unif * np.sin(np.pi * unif) +
                 0.4*np.random.random_sample(size=(x.size, 1)))

        # independence (tests type I simulation)
        elif sim_type == "independence":
            u = np.random.normal(0, 1, size=(samps, 1))
            v = np.random.normal(0, 1, size=(samps, 1))
            u_2 = np.random.binomial(1, p=0.5, size=(samps, 1))
            v_2 = np.random.binomial(1, p=0.5, size=(samps, 1))
            x = u/3 + 2*u_2 - 1
            y = v/3 + 2*v_2 - 1

        # raises error if not approved sim_type
        else:
            raise ValueError("sim_type must be linear, nonlinear, or "
                             "independence")

        # add dimensions of noise for higher dimensions
        if dims > 1:
            dims_noise = np.random.normal(0, 1, size=(samps, dims-1))
            x = np.concatenate((x, dims_noise), axis=1)

        return x, y

    @pytest.mark.xslow
    @pytest.mark.parametrize("sim_type, obs_stat, obs_pvalue", [
        ("linear", 0.97, 1/1000),        # test linear simulation
        ("nonlinear", 0.163, 1/1000),    # test spiral simulation
        ("independence", -0.0094, 0.78)  # test independence simulation
    ])
    def test_oned(self, sim_type, obs_stat, obs_pvalue):
        np.random.seed(12345678)

        # generate x and y
        x, y = self._simulations(samps=100, dims=1, sim_type=sim_type)

        # test stat and pvalue
        stat, pvalue, _ = stats.multiscale_graphcorr(x, y)
        assert_approx_equal(stat, obs_stat, significant=1)
        assert_approx_equal(pvalue, obs_pvalue, significant=1)

    @pytest.mark.xslow
    @pytest.mark.parametrize("sim_type, obs_stat, obs_pvalue", [
        ("linear", 0.184, 1/1000),     # test linear simulation
        ("nonlinear", 0.0190, 0.117),  # test spiral simulation
    ])
    def test_fived(self, sim_type, obs_stat, obs_pvalue):
        np.random.seed(12345678)

        # generate x and y
        x, y = self._simulations(samps=100, dims=5, sim_type=sim_type)

        # test stat and pvalue
        stat, pvalue, _ = stats.multiscale_graphcorr(x, y)
        assert_approx_equal(stat, obs_stat, significant=1)
        assert_approx_equal(pvalue, obs_pvalue, significant=1)

    @pytest.mark.xslow
    def test_twosamp(self):
        np.random.seed(12345678)

        # generate x and y
        x = np.random.binomial(100, 0.5, size=(100, 5))
        y = np.random.normal(0, 1, size=(80, 5))

        # test stat and pvalue
        stat, pvalue, _ = stats.multiscale_graphcorr(x, y)
        assert_approx_equal(stat, 1.0, significant=1)
        assert_approx_equal(pvalue, 0.001, significant=1)

        # generate x and y
        y = np.random.normal(0, 1, size=(100, 5))

        # test stat and pvalue
        stat, pvalue, _ = stats.multiscale_graphcorr(x, y, is_twosamp=True)
        assert_approx_equal(stat, 1.0, significant=1)
        assert_approx_equal(pvalue, 0.001, significant=1)

    @pytest.mark.xslow
    def test_workers(self):
        np.random.seed(12345678)

        # generate x and y
        x, y = self._simulations(samps=100, dims=1, sim_type="linear")

        # test stat and pvalue
        stat, pvalue, _ = stats.multiscale_graphcorr(x, y, workers=2)
        assert_approx_equal(stat, 0.97, significant=1)
        assert_approx_equal(pvalue, 0.001, significant=1)

    @pytest.mark.xslow
    def test_random_state(self):
        # generate x and y
        x, y = self._simulations(samps=100, dims=1, sim_type="linear")

        # test stat and pvalue
        stat, pvalue, _ = stats.multiscale_graphcorr(x, y, random_state=1)
        assert_approx_equal(stat, 0.97, significant=1)
        assert_approx_equal(pvalue, 0.001, significant=1)

    @pytest.mark.xslow
    def test_dist_perm(self):
        np.random.seed(12345678)
        # generate x and y
        x, y = self._simulations(samps=100, dims=1, sim_type="nonlinear")
        distx = cdist(x, x, metric="euclidean")
        disty = cdist(y, y, metric="euclidean")

        stat_dist, pvalue_dist, _ = stats.multiscale_graphcorr(distx, disty,
                                                               compute_distance=None,
                                                               random_state=1)
        assert_approx_equal(stat_dist, 0.163, significant=1)
        assert_approx_equal(pvalue_dist, 0.001, significant=1)

    @pytest.mark.fail_slow(20)  # all other tests are XSLOW; we need at least one to run
    @pytest.mark.slow
    def test_pvalue_literature(self):
        np.random.seed(12345678)

        # generate x and y
        x, y = self._simulations(samps=100, dims=1, sim_type="linear")

        # test stat and pvalue
        _, pvalue, _ = stats.multiscale_graphcorr(x, y, random_state=1)
        assert_allclose(pvalue, 1/1001)

    @pytest.mark.xslow
    def test_alias(self):
        np.random.seed(12345678)

        # generate x and y
        x, y = self._simulations(samps=100, dims=1, sim_type="linear")

        res = stats.multiscale_graphcorr(x, y, random_state=1)
        assert_equal(res.stat, res.statistic)
File diff suppressed because it is too large
File diff suppressed because it is too large

@ -1,172 +0,0 @@
import numpy as np
import numpy.ma as ma
import scipy.stats.mstats as ms

from numpy.testing import (assert_equal, assert_almost_equal, assert_,
                           assert_allclose)


def test_compare_medians_ms():
    x = np.arange(7)
    y = x + 10
    assert_almost_equal(ms.compare_medians_ms(x, y), 0)

    y2 = np.linspace(0, 1, num=10)
    assert_almost_equal(ms.compare_medians_ms(x, y2), 0.017116406778)


def test_hdmedian():
    # 1-D array
    x = ma.arange(11)
    assert_allclose(ms.hdmedian(x), 5, rtol=1e-14)
    x.mask = ma.make_mask(x)
    x.mask[:7] = False
    assert_allclose(ms.hdmedian(x), 3, rtol=1e-14)

    # Check that `var` keyword returns a value. TODO: check whether returned
    # value is actually correct.
    assert_(ms.hdmedian(x, var=True).size == 2)

    # 2-D array
    x2 = ma.arange(22).reshape((11, 2))
    assert_allclose(ms.hdmedian(x2, axis=0), [10, 11])
    x2.mask = ma.make_mask(x2)
    x2.mask[:7, :] = False
    assert_allclose(ms.hdmedian(x2, axis=0), [6, 7])


def test_rsh():
    np.random.seed(132345)
    x = np.random.randn(100)
    res = ms.rsh(x)
    # Just a sanity check that the code runs and output shape is correct.
    # TODO: check that implementation is correct.
    assert_(res.shape == x.shape)

    # Check points keyword
    res = ms.rsh(x, points=[0, 1.])
    assert_(res.size == 2)


def test_mjci():
    # Tests the Maritz-Jarrett estimator
    data = ma.array([77, 87, 88, 114, 151, 210, 219, 246, 253, 262,
                     296, 299, 306, 376, 428, 515, 666, 1310, 2611])
    assert_almost_equal(ms.mjci(data), [55.76819, 45.84028, 198.87875], 5)


def test_trimmed_mean_ci():
    # Tests the confidence intervals of the trimmed mean.
    data = ma.array([545, 555, 558, 572, 575, 576, 578, 580,
                     594, 605, 635, 651, 653, 661, 666])
    assert_almost_equal(ms.trimmed_mean(data, 0.2), 596.2, 1)
    assert_equal(np.round(ms.trimmed_mean_ci(data, (0.2, 0.2)), 1),
                 [561.8, 630.6])


def test_idealfourths():
    # Tests ideal-fourths
    test = np.arange(100)
    assert_almost_equal(np.asarray(ms.idealfourths(test)),
                        [24.416667, 74.583333], 6)
    test_2D = test.repeat(3).reshape(-1, 3)
    assert_almost_equal(ms.idealfourths(test_2D, axis=0),
                        [[24.416667, 24.416667, 24.416667],
                         [74.583333, 74.583333, 74.583333]], 6)
    assert_almost_equal(ms.idealfourths(test_2D, axis=1),
                        test.repeat(2).reshape(-1, 2))
    test = [0, 0]
    _result = ms.idealfourths(test)
    assert_(np.isnan(_result).all())


class TestQuantiles:
    data = [0.706560797,0.727229578,0.990399276,0.927065621,0.158953014,
            0.887764025,0.239407086,0.349638551,0.972791145,0.149789972,
            0.936947700,0.132359948,0.046041972,0.641675031,0.945530547,
            0.224218684,0.771450991,0.820257774,0.336458052,0.589113496,
            0.509736129,0.696838829,0.491323573,0.622767425,0.775189248,
            0.641461450,0.118455200,0.773029450,0.319280007,0.752229111,
            0.047841438,0.466295911,0.583850781,0.840581845,0.550086491,
            0.466470062,0.504765074,0.226855960,0.362641207,0.891620942,
            0.127898691,0.490094097,0.044882048,0.041441695,0.317976349,
            0.504135618,0.567353033,0.434617473,0.636243375,0.231803616,
            0.230154113,0.160011327,0.819464108,0.854706985,0.438809221,
            0.487427267,0.786907310,0.408367937,0.405534192,0.250444460,
            0.995309248,0.144389588,0.739947527,0.953543606,0.680051621,
            0.388382017,0.863530727,0.006514031,0.118007779,0.924024803,
            0.384236354,0.893687694,0.626534881,0.473051932,0.750134705,
            0.241843555,0.432947602,0.689538104,0.136934797,0.150206859,
            0.474335206,0.907775349,0.525869295,0.189184225,0.854284286,
            0.831089744,0.251637345,0.587038213,0.254475554,0.237781276,
            0.827928620,0.480283781,0.594514455,0.213641488,0.024194386,
            0.536668589,0.699497811,0.892804071,0.093835427,0.731107772]

    def test_hdquantiles(self):
        data = self.data
        assert_almost_equal(ms.hdquantiles(data, [0., 1.]),
                            [0.006514031, 0.995309248])
        hdq = ms.hdquantiles(data, [0.25, 0.5, 0.75])
        assert_almost_equal(hdq, [0.253210762, 0.512847491, 0.762232442])

        data = np.array(data).reshape(10, 10)
        hdq = ms.hdquantiles(data, [0.25, 0.5, 0.75], axis=0)
        assert_almost_equal(hdq[:, 0], ms.hdquantiles(data[:, 0], [0.25, 0.5, 0.75]))
        assert_almost_equal(hdq[:, -1], ms.hdquantiles(data[:, -1], [0.25, 0.5, 0.75]))
        hdq = ms.hdquantiles(data, [0.25, 0.5, 0.75], axis=0, var=True)
        assert_almost_equal(hdq[..., 0],
                            ms.hdquantiles(data[:, 0], [0.25, 0.5, 0.75], var=True))
        assert_almost_equal(hdq[..., -1],
                            ms.hdquantiles(data[:, -1], [0.25, 0.5, 0.75], var=True))

    def test_hdquantiles_sd(self):
        # Standard deviation is a jackknife estimator, so we can check if
        # the efficient version (hdquantiles_sd) matches a rudimentary,
        # but clear version here.

        hd_std_errs = ms.hdquantiles_sd(self.data)

        # jackknife standard error, Introduction to the Bootstrap Eq. 11.5
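        # (i.e. se_jack = sqrt((n-1)/n * sum_i (theta_i - theta_bar)**2), where
        # theta_i is the estimate with observation i left out; `jstd` below
        # computes exactly this.)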
        n = len(self.data)
        jdata = np.broadcast_to(self.data, (n, n))
        jselector = np.logical_not(np.eye(n))  # leave out one sample each row
        jdata = jdata[jselector].reshape(n, n-1)
        jdist = ms.hdquantiles(jdata, axis=1)
        jdist_mean = np.mean(jdist, axis=0)
        jstd = ((n-1)/n * np.sum((jdist - jdist_mean)**2, axis=0))**.5

        assert_almost_equal(hd_std_errs, jstd)
        # Test actual values for good measure
        assert_almost_equal(hd_std_errs, [0.0379258, 0.0380656, 0.0380013])

        two_data_points = ms.hdquantiles_sd([1, 2])
        assert_almost_equal(two_data_points, [0.5, 0.5, 0.5])

    def test_mquantiles_cimj(self):
        # Only test that code runs, implementation not checked for correctness
        ci_lower, ci_upper = ms.mquantiles_cimj(self.data)
        assert_(ci_lower.size == ci_upper.size == 3)


def test_median_cihs():
    # Basic test against R library EnvStats function `eqnpar`, e.g.
    # library(EnvStats)
    # options(digits=8)
    # x = c(0.88612955, 0.35242375, 0.66240904, 0.94617974, 0.10929913,
    #       0.76699506, 0.88550655, 0.62763754, 0.76818588, 0.68506508,
    #       0.88043148, 0.03911248, 0.93805564, 0.95326961, 0.25291112,
    #       0.16128487, 0.49784577, 0.24588924, 0.6597, 0.92239679)
    # eqnpar(x, p=0.5,
    #        ci.method = "interpolate", approx.conf.level = 0.95, ci = TRUE)
    rng = np.random.default_rng(8824288259505800535)
    x = rng.random(size=20)
    assert_allclose(ms.median_cihs(x), (0.38663198, 0.88431272))

    # SciPy's 90% CI upper limit doesn't match that of EnvStats eqnpar. SciPy
    # doesn't look wrong, and it agrees with a different reference,
    # `median_confint_hs` from `hoehleatsu/quantileCI`.
    # In (e.g.) Colab with R runtime:
    # devtools::install_github("hoehleatsu/quantileCI")
    # library(quantileCI)
    # median_confint_hs(x=x, conf.level=0.90, interpolate=TRUE)
    assert_allclose(ms.median_cihs(x, 0.1), (0.48319773366, 0.88094268050))

@ -1,405 +0,0 @@
import copy

import numpy as np
import pytest
from numpy.testing import assert_allclose

from scipy import stats
from scipy.stats._multicomp import _pvalue_dunnett, DunnettResult


class TestDunnett:
    # For the following tests, p-values were computed using Matlab, e.g.
    # sample = [18. 15. 18. 16. 17. 15. 14. 14. 14. 15. 15....
    #           14. 15. 14. 22. 18. 21. 21. 10. 10. 11. 9....
    #           25. 26. 17.5 16. 15.5 14.5 22. 22. 24. 22.5 29....
    #           24.5 20. 18. 18.5 17.5 26.5 13. 16.5 13. 13. 13....
    #           28. 27. 34. 31. 29. 27. 24. 23. 38. 36. 25....
    #           38. 26. 22. 36. 27. 27. 32. 28. 31....
    #           24. 27. 33. 32. 28. 19. 37. 31. 36. 36....
    #           34. 38. 32. 38. 32....
    #           26. 24. 26. 25. 29. 29.5 16.5 36. 44....
    #           25. 27. 19....
    #           25. 20....
    #           28.];
    # j = [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
    #      0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
    #      0 0 0 0...
    #      1 1 1 1 1 1 1 1 1 1 1 1 1 1 1...
    #      2 2 2 2 2 2 2 2 2...
    #      3 3 3...
    #      4 4...
    #      5];
    # [~, ~, stats] = anova1(sample, j, "off");
    # [results, ~, ~, gnames] = multcompare(stats, ...
    #     "CriticalValueType", "dunnett", ...
    #     "Approximate", false);
    # tbl = array2table(results, "VariableNames", ...
    #     ["Group", "Control Group", "Lower Limit", ...
    #      "Difference", "Upper Limit", "P-value"]);
    # tbl.("Group") = gnames(tbl.("Group"));
    # tbl.("Control Group") = gnames(tbl.("Control Group"))

    # Matlab doesn't report the statistic, so the statistics were
    # computed using R multcomp `glht`, e.g.:
    # library(multcomp)
    # options(digits=16)
    # control <- c(18.0, 15.0, 18.0, 16.0, 17.0, 15.0, 14.0, 14.0, 14.0,
    #              15.0, 15.0, 14.0, 15.0, 14.0, 22.0, 18.0, 21.0, 21.0,
    #              10.0, 10.0, 11.0, 9.0, 25.0, 26.0, 17.5, 16.0, 15.5,
    #              14.5, 22.0, 22.0, 24.0, 22.5, 29.0, 24.5, 20.0, 18.0,
    #              18.5, 17.5, 26.5, 13.0, 16.5, 13.0, 13.0, 13.0, 28.0,
    #              27.0, 34.0, 31.0, 29.0, 27.0, 24.0, 23.0, 38.0, 36.0,
    #              25.0, 38.0, 26.0, 22.0, 36.0, 27.0, 27.0, 32.0, 28.0,
    #              31.0)
    # t <- c(24.0, 27.0, 33.0, 32.0, 28.0, 19.0, 37.0, 31.0, 36.0, 36.0,
    #        34.0, 38.0, 32.0, 38.0, 32.0)
    # w <- c(26.0, 24.0, 26.0, 25.0, 29.0, 29.5, 16.5, 36.0, 44.0)
    # x <- c(25.0, 27.0, 19.0)
    # y <- c(25.0, 20.0)
    # z <- c(28.0)
    #
    # groups = factor(rep(c("control", "t", "w", "x", "y", "z"),
    #                     times=c(length(control), length(t), length(w),
    #                             length(x), length(y), length(z))))
    # df <- data.frame(response=c(control, t, w, x, y, z),
    #                  group=groups)
    # model <- aov(response ~ group, data = df)
    # test <- glht(model=model,
    #              linfct=mcp(group="Dunnett"),
    #              alternative="g")
    # summary(test)
    # confint(test)
    # p-values agreed with those produced by Matlab to at least atol=1e-3

    # From Matlab's documentation on multcompare
    samples_1 = [
        [
            24.0, 27.0, 33.0, 32.0, 28.0, 19.0, 37.0, 31.0, 36.0, 36.0,
            34.0, 38.0, 32.0, 38.0, 32.0
        ],
        [26.0, 24.0, 26.0, 25.0, 29.0, 29.5, 16.5, 36.0, 44.0],
        [25.0, 27.0, 19.0],
        [25.0, 20.0],
        [28.0]
    ]
    control_1 = [
        18.0, 15.0, 18.0, 16.0, 17.0, 15.0, 14.0, 14.0, 14.0, 15.0, 15.0,
        14.0, 15.0, 14.0, 22.0, 18.0, 21.0, 21.0, 10.0, 10.0, 11.0, 9.0,
        25.0, 26.0, 17.5, 16.0, 15.5, 14.5, 22.0, 22.0, 24.0, 22.5, 29.0,
        24.5, 20.0, 18.0, 18.5, 17.5, 26.5, 13.0, 16.5, 13.0, 13.0, 13.0,
        28.0, 27.0, 34.0, 31.0, 29.0, 27.0, 24.0, 23.0, 38.0, 36.0, 25.0,
        38.0, 26.0, 22.0, 36.0, 27.0, 27.0, 32.0, 28.0, 31.0
    ]
    pvalue_1 = [4.727e-06, 0.022346, 0.97912, 0.99953, 0.86579]  # Matlab
    # Statistic, alternative p-values, and CIs computed with R multcomp `glht`
    p_1_twosided = [1e-4, 0.02237, 0.97913, 0.99953, 0.86583]
    p_1_greater = [1e-4, 0.011217, 0.768500, 0.896991, 0.577211]
    p_1_less = [1, 1, 0.99660, 0.98398, .99953]
    statistic_1 = [5.27356, 2.91270, 0.60831, 0.27002, 0.96637]
    ci_1_twosided = [[5.3633917835622, 0.7296142201217, -8.3879817106607,
                      -11.9090753452911, -11.7655021543469],
                     [15.9709832164378, 13.8936496687672, 13.4556900439941,
                      14.6434503452911, 25.4998771543469]]
    ci_1_greater = [5.9036402398526, 1.4000632918725, -7.2754756323636,
                    -10.5567456382391, -9.8675629499576]
    ci_1_less = [15.4306165948619, 13.2230539537359, 12.3429406339544,
                 13.2908248513211, 23.6015228251660]
    pvalues_1 = dict(twosided=p_1_twosided, less=p_1_less, greater=p_1_greater)
    cis_1 = dict(twosided=ci_1_twosided, less=ci_1_less, greater=ci_1_greater)
    case_1 = dict(samples=samples_1, control=control_1, statistic=statistic_1,
                  pvalues=pvalues_1, cis=cis_1)

    # From Dunnett1955 comparing with R's DescTools: DunnettTest
    samples_2 = [[9.76, 8.80, 7.68, 9.36], [12.80, 9.68, 12.16, 9.20, 10.55]]
    control_2 = [7.40, 8.50, 7.20, 8.24, 9.84, 8.32]
    pvalue_2 = [0.6201, 0.0058]
    # Statistic, alternative p-values, and CIs computed with R multcomp `glht`
    p_2_twosided = [0.6201020, 0.0058254]
    p_2_greater = [0.3249776, 0.0029139]
    p_2_less = [0.91676, 0.99984]
    statistic_2 = [0.85703, 3.69375]
    ci_2_twosided = [[-1.2564116462124, 0.8396273539789],
                     [2.5564116462124, 4.4163726460211]]
    ci_2_greater = [-0.9588591188156, 1.1187563667543]
    ci_2_less = [2.2588591188156, 4.1372436332457]
    pvalues_2 = dict(twosided=p_2_twosided, less=p_2_less, greater=p_2_greater)
    cis_2 = dict(twosided=ci_2_twosided, less=ci_2_less, greater=ci_2_greater)
    case_2 = dict(samples=samples_2, control=control_2, statistic=statistic_2,
                  pvalues=pvalues_2, cis=cis_2)

    samples_3 = [[55, 64, 64], [55, 49, 52], [50, 44, 41]]
    control_3 = [55, 47, 48]
    pvalue_3 = [0.0364, 0.8966, 0.4091]
    # Statistic, alternative p-values, and CIs computed with R multcomp `glht`
    p_3_twosided = [0.036407, 0.896539, 0.409295]
    p_3_greater = [0.018277, 0.521109, 0.981892]
    p_3_less = [0.99944, 0.90054, 0.20974]
    statistic_3 = [3.09073, 0.56195, -1.40488]
    ci_3_twosided = [[0.7529028025053, -8.2470971974947, -15.2470971974947],
                     [21.2470971974947, 12.2470971974947, 5.2470971974947]]
    ci_3_greater = [2.4023682323149, -6.5976317676851, -13.5976317676851]
    ci_3_less = [19.5984402363662, 10.5984402363662, 3.5984402363662]
    pvalues_3 = dict(twosided=p_3_twosided, less=p_3_less, greater=p_3_greater)
    cis_3 = dict(twosided=ci_3_twosided, less=ci_3_less, greater=ci_3_greater)
    case_3 = dict(samples=samples_3, control=control_3, statistic=statistic_3,
                  pvalues=pvalues_3, cis=cis_3)

    # From Thomson and Short,
    # Mucociliary function in health, chronic obstructive airway disease,
    # and asbestosis, Journal of Applied Physiology, 1969. Table 1
    # Comparing with R's DescTools: DunnettTest
    samples_4 = [[3.8, 2.7, 4.0, 2.4], [2.8, 3.4, 3.7, 2.2, 2.0]]
    control_4 = [2.9, 3.0, 2.5, 2.6, 3.2]
    pvalue_4 = [0.5832, 0.9982]
    # Statistic, alternative p-values, and CIs computed with R multcomp `glht`
    p_4_twosided = [0.58317, 0.99819]
    p_4_greater = [0.30225, 0.69115]
    p_4_less = [0.91929, 0.65212]
    statistic_4 = [0.90875, -0.05007]
    ci_4_twosided = [[-0.6898153448579, -1.0333456251632],
                     [1.4598153448579, 0.9933456251632]]
    ci_4_greater = [-0.5186459268412, -0.8719655502147]
    ci_4_less = [1.2886459268412, 0.8319655502147]
    pvalues_4 = dict(twosided=p_4_twosided, less=p_4_less, greater=p_4_greater)
    cis_4 = dict(twosided=ci_4_twosided, less=ci_4_less, greater=ci_4_greater)
    case_4 = dict(samples=samples_4, control=control_4, statistic=statistic_4,
                  pvalues=pvalues_4, cis=cis_4)

    @pytest.mark.parametrize(
        'rho, n_groups, df, statistic, pvalue, alternative',
        [
            # From Dunnett1955
            # Tables 1a and 1b pages 1117-1118
            (0.5, 1, 10, 1.81, 0.05, "greater"),  # different than two-sided
            (0.5, 3, 10, 2.34, 0.05, "greater"),
            (0.5, 2, 30, 1.99, 0.05, "greater"),
            (0.5, 5, 30, 2.33, 0.05, "greater"),
            (0.5, 4, 12, 3.32, 0.01, "greater"),
            (0.5, 7, 12, 3.56, 0.01, "greater"),
            (0.5, 2, 60, 2.64, 0.01, "greater"),
            (0.5, 4, 60, 2.87, 0.01, "greater"),
            (0.5, 4, 60, [2.87, 2.21], [0.01, 0.05], "greater"),
            # Tables 2a and 2b pages 1119-1120
            (0.5, 1, 10, 2.23, 0.05, "two-sided"),  # two-sided
            (0.5, 3, 10, 2.81, 0.05, "two-sided"),
            (0.5, 2, 30, 2.32, 0.05, "two-sided"),
            (0.5, 3, 20, 2.57, 0.05, "two-sided"),
            (0.5, 4, 12, 3.76, 0.01, "two-sided"),
            (0.5, 7, 12, 4.08, 0.01, "two-sided"),
            (0.5, 2, 60, 2.90, 0.01, "two-sided"),
            (0.5, 4, 60, 3.14, 0.01, "two-sided"),
            (0.5, 4, 60, [3.14, 2.55], [0.01, 0.05], "two-sided"),
        ],
    )
    def test_critical_values(
        self, rho, n_groups, df, statistic, pvalue, alternative
    ):
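        # A critical value tabulated at level `pvalue` should reproduce
        # approximately that p-value (within the Monte Carlo tolerance below).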
        rng = np.random.default_rng(165250594791731684851746311027739134893)
        rho = np.full((n_groups, n_groups), rho)
        np.fill_diagonal(rho, 1)

        statistic = np.array(statistic)
        res = _pvalue_dunnett(
            rho=rho, df=df, statistic=statistic,
            alternative=alternative,
            rng=rng
        )
        assert_allclose(res, pvalue, atol=5e-3)

    @pytest.mark.parametrize(
        'samples, control, pvalue, statistic',
        [
            (samples_1, control_1, pvalue_1, statistic_1),
            (samples_2, control_2, pvalue_2, statistic_2),
            (samples_3, control_3, pvalue_3, statistic_3),
            (samples_4, control_4, pvalue_4, statistic_4),
        ]
    )
    def test_basic(self, samples, control, pvalue, statistic):
        rng = np.random.default_rng(11681140010308601919115036826969764808)

        res = stats.dunnett(*samples, control=control, rng=rng)

        assert isinstance(res, DunnettResult)
        assert_allclose(res.statistic, statistic, rtol=5e-5)
        assert_allclose(res.pvalue, pvalue, rtol=1e-2, atol=1e-4)

    @pytest.mark.parametrize(
        'alternative',
        ['two-sided', 'less', 'greater']
    )
    def test_ttest_ind(self, alternative):
        # check that `dunnett` agrees with `ttest_ind`
        # when there are only two groups
        rng = np.random.default_rng(114184017807316971636137493526995620351)

        for _ in range(10):
            sample = rng.integers(-100, 100, size=(10,))
            control = rng.integers(-100, 100, size=(10,))

            # preserve use of old random_state during SPEC 7 transition
            res = stats.dunnett(
                sample, control=control,
                alternative=alternative, random_state=rng
            )
            ref = stats.ttest_ind(
                sample, control,
                alternative=alternative
            )

            assert_allclose(res.statistic, ref.statistic, rtol=1e-3, atol=1e-5)
            assert_allclose(res.pvalue, ref.pvalue, rtol=1e-3, atol=1e-5)

    @pytest.mark.parametrize(
        'alternative, pvalue',
        [
            ('less', [0, 1]),
            ('greater', [1, 0]),
            ('two-sided', [0, 0]),
        ]
    )
    def test_alternatives(self, alternative, pvalue):
        rng = np.random.default_rng(114184017807316971636137493526995620351)

        # each sample spans a width of 20; the minimum difference between a
        # sample and the control is 60 and the maximum difference is 100
        sample_less = rng.integers(0, 20, size=(10,))
        control = rng.integers(80, 100, size=(10,))
        sample_greater = rng.integers(160, 180, size=(10,))

        res = stats.dunnett(
            sample_less, sample_greater, control=control,
            alternative=alternative, rng=rng
        )
        assert_allclose(res.pvalue, pvalue, atol=1e-7)

        ci = res.confidence_interval()
        # two-sided is comparable for high/low
        if alternative == 'less':
            assert np.isneginf(ci.low).all()
            assert -100 < ci.high[0] < -60
            assert 60 < ci.high[1] < 100
        elif alternative == 'greater':
            assert -100 < ci.low[0] < -60
            assert 60 < ci.low[1] < 100
            assert np.isposinf(ci.high).all()
        elif alternative == 'two-sided':
            assert -100 < ci.low[0] < -60
            assert 60 < ci.low[1] < 100
            assert -100 < ci.high[0] < -60
            assert 60 < ci.high[1] < 100

    @pytest.mark.parametrize("case", [case_1, case_2, case_3, case_4])
    @pytest.mark.parametrize("alternative", ['less', 'greater', 'two-sided'])
    def test_against_R_multicomp_glht(self, case, alternative):
        rng = np.random.default_rng(189117774084579816190295271136455278291)
        samples = case['samples']
        control = case['control']
        alternatives = {'less': 'less', 'greater': 'greater',
                        'two-sided': 'twosided'}
        p_ref = case['pvalues'][alternative.replace('-', '')]

        res = stats.dunnett(*samples, control=control, alternative=alternative,
                            rng=rng)
        # atol can't be tighter because R reports some pvalues as "< 1e-4"
        assert_allclose(res.pvalue, p_ref, rtol=5e-3, atol=1e-4)

        ci_ref = case['cis'][alternatives[alternative]]
        if alternative == "greater":
            ci_ref = [ci_ref, np.inf]
        elif alternative == "less":
            ci_ref = [-np.inf, ci_ref]
        assert res._ci is None
        assert res._ci_cl is None
        ci = res.confidence_interval(confidence_level=0.95)
        assert_allclose(ci.low, ci_ref[0], rtol=5e-3, atol=1e-5)
        assert_allclose(ci.high, ci_ref[1], rtol=5e-3, atol=1e-5)

# re-run to use the cached value; `is` checks that the same object is returned
|
||||
assert res._ci is ci
|
||||
assert res._ci_cl == 0.95
|
||||
ci_ = res.confidence_interval(confidence_level=0.95)
|
||||
assert ci_ is ci
|
||||
|
||||
@pytest.mark.parametrize('alternative', ["two-sided", "less", "greater"])
|
||||
def test_str(self, alternative):
|
||||
rng = np.random.default_rng(189117774084579816190295271136455278291)
|
||||
|
||||
res = stats.dunnett(
|
||||
*self.samples_3, control=self.control_3, alternative=alternative,
|
||||
rng=rng
|
||||
)
|
||||
|
||||
# check some str output
|
||||
res_str = str(res)
|
||||
assert '(Sample 2 - Control)' in res_str
|
||||
assert '95.0%' in res_str
|
||||
|
||||
if alternative == 'less':
|
||||
assert '-inf' in res_str
|
||||
assert '19.' in res_str
|
||||
elif alternative == 'greater':
|
||||
assert 'inf' in res_str
|
||||
assert '-13.' in res_str
|
||||
else:
|
||||
assert 'inf' not in res_str
|
||||
assert '21.' in res_str
|
||||
|
||||
def test_warnings(self):
|
||||
rng = np.random.default_rng(189117774084579816190295271136455278291)
|
||||
|
||||
res = stats.dunnett(
|
||||
*self.samples_3, control=self.control_3, rng=rng
|
||||
)
|
||||
msg = r"Computation of the confidence interval did not converge"
|
||||
with pytest.warns(UserWarning, match=msg):
|
||||
res._allowance(tol=1e-5)
|
||||
|
||||
def test_raises(self):
|
||||
samples, control = self.samples_3, self.control_3
|
||||
|
||||
# alternative
|
||||
with pytest.raises(ValueError, match="alternative must be"):
|
||||
stats.dunnett(*samples, control=control, alternative='bob')
|
||||
|
||||
# 2D for a sample
|
||||
samples_ = copy.deepcopy(samples)
|
||||
samples_[0] = [samples_[0]]
|
||||
with pytest.raises(ValueError, match="must be 1D arrays"):
|
||||
stats.dunnett(*samples_, control=control)
|
||||
|
||||
# 2D for control
|
||||
control_ = copy.deepcopy(control)
|
||||
control_ = [control_]
|
||||
with pytest.raises(ValueError, match="must be 1D arrays"):
|
||||
stats.dunnett(*samples, control=control_)
|
||||
|
||||
# No obs in a sample
|
||||
samples_ = copy.deepcopy(samples)
|
||||
samples_[1] = []
|
||||
with pytest.raises(ValueError, match="at least 1 observation"):
|
||||
stats.dunnett(*samples_, control=control)
|
||||
|
||||
# No obs in control
|
||||
control_ = []
|
||||
with pytest.raises(ValueError, match="at least 1 observation"):
|
||||
stats.dunnett(*samples, control=control_)
|
||||
|
||||
res = stats.dunnett(*samples, control=control)
|
||||
with pytest.raises(ValueError, match="Confidence level must"):
|
||||
res.confidence_interval(confidence_level=3)
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Computation of the confidence")
|
||||
@pytest.mark.parametrize('n_samples', [1, 2, 3])
|
||||
def test_shapes(self, n_samples):
|
||||
rng = np.random.default_rng(689448934110805334)
|
||||
samples = rng.normal(size=(n_samples, 10))
|
||||
control = rng.normal(size=10)
|
||||
res = stats.dunnett(*samples, control=control, rng=rng)
|
||||
assert res.statistic.shape == (n_samples,)
|
||||
assert res.pvalue.shape == (n_samples,)
|
||||
ci = res.confidence_interval()
|
||||
assert ci.low.shape == (n_samples,)
|
||||
assert ci.high.shape == (n_samples,)
@@ -1,148 +0,0 @@
|
|||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_equal, assert_allclose
|
||||
from .._discrete_distns import nchypergeom_fisher, hypergeom
|
||||
from scipy.stats._odds_ratio import odds_ratio
|
||||
from .data.fisher_exact_results_from_r import data
|
||||
|
||||
|
||||
class TestOddsRatio:
|
||||
|
||||
@pytest.mark.parametrize('parameters, rresult', data)
|
||||
def test_results_from_r(self, parameters, rresult):
|
||||
alternative = parameters.alternative.replace('.', '-')
|
||||
result = odds_ratio(parameters.table)
|
||||
# The results computed by R are not very accurate.
|
||||
if result.statistic < 400:
|
||||
or_rtol = 5e-4
|
||||
ci_rtol = 2e-2
|
||||
else:
|
||||
or_rtol = 5e-2
|
||||
ci_rtol = 1e-1
|
||||
assert_allclose(result.statistic,
|
||||
rresult.conditional_odds_ratio, rtol=or_rtol)
|
||||
ci = result.confidence_interval(parameters.confidence_level,
|
||||
alternative)
|
||||
assert_allclose((ci.low, ci.high), rresult.conditional_odds_ratio_ci,
|
||||
rtol=ci_rtol)
|
||||
|
||||
# Also do a self-check for the conditional odds ratio.
|
||||
# With the computed conditional odds ratio as the noncentrality
|
||||
# parameter of the noncentral hypergeometric distribution with
|
||||
# parameters table.sum(), table[0].sum(), and table[:,0].sum() as
|
||||
# total, ngood and nsample, respectively, the mean of the distribution
|
||||
# should equal table[0, 0].
|
||||
cor = result.statistic
|
||||
table = np.array(parameters.table)
|
||||
total = table.sum()
|
||||
ngood = table[0].sum()
|
||||
nsample = table[:, 0].sum()
|
||||
# nchypergeom_fisher does not allow the edge cases where the
|
||||
# noncentrality parameter is 0 or inf, so handle those values
|
||||
# separately here.
|
||||
if cor == 0:
|
||||
nchg_mean = hypergeom.support(total, ngood, nsample)[0]
|
||||
elif cor == np.inf:
|
||||
nchg_mean = hypergeom.support(total, ngood, nsample)[1]
|
||||
else:
|
||||
nchg_mean = nchypergeom_fisher.mean(total, ngood, nsample, cor)
|
||||
assert_allclose(nchg_mean, table[0, 0], rtol=1e-13)
|
||||
|
||||
# Check that the confidence interval is correct.
|
||||
alpha = 1 - parameters.confidence_level
|
||||
if alternative == 'two-sided':
|
||||
if ci.low > 0:
|
||||
sf = nchypergeom_fisher.sf(table[0, 0] - 1,
|
||||
total, ngood, nsample, ci.low)
|
||||
assert_allclose(sf, alpha/2, rtol=1e-11)
|
||||
if np.isfinite(ci.high):
|
||||
cdf = nchypergeom_fisher.cdf(table[0, 0],
|
||||
total, ngood, nsample, ci.high)
|
||||
assert_allclose(cdf, alpha/2, rtol=1e-11)
|
||||
elif alternative == 'less':
|
||||
if np.isfinite(ci.high):
|
||||
cdf = nchypergeom_fisher.cdf(table[0, 0],
|
||||
total, ngood, nsample, ci.high)
|
||||
assert_allclose(cdf, alpha, rtol=1e-11)
|
||||
else:
|
||||
# alternative == 'greater'
|
||||
if ci.low > 0:
|
||||
sf = nchypergeom_fisher.sf(table[0, 0] - 1,
|
||||
total, ngood, nsample, ci.low)
|
||||
assert_allclose(sf, alpha, rtol=1e-11)
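# Standalone sketch of the self-check described above (not part of the
# original suite; `_conditional_or_check` is a hypothetical helper).  For a
# 2x2 table with no zero marginals, the conditional odds ratio is the
# noncentrality at which the mean of Fisher's noncentral hypergeometric
# distribution equals the observed top-left cell.
def _conditional_or_check(table):
    table = np.asarray(table)
    total, ngood, nsample = table.sum(), table[0].sum(), table[:, 0].sum()
    cor = odds_ratio(table).statistic  # kind='conditional' is the default
    return nchypergeom_fisher.mean(total, ngood, nsample, cor), table[0, 0]

# e.g. _conditional_or_check([[10, 20], [41, 93]]) returns two values that
# agree to high precision.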
|
||||
|
||||
@pytest.mark.parametrize('table', [
|
||||
[[0, 0], [5, 10]],
|
||||
[[5, 10], [0, 0]],
|
||||
[[0, 5], [0, 10]],
|
||||
[[5, 0], [10, 0]],
|
||||
])
|
||||
def test_row_or_col_zero(self, table):
|
||||
result = odds_ratio(table)
|
||||
assert_equal(result.statistic, np.nan)
|
||||
ci = result.confidence_interval()
|
||||
assert_equal((ci.low, ci.high), (0, np.inf))
|
||||
|
||||
@pytest.mark.parametrize("case",
|
||||
[[0.95, 'two-sided', 0.4879913, 2.635883],
|
||||
[0.90, 'two-sided', 0.5588516, 2.301663]])
|
||||
def test_sample_odds_ratio_ci(self, case):
|
||||
# Compare the sample odds ratio confidence interval to the R function
|
||||
# oddsratio.wald from the epitools package, e.g.
|
||||
# > library(epitools)
|
||||
# > table = matrix(c(10, 20, 41, 93), nrow=2, ncol=2, byrow=TRUE)
|
||||
# > result = oddsratio.wald(table)
|
||||
# > result$measure
|
||||
# odds ratio with 95% C.I.
|
||||
# Predictor estimate lower upper
|
||||
# Exposed1 1.000000 NA NA
|
||||
# Exposed2 1.134146 0.4879913 2.635883
|
||||
|
||||
confidence_level, alternative, ref_low, ref_high = case
|
||||
table = [[10, 20], [41, 93]]
|
||||
result = odds_ratio(table, kind='sample')
|
||||
assert_allclose(result.statistic, 1.134146, rtol=1e-6)
|
||||
ci = result.confidence_interval(confidence_level, alternative)
|
||||
assert_allclose([ci.low, ci.high], [ref_low, ref_high], rtol=1e-6)
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.parametrize('alternative', ['less', 'greater', 'two-sided'])
|
||||
def test_sample_odds_ratio_one_sided_ci(self, alternative):
|
||||
# can't find a good reference for one-sided CI, so bump up the sample
|
||||
# size and compare against the conditional odds ratio CI
|
||||
table = [[1000, 2000], [4100, 9300]]
|
||||
res = odds_ratio(table, kind='sample')
|
||||
ref = odds_ratio(table, kind='conditional')
|
||||
assert_allclose(res.statistic, ref.statistic, atol=1e-5)
|
||||
assert_allclose(res.confidence_interval(alternative=alternative),
|
||||
ref.confidence_interval(alternative=alternative),
|
||||
atol=2e-3)
|
||||
|
||||
@pytest.mark.parametrize('kind', ['sample', 'conditional'])
|
||||
@pytest.mark.parametrize('bad_table', [123, "foo", [10, 11, 12]])
|
||||
def test_invalid_table_shape(self, kind, bad_table):
|
||||
with pytest.raises(ValueError, match="Invalid shape"):
|
||||
odds_ratio(bad_table, kind=kind)
|
||||
|
||||
def test_invalid_table_type(self):
|
||||
with pytest.raises(ValueError, match='must be an array of integers'):
|
||||
odds_ratio([[1.0, 3.4], [5.0, 9.9]])
|
||||
|
||||
def test_negative_table_values(self):
|
||||
with pytest.raises(ValueError, match='must be nonnegative'):
|
||||
odds_ratio([[1, 2], [3, -4]])
|
||||
|
||||
def test_invalid_kind(self):
|
||||
with pytest.raises(ValueError, match='`kind` must be'):
|
||||
odds_ratio([[10, 20], [30, 14]], kind='magnetoreluctance')
|
||||
|
||||
def test_invalid_alternative(self):
|
||||
result = odds_ratio([[5, 10], [2, 32]])
|
||||
with pytest.raises(ValueError, match='`alternative` must be'):
|
||||
result.confidence_interval(alternative='depleneration')
|
||||
|
||||
@pytest.mark.parametrize('level', [-0.5, 1.5])
|
||||
def test_invalid_confidence_level(self, level):
|
||||
result = odds_ratio([[5, 10], [2, 32]])
|
||||
with pytest.raises(ValueError, match='must be between 0 and 1'):
|
||||
result.confidence_interval(confidence_level=level)
@@ -1,199 +0,0 @@
|
|||
import pytest
|
||||
import numpy as np
|
||||
|
||||
from scipy import stats
|
||||
from scipy._lib._array_api import xp_default_dtype, is_numpy, is_torch, SCIPY_ARRAY_API
|
||||
from scipy._lib._array_api_no_0d import xp_assert_close, xp_assert_equal
|
||||
from scipy._lib._util import _apply_over_batch
|
||||
|
||||
skip_xp_backends = pytest.mark.skip_xp_backends
|
||||
|
||||
|
||||
@_apply_over_batch(('x', 1), ('p', 1))
|
||||
def quantile_reference_last_axis(x, p, nan_policy, method):
|
||||
if nan_policy == 'omit':
|
||||
x = x[~np.isnan(x)]
|
||||
p_mask = np.isnan(p)
|
||||
p = p.copy()
|
||||
p[p_mask] = 0.5
|
||||
if method == 'harrell-davis':
|
||||
# hdquantiles returns masked element if length along axis is 1 (bug)
|
||||
res = (np.full_like(p, x[0]) if x.size == 1
|
||||
else stats.mstats.hdquantiles(x, p).data)
|
||||
if nan_policy == 'propagate' and np.any(np.isnan(x)):
|
||||
res[:] = np.nan
|
||||
else:
|
||||
res = np.quantile(x, p)
|
||||
res[p_mask] = np.nan
|
||||
return res
|
||||
|
||||
|
||||
def quantile_reference(x, p, *, axis, nan_policy, keepdims, method):
|
||||
x, p = np.moveaxis(x, axis, -1), np.moveaxis(p, axis, -1)
|
||||
res = quantile_reference_last_axis(x, p, nan_policy, method)
|
||||
res = np.moveaxis(res, -1, axis)
|
||||
if not keepdims:
|
||||
res = np.squeeze(res, axis=axis)
|
||||
return res
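# Usage sketch for the reference helper above (hypothetical arrays, shown as
# comments only; `x` and `p` must share their shape, mirroring how the tests
# below build them):
#     _x = np.random.default_rng(0).random((4, 6))
#     _p = np.full((4, 6), 0.5)
#     quantile_reference(_x, _p, axis=-1, nan_policy='propagate',
#                        keepdims=True, method='linear')  # row-wise medians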
|
||||
|
||||
|
||||
@skip_xp_backends('dask.array', reason="No take_along_axis yet.")
|
||||
@skip_xp_backends('jax.numpy', reason="No mutation.")
|
||||
class TestQuantile:
|
||||
|
||||
def test_input_validation(self, xp):
|
||||
x = xp.asarray([1, 2, 3])
|
||||
p = xp.asarray(0.5)
|
||||
|
||||
message = "`x` must have real dtype."
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.quantile(xp.asarray([True, False]), p)
|
||||
with pytest.raises(ValueError):
|
||||
stats.quantile(xp.asarray([1+1j, 2]), p)
|
||||
|
||||
message = "`p` must have real floating dtype."
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.quantile(x, xp.asarray([0, 1]))
|
||||
|
||||
message = "`axis` must be an integer or None."
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.quantile(x, p, axis=0.5)
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.quantile(x, p, axis=(0, -1))
|
||||
|
||||
message = "`axis` is not compatible with the shapes of the inputs."
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.quantile(x, p, axis=2)
|
||||
|
||||
message = "The input contains nan values"
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.quantile(xp.asarray([xp.nan, 1, 2]), p, nan_policy='raise')
|
||||
|
||||
message = "method` must be one of..."
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.quantile(x, p, method='a duck')
|
||||
|
||||
message = "If specified, `keepdims` must be True or False."
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.quantile(x, p, keepdims=42)
|
||||
|
||||
message = "`keepdims` may be False only if the length of `p` along `axis` is 1."
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.quantile(x, xp.asarray([0.5, 0.6]), keepdims=False)
|
||||
|
||||
@pytest.mark.parametrize('method',
|
||||
['inverted_cdf', 'averaged_inverted_cdf', 'closest_observation',
|
||||
'hazen', 'interpolated_inverted_cdf', 'linear',
|
||||
'median_unbiased', 'normal_unbiased', 'weibull'])
|
||||
@pytest.mark.parametrize('shape_x, shape_p, axis',
|
||||
[(10, None, -1), (10, 10, -1), (10, (2, 3), -1),
|
||||
((10, 2), None, 0), ((10, 2), None, 0),])
|
||||
def test_against_numpy(self, method, shape_x, shape_p, axis, xp):
|
||||
dtype = xp_default_dtype(xp)
|
||||
rng = np.random.default_rng(23458924568734956)
|
||||
x = rng.random(size=shape_x)
|
||||
p = rng.random(size=shape_p)
|
||||
ref = np.quantile(x, p, method=method, axis=axis)
|
||||
|
||||
x, p = xp.asarray(x, dtype=dtype), xp.asarray(p, dtype=dtype)
|
||||
res = stats.quantile(x, p, method=method, axis=axis)
|
||||
|
||||
xp_assert_close(res, xp.asarray(ref, dtype=dtype))
|
||||
|
||||
@skip_xp_backends(cpu_only=True, reason="PyTorch doesn't have `betainc`.")
|
||||
@pytest.mark.parametrize('axis', [0, 1])
|
||||
@pytest.mark.parametrize('keepdims', [False, True])
|
||||
@pytest.mark.parametrize('nan_policy', ['omit', 'propagate', 'marray'])
|
||||
@pytest.mark.parametrize('dtype', ['float32', 'float64'])
|
||||
@pytest.mark.parametrize('method', ['linear', 'harrell-davis'])
|
||||
def test_against_reference(self, axis, keepdims, nan_policy, dtype, method, xp):
|
||||
rng = np.random.default_rng(23458924568734956)
|
||||
shape = (5, 6)
|
||||
x = rng.random(size=shape).astype(dtype)
|
||||
p = rng.random(size=shape).astype(dtype)
|
||||
mask = rng.random(size=shape) > 0.8
|
||||
assert np.any(mask)
|
||||
x[mask] = np.nan
|
||||
if not keepdims:
|
||||
p = np.mean(p, axis=axis, keepdims=True)
|
||||
|
||||
# inject p = 0 and p = 1 to test edge cases
|
||||
# Currently would fail with CuPy/JAX (cupy/cupy#8934, jax-ml/jax#21900);
|
||||
# remove the `if` when those are resolved.
|
||||
if is_numpy(xp):
|
||||
p0 = p.ravel()
|
||||
p0[1] = 0.
|
||||
p0[-2] = 1.
|
||||
|
||||
dtype = getattr(xp, dtype)
|
||||
|
||||
if nan_policy == 'marray':
|
||||
if method == 'harrell-davis':
|
||||
pytest.skip("Needs gh-22490")
|
||||
if is_torch(xp):
|
||||
pytest.skip("sum_cpu not implemented for UInt64, see "
|
||||
"data-apis/array-api-compat#242")
|
||||
if not SCIPY_ARRAY_API:
|
||||
pytest.skip("MArray is only available if SCIPY_ARRAY_API=1")
|
||||
marray = pytest.importorskip('marray')
|
||||
kwargs = dict(axis=axis, keepdims=keepdims, method=method)
|
||||
mxp = marray._get_namespace(xp)
|
||||
x_mp = mxp.asarray(x, mask=mask)
|
||||
res = stats.quantile(x_mp, mxp.asarray(p), **kwargs)
|
||||
ref = quantile_reference(x, p, nan_policy='omit', **kwargs)
|
||||
xp_assert_close(res.data, xp.asarray(ref, dtype=dtype))
|
||||
return
|
||||
|
||||
kwargs = dict(axis=axis, keepdims=keepdims,
|
||||
nan_policy=nan_policy, method=method)
|
||||
res = stats.quantile(xp.asarray(x), xp.asarray(p), **kwargs)
|
||||
ref = quantile_reference(x, p, **kwargs)
|
||||
xp_assert_close(res, xp.asarray(ref, dtype=dtype))
|
||||
|
||||
def test_integer_input_output_dtype(self, xp):
|
||||
res = stats.quantile(xp.arange(10, dtype=xp.int64), 0.5)
|
||||
assert res.dtype == xp_default_dtype(xp)
|
||||
|
||||
@pytest.mark.parametrize('x, p, ref, kwargs',
|
||||
[([], 0.5, np.nan, {}),
|
||||
([1, 2, 3], [-1, 0, 1, 1.5, np.nan], [np.nan, 1, 3, np.nan, np.nan], {}),
|
||||
([1, 2, 3], [], [], {}),
|
||||
([[np.nan, 2]], 0.5, [np.nan, 2], {'nan_policy': 'omit'}),
|
||||
([[], []], 0.5, np.full(2, np.nan), {'axis': -1}),
|
||||
([[], []], 0.5, np.zeros((0,)), {'axis': 0, 'keepdims': False}),
|
||||
([[], []], 0.5, np.zeros((1, 0)), {'axis': 0, 'keepdims': True}),
|
||||
([], [0.5, 0.6], np.full(2, np.nan), {}),
|
||||
(np.arange(1, 28).reshape((3, 3, 3)), 0.5, [[[14.]]],
|
||||
{'axis': None, 'keepdims': True}),
|
||||
([[1, 2], [3, 4]], [0.25, 0.5, 0.75], [[1.75, 2.5, 3.25]],
|
||||
{'axis': None, 'keepdims': True}),])
|
||||
def test_edge_cases(self, x, p, ref, kwargs, xp):
|
||||
default_dtype = xp_default_dtype(xp)
|
||||
x, p, ref = xp.asarray(x), xp.asarray(p), xp.asarray(ref, dtype=default_dtype)
|
||||
res = stats.quantile(x, p, **kwargs)
|
||||
xp_assert_equal(res, ref)
|
||||
|
||||
@pytest.mark.parametrize('axis', [0, 1, 2])
|
||||
@pytest.mark.parametrize('keepdims', [False, True])
|
||||
def test_size_0(self, axis, keepdims, xp):
|
||||
shape = [3, 4, 0]
|
||||
out_shape = shape.copy()
|
||||
if keepdims:
|
||||
out_shape[axis] = 1
|
||||
else:
|
||||
out_shape.pop(axis)
|
||||
res = stats.quantile(xp.zeros(tuple(shape)), 0.5, axis=axis, keepdims=keepdims)
|
||||
assert res.shape == tuple(out_shape)
|
||||
|
||||
@pytest.mark.parametrize('method',
|
||||
['inverted_cdf', 'averaged_inverted_cdf', 'closest_observation'])
|
||||
def test_transition(self, method, xp):
|
||||
# test that values of discontinuous estimators are correct when
|
||||
# p*n + m - 1 is integral.
|
||||
if method == 'closest_observation' and np.__version__ < '2.0.1':
|
||||
pytest.skip('Bug in np.quantile (numpy/numpy#26656) fixed in 2.0.1')
|
||||
x = np.arange(8., dtype=np.float64)
|
||||
p = np.arange(0, 1.0625, 0.0625)
|
||||
res = stats.quantile(xp.asarray(x), xp.asarray(p), method=method)
|
||||
ref = np.quantile(x, p, method=method)
|
||||
xp_assert_equal(res, xp.asarray(ref, dtype=xp.float64))
|
||||
|
|
@@ -1,345 +0,0 @@
|
|||
import numpy as np
|
||||
from numpy.testing import assert_equal, assert_array_equal
|
||||
import pytest
|
||||
|
||||
from scipy import stats
|
||||
from scipy.conftest import skip_xp_invalid_arg
|
||||
from scipy.stats import rankdata, tiecorrect
|
||||
from scipy._lib._array_api import xp_assert_equal, make_xp_test_case
|
||||
|
||||
class TestTieCorrect:
|
||||
|
||||
def test_empty(self):
|
||||
"""An empty array requires no correction, should return 1.0."""
|
||||
ranks = np.array([], dtype=np.float64)
|
||||
c = tiecorrect(ranks)
|
||||
assert_equal(c, 1.0)
|
||||
|
||||
def test_one(self):
|
||||
"""A single element requires no correction, should return 1.0."""
|
||||
ranks = np.array([1.0], dtype=np.float64)
|
||||
c = tiecorrect(ranks)
|
||||
assert_equal(c, 1.0)
|
||||
|
||||
def test_no_correction(self):
|
||||
"""Arrays with no ties require no correction."""
|
||||
ranks = np.arange(2.0)
|
||||
c = tiecorrect(ranks)
|
||||
assert_equal(c, 1.0)
|
||||
ranks = np.arange(3.0)
|
||||
c = tiecorrect(ranks)
|
||||
assert_equal(c, 1.0)
|
||||
|
||||
def test_basic(self):
|
||||
"""Check a few basic examples of the tie correction factor."""
|
||||
# One tie of two elements
|
||||
ranks = np.array([1.0, 2.5, 2.5])
|
||||
c = tiecorrect(ranks)
|
||||
T = 2.0
|
||||
N = ranks.size
|
||||
expected = 1.0 - (T**3 - T) / (N**3 - N)
|
||||
assert_equal(c, expected)
|
||||
|
||||
# One tie of two elements (same as above, but tie is not at the end)
|
||||
ranks = np.array([1.5, 1.5, 3.0])
|
||||
c = tiecorrect(ranks)
|
||||
T = 2.0
|
||||
N = ranks.size
|
||||
expected = 1.0 - (T**3 - T) / (N**3 - N)
|
||||
assert_equal(c, expected)
|
||||
|
||||
# One tie of three elements
|
||||
ranks = np.array([1.0, 3.0, 3.0, 3.0])
|
||||
c = tiecorrect(ranks)
|
||||
T = 3.0
|
||||
N = ranks.size
|
||||
expected = 1.0 - (T**3 - T) / (N**3 - N)
|
||||
assert_equal(c, expected)
|
||||
|
||||
# Two ties, lengths 2 and 3.
|
||||
ranks = np.array([1.5, 1.5, 4.0, 4.0, 4.0])
|
||||
c = tiecorrect(ranks)
|
||||
T1 = 2.0
|
||||
T2 = 3.0
|
||||
N = ranks.size
|
||||
expected = 1.0 - ((T1**3 - T1) + (T2**3 - T2)) / (N**3 - N)
|
||||
assert_equal(c, expected)
|
||||
|
||||
def test_overflow(self):
|
||||
ntie, k = 2000, 5
|
||||
a = np.repeat(np.arange(k), ntie)
|
||||
n = a.size # ntie * k
|
||||
out = tiecorrect(rankdata(a))
|
||||
assert_equal(out, 1.0 - k * (ntie**3 - ntie) / float(n**3 - n))
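# Minimal reference sketch of the correction factor exercised above (not
# part of the original file; `_tiecorrect_reference` is a local name): the
# factor depends only on the sizes t_k of the tied groups,
# 1 - sum(t_k**3 - t_k) / (n**3 - n).
def _tiecorrect_reference(values):
    values = np.asarray(values)
    _, counts = np.unique(values, return_counts=True)
    n = values.size
    return 1.0 - (counts**3 - counts).sum() / (n**3 - n)

# e.g. _tiecorrect_reference([1.5, 1.5, 4.0, 4.0, 4.0]) reproduces the
# "two ties, lengths 2 and 3" expectation from test_basic (0.75).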
|
||||
|
||||
|
||||
@make_xp_test_case(stats.rankdata)
|
||||
class TestRankData:
|
||||
|
||||
def desired_dtype(self, method='average', has_nans=False, *, xp):
|
||||
if has_nans:
|
||||
return xp.asarray(1.).dtype
|
||||
return xp.asarray(1.).dtype if method=='average' else xp.asarray(1).dtype
|
||||
|
||||
def test_empty(self, xp):
|
||||
"""stats.rankdata of empty array should return an empty array."""
|
||||
a = xp.asarray([], dtype=xp.int64)
|
||||
r = rankdata(a)
|
||||
xp_assert_equal(r, xp.asarray([], dtype=self.desired_dtype(xp=xp)))
|
||||
|
||||
def test_list(self):
|
||||
# test that NumPy still accepts lists
|
||||
r = rankdata([])
|
||||
assert_array_equal(r, np.array([]))
|
||||
|
||||
r = rankdata([40, 10, 30, 10, 50])
|
||||
assert_equal(r, [4.0, 1.5, 3.0, 1.5, 5.0])
|
||||
|
||||
@pytest.mark.parametrize("shape", [(0, 1, 2)])
|
||||
@pytest.mark.parametrize("axis", [None, *range(3)])
|
||||
def test_empty_multidim(self, shape, axis, xp):
|
||||
a = xp.empty(shape, dtype=xp.int64)
|
||||
r = rankdata(a, axis=axis)
|
||||
expected_shape = (0,) if axis is None else shape
|
||||
xp_assert_equal(r, xp.empty(expected_shape, dtype=self.desired_dtype(xp=xp)))
|
||||
|
||||
def test_one(self, xp):
|
||||
"""Check stats.rankdata with an array of length 1."""
|
||||
data = [100]
|
||||
a = xp.asarray(data, dtype=xp.int64)
|
||||
r = rankdata(a)
|
||||
xp_assert_equal(r, xp.asarray([1.0], dtype=self.desired_dtype(xp=xp)))
|
||||
|
||||
def test_basic(self, xp):
|
||||
"""Basic tests of stats.rankdata."""
|
||||
desired_dtype = self.desired_dtype(xp=xp)
|
||||
|
||||
data = [100, 10, 50]
|
||||
expected = xp.asarray([3.0, 1.0, 2.0], dtype=desired_dtype)
|
||||
a = xp.asarray(data, dtype=xp.int64)
|
||||
r = rankdata(a)
|
||||
xp_assert_equal(r, expected)
|
||||
|
||||
data = [40, 10, 30, 10, 50]
|
||||
expected = xp.asarray([4.0, 1.5, 3.0, 1.5, 5.0], dtype=desired_dtype)
|
||||
a = xp.asarray(data, dtype=xp.int64)
|
||||
r = rankdata(a)
|
||||
xp_assert_equal(r, expected)
|
||||
|
||||
data = [20, 20, 20, 10, 10, 10]
|
||||
expected = xp.asarray([5.0, 5.0, 5.0, 2.0, 2.0, 2.0], dtype=desired_dtype)
|
||||
a = xp.asarray(data, dtype=xp.int64)
|
||||
r = rankdata(a)
|
||||
xp_assert_equal(r, expected)
|
||||
|
||||
# The docstring states explicitly that the argument is flattened.
|
||||
a2d = xp.reshape(a, (2, 3))
|
||||
r = rankdata(a2d)
|
||||
xp_assert_equal(r, expected)
|
||||
|
||||
@skip_xp_invalid_arg
|
||||
def test_rankdata_object_string(self):
|
||||
|
||||
def min_rank(a):
|
||||
return [1 + sum(i < j for i in a) for j in a]
|
||||
|
||||
def max_rank(a):
|
||||
return [sum(i <= j for i in a) for j in a]
|
||||
|
||||
def ordinal_rank(a):
|
||||
return min_rank([(x, i) for i, x in enumerate(a)])
|
||||
|
||||
def average_rank(a):
|
||||
return [(i + j) / 2.0 for i, j in zip(min_rank(a), max_rank(a))]
|
||||
|
||||
def dense_rank(a):
|
||||
b = np.unique(a)
|
||||
return [1 + sum(i < j for i in b) for j in a]
|
||||
|
||||
rankf = dict(min=min_rank, max=max_rank, ordinal=ordinal_rank,
|
||||
average=average_rank, dense=dense_rank)
|
||||
|
||||
def check_ranks(a):
|
||||
for method in 'min', 'max', 'dense', 'ordinal', 'average':
|
||||
out = rankdata(a, method=method)
|
||||
assert_array_equal(out, rankf[method](a))
|
||||
|
||||
val = ['foo', 'bar', 'qux', 'xyz', 'abc', 'efg', 'ace', 'qwe', 'qaz']
|
||||
check_ranks(np.random.choice(val, 200))
|
||||
check_ranks(np.random.choice(val, 200).astype('object'))
|
||||
|
||||
val = np.array([0, 1, 2, 2.718, 3, 3.141], dtype='object')
|
||||
check_ranks(np.random.choice(val, 200).astype('object'))
|
||||
|
||||
def test_large_int(self, xp):
|
||||
if hasattr(xp, 'uint64'):
|
||||
data = xp.asarray([2**60, 2**60+1], dtype=xp.uint64)
|
||||
r = rankdata(data)
|
||||
xp_assert_equal(r, xp.asarray([1.0, 2.0], dtype=self.desired_dtype(xp=xp)))
|
||||
|
||||
data = xp.asarray([2**60, 2**60+1], dtype=xp.int64)
|
||||
r = rankdata(data)
|
||||
xp_assert_equal(r, xp.asarray([1.0, 2.0], dtype=self.desired_dtype(xp=xp)))
|
||||
|
||||
data = xp.asarray([2**60, -2**60+1], dtype=xp.int64)
|
||||
r = rankdata(data)
|
||||
xp_assert_equal(r, xp.asarray([2.0, 1.0], dtype=self.desired_dtype(xp=xp)))
|
||||
|
||||
@pytest.mark.parametrize('n', [10000, 100000, 1000000])
|
||||
def test_big_tie(self, n, xp):
|
||||
data = xp.ones(n)
|
||||
r = rankdata(data)
|
||||
expected_rank = 0.5 * (n + 1)
|
||||
ref = xp.asarray(expected_rank * data, dtype=self.desired_dtype(xp=xp))
|
||||
xp_assert_equal(r, ref)
|
||||
|
||||
def test_axis(self, xp):
|
||||
data = xp.asarray([[0, 2, 1], [4, 2, 2]])
|
||||
|
||||
expected0 = xp.asarray([[1., 1.5, 1.], [2., 1.5, 2.]])
|
||||
r0 = rankdata(data, axis=0)
|
||||
xp_assert_equal(r0, expected0)
|
||||
|
||||
expected1 = xp.asarray([[1., 3., 2.], [3., 1.5, 1.5]])
|
||||
r1 = rankdata(data, axis=1)
|
||||
xp_assert_equal(r1, expected1)
|
||||
|
||||
methods = ["average", "min", "max", "dense", "ordinal"]
|
||||
|
||||
@pytest.mark.parametrize("axis", [0, 1])
|
||||
@pytest.mark.parametrize("method", methods)
|
||||
def test_size_0_axis(self, axis, method, xp):
|
||||
shape = (3, 0)
|
||||
desired_dtype = self.desired_dtype(method, xp=xp)
|
||||
data = xp.zeros(shape)
|
||||
r = rankdata(data, method=method, axis=axis)
|
||||
assert_equal(r.shape, shape)
|
||||
assert_equal(r.dtype, desired_dtype)
|
||||
xp_assert_equal(r, xp.empty(shape, dtype=desired_dtype))
|
||||
|
||||
@pytest.mark.parametrize('axis', range(3))
|
||||
@pytest.mark.parametrize('method', methods)
|
||||
def test_nan_policy_omit_3d(self, axis, method):
|
||||
shape = (20, 21, 22)
|
||||
rng = np.random.RandomState(23983242)
|
||||
|
||||
a = rng.random(size=shape)
|
||||
i = rng.random(size=shape) < 0.4
|
||||
j = rng.random(size=shape) < 0.1
|
||||
k = rng.random(size=shape) < 0.1
|
||||
a[i] = np.nan
|
||||
a[j] = -np.inf
|
||||
a[k] = np.inf
|
||||
|
||||
def rank_1d_omit(a, method):
|
||||
out = np.zeros_like(a)
|
||||
i = np.isnan(a)
|
||||
a_compressed = a[~i]
|
||||
res = rankdata(a_compressed, method)
|
||||
out[~i] = res
|
||||
out[i] = np.nan
|
||||
return out
|
||||
|
||||
def rank_omit(a, method, axis):
|
||||
return np.apply_along_axis(lambda a: rank_1d_omit(a, method),
|
||||
axis, a)
|
||||
|
||||
res = rankdata(a, method, axis=axis, nan_policy='omit')
|
||||
res0 = rank_omit(a, method, axis=axis)
|
||||
|
||||
assert_array_equal(res, res0)
|
||||
|
||||
def test_nan_policy_2d_axis_none(self):
|
||||
# 2 2d-array test with axis=None
|
||||
data = [[0, np.nan, 3],
|
||||
[4, 2, np.nan],
|
||||
[1, 2, 2]]
|
||||
assert_array_equal(rankdata(data, axis=None, nan_policy='omit'),
|
||||
[1., np.nan, 6., 7., 4., np.nan, 2., 4., 4.])
|
||||
assert_array_equal(rankdata(data, axis=None, nan_policy='propagate'),
|
||||
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan,
|
||||
np.nan, np.nan, np.nan])
|
||||
|
||||
def test_nan_policy_raise(self):
|
||||
# 1 1d-array test
|
||||
data = [0, 2, 3, -2, np.nan, np.nan]
|
||||
with pytest.raises(ValueError, match="The input contains nan"):
|
||||
rankdata(data, nan_policy='raise')
|
||||
|
||||
# 2 2d-array test
|
||||
data = [[0, np.nan, 3],
|
||||
[4, 2, np.nan],
|
||||
[np.nan, 2, 2]]
|
||||
|
||||
with pytest.raises(ValueError, match="The input contains nan"):
|
||||
rankdata(data, axis=0, nan_policy="raise")
|
||||
|
||||
with pytest.raises(ValueError, match="The input contains nan"):
|
||||
rankdata(data, axis=1, nan_policy="raise")
|
||||
|
||||
def test_nan_policy_propagate(self):
|
||||
# 1 1d-array test
|
||||
data = [0, 2, 3, -2, np.nan, np.nan]
|
||||
assert_array_equal(rankdata(data, nan_policy='propagate'),
|
||||
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan])
|
||||
|
||||
# 2 2d-array test
|
||||
data = [[0, np.nan, 3],
|
||||
[4, 2, np.nan],
|
||||
[1, 2, 2]]
|
||||
assert_array_equal(rankdata(data, axis=0, nan_policy='propagate'),
|
||||
[[1, np.nan, np.nan],
|
||||
[3, np.nan, np.nan],
|
||||
[2, np.nan, np.nan]])
|
||||
assert_array_equal(rankdata(data, axis=1, nan_policy='propagate'),
|
||||
[[np.nan, np.nan, np.nan],
|
||||
[np.nan, np.nan, np.nan],
|
||||
[1, 2.5, 2.5]])
|
||||
|
||||
_rankdata_cases = (
|
||||
# values, method, expected
|
||||
([], 'average', []),
|
||||
([], 'min', []),
|
||||
([], 'max', []),
|
||||
([], 'dense', []),
|
||||
([], 'ordinal', []),
|
||||
#
|
||||
([100], 'average', [1.0]),
|
||||
([100], 'min', [1.0]),
|
||||
([100], 'max', [1.0]),
|
||||
([100], 'dense', [1.0]),
|
||||
([100], 'ordinal', [1.0]),
|
||||
#
|
||||
([100, 100, 100], 'average', [2.0, 2.0, 2.0]),
|
||||
([100, 100, 100], 'min', [1.0, 1.0, 1.0]),
|
||||
([100, 100, 100], 'max', [3.0, 3.0, 3.0]),
|
||||
([100, 100, 100], 'dense', [1.0, 1.0, 1.0]),
|
||||
([100, 100, 100], 'ordinal', [1.0, 2.0, 3.0]),
|
||||
#
|
||||
([100, 300, 200], 'average', [1.0, 3.0, 2.0]),
|
||||
([100, 300, 200], 'min', [1.0, 3.0, 2.0]),
|
||||
([100, 300, 200], 'max', [1.0, 3.0, 2.0]),
|
||||
([100, 300, 200], 'dense', [1.0, 3.0, 2.0]),
|
||||
([100, 300, 200], 'ordinal', [1.0, 3.0, 2.0]),
|
||||
#
|
||||
([100, 200, 300, 200], 'average', [1.0, 2.5, 4.0, 2.5]),
|
||||
([100, 200, 300, 200], 'min', [1.0, 2.0, 4.0, 2.0]),
|
||||
([100, 200, 300, 200], 'max', [1.0, 3.0, 4.0, 3.0]),
|
||||
([100, 200, 300, 200], 'dense', [1.0, 2.0, 3.0, 2.0]),
|
||||
([100, 200, 300, 200], 'ordinal', [1.0, 2.0, 4.0, 3.0]),
|
||||
#
|
||||
([100, 200, 300, 200, 100], 'average', [1.5, 3.5, 5.0, 3.5, 1.5]),
|
||||
([100, 200, 300, 200, 100], 'min', [1.0, 3.0, 5.0, 3.0, 1.0]),
|
||||
([100, 200, 300, 200, 100], 'max', [2.0, 4.0, 5.0, 4.0, 2.0]),
|
||||
([100, 200, 300, 200, 100], 'dense', [1.0, 2.0, 3.0, 2.0, 1.0]),
|
||||
([100, 200, 300, 200, 100], 'ordinal', [1.0, 3.0, 5.0, 4.0, 2.0]),
|
||||
#
|
||||
([10] * 30, 'ordinal', np.arange(1.0, 31.0)),
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize('case', _rankdata_cases)
|
||||
def test_cases(self, case, xp):
|
||||
values, method, expected = case
|
||||
r = rankdata(xp.asarray(values), method=method)
|
||||
ref = xp.asarray(expected, dtype=self.desired_dtype(method, xp=xp))
|
||||
xp_assert_equal(r, ref)
|
||||
|
|
@@ -1,95 +0,0 @@
|
|||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose, assert_equal
|
||||
from scipy.stats.contingency import relative_risk
|
||||
|
||||
|
||||
# Test just the calculation of the relative risk, including edge
|
||||
# cases that result in a relative risk of 0, inf or nan.
|
||||
@pytest.mark.parametrize(
|
||||
'exposed_cases, exposed_total, control_cases, control_total, expected_rr',
|
||||
[(1, 4, 3, 8, 0.25 / 0.375),
|
||||
(0, 10, 5, 20, 0),
|
||||
(0, 10, 0, 20, np.nan),
|
||||
(5, 15, 0, 20, np.inf)]
|
||||
)
|
||||
def test_relative_risk(exposed_cases, exposed_total,
|
||||
control_cases, control_total, expected_rr):
|
||||
result = relative_risk(exposed_cases, exposed_total,
|
||||
control_cases, control_total)
|
||||
assert_allclose(result.relative_risk, expected_rr, rtol=1e-13)
|
||||
|
||||
|
||||
def test_relative_risk_confidence_interval():
|
||||
result = relative_risk(exposed_cases=16, exposed_total=128,
|
||||
control_cases=24, control_total=256)
|
||||
rr = result.relative_risk
|
||||
ci = result.confidence_interval(confidence_level=0.95)
|
||||
# The corresponding calculation in R using the epitools package.
|
||||
#
|
||||
# > library(epitools)
|
||||
# > c <- matrix(c(232, 112, 24, 16), nrow=2)
|
||||
# > result <- riskratio(c)
|
||||
# > result$measure
|
||||
# risk ratio with 95% C.I.
|
||||
# Predictor estimate lower upper
|
||||
# Exposed1 1.000000 NA NA
|
||||
# Exposed2 1.333333 0.7347317 2.419628
|
||||
#
|
||||
# The last line is the result that we want.
|
||||
assert_allclose(rr, 4/3)
|
||||
assert_allclose((ci.low, ci.high), (0.7347317, 2.419628), rtol=5e-7)
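# Hedged sketch of the Katz log-interval that these numbers follow (an
# assumption about the construction; `_katz_ci` is a hypothetical helper,
# but it reproduces the epitools values above to the quoted precision):
def _katz_ci(exposed_cases, exposed_total, control_cases, control_total,
             confidence_level=0.95):
    from scipy.stats import norm
    rr = (exposed_cases / exposed_total) / (control_cases / control_total)
    se = np.sqrt(1/exposed_cases - 1/exposed_total
                 + 1/control_cases - 1/control_total)
    z = norm.ppf(0.5 + confidence_level / 2)
    return rr * np.exp(-z * se), rr * np.exp(z * se)

# _katz_ci(16, 128, 24, 256) ~= (0.7347, 2.4196)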
|
||||
|
||||
|
||||
def test_relative_risk_ci_conflevel0():
|
||||
result = relative_risk(exposed_cases=4, exposed_total=12,
|
||||
control_cases=5, control_total=30)
|
||||
rr = result.relative_risk
|
||||
assert_allclose(rr, 2.0, rtol=1e-14)
|
||||
ci = result.confidence_interval(0)
|
||||
assert_allclose((ci.low, ci.high), (2.0, 2.0), rtol=1e-12)
|
||||
|
||||
|
||||
def test_relative_risk_ci_conflevel1():
|
||||
result = relative_risk(exposed_cases=4, exposed_total=12,
|
||||
control_cases=5, control_total=30)
|
||||
ci = result.confidence_interval(1)
|
||||
assert_equal((ci.low, ci.high), (0, np.inf))
|
||||
|
||||
|
||||
def test_relative_risk_ci_edge_cases_00():
|
||||
result = relative_risk(exposed_cases=0, exposed_total=12,
|
||||
control_cases=0, control_total=30)
|
||||
assert_equal(result.relative_risk, np.nan)
|
||||
ci = result.confidence_interval()
|
||||
assert_equal((ci.low, ci.high), (np.nan, np.nan))
|
||||
|
||||
|
||||
def test_relative_risk_ci_edge_cases_01():
|
||||
result = relative_risk(exposed_cases=0, exposed_total=12,
|
||||
control_cases=1, control_total=30)
|
||||
assert_equal(result.relative_risk, 0)
|
||||
ci = result.confidence_interval()
|
||||
assert_equal((ci.low, ci.high), (0.0, np.nan))
|
||||
|
||||
|
||||
def test_relative_risk_ci_edge_cases_10():
|
||||
result = relative_risk(exposed_cases=1, exposed_total=12,
|
||||
control_cases=0, control_total=30)
|
||||
assert_equal(result.relative_risk, np.inf)
|
||||
ci = result.confidence_interval()
|
||||
assert_equal((ci.low, ci.high), (np.nan, np.inf))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('ec, et, cc, ct', [(0, 0, 10, 20),
|
||||
(-1, 10, 1, 5),
|
||||
(1, 10, 0, 0),
|
||||
(1, 10, -1, 4)])
|
||||
def test_relative_risk_bad_value(ec, et, cc, ct):
|
||||
with pytest.raises(ValueError, match="must be an integer not less than"):
|
||||
relative_risk(ec, et, cc, ct)
|
||||
|
||||
|
||||
def test_relative_risk_bad_type():
|
||||
with pytest.raises(TypeError, match="must be an integer"):
|
||||
relative_risk(1, 10, 2.0, 40)
@@ -1,310 +0,0 @@
|
|||
import numpy as np
|
||||
from numpy.testing import assert_allclose, assert_array_less
|
||||
import pytest
|
||||
|
||||
from scipy import stats
|
||||
from scipy.stats import sobol_indices
|
||||
from scipy.stats._resampling import BootstrapResult
|
||||
from scipy.stats._sensitivity_analysis import (
|
||||
BootstrapSobolResult, f_ishigami, sample_AB, sample_A_B
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
|
||||
def ishigami_ref_indices():
|
||||
"""Reference values for Ishigami from Saltelli2007.
|
||||
|
||||
Chapter 4, exercise 5 pages 179-182.
|
||||
"""
|
||||
a = 7.
|
||||
b = 0.1
|
||||
|
||||
var = 0.5 + a**2/8 + b*np.pi**4/5 + b**2*np.pi**8/18
|
||||
v1 = 0.5 + b*np.pi**4/5 + b**2*np.pi**8/50
|
||||
v2 = a**2/8
|
||||
v3 = 0
|
||||
v12 = 0
|
||||
# v13: mistake in the book, see other derivations e.g. in 10.1002/nme.4856
|
||||
v13 = b**2*np.pi**8*8/225
|
||||
v23 = 0
|
||||
|
||||
s_first = np.array([v1, v2, v3])/var
|
||||
s_second = np.array([
|
||||
[0., 0., v13],
|
||||
[v12, 0., v23],
|
||||
[v13, v23, 0.]
|
||||
])/var
|
||||
s_total = s_first + s_second.sum(axis=1)
|
||||
|
||||
return s_first, s_total
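# Numeric orientation for the fixture above (a sketch; values rounded):
# with a=7 and b=0.1 the analytic expressions give first-order indices of
# approximately [0.3139, 0.4424, 0.0] and total-order indices of
# approximately [0.5576, 0.4424, 0.2437].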
|
||||
|
||||
|
||||
def f_ishigami_vec(x):
|
||||
"""Output of shape (2, n)."""
|
||||
res = f_ishigami(x)
|
||||
return res, res
|
||||
|
||||
|
||||
class TestSobolIndices:
|
||||
|
||||
dists = [
|
||||
stats.uniform(loc=-np.pi, scale=2*np.pi) # type: ignore[attr-defined]
|
||||
] * 3
|
||||
|
||||
def test_sample_AB(self):
|
||||
# (d, n)
|
||||
A = np.array(
|
||||
[[1, 4, 7, 10],
|
||||
[2, 5, 8, 11],
|
||||
[3, 6, 9, 12]]
|
||||
)
|
||||
B = A + 100
|
||||
# (d, d, n)
|
||||
ref = np.array(
|
||||
[[[101, 104, 107, 110],
|
||||
[2, 5, 8, 11],
|
||||
[3, 6, 9, 12]],
|
||||
[[1, 4, 7, 10],
|
||||
[102, 105, 108, 111],
|
||||
[3, 6, 9, 12]],
|
||||
[[1, 4, 7, 10],
|
||||
[2, 5, 8, 11],
|
||||
[103, 106, 109, 112]]]
|
||||
)
|
||||
AB = sample_AB(A=A, B=B)
|
||||
assert_allclose(AB, ref)
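# Equivalent construction sketch for the reference above (illustration
# only; `_sample_AB_sketch` is hypothetical): AB[i] is a copy of A with
# its i-th row replaced by the i-th row of B.
def _sample_AB_sketch(A, B):
    d = A.shape[0]
    AB = np.tile(A, (d, 1, 1))
    AB[np.arange(d), np.arange(d)] = B
    return AB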
|
||||
|
||||
@pytest.mark.xslow
|
||||
@pytest.mark.xfail_on_32bit("Can't create large array for test")
|
||||
@pytest.mark.parametrize(
|
||||
'func',
|
||||
[f_ishigami, pytest.param(f_ishigami_vec, marks=pytest.mark.slow)],
|
||||
ids=['scalar', 'vector']
|
||||
)
|
||||
def test_ishigami(self, ishigami_ref_indices, func):
|
||||
rng = np.random.default_rng(28631265345463262246170309650372465332)
|
||||
res = sobol_indices(
|
||||
func=func, n=4096,
|
||||
dists=self.dists,
|
||||
rng=rng
|
||||
)
|
||||
|
||||
if func.__name__ == 'f_ishigami_vec':
|
||||
ishigami_ref_indices = [
|
||||
[ishigami_ref_indices[0], ishigami_ref_indices[0]],
|
||||
[ishigami_ref_indices[1], ishigami_ref_indices[1]]
|
||||
]
|
||||
|
||||
assert_allclose(res.first_order, ishigami_ref_indices[0], atol=1e-2)
|
||||
assert_allclose(res.total_order, ishigami_ref_indices[1], atol=1e-2)
|
||||
|
||||
assert res._bootstrap_result is None
|
||||
bootstrap_res = res.bootstrap(n_resamples=99)
|
||||
assert isinstance(bootstrap_res, BootstrapSobolResult)
|
||||
assert isinstance(res._bootstrap_result, BootstrapResult)
|
||||
|
||||
assert res._bootstrap_result.confidence_interval.low.shape[0] == 2
|
||||
assert res._bootstrap_result.confidence_interval.low[1].shape \
|
||||
== res.first_order.shape
|
||||
|
||||
assert bootstrap_res.first_order.confidence_interval.low.shape \
|
||||
== res.first_order.shape
|
||||
assert bootstrap_res.total_order.confidence_interval.low.shape \
|
||||
== res.total_order.shape
|
||||
|
||||
assert_array_less(
|
||||
bootstrap_res.first_order.confidence_interval.low, res.first_order
|
||||
)
|
||||
assert_array_less(
|
||||
res.first_order, bootstrap_res.first_order.confidence_interval.high
|
||||
)
|
||||
assert_array_less(
|
||||
bootstrap_res.total_order.confidence_interval.low, res.total_order
|
||||
)
|
||||
assert_array_less(
|
||||
res.total_order, bootstrap_res.total_order.confidence_interval.high
|
||||
)
|
||||
|
||||
# call again to use previous results and change a param
|
||||
assert isinstance(
|
||||
res.bootstrap(confidence_level=0.9, n_resamples=99),
|
||||
BootstrapSobolResult
|
||||
)
|
||||
assert isinstance(res._bootstrap_result, BootstrapResult)
|
||||
|
||||
def test_func_dict(self, ishigami_ref_indices):
|
||||
rng = np.random.default_rng(28631265345463262246170309650372465332)
|
||||
n = 4096
|
||||
dists = [
|
||||
stats.uniform(loc=-np.pi, scale=2*np.pi),
|
||||
stats.uniform(loc=-np.pi, scale=2*np.pi),
|
||||
stats.uniform(loc=-np.pi, scale=2*np.pi)
|
||||
]
|
||||
|
||||
A, B = sample_A_B(n=n, dists=dists, rng=rng)
|
||||
AB = sample_AB(A=A, B=B)
|
||||
|
||||
func = {
|
||||
'f_A': f_ishigami(A).reshape(1, -1),
|
||||
'f_B': f_ishigami(B).reshape(1, -1),
|
||||
'f_AB': f_ishigami(AB).reshape((3, 1, -1))
|
||||
}
|
||||
|
||||
# preserve use of old random_state during SPEC 7 transition
|
||||
res = sobol_indices(
|
||||
func=func, n=n,
|
||||
dists=dists,
|
||||
rng=rng
|
||||
)
|
||||
assert_allclose(res.first_order, ishigami_ref_indices[0], atol=1e-2)
|
||||
|
||||
res = sobol_indices(
|
||||
func=func, n=n,
|
||||
rng=rng
|
||||
)
|
||||
assert_allclose(res.first_order, ishigami_ref_indices[0], atol=1e-2)
|
||||
# Ideally these would be exactly equal, but f_ishigami uses floating-point
# operations, so exact equality cannot be relied upon; assert_allclose with
# default tolerances is used instead.
|
||||
# Regression test for https://github.com/scipy/scipy/issues/21383
|
||||
assert_allclose(f_ishigami(A).reshape(1, -1), func['f_A'])
|
||||
assert_allclose(f_ishigami(B).reshape(1, -1), func['f_B'])
|
||||
assert_allclose(f_ishigami(AB).reshape((3, 1, -1)), func['f_AB'])
|
||||
|
||||
def test_method(self, ishigami_ref_indices):
|
||||
def jansen_sobol(f_A, f_B, f_AB):
|
||||
"""Jansen for S and Sobol' for St.
|
||||
|
||||
From Saltelli2010, table 2 formulations (c) and (e)."""
|
||||
var = np.var([f_A, f_B], axis=(0, -1))
|
||||
|
||||
s = (var - 0.5*np.mean((f_B - f_AB)**2, axis=-1)) / var
|
||||
st = np.mean(f_A*(f_A - f_AB), axis=-1) / var
|
||||
|
||||
return s.T, st.T
|
||||
|
||||
rng = np.random.default_rng(28631265345463262246170309650372465332)
|
||||
res = sobol_indices(
|
||||
func=f_ishigami, n=4096,
|
||||
dists=self.dists,
|
||||
method=jansen_sobol,
|
||||
rng=rng
|
||||
)
|
||||
|
||||
assert_allclose(res.first_order, ishigami_ref_indices[0], atol=1e-2)
|
||||
assert_allclose(res.total_order, ishigami_ref_indices[1], atol=1e-2)
|
||||
|
||||
def jansen_sobol_typed(
|
||||
f_A: np.ndarray, f_B: np.ndarray, f_AB: np.ndarray
|
||||
) -> tuple[np.ndarray, np.ndarray]:
|
||||
return jansen_sobol(f_A, f_B, f_AB)
|
||||
|
||||
_ = sobol_indices(
|
||||
func=f_ishigami, n=8,
|
||||
dists=self.dists,
|
||||
method=jansen_sobol_typed,
|
||||
rng=rng
|
||||
)
|
||||
|
||||
def test_normalization(self, ishigami_ref_indices):
|
||||
rng = np.random.default_rng(28631265345463262246170309650372465332)
|
||||
res = sobol_indices(
|
||||
func=lambda x: f_ishigami(x) + 1000, n=4096,
|
||||
dists=self.dists,
|
||||
rng=rng
|
||||
)
|
||||
|
||||
assert_allclose(res.first_order, ishigami_ref_indices[0], atol=1e-2)
|
||||
assert_allclose(res.total_order, ishigami_ref_indices[1], atol=1e-2)
|
||||
|
||||
def test_constant_function(self, ishigami_ref_indices):
|
||||
|
||||
def f_ishigami_vec_const(x):
|
||||
"""Output of shape (3, n)."""
|
||||
res = f_ishigami(x)
|
||||
return res, res * 0 + 10, res
|
||||
|
||||
rng = np.random.default_rng(28631265345463262246170309650372465332)
|
||||
res = sobol_indices(
|
||||
func=f_ishigami_vec_const, n=4096,
|
||||
dists=self.dists,
|
||||
rng=rng
|
||||
)
|
||||
|
||||
ishigami_vec_indices = [
|
||||
[ishigami_ref_indices[0], [0, 0, 0], ishigami_ref_indices[0]],
|
||||
[ishigami_ref_indices[1], [0, 0, 0], ishigami_ref_indices[1]]
|
||||
]
|
||||
|
||||
assert_allclose(res.first_order, ishigami_vec_indices[0], atol=1e-2)
|
||||
assert_allclose(res.total_order, ishigami_vec_indices[1], atol=1e-2)
|
||||
|
||||
@pytest.mark.xfail_on_32bit("Can't create large array for test")
|
||||
def test_more_converged(self, ishigami_ref_indices):
|
||||
rng = np.random.default_rng(28631265345463262246170309650372465332)
|
||||
res = sobol_indices(
|
||||
func=f_ishigami, n=2**19, # 524288
|
||||
dists=self.dists,
|
||||
rng=rng
|
||||
)
|
||||
|
||||
assert_allclose(res.first_order, ishigami_ref_indices[0], atol=1e-4)
|
||||
assert_allclose(res.total_order, ishigami_ref_indices[1], atol=1e-4)
|
||||
|
||||
def test_raises(self):
|
||||
|
||||
message = r"Each distribution in `dists` must have method `ppf`"
|
||||
with pytest.raises(ValueError, match=message):
|
||||
sobol_indices(n=0, func=f_ishigami, dists="uniform")
|
||||
|
||||
with pytest.raises(ValueError, match=message):
|
||||
sobol_indices(n=0, func=f_ishigami, dists=[lambda x: x])
|
||||
|
||||
message = r"The balance properties of Sobol'"
|
||||
with pytest.raises(ValueError, match=message):
|
||||
sobol_indices(n=7, func=f_ishigami, dists=[stats.uniform()])
|
||||
|
||||
with pytest.raises(ValueError, match=message):
|
||||
sobol_indices(n=4.1, func=f_ishigami, dists=[stats.uniform()])
|
||||
|
||||
message = r"'toto' is not a valid 'method'"
|
||||
with pytest.raises(ValueError, match=message):
|
||||
sobol_indices(n=0, func=f_ishigami, method='toto')
|
||||
|
||||
message = r"must have the following signature"
|
||||
with pytest.raises(ValueError, match=message):
|
||||
sobol_indices(n=0, func=f_ishigami, method=lambda x: x)
|
||||
|
||||
message = r"'dists' must be defined when 'func' is a callable"
|
||||
with pytest.raises(ValueError, match=message):
|
||||
sobol_indices(n=0, func=f_ishigami)
|
||||
|
||||
def func_wrong_shape_output(x):
|
||||
return x.reshape(-1, 1)
|
||||
|
||||
message = r"'func' output should have a shape"
|
||||
with pytest.raises(ValueError, match=message):
|
||||
sobol_indices(
|
||||
n=2, func=func_wrong_shape_output, dists=[stats.uniform()]
|
||||
)
|
||||
|
||||
message = r"When 'func' is a dictionary"
|
||||
with pytest.raises(ValueError, match=message):
|
||||
sobol_indices(
|
||||
n=2, func={'f_A': [], 'f_AB': []}, dists=[stats.uniform()]
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match=message):
|
||||
# f_B malformed
|
||||
sobol_indices(
|
||||
n=2,
|
||||
func={'f_A': [1, 2], 'f_B': [3], 'f_AB': [5, 6, 7, 8]},
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match=message):
|
||||
# f_AB malformed
|
||||
sobol_indices(
|
||||
n=2,
|
||||
func={'f_A': [1, 2], 'f_B': [3, 4], 'f_AB': [5, 6, 7]},
|
||||
)
@@ -1,466 +0,0 @@
|
|||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_equal, assert_allclose
|
||||
from scipy import stats
|
||||
from scipy.stats import _survival
|
||||
|
||||
|
||||
def _kaplan_meier_reference(times, censored):
|
||||
# This is a very straightforward implementation of the Kaplan-Meier
|
||||
# estimator that does almost everything differently from the implementation
|
||||
# in stats.ecdf.
|
||||
|
||||
# Begin by sorting the raw data. Note that the order of death and loss
|
||||
# at a given time matters: death happens first. See [2] page 461:
|
||||
# "These conventions may be paraphrased by saying that deaths recorded as
|
||||
# of an age t are treated as if they occurred slightly before t, and losses
|
||||
# recorded as of an age t are treated as occurring slightly after t."
|
||||
# We implement this by sorting the data first by time, then by `censored`,
|
||||
# (which is 0 when there is a death and 1 when there is only a loss).
|
||||
dtype = [('time', float), ('censored', int)]
|
||||
data = np.array([(t, d) for t, d in zip(times, censored)], dtype=dtype)
|
||||
data = np.sort(data, order=('time', 'censored'))
|
||||
times = data['time']
|
||||
died = np.logical_not(data['censored'])
|
||||
|
||||
m = times.size
|
||||
n = np.arange(m, 0, -1) # number at risk
|
||||
sf = np.cumprod((n - died) / n)
|
||||
|
||||
# Find the indices of the *last* occurrence of unique times. The
|
||||
# corresponding entries of `times` and `sf` are what we want.
|
||||
_, indices = np.unique(times[::-1], return_index=True)
|
||||
ref_times = times[-indices - 1]
|
||||
ref_sf = sf[-indices - 1]
|
||||
return ref_times, ref_sf
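# Worked example of the ordering convention above (a sketch, not part of
# the original suite): a death and a loss are both recorded at t=2, and the
# death is processed first, so the loss reduces the at-risk count only for
# later times.
_example = _kaplan_meier_reference([1, 2, 2, 3], [False, False, True, False])
# expected: times [1, 2, 3] with survival probabilities [0.75, 0.5, 0.0]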
|
||||
|
||||
|
||||
class TestSurvival:
|
||||
|
||||
@staticmethod
|
||||
def get_random_sample(rng, n_unique):
|
||||
# generate random sample
|
||||
unique_times = rng.random(n_unique)
|
||||
# convert to `np.int32` to resolve `np.repeat` failure in 32-bit CI
|
||||
repeats = rng.integers(1, 4, n_unique).astype(np.int32)
|
||||
times = rng.permuted(np.repeat(unique_times, repeats))
|
||||
censored = rng.random(size=times.size) > rng.random()
|
||||
sample = stats.CensoredData.right_censored(times, censored)
|
||||
return sample, times, censored
|
||||
|
||||
def test_input_validation(self):
|
||||
message = '`sample` must be a one-dimensional sequence.'
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.ecdf([[1]])
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.ecdf(1)
|
||||
|
||||
message = '`sample` must not contain nan'
|
||||
with pytest.raises(ValueError, match=message):
|
||||
stats.ecdf([np.nan])
|
||||
|
||||
message = 'Currently, only uncensored and right-censored data...'
|
||||
with pytest.raises(NotImplementedError, match=message):
|
||||
stats.ecdf(stats.CensoredData.left_censored([1], censored=[True]))
|
||||
|
||||
message = 'method` must be one of...'
|
||||
res = stats.ecdf([1, 2, 3])
|
||||
with pytest.raises(ValueError, match=message):
|
||||
res.cdf.confidence_interval(method='ekki-ekki')
|
||||
with pytest.raises(ValueError, match=message):
|
||||
res.sf.confidence_interval(method='shrubbery')
|
||||
|
||||
message = 'confidence_level` must be a scalar between 0 and 1'
|
||||
with pytest.raises(ValueError, match=message):
|
||||
res.cdf.confidence_interval(-1)
|
||||
with pytest.raises(ValueError, match=message):
|
||||
res.sf.confidence_interval([0.5, 0.6])
|
||||
|
||||
message = 'The confidence interval is undefined at some observations.'
|
||||
with pytest.warns(RuntimeWarning, match=message):
|
||||
ci = res.cdf.confidence_interval()
|
||||
|
||||
message = 'Confidence interval bounds do not implement...'
|
||||
with pytest.raises(NotImplementedError, match=message):
|
||||
ci.low.confidence_interval()
|
||||
with pytest.raises(NotImplementedError, match=message):
|
||||
ci.high.confidence_interval()
|
||||
|
||||
def test_edge_cases(self):
|
||||
res = stats.ecdf([])
|
||||
assert_equal(res.cdf.quantiles, [])
|
||||
assert_equal(res.cdf.probabilities, [])
|
||||
|
||||
res = stats.ecdf([1])
|
||||
assert_equal(res.cdf.quantiles, [1])
|
||||
assert_equal(res.cdf.probabilities, [1])
|
||||
|
||||
def test_unique(self):
|
||||
# Example with unique observations; `stats.ecdf` ref. [1] page 80
|
||||
sample = [6.23, 5.58, 7.06, 6.42, 5.20]
|
||||
res = stats.ecdf(sample)
|
||||
ref_x = np.sort(np.unique(sample))
|
||||
ref_cdf = np.arange(1, 6) / 5
|
||||
ref_sf = 1 - ref_cdf
|
||||
assert_equal(res.cdf.quantiles, ref_x)
|
||||
assert_equal(res.cdf.probabilities, ref_cdf)
|
||||
assert_equal(res.sf.quantiles, ref_x)
|
||||
assert_equal(res.sf.probabilities, ref_sf)
|
||||
|
||||
def test_nonunique(self):
|
||||
# Example with non-unique observations; `stats.ecdf` ref. [1] page 82
|
||||
sample = [0, 2, 1, 2, 3, 4]
|
||||
res = stats.ecdf(sample)
|
||||
ref_x = np.sort(np.unique(sample))
|
||||
ref_cdf = np.array([1/6, 2/6, 4/6, 5/6, 1])
|
||||
ref_sf = 1 - ref_cdf
|
||||
assert_equal(res.cdf.quantiles, ref_x)
|
||||
assert_equal(res.cdf.probabilities, ref_cdf)
|
||||
assert_equal(res.sf.quantiles, ref_x)
|
||||
assert_equal(res.sf.probabilities, ref_sf)
|
||||
|
||||
def test_evaluate_methods(self):
|
||||
# Test CDF and SF `evaluate` methods
|
||||
rng = np.random.default_rng(1162729143302572461)
|
||||
sample, _, _ = self.get_random_sample(rng, 15)
|
||||
res = stats.ecdf(sample)
|
||||
x = res.cdf.quantiles
|
||||
xr = x + np.diff(x, append=x[-1]+1)/2 # right shifted points
|
||||
|
||||
assert_equal(res.cdf.evaluate(x), res.cdf.probabilities)
|
||||
assert_equal(res.cdf.evaluate(xr), res.cdf.probabilities)
|
||||
assert_equal(res.cdf.evaluate(x[0]-1), 0) # CDF starts at 0
|
||||
assert_equal(res.cdf.evaluate([-np.inf, np.inf]), [0, 1])
|
||||
|
||||
assert_equal(res.sf.evaluate(x), res.sf.probabilities)
|
||||
assert_equal(res.sf.evaluate(xr), res.sf.probabilities)
|
||||
assert_equal(res.sf.evaluate(x[0]-1), 1) # SF starts at 1
|
||||
assert_equal(res.sf.evaluate([-np.inf, np.inf]), [1, 0])
|
||||
|
||||
# ref. [1] page 91
|
||||
t1 = [37, 43, 47, 56, 60, 62, 71, 77, 80, 81] # times
|
||||
d1 = [0, 0, 1, 1, 0, 0, 0, 1, 1, 1] # 1 means deaths (not censored)
|
||||
r1 = [1, 1, 0.875, 0.75, 0.75, 0.75, 0.75, 0.5, 0.25, 0] # reference SF
|
||||
|
||||
# https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_survival/BS704_Survival5.html
|
||||
t2 = [8, 12, 26, 14, 21, 27, 8, 32, 20, 40]
|
||||
d2 = [1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
|
||||
r2 = [0.9, 0.788, 0.675, 0.675, 0.54, 0.405, 0.27, 0.27, 0.27]
|
||||
t3 = [33, 28, 41, 48, 48, 25, 37, 48, 25, 43]
|
||||
d3 = [1, 1, 1, 0, 0, 0, 0, 0, 0, 0]
|
||||
r3 = [1, 0.875, 0.75, 0.75, 0.6, 0.6, 0.6]
|
||||
|
||||
# https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_survival/bs704_survival4.html
|
||||
t4 = [24, 3, 11, 19, 24, 13, 14, 2, 18, 17,
|
||||
24, 21, 12, 1, 10, 23, 6, 5, 9, 17]
|
||||
d4 = [0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1]
|
||||
r4 = [0.95, 0.95, 0.897, 0.844, 0.844, 0.844, 0.844, 0.844, 0.844,
|
||||
0.844, 0.76, 0.676, 0.676, 0.676, 0.676, 0.507, 0.507]
|
||||
|
||||
    # https://www.real-statistics.com/survival-analysis/kaplan-meier-procedure/confidence-interval-for-the-survival-function/
    t5 = [3, 5, 8, 10, 5, 5, 8, 12, 15, 14, 2, 11, 10, 9, 12, 5, 8, 11]
    d5 = [1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1]
    r5 = [0.944, 0.889, 0.722, 0.542, 0.542, 0.542, 0.361, 0.181, 0.181, 0.181]

    @pytest.mark.parametrize("case", [(t1, d1, r1), (t2, d2, r2), (t3, d3, r3),
                                      (t4, d4, r4), (t5, d5, r5)])
    def test_right_censored_against_examples(self, case):
        # test `ecdf` against other implementations on example problems
        times, died, ref = case
        sample = stats.CensoredData.right_censored(times, np.logical_not(died))
        res = stats.ecdf(sample)
        assert_allclose(res.sf.probabilities, ref, atol=1e-3)
        assert_equal(res.sf.quantiles, np.sort(np.unique(times)))

        # test reference implementation against other implementations
        res = _kaplan_meier_reference(times, np.logical_not(died))
        assert_equal(res[0], np.sort(np.unique(times)))
        assert_allclose(res[1], ref, atol=1e-3)

    @pytest.mark.parametrize('seed', [182746786639392128, 737379171436494115,
                                      576033618403180168, 308115465002673650])
    def test_right_censored_against_reference_implementation(self, seed):
        # test `ecdf` against reference implementation on random problems
        rng = np.random.default_rng(seed)
        n_unique = rng.integers(10, 100)
        sample, times, censored = self.get_random_sample(rng, n_unique)
        res = stats.ecdf(sample)
        ref = _kaplan_meier_reference(times, censored)
        assert_allclose(res.sf.quantiles, ref[0])
        assert_allclose(res.sf.probabilities, ref[1])

        # If all observations are uncensored, the KM estimate should match
        # the usual estimate for uncensored data
        sample = stats.CensoredData(uncensored=times)
        res = _survival._ecdf_right_censored(sample)  # force Kaplan-Meier
        ref = stats.ecdf(times)
        assert_equal(res[0], ref.sf.quantiles)
        assert_allclose(res[1], ref.cdf.probabilities, rtol=1e-14)
        assert_allclose(res[2], ref.sf.probabilities, rtol=1e-14)

    def test_right_censored_ci(self):
        # test "greenwood" confidence interval against example 4 (URL above).
        times, died = self.t4, self.d4
        sample = stats.CensoredData.right_censored(times, np.logical_not(died))
        res = stats.ecdf(sample)
        ref_allowance = [0.096, 0.096, 0.135, 0.162, 0.162, 0.162, 0.162,
                         0.162, 0.162, 0.162, 0.214, 0.246, 0.246, 0.246,
                         0.246, 0.341, 0.341]

        sf_ci = res.sf.confidence_interval()
        cdf_ci = res.cdf.confidence_interval()
        allowance = res.sf.probabilities - sf_ci.low.probabilities

        assert_allclose(allowance, ref_allowance, atol=1e-3)
        assert_allclose(sf_ci.low.probabilities,
                        np.clip(res.sf.probabilities - allowance, 0, 1))
        assert_allclose(sf_ci.high.probabilities,
                        np.clip(res.sf.probabilities + allowance, 0, 1))
        assert_allclose(cdf_ci.low.probabilities,
                        np.clip(res.cdf.probabilities - allowance, 0, 1))
        assert_allclose(cdf_ci.high.probabilities,
                        np.clip(res.cdf.probabilities + allowance, 0, 1))

        # test "log-log" confidence interval against Mathematica
        # e = {24, 3, 11, 19, 24, 13, 14, 2, 18, 17, 24, 21, 12, 1, 10, 23, 6, 5,
        #      9, 17}
        # ci = {1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0}
        # R = EventData[e, ci]
        # S = SurvivalModelFit[R]
        # S["PointwiseIntervals", ConfidenceLevel->0.95,
        #   ConfidenceTransform->"LogLog"]

        ref_low = [0.694743, 0.694743, 0.647529, 0.591142, 0.591142, 0.591142,
                   0.591142, 0.591142, 0.591142, 0.591142, 0.464605, 0.370359,
                   0.370359, 0.370359, 0.370359, 0.160489, 0.160489]
        ref_high = [0.992802, 0.992802, 0.973299, 0.947073, 0.947073, 0.947073,
                    0.947073, 0.947073, 0.947073, 0.947073, 0.906422, 0.856521,
                    0.856521, 0.856521, 0.856521, 0.776724, 0.776724]
        sf_ci = res.sf.confidence_interval(method='log-log')
        assert_allclose(sf_ci.low.probabilities, ref_low, atol=1e-6)
        assert_allclose(sf_ci.high.probabilities, ref_high, atol=1e-6)

    def test_right_censored_ci_example_5(self):
        # test "exponential greenwood" confidence interval against example 5
        times, died = self.t5, self.d5
        sample = stats.CensoredData.right_censored(times, np.logical_not(died))
        res = stats.ecdf(sample)
        lower = np.array([0.66639, 0.624174, 0.456179, 0.287822, 0.287822,
                          0.287822, 0.128489, 0.030957, 0.030957, 0.030957])
        upper = np.array([0.991983, 0.970995, 0.87378, 0.739467, 0.739467,
                          0.739467, 0.603133, 0.430365, 0.430365, 0.430365])

        sf_ci = res.sf.confidence_interval(method='log-log')
        cdf_ci = res.cdf.confidence_interval(method='log-log')

        assert_allclose(sf_ci.low.probabilities, lower, atol=1e-5)
        assert_allclose(sf_ci.high.probabilities, upper, atol=1e-5)
        assert_allclose(cdf_ci.low.probabilities, 1-upper, atol=1e-5)
        assert_allclose(cdf_ci.high.probabilities, 1-lower, atol=1e-5)

        # Test against R's `survival` library `survfit` function, 90% CI
        # library(survival)
        # options(digits=16)
        # time = c(3, 5, 8, 10, 5, 5, 8, 12, 15, 14, 2, 11, 10, 9, 12, 5, 8, 11)
        # status = c(1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1)
        # res = survfit(Surv(time, status)
        #               ~1, conf.type = "log-log", conf.int = 0.90)
        # res$time; res$lower; res$upper
        low = [0.74366748406861172, 0.68582332289196246, 0.50596835651480121,
               0.32913131413336727, 0.32913131413336727, 0.32913131413336727,
               0.15986912028781664, 0.04499539918147757, 0.04499539918147757,
               0.04499539918147757]
        high = [0.9890291867238429, 0.9638835422144144, 0.8560366823086629,
                0.7130167643978450, 0.7130167643978450, 0.7130167643978450,
                0.5678602982997164, 0.3887616766886558, 0.3887616766886558,
                0.3887616766886558]
        sf_ci = res.sf.confidence_interval(method='log-log',
                                           confidence_level=0.9)
        assert_allclose(sf_ci.low.probabilities, low)
        assert_allclose(sf_ci.high.probabilities, high)

        # And with conf.type = "plain"
        low = [0.8556383113628162, 0.7670478794850761, 0.5485720663578469,
               0.3441515412527123, 0.3441515412527123, 0.3441515412527123,
               0.1449184105424544, 0., 0., 0.]
        high = [1., 1., 0.8958723780865975, 0.7391817920806210,
                0.7391817920806210, 0.7391817920806210, 0.5773038116797676,
                0.3642270254596720, 0.3642270254596720, 0.3642270254596720]
        sf_ci = res.sf.confidence_interval(confidence_level=0.9)
        assert_allclose(sf_ci.low.probabilities, low)
        assert_allclose(sf_ci.high.probabilities, high)

    def test_right_censored_ci_nans(self):
        # test `ecdf` confidence interval on a problem that results in NaNs
        times, died = self.t1, self.d1
        sample = stats.CensoredData.right_censored(times, np.logical_not(died))
        res = stats.ecdf(sample)

        # Reference values generated with Matlab
        # format long
        # t = [37 43 47 56 60 62 71 77 80 81];
        # d = [0 0 1 1 0 0 0 1 1 1];
        # censored = ~d1;
        # [f, x, flo, fup] = ecdf(t, 'Censoring', censored, 'Alpha', 0.05);
        x = [37, 47, 56, 77, 80, 81]
        flo = [np.nan, 0, 0, 0.052701464070711, 0.337611126231790, np.nan]
        fup = [np.nan, 0.35417230377, 0.5500569798, 0.9472985359, 1.0, np.nan]
        i = np.searchsorted(res.cdf.quantiles, x)

        message = "The confidence interval is undefined at some observations"
        with pytest.warns(RuntimeWarning, match=message):
            ci = res.cdf.confidence_interval()

        # Matlab gives NaN as the first element of the CIs. Mathematica agrees,
        # but R's survfit does not. It makes some sense, but it's not what the
        # formula gives, so skip that element.
        assert_allclose(ci.low.probabilities[i][1:], flo[1:])
        assert_allclose(ci.high.probabilities[i][1:], fup[1:])

        # [f, x, flo, fup] = ecdf(t, 'Censoring', censored, 'Function',
        #                         'survivor', 'Alpha', 0.05);
        flo = [np.nan, 0.64582769623, 0.449943020228, 0.05270146407, 0, np.nan]
        fup = [np.nan, 1.0, 1.0, 0.947298535929289, 0.662388873768210, np.nan]
        i = np.searchsorted(res.cdf.quantiles, x)

        with pytest.warns(RuntimeWarning, match=message):
            ci = res.sf.confidence_interval()

        assert_allclose(ci.low.probabilities[i][1:], flo[1:])
        assert_allclose(ci.high.probabilities[i][1:], fup[1:])

        # With the same data, R's `survival` library `survfit` function
        # doesn't produce the leading NaN
        # library(survival)
        # options(digits=16)
        # time = c(37, 43, 47, 56, 60, 62, 71, 77, 80, 81)
        # status = c(0, 0, 1, 1, 0, 0, 0, 1, 1, 1)
        # res = survfit(Surv(time, status)
        #               ~1, conf.type = "plain", conf.int = 0.95)
        # res$time
        # res$lower
        # res$upper
        low = [1., 1., 0.64582769623233816, 0.44994302022779326,
               0.44994302022779326, 0.44994302022779326, 0.44994302022779326,
               0.05270146407071086, 0., np.nan]
        high = [1., 1., 1., 1., 1., 1., 1., 0.9472985359292891,
                0.6623888737682101, np.nan]
        assert_allclose(ci.low.probabilities, low)
        assert_allclose(ci.high.probabilities, high)

        # It does with conf.type="log-log", as do we
        with pytest.warns(RuntimeWarning, match=message):
            ci = res.sf.confidence_interval(method='log-log')
        low = [np.nan, np.nan, 0.38700001403202522, 0.31480711370551911,
               0.31480711370551911, 0.31480711370551911, 0.31480711370551911,
               0.08048821148507734, 0.01049958986680601, np.nan]
        high = [np.nan, np.nan, 0.9813929658789660, 0.9308983170906275,
                0.9308983170906275, 0.9308983170906275, 0.9308983170906275,
                0.8263946341076415, 0.6558775085110887, np.nan]
        assert_allclose(ci.low.probabilities, low)
        assert_allclose(ci.high.probabilities, high)

    def test_right_censored_against_uncensored(self):
        rng = np.random.default_rng(7463952748044886637)
        sample = rng.integers(10, 100, size=1000)
        censored = np.zeros_like(sample)
        censored[np.argmax(sample)] = True
        res = stats.ecdf(sample)
        ref = stats.ecdf(stats.CensoredData.right_censored(sample, censored))
        assert_equal(res.sf.quantiles, ref.sf.quantiles)
        assert_equal(res.sf._n, ref.sf._n)
        assert_equal(res.sf._d[:-1], ref.sf._d[:-1])  # difference @ [-1]
        assert_allclose(res.sf._sf[:-1], ref.sf._sf[:-1], rtol=1e-14)

    def test_plot_iv(self):
        rng = np.random.default_rng(1769658657308472721)
        n_unique = rng.integers(10, 100)
        sample, _, _ = self.get_random_sample(rng, n_unique)
        res = stats.ecdf(sample)

        try:
            import matplotlib.pyplot as plt  # noqa: F401
            res.sf.plot()  # no other errors occur
        except (ModuleNotFoundError, ImportError):
            message = r"matplotlib must be installed to use method `plot`."
            with pytest.raises(ModuleNotFoundError, match=message):
                res.sf.plot()


class TestLogRank:

    @pytest.mark.parametrize(
        "x, y, statistic, pvalue",
        # Results validated with R
        # library(survival)
        # options(digits=16)
        #
        # futime_1 <- c(8, 12, 26, 14, 21, 27, 8, 32, 20, 40)
        # fustat_1 <- c(1, 1, 1, 1, 1, 1, 0, 0, 0, 0)
        # rx_1 <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
        #
        # futime_2 <- c(33, 28, 41, 48, 48, 25, 37, 48, 25, 43)
        # fustat_2 <- c(1, 1, 1, 0, 0, 0, 0, 0, 0, 0)
        # rx_2 <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
        #
        # futime <- c(futime_1, futime_2)
        # fustat <- c(fustat_1, fustat_2)
        # rx <- c(rx_1, rx_2)
        #
        # survdiff(formula = Surv(futime, fustat) ~ rx)
        #
        # Also check against another library which handles alternatives
        # library(nph)
        # logrank.test(futime, fustat, rx, alternative = "two.sided")
        # res["test"]
        [(
            # https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_survival/BS704_Survival5.html
            # uncensored, censored
            [[8, 12, 26, 14, 21, 27], [8, 32, 20, 40]],
            [[33, 28, 41], [48, 48, 25, 37, 48, 25, 43]],
            # chi2, ["two-sided", "less", "greater"]
            6.91598157449,
            [0.008542873404, 0.9957285632979385, 0.004271436702061537]
        ),
        (
            # https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_survival/BS704_Survival5.html
            [[19, 6, 5, 4], [20, 19, 17, 14]],
            [[16, 21, 7], [21, 15, 18, 18, 5]],
            0.835004855038,
            [0.3608293039, 0.8195853480676912, 0.1804146519323088]
        ),
        (
            # Bland, Altman, "The logrank test", BMJ, 2004
            # https://www.bmj.com/content/328/7447/1073.short
            [[6, 13, 21, 30, 37, 38, 49, 50, 63, 79, 86, 98, 202, 219],
             [31, 47, 80, 82, 82, 149]],
            [[10, 10, 12, 13, 14, 15, 16, 17, 18, 20, 24, 24, 25, 28, 30,
              33, 35, 37, 40, 40, 46, 48, 76, 81, 82, 91, 112, 181],
             [34, 40, 70]],
            7.49659416854,
            [0.006181578637, 0.003090789318730882, 0.9969092106812691]
        )]
    )
    def test_log_rank(self, x, y, statistic, pvalue):
        x = stats.CensoredData(uncensored=x[0], right=x[1])
        y = stats.CensoredData(uncensored=y[0], right=y[1])

        for i, alternative in enumerate(["two-sided", "less", "greater"]):
            res = stats.logrank(x=x, y=y, alternative=alternative)

            # we return z and use the normal distribution, while other
            # frameworks return z**2. The p-values are directly comparable,
            # but we have to square the statistic.
            assert_allclose(res.statistic**2, statistic, atol=1e-10)
            assert_allclose(res.pvalue, pvalue[i], atol=1e-10)

    def test_raises(self):
        sample = stats.CensoredData([1, 2])

        msg = r"`y` must be"
        with pytest.raises(ValueError, match=msg):
            stats.logrank(x=sample, y=[[1, 2]])

        msg = r"`x` must be"
        with pytest.raises(ValueError, match=msg):
            stats.logrank(x=[[1, 2]], y=sample)
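For orientation, the product-limit estimate exercised by the right-censored tests above is the Kaplan-Meier formula S(t) = prod over event times t_i <= t of (1 - d_i / n_i), where d_i counts deaths at t_i and n_i counts subjects still at risk. A minimal sketch, assuming nothing beyond NumPy; the helper name below is illustrative and is not scipy's `_kaplan_meier_reference`:

import numpy as np

def kaplan_meier_sketch(times, died):
    # Survival estimate at each distinct observation time:
    # S(t) = prod_{t_i <= t} (1 - d_i / n_i)
    times = np.asarray(times)
    died = np.asarray(died, dtype=bool)
    uniq = np.sort(np.unique(times))
    surv, sf = 1.0, []
    for t in uniq:
        n_i = np.count_nonzero(times >= t)           # still at risk just before t
        d_i = np.count_nonzero((times == t) & died)  # observed deaths at t
        surv *= 1.0 - d_i / n_i
        sf.append(surv)
    return uniq, np.array(sf)

# Applied to t5/d5 above, this reproduces the leading values of r5
# (0.944, 0.889, 0.722, ...) to the three decimals quoted.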
@ -1,85 +0,0 @@
import numpy as np
from numpy.testing import assert_allclose, assert_equal

from scipy.stats._tukeylambda_stats import (tukeylambda_variance,
                                            tukeylambda_kurtosis)


def test_tukeylambda_stats_known_exact():
    """Compare results with some known exact formulas."""
    # Some exact values of the Tukey Lambda variance and kurtosis:
    #   lambda    var      kurtosis
    #     0     pi**2/3      6/5     (logistic distribution)
    #    0.5     4 - pi    (5/3 - pi/2)/(pi/4 - 1)**2 - 3
    #     1       1/3       -6/5     (uniform distribution on (-1,1))
    #     2      1/12       -6/5     (uniform distribution on (-1/2, 1/2))

    # lambda = 0
    var = tukeylambda_variance(0)
    assert_allclose(var, np.pi**2 / 3, atol=1e-12)
    kurt = tukeylambda_kurtosis(0)
    assert_allclose(kurt, 1.2, atol=1e-10)

    # lambda = 0.5
    var = tukeylambda_variance(0.5)
    assert_allclose(var, 4 - np.pi, atol=1e-12)
    kurt = tukeylambda_kurtosis(0.5)
    desired = (5./3 - np.pi/2) / (np.pi/4 - 1)**2 - 3
    assert_allclose(kurt, desired, atol=1e-10)

    # lambda = 1
    var = tukeylambda_variance(1)
    assert_allclose(var, 1.0 / 3, atol=1e-12)
    kurt = tukeylambda_kurtosis(1)
    assert_allclose(kurt, -1.2, atol=1e-10)

    # lambda = 2
    var = tukeylambda_variance(2)
    assert_allclose(var, 1.0 / 12, atol=1e-12)
    kurt = tukeylambda_kurtosis(2)
    assert_allclose(kurt, -1.2, atol=1e-10)


def test_tukeylambda_stats_mpmath():
    """Compare results with some values that were computed using mpmath."""
    a10 = dict(atol=1e-10, rtol=0)
    a12 = dict(atol=1e-12, rtol=0)
    data = [
        # lambda       variance                kurtosis
        [-0.1, 4.78050217874253547, 3.78559520346454510],
        [-0.0649, 4.16428023599895777, 2.52019675947435718],
        [-0.05, 3.93672267890775277, 2.13129793057777277],
        [-0.001, 3.30128380390964882, 1.21452460083542988],
        [0.001, 3.27850775649572176, 1.18560634779287585],
        [0.03125, 2.95927803254615800, 0.804487555161819980],
        [0.05, 2.78281053405464501, 0.611604043886644327],
        [0.0649, 2.65282386754100551, 0.476834119532774540],
        [1.2, 0.242153920578588346, -1.23428047169049726],
        [10.0, 0.00095237579757703597, 2.37810697355144933],
        [20.0, 0.00012195121951131043, 7.37654321002709531],
    ]

    for lam, var_expected, kurt_expected in data:
        var = tukeylambda_variance(lam)
        assert_allclose(var, var_expected, **a12)
        kurt = tukeylambda_kurtosis(lam)
        assert_allclose(kurt, kurt_expected, **a10)

    # Test with vector arguments (most of the other tests are for single
    # values).
    lam, var_expected, kurt_expected = zip(*data)
    var = tukeylambda_variance(lam)
    assert_allclose(var, var_expected, **a12)
    kurt = tukeylambda_kurtosis(lam)
    assert_allclose(kurt, kurt_expected, **a10)


def test_tukeylambda_stats_invalid():
    """Test values of lambda outside the domains of the functions."""
    lam = [-1.0, -0.5]
    var = tukeylambda_variance(lam)
    assert_equal(var, np.array([np.nan, np.inf]))

    lam = [-1.0, -0.25]
    kurt = tukeylambda_kurtosis(lam)
    assert_equal(kurt, np.array([np.nan, np.inf]))
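The exact values checked above follow from the closed-form variance of the Tukey lambda distribution, Var(X) = (2/lam**2) * (1/(2*lam + 1) - B(lam + 1, lam + 1)) for lam > -1/2 and lam != 0, with the limit pi**2/3 at lam = 0. A minimal cross-check under those assumptions, using only NumPy and scipy.special; the function name is illustrative and is not part of scipy.stats._tukeylambda_stats:

import numpy as np
from scipy.special import beta

def tukeylambda_variance_closed_form(lam):
    # Var(X) = (2 / lam**2) * (1 / (2*lam + 1) - B(lam + 1, lam + 1)),
    # valid for lam > -1/2, lam != 0.
    return (2.0 / lam**2) * (1.0 / (2.0*lam + 1.0) - beta(lam + 1.0, lam + 1.0))

# Reproduces the exact cases exercised in test_tukeylambda_stats_known_exact.
assert np.isclose(tukeylambda_variance_closed_form(0.5), 4 - np.pi)
assert np.isclose(tukeylambda_variance_closed_form(1.0), 1/3)
assert np.isclose(tukeylambda_variance_closed_form(2.0), 1/12)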
@ -1,216 +0,0 @@
import math

import numpy as np
import pytest
from numpy.testing import suppress_warnings

from scipy.stats import variation
from scipy._lib._util import AxisError
from scipy._lib._array_api import is_numpy
from scipy._lib._array_api_no_0d import xp_assert_equal, xp_assert_close
from scipy.stats._axis_nan_policy import (too_small_nd_omit, too_small_nd_not_omit,
                                          SmallSampleWarning)

skip_xp_backends = pytest.mark.skip_xp_backends


@skip_xp_backends('torch', reason='data-apis/array-api-compat#271')
class TestVariation:
    """
    Test class for scipy.stats.variation
    """

    def test_ddof(self, xp):
        x = xp.arange(9.0)
        xp_assert_close(variation(x, ddof=1), xp.asarray(math.sqrt(60/8)/4))

    @pytest.mark.parametrize('sgn', [1, -1])
    def test_sign(self, sgn, xp):
        x = xp.asarray([1., 2., 3., 4., 5.])
        v = variation(sgn*x)
        expected = xp.asarray(sgn*math.sqrt(2)/3)
        xp_assert_close(v, expected, rtol=1e-10)

    @skip_xp_backends(np_only=True, reason="test plain python scalar input")
    def test_scalar(self, xp):
        # A scalar is treated like a 1-d sequence with length 1.
        assert variation(4.0) == 0.0

    @pytest.mark.parametrize('nan_policy, expected',
                             [('propagate', np.nan),
                              ('omit', np.sqrt(20/3)/4)])
    @skip_xp_backends(np_only=True,
                      reason='`nan_policy` only supports NumPy backend')
    def test_variation_nan(self, nan_policy, expected, xp):
        x = xp.arange(10.)
        x[9] = xp.nan
        xp_assert_close(variation(x, nan_policy=nan_policy), expected)

    @skip_xp_backends(np_only=True,
                      reason='`nan_policy` only supports NumPy backend')
    def test_nan_policy_raise(self, xp):
        x = xp.asarray([1.0, 2.0, xp.nan, 3.0])
        with pytest.raises(ValueError, match='input contains nan'):
            variation(x, nan_policy='raise')

    @skip_xp_backends(np_only=True,
                      reason='`nan_policy` only supports NumPy backend')
    def test_bad_nan_policy(self, xp):
        with pytest.raises(ValueError, match='must be one of'):
            variation([1, 2, 3], nan_policy='foobar')

    @skip_xp_backends(np_only=True,
                      reason='`keepdims` only supports NumPy backend')
    def test_keepdims(self, xp):
        x = xp.reshape(xp.arange(10), (2, 5))
        y = variation(x, axis=1, keepdims=True)
        expected = np.array([[np.sqrt(2)/2],
                             [np.sqrt(2)/7]])
        xp_assert_close(y, expected)

    @skip_xp_backends(np_only=True,
                      reason='`keepdims` only supports NumPy backend')
    @pytest.mark.parametrize('axis, expected',
                             [(0, np.empty((1, 0))),
                              (1, np.full((5, 1), fill_value=np.nan))])
    def test_keepdims_size0(self, axis, expected, xp):
        x = xp.zeros((5, 0))
        if axis == 1:
            with pytest.warns(SmallSampleWarning, match=too_small_nd_not_omit):
                y = variation(x, axis=axis, keepdims=True)
        else:
            y = variation(x, axis=axis, keepdims=True)
        xp_assert_equal(y, expected)

    @skip_xp_backends(np_only=True,
                      reason='`keepdims` only supports NumPy backend')
    @pytest.mark.parametrize('incr, expected_fill', [(0, np.inf), (1, np.nan)])
    def test_keepdims_and_ddof_eq_len_plus_incr(self, incr, expected_fill, xp):
        x = xp.asarray([[1, 1, 2, 2], [1, 2, 3, 3]])
        y = variation(x, axis=1, ddof=x.shape[1] + incr, keepdims=True)
        xp_assert_equal(y, xp.full((2, 1), fill_value=expected_fill))

    @skip_xp_backends(np_only=True,
                      reason='`nan_policy` only supports NumPy backend')
    def test_propagate_nan(self, xp):
        # Check that the shape of the result is the same for inputs
        # with and without nans, cf gh-5817
        a = xp.reshape(xp.arange(8, dtype=float), (2, -1))
        a[1, 0] = xp.nan
        v = variation(a, axis=1, nan_policy="propagate")
        xp_assert_close(v, [math.sqrt(5/4)/1.5, xp.nan], atol=1e-15)

    @skip_xp_backends(np_only=True, reason='Python list input uses NumPy backend')
    def test_axis_none(self, xp):
        # Check that `variation` computes the result on the flattened
        # input when axis is None.
        y = variation([[0, 1], [2, 3]], axis=None)
        xp_assert_close(y, math.sqrt(5/4)/1.5)

    def test_bad_axis(self, xp):
        # Check that an invalid axis raises np.exceptions.AxisError.
        x = xp.asarray([[1, 2, 3], [4, 5, 6]])
        with pytest.raises((AxisError, IndexError)):
            variation(x, axis=10)

    @pytest.mark.filterwarnings("ignore:divide by zero encountered:RuntimeWarning:dask")
    def test_mean_zero(self, xp):
        # Check that `variation` returns inf for a sequence that is not
        # identically zero but whose mean is zero.
        x = xp.asarray([10., -3., 1., -4., -4.])
        y = variation(x)
        xp_assert_equal(y, xp.asarray(xp.inf))

        x2 = xp.stack([x, -10.*x])
        y2 = variation(x2, axis=1)
        xp_assert_equal(y2, xp.asarray([xp.inf, xp.inf]))

    @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning:dask")
    @pytest.mark.parametrize('x', [[0.]*5, [1, 2, np.inf, 9]])
    def test_return_nan(self, x, xp):
        x = xp.asarray(x)
        # Test some cases where `variation` returns nan.
        y = variation(x)
        xp_assert_equal(y, xp.asarray(xp.nan, dtype=x.dtype))

    @pytest.mark.parametrize('axis, expected',
                             [(0, []), (1, [np.nan]*3), (None, np.nan)])
    def test_2d_size_zero_with_axis(self, axis, expected, xp):
        x = xp.empty((3, 0))
        with suppress_warnings() as sup:
            # torch
            sup.filter(UserWarning, "std*")
            if axis != 0:
                if is_numpy(xp):
                    with pytest.warns(SmallSampleWarning, match="See documentation..."):
                        y = variation(x, axis=axis)
                else:
                    y = variation(x, axis=axis)
            else:
                y = variation(x, axis=axis)
        xp_assert_equal(y, xp.asarray(expected))

    def test_neg_inf(self, xp):
        # Edge case that produces -inf: ddof equals the number of non-nan
        # values, the values are not constant, and the mean is negative.
        x1 = xp.asarray([-3., -5.])
        xp_assert_equal(variation(x1, ddof=2), xp.asarray(-xp.inf))

    @skip_xp_backends(np_only=True,
                      reason='`nan_policy` only supports NumPy backend')
    def test_neg_inf_nan(self, xp):
        x2 = xp.asarray([[xp.nan, 1, -10, xp.nan],
                         [-20, -3, xp.nan, xp.nan]])
        xp_assert_equal(variation(x2, axis=1, ddof=2, nan_policy='omit'),
                        [-xp.inf, -xp.inf])

    @skip_xp_backends(np_only=True,
                      reason='`nan_policy` only supports NumPy backend')
    @pytest.mark.parametrize("nan_policy", ['propagate', 'omit'])
    def test_combined_edge_cases(self, nan_policy, xp):
        x = xp.asarray([[0, 10, xp.nan, 1],
                        [0, -5, xp.nan, 2],
                        [0, -5, xp.nan, 3]])
        if nan_policy == 'omit':
            with pytest.warns(SmallSampleWarning, match=too_small_nd_omit):
                y = variation(x, axis=0, nan_policy=nan_policy)
        else:
            y = variation(x, axis=0, nan_policy=nan_policy)
        xp_assert_close(y, [xp.nan, xp.inf, xp.nan, math.sqrt(2/3)/2])

    @skip_xp_backends(np_only=True,
                      reason='`nan_policy` only supports NumPy backend')
    @pytest.mark.parametrize(
        'ddof, expected',
        [(0, [np.sqrt(1/6), np.sqrt(5/8), np.inf, 0, np.nan, 0.0, np.nan]),
         (1, [0.5, np.sqrt(5/6), np.inf, 0, np.nan, 0, np.nan]),
         (2, [np.sqrt(0.5), np.sqrt(5/4), np.inf, np.nan, np.nan, 0, np.nan])]
    )
    def test_more_nan_policy_omit_tests(self, ddof, expected, xp):
        # The slightly strange formatting in the following array is my attempt
        # to maintain a clean tabular arrangement of the data while satisfying
        # the demands of pycodestyle. Currently, E201 and E241 are not
        # disabled by the `noqa` annotation.
        nan = xp.nan
        x = xp.asarray([[1.0, 2.0, nan, 3.0],
                        [0.0, 4.0, 3.0, 1.0],
                        [nan, -.5, 0.5, nan],
                        [nan, 9.0, 9.0, nan],
                        [nan, nan, nan, nan],
                        [3.0, 3.0, 3.0, 3.0],
                        [0.0, 0.0, 0.0, 0.0]])
        with pytest.warns(SmallSampleWarning, match=too_small_nd_omit):
            v = variation(x, axis=1, ddof=ddof, nan_policy='omit')
        xp_assert_close(v, expected)

    @skip_xp_backends(np_only=True,
                      reason='`nan_policy` only supports NumPy backend')
    def test_variation_ddof(self, xp):
        # test variation with delta degrees of freedom
        # regression test for gh-13341
        a = xp.asarray([1., 2., 3., 4., 5.])
        nan_a = xp.asarray([1, 2, 3, xp.nan, 4, 5, xp.nan])
        y = variation(a, ddof=1)
        nan_y = variation(nan_a, nan_policy="omit", ddof=1)
        xp_assert_close(y, math.sqrt(5/2)/3)
        assert y == nan_y
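The quantity exercised throughout these tests is the coefficient of variation, the standard deviation divided by the mean along the chosen axis. A minimal sketch of that definition (the function name is illustrative, not scipy's implementation), which reproduces the `test_ddof` expectation above:

import numpy as np

def coefficient_of_variation(x, axis=0, ddof=0):
    # std / mean along `axis`; ddof is passed through to the std computation.
    x = np.asarray(x, dtype=float)
    return np.std(x, axis=axis, ddof=ddof) / np.mean(x, axis=axis)

x = np.arange(9.0)  # mean 4, sample std sqrt(60/8)
assert np.isclose(coefficient_of_variation(x, ddof=1), np.sqrt(60/8) / 4)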