up follow livre
This commit is contained in:
parent
b4b4398bb0
commit
3a7a3849ae
12242 changed files with 2564461 additions and 6914 deletions
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,145 @@
|
|||
from numpy import array
|
||||
|
||||
|
||||
Q_X = array([[5.26563660e-01, 3.14160190e-01, 8.00656370e-02],
|
||||
[7.50205180e-01, 4.60299830e-01, 8.98696460e-01],
|
||||
[6.65461230e-01, 6.94011420e-01, 9.10465700e-01],
|
||||
[9.64047590e-01, 1.43082200e-03, 7.39874220e-01],
|
||||
[1.08159060e-01, 5.53028790e-01, 6.63804780e-02],
|
||||
[9.31359130e-01, 8.25424910e-01, 9.52315440e-01],
|
||||
[6.78086960e-01, 3.41903970e-01, 5.61481950e-01],
|
||||
[9.82730940e-01, 7.04605210e-01, 8.70978630e-02],
|
||||
[6.14691610e-01, 4.69989230e-02, 6.02406450e-01],
|
||||
[5.80161260e-01, 9.17354970e-01, 5.88163850e-01],
|
||||
[1.38246310e+00, 1.96358160e+00, 1.94437880e+00],
|
||||
[2.10675860e+00, 1.67148730e+00, 1.34854480e+00],
|
||||
[1.39880070e+00, 1.66142050e+00, 1.32224550e+00],
|
||||
[1.71410460e+00, 1.49176380e+00, 1.45432170e+00],
|
||||
[1.54102340e+00, 1.84374950e+00, 1.64658950e+00],
|
||||
[2.08512480e+00, 1.84524350e+00, 2.17340850e+00],
|
||||
[1.30748740e+00, 1.53801650e+00, 2.16007740e+00],
|
||||
[1.41447700e+00, 1.99329070e+00, 1.99107420e+00],
|
||||
[1.61943490e+00, 1.47703280e+00, 1.89788160e+00],
|
||||
[1.59880600e+00, 1.54988980e+00, 1.57563350e+00],
|
||||
[3.37247380e+00, 2.69635310e+00, 3.39981700e+00],
|
||||
[3.13705120e+00, 3.36528090e+00, 3.06089070e+00],
|
||||
[3.29413250e+00, 3.19619500e+00, 2.90700170e+00],
|
||||
[2.65510510e+00, 3.06785900e+00, 2.97198540e+00],
|
||||
[3.30941040e+00, 2.59283970e+00, 2.57714110e+00],
|
||||
[2.59557220e+00, 3.33477370e+00, 3.08793190e+00],
|
||||
[2.58206180e+00, 3.41615670e+00, 3.26441990e+00],
|
||||
[2.71127000e+00, 2.77032450e+00, 2.63466500e+00],
|
||||
[2.79617850e+00, 3.25473720e+00, 3.41801560e+00],
|
||||
[2.64741750e+00, 2.54538040e+00, 3.25354110e+00]])
|
||||
|
||||
ytdist = array([662., 877., 255., 412., 996., 295., 468., 268., 400., 754.,
|
||||
564., 138., 219., 869., 669.])
|
||||
|
||||
linkage_ytdist_single = array([[2., 5., 138., 2.],
|
||||
[3., 4., 219., 2.],
|
||||
[0., 7., 255., 3.],
|
||||
[1., 8., 268., 4.],
|
||||
[6., 9., 295., 6.]])
|
||||
|
||||
linkage_ytdist_complete = array([[2., 5., 138., 2.],
|
||||
[3., 4., 219., 2.],
|
||||
[1., 6., 400., 3.],
|
||||
[0., 7., 412., 3.],
|
||||
[8., 9., 996., 6.]])
|
||||
|
||||
linkage_ytdist_average = array([[2., 5., 138., 2.],
|
||||
[3., 4., 219., 2.],
|
||||
[0., 7., 333.5, 3.],
|
||||
[1., 6., 347.5, 3.],
|
||||
[8., 9., 680.77777778, 6.]])
|
||||
|
||||
linkage_ytdist_weighted = array([[2., 5., 138., 2.],
|
||||
[3., 4., 219., 2.],
|
||||
[0., 7., 333.5, 3.],
|
||||
[1., 6., 347.5, 3.],
|
||||
[8., 9., 670.125, 6.]])
|
||||
|
||||
# the optimal leaf ordering of linkage_ytdist_single
|
||||
linkage_ytdist_single_olo = array([[5., 2., 138., 2.],
|
||||
[4., 3., 219., 2.],
|
||||
[7., 0., 255., 3.],
|
||||
[1., 8., 268., 4.],
|
||||
[6., 9., 295., 6.]])
|
||||
|
||||
X = array([[1.43054825, -7.5693489],
|
||||
[6.95887839, 6.82293382],
|
||||
[2.87137846, -9.68248579],
|
||||
[7.87974764, -6.05485803],
|
||||
[8.24018364, -6.09495602],
|
||||
[7.39020262, 8.54004355]])
|
||||
|
||||
linkage_X_centroid = array([[3., 4., 0.36265956, 2.],
|
||||
[1., 5., 1.77045373, 2.],
|
||||
[0., 2., 2.55760419, 2.],
|
||||
[6., 8., 6.43614494, 4.],
|
||||
[7., 9., 15.17363237, 6.]])
|
||||
|
||||
linkage_X_median = array([[3., 4., 0.36265956, 2.],
|
||||
[1., 5., 1.77045373, 2.],
|
||||
[0., 2., 2.55760419, 2.],
|
||||
[6., 8., 6.43614494, 4.],
|
||||
[7., 9., 15.17363237, 6.]])
|
||||
|
||||
linkage_X_ward = array([[3., 4., 0.36265956, 2.],
|
||||
[1., 5., 1.77045373, 2.],
|
||||
[0., 2., 2.55760419, 2.],
|
||||
[6., 8., 9.10208346, 4.],
|
||||
[7., 9., 24.7784379, 6.]])
|
||||
|
||||
# the optimal leaf ordering of linkage_X_ward
|
||||
linkage_X_ward_olo = array([[4., 3., 0.36265956, 2.],
|
||||
[5., 1., 1.77045373, 2.],
|
||||
[2., 0., 2.55760419, 2.],
|
||||
[6., 8., 9.10208346, 4.],
|
||||
[7., 9., 24.7784379, 6.]])
|
||||
|
||||
inconsistent_ytdist = {
|
||||
1: array([[138., 0., 1., 0.],
|
||||
[219., 0., 1., 0.],
|
||||
[255., 0., 1., 0.],
|
||||
[268., 0., 1., 0.],
|
||||
[295., 0., 1., 0.]]),
|
||||
2: array([[138., 0., 1., 0.],
|
||||
[219., 0., 1., 0.],
|
||||
[237., 25.45584412, 2., 0.70710678],
|
||||
[261.5, 9.19238816, 2., 0.70710678],
|
||||
[233.66666667, 83.9424406, 3., 0.7306594]]),
|
||||
3: array([[138., 0., 1., 0.],
|
||||
[219., 0., 1., 0.],
|
||||
[237., 25.45584412, 2., 0.70710678],
|
||||
[247.33333333, 25.38372182, 3., 0.81417007],
|
||||
[239., 69.36377537, 4., 0.80733783]]),
|
||||
4: array([[138., 0., 1., 0.],
|
||||
[219., 0., 1., 0.],
|
||||
[237., 25.45584412, 2., 0.70710678],
|
||||
[247.33333333, 25.38372182, 3., 0.81417007],
|
||||
[235., 60.73302232, 5., 0.98793042]])}
|
||||
|
||||
fcluster_inconsistent = {
|
||||
0.8: array([6, 2, 2, 4, 6, 2, 3, 7, 3, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1]),
|
||||
1.0: array([6, 2, 2, 4, 6, 2, 3, 7, 3, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1]),
|
||||
2.0: array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1])}
|
||||
|
||||
fcluster_distance = {
|
||||
0.6: array([4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 6, 6, 6, 6, 6, 7, 6, 6, 6, 6, 3,
|
||||
1, 1, 1, 2, 1, 1, 1, 1, 1]),
|
||||
1.0: array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1]),
|
||||
2.0: array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1])}
|
||||
|
||||
fcluster_maxclust = {
|
||||
8.0: array([5, 5, 5, 5, 5, 5, 5, 6, 5, 5, 7, 7, 7, 7, 7, 8, 7, 7, 7, 7, 4,
|
||||
1, 1, 1, 3, 1, 1, 1, 1, 2]),
|
||||
4.0: array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1]),
|
||||
1.0: array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1])}
|
||||
|
|
@ -0,0 +1,202 @@
|
|||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
import numpy as np
|
||||
from scipy.cluster.hierarchy import DisjointSet
|
||||
import string
|
||||
|
||||
|
||||
def generate_random_token():
    """Yield an endless, reproducible stream of hashable test tokens.

    The token pool holds ``len(string.ascii_letters)`` entries of each of
    four kinds: NumPy integer scalars, NumPy float scalars, single ASCII
    letters and ``None``.  Each draw yields either one pool entry or a
    2-tuple of pool entries.  The generator owns a ``RandomState`` seeded
    with 0, so every fresh generator replays the same sequence.
    """
    k = len(string.ascii_letters)
    tokens = list(np.arange(k, dtype=int))
    tokens += list(np.arange(k, dtype=float))
    tokens += list(string.ascii_letters)
    tokens += [None] * k
    # dtype=object keeps the mixed entries as-is instead of coercing them
    tokens = np.array(tokens, dtype=object)
    rng = np.random.RandomState(seed=0)

    while True:
        # randint's upper bound is exclusive, so size is either 1 or 2
        size = rng.randint(1, 3)
        element = rng.choice(tokens, size)
        if size == 1:
            yield element[0]
        else:
            yield tuple(element)
|
||||
|
||||
|
||||
def get_elements(n):
    """Return the first `n` distinct tokens from `generate_random_token`.

    Tokens are returned in the order they were first produced.
    """
    # A dict keeps first-seen order and de-duplicates by hash/equality,
    # which sidesteps the difficulty of comparing numpy ints directly.
    seen = {}
    stream = generate_random_token()
    while True:
        token = next(stream)
        if token in seen:
            continue
        seen[token] = len(seen)
        if len(seen) >= n:
            return list(seen)
|
||||
|
||||
|
||||
def test_init():
    """A freshly built DisjointSet has one subset per element and iterates
    over the elements in the order they were given."""
    count = 10
    members = get_elements(count)
    disjoint = DisjointSet(members)
    assert disjoint.n_subsets == count
    assert list(disjoint) == members
|
||||
|
||||
|
||||
def test_len():
    """``len`` reports the element count and grows by one after ``add``."""
    count = 10
    disjoint = DisjointSet(get_elements(count))
    assert len(disjoint) == count

    # "dummy" is a multi-character string, so it cannot collide with the
    # single-letter tokens produced by get_elements
    disjoint.add("dummy")
    assert len(disjoint) == count + 1
|
||||
|
||||
|
||||
@pytest.mark.parametrize("n", [10, 100])
def test_contains(n):
    """Every inserted element is reported as a member; an element that was
    never inserted is not."""
    members = get_elements(n)
    disjoint = DisjointSet(members)
    for member in members:
        assert member in disjoint

    assert "dummy" not in disjoint
|
||||
|
||||
|
||||
@pytest.mark.parametrize("n", [10, 100])
def test_add(n):
    """Adding elements one by one matches bulk construction, and re-adding
    an existing element leaves the size unchanged."""
    elements = get_elements(n)
    bulk = DisjointSet(elements)

    incremental = DisjointSet()
    for expected_len, item in enumerate(elements, start=1):
        incremental.add(item)
        assert len(incremental) == expected_len

        # adding the same element a second time must not change the size
        incremental.add(item)
        assert len(incremental) == expected_len

    assert list(bulk) == list(incremental)
|
||||
|
||||
|
||||
def test_element_not_present():
    """Lookup, merge and connected all raise KeyError for an element that
    is not in the set."""
    elements = get_elements(n=10)
    dis = DisjointSet(elements)
    present = elements[0]

    # each callable touches the missing element "dummy" through a
    # different entry point; they run in the same order as listed
    operations = (
        lambda: dis["dummy"],
        lambda: dis.merge(present, "dummy"),
        lambda: dis.connected(present, "dummy"),
    )
    for operation in operations:
        with assert_raises(KeyError):
            operation()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("direction", ["forwards", "backwards"])
@pytest.mark.parametrize("n", [10, 100])
def test_linear_union_sequence(n, direction):
    """Merge neighbouring elements in ascending or descending index order
    and check connectivity, the subset count and the final common root."""
    elements = get_elements(n)
    dis = DisjointSet(elements)
    assert elements == list(dis)

    order = list(range(n - 1))
    if direction == "backwards":
        order.reverse()

    for merges_done, idx in enumerate(order):
        left, right = elements[idx], elements[idx + 1]
        assert not dis.connected(left, right)
        assert dis.merge(left, right)
        assert dis.connected(left, right)
        assert dis.n_subsets == n - 1 - merges_done

    roots = [dis[element] for element in elements]
    # forwards merging is expected to end rooted at the first element,
    # backwards merging at the second-to-last one
    expected_root = elements[0] if direction == "forwards" else elements[-2]
    assert all(expected_root == root for root in roots)
    # everything is already in one subset, so a further merge reports False
    assert not dis.merge(elements[0], elements[-1])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("n", [10, 100])
def test_self_unions(n):
    """Merging an element with itself reports False and changes nothing."""
    elements = get_elements(n)
    dis = DisjointSet(elements)

    for item in elements:
        assert dis.connected(item, item)
        assert not dis.merge(item, item)
        assert dis.connected(item, item)
    assert dis.n_subsets == len(elements)

    # iteration order is untouched and every element is still its own root
    assert elements == list(dis)
    own_roots = [dis[item] for item in elements]
    assert elements == own_roots
|
||||
|
||||
|
||||
@pytest.mark.parametrize("order", ["ab", "ba"])
@pytest.mark.parametrize("n", [10, 100])
def test_equal_size_ordering(n, order):
    """When two singletons are merged, the root is the element with the
    smaller insertion index, whichever argument order is used."""
    elements = get_elements(n)
    dis = DisjointSet(elements)

    rng = np.random.RandomState(seed=0)
    shuffled = np.arange(n)
    rng.shuffle(shuffled)

    # walk the shuffled indices two at a time
    for pos in range(0, len(shuffled), 2):
        idx_a, idx_b = shuffled[pos], shuffled[pos + 1]
        a, b = elements[idx_a], elements[idx_b]
        first, second = (a, b) if order == "ab" else (b, a)
        assert dis.merge(first, second)

        expected = elements[min(idx_a, idx_b)]
        assert dis[a] == expected
        assert dis[b] == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("kmax", [5, 10])
def test_binary_tree(kmax):
    """Merge adjacent blocks of size 1, 2, 4, ... and check after each
    round that every element is rooted at the first element of its block."""
    n = 2**kmax
    elements = get_elements(n)
    dis = DisjointSet(elements)
    rng = np.random.RandomState(seed=0)

    for k in 2**np.arange(kmax):
        block = 2 * k
        for start in range(0, n, block):
            # one random representative from each half of the block
            r1, r2 = rng.randint(0, k, size=2)
            a = elements[start + r1]
            b = elements[start + k + r2]
            assert not dis.connected(a, b)
            assert dis.merge(a, b)
            assert dis.connected(a, b)

        assert elements == list(dis)
        roots = [dis[element] for element in elements]
        positions = np.arange(n)
        # round every position down to the start of its merged block
        block_starts = positions - positions % block
        expected = [elements[pos] for pos in block_starts]
        assert roots == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("n", [10, 100])
def test_subsets(n):
    """``subset``, ``subset_size`` and ``subsets`` agree with groupings
    rebuilt from root lookups while random pairs are merged."""
    elements = get_elements(n)
    dis = DisjointSet(elements)

    rng = np.random.RandomState(seed=0)
    for i, j in rng.randint(0, n, (n, 2)):
        x, y = elements[i], elements[j]

        # members of x's subset, found by comparing roots (as one-element
        # sets, exactly like the lookup on the other side)
        same_root = {
            element for element in dis if {dis[element]} == {dis[x]}
        }
        assert dis.subset_size(x) == len(dis.subset(x))
        assert same_root == dis.subset(x)

        # rebuild all subsets keyed by root, in first-seen root order
        by_root = {dis[element]: set() for element in dis}
        for element in dis:
            by_root[dis[element]].add(element)
        assert list(by_root.values()) == dis.subsets()

        dis.merge(x, y)
        assert dis.subset(x) == dis.subset(y)
|
||||
File diff suppressed because it is too large
Load diff
434
venv/lib/python3.13/site-packages/scipy/cluster/tests/test_vq.py
Normal file
434
venv/lib/python3.13/site-packages/scipy/cluster/tests/test_vq.py
Normal file
|
|
@ -0,0 +1,434 @@
|
|||
import math
|
||||
import sys
|
||||
from copy import deepcopy
|
||||
from threading import Lock
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_array_equal, suppress_warnings
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
|
||||
from scipy.cluster.vq import (kmeans, kmeans2, py_vq, vq, whiten,
|
||||
ClusterError, _krandinit)
|
||||
from scipy.cluster import _vq
|
||||
from scipy.sparse._sputils import matrix
|
||||
|
||||
from scipy._lib import array_api_extra as xpx
|
||||
from scipy._lib._array_api import (
|
||||
SCIPY_ARRAY_API, eager_warns, is_lazy_array, make_xp_test_case,
|
||||
xp_copy, xp_assert_close, xp_assert_equal
|
||||
)
|
||||
|
||||
xfail_xp_backends = pytest.mark.xfail_xp_backends
|
||||
skip_xp_backends = pytest.mark.skip_xp_backends
|
||||
|
||||
TESTDATA_2D = np.array([
|
||||
-2.2, 1.17, -1.63, 1.69, -2.04, 4.38, -3.09, 0.95, -1.7, 4.79, -1.68, 0.68,
|
||||
-2.26, 3.34, -2.29, 2.55, -1.72, -0.72, -1.99, 2.34, -2.75, 3.43, -2.45,
|
||||
2.41, -4.26, 3.65, -1.57, 1.87, -1.96, 4.03, -3.01, 3.86, -2.53, 1.28,
|
||||
-4.0, 3.95, -1.62, 1.25, -3.42, 3.17, -1.17, 0.12, -3.03, -0.27, -2.07,
|
||||
-0.55, -1.17, 1.34, -2.82, 3.08, -2.44, 0.24, -1.71, 2.48, -5.23, 4.29,
|
||||
-2.08, 3.69, -1.89, 3.62, -2.09, 0.26, -0.92, 1.07, -2.25, 0.88, -2.25,
|
||||
2.02, -4.31, 3.86, -2.03, 3.42, -2.76, 0.3, -2.48, -0.29, -3.42, 3.21,
|
||||
-2.3, 1.73, -2.84, 0.69, -1.81, 2.48, -5.24, 4.52, -2.8, 1.31, -1.67,
|
||||
-2.34, -1.18, 2.17, -2.17, 2.82, -1.85, 2.25, -2.45, 1.86, -6.79, 3.94,
|
||||
-2.33, 1.89, -1.55, 2.08, -1.36, 0.93, -2.51, 2.74, -2.39, 3.92, -3.33,
|
||||
2.99, -2.06, -0.9, -2.83, 3.35, -2.59, 3.05, -2.36, 1.85, -1.69, 1.8,
|
||||
-1.39, 0.66, -2.06, 0.38, -1.47, 0.44, -4.68, 3.77, -5.58, 3.44, -2.29,
|
||||
2.24, -1.04, -0.38, -1.85, 4.23, -2.88, 0.73, -2.59, 1.39, -1.34, 1.75,
|
||||
-1.95, 1.3, -2.45, 3.09, -1.99, 3.41, -5.55, 5.21, -1.73, 2.52, -2.17,
|
||||
0.85, -2.06, 0.49, -2.54, 2.07, -2.03, 1.3, -3.23, 3.09, -1.55, 1.44,
|
||||
-0.81, 1.1, -2.99, 2.92, -1.59, 2.18, -2.45, -0.73, -3.12, -1.3, -2.83,
|
||||
0.2, -2.77, 3.24, -1.98, 1.6, -4.59, 3.39, -4.85, 3.75, -2.25, 1.71, -3.28,
|
||||
3.38, -1.74, 0.88, -2.41, 1.92, -2.24, 1.19, -2.48, 1.06, -1.68, -0.62,
|
||||
-1.3, 0.39, -1.78, 2.35, -3.54, 2.44, -1.32, 0.66, -2.38, 2.76, -2.35,
|
||||
3.95, -1.86, 4.32, -2.01, -1.23, -1.79, 2.76, -2.13, -0.13, -5.25, 3.84,
|
||||
-2.24, 1.59, -4.85, 2.96, -2.41, 0.01, -0.43, 0.13, -3.92, 2.91, -1.75,
|
||||
-0.53, -1.69, 1.69, -1.09, 0.15, -2.11, 2.17, -1.53, 1.22, -2.1, -0.86,
|
||||
-2.56, 2.28, -3.02, 3.33, -1.12, 3.86, -2.18, -1.19, -3.03, 0.79, -0.83,
|
||||
0.97, -3.19, 1.45, -1.34, 1.28, -2.52, 4.22, -4.53, 3.22, -1.97, 1.75,
|
||||
-2.36, 3.19, -0.83, 1.53, -1.59, 1.86, -2.17, 2.3, -1.63, 2.71, -2.03,
|
||||
3.75, -2.57, -0.6, -1.47, 1.33, -1.95, 0.7, -1.65, 1.27, -1.42, 1.09, -3.0,
|
||||
3.87, -2.51, 3.06, -2.6, 0.74, -1.08, -0.03, -2.44, 1.31, -2.65, 2.99,
|
||||
-1.84, 1.65, -4.76, 3.75, -2.07, 3.98, -2.4, 2.67, -2.21, 1.49, -1.21,
|
||||
1.22, -5.29, 2.38, -2.85, 2.28, -5.6, 3.78, -2.7, 0.8, -1.81, 3.5, -3.75,
|
||||
4.17, -1.29, 2.99, -5.92, 3.43, -1.83, 1.23, -1.24, -1.04, -2.56, 2.37,
|
||||
-3.26, 0.39, -4.63, 2.51, -4.52, 3.04, -1.7, 0.36, -1.41, 0.04, -2.1, 1.0,
|
||||
-1.87, 3.78, -4.32, 3.59, -2.24, 1.38, -1.99, -0.22, -1.87, 1.95, -0.84,
|
||||
2.17, -5.38, 3.56, -1.27, 2.9, -1.79, 3.31, -5.47, 3.85, -1.44, 3.69,
|
||||
-2.02, 0.37, -1.29, 0.33, -2.34, 2.56, -1.74, -1.27, -1.97, 1.22, -2.51,
|
||||
-0.16, -1.64, -0.96, -2.99, 1.4, -1.53, 3.31, -2.24, 0.45, -2.46, 1.71,
|
||||
-2.88, 1.56, -1.63, 1.46, -1.41, 0.68, -1.96, 2.76, -1.61,
|
||||
2.11]).reshape((200, 2))
|
||||
|
||||
|
||||
# Shared fixtures for the vq / kmeans tests in this module.

# Observations: eleven 2-D points.
X = np.array([
    [3.0, 3], [4, 3], [4, 2],
    [9, 2], [5, 1], [6, 2], [9, 4],
    [5, 2], [5, 4], [7, 4], [6, 5],
])

# Code book that kmeans2 is checked against after one iteration when
# started from the first three rows of X.
CODET1 = np.array([
    [3.0000, 3.0000],
    [6.2000, 4.0000],
    [5.8000, 1.8000],
])

# Code book that kmeans2 is checked against after two iterations, and
# kmeans after one, from the same starting rows.
CODET2 = np.array([
    [11.0/3, 8.0/3],
    [6.7500, 4.2500],
    [6.2500, 1.7500],
])

# Labels expected when X is quantized against its own first three rows.
LABEL1 = np.array([0, 1, 2, 2, 2, 2, 1, 2, 1, 1, 1])
|
||||
|
||||
|
||||
@make_xp_test_case(whiten)
class TestWhiten:
    """Tests for `whiten` across array backends and `np.matrix` input."""

    def test_whiten(self, xp):
        # Reference output for a well-conditioned 5x2 observation matrix.
        desired = xp.asarray([[5.08738849, 2.97091878],
                              [3.19909255, 0.69660580],
                              [4.51041982, 0.02640918],
                              [4.38567074, 0.95120889],
                              [2.32191480, 1.63195503]])

        obs = xp.asarray([[0.98744510, 0.82766775],
                          [0.62093317, 0.19406729],
                          [0.87545741, 0.00735733],
                          [0.85124403, 0.26499712],
                          [0.45067590, 0.45464607]])
        xp_assert_close(whiten(obs), desired, rtol=1e-5)

    def test_whiten_zero_std(self, xp):
        # The first two columns are constant; the expected output leaves
        # them unchanged and only rescales the third column.
        desired = xp.asarray([[0., 1.0, 2.86666544],
                              [0., 1.0, 1.32460034],
                              [0., 1.0, 3.74382172]])

        obs = xp.asarray([[0., 1., 0.74109533],
                          [0., 1., 0.34243798],
                          [0., 1., 0.96785929]])

        with eager_warns(obs, RuntimeWarning, match="standard deviation zero"):
            actual = whiten(obs)
        xp_assert_close(actual, desired, rtol=1e-5)

    @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning:dask")
    @pytest.mark.parametrize("bad_value", [math.nan, math.inf, -math.inf])
    def test_whiten_not_finite(self, bad_value, xp):
        obs = xp.asarray([[0.98744510, bad_value],
                          [0.62093317, 0.19406729],
                          [0.87545741, 0.00735733],
                          [0.85124403, 0.26499712],
                          [0.45067590, 0.45464607]])

        if is_lazy_array(obs):
            # Lazy arrays are not expected to raise; the column holding the
            # non-finite value comes back as all-NaN instead.
            desired = xp.asarray([[5.08738849, math.nan],
                                  [3.19909255, math.nan],
                                  [4.51041982, math.nan],
                                  [4.38567074, math.nan],
                                  [2.32191480, math.nan]])
            xp_assert_close(whiten(obs), desired, rtol=1e-5)
        else:
            assert_raises(ValueError, whiten, obs)

    # Parametrized (rather than looping inside the test) so that each
    # non-finite value is reported as its own test case, matching
    # test_whiten_not_finite.
    @pytest.mark.skipif(SCIPY_ARRAY_API,
                        reason='`np.matrix` unsupported in array API mode')
    @pytest.mark.parametrize("bad_value", [np.nan, np.inf, -np.inf])
    def test_whiten_not_finite_matrix(self, bad_value):
        obs = matrix([[0.98744510, bad_value],
                      [0.62093317, 0.19406729],
                      [0.87545741, 0.00735733],
                      [0.85124403, 0.26499712],
                      [0.45067590, 0.45464607]])
        assert_raises(ValueError, whiten, obs)
|
||||
|
||||
|
||||
@make_xp_test_case(vq)
class TestVq:
    """Tests for `vq`, `py_vq` and the compiled `_vq.vq` routine."""

    def test_py_vq(self, xp):
        initc = np.concatenate([[X[0]], [X[1]], [X[2]]])
        # label1.dtype varies between int32 and int64 over platforms
        label1 = py_vq(xp.asarray(X), xp.asarray(initc))[0]
        xp_assert_equal(label1, xp.asarray(LABEL1, dtype=xp.int64),
                        check_dtype=False)

    @pytest.mark.skipif(SCIPY_ARRAY_API,
                        reason='`np.matrix` unsupported in array API mode')
    def test_py_vq_matrix(self):
        initc = np.concatenate([[X[0]], [X[1]], [X[2]]])
        # label1.dtype varies between int32 and int64 over platforms
        label1 = py_vq(matrix(X), matrix(initc))[0]
        assert_array_equal(label1, LABEL1)

    def test_vq(self, xp):
        initc = np.concatenate([[X[0]], [X[1]], [X[2]]])
        label1, _ = _vq.vq(X, initc)
        assert_array_equal(label1, LABEL1)
        # only checks that the public wrapper runs on this backend
        _, _ = vq(xp.asarray(X), xp.asarray(initc))

    @pytest.mark.skipif(SCIPY_ARRAY_API,
                        reason='`np.matrix` unsupported in array API mode')
    def test_vq_matrix(self):
        initc = np.concatenate([[X[0]], [X[1]], [X[2]]])
        label1, _ = _vq.vq(matrix(X), matrix(initc))
        assert_array_equal(label1, LABEL1)
        _, _ = vq(matrix(X), matrix(initc))

    def test_vq_1d(self, xp):
        # Test special rank 1 vq algo, python implementation.
        data = X[:, 0]
        initc = data[:3]
        a, b = _vq.vq(data, initc)
        data = xp.asarray(data)
        initc = xp.asarray(initc)
        ta, tb = py_vq(data[:, np.newaxis], initc[:, np.newaxis])
        # ta.dtype varies between int32 and int64 over platforms
        xp_assert_equal(ta, xp.asarray(a, dtype=xp.int64), check_dtype=False)
        xp_assert_equal(tb, xp.asarray(b))

    def test__vq_sametype(self):
        # observations and code book of different float dtypes are rejected
        a = np.asarray([1.0, 2.0])
        b = a.astype(np.float32)
        assert_raises(TypeError, _vq.vq, a, b)

    def test__vq_invalid_type(self):
        # integer input is rejected by the compiled routine
        a = np.asarray([1, 2], dtype=int)
        assert_raises(TypeError, _vq.vq, a, a)

    def test_vq_large_nfeat(self, xp):
        # Seeded generator: the sampled inputs are the same on every run,
        # so a failure is reproducible.  The local is named `obs` to avoid
        # shadowing the module-level X.
        rng = np.random.default_rng(20)
        obs = rng.random((20, 20))
        code_book = rng.random((3, 20))

        codes0, dis0 = _vq.vq(obs, code_book)
        codes1, dis1 = py_vq(
            xp.asarray(obs), xp.asarray(code_book)
        )
        xp_assert_close(dis1, xp.asarray(dis0), rtol=1e-5)
        # codes1.dtype varies between int32 and int64 over platforms
        xp_assert_equal(codes1, xp.asarray(codes0, dtype=xp.int64), check_dtype=False)

        # repeat the comparison with single-precision inputs
        obs = obs.astype(np.float32)
        code_book = code_book.astype(np.float32)

        codes0, dis0 = _vq.vq(obs, code_book)
        codes1, dis1 = py_vq(
            xp.asarray(obs), xp.asarray(code_book)
        )
        xp_assert_close(dis1, xp.asarray(dis0, dtype=xp.float64), rtol=1e-5)
        # codes1.dtype varies between int32 and int64 over platforms
        xp_assert_equal(codes1, xp.asarray(codes0, dtype=xp.int64), check_dtype=False)

    def test_vq_large_features(self, xp):
        # Seeded generator so the large-magnitude inputs are reproducible.
        rng = np.random.default_rng(10)
        obs = rng.random((10, 5)) * 1000000
        code_book = rng.random((2, 5)) * 1000000

        codes0, dis0 = _vq.vq(obs, code_book)
        codes1, dis1 = py_vq(
            xp.asarray(obs), xp.asarray(code_book)
        )
        xp_assert_close(dis1, xp.asarray(dis0), rtol=1e-5)
        # codes1.dtype varies between int32 and int64 over platforms
        xp_assert_equal(codes1, xp.asarray(codes0, dtype=xp.int64), check_dtype=False)
|
||||
|
||||
|
||||
# Whole class skipped on GPU for now;
|
||||
# once pdist/cdist are hooked up for CuPy, more tests will work
|
||||
@make_xp_test_case(kmeans, kmeans2)
class TestKMeans:
    """Tests for `kmeans`, `kmeans2` and the `_krandinit` initializer."""

    def test_large_features(self, xp):
        # Generate a data set with large values and check that kmeans runs
        # on it without raising (regression for 1077).
        d = 300
        n = 100

        # Seeded generator so the sampled data is identical on every run.
        rng = np.random.default_rng(1077)
        m1 = rng.standard_normal(d)
        m2 = rng.standard_normal(d)
        x = 10000 * rng.standard_normal((n, d)) - 20000 * m1
        y = 10000 * rng.standard_normal((n, d)) + 20000 * m2

        data = np.empty((x.shape[0] + y.shape[0], d), np.float64)
        data[:x.shape[0]] = x
        data[x.shape[0]:] = y

        # use `seed` to ensure backwards compatibility after SPEC7
        kmeans(xp.asarray(data), 2, seed=1)

    def test_kmeans_simple(self, xp):
        rng = np.random.default_rng(54321)
        initc = np.concatenate([[X[0]], [X[1]], [X[2]]])
        code1 = kmeans(xp.asarray(X), xp.asarray(initc), iter=1, rng=rng)[0]
        xp_assert_close(code1, xp.asarray(CODET2))

    @pytest.mark.skipif(SCIPY_ARRAY_API,
                        reason='`np.matrix` unsupported in array API mode')
    def test_kmeans_simple_matrix(self):
        rng = np.random.default_rng(54321)
        initc = np.concatenate([[X[0]], [X[1]], [X[2]]])
        code1 = kmeans(matrix(X), matrix(initc), iter=1, rng=rng)[0]
        xp_assert_close(code1, CODET2)

    def test_kmeans_lost_cluster(self, xp):
        # This will cause kmeans to have a cluster with no points.
        data = xp.asarray(TESTDATA_2D)
        initk = xp.asarray([[-1.8127404, -0.67128041],
                            [2.04621601, 0.07401111],
                            [-2.31149087, -0.05160469]])

        kmeans(data, initk)
        with suppress_warnings() as sup:
            sup.filter(UserWarning,
                       "One of the clusters is empty. Re-run kmeans with a "
                       "different initialization")
            kmeans2(data, initk, missing='warn')

        assert_raises(ClusterError, kmeans2, data, initk, missing='raise')

    def test_kmeans2_simple(self, xp):
        rng = np.random.default_rng(12345678)
        initc = xp.asarray(np.concatenate([[X[0]], [X[1]], [X[2]]]))
        # outside array API mode, also exercise np.matrix input
        arrays = [xp.asarray] if SCIPY_ARRAY_API else [np.asarray, matrix]
        for tp in arrays:
            code1 = kmeans2(tp(X), tp(initc), iter=1, rng=rng)[0]
            code2 = kmeans2(tp(X), tp(initc), iter=2, rng=rng)[0]

            xp_assert_close(code1, xp.asarray(CODET1))
            xp_assert_close(code2, xp.asarray(CODET2))

    @pytest.mark.skipif(SCIPY_ARRAY_API,
                        reason='`np.matrix` unsupported in array API mode')
    def test_kmeans2_simple_matrix(self):
        rng = np.random.default_rng(12345678)
        initc = np.concatenate([[X[0]], [X[1]], [X[2]]])
        code1 = kmeans2(matrix(X), matrix(initc), iter=1, rng=rng)[0]
        code2 = kmeans2(matrix(X), matrix(initc), iter=2, rng=rng)[0]

        xp_assert_close(code1, CODET1)
        xp_assert_close(code2, CODET2)

    def test_kmeans2_rank1(self, xp):
        data = xp.asarray(TESTDATA_2D)
        data1 = data[:, 0]

        initc = data1[:3]
        code = xp_copy(initc, xp=xp)

        # use `seed` to ensure backwards compatibility after SPEC7
        kmeans2(data1, code, iter=1, seed=1)[0]
        kmeans2(data1, code, iter=2)[0]

    def test_kmeans2_rank1_2(self, xp):
        data = xp.asarray(TESTDATA_2D)
        data1 = data[:, 0]
        kmeans2(data1, 2, iter=1)

    def test_kmeans2_high_dim(self, xp):
        # test kmeans2 when the number of dimensions exceeds the number
        # of input points
        data = xp.asarray(TESTDATA_2D)
        data = xp.reshape(data, (20, 20))[:10, :]
        kmeans2(data, 2)

    def test_kmeans2_init(self, xp):
        rng = np.random.default_rng(12345678)
        data = xp.asarray(TESTDATA_2D)
        k = 3

        kmeans2(data, k, minit='points', rng=rng)
        kmeans2(data[:, 1], k, minit='points', rng=rng)  # special case (1-D)

        kmeans2(data, k, minit='++', rng=rng)
        kmeans2(data[:, 1], k, minit='++', rng=rng)  # special case (1-D)

        # minit='random' can give warnings, filter those
        with suppress_warnings() as sup:
            sup.filter(message="One of the clusters is empty. Re-run.")
            kmeans2(data, k, minit='random', rng=rng)
            kmeans2(data[:, 1], k, minit='random', rng=rng)  # special case (1-D)

    @pytest.fixture
    def krand_lock(self):
        return Lock()

    @xfail_xp_backends('dask.array', reason="Wrong answer")
    @pytest.mark.skipif(sys.platform == 'win32',
                        reason='Fails with MemoryError in Wine.')
    def test_krandinit(self, xp, krand_lock):
        data = xp.asarray(TESTDATA_2D)
        datas = [xp.reshape(data, (200, 2)),
                 xp.reshape(data, (20, 20))[:10, :]]
        k = int(1e6)
        with krand_lock:
            for data in datas:
                rng = np.random.default_rng(1234)
                init = _krandinit(data, k, rng, xp)
                # the covariance of the generated points should match the
                # covariance of the data they were generated from
                orig_cov = xpx.cov(data.T, xp=xp)
                init_cov = xpx.cov(init.T, xp=xp)
                xp_assert_close(orig_cov, init_cov, atol=1.1e-2)

    def test_kmeans2_empty(self, xp):
        # Regression test for gh-1032.
        assert_raises(ValueError, kmeans2, xp.asarray([]), 2)

    def test_kmeans_0k(self, xp):
        # Regression test for gh-1073: fail when k arg is 0.
        assert_raises(ValueError, kmeans, xp.asarray(X), 0)
        assert_raises(ValueError, kmeans2, xp.asarray(X), 0)
        assert_raises(ValueError, kmeans2, xp.asarray(X), xp.asarray([]))

    def test_kmeans_large_thres(self, xp):
        # Regression test for gh-1774
        x = xp.asarray([1, 2, 3, 4, 10], dtype=xp.float64)
        res = kmeans(x, 1, thresh=1e16)
        xp_assert_close(res[0], xp.asarray([4.], dtype=xp.float64))
        xp_assert_close(res[1], xp.asarray(2.3999999999999999, dtype=xp.float64)[()])

    def test_kmeans2_kpp_low_dim(self, xp):
        # Regression test for gh-11462
        rng = np.random.default_rng(2358792345678234568)
        prev_res = xp.asarray([[-1.95266667, 0.898],
                               [-3.153375, 3.3945]], dtype=xp.float64)
        res, _ = kmeans2(xp.asarray(TESTDATA_2D), 2, minit='++', rng=rng)
        xp_assert_close(res, prev_res)

    @pytest.mark.thread_unsafe
    def test_kmeans2_kpp_high_dim(self, xp):
        # Regression test for gh-11462
        rng = np.random.default_rng(23587923456834568)
        n_dim = 100
        size = 10
        centers = np.vstack([5 * np.ones(n_dim),
                             -5 * np.ones(n_dim)])

        data = np.vstack([
            rng.multivariate_normal(centers[0], np.eye(n_dim), size=size),
            rng.multivariate_normal(centers[1], np.eye(n_dim), size=size)
        ])

        data = xp.asarray(data)
        res, _ = kmeans2(data, 2, minit='++', rng=rng)
        # only the sign pattern of the recovered centers is compared
        xp_assert_equal(xp.sign(res), xp.sign(xp.asarray(centers)))

    def test_kmeans_diff_convergence(self, xp):
        # Regression test for gh-8727
        obs = xp.asarray([-3, -1, 0, 1, 1, 8], dtype=xp.float64)
        res = kmeans(obs, xp.asarray([-3., 0.99]))
        xp_assert_close(res[0], xp.asarray([-0.4, 8.], dtype=xp.float64))
        xp_assert_close(res[1], xp.asarray(1.0666666666666667, dtype=xp.float64)[()])

    def test_kmeans_and_kmeans2_random_seed(self, xp):

        seed_list = [
            1234, np.random.RandomState(1234), np.random.default_rng(1234)
        ]

        for seed in seed_list:
            # two independent copies, so both runs start from equal state
            seed1 = deepcopy(seed)
            seed2 = deepcopy(seed)
            data = xp.asarray(TESTDATA_2D)
            # test for kmeans
            res1, _ = kmeans(data, 2, seed=seed1)
            res2, _ = kmeans(data, 2, seed=seed2)
            xp_assert_close(res1, res2)  # should be same results
            # test for kmeans2
            for minit in ["random", "points", "++"]:
                res1, _ = kmeans2(data, 2, minit=minit, seed=seed1)
                res2, _ = kmeans2(data, 2, minit=minit, seed=seed2)
                xp_assert_close(res1, res2)  # should be same results
|
||||
Loading…
Add table
Add a link
Reference in a new issue