up follow livre

2025-08-30 18:14:14 +02:00 · 2025-08-30 18:14:14 +02:00 · 3a7a3849ae
commit 3a7a3849ae
parent b4b4398bb0
12242 changed files with 2564461 additions and 6914 deletions
--- a/venv/lib/python3.13/site-packages/scipy/cluster/tests/__init__.py
+++ b/venv/lib/python3.13/site-packages/scipy/cluster/tests/__init__.py
--- a/venv/lib/python3.13/site-packages/scipy/cluster/tests/__pycache__/__init__.cpython-313.pyc
+++ b/venv/lib/python3.13/site-packages/scipy/cluster/tests/__pycache__/__init__.cpython-313.pyc
--- a/venv/lib/python3.13/site-packages/scipy/cluster/tests/__pycache__/hierarchy_test_data.cpython-313.pyc
+++ b/venv/lib/python3.13/site-packages/scipy/cluster/tests/__pycache__/hierarchy_test_data.cpython-313.pyc
--- a/venv/lib/python3.13/site-packages/scipy/cluster/tests/__pycache__/test_disjoint_set.cpython-313.pyc
+++ b/venv/lib/python3.13/site-packages/scipy/cluster/tests/__pycache__/test_disjoint_set.cpython-313.pyc
--- a/venv/lib/python3.13/site-packages/scipy/cluster/tests/__pycache__/test_hierarchy.cpython-313.pyc
+++ b/venv/lib/python3.13/site-packages/scipy/cluster/tests/__pycache__/test_hierarchy.cpython-313.pyc
--- a/venv/lib/python3.13/site-packages/scipy/cluster/tests/__pycache__/test_vq.cpython-313.pyc
+++ b/venv/lib/python3.13/site-packages/scipy/cluster/tests/__pycache__/test_vq.cpython-313.pyc
--- a/venv/lib/python3.13/site-packages/scipy/cluster/tests/hierarchy_test_data.py
+++ b/venv/lib/python3.13/site-packages/scipy/cluster/tests/hierarchy_test_data.py
@@ -0,0 +1,145 @@
+from numpy import array
+
+
+# Q_X: 30 observations with 3 coordinates each.  As written, rows 0-9 lie in
+# [0, 1), rows 10-19 in roughly [1.3, 2.2] and rows 20-29 in roughly
+# [2.5, 3.5], i.e. three separated groups of ten points.
+# NOTE(review): presumably the observation matrix behind the 30-label
+# fcluster_* expectations at the end of this module -- confirm in
+# test_hierarchy.py.
+Q_X = array([[5.26563660e-01, 3.14160190e-01, 8.00656370e-02],
+             [7.50205180e-01, 4.60299830e-01, 8.98696460e-01],
+             [6.65461230e-01, 6.94011420e-01, 9.10465700e-01],
+             [9.64047590e-01, 1.43082200e-03, 7.39874220e-01],
+             [1.08159060e-01, 5.53028790e-01, 6.63804780e-02],
+             [9.31359130e-01, 8.25424910e-01, 9.52315440e-01],
+             [6.78086960e-01, 3.41903970e-01, 5.61481950e-01],
+             [9.82730940e-01, 7.04605210e-01, 8.70978630e-02],
+             [6.14691610e-01, 4.69989230e-02, 6.02406450e-01],
+             [5.80161260e-01, 9.17354970e-01, 5.88163850e-01],
+             [1.38246310e+00, 1.96358160e+00, 1.94437880e+00],
+             [2.10675860e+00, 1.67148730e+00, 1.34854480e+00],
+             [1.39880070e+00, 1.66142050e+00, 1.32224550e+00],
+             [1.71410460e+00, 1.49176380e+00, 1.45432170e+00],
+             [1.54102340e+00, 1.84374950e+00, 1.64658950e+00],
+             [2.08512480e+00, 1.84524350e+00, 2.17340850e+00],
+             [1.30748740e+00, 1.53801650e+00, 2.16007740e+00],
+             [1.41447700e+00, 1.99329070e+00, 1.99107420e+00],
+             [1.61943490e+00, 1.47703280e+00, 1.89788160e+00],
+             [1.59880600e+00, 1.54988980e+00, 1.57563350e+00],
+             [3.37247380e+00, 2.69635310e+00, 3.39981700e+00],
+             [3.13705120e+00, 3.36528090e+00, 3.06089070e+00],
+             [3.29413250e+00, 3.19619500e+00, 2.90700170e+00],
+             [2.65510510e+00, 3.06785900e+00, 2.97198540e+00],
+             [3.30941040e+00, 2.59283970e+00, 2.57714110e+00],
+             [2.59557220e+00, 3.33477370e+00, 3.08793190e+00],
+             [2.58206180e+00, 3.41615670e+00, 3.26441990e+00],
+             [2.71127000e+00, 2.77032450e+00, 2.63466500e+00],
+             [2.79617850e+00, 3.25473720e+00, 3.41801560e+00],
+             [2.64741750e+00, 2.54538040e+00, 3.25354110e+00]])
+
+# ytdist: 15 distances, i.e. 6 * 5 / 2 entries -- the length of a condensed
+# pairwise-distance vector for 6 observations.  Read that way, the smallest
+# entry (138.) is the distance between observations 2 and 5 and the next
+# smallest (219.) the distance between 3 and 4, which matches the first two
+# merges recorded in every linkage_ytdist_* array below.
+ytdist = array([662., 877., 255., 412., 996., 295., 468., 268., 400., 754.,
+                564., 138., 219., 869., 669.])
+
+# Expected linkage matrices for `ytdist` (5 merges for 6 observations).
+# Each row reads [cluster index, cluster index, merge distance, number of
+# observations in the merged cluster], the layout documented for
+# scipy.cluster.hierarchy.linkage.
+# NOTE(review): the name suffix presumably is the `method` passed to
+# linkage -- confirm against the consumer in test_hierarchy.py.
+linkage_ytdist_single = array([[2., 5., 138., 2.],
+                               [3., 4., 219., 2.],
+                               [0., 7., 255., 3.],
+                               [1., 8., 268., 4.],
+                               [6., 9., 295., 6.]])
+
+linkage_ytdist_complete = array([[2., 5., 138., 2.],
+                                 [3., 4., 219., 2.],
+                                 [1., 6., 400., 3.],
+                                 [0., 7., 412., 3.],
+                                 [8., 9., 996., 6.]])
+
+linkage_ytdist_average = array([[2., 5., 138., 2.],
+                                [3., 4., 219., 2.],
+                                [0., 7., 333.5, 3.],
+                                [1., 6., 347.5, 3.],
+                                [8., 9., 680.77777778, 6.]])
+
+# Same rows as linkage_ytdist_average except for the final merge distance.
+linkage_ytdist_weighted = array([[2., 5., 138., 2.],
+                                 [3., 4., 219., 2.],
+                                 [0., 7., 333.5, 3.],
+                                 [1., 6., 347.5, 3.],
+                                 [8., 9., 670.125, 6.]])
+
+# the optimal leaf ordering of linkage_ytdist_single
+# (differs from it only in the order of the two cluster indices in the
+# first three rows; distances and counts are unchanged)
+linkage_ytdist_single_olo = array([[5., 2., 138., 2.],
+                                   [4., 3., 219., 2.],
+                                   [7., 0., 255., 3.],
+                                   [1., 8., 268., 4.],
+                                   [6., 9., 295., 6.]])
+
+# X: 6 observations with 2 coordinates each.
+# NOTE(review): presumably the input behind the linkage_X_* expectations
+# below (each has 5 merges, consistent with 6 observations) -- confirm in
+# test_hierarchy.py.
+X = array([[1.43054825, -7.5693489],
+           [6.95887839, 6.82293382],
+           [2.87137846, -9.68248579],
+           [7.87974764, -6.05485803],
+           [8.24018364, -6.09495602],
+           [7.39020262, 8.54004355]])
+ 
+# Expected linkage matrices with 5 merges each; rows use the same
+# [index, index, distance, count] layout as scipy's linkage output.
+# NOTE(review): presumably computed from `X` with the method named by the
+# suffix -- confirm in test_hierarchy.py.
+linkage_X_centroid = array([[3., 4., 0.36265956, 2.],
+                            [1., 5., 1.77045373, 2.],
+                            [0., 2., 2.55760419, 2.],
+                            [6., 8., 6.43614494, 4.],
+                            [7., 9., 15.17363237, 6.]])
+
+# Identical, value for value, to linkage_X_centroid above.
+linkage_X_median = array([[3., 4., 0.36265956, 2.],
+                          [1., 5., 1.77045373, 2.],
+                          [0., 2., 2.55760419, 2.],
+                          [6., 8., 6.43614494, 4.],
+                          [7., 9., 15.17363237, 6.]])
+
+# Same merges as the two arrays above; only the last two distances differ.
+linkage_X_ward = array([[3., 4., 0.36265956, 2.],
+                        [1., 5., 1.77045373, 2.],
+                        [0., 2., 2.55760419, 2.],
+                        [6., 8., 9.10208346, 4.],
+                        [7., 9., 24.7784379, 6.]])
+
+# the optimal leaf ordering of linkage_X_ward
+# (only the index order within the first three rows is swapped)
+linkage_X_ward_olo = array([[4., 3., 0.36265956, 2.],
+                            [5., 1., 1.77045373, 2.],
+                            [2., 0., 2.55760419, 2.],
+                            [6., 8., 9.10208346, 4.],
+                            [7., 9., 24.7784379, 6.]])
+
+# inconsistent_ytdist: four 5x4 arrays keyed by the integers 1..4.
+# For key 1 the first column reproduces the merge distances of
+# linkage_ytdist_single (138, 219, 255, 268, 295) and the third column is 1.
+# NOTE(review): presumably the expected output of
+# scipy.cluster.hierarchy.inconsistent on the single-linkage matrix, keyed
+# by depth, with columns [mean, std, link count, inconsistency
+# coefficient] -- confirm in test_hierarchy.py.
+inconsistent_ytdist = {
+    1: array([[138., 0., 1., 0.],
+              [219., 0., 1., 0.],
+              [255., 0., 1., 0.],
+              [268., 0., 1., 0.],
+              [295., 0., 1., 0.]]),
+    2: array([[138., 0., 1., 0.],
+              [219., 0., 1., 0.],
+              [237., 25.45584412, 2., 0.70710678],
+              [261.5, 9.19238816, 2., 0.70710678],
+              [233.66666667, 83.9424406, 3., 0.7306594]]),
+    3: array([[138., 0., 1., 0.],
+              [219., 0., 1., 0.],
+              [237., 25.45584412, 2., 0.70710678],
+              [247.33333333, 25.38372182, 3., 0.81417007],
+              [239., 69.36377537, 4., 0.80733783]]),
+    4: array([[138., 0., 1., 0.],
+              [219., 0., 1., 0.],
+              [237., 25.45584412, 2., 0.70710678],
+              [247.33333333, 25.38372182, 3., 0.81417007],
+              [235., 60.73302232, 5., 0.98793042]])}
+
+# Three dicts mapping a float key to an array of 30 integer labels.
+# NOTE(review): presumably the expected flat-cluster labels returned by
+# scipy.cluster.hierarchy.fcluster for the 30 rows of Q_X, one dict per
+# `criterion` ('inconsistent', 'distance', 'maxclust') and keyed by the
+# threshold `t` -- confirm in test_hierarchy.py.
+fcluster_inconsistent = {
+    0.8: array([6, 2, 2, 4, 6, 2, 3, 7, 3, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1,
+                1, 1, 1, 1, 1, 1, 1, 1, 1]),
+    1.0: array([6, 2, 2, 4, 6, 2, 3, 7, 3, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1,
+                1, 1, 1, 1, 1, 1, 1, 1, 1]),
+    2.0: array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                1, 1, 1, 1, 1, 1, 1, 1, 1])}
+
+fcluster_distance = {
+    0.6: array([4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 6, 6, 6, 6, 6, 7, 6, 6, 6, 6, 3,
+                1, 1, 1, 2, 1, 1, 1, 1, 1]),
+    1.0: array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1,
+                1, 1, 1, 1, 1, 1, 1, 1, 1]),
+    2.0: array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                1, 1, 1, 1, 1, 1, 1, 1, 1])}
+
+# Keys here are cluster counts written as floats (8.0, 4.0, 1.0); the
+# largest label in each array equals its key.
+fcluster_maxclust = {
+    8.0: array([5, 5, 5, 5, 5, 5, 5, 6, 5, 5, 7, 7, 7, 7, 7, 8, 7, 7, 7, 7, 4,
+                1, 1, 1, 3, 1, 1, 1, 1, 2]),
+    4.0: array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2,
+                1, 1, 1, 1, 1, 1, 1, 1, 1]),
+    1.0: array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                1, 1, 1, 1, 1, 1, 1, 1, 1])}
--- a/venv/lib/python3.13/site-packages/scipy/cluster/tests/test_disjoint_set.py
+++ b/venv/lib/python3.13/site-packages/scipy/cluster/tests/test_disjoint_set.py
@@ -0,0 +1,202 @@
+import pytest
+from pytest import raises as assert_raises
+import numpy as np
+from scipy.cluster.hierarchy import DisjointSet
+import string
+
+
+def generate_random_token():
+    """Yield an endless, reproducible stream of hashable tokens.
+
+    The pool mixes NumPy integers, NumPy floats, single ASCII letters and
+    ``None`` (one entry of each kind per ASCII letter).  Every step draws
+    one or two pool entries with a ``RandomState`` seeded with 0: a single
+    draw is yielded as the bare entry, a double draw as a 2-tuple.
+    """
+    letters = string.ascii_letters
+    count = len(letters)
+    pool = [
+        *np.arange(count, dtype=int),
+        *np.arange(count, dtype=float),
+        *letters,
+        *([None] * count),
+    ]
+    # dtype=object keeps the heterogeneous entries as they are.
+    pool = np.array(pool, dtype=object)
+    rng = np.random.RandomState(seed=0)
+
+    while True:
+        # randint's upper bound is exclusive, so size is 1 or 2.
+        size = rng.randint(1, 3)
+        picked = rng.choice(pool, size)
+        yield picked[0] if size == 1 else tuple(picked)
+
+
+def get_elements(n):
+    """Return distinct tokens from ``generate_random_token`` in first-seen order.
+
+    Collection stops as soon as ``n`` distinct tokens have been gathered.
+    The size check only runs after an insertion, so at least one token is
+    always returned.
+    """
+    # A dict keeps insertion order and deduplicates through hashing, which
+    # avoids the difficulty of comparing numpy ints directly.
+    seen = {}
+    for token in generate_random_token():
+        if token in seen:
+            continue
+        seen[token] = len(seen)
+        if len(seen) >= n:
+            break
+    return list(seen)
+
+
+def test_init():
+    """A set built from 10 elements has 10 subsets and iterates in input order."""
+    count = 10
+    items = get_elements(count)
+    ds = DisjointSet(items)
+    assert ds.n_subsets == count
+    assert list(ds) == items
+
+
+def test_len():
+    """``len`` equals the number of elements and grows by one after ``add``."""
+    count = 10
+    ds = DisjointSet(get_elements(count))
+    assert len(ds) == count
+
+    ds.add("dummy")
+    assert len(ds) == count + 1
+
+
+@pytest.mark.parametrize("n", [10, 100])
+def test_contains(n):
+    """Every constructor element is ``in`` the set; an unknown one is not."""
+    items = get_elements(n)
+    ds = DisjointSet(items)
+    for item in items:
+        assert item in ds
+
+    assert "dummy" not in ds
+
+
+@pytest.mark.parametrize("n", [10, 100])
+def test_add(n):
+    """Adding elements one by one matches passing them to the constructor."""
+    items = get_elements(n)
+    from_constructor = DisjointSet(items)
+
+    incremental = DisjointSet()
+    for expected_len, item in enumerate(items, start=1):
+        incremental.add(item)
+        assert len(incremental) == expected_len
+
+        # adding the same element again must not change the length
+        incremental.add(item)
+        assert len(incremental) == expected_len
+
+    assert list(from_constructor) == list(incremental)
+
+
+def test_element_not_present():
+    """Lookup, ``merge`` and ``connected`` raise KeyError for an unknown element."""
+    items = get_elements(n=10)
+    ds = DisjointSet(items)
+    known = items[0]
+
+    failing_calls = (
+        lambda: ds["dummy"],
+        lambda: ds.merge(known, "dummy"),
+        lambda: ds.connected(known, "dummy"),
+    )
+    for call in failing_calls:
+        with assert_raises(KeyError):
+            call()
+
+
+@pytest.mark.parametrize("direction", ["forwards", "backwards"])
+@pytest.mark.parametrize("n", [10, 100])
+def test_linear_union_sequence(n, direction):
+    """Merge neighbours ``i`` and ``i + 1`` along a chain, in either direction.
+
+    Each merge must join two previously unconnected elements and reduce
+    ``n_subsets`` by one.  Afterwards all elements share one root: the
+    first element when merging forwards, the second-to-last when merging
+    backwards.
+    """
+    items = get_elements(n)
+    ds = DisjointSet(items)
+    assert items == list(ds)
+
+    order = range(n - 1)
+    if direction == "backwards":
+        order = reversed(order)
+
+    for step, i in enumerate(order):
+        left, right = items[i], items[i + 1]
+        assert not ds.connected(left, right)
+        assert ds.merge(left, right)
+        assert ds.connected(left, right)
+        assert ds.n_subsets == n - 1 - step
+
+    roots = [ds[item] for item in items]
+    expected_root = items[0] if direction == "forwards" else items[-2]
+    assert all(expected_root == r for r in roots)
+    # everything is already in one subset, so a further merge reports False
+    assert not ds.merge(items[0], items[-1])
+
+
+@pytest.mark.parametrize("n", [10, 100])
+def test_self_unions(n):
+    """Merging an element with itself is a no-op that reports False."""
+    items = get_elements(n)
+    ds = DisjointSet(items)
+
+    for item in items:
+        assert ds.connected(item, item)
+        assert not ds.merge(item, item)
+        assert ds.connected(item, item)
+    assert ds.n_subsets == len(items)
+
+    # order is untouched and every element is still its own root
+    assert items == list(ds)
+    assert items == [ds[item] for item in items]
+
+
+@pytest.mark.parametrize("order", ["ab", "ba"])
+@pytest.mark.parametrize("n", [10, 100])
+def test_equal_size_ordering(n, order):
+    """Merging two singletons roots them at the one with the smaller index.
+
+    Elements are paired up from a seeded shuffle and merged with the
+    arguments in the order given by ``order``; in both cases the expected
+    root is the element that appeared earlier in the constructor input.
+    """
+    items = get_elements(n)
+    ds = DisjointSet(items)
+
+    rng = np.random.RandomState(seed=0)
+    shuffled = np.arange(n)
+    rng.shuffle(shuffled)
+
+    for start in range(0, len(shuffled), 2):
+        i, j = shuffled[start], shuffled[start + 1]
+        a, b = items[i], items[j]
+        first, second = (a, b) if order == "ab" else (b, a)
+        assert ds.merge(first, second)
+
+        root = items[min(i, j)]
+        assert ds[a] == root
+        assert ds[b] == root
+
+
+@pytest.mark.parametrize("kmax", [5, 10])
+def test_binary_tree(kmax):
+    """Merge adjacent blocks of doubling size and check the resulting roots.
+
+    At level ``k`` each pair of neighbouring blocks of ``k`` elements is
+    joined through one randomly chosen member of each block.  After the
+    level every element's root must be the first element of its
+    ``2 * k``-wide block.
+    """
+    n = 2**kmax
+    items = get_elements(n)
+    ds = DisjointSet(items)
+    rng = np.random.RandomState(seed=0)
+
+    for k in 2**np.arange(kmax):
+        width = 2 * k
+        for start in range(0, n, width):
+            off_left, off_right = rng.randint(0, k, size=2)
+            a = items[start + off_left]
+            b = items[start + k + off_right]
+            assert not ds.connected(a, b)
+            assert ds.merge(a, b)
+            assert ds.connected(a, b)
+
+        assert items == list(ds)
+        roots = [ds[item] for item in items]
+        positions = np.arange(n)
+        block_starts = positions - positions % width
+        assert roots == [items[p] for p in block_starts]
+
+
+@pytest.mark.parametrize("n", [10, 100])
+def test_subsets(n):
+    """``subset``, ``subset_size`` and ``subsets`` agree with roots from ``ds[...]``.
+
+    Before each of ``n`` random merges the subset of ``x`` and the full
+    list of subsets are rebuilt by hand from element roots and compared
+    with the API results; after the merge ``x`` and ``y`` must share a
+    subset.
+    """
+    items = get_elements(n)
+    ds = DisjointSet(items)
+
+    rng = np.random.RandomState(seed=0)
+    for i, j in rng.randint(0, n, (n, 2)):
+        x, y = items[i], items[j]
+
+        # NOTE(review): roots are compared through singleton sets rather
+        # than a plain ``==``; presumably to rely on hashing instead of
+        # NumPy comparison semantics -- confirm.
+        members_of_x = {e for e in ds if {ds[e]} == {ds[x]}}
+        assert ds.subset_size(x) == len(ds.subset(x))
+        assert members_of_x == ds.subset(x)
+
+        # group elements by root, keeping first-seen root order
+        by_root = {}
+        for e in ds:
+            by_root.setdefault(ds[e], set()).add(e)
+        assert list(by_root.values()) == ds.subsets()
+
+        ds.merge(x, y)
+        assert ds.subset(x) == ds.subset(y)
--- a/venv/lib/python3.13/site-packages/scipy/cluster/tests/test_hierarchy.py
+++ b/venv/lib/python3.13/site-packages/scipy/cluster/tests/test_hierarchy.py
--- a/venv/lib/python3.13/site-packages/scipy/cluster/tests/test_vq.py
+++ b/venv/lib/python3.13/site-packages/scipy/cluster/tests/test_vq.py
@@ -0,0 +1,434 @@
+import math
+import sys
+from copy import deepcopy
+from threading import Lock
+
+import numpy as np
+from numpy.testing import assert_array_equal, suppress_warnings
+import pytest
+from pytest import raises as assert_raises
+
+from scipy.cluster.vq import (kmeans, kmeans2, py_vq, vq, whiten,
+                              ClusterError, _krandinit)
+from scipy.cluster import _vq
+from scipy.sparse._sputils import matrix
+
+from scipy._lib import array_api_extra as xpx
+from scipy._lib._array_api import (
+    SCIPY_ARRAY_API, eager_warns, is_lazy_array, make_xp_test_case,
+    xp_copy, xp_assert_close, xp_assert_equal
+)
+
+# Short aliases for two pytest markers, used as decorators on tests.
+# NOTE(review): the markers themselves are presumably registered by SciPy's
+# test configuration; only `xfail_xp_backends` is referenced further down
+# in this module.
+xfail_xp_backends = pytest.mark.xfail_xp_backends
+skip_xp_backends = pytest.mark.skip_xp_backends
+
+# TESTDATA_2D: a flat literal reshaped into 200 rows of 2 columns.  The
+# tests below use it as the observation matrix for kmeans/kmeans2, take
+# single columns of it for 1-D cases, and reshape it to (20, 20) for the
+# "more dimensions than points" cases.
+TESTDATA_2D = np.array([
+    -2.2, 1.17, -1.63, 1.69, -2.04, 4.38, -3.09, 0.95, -1.7, 4.79, -1.68, 0.68,
+    -2.26, 3.34, -2.29, 2.55, -1.72, -0.72, -1.99, 2.34, -2.75, 3.43, -2.45,
+    2.41, -4.26, 3.65, -1.57, 1.87, -1.96, 4.03, -3.01, 3.86, -2.53, 1.28,
+    -4.0, 3.95, -1.62, 1.25, -3.42, 3.17, -1.17, 0.12, -3.03, -0.27, -2.07,
+    -0.55, -1.17, 1.34, -2.82, 3.08, -2.44, 0.24, -1.71, 2.48, -5.23, 4.29,
+    -2.08, 3.69, -1.89, 3.62, -2.09, 0.26, -0.92, 1.07, -2.25, 0.88, -2.25,
+    2.02, -4.31, 3.86, -2.03, 3.42, -2.76, 0.3, -2.48, -0.29, -3.42, 3.21,
+    -2.3, 1.73, -2.84, 0.69, -1.81, 2.48, -5.24, 4.52, -2.8, 1.31, -1.67,
+    -2.34, -1.18, 2.17, -2.17, 2.82, -1.85, 2.25, -2.45, 1.86, -6.79, 3.94,
+    -2.33, 1.89, -1.55, 2.08, -1.36, 0.93, -2.51, 2.74, -2.39, 3.92, -3.33,
+    2.99, -2.06, -0.9, -2.83, 3.35, -2.59, 3.05, -2.36, 1.85, -1.69, 1.8,
+    -1.39, 0.66, -2.06, 0.38, -1.47, 0.44, -4.68, 3.77, -5.58, 3.44, -2.29,
+    2.24, -1.04, -0.38, -1.85, 4.23, -2.88, 0.73, -2.59, 1.39, -1.34, 1.75,
+    -1.95, 1.3, -2.45, 3.09, -1.99, 3.41, -5.55, 5.21, -1.73, 2.52, -2.17,
+    0.85, -2.06, 0.49, -2.54, 2.07, -2.03, 1.3, -3.23, 3.09, -1.55, 1.44,
+    -0.81, 1.1, -2.99, 2.92, -1.59, 2.18, -2.45, -0.73, -3.12, -1.3, -2.83,
+    0.2, -2.77, 3.24, -1.98, 1.6, -4.59, 3.39, -4.85, 3.75, -2.25, 1.71, -3.28,
+    3.38, -1.74, 0.88, -2.41, 1.92, -2.24, 1.19, -2.48, 1.06, -1.68, -0.62,
+    -1.3, 0.39, -1.78, 2.35, -3.54, 2.44, -1.32, 0.66, -2.38, 2.76, -2.35,
+    3.95, -1.86, 4.32, -2.01, -1.23, -1.79, 2.76, -2.13, -0.13, -5.25, 3.84,
+    -2.24, 1.59, -4.85, 2.96, -2.41, 0.01, -0.43, 0.13, -3.92, 2.91, -1.75,
+    -0.53, -1.69, 1.69, -1.09, 0.15, -2.11, 2.17, -1.53, 1.22, -2.1, -0.86,
+    -2.56, 2.28, -3.02, 3.33, -1.12, 3.86, -2.18, -1.19, -3.03, 0.79, -0.83,
+    0.97, -3.19, 1.45, -1.34, 1.28, -2.52, 4.22, -4.53, 3.22, -1.97, 1.75,
+    -2.36, 3.19, -0.83, 1.53, -1.59, 1.86, -2.17, 2.3, -1.63, 2.71, -2.03,
+    3.75, -2.57, -0.6, -1.47, 1.33, -1.95, 0.7, -1.65, 1.27, -1.42, 1.09, -3.0,
+    3.87, -2.51, 3.06, -2.6, 0.74, -1.08, -0.03, -2.44, 1.31, -2.65, 2.99,
+    -1.84, 1.65, -4.76, 3.75, -2.07, 3.98, -2.4, 2.67, -2.21, 1.49, -1.21,
+    1.22, -5.29, 2.38, -2.85, 2.28, -5.6, 3.78, -2.7, 0.8, -1.81, 3.5, -3.75,
+    4.17, -1.29, 2.99, -5.92, 3.43, -1.83, 1.23, -1.24, -1.04, -2.56, 2.37,
+    -3.26, 0.39, -4.63, 2.51, -4.52, 3.04, -1.7, 0.36, -1.41, 0.04, -2.1, 1.0,
+    -1.87, 3.78, -4.32, 3.59, -2.24, 1.38, -1.99, -0.22, -1.87, 1.95, -0.84,
+    2.17, -5.38, 3.56, -1.27, 2.9, -1.79, 3.31, -5.47, 3.85, -1.44, 3.69,
+    -2.02, 0.37, -1.29, 0.33, -2.34, 2.56, -1.74, -1.27, -1.97, 1.22, -2.51,
+    -0.16, -1.64, -0.96, -2.99, 1.4, -1.53, 3.31, -2.24, 0.45, -2.46, 1.71,
+    -2.88, 1.56, -1.63, 1.46, -1.41, 0.68, -1.96, 2.76, -1.61,
+    2.11]).reshape((200, 2))
+
+
+# Global data
+# X: 11 two-dimensional observations; the tests below use its first three
+# rows as the initial code book.
+X = np.array([[3.0, 3], [4, 3], [4, 2],
+              [9, 2], [5, 1], [6, 2], [9, 4],
+              [5, 2], [5, 4], [7, 4], [6, 5]])
+
+# CODET1: code book expected from kmeans2 on X after one iteration
+# (see test_kmeans2_simple).
+CODET1 = np.array([[3.0000, 3.0000],
+                   [6.2000, 4.0000],
+                   [5.8000, 1.8000]])
+
+# CODET2: code book expected from kmeans2 after two iterations and from
+# kmeans with iter=1 (see test_kmeans2_simple and test_kmeans_simple).
+CODET2 = np.array([[11.0/3, 8.0/3],
+                   [6.7500, 4.2500],
+                   [6.2500, 1.7500]])
+
+# LABEL1: expected code index for each row of X when the first three rows
+# of X are the code book (see test_py_vq and test_vq).
+LABEL1 = np.array([0, 1, 2, 2, 2, 2, 1, 2, 1, 1, 1])
+
+
+@make_xp_test_case(whiten)
+class TestWhiten:
+    """Tests for `scipy.cluster.vq.whiten`."""
+
+    def test_whiten(self, xp):
+        """`whiten` of a fixed 5x2 input matches stored values (rtol=1e-5)."""
+        desired = xp.asarray([[5.08738849, 2.97091878],
+                              [3.19909255, 0.69660580],
+                              [4.51041982, 0.02640918],
+                              [4.38567074, 0.95120889],
+                              [2.32191480, 1.63195503]])
+
+        obs = xp.asarray([[0.98744510, 0.82766775],
+                          [0.62093317, 0.19406729],
+                          [0.87545741, 0.00735733],
+                          [0.85124403, 0.26499712],
+                          [0.45067590, 0.45464607]])
+        xp_assert_close(whiten(obs), desired, rtol=1e-5)
+
+    def test_whiten_zero_std(self, xp):
+        """Constant columns come back unchanged.
+
+        The first two columns of the input are constant (all 0. and all
+        1.); the expected output keeps them as they are.  For eager arrays
+        a RuntimeWarning matching "standard deviation zero" is expected.
+        """
+        desired = xp.asarray([[0., 1.0, 2.86666544],
+                              [0., 1.0, 1.32460034],
+                              [0., 1.0, 3.74382172]])
+
+        obs = xp.asarray([[0., 1., 0.74109533],
+                          [0., 1., 0.34243798],
+                          [0., 1., 0.96785929]])
+
+        with eager_warns(obs, RuntimeWarning, match="standard deviation zero"):
+            actual = whiten(obs)
+        xp_assert_close(actual, desired, rtol=1e-5)
+
+    @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning:dask")
+    @pytest.mark.parametrize("bad_value", [math.nan, math.inf, -math.inf])
+    def test_whiten_not_finite(self, bad_value, xp):
+        """A NaN or infinite entry raises ValueError for eager arrays.
+
+        For lazy arrays no exception is expected; instead the column that
+        holds the bad value is expected to come back as NaN.
+        """
+        obs = xp.asarray([[0.98744510, bad_value],
+                          [0.62093317, 0.19406729],
+                          [0.87545741, 0.00735733],
+                          [0.85124403, 0.26499712],
+                          [0.45067590, 0.45464607]])
+
+        if is_lazy_array(obs):
+            desired = xp.asarray([[5.08738849, math.nan],
+                                  [3.19909255, math.nan],
+                                  [4.51041982, math.nan],
+                                  [4.38567074, math.nan],
+                                  [2.32191480, math.nan]])
+            xp_assert_close(whiten(obs), desired, rtol=1e-5)
+        else:
+            assert_raises(ValueError, whiten, obs)
+
+    # Parametrized like test_whiten_not_finite so that each bad value is
+    # reported as its own test case instead of sharing one loop.
+    @pytest.mark.skipif(SCIPY_ARRAY_API,
+                        reason='`np.matrix` unsupported in array API mode')
+    @pytest.mark.parametrize("bad_value", [np.nan, np.inf, -np.inf])
+    def test_whiten_not_finite_matrix(self, bad_value):
+        """A NaN or infinite entry in a `matrix` input raises ValueError."""
+        obs = matrix([[0.98744510, bad_value],
+                      [0.62093317, 0.19406729],
+                      [0.87545741, 0.00735733],
+                      [0.85124403, 0.26499712],
+                      [0.45067590, 0.45464607]])
+        assert_raises(ValueError, whiten, obs)
+
+
+@make_xp_test_case(vq)
+class TestVq:
+    """Tests for `vq`, the pure-Python `py_vq` and the compiled `_vq.vq`."""
+
+    def test_py_vq(self, xp):
+        """`py_vq` labels X as LABEL1 with X's first three rows as code book."""
+        initc = np.concatenate([[X[0]], [X[1]], [X[2]]])
+        # label1.dtype varies between int32 and int64 over platforms
+        label1 = py_vq(xp.asarray(X), xp.asarray(initc))[0]
+        xp_assert_equal(label1, xp.asarray(LABEL1, dtype=xp.int64),
+                        check_dtype=False)
+
+    @pytest.mark.skipif(SCIPY_ARRAY_API,
+                        reason='`np.matrix` unsupported in array API mode')
+    def test_py_vq_matrix(self):
+        """Same as `test_py_vq`, with `matrix` inputs."""
+        initc = np.concatenate([[X[0]], [X[1]], [X[2]]])
+        # label1.dtype varies between int32 and int64 over platforms
+        label1 = py_vq(matrix(X), matrix(initc))[0]
+        assert_array_equal(label1, LABEL1)
+
+    def test_vq(self, xp):
+        """`_vq.vq` reproduces LABEL1; `vq` is only checked to run on xp arrays."""
+        initc = np.concatenate([[X[0]], [X[1]], [X[2]]])
+        label1, _ = _vq.vq(X, initc)
+        assert_array_equal(label1, LABEL1)
+        _, _ = vq(xp.asarray(X), xp.asarray(initc))
+
+    @pytest.mark.skipif(SCIPY_ARRAY_API,
+                        reason='`np.matrix` unsupported in array API mode')
+    def test_vq_matrix(self):
+        """Same as `test_vq`, with `matrix` inputs."""
+        initc = np.concatenate([[X[0]], [X[1]], [X[2]]])
+        label1, _ = _vq.vq(matrix(X), matrix(initc))
+        assert_array_equal(label1, LABEL1)
+        _, _ = vq(matrix(X), matrix(initc))
+
+    def test_vq_1d(self, xp):
+        """`_vq.vq` on 1-D data agrees with `py_vq` on the same data as a column."""
+        # Test special rank 1 vq algo, python implementation.
+        data = X[:, 0]
+        initc = data[:3]
+        a, b = _vq.vq(data, initc)
+        data = xp.asarray(data)
+        initc = xp.asarray(initc)
+        ta, tb = py_vq(data[:, np.newaxis], initc[:, np.newaxis])
+        # ta.dtype varies between int32 and int64 over platforms
+        xp_assert_equal(ta, xp.asarray(a, dtype=xp.int64), check_dtype=False)
+        xp_assert_equal(tb, xp.asarray(b))
+
+    def test__vq_sametype(self):
+        """`_vq.vq` raises TypeError for float64 data with a float32 code book."""
+        a = np.asarray([1.0, 2.0])
+        b = a.astype(np.float32)
+        assert_raises(TypeError, _vq.vq, a, b)
+
+    def test__vq_invalid_type(self):
+        """`_vq.vq` raises TypeError for integer inputs."""
+        a = np.asarray([1, 2], dtype=int)
+        assert_raises(TypeError, _vq.vq, a, a)
+
+    def test_vq_large_nfeat(self, xp):
+        """`py_vq` and `_vq.vq` agree on 20-feature data (float64, then float32)."""
+        # A seeded local Generator keeps the inputs reproducible and avoids
+        # touching the global NumPy random state.
+        rng = np.random.default_rng(1234)
+        obs = rng.random((20, 20))
+        code_book = rng.random((3, 20))
+
+        codes0, dis0 = _vq.vq(obs, code_book)
+        codes1, dis1 = py_vq(
+            xp.asarray(obs), xp.asarray(code_book)
+        )
+        xp_assert_close(dis1, xp.asarray(dis0), rtol=1e-5)
+        # codes1.dtype varies between int32 and int64 over platforms
+        xp_assert_equal(codes1, xp.asarray(codes0, dtype=xp.int64), check_dtype=False)
+
+        obs = obs.astype(np.float32)
+        code_book = code_book.astype(np.float32)
+
+        codes0, dis0 = _vq.vq(obs, code_book)
+        codes1, dis1 = py_vq(
+            xp.asarray(obs), xp.asarray(code_book)
+        )
+        xp_assert_close(dis1, xp.asarray(dis0, dtype=xp.float64), rtol=1e-5)
+        # codes1.dtype varies between int32 and int64 over platforms
+        xp_assert_equal(codes1, xp.asarray(codes0, dtype=xp.int64), check_dtype=False)
+
+    def test_vq_large_features(self, xp):
+        """`py_vq` and `_vq.vq` agree on data scaled by 1e6."""
+        # Seeded for the same reason as in test_vq_large_nfeat.
+        rng = np.random.default_rng(1234)
+        obs = rng.random((10, 5)) * 1000000
+        code_book = rng.random((2, 5)) * 1000000
+
+        codes0, dis0 = _vq.vq(obs, code_book)
+        codes1, dis1 = py_vq(
+            xp.asarray(obs), xp.asarray(code_book)
+        )
+        xp_assert_close(dis1, xp.asarray(dis0), rtol=1e-5)
+        # codes1.dtype varies between int32 and int64 over platforms
+        xp_assert_equal(codes1, xp.asarray(codes0, dtype=xp.int64), check_dtype=False)
+
+
+# Whole class skipped on GPU for now;
+# once pdist/cdist are hooked up for CuPy, more tests will work
+# NOTE(review): no skip marker is visible on this class in this module;
+# confirm whether the note above still applies.
+@make_xp_test_case(kmeans, kmeans2)
+class TestKMeans:
+    """Tests for `kmeans`, `kmeans2` and the private `_krandinit` helper."""
+
+    def test_large_features(self, xp):
+        """`kmeans` runs on data with large values (regression test for 1077)."""
+        # Build two groups of n points in d dimensions with coordinates of
+        # the order of 1e4, then only check that kmeans runs on them.
+        d = 300
+        n = 100
+
+        # A seeded local Generator keeps the data reproducible and avoids
+        # touching the global NumPy random state.
+        rng = np.random.default_rng(1234)
+        m1 = rng.standard_normal(d)
+        m2 = rng.standard_normal(d)
+        x = 10000 * rng.standard_normal((n, d)) - 20000 * m1
+        y = 10000 * rng.standard_normal((n, d)) + 20000 * m2
+
+        data = np.empty((x.shape[0] + y.shape[0], d), np.float64)
+        data[:x.shape[0]] = x
+        data[x.shape[0]:] = y
+
+        # use `seed` to ensure backwards compatibility after SPEC7
+        kmeans(xp.asarray(data), 2, seed=1)
+
+    def test_kmeans_simple(self, xp):
+        """One `kmeans` iteration from X's first three rows yields CODET2."""
+        rng = np.random.default_rng(54321)
+        initc = np.concatenate([[X[0]], [X[1]], [X[2]]])
+        code1 = kmeans(xp.asarray(X), xp.asarray(initc), iter=1, rng=rng)[0]
+        xp_assert_close(code1, xp.asarray(CODET2))
+
+    @pytest.mark.skipif(SCIPY_ARRAY_API,
+                        reason='`np.matrix` unsupported in array API mode')
+    def test_kmeans_simple_matrix(self):
+        """Same as `test_kmeans_simple`, with `matrix` inputs."""
+        rng = np.random.default_rng(54321)
+        initc = np.concatenate([[X[0]], [X[1]], [X[2]]])
+        code1 = kmeans(matrix(X), matrix(initc), iter=1, rng=rng)[0]
+        xp_assert_close(code1, CODET2)
+
+    def test_kmeans_lost_cluster(self, xp):
+        """An empty cluster is tolerated, warned about, or raised on request.
+
+        `kmeans` must run; `kmeans2` with ``missing='warn'`` runs with the
+        UserWarning filtered; ``missing='raise'`` raises ClusterError.
+        """
+        # This will cause kmeans to have a cluster with no points.
+        data = xp.asarray(TESTDATA_2D)
+        initk = xp.asarray([[-1.8127404, -0.67128041],
+                            [2.04621601, 0.07401111],
+                            [-2.31149087, -0.05160469]])
+
+        kmeans(data, initk)
+        with suppress_warnings() as sup:
+            sup.filter(UserWarning,
+                       "One of the clusters is empty. Re-run kmeans with a "
+                       "different initialization")
+            kmeans2(data, initk, missing='warn')
+
+        assert_raises(ClusterError, kmeans2, data, initk, missing='raise')
+
+    def test_kmeans2_simple(self, xp):
+        """`kmeans2` yields CODET1 after one iteration and CODET2 after two."""
+        rng = np.random.default_rng(12345678)
+        initc = xp.asarray(np.concatenate([[X[0]], [X[1]], [X[2]]]))
+        # outside array API mode both ndarray and matrix inputs are tried
+        arrays = [xp.asarray] if SCIPY_ARRAY_API else [np.asarray, matrix]
+        for tp in arrays:
+            code1 = kmeans2(tp(X), tp(initc), iter=1, rng=rng)[0]
+            code2 = kmeans2(tp(X), tp(initc), iter=2, rng=rng)[0]
+
+            xp_assert_close(code1, xp.asarray(CODET1))
+            xp_assert_close(code2, xp.asarray(CODET2))
+
+    @pytest.mark.skipif(SCIPY_ARRAY_API,
+                        reason='`np.matrix` unsupported in array API mode')
+    def test_kmeans2_simple_matrix(self):
+        """Same as `test_kmeans2_simple`, with `matrix` inputs only."""
+        rng = np.random.default_rng(12345678)
+        initc = np.concatenate([[X[0]], [X[1]], [X[2]]])
+        code1 = kmeans2(matrix(X), matrix(initc), iter=1, rng=rng)[0]
+        code2 = kmeans2(matrix(X), matrix(initc), iter=2, rng=rng)[0]
+
+        xp_assert_close(code1, CODET1)
+        xp_assert_close(code2, CODET2)
+
+    def test_kmeans2_rank1(self, xp):
+        """`kmeans2` runs on 1-D data with a 1-D initial code book."""
+        data = xp.asarray(TESTDATA_2D)
+        data1 = data[:, 0]
+
+        initc = data1[:3]
+        code = xp_copy(initc, xp=xp)
+
+        # The results are discarded: this only checks that the calls run.
+        # use `seed` to ensure backwards compatibility after SPEC7
+        kmeans2(data1, code, iter=1, seed=1)[0]
+        kmeans2(data1, code, iter=2)[0]
+
+    def test_kmeans2_rank1_2(self, xp):
+        """`kmeans2` runs on 1-D data with an integer number of clusters."""
+        data = xp.asarray(TESTDATA_2D)
+        data1 = data[:, 0]
+        kmeans2(data1, 2, iter=1)
+
+    def test_kmeans2_high_dim(self, xp):
+        """`kmeans2` runs on 10 points with 20 features."""
+        # test kmeans2 when the number of dimensions exceeds the number
+        # of input points
+        data = xp.asarray(TESTDATA_2D)
+        data = xp.reshape(data, (20, 20))[:10, :]
+        kmeans2(data, 2)
+
+    def test_kmeans2_init(self, xp):
+        """`kmeans2` runs with each `minit` tried here, on 2-D and 1-D data."""
+        rng = np.random.default_rng(12345678)
+        data = xp.asarray(TESTDATA_2D)
+        k = 3
+
+        kmeans2(data, k, minit='points', rng=rng)
+        kmeans2(data[:, 1], k, minit='points', rng=rng)  # special case (1-D)
+
+        kmeans2(data, k, minit='++', rng=rng)
+        kmeans2(data[:, 1], k, minit='++', rng=rng)  # special case (1-D)
+
+        # minit='random' can give warnings, filter those
+        with suppress_warnings() as sup:
+            sup.filter(message="One of the clusters is empty. Re-run.")
+            kmeans2(data, k, minit='random', rng=rng)
+            kmeans2(data[:, 1], k, minit='random', rng=rng)  # special case (1-D)
+
+    @pytest.fixture
+    def krand_lock(self):
+        """Provide a `threading.Lock` for `test_krandinit`."""
+        return Lock()
+
+    @xfail_xp_backends('dask.array', reason="Wrong answer")
+    @pytest.mark.skipif(sys.platform == 'win32',
+                        reason='Fails with MemoryError in Wine.')
+    def test_krandinit(self, xp, krand_lock):
+        """Covariance of 1e6 `_krandinit` samples matches the data covariance.
+
+        Checked for the (200, 2) data and for a (10, 20) slice of it, with
+        an absolute tolerance of 1.1e-2.
+        """
+        data = xp.asarray(TESTDATA_2D)
+        datas = [xp.reshape(data, (200, 2)),
+                 xp.reshape(data, (20, 20))[:10, :]]
+        k = int(1e6)
+        # NOTE(review): the lock presumably keeps threaded runs that share
+        # this fixture value from doing the large allocation at the same
+        # time -- confirm.
+        with krand_lock:
+            for data in datas:
+                rng = np.random.default_rng(1234)
+                init = _krandinit(data, k, rng, xp)
+                orig_cov = xpx.cov(data.T, xp=xp)
+                init_cov = xpx.cov(init.T, xp=xp)
+                xp_assert_close(orig_cov, init_cov, atol=1.1e-2)
+
+    def test_kmeans2_empty(self, xp):
+        """`kmeans2` raises ValueError for empty data."""
+        # Regression test for gh-1032.
+        assert_raises(ValueError, kmeans2, xp.asarray([]), 2)
+
+    def test_kmeans_0k(self, xp):
+        """`kmeans`/`kmeans2` raise ValueError for k == 0 or an empty code book."""
+        # Regression test for gh-1073: fail when k arg is 0.
+        assert_raises(ValueError, kmeans, xp.asarray(X), 0)
+        assert_raises(ValueError, kmeans2, xp.asarray(X), 0)
+        assert_raises(ValueError, kmeans2, xp.asarray(X), xp.asarray([]))
+
+    def test_kmeans_large_thres(self, xp):
+        """`kmeans` with thresh=1e16 returns the expected centroid and distortion."""
+        # Regression test for gh-1774
+        x = xp.asarray([1, 2, 3, 4, 10], dtype=xp.float64)
+        res = kmeans(x, 1, thresh=1e16)
+        xp_assert_close(res[0], xp.asarray([4.], dtype=xp.float64))
+        xp_assert_close(res[1], xp.asarray(2.3999999999999999, dtype=xp.float64)[()])
+
+    def test_kmeans2_kpp_low_dim(self, xp):
+        """`kmeans2` with minit='++' reproduces stored centroids on 2-D data."""
+        # Regression test for gh-11462
+        rng = np.random.default_rng(2358792345678234568)
+        prev_res = xp.asarray([[-1.95266667, 0.898],
+                               [-3.153375, 3.3945]], dtype=xp.float64)
+        res, _ = kmeans2(xp.asarray(TESTDATA_2D), 2, minit='++', rng=rng)
+        xp_assert_close(res, prev_res)
+
+    @pytest.mark.thread_unsafe
+    def test_kmeans2_kpp_high_dim(self, xp):
+        """`kmeans2` with minit='++' finds both centres in 100 dimensions.
+
+        Ten points are drawn around each of +5 and -5 (per coordinate);
+        only the signs of the resulting centroids are compared.
+        """
+        # Regression test for gh-11462
+        rng = np.random.default_rng(23587923456834568)
+        n_dim = 100
+        size = 10
+        centers = np.vstack([5 * np.ones(n_dim),
+                             -5 * np.ones(n_dim)])
+
+        data = np.vstack([
+            rng.multivariate_normal(centers[0], np.eye(n_dim), size=size),
+            rng.multivariate_normal(centers[1], np.eye(n_dim), size=size)
+        ])
+
+        data = xp.asarray(data)
+        res, _ = kmeans2(data, 2, minit='++', rng=rng)
+        xp_assert_equal(xp.sign(res), xp.sign(xp.asarray(centers)))
+
+    def test_kmeans_diff_convergence(self, xp):
+        """`kmeans` returns the expected code book and distortion for this input."""
+        # Regression test for gh-8727
+        obs = xp.asarray([-3, -1, 0, 1, 1, 8], dtype=xp.float64)
+        res = kmeans(obs, xp.asarray([-3., 0.99]))
+        xp_assert_close(res[0], xp.asarray([-0.4,  8.], dtype=xp.float64))
+        xp_assert_close(res[1], xp.asarray(1.0666666666666667, dtype=xp.float64)[()])
+
+    def test_kmeans_and_kmeans2_random_seed(self, xp):
+        """Equal seeds give equal results for `kmeans` and `kmeans2`.
+
+        Checked for an int seed, a `RandomState` and a `Generator`; each is
+        deep-copied so that both runs start from the same state.
+        """
+
+        seed_list = [
+            1234, np.random.RandomState(1234), np.random.default_rng(1234)
+        ]
+
+        for seed in seed_list:
+            seed1 = deepcopy(seed)
+            seed2 = deepcopy(seed)
+            data = xp.asarray(TESTDATA_2D)
+            # test for kmeans
+            res1, _ = kmeans(data, 2, seed=seed1)
+            res2, _ = kmeans(data, 2, seed=seed2)
+            xp_assert_close(res1, res2)  # should be same results
+            # test for kmeans2
+            for minit in ["random", "points", "++"]:
+                res1, _ = kmeans2(data, 2, minit=minit, seed=seed1)
+                res2, _ = kmeans2(data, 2, minit=minit, seed=seed2)
+                xp_assert_close(res1, res2)  # should be same results