up follow livre

2025-08-30 18:14:14 +02:00 · 2025-08-30 18:14:14 +02:00 · 3a7a3849ae
commit 3a7a3849ae
parent b4b4398bb0
12242 changed files with 2564461 additions and 6914 deletions
--- a/venv/lib/python3.13/site-packages/scipy/cluster/vq.py
+++ b/venv/lib/python3.13/site-packages/scipy/cluster/vq.py
@ -0,0 +1,832 @@
+"""
+K-means clustering and vector quantization (:mod:`scipy.cluster.vq`)
+====================================================================
+
+Provides routines for k-means clustering, generating code books
+from k-means models and quantizing vectors by comparing them with
+centroids in a code book.
+
+.. autosummary::
+   :toctree: generated/
+
+   whiten -- Normalize a group of observations so each feature has unit variance
+   vq -- Calculate code book membership of a set of observation vectors
+   kmeans -- Perform k-means on a set of observation vectors forming k clusters
+   kmeans2 -- A different implementation of k-means with more methods
+           -- for initializing centroids
+
+Background information
+----------------------
+The k-means algorithm takes as input the number of clusters to
+generate, k, and a set of observation vectors to cluster. It
+returns a set of centroids, one for each of the k clusters. An
+observation vector is classified with the cluster number or
+centroid index of the centroid closest to it.
+
+A vector v belongs to cluster i if it is closer to centroid i than
+any other centroid. If v belongs to i, we say centroid i is the
+dominating centroid of v. The k-means algorithm tries to
+minimize distortion, which is defined as the sum of the squared distances
+between each observation vector and its dominating centroid.
+The minimization is achieved by iteratively reclassifying
+the observations into clusters and recalculating the centroids until
+a configuration is reached in which the centroids are stable. One can
+also define a maximum number of iterations.
+
+Since vector quantization is a natural application for k-means,
+information theory terminology is often used. The centroid index
+or cluster index is also referred to as a "code" and the table
+mapping codes to centroids and, vice versa, is often referred to as a
+"code book". The result of k-means, a set of centroids, can be
+used to quantize vectors. Quantization aims to find an encoding of
+vectors that reduces the expected distortion.
+
+All routines expect obs to be an M by N array, where the rows are
+the observation vectors. The codebook is a k by N array, where the
+ith row is the centroid of code word i. The observation vectors
+and centroids have the same feature dimension.
+
+As an example, suppose we wish to compress a 24-bit color image
+(each pixel is represented by one byte for red, one for blue, and
+one for green) before sending it over the web. By using a smaller
+8-bit encoding, we can reduce the amount of data by two
+thirds. Ideally, the colors for each of the 256 possible 8-bit
+encoding values should be chosen to minimize distortion of the
+color. Running k-means with k=256 generates a code book of 256
+codes, which fills up all possible 8-bit sequences. Instead of
+sending a 3-byte value for each pixel, the 8-bit centroid index
+(or code word) of the dominating centroid is transmitted. The code
+book is also sent over the wire so each 8-bit code can be
+translated back to a 24-bit pixel value representation. If the
+image of interest was of an ocean, we would expect many 24-bit
+blues to be represented by 8-bit codes. If it was an image of a
+human face, more flesh-tone colors would be represented in the
+code book.
+
+"""
+import warnings
+import numpy as np
+from collections import deque
+from scipy._lib._array_api import (_asarray, array_namespace, is_lazy_array,
+                                   xp_capabilities, xp_copy, xp_size)
+from scipy._lib._util import (check_random_state, rng_integers,
+                              _transition_to_rng)
+from scipy._lib import array_api_extra as xpx
+from scipy.spatial.distance import cdist
+
+from . import _vq
+
+__docformat__ = 'restructuredtext'
+
+__all__ = ['whiten', 'vq', 'kmeans', 'kmeans2']
+
+
+class ClusterError(Exception):
+    pass
+
+
+@xp_capabilities()
+def whiten(obs, check_finite=None):
+    """
+    Normalize a group of observations on a per feature basis.
+
+    Before running k-means, it is beneficial to rescale each feature
+    dimension of the observation set by its standard deviation (i.e. "whiten"
+    it - as in "white noise" where each frequency has equal power).
+    Each feature is divided by its standard deviation across all observations
+    to give it unit variance.
+
+    Parameters
+    ----------
+    obs : ndarray
+        Each row of the array is an observation.  The
+        columns are the features seen during each observation::
+
+            #        f0  f1  f2
+            obs = [[ 1., 1., 1.],  #o0
+                   [ 2., 2., 2.],  #o1
+                   [ 3., 3., 3.],  #o2
+                   [ 4., 4., 4.]]  #o3
+
+    check_finite : bool, optional
+        Whether to check that the input matrices contain only finite numbers.
+        Disabling may give a performance gain, but may result in problems
+        (crashes, non-termination) if the inputs do contain infinities or NaNs.
+        Default: True for eager backends and False for lazy ones.
+
+    Returns
+    -------
+    result : ndarray
+        Contains the values in `obs` scaled by the standard deviation
+        of each column.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from scipy.cluster.vq import whiten
+    >>> features  = np.array([[1.9, 2.3, 1.7],
+    ...                       [1.5, 2.5, 2.2],
+    ...                       [0.8, 0.6, 1.7,]])
+    >>> whiten(features)
+    array([[ 4.17944278,  2.69811351,  7.21248917],
+           [ 3.29956009,  2.93273208,  9.33380951],
+           [ 1.75976538,  0.7038557 ,  7.21248917]])
+
+    """
+    xp = array_namespace(obs)
+    if check_finite is None:
+        check_finite = not is_lazy_array(obs)
+    obs = _asarray(obs, check_finite=check_finite, xp=xp)
+    std_dev = xp.std(obs, axis=0)
+    zero_std_mask = std_dev == 0
+    std_dev = xpx.at(std_dev, zero_std_mask).set(1.0)
+    if check_finite and xp.any(zero_std_mask):
+        warnings.warn("Some columns have standard deviation zero. "
+                      "The values of these columns will not change.",
+                      RuntimeWarning, stacklevel=2)
+    return obs / std_dev
+
+
+@xp_capabilities(cpu_only=True, reason="uses spatial.distance.cdist",
+                 jax_jit=False, allow_dask_compute=True)
+def vq(obs, code_book, check_finite=True):
+    """
+    Assign codes from a code book to observations.
+
+    Assigns a code from a code book to each observation. Each
+    observation vector in the 'M' by 'N' `obs` array is compared with the
+    centroids in the code book and assigned the code of the closest
+    centroid.
+
+    The features in `obs` should have unit variance, which can be
+    achieved by passing them through the whiten function. The code
+    book can be created with the k-means algorithm or a different
+    encoding algorithm.
+
+    Parameters
+    ----------
+    obs : ndarray
+        Each row of the 'M' x 'N' array is an observation. The columns are
+        the "features" seen during each observation. The features must be
+        whitened first using the whiten function or something equivalent.
+    code_book : ndarray
+        The code book is usually generated using the k-means algorithm.
+        Each row of the array holds a different code, and the columns are
+        the features of the code::
+
+            #              f0  f1  f2  f3
+            code_book = [[ 1., 2., 3., 4.],  #c0
+                         [ 1., 2., 3., 4.],  #c1
+                         [ 1., 2., 3., 4.]]  #c2
+
+    check_finite : bool, optional
+        Whether to check that the input matrices contain only finite numbers.
+        Disabling may give a performance gain, but may result in problems
+        (crashes, non-termination) if the inputs do contain infinities or NaNs.
+        Default: True
+
+    Returns
+    -------
+    code : ndarray
+        A length M array holding the code book index for each observation.
+    dist : ndarray
+        The distortion (distance) between the observation and its nearest
+        code.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from scipy.cluster.vq import vq
+    >>> code_book = np.array([[1., 1., 1.],
+    ...                       [2., 2., 2.]])
+    >>> features  = np.array([[1.9, 2.3, 1.7],
+    ...                       [1.5, 2.5, 2.2],
+    ...                       [0.8, 0.6, 1.7]])
+    >>> vq(features, code_book)
+    (array([1, 1, 0], dtype=int32), array([0.43588989, 0.73484692, 0.83066239]))
+
+    """
+    xp = array_namespace(obs, code_book)
+    obs = _asarray(obs, xp=xp, check_finite=check_finite)
+    code_book = _asarray(code_book, xp=xp, check_finite=check_finite)
+    ct = xp.result_type(obs, code_book)
+
+    if xp.isdtype(ct, kind='real floating'):
+        c_obs = xp.astype(obs, ct, copy=False)
+        c_code_book = xp.astype(code_book, ct, copy=False)
+        c_obs = np.asarray(c_obs)
+        c_code_book = np.asarray(c_code_book)
+        result = _vq.vq(c_obs, c_code_book)
+        return xp.asarray(result[0]), xp.asarray(result[1])
+    return py_vq(obs, code_book, check_finite=False)
+
+
+def py_vq(obs, code_book, check_finite=True):
+    """ Python version of vq algorithm.
+
+    The algorithm computes the Euclidean distance between each
+    observation and every frame in the code_book.
+
+    Parameters
+    ----------
+    obs : ndarray
+        Expects a rank 2 array. Each row is one observation.
+    code_book : ndarray
+        Code book to use. Same format than obs. Should have same number of
+        features (e.g., columns) than obs.
+    check_finite : bool, optional
+        Whether to check that the input matrices contain only finite numbers.
+        Disabling may give a performance gain, but may result in problems
+        (crashes, non-termination) if the inputs do contain infinities or NaNs.
+        Default: True
+
+    Returns
+    -------
+    code : ndarray
+        code[i] gives the label of the ith obversation; its code is
+        code_book[code[i]].
+    mind_dist : ndarray
+        min_dist[i] gives the distance between the ith observation and its
+        corresponding code.
+
+    Notes
+    -----
+    This function is slower than the C version but works for
+    all input types. If the inputs have the wrong types for the
+    C versions of the function, this one is called as a last resort.
+
+    It is about 20 times slower than the C version.
+
+    """
+    xp = array_namespace(obs, code_book)
+    obs = _asarray(obs, xp=xp, check_finite=check_finite)
+    code_book = _asarray(code_book, xp=xp, check_finite=check_finite)
+
+    if obs.ndim != code_book.ndim:
+        raise ValueError("Observation and code_book should have the same rank")
+
+    if obs.ndim == 1:
+        obs = obs[:, xp.newaxis]
+        code_book = code_book[:, xp.newaxis]
+
+    # Once `cdist` has array API support, this `xp.asarray` call can be removed
+    dist = xp.asarray(cdist(obs, code_book))
+    code = xp.argmin(dist, axis=1)
+    min_dist = xp.min(dist, axis=1)
+    return code, min_dist
+
+
+def _kmeans(obs, guess, thresh=1e-5, xp=None):
+    """ "raw" version of k-means.
+
+    Returns
+    -------
+    code_book
+        The lowest distortion codebook found.
+    avg_dist
+        The average distance a observation is from a code in the book.
+        Lower means the code_book matches the data better.
+
+    See Also
+    --------
+    kmeans : wrapper around k-means
+
+    Examples
+    --------
+    Note: not whitened in this example.
+
+    >>> import numpy as np
+    >>> from scipy.cluster.vq import _kmeans
+    >>> features  = np.array([[ 1.9,2.3],
+    ...                       [ 1.5,2.5],
+    ...                       [ 0.8,0.6],
+    ...                       [ 0.4,1.8],
+    ...                       [ 1.0,1.0]])
+    >>> book = np.array((features[0],features[2]))
+    >>> _kmeans(features,book)
+    (array([[ 1.7       ,  2.4       ],
+           [ 0.73333333,  1.13333333]]), 0.40563916697728591)
+
+    """
+    xp = np if xp is None else xp
+    code_book = guess
+    diff = xp.inf
+    prev_avg_dists = deque([diff], maxlen=2)
+
+    np_obs = np.asarray(obs)
+    while diff > thresh:
+        # compute membership and distances between obs and code_book
+        obs_code, distort = vq(obs, code_book, check_finite=False)
+        prev_avg_dists.append(xp.mean(distort, axis=-1))
+        # recalc code_book as centroids of associated obs
+        obs_code = np.asarray(obs_code)
+        code_book, has_members = _vq.update_cluster_means(np_obs, obs_code,
+                                                          code_book.shape[0])
+        code_book = code_book[has_members]
+        code_book = xp.asarray(code_book)
+        diff = xp.abs(prev_avg_dists[0] - prev_avg_dists[1])
+
+    return code_book, prev_avg_dists[1]
+
+
+@xp_capabilities(cpu_only=True, jax_jit=False, allow_dask_compute=True)
+@_transition_to_rng("seed")
+def kmeans(obs, k_or_guess, iter=20, thresh=1e-5, check_finite=True,
+           *, rng=None):
+    """
+    Performs k-means on a set of observation vectors forming k clusters.
+
+    The k-means algorithm adjusts the classification of the observations
+    into clusters and updates the cluster centroids until the position of
+    the centroids is stable over successive iterations. In this
+    implementation of the algorithm, the stability of the centroids is
+    determined by comparing the absolute value of the change in the average
+    Euclidean distance between the observations and their corresponding
+    centroids against a threshold. This yields
+    a code book mapping centroids to codes and vice versa.
+
+    Parameters
+    ----------
+    obs : ndarray
+       Each row of the M by N array is an observation vector. The
+       columns are the features seen during each observation.
+       The features must be whitened first with the `whiten` function.
+
+    k_or_guess : int or ndarray
+       The number of centroids to generate. A code is assigned to
+       each centroid, which is also the row index of the centroid
+       in the code_book matrix generated.
+
+       The initial k centroids are chosen by randomly selecting
+       observations from the observation matrix. Alternatively,
+       passing a k by N array specifies the initial k centroids.
+
+    iter : int, optional
+       The number of times to run k-means, returning the codebook
+       with the lowest distortion. This argument is ignored if
+       initial centroids are specified with an array for the
+       ``k_or_guess`` parameter. This parameter does not represent the
+       number of iterations of the k-means algorithm.
+
+    thresh : float, optional
+       Terminates the k-means algorithm if the change in
+       distortion since the last k-means iteration is less than
+       or equal to threshold.
+
+    check_finite : bool, optional
+        Whether to check that the input matrices contain only finite numbers.
+        Disabling may give a performance gain, but may result in problems
+        (crashes, non-termination) if the inputs do contain infinities or NaNs.
+        Default: True
+    rng : `numpy.random.Generator`, optional
+        Pseudorandom number generator state. When `rng` is None, a new
+        `numpy.random.Generator` is created using entropy from the
+        operating system. Types other than `numpy.random.Generator` are
+        passed to `numpy.random.default_rng` to instantiate a ``Generator``.
+
+    Returns
+    -------
+    codebook : ndarray
+       A k by N array of k centroids. The ith centroid
+       codebook[i] is represented with the code i. The centroids
+       and codes generated represent the lowest distortion seen,
+       not necessarily the globally minimal distortion.
+       Note that the number of centroids is not necessarily the same as the
+       ``k_or_guess`` parameter, because centroids assigned to no observations
+       are removed during iterations.
+
+    distortion : float
+       The mean (non-squared) Euclidean distance between the observations
+       passed and the centroids generated. Note the difference to the standard
+       definition of distortion in the context of the k-means algorithm, which
+       is the sum of the squared distances.
+
+    See Also
+    --------
+    kmeans2 : a different implementation of k-means clustering
+       with more methods for generating initial centroids but without
+       using a distortion change threshold as a stopping criterion.
+
+    whiten : must be called prior to passing an observation matrix
+       to kmeans.
+
+    Notes
+    -----
+    For more functionalities or optimal performance, you can use
+    `sklearn.cluster.KMeans <https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html>`_.
+    `This <https://hdbscan.readthedocs.io/en/latest/performance_and_scalability.html#comparison-of-high-performance-implementations>`_
+    is a benchmark result of several implementations.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from scipy.cluster.vq import vq, kmeans, whiten
+    >>> import matplotlib.pyplot as plt
+    >>> features  = np.array([[ 1.9,2.3],
+    ...                       [ 1.5,2.5],
+    ...                       [ 0.8,0.6],
+    ...                       [ 0.4,1.8],
+    ...                       [ 0.1,0.1],
+    ...                       [ 0.2,1.8],
+    ...                       [ 2.0,0.5],
+    ...                       [ 0.3,1.5],
+    ...                       [ 1.0,1.0]])
+    >>> whitened = whiten(features)
+    >>> book = np.array((whitened[0],whitened[2]))
+    >>> kmeans(whitened,book)
+    (array([[ 2.3110306 ,  2.86287398],    # random
+           [ 0.93218041,  1.24398691]]), 0.85684700941625547)
+
+    >>> codes = 3
+    >>> kmeans(whitened,codes)
+    (array([[ 2.3110306 ,  2.86287398],    # random
+           [ 1.32544402,  0.65607529],
+           [ 0.40782893,  2.02786907]]), 0.5196582527686241)
+
+    >>> # Create 50 datapoints in two clusters a and b
+    >>> pts = 50
+    >>> rng = np.random.default_rng()
+    >>> a = rng.multivariate_normal([0, 0], [[4, 1], [1, 4]], size=pts)
+    >>> b = rng.multivariate_normal([30, 10],
+    ...                             [[10, 2], [2, 1]],
+    ...                             size=pts)
+    >>> features = np.concatenate((a, b))
+    >>> # Whiten data
+    >>> whitened = whiten(features)
+    >>> # Find 2 clusters in the data
+    >>> codebook, distortion = kmeans(whitened, 2)
+    >>> # Plot whitened data and cluster centers in red
+    >>> plt.scatter(whitened[:, 0], whitened[:, 1])
+    >>> plt.scatter(codebook[:, 0], codebook[:, 1], c='r')
+    >>> plt.show()
+
+    """
+    if isinstance(k_or_guess, int):
+        xp = array_namespace(obs)
+    else:
+        xp = array_namespace(obs, k_or_guess)
+    obs = _asarray(obs, xp=xp, check_finite=check_finite)
+    guess = _asarray(k_or_guess, xp=xp, check_finite=check_finite)
+    if iter < 1:
+        raise ValueError(f"iter must be at least 1, got {iter}")
+
+    # Determine whether a count (scalar) or an initial guess (array) was passed.
+    if xp_size(guess) != 1:
+        if xp_size(guess) < 1:
+            raise ValueError(f"Asked for 0 clusters. Initial book was {guess}")
+        return _kmeans(obs, guess, thresh=thresh, xp=xp)
+
+    # k_or_guess is a scalar, now verify that it's an integer
+    k = int(guess)
+    if k != guess:
+        raise ValueError("If k_or_guess is a scalar, it must be an integer.")
+    if k < 1:
+        raise ValueError(f"Asked for {k} clusters.")
+
+    rng = check_random_state(rng)
+
+    # initialize best distance value to a large value
+    best_dist = xp.inf
+    for i in range(iter):
+        # the initial code book is randomly selected from observations
+        guess = _kpoints(obs, k, rng, xp)
+        book, dist = _kmeans(obs, guess, thresh=thresh, xp=xp)
+        if dist < best_dist:
+            best_book = book
+            best_dist = dist
+    return best_book, best_dist
+
+
+def _kpoints(data, k, rng, xp):
+    """Pick k points at random in data (one row = one observation).
+
+    Parameters
+    ----------
+    data : ndarray
+        Expect a rank 1 or 2 array. Rank 1 are assumed to describe one
+        dimensional data, rank 2 multidimensional data, in which case one
+        row is one observation.
+    k : int
+        Number of samples to generate.
+    rng : `numpy.random.Generator` or `numpy.random.RandomState`
+        Random number generator.
+
+    Returns
+    -------
+    x : ndarray
+        A 'k' by 'N' containing the initial centroids
+
+    """
+    idx = rng.choice(data.shape[0], size=int(k), replace=False)
+    # convert to array with default integer dtype (avoids numpy#25607)
+    idx = xp.asarray(idx, dtype=xp.asarray([1]).dtype)
+    return xp.take(data, idx, axis=0)
+
+
+def _krandinit(data, k, rng, xp):
+    """Returns k samples of a random variable whose parameters depend on data.
+
+    More precisely, it returns k observations sampled from a Gaussian random
+    variable whose mean and covariances are the ones estimated from the data.
+
+    Parameters
+    ----------
+    data : ndarray
+        Expect a rank 1 or 2 array. Rank 1 is assumed to describe 1-D
+        data, rank 2 multidimensional data, in which case one
+        row is one observation.
+    k : int
+        Number of samples to generate.
+    rng : `numpy.random.Generator` or `numpy.random.RandomState`
+        Random number generator.
+
+    Returns
+    -------
+    x : ndarray
+        A 'k' by 'N' containing the initial centroids
+
+    """
+    mu = xp.mean(data, axis=0)
+    k = np.asarray(k)
+
+    if data.ndim == 1:
+        _cov = xpx.cov(data, xp=xp)
+        x = rng.standard_normal(size=k)
+        x = xp.asarray(x)
+        x *= xp.sqrt(_cov)
+    elif data.shape[1] > data.shape[0]:
+        # initialize when the covariance matrix is rank deficient
+        _, s, vh = xp.linalg.svd(data - mu, full_matrices=False)
+        x = rng.standard_normal(size=(k, xp_size(s)))
+        x = xp.asarray(x)
+        sVh = s[:, None] * vh / xp.sqrt(data.shape[0] - xp.asarray(1.))
+        x = x @ sVh
+    else:
+        _cov = xpx.atleast_nd(xpx.cov(data.T, xp=xp), ndim=2, xp=xp)
+
+        # k rows, d cols (one row = one obs)
+        # Generate k sample of a random variable ~ Gaussian(mu, cov)
+        x = rng.standard_normal(size=(k, xp_size(mu)))
+        x = xp.asarray(x)
+        x = x @ xp.linalg.cholesky(_cov).T
+
+    x += mu
+    return x
+
+
+def _kpp(data, k, rng, xp):
+    """ Picks k points in the data based on the kmeans++ method.
+
+    Parameters
+    ----------
+    data : ndarray
+        Expect a rank 1 or 2 array. Rank 1 is assumed to describe 1-D
+        data, rank 2 multidimensional data, in which case one
+        row is one observation.
+    k : int
+        Number of samples to generate.
+    rng : `numpy.random.Generator` or `numpy.random.RandomState`
+        Random number generator.
+
+    Returns
+    -------
+    init : ndarray
+        A 'k' by 'N' containing the initial centroids.
+
+    References
+    ----------
+    .. [1] D. Arthur and S. Vassilvitskii, "k-means++: the advantages of
+       careful seeding", Proceedings of the Eighteenth Annual ACM-SIAM Symposium
+       on Discrete Algorithms, 2007.
+    """
+
+    ndim = len(data.shape)
+    if ndim == 1:
+        data = data[:, None]
+
+    dims = data.shape[1]
+
+    init = xp.empty((int(k), dims))
+
+    for i in range(k):
+        if i == 0:
+            data_idx = rng_integers(rng, data.shape[0])
+        else:
+            D2 = cdist(init[:i,:], data, metric='sqeuclidean').min(axis=0)
+            probs = D2/D2.sum()
+            cumprobs = probs.cumsum()
+            r = rng.uniform()
+            cumprobs = np.asarray(cumprobs)
+            data_idx = int(np.searchsorted(cumprobs, r))
+
+        init = xpx.at(init)[i, :].set(data[data_idx, :])
+
+    if ndim == 1:
+        init = init[:, 0]
+    return init
+
+
+_valid_init_meth = {'random': _krandinit, 'points': _kpoints, '++': _kpp}
+
+
+def _missing_warn():
+    """Print a warning when called."""
+    warnings.warn("One of the clusters is empty. "
+                  "Re-run kmeans with a different initialization.",
+                  stacklevel=3)
+
+
+def _missing_raise():
+    """Raise a ClusterError when called."""
+    raise ClusterError("One of the clusters is empty. "
+                       "Re-run kmeans with a different initialization.")
+
+
+_valid_miss_meth = {'warn': _missing_warn, 'raise': _missing_raise}
+
+
+@xp_capabilities(cpu_only=True, jax_jit=False, allow_dask_compute=True)
+@_transition_to_rng("seed")
+def kmeans2(data, k, iter=10, thresh=1e-5, minit='random',
+            missing='warn', check_finite=True, *, rng=None):
+    """
+    Classify a set of observations into k clusters using the k-means algorithm.
+
+    The algorithm attempts to minimize the Euclidean distance between
+    observations and centroids. Several initialization methods are
+    included.
+
+    Parameters
+    ----------
+    data : ndarray
+        A 'M' by 'N' array of 'M' observations in 'N' dimensions or a length
+        'M' array of 'M' 1-D observations.
+    k : int or ndarray
+        The number of clusters to form as well as the number of
+        centroids to generate. If `minit` initialization string is
+        'matrix', or if a ndarray is given instead, it is
+        interpreted as initial cluster to use instead.
+    iter : int, optional
+        Number of iterations of the k-means algorithm to run. Note
+        that this differs in meaning from the iters parameter to
+        the kmeans function.
+    thresh : float, optional
+        (not used yet)
+    minit : str, optional
+        Method for initialization. Available methods are 'random',
+        'points', '++' and 'matrix':
+
+        'random': generate k centroids from a Gaussian with mean and
+        variance estimated from the data.
+
+        'points': choose k observations (rows) at random from data for
+        the initial centroids.
+
+        '++': choose k observations accordingly to the kmeans++ method
+        (careful seeding)
+
+        'matrix': interpret the k parameter as a k by M (or length k
+        array for 1-D data) array of initial centroids.
+    missing : str, optional
+        Method to deal with empty clusters. Available methods are
+        'warn' and 'raise':
+
+        'warn': give a warning and continue.
+
+        'raise': raise an ClusterError and terminate the algorithm.
+    check_finite : bool, optional
+        Whether to check that the input matrices contain only finite numbers.
+        Disabling may give a performance gain, but may result in problems
+        (crashes, non-termination) if the inputs do contain infinities or NaNs.
+        Default: True
+    rng : `numpy.random.Generator`, optional
+        Pseudorandom number generator state. When `rng` is None, a new
+        `numpy.random.Generator` is created using entropy from the
+        operating system. Types other than `numpy.random.Generator` are
+        passed to `numpy.random.default_rng` to instantiate a ``Generator``.
+
+    Returns
+    -------
+    centroid : ndarray
+        A 'k' by 'N' array of centroids found at the last iteration of
+        k-means.
+    label : ndarray
+        label[i] is the code or index of the centroid the
+        ith observation is closest to.
+
+    See Also
+    --------
+    kmeans
+
+    References
+    ----------
+    .. [1] D. Arthur and S. Vassilvitskii, "k-means++: the advantages of
+       careful seeding", Proceedings of the Eighteenth Annual ACM-SIAM Symposium
+       on Discrete Algorithms, 2007.
+
+    Examples
+    --------
+    >>> from scipy.cluster.vq import kmeans2
+    >>> import matplotlib.pyplot as plt
+    >>> import numpy as np
+
+    Create z, an array with shape (100, 2) containing a mixture of samples
+    from three multivariate normal distributions.
+
+    >>> rng = np.random.default_rng()
+    >>> a = rng.multivariate_normal([0, 6], [[2, 1], [1, 1.5]], size=45)
+    >>> b = rng.multivariate_normal([2, 0], [[1, -1], [-1, 3]], size=30)
+    >>> c = rng.multivariate_normal([6, 4], [[5, 0], [0, 1.2]], size=25)
+    >>> z = np.concatenate((a, b, c))
+    >>> rng.shuffle(z)
+
+    Compute three clusters.
+
+    >>> centroid, label = kmeans2(z, 3, minit='points')
+    >>> centroid
+    array([[ 2.22274463, -0.61666946],  # may vary
+           [ 0.54069047,  5.86541444],
+           [ 6.73846769,  4.01991898]])
+
+    How many points are in each cluster?
+
+    >>> counts = np.bincount(label)
+    >>> counts
+    array([29, 51, 20])  # may vary
+
+    Plot the clusters.
+
+    >>> w0 = z[label == 0]
+    >>> w1 = z[label == 1]
+    >>> w2 = z[label == 2]
+    >>> plt.plot(w0[:, 0], w0[:, 1], 'o', alpha=0.5, label='cluster 0')
+    >>> plt.plot(w1[:, 0], w1[:, 1], 'd', alpha=0.5, label='cluster 1')
+    >>> plt.plot(w2[:, 0], w2[:, 1], 's', alpha=0.5, label='cluster 2')
+    >>> plt.plot(centroid[:, 0], centroid[:, 1], 'k*', label='centroids')
+    >>> plt.axis('equal')
+    >>> plt.legend(shadow=True)
+    >>> plt.show()
+
+    """
+    if int(iter) < 1:
+        raise ValueError(f"Invalid iter ({iter}), must be a positive integer.")
+    try:
+        miss_meth = _valid_miss_meth[missing]
+    except KeyError as e:
+        raise ValueError(f"Unknown missing method {missing!r}") from e
+
+    if isinstance(k, int):
+        xp = array_namespace(data)
+    else:
+        xp = array_namespace(data, k)
+    data = _asarray(data, xp=xp, check_finite=check_finite)
+    code_book = xp_copy(k, xp=xp)
+    if data.ndim == 1:
+        d = 1
+    elif data.ndim == 2:
+        d = data.shape[1]
+    else:
+        raise ValueError("Input of rank > 2 is not supported.")
+
+    if xp_size(data) < 1 or xp_size(code_book) < 1:
+        raise ValueError("Empty input is not supported.")
+
+    # If k is not a single value, it should be compatible with data's shape
+    if minit == 'matrix' or xp_size(code_book) > 1:
+        if data.ndim != code_book.ndim:
+            raise ValueError("k array doesn't match data rank")
+        nc = code_book.shape[0]
+        if data.ndim > 1 and code_book.shape[1] != d:
+            raise ValueError("k array doesn't match data dimension")
+    else:
+        nc = int(code_book)
+
+        if nc < 1:
+            raise ValueError(
+                f"Cannot ask kmeans2 for {nc} clusters (k was {code_book})"
+            )
+        elif nc != code_book:
+            warnings.warn("k was not an integer, was converted.", stacklevel=2)
+
+        try:
+            init_meth = _valid_init_meth[minit]
+        except KeyError as e:
+            raise ValueError(f"Unknown init method {minit!r}") from e
+        else:
+            rng = check_random_state(rng)
+            code_book = init_meth(data, code_book, rng, xp)
+
+    data = np.asarray(data)
+    code_book = np.asarray(code_book)
+    for _ in range(iter):
+        # Compute the nearest neighbor for each obs using the current code book
+        label = vq(data, code_book, check_finite=check_finite)[0]
+        # Update the code book by computing centroids
+        new_code_book, has_members = _vq.update_cluster_means(data, label, nc)
+        if not has_members.all():
+            miss_meth()
+            # Set the empty clusters to their previous positions
+            new_code_book[~has_members] = code_book[~has_members]
+        code_book = new_code_book
+
+    return xp.asarray(code_book), xp.asarray(label)