4349 lines
		
	
	
	
		
			153 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			4349 lines
		
	
	
	
		
			153 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
|   | """
 | ||
|  | Hierarchical clustering (:mod:`scipy.cluster.hierarchy`) | ||
|  | ======================================================== | ||
|  | 
 | ||
|  | .. currentmodule:: scipy.cluster.hierarchy | ||
|  | 
 | ||
|  | These functions cut hierarchical clusterings into flat clusterings | ||
|  | or find the roots of the forest formed by a cut by providing the flat | ||
|  | cluster ids of each observation. | ||
|  | 
 | ||
|  | .. autosummary:: | ||
|  |    :toctree: generated/ | ||
|  | 
 | ||
|  |    fcluster | ||
|  |    fclusterdata | ||
|  |    leaders | ||
|  | 
 | ||
|  | These are routines for agglomerative clustering. | ||
|  | 
 | ||
|  | .. autosummary:: | ||
|  |    :toctree: generated/ | ||
|  | 
 | ||
|  |    linkage | ||
|  |    single | ||
|  |    complete | ||
|  |    average | ||
|  |    weighted | ||
|  |    centroid | ||
|  |    median | ||
|  |    ward | ||
|  | 
 | ||
|  | These routines compute statistics on hierarchies. | ||
|  | 
 | ||
|  | .. autosummary:: | ||
|  |    :toctree: generated/ | ||
|  | 
 | ||
|  |    cophenet | ||
|  |    from_mlab_linkage | ||
|  |    inconsistent | ||
|  |    maxinconsts | ||
|  |    maxdists | ||
|  |    maxRstat | ||
|  |    to_mlab_linkage | ||
|  | 
 | ||
|  | Routines for visualizing flat clusters. | ||
|  | 
 | ||
|  | .. autosummary:: | ||
|  |    :toctree: generated/ | ||
|  | 
 | ||
|  |    dendrogram | ||
|  | 
 | ||
|  | These are data structures and routines for representing hierarchies as | ||
|  | tree objects. | ||
|  | 
 | ||
|  | .. autosummary:: | ||
|  |    :toctree: generated/ | ||
|  | 
 | ||
|  |    ClusterNode | ||
|  |    leaves_list | ||
|  |    to_tree | ||
|  |    cut_tree | ||
|  |    optimal_leaf_ordering | ||
|  | 
 | ||
|  | These are predicates for checking the validity of linkage and | ||
|  | inconsistency matrices as well as for checking isomorphism of two | ||
|  | flat cluster assignments. | ||
|  | 
 | ||
|  | .. autosummary:: | ||
|  |    :toctree: generated/ | ||
|  | 
 | ||
|  |    is_valid_im | ||
|  |    is_valid_linkage | ||
|  |    is_isomorphic | ||
|  |    is_monotonic | ||
|  |    correspond | ||
|  |    num_obs_linkage | ||
|  | 
 | ||
|  | Utility routines for plotting: | ||
|  | 
 | ||
|  | .. autosummary:: | ||
|  |    :toctree: generated/ | ||
|  | 
 | ||
|  |    set_link_color_palette | ||
|  | 
 | ||
|  | Utility classes: | ||
|  | 
 | ||
|  | .. autosummary:: | ||
|  |    :toctree: generated/ | ||
|  | 
 | ||
|  |    DisjointSet -- data structure for incremental connectivity queries | ||
|  | 
 | ||
|  | """
 | ||
|  | # Copyright (C) Damian Eads, 2007-2008. New BSD License. | ||
|  | 
 | ||
|  | # hierarchy.py (derived from cluster.py, http://scipy-cluster.googlecode.com) | ||
|  | # | ||
|  | # Author: Damian Eads | ||
|  | # Date:   September 22, 2007 | ||
|  | # | ||
|  | # Copyright (c) 2007, 2008, Damian Eads | ||
|  | # | ||
|  | # All rights reserved. | ||
|  | # | ||
|  | # Redistribution and use in source and binary forms, with or without | ||
|  | # modification, are permitted provided that the following conditions | ||
|  | # are met: | ||
|  | #   - Redistributions of source code must retain the above | ||
|  | #     copyright notice, this list of conditions and the | ||
|  | #     following disclaimer. | ||
|  | #   - Redistributions in binary form must reproduce the above copyright | ||
|  | #     notice, this list of conditions and the following disclaimer | ||
|  | #     in the documentation and/or other materials provided with the | ||
|  | #     distribution. | ||
|  | #   - Neither the name of the author nor the names of its | ||
|  | #     contributors may be used to endorse or promote products derived | ||
|  | #     from this software without specific prior written permission. | ||
|  | # | ||
|  | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
|  | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
|  | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
|  | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
|  | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
|  | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
|  | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
|  | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
|  | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
|  | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
|  | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
|  | 
 | ||
|  | import warnings | ||
|  | import bisect | ||
|  | from collections import deque | ||
|  | 
 | ||
|  | import numpy as np | ||
|  | from . import _hierarchy, _optimal_leaf_ordering | ||
|  | import scipy.spatial.distance as distance | ||
|  | from scipy._lib._array_api import (_asarray, array_namespace, is_dask, | ||
|  |                                    is_lazy_array, xp_capabilities, xp_copy) | ||
|  | from scipy._lib._disjoint_set import DisjointSet | ||
|  | import scipy._lib.array_api_extra as xpx | ||
|  | 
 | ||
|  | 
 | ||
|  | _LINKAGE_METHODS = {'single': 0, 'complete': 1, 'average': 2, 'centroid': 3, | ||
|  |                     'median': 4, 'ward': 5, 'weighted': 6} | ||
|  | _EUCLIDEAN_METHODS = ('centroid', 'median', 'ward') | ||
|  | 
 | ||
|  | __all__ = ['ClusterNode', 'DisjointSet', 'average', 'centroid', 'complete', | ||
|  |            'cophenet', 'correspond', 'cut_tree', 'dendrogram', 'fcluster', | ||
|  |            'fclusterdata', 'from_mlab_linkage', 'inconsistent', | ||
|  |            'is_isomorphic', 'is_monotonic', 'is_valid_im', 'is_valid_linkage', | ||
|  |            'leaders', 'leaves_list', 'linkage', 'maxRstat', 'maxdists', | ||
|  |            'maxinconsts', 'median', 'num_obs_linkage', 'optimal_leaf_ordering', | ||
|  |            'set_link_color_palette', 'single', 'to_mlab_linkage', 'to_tree', | ||
|  |            'ward', 'weighted'] | ||
|  | 
 | ||
|  | 
 | ||
|  | class ClusterWarning(UserWarning): | ||
|  |     pass | ||
|  | 
 | ||
|  | 
 | ||
|  | def _warning(s): | ||
|  |     warnings.warn(f'scipy.cluster: {s}', ClusterWarning, stacklevel=3) | ||
|  | 
 | ||
|  | 
 | ||
|  | def int_floor(arr, xp): | ||
|  |     # array_api_strict is strict about not allowing `int()` on a float array. | ||
|  |     # That's typically not needed, here it is - so explicitly convert | ||
|  |     return int(xp.asarray(arr, dtype=xp.int64)) | ||
|  | 
 | ||
|  | 
 | ||
|  | lazy_cython = xp_capabilities( | ||
|  |     cpu_only=True, reason="Cython code", | ||
|  |     warnings=[("dask.array", "merges chunks")]) | ||
|  | 
 | ||
|  | 
 | ||
|  | @lazy_cython | ||
|  | def single(y): | ||
|  |     """
 | ||
|  |     Perform single/min/nearest linkage on the condensed distance matrix ``y``. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     y : ndarray | ||
|  |         The upper triangular of the distance matrix. The result of | ||
|  |         ``pdist`` is returned in this form. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     Z : ndarray | ||
|  |         The linkage matrix. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     linkage : for advanced creation of hierarchical clusterings. | ||
|  |     scipy.spatial.distance.pdist : pairwise distance metrics | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import single, fcluster | ||
|  |     >>> from scipy.spatial.distance import pdist | ||
|  | 
 | ||
|  |     First, we need a toy dataset to play with:: | ||
|  | 
 | ||
|  |         x x    x x | ||
|  |         x        x | ||
|  | 
 | ||
|  |         x        x | ||
|  |         x x    x x | ||
|  | 
 | ||
|  |     >>> X = [[0, 0], [0, 1], [1, 0], | ||
|  |     ...      [0, 4], [0, 3], [1, 4], | ||
|  |     ...      [4, 0], [3, 0], [4, 1], | ||
|  |     ...      [4, 4], [3, 4], [4, 3]] | ||
|  | 
 | ||
|  |     Then, we get a condensed distance matrix from this dataset: | ||
|  | 
 | ||
|  |     >>> y = pdist(X) | ||
|  | 
 | ||
|  |     Finally, we can perform the clustering: | ||
|  | 
 | ||
|  |     >>> Z = single(y) | ||
|  |     >>> Z | ||
|  |     array([[ 0.,  1.,  1.,  2.], | ||
|  |            [ 2., 12.,  1.,  3.], | ||
|  |            [ 3.,  4.,  1.,  2.], | ||
|  |            [ 5., 14.,  1.,  3.], | ||
|  |            [ 6.,  7.,  1.,  2.], | ||
|  |            [ 8., 16.,  1.,  3.], | ||
|  |            [ 9., 10.,  1.,  2.], | ||
|  |            [11., 18.,  1.,  3.], | ||
|  |            [13., 15.,  2.,  6.], | ||
|  |            [17., 20.,  2.,  9.], | ||
|  |            [19., 21.,  2., 12.]]) | ||
|  | 
 | ||
|  |     The linkage matrix ``Z`` represents a dendrogram - see | ||
|  |     `scipy.cluster.hierarchy.linkage` for a detailed explanation of its | ||
|  |     contents. | ||
|  | 
 | ||
|  |     We can use `scipy.cluster.hierarchy.fcluster` to see to which cluster | ||
|  |     each initial point would belong given a distance threshold: | ||
|  | 
 | ||
|  |     >>> fcluster(Z, 0.9, criterion='distance') | ||
|  |     array([ 7,  8,  9, 10, 11, 12,  4,  5,  6,  1,  2,  3], dtype=int32) | ||
|  |     >>> fcluster(Z, 1, criterion='distance') | ||
|  |     array([3, 3, 3, 4, 4, 4, 2, 2, 2, 1, 1, 1], dtype=int32) | ||
|  |     >>> fcluster(Z, 2, criterion='distance') | ||
|  |     array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32) | ||
|  | 
 | ||
|  |     Also, `scipy.cluster.hierarchy.dendrogram` can be used to generate a | ||
|  |     plot of the dendrogram. | ||
|  |     """
 | ||
|  |     return linkage(y, method='single', metric='euclidean') | ||
|  | 
 | ||
|  | 
 | ||
|  | @lazy_cython | ||
|  | def complete(y): | ||
|  |     """
 | ||
|  |     Perform complete/max/farthest point linkage on a condensed distance matrix. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     y : ndarray | ||
|  |         The upper triangular of the distance matrix. The result of | ||
|  |         ``pdist`` is returned in this form. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     Z : ndarray | ||
|  |         A linkage matrix containing the hierarchical clustering. See | ||
|  |         the `linkage` function documentation for more information | ||
|  |         on its structure. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     linkage : for advanced creation of hierarchical clusterings. | ||
|  |     scipy.spatial.distance.pdist : pairwise distance metrics | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import complete, fcluster | ||
|  |     >>> from scipy.spatial.distance import pdist | ||
|  | 
 | ||
|  |     First, we need a toy dataset to play with:: | ||
|  | 
 | ||
|  |         x x    x x | ||
|  |         x        x | ||
|  | 
 | ||
|  |         x        x | ||
|  |         x x    x x | ||
|  | 
 | ||
|  |     >>> X = [[0, 0], [0, 1], [1, 0], | ||
|  |     ...      [0, 4], [0, 3], [1, 4], | ||
|  |     ...      [4, 0], [3, 0], [4, 1], | ||
|  |     ...      [4, 4], [3, 4], [4, 3]] | ||
|  | 
 | ||
|  |     Then, we get a condensed distance matrix from this dataset: | ||
|  | 
 | ||
|  |     >>> y = pdist(X) | ||
|  | 
 | ||
|  |     Finally, we can perform the clustering: | ||
|  | 
 | ||
|  |     >>> Z = complete(y) | ||
|  |     >>> Z | ||
|  |     array([[ 0.        ,  1.        ,  1.        ,  2.        ], | ||
|  |            [ 3.        ,  4.        ,  1.        ,  2.        ], | ||
|  |            [ 6.        ,  7.        ,  1.        ,  2.        ], | ||
|  |            [ 9.        , 10.        ,  1.        ,  2.        ], | ||
|  |            [ 2.        , 12.        ,  1.41421356,  3.        ], | ||
|  |            [ 5.        , 13.        ,  1.41421356,  3.        ], | ||
|  |            [ 8.        , 14.        ,  1.41421356,  3.        ], | ||
|  |            [11.        , 15.        ,  1.41421356,  3.        ], | ||
|  |            [16.        , 17.        ,  4.12310563,  6.        ], | ||
|  |            [18.        , 19.        ,  4.12310563,  6.        ], | ||
|  |            [20.        , 21.        ,  5.65685425, 12.        ]]) | ||
|  | 
 | ||
|  |     The linkage matrix ``Z`` represents a dendrogram - see | ||
|  |     `scipy.cluster.hierarchy.linkage` for a detailed explanation of its | ||
|  |     contents. | ||
|  | 
 | ||
|  |     We can use `scipy.cluster.hierarchy.fcluster` to see to which cluster | ||
|  |     each initial point would belong given a distance threshold: | ||
|  | 
 | ||
|  |     >>> fcluster(Z, 0.9, criterion='distance') | ||
|  |     array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12], dtype=int32) | ||
|  |     >>> fcluster(Z, 1.5, criterion='distance') | ||
|  |     array([1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4], dtype=int32) | ||
|  |     >>> fcluster(Z, 4.5, criterion='distance') | ||
|  |     array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2], dtype=int32) | ||
|  |     >>> fcluster(Z, 6, criterion='distance') | ||
|  |     array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32) | ||
|  | 
 | ||
|  |     Also, `scipy.cluster.hierarchy.dendrogram` can be used to generate a | ||
|  |     plot of the dendrogram. | ||
|  |     """
 | ||
|  |     return linkage(y, method='complete', metric='euclidean') | ||
|  | 
 | ||
|  | 
 | ||
|  | @lazy_cython | ||
|  | def average(y): | ||
|  |     """
 | ||
|  |     Perform average/UPGMA linkage on a condensed distance matrix. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     y : ndarray | ||
|  |         The upper triangular of the distance matrix. The result of | ||
|  |         ``pdist`` is returned in this form. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     Z : ndarray | ||
|  |         A linkage matrix containing the hierarchical clustering. See | ||
|  |         `linkage` for more information on its structure. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     linkage : for advanced creation of hierarchical clusterings. | ||
|  |     scipy.spatial.distance.pdist : pairwise distance metrics | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import average, fcluster | ||
|  |     >>> from scipy.spatial.distance import pdist | ||
|  | 
 | ||
|  |     First, we need a toy dataset to play with:: | ||
|  | 
 | ||
|  |         x x    x x | ||
|  |         x        x | ||
|  | 
 | ||
|  |         x        x | ||
|  |         x x    x x | ||
|  | 
 | ||
|  |     >>> X = [[0, 0], [0, 1], [1, 0], | ||
|  |     ...      [0, 4], [0, 3], [1, 4], | ||
|  |     ...      [4, 0], [3, 0], [4, 1], | ||
|  |     ...      [4, 4], [3, 4], [4, 3]] | ||
|  | 
 | ||
|  |     Then, we get a condensed distance matrix from this dataset: | ||
|  | 
 | ||
|  |     >>> y = pdist(X) | ||
|  | 
 | ||
|  |     Finally, we can perform the clustering: | ||
|  | 
 | ||
|  |     >>> Z = average(y) | ||
|  |     >>> Z | ||
|  |     array([[ 0.        ,  1.        ,  1.        ,  2.        ], | ||
|  |            [ 3.        ,  4.        ,  1.        ,  2.        ], | ||
|  |            [ 6.        ,  7.        ,  1.        ,  2.        ], | ||
|  |            [ 9.        , 10.        ,  1.        ,  2.        ], | ||
|  |            [ 2.        , 12.        ,  1.20710678,  3.        ], | ||
|  |            [ 5.        , 13.        ,  1.20710678,  3.        ], | ||
|  |            [ 8.        , 14.        ,  1.20710678,  3.        ], | ||
|  |            [11.        , 15.        ,  1.20710678,  3.        ], | ||
|  |            [16.        , 17.        ,  3.39675184,  6.        ], | ||
|  |            [18.        , 19.        ,  3.39675184,  6.        ], | ||
|  |            [20.        , 21.        ,  4.09206523, 12.        ]]) | ||
|  | 
 | ||
|  |     The linkage matrix ``Z`` represents a dendrogram - see | ||
|  |     `scipy.cluster.hierarchy.linkage` for a detailed explanation of its | ||
|  |     contents. | ||
|  | 
 | ||
|  |     We can use `scipy.cluster.hierarchy.fcluster` to see to which cluster | ||
|  |     each initial point would belong given a distance threshold: | ||
|  | 
 | ||
|  |     >>> fcluster(Z, 0.9, criterion='distance') | ||
|  |     array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12], dtype=int32) | ||
|  |     >>> fcluster(Z, 1.5, criterion='distance') | ||
|  |     array([1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4], dtype=int32) | ||
|  |     >>> fcluster(Z, 4, criterion='distance') | ||
|  |     array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2], dtype=int32) | ||
|  |     >>> fcluster(Z, 6, criterion='distance') | ||
|  |     array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32) | ||
|  | 
 | ||
|  |     Also, `scipy.cluster.hierarchy.dendrogram` can be used to generate a | ||
|  |     plot of the dendrogram. | ||
|  | 
 | ||
|  |     """
 | ||
|  |     return linkage(y, method='average', metric='euclidean') | ||
|  | 
 | ||
|  | 
 | ||
|  | @lazy_cython | ||
|  | def weighted(y): | ||
|  |     """
 | ||
|  |     Perform weighted/WPGMA linkage on the condensed distance matrix. | ||
|  | 
 | ||
|  |     See `linkage` for more information on the return | ||
|  |     structure and algorithm. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     y : ndarray | ||
|  |         The upper triangular of the distance matrix. The result of | ||
|  |         ``pdist`` is returned in this form. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     Z : ndarray | ||
|  |         A linkage matrix containing the hierarchical clustering. See | ||
|  |         `linkage` for more information on its structure. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     linkage : for advanced creation of hierarchical clusterings. | ||
|  |     scipy.spatial.distance.pdist : pairwise distance metrics | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import weighted, fcluster | ||
|  |     >>> from scipy.spatial.distance import pdist | ||
|  | 
 | ||
|  |     First, we need a toy dataset to play with:: | ||
|  | 
 | ||
|  |         x x    x x | ||
|  |         x        x | ||
|  | 
 | ||
|  |         x        x | ||
|  |         x x    x x | ||
|  | 
 | ||
|  |     >>> X = [[0, 0], [0, 1], [1, 0], | ||
|  |     ...      [0, 4], [0, 3], [1, 4], | ||
|  |     ...      [4, 0], [3, 0], [4, 1], | ||
|  |     ...      [4, 4], [3, 4], [4, 3]] | ||
|  | 
 | ||
|  |     Then, we get a condensed distance matrix from this dataset: | ||
|  | 
 | ||
|  |     >>> y = pdist(X) | ||
|  | 
 | ||
|  |     Finally, we can perform the clustering: | ||
|  | 
 | ||
|  |     >>> Z = weighted(y) | ||
|  |     >>> Z | ||
|  |     array([[ 0.        ,  1.        ,  1.        ,  2.        ], | ||
|  |            [ 6.        ,  7.        ,  1.        ,  2.        ], | ||
|  |            [ 3.        ,  4.        ,  1.        ,  2.        ], | ||
|  |            [ 9.        , 11.        ,  1.        ,  2.        ], | ||
|  |            [ 2.        , 12.        ,  1.20710678,  3.        ], | ||
|  |            [ 8.        , 13.        ,  1.20710678,  3.        ], | ||
|  |            [ 5.        , 14.        ,  1.20710678,  3.        ], | ||
|  |            [10.        , 15.        ,  1.20710678,  3.        ], | ||
|  |            [18.        , 19.        ,  3.05595762,  6.        ], | ||
|  |            [16.        , 17.        ,  3.32379407,  6.        ], | ||
|  |            [20.        , 21.        ,  4.06357713, 12.        ]]) | ||
|  | 
 | ||
|  |     The linkage matrix ``Z`` represents a dendrogram - see | ||
|  |     `scipy.cluster.hierarchy.linkage` for a detailed explanation of its | ||
|  |     contents. | ||
|  | 
 | ||
|  |     We can use `scipy.cluster.hierarchy.fcluster` to see to which cluster | ||
|  |     each initial point would belong given a distance threshold: | ||
|  | 
 | ||
|  |     >>> fcluster(Z, 0.9, criterion='distance') | ||
|  |     array([ 7,  8,  9,  1,  2,  3, 10, 11, 12,  4,  6,  5], dtype=int32) | ||
|  |     >>> fcluster(Z, 1.5, criterion='distance') | ||
|  |     array([3, 3, 3, 1, 1, 1, 4, 4, 4, 2, 2, 2], dtype=int32) | ||
|  |     >>> fcluster(Z, 4, criterion='distance') | ||
|  |     array([2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1], dtype=int32) | ||
|  |     >>> fcluster(Z, 6, criterion='distance') | ||
|  |     array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32) | ||
|  | 
 | ||
|  |     Also, `scipy.cluster.hierarchy.dendrogram` can be used to generate a | ||
|  |     plot of the dendrogram. | ||
|  | 
 | ||
|  |     """
 | ||
|  |     return linkage(y, method='weighted', metric='euclidean') | ||
|  | 
 | ||
|  | 
 | ||
|  | @lazy_cython | ||
|  | def centroid(y): | ||
|  |     """
 | ||
|  |     Perform centroid/UPGMC linkage. | ||
|  | 
 | ||
|  |     See `linkage` for more information on the input matrix, | ||
|  |     return structure, and algorithm. | ||
|  | 
 | ||
|  |     The following are common calling conventions: | ||
|  | 
 | ||
|  |     1. ``Z = centroid(y)`` | ||
|  | 
 | ||
|  |        Performs centroid/UPGMC linkage on the condensed distance | ||
|  |        matrix ``y``. | ||
|  | 
 | ||
|  |     2. ``Z = centroid(X)`` | ||
|  | 
 | ||
|  |        Performs centroid/UPGMC linkage on the observation matrix ``X`` | ||
|  |        using Euclidean distance as the distance metric. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     y : ndarray | ||
|  |         A condensed distance matrix. A condensed | ||
|  |         distance matrix is a flat array containing the upper | ||
|  |         triangular of the distance matrix. This is the form that | ||
|  |         ``pdist`` returns. Alternatively, a collection of | ||
|  |         m observation vectors in n dimensions may be passed as | ||
|  |         an m by n array. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     Z : ndarray | ||
|  |         A linkage matrix containing the hierarchical clustering. See | ||
|  |         the `linkage` function documentation for more information | ||
|  |         on its structure. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     linkage : for advanced creation of hierarchical clusterings. | ||
|  |     scipy.spatial.distance.pdist : pairwise distance metrics | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import centroid, fcluster | ||
|  |     >>> from scipy.spatial.distance import pdist | ||
|  | 
 | ||
|  |     First, we need a toy dataset to play with:: | ||
|  | 
 | ||
|  |         x x    x x | ||
|  |         x        x | ||
|  | 
 | ||
|  |         x        x | ||
|  |         x x    x x | ||
|  | 
 | ||
|  |     >>> X = [[0, 0], [0, 1], [1, 0], | ||
|  |     ...      [0, 4], [0, 3], [1, 4], | ||
|  |     ...      [4, 0], [3, 0], [4, 1], | ||
|  |     ...      [4, 4], [3, 4], [4, 3]] | ||
|  | 
 | ||
|  |     Then, we get a condensed distance matrix from this dataset: | ||
|  | 
 | ||
|  |     >>> y = pdist(X) | ||
|  | 
 | ||
|  |     Finally, we can perform the clustering: | ||
|  | 
 | ||
|  |     >>> Z = centroid(y) | ||
|  |     >>> Z | ||
|  |     array([[ 0.        ,  1.        ,  1.        ,  2.        ], | ||
|  |            [ 3.        ,  4.        ,  1.        ,  2.        ], | ||
|  |            [ 9.        , 10.        ,  1.        ,  2.        ], | ||
|  |            [ 6.        ,  7.        ,  1.        ,  2.        ], | ||
|  |            [ 2.        , 12.        ,  1.11803399,  3.        ], | ||
|  |            [ 5.        , 13.        ,  1.11803399,  3.        ], | ||
|  |            [ 8.        , 15.        ,  1.11803399,  3.        ], | ||
|  |            [11.        , 14.        ,  1.11803399,  3.        ], | ||
|  |            [18.        , 19.        ,  3.33333333,  6.        ], | ||
|  |            [16.        , 17.        ,  3.33333333,  6.        ], | ||
|  |            [20.        , 21.        ,  3.33333333, 12.        ]]) # may vary | ||
|  | 
 | ||
|  |     The linkage matrix ``Z`` represents a dendrogram - see | ||
|  |     `scipy.cluster.hierarchy.linkage` for a detailed explanation of its | ||
|  |     contents. | ||
|  | 
 | ||
|  |     We can use `scipy.cluster.hierarchy.fcluster` to see to which cluster | ||
|  |     each initial point would belong given a distance threshold: | ||
|  | 
 | ||
|  |     >>> fcluster(Z, 0.9, criterion='distance') | ||
|  |     array([ 7,  8,  9, 10, 11, 12,  1,  2,  3,  4,  5,  6], dtype=int32) # may vary | ||
|  |     >>> fcluster(Z, 1.1, criterion='distance') | ||
|  |     array([5, 5, 6, 7, 7, 8, 1, 1, 2, 3, 3, 4], dtype=int32) # may vary | ||
|  |     >>> fcluster(Z, 2, criterion='distance') | ||
|  |     array([3, 3, 3, 4, 4, 4, 1, 1, 1, 2, 2, 2], dtype=int32) # may vary | ||
|  |     >>> fcluster(Z, 4, criterion='distance') | ||
|  |     array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32) | ||
|  | 
 | ||
|  |     Also, `scipy.cluster.hierarchy.dendrogram` can be used to generate a | ||
|  |     plot of the dendrogram. | ||
|  | 
 | ||
|  |     """
 | ||
|  |     return linkage(y, method='centroid', metric='euclidean') | ||
|  | 
 | ||
|  | 
 | ||
|  | @lazy_cython | ||
|  | def median(y): | ||
|  |     """
 | ||
|  |     Perform median/WPGMC linkage. | ||
|  | 
 | ||
|  |     See `linkage` for more information on the return structure | ||
|  |     and algorithm. | ||
|  | 
 | ||
|  |      The following are common calling conventions: | ||
|  | 
 | ||
|  |      1. ``Z = median(y)`` | ||
|  | 
 | ||
|  |         Performs median/WPGMC linkage on the condensed distance matrix | ||
|  |         ``y``.  See ``linkage`` for more information on the return | ||
|  |         structure and algorithm. | ||
|  | 
 | ||
|  |      2. ``Z = median(X)`` | ||
|  | 
 | ||
|  |         Performs median/WPGMC linkage on the observation matrix ``X`` | ||
|  |         using Euclidean distance as the distance metric. See `linkage` | ||
|  |         for more information on the return structure and algorithm. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     y : ndarray | ||
|  |         A condensed distance matrix. A condensed | ||
|  |         distance matrix is a flat array containing the upper | ||
|  |         triangular of the distance matrix. This is the form that | ||
|  |         ``pdist`` returns.  Alternatively, a collection of | ||
|  |         m observation vectors in n dimensions may be passed as | ||
|  |         an m by n array. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     Z : ndarray | ||
|  |         The hierarchical clustering encoded as a linkage matrix. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     linkage : for advanced creation of hierarchical clusterings. | ||
|  |     scipy.spatial.distance.pdist : pairwise distance metrics | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import median, fcluster | ||
|  |     >>> from scipy.spatial.distance import pdist | ||
|  | 
 | ||
|  |     First, we need a toy dataset to play with:: | ||
|  | 
 | ||
|  |         x x    x x | ||
|  |         x        x | ||
|  | 
 | ||
|  |         x        x | ||
|  |         x x    x x | ||
|  | 
 | ||
|  |     >>> X = [[0, 0], [0, 1], [1, 0], | ||
|  |     ...      [0, 4], [0, 3], [1, 4], | ||
|  |     ...      [4, 0], [3, 0], [4, 1], | ||
|  |     ...      [4, 4], [3, 4], [4, 3]] | ||
|  | 
 | ||
|  |     Then, we get a condensed distance matrix from this dataset: | ||
|  | 
 | ||
|  |     >>> y = pdist(X) | ||
|  | 
 | ||
|  |     Finally, we can perform the clustering: | ||
|  | 
 | ||
|  |     >>> Z = median(y) | ||
|  |     >>> Z | ||
|  |     array([[ 0.        ,  1.        ,  1.        ,  2.        ], | ||
|  |            [ 3.        ,  4.        ,  1.        ,  2.        ], | ||
|  |            [ 9.        , 10.        ,  1.        ,  2.        ], | ||
|  |            [ 6.        ,  7.        ,  1.        ,  2.        ], | ||
|  |            [ 2.        , 12.        ,  1.11803399,  3.        ], | ||
|  |            [ 5.        , 13.        ,  1.11803399,  3.        ], | ||
|  |            [ 8.        , 15.        ,  1.11803399,  3.        ], | ||
|  |            [11.        , 14.        ,  1.11803399,  3.        ], | ||
|  |            [18.        , 19.        ,  3.        ,  6.        ], | ||
|  |            [16.        , 17.        ,  3.5       ,  6.        ], | ||
|  |            [20.        , 21.        ,  3.25      , 12.        ]]) | ||
|  | 
 | ||
|  |     The linkage matrix ``Z`` represents a dendrogram - see | ||
|  |     `scipy.cluster.hierarchy.linkage` for a detailed explanation of its | ||
|  |     contents. | ||
|  | 
 | ||
|  |     We can use `scipy.cluster.hierarchy.fcluster` to see to which cluster | ||
|  |     each initial point would belong given a distance threshold: | ||
|  | 
 | ||
|  |     >>> fcluster(Z, 0.9, criterion='distance') | ||
|  |     array([ 7,  8,  9, 10, 11, 12,  1,  2,  3,  4,  5,  6], dtype=int32) | ||
|  |     >>> fcluster(Z, 1.1, criterion='distance') | ||
|  |     array([5, 5, 6, 7, 7, 8, 1, 1, 2, 3, 3, 4], dtype=int32) | ||
|  |     >>> fcluster(Z, 2, criterion='distance') | ||
|  |     array([3, 3, 3, 4, 4, 4, 1, 1, 1, 2, 2, 2], dtype=int32) | ||
|  |     >>> fcluster(Z, 4, criterion='distance') | ||
|  |     array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32) | ||
|  | 
 | ||
|  |     Also, `scipy.cluster.hierarchy.dendrogram` can be used to generate a | ||
|  |     plot of the dendrogram. | ||
|  | 
 | ||
|  |     """
 | ||
|  |     return linkage(y, method='median', metric='euclidean') | ||
|  | 
 | ||
|  | 
 | ||
|  | @lazy_cython | ||
|  | def ward(y): | ||
|  |     """
 | ||
|  |     Perform Ward's linkage on a condensed distance matrix. | ||
|  | 
 | ||
|  |     See `linkage` for more information on the return structure | ||
|  |     and algorithm. | ||
|  | 
 | ||
|  |     The following are common calling conventions: | ||
|  | 
 | ||
|  |     1. ``Z = ward(y)`` | ||
|  |        Performs Ward's linkage on the condensed distance matrix ``y``. | ||
|  | 
 | ||
|  |     2. ``Z = ward(X)`` | ||
|  |        Performs Ward's linkage on the observation matrix ``X`` using | ||
|  |        Euclidean distance as the distance metric. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     y : ndarray | ||
|  |         A condensed distance matrix. A condensed | ||
|  |         distance matrix is a flat array containing the upper | ||
|  |         triangular of the distance matrix. This is the form that | ||
|  |         ``pdist`` returns.  Alternatively, a collection of | ||
|  |         m observation vectors in n dimensions may be passed as | ||
|  |         an m by n array. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     Z : ndarray | ||
|  |         The hierarchical clustering encoded as a linkage matrix. See | ||
|  |         `linkage` for more information on the return structure and | ||
|  |         algorithm. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     linkage : for advanced creation of hierarchical clusterings. | ||
|  |     scipy.spatial.distance.pdist : pairwise distance metrics | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import ward, fcluster | ||
|  |     >>> from scipy.spatial.distance import pdist | ||
|  | 
 | ||
|  |     First, we need a toy dataset to play with:: | ||
|  | 
 | ||
|  |         x x    x x | ||
|  |         x        x | ||
|  | 
 | ||
|  |         x        x | ||
|  |         x x    x x | ||
|  | 
 | ||
|  |     >>> X = [[0, 0], [0, 1], [1, 0], | ||
|  |     ...      [0, 4], [0, 3], [1, 4], | ||
|  |     ...      [4, 0], [3, 0], [4, 1], | ||
|  |     ...      [4, 4], [3, 4], [4, 3]] | ||
|  | 
 | ||
|  |     Then, we get a condensed distance matrix from this dataset: | ||
|  | 
 | ||
|  |     >>> y = pdist(X) | ||
|  | 
 | ||
|  |     Finally, we can perform the clustering: | ||
|  | 
 | ||
|  |     >>> Z = ward(y) | ||
|  |     >>> Z | ||
|  |     array([[ 0.        ,  1.        ,  1.        ,  2.        ], | ||
|  |            [ 3.        ,  4.        ,  1.        ,  2.        ], | ||
|  |            [ 6.        ,  7.        ,  1.        ,  2.        ], | ||
|  |            [ 9.        , 10.        ,  1.        ,  2.        ], | ||
|  |            [ 2.        , 12.        ,  1.29099445,  3.        ], | ||
|  |            [ 5.        , 13.        ,  1.29099445,  3.        ], | ||
|  |            [ 8.        , 14.        ,  1.29099445,  3.        ], | ||
|  |            [11.        , 15.        ,  1.29099445,  3.        ], | ||
|  |            [16.        , 17.        ,  5.77350269,  6.        ], | ||
|  |            [18.        , 19.        ,  5.77350269,  6.        ], | ||
|  |            [20.        , 21.        ,  8.16496581, 12.        ]]) | ||
|  | 
 | ||
|  |     The linkage matrix ``Z`` represents a dendrogram - see | ||
|  |     `scipy.cluster.hierarchy.linkage` for a detailed explanation of its | ||
|  |     contents. | ||
|  | 
 | ||
|  |     We can use `scipy.cluster.hierarchy.fcluster` to see to which cluster | ||
|  |     each initial point would belong given a distance threshold: | ||
|  | 
 | ||
|  |     >>> fcluster(Z, 0.9, criterion='distance') | ||
|  |     array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12], dtype=int32) | ||
|  |     >>> fcluster(Z, 1.1, criterion='distance') | ||
|  |     array([1, 1, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8], dtype=int32) | ||
|  |     >>> fcluster(Z, 3, criterion='distance') | ||
|  |     array([1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4], dtype=int32) | ||
|  |     >>> fcluster(Z, 9, criterion='distance') | ||
|  |     array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32) | ||
|  | 
 | ||
|  |     Also, `scipy.cluster.hierarchy.dendrogram` can be used to generate a | ||
|  |     plot of the dendrogram. | ||
|  | 
 | ||
|  |     """
 | ||
|  |     return linkage(y, method='ward', metric='euclidean') | ||
|  | 
 | ||
|  | 
 | ||
|  | @lazy_cython | ||
|  | def linkage(y, method='single', metric='euclidean', optimal_ordering=False): | ||
|  |     """
 | ||
|  |     Perform hierarchical/agglomerative clustering. | ||
|  | 
 | ||
|  |     The input y may be either a 1-D condensed distance matrix | ||
|  |     or a 2-D array of observation vectors. | ||
|  | 
 | ||
|  |     If y is a 1-D condensed distance matrix, | ||
|  |     then y must be a :math:`\\binom{n}{2}` sized | ||
|  |     vector, where n is the number of original observations paired | ||
|  |     in the distance matrix. The behavior of this function is very | ||
|  |     similar to the MATLAB linkage function. | ||
|  | 
 | ||
|  |     A :math:`(n-1)` by 4 matrix ``Z`` is returned. At the | ||
|  |     :math:`i`-th iteration, clusters with indices ``Z[i, 0]`` and | ||
|  |     ``Z[i, 1]`` are combined to form cluster :math:`n + i`. A | ||
|  |     cluster with an index less than :math:`n` corresponds to one of | ||
|  |     the :math:`n` original observations. The distance between | ||
|  |     clusters ``Z[i, 0]`` and ``Z[i, 1]`` is given by ``Z[i, 2]``. The | ||
|  |     fourth value ``Z[i, 3]`` represents the number of original | ||
|  |     observations in the newly formed cluster. | ||
|  | 
 | ||
|  |     The following linkage methods are used to compute the distance | ||
|  |     :math:`d(s, t)` between two clusters :math:`s` and | ||
|  |     :math:`t`. The algorithm begins with a forest of clusters that | ||
|  |     have yet to be used in the hierarchy being formed. When two | ||
|  |     clusters :math:`s` and :math:`t` from this forest are combined | ||
|  |     into a single cluster :math:`u`, :math:`s` and :math:`t` are | ||
|  |     removed from the forest, and :math:`u` is added to the | ||
|  |     forest. When only one cluster remains in the forest, the algorithm | ||
|  |     stops, and this cluster becomes the root. | ||
|  | 
 | ||
|  |     A distance matrix is maintained at each iteration. The ``d[i,j]`` | ||
|  |     entry corresponds to the distance between cluster :math:`i` and | ||
|  |     :math:`j` in the original forest. | ||
|  | 
 | ||
|  |     At each iteration, the algorithm must update the distance matrix | ||
|  |     to reflect the distance of the newly formed cluster u with the | ||
|  |     remaining clusters in the forest. | ||
|  | 
 | ||
|  |     Suppose there are :math:`|u|` original observations | ||
|  |     :math:`u[0], \\ldots, u[|u|-1]` in cluster :math:`u` and | ||
|  |     :math:`|v|` original objects :math:`v[0], \\ldots, v[|v|-1]` in | ||
|  |     cluster :math:`v`. Recall, :math:`s` and :math:`t` are | ||
|  |     combined to form cluster :math:`u`. Let :math:`v` be any | ||
|  |     remaining cluster in the forest that is not :math:`u`. | ||
|  | 
 | ||
|  |     The following are methods for calculating the distance between the | ||
|  |     newly formed cluster :math:`u` and each :math:`v`. | ||
|  | 
 | ||
|  |       * method='single' assigns | ||
|  | 
 | ||
|  |         .. math:: | ||
|  |            d(u,v) = \\min(dist(u[i],v[j])) | ||
|  | 
 | ||
|  |         for all points :math:`i` in cluster :math:`u` and | ||
|  |         :math:`j` in cluster :math:`v`. This is also known as the | ||
|  |         Nearest Point Algorithm. | ||
|  | 
 | ||
|  |       * method='complete' assigns | ||
|  | 
 | ||
|  |         .. math:: | ||
|  |            d(u, v) = \\max(dist(u[i],v[j])) | ||
|  | 
 | ||
|  |         for all points :math:`i` in cluster u and :math:`j` in | ||
|  |         cluster :math:`v`. This is also known by the Farthest Point | ||
|  |         Algorithm or Voor Hees Algorithm. | ||
|  | 
 | ||
|  |       * method='average' assigns | ||
|  | 
 | ||
|  |         .. math:: | ||
|  |            d(u,v) = \\sum_{ij} \\frac{d(u[i], v[j])} | ||
|  |                                    {(|u|*|v|)} | ||
|  | 
 | ||
|  |         for all points :math:`i` and :math:`j` where :math:`|u|` | ||
|  |         and :math:`|v|` are the cardinalities of clusters :math:`u` | ||
|  |         and :math:`v`, respectively. This is also called the UPGMA | ||
|  |         algorithm. | ||
|  | 
 | ||
|  |       * method='weighted' assigns | ||
|  | 
 | ||
|  |         .. math:: | ||
|  |            d(u,v) = (dist(s,v) + dist(t,v))/2 | ||
|  | 
 | ||
|  |         where cluster u was formed with cluster s and t and v | ||
|  |         is a remaining cluster in the forest (also called WPGMA). | ||
|  | 
 | ||
|  |       * method='centroid' assigns | ||
|  | 
 | ||
|  |         .. math:: | ||
|  |            dist(s,t) = ||c_s-c_t||_2 | ||
|  | 
 | ||
|  |         where :math:`c_s` and :math:`c_t` are the centroids of | ||
|  |         clusters :math:`s` and :math:`t`, respectively. When two | ||
|  |         clusters :math:`s` and :math:`t` are combined into a new | ||
|  |         cluster :math:`u`, the new centroid is computed over all the | ||
|  |         original objects in clusters :math:`s` and :math:`t`. The | ||
|  |         distance then becomes the Euclidean distance between the | ||
|  |         centroid of :math:`u` and the centroid of a remaining cluster | ||
|  |         :math:`v` in the forest. This is also known as the UPGMC | ||
|  |         algorithm. | ||
|  | 
 | ||
|  |       * method='median' assigns :math:`d(s,t)` like the ``centroid`` | ||
|  |         method. When two clusters :math:`s` and :math:`t` are combined | ||
|  |         into a new cluster :math:`u`, the average of centroids s and t | ||
|  |         give the new centroid :math:`u`. This is also known as the | ||
|  |         WPGMC algorithm. | ||
|  | 
 | ||
|  |       * method='ward' uses the Ward variance minimization algorithm. | ||
|  |         The new entry :math:`d(u,v)` is computed as follows, | ||
|  | 
 | ||
|  |         .. math:: | ||
|  | 
 | ||
|  |            d(u,v) = \\sqrt{\\frac{|v|+|s|} | ||
|  |                                {T}d(v,s)^2 | ||
|  |                         + \\frac{|v|+|t|} | ||
|  |                                {T}d(v,t)^2 | ||
|  |                         - \\frac{|v|} | ||
|  |                                {T}d(s,t)^2} | ||
|  | 
 | ||
|  |         where :math:`u` is the newly joined cluster consisting of | ||
|  |         clusters :math:`s` and :math:`t`, :math:`v` is an unused | ||
|  |         cluster in the forest, :math:`T=|v|+|s|+|t|`, and | ||
|  |         :math:`|*|` is the cardinality of its argument. This is also | ||
|  |         known as the incremental algorithm. | ||
|  | 
 | ||
|  |     Warning: When the minimum distance pair in the forest is chosen, there | ||
|  |     may be two or more pairs with the same minimum distance. This | ||
|  |     implementation may choose a different minimum than the MATLAB | ||
|  |     version. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     y : ndarray | ||
|  |         A condensed distance matrix. A condensed distance matrix | ||
|  |         is a flat array containing the upper triangular of the distance matrix. | ||
|  |         This is the form that ``pdist`` returns. Alternatively, a collection of | ||
|  |         :math:`m` observation vectors in :math:`n` dimensions may be passed as | ||
|  |         an :math:`m` by :math:`n` array. All elements of the condensed distance | ||
|  |         matrix must be finite, i.e., no NaNs or infs. | ||
|  |     method : str, optional | ||
|  |         The linkage algorithm to use. See the ``Linkage Methods`` section below | ||
|  |         for full descriptions. | ||
|  |     metric : str or function, optional | ||
|  |         The distance metric to use in the case that y is a collection of | ||
|  |         observation vectors; ignored otherwise. See the ``pdist`` | ||
|  |         function for a list of valid distance metrics. A custom distance | ||
|  |         function can also be used. | ||
|  |     optimal_ordering : bool, optional | ||
|  |         If True, the linkage matrix will be reordered so that the distance | ||
|  |         between successive leaves is minimal. This results in a more intuitive | ||
|  |         tree structure when the data are visualized. defaults to False, because | ||
|  |         this algorithm can be slow, particularly on large datasets [2]_. See | ||
|  |         also the `optimal_leaf_ordering` function. | ||
|  | 
 | ||
|  |         .. versionadded:: 1.0.0 | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     Z : ndarray | ||
|  |         The hierarchical clustering encoded as a linkage matrix. | ||
|  | 
 | ||
|  |     Notes | ||
|  |     ----- | ||
|  |     1. For method 'single', an optimized algorithm based on minimum spanning | ||
|  |        tree is implemented. It has time complexity :math:`O(n^2)`. | ||
|  |        For methods 'complete', 'average', 'weighted' and 'ward', an algorithm | ||
|  |        called nearest-neighbors chain is implemented. It also has time | ||
|  |        complexity :math:`O(n^2)`. | ||
|  |        For other methods, a naive algorithm is implemented with :math:`O(n^3)` | ||
|  |        time complexity. | ||
|  |        All algorithms use :math:`O(n^2)` memory. | ||
|  |        Refer to [1]_ for details about the algorithms. | ||
|  |     2. Methods 'centroid', 'median', and 'ward' are correctly defined only if | ||
|  |        Euclidean pairwise metric is used. If `y` is passed as precomputed | ||
|  |        pairwise distances, then it is the user's responsibility to assure that | ||
|  |        these distances are in fact Euclidean, otherwise the produced result | ||
|  |        will be incorrect. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     scipy.spatial.distance.pdist : pairwise distance metrics | ||
|  | 
 | ||
|  |     References | ||
|  |     ---------- | ||
|  |     .. [1] Daniel Mullner, "Modern hierarchical, agglomerative clustering | ||
|  |            algorithms", :arXiv:`1109.2378v1`. | ||
|  |     .. [2] Ziv Bar-Joseph, David K. Gifford, Tommi S. Jaakkola, "Fast optimal | ||
|  |            leaf ordering for hierarchical clustering", 2001. Bioinformatics | ||
|  |            :doi:`10.1093/bioinformatics/17.suppl_1.S22` | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import dendrogram, linkage | ||
|  |     >>> from matplotlib import pyplot as plt | ||
|  |     >>> X = [[i] for i in [2, 8, 0, 4, 1, 9, 9, 0]] | ||
|  | 
 | ||
|  |     >>> Z = linkage(X, 'ward') | ||
|  |     >>> fig = plt.figure(figsize=(25, 10)) | ||
|  |     >>> dn = dendrogram(Z) | ||
|  | 
 | ||
|  |     >>> Z = linkage(X, 'single') | ||
|  |     >>> fig = plt.figure(figsize=(25, 10)) | ||
|  |     >>> dn = dendrogram(Z) | ||
|  |     >>> plt.show() | ||
|  |     """
 | ||
|  |     xp = array_namespace(y) | ||
|  |     y = _asarray(y, order='C', dtype=xp.float64, xp=xp) | ||
|  |     lazy = is_lazy_array(y) | ||
|  | 
 | ||
|  |     if method not in _LINKAGE_METHODS: | ||
|  |         raise ValueError(f"Invalid method: {method}") | ||
|  | 
 | ||
|  |     if method in _EUCLIDEAN_METHODS and metric != 'euclidean' and y.ndim == 2: | ||
|  |         msg = f"`method={method}` requires the distance metric to be Euclidean" | ||
|  |         raise ValueError(msg) | ||
|  | 
 | ||
|  |     if y.ndim == 1: | ||
|  |         distance.is_valid_y(y, throw=True, name='y') | ||
|  |     elif y.ndim == 2: | ||
|  |         if (not lazy and y.shape[0] == y.shape[1] | ||
|  |             and xp.all(xpx.isclose(xp.linalg.diagonal(y), 0)) | ||
|  |             and xp.all(y >= 0) and xp.all(xpx.isclose(y, y.T))): | ||
|  |             warnings.warn('The symmetric non-negative hollow observation ' | ||
|  |                           'matrix looks suspiciously like an uncondensed ' | ||
|  |                           'distance matrix', | ||
|  |                           ClusterWarning, stacklevel=2) | ||
|  |         y = distance.pdist(y, metric) | ||
|  |     else: | ||
|  |         raise ValueError("`y` must be 1 or 2 dimensional.") | ||
|  | 
 | ||
|  |     if not lazy and not xp.all(xp.isfinite(y)): | ||
|  |         raise ValueError("The condensed distance matrix must contain only " | ||
|  |                          "finite values.") | ||
|  | 
 | ||
|  |     n = distance.num_obs_y(y) | ||
|  |     method_code = _LINKAGE_METHODS[method] | ||
|  | 
 | ||
|  |     def cy_linkage(y, validate): | ||
|  |         if validate and not np.all(np.isfinite(y)): | ||
|  |             raise ValueError("The condensed distance matrix must contain only " | ||
|  |                             "finite values.")             | ||
|  | 
 | ||
|  |         if method == 'single': | ||
|  |             return _hierarchy.mst_single_linkage(y, n) | ||
|  |         elif method in ('complete', 'average', 'weighted', 'ward'): | ||
|  |             return _hierarchy.nn_chain(y, n, method_code) | ||
|  |         else: | ||
|  |             return _hierarchy.fast_linkage(y, n, method_code) | ||
|  | 
 | ||
|  |     result = xpx.lazy_apply(cy_linkage, y, validate=lazy, | ||
|  |                             shape=(n - 1, 4), dtype=xp.float64, | ||
|  |                             as_numpy=True, xp=xp) | ||
|  | 
 | ||
|  |     if optimal_ordering: | ||
|  |         return optimal_leaf_ordering(result, y) | ||
|  |     else: | ||
|  |         return result | ||
|  | 
 | ||
|  | 
 | ||
|  | class ClusterNode: | ||
|  |     """
 | ||
|  |     A tree node class for representing a cluster. | ||
|  | 
 | ||
|  |     Leaf nodes correspond to original observations, while non-leaf nodes | ||
|  |     correspond to non-singleton clusters. | ||
|  | 
 | ||
|  |     The `to_tree` function converts a matrix returned by the linkage | ||
|  |     function into an easy-to-use tree representation. | ||
|  | 
 | ||
|  |     All parameter names are also attributes. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     id : int | ||
|  |         The node id. | ||
|  |     left : ClusterNode instance, optional | ||
|  |         The left child tree node. | ||
|  |     right : ClusterNode instance, optional | ||
|  |         The right child tree node. | ||
|  |     dist : float, optional | ||
|  |         Distance for this cluster in the linkage matrix. | ||
|  |     count : int, optional | ||
|  |         The number of samples in this cluster. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     to_tree : for converting a linkage matrix ``Z`` into a tree object. | ||
|  | 
 | ||
|  |     """
 | ||
|  | 
 | ||
|  |     def __init__(self, id, left=None, right=None, dist=0.0, count=1): | ||
|  |         if id < 0: | ||
|  |             raise ValueError('The id must be non-negative.') | ||
|  |         if dist < 0: | ||
|  |             raise ValueError('The distance must be non-negative.') | ||
|  |         if (left is None and right is not None) or \ | ||
|  |            (left is not None and right is None): | ||
|  |             raise ValueError('Only full or proper binary trees are permitted.' | ||
|  |                              '  This node has one child.') | ||
|  |         if count < 1: | ||
|  |             raise ValueError('A cluster must contain at least one original ' | ||
|  |                              'observation.') | ||
|  |         self.id = id | ||
|  |         self.left = left | ||
|  |         self.right = right | ||
|  |         self.dist = dist | ||
|  |         if self.left is None: | ||
|  |             self.count = count | ||
|  |         else: | ||
|  |             self.count = left.count + right.count | ||
|  | 
 | ||
|  |     def __lt__(self, node): | ||
|  |         if not isinstance(node, ClusterNode): | ||
|  |             raise ValueError("Can't compare ClusterNode " | ||
|  |                              f"to type {type(node)}") | ||
|  |         return self.dist < node.dist | ||
|  | 
 | ||
|  |     def __gt__(self, node): | ||
|  |         if not isinstance(node, ClusterNode): | ||
|  |             raise ValueError("Can't compare ClusterNode " | ||
|  |                              f"to type {type(node)}") | ||
|  |         return self.dist > node.dist | ||
|  | 
 | ||
|  |     def __eq__(self, node): | ||
|  |         if not isinstance(node, ClusterNode): | ||
|  |             raise ValueError("Can't compare ClusterNode " | ||
|  |                              f"to type {type(node)}") | ||
|  |         return self.dist == node.dist | ||
|  | 
 | ||
|  |     def get_id(self): | ||
|  |         """
 | ||
|  |         The identifier of the target node. | ||
|  | 
 | ||
|  |         For ``0 <= i < n``, `i` corresponds to original observation i. | ||
|  |         For ``n <= i < 2n-1``, `i` corresponds to non-singleton cluster formed | ||
|  |         at iteration ``i-n``. | ||
|  | 
 | ||
|  |         Returns | ||
|  |         ------- | ||
|  |         id : int | ||
|  |             The identifier of the target node. | ||
|  | 
 | ||
|  |         """
 | ||
|  |         return self.id | ||
|  | 
 | ||
|  |     def get_count(self): | ||
|  |         """
 | ||
|  |         The number of leaf nodes (original observations) belonging to | ||
|  |         the cluster node nd. If the target node is a leaf, 1 is | ||
|  |         returned. | ||
|  | 
 | ||
|  |         Returns | ||
|  |         ------- | ||
|  |         get_count : int | ||
|  |             The number of leaf nodes below the target node. | ||
|  | 
 | ||
|  |         """
 | ||
|  |         return self.count | ||
|  | 
 | ||
|  |     def get_left(self): | ||
|  |         """
 | ||
|  |         Return a reference to the left child tree object. | ||
|  | 
 | ||
|  |         Returns | ||
|  |         ------- | ||
|  |         left : ClusterNode | ||
|  |             The left child of the target node. If the node is a leaf, | ||
|  |             None is returned. | ||
|  | 
 | ||
|  |         """
 | ||
|  |         return self.left | ||
|  | 
 | ||
|  |     def get_right(self): | ||
|  |         """
 | ||
|  |         Return a reference to the right child tree object. | ||
|  | 
 | ||
|  |         Returns | ||
|  |         ------- | ||
|  |         right : ClusterNode | ||
|  |             The left child of the target node. If the node is a leaf, | ||
|  |             None is returned. | ||
|  | 
 | ||
|  |         """
 | ||
|  |         return self.right | ||
|  | 
 | ||
|  |     def is_leaf(self): | ||
|  |         """
 | ||
|  |         Return True if the target node is a leaf. | ||
|  | 
 | ||
|  |         Returns | ||
|  |         ------- | ||
|  |         leafness : bool | ||
|  |             True if the target node is a leaf node. | ||
|  | 
 | ||
|  |         """
 | ||
|  |         return self.left is None | ||
|  | 
 | ||
|  |     def pre_order(self, func=(lambda x: x.id)): | ||
|  |         """
 | ||
|  |         Perform pre-order traversal without recursive function calls. | ||
|  | 
 | ||
|  |         When a leaf node is first encountered, ``func`` is called with | ||
|  |         the leaf node as its argument, and its result is appended to | ||
|  |         the list. | ||
|  | 
 | ||
|  |         For example, the statement:: | ||
|  | 
 | ||
|  |            ids = root.pre_order(lambda x: x.id) | ||
|  | 
 | ||
|  |         returns a list of the node ids corresponding to the leaf nodes | ||
|  |         of the tree as they appear from left to right. | ||
|  | 
 | ||
|  |         Parameters | ||
|  |         ---------- | ||
|  |         func : function | ||
|  |             Applied to each leaf ClusterNode object in the pre-order traversal. | ||
|  |             Given the ``i``-th leaf node in the pre-order traversal ``n[i]``, | ||
|  |             the result of ``func(n[i])`` is stored in ``L[i]``. If not | ||
|  |             provided, the index of the original observation to which the node | ||
|  |             corresponds is used. | ||
|  | 
 | ||
|  |         Returns | ||
|  |         ------- | ||
|  |         L : list | ||
|  |             The pre-order traversal. | ||
|  | 
 | ||
|  |         """
 | ||
|  |         # Do a preorder traversal, caching the result. To avoid having to do | ||
|  |         # recursion, we'll store the previous index we've visited in a vector. | ||
|  |         n = self.count | ||
|  | 
 | ||
|  |         curNode = [None] * (2 * n) | ||
|  |         lvisited = set() | ||
|  |         rvisited = set() | ||
|  |         curNode[0] = self | ||
|  |         k = 0 | ||
|  |         preorder = [] | ||
|  |         while k >= 0: | ||
|  |             nd = curNode[k] | ||
|  |             ndid = nd.id | ||
|  |             if nd.is_leaf(): | ||
|  |                 preorder.append(func(nd)) | ||
|  |                 k = k - 1 | ||
|  |             else: | ||
|  |                 if ndid not in lvisited: | ||
|  |                     curNode[k + 1] = nd.left | ||
|  |                     lvisited.add(ndid) | ||
|  |                     k = k + 1 | ||
|  |                 elif ndid not in rvisited: | ||
|  |                     curNode[k + 1] = nd.right | ||
|  |                     rvisited.add(ndid) | ||
|  |                     k = k + 1 | ||
|  |                 # If we've visited the left and right of this non-leaf | ||
|  |                 # node already, go up in the tree. | ||
|  |                 else: | ||
|  |                     k = k - 1 | ||
|  | 
 | ||
|  |         return preorder | ||
|  | 
 | ||
|  | 
 | ||
|  | _cnode_bare = ClusterNode(0) | ||
|  | _cnode_type = type(ClusterNode) | ||
|  | 
 | ||
|  | 
 | ||
|  | def _order_cluster_tree(Z): | ||
|  |     """
 | ||
|  |     Return clustering nodes in bottom-up order by distance. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     Z : scipy.cluster.linkage array | ||
|  |         The linkage matrix. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     nodes : list | ||
|  |         A list of ClusterNode objects. | ||
|  |     """
 | ||
|  |     q = deque() | ||
|  |     tree = to_tree(Z) | ||
|  |     q.append(tree) | ||
|  |     nodes = [] | ||
|  | 
 | ||
|  |     while q: | ||
|  |         node = q.popleft() | ||
|  |         if not node.is_leaf(): | ||
|  |             bisect.insort_left(nodes, node) | ||
|  |             q.append(node.get_right()) | ||
|  |             q.append(node.get_left()) | ||
|  |     return nodes | ||
|  | 
 | ||
|  | 
 | ||
|  | @xp_capabilities(np_only=True, reason="non-standard indexing") | ||
|  | def cut_tree(Z, n_clusters=None, height=None): | ||
|  |     """
 | ||
|  |     Given a linkage matrix Z, return the cut tree. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     Z : scipy.cluster.linkage array | ||
|  |         The linkage matrix. | ||
|  |     n_clusters : array_like, optional | ||
|  |         Number of clusters in the tree at the cut point. | ||
|  |     height : array_like, optional | ||
|  |         The height at which to cut the tree. Only possible for ultrametric | ||
|  |         trees. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     cutree : array | ||
|  |         An array indicating group membership at each agglomeration step. I.e., | ||
|  |         for a full cut tree, in the first column each data point is in its own | ||
|  |         cluster. At the next step, two nodes are merged. Finally, all | ||
|  |         singleton and non-singleton clusters are in one group. If `n_clusters` | ||
|  |         or `height` are given, the columns correspond to the columns of | ||
|  |         `n_clusters` or `height`. | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy import cluster | ||
|  |     >>> import numpy as np | ||
|  |     >>> from numpy.random import default_rng | ||
|  |     >>> rng = default_rng() | ||
|  |     >>> X = rng.random((50, 4)) | ||
|  |     >>> Z = cluster.hierarchy.ward(X) | ||
|  |     >>> cutree = cluster.hierarchy.cut_tree(Z, n_clusters=[5, 10]) | ||
|  |     >>> cutree[:10] | ||
|  |     array([[0, 0], | ||
|  |            [1, 1], | ||
|  |            [2, 2], | ||
|  |            [3, 3], | ||
|  |            [3, 4], | ||
|  |            [2, 2], | ||
|  |            [0, 0], | ||
|  |            [1, 5], | ||
|  |            [3, 6], | ||
|  |            [4, 7]])  # random | ||
|  | 
 | ||
|  |     """
 | ||
|  |     xp = array_namespace(Z) | ||
|  |     nobs = num_obs_linkage(Z) | ||
|  |     nodes = _order_cluster_tree(Z) | ||
|  | 
 | ||
|  |     if height is not None and n_clusters is not None: | ||
|  |         raise ValueError("At least one of either height or n_clusters " | ||
|  |                          "must be None") | ||
|  |     elif height is None and n_clusters is None:  # return the full cut tree | ||
|  |         cols_idx = xp.arange(nobs) | ||
|  |     elif height is not None: | ||
|  |         height = xp.asarray(height) | ||
|  |         heights = xp.asarray([x.dist for x in nodes]) | ||
|  |         cols_idx = xp.searchsorted(heights, height) | ||
|  |     else: | ||
|  |         n_clusters = xp.asarray(n_clusters) | ||
|  |         cols_idx = nobs - xp.searchsorted(xp.arange(nobs), n_clusters) | ||
|  | 
 | ||
|  |     try: | ||
|  |         n_cols = len(cols_idx) | ||
|  |     except TypeError:  # scalar | ||
|  |         n_cols = 1 | ||
|  |         cols_idx = xp.asarray([cols_idx]) | ||
|  | 
 | ||
|  |     groups = xp.zeros((n_cols, nobs), dtype=xp.int64) | ||
|  |     last_group = xp.arange(nobs) | ||
|  |     if 0 in cols_idx: | ||
|  |         groups[0] = last_group | ||
|  | 
 | ||
|  |     for i, node in enumerate(nodes): | ||
|  |         idx = node.pre_order() | ||
|  |         this_group = xp_copy(last_group, xp=xp) | ||
|  |         # TODO ARRAY_API complex indexing not supported | ||
|  |         this_group[idx] = xp.min(last_group[idx]) | ||
|  |         this_group[this_group > xp.max(last_group[idx])] -= 1 | ||
|  |         if i + 1 in cols_idx: | ||
|  |             groups[np.nonzero(i + 1 == cols_idx)[0]] = this_group | ||
|  |         last_group = this_group | ||
|  | 
 | ||
|  |     return groups.T | ||
|  | 
 | ||
|  | 
 | ||
|  | @xp_capabilities(jax_jit=False, allow_dask_compute=True) | ||
|  | def to_tree(Z, rd=False): | ||
|  |     """
 | ||
|  |     Convert a linkage matrix into an easy-to-use tree object. | ||
|  | 
 | ||
|  |     The reference to the root `ClusterNode` object is returned (by default). | ||
|  | 
 | ||
|  |     Each `ClusterNode` object has a ``left``, ``right``, ``dist``, ``id``, | ||
|  |     and ``count`` attribute. The left and right attributes point to | ||
|  |     ClusterNode objects that were combined to generate the cluster. | ||
|  |     If both are None then the `ClusterNode` object is a leaf node, its count | ||
|  |     must be 1, and its distance is meaningless but set to 0. | ||
|  | 
 | ||
|  |     *Note: This function is provided for the convenience of the library | ||
|  |     user. ClusterNodes are not used as input to any of the functions in this | ||
|  |     library.* | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     Z : ndarray | ||
|  |         The linkage matrix in proper form (see the `linkage` | ||
|  |         function documentation). | ||
|  |     rd : bool, optional | ||
|  |         When False (default), a reference to the root `ClusterNode` object is | ||
|  |         returned.  Otherwise, a tuple ``(r, d)`` is returned. ``r`` is a | ||
|  |         reference to the root node while ``d`` is a list of `ClusterNode` | ||
|  |         objects - one per original entry in the linkage matrix plus entries | ||
|  |         for all clustering steps. If a cluster id is | ||
|  |         less than the number of samples ``n`` in the data that the linkage | ||
|  |         matrix describes, then it corresponds to a singleton cluster (leaf | ||
|  |         node). | ||
|  |         See `linkage` for more information on the assignment of cluster ids | ||
|  |         to clusters. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     tree : ClusterNode or tuple (ClusterNode, list of ClusterNode) | ||
|  |         If ``rd`` is False, a `ClusterNode`. | ||
|  |         If ``rd`` is True, a list of length ``2*n - 1``, with ``n`` the number | ||
|  |         of samples.  See the description of `rd` above for more details. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     linkage, is_valid_linkage, ClusterNode | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> import numpy as np | ||
|  |     >>> from scipy.cluster import hierarchy | ||
|  |     >>> rng = np.random.default_rng() | ||
|  |     >>> x = rng.random((5, 2)) | ||
|  |     >>> Z = hierarchy.linkage(x) | ||
|  |     >>> hierarchy.to_tree(Z) | ||
|  |     <scipy.cluster.hierarchy.ClusterNode object at ... | ||
|  |     >>> rootnode, nodelist = hierarchy.to_tree(Z, rd=True) | ||
|  |     >>> rootnode | ||
|  |     <scipy.cluster.hierarchy.ClusterNode object at ... | ||
|  |     >>> len(nodelist) | ||
|  |     9 | ||
|  | 
 | ||
|  |     """
 | ||
|  |     xp = array_namespace(Z) | ||
|  |     Z = _asarray(Z, order='C', xp=xp) | ||
|  |     _is_valid_linkage(Z, throw=True, name='Z', materialize=True, xp=xp) | ||
|  | 
 | ||
|  |     # Number of original objects is equal to the number of rows plus 1. | ||
|  |     n = Z.shape[0] + 1 | ||
|  | 
 | ||
|  |     # Create a list full of None's to store the node objects | ||
|  |     d = [None] * (n * 2 - 1) | ||
|  | 
 | ||
|  |     # Create the nodes corresponding to the n original objects. | ||
|  |     for i in range(0, n): | ||
|  |         d[i] = ClusterNode(i) | ||
|  | 
 | ||
|  |     nd = None | ||
|  | 
 | ||
|  |     for i in range(Z.shape[0]): | ||
|  |         row = Z[i, :] | ||
|  | 
 | ||
|  |         fi = int_floor(row[0], xp) | ||
|  |         fj = int_floor(row[1], xp) | ||
|  |         if fi > i + n: | ||
|  |             raise ValueError('Corrupt matrix Z. Index to derivative cluster ' | ||
|  |                               f'is used before it is formed. See row {fi}, ' | ||
|  |                               'column 0') | ||
|  |         if fj > i + n: | ||
|  |             raise ValueError('Corrupt matrix Z. Index to derivative cluster ' | ||
|  |                               f'is used before it is formed. See row {fj}, ' | ||
|  |                               'column 1') | ||
|  | 
 | ||
|  |         nd = ClusterNode(i + n, d[fi], d[fj], row[2]) | ||
|  |         #                ^ id   ^ left ^ right ^ dist | ||
|  |         if row[3] != nd.count: | ||
|  |             raise ValueError(f'Corrupt matrix Z. The count Z[{i},3] is ' | ||
|  |                               'incorrect.') | ||
|  |         d[n + i] = nd | ||
|  | 
 | ||
|  |     if rd: | ||
|  |         return (nd, d) | ||
|  |     else: | ||
|  |         return nd | ||
|  | 
 | ||
|  | 
 | ||
|  | @lazy_cython | ||
|  | def optimal_leaf_ordering(Z, y, metric='euclidean'): | ||
|  |     """
 | ||
|  |     Given a linkage matrix Z and distance, reorder the cut tree. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     Z : ndarray | ||
|  |         The hierarchical clustering encoded as a linkage matrix. See | ||
|  |         `linkage` for more information on the return structure and | ||
|  |         algorithm. | ||
|  |     y : ndarray | ||
|  |         The condensed distance matrix from which Z was generated. | ||
|  |         Alternatively, a collection of m observation vectors in n | ||
|  |         dimensions may be passed as an m by n array. | ||
|  |     metric : str or function, optional | ||
|  |         The distance metric to use in the case that y is a collection of | ||
|  |         observation vectors; ignored otherwise. See the ``pdist`` | ||
|  |         function for a list of valid distance metrics. A custom distance | ||
|  |         function can also be used. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     Z_ordered : ndarray | ||
|  |         A copy of the linkage matrix Z, reordered to minimize the distance | ||
|  |         between adjacent leaves. | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> import numpy as np | ||
|  |     >>> from scipy.cluster import hierarchy | ||
|  |     >>> rng = np.random.default_rng() | ||
|  |     >>> X = rng.standard_normal((10, 10)) | ||
|  |     >>> Z = hierarchy.ward(X) | ||
|  |     >>> hierarchy.leaves_list(Z) | ||
|  |     array([0, 3, 1, 9, 2, 5, 7, 4, 6, 8], dtype=int32) | ||
|  |     >>> hierarchy.leaves_list(hierarchy.optimal_leaf_ordering(Z, X)) | ||
|  |     array([3, 0, 2, 5, 7, 4, 8, 6, 9, 1], dtype=int32) | ||
|  | 
 | ||
|  |     """
 | ||
|  |     xp = array_namespace(Z, y) | ||
|  |     Z = _asarray(Z, order='C', xp=xp) | ||
|  |     y = _asarray(y, order='C', dtype=xp.float64, xp=xp) | ||
|  |     lazy = is_lazy_array(Z) | ||
|  |     _is_valid_linkage(Z, throw=True, name='Z', xp=xp) | ||
|  | 
 | ||
|  |     if y.ndim == 1: | ||
|  |         distance.is_valid_y(y, throw=True, name='y') | ||
|  |     elif y.ndim == 2: | ||
|  |         if (not lazy and y.shape[0] == y.shape[1] | ||
|  |             and xp.all(xpx.isclose(xp.linalg.diagonal(y), 0)) | ||
|  |             and xp.all(y >= 0) and xp.all(xpx.isclose(y, y.T))): | ||
|  |             warnings.warn('The symmetric non-negative hollow observation ' | ||
|  |                           'matrix looks suspiciously like an uncondensed ' | ||
|  |                           'distance matrix', | ||
|  |                           ClusterWarning, stacklevel=2) | ||
|  |         y = distance.pdist(y, metric) | ||
|  |     else: | ||
|  |         raise ValueError("`y` must be 1 or 2 dimensional.") | ||
|  |     if not lazy and not xp.all(xp.isfinite(y)): | ||
|  |         raise ValueError("The condensed distance matrix must contain only " | ||
|  |                          "finite values.") | ||
|  | 
 | ||
|  |     # The function name is prominently visible on the user-facing Dask dashboard; | ||
|  |     # make sure it is meaningful. | ||
|  |     def cy_optimal_leaf_ordering(Z, y, validate): | ||
|  |         if validate: | ||
|  |             _is_valid_linkage(Z, throw=True, name='Z', xp=np) | ||
|  |             if not np.all(np.isfinite(y)): | ||
|  |                 raise ValueError("The condensed distance matrix must contain only " | ||
|  |                                  "finite values.") | ||
|  |         return _optimal_leaf_ordering.optimal_leaf_ordering(Z, y) | ||
|  | 
 | ||
|  |     return xpx.lazy_apply(cy_optimal_leaf_ordering, Z, y, validate=lazy, | ||
|  |                           shape=Z.shape, dtype=Z.dtype, as_numpy=True, xp=xp) | ||
|  | 
 | ||
|  | 
 | ||
|  | @lazy_cython | ||
|  | def cophenet(Z, Y=None): | ||
|  |     """
 | ||
|  |     Calculate the cophenetic distances between each observation in | ||
|  |     the hierarchical clustering defined by the linkage ``Z``. | ||
|  | 
 | ||
|  |     Suppose ``p`` and ``q`` are original observations in | ||
|  |     disjoint clusters ``s`` and ``t``, respectively and | ||
|  |     ``s`` and ``t`` are joined by a direct parent cluster | ||
|  |     ``u``. The cophenetic distance between observations | ||
|  |     ``i`` and ``j`` is simply the distance between | ||
|  |     clusters ``s`` and ``t``. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     Z : ndarray | ||
|  |         The hierarchical clustering encoded as an array | ||
|  |         (see `linkage` function). | ||
|  |     Y : ndarray (optional) | ||
|  |         Calculates the cophenetic correlation coefficient ``c`` of a | ||
|  |         hierarchical clustering defined by the linkage matrix `Z` | ||
|  |         of a set of :math:`n` observations in :math:`m` | ||
|  |         dimensions. `Y` is the condensed distance matrix from which | ||
|  |         `Z` was generated. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     c : ndarray | ||
|  |         The cophentic correlation distance (if ``Y`` is passed). | ||
|  |     d : ndarray | ||
|  |         The cophenetic distance matrix in condensed form. The | ||
|  |         :math:`ij` th entry is the cophenetic distance between | ||
|  |         original observations :math:`i` and :math:`j`. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     linkage : | ||
|  |         for a description of what a linkage matrix is. | ||
|  |     scipy.spatial.distance.squareform : | ||
|  |         transforming condensed matrices into square ones. | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import single, cophenet | ||
|  |     >>> from scipy.spatial.distance import pdist, squareform | ||
|  | 
 | ||
|  |     Given a dataset ``X`` and a linkage matrix ``Z``, the cophenetic distance | ||
|  |     between two points of ``X`` is the distance between the largest two | ||
|  |     distinct clusters that each of the points: | ||
|  | 
 | ||
|  |     >>> X = [[0, 0], [0, 1], [1, 0], | ||
|  |     ...      [0, 4], [0, 3], [1, 4], | ||
|  |     ...      [4, 0], [3, 0], [4, 1], | ||
|  |     ...      [4, 4], [3, 4], [4, 3]] | ||
|  | 
 | ||
|  |     ``X`` corresponds to this dataset :: | ||
|  | 
 | ||
|  |         x x    x x | ||
|  |         x        x | ||
|  | 
 | ||
|  |         x        x | ||
|  |         x x    x x | ||
|  | 
 | ||
|  |     >>> Z = single(pdist(X)) | ||
|  |     >>> Z | ||
|  |     array([[ 0.,  1.,  1.,  2.], | ||
|  |            [ 2., 12.,  1.,  3.], | ||
|  |            [ 3.,  4.,  1.,  2.], | ||
|  |            [ 5., 14.,  1.,  3.], | ||
|  |            [ 6.,  7.,  1.,  2.], | ||
|  |            [ 8., 16.,  1.,  3.], | ||
|  |            [ 9., 10.,  1.,  2.], | ||
|  |            [11., 18.,  1.,  3.], | ||
|  |            [13., 15.,  2.,  6.], | ||
|  |            [17., 20.,  2.,  9.], | ||
|  |            [19., 21.,  2., 12.]]) | ||
|  |     >>> cophenet(Z) | ||
|  |     array([1., 1., 2., 2., 2., 2., 2., 2., 2., 2., 2., 1., 2., 2., 2., 2., 2., | ||
|  |            2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 2., 2., | ||
|  |            2., 2., 2., 2., 1., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., | ||
|  |            1., 1., 2., 2., 2., 1., 2., 2., 2., 2., 2., 2., 1., 1., 1.]) | ||
|  | 
 | ||
|  |     The output of the `scipy.cluster.hierarchy.cophenet` method is | ||
|  |     represented in condensed form. We can use | ||
|  |     `scipy.spatial.distance.squareform` to see the output as a | ||
|  |     regular matrix (where each element ``ij`` denotes the cophenetic distance | ||
|  |     between each ``i``, ``j`` pair of points in ``X``): | ||
|  | 
 | ||
|  |     >>> squareform(cophenet(Z)) | ||
|  |     array([[0., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2., 2.], | ||
|  |            [1., 0., 1., 2., 2., 2., 2., 2., 2., 2., 2., 2.], | ||
|  |            [1., 1., 0., 2., 2., 2., 2., 2., 2., 2., 2., 2.], | ||
|  |            [2., 2., 2., 0., 1., 1., 2., 2., 2., 2., 2., 2.], | ||
|  |            [2., 2., 2., 1., 0., 1., 2., 2., 2., 2., 2., 2.], | ||
|  |            [2., 2., 2., 1., 1., 0., 2., 2., 2., 2., 2., 2.], | ||
|  |            [2., 2., 2., 2., 2., 2., 0., 1., 1., 2., 2., 2.], | ||
|  |            [2., 2., 2., 2., 2., 2., 1., 0., 1., 2., 2., 2.], | ||
|  |            [2., 2., 2., 2., 2., 2., 1., 1., 0., 2., 2., 2.], | ||
|  |            [2., 2., 2., 2., 2., 2., 2., 2., 2., 0., 1., 1.], | ||
|  |            [2., 2., 2., 2., 2., 2., 2., 2., 2., 1., 0., 1.], | ||
|  |            [2., 2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 0.]]) | ||
|  | 
 | ||
|  |     In this example, the cophenetic distance between points on ``X`` that are | ||
|  |     very close (i.e., in the same corner) is 1. For other pairs of points is 2, | ||
|  |     because the points will be located in clusters at different | ||
|  |     corners - thus, the distance between these clusters will be larger. | ||
|  | 
 | ||
|  |     """
 | ||
|  |     xp = array_namespace(Z, Y) | ||
|  |     # Ensure float64 C-contiguous array. Cython code doesn't deal with striding. | ||
|  |     Z = _asarray(Z, order='C', dtype=xp.float64, xp=xp) | ||
|  |     _is_valid_linkage(Z, throw=True, name='Z', xp=xp) | ||
|  | 
 | ||
|  |     def cy_cophenet(Z, validate): | ||
|  |         if validate: | ||
|  |             _is_valid_linkage(Z, throw=True, name='Z', xp=np) | ||
|  |         n = Z.shape[0] + 1 | ||
|  |         zz = np.zeros((n * (n-1)) // 2, dtype=np.float64) | ||
|  |         _hierarchy.cophenetic_distances(Z, zz, n) | ||
|  |         return zz | ||
|  |      | ||
|  |     n = Z.shape[0] + 1 | ||
|  |     zz = xpx.lazy_apply(cy_cophenet, Z, validate=is_lazy_array(Z), | ||
|  |                         shape=((n * (n-1)) // 2, ), dtype=xp.float64, | ||
|  |                         as_numpy=True, xp=xp) | ||
|  |                          | ||
|  |     if Y is None: | ||
|  |         return zz | ||
|  | 
 | ||
|  |     Y = _asarray(Y, order='C', xp=xp) | ||
|  |     distance.is_valid_y(Y, throw=True, name='Y') | ||
|  | 
 | ||
|  |     z = xp.mean(zz) | ||
|  |     y = xp.mean(Y) | ||
|  |     Yy = Y - y | ||
|  |     Zz = zz - z | ||
|  |     numerator = (Yy * Zz) | ||
|  |     denomA = Yy**2 | ||
|  |     denomB = Zz**2 | ||
|  |     c = xp.sum(numerator) / xp.sqrt(xp.sum(denomA) * xp.sum(denomB)) | ||
|  |     return (c, zz) | ||
|  | 
 | ||
|  | 
 | ||
|  | @lazy_cython | ||
|  | def inconsistent(Z, d=2): | ||
|  |     r"""
 | ||
|  |     Calculate inconsistency statistics on a linkage matrix. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     Z : ndarray | ||
|  |         The :math:`(n-1)` by 4 matrix encoding the linkage (hierarchical | ||
|  |         clustering).  See `linkage` documentation for more information on its | ||
|  |         form. | ||
|  |     d : int, optional | ||
|  |         The number of links up to `d` levels below each non-singleton cluster. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     R : ndarray | ||
|  |         A :math:`(n-1)` by 4 matrix where the ``i``'th row contains the link | ||
|  |         statistics for the non-singleton cluster ``i``. The link statistics are | ||
|  |         computed over the link heights for links :math:`d` levels below the | ||
|  |         cluster ``i``. ``R[i,0]`` and ``R[i,1]`` are the mean and standard | ||
|  |         deviation of the link heights, respectively; ``R[i,2]`` is the number | ||
|  |         of links included in the calculation; and ``R[i,3]`` is the | ||
|  |         inconsistency coefficient, | ||
|  | 
 | ||
|  |         .. math:: \frac{\mathtt{Z[i,2]} - \mathtt{R[i,0]}} {R[i,1]} | ||
|  | 
 | ||
|  |     Notes | ||
|  |     ----- | ||
|  |     This function behaves similarly to the MATLAB(TM) ``inconsistent`` | ||
|  |     function. | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import inconsistent, linkage | ||
|  |     >>> from matplotlib import pyplot as plt | ||
|  |     >>> X = [[i] for i in [2, 8, 0, 4, 1, 9, 9, 0]] | ||
|  |     >>> Z = linkage(X, 'ward') | ||
|  |     >>> print(Z) | ||
|  |     [[ 5.          6.          0.          2.        ] | ||
|  |      [ 2.          7.          0.          2.        ] | ||
|  |      [ 0.          4.          1.          2.        ] | ||
|  |      [ 1.          8.          1.15470054  3.        ] | ||
|  |      [ 9.         10.          2.12132034  4.        ] | ||
|  |      [ 3.         12.          4.11096096  5.        ] | ||
|  |      [11.         13.         14.07183949  8.        ]] | ||
|  |     >>> inconsistent(Z) | ||
|  |     array([[ 0.        ,  0.        ,  1.        ,  0.        ], | ||
|  |            [ 0.        ,  0.        ,  1.        ,  0.        ], | ||
|  |            [ 1.        ,  0.        ,  1.        ,  0.        ], | ||
|  |            [ 0.57735027,  0.81649658,  2.        ,  0.70710678], | ||
|  |            [ 1.04044011,  1.06123822,  3.        ,  1.01850858], | ||
|  |            [ 3.11614065,  1.40688837,  2.        ,  0.70710678], | ||
|  |            [ 6.44583366,  6.76770586,  3.        ,  1.12682288]]) | ||
|  | 
 | ||
|  |     """
 | ||
|  |     xp = array_namespace(Z) | ||
|  |     Z = _asarray(Z, order='C', dtype=xp.float64, xp=xp) | ||
|  |     _is_valid_linkage(Z, throw=True, name='Z', xp=xp) | ||
|  | 
 | ||
|  |     if d != np.floor(d) or d < 0: | ||
|  |         raise ValueError('The second argument d must be a nonnegative ' | ||
|  |                          'integer value.') | ||
|  | 
 | ||
|  |     def cy_inconsistent(Z, d, validate): | ||
|  |         if validate: | ||
|  |             _is_valid_linkage(Z, throw=True, name='Z', xp=np) | ||
|  |         R = np.zeros((Z.shape[0], 4), dtype=np.float64) | ||
|  |         n = Z.shape[0] + 1 | ||
|  |         _hierarchy.inconsistent(Z, R, n, d) | ||
|  |         return R | ||
|  | 
 | ||
|  |     return xpx.lazy_apply(cy_inconsistent, Z, d=int(d), validate=is_lazy_array(Z), | ||
|  |                           shape=(Z.shape[0], 4), dtype=xp.float64, | ||
|  |                           as_numpy=True, xp=xp) | ||
|  | 
 | ||
|  | 
 | ||
|  | @lazy_cython | ||
|  | def from_mlab_linkage(Z): | ||
|  |     """
 | ||
|  |     Convert a linkage matrix generated by MATLAB(TM) to a new | ||
|  |     linkage matrix compatible with this module. | ||
|  | 
 | ||
|  |     The conversion does two things: | ||
|  | 
 | ||
|  |      * the indices are converted from ``1..N`` to ``0..(N-1)`` form, | ||
|  |        and | ||
|  | 
 | ||
|  |      * a fourth column ``Z[:,3]`` is added where ``Z[i,3]`` represents the | ||
|  |        number of original observations (leaves) in the non-singleton | ||
|  |        cluster ``i``. | ||
|  | 
 | ||
|  |     This function is useful when loading in linkages from legacy data | ||
|  |     files generated by MATLAB. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     Z : ndarray | ||
|  |         A linkage matrix generated by MATLAB(TM). | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     ZS : ndarray | ||
|  |         A linkage matrix compatible with ``scipy.cluster.hierarchy``. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     linkage : for a description of what a linkage matrix is. | ||
|  |     to_mlab_linkage : transform from SciPy to MATLAB format. | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> import numpy as np | ||
|  |     >>> from scipy.cluster.hierarchy import ward, from_mlab_linkage | ||
|  | 
 | ||
|  |     Given a linkage matrix in MATLAB format ``mZ``, we can use | ||
|  |     `scipy.cluster.hierarchy.from_mlab_linkage` to import | ||
|  |     it into SciPy format: | ||
|  | 
 | ||
|  |     >>> mZ = np.array([[1, 2, 1], [4, 5, 1], [7, 8, 1], | ||
|  |     ...                [10, 11, 1], [3, 13, 1.29099445], | ||
|  |     ...                [6, 14, 1.29099445], | ||
|  |     ...                [9, 15, 1.29099445], | ||
|  |     ...                [12, 16, 1.29099445], | ||
|  |     ...                [17, 18, 5.77350269], | ||
|  |     ...                [19, 20, 5.77350269], | ||
|  |     ...                [21, 22,  8.16496581]]) | ||
|  | 
 | ||
|  |     >>> Z = from_mlab_linkage(mZ) | ||
|  |     >>> Z | ||
|  |     array([[  0.        ,   1.        ,   1.        ,   2.        ], | ||
|  |            [  3.        ,   4.        ,   1.        ,   2.        ], | ||
|  |            [  6.        ,   7.        ,   1.        ,   2.        ], | ||
|  |            [  9.        ,  10.        ,   1.        ,   2.        ], | ||
|  |            [  2.        ,  12.        ,   1.29099445,   3.        ], | ||
|  |            [  5.        ,  13.        ,   1.29099445,   3.        ], | ||
|  |            [  8.        ,  14.        ,   1.29099445,   3.        ], | ||
|  |            [ 11.        ,  15.        ,   1.29099445,   3.        ], | ||
|  |            [ 16.        ,  17.        ,   5.77350269,   6.        ], | ||
|  |            [ 18.        ,  19.        ,   5.77350269,   6.        ], | ||
|  |            [ 20.        ,  21.        ,   8.16496581,  12.        ]]) | ||
|  | 
 | ||
|  |     As expected, the linkage matrix ``Z`` returned includes an | ||
|  |     additional column counting the number of original samples in | ||
|  |     each cluster. Also, all cluster indices are reduced by 1 | ||
|  |     (MATLAB format uses 1-indexing, whereas SciPy uses 0-indexing). | ||
|  | 
 | ||
|  |     """
 | ||
|  |     xp = array_namespace(Z) | ||
|  |     Z = _asarray(Z, dtype=xp.float64, order='C', xp=xp) | ||
|  | 
 | ||
|  |     # If it's empty, return it. | ||
|  |     if Z.shape in ((), (0, )): | ||
|  |         return xp_copy(Z, xp=xp) | ||
|  | 
 | ||
|  |     if Z.ndim != 2: | ||
|  |         raise ValueError("The linkage array must be rectangular.") | ||
|  | 
 | ||
|  |     # If it contains no rows, return it. | ||
|  |     n = Z.shape[0] | ||
|  |     if n == 0: | ||
|  |         return xp_copy(Z, xp=xp) | ||
|  | 
 | ||
|  |     lazy = is_lazy_array(Z) | ||
|  | 
 | ||
|  |     if not lazy and xp.min(Z[:, :2]) != 1.0 and xp.max(Z[:, :2]) != 2 * n: | ||
|  |         raise ValueError('The format of the indices is not 1..N') | ||
|  | 
 | ||
|  |     res = xp.empty((Z.shape[0], Z.shape[1] + 1), dtype=Z.dtype) | ||
|  |     res = xpx.at(res)[:, :2].set(Z[:, :2] - 1.0) | ||
|  |     res = xpx.at(res)[:, 2:-1].set(Z[:, 2:]) | ||
|  | 
 | ||
|  |     def cy_from_mlab_linkage(Zpart, validate): | ||
|  |         n = Zpart.shape[0] | ||
|  |         if validate and np.min(Zpart[:, :2]) != 0.0 and np.max(Zpart[:, :2]) != 2 * n: | ||
|  |             raise ValueError('The format of the indices is not 1..N') | ||
|  | 
 | ||
|  |         if not Zpart.flags.writeable: | ||
|  |             Zpart = Zpart.copy()  # xp=jax.numpy | ||
|  | 
 | ||
|  |         CS = np.zeros((n,)) | ||
|  |         _hierarchy.calculate_cluster_sizes(Zpart, CS, n + 1) | ||
|  |         return CS | ||
|  | 
 | ||
|  |     CS = xpx.lazy_apply(cy_from_mlab_linkage, res[:, :-1], validate=lazy, | ||
|  |                         shape=(res.shape[0],), dtype=xp.float64, | ||
|  |                         as_numpy=True, xp=xp) | ||
|  | 
 | ||
|  |     return xpx.at(res)[:, -1].set(CS) | ||
|  | 
 | ||
|  | 
 | ||
|  | @xp_capabilities() | ||
|  | def to_mlab_linkage(Z): | ||
|  |     """
 | ||
|  |     Convert a linkage matrix to a MATLAB(TM) compatible one. | ||
|  | 
 | ||
|  |     Converts a linkage matrix ``Z`` generated by the linkage function | ||
|  |     of this module to a MATLAB(TM) compatible one. The return linkage | ||
|  |     matrix has the last column removed and the cluster indices are | ||
|  |     converted to ``1..N`` indexing. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     Z : ndarray | ||
|  |         A linkage matrix generated by ``scipy.cluster.hierarchy``. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     to_mlab_linkage : ndarray | ||
|  |         A linkage matrix compatible with MATLAB(TM)'s hierarchical | ||
|  |         clustering functions. | ||
|  | 
 | ||
|  |         The return linkage matrix has the last column removed | ||
|  |         and the cluster indices are converted to ``1..N`` indexing. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     linkage : for a description of what a linkage matrix is. | ||
|  |     from_mlab_linkage : transform from Matlab to SciPy format. | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import ward, to_mlab_linkage | ||
|  |     >>> from scipy.spatial.distance import pdist | ||
|  | 
 | ||
|  |     >>> X = [[0, 0], [0, 1], [1, 0], | ||
|  |     ...      [0, 4], [0, 3], [1, 4], | ||
|  |     ...      [4, 0], [3, 0], [4, 1], | ||
|  |     ...      [4, 4], [3, 4], [4, 3]] | ||
|  | 
 | ||
|  |     >>> Z = ward(pdist(X)) | ||
|  |     >>> Z | ||
|  |     array([[ 0.        ,  1.        ,  1.        ,  2.        ], | ||
|  |            [ 3.        ,  4.        ,  1.        ,  2.        ], | ||
|  |            [ 6.        ,  7.        ,  1.        ,  2.        ], | ||
|  |            [ 9.        , 10.        ,  1.        ,  2.        ], | ||
|  |            [ 2.        , 12.        ,  1.29099445,  3.        ], | ||
|  |            [ 5.        , 13.        ,  1.29099445,  3.        ], | ||
|  |            [ 8.        , 14.        ,  1.29099445,  3.        ], | ||
|  |            [11.        , 15.        ,  1.29099445,  3.        ], | ||
|  |            [16.        , 17.        ,  5.77350269,  6.        ], | ||
|  |            [18.        , 19.        ,  5.77350269,  6.        ], | ||
|  |            [20.        , 21.        ,  8.16496581, 12.        ]]) | ||
|  | 
 | ||
|  |     After a linkage matrix ``Z`` has been created, we can use | ||
|  |     `scipy.cluster.hierarchy.to_mlab_linkage` to convert it | ||
|  |     into MATLAB format: | ||
|  | 
 | ||
|  |     >>> mZ = to_mlab_linkage(Z) | ||
|  |     >>> mZ | ||
|  |     array([[  1.        ,   2.        ,   1.        ], | ||
|  |            [  4.        ,   5.        ,   1.        ], | ||
|  |            [  7.        ,   8.        ,   1.        ], | ||
|  |            [ 10.        ,  11.        ,   1.        ], | ||
|  |            [  3.        ,  13.        ,   1.29099445], | ||
|  |            [  6.        ,  14.        ,   1.29099445], | ||
|  |            [  9.        ,  15.        ,   1.29099445], | ||
|  |            [ 12.        ,  16.        ,   1.29099445], | ||
|  |            [ 17.        ,  18.        ,   5.77350269], | ||
|  |            [ 19.        ,  20.        ,   5.77350269], | ||
|  |            [ 21.        ,  22.        ,   8.16496581]]) | ||
|  | 
 | ||
|  |     The new linkage matrix ``mZ`` uses 1-indexing for all the | ||
|  |     clusters (instead of 0-indexing). Also, the last column of | ||
|  |     the original linkage matrix has been dropped. | ||
|  | 
 | ||
|  |     """
 | ||
|  |     xp = array_namespace(Z) | ||
|  |     Z = _asarray(Z, dtype=xp.float64, xp=xp) | ||
|  |     if Z.shape in ((), (0, )): | ||
|  |         return xp_copy(Z, xp=xp) | ||
|  |     _is_valid_linkage(Z, throw=True, name='Z', xp=xp) | ||
|  | 
 | ||
|  |     return xp.concat((Z[:, :2] + 1.0, Z[:, 2:3]), axis=1) | ||
|  | 
 | ||
|  | 
 | ||
|  | @xp_capabilities() | ||
|  | def is_monotonic(Z): | ||
|  |     """
 | ||
|  |     Return True if the linkage passed is monotonic. | ||
|  | 
 | ||
|  |     The linkage is monotonic if for every cluster :math:`s` and :math:`t` | ||
|  |     joined, the distance between them is no less than the distance | ||
|  |     between any previously joined clusters. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     Z : ndarray | ||
|  |         The linkage matrix to check for monotonicity. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     b : bool | ||
|  |         A boolean indicating whether the linkage is monotonic. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     linkage : for a description of what a linkage matrix is. | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import median, ward, is_monotonic | ||
|  |     >>> from scipy.spatial.distance import pdist | ||
|  | 
 | ||
|  |     By definition, some hierarchical clustering algorithms - such as | ||
|  |     `scipy.cluster.hierarchy.ward` - produce monotonic assignments of | ||
|  |     samples to clusters; however, this is not always true for other | ||
|  |     hierarchical methods - e.g. `scipy.cluster.hierarchy.median`. | ||
|  | 
 | ||
|  |     Given a linkage matrix ``Z`` (as the result of a hierarchical clustering | ||
|  |     method) we can test programmatically whether it has the monotonicity | ||
|  |     property or not, using `scipy.cluster.hierarchy.is_monotonic`: | ||
|  | 
 | ||
|  |     >>> X = [[0, 0], [0, 1], [1, 0], | ||
|  |     ...      [0, 4], [0, 3], [1, 4], | ||
|  |     ...      [4, 0], [3, 0], [4, 1], | ||
|  |     ...      [4, 4], [3, 4], [4, 3]] | ||
|  | 
 | ||
|  |     >>> Z = ward(pdist(X)) | ||
|  |     >>> Z | ||
|  |     array([[ 0.        ,  1.        ,  1.        ,  2.        ], | ||
|  |            [ 3.        ,  4.        ,  1.        ,  2.        ], | ||
|  |            [ 6.        ,  7.        ,  1.        ,  2.        ], | ||
|  |            [ 9.        , 10.        ,  1.        ,  2.        ], | ||
|  |            [ 2.        , 12.        ,  1.29099445,  3.        ], | ||
|  |            [ 5.        , 13.        ,  1.29099445,  3.        ], | ||
|  |            [ 8.        , 14.        ,  1.29099445,  3.        ], | ||
|  |            [11.        , 15.        ,  1.29099445,  3.        ], | ||
|  |            [16.        , 17.        ,  5.77350269,  6.        ], | ||
|  |            [18.        , 19.        ,  5.77350269,  6.        ], | ||
|  |            [20.        , 21.        ,  8.16496581, 12.        ]]) | ||
|  |     >>> is_monotonic(Z) | ||
|  |     True | ||
|  | 
 | ||
|  |     >>> Z = median(pdist(X)) | ||
|  |     >>> Z | ||
|  |     array([[ 0.        ,  1.        ,  1.        ,  2.        ], | ||
|  |            [ 3.        ,  4.        ,  1.        ,  2.        ], | ||
|  |            [ 9.        , 10.        ,  1.        ,  2.        ], | ||
|  |            [ 6.        ,  7.        ,  1.        ,  2.        ], | ||
|  |            [ 2.        , 12.        ,  1.11803399,  3.        ], | ||
|  |            [ 5.        , 13.        ,  1.11803399,  3.        ], | ||
|  |            [ 8.        , 15.        ,  1.11803399,  3.        ], | ||
|  |            [11.        , 14.        ,  1.11803399,  3.        ], | ||
|  |            [18.        , 19.        ,  3.        ,  6.        ], | ||
|  |            [16.        , 17.        ,  3.5       ,  6.        ], | ||
|  |            [20.        , 21.        ,  3.25      , 12.        ]]) | ||
|  |     >>> is_monotonic(Z) | ||
|  |     False | ||
|  | 
 | ||
|  |     Note that this method is equivalent to just verifying that the distances | ||
|  |     in the third column of the linkage matrix appear in a monotonically | ||
|  |     increasing order. | ||
|  | 
 | ||
|  |     """
 | ||
|  |     xp = array_namespace(Z) | ||
|  |     Z = _asarray(Z, xp=xp) | ||
|  |     _is_valid_linkage(Z, throw=True, name='Z', xp=xp) | ||
|  | 
 | ||
|  |     # We expect the i'th value to be greater than its successor. | ||
|  |     return xp.all(Z[1:, 2] >= Z[:-1, 2]) | ||
|  | 
 | ||
|  | 
 | ||
|  | @xp_capabilities(warnings=[("dask.array", "see notes"), ("jax.numpy", "see notes")]) | ||
|  | def is_valid_im(R, warning=False, throw=False, name=None): | ||
|  |     """Return True if the inconsistency matrix passed is valid.
 | ||
|  | 
 | ||
|  |     It must be a :math:`n` by 4 array of doubles. The standard | ||
|  |     deviations ``R[:,1]`` must be nonnegative. The link counts | ||
|  |     ``R[:,2]`` must be positive and no greater than :math:`n-1`. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     R : ndarray | ||
|  |         The inconsistency matrix to check for validity. | ||
|  |     warning : bool, optional | ||
|  |         When True, issues a Python warning if the linkage | ||
|  |         matrix passed is invalid. | ||
|  |     throw : bool, optional | ||
|  |         When True, throws a Python exception if the linkage | ||
|  |         matrix passed is invalid. | ||
|  |     name : str, optional | ||
|  |         This string refers to the variable name of the invalid | ||
|  |         linkage matrix. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     b : bool | ||
|  |         True if the inconsistency matrix is valid; False otherwise. | ||
|  | 
 | ||
|  |     Notes | ||
|  |     ----- | ||
|  |     *Array API support (experimental):* If the input is a lazy Array (e.g. Dask | ||
|  |     or JAX), the return value may be a 0-dimensional bool Array. When warning=True | ||
|  |     or throw=True, calling this function materializes the array. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     linkage : for a description of what a linkage matrix is. | ||
|  |     inconsistent : for the creation of a inconsistency matrix. | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import ward, inconsistent, is_valid_im | ||
|  |     >>> from scipy.spatial.distance import pdist | ||
|  | 
 | ||
|  |     Given a data set ``X``, we can apply a clustering method to obtain a | ||
|  |     linkage matrix ``Z``. `scipy.cluster.hierarchy.inconsistent` can | ||
|  |     be also used to obtain the inconsistency matrix ``R`` associated to | ||
|  |     this clustering process: | ||
|  | 
 | ||
|  |     >>> X = [[0, 0], [0, 1], [1, 0], | ||
|  |     ...      [0, 4], [0, 3], [1, 4], | ||
|  |     ...      [4, 0], [3, 0], [4, 1], | ||
|  |     ...      [4, 4], [3, 4], [4, 3]] | ||
|  | 
 | ||
|  |     >>> Z = ward(pdist(X)) | ||
|  |     >>> R = inconsistent(Z) | ||
|  |     >>> Z | ||
|  |     array([[ 0.        ,  1.        ,  1.        ,  2.        ], | ||
|  |            [ 3.        ,  4.        ,  1.        ,  2.        ], | ||
|  |            [ 6.        ,  7.        ,  1.        ,  2.        ], | ||
|  |            [ 9.        , 10.        ,  1.        ,  2.        ], | ||
|  |            [ 2.        , 12.        ,  1.29099445,  3.        ], | ||
|  |            [ 5.        , 13.        ,  1.29099445,  3.        ], | ||
|  |            [ 8.        , 14.        ,  1.29099445,  3.        ], | ||
|  |            [11.        , 15.        ,  1.29099445,  3.        ], | ||
|  |            [16.        , 17.        ,  5.77350269,  6.        ], | ||
|  |            [18.        , 19.        ,  5.77350269,  6.        ], | ||
|  |            [20.        , 21.        ,  8.16496581, 12.        ]]) | ||
|  |     >>> R | ||
|  |     array([[1.        , 0.        , 1.        , 0.        ], | ||
|  |            [1.        , 0.        , 1.        , 0.        ], | ||
|  |            [1.        , 0.        , 1.        , 0.        ], | ||
|  |            [1.        , 0.        , 1.        , 0.        ], | ||
|  |            [1.14549722, 0.20576415, 2.        , 0.70710678], | ||
|  |            [1.14549722, 0.20576415, 2.        , 0.70710678], | ||
|  |            [1.14549722, 0.20576415, 2.        , 0.70710678], | ||
|  |            [1.14549722, 0.20576415, 2.        , 0.70710678], | ||
|  |            [2.78516386, 2.58797734, 3.        , 1.15470054], | ||
|  |            [2.78516386, 2.58797734, 3.        , 1.15470054], | ||
|  |            [6.57065706, 1.38071187, 3.        , 1.15470054]]) | ||
|  | 
 | ||
|  |     Now we can use `scipy.cluster.hierarchy.is_valid_im` to verify that | ||
|  |     ``R`` is correct: | ||
|  | 
 | ||
|  |     >>> is_valid_im(R) | ||
|  |     True | ||
|  | 
 | ||
|  |     However, if ``R`` is wrongly constructed (e.g., one of the standard | ||
|  |     deviations is set to a negative value), then the check will fail: | ||
|  | 
 | ||
|  |     >>> R[-1,1] = R[-1,1] * -1 | ||
|  |     >>> is_valid_im(R) | ||
|  |     False | ||
|  | 
 | ||
|  |     """
 | ||
|  |     xp = array_namespace(R) | ||
|  |     R = _asarray(R, xp=xp) | ||
|  |     return _is_valid_im(R, warning=warning, throw=throw, name=name, | ||
|  |                         materialize=True, xp=xp) | ||
|  | 
 | ||
|  | 
 | ||
|  | def _is_valid_im(R, warning=False, throw=False, name=None, materialize=False, *, xp): | ||
|  |     """Variant of `is_valid_im` to be called internally by other scipy functions,
 | ||
|  |     which by default does not materialize lazy input arrays (Dask, JAX, etc.) when | ||
|  |     warning=True or throw=True. | ||
|  |     """
 | ||
|  |     name_str = f"{name!r} " if name else '' | ||
|  |     try: | ||
|  |         if R.dtype != xp.float64: | ||
|  |             raise TypeError(f'Inconsistency matrix {name_str}must contain doubles ' | ||
|  |                             '(double).') | ||
|  |         if len(R.shape) != 2: | ||
|  |             raise ValueError(f'Inconsistency matrix {name_str}must have shape=2 (i.e. ' | ||
|  |                              'be two-dimensional).') | ||
|  |         if R.shape[1] != 4: | ||
|  |             raise ValueError(f'Inconsistency matrix {name_str}' | ||
|  |                              'must have 4 columns.') | ||
|  |         if R.shape[0] < 1: | ||
|  |             raise ValueError(f'Inconsistency matrix {name_str}' | ||
|  |                              'must have at least one row.') | ||
|  |     except (TypeError, ValueError) as e: | ||
|  |         if throw: | ||
|  |             raise | ||
|  |         if warning: | ||
|  |             _warning(str(e)) | ||
|  |         return False | ||
|  | 
 | ||
|  |     return _lazy_valid_checks( | ||
|  |         (xp.any(R[:, 0] < 0), | ||
|  |          f'Inconsistency matrix {name_str} contains negative link height means.'), | ||
|  |         (xp.any(R[:, 1] < 0), | ||
|  |          f'Inconsistency matrix {name_str} contains negative link height standard ' | ||
|  |          'deviations.'), | ||
|  |         (xp.any(R[:, 2] < 0), | ||
|  |          f'Inconsistency matrix {name_str} contains negative link counts.'), | ||
|  |         throw=throw, warning=warning, materialize=materialize, xp=xp | ||
|  |     ) | ||
|  | 
 | ||
|  | 
 | ||
|  | @xp_capabilities(warnings=[("dask.array", "see notes"), ("jax.numpy", "see notes")]) | ||
|  | def is_valid_linkage(Z, warning=False, throw=False, name=None): | ||
|  |     """
 | ||
|  |     Check the validity of a linkage matrix. | ||
|  | 
 | ||
|  |     A linkage matrix is valid if it is a 2-D array (type double) | ||
|  |     with :math:`n` rows and 4 columns. The first two columns must contain | ||
|  |     indices between 0 and :math:`2n-1`. For a given row ``i``, the following | ||
|  |     two expressions have to hold: | ||
|  | 
 | ||
|  |     .. math:: | ||
|  | 
 | ||
|  |         0 \\leq \\mathtt{Z[i,0]} \\leq i+n-1 | ||
|  |         0 \\leq Z[i,1] \\leq i+n-1 | ||
|  | 
 | ||
|  |     I.e., a cluster cannot join another cluster unless the cluster being joined | ||
|  |     has been generated. | ||
|  | 
 | ||
|  |     The fourth column of `Z` represents the number of original observations | ||
|  |     in a cluster, so a valid ``Z[i, 3]`` value may not exceed the number of | ||
|  |     original observations. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     Z : array_like | ||
|  |         Linkage matrix. | ||
|  |     warning : bool, optional | ||
|  |         When True, issues a Python warning if the linkage | ||
|  |         matrix passed is invalid. | ||
|  |     throw : bool, optional | ||
|  |         When True, throws a Python exception if the linkage | ||
|  |         matrix passed is invalid. | ||
|  |     name : str, optional | ||
|  |         This string refers to the variable name of the invalid | ||
|  |         linkage matrix. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     b : bool | ||
|  |         True if the inconsistency matrix is valid; False otherwise. | ||
|  | 
 | ||
|  |     Notes | ||
|  |     ----- | ||
|  |     *Array API support (experimental):* If the input is a lazy Array (e.g. Dask | ||
|  |     or JAX), the return value may be a 0-dimensional bool Array. When warning=True | ||
|  |     or throw=True, calling this function materializes the array. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     linkage: for a description of what a linkage matrix is. | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import ward, is_valid_linkage | ||
|  |     >>> from scipy.spatial.distance import pdist | ||
|  | 
 | ||
|  |     All linkage matrices generated by the clustering methods in this module | ||
|  |     will be valid (i.e., they will have the appropriate dimensions and the two | ||
|  |     required expressions will hold for all the rows). | ||
|  | 
 | ||
|  |     We can check this using `scipy.cluster.hierarchy.is_valid_linkage`: | ||
|  | 
 | ||
|  |     >>> X = [[0, 0], [0, 1], [1, 0], | ||
|  |     ...      [0, 4], [0, 3], [1, 4], | ||
|  |     ...      [4, 0], [3, 0], [4, 1], | ||
|  |     ...      [4, 4], [3, 4], [4, 3]] | ||
|  | 
 | ||
|  |     >>> Z = ward(pdist(X)) | ||
|  |     >>> Z | ||
|  |     array([[ 0.        ,  1.        ,  1.        ,  2.        ], | ||
|  |            [ 3.        ,  4.        ,  1.        ,  2.        ], | ||
|  |            [ 6.        ,  7.        ,  1.        ,  2.        ], | ||
|  |            [ 9.        , 10.        ,  1.        ,  2.        ], | ||
|  |            [ 2.        , 12.        ,  1.29099445,  3.        ], | ||
|  |            [ 5.        , 13.        ,  1.29099445,  3.        ], | ||
|  |            [ 8.        , 14.        ,  1.29099445,  3.        ], | ||
|  |            [11.        , 15.        ,  1.29099445,  3.        ], | ||
|  |            [16.        , 17.        ,  5.77350269,  6.        ], | ||
|  |            [18.        , 19.        ,  5.77350269,  6.        ], | ||
|  |            [20.        , 21.        ,  8.16496581, 12.        ]]) | ||
|  |     >>> is_valid_linkage(Z) | ||
|  |     True | ||
|  | 
 | ||
|  |     However, if we create a linkage matrix in a wrong way - or if we modify | ||
|  |     a valid one in a way that any of the required expressions don't hold | ||
|  |     anymore, then the check will fail: | ||
|  | 
 | ||
|  |     >>> Z[3][1] = 20    # the cluster number 20 is not defined at this point | ||
|  |     >>> is_valid_linkage(Z) | ||
|  |     False | ||
|  | 
 | ||
|  |     """
 | ||
|  |     xp = array_namespace(Z) | ||
|  |     Z = _asarray(Z, xp=xp) | ||
|  |     return _is_valid_linkage(Z, warning=warning, throw=throw, | ||
|  |                              name=name, materialize=True, xp=xp) | ||
|  | 
 | ||
|  | 
 | ||
|  | def _is_valid_linkage(Z, warning=False, throw=False, name=None, | ||
|  |                       materialize=False, *, xp): | ||
|  |     """Variant of `is_valid_linkage` to be called internally by other scipy functions,
 | ||
|  |     which by default does not materialize lazy input arrays (Dask, JAX, etc.) when | ||
|  |     warning=True or throw=True. | ||
|  |     """
 | ||
|  |     name_str = f"{name!r} " if name else '' | ||
|  |     try: | ||
|  |         if Z.dtype != xp.float64: | ||
|  |             raise TypeError(f'Linkage matrix {name_str}must contain doubles.') | ||
|  |         if len(Z.shape) != 2: | ||
|  |             raise ValueError(f'Linkage matrix {name_str}must have shape=2 (i.e. be' | ||
|  |                              ' two-dimensional).') | ||
|  |         if Z.shape[1] != 4: | ||
|  |             raise ValueError(f'Linkage matrix {name_str}must have 4 columns.') | ||
|  |         if Z.shape[0] == 0: | ||
|  |             raise ValueError('Linkage must be computed on at least two ' | ||
|  |                              'observations.') | ||
|  |     except (TypeError, ValueError) as e: | ||
|  |         if throw: | ||
|  |             raise | ||
|  |         if warning: | ||
|  |             _warning(str(e)) | ||
|  |         return False | ||
|  | 
 | ||
|  |     n = Z.shape[0] | ||
|  |     if n < 2: | ||
|  |         return True | ||
|  | 
 | ||
|  |     return _lazy_valid_checks( | ||
|  |         (xp.any(Z[:, :2] < 0), | ||
|  |          f'Linkage {name_str}contains negative indices.'), | ||
|  |         (xp.any(Z[:, 2] < 0), | ||
|  |          f'Linkage {name_str}contains negative distances.'), | ||
|  |         (xp.any(Z[:, 3] < 0), | ||
|  |          f'Linkage {name_str}contains negative counts.'), | ||
|  |         (xp.any(Z[:, 3] > n + 1), | ||
|  |          f'Linkage {name_str}contains excessive observations in a cluster'), | ||
|  |         (xp.any(xp.max(Z[:, :2], axis=1) >= xp.arange(n + 1, 2 * n + 1, dtype=Z.dtype)), | ||
|  |          f'Linkage {name_str}uses non-singleton cluster before it is formed.'), | ||
|  |         (xpx.nunique(Z[:, :2]) < n * 2, | ||
|  |          f'Linkage {name_str}uses the same cluster more than once.'), | ||
|  |         throw=throw, warning=warning, materialize=materialize, xp=xp | ||
|  |     ) | ||
|  | 
 | ||
|  | 
 | ||
|  | def _lazy_valid_checks(*args, throw=False, warning=False, materialize=False, xp): | ||
|  |     """Validate a set of conditions on the contents of possibly lazy arrays.
 | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     args : tuples of (Array, str) | ||
|  |         The first element of each tuple must be a 0-dimensional Array | ||
|  |         that evaluates to bool; the second element must be the message to convey | ||
|  |         if the  first element evaluates to True. | ||
|  |     throw: bool | ||
|  |         Set to True to `raise ValueError(args[i][1])` if `args[i][0]` is True. | ||
|  |     warning: bool | ||
|  |         Set to True to issue a warning with message `args[i][1]` if `args[i][0]` | ||
|  |         is True. | ||
|  |     materialize: bool | ||
|  |         Set to True to force materialization of lazy arrays when throw=True or | ||
|  |         warning=True. If the inputs are lazy and materialize=False, ignore the | ||
|  |         `throw` and `warning` flags. | ||
|  |     xp: module | ||
|  |         Array API namespace | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     If xp is an eager backend (e.g. numpy) and all conditions are False, return True. | ||
|  |     If throw is True, raise. Otherwise, return False. | ||
|  | 
 | ||
|  |     If xp is a lazy backend (e.g. Dask or JAX), return a 0-dimensional bool Array. | ||
|  |     """
 | ||
|  |     conds = xp.concat([xp.reshape(cond, (1, )) for cond, _ in args]) | ||
|  | 
 | ||
|  |     lazy = is_lazy_array(conds) | ||
|  |     if not throw and not warning or (lazy and not materialize): | ||
|  |         out = ~xp.any(conds) | ||
|  |         return out if lazy else bool(out) | ||
|  | 
 | ||
|  |     if is_dask(xp): | ||
|  |         # Only materialize the graph once, instead of once per check | ||
|  |         conds = conds.compute() | ||
|  | 
 | ||
|  |     # Don't call np.asarray(conds), as it would be blocked by the device transfer | ||
|  |     # guard on CuPy and PyTorch and the densification guard on Sparse, whereas | ||
|  |     # bool() will not. | ||
|  |     conds = [bool(cond) for cond in conds] | ||
|  | 
 | ||
|  |     for cond, (_, msg) in zip(conds, args): | ||
|  |         if throw and cond: | ||
|  |             raise ValueError(msg) | ||
|  |         elif warning and cond: | ||
|  |             warnings.warn(msg, ClusterWarning, stacklevel=3) | ||
|  | 
 | ||
|  |     return not any(conds) | ||
|  | 
 | ||
|  | 
 | ||
|  | @xp_capabilities() | ||
|  | def num_obs_linkage(Z): | ||
|  |     """
 | ||
|  |     Return the number of original observations of the linkage matrix passed. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     Z : ndarray | ||
|  |         The linkage matrix on which to perform the operation. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     n : int | ||
|  |         The number of original observations in the linkage. | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import ward, num_obs_linkage | ||
|  |     >>> from scipy.spatial.distance import pdist | ||
|  | 
 | ||
|  |     >>> X = [[0, 0], [0, 1], [1, 0], | ||
|  |     ...      [0, 4], [0, 3], [1, 4], | ||
|  |     ...      [4, 0], [3, 0], [4, 1], | ||
|  |     ...      [4, 4], [3, 4], [4, 3]] | ||
|  | 
 | ||
|  |     >>> Z = ward(pdist(X)) | ||
|  | 
 | ||
|  |     ``Z`` is a linkage matrix obtained after using the Ward clustering method | ||
|  |     with ``X``, a dataset with 12 data points. | ||
|  | 
 | ||
|  |     >>> num_obs_linkage(Z) | ||
|  |     12 | ||
|  | 
 | ||
|  |     """
 | ||
|  |     xp = array_namespace(Z) | ||
|  |     Z = _asarray(Z, xp=xp) | ||
|  |     _is_valid_linkage(Z, throw=True, name='Z', xp=xp) | ||
|  |     return Z.shape[0] + 1 | ||
|  | 
 | ||
|  | 
 | ||
|  | @xp_capabilities() | ||
|  | def correspond(Z, Y): | ||
|  |     """
 | ||
|  |     Check for correspondence between linkage and condensed distance matrices. | ||
|  | 
 | ||
|  |     They must have the same number of original observations for | ||
|  |     the check to succeed. | ||
|  | 
 | ||
|  |     This function is useful as a sanity check in algorithms that make | ||
|  |     extensive use of linkage and distance matrices that must | ||
|  |     correspond to the same set of original observations. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     Z : array_like | ||
|  |         The linkage matrix to check for correspondence. | ||
|  |     Y : array_like | ||
|  |         The condensed distance matrix to check for correspondence. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     b : bool | ||
|  |         A boolean indicating whether the linkage matrix and distance | ||
|  |         matrix could possibly correspond to one another. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     linkage : for a description of what a linkage matrix is. | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import ward, correspond | ||
|  |     >>> from scipy.spatial.distance import pdist | ||
|  | 
 | ||
|  |     This method can be used to check if a given linkage matrix ``Z`` has been | ||
|  |     obtained from the application of a cluster method over a dataset ``X``: | ||
|  | 
 | ||
|  |     >>> X = [[0, 0], [0, 1], [1, 0], | ||
|  |     ...      [0, 4], [0, 3], [1, 4], | ||
|  |     ...      [4, 0], [3, 0], [4, 1], | ||
|  |     ...      [4, 4], [3, 4], [4, 3]] | ||
|  |     >>> X_condensed = pdist(X) | ||
|  |     >>> Z = ward(X_condensed) | ||
|  | 
 | ||
|  |     Here, we can compare ``Z`` and ``X`` (in condensed form): | ||
|  | 
 | ||
|  |     >>> correspond(Z, X_condensed) | ||
|  |     True | ||
|  | 
 | ||
|  |     """
 | ||
|  |     xp = array_namespace(Z, Y) | ||
|  |     Z = _asarray(Z, xp=xp) | ||
|  |     Y = _asarray(Y, xp=xp) | ||
|  |     _is_valid_linkage(Z, throw=True, xp=xp) | ||
|  |     distance.is_valid_y(Y, throw=True) | ||
|  |     return distance.num_obs_y(Y) == num_obs_linkage(Z) | ||
|  | 
 | ||
|  | 
 | ||
|  | @xp_capabilities(cpu_only=True, reason="Cython code", | ||
|  |                  jax_jit=False, allow_dask_compute=True) | ||
|  | def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None): | ||
|  |     """
 | ||
|  |     Form flat clusters from the hierarchical clustering defined by | ||
|  |     the given linkage matrix. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     Z : ndarray | ||
|  |         The hierarchical clustering encoded with the matrix returned | ||
|  |         by the `linkage` function. | ||
|  |     t : scalar | ||
|  |         For criteria 'inconsistent', 'distance' or 'monocrit', | ||
|  |          this is the threshold to apply when forming flat clusters. | ||
|  |         For 'maxclust' or 'maxclust_monocrit' criteria, | ||
|  |          this would be max number of clusters requested. | ||
|  |     criterion : str, optional | ||
|  |         The criterion to use in forming flat clusters. This can | ||
|  |         be any of the following values: | ||
|  | 
 | ||
|  |           ``inconsistent`` : | ||
|  |               If a cluster node and all its | ||
|  |               descendants have an inconsistent value less than or equal | ||
|  |               to `t`, then all its leaf descendants belong to the | ||
|  |               same flat cluster. When no non-singleton cluster meets | ||
|  |               this criterion, every node is assigned to its own | ||
|  |               cluster. (Default) | ||
|  | 
 | ||
|  |           ``distance`` : | ||
|  |               Forms flat clusters so that the original | ||
|  |               observations in each flat cluster have no greater a | ||
|  |               cophenetic distance than `t`. | ||
|  | 
 | ||
|  |           ``maxclust`` : | ||
|  |               Finds a minimum threshold ``r`` so that | ||
|  |               the cophenetic distance between any two original | ||
|  |               observations in the same flat cluster is no more than | ||
|  |               ``r`` and no more than `t` flat clusters are formed. | ||
|  | 
 | ||
|  |           ``monocrit`` : | ||
|  |               Forms a flat cluster from a cluster node c | ||
|  |               with index i when ``monocrit[j] <= t``. | ||
|  | 
 | ||
|  |               For example, to threshold on the maximum mean distance | ||
|  |               as computed in the inconsistency matrix R with a | ||
|  |               threshold of 0.8 do:: | ||
|  | 
 | ||
|  |                   MR = maxRstat(Z, R, 3) | ||
|  |                   fcluster(Z, t=0.8, criterion='monocrit', monocrit=MR) | ||
|  | 
 | ||
|  |           ``maxclust_monocrit`` : | ||
|  |               Forms a flat cluster from a | ||
|  |               non-singleton cluster node ``c`` when ``monocrit[i] <= | ||
|  |               r`` for all cluster indices ``i`` below and including | ||
|  |               ``c``. ``r`` is minimized such that no more than ``t`` | ||
|  |               flat clusters are formed. monocrit must be | ||
|  |               monotonic. For example, to minimize the threshold t on | ||
|  |               maximum inconsistency values so that no more than 3 flat | ||
|  |               clusters are formed, do:: | ||
|  | 
 | ||
|  |                   MI = maxinconsts(Z, R) | ||
|  |                   fcluster(Z, t=3, criterion='maxclust_monocrit', monocrit=MI) | ||
|  |     depth : int, optional | ||
|  |         The maximum depth to perform the inconsistency calculation. | ||
|  |         It has no meaning for the other criteria. Default is 2. | ||
|  |     R : ndarray, optional | ||
|  |         The inconsistency matrix to use for the ``'inconsistent'`` | ||
|  |         criterion. This matrix is computed if not provided. | ||
|  |     monocrit : ndarray, optional | ||
|  |         An array of length n-1. `monocrit[i]` is the | ||
|  |         statistics upon which non-singleton i is thresholded. The | ||
|  |         monocrit vector must be monotonic, i.e., given a node c with | ||
|  |         index i, for all node indices j corresponding to nodes | ||
|  |         below c, ``monocrit[i] >= monocrit[j]``. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     fcluster : ndarray | ||
|  |         An array of length ``n``. ``T[i]`` is the flat cluster number to | ||
|  |         which original observation ``i`` belongs. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     linkage : for information about hierarchical clustering methods work. | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import ward, fcluster | ||
|  |     >>> from scipy.spatial.distance import pdist | ||
|  | 
 | ||
|  |     All cluster linkage methods - e.g., `scipy.cluster.hierarchy.ward` | ||
|  |     generate a linkage matrix ``Z`` as their output: | ||
|  | 
 | ||
|  |     >>> X = [[0, 0], [0, 1], [1, 0], | ||
|  |     ...      [0, 4], [0, 3], [1, 4], | ||
|  |     ...      [4, 0], [3, 0], [4, 1], | ||
|  |     ...      [4, 4], [3, 4], [4, 3]] | ||
|  | 
 | ||
|  |     >>> Z = ward(pdist(X)) | ||
|  | 
 | ||
|  |     >>> Z | ||
|  |     array([[ 0.        ,  1.        ,  1.        ,  2.        ], | ||
|  |            [ 3.        ,  4.        ,  1.        ,  2.        ], | ||
|  |            [ 6.        ,  7.        ,  1.        ,  2.        ], | ||
|  |            [ 9.        , 10.        ,  1.        ,  2.        ], | ||
|  |            [ 2.        , 12.        ,  1.29099445,  3.        ], | ||
|  |            [ 5.        , 13.        ,  1.29099445,  3.        ], | ||
|  |            [ 8.        , 14.        ,  1.29099445,  3.        ], | ||
|  |            [11.        , 15.        ,  1.29099445,  3.        ], | ||
|  |            [16.        , 17.        ,  5.77350269,  6.        ], | ||
|  |            [18.        , 19.        ,  5.77350269,  6.        ], | ||
|  |            [20.        , 21.        ,  8.16496581, 12.        ]]) | ||
|  | 
 | ||
|  |     This matrix represents a dendrogram, where the first and second elements | ||
|  |     are the two clusters merged at each step, the third element is the | ||
|  |     distance between these clusters, and the fourth element is the size of | ||
|  |     the new cluster - the number of original data points included. | ||
|  | 
 | ||
|  |     `scipy.cluster.hierarchy.fcluster` can be used to flatten the | ||
|  |     dendrogram, obtaining as a result an assignation of the original data | ||
|  |     points to single clusters. | ||
|  | 
 | ||
|  |     This assignation mostly depends on a distance threshold ``t`` - the maximum | ||
|  |     inter-cluster distance allowed: | ||
|  | 
 | ||
|  |     >>> fcluster(Z, t=0.9, criterion='distance') | ||
|  |     array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12], dtype=int32) | ||
|  | 
 | ||
|  |     >>> fcluster(Z, t=1.1, criterion='distance') | ||
|  |     array([1, 1, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8], dtype=int32) | ||
|  | 
 | ||
|  |     >>> fcluster(Z, t=3, criterion='distance') | ||
|  |     array([1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4], dtype=int32) | ||
|  | 
 | ||
|  |     >>> fcluster(Z, t=9, criterion='distance') | ||
|  |     array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32) | ||
|  | 
 | ||
|  |     In the first case, the threshold ``t`` is too small to allow any two | ||
|  |     samples in the data to form a cluster, so 12 different clusters are | ||
|  |     returned. | ||
|  | 
 | ||
|  |     In the second case, the threshold is large enough to allow the first | ||
|  |     4 points to be merged with their nearest neighbors. So, here, only 8 | ||
|  |     clusters are returned. | ||
|  | 
 | ||
|  |     The third case, with a much higher threshold, allows for up to 8 data | ||
|  |     points to be connected - so 4 clusters are returned here. | ||
|  | 
 | ||
|  |     Lastly, the threshold of the fourth case is large enough to allow for | ||
|  |     all data points to be merged together - so a single cluster is returned. | ||
|  | 
 | ||
|  |     """
 | ||
|  |     xp = array_namespace(Z) | ||
|  |     Z = _asarray(Z, order='C', dtype=xp.float64, xp=xp) | ||
|  |     _is_valid_linkage(Z, throw=True, name='Z', materialize=True, xp=xp) | ||
|  | 
 | ||
|  |     n = Z.shape[0] + 1 | ||
|  |     T = np.zeros((n,), dtype='i') | ||
|  | 
 | ||
|  |     if monocrit is not None: | ||
|  |         monocrit = np.asarray(monocrit, order='C', dtype=np.float64) | ||
|  | 
 | ||
|  |     Z = np.asarray(Z) | ||
|  |     monocrit = np.asarray(monocrit) | ||
|  |     if criterion == 'inconsistent': | ||
|  |         if R is None: | ||
|  |             R = inconsistent(Z, depth) | ||
|  |         else: | ||
|  |             R = _asarray(R, order='C', dtype=xp.float64, xp=xp) | ||
|  |             _is_valid_im(R, throw=True, name='R', materialize=True, xp=xp) | ||
|  |             # Since the C code does not support striding using strides. | ||
|  |             # The dimensions are used instead. | ||
|  |             R = np.asarray(R) | ||
|  |         _hierarchy.cluster_in(Z, R, T, float(t), int(n)) | ||
|  |     elif criterion == 'distance': | ||
|  |         _hierarchy.cluster_dist(Z, T, float(t), int(n)) | ||
|  |     elif criterion == 'maxclust': | ||
|  |         _hierarchy.cluster_maxclust_dist(Z, T, int(n), t) | ||
|  |     elif criterion == 'monocrit': | ||
|  |         _hierarchy.cluster_monocrit(Z, monocrit, T, float(t), int(n)) | ||
|  |     elif criterion == 'maxclust_monocrit': | ||
|  |         _hierarchy.cluster_maxclust_monocrit(Z, monocrit, T, int(n), int(t)) | ||
|  |     else: | ||
|  |         raise ValueError(f'Invalid cluster formation criterion: {str(criterion)}') | ||
|  |     return xp.asarray(T) | ||
|  | 
 | ||
|  | 
 | ||
|  | @xp_capabilities(cpu_only=True, reason="Cython code", | ||
|  |                  jax_jit=False, allow_dask_compute=True) | ||
|  | def fclusterdata(X, t, criterion='inconsistent', | ||
|  |                  metric='euclidean', depth=2, method='single', R=None): | ||
|  |     """
 | ||
|  |     Cluster observation data using a given metric. | ||
|  | 
 | ||
|  |     Clusters the original observations in the n-by-m data | ||
|  |     matrix X (n observations in m dimensions), using the euclidean | ||
|  |     distance metric to calculate distances between original observations, | ||
|  |     performs hierarchical clustering using the single linkage algorithm, | ||
|  |     and forms flat clusters using the inconsistency method with `t` as the | ||
|  |     cut-off threshold. | ||
|  | 
 | ||
|  |     A 1-D array ``T`` of length ``n`` is returned. ``T[i]`` is | ||
|  |     the index of the flat cluster to which the original observation ``i`` | ||
|  |     belongs. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     X : (N, M) ndarray | ||
|  |         N by M data matrix with N observations in M dimensions. | ||
|  |     t : scalar | ||
|  |         For criteria 'inconsistent', 'distance' or 'monocrit', | ||
|  |          this is the threshold to apply when forming flat clusters. | ||
|  |         For 'maxclust' or 'maxclust_monocrit' criteria, | ||
|  |          this would be max number of clusters requested. | ||
|  |     criterion : str, optional | ||
|  |         Specifies the criterion for forming flat clusters. Valid | ||
|  |         values are 'inconsistent' (default), 'distance', or 'maxclust' | ||
|  |         cluster formation algorithms. See `fcluster` for descriptions. | ||
|  |     metric : str or function, optional | ||
|  |         The distance metric for calculating pairwise distances. See | ||
|  |         ``distance.pdist`` for descriptions and linkage to verify | ||
|  |         compatibility with the linkage method. | ||
|  |     depth : int, optional | ||
|  |         The maximum depth for the inconsistency calculation. See | ||
|  |         `inconsistent` for more information. | ||
|  |     method : str, optional | ||
|  |         The linkage method to use (single, complete, average, | ||
|  |         weighted, median centroid, ward). See `linkage` for more | ||
|  |         information. Default is "single". | ||
|  |     R : ndarray, optional | ||
|  |         The inconsistency matrix. It will be computed if necessary | ||
|  |         if it is not passed. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     fclusterdata : ndarray | ||
|  |         A vector of length n. T[i] is the flat cluster number to | ||
|  |         which original observation i belongs. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     scipy.spatial.distance.pdist : pairwise distance metrics | ||
|  | 
 | ||
|  |     Notes | ||
|  |     ----- | ||
|  |     This function is similar to the MATLAB function ``clusterdata``. | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import fclusterdata | ||
|  | 
 | ||
|  |     This is a convenience method that abstracts all the steps to perform in a | ||
|  |     typical SciPy's hierarchical clustering workflow. | ||
|  | 
 | ||
|  |     * Transform the input data into a condensed matrix with | ||
|  |       `scipy.spatial.distance.pdist`. | ||
|  | 
 | ||
|  |     * Apply a clustering method. | ||
|  | 
 | ||
|  |     * Obtain flat clusters at a user defined distance threshold ``t`` using | ||
|  |       `scipy.cluster.hierarchy.fcluster`. | ||
|  | 
 | ||
|  |     >>> X = [[0, 0], [0, 1], [1, 0], | ||
|  |     ...      [0, 4], [0, 3], [1, 4], | ||
|  |     ...      [4, 0], [3, 0], [4, 1], | ||
|  |     ...      [4, 4], [3, 4], [4, 3]] | ||
|  | 
 | ||
|  |     >>> fclusterdata(X, t=1) | ||
|  |     array([3, 3, 3, 4, 4, 4, 2, 2, 2, 1, 1, 1], dtype=int32) | ||
|  | 
 | ||
|  |     The output here (for the dataset ``X``, distance threshold ``t``, and the | ||
|  |     default settings) is four clusters with three data points each. | ||
|  | 
 | ||
|  |     """
 | ||
|  |     xp = array_namespace(X) | ||
|  |     X = _asarray(X, order='C', dtype=xp.float64, xp=xp) | ||
|  | 
 | ||
|  |     if X.ndim != 2: | ||
|  |         raise TypeError('The observation matrix X must be an n by m array.') | ||
|  | 
 | ||
|  |     Y = distance.pdist(X, metric=metric) | ||
|  |     Z = linkage(Y, method=method) | ||
|  |     if R is None: | ||
|  |         R = inconsistent(Z, d=depth) | ||
|  |     else: | ||
|  |         R = _asarray(R, order='C', xp=xp) | ||
|  |     T = fcluster(Z, criterion=criterion, depth=depth, R=R, t=t) | ||
|  |     return T | ||
|  | 
 | ||
|  | 
 | ||
|  | @lazy_cython | ||
|  | def leaves_list(Z): | ||
|  |     """
 | ||
|  |     Return a list of leaf node ids. | ||
|  | 
 | ||
|  |     The return corresponds to the observation vector index as it appears | ||
|  |     in the tree from left to right. Z is a linkage matrix. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     Z : ndarray | ||
|  |         The hierarchical clustering encoded as a matrix.  `Z` is | ||
|  |         a linkage matrix.  See `linkage` for more information. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     leaves_list : ndarray | ||
|  |         The list of leaf node ids. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     dendrogram : for information about dendrogram structure. | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import ward, dendrogram, leaves_list | ||
|  |     >>> from scipy.spatial.distance import pdist | ||
|  |     >>> from matplotlib import pyplot as plt | ||
|  | 
 | ||
|  |     >>> X = [[0, 0], [0, 1], [1, 0], | ||
|  |     ...      [0, 4], [0, 3], [1, 4], | ||
|  |     ...      [4, 0], [3, 0], [4, 1], | ||
|  |     ...      [4, 4], [3, 4], [4, 3]] | ||
|  | 
 | ||
|  |     >>> Z = ward(pdist(X)) | ||
|  | 
 | ||
|  |     The linkage matrix ``Z`` represents a dendrogram, that is, a tree that | ||
|  |     encodes the structure of the clustering performed. | ||
|  |     `scipy.cluster.hierarchy.leaves_list` shows the mapping between | ||
|  |     indices in the ``X`` dataset and leaves in the dendrogram: | ||
|  | 
 | ||
|  |     >>> leaves_list(Z) | ||
|  |     array([ 2,  0,  1,  5,  3,  4,  8,  6,  7, 11,  9, 10], dtype=int32) | ||
|  | 
 | ||
|  |     >>> fig = plt.figure(figsize=(25, 10)) | ||
|  |     >>> dn = dendrogram(Z) | ||
|  |     >>> plt.show() | ||
|  | 
 | ||
|  |     """
 | ||
|  |     xp = array_namespace(Z) | ||
|  |     Z = _asarray(Z, order='C', xp=xp) | ||
|  |     _is_valid_linkage(Z, throw=True, name='Z', xp=xp) | ||
|  | 
 | ||
|  |     def cy_leaves_list(Z, validate): | ||
|  |         if validate: | ||
|  |             _is_valid_linkage(Z, throw=True, name='Z', xp=np) | ||
|  |         n = Z.shape[0] + 1 | ||
|  |         ML = np.zeros((n,), dtype=np.int32) | ||
|  |         _hierarchy.prelist(Z, ML, n) | ||
|  |         return ML | ||
|  | 
 | ||
|  |     n = Z.shape[0] + 1 | ||
|  |     return xpx.lazy_apply(cy_leaves_list, Z, validate=is_lazy_array(Z), | ||
|  |                           shape=(n, ), dtype=xp.int32, | ||
|  |                           as_numpy=True, xp=xp) | ||
|  | 
 | ||
|  | 
 | ||
|  | # Maps number of leaves to text size. | ||
|  | # | ||
|  | # p <= 20, size="12" | ||
|  | # 20 < p <= 30, size="10" | ||
|  | # 30 < p <= 50, size="8" | ||
|  | # 50 < p <= np.inf, size="6" | ||
|  | 
 | ||
|  | _dtextsizes = {20: 12, 30: 10, 50: 8, 85: 6, np.inf: 5} | ||
|  | _drotation = {20: 0, 40: 45, np.inf: 90} | ||
|  | _dtextsortedkeys = list(_dtextsizes.keys()) | ||
|  | _dtextsortedkeys.sort() | ||
|  | _drotationsortedkeys = list(_drotation.keys()) | ||
|  | _drotationsortedkeys.sort() | ||
|  | 
 | ||
|  | 
 | ||
|  | def _remove_dups(L): | ||
|  |     """
 | ||
|  |     Remove duplicates AND preserve the original order of the elements. | ||
|  | 
 | ||
|  |     The set class is not guaranteed to do this. | ||
|  |     """
 | ||
|  |     seen_before = set() | ||
|  |     L2 = [] | ||
|  |     for i in L: | ||
|  |         if i not in seen_before: | ||
|  |             seen_before.add(i) | ||
|  |             L2.append(i) | ||
|  |     return L2 | ||
|  | 
 | ||
|  | 
 | ||
|  | def _get_tick_text_size(p): | ||
|  |     for k in _dtextsortedkeys: | ||
|  |         if p <= k: | ||
|  |             return _dtextsizes[k] | ||
|  | 
 | ||
|  | 
 | ||
|  | def _get_tick_rotation(p): | ||
|  |     for k in _drotationsortedkeys: | ||
|  |         if p <= k: | ||
|  |             return _drotation[k] | ||
|  | 
 | ||
|  | 
 | ||
|  | def _plot_dendrogram(icoords, dcoords, ivl, p, n, mh, orientation, | ||
|  |                      no_labels, color_list, leaf_font_size=None, | ||
|  |                      leaf_rotation=None, contraction_marks=None, | ||
|  |                      ax=None, above_threshold_color='C0'): | ||
|  |     # Import matplotlib here so that it's not imported unless dendrograms | ||
|  |     # are plotted. Raise an informative error if importing fails. | ||
|  |     try: | ||
|  |         # if an axis is provided, don't use pylab at all | ||
|  |         if ax is None: | ||
|  |             import matplotlib.pylab | ||
|  |         import matplotlib.patches | ||
|  |         import matplotlib.collections | ||
|  |     except ImportError as e: | ||
|  |         raise ImportError("You must install the matplotlib library to plot " | ||
|  |                           "the dendrogram. Use no_plot=True to calculate the " | ||
|  |                           "dendrogram without plotting.") from e | ||
|  | 
 | ||
|  |     if ax is None: | ||
|  |         ax = matplotlib.pylab.gca() | ||
|  |         # if we're using pylab, we want to trigger a draw at the end | ||
|  |         trigger_redraw = True | ||
|  |     else: | ||
|  |         trigger_redraw = False | ||
|  | 
 | ||
|  |     # Independent variable plot width | ||
|  |     ivw = len(ivl) * 10 | ||
|  |     # Dependent variable plot height | ||
|  |     dvw = mh + mh * 0.05 | ||
|  | 
 | ||
|  |     iv_ticks = np.arange(5, len(ivl) * 10 + 5, 10) | ||
|  |     if orientation in ('top', 'bottom'): | ||
|  |         if orientation == 'top': | ||
|  |             ax.set_ylim([0, dvw]) | ||
|  |             ax.set_xlim([0, ivw]) | ||
|  |         else: | ||
|  |             ax.set_ylim([dvw, 0]) | ||
|  |             ax.set_xlim([0, ivw]) | ||
|  | 
 | ||
|  |         xlines = icoords | ||
|  |         ylines = dcoords | ||
|  |         if no_labels: | ||
|  |             ax.set_xticks([]) | ||
|  |             ax.set_xticklabels([]) | ||
|  |         else: | ||
|  |             ax.set_xticks(iv_ticks) | ||
|  | 
 | ||
|  |             if orientation == 'top': | ||
|  |                 ax.xaxis.set_ticks_position('bottom') | ||
|  |             else: | ||
|  |                 ax.xaxis.set_ticks_position('top') | ||
|  | 
 | ||
|  |             # Make the tick marks invisible because they cover up the links | ||
|  |             for line in ax.get_xticklines(): | ||
|  |                 line.set_visible(False) | ||
|  | 
 | ||
|  |             leaf_rot = (float(_get_tick_rotation(len(ivl))) | ||
|  |                         if (leaf_rotation is None) else leaf_rotation) | ||
|  |             leaf_font = (float(_get_tick_text_size(len(ivl))) | ||
|  |                          if (leaf_font_size is None) else leaf_font_size) | ||
|  |             ax.set_xticklabels(ivl, rotation=leaf_rot, size=leaf_font) | ||
|  | 
 | ||
|  |     elif orientation in ('left', 'right'): | ||
|  |         if orientation == 'left': | ||
|  |             ax.set_xlim([dvw, 0]) | ||
|  |             ax.set_ylim([0, ivw]) | ||
|  |         else: | ||
|  |             ax.set_xlim([0, dvw]) | ||
|  |             ax.set_ylim([0, ivw]) | ||
|  | 
 | ||
|  |         xlines = dcoords | ||
|  |         ylines = icoords | ||
|  |         if no_labels: | ||
|  |             ax.set_yticks([]) | ||
|  |             ax.set_yticklabels([]) | ||
|  |         else: | ||
|  |             ax.set_yticks(iv_ticks) | ||
|  | 
 | ||
|  |             if orientation == 'left': | ||
|  |                 ax.yaxis.set_ticks_position('right') | ||
|  |             else: | ||
|  |                 ax.yaxis.set_ticks_position('left') | ||
|  | 
 | ||
|  |             # Make the tick marks invisible because they cover up the links | ||
|  |             for line in ax.get_yticklines(): | ||
|  |                 line.set_visible(False) | ||
|  | 
 | ||
|  |             leaf_font = (float(_get_tick_text_size(len(ivl))) | ||
|  |                          if (leaf_font_size is None) else leaf_font_size) | ||
|  | 
 | ||
|  |             if leaf_rotation is not None: | ||
|  |                 ax.set_yticklabels(ivl, rotation=leaf_rotation, size=leaf_font) | ||
|  |             else: | ||
|  |                 ax.set_yticklabels(ivl, size=leaf_font) | ||
|  | 
 | ||
|  |     # Let's use collections instead. This way there is a separate legend item | ||
|  |     # for each tree grouping, rather than stupidly one for each line segment. | ||
|  |     colors_used = _remove_dups(color_list) | ||
|  |     color_to_lines = {} | ||
|  |     for color in colors_used: | ||
|  |         color_to_lines[color] = [] | ||
|  |     for (xline, yline, color) in zip(xlines, ylines, color_list): | ||
|  |         color_to_lines[color].append(list(zip(xline, yline))) | ||
|  | 
 | ||
|  |     colors_to_collections = {} | ||
|  |     # Construct the collections. | ||
|  |     for color in colors_used: | ||
|  |         coll = matplotlib.collections.LineCollection(color_to_lines[color], | ||
|  |                                                      colors=(color,)) | ||
|  |         colors_to_collections[color] = coll | ||
|  | 
 | ||
|  |     # Add all the groupings below the color threshold. | ||
|  |     for color in colors_used: | ||
|  |         if color != above_threshold_color: | ||
|  |             ax.add_collection(colors_to_collections[color]) | ||
|  |     # If there's a grouping of links above the color threshold, it goes last. | ||
|  |     if above_threshold_color in colors_to_collections: | ||
|  |         ax.add_collection(colors_to_collections[above_threshold_color]) | ||
|  | 
 | ||
|  |     if contraction_marks is not None: | ||
|  |         Ellipse = matplotlib.patches.Ellipse | ||
|  |         for (x, y) in contraction_marks: | ||
|  |             if orientation in ('left', 'right'): | ||
|  |                 e = Ellipse((y, x), width=dvw / 100, height=1.0) | ||
|  |             else: | ||
|  |                 e = Ellipse((x, y), width=1.0, height=dvw / 100) | ||
|  |             ax.add_artist(e) | ||
|  |             e.set_clip_box(ax.bbox) | ||
|  |             e.set_alpha(0.5) | ||
|  |             e.set_facecolor('k') | ||
|  | 
 | ||
|  |     if trigger_redraw: | ||
|  |         matplotlib.pylab.draw_if_interactive() | ||
|  | 
 | ||
|  | 
 | ||
|  | # C0  is used for above threshold color | ||
|  | _link_line_colors_default = ('C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9') | ||
|  | _link_line_colors = list(_link_line_colors_default) | ||
|  | 
 | ||
|  | 
 | ||
|  | def set_link_color_palette(palette): | ||
|  |     """
 | ||
|  |     Set list of matplotlib color codes for use by dendrogram. | ||
|  | 
 | ||
|  |     Note that this palette is global (i.e., setting it once changes the colors | ||
|  |     for all subsequent calls to `dendrogram`) and that it affects only the | ||
|  |     the colors below ``color_threshold``. | ||
|  | 
 | ||
|  |     Note that `dendrogram` also accepts a custom coloring function through its | ||
|  |     ``link_color_func`` keyword, which is more flexible and non-global. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     palette : list of str or None | ||
|  |         A list of matplotlib color codes.  The order of the color codes is the | ||
|  |         order in which the colors are cycled through when color thresholding in | ||
|  |         the dendrogram. | ||
|  | 
 | ||
|  |         If ``None``, resets the palette to its default (which are matplotlib | ||
|  |         default colors C1 to C9). | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     None | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     dendrogram | ||
|  | 
 | ||
|  |     Notes | ||
|  |     ----- | ||
|  |     Ability to reset the palette with ``None`` added in SciPy 0.17.0. | ||
|  | 
 | ||
|  |     Thread safety: using this function in a multi-threaded fashion may | ||
|  |     result in `dendrogram` producing plots with unexpected colors. | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> import numpy as np | ||
|  |     >>> from scipy.cluster import hierarchy | ||
|  |     >>> ytdist = np.array([662., 877., 255., 412., 996., 295., 468., 268., | ||
|  |     ...                    400., 754., 564., 138., 219., 869., 669.]) | ||
|  |     >>> Z = hierarchy.linkage(ytdist, 'single') | ||
|  |     >>> dn = hierarchy.dendrogram(Z, no_plot=True) | ||
|  |     >>> dn['color_list'] | ||
|  |     ['C1', 'C0', 'C0', 'C0', 'C0'] | ||
|  |     >>> hierarchy.set_link_color_palette(['c', 'm', 'y', 'k']) | ||
|  |     >>> dn = hierarchy.dendrogram(Z, no_plot=True, above_threshold_color='b') | ||
|  |     >>> dn['color_list'] | ||
|  |     ['c', 'b', 'b', 'b', 'b'] | ||
|  |     >>> dn = hierarchy.dendrogram(Z, no_plot=True, color_threshold=267, | ||
|  |     ...                           above_threshold_color='k') | ||
|  |     >>> dn['color_list'] | ||
|  |     ['c', 'm', 'm', 'k', 'k'] | ||
|  | 
 | ||
|  |     Now, reset the color palette to its default: | ||
|  | 
 | ||
|  |     >>> hierarchy.set_link_color_palette(None) | ||
|  | 
 | ||
|  |     """
 | ||
|  |     if palette is None: | ||
|  |         # reset to its default | ||
|  |         palette = _link_line_colors_default | ||
|  |     elif not isinstance(palette, list | tuple): | ||
|  |         raise TypeError("palette must be a list or tuple") | ||
|  |     _ptypes = [isinstance(p, str) for p in palette] | ||
|  | 
 | ||
|  |     if False in _ptypes: | ||
|  |         raise TypeError("all palette list elements must be color strings") | ||
|  | 
 | ||
|  |     global _link_line_colors | ||
|  |     _link_line_colors = palette | ||
|  | 
 | ||
|  | 
 | ||
|  | @xp_capabilities(cpu_only=True, jax_jit=False, allow_dask_compute=True) | ||
|  | def dendrogram(Z, p=30, truncate_mode=None, color_threshold=None, | ||
|  |                get_leaves=True, orientation='top', labels=None, | ||
|  |                count_sort=False, distance_sort=False, show_leaf_counts=True, | ||
|  |                no_plot=False, no_labels=False, leaf_font_size=None, | ||
|  |                leaf_rotation=None, leaf_label_func=None, | ||
|  |                show_contracted=False, link_color_func=None, ax=None, | ||
|  |                above_threshold_color='C0'): | ||
|  |     """
 | ||
|  |     Plot the hierarchical clustering as a dendrogram. | ||
|  | 
 | ||
|  |     The dendrogram illustrates how each cluster is | ||
|  |     composed by drawing a U-shaped link between a non-singleton | ||
|  |     cluster and its children. The top of the U-link indicates a | ||
|  |     cluster merge. The two legs of the U-link indicate which clusters | ||
|  |     were merged. The length of the two legs of the U-link represents | ||
|  |     the distance between the child clusters. It is also the | ||
|  |     cophenetic distance between original observations in the two | ||
|  |     children clusters. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     Z : ndarray | ||
|  |         The linkage matrix encoding the hierarchical clustering to | ||
|  |         render as a dendrogram. See the ``linkage`` function for more | ||
|  |         information on the format of ``Z``. | ||
|  |     p : int, optional | ||
|  |         The ``p`` parameter for ``truncate_mode``. | ||
|  |     truncate_mode : str, optional | ||
|  |         The dendrogram can be hard to read when the original | ||
|  |         observation matrix from which the linkage is derived is | ||
|  |         large. Truncation is used to condense the dendrogram. There | ||
|  |         are several modes: | ||
|  | 
 | ||
|  |         ``None`` | ||
|  |           No truncation is performed (default). | ||
|  |           Note: ``'none'`` is an alias for ``None`` that's kept for | ||
|  |           backward compatibility. | ||
|  | 
 | ||
|  |         ``'lastp'`` | ||
|  |           The last ``p`` non-singleton clusters formed in the linkage are the | ||
|  |           only non-leaf nodes in the linkage; they correspond to rows | ||
|  |           ``Z[n-p-2:end]`` in ``Z``. All other non-singleton clusters are | ||
|  |           contracted into leaf nodes. | ||
|  | 
 | ||
|  |         ``'level'`` | ||
|  |           No more than ``p`` levels of the dendrogram tree are displayed. | ||
|  |           A "level" includes all nodes with ``p`` merges from the final merge. | ||
|  | 
 | ||
|  |           Note: ``'mtica'`` is an alias for ``'level'`` that's kept for | ||
|  |           backward compatibility. | ||
|  | 
 | ||
|  |     color_threshold : double, optional | ||
|  |         For brevity, let :math:`t` be the ``color_threshold``. | ||
|  |         Colors all the descendent links below a cluster node | ||
|  |         :math:`k` the same color if :math:`k` is the first node below | ||
|  |         the cut threshold :math:`t`. All links connecting nodes with | ||
|  |         distances greater than or equal to the threshold are colored | ||
|  |         with de default matplotlib color ``'C0'``. If :math:`t` is less | ||
|  |         than or equal to zero, all nodes are colored ``'C0'``. | ||
|  |         If ``color_threshold`` is None or 'default', | ||
|  |         corresponding with MATLAB(TM) behavior, the threshold is set to | ||
|  |         ``0.7*max(Z[:,2])``. | ||
|  | 
 | ||
|  |     get_leaves : bool, optional | ||
|  |         Includes a list ``R['leaves']=H`` in the result | ||
|  |         dictionary. For each :math:`i`, ``H[i] == j``, cluster node | ||
|  |         ``j`` appears in position ``i`` in the left-to-right traversal | ||
|  |         of the leaves, where :math:`j < 2n-1` and :math:`i < n`. | ||
|  |     orientation : str, optional | ||
|  |         The direction to plot the dendrogram, which can be any | ||
|  |         of the following strings: | ||
|  | 
 | ||
|  |         ``'top'`` | ||
|  |           Plots the root at the top, and plot descendent links going downwards. | ||
|  |           (default). | ||
|  | 
 | ||
|  |         ``'bottom'`` | ||
|  |           Plots the root at the bottom, and plot descendent links going | ||
|  |           upwards. | ||
|  | 
 | ||
|  |         ``'left'`` | ||
|  |           Plots the root at the left, and plot descendent links going right. | ||
|  | 
 | ||
|  |         ``'right'`` | ||
|  |           Plots the root at the right, and plot descendent links going left. | ||
|  | 
 | ||
|  |     labels : ndarray, optional | ||
|  |         By default, ``labels`` is None so the index of the original observation | ||
|  |         is used to label the leaf nodes.  Otherwise, this is an :math:`n`-sized | ||
|  |         sequence, with ``n == Z.shape[0] + 1``. The ``labels[i]`` value is the | ||
|  |         text to put under the :math:`i` th leaf node only if it corresponds to | ||
|  |         an original observation and not a non-singleton cluster. | ||
|  |     count_sort : str or bool, optional | ||
|  |         For each node n, the order (visually, from left-to-right) n's | ||
|  |         two descendent links are plotted is determined by this | ||
|  |         parameter, which can be any of the following values: | ||
|  | 
 | ||
|  |         ``False`` | ||
|  |           Nothing is done. | ||
|  | 
 | ||
|  |         ``'ascending'`` or ``True`` | ||
|  |           The child with the minimum number of original objects in its cluster | ||
|  |           is plotted first. | ||
|  | 
 | ||
|  |         ``'descending'`` | ||
|  |           The child with the maximum number of original objects in its cluster | ||
|  |           is plotted first. | ||
|  | 
 | ||
|  |         Note, ``distance_sort`` and ``count_sort`` cannot both be True. | ||
|  |     distance_sort : str or bool, optional | ||
|  |         For each node n, the order (visually, from left-to-right) n's | ||
|  |         two descendent links are plotted is determined by this | ||
|  |         parameter, which can be any of the following values: | ||
|  | 
 | ||
|  |         ``False`` | ||
|  |           Nothing is done. | ||
|  | 
 | ||
|  |         ``'ascending'`` or ``True`` | ||
|  |           The child with the minimum distance between its direct descendents is | ||
|  |           plotted first. | ||
|  | 
 | ||
|  |         ``'descending'`` | ||
|  |           The child with the maximum distance between its direct descendents is | ||
|  |           plotted first. | ||
|  | 
 | ||
|  |         Note ``distance_sort`` and ``count_sort`` cannot both be True. | ||
|  |     show_leaf_counts : bool, optional | ||
|  |          When True, leaf nodes representing :math:`k>1` original | ||
|  |          observation are labeled with the number of observations they | ||
|  |          contain in parentheses. | ||
|  |     no_plot : bool, optional | ||
|  |         When True, the final rendering is not performed. This is | ||
|  |         useful if only the data structures computed for the rendering | ||
|  |         are needed or if matplotlib is not available. | ||
|  |     no_labels : bool, optional | ||
|  |         When True, no labels appear next to the leaf nodes in the | ||
|  |         rendering of the dendrogram. | ||
|  |     leaf_rotation : double, optional | ||
|  |         Specifies the angle (in degrees) to rotate the leaf | ||
|  |         labels. When unspecified, the rotation is based on the number of | ||
|  |         nodes in the dendrogram (default is 0). | ||
|  |     leaf_font_size : int, optional | ||
|  |         Specifies the font size (in points) of the leaf labels. When | ||
|  |         unspecified, the size based on the number of nodes in the | ||
|  |         dendrogram. | ||
|  |     leaf_label_func : lambda or function, optional | ||
|  |         When ``leaf_label_func`` is a callable function, for each | ||
|  |         leaf with cluster index :math:`k < 2n-1`. The function | ||
|  |         is expected to return a string with the label for the | ||
|  |         leaf. | ||
|  | 
 | ||
|  |         Indices :math:`k < n` correspond to original observations | ||
|  |         while indices :math:`k \\geq n` correspond to non-singleton | ||
|  |         clusters. | ||
|  | 
 | ||
|  |         For example, to label singletons with their node id and | ||
|  |         non-singletons with their id, count, and inconsistency | ||
|  |         coefficient, simply do:: | ||
|  | 
 | ||
|  |             # First define the leaf label function. | ||
|  |             def llf(id): | ||
|  |                 if id < n: | ||
|  |                     return str(id) | ||
|  |                 else: | ||
|  |                     return '[%d %d %1.2f]' % (id, count, R[n-id,3]) | ||
|  | 
 | ||
|  |             # The text for the leaf nodes is going to be big so force | ||
|  |             # a rotation of 90 degrees. | ||
|  |             dendrogram(Z, leaf_label_func=llf, leaf_rotation=90) | ||
|  | 
 | ||
|  |             # leaf_label_func can also be used together with ``truncate_mode``, | ||
|  |             # in which case you will get your leaves labeled after truncation: | ||
|  |             dendrogram(Z, leaf_label_func=llf, leaf_rotation=90, | ||
|  |                        truncate_mode='level', p=2) | ||
|  | 
 | ||
|  |     show_contracted : bool, optional | ||
|  |         When True the heights of non-singleton nodes contracted | ||
|  |         into a leaf node are plotted as crosses along the link | ||
|  |         connecting that leaf node.  This really is only useful when | ||
|  |         truncation is used (see ``truncate_mode`` parameter). | ||
|  |     link_color_func : callable, optional | ||
|  |         If given, `link_color_function` is called with each non-singleton id | ||
|  |         corresponding to each U-shaped link it will paint. The function is | ||
|  |         expected to return the color to paint the link, encoded as a matplotlib | ||
|  |         color string code. For example:: | ||
|  | 
 | ||
|  |             dendrogram(Z, link_color_func=lambda k: colors[k]) | ||
|  | 
 | ||
|  |         colors the direct links below each untruncated non-singleton node | ||
|  |         ``k`` using ``colors[k]``. | ||
|  |     ax : matplotlib Axes instance, optional | ||
|  |         If None and `no_plot` is not True, the dendrogram will be plotted | ||
|  |         on the current axes.  Otherwise if `no_plot` is not True the | ||
|  |         dendrogram will be plotted on the given ``Axes`` instance. This can be | ||
|  |         useful if the dendrogram is part of a more complex figure. | ||
|  |     above_threshold_color : str, optional | ||
|  |         This matplotlib color string sets the color of the links above the | ||
|  |         color_threshold. The default is ``'C0'``. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     R : dict | ||
|  |         A dictionary of data structures computed to render the | ||
|  |         dendrogram. Its has the following keys: | ||
|  | 
 | ||
|  |         ``'color_list'`` | ||
|  |           A list of color names. The k'th element represents the color of the | ||
|  |           k'th link. | ||
|  | 
 | ||
|  |         ``'icoord'`` and ``'dcoord'`` | ||
|  |           Each of them is a list of lists. Let ``icoord = [I1, I2, ..., Ip]`` | ||
|  |           where ``Ik = [xk1, xk2, xk3, xk4]`` and ``dcoord = [D1, D2, ..., Dp]`` | ||
|  |           where ``Dk = [yk1, yk2, yk3, yk4]``, then the k'th link painted is | ||
|  |           ``(xk1, yk1)`` - ``(xk2, yk2)`` - ``(xk3, yk3)`` - ``(xk4, yk4)``. | ||
|  | 
 | ||
|  |         ``'ivl'`` | ||
|  |           A list of labels corresponding to the leaf nodes. | ||
|  | 
 | ||
|  |         ``'leaves'`` | ||
|  |           For each i, ``H[i] == j``, cluster node ``j`` appears in position | ||
|  |           ``i`` in the left-to-right traversal of the leaves, where | ||
|  |           :math:`j < 2n-1` and :math:`i < n`. If ``j`` is less than ``n``, the | ||
|  |           ``i``-th leaf node corresponds to an original observation. | ||
|  |           Otherwise, it corresponds to a non-singleton cluster. | ||
|  | 
 | ||
|  |         ``'leaves_color_list'`` | ||
|  |           A list of color names. The k'th element represents the color of the | ||
|  |           k'th leaf. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     linkage, set_link_color_palette | ||
|  | 
 | ||
|  |     Notes | ||
|  |     ----- | ||
|  |     It is expected that the distances in ``Z[:,2]`` be monotonic, otherwise | ||
|  |     crossings appear in the dendrogram. | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> import numpy as np | ||
|  |     >>> from scipy.cluster import hierarchy | ||
|  |     >>> import matplotlib.pyplot as plt | ||
|  | 
 | ||
|  |     A very basic example: | ||
|  | 
 | ||
|  |     >>> ytdist = np.array([662., 877., 255., 412., 996., 295., 468., 268., | ||
|  |     ...                    400., 754., 564., 138., 219., 869., 669.]) | ||
|  |     >>> Z = hierarchy.linkage(ytdist, 'single') | ||
|  |     >>> plt.figure() | ||
|  |     >>> dn = hierarchy.dendrogram(Z) | ||
|  | 
 | ||
|  |     Now, plot in given axes, improve the color scheme and use both vertical and | ||
|  |     horizontal orientations: | ||
|  | 
 | ||
|  |     >>> hierarchy.set_link_color_palette(['m', 'c', 'y', 'k']) | ||
|  |     >>> fig, axes = plt.subplots(1, 2, figsize=(8, 3)) | ||
|  |     >>> dn1 = hierarchy.dendrogram(Z, ax=axes[0], above_threshold_color='y', | ||
|  |     ...                            orientation='top') | ||
|  |     >>> dn2 = hierarchy.dendrogram(Z, ax=axes[1], | ||
|  |     ...                            above_threshold_color='#bcbddc', | ||
|  |     ...                            orientation='right') | ||
|  |     >>> hierarchy.set_link_color_palette(None)  # reset to default after use | ||
|  |     >>> plt.show() | ||
|  | 
 | ||
|  |     """
 | ||
|  |     # This feature was thought about but never implemented (still useful?): | ||
|  |     # | ||
|  |     #         ... = dendrogram(..., leaves_order=None) | ||
|  |     # | ||
|  |     #         Plots the leaves in the order specified by a vector of | ||
|  |     #         original observation indices. If the vector contains duplicates | ||
|  |     #         or results in a crossing, an exception will be thrown. Passing | ||
|  |     #         None orders leaf nodes based on the order they appear in the | ||
|  |     #         pre-order traversal. | ||
|  |     xp = array_namespace(Z) | ||
|  |     Z = _asarray(Z, order='C', xp=xp) | ||
|  | 
 | ||
|  |     if orientation not in ["top", "left", "bottom", "right"]: | ||
|  |         raise ValueError("orientation must be one of 'top', 'left', " | ||
|  |                          "'bottom', or 'right'") | ||
|  | 
 | ||
|  |     if labels is not None: | ||
|  |         try: | ||
|  |             len_labels = len(labels) | ||
|  |         except (TypeError, AttributeError): | ||
|  |             len_labels = labels.shape[0] | ||
|  |         if Z.shape[0] + 1 != len_labels: | ||
|  |             raise ValueError("Dimensions of Z and labels must be consistent.") | ||
|  | 
 | ||
|  |     _is_valid_linkage(Z, throw=True, name='Z', materialize=True, xp=xp) | ||
|  |     Zs = Z.shape | ||
|  |     n = Zs[0] + 1 | ||
|  |     if isinstance(p, int | float): | ||
|  |         p = int(p) | ||
|  |     else: | ||
|  |         raise TypeError('The second argument must be a number') | ||
|  | 
 | ||
|  |     if truncate_mode not in ('lastp', 'mtica', 'level', 'none', None): | ||
|  |         # 'mtica' is kept working for backwards compat. | ||
|  |         raise ValueError('Invalid truncation mode.') | ||
|  | 
 | ||
|  |     if truncate_mode == 'lastp': | ||
|  |         if p > n or p == 0: | ||
|  |             p = n | ||
|  | 
 | ||
|  |     if truncate_mode == 'mtica': | ||
|  |         # 'mtica' is an alias | ||
|  |         truncate_mode = 'level' | ||
|  | 
 | ||
|  |     if truncate_mode == 'level': | ||
|  |         if p <= 0: | ||
|  |             p = np.inf | ||
|  | 
 | ||
|  |     if get_leaves: | ||
|  |         lvs = [] | ||
|  |     else: | ||
|  |         lvs = None | ||
|  | 
 | ||
|  |     icoord_list = [] | ||
|  |     dcoord_list = [] | ||
|  |     color_list = [] | ||
|  |     current_color = [0] | ||
|  |     currently_below_threshold = [False] | ||
|  |     ivl = []  # list of leaves | ||
|  | 
 | ||
|  |     if color_threshold is None or (isinstance(color_threshold, str) and | ||
|  |                                    color_threshold == 'default'): | ||
|  |         color_threshold = xp.max(Z[:, 2]) * 0.7 | ||
|  | 
 | ||
|  |     R = {'icoord': icoord_list, 'dcoord': dcoord_list, 'ivl': ivl, | ||
|  |          'leaves': lvs, 'color_list': color_list} | ||
|  | 
 | ||
|  |     # Empty list will be filled in _dendrogram_calculate_info | ||
|  |     contraction_marks = [] if show_contracted else None | ||
|  | 
 | ||
|  |     _dendrogram_calculate_info( | ||
|  |         Z=Z, p=p, | ||
|  |         truncate_mode=truncate_mode, | ||
|  |         color_threshold=color_threshold, | ||
|  |         get_leaves=get_leaves, | ||
|  |         orientation=orientation, | ||
|  |         labels=labels, | ||
|  |         count_sort=count_sort, | ||
|  |         distance_sort=distance_sort, | ||
|  |         show_leaf_counts=show_leaf_counts, | ||
|  |         i=2*n - 2, | ||
|  |         iv=0.0, | ||
|  |         ivl=ivl, | ||
|  |         n=n, | ||
|  |         icoord_list=icoord_list, | ||
|  |         dcoord_list=dcoord_list, | ||
|  |         lvs=lvs, | ||
|  |         current_color=current_color, | ||
|  |         color_list=color_list, | ||
|  |         currently_below_threshold=currently_below_threshold, | ||
|  |         leaf_label_func=leaf_label_func, | ||
|  |         contraction_marks=contraction_marks, | ||
|  |         link_color_func=link_color_func, | ||
|  |         above_threshold_color=above_threshold_color) | ||
|  | 
 | ||
|  |     if not no_plot: | ||
|  |         mh = xp.max(Z[:, 2]) | ||
|  |         _plot_dendrogram(icoord_list, dcoord_list, ivl, p, n, mh, orientation, | ||
|  |                          no_labels, color_list, | ||
|  |                          leaf_font_size=leaf_font_size, | ||
|  |                          leaf_rotation=leaf_rotation, | ||
|  |                          contraction_marks=contraction_marks, | ||
|  |                          ax=ax, | ||
|  |                          above_threshold_color=above_threshold_color) | ||
|  | 
 | ||
|  |     R["leaves_color_list"] = _get_leaves_color_list(R) | ||
|  | 
 | ||
|  |     return R | ||
|  | 
 | ||
|  | 
 | ||
|  | def _get_leaves_color_list(R): | ||
|  |     leaves_color_list = [None] * len(R['leaves']) | ||
|  |     for link_x, link_y, link_color in zip(R['icoord'], | ||
|  |                                           R['dcoord'], | ||
|  |                                           R['color_list']): | ||
|  |         for (xi, yi) in zip(link_x, link_y): | ||
|  |             if yi == 0.0 and (xi % 5 == 0 and xi % 2 == 1): | ||
|  |                 # if yi is 0.0 and xi is divisible by 5 and odd, | ||
|  |                 # the point is a leaf | ||
|  |                 # xi of leaves are      5, 15, 25, 35, ... (see `iv_ticks`) | ||
|  |                 # index of leaves are   0,  1,  2,  3, ... as below | ||
|  |                 leaf_index = (int(xi) - 5) // 10 | ||
|  |                 # each leaf has a same color of its link. | ||
|  |                 leaves_color_list[leaf_index] = link_color | ||
|  |     return leaves_color_list | ||
|  | 
 | ||
|  | 
 | ||
|  | def _append_singleton_leaf_node(Z, p, n, level, lvs, ivl, leaf_label_func, | ||
|  |                                 i, labels): | ||
|  |     # If the leaf id structure is not None and is a list then the caller | ||
|  |     # to dendrogram has indicated that cluster id's corresponding to the | ||
|  |     # leaf nodes should be recorded. | ||
|  | 
 | ||
|  |     if lvs is not None: | ||
|  |         lvs.append(int(i)) | ||
|  | 
 | ||
|  |     # If leaf node labels are to be displayed... | ||
|  |     if ivl is not None: | ||
|  |         # If a leaf_label_func has been provided, the label comes from the | ||
|  |         # string returned from the leaf_label_func, which is a function | ||
|  |         # passed to dendrogram. | ||
|  |         if leaf_label_func: | ||
|  |             ivl.append(leaf_label_func(int(i))) | ||
|  |         else: | ||
|  |             # Otherwise, if the dendrogram caller has passed a labels list | ||
|  |             # for the leaf nodes, use it. | ||
|  |             if labels is not None: | ||
|  |                 ivl.append(labels[int(i - n)]) | ||
|  |             else: | ||
|  |                 # Otherwise, use the id as the label for the leaf.x | ||
|  |                 ivl.append(str(int(i))) | ||
|  | 
 | ||
|  | 
 | ||
|  | def _append_nonsingleton_leaf_node(Z, p, n, level, lvs, ivl, leaf_label_func, | ||
|  |                                    i, labels, show_leaf_counts): | ||
|  |     # If the leaf id structure is not None and is a list then the caller | ||
|  |     # to dendrogram has indicated that cluster id's corresponding to the | ||
|  |     # leaf nodes should be recorded. | ||
|  | 
 | ||
|  |     if lvs is not None: | ||
|  |         lvs.append(int(i)) | ||
|  |     if ivl is not None: | ||
|  |         if leaf_label_func: | ||
|  |             ivl.append(leaf_label_func(int(i))) | ||
|  |         else: | ||
|  |             if show_leaf_counts: | ||
|  |                 ivl.append("(" + str(np.asarray(Z[i - n, 3], dtype=np.int64)) + ")") | ||
|  |             else: | ||
|  |                 ivl.append("") | ||
|  | 
 | ||
|  | 
 | ||
|  | def _append_contraction_marks(Z, iv, i, n, contraction_marks, xp): | ||
|  |     _append_contraction_marks_sub(Z, iv, int_floor(Z[i - n, 0], xp), | ||
|  |                                   n, contraction_marks, xp) | ||
|  |     _append_contraction_marks_sub(Z, iv, int_floor(Z[i - n, 1], xp), | ||
|  |                                   n, contraction_marks, xp) | ||
|  | 
 | ||
|  | 
 | ||
|  | def _append_contraction_marks_sub(Z, iv, i, n, contraction_marks, xp): | ||
|  |     if i >= n: | ||
|  |         contraction_marks.append((iv, Z[i - n, 2])) | ||
|  |         _append_contraction_marks_sub(Z, iv, int_floor(Z[i - n, 0], xp), | ||
|  |                                       n, contraction_marks, xp) | ||
|  |         _append_contraction_marks_sub(Z, iv, int_floor(Z[i - n, 1], xp), | ||
|  |                                       n, contraction_marks, xp) | ||
|  | 
 | ||
|  | 
 | ||
|  | def _dendrogram_calculate_info(Z, p, truncate_mode, | ||
|  |                                color_threshold=np.inf, get_leaves=True, | ||
|  |                                orientation='top', labels=None, | ||
|  |                                count_sort=False, distance_sort=False, | ||
|  |                                show_leaf_counts=False, i=-1, iv=0.0, | ||
|  |                                ivl=None, n=0, icoord_list=None, dcoord_list=None, | ||
|  |                                lvs=None, mhr=False, | ||
|  |                                current_color=None, color_list=None, | ||
|  |                                currently_below_threshold=None, | ||
|  |                                leaf_label_func=None, level=0, | ||
|  |                                contraction_marks=None, | ||
|  |                                link_color_func=None, | ||
|  |                                above_threshold_color='C0'): | ||
|  |     """
 | ||
|  |     Calculate the endpoints of the links as well as the labels for the | ||
|  |     the dendrogram rooted at the node with index i. iv is the independent | ||
|  |     variable value to plot the left-most leaf node below the root node i | ||
|  |     (if orientation='top', this would be the left-most x value where the | ||
|  |     plotting of this root node i and its descendents should begin). | ||
|  | 
 | ||
|  |     ivl is a list to store the labels of the leaf nodes. The leaf_label_func | ||
|  |     is called whenever ivl != None, labels == None, and | ||
|  |     leaf_label_func != None. When ivl != None and labels != None, the | ||
|  |     labels list is used only for labeling the leaf nodes. When | ||
|  |     ivl == None, no labels are generated for leaf nodes. | ||
|  | 
 | ||
|  |     When get_leaves==True, a list of leaves is built as they are visited | ||
|  |     in the dendrogram. | ||
|  | 
 | ||
|  |     Returns a tuple with l being the independent variable coordinate that | ||
|  |     corresponds to the midpoint of cluster to the left of cluster i if | ||
|  |     i is non-singleton, otherwise the independent coordinate of the leaf | ||
|  |     node if i is a leaf node. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     A tuple (left, w, h, md), where: | ||
|  |         * left is the independent variable coordinate of the center of the | ||
|  |           the U of the subtree | ||
|  | 
 | ||
|  |         * w is the amount of space used for the subtree (in independent | ||
|  |           variable units) | ||
|  | 
 | ||
|  |         * h is the height of the subtree in dependent variable units | ||
|  | 
 | ||
|  |         * md is the ``max(Z[*,2]``) for all nodes ``*`` below and including | ||
|  |           the target node. | ||
|  | 
 | ||
|  |     """
 | ||
|  |     xp = array_namespace(Z) | ||
|  |     if n == 0: | ||
|  |         raise ValueError("Invalid singleton cluster count n.") | ||
|  | 
 | ||
|  |     if i == -1: | ||
|  |         raise ValueError("Invalid root cluster index i.") | ||
|  | 
 | ||
|  |     if truncate_mode == 'lastp': | ||
|  |         # If the node is a leaf node but corresponds to a non-singleton | ||
|  |         # cluster, its label is either the empty string or the number of | ||
|  |         # original observations belonging to cluster i. | ||
|  |         if 2*n - p > i >= n: | ||
|  |             d = Z[i - n, 2] | ||
|  |             _append_nonsingleton_leaf_node(Z, p, n, level, lvs, ivl, | ||
|  |                                            leaf_label_func, i, labels, | ||
|  |                                            show_leaf_counts) | ||
|  |             if contraction_marks is not None: | ||
|  |                 _append_contraction_marks(Z, iv + 5.0, i, n, contraction_marks, xp) | ||
|  |             return (iv + 5.0, 10.0, 0.0, d) | ||
|  |         elif i < n: | ||
|  |             _append_singleton_leaf_node(Z, p, n, level, lvs, ivl, | ||
|  |                                         leaf_label_func, i, labels) | ||
|  |             return (iv + 5.0, 10.0, 0.0, 0.0) | ||
|  |     elif truncate_mode == 'level': | ||
|  |         if i > n and level > p: | ||
|  |             d = Z[i - n, 2] | ||
|  |             _append_nonsingleton_leaf_node(Z, p, n, level, lvs, ivl, | ||
|  |                                            leaf_label_func, i, labels, | ||
|  |                                            show_leaf_counts) | ||
|  |             if contraction_marks is not None: | ||
|  |                 _append_contraction_marks(Z, iv + 5.0, i, n, contraction_marks, xp) | ||
|  |             return (iv + 5.0, 10.0, 0.0, d) | ||
|  |         elif i < n: | ||
|  |             _append_singleton_leaf_node(Z, p, n, level, lvs, ivl, | ||
|  |                                         leaf_label_func, i, labels) | ||
|  |             return (iv + 5.0, 10.0, 0.0, 0.0) | ||
|  | 
 | ||
|  |     # Otherwise, only truncate if we have a leaf node. | ||
|  |     # | ||
|  |     # Only place leaves if they correspond to original observations. | ||
|  |     if i < n: | ||
|  |         _append_singleton_leaf_node(Z, p, n, level, lvs, ivl, | ||
|  |                                     leaf_label_func, i, labels) | ||
|  |         return (iv + 5.0, 10.0, 0.0, 0.0) | ||
|  | 
 | ||
|  |     # !!! Otherwise, we don't have a leaf node, so work on plotting a | ||
|  |     # non-leaf node. | ||
|  |     # Actual indices of a and b | ||
|  |     aa = int_floor(Z[i - n, 0], xp) | ||
|  |     ab = int_floor(Z[i - n, 1], xp) | ||
|  |     if aa >= n: | ||
|  |         # The number of singletons below cluster a | ||
|  |         na = Z[aa - n, 3] | ||
|  |         # The distance between a's two direct children. | ||
|  |         da = Z[aa - n, 2] | ||
|  |     else: | ||
|  |         na = 1 | ||
|  |         da = 0.0 | ||
|  |     if ab >= n: | ||
|  |         nb = Z[ab - n, 3] | ||
|  |         db = Z[ab - n, 2] | ||
|  |     else: | ||
|  |         nb = 1 | ||
|  |         db = 0.0 | ||
|  | 
 | ||
|  |     if count_sort == 'ascending' or count_sort is True: | ||
|  |         # If a has a count greater than b, it and its descendents should | ||
|  |         # be drawn to the right. Otherwise, to the left. | ||
|  |         if na > nb: | ||
|  |             # The cluster index to draw to the left (ua) will be ab | ||
|  |             # and the one to draw to the right (ub) will be aa | ||
|  |             ua = ab | ||
|  |             ub = aa | ||
|  |         else: | ||
|  |             ua = aa | ||
|  |             ub = ab | ||
|  |     elif count_sort == 'descending': | ||
|  |         # If a has a count less than or equal to b, it and its | ||
|  |         # descendents should be drawn to the left. Otherwise, to | ||
|  |         # the right. | ||
|  |         if na > nb: | ||
|  |             ua = aa | ||
|  |             ub = ab | ||
|  |         else: | ||
|  |             ua = ab | ||
|  |             ub = aa | ||
|  |     elif distance_sort == 'ascending' or distance_sort is True: | ||
|  |         # If a has a distance greater than b, it and its descendents should | ||
|  |         # be drawn to the right. Otherwise, to the left. | ||
|  |         if da > db: | ||
|  |             ua = ab | ||
|  |             ub = aa | ||
|  |         else: | ||
|  |             ua = aa | ||
|  |             ub = ab | ||
|  |     elif distance_sort == 'descending': | ||
|  |         # If a has a distance less than or equal to b, it and its | ||
|  |         # descendents should be drawn to the left. Otherwise, to | ||
|  |         # the right. | ||
|  |         if da > db: | ||
|  |             ua = aa | ||
|  |             ub = ab | ||
|  |         else: | ||
|  |             ua = ab | ||
|  |             ub = aa | ||
|  |     else: | ||
|  |         ua = aa | ||
|  |         ub = ab | ||
|  | 
 | ||
|  |     # Updated iv variable and the amount of space used. | ||
|  |     (uiva, uwa, uah, uamd) = \ | ||
|  |         _dendrogram_calculate_info( | ||
|  |             Z=Z, p=p, | ||
|  |             truncate_mode=truncate_mode, | ||
|  |             color_threshold=color_threshold, | ||
|  |             get_leaves=get_leaves, | ||
|  |             orientation=orientation, | ||
|  |             labels=labels, | ||
|  |             count_sort=count_sort, | ||
|  |             distance_sort=distance_sort, | ||
|  |             show_leaf_counts=show_leaf_counts, | ||
|  |             i=ua, iv=iv, ivl=ivl, n=n, | ||
|  |             icoord_list=icoord_list, | ||
|  |             dcoord_list=dcoord_list, lvs=lvs, | ||
|  |             current_color=current_color, | ||
|  |             color_list=color_list, | ||
|  |             currently_below_threshold=currently_below_threshold, | ||
|  |             leaf_label_func=leaf_label_func, | ||
|  |             level=level + 1, contraction_marks=contraction_marks, | ||
|  |             link_color_func=link_color_func, | ||
|  |             above_threshold_color=above_threshold_color) | ||
|  | 
 | ||
|  |     h = Z[i - n, 2] | ||
|  |     if h >= color_threshold or color_threshold <= 0: | ||
|  |         c = above_threshold_color | ||
|  | 
 | ||
|  |         if currently_below_threshold[0]: | ||
|  |             current_color[0] = (current_color[0] + 1) % len(_link_line_colors) | ||
|  |         currently_below_threshold[0] = False | ||
|  |     else: | ||
|  |         currently_below_threshold[0] = True | ||
|  |         c = _link_line_colors[current_color[0]] | ||
|  | 
 | ||
|  |     (uivb, uwb, ubh, ubmd) = \ | ||
|  |         _dendrogram_calculate_info( | ||
|  |             Z=Z, p=p, | ||
|  |             truncate_mode=truncate_mode, | ||
|  |             color_threshold=color_threshold, | ||
|  |             get_leaves=get_leaves, | ||
|  |             orientation=orientation, | ||
|  |             labels=labels, | ||
|  |             count_sort=count_sort, | ||
|  |             distance_sort=distance_sort, | ||
|  |             show_leaf_counts=show_leaf_counts, | ||
|  |             i=ub, iv=iv + uwa, ivl=ivl, n=n, | ||
|  |             icoord_list=icoord_list, | ||
|  |             dcoord_list=dcoord_list, lvs=lvs, | ||
|  |             current_color=current_color, | ||
|  |             color_list=color_list, | ||
|  |             currently_below_threshold=currently_below_threshold, | ||
|  |             leaf_label_func=leaf_label_func, | ||
|  |             level=level + 1, contraction_marks=contraction_marks, | ||
|  |             link_color_func=link_color_func, | ||
|  |             above_threshold_color=above_threshold_color) | ||
|  | 
 | ||
|  |     max_dist = max(uamd, ubmd, h) | ||
|  | 
 | ||
|  |     icoord_list.append([uiva, uiva, uivb, uivb]) | ||
|  |     dcoord_list.append([uah, h, h, ubh]) | ||
|  |     if link_color_func is not None: | ||
|  |         v = link_color_func(int(i)) | ||
|  |         if not isinstance(v, str): | ||
|  |             raise TypeError("link_color_func must return a matplotlib " | ||
|  |                             "color string!") | ||
|  |         color_list.append(v) | ||
|  |     else: | ||
|  |         color_list.append(c) | ||
|  | 
 | ||
|  |     return (((uiva + uivb) / 2), uwa + uwb, h, max_dist) | ||
|  | 
 | ||
|  | 
 | ||
|  | @xp_capabilities(cpu_only=True, | ||
|  |                  warnings=[("dask.array", "see notes"), ("jax.numpy", "see notes")]) | ||
|  | def is_isomorphic(T1, T2): | ||
|  |     """
 | ||
|  |     Determine if two different cluster assignments are equivalent. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     T1 : array_like | ||
|  |         An assignment of singleton cluster ids to flat cluster ids. | ||
|  |     T2 : array_like | ||
|  |         An assignment of singleton cluster ids to flat cluster ids. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     b : bool | ||
|  |         Whether the flat cluster assignments `T1` and `T2` are | ||
|  |         equivalent. | ||
|  | 
 | ||
|  |     Notes | ||
|  |     ----- | ||
|  |     *Array API support (experimental):* If the input is a lazy Array (e.g. Dask | ||
|  |     or JAX), the return value will be a 0-dimensional bool Array. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     linkage : for a description of what a linkage matrix is. | ||
|  |     fcluster : for the creation of flat cluster assignments. | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import fcluster, is_isomorphic | ||
|  |     >>> from scipy.cluster.hierarchy import single, complete | ||
|  |     >>> from scipy.spatial.distance import pdist | ||
|  | 
 | ||
|  |     Two flat cluster assignments can be isomorphic if they represent the same | ||
|  |     cluster assignment, with different labels. | ||
|  | 
 | ||
|  |     For example, we can use the `scipy.cluster.hierarchy.single` method | ||
|  |     and flatten the output to four clusters: | ||
|  | 
 | ||
|  |     >>> X = [[0, 0], [0, 1], [1, 0], | ||
|  |     ...      [0, 4], [0, 3], [1, 4], | ||
|  |     ...      [4, 0], [3, 0], [4, 1], | ||
|  |     ...      [4, 4], [3, 4], [4, 3]] | ||
|  | 
 | ||
|  |     >>> Z = single(pdist(X)) | ||
|  |     >>> T = fcluster(Z, 1, criterion='distance') | ||
|  |     >>> T | ||
|  |     array([3, 3, 3, 4, 4, 4, 2, 2, 2, 1, 1, 1], dtype=int32) | ||
|  | 
 | ||
|  |     We can then do the same using the | ||
|  |     `scipy.cluster.hierarchy.complete`: method: | ||
|  | 
 | ||
|  |     >>> Z = complete(pdist(X)) | ||
|  |     >>> T_ = fcluster(Z, 1.5, criterion='distance') | ||
|  |     >>> T_ | ||
|  |     array([1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4], dtype=int32) | ||
|  | 
 | ||
|  |     As we can see, in both cases we obtain four clusters and all the data | ||
|  |     points are distributed in the same way - the only thing that changes | ||
|  |     are the flat cluster labels (3 => 1, 4 =>2, 2 =>3 and 4 =>1), so both | ||
|  |     cluster assignments are isomorphic: | ||
|  | 
 | ||
|  |     >>> is_isomorphic(T, T_) | ||
|  |     True | ||
|  | 
 | ||
|  |     """
 | ||
|  |     xp = array_namespace(T1, T2) | ||
|  |     T1 = _asarray(T1, xp=xp) | ||
|  |     T2 = _asarray(T2, xp=xp) | ||
|  | 
 | ||
|  |     if T1.ndim != 1: | ||
|  |         raise ValueError('T1 must be one-dimensional.') | ||
|  |     if T2.ndim != 1: | ||
|  |         raise ValueError('T2 must be one-dimensional.') | ||
|  |     if T1.shape != T2.shape: | ||
|  |         raise ValueError('T1 and T2 must have the same number of elements.') | ||
|  | 
 | ||
|  |     def py_is_isomorphic(T1, T2): | ||
|  |         d1 = {} | ||
|  |         d2 = {} | ||
|  |         for t1, t2 in zip(T1, T2): | ||
|  |             if t1 in d1: | ||
|  |                 if t2 not in d2: | ||
|  |                     return False | ||
|  |                 if d1[t1] != t2 or d2[t2] != t1: | ||
|  |                     return False | ||
|  |             elif t2 in d2: | ||
|  |                 return False | ||
|  |             else: | ||
|  |                 d1[t1] = t2 | ||
|  |                 d2[t2] = t1 | ||
|  |         return True | ||
|  | 
 | ||
|  |     res = xpx.lazy_apply(py_is_isomorphic, T1, T2, | ||
|  |                          shape=(), dtype=xp.bool, | ||
|  |                          as_numpy=True, xp=xp) | ||
|  |     return res if is_lazy_array(res) else bool(res) | ||
|  | 
 | ||
|  | 
 | ||
|  | @lazy_cython | ||
|  | def maxdists(Z): | ||
|  |     """
 | ||
|  |     Return the maximum distance between any non-singleton cluster. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     Z : ndarray | ||
|  |         The hierarchical clustering encoded as a matrix. See | ||
|  |         ``linkage`` for more information. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     maxdists : ndarray | ||
|  |         A ``(n-1)`` sized numpy array of doubles; ``MD[i]`` represents | ||
|  |         the maximum distance between any cluster (including | ||
|  |         singletons) below and including the node with index i. More | ||
|  |         specifically, ``MD[i] = Z[Q(i)-n, 2].max()`` where ``Q(i)`` is the | ||
|  |         set of all node indices below and including node i. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     linkage : for a description of what a linkage matrix is. | ||
|  |     is_monotonic : for testing for monotonicity of a linkage matrix. | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import median, maxdists | ||
|  |     >>> from scipy.spatial.distance import pdist | ||
|  | 
 | ||
|  |     Given a linkage matrix ``Z``, `scipy.cluster.hierarchy.maxdists` | ||
|  |     computes for each new cluster generated (i.e., for each row of the linkage | ||
|  |     matrix) what is the maximum distance between any two child clusters. | ||
|  | 
 | ||
|  |     Due to the nature of hierarchical clustering, in many cases this is going | ||
|  |     to be just the distance between the two child clusters that were merged | ||
|  |     to form the current one - that is, Z[:,2]. | ||
|  | 
 | ||
|  |     However, for non-monotonic cluster assignments such as | ||
|  |     `scipy.cluster.hierarchy.median` clustering this is not always the | ||
|  |     case: There may be cluster formations were the distance between the two | ||
|  |     clusters merged is smaller than the distance between their children. | ||
|  | 
 | ||
|  |     We can see this in an example: | ||
|  | 
 | ||
|  |     >>> X = [[0, 0], [0, 1], [1, 0], | ||
|  |     ...      [0, 4], [0, 3], [1, 4], | ||
|  |     ...      [4, 0], [3, 0], [4, 1], | ||
|  |     ...      [4, 4], [3, 4], [4, 3]] | ||
|  | 
 | ||
|  |     >>> Z = median(pdist(X)) | ||
|  |     >>> Z | ||
|  |     array([[ 0.        ,  1.        ,  1.        ,  2.        ], | ||
|  |            [ 3.        ,  4.        ,  1.        ,  2.        ], | ||
|  |            [ 9.        , 10.        ,  1.        ,  2.        ], | ||
|  |            [ 6.        ,  7.        ,  1.        ,  2.        ], | ||
|  |            [ 2.        , 12.        ,  1.11803399,  3.        ], | ||
|  |            [ 5.        , 13.        ,  1.11803399,  3.        ], | ||
|  |            [ 8.        , 15.        ,  1.11803399,  3.        ], | ||
|  |            [11.        , 14.        ,  1.11803399,  3.        ], | ||
|  |            [18.        , 19.        ,  3.        ,  6.        ], | ||
|  |            [16.        , 17.        ,  3.5       ,  6.        ], | ||
|  |            [20.        , 21.        ,  3.25      , 12.        ]]) | ||
|  |     >>> maxdists(Z) | ||
|  |     array([1.        , 1.        , 1.        , 1.        , 1.11803399, | ||
|  |            1.11803399, 1.11803399, 1.11803399, 3.        , 3.5       , | ||
|  |            3.5       ]) | ||
|  | 
 | ||
|  |     Note that while the distance between the two clusters merged when creating the | ||
|  |     last cluster is 3.25, there are two children (clusters 16 and 17) whose distance | ||
|  |     is larger (3.5). Thus, `scipy.cluster.hierarchy.maxdists` returns 3.5 in | ||
|  |     this case. | ||
|  | 
 | ||
|  |     """
 | ||
|  |     xp = array_namespace(Z) | ||
|  |     Z = _asarray(Z, order='C', dtype=xp.float64, xp=xp) | ||
|  |     _is_valid_linkage(Z, throw=True, name='Z', xp=xp) | ||
|  | 
 | ||
|  |     def cy_maxdists(Z, validate): | ||
|  |         if validate: | ||
|  |             _is_valid_linkage(Z, throw=True, name='Z', xp=np) | ||
|  |         MD = np.zeros((Z.shape[0],)) | ||
|  |         _hierarchy.get_max_dist_for_each_cluster(Z, MD, Z.shape[0] + 1) | ||
|  |         return MD | ||
|  | 
 | ||
|  |     return xpx.lazy_apply(cy_maxdists, Z, validate=is_lazy_array(Z), | ||
|  |                           shape=(Z.shape[0], ), dtype=xp.float64, | ||
|  |                           as_numpy=True, xp=xp) | ||
|  | 
 | ||
|  | 
 | ||
|  | @lazy_cython | ||
|  | def maxinconsts(Z, R): | ||
|  |     """
 | ||
|  |     Return the maximum inconsistency coefficient for each | ||
|  |     non-singleton cluster and its children. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     Z : ndarray | ||
|  |         The hierarchical clustering encoded as a matrix. See | ||
|  |         `linkage` for more information. | ||
|  |     R : ndarray | ||
|  |         The inconsistency matrix. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     MI : ndarray | ||
|  |         A monotonic ``(n-1)``-sized numpy array of doubles. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     linkage : for a description of what a linkage matrix is. | ||
|  |     inconsistent : for the creation of a inconsistency matrix. | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import median, inconsistent, maxinconsts | ||
|  |     >>> from scipy.spatial.distance import pdist | ||
|  | 
 | ||
|  |     Given a data set ``X``, we can apply a clustering method to obtain a | ||
|  |     linkage matrix ``Z``. `scipy.cluster.hierarchy.inconsistent` can | ||
|  |     be also used to obtain the inconsistency matrix ``R`` associated to | ||
|  |     this clustering process: | ||
|  | 
 | ||
|  |     >>> X = [[0, 0], [0, 1], [1, 0], | ||
|  |     ...      [0, 4], [0, 3], [1, 4], | ||
|  |     ...      [4, 0], [3, 0], [4, 1], | ||
|  |     ...      [4, 4], [3, 4], [4, 3]] | ||
|  | 
 | ||
|  |     >>> Z = median(pdist(X)) | ||
|  |     >>> R = inconsistent(Z) | ||
|  |     >>> Z | ||
|  |     array([[ 0.        ,  1.        ,  1.        ,  2.        ], | ||
|  |            [ 3.        ,  4.        ,  1.        ,  2.        ], | ||
|  |            [ 9.        , 10.        ,  1.        ,  2.        ], | ||
|  |            [ 6.        ,  7.        ,  1.        ,  2.        ], | ||
|  |            [ 2.        , 12.        ,  1.11803399,  3.        ], | ||
|  |            [ 5.        , 13.        ,  1.11803399,  3.        ], | ||
|  |            [ 8.        , 15.        ,  1.11803399,  3.        ], | ||
|  |            [11.        , 14.        ,  1.11803399,  3.        ], | ||
|  |            [18.        , 19.        ,  3.        ,  6.        ], | ||
|  |            [16.        , 17.        ,  3.5       ,  6.        ], | ||
|  |            [20.        , 21.        ,  3.25      , 12.        ]]) | ||
|  |     >>> R | ||
|  |     array([[1.        , 0.        , 1.        , 0.        ], | ||
|  |            [1.        , 0.        , 1.        , 0.        ], | ||
|  |            [1.        , 0.        , 1.        , 0.        ], | ||
|  |            [1.        , 0.        , 1.        , 0.        ], | ||
|  |            [1.05901699, 0.08346263, 2.        , 0.70710678], | ||
|  |            [1.05901699, 0.08346263, 2.        , 0.70710678], | ||
|  |            [1.05901699, 0.08346263, 2.        , 0.70710678], | ||
|  |            [1.05901699, 0.08346263, 2.        , 0.70710678], | ||
|  |            [1.74535599, 1.08655358, 3.        , 1.15470054], | ||
|  |            [1.91202266, 1.37522872, 3.        , 1.15470054], | ||
|  |            [3.25      , 0.25      , 3.        , 0.        ]]) | ||
|  | 
 | ||
|  |     Here, `scipy.cluster.hierarchy.maxinconsts` can be used to compute | ||
|  |     the maximum value of the inconsistency statistic (the last column of | ||
|  |     ``R``) for each non-singleton cluster and its children: | ||
|  | 
 | ||
|  |     >>> maxinconsts(Z, R) | ||
|  |     array([0.        , 0.        , 0.        , 0.        , 0.70710678, | ||
|  |            0.70710678, 0.70710678, 0.70710678, 1.15470054, 1.15470054, | ||
|  |            1.15470054]) | ||
|  | 
 | ||
|  |     """
 | ||
|  |     xp = array_namespace(Z, R) | ||
|  |     Z = _asarray(Z, order='C', dtype=xp.float64, xp=xp) | ||
|  |     R = _asarray(R, order='C', dtype=xp.float64, xp=xp) | ||
|  |     _is_valid_linkage(Z, throw=True, name='Z', xp=xp) | ||
|  |     _is_valid_im(R, throw=True, name='R', xp=xp) | ||
|  | 
 | ||
|  |     if Z.shape[0] != R.shape[0]: | ||
|  |         raise ValueError("The inconsistency matrix and linkage matrix each " | ||
|  |                          "have a different number of rows.") | ||
|  |      | ||
|  |     def cy_maxinconsts(Z, R, validate): | ||
|  |         if validate: | ||
|  |             _is_valid_linkage(Z, throw=True, name='Z', xp=np) | ||
|  |             _is_valid_im(R, throw=True, name='R', xp=np) | ||
|  |         n = Z.shape[0] + 1 | ||
|  |         MI = np.zeros((n - 1,)) | ||
|  |         _hierarchy.get_max_Rfield_for_each_cluster(Z, R, MI, n, 3) | ||
|  |         return MI | ||
|  | 
 | ||
|  |     return xpx.lazy_apply(cy_maxinconsts, Z, R, validate=is_lazy_array(Z), | ||
|  |                           shape=(Z.shape[0], ), dtype=xp.float64, | ||
|  |                           as_numpy=True, xp=xp) | ||
|  | 
 | ||
|  | 
 | ||
|  | @lazy_cython | ||
|  | def maxRstat(Z, R, i): | ||
|  |     """
 | ||
|  |     Return the maximum statistic for each non-singleton cluster and its | ||
|  |     children. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     Z : array_like | ||
|  |         The hierarchical clustering encoded as a matrix. See `linkage` for more | ||
|  |         information. | ||
|  |     R : array_like | ||
|  |         The inconsistency matrix. | ||
|  |     i : int | ||
|  |         The column of `R` to use as the statistic. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     MR : ndarray | ||
|  |         Calculates the maximum statistic for the i'th column of the | ||
|  |         inconsistency matrix `R` for each non-singleton cluster | ||
|  |         node. ``MR[j]`` is the maximum over ``R[Q(j)-n, i]``, where | ||
|  |         ``Q(j)`` the set of all node ids corresponding to nodes below | ||
|  |         and including ``j``. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     linkage : for a description of what a linkage matrix is. | ||
|  |     inconsistent : for the creation of a inconsistency matrix. | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import median, inconsistent, maxRstat | ||
|  |     >>> from scipy.spatial.distance import pdist | ||
|  | 
 | ||
|  |     Given a data set ``X``, we can apply a clustering method to obtain a | ||
|  |     linkage matrix ``Z``. `scipy.cluster.hierarchy.inconsistent` can | ||
|  |     be also used to obtain the inconsistency matrix ``R`` associated to | ||
|  |     this clustering process: | ||
|  | 
 | ||
|  |     >>> X = [[0, 0], [0, 1], [1, 0], | ||
|  |     ...      [0, 4], [0, 3], [1, 4], | ||
|  |     ...      [4, 0], [3, 0], [4, 1], | ||
|  |     ...      [4, 4], [3, 4], [4, 3]] | ||
|  | 
 | ||
|  |     >>> Z = median(pdist(X)) | ||
|  |     >>> R = inconsistent(Z) | ||
|  |     >>> R | ||
|  |     array([[1.        , 0.        , 1.        , 0.        ], | ||
|  |            [1.        , 0.        , 1.        , 0.        ], | ||
|  |            [1.        , 0.        , 1.        , 0.        ], | ||
|  |            [1.        , 0.        , 1.        , 0.        ], | ||
|  |            [1.05901699, 0.08346263, 2.        , 0.70710678], | ||
|  |            [1.05901699, 0.08346263, 2.        , 0.70710678], | ||
|  |            [1.05901699, 0.08346263, 2.        , 0.70710678], | ||
|  |            [1.05901699, 0.08346263, 2.        , 0.70710678], | ||
|  |            [1.74535599, 1.08655358, 3.        , 1.15470054], | ||
|  |            [1.91202266, 1.37522872, 3.        , 1.15470054], | ||
|  |            [3.25      , 0.25      , 3.        , 0.        ]]) | ||
|  | 
 | ||
|  |     `scipy.cluster.hierarchy.maxRstat` can be used to compute | ||
|  |     the maximum value of each column of ``R``, for each non-singleton | ||
|  |     cluster and its children: | ||
|  | 
 | ||
|  |     >>> maxRstat(Z, R, 0) | ||
|  |     array([1.        , 1.        , 1.        , 1.        , 1.05901699, | ||
|  |            1.05901699, 1.05901699, 1.05901699, 1.74535599, 1.91202266, | ||
|  |            3.25      ]) | ||
|  |     >>> maxRstat(Z, R, 1) | ||
|  |     array([0.        , 0.        , 0.        , 0.        , 0.08346263, | ||
|  |            0.08346263, 0.08346263, 0.08346263, 1.08655358, 1.37522872, | ||
|  |            1.37522872]) | ||
|  |     >>> maxRstat(Z, R, 3) | ||
|  |     array([0.        , 0.        , 0.        , 0.        , 0.70710678, | ||
|  |            0.70710678, 0.70710678, 0.70710678, 1.15470054, 1.15470054, | ||
|  |            1.15470054]) | ||
|  | 
 | ||
|  |     """
 | ||
|  |     xp = array_namespace(Z, R) | ||
|  |     Z = _asarray(Z, order='C', dtype=xp.float64, xp=xp) | ||
|  |     R = _asarray(R, order='C', dtype=xp.float64, xp=xp) | ||
|  |     _is_valid_linkage(Z, throw=True, name='Z', xp=xp) | ||
|  |     _is_valid_im(R, throw=True, name='R', xp=xp) | ||
|  | 
 | ||
|  |     if not isinstance(i, int): | ||
|  |         raise TypeError('The third argument must be an integer.') | ||
|  | 
 | ||
|  |     if i < 0 or i > 3: | ||
|  |         raise ValueError('i must be an integer between 0 and 3 inclusive.') | ||
|  | 
 | ||
|  |     if Z.shape[0] != R.shape[0]: | ||
|  |         raise ValueError("The inconsistency matrix and linkage matrix each " | ||
|  |                          "have a different number of rows.") | ||
|  | 
 | ||
|  |     def cy_maxRstat(Z, R, i, validate): | ||
|  |         if validate: | ||
|  |             _is_valid_linkage(Z, throw=True, name='Z', xp=np) | ||
|  |             _is_valid_im(R, throw=True, name='R', xp=np) | ||
|  |         MR = np.zeros((Z.shape[0],)) | ||
|  |         n = Z.shape[0] + 1 | ||
|  |         _hierarchy.get_max_Rfield_for_each_cluster(Z, R, MR, n, i) | ||
|  |         return MR | ||
|  | 
 | ||
|  |     return xpx.lazy_apply(cy_maxRstat, Z, R, i=i, validate=is_lazy_array(Z), | ||
|  |                           shape=(Z.shape[0], ), dtype=xp.float64, | ||
|  |                           as_numpy=True, xp=xp) | ||
|  | 
 | ||
|  | 
 | ||
|  | # Data-dependent output shape makes it impossible to use jax.jit | ||
|  | @xp_capabilities(cpu_only=True, reason="Cython code", jax_jit=False, | ||
|  |                  warnings=[("dask.array", "merges chunks")]) | ||
|  | def leaders(Z, T): | ||
|  |     """
 | ||
|  |     Return the root nodes in a hierarchical clustering. | ||
|  | 
 | ||
|  |     Returns the root nodes in a hierarchical clustering corresponding | ||
|  |     to a cut defined by a flat cluster assignment vector ``T``. See | ||
|  |     the ``fcluster`` function for more information on the format of ``T``. | ||
|  | 
 | ||
|  |     For each flat cluster :math:`j` of the :math:`k` flat clusters | ||
|  |     represented in the n-sized flat cluster assignment vector ``T``, | ||
|  |     this function finds the lowest cluster node :math:`i` in the linkage | ||
|  |     tree Z, such that: | ||
|  | 
 | ||
|  |       * leaf descendants belong only to flat cluster j | ||
|  |         (i.e., ``T[p]==j`` for all :math:`p` in :math:`S(i)`, where | ||
|  |         :math:`S(i)` is the set of leaf ids of descendant leaf nodes | ||
|  |         with cluster node :math:`i`) | ||
|  | 
 | ||
|  |       * there does not exist a leaf that is not a descendant with | ||
|  |         :math:`i` that also belongs to cluster :math:`j` | ||
|  |         (i.e., ``T[q]!=j`` for all :math:`q` not in :math:`S(i)`). If | ||
|  |         this condition is violated, ``T`` is not a valid cluster | ||
|  |         assignment vector, and an exception will be thrown. | ||
|  | 
 | ||
|  |     Parameters | ||
|  |     ---------- | ||
|  |     Z : ndarray | ||
|  |         The hierarchical clustering encoded as a matrix. See | ||
|  |         `linkage` for more information. | ||
|  |     T : ndarray | ||
|  |         The flat cluster assignment vector. | ||
|  | 
 | ||
|  |     Returns | ||
|  |     ------- | ||
|  |     L : ndarray | ||
|  |         The leader linkage node id's stored as a k-element 1-D array, | ||
|  |         where ``k`` is the number of flat clusters found in ``T``. | ||
|  | 
 | ||
|  |         ``L[j]=i`` is the linkage cluster node id that is the | ||
|  |         leader of flat cluster with id M[j]. If ``i < n``, ``i`` | ||
|  |         corresponds to an original observation, otherwise it | ||
|  |         corresponds to a non-singleton cluster. | ||
|  |     M : ndarray | ||
|  |         The leader linkage node id's stored as a k-element 1-D array, where | ||
|  |         ``k`` is the number of flat clusters found in ``T``. This allows the | ||
|  |         set of flat cluster ids to be any arbitrary set of ``k`` integers. | ||
|  | 
 | ||
|  |         For example: if ``L[3]=2`` and ``M[3]=8``, the flat cluster with | ||
|  |         id 8's leader is linkage node 2. | ||
|  | 
 | ||
|  |     See Also | ||
|  |     -------- | ||
|  |     fcluster : for the creation of flat cluster assignments. | ||
|  | 
 | ||
|  |     Examples | ||
|  |     -------- | ||
|  |     >>> from scipy.cluster.hierarchy import ward, fcluster, leaders | ||
|  |     >>> from scipy.spatial.distance import pdist | ||
|  | 
 | ||
|  |     Given a linkage matrix ``Z`` - obtained after apply a clustering method | ||
|  |     to a dataset ``X`` - and a flat cluster assignment array ``T``: | ||
|  | 
 | ||
|  |     >>> X = [[0, 0], [0, 1], [1, 0], | ||
|  |     ...      [0, 4], [0, 3], [1, 4], | ||
|  |     ...      [4, 0], [3, 0], [4, 1], | ||
|  |     ...      [4, 4], [3, 4], [4, 3]] | ||
|  | 
 | ||
|  |     >>> Z = ward(pdist(X)) | ||
|  |     >>> Z | ||
|  |     array([[ 0.        ,  1.        ,  1.        ,  2.        ], | ||
|  |            [ 3.        ,  4.        ,  1.        ,  2.        ], | ||
|  |            [ 6.        ,  7.        ,  1.        ,  2.        ], | ||
|  |            [ 9.        , 10.        ,  1.        ,  2.        ], | ||
|  |            [ 2.        , 12.        ,  1.29099445,  3.        ], | ||
|  |            [ 5.        , 13.        ,  1.29099445,  3.        ], | ||
|  |            [ 8.        , 14.        ,  1.29099445,  3.        ], | ||
|  |            [11.        , 15.        ,  1.29099445,  3.        ], | ||
|  |            [16.        , 17.        ,  5.77350269,  6.        ], | ||
|  |            [18.        , 19.        ,  5.77350269,  6.        ], | ||
|  |            [20.        , 21.        ,  8.16496581, 12.        ]]) | ||
|  | 
 | ||
|  |     >>> T = fcluster(Z, 3, criterion='distance') | ||
|  |     >>> T | ||
|  |     array([1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4], dtype=int32) | ||
|  | 
 | ||
|  |     `scipy.cluster.hierarchy.leaders` returns the indices of the nodes | ||
|  |     in the dendrogram that are the leaders of each flat cluster: | ||
|  | 
 | ||
|  |     >>> L, M = leaders(Z, T) | ||
|  |     >>> L | ||
|  |     array([16, 17, 18, 19], dtype=int32) | ||
|  | 
 | ||
|  |     (remember that indices 0-11 point to the 12 data points in ``X``, | ||
|  |     whereas indices 12-22 point to the 11 rows of ``Z``) | ||
|  | 
 | ||
|  |     `scipy.cluster.hierarchy.leaders` also returns the indices of | ||
|  |     the flat clusters in ``T``: | ||
|  | 
 | ||
|  |     >>> M | ||
|  |     array([1, 2, 3, 4], dtype=int32) | ||
|  | 
 | ||
|  |     Notes | ||
|  |     ----- | ||
|  |     *Array API support (experimental):* This function returns arrays | ||
|  |     with data-dependent shape. In JAX, at the moment of writing this makes it | ||
|  |     impossible to execute it inside `@jax.jit`. | ||
|  |     """
 | ||
|  |     xp = array_namespace(Z, T) | ||
|  |     Z = _asarray(Z, order='C', dtype=xp.float64, xp=xp) | ||
|  |     T = _asarray(T, order='C', xp=xp) | ||
|  |     _is_valid_linkage(Z, throw=True, name='Z', xp=xp) | ||
|  | 
 | ||
|  |     if T.dtype != xp.int32: | ||
|  |         raise TypeError('T must be a 1-D array of dtype int32.') | ||
|  | 
 | ||
|  |     if T.shape[0] != Z.shape[0] + 1: | ||
|  |         raise ValueError('Mismatch: len(T)!=Z.shape[0] + 1.') | ||
|  | 
 | ||
|  |     n_obs = Z.shape[0] + 1 | ||
|  | 
 | ||
|  |     def cy_leaders(Z, T, validate): | ||
|  |         if validate: | ||
|  |             _is_valid_linkage(Z, throw=True, name='Z', xp=np) | ||
|  |         n_clusters = int(xpx.nunique(T)) | ||
|  |         L = np.zeros(n_clusters, dtype=np.int32) | ||
|  |         M = np.zeros(n_clusters, dtype=np.int32) | ||
|  |         s = _hierarchy.leaders(Z, T, L, M, n_clusters, n_obs) | ||
|  |         if s >= 0: | ||
|  |             raise ValueError('T is not a valid assignment vector. Error found ' | ||
|  |                              f'when examining linkage node {s} (< 2n-1).') | ||
|  |         return L, M | ||
|  | 
 | ||
|  |     return xpx.lazy_apply(cy_leaders, Z, T, validate=is_lazy_array(Z), | ||
|  |                           shape=((None,), (None, )), dtype=(xp.int32, xp.int32), | ||
|  |                           as_numpy=True, xp=xp) |