book follow-up

Tykayn 2025-08-30 18:14:14 +02:00 committed by tykayn
parent b4b4398bb0
commit 3a7a3849ae
12242 changed files with 2564461 additions and 6914 deletions

@@ -0,0 +1,90 @@
"""
================================
Datasets (:mod:`scipy.datasets`)
================================

.. currentmodule:: scipy.datasets

Dataset Methods
===============

.. autosummary::
   :toctree: generated/

   ascent
   face
   electrocardiogram

Utility Methods
===============

.. autosummary::
   :toctree: generated/

   download_all -- Download all the dataset files to specified path.
   clear_cache -- Clear cached dataset directory.

Usage of Datasets
=================

SciPy dataset methods can simply be called as follows: ``'<dataset-name>()'``.
This downloads the dataset files over the network once, saves them to a local
cache, and then returns a `numpy.ndarray` object representing the dataset.
Note that the return data structure and data type might differ between
dataset methods. For more detailed usage examples, see the documentation of
the particular dataset methods above.
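
For example, fetching the raccoon ``face`` image (a minimal doctest-style
sketch; the first call needs network access and the optional ``pooch``
dependency):

>>> from scipy import datasets
>>> face = datasets.face()  # downloaded once, then served from the cache
>>> face.shape
(768, 1024, 3)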

How dataset retrieval and storage works
=======================================

SciPy dataset files are stored within individual GitHub repositories under the
SciPy GitHub organization, following the naming convention
``'dataset-<name>'``; for example, the `scipy.datasets.face` files live at
https://github.com/scipy/dataset-face. The `scipy.datasets` submodule utilizes
and depends on `Pooch <https://www.fatiando.org/pooch/latest/>`_, a Python
package built to simplify fetching data files. Pooch uses these repos to
retrieve the respective dataset files when calling the dataset function.

A registry of all the datasets, essentially a mapping of filenames to their
SHA256 hashes and repo URLs, is maintained, which Pooch uses to handle and
verify the downloads on function call. After a dataset has been downloaded
once, the files are saved in the system cache directory under ``'scipy-data'``.
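
For example, the registry entry for the ``face`` dataset maps the filename
``face.dat`` to its SHA256 hash and to the URL
https://raw.githubusercontent.com/scipy/dataset-face/main/face.dat (see the
registry file later in this commit).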

Dataset cache locations may vary on different platforms.

For macOS::

    '~/Library/Caches/scipy-data'

For Linux and other Unix-like platforms::

    '~/.cache/scipy-data'  # or the value of the XDG_CACHE_HOME env var, if defined

For Windows::

    'C:\\Users\\<user>\\AppData\\Local\\<AppAuthor>\\scipy-data\\Cache'

In environments with constrained network connectivity (for various security
reasons) or on systems without a continuous internet connection, the cache
can be populated manually by placing the contents of the dataset repos in the
cache directory mentioned above; this avoids download errors when the dataset
functions are called without internet connectivity.
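
The active cache location on the current machine can be queried with Pooch
itself (an illustrative doctest; requires the optional ``pooch`` dependency,
and the exact path depends on the platform, hence the skipped output):

>>> import pooch  # doctest: +SKIP
>>> pooch.os_cache('scipy-data')  # doctest: +SKIP
PosixPath('/home/<user>/.cache/scipy-data')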
"""
from ._fetchers import face, ascent, electrocardiogram
from ._download_all import download_all
from ._utils import clear_cache

__all__ = ['ascent', 'electrocardiogram', 'face',
           'download_all', 'clear_cache']

from scipy._lib._testutils import PytestTester
test = PytestTester(__name__)
del PytestTester

@@ -0,0 +1,71 @@
"""
Platform independent script to download all the
`scipy.datasets` module data files.
This doesn't require a full scipy build.
Run: python _download_all.py <download_dir>
"""
import argparse
try:
import pooch
except ImportError:
pooch = None
if __package__ is None or __package__ == '':
# Running as python script, use absolute import
import _registry # type: ignore
else:
# Running as python module, use relative import
from . import _registry
def download_all(path=None):
"""
Utility method to download all the dataset files
for `scipy.datasets` module.
Parameters
----------
path : str, optional
Directory path to download all the dataset files.
If None, default to the system cache_dir detected by pooch.
Examples
--------
Download the datasets to the default cache location:
>>> from scipy import datasets
>>> datasets.download_all()
Download the datasets to the current directory:
>>> datasets.download_all(".")
"""
if pooch is None:
raise ImportError("Missing optional dependency 'pooch' required "
"for scipy.datasets module. Please use pip or "
"conda to install 'pooch'.")
if path is None:
path = pooch.os_cache('scipy-data')
# https://github.com/scipy/scipy/issues/21879
downloader = pooch.HTTPDownloader(headers={"User-Agent": "SciPy"})
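    # Each file is downloaded only if it is missing from `path` or if its
    # hash does not match the registry entry; otherwise the cached copy is
    # reused.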
for dataset_name, dataset_hash in _registry.registry.items():
pooch.retrieve(url=_registry.registry_urls[dataset_name],
known_hash=dataset_hash,
fname=dataset_name, path=path, downloader=downloader)
def main():
parser = argparse.ArgumentParser(description='Download SciPy data files.')
    # Default to None so that a missing 'pooch' raises the informative
    # ImportError inside download_all() rather than an AttributeError here.
    parser.add_argument("path", nargs='?', type=str,
                        default=None,
                        help="Directory path to download all the data files.")
args = parser.parse_args()
download_all(args.path)
if __name__ == "__main__":
main()

@@ -0,0 +1,225 @@
import sys
from numpy import array, frombuffer, load
from ._registry import registry, registry_urls
try:
import pooch
except ImportError:
pooch = None
data_fetcher = None
else:
data_fetcher = pooch.create(
# Use the default cache folder for the operating system
# Pooch uses appdirs (https://github.com/ActiveState/appdirs) to
# select an appropriate directory for the cache on each platform.
path=pooch.os_cache("scipy-data"),
# The remote data is on Github
# base_url is a required param, even though we override this
# using individual urls in the registry.
base_url="https://github.com/scipy/",
registry=registry,
urls=registry_urls
)
def fetch_data(dataset_name, data_fetcher=data_fetcher):
if data_fetcher is None:
raise ImportError("Missing optional dependency 'pooch' required "
"for scipy.datasets module. Please use pip or "
"conda to install 'pooch'.")
# https://github.com/scipy/scipy/issues/21879
downloader = pooch.HTTPDownloader(
headers={"User-Agent": f"SciPy {sys.modules['scipy'].__version__}"}
)
# The "fetch" method returns the full path to the downloaded data file.
return data_fetcher.fetch(dataset_name, downloader=downloader)
def ascent():
"""
Get an 8-bit grayscale bit-depth, 512 x 512 derived image for easy
use in demos.
The image is derived from
https://pixnio.com/people/accent-to-the-top
Parameters
----------
None
Returns
-------
ascent : ndarray
convenient image to use for testing and demonstration
Examples
--------
>>> import scipy.datasets
>>> ascent = scipy.datasets.ascent()
>>> ascent.shape
(512, 512)
>>> ascent.max()
np.uint8(255)
>>> import matplotlib.pyplot as plt
>>> plt.gray()
>>> plt.imshow(ascent)
>>> plt.show()
"""
import pickle
# The file will be downloaded automatically the first time this is run,
# returning the path to the downloaded file. Afterwards, Pooch finds
# it in the local cache and doesn't repeat the download.
fname = fetch_data("ascent.dat")
# Now we just need to load it with our standard Python tools.
with open(fname, 'rb') as f:
ascent = array(pickle.load(f))
return ascent
def electrocardiogram():
"""
Load an electrocardiogram as an example for a 1-D signal.
The returned signal is a 5 minute long electrocardiogram (ECG), a medical
recording of the heart's electrical activity, sampled at 360 Hz.
Returns
-------
ecg : ndarray
The electrocardiogram in millivolt (mV) sampled at 360 Hz.
Notes
-----
The provided signal is an excerpt (19:35 to 24:35) from the `record 208`_
(lead MLII) provided by the MIT-BIH Arrhythmia Database [1]_ on
PhysioNet [2]_. The excerpt includes noise induced artifacts, typical
heartbeats as well as pathological changes.
.. _record 208: https://physionet.org/physiobank/database/html/mitdbdir/records.htm#208
.. versionadded:: 1.1.0
References
----------
.. [1] Moody GB, Mark RG. The impact of the MIT-BIH Arrhythmia Database.
IEEE Eng in Med and Biol 20(3):45-50 (May-June 2001).
(PMID: 11446209); :doi:`10.13026/C2F305`
.. [2] Goldberger AL, Amaral LAN, Glass L, Hausdorff JM, Ivanov PCh,
Mark RG, Mietus JE, Moody GB, Peng C-K, Stanley HE. PhysioBank,
PhysioToolkit, and PhysioNet: Components of a New Research Resource
for Complex Physiologic Signals. Circulation 101(23):e215-e220;
:doi:`10.1161/01.CIR.101.23.e215`
Examples
--------
>>> from scipy.datasets import electrocardiogram
>>> ecg = electrocardiogram()
>>> ecg
array([-0.245, -0.215, -0.185, ..., -0.405, -0.395, -0.385], shape=(108000,))
>>> ecg.shape, ecg.mean(), ecg.std()
((108000,), -0.16510875, 0.5992473991177294)
As stated the signal features several areas with a different morphology.
E.g., the first few seconds show the electrical activity of a heart in
normal sinus rhythm as seen below.
>>> import numpy as np
>>> import matplotlib.pyplot as plt
>>> fs = 360
>>> time = np.arange(ecg.size) / fs
>>> plt.plot(time, ecg)
>>> plt.xlabel("time in s")
>>> plt.ylabel("ECG in mV")
>>> plt.xlim(9, 10.2)
>>> plt.ylim(-1, 1.5)
>>> plt.show()
After second 16, however, the first premature ventricular contractions,
also called extrasystoles, appear. These have a different morphology
compared to typical heartbeats. The difference can easily be observed
in the following plot.
>>> plt.plot(time, ecg)
>>> plt.xlabel("time in s")
>>> plt.ylabel("ECG in mV")
>>> plt.xlim(46.5, 50)
>>> plt.ylim(-2, 1.5)
>>> plt.show()
At several points large artifacts disturb the recording, e.g.:
>>> plt.plot(time, ecg)
>>> plt.xlabel("time in s")
>>> plt.ylabel("ECG in mV")
>>> plt.xlim(207, 215)
>>> plt.ylim(-2, 3.5)
>>> plt.show()
Finally, examining the power spectrum reveals that most of the biosignal is
made up of lower frequencies. At 60 Hz the noise induced by the mains
electricity can be clearly observed.
>>> from scipy.signal import welch
>>> f, Pxx = welch(ecg, fs=fs, nperseg=2048, scaling="spectrum")
>>> plt.semilogy(f, Pxx)
>>> plt.xlabel("Frequency in Hz")
>>> plt.ylabel("Power spectrum of the ECG in mV**2")
>>> plt.xlim(f[[0, -1]])
>>> plt.show()
"""
fname = fetch_data("ecg.dat")
with load(fname) as file:
ecg = file["ecg"].astype(int) # np.uint16 -> int
# Convert raw output of ADC to mV: (ecg - adc_zero) / adc_gain
ecg = (ecg - 1024) / 200.0
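    # e.g. a raw ADC sample of 1224 maps to (1224 - 1024) / 200.0 = 1.0 mV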
return ecg
def face(gray=False):
"""
Get a 1024 x 768, color image of a raccoon face.
The image is derived from
https://pixnio.com/fauna-animals/raccoons/raccoon-procyon-lotor
Parameters
----------
gray : bool, optional
If True return 8-bit grey-scale image, otherwise return a color image
Returns
-------
face : ndarray
image of a raccoon face
Examples
--------
>>> import scipy.datasets
>>> face = scipy.datasets.face()
>>> face.shape
(768, 1024, 3)
>>> face.max()
np.uint8(255)
>>> import matplotlib.pyplot as plt
>>> plt.gray()
>>> plt.imshow(face)
>>> plt.show()
"""
import bz2
fname = fetch_data("face.dat")
with open(fname, 'rb') as f:
rawdata = f.read()
face_data = bz2.decompress(rawdata)
face = frombuffer(face_data, dtype='uint8')
face.shape = (768, 1024, 3)
if gray is True:
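        # Approximate Rec. 709 luma weights for the RGB -> grayscale conversion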
face = (0.21 * face[:, :, 0] + 0.71 * face[:, :, 1] +
0.07 * face[:, :, 2]).astype('uint8')
return face

@@ -0,0 +1,26 @@
##########################################################################
# This file serves as the dataset registry for SciPy Datasets SubModule.
##########################################################################
# To generate the SHA256 hash, use the command
# openssl sha256 <filename>
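#
# Equivalently, in pure Python (a small sketch using only the standard
# library; `sha256sum` is an illustrative helper, not part of this module):
#
#     import hashlib
#
#     def sha256sum(filename):
#         with open(filename, "rb") as f:
#             return hashlib.sha256(f.read()).hexdigest()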
registry = {
"ascent.dat": "03ce124c1afc880f87b55f6b061110e2e1e939679184f5614e38dacc6c1957e2",
"ecg.dat": "f20ad3365fb9b7f845d0e5c48b6fe67081377ee466c3a220b7f69f35c8958baf",
"face.dat": "9d8b0b4d081313e2b485748c770472e5a95ed1738146883d84c7030493e82886"
}
registry_urls = {
"ascent.dat": "https://raw.githubusercontent.com/scipy/dataset-ascent/main/ascent.dat",
"ecg.dat": "https://raw.githubusercontent.com/scipy/dataset-ecg/main/ecg.dat",
"face.dat": "https://raw.githubusercontent.com/scipy/dataset-face/main/face.dat"
}
# dataset method mapping with their associated filenames
# <method_name> : ["filename1", "filename2", ...]
method_files_map = {
"ascent": ["ascent.dat"],
"electrocardiogram": ["ecg.dat"],
"face": ["face.dat"]
}

@@ -0,0 +1,81 @@
import os
import shutil
from ._registry import method_files_map
try:
import platformdirs
except ImportError:
platformdirs = None # type: ignore[assignment]
def _clear_cache(datasets, cache_dir=None, method_map=None):
if method_map is None:
# Use SciPy Datasets method map
method_map = method_files_map
if cache_dir is None:
# Use default cache_dir path
if platformdirs is None:
# platformdirs is pooch dependency
raise ImportError("Missing optional dependency 'pooch' required "
"for scipy.datasets module. Please use pip or "
"conda to install 'pooch'.")
cache_dir = platformdirs.user_cache_dir("scipy-data")
if not os.path.exists(cache_dir):
print(f"Cache Directory {cache_dir} doesn't exist. Nothing to clear.")
return
if datasets is None:
print(f"Cleaning the cache directory {cache_dir}!")
shutil.rmtree(cache_dir)
else:
if not isinstance(datasets, list | tuple):
# single dataset method passed should be converted to list
datasets = [datasets, ]
for dataset in datasets:
assert callable(dataset)
dataset_name = dataset.__name__ # Name of the dataset method
if dataset_name not in method_map:
raise ValueError(f"Dataset method {dataset_name} doesn't "
"exist. Please check if the passed dataset "
"is a subset of the following dataset "
f"methods: {list(method_map.keys())}")
data_files = method_map[dataset_name]
data_filepaths = [os.path.join(cache_dir, file)
for file in data_files]
for data_filepath in data_filepaths:
if os.path.exists(data_filepath):
print("Cleaning the file "
f"{os.path.split(data_filepath)[1]} "
f"for dataset {dataset_name}")
os.remove(data_filepath)
else:
print(f"Path {data_filepath} doesn't exist. "
"Nothing to clear.")
def clear_cache(datasets=None):
"""
Cleans the scipy datasets cache directory.
If a scipy.datasets method or a list/tuple of the same is
provided, then clear_cache removes all the data files
associated to the passed dataset method callable(s).
By default, it removes all the cached data files.
Parameters
----------
datasets : callable or list/tuple of callable or None
Examples
--------
>>> from scipy import datasets
>>> ascent_array = datasets.ascent()
>>> ascent_array.shape
(512, 512)
>>> datasets.clear_cache([datasets.ascent])
Cleaning the file ascent.dat for dataset ascent
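
    Calling ``clear_cache`` with no argument removes the entire cache
    directory (an illustrative doctest; the printed path depends on the
    platform, hence the skipped output):

    >>> datasets.clear_cache()  # doctest: +SKIP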
"""
_clear_cache(datasets)

@@ -0,0 +1,128 @@
from scipy.datasets._registry import registry
from scipy.datasets._fetchers import data_fetcher
from scipy.datasets._utils import _clear_cache
from scipy.datasets import ascent, face, electrocardiogram, download_all
from numpy.testing import assert_equal, assert_almost_equal
import os
from threading import get_ident
import pytest
try:
import pooch
except ImportError:
raise ImportError("Missing optional dependency 'pooch' required "
"for scipy.datasets module. Please use pip or "
"conda to install 'pooch'.")
data_dir = data_fetcher.path # type: ignore
def _has_hash(path, expected_hash):
"""Check if the provided path has the expected hash."""
if not os.path.exists(path):
return False
return pooch.file_hash(path) == expected_hash
class TestDatasets:
@pytest.fixture(scope='module', autouse=True)
def test_download_all(self):
# This fixture requires INTERNET CONNECTION
# test_setup phase
download_all()
yield
@pytest.mark.fail_slow(10)
def test_existence_all(self):
assert len(os.listdir(data_dir)) >= len(registry)
def test_ascent(self):
assert_equal(ascent().shape, (512, 512))
# hash check
assert _has_hash(os.path.join(data_dir, "ascent.dat"),
registry["ascent.dat"])
def test_face(self):
assert_equal(face().shape, (768, 1024, 3))
# hash check
assert _has_hash(os.path.join(data_dir, "face.dat"),
registry["face.dat"])
def test_electrocardiogram(self):
# Test shape, dtype and stats of signal
ecg = electrocardiogram()
assert_equal(ecg.dtype, float)
assert_equal(ecg.shape, (108000,))
assert_almost_equal(ecg.mean(), -0.16510875)
assert_almost_equal(ecg.std(), 0.5992473991177294)
# hash check
assert _has_hash(os.path.join(data_dir, "ecg.dat"),
registry["ecg.dat"])
def test_clear_cache(tmp_path):
# Note: `tmp_path` is a pytest fixture, it handles cleanup
thread_basepath = tmp_path / str(get_ident())
thread_basepath.mkdir()
dummy_basepath = thread_basepath / "dummy_cache_dir"
dummy_basepath.mkdir()
# Create three dummy dataset files for dummy dataset methods
dummy_method_map = {}
for i in range(4):
dummy_method_map[f"data{i}"] = [f"data{i}.dat"]
data_filepath = dummy_basepath / f"data{i}.dat"
data_filepath.write_text("")
    # clear files associated with the single dataset method data0;
    # also test a callable argument instead of a list of callables
def data0():
pass
_clear_cache(datasets=data0, cache_dir=dummy_basepath,
method_map=dummy_method_map)
assert not os.path.exists(dummy_basepath/"data0.dat")
    # clear files associated with multiple dataset methods "data1" and "data2"
def data1():
pass
def data2():
pass
_clear_cache(datasets=[data1, data2], cache_dir=dummy_basepath,
method_map=dummy_method_map)
assert not os.path.exists(dummy_basepath/"data1.dat")
assert not os.path.exists(dummy_basepath/"data2.dat")
    # clear multiple dataset files "data4_0.dat" and "data4_1.dat"
    # associated with dataset method "data4"
def data4():
pass
# create files
(dummy_basepath / "data4_0.dat").write_text("")
(dummy_basepath / "data4_1.dat").write_text("")
dummy_method_map["data4"] = ["data4_0.dat", "data4_1.dat"]
_clear_cache(datasets=[data4], cache_dir=dummy_basepath,
method_map=dummy_method_map)
assert not os.path.exists(dummy_basepath/"data4_0.dat")
assert not os.path.exists(dummy_basepath/"data4_1.dat")
# wrong dataset method should raise ValueError since it
# doesn't exist in the dummy_method_map
def data5():
pass
with pytest.raises(ValueError):
_clear_cache(datasets=[data5], cache_dir=dummy_basepath,
method_map=dummy_method_map)
# remove all dataset cache
_clear_cache(datasets=None, cache_dir=dummy_basepath)
assert not os.path.exists(dummy_basepath)