from __future__ import annotations

import os
import re
from collections.abc import Collection, Container, Iterable, Iterator, Mapping, Sequence
from datetime import timezone
from enum import Enum, auto
from functools import cache, lru_cache, partial, wraps
from importlib.util import find_spec
from inspect import getattr_static, getdoc
from itertools import chain
from operator import attrgetter
from secrets import token_hex
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Generic,
    Literal,
    Protocol,
    TypeVar,
    Union,
    cast,
    overload,
)

from narwhals._enum import NoAutoEnum
from narwhals._exceptions import issue_deprecation_warning
from narwhals._typing_compat import assert_never, deprecated
from narwhals.dependencies import (
    get_cudf,
    get_dask_dataframe,
    get_duckdb,
    get_ibis,
    get_modin,
    get_pandas,
    get_polars,
    get_pyarrow,
    get_pyspark_connect,
    get_pyspark_sql,
    get_sqlframe,
    is_narwhals_series,
    is_narwhals_series_int,
    is_numpy_array_1d,
    is_numpy_array_1d_int,
    is_pandas_like_dataframe,
    is_pandas_like_series,
)
from narwhals.exceptions import ColumnNotFoundError, DuplicateError, InvalidOperationError

# Everything in this branch exists for static type checkers only.
if TYPE_CHECKING:
    from collections.abc import Set  # noqa: PYI025
    from types import ModuleType

    import pandas as pd
    import polars as pl
    import pyarrow as pa
    from typing_extensions import (
        Concatenate,
        LiteralString,
        ParamSpec,
        Self,
        TypeAlias,
        TypeIs,
    )

    from narwhals._compliant import (
        CompliantExpr,
        CompliantExprT,
        CompliantFrameT,
        CompliantSeriesOrNativeExprT_co,
        CompliantSeriesT,
        NativeFrameT_co,
        NativeSeriesT_co,
    )
    from narwhals._compliant.typing import EvalNames, NativeLazyFrameT
    from narwhals._namespace import Namespace
    from narwhals._translate import ArrowStreamExportable, IntoArrowTable, ToNarwhalsT_co
    from narwhals._typing import (
        Backend,
        IntoBackend,
        _DataFrameLazyImpl,
        _EagerAllowedImpl,
        _LazyFrameCollectImpl,
    )
    from narwhals.dataframe import DataFrame, LazyFrame
    from narwhals.dtypes import DType
    from narwhals.series import Series
    from narwhals.typing import (
        CompliantDataFrame,
        CompliantLazyFrame,
        CompliantSeries,
        DTypes,
        IntoSeriesT,
        MultiIndexSelector,
        SingleIndexSelector,
        SizedMultiIndexSelector,
        SizeUnit,
        SupportsNativeNamespace,
        TimeUnit,
        _1DArray,
        _SliceIndex,
        _SliceName,
        _SliceNone,
    )

    # Alias for a backend name given as a plain string.
    UnknownBackendName: TypeAlias = str

    # Constrained to the three public Narwhals containers.
    FrameOrSeriesT = TypeVar(
        "FrameOrSeriesT", bound=Union[LazyFrame[Any], DataFrame[Any], Series[Any]]
    )

    # Positional type variables used by the tuple overloads further down the module.
    _T1 = TypeVar("_T1")
    _T2 = TypeVar("_T2")
    _T3 = TypeVar("_T3")
    _T4 = TypeVar("_T4")
    _T5 = TypeVar("_T5")
    _T6 = TypeVar("_T6")
    _T7 = TypeVar("_T7")

    _Fn = TypeVar("_Fn", bound="Callable[..., Any]")
    P = ParamSpec("P")
    R = TypeVar("R")
    R1 = TypeVar("R1")
    R2 = TypeVar("R2")

    class _SupportsVersion(Protocol):
        """Any object exposing a `__version__` string attribute."""

        __version__: str

    class _SupportsGet(Protocol):  # noqa: PYI046
        """Any object implementing the descriptor `__get__` method."""

        def __get__(self, instance: Any, owner: Any | None = None, /) -> Any: ...

    class _StoresColumns(Protocol):
        """Any object exposing a `columns` property that returns a sequence of strings."""

        @property
        def columns(self) -> Sequence[str]: ...


# Defined at runtime (outside `TYPE_CHECKING`) because the generic protocols below
# subscript `Protocol[...]` with them when the module is imported.
_T = TypeVar("_T")
NativeT_co = TypeVar("NativeT_co", covariant=True)
CompliantT_co = TypeVar("CompliantT_co", covariant=True)
_ContextT = TypeVar("_ContextT", bound="_FullContext")
# String aliases: the names inside are only resolvable by a type checker.
_Method: TypeAlias = "Callable[Concatenate[_ContextT, P], R]"
_Constructor: TypeAlias = "Callable[Concatenate[_T, P], R2]"


class _StoresNative(Protocol[NativeT_co]):  # noqa: PYI046
    """Provides access to a native object.

    Native objects have types like:

    >>> from pandas import Series
    >>> from pyarrow import Table
    """

    @property
    def native(self) -> NativeT_co:
        """Return the native object."""
        ...


class _StoresCompliant(Protocol[CompliantT_co]):  # noqa: PYI046
    """Provides access to a compliant object.

    Compliant objects have types like:

    >>> from narwhals._pandas_like.series import PandasLikeSeries
    >>> from narwhals._arrow.dataframe import ArrowDataFrame
    """

    @property
    def compliant(self) -> CompliantT_co:
        """Return the compliant object."""
        ...


class _StoresBackendVersion(Protocol):
    """Provides a `_backend_version` property returning a tuple of ints."""

    @property
    def _backend_version(self) -> tuple[int, ...]:
        """Version tuple for a native package."""
        ...
class _StoresVersion(Protocol):
    _version: Version
    """Narwhals API version (V1 or MAIN)."""


class _StoresImplementation(Protocol):
    _implementation: Implementation
    """Implementation of native object (pandas, Polars, PyArrow, ...)."""


class _LimitedContext(_StoresImplementation, _StoresVersion, Protocol):
    """Provides 2 attributes.

    - `_implementation`
    - `_version`
    """


class _FullContext(_StoresBackendVersion, _LimitedContext, Protocol):
    """Provides 3 attributes.

    - `_implementation`
    - `_backend_version`
    - `_version`
    """


class ValidateBackendVersion(_StoresImplementation, Protocol):
    """Ensure the target `Implementation` is on a supported version."""

    def _validate_backend_version(self) -> None:
        """Raise if installed version below `nw._utils.MIN_VERSIONS`.

        **Only use this when moving between backends.**
        Otherwise, the validation will have taken place already.
        """
        # Called for its side effect only: `backend_version` raises when the installed
        # version is below the minimum, so the returned tuple is discarded.
        _ = self._implementation._backend_version()


class Version(Enum):
    """Narwhals API version (`V1`, `V2` or `MAIN`).

    Each property imports the matching version-specific object inside the property
    body, so nothing from `narwhals.stable` is imported until it is requested.
    """

    V1 = auto()
    V2 = auto()
    MAIN = auto()

    @property
    def namespace(self) -> type[Namespace[Any]]:
        """Return the `Namespace` class for this API version."""
        if self is Version.V1:
            from narwhals.stable.v1._namespace import Namespace as NamespaceV1

            return NamespaceV1
        if self is Version.V2:
            from narwhals.stable.v2._namespace import Namespace as NamespaceV2

            return NamespaceV2
        from narwhals._namespace import Namespace

        return Namespace

    @property
    def dtypes(self) -> DTypes:
        """Return the `dtypes` module for this API version."""
        if self is Version.V1:
            from narwhals.stable.v1 import dtypes as dtypes_v1

            return dtypes_v1
        if self is Version.V2:
            from narwhals.stable.v2 import dtypes as dtypes_v2

            return dtypes_v2
        from narwhals import dtypes

        return dtypes

    @property
    def dataframe(self) -> type[DataFrame[Any]]:
        """Return the `DataFrame` class for this API version."""
        if self is Version.V1:
            from narwhals.stable.v1 import DataFrame as DataFrameV1

            return DataFrameV1
        if self is Version.V2:
            from narwhals.stable.v2 import DataFrame as DataFrameV2

            return DataFrameV2
        from narwhals.dataframe import DataFrame

        return DataFrame

    @property
    def lazyframe(self) -> type[LazyFrame[Any]]:
        """Return the `LazyFrame` class for this API version."""
        if self is Version.V1:
            from narwhals.stable.v1 import LazyFrame as LazyFrameV1

            return LazyFrameV1
        if self is Version.V2:
            from narwhals.stable.v2 import LazyFrame as LazyFrameV2

            return LazyFrameV2
        from narwhals.dataframe import LazyFrame

        return LazyFrame

    @property
    def series(self) -> type[Series[Any]]:
        """Return the `Series` class for this API version."""
        if self is Version.V1:
            from narwhals.stable.v1 import Series as SeriesV1

            return SeriesV1
        if self is Version.V2:
            from narwhals.stable.v2 import Series as SeriesV2

            return SeriesV2
        from narwhals.series import Series

        return Series


class Implementation(NoAutoEnum):
    """Implementation of native object (pandas, Polars, PyArrow, ...)."""

    PANDAS = "pandas"
    """pandas implementation."""
    MODIN = "modin"
    """Modin implementation."""
    CUDF = "cudf"
    """cuDF implementation."""
    PYARROW = "pyarrow"
    """PyArrow implementation."""
    PYSPARK = "pyspark"
    """PySpark implementation."""
    POLARS = "polars"
    """Polars implementation."""
    DASK = "dask"
    """Dask implementation."""
    DUCKDB = "duckdb"
    """DuckDB implementation."""
    IBIS = "ibis"
    """Ibis implementation."""
    SQLFRAME = "sqlframe"
    """SQLFrame implementation."""
    PYSPARK_CONNECT = "pyspark[connect]"
    """PySpark Connect implementation."""
    UNKNOWN = "unknown"
    """Unknown implementation."""

    def __str__(self) -> str:
        """Return the member's value as a string."""
        return str(self.value)

    @classmethod
    def from_native_namespace(
        cls: type[Self], native_namespace: ModuleType
    ) -> Implementation:  # pragma: no cover
        """Instantiate Implementation object from a native namespace module.

        Modules that are not among the looked-up backends map to `UNKNOWN`.

        Arguments:
            native_namespace: Native namespace.
        """
        mapping = {
            get_pandas(): Implementation.PANDAS,
            get_modin(): Implementation.MODIN,
            get_cudf(): Implementation.CUDF,
            get_pyarrow(): Implementation.PYARROW,
            get_pyspark_sql(): Implementation.PYSPARK,
            get_polars(): Implementation.POLARS,
            get_dask_dataframe(): Implementation.DASK,
            get_duckdb(): Implementation.DUCKDB,
            get_ibis(): Implementation.IBIS,
            get_sqlframe(): Implementation.SQLFRAME,
            get_pyspark_connect(): Implementation.PYSPARK_CONNECT,
        }
        return mapping.get(native_namespace, Implementation.UNKNOWN)

    @classmethod
    def from_string(cls: type[Self], backend_name: str) -> Implementation:
        """Instantiate Implementation object from a backend name string.

        Names that do not match the value of any member map to `UNKNOWN`.

        Arguments:
            backend_name: Name of backend, expressed as string.
        """
        try:
            return cls(backend_name)
        except ValueError:
            return Implementation.UNKNOWN

    @classmethod
    def from_backend(
        cls: type[Self], backend: IntoBackend[Backend] | UnknownBackendName
    ) -> Implementation:
        """Instantiate from native namespace module, string, or Implementation.

        Arguments:
            backend: Backend to instantiate Implementation from.
        """
        # Dispatch on the argument type: str -> `from_string`,
        # Implementation -> returned unchanged, anything else -> treated as a module.
        return (
            cls.from_string(backend)
            if isinstance(backend, str)
            else backend
            if isinstance(backend, Implementation)
            else cls.from_native_namespace(backend)
        )

    def to_native_namespace(self) -> ModuleType:
        """Return the native namespace module corresponding to Implementation.

        Raises:
            AssertionError: If called on `Implementation.UNKNOWN`.
        """
        if self is Implementation.UNKNOWN:
            msg = "Cannot return native namespace from UNKNOWN Implementation"
            raise AssertionError(msg)

        # Called for its validation side effect (minimum supported version check).
        self._backend_version()
        module_name = _IMPLEMENTATION_TO_MODULE_NAME.get(self, self.value)
        return _import_native_namespace(module_name)

    def is_pandas(self) -> bool:
        """Return whether implementation is pandas.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_native = pd.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_pandas()
            True
        """
        return self is Implementation.PANDAS

    def is_pandas_like(self) -> bool:
        """Return whether implementation is pandas, Modin, or cuDF.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_native = pd.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_pandas_like()
            True
        """
        return self in {Implementation.PANDAS, Implementation.MODIN, Implementation.CUDF}

    def is_spark_like(self) -> bool:
        """Return whether implementation is PySpark, SQLFrame, or PySpark Connect.

        Examples:
            >>> import pandas as pd
            >>> import narwhals as nw
            >>> df_native = pd.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_spark_like()
            False
        """
        return self in {
            Implementation.PYSPARK,
            Implementation.SQLFRAME,
            Implementation.PYSPARK_CONNECT,
        }

    def is_polars(self) -> bool:
        """Return whether implementation is Polars.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_polars()
            True
        """
        return self is Implementation.POLARS

    def is_cudf(self) -> bool:
        """Return whether implementation is cuDF.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_cudf()
            False
        """
        return self is Implementation.CUDF  # pragma: no cover

    def is_modin(self) -> bool:
        """Return whether implementation is Modin.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_modin()
            False
        """
        return self is Implementation.MODIN  # pragma: no cover

    def is_pyspark(self) -> bool:
        """Return whether implementation is PySpark.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_pyspark()
            False
        """
        return self is Implementation.PYSPARK  # pragma: no cover

    def is_pyspark_connect(self) -> bool:
        """Return whether implementation is PySpark Connect.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_pyspark_connect()
            False
        """
        return self is Implementation.PYSPARK_CONNECT  # pragma: no cover

    def is_pyarrow(self) -> bool:
        """Return whether implementation is PyArrow.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_pyarrow()
            False
        """
        return self is Implementation.PYARROW  # pragma: no cover

    def is_dask(self) -> bool:
        """Return whether implementation is Dask.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_dask()
            False
        """
        return self is Implementation.DASK  # pragma: no cover

    def is_duckdb(self) -> bool:
        """Return whether implementation is DuckDB.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_duckdb()
            False
        """
        return self is Implementation.DUCKDB  # pragma: no cover

    def is_ibis(self) -> bool:
        """Return whether implementation is Ibis.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_ibis()
            False
        """
        return self is Implementation.IBIS  # pragma: no cover

    def is_sqlframe(self) -> bool:
        """Return whether implementation is SQLFrame.

        Examples:
            >>> import polars as pl
            >>> import narwhals as nw
            >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
            >>> df = nw.from_native(df_native)
            >>> df.implementation.is_sqlframe()
            False
        """
        return self is Implementation.SQLFRAME  # pragma: no cover

    def _backend_version(self) -> tuple[int, ...]:
        """Returns backend version."""
        return backend_version(self)


# Lowest backend versions accepted by `backend_version`; `UNKNOWN` has no entry.
MIN_VERSIONS: Mapping[Implementation, tuple[int, ...]] = {
    Implementation.PANDAS: (1, 1, 3),
    Implementation.MODIN: (0, 8, 2),
    Implementation.CUDF: (24, 10),
    Implementation.PYARROW: (13,),
    Implementation.PYSPARK: (3, 5),
    Implementation.PYSPARK_CONNECT: (3, 5),
    Implementation.POLARS: (0, 20, 4),
    Implementation.DASK: (2024, 8),
    Implementation.DUCKDB: (1,),
    Implementation.IBIS: (6,),
    Implementation.SQLFRAME: (3, 22, 0),
}

_IMPLEMENTATION_TO_MODULE_NAME: Mapping[Implementation, str] = {
    Implementation.DASK: "dask.dataframe",
    Implementation.MODIN: "modin.pandas",
    Implementation.PYSPARK: "pyspark.sql",
    Implementation.PYSPARK_CONNECT: "pyspark.sql.connect",
}
"""Stores non default mapping from Implementation to module name"""


@lru_cache(maxsize=16)
def _import_native_namespace(module_name: str) -> ModuleType:
    """Import `module_name` and return the module (results are cached per name)."""
    from importlib import import_module

    return import_module(module_name)


# NOTE: We can safely use an unbounded cache, the size is constrained by `len(Implementation._member_names_)`
# Faster than `lru_cache`
# https://docs.python.org/3/library/functools.html#functools.cache
@cache
def backend_version(implementation: Implementation, /) -> tuple[int, ...]:
    """Return the installed version of `implementation` as a tuple of ints.

    Arguments:
        implementation: Backend whose version is requested.

    Returns:
        Parsed version of the backend package.

    Raises:
        AssertionError: If `implementation` is `Implementation.UNKNOWN`.
        ValueError: If the parsed version is lower than the `MIN_VERSIONS` entry.
    """
    if not isinstance(implementation, Implementation):
        assert_never(implementation)
    if implementation is Implementation.UNKNOWN:  # pragma: no cover
        msg = "Cannot return backend version from UNKNOWN Implementation"
        raise AssertionError(msg)
    into_version: ModuleType | str
    impl = implementation
    module_name = _IMPLEMENTATION_TO_MODULE_NAME.get(impl, impl.value)
    # Imported for every backend, but only used as the version source in the
    # `else` branch below.
    native_namespace = _import_native_namespace(module_name)
    # For these backends the version is read from a different module than the one
    # named in `_IMPLEMENTATION_TO_MODULE_NAME` / the member value.
    if impl.is_sqlframe():
        import sqlframe._version

        into_version = sqlframe._version
    elif impl.is_pyspark() or impl.is_pyspark_connect():  # pragma: no cover
        import pyspark  # ignore-banned-import

        into_version = pyspark
    elif impl.is_dask():
        import dask  # ignore-banned-import

        into_version = dask
    else:
        into_version = native_namespace
    version = parse_version(into_version)
    if version < (min_version := MIN_VERSIONS[impl]):
        msg = f"Minimum version of {impl} supported by Narwhals is {min_version}, found: {version}"
        raise ValueError(msg)
    return version


def flatten(args: Any) -> list[Any]:
    """Return `args` as a list, unwrapping a single iterable argument.

    When `args` holds exactly one element and `_is_iterable` accepts it, the items of
    that element are returned; otherwise the items of `args` itself are returned.
    """
    return list(args[0] if (len(args) == 1 and _is_iterable(args[0])) else args)


def tupleify(arg: Any) -> Any:
    """Wrap `arg` in a 1-tuple unless it is already a list or a tuple."""
    if not isinstance(arg, (list, tuple)):  # pragma: no cover
        return (arg,)
    return arg


def _is_iterable(arg: Any | Iterable[Any]) -> bool:
    """Return whether `arg` is an iterable other than `str`, `bytes` or a Narwhals Series.

    Raises:
        TypeError: If `arg` is a pandas Series/DataFrame or a Polars
            Series/Expr/DataFrame/LazyFrame.
    """
    from narwhals.series import Series

    if (
        (pd := get_pandas()) is not None and isinstance(arg, (pd.Series, pd.DataFrame))
    ) or (
        (pl := get_polars()) is not None
        and isinstance(arg, (pl.Series, pl.Expr, pl.DataFrame, pl.LazyFrame))
    ):
        # Non-exhaustive check for common potential mistakes.
        msg = (
            f"Expected Narwhals class or scalar, got: {qualified_type_name(arg)!r}.\n\n"
            "Hint: Perhaps you\n"
            "- forgot a `nw.from_native` somewhere?\n"
            "- used `pl.col` instead of `nw.col`?"
        )
        raise TypeError(msg)

    return isinstance(arg, Iterable) and not isinstance(arg, (str, bytes, Series))


def parse_version(version: str | ModuleType | _SupportsVersion) -> tuple[int, ...]:
    """Simple version parser; split into a tuple of ints for comparison.

    Arguments:
        version: Version string, or object with one, to parse.

    Returns:
        Parsed version number.
    """
    # lifted from Polars
    # [marco]: Take care of DuckDB pre-releases which end with e.g. `-dev4108`
    # and pandas pre-releases which end with e.g. .dev0+618.gb552dc95c9
    version_str = version if isinstance(version, str) else version.__version__
    version_str = re.sub(r"(\D?dev.*$)", "", version_str)
    # Non-digit characters are stripped from each dot-separated component before
    # converting it to an int.
    return tuple(int(re.sub(r"\D", "", v)) for v in version_str.split("."))


# Typing-only overloads: each arity comes as a pair, a class-only input first and an
# object-or-class input second.
@overload
def isinstance_or_issubclass(
    obj_or_cls: type, cls_or_tuple: type[_T]
) -> TypeIs[type[_T]]: ...


@overload
def isinstance_or_issubclass(
    obj_or_cls: object | type, cls_or_tuple: type[_T]
) -> TypeIs[_T | type[_T]]: ...


@overload
def isinstance_or_issubclass(
    obj_or_cls: type, cls_or_tuple: tuple[type[_T1], type[_T2]]
) -> TypeIs[type[_T1 | _T2]]: ...


@overload
def isinstance_or_issubclass(
    obj_or_cls: object | type, cls_or_tuple: tuple[type[_T1], type[_T2]]
) -> TypeIs[_T1 | _T2 | type[_T1 | _T2]]: ...


@overload
def isinstance_or_issubclass(
    obj_or_cls: type, cls_or_tuple: tuple[type[_T1], type[_T2], type[_T3]]
) -> TypeIs[type[_T1 | _T2 | _T3]]: ...


@overload
def isinstance_or_issubclass(
    obj_or_cls: object | type, cls_or_tuple: tuple[type[_T1], type[_T2], type[_T3]]
) -> TypeIs[_T1 | _T2 | _T3 | type[_T1 | _T2 | _T3]]: ...


@overload
def isinstance_or_issubclass(
    obj_or_cls: type, cls_or_tuple: tuple[type[_T1], type[_T2], type[_T3], type[_T4]]
) -> TypeIs[type[_T1 | _T2 | _T3 | _T4]]: ...


@overload
def isinstance_or_issubclass(
    obj_or_cls: object | type,
    cls_or_tuple: tuple[type[_T1], type[_T2], type[_T3], type[_T4]],
) -> TypeIs[_T1 | _T2 | _T3 | _T4 | type[_T1 | _T2 | _T3 | _T4]]: ...


@overload
def isinstance_or_issubclass(
    obj_or_cls: type,
    cls_or_tuple: tuple[type[_T1], type[_T2], type[_T3], type[_T4], type[_T5]],
) -> TypeIs[type[_T1 | _T2 | _T3 | _T4 | _T5]]: ...


@overload
def isinstance_or_issubclass(
    obj_or_cls: object | type,
    cls_or_tuple: tuple[type[_T1], type[_T2], type[_T3], type[_T4], type[_T5]],
) -> TypeIs[_T1 | _T2 | _T3 | _T4 | _T5 | type[_T1 | _T2 | _T3 | _T4 | _T5]]: ...
@overload def isinstance_or_issubclass( obj_or_cls: type, cls_or_tuple: tuple[type[_T1], type[_T2], type[_T3], type[_T4], type[_T5], type[_T6]], ) -> TypeIs[type[_T1 | _T2 | _T3 | _T4 | _T5 | _T6]]: ... @overload def isinstance_or_issubclass( obj_or_cls: object | type, cls_or_tuple: tuple[type[_T1], type[_T2], type[_T3], type[_T4], type[_T5], type[_T6]], ) -> TypeIs[ _T1 | _T2 | _T3 | _T4 | _T5 | _T6 | type[_T1 | _T2 | _T3 | _T4 | _T5 | _T6] ]: ... @overload def isinstance_or_issubclass( obj_or_cls: type, cls_or_tuple: tuple[ type[_T1], type[_T2], type[_T3], type[_T4], type[_T5], type[_T6], type[_T7] ], ) -> TypeIs[type[_T1 | _T2 | _T3 | _T4 | _T5 | _T6 | _T7]]: ... @overload def isinstance_or_issubclass( obj_or_cls: object | type, cls_or_tuple: tuple[ type[_T1], type[_T2], type[_T3], type[_T4], type[_T5], type[_T6], type[_T7] ], ) -> TypeIs[ _T1 | _T2 | _T3 | _T4 | _T5 | _T6 | _T7 | type[_T1 | _T2 | _T3 | _T4 | _T5 | _T6 | _T7] ]: ... @overload def isinstance_or_issubclass( obj_or_cls: Any, cls_or_tuple: tuple[type, ...] ) -> TypeIs[Any]: ... def isinstance_or_issubclass(obj_or_cls: Any, cls_or_tuple: Any) -> bool: from narwhals.dtypes import DType if isinstance(obj_or_cls, DType): return isinstance(obj_or_cls, cls_or_tuple) return isinstance(obj_or_cls, cls_or_tuple) or ( isinstance(obj_or_cls, type) and issubclass(obj_or_cls, cls_or_tuple) ) def validate_laziness(items: Iterable[Any]) -> None: from narwhals.dataframe import DataFrame, LazyFrame if all(isinstance(item, DataFrame) for item in items) or ( all(isinstance(item, LazyFrame) for item in items) ): return msg = f"The items to concatenate should either all be eager, or all lazy, got: {[type(item) for item in items]}" raise TypeError(msg) def maybe_align_index( lhs: FrameOrSeriesT, rhs: Series[Any] | DataFrame[Any] | LazyFrame[Any] ) -> FrameOrSeriesT: """Align `lhs` to the Index of `rhs`, if they're both pandas-like. Arguments: lhs: Dataframe or Series. rhs: Dataframe or Series to align with. 
Returns: Same type as input. Notes: This is only really intended for backwards-compatibility purposes, for example if your library already aligns indices for users. If you're designing a new library, we highly encourage you to not rely on the Index. For non-pandas-like inputs, this only checks that `lhs` and `rhs` are the same length. Examples: >>> import pandas as pd >>> import polars as pl >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [1, 2]}, index=[3, 4]) >>> s_pd = pd.Series([6, 7], index=[4, 3]) >>> df = nw.from_native(df_pd) >>> s = nw.from_native(s_pd, series_only=True) >>> nw.to_native(nw.maybe_align_index(df, s)) a 4 2 3 1 """ from narwhals._pandas_like.dataframe import PandasLikeDataFrame from narwhals._pandas_like.series import PandasLikeSeries def _validate_index(index: Any) -> None: if not index.is_unique: msg = "given index doesn't have a unique index" raise ValueError(msg) lhs_any = cast("Any", lhs) rhs_any = cast("Any", rhs) if isinstance( getattr(lhs_any, "_compliant_frame", None), PandasLikeDataFrame ) and isinstance(getattr(rhs_any, "_compliant_frame", None), PandasLikeDataFrame): _validate_index(lhs_any._compliant_frame.native.index) _validate_index(rhs_any._compliant_frame.native.index) return lhs_any._with_compliant( lhs_any._compliant_frame._with_native( lhs_any._compliant_frame.native.loc[rhs_any._compliant_frame.native.index] ) ) if isinstance( getattr(lhs_any, "_compliant_frame", None), PandasLikeDataFrame ) and isinstance(getattr(rhs_any, "_compliant_series", None), PandasLikeSeries): _validate_index(lhs_any._compliant_frame.native.index) _validate_index(rhs_any._compliant_series.native.index) return lhs_any._with_compliant( lhs_any._compliant_frame._with_native( lhs_any._compliant_frame.native.loc[ rhs_any._compliant_series.native.index ] ) ) if isinstance( getattr(lhs_any, "_compliant_series", None), PandasLikeSeries ) and isinstance(getattr(rhs_any, "_compliant_frame", None), PandasLikeDataFrame): 
_validate_index(lhs_any._compliant_series.native.index) _validate_index(rhs_any._compliant_frame.native.index) return lhs_any._with_compliant( lhs_any._compliant_series._with_native( lhs_any._compliant_series.native.loc[ rhs_any._compliant_frame.native.index ] ) ) if isinstance( getattr(lhs_any, "_compliant_series", None), PandasLikeSeries ) and isinstance(getattr(rhs_any, "_compliant_series", None), PandasLikeSeries): _validate_index(lhs_any._compliant_series.native.index) _validate_index(rhs_any._compliant_series.native.index) return lhs_any._with_compliant( lhs_any._compliant_series._with_native( lhs_any._compliant_series.native.loc[ rhs_any._compliant_series.native.index ] ) ) if len(lhs_any) != len(rhs_any): msg = f"Expected `lhs` and `rhs` to have the same length, got {len(lhs_any)} and {len(rhs_any)}" raise ValueError(msg) return lhs def maybe_get_index(obj: DataFrame[Any] | LazyFrame[Any] | Series[Any]) -> Any | None: """Get the index of a DataFrame or a Series, if it's pandas-like. Arguments: obj: Dataframe or Series. Returns: Same type as input. Notes: This is only really intended for backwards-compatibility purposes, for example if your library already aligns indices for users. If you're designing a new library, we highly encourage you to not rely on the Index. For non-pandas-like inputs, this returns `None`. 
Examples: >>> import pandas as pd >>> import polars as pl >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]}) >>> df = nw.from_native(df_pd) >>> nw.maybe_get_index(df) RangeIndex(start=0, stop=2, step=1) >>> series_pd = pd.Series([1, 2]) >>> series = nw.from_native(series_pd, series_only=True) >>> nw.maybe_get_index(series) RangeIndex(start=0, stop=2, step=1) """ obj_any = cast("Any", obj) native_obj = obj_any.to_native() if is_pandas_like_dataframe(native_obj) or is_pandas_like_series(native_obj): return native_obj.index return None def maybe_set_index( obj: FrameOrSeriesT, column_names: str | list[str] | None = None, *, index: Series[IntoSeriesT] | list[Series[IntoSeriesT]] | None = None, ) -> FrameOrSeriesT: """Set the index of a DataFrame or a Series, if it's pandas-like. Arguments: obj: object for which maybe set the index (can be either a Narwhals `DataFrame` or `Series`). column_names: name or list of names of the columns to set as index. For dataframes, only one of `column_names` and `index` can be specified but not both. If `column_names` is passed and `df` is a Series, then a `ValueError` is raised. index: series or list of series to set as index. Returns: Same type as input. Raises: ValueError: If one of the following conditions happens - none of `column_names` and `index` are provided - both `column_names` and `index` are provided - `column_names` is provided and `df` is a Series Notes: This is only really intended for backwards-compatibility purposes, for example if your library already aligns indices for users. If you're designing a new library, we highly encourage you to not rely on the Index. For non-pandas-like inputs, this is a no-op. 
Examples: >>> import pandas as pd >>> import polars as pl >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]}) >>> df = nw.from_native(df_pd) >>> nw.to_native(nw.maybe_set_index(df, "b")) # doctest: +NORMALIZE_WHITESPACE a b 4 1 5 2 """ from narwhals.translate import to_native df_any = cast("Any", obj) native_obj = df_any.to_native() if column_names is not None and index is not None: msg = "Only one of `column_names` or `index` should be provided" raise ValueError(msg) if not column_names and index is None: msg = "Either `column_names` or `index` should be provided" raise ValueError(msg) if index is not None: keys = ( [to_native(idx, pass_through=True) for idx in index] if _is_iterable(index) else to_native(index, pass_through=True) ) else: keys = column_names if is_pandas_like_dataframe(native_obj): return df_any._with_compliant( df_any._compliant_frame._with_native(native_obj.set_index(keys)) ) if is_pandas_like_series(native_obj): from narwhals._pandas_like.utils import set_index if column_names: msg = "Cannot set index using column names on a Series" raise ValueError(msg) native_obj = set_index( native_obj, keys, implementation=obj._compliant_series._implementation, # type: ignore[union-attr] ) return df_any._with_compliant(df_any._compliant_series._with_native(native_obj)) return df_any def maybe_reset_index(obj: FrameOrSeriesT) -> FrameOrSeriesT: """Reset the index to the default integer index of a DataFrame or a Series, if it's pandas-like. Arguments: obj: Dataframe or Series. Returns: Same type as input. Notes: This is only really intended for backwards-compatibility purposes, for example if your library already resets the index for users. If you're designing a new library, we highly encourage you to not rely on the Index. For non-pandas-like inputs, this is a no-op. 
Examples: >>> import pandas as pd >>> import polars as pl >>> import narwhals as nw >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]}, index=([6, 7])) >>> df = nw.from_native(df_pd) >>> nw.to_native(nw.maybe_reset_index(df)) a b 0 1 4 1 2 5 >>> series_pd = pd.Series([1, 2]) >>> series = nw.from_native(series_pd, series_only=True) >>> nw.maybe_get_index(series) RangeIndex(start=0, stop=2, step=1) """ obj_any = cast("Any", obj) native_obj = obj_any.to_native() if is_pandas_like_dataframe(native_obj): native_namespace = obj_any.__native_namespace__() if _has_default_index(native_obj, native_namespace): return obj_any return obj_any._with_compliant( obj_any._compliant_frame._with_native(native_obj.reset_index(drop=True)) ) if is_pandas_like_series(native_obj): native_namespace = obj_any.__native_namespace__() if _has_default_index(native_obj, native_namespace): return obj_any return obj_any._with_compliant( obj_any._compliant_series._with_native(native_obj.reset_index(drop=True)) ) return obj_any if TYPE_CHECKING: zip_strict = partial(zip, strict=True) else: import sys if sys.version_info >= (3, 10): zip_strict = partial(zip, strict=True) else: # pragma: no cover # https://stackoverflow.com/questions/32954486/zip-iterators-asserting-for-equal-length-in-python/69485272#69485272 def zip_strict(*iterables: Iterable[Any]) -> Iterable[tuple[Any, ...]]: # For trivial cases, use pure zip. 
if len(iterables) < 2: return zip(*iterables) # Tail for the first iterable first_stopped = False def first_tail() -> Any: nonlocal first_stopped first_stopped = True return yield # Tail for the zip def zip_tail() -> Any: if not first_stopped: # pragma: no cover msg = "zip_strict: first iterable is longer" raise ValueError(msg) for _ in chain.from_iterable(rest): # pragma: no cover msg = "zip_strict: first iterable is shorter" raise ValueError(msg) yield # Put the pieces together iterables_it = iter(iterables) first = chain(next(iterables_it), first_tail()) rest = list(map(iter, iterables_it)) return chain(zip(first, *rest), zip_tail()) def _is_range_index(obj: Any, native_namespace: Any) -> TypeIs[pd.RangeIndex]: return isinstance(obj, native_namespace.RangeIndex) def _has_default_index( native_frame_or_series: pd.Series[Any] | pd.DataFrame, native_namespace: Any ) -> bool: index = native_frame_or_series.index return ( _is_range_index(index, native_namespace) and index.start == 0 and index.stop == len(index) and index.step == 1 ) def maybe_convert_dtypes( obj: FrameOrSeriesT, *args: bool, **kwargs: bool | str ) -> FrameOrSeriesT: """Convert columns or series to the best possible dtypes using dtypes supporting ``pd.NA``, if df is pandas-like. Arguments: obj: DataFrame or Series. *args: Additional arguments which gets passed through. **kwargs: Additional arguments which gets passed through. Returns: Same type as input. Notes: For non-pandas-like inputs, this is a no-op. Also, `args` and `kwargs` just get passed down to the underlying library as-is. Examples: >>> import pandas as pd >>> import polars as pl >>> import narwhals as nw >>> import numpy as np >>> df_pd = pd.DataFrame( ... { ... "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")), ... "b": pd.Series([True, False, np.nan], dtype=np.dtype("O")), ... } ... ) >>> df = nw.from_native(df_pd) >>> nw.to_native( ... nw.maybe_convert_dtypes(df) ... 
).dtypes # doctest: +NORMALIZE_WHITESPACE a Int32 b boolean dtype: object """ obj_any = cast("Any", obj) native_obj = obj_any.to_native() if is_pandas_like_dataframe(native_obj): return obj_any._with_compliant( obj_any._compliant_frame._with_native( native_obj.convert_dtypes(*args, **kwargs) ) ) if is_pandas_like_series(native_obj): return obj_any._with_compliant( obj_any._compliant_series._with_native( native_obj.convert_dtypes(*args, **kwargs) ) ) return obj_any def scale_bytes(sz: int, unit: SizeUnit) -> int | float: """Scale size in bytes to other size units (eg: "kb", "mb", "gb", "tb"). Arguments: sz: original size in bytes unit: size unit to convert into Returns: Integer or float. """ if unit in {"b", "bytes"}: return sz if unit in {"kb", "kilobytes"}: return sz / 1024 if unit in {"mb", "megabytes"}: return sz / 1024**2 if unit in {"gb", "gigabytes"}: return sz / 1024**3 if unit in {"tb", "terabytes"}: return sz / 1024**4 msg = f"`unit` must be one of {{'b', 'kb', 'mb', 'gb', 'tb'}}, got {unit!r}" raise ValueError(msg) def is_ordered_categorical(series: Series[Any]) -> bool: """Return whether indices of categories are semantically meaningful. This is a convenience function to accessing what would otherwise be the `is_ordered` property from the DataFrame Interchange Protocol, see https://data-apis.org/dataframe-protocol/latest/API.html. - For Polars: - Enums are always ordered. - Categoricals are ordered if `dtype.ordering == "physical"`. - For pandas-like APIs: - Categoricals are ordered if `dtype.cat.ordered == True`. - For PyArrow table: - Categoricals are ordered if `dtype.type.ordered == True`. Arguments: series: Input Series. Returns: Whether the Series is an ordered categorical. 
Examples: >>> import narwhals as nw >>> import pandas as pd >>> import polars as pl >>> data = ["x", "y"] >>> s_pd = pd.Series(data, dtype=pd.CategoricalDtype(ordered=True)) >>> s_pl = pl.Series(data, dtype=pl.Categorical(ordering="lexical")) Let's define a library-agnostic function: >>> @nw.narwhalify ... def func(s): ... return nw.is_ordered_categorical(s) Then, we can pass any supported library to `func`: >>> func(s_pd) True >>> func(s_pl) False """ from narwhals._interchange.series import InterchangeSeries dtypes = series._compliant_series._version.dtypes compliant = series._compliant_series # If it doesn't match any branches, let's just play it safe and return False. result: bool = False if isinstance(compliant, InterchangeSeries) and isinstance( series.dtype, dtypes.Categorical ): result = compliant.native.describe_categorical["is_ordered"] elif series.dtype == dtypes.Enum: result = True elif series.dtype != dtypes.Categorical: result = False else: native = series.to_native() impl = series.implementation if impl.is_polars() and impl._backend_version() < (1, 32): # NOTE: Deprecated https://github.com/pola-rs/polars/pull/23779 # Since version 1.32.0, ordering parameter is ignored and # it always behaves as if 'lexical' was passed. result = cast("pl.Categorical", native.dtype).ordering == "physical" elif impl.is_pandas_like(): result = bool(native.cat.ordered) elif impl.is_pyarrow(): from narwhals._arrow.utils import is_dictionary result = is_dictionary(native.type) and native.type.ordered return result def generate_unique_token( n_bytes: int, columns: Container[str] ) -> str: # pragma: no cover msg = ( "Use `generate_temporary_column_name` instead. 
`generate_unique_token` is " "deprecated and it will be removed in future versions" ) issue_deprecation_warning(msg, _version="1.13.0") return generate_temporary_column_name(n_bytes=n_bytes, columns=columns) def generate_temporary_column_name(n_bytes: int, columns: Container[str]) -> str: """Generates a unique column name that is not present in the given list of columns. It relies on [python secrets token_hex](https://docs.python.org/3/library/secrets.html#secrets.token_hex) function to return a string nbytes random bytes. Arguments: n_bytes: The number of bytes to generate for the token. columns: The list of columns to check for uniqueness. Returns: A unique token that is not present in the given list of columns. Raises: AssertionError: If a unique token cannot be generated after 100 attempts. Examples: >>> import narwhals as nw >>> columns = ["abc", "xyz"] >>> nw.generate_temporary_column_name(n_bytes=8, columns=columns) not in columns True """ counter = 0 while True: # Prepend `'nw'` to ensure it always starts with a character # https://github.com/narwhals-dev/narwhals/issues/2510 token = f"nw{token_hex(n_bytes - 1)}" if token not in columns: return token counter += 1 if counter > 100: msg = ( "Internal Error: Narwhals was not able to generate a column name with " f"{n_bytes=} and not in {columns}" ) raise AssertionError(msg) def parse_columns_to_drop( frame: _StoresColumns, subset: Iterable[str], /, *, strict: bool ) -> list[str]: if not strict: return list(set(frame.columns).intersection(subset)) to_drop = list(subset) if error := check_columns_exist(to_drop, available=frame.columns): raise error return to_drop def is_sequence_but_not_str(sequence: Sequence[_T] | Any) -> TypeIs[Sequence[_T]]: return isinstance(sequence, Sequence) and not isinstance(sequence, str) def is_slice_none(obj: Any) -> TypeIs[_SliceNone]: return isinstance(obj, slice) and obj == slice(None) def is_sized_multi_index_selector( obj: Any, ) -> TypeIs[SizedMultiIndexSelector[Series[Any] | 
CompliantSeries[Any]]]: return ( ( is_sequence_but_not_str(obj) and ((len(obj) > 0 and isinstance(obj[0], int)) or (len(obj) == 0)) ) or is_numpy_array_1d_int(obj) or is_narwhals_series_int(obj) or is_compliant_series_int(obj) ) def is_sequence_like( obj: Sequence[_T] | Any, ) -> TypeIs[Sequence[_T] | Series[Any] | _1DArray]: return ( is_sequence_but_not_str(obj) or is_numpy_array_1d(obj) or is_narwhals_series(obj) or is_compliant_series(obj) ) def is_slice_index(obj: Any) -> TypeIs[_SliceIndex]: return isinstance(obj, slice) and ( isinstance(obj.start, int) or isinstance(obj.stop, int) or (isinstance(obj.step, int) and obj.start is None and obj.stop is None) ) def is_range(obj: Any) -> TypeIs[range]: return isinstance(obj, range) def is_single_index_selector(obj: Any) -> TypeIs[SingleIndexSelector]: return bool(isinstance(obj, int) and not isinstance(obj, bool)) def is_index_selector( obj: Any, ) -> TypeIs[SingleIndexSelector | MultiIndexSelector[Series[Any] | CompliantSeries[Any]]]: return ( is_single_index_selector(obj) or is_sized_multi_index_selector(obj) or is_slice_index(obj) ) def is_list_of(obj: Any, tp: type[_T]) -> TypeIs[list[_T]]: # Check if an object is a list of `tp`, only sniffing the first element. return bool(isinstance(obj, list) and obj and isinstance(obj[0], tp)) def is_sequence_of(obj: Any, tp: type[_T]) -> TypeIs[Sequence[_T]]: # Check if an object is a sequence of `tp`, only sniffing the first element. 
return bool( is_sequence_but_not_str(obj) and (first := next(iter(obj), None)) and isinstance(first, tp) ) def validate_strict_and_pass_though( strict: bool | None, # noqa: FBT001 pass_through: bool | None, # noqa: FBT001 *, pass_through_default: bool, ) -> bool: if strict is None and pass_through is None: pass_through = pass_through_default elif strict is not None and pass_through is None: pass_through = not strict elif strict is None and pass_through is not None: pass else: msg = "Cannot pass both `strict` and `pass_through`" raise ValueError(msg) return pass_through def deprecate_native_namespace( *, warn_version: str = "", required: bool = False ) -> Callable[[Callable[P, R]], Callable[P, R]]: """Decorator to transition from `native_namespace` to `backend` argument. Arguments: warn_version: Emit a deprecation warning from this version. required: Raise when both `native_namespace`, `backend` are `None`. Returns: Wrapped function, with `native_namespace` **removed**. """ def decorate(fn: Callable[P, R], /) -> Callable[P, R]: @wraps(fn) def wrapper(*args: P.args, **kwds: P.kwargs) -> R: backend = kwds.pop("backend", None) native_namespace = kwds.pop("native_namespace", None) if native_namespace is not None and backend is None: if warn_version: msg = ( "`native_namespace` is deprecated, please use `backend` instead.\n\n" "Note: `native_namespace` will remain available in `narwhals.stable.v1`.\n" "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n" ) issue_deprecation_warning(msg, _version=warn_version) backend = native_namespace elif native_namespace is not None and backend is not None: msg = "Can't pass both `native_namespace` and `backend`" raise ValueError(msg) elif native_namespace is None and backend is None and required: msg = f"`backend` must be specified in `{fn.__name__}`." 
raise ValueError(msg) kwds["backend"] = backend return fn(*args, **kwds) return wrapper return decorate def _validate_rolling_arguments( window_size: int, min_samples: int | None ) -> tuple[int, int]: ensure_type(window_size, int, param_name="window_size") ensure_type(min_samples, int, type(None), param_name="min_samples") if window_size < 1: msg = "window_size must be greater or equal than 1" raise ValueError(msg) if min_samples is not None: if min_samples < 1: msg = "min_samples must be greater or equal than 1" raise ValueError(msg) if min_samples > window_size: msg = "`min_samples` must be less or equal than `window_size`" raise InvalidOperationError(msg) else: min_samples = window_size return window_size, min_samples def generate_repr(header: str, native_repr: str) -> str: try: terminal_width = os.get_terminal_size().columns except OSError: terminal_width = int(os.getenv("COLUMNS", 80)) # noqa: PLW1508 native_lines = native_repr.expandtabs().splitlines() max_native_width = max(len(line) for line in native_lines) if max_native_width + 2 <= terminal_width: length = max(max_native_width, len(header)) output = f"┌{'─' * length}┐\n" header_extra = length - len(header) output += f"|{' ' * (header_extra // 2)}{header}{' ' * (header_extra // 2 + header_extra % 2)}|\n" output += f"|{'-' * (length)}|\n" start_extra = (length - max_native_width) // 2 end_extra = (length - max_native_width) // 2 + (length - max_native_width) % 2 for line in native_lines: output += f"|{' ' * (start_extra)}{line}{' ' * (end_extra + max_native_width - len(line))}|\n" output += f"└{'─' * length}┘" return output diff = 39 - len(header) return ( f"┌{'─' * (39)}┐\n" f"|{' ' * (diff // 2)}{header}{' ' * (diff // 2 + diff % 2)}|\n" "| Use `.to_native` to see native output |\n└" f"{'─' * 39}┘" ) def check_columns_exist( subset: Collection[str], /, *, available: Collection[str] ) -> ColumnNotFoundError | None: if missing := set(subset).difference(available): return 
ColumnNotFoundError.from_missing_and_available_column_names( missing, available ) return None def check_column_names_are_unique(columns: Collection[str]) -> None: if len(columns) != len(set(columns)): from collections import Counter counter = Counter(columns) duplicates = {k: v for k, v in counter.items() if v > 1} msg = "".join(f"\n- '{k}' {v} times" for k, v in duplicates.items()) msg = f"Expected unique column names, got:{msg}" raise DuplicateError(msg) def _parse_time_unit_and_time_zone( time_unit: TimeUnit | Iterable[TimeUnit] | None, time_zone: str | timezone | Iterable[str | timezone | None] | None, ) -> tuple[Set[TimeUnit], Set[str | None]]: time_units: Set[TimeUnit] = ( {"ms", "us", "ns", "s"} if time_unit is None else {time_unit} if isinstance(time_unit, str) else set(time_unit) ) time_zones: Set[str | None] = ( {None} if time_zone is None else {str(time_zone)} if isinstance(time_zone, (str, timezone)) else {str(tz) if tz is not None else None for tz in time_zone} ) return time_units, time_zones def dtype_matches_time_unit_and_time_zone( dtype: DType, dtypes: DTypes, time_units: Set[TimeUnit], time_zones: Set[str | None] ) -> bool: return ( isinstance(dtype, dtypes.Datetime) and (dtype.time_unit in time_units) and ( dtype.time_zone in time_zones or ("*" in time_zones and dtype.time_zone is not None) ) ) def get_column_names(frame: _StoresColumns, /) -> Sequence[str]: return frame.columns def exclude_column_names(frame: _StoresColumns, names: Container[str]) -> Sequence[str]: return [col_name for col_name in frame.columns if col_name not in names] def passthrough_column_names(names: Sequence[str], /) -> EvalNames[Any]: def fn(_frame: Any, /) -> Sequence[str]: return names return fn def _hasattr_static(obj: Any, attr: str) -> bool: sentinel = object() return getattr_static(obj, attr, sentinel) is not sentinel def is_compliant_dataframe( obj: CompliantDataFrame[ CompliantSeriesT, CompliantExprT, NativeFrameT_co, ToNarwhalsT_co ] | Any, ) -> TypeIs[ 
CompliantDataFrame[CompliantSeriesT, CompliantExprT, NativeFrameT_co, ToNarwhalsT_co] ]: return _hasattr_static(obj, "__narwhals_dataframe__") def is_compliant_lazyframe( obj: CompliantLazyFrame[CompliantExprT, NativeLazyFrameT, ToNarwhalsT_co] | Any, ) -> TypeIs[CompliantLazyFrame[CompliantExprT, NativeLazyFrameT, ToNarwhalsT_co]]: return _hasattr_static(obj, "__narwhals_lazyframe__") def is_compliant_series( obj: CompliantSeries[NativeSeriesT_co] | Any, ) -> TypeIs[CompliantSeries[NativeSeriesT_co]]: return _hasattr_static(obj, "__narwhals_series__") def is_compliant_series_int( obj: CompliantSeries[NativeSeriesT_co] | Any, ) -> TypeIs[CompliantSeries[NativeSeriesT_co]]: return is_compliant_series(obj) and obj.dtype.is_integer() def is_compliant_expr( obj: CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co] | Any, ) -> TypeIs[CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co]]: return hasattr(obj, "__narwhals_expr__") def is_eager_allowed(impl: Implementation, /) -> TypeIs[_EagerAllowedImpl]: """Return True if `impl` allows eager operations.""" return impl in { Implementation.PANDAS, Implementation.MODIN, Implementation.CUDF, Implementation.POLARS, Implementation.PYARROW, } def can_lazyframe_collect(impl: Implementation, /) -> TypeIs[_LazyFrameCollectImpl]: """Return True if `LazyFrame.collect(impl)` is allowed.""" return impl in {Implementation.PANDAS, Implementation.POLARS, Implementation.PYARROW} def can_dataframe_lazy(impl: Implementation, /) -> TypeIs[_DataFrameLazyImpl]: """Return True if `DataFrame.lazy(impl)` is allowed.""" return impl in { Implementation.DASK, Implementation.DUCKDB, Implementation.POLARS, Implementation.IBIS, } def has_native_namespace(obj: Any) -> TypeIs[SupportsNativeNamespace]: return _hasattr_static(obj, "__native_namespace__") def supports_arrow_c_stream(obj: Any) -> TypeIs[ArrowStreamExportable]: return _hasattr_static(obj, "__arrow_c_stream__") def _remap_full_join_keys( left_on: Collection[str], right_on: 
Collection[str], suffix: str ) -> dict[str, str]: """Remap join keys to avoid collisions. If left keys collide with the right keys, append the suffix. If there's no collision, let the right keys be. Arguments: left_on: Left keys. right_on: Right keys. suffix: Suffix to append to right keys. Returns: A map of old to new right keys. """ right_keys_suffixed = ( f"{key}{suffix}" if key in left_on else key for key in right_on ) return dict(zip(right_on, right_keys_suffixed)) def _into_arrow_table(data: IntoArrowTable, context: _LimitedContext, /) -> pa.Table: """Guards `ArrowDataFrame.from_arrow` w/ safer imports. Arguments: data: Object which implements `__arrow_c_stream__`. context: Initialized compliant object. Returns: A PyArrow Table. """ if find_spec("pyarrow"): ns = context._version.namespace.from_backend("pyarrow").compliant return ns._dataframe.from_arrow(data, context=ns).native msg = f"'pyarrow>=14.0.0' is required for `from_arrow` for object of type {qualified_type_name(data)!r}." # pragma: no cover raise ModuleNotFoundError(msg) # pragma: no cover # TODO @dangotbanned: Extend with runtime behavior for `v1.*` # See `narwhals.exceptions.NarwhalsUnstableWarning` def unstable(fn: _Fn, /) -> _Fn: """Visual-only marker for unstable functionality. Arguments: fn: Function to decorate. Returns: Decorated function (unchanged). Examples: >>> from narwhals._utils import unstable >>> @unstable ... def a_work_in_progress_feature(*args): ... return args >>> >>> a_work_in_progress_feature.__name__ 'a_work_in_progress_feature' >>> a_work_in_progress_feature(1, 2, 3) (1, 2, 3) """ return fn def _is_naive_format(format: str) -> bool: """Determines if a datetime format string is 'naive', i.e., does not include timezone information. A format is considered naive if it does not contain any of the following - '%s': Unix timestamp - '%z': UTC offset - 'Z' : UTC timezone designator Arguments: format: The datetime format string to check. 
Returns: bool: True if the format is naive (does not include timezone info), False otherwise. """ return not any(x in format for x in ("%s", "%z", "Z")) class not_implemented: # noqa: N801 """Mark some functionality as unsupported. Arguments: alias: optional name used instead of the data model hook [`__set_name__`]. Returns: An exception-raising [descriptor]. Notes: - Attribute/method name *doesn't* need to be declared twice - Allows different behavior when looked up on the class vs instance - Allows us to use `isinstance(...)` instead of monkeypatching an attribute to the function Examples: >>> from narwhals._utils import not_implemented >>> class Thing: ... def totally_ready(self) -> str: ... return "I'm ready!" ... ... not_ready_yet = not_implemented() >>> >>> thing = Thing() >>> thing.totally_ready() "I'm ready!" >>> thing.not_ready_yet() Traceback (most recent call last): ... NotImplementedError: 'not_ready_yet' is not implemented for: 'Thing'. ... >>> isinstance(Thing.not_ready_yet, not_implemented) True [`__set_name__`]: https://docs.python.org/3/reference/datamodel.html#object.__set_name__ [descriptor]: https://docs.python.org/3/howto/descriptor.html """ def __init__(self, alias: str | None = None, /) -> None: # NOTE: Don't like this # Trying to workaround `mypy` requiring `@property` everywhere self._alias: str | None = alias def __repr__(self) -> str: return f"<{type(self).__name__}>: {self._name_owner}.{self._name}" def __set_name__(self, owner: type[_T], name: str) -> None: # https://docs.python.org/3/howto/descriptor.html#customized-names self._name_owner: str = owner.__name__ self._name: str = self._alias or name def __get__( self, instance: _T | Literal["raise"] | None, owner: type[_T] | None = None, / ) -> Any: if instance is None: # NOTE: Branch for `cls._name` # We can check that to see if an instance of `type(self)` for # https://narwhals-dev.github.io/narwhals/api-completeness/expr/ return self # NOTE: Prefer not exposing the actual class we're 
defining in # `_implementation` may not be available everywhere who = getattr(instance, "_implementation", self._name_owner) _raise_not_implemented_error(self._name, who) return None # pragma: no cover def __call__(self, *args: Any, **kwds: Any) -> Any: # NOTE: Purely to duck-type as assignable to **any** instance method # Wouldn't be reachable through *regular* attribute access return self.__get__("raise") @classmethod def deprecated(cls, message: LiteralString, /) -> Self: """Alt constructor, wraps with `@deprecated`. Arguments: message: **Static-only** deprecation message, emitted in an IDE. [descriptor]: https://docs.python.org/3/howto/descriptor.html """ obj = cls() return deprecated(message)(obj) def _raise_not_implemented_error(what: str, who: str, /) -> NotImplementedError: msg = ( f"{what!r} is not implemented for: {who!r}.\n\n" "If you would like to see this functionality in `narwhals`, " "please open an issue at: https://github.com/narwhals-dev/narwhals/issues" ) raise NotImplementedError(msg) class requires: # noqa: N801 """Method decorator for raising under certain constraints. Attributes: _min_version: Minimum backend version. _hint: Optional suggested alternative. Examples: >>> from narwhals._utils import requires, Implementation >>> class SomeBackend: ... _implementation = Implementation.PYARROW ... _backend_version = 20, 0, 0 ... ... @requires.backend_version((9000, 0, 0)) ... def really_complex_feature(self) -> str: ... return "hello" >>> backend = SomeBackend() >>> backend.really_complex_feature() Traceback (most recent call last): ... NotImplementedError: `really_complex_feature` is only available in 'pyarrow>=9000.0.0', found version '20.0.0'. """ _min_version: tuple[int, ...] _hint: str @classmethod def backend_version(cls, minimum: tuple[int, ...], /, hint: str = "") -> Self: """Method decorator for raising below a minimum `_backend_version`. Arguments: minimum: Minimum backend version. hint: Optional suggested alternative. 
""" obj = cls.__new__(cls) obj._min_version = minimum obj._hint = hint return obj @staticmethod def _unparse_version(backend_version: tuple[int, ...], /) -> str: return ".".join(f"{d}" for d in backend_version) def _ensure_version(self, instance: _FullContext, /) -> None: if instance._backend_version >= self._min_version: return method = self._wrapped_name backend = instance._implementation minimum = self._unparse_version(self._min_version) found = self._unparse_version(instance._backend_version) msg = f"`{method}` is only available in '{backend}>={minimum}', found version {found!r}." if self._hint: msg = f"{msg}\n{self._hint}" raise NotImplementedError(msg) def __call__(self, fn: _Method[_ContextT, P, R], /) -> _Method[_ContextT, P, R]: self._wrapped_name = fn.__name__ @wraps(fn) def wrapper(instance: _ContextT, *args: P.args, **kwds: P.kwargs) -> R: self._ensure_version(instance) return fn(instance, *args, **kwds) # NOTE: Only getting a complaint from `mypy` return wrapper # type: ignore[return-value] def convert_str_slice_to_int_slice( str_slice: _SliceName, columns: Sequence[str] ) -> tuple[int | None, int | None, Any]: start = columns.index(str_slice.start) if str_slice.start is not None else None stop = columns.index(str_slice.stop) + 1 if str_slice.stop is not None else None step = str_slice.step return (start, stop, step) def inherit_doc( tp_parent: Callable[P, R1], / ) -> Callable[[_Constructor[_T, P, R2]], _Constructor[_T, P, R2]]: """Steal the class-level docstring from parent and attach to child `__init__`. Returns: Decorated constructor. 
Notes: - Passes static typing (mostly) - Passes at runtime """ def decorate(init_child: _Constructor[_T, P, R2], /) -> _Constructor[_T, P, R2]: if init_child.__name__ == "__init__" and issubclass(type(tp_parent), type): init_child.__doc__ = getdoc(tp_parent) return init_child msg = ( # pragma: no cover f"`@{inherit_doc.__name__}` is only allowed to decorate an `__init__` with a class-level doc.\n" f"Method: {init_child.__qualname__!r}\n" f"Parent: {tp_parent!r}" ) raise TypeError(msg) # pragma: no cover return decorate def qualified_type_name(obj: object | type[Any], /) -> str: tp = obj if isinstance(obj, type) else type(obj) module = tp.__module__ if tp.__module__ != "builtins" else "" return f"{module}.{tp.__name__}".lstrip(".") def ensure_type(obj: Any, /, *valid_types: type[Any], param_name: str = "") -> None: """Validate that an object is an instance of one or more specified types. Parameters: obj: The object to validate. *valid_types: One or more valid types that `obj` is expected to match. param_name: The name of the parameter being validated. Used to improve error message clarity. Raises: TypeError: If `obj` is not an instance of any of the provided `valid_types`. Examples: >>> from narwhals._utils import ensure_type >>> ensure_type(42, int, float) >>> ensure_type("hello", str) >>> ensure_type("hello", int, param_name="test") Traceback (most recent call last): ... TypeError: Expected 'int', got: 'str' test='hello' ^^^^^^^ >>> import polars as pl >>> import pandas as pd >>> df = pl.DataFrame([[1], [2], [3], [4], [5]], schema=[*"abcde"]) >>> ensure_type(df, pd.DataFrame, param_name="df") Traceback (most recent call last): ... TypeError: Expected 'pandas.core.frame.DataFrame', got: 'polars.dataframe.frame.DataFrame' df=polars.dataframe.frame.DataFrame(...) 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ """ if not isinstance(obj, valid_types): # pragma: no cover tp_names = " | ".join(qualified_type_name(tp) for tp in valid_types) msg = f"Expected {tp_names!r}, got: {qualified_type_name(obj)!r}" if param_name: left_pad = " " * 4 val = repr(obj) if len(val) > 40: # truncate long reprs val = f"{qualified_type_name(obj)}(...)" assign = f"{left_pad}{param_name}=" underline = (" " * len(assign)) + ("^" * len(val)) msg = f"{msg}\n{assign}{val}\n{underline}" raise TypeError(msg) class _DeferredIterable(Generic[_T]): """Store a callable producing an iterable to defer collection until we need it.""" def __init__(self, into_iter: Callable[[], Iterable[_T]], /) -> None: self._into_iter: Callable[[], Iterable[_T]] = into_iter def __iter__(self) -> Iterator[_T]: yield from self._into_iter() def to_tuple(self) -> tuple[_T, ...]: # Collect and return as a `tuple`. it = self._into_iter() return it if isinstance(it, tuple) else tuple(it) @lru_cache(maxsize=64) def deep_attrgetter(attr: str, *nested: str) -> attrgetter[Any]: name = ".".join((attr, *nested)) if nested else attr return attrgetter(name) def deep_getattr(obj: Any, name_1: str, *nested: str) -> Any: """Perform a nested attribute lookup on `obj`.""" return deep_attrgetter(name_1, *nested)(obj)