up follow livre
This commit is contained in:
parent
70a5c3465c
commit
cffb31c1ef
12198 changed files with 2562132 additions and 35 deletions
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,978 @@
|
|||
import decimal
|
||||
|
||||
import numpy as np
|
||||
from numpy import iinfo
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
ArrowDtype,
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
option_context,
|
||||
to_numeric,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture(params=[None, "ignore", "raise", "coerce"])
|
||||
def errors(request):
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
|
||||
def signed(request):
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=[lambda x: x, str], ids=["identity", "str"])
|
||||
def transform(request):
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=[47393996303418497800, 100000000000000000000])
|
||||
def large_val(request):
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
|
||||
def multiple_elts(request):
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
params=[
|
||||
(lambda x: Index(x, name="idx"), tm.assert_index_equal),
|
||||
(lambda x: Series(x, name="ser"), tm.assert_series_equal),
|
||||
(lambda x: np.array(Index(x).values), tm.assert_numpy_array_equal),
|
||||
]
|
||||
)
|
||||
def transform_assert_equal(request):
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"input_kwargs,result_kwargs",
|
||||
[
|
||||
({}, {"dtype": np.int64}),
|
||||
({"errors": "coerce", "downcast": "integer"}, {"dtype": np.int8}),
|
||||
],
|
||||
)
|
||||
def test_empty(input_kwargs, result_kwargs):
|
||||
# see gh-16302
|
||||
ser = Series([], dtype=object)
|
||||
result = to_numeric(ser, **input_kwargs)
|
||||
|
||||
expected = Series([], **result_kwargs)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
|
||||
)
|
||||
@pytest.mark.parametrize("last_val", ["7", 7])
|
||||
def test_series(last_val, infer_string):
|
||||
with option_context("future.infer_string", infer_string):
|
||||
ser = Series(["1", "-3.14", last_val])
|
||||
result = to_numeric(ser)
|
||||
|
||||
expected = Series([1, -3.14, 7])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data",
|
||||
[
|
||||
[1, 3, 4, 5],
|
||||
[1.0, 3.0, 4.0, 5.0],
|
||||
# Bool is regarded as numeric.
|
||||
[True, False, True, True],
|
||||
],
|
||||
)
|
||||
def test_series_numeric(data):
|
||||
ser = Series(data, index=list("ABCD"), name="EFG")
|
||||
|
||||
result = to_numeric(ser)
|
||||
tm.assert_series_equal(result, ser)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data,msg",
|
||||
[
|
||||
([1, -3.14, "apple"], 'Unable to parse string "apple" at position 2'),
|
||||
(
|
||||
["orange", 1, -3.14, "apple"],
|
||||
'Unable to parse string "orange" at position 0',
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_error(data, msg):
|
||||
ser = Series(data)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
to_numeric(ser, errors="raise")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"errors,exp_data", [("ignore", [1, -3.14, "apple"]), ("coerce", [1, -3.14, np.nan])]
|
||||
)
|
||||
@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning")
|
||||
def test_ignore_error(errors, exp_data):
|
||||
ser = Series([1, -3.14, "apple"])
|
||||
result = to_numeric(ser, errors=errors)
|
||||
|
||||
expected = Series(exp_data)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"errors,exp",
|
||||
[
|
||||
("raise", 'Unable to parse string "apple" at position 2'),
|
||||
("ignore", [True, False, "apple"]),
|
||||
# Coerces to float.
|
||||
("coerce", [1.0, 0.0, np.nan]),
|
||||
],
|
||||
)
|
||||
@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning")
|
||||
def test_bool_handling(errors, exp):
|
||||
ser = Series([True, False, "apple"])
|
||||
|
||||
if isinstance(exp, str):
|
||||
with pytest.raises(ValueError, match=exp):
|
||||
to_numeric(ser, errors=errors)
|
||||
else:
|
||||
result = to_numeric(ser, errors=errors)
|
||||
expected = Series(exp)
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_list():
|
||||
ser = ["1", "-3.14", "7"]
|
||||
res = to_numeric(ser)
|
||||
|
||||
expected = np.array([1, -3.14, 7])
|
||||
tm.assert_numpy_array_equal(res, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data,arr_kwargs",
|
||||
[
|
||||
([1, 3, 4, 5], {"dtype": np.int64}),
|
||||
([1.0, 3.0, 4.0, 5.0], {}),
|
||||
# Boolean is regarded as numeric.
|
||||
([True, False, True, True], {}),
|
||||
],
|
||||
)
|
||||
def test_list_numeric(data, arr_kwargs):
|
||||
result = to_numeric(data)
|
||||
expected = np.array(data, **arr_kwargs)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("kwargs", [{"dtype": "O"}, {}])
|
||||
def test_numeric(kwargs):
|
||||
data = [1, -3.14, 7]
|
||||
|
||||
ser = Series(data, **kwargs)
|
||||
result = to_numeric(ser)
|
||||
|
||||
expected = Series(data)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"columns",
|
||||
[
|
||||
# One column.
|
||||
"a",
|
||||
# Multiple columns.
|
||||
["a", "b"],
|
||||
],
|
||||
)
|
||||
def test_numeric_df_columns(columns):
|
||||
# see gh-14827
|
||||
df = DataFrame(
|
||||
{
|
||||
"a": [1.2, decimal.Decimal(3.14), decimal.Decimal("infinity"), "0.1"],
|
||||
"b": [1.0, 2.0, 3.0, 4.0],
|
||||
}
|
||||
)
|
||||
|
||||
expected = DataFrame({"a": [1.2, 3.14, np.inf, 0.1], "b": [1.0, 2.0, 3.0, 4.0]})
|
||||
|
||||
df_copy = df.copy()
|
||||
df_copy[columns] = df_copy[columns].apply(to_numeric)
|
||||
|
||||
tm.assert_frame_equal(df_copy, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data,exp_data",
|
||||
[
|
||||
(
|
||||
[[decimal.Decimal(3.14), 1.0], decimal.Decimal(1.6), 0.1],
|
||||
[[3.14, 1.0], 1.6, 0.1],
|
||||
),
|
||||
([np.array([decimal.Decimal(3.14), 1.0]), 0.1], [[3.14, 1.0], 0.1]),
|
||||
],
|
||||
)
|
||||
def test_numeric_embedded_arr_likes(data, exp_data):
|
||||
# Test to_numeric with embedded lists and arrays
|
||||
df = DataFrame({"a": data})
|
||||
df["a"] = df["a"].apply(to_numeric)
|
||||
|
||||
expected = DataFrame({"a": exp_data})
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_all_nan():
|
||||
ser = Series(["a", "b", "c"])
|
||||
result = to_numeric(ser, errors="coerce")
|
||||
|
||||
expected = Series([np.nan, np.nan, np.nan])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning")
|
||||
def test_type_check(errors):
|
||||
# see gh-11776
|
||||
df = DataFrame({"a": [1, -3.14, 7], "b": ["4", "5", "6"]})
|
||||
kwargs = {"errors": errors} if errors is not None else {}
|
||||
with pytest.raises(TypeError, match="1-d array"):
|
||||
to_numeric(df, **kwargs)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val", [1, 1.1, 20001])
|
||||
def test_scalar(val, signed, transform):
|
||||
val = -val if signed else val
|
||||
assert to_numeric(transform(val)) == float(val)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning")
|
||||
def test_really_large_scalar(large_val, signed, transform, errors):
|
||||
# see gh-24910
|
||||
kwargs = {"errors": errors} if errors is not None else {}
|
||||
val = -large_val if signed else large_val
|
||||
|
||||
val = transform(val)
|
||||
val_is_string = isinstance(val, str)
|
||||
|
||||
if val_is_string and errors in (None, "raise"):
|
||||
msg = "Integer out of range. at position 0"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
to_numeric(val, **kwargs)
|
||||
else:
|
||||
expected = float(val) if (errors == "coerce" and val_is_string) else val
|
||||
tm.assert_almost_equal(to_numeric(val, **kwargs), expected)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning")
|
||||
def test_really_large_in_arr(large_val, signed, transform, multiple_elts, errors):
|
||||
# see gh-24910
|
||||
kwargs = {"errors": errors} if errors is not None else {}
|
||||
val = -large_val if signed else large_val
|
||||
val = transform(val)
|
||||
|
||||
extra_elt = "string"
|
||||
arr = [val] + multiple_elts * [extra_elt]
|
||||
|
||||
val_is_string = isinstance(val, str)
|
||||
coercing = errors == "coerce"
|
||||
|
||||
if errors in (None, "raise") and (val_is_string or multiple_elts):
|
||||
if val_is_string:
|
||||
msg = "Integer out of range. at position 0"
|
||||
else:
|
||||
msg = 'Unable to parse string "string" at position 1'
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
to_numeric(arr, **kwargs)
|
||||
else:
|
||||
result = to_numeric(arr, **kwargs)
|
||||
|
||||
exp_val = float(val) if (coercing and val_is_string) else val
|
||||
expected = [exp_val]
|
||||
|
||||
if multiple_elts:
|
||||
if coercing:
|
||||
expected.append(np.nan)
|
||||
exp_dtype = float
|
||||
else:
|
||||
expected.append(extra_elt)
|
||||
exp_dtype = object
|
||||
else:
|
||||
exp_dtype = float if isinstance(exp_val, (int, float)) else object
|
||||
|
||||
tm.assert_almost_equal(result, np.array(expected, dtype=exp_dtype))
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning")
|
||||
def test_really_large_in_arr_consistent(large_val, signed, multiple_elts, errors):
|
||||
# see gh-24910
|
||||
#
|
||||
# Even if we discover that we have to hold float, does not mean
|
||||
# we should be lenient on subsequent elements that fail to be integer.
|
||||
kwargs = {"errors": errors} if errors is not None else {}
|
||||
arr = [str(-large_val if signed else large_val)]
|
||||
|
||||
if multiple_elts:
|
||||
arr.insert(0, large_val)
|
||||
|
||||
if errors in (None, "raise"):
|
||||
index = int(multiple_elts)
|
||||
msg = f"Integer out of range. at position {index}"
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
to_numeric(arr, **kwargs)
|
||||
else:
|
||||
result = to_numeric(arr, **kwargs)
|
||||
|
||||
if errors == "coerce":
|
||||
expected = [float(i) for i in arr]
|
||||
exp_dtype = float
|
||||
else:
|
||||
expected = arr
|
||||
exp_dtype = object
|
||||
|
||||
tm.assert_almost_equal(result, np.array(expected, dtype=exp_dtype))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"errors,checker",
|
||||
[
|
||||
("raise", 'Unable to parse string "fail" at position 0'),
|
||||
("ignore", lambda x: x == "fail"),
|
||||
("coerce", lambda x: np.isnan(x)),
|
||||
],
|
||||
)
|
||||
@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning")
|
||||
def test_scalar_fail(errors, checker):
|
||||
scalar = "fail"
|
||||
|
||||
if isinstance(checker, str):
|
||||
with pytest.raises(ValueError, match=checker):
|
||||
to_numeric(scalar, errors=errors)
|
||||
else:
|
||||
assert checker(to_numeric(scalar, errors=errors))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("data", [[1, 2, 3], [1.0, np.nan, 3, np.nan]])
|
||||
def test_numeric_dtypes(data, transform_assert_equal):
|
||||
transform, assert_equal = transform_assert_equal
|
||||
data = transform(data)
|
||||
|
||||
result = to_numeric(data)
|
||||
assert_equal(result, data)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data,exp",
|
||||
[
|
||||
(["1", "2", "3"], np.array([1, 2, 3], dtype="int64")),
|
||||
(["1.5", "2.7", "3.4"], np.array([1.5, 2.7, 3.4])),
|
||||
],
|
||||
)
|
||||
def test_str(data, exp, transform_assert_equal):
|
||||
transform, assert_equal = transform_assert_equal
|
||||
result = to_numeric(transform(data))
|
||||
|
||||
expected = transform(exp)
|
||||
assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_datetime_like(tz_naive_fixture, transform_assert_equal):
|
||||
transform, assert_equal = transform_assert_equal
|
||||
idx = pd.date_range("20130101", periods=3, tz=tz_naive_fixture)
|
||||
|
||||
result = to_numeric(transform(idx))
|
||||
expected = transform(idx.asi8)
|
||||
assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_timedelta(transform_assert_equal):
|
||||
transform, assert_equal = transform_assert_equal
|
||||
idx = pd.timedelta_range("1 days", periods=3, freq="D")
|
||||
|
||||
result = to_numeric(transform(idx))
|
||||
expected = transform(idx.asi8)
|
||||
assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_period(request, transform_assert_equal):
|
||||
transform, assert_equal = transform_assert_equal
|
||||
|
||||
idx = pd.period_range("2011-01", periods=3, freq="M", name="")
|
||||
inp = transform(idx)
|
||||
|
||||
if not isinstance(inp, Index):
|
||||
request.applymarker(
|
||||
pytest.mark.xfail(reason="Missing PeriodDtype support in to_numeric")
|
||||
)
|
||||
result = to_numeric(inp)
|
||||
expected = transform(idx.asi8)
|
||||
assert_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"errors,expected",
|
||||
[
|
||||
("raise", "Invalid object type at position 0"),
|
||||
("ignore", Series([[10.0, 2], 1.0, "apple"])),
|
||||
("coerce", Series([np.nan, 1.0, np.nan])),
|
||||
],
|
||||
)
|
||||
@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning")
|
||||
def test_non_hashable(errors, expected):
|
||||
# see gh-13324
|
||||
ser = Series([[10.0, 2], 1.0, "apple"])
|
||||
|
||||
if isinstance(expected, str):
|
||||
with pytest.raises(TypeError, match=expected):
|
||||
to_numeric(ser, errors=errors)
|
||||
else:
|
||||
result = to_numeric(ser, errors=errors)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_downcast_invalid_cast():
|
||||
# see gh-13352
|
||||
data = ["1", 2, 3]
|
||||
invalid_downcast = "unsigned-integer"
|
||||
msg = "invalid downcasting method provided"
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
to_numeric(data, downcast=invalid_downcast)
|
||||
|
||||
|
||||
def test_errors_invalid_value():
|
||||
# see gh-26466
|
||||
data = ["1", 2, 3]
|
||||
invalid_error_value = "invalid"
|
||||
msg = "invalid error value specified"
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
to_numeric(data, errors=invalid_error_value)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data",
|
||||
[
|
||||
["1", 2, 3],
|
||||
[1, 2, 3],
|
||||
np.array(["1970-01-02", "1970-01-03", "1970-01-04"], dtype="datetime64[D]"),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"kwargs,exp_dtype",
|
||||
[
|
||||
# Basic function tests.
|
||||
({}, np.int64),
|
||||
({"downcast": None}, np.int64),
|
||||
# Support below np.float32 is rare and far between.
|
||||
({"downcast": "float"}, np.dtype(np.float32).char),
|
||||
# Basic dtype support.
|
||||
({"downcast": "unsigned"}, np.dtype(np.typecodes["UnsignedInteger"][0])),
|
||||
],
|
||||
)
|
||||
def test_downcast_basic(data, kwargs, exp_dtype):
|
||||
# see gh-13352
|
||||
result = to_numeric(data, **kwargs)
|
||||
expected = np.array([1, 2, 3], dtype=exp_dtype)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("signed_downcast", ["integer", "signed"])
|
||||
@pytest.mark.parametrize(
|
||||
"data",
|
||||
[
|
||||
["1", 2, 3],
|
||||
[1, 2, 3],
|
||||
np.array(["1970-01-02", "1970-01-03", "1970-01-04"], dtype="datetime64[D]"),
|
||||
],
|
||||
)
|
||||
def test_signed_downcast(data, signed_downcast):
|
||||
# see gh-13352
|
||||
smallest_int_dtype = np.dtype(np.typecodes["Integer"][0])
|
||||
expected = np.array([1, 2, 3], dtype=smallest_int_dtype)
|
||||
|
||||
res = to_numeric(data, downcast=signed_downcast)
|
||||
tm.assert_numpy_array_equal(res, expected)
|
||||
|
||||
|
||||
def test_ignore_downcast_invalid_data():
|
||||
# If we can't successfully cast the given
|
||||
# data to a numeric dtype, do not bother
|
||||
# with the downcast parameter.
|
||||
data = ["foo", 2, 3]
|
||||
expected = np.array(data, dtype=object)
|
||||
|
||||
msg = "errors='ignore' is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
res = to_numeric(data, errors="ignore", downcast="unsigned")
|
||||
tm.assert_numpy_array_equal(res, expected)
|
||||
|
||||
|
||||
def test_ignore_downcast_neg_to_unsigned():
|
||||
# Cannot cast to an unsigned integer
|
||||
# because we have a negative number.
|
||||
data = ["-1", 2, 3]
|
||||
expected = np.array([-1, 2, 3], dtype=np.int64)
|
||||
|
||||
res = to_numeric(data, downcast="unsigned")
|
||||
tm.assert_numpy_array_equal(res, expected)
|
||||
|
||||
|
||||
# Warning in 32 bit platforms
|
||||
@pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning")
|
||||
@pytest.mark.parametrize("downcast", ["integer", "signed", "unsigned"])
|
||||
@pytest.mark.parametrize(
|
||||
"data,expected",
|
||||
[
|
||||
(["1.1", 2, 3], np.array([1.1, 2, 3], dtype=np.float64)),
|
||||
(
|
||||
[10000.0, 20000, 3000, 40000.36, 50000, 50000.00],
|
||||
np.array(
|
||||
[10000.0, 20000, 3000, 40000.36, 50000, 50000.00], dtype=np.float64
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_ignore_downcast_cannot_convert_float(data, expected, downcast):
|
||||
# Cannot cast to an integer (signed or unsigned)
|
||||
# because we have a float number.
|
||||
res = to_numeric(data, downcast=downcast)
|
||||
tm.assert_numpy_array_equal(res, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"downcast,expected_dtype",
|
||||
[("integer", np.int16), ("signed", np.int16), ("unsigned", np.uint16)],
|
||||
)
|
||||
def test_downcast_not8bit(downcast, expected_dtype):
|
||||
# the smallest integer dtype need not be np.(u)int8
|
||||
data = ["256", 257, 258]
|
||||
|
||||
expected = np.array([256, 257, 258], dtype=expected_dtype)
|
||||
res = to_numeric(data, downcast=downcast)
|
||||
tm.assert_numpy_array_equal(res, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype,downcast,min_max",
|
||||
[
|
||||
("int8", "integer", [iinfo(np.int8).min, iinfo(np.int8).max]),
|
||||
("int16", "integer", [iinfo(np.int16).min, iinfo(np.int16).max]),
|
||||
("int32", "integer", [iinfo(np.int32).min, iinfo(np.int32).max]),
|
||||
("int64", "integer", [iinfo(np.int64).min, iinfo(np.int64).max]),
|
||||
("uint8", "unsigned", [iinfo(np.uint8).min, iinfo(np.uint8).max]),
|
||||
("uint16", "unsigned", [iinfo(np.uint16).min, iinfo(np.uint16).max]),
|
||||
("uint32", "unsigned", [iinfo(np.uint32).min, iinfo(np.uint32).max]),
|
||||
("uint64", "unsigned", [iinfo(np.uint64).min, iinfo(np.uint64).max]),
|
||||
("int16", "integer", [iinfo(np.int8).min, iinfo(np.int8).max + 1]),
|
||||
("int32", "integer", [iinfo(np.int16).min, iinfo(np.int16).max + 1]),
|
||||
("int64", "integer", [iinfo(np.int32).min, iinfo(np.int32).max + 1]),
|
||||
("int16", "integer", [iinfo(np.int8).min - 1, iinfo(np.int16).max]),
|
||||
("int32", "integer", [iinfo(np.int16).min - 1, iinfo(np.int32).max]),
|
||||
("int64", "integer", [iinfo(np.int32).min - 1, iinfo(np.int64).max]),
|
||||
("uint16", "unsigned", [iinfo(np.uint8).min, iinfo(np.uint8).max + 1]),
|
||||
("uint32", "unsigned", [iinfo(np.uint16).min, iinfo(np.uint16).max + 1]),
|
||||
("uint64", "unsigned", [iinfo(np.uint32).min, iinfo(np.uint32).max + 1]),
|
||||
],
|
||||
)
|
||||
def test_downcast_limits(dtype, downcast, min_max):
|
||||
# see gh-14404: test the limits of each downcast.
|
||||
series = to_numeric(Series(min_max), downcast=downcast)
|
||||
assert series.dtype == dtype
|
||||
|
||||
|
||||
def test_downcast_float64_to_float32():
|
||||
# GH-43693: Check float64 preservation when >= 16,777,217
|
||||
series = Series([16777217.0, np.finfo(np.float64).max, np.nan], dtype=np.float64)
|
||||
result = to_numeric(series, downcast="float")
|
||||
|
||||
assert series.dtype == result.dtype
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"ser,expected",
|
||||
[
|
||||
(
|
||||
Series([0, 9223372036854775808]),
|
||||
Series([0, 9223372036854775808], dtype=np.uint64),
|
||||
)
|
||||
],
|
||||
)
|
||||
def test_downcast_uint64(ser, expected):
|
||||
# see gh-14422:
|
||||
# BUG: to_numeric doesn't work uint64 numbers
|
||||
|
||||
result = to_numeric(ser, downcast="unsigned")
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data,exp_data",
|
||||
[
|
||||
(
|
||||
[200, 300, "", "NaN", 30000000000000000000],
|
||||
[200, 300, np.nan, np.nan, 30000000000000000000],
|
||||
),
|
||||
(
|
||||
["12345678901234567890", "1234567890", "ITEM"],
|
||||
[12345678901234567890, 1234567890, np.nan],
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_coerce_uint64_conflict(data, exp_data):
|
||||
# see gh-17007 and gh-17125
|
||||
#
|
||||
# Still returns float despite the uint64-nan conflict,
|
||||
# which would normally force the casting to object.
|
||||
result = to_numeric(Series(data), errors="coerce")
|
||||
expected = Series(exp_data, dtype=float)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"errors,exp",
|
||||
[
|
||||
("ignore", Series(["12345678901234567890", "1234567890", "ITEM"])),
|
||||
("raise", "Unable to parse string"),
|
||||
],
|
||||
)
|
||||
@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning")
|
||||
def test_non_coerce_uint64_conflict(errors, exp):
|
||||
# see gh-17007 and gh-17125
|
||||
#
|
||||
# For completeness.
|
||||
ser = Series(["12345678901234567890", "1234567890", "ITEM"])
|
||||
|
||||
if isinstance(exp, str):
|
||||
with pytest.raises(ValueError, match=exp):
|
||||
to_numeric(ser, errors=errors)
|
||||
else:
|
||||
result = to_numeric(ser, errors=errors)
|
||||
tm.assert_series_equal(result, ser)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dc1", ["integer", "float", "unsigned"])
|
||||
@pytest.mark.parametrize("dc2", ["integer", "float", "unsigned"])
|
||||
def test_downcast_empty(dc1, dc2):
|
||||
# GH32493
|
||||
|
||||
tm.assert_numpy_array_equal(
|
||||
to_numeric([], downcast=dc1),
|
||||
to_numeric([], downcast=dc2),
|
||||
check_dtype=False,
|
||||
)
|
||||
|
||||
|
||||
def test_failure_to_convert_uint64_string_to_NaN():
|
||||
# GH 32394
|
||||
result = to_numeric("uint64", errors="coerce")
|
||||
assert np.isnan(result)
|
||||
|
||||
ser = Series([32, 64, np.nan])
|
||||
result = to_numeric(Series(["32", "64", "uint64"]), errors="coerce")
|
||||
tm.assert_series_equal(result, ser)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"strrep",
|
||||
[
|
||||
"243.164",
|
||||
"245.968",
|
||||
"249.585",
|
||||
"259.745",
|
||||
"265.742",
|
||||
"272.567",
|
||||
"279.196",
|
||||
"280.366",
|
||||
"275.034",
|
||||
"271.351",
|
||||
"272.889",
|
||||
"270.627",
|
||||
"280.828",
|
||||
"290.383",
|
||||
"308.153",
|
||||
"319.945",
|
||||
"336.0",
|
||||
"344.09",
|
||||
"351.385",
|
||||
"356.178",
|
||||
"359.82",
|
||||
"361.03",
|
||||
"367.701",
|
||||
"380.812",
|
||||
"387.98",
|
||||
"391.749",
|
||||
"391.171",
|
||||
"385.97",
|
||||
"385.345",
|
||||
"386.121",
|
||||
"390.996",
|
||||
"399.734",
|
||||
"413.073",
|
||||
"421.532",
|
||||
"430.221",
|
||||
"437.092",
|
||||
"439.746",
|
||||
"446.01",
|
||||
"451.191",
|
||||
"460.463",
|
||||
"469.779",
|
||||
"472.025",
|
||||
"479.49",
|
||||
"474.864",
|
||||
"467.54",
|
||||
"471.978",
|
||||
],
|
||||
)
|
||||
def test_precision_float_conversion(strrep):
|
||||
# GH 31364
|
||||
result = to_numeric(strrep)
|
||||
|
||||
assert result == float(strrep)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"values, expected",
|
||||
[
|
||||
(["1", "2", None], Series([1, 2, np.nan], dtype="Int64")),
|
||||
(["1", "2", "3"], Series([1, 2, 3], dtype="Int64")),
|
||||
(["1", "2", 3], Series([1, 2, 3], dtype="Int64")),
|
||||
(["1", "2", 3.5], Series([1, 2, 3.5], dtype="Float64")),
|
||||
(["1", None, 3.5], Series([1, np.nan, 3.5], dtype="Float64")),
|
||||
(["1", "2", "3.5"], Series([1, 2, 3.5], dtype="Float64")),
|
||||
],
|
||||
)
|
||||
def test_to_numeric_from_nullable_string(values, nullable_string_dtype, expected):
|
||||
# https://github.com/pandas-dev/pandas/issues/37262
|
||||
s = Series(values, dtype=nullable_string_dtype)
|
||||
result = to_numeric(s)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_to_numeric_from_nullable_string_coerce(nullable_string_dtype):
|
||||
# GH#52146
|
||||
values = ["a", "1"]
|
||||
ser = Series(values, dtype=nullable_string_dtype)
|
||||
result = to_numeric(ser, errors="coerce")
|
||||
expected = Series([pd.NA, 1], dtype="Int64")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_to_numeric_from_nullable_string_ignore(nullable_string_dtype):
|
||||
# GH#52146
|
||||
values = ["a", "1"]
|
||||
ser = Series(values, dtype=nullable_string_dtype)
|
||||
expected = ser.copy()
|
||||
msg = "errors='ignore' is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = to_numeric(ser, errors="ignore")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data, input_dtype, downcast, expected_dtype",
|
||||
(
|
||||
([1, 1], "Int64", "integer", "Int8"),
|
||||
([1.0, pd.NA], "Float64", "integer", "Int8"),
|
||||
([1.0, 1.1], "Float64", "integer", "Float64"),
|
||||
([1, pd.NA], "Int64", "integer", "Int8"),
|
||||
([450, 300], "Int64", "integer", "Int16"),
|
||||
([1, 1], "Float64", "integer", "Int8"),
|
||||
([np.iinfo(np.int64).max - 1, 1], "Int64", "integer", "Int64"),
|
||||
([1, 1], "Int64", "signed", "Int8"),
|
||||
([1.0, 1.0], "Float32", "signed", "Int8"),
|
||||
([1.0, 1.1], "Float64", "signed", "Float64"),
|
||||
([1, pd.NA], "Int64", "signed", "Int8"),
|
||||
([450, -300], "Int64", "signed", "Int16"),
|
||||
([np.iinfo(np.uint64).max - 1, 1], "UInt64", "signed", "UInt64"),
|
||||
([1, 1], "Int64", "unsigned", "UInt8"),
|
||||
([1.0, 1.0], "Float32", "unsigned", "UInt8"),
|
||||
([1.0, 1.1], "Float64", "unsigned", "Float64"),
|
||||
([1, pd.NA], "Int64", "unsigned", "UInt8"),
|
||||
([450, -300], "Int64", "unsigned", "Int64"),
|
||||
([-1, -1], "Int32", "unsigned", "Int32"),
|
||||
([1, 1], "Float64", "float", "Float32"),
|
||||
([1, 1.1], "Float64", "float", "Float32"),
|
||||
([1, 1], "Float32", "float", "Float32"),
|
||||
([1, 1.1], "Float32", "float", "Float32"),
|
||||
),
|
||||
)
|
||||
def test_downcast_nullable_numeric(data, input_dtype, downcast, expected_dtype):
|
||||
arr = pd.array(data, dtype=input_dtype)
|
||||
result = to_numeric(arr, downcast=downcast)
|
||||
expected = pd.array(data, dtype=expected_dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_downcast_nullable_mask_is_copied():
|
||||
# GH38974
|
||||
|
||||
arr = pd.array([1, 2, pd.NA], dtype="Int64")
|
||||
|
||||
result = to_numeric(arr, downcast="integer")
|
||||
expected = pd.array([1, 2, pd.NA], dtype="Int8")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
arr[1] = pd.NA # should not modify result
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_to_numeric_scientific_notation():
|
||||
# GH 15898
|
||||
result = to_numeric("1.7e+308")
|
||||
expected = np.float64(1.7e308)
|
||||
assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val", [9876543210.0, 2.0**128])
|
||||
def test_to_numeric_large_float_not_downcast_to_float_32(val):
|
||||
# GH 19729
|
||||
expected = Series([val])
|
||||
result = to_numeric(expected, downcast="float")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"val, dtype", [(1, "Int64"), (1.5, "Float64"), (True, "boolean")]
|
||||
)
|
||||
def test_to_numeric_dtype_backend(val, dtype):
|
||||
# GH#50505
|
||||
ser = Series([val], dtype=object)
|
||||
result = to_numeric(ser, dtype_backend="numpy_nullable")
|
||||
expected = Series([val], dtype=dtype)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"val, dtype",
|
||||
[
|
||||
(1, "Int64"),
|
||||
(1.5, "Float64"),
|
||||
(True, "boolean"),
|
||||
(1, "int64[pyarrow]"),
|
||||
(1.5, "float64[pyarrow]"),
|
||||
(True, "bool[pyarrow]"),
|
||||
],
|
||||
)
|
||||
def test_to_numeric_dtype_backend_na(val, dtype):
|
||||
# GH#50505
|
||||
if "pyarrow" in dtype:
|
||||
pytest.importorskip("pyarrow")
|
||||
dtype_backend = "pyarrow"
|
||||
else:
|
||||
dtype_backend = "numpy_nullable"
|
||||
ser = Series([val, None], dtype=object)
|
||||
result = to_numeric(ser, dtype_backend=dtype_backend)
|
||||
expected = Series([val, pd.NA], dtype=dtype)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"val, dtype, downcast",
|
||||
[
|
||||
(1, "Int8", "integer"),
|
||||
(1.5, "Float32", "float"),
|
||||
(1, "Int8", "signed"),
|
||||
(1, "int8[pyarrow]", "integer"),
|
||||
(1.5, "float[pyarrow]", "float"),
|
||||
(1, "int8[pyarrow]", "signed"),
|
||||
],
|
||||
)
|
||||
def test_to_numeric_dtype_backend_downcasting(val, dtype, downcast):
|
||||
# GH#50505
|
||||
if "pyarrow" in dtype:
|
||||
pytest.importorskip("pyarrow")
|
||||
dtype_backend = "pyarrow"
|
||||
else:
|
||||
dtype_backend = "numpy_nullable"
|
||||
ser = Series([val, None], dtype=object)
|
||||
result = to_numeric(ser, dtype_backend=dtype_backend, downcast=downcast)
|
||||
expected = Series([val, pd.NA], dtype=dtype)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"smaller, dtype_backend",
|
||||
[["UInt8", "numpy_nullable"], ["uint8[pyarrow]", "pyarrow"]],
|
||||
)
|
||||
def test_to_numeric_dtype_backend_downcasting_uint(smaller, dtype_backend):
|
||||
# GH#50505
|
||||
if dtype_backend == "pyarrow":
|
||||
pytest.importorskip("pyarrow")
|
||||
ser = Series([1, pd.NA], dtype="UInt64")
|
||||
result = to_numeric(ser, dtype_backend=dtype_backend, downcast="unsigned")
|
||||
expected = Series([1, pd.NA], dtype=smaller)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype",
|
||||
[
|
||||
"Int64",
|
||||
"UInt64",
|
||||
"Float64",
|
||||
"boolean",
|
||||
"int64[pyarrow]",
|
||||
"uint64[pyarrow]",
|
||||
"float64[pyarrow]",
|
||||
"bool[pyarrow]",
|
||||
],
|
||||
)
|
||||
def test_to_numeric_dtype_backend_already_nullable(dtype):
|
||||
# GH#50505
|
||||
if "pyarrow" in dtype:
|
||||
pytest.importorskip("pyarrow")
|
||||
ser = Series([1, pd.NA], dtype=dtype)
|
||||
result = to_numeric(ser, dtype_backend="numpy_nullable")
|
||||
expected = Series([1, pd.NA], dtype=dtype)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_to_numeric_dtype_backend_error(dtype_backend):
|
||||
# GH#50505
|
||||
ser = Series(["a", "b", ""])
|
||||
expected = ser.copy()
|
||||
with pytest.raises(ValueError, match="Unable to parse string"):
|
||||
to_numeric(ser, dtype_backend=dtype_backend)
|
||||
|
||||
msg = "errors='ignore' is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = to_numeric(ser, dtype_backend=dtype_backend, errors="ignore")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = to_numeric(ser, dtype_backend=dtype_backend, errors="coerce")
|
||||
if dtype_backend == "pyarrow":
|
||||
dtype = "double[pyarrow]"
|
||||
else:
|
||||
dtype = "Float64"
|
||||
expected = Series([np.nan, np.nan, np.nan], dtype=dtype)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_invalid_dtype_backend():
|
||||
ser = Series([1, 2, 3])
|
||||
msg = (
|
||||
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
|
||||
"'pyarrow' are allowed."
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
to_numeric(ser, dtype_backend="numpy")
|
||||
|
||||
|
||||
def test_coerce_pyarrow_backend():
|
||||
# GH 52588
|
||||
pa = pytest.importorskip("pyarrow")
|
||||
ser = Series(list("12x"), dtype=ArrowDtype(pa.string()))
|
||||
result = to_numeric(ser, errors="coerce", dtype_backend="pyarrow")
|
||||
expected = Series([1, 2, None], dtype=ArrowDtype(pa.int64()))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
from datetime import time
|
||||
import locale
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import PY311
|
||||
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
from pandas.core.tools.times import to_time
|
||||
|
||||
# The tests marked with this are locale-dependent.
|
||||
# They pass, except when the machine locale is zh_CN or it_IT.
|
||||
fails_on_non_english = pytest.mark.xfail(
|
||||
locale.getlocale()[0] in ("zh_CN", "it_IT"),
|
||||
reason="fail on a CI build with LC_ALL=zh_CN.utf8/it_IT.utf8",
|
||||
strict=False,
|
||||
)
|
||||
|
||||
|
||||
class TestToTime:
|
||||
@pytest.mark.parametrize(
|
||||
"time_string",
|
||||
[
|
||||
"14:15",
|
||||
"1415",
|
||||
pytest.param("2:15pm", marks=fails_on_non_english),
|
||||
pytest.param("0215pm", marks=fails_on_non_english),
|
||||
"14:15:00",
|
||||
"141500",
|
||||
pytest.param("2:15:00pm", marks=fails_on_non_english),
|
||||
pytest.param("021500pm", marks=fails_on_non_english),
|
||||
time(14, 15),
|
||||
],
|
||||
)
|
||||
def test_parsers_time(self, time_string):
|
||||
# GH#11818
|
||||
assert to_time(time_string) == time(14, 15)
|
||||
|
||||
def test_odd_format(self):
|
||||
new_string = "14.15"
|
||||
msg = r"Cannot convert arg \['14\.15'\] to a time"
|
||||
if not PY311:
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
to_time(new_string)
|
||||
assert to_time(new_string, format="%H.%M") == time(14, 15)
|
||||
|
||||
def test_arraylike(self):
|
||||
arg = ["14:15", "20:20"]
|
||||
expected_arr = [time(14, 15), time(20, 20)]
|
||||
assert to_time(arg) == expected_arr
|
||||
assert to_time(arg, format="%H:%M") == expected_arr
|
||||
assert to_time(arg, infer_time_format=True) == expected_arr
|
||||
assert to_time(arg, format="%I:%M%p", errors="coerce") == [None, None]
|
||||
|
||||
msg = "errors='ignore' is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
res = to_time(arg, format="%I:%M%p", errors="ignore")
|
||||
tm.assert_numpy_array_equal(res, np.array(arg, dtype=np.object_))
|
||||
|
||||
msg = "Cannot convert.+to a time with given format"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
to_time(arg, format="%I:%M%p", errors="raise")
|
||||
|
||||
tm.assert_series_equal(
|
||||
to_time(Series(arg, name="test")), Series(expected_arr, name="test")
|
||||
)
|
||||
|
||||
res = to_time(np.array(arg))
|
||||
assert isinstance(res, list)
|
||||
assert res == expected_arr
|
||||
|
|
@ -0,0 +1,340 @@
|
|||
from datetime import (
|
||||
time,
|
||||
timedelta,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import IS64
|
||||
from pandas.errors import OutOfBoundsTimedelta
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Series,
|
||||
TimedeltaIndex,
|
||||
isna,
|
||||
to_timedelta,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import TimedeltaArray
|
||||
|
||||
|
||||
class TestTimedeltas:
|
||||
def test_to_timedelta_dt64_raises(self):
|
||||
# Passing datetime64-dtype data to TimedeltaIndex is no longer
|
||||
# supported GH#29794
|
||||
msg = r"dtype datetime64\[ns\] cannot be converted to timedelta64\[ns\]"
|
||||
|
||||
ser = Series([pd.NaT])
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
to_timedelta(ser)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
ser.to_frame().apply(to_timedelta)
|
||||
|
||||
@pytest.mark.parametrize("readonly", [True, False])
|
||||
def test_to_timedelta_readonly(self, readonly):
|
||||
# GH#34857
|
||||
arr = np.array([], dtype=object)
|
||||
if readonly:
|
||||
arr.setflags(write=False)
|
||||
result = to_timedelta(arr)
|
||||
expected = to_timedelta([])
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_to_timedelta_null(self):
|
||||
result = to_timedelta(["", ""])
|
||||
assert isna(result).all()
|
||||
|
||||
def test_to_timedelta_same_np_timedelta64(self):
|
||||
# pass thru
|
||||
result = to_timedelta(np.array([np.timedelta64(1, "s")]))
|
||||
expected = pd.Index(np.array([np.timedelta64(1, "s")]))
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_to_timedelta_series(self):
|
||||
# Series
|
||||
expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)])
|
||||
result = to_timedelta(Series(["1d", "1days 00:00:01"]))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_to_timedelta_units(self):
|
||||
# with units
|
||||
result = TimedeltaIndex(
|
||||
[np.timedelta64(0, "ns"), np.timedelta64(10, "s").astype("m8[ns]")]
|
||||
)
|
||||
expected = to_timedelta([0, 10], unit="s")
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype, unit",
|
||||
[
|
||||
["int64", "s"],
|
||||
["int64", "m"],
|
||||
["int64", "h"],
|
||||
["timedelta64[s]", "s"],
|
||||
["timedelta64[D]", "D"],
|
||||
],
|
||||
)
|
||||
def test_to_timedelta_units_dtypes(self, dtype, unit):
|
||||
# arrays of various dtypes
|
||||
arr = np.array([1] * 5, dtype=dtype)
|
||||
result = to_timedelta(arr, unit=unit)
|
||||
exp_dtype = "m8[ns]" if dtype == "int64" else "m8[s]"
|
||||
expected = TimedeltaIndex([np.timedelta64(1, unit)] * 5, dtype=exp_dtype)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_to_timedelta_oob_non_nano(self):
|
||||
arr = np.array([pd.NaT._value + 1], dtype="timedelta64[m]")
|
||||
|
||||
msg = (
|
||||
"Cannot convert -9223372036854775807 minutes to "
|
||||
r"timedelta64\[s\] without overflow"
|
||||
)
|
||||
with pytest.raises(OutOfBoundsTimedelta, match=msg):
|
||||
to_timedelta(arr)
|
||||
|
||||
with pytest.raises(OutOfBoundsTimedelta, match=msg):
|
||||
TimedeltaIndex(arr)
|
||||
|
||||
with pytest.raises(OutOfBoundsTimedelta, match=msg):
|
||||
TimedeltaArray._from_sequence(arr, dtype="m8[s]")
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"arg", [np.arange(10).reshape(2, 5), pd.DataFrame(np.arange(10).reshape(2, 5))]
|
||||
)
|
||||
@pytest.mark.parametrize("errors", ["ignore", "raise", "coerce"])
|
||||
@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning")
|
||||
def test_to_timedelta_dataframe(self, arg, errors):
|
||||
# GH 11776
|
||||
with pytest.raises(TypeError, match="1-d array"):
|
||||
to_timedelta(arg, errors=errors)
|
||||
|
||||
def test_to_timedelta_invalid_errors(self):
|
||||
# bad value for errors parameter
|
||||
msg = "errors must be one of"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
to_timedelta(["foo"], errors="never")
|
||||
|
||||
@pytest.mark.parametrize("arg", [[1, 2], 1])
|
||||
def test_to_timedelta_invalid_unit(self, arg):
|
||||
# these will error
|
||||
msg = "invalid unit abbreviation: foo"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
to_timedelta(arg, unit="foo")
|
||||
|
||||
def test_to_timedelta_time(self):
|
||||
# time not supported ATM
|
||||
msg = (
|
||||
"Value must be Timedelta, string, integer, float, timedelta or convertible"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
to_timedelta(time(second=1))
|
||||
assert to_timedelta(time(second=1), errors="coerce") is pd.NaT
|
||||
|
||||
def test_to_timedelta_bad_value(self):
|
||||
msg = "Could not convert 'foo' to NumPy timedelta"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
to_timedelta(["foo", "bar"])
|
||||
|
||||
def test_to_timedelta_bad_value_coerce(self):
|
||||
tm.assert_index_equal(
|
||||
TimedeltaIndex([pd.NaT, pd.NaT]),
|
||||
to_timedelta(["foo", "bar"], errors="coerce"),
|
||||
)
|
||||
|
||||
tm.assert_index_equal(
|
||||
TimedeltaIndex(["1 day", pd.NaT, "1 min"]),
|
||||
to_timedelta(["1 day", "bar", "1 min"], errors="coerce"),
|
||||
)
|
||||
|
||||
def test_to_timedelta_invalid_errors_ignore(self):
|
||||
# gh-13613: these should not error because errors='ignore'
|
||||
msg = "errors='ignore' is deprecated"
|
||||
invalid_data = "apple"
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = to_timedelta(invalid_data, errors="ignore")
|
||||
assert invalid_data == result
|
||||
|
||||
invalid_data = ["apple", "1 days"]
|
||||
expected = np.array(invalid_data, dtype=object)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = to_timedelta(invalid_data, errors="ignore")
|
||||
tm.assert_numpy_array_equal(expected, result)
|
||||
|
||||
invalid_data = pd.Index(["apple", "1 days"])
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = to_timedelta(invalid_data, errors="ignore")
|
||||
tm.assert_index_equal(invalid_data, result)
|
||||
|
||||
invalid_data = Series(["apple", "1 days"])
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = to_timedelta(invalid_data, errors="ignore")
|
||||
tm.assert_series_equal(invalid_data, result)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"val, errors",
|
||||
[
|
||||
("1M", True),
|
||||
("1 M", True),
|
||||
("1Y", True),
|
||||
("1 Y", True),
|
||||
("1y", True),
|
||||
("1 y", True),
|
||||
("1m", False),
|
||||
("1 m", False),
|
||||
("1 day", False),
|
||||
("2day", False),
|
||||
],
|
||||
)
|
||||
def test_unambiguous_timedelta_values(self, val, errors):
|
||||
# GH36666 Deprecate use of strings denoting units with 'M', 'Y', 'm' or 'y'
|
||||
# in pd.to_timedelta
|
||||
msg = "Units 'M', 'Y' and 'y' do not represent unambiguous timedelta"
|
||||
if errors:
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
to_timedelta(val)
|
||||
else:
|
||||
# check it doesn't raise
|
||||
to_timedelta(val)
|
||||
|
||||
def test_to_timedelta_via_apply(self):
|
||||
# GH 5458
|
||||
expected = Series([np.timedelta64(1, "s")])
|
||||
result = Series(["00:00:01"]).apply(to_timedelta)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = Series([to_timedelta("00:00:01")])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_to_timedelta_inference_without_warning(self):
|
||||
# GH#41731 inference produces a warning in the Series constructor,
|
||||
# but _not_ in to_timedelta
|
||||
vals = ["00:00:01", pd.NaT]
|
||||
with tm.assert_produces_warning(None):
|
||||
result = to_timedelta(vals)
|
||||
|
||||
expected = TimedeltaIndex([pd.Timedelta(seconds=1), pd.NaT])
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_to_timedelta_on_missing_values(self):
|
||||
# GH5438
|
||||
timedelta_NaT = np.timedelta64("NaT")
|
||||
|
||||
actual = to_timedelta(Series(["00:00:01", np.nan]))
|
||||
expected = Series(
|
||||
[np.timedelta64(1000000000, "ns"), timedelta_NaT],
|
||||
dtype=f"{tm.ENDIAN}m8[ns]",
|
||||
)
|
||||
tm.assert_series_equal(actual, expected)
|
||||
|
||||
ser = Series(["00:00:01", pd.NaT], dtype="m8[ns]")
|
||||
actual = to_timedelta(ser)
|
||||
tm.assert_series_equal(actual, expected)
|
||||
|
||||
@pytest.mark.parametrize("val", [np.nan, pd.NaT, pd.NA])
|
||||
def test_to_timedelta_on_missing_values_scalar(self, val):
|
||||
actual = to_timedelta(val)
|
||||
assert actual._value == np.timedelta64("NaT").astype("int64")
|
||||
|
||||
@pytest.mark.parametrize("val", [np.nan, pd.NaT, pd.NA])
|
||||
def test_to_timedelta_on_missing_values_list(self, val):
|
||||
actual = to_timedelta([val])
|
||||
assert actual[0]._value == np.timedelta64("NaT").astype("int64")
|
||||
|
||||
@pytest.mark.xfail(not IS64, reason="Floating point error")
|
||||
def test_to_timedelta_float(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/25077
|
||||
arr = np.arange(0, 1, 1e-6)[-10:]
|
||||
result = to_timedelta(arr, unit="s")
|
||||
expected_asi8 = np.arange(999990000, 10**9, 1000, dtype="int64")
|
||||
tm.assert_numpy_array_equal(result.asi8, expected_asi8)
|
||||
|
||||
def test_to_timedelta_coerce_strings_unit(self):
|
||||
arr = np.array([1, 2, "error"], dtype=object)
|
||||
result = to_timedelta(arr, unit="ns", errors="coerce")
|
||||
expected = to_timedelta([1, 2, pd.NaT], unit="ns")
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_to_timedelta_ignore_strings_unit(self):
|
||||
arr = np.array([1, 2, "error"], dtype=object)
|
||||
msg = "errors='ignore' is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = to_timedelta(arr, unit="ns", errors="ignore")
|
||||
tm.assert_numpy_array_equal(result, arr)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"expected_val, result_val", [[timedelta(days=2), 2], [None, None]]
|
||||
)
|
||||
def test_to_timedelta_nullable_int64_dtype(self, expected_val, result_val):
|
||||
# GH 35574
|
||||
expected = Series([timedelta(days=1), expected_val])
|
||||
result = to_timedelta(Series([1, result_val], dtype="Int64"), unit="days")
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("input", "expected"),
|
||||
[
|
||||
("8:53:08.71800000001", "8:53:08.718"),
|
||||
("8:53:08.718001", "8:53:08.718001"),
|
||||
("8:53:08.7180000001", "8:53:08.7180000001"),
|
||||
("-8:53:08.71800000001", "-8:53:08.718"),
|
||||
("8:53:08.7180000089", "8:53:08.718000008"),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("func", [pd.Timedelta, to_timedelta])
|
||||
def test_to_timedelta_precision_over_nanos(self, input, expected, func):
|
||||
# GH: 36738
|
||||
expected = pd.Timedelta(expected)
|
||||
result = func(input)
|
||||
assert result == expected
|
||||
|
||||
def test_to_timedelta_zerodim(self, fixed_now_ts):
|
||||
# ndarray.item() incorrectly returns int for dt64[ns] and td64[ns]
|
||||
dt64 = fixed_now_ts.to_datetime64()
|
||||
arg = np.array(dt64)
|
||||
|
||||
msg = (
|
||||
"Value must be Timedelta, string, integer, float, timedelta "
|
||||
"or convertible, not datetime64"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
to_timedelta(arg)
|
||||
|
||||
arg2 = arg.view("m8[ns]")
|
||||
result = to_timedelta(arg2)
|
||||
assert isinstance(result, pd.Timedelta)
|
||||
assert result._value == dt64.view("i8")
|
||||
|
||||
def test_to_timedelta_numeric_ea(self, any_numeric_ea_dtype):
|
||||
# GH#48796
|
||||
ser = Series([1, pd.NA], dtype=any_numeric_ea_dtype)
|
||||
result = to_timedelta(ser)
|
||||
expected = Series([pd.Timedelta(1, unit="ns"), pd.NaT])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_to_timedelta_fraction(self):
|
||||
result = to_timedelta(1.0 / 3, unit="h")
|
||||
expected = pd.Timedelta("0 days 00:19:59.999999998")
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_from_numeric_arrow_dtype(any_numeric_ea_dtype):
|
||||
# GH 52425
|
||||
pytest.importorskip("pyarrow")
|
||||
ser = Series([1, 2], dtype=f"{any_numeric_ea_dtype.lower()}[pyarrow]")
|
||||
result = to_timedelta(ser)
|
||||
expected = Series([1, 2], dtype="timedelta64[ns]")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("unit", ["ns", "ms"])
|
||||
def test_from_timedelta_arrow_dtype(unit):
|
||||
# GH 54298
|
||||
pytest.importorskip("pyarrow")
|
||||
expected = Series([timedelta(1)], dtype=f"duration[{unit}][pyarrow]")
|
||||
result = to_timedelta(expected)
|
||||
tm.assert_series_equal(result, expected)
|
||||
Loading…
Add table
Add a link
Reference in a new issue