Skip to content

Commit 5d9b64e

Browse files
committed
Add timedelta64[s|ms|us|ns] numpy types
1 parent 5a654d3 commit 5d9b64e

8 files changed

Lines changed: 68 additions & 28 deletions

File tree

src/duckdb_py/include/duckdb_python/numpy/numpy_type.hpp

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -18,25 +18,28 @@ namespace duckdb {
1818
// Pandas Specific Types (e.g., categorical, datetime_tz,...)
1919
enum class NumpyNullableType : uint8_t {
2020
//! NumPy dtypes
21-
BOOL, //! bool_, bool8
22-
INT_8, //! byte, int8
23-
UINT_8, //! ubyte, uint8
24-
INT_16, //! int16, short
25-
UINT_16, //! uint16, ushort
26-
INT_32, //! int32, intc
27-
UINT_32, //! uint32, uintc,
28-
INT_64, //! int64, int0, int_, intp, matrix
29-
UINT_64, //! uint64, uint, uint0, uintp
30-
FLOAT_16, //! float16, half
31-
FLOAT_32, //! float32, single
32-
FLOAT_64, //! float64, float_, double
33-
OBJECT, //! object
34-
UNICODE, //! <U1, unicode_, str_, str0
35-
DATETIME_S, //! datetime64[s], <M8[s]
36-
DATETIME_MS, //! datetime64[ms], <M8[ms]
37-
DATETIME_NS, //! datetime64[ns], <M8[ns]
38-
DATETIME_US, //! datetime64[us], <M8[us]
39-
TIMEDELTA, //! timedelta64[D], timedelta64
21+
BOOL, //! bool_, bool8
22+
INT_8, //! byte, int8
23+
UINT_8, //! ubyte, uint8
24+
INT_16, //! int16, short
25+
UINT_16, //! uint16, ushort
26+
INT_32, //! int32, intc
27+
UINT_32, //! uint32, uintc,
28+
INT_64, //! int64, int0, int_, intp, matrix
29+
UINT_64, //! uint64, uint, uint0, uintp
30+
FLOAT_16, //! float16, half
31+
FLOAT_32, //! float32, single
32+
FLOAT_64, //! float64, float_, double
33+
OBJECT, //! object
34+
UNICODE, //! <U1, unicode_, str_, str0
35+
DATETIME_S, //! datetime64[s], <M8[s]
36+
DATETIME_MS, //! datetime64[ms], <M8[ms]
37+
DATETIME_NS, //! datetime64[ns], <M8[ns]
38+
DATETIME_US, //! datetime64[us], <M8[us]
39+
TIMEDELTA_NS, //! timedelta64[ns]
40+
TIMEDELTA_US, //! timedelta64[us]
41+
TIMEDELTA_MS, //! timedelta64[ms]
42+
TIMEDELTA_S, //! timedelta64[s]
4043

4144
//! ------------------------------------------------------------
4245
//! Extension Types

src/duckdb_py/numpy/array_wrapper.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ struct IntervalConvert {
112112
template <class DUCKDB_T, class NUMPY_T>
113113
static int64_t ConvertValue(interval_t val, NumpyAppendData &append_data) {
114114
(void)append_data;
115-
return Interval::GetNanoseconds(val);
115+
return Interval::GetMicro(val);
116116
}
117117

118118
template <class NUMPY_T, bool PANDAS>

src/duckdb_py/numpy/numpy_scan.cpp

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,10 @@ void NumpyScan::Scan(PandasColumnBindData &bind_data, idx_t count, idx_t offset,
302302
}
303303
break;
304304
}
305-
case NumpyNullableType::TIMEDELTA: {
305+
case NumpyNullableType::TIMEDELTA_NS:
306+
case NumpyNullableType::TIMEDELTA_US:
307+
case NumpyNullableType::TIMEDELTA_MS:
308+
case NumpyNullableType::TIMEDELTA_S: {
306309
auto src_ptr = reinterpret_cast<const int64_t *>(array.data());
307310
auto tgt_ptr = FlatVector::GetData<interval_t>(out);
308311
auto &mask = FlatVector::Validity(out);
@@ -314,7 +317,25 @@ void NumpyScan::Scan(PandasColumnBindData &bind_data, idx_t count, idx_t offset,
314317
mask.SetInvalid(row);
315318
continue;
316319
}
317-
int64_t micro = src_ptr[source_idx] / 1000;
320+
321+
int64_t micro;
322+
switch (bind_data.numpy_type.type) {
323+
case NumpyNullableType::TIMEDELTA_NS:
324+
micro = src_ptr[source_idx] / 1000; // ns -> us
325+
break;
326+
case NumpyNullableType::TIMEDELTA_US:
327+
micro = src_ptr[source_idx]; // already us
328+
break;
329+
case NumpyNullableType::TIMEDELTA_MS:
330+
micro = src_ptr[source_idx] * 1000; // ms -> us
331+
break;
332+
case NumpyNullableType::TIMEDELTA_S:
333+
micro = src_ptr[source_idx] * 1000000; // s -> us
334+
break;
335+
default:
336+
throw InternalException("Unexpected timedelta type");
337+
}
338+
318339
int64_t days = micro / Interval::MICROS_PER_DAY;
319340
micro = micro % Interval::MICROS_PER_DAY;
320341
int64_t months = days / Interval::DAYS_PER_MONTH;

src/duckdb_py/numpy/raw_array_wrapper.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ string RawArrayWrapper::DuckDBToNumpyDtype(const LogicalType &type) {
108108
case LogicalTypeId::DATE:
109109
return "datetime64[us]";
110110
case LogicalTypeId::INTERVAL:
111-
return "timedelta64[ns]";
111+
return "timedelta64[us]";
112112
case LogicalTypeId::TIME:
113113
case LogicalTypeId::TIME_TZ:
114114
case LogicalTypeId::VARCHAR:

src/duckdb_py/numpy/type.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,16 @@ static NumpyNullableType ConvertNumpyTypeInternal(const string &col_type_str) {
6565
return NumpyNullableType::OBJECT;
6666
}
6767
if (col_type_str == "timedelta64[ns]") {
68-
return NumpyNullableType::TIMEDELTA;
68+
return NumpyNullableType::TIMEDELTA_NS;
69+
}
70+
if (col_type_str == "timedelta64[us]") {
71+
return NumpyNullableType::TIMEDELTA_US;
72+
}
73+
if (col_type_str == "timedelta64[ms]") {
74+
return NumpyNullableType::TIMEDELTA_MS;
75+
}
76+
if (col_type_str == "timedelta64[s]") {
77+
return NumpyNullableType::TIMEDELTA_S;
6978
}
7079
// We use 'StartsWith' because it might have ', tz' at the end, indicating timezone
7180
if (StringUtil::StartsWith(col_type_str, "datetime64[ns")) {
@@ -143,7 +152,10 @@ LogicalType NumpyToLogicalType(const NumpyType &col_type) {
143152
return LogicalType::VARCHAR;
144153
case NumpyNullableType::OBJECT:
145154
return LogicalType::VARCHAR;
146-
case NumpyNullableType::TIMEDELTA:
155+
case NumpyNullableType::TIMEDELTA_NS:
156+
case NumpyNullableType::TIMEDELTA_US:
157+
case NumpyNullableType::TIMEDELTA_MS:
158+
case NumpyNullableType::TIMEDELTA_S:
147159
return LogicalType::INTERVAL;
148160
case NumpyNullableType::DATETIME_MS: {
149161
if (col_type.has_timezone) {

tests/fast/pandas/test_df_object_resolution.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99
import numpy as np
1010
import pandas as pd
1111
import pytest
12+
from conftest import is_string_dtype
1213

1314
import duckdb
14-
from tests.conftest import is_string_dtype
1515

1616
standard_vector_size = duckdb.__standard_vector_size__
1717

tests/fast/pandas/test_stride.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,9 @@ def test_stride_timedelta(self, duckdb_cursor):
5757
]
5858
}
5959
)
60-
pd.testing.assert_frame_equal(roundtrip, expected)
60+
# DuckDB INTERVAL type stores in microseconds, so output is always timedelta64[us]
61+
# Check values match without strict dtype comparison
62+
pd.testing.assert_frame_equal(roundtrip, expected, check_dtype=False)
6163

6264
def test_stride_fp64(self, duckdb_cursor):
6365
expected_df = pd.DataFrame(np.arange(20, dtype="float64").reshape(5, 4), columns=["a", "b", "c", "d"])

tests/fast/pandas/test_timestamp.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,9 @@ def test_timestamp_timedelta(self):
6565
}
6666
)
6767
df_from_duck = duckdb.from_df(df).df()
68-
assert df_from_duck.equals(df)
68+
# DuckDB INTERVAL type stores in microseconds, so output is always timedelta64[us]
69+
# Check values match without strict dtype comparison
70+
pd.testing.assert_frame_equal(df_from_duck, df, check_dtype=False)
6971

7072
@pytest.mark.xfail(
7173
condition=platform.system() == "Emscripten" and os.environ.get("TZ") != "UTC",

0 commit comments

Comments
 (0)