Skip to content

Commit f41329d

Browse files
committed
Fix numeric conversion logic
1 parent da5d721 commit f41329d

2 files changed

Lines changed: 164 additions & 20 deletions

File tree

src/duckdb_py/native/python_conversion.cpp

Lines changed: 62 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ Value TransformDictionaryToStruct(const PyDictionary &dict, const LogicalType &t
9292
child_list_t<Value> struct_values;
9393
for (idx_t i = 0; i < dict.len; i++) {
9494
auto &key = struct_target ? StructType::GetChildName(target_type, i) : struct_keys[i];
95-
auto value_index = key_mapping[key];
95+
auto value_index = struct_target ? key_mapping[key] : i;
9696
auto &child_type = struct_target ? StructType::GetChildType(target_type, i) : LogicalType::UNKNOWN;
9797
auto val = TransformPythonValue(dict.values.attr("__getitem__")(value_index), child_type);
9898
struct_values.emplace_back(make_pair(std::move(key), std::move(val)));
@@ -240,6 +240,49 @@ bool TryTransformPythonIntegerToDouble(Value &res, py::handle ele) {
240240
return true;
241241
}
242242

243+
// Converts a Python integer that overflows int64/uint64 into a HUGEINT or UHUGEINT Value by decomposing it into upper
244+
// and lower 64-bit components. Tries HUGEINT first; falls back to UHUGEINT for large positive values.
245+
static Value TransformPythonLongToHugeInt(py::handle ele, const LogicalType &target_type) {
246+
auto ptr = ele.ptr();
247+
248+
// Extract lower 64 bits (two's complement, works for negative values too)
249+
uint64_t lower = PyLong_AsUnsignedLongLongMask(ptr);
250+
if (lower == static_cast<uint64_t>(-1) && PyErr_Occurred()) {
251+
PyErr_Clear();
252+
throw InvalidInputException("Failed to convert Python integer to 128-bit integer: %s",
253+
std::string(py::str(ele)));
254+
}
255+
256+
// Extract upper bits by right-shifting by 64
257+
py::int_ shift_amount(64);
258+
py::object upper_obj = py::reinterpret_steal<py::object>(PyNumber_Rshift(ptr, shift_amount.ptr()));
259+
260+
// Try signed 128-bit (hugeint) first
261+
int overflow;
262+
int64_t upper_signed = PyLong_AsLongLongAndOverflow(upper_obj.ptr(), &overflow);
263+
if (overflow == 0 && !(upper_signed == -1 && PyErr_Occurred())) {
264+
auto val = Value::HUGEINT(hugeint_t {upper_signed, lower});
265+
if (target_type.id() == LogicalTypeId::UNKNOWN || target_type.id() == LogicalTypeId::HUGEINT) {
266+
return val;
267+
}
268+
return val.DefaultCastAs(target_type);
269+
}
270+
PyErr_Clear();
271+
272+
// Try unsigned 128-bit (uhugeint)
273+
uint64_t upper_unsigned = PyLong_AsUnsignedLongLong(upper_obj.ptr());
274+
if (PyErr_Occurred()) {
275+
PyErr_Clear();
276+
throw InvalidInputException("Python integer too large for 128-bit integer type: %s", std::string(py::str(ele)));
277+
}
278+
279+
auto val = Value::UHUGEINT(uhugeint_t {upper_unsigned, lower});
280+
if (target_type.id() == LogicalTypeId::UNKNOWN || target_type.id() == LogicalTypeId::UHUGEINT) {
281+
return val;
282+
}
283+
return val.DefaultCastAs(target_type);
284+
}
285+
243286
void TransformPythonUnsigned(uint64_t value, Value &res) {
244287
if (value > (uint64_t)std::numeric_limits<uint32_t>::max()) {
245288
res = Value::UBIGINT(value);
@@ -263,7 +306,6 @@ bool TrySniffPythonNumeric(Value &res, int64_t value) {
263306
return true;
264307
}
265308

266-
// TODO: add support for HUGEINT
267309
bool TryTransformPythonNumeric(Value &res, py::handle ele, const LogicalType &target_type) {
268310
auto ptr = ele.ptr();
269311

@@ -275,9 +317,7 @@ bool TryTransformPythonNumeric(Value &res, py::handle ele, const LogicalType &ta
275317
throw InvalidInputException(StringUtil::Format("Failed to cast value: Python value '%s' to INT64",
276318
std::string(pybind11::str(ele))));
277319
}
278-
auto cast_as = target_type.id() == LogicalTypeId::UNKNOWN ? LogicalType::HUGEINT : target_type;
279-
auto numeric_string = std::string(py::str(ele));
280-
res = Value(numeric_string).DefaultCastAs(cast_as);
320+
res = TransformPythonLongToHugeInt(ele, target_type);
281321
return true;
282322
} else if (overflow == 1) {
283323
if (target_type.InternalType() == PhysicalType::INT64) {
@@ -287,18 +327,18 @@ bool TryTransformPythonNumeric(Value &res, py::handle ele, const LogicalType &ta
287327
uint64_t unsigned_value = PyLong_AsUnsignedLongLong(ptr);
288328
if (PyErr_Occurred()) {
289329
PyErr_Clear();
290-
return TryTransformPythonIntegerToDouble(res, ele);
291-
} else {
292-
TransformPythonUnsigned(unsigned_value, res);
330+
res = TransformPythonLongToHugeInt(ele, target_type);
331+
return true;
293332
}
333+
TransformPythonUnsigned(unsigned_value, res);
294334
PyErr_Clear();
295335
return true;
296-
} else if (value == -1 && PyErr_Occurred()) {
336+
}
337+
if (value == -1 && PyErr_Occurred()) {
297338
return false;
298339
}
299340

300341
// The value is int64_t or smaller
301-
302342
switch (target_type.id()) {
303343
case LogicalTypeId::UNKNOWN:
304344
return TrySniffPythonNumeric(res, value);
@@ -476,13 +516,17 @@ struct PythonValueConversion {
476516
target_type.ToString());
477517
}
478518
default:
479-
throw ConversionException("Could not convert 'float' to type %s", target_type.ToString());
519+
result = Value::DOUBLE(val).DefaultCastAs(target_type);
520+
break;
480521
}
481522
}
482523
static void HandleLongAsDouble(Value &result, const LogicalType &target_type, double val) {
483524
auto cast_as = target_type.id() == LogicalTypeId::UNKNOWN ? LogicalType::DOUBLE : target_type;
484525
result = Value::DOUBLE(val).DefaultCastAs(cast_as);
485526
}
527+
static void HandleLongOverflow(Value &result, const LogicalType &target_type, py::handle ele) {
528+
result = TransformPythonLongToHugeInt(ele, target_type);
529+
}
486530
static void HandleUnsignedBigint(Value &result, const LogicalType &target_type, uint64_t val) {
487531
auto cast_as = target_type.id() == LogicalTypeId::UNKNOWN ? LogicalType::UBIGINT : target_type;
488532
result = Value::UBIGINT(val).DefaultCastAs(cast_as);
@@ -648,14 +692,17 @@ struct PythonVectorConversion {
648692
break;
649693
}
650694
default:
651-
throw TypeMismatchException(
652-
LogicalType::DOUBLE, result.GetType(),
653-
"Python Conversion Failure: Expected a value of type %s, but got a value of type double");
695+
FallbackValueConversion(result, result_offset, Value::DOUBLE(val).DefaultCastAs(result.GetType()));
696+
break;
654697
}
655698
}
656699
static void HandleLongAsDouble(Vector &result, const idx_t &result_offset, double val) {
657700
FallbackValueConversion(result, result_offset, Value::DOUBLE(val));
658701
}
702+
static void HandleLongOverflow(Vector &result, const idx_t &result_offset, py::handle ele) {
703+
Value result_val = TransformPythonLongToHugeInt(ele, result.GetType());
704+
FallbackValueConversion(result, result_offset, std::move(result_val));
705+
}
659706
static void HandleUnsignedBigint(Vector &result, const idx_t &result_offset, uint64_t value) {
660707
// this code path is only called for values in the range of [INT64_MAX...UINT64_MAX]
661708
switch (result.GetType().id()) {
@@ -966,12 +1013,7 @@ void TransformPythonObjectInternal(py::handle ele, A &result, const B &param, bo
9661013
conversion_target);
9671014
}
9681015
}
969-
double number = PyLong_AsDouble(ele.ptr());
970-
if (number == -1.0 && PyErr_Occurred()) {
971-
PyErr_Clear();
972-
throw InvalidInputException("An error occurred attempting to convert a python integer");
973-
}
974-
OP::HandleLongAsDouble(result, param, number);
1016+
OP::HandleLongOverflow(result, param, ele);
9751017
} else if (value == -1 && PyErr_Occurred()) {
9761018
throw InvalidInputException("An error occurred attempting to convert a python integer");
9771019
} else {

tests/fast/test_type_conversion.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
"""Regression tests for Python-to-DuckDB type conversion bugs.
2+
3+
Issue #115: Float conversion error with UNION containing float
4+
Issue #171: Dictionary key case sensitivity not respected for parameter bindings
5+
Issue #330: Integers >64-bit lose precision via double conversion
6+
"""
7+
8+
import pytest
9+
10+
import duckdb
11+
from duckdb.sqltypes import BIGINT, DOUBLE, HUGEINT, UHUGEINT, VARCHAR, DuckDBPyType
12+
13+
14+
class TestIssue115FloatToUnion:
    """Issue #115: HandleDouble must fall back to DefaultCastAs for target types it has no dedicated case for.

    UNION is such a type: a Python float returned by a UDF has to be cast into the UNION instead of raising.
    """

    def test_udf_float_to_union_type(self):
        """A UDF returning a bare float converts into a UNION that has a DOUBLE member."""
        union_type = duckdb.union_type({"u1": VARCHAR, "u2": BIGINT, "u3": DOUBLE})
        # The context manager closes the in-memory connection even if the query raises.
        with duckdb.connect() as conn:
            conn.create_function("return_float", lambda: 1.5, return_type=union_type)
            result = conn.sql("SELECT return_float()").fetchone()[0]
        assert result == 1.5

    def test_udf_dict_with_float_in_union_struct(self):
        """Original repro from issue #115."""
        arr = [{"a": 1, "b": 1.2}, {"a": 3, "b": 2.4}]

        def return_rows():
            return arr

        return_type = DuckDBPyType(list[dict[str, int | float]])
        # The context manager closes the in-memory connection even if the query raises.
        with duckdb.connect() as conn:
            # The SQL-visible name stays "test", as in the original report.
            conn.create_function("test", return_rows, return_type=return_type)
            result = conn.sql("SELECT test()").fetchone()[0]

        assert len(result) == 2
        assert result[0]["b"] == pytest.approx(1.2)
        assert result[1]["b"] == pytest.approx(2.4)
42+
43+
44+
class TestIssue171DictKeyCaseSensitivity:
    """Issue #171: a bound dict whose keys differ only by letter case must keep each key's own value."""

    def test_case_sensitive_dict_keys(self):
        """Two keys differing only in the first letter's case each keep their value."""
        param = {"Key": "first", "key": "second"}
        row = duckdb.execute("SELECT ?", [param]).fetchone()
        struct = row[0]
        assert struct["Key"] == "first"
        assert struct["key"] == "second"

    def test_case_sensitive_dict_keys_three_variants(self):
        """Three case variants of the same key each keep their value."""
        param = {"abc": 1, "ABC": 2, "Abc": 3}
        row = duckdb.execute("SELECT ?", [param]).fetchone()
        struct = row[0]
        assert struct["abc"] == 1
        assert struct["ABC"] == 2
        assert struct["Abc"] == 3
57+
58+
59+
class TestIssue330LargeIntegerPrecision:
    """Issue #330: integers wider than 64 bits must not lose precision via double conversion."""

    # --- Parameter binding path (TryTransformPythonNumeric) ---

    def test_param_hugeint_large(self):
        """Value with >53 significant bits (more than a double can hold) must not lose precision."""
        value = (2**128 - 1) // 15 * 7  # 0x77777777777777777777777777777777
        result = duckdb.execute("SELECT ?::HUGEINT", [value]).fetchone()[0]
        assert result == value

    def test_param_uhugeint_max(self):
        """2**128-1 must not overflow when cast to UHUGEINT."""
        value = 2**128 - 1
        result = duckdb.execute("SELECT ?::UHUGEINT", [value]).fetchone()[0]
        assert result == value

    def test_param_auto_sniff(self):
        """Without an explicit cast, a >64-bit integer must come back as an exact int, not a float."""
        # 2**64 is exactly representable as a double and Python treats 2.0**64 == 2**64 as equal, so on its
        # own it cannot detect a lossy conversion. 2**64 + 1 is not representable as a double, and the
        # type check rejects a float result for either value.
        for value in (2**64, 2**64 + 1):
            result = duckdb.execute("SELECT ?", [value]).fetchone()[0]
            assert isinstance(result, int)
            assert result == value

    def test_param_negative_hugeint_no_regression(self):
        """Negative overflow path (already correct) must not regress."""
        value = -(2**64)
        result = duckdb.execute("SELECT ?::HUGEINT", [value]).fetchone()[0]
        assert result == value

    # --- UDF return path (TransformPythonObjectInternal template) ---

    def test_udf_return_large_hugeint(self):
        """A UDF declared as HUGEINT returns a wide integer exactly."""
        value = (2**128 - 1) // 15 * 7
        # The context manager closes the in-memory connection even if the query raises.
        with duckdb.connect() as conn:
            conn.create_function("big_hugeint", lambda: value, return_type=HUGEINT)
            result = conn.sql("SELECT big_hugeint()").fetchone()[0]
        assert result == value

    def test_udf_return_large_uhugeint(self):
        """A UDF declared as UHUGEINT returns 2**128-1 exactly."""
        value = 2**128 - 1
        # The context manager closes the in-memory connection even if the query raises.
        with duckdb.connect() as conn:
            conn.create_function("big_uhugeint", lambda: value, return_type=UHUGEINT)
            result = conn.sql("SELECT big_uhugeint()").fetchone()[0]
        assert result == value

0 commit comments

Comments
 (0)