1313
1414namespace duckdb {
1515
16+ // Like DefaultCastAs, but handles UNION targets by finding the first compatible member. DefaultCastAs raises a
17+ // Conversion Error when multiple UNION members have the same type (e.g. UNION(u1 DOUBLE, u2 DOUBLE)), so for UNION
18+ // targets we resolve the member ourselves.
19+ static Value CastToTarget (Value val, const LogicalType &target_type) {
20+ if (target_type.id () != LogicalTypeId::UNION) {
21+ return val.DefaultCastAs (target_type);
22+ }
23+
24+ auto member_count = UnionType::GetMemberCount (target_type);
25+ auto &source_type = val.type ();
26+
27+ // First pass: if there's an exact type match we use that
28+ for (idx_t i = 0 ; i < member_count; i++) {
29+ if (UnionType::GetMemberType (target_type, i) == source_type) {
30+ return Value::UNION (UnionType::CopyMemberTypes (target_type), NumericCast<uint8_t >(i), std::move (val));
31+ }
32+ }
33+
34+ // Second pass: if there's a type we can implicitly cast to, we do that
35+ for (idx_t i = 0 ; i < member_count; i++) {
36+ auto member_type = UnionType::GetMemberType (target_type, i);
37+ Value candidate = val;
38+ if (candidate.DefaultTryCastAs (member_type)) {
39+ return Value::UNION (UnionType::CopyMemberTypes (target_type), NumericCast<uint8_t >(i), std::move (candidate));
40+ }
41+ }
42+ throw ConversionException (" Could not convert value of type %s to %s" , source_type.ToString (),
43+ target_type.ToString ());
44+ }
45+
1646static Value EmptyMapValue () {
1747 auto map_type = LogicalType::MAP (LogicalType::SQLNULL, LogicalType::SQLNULL);
1848 return Value::MAP (ListType::GetChildType (map_type), vector<Value>());
@@ -92,7 +122,7 @@ Value TransformDictionaryToStruct(const PyDictionary &dict, const LogicalType &t
92122 child_list_t <Value> struct_values;
93123 for (idx_t i = 0 ; i < dict.len ; i++) {
94124 auto &key = struct_target ? StructType::GetChildName (target_type, i) : struct_keys[i];
95- auto value_index = key_mapping[key];
125+ auto value_index = struct_target ? key_mapping[key] : i ;
96126 auto &child_type = struct_target ? StructType::GetChildType (target_type, i) : LogicalType::UNKNOWN;
97127 auto val = TransformPythonValue (dict.values .attr (" __getitem__" )(value_index), child_type);
98128 struct_values.emplace_back (make_pair (std::move (key), std::move (val)));
@@ -230,150 +260,108 @@ Value TransformTupleToStruct(py::handle ele, const LogicalType &target_type = Lo
230260 return result;
231261}
232262
233- bool TryTransformPythonIntegerToDouble (Value &res, py::handle ele) {
234- double number = PyLong_AsDouble (ele.ptr ());
235- if (number == -1.0 && PyErr_Occurred ()) {
263+ // Tries to convert a Python integer that overflows int64/uint64 into a HUGEINT or UHUGEINT Value
264+ // by decomposing it into upper and lower 64-bit components. Tries HUGEINT first; falls back to
265+ // UHUGEINT for large positive values. Returns false if the value doesn't fit in 128 bits.
266+ static bool TryTransformPythonLongToHugeInt (py::handle ele, const LogicalType &target_type, Value &result) {
267+ auto ptr = ele.ptr ();
268+
269+ // Extract lower 64 bits (two's complement, works for negative values too)
270+ uint64_t lower = PyLong_AsUnsignedLongLongMask (ptr);
271+ if (lower == static_cast <uint64_t >(-1 ) && PyErr_Occurred ()) {
236272 PyErr_Clear ();
237273 return false ;
238274 }
239- res = Value::DOUBLE (number);
275+
276+ // Extract upper bits by right-shifting by 64
277+ py::int_ shift_amount (64 );
278+ py::object upper_obj = py::reinterpret_steal<py::object>(PyNumber_Rshift (ptr, shift_amount.ptr ()));
279+
280+ // Try signed 128-bit (hugeint) first
281+ int overflow;
282+ int64_t upper_signed = PyLong_AsLongLongAndOverflow (upper_obj.ptr (), &overflow);
283+ if (overflow == 0 && !(upper_signed == -1 && PyErr_Occurred ())) {
284+ auto val = Value::HUGEINT (hugeint_t {upper_signed, lower});
285+ if (target_type.id () == LogicalTypeId::UNKNOWN || target_type.id () == LogicalTypeId::HUGEINT) {
286+ result = val;
287+ } else {
288+ result = CastToTarget (std::move (val), target_type);
289+ }
290+ return true ;
291+ }
292+ PyErr_Clear ();
293+
294+ // Try unsigned 128-bit (uhugeint)
295+ uint64_t upper_unsigned = PyLong_AsUnsignedLongLong (upper_obj.ptr ());
296+ if (PyErr_Occurred ()) {
297+ PyErr_Clear ();
298+ return false ;
299+ }
300+
301+ auto val = Value::UHUGEINT (uhugeint_t {upper_unsigned, lower});
302+ if (target_type.id () == LogicalTypeId::UNKNOWN || target_type.id () == LogicalTypeId::UHUGEINT) {
303+ result = val;
304+ } else {
305+ result = CastToTarget (std::move (val), target_type);
306+ }
240307 return true ;
241308}
242309
243- void TransformPythonUnsigned (uint64_t value, Value &res) {
244- if (value > (uint64_t )std::numeric_limits<uint32_t >::max ()) {
245- res = Value::UBIGINT (value);
246- } else if (value > (int64_t )std::numeric_limits<uint16_t >::max ()) {
247- res = Value::UINTEGER (value);
248- } else if (value > (int64_t )std::numeric_limits<uint16_t >::max ()) {
249- res = Value::USMALLINT (value);
250- } else {
251- res = Value::UTINYINT (value);
310+ // Throwing wrapper for contexts that require a result (e.g. prepared statement parameters).
311+ static Value TransformPythonLongToHugeInt (py::handle ele, const LogicalType &target_type) {
312+ Value result;
313+ if (!TryTransformPythonLongToHugeInt (ele, target_type, result)) {
314+ throw InvalidInputException (" Python integer too large for 128-bit integer type: %s" , std::string (py::str (ele)));
252315 }
316+ return result;
253317}
254318
255- bool TrySniffPythonNumeric (Value &res, int64_t value) {
319+ // Picks the tightest DuckDB integer type (>=INT32) for an int64 value when no target type is specified.
320+ static Value SniffIntegerValue (int64_t value) {
256321 if (value < (int64_t )std::numeric_limits<int32_t >::min () || value > (int64_t )std::numeric_limits<int32_t >::max ()) {
257- res = Value::BIGINT (value);
258- } else {
259- // To match default duckdb behavior, numeric values without a specified type should not become a smaller type
260- // than INT32
261- res = Value::INTEGER (value);
322+ return Value::BIGINT (value);
262323 }
263- return true ;
324+ return Value::INTEGER (value) ;
264325}
265326
266- // TODO: add support for HUGEINT
267- bool TryTransformPythonNumeric (Value &res, py::handle ele, const LogicalType &target_type) {
327+ // Sniffs the tightest DuckDB integer type for a Python integer.
328+ // Progressively widens: int64 → uint64 → hugeint → uhugeint.
329+ // Returns SQLNULL if the value doesn't fit in any DuckDB integer type (> 128-bit).
330+ LogicalType SniffPythonIntegerType (py::handle ele) {
268331 auto ptr = ele.ptr ();
269332
333+ // Step 1: Try int64
270334 int overflow;
271- int64_t value = PyLong_AsLongLongAndOverflow (ptr, &overflow);
272- if (overflow == -1 ) {
273- PyErr_Clear ();
274- if (target_type.id () == LogicalTypeId::BIGINT) {
275- throw InvalidInputException (StringUtil::Format (" Failed to cast value: Python value '%s' to INT64" ,
276- std::string (pybind11::str (ele))));
277- }
278- auto cast_as = target_type.id () == LogicalTypeId::UNKNOWN ? LogicalType::HUGEINT : target_type;
279- auto numeric_string = std::string (py::str (ele));
280- res = Value (numeric_string).DefaultCastAs (cast_as);
281- return true ;
282- } else if (overflow == 1 ) {
283- if (target_type.InternalType () == PhysicalType::INT64) {
284- throw InvalidInputException (StringUtil::Format (" Failed to cast value: Python value '%s' to INT64" ,
285- std::string (pybind11::str (ele))));
286- }
287- uint64_t unsigned_value = PyLong_AsUnsignedLongLong (ptr);
288- if (PyErr_Occurred ()) {
289- PyErr_Clear ();
290- return TryTransformPythonIntegerToDouble (res, ele);
291- } else {
292- TransformPythonUnsigned (unsigned_value, res);
293- }
294- PyErr_Clear ();
295- return true ;
296- } else if (value == -1 && PyErr_Occurred ()) {
297- return false ;
298- }
299-
300- // The value is int64_t or smaller
335+ const int64_t value = PyLong_AsLongLongAndOverflow (ptr, &overflow);
301336
302- switch (target_type.id ()) {
303- case LogicalTypeId::UNKNOWN:
304- return TrySniffPythonNumeric (res, value);
305- case LogicalTypeId::HUGEINT: {
306- res = Value::HUGEINT (value);
307- return true ;
308- }
309- case LogicalTypeId::UHUGEINT: {
310- if (value < 0 ) {
311- return false ;
312- }
313- res = Value::UHUGEINT (value);
314- return true ;
315- }
316- case LogicalTypeId::BIGINT: {
317- res = Value::BIGINT (value);
318- return true ;
319- }
320- case LogicalTypeId::INTEGER: {
321- if (value < NumericLimits<int32_t >::Minimum () || value > NumericLimits<int32_t >::Maximum ()) {
322- return false ;
323- }
324- res = Value::INTEGER (value);
325- return true ;
326- }
327- case LogicalTypeId::SMALLINT: {
328- if (value < NumericLimits<int16_t >::Minimum () || value > NumericLimits<int16_t >::Maximum ()) {
329- return false ;
330- }
331- res = Value::SMALLINT (value);
332- return true ;
333- }
334- case LogicalTypeId::TINYINT: {
335- if (value < NumericLimits<int8_t >::Minimum () || value > NumericLimits<int8_t >::Maximum ()) {
336- return false ;
337- }
338- res = Value::TINYINT (value);
339- return true ;
340- }
341- case LogicalTypeId::UBIGINT: {
342- if (value < 0 ) {
343- return false ;
344- }
345- res = Value::UBIGINT (value);
346- return true ;
347- }
348- case LogicalTypeId::UINTEGER: {
349- if (value < 0 || value > (int64_t )NumericLimits<uint32_t >::Maximum ()) {
350- return false ;
351- }
352- res = Value::UINTEGER (value);
353- return true ;
354- }
355- case LogicalTypeId::USMALLINT: {
356- if (value < 0 || value > (int64_t )NumericLimits<uint16_t >::Maximum ()) {
357- return false ;
337+ if (overflow == 0 ) {
338+ if (value == -1 && PyErr_Occurred ()) {
339+ PyErr_Clear ();
340+ return LogicalType::SQLNULL;
358341 }
359- res = Value::USMALLINT (value);
360- return true ;
361- }
362- case LogicalTypeId::UTINYINT: {
363- if (value < 0 || value > (int64_t )NumericLimits<uint8_t >::Maximum ()) {
364- return false ;
342+ if (value < static_cast <int64_t >(std::numeric_limits<int32_t >::min ()) ||
343+ value > static_cast <int64_t >(std::numeric_limits<int32_t >::max ())) {
344+ return LogicalType::BIGINT;
365345 }
366- res = Value::UTINYINT (value);
367- return true ;
346+ return LogicalType::INTEGER;
368347 }
369- default : {
370- if (!TrySniffPythonNumeric (res, value)) {
371- return false ;
348+ PyErr_Clear ();
349+
350+ // Step 2: For positive overflow, try uint64
351+ if (overflow == 1 ) {
352+ (void )PyLong_AsUnsignedLongLong (ptr);
353+ if (!PyErr_Occurred ()) {
354+ return LogicalType::UBIGINT;
372355 }
373- res = res.DefaultCastAs (target_type, true );
374- return true ;
356+ PyErr_Clear ();
375357 }
358+
359+ // Step 3: Try 128-bit (hugeint/uhugeint)
360+ Value res;
361+ if (!TryTransformPythonLongToHugeInt (ele, LogicalType::UNKNOWN, res)) {
362+ return LogicalType::SQLNULL;
376363 }
364+ return res.type ();
377365}
378366
379367Value TransformDictionary (const PyDictionary &dict) {
@@ -476,33 +464,22 @@ struct PythonValueConversion {
476464 target_type.ToString ());
477465 }
478466 default :
479- throw ConversionException (" Could not convert 'float' to type %s" , target_type.ToString ());
467+ result = CastToTarget (Value::DOUBLE (val), target_type);
468+ break ;
480469 }
481470 }
482- static void HandleLongAsDouble (Value &result, const LogicalType &target_type, double val) {
483- auto cast_as = target_type.id () == LogicalTypeId::UNKNOWN ? LogicalType::DOUBLE : target_type;
484- result = Value::DOUBLE (val).DefaultCastAs (cast_as);
471+ static void HandleLongOverflow (Value &result, const LogicalType &target_type, py::handle ele) {
472+ result = TransformPythonLongToHugeInt (ele, target_type);
485473 }
486474 static void HandleUnsignedBigint (Value &result, const LogicalType &target_type, uint64_t val) {
487475 auto cast_as = target_type.id () == LogicalTypeId::UNKNOWN ? LogicalType::UBIGINT : target_type;
488- result = Value::UBIGINT (val). DefaultCastAs ( cast_as);
476+ result = CastToTarget ( Value::UBIGINT (val), cast_as);
489477 }
490478 static void HandleBigint (Value &res, const LogicalType &target_type, int64_t value) {
491- switch (target_type.id ()) {
492- case LogicalTypeId::UNKNOWN: {
493- if (value < (int64_t )std::numeric_limits<int32_t >::min () ||
494- value > (int64_t )std::numeric_limits<int32_t >::max ()) {
495- res = Value::BIGINT (value);
496- } else {
497- // To match default duckdb behavior, numeric values without a specified type should not become a smaller
498- // type than INT32
499- res = Value::INTEGER (value);
500- }
501- break ;
502- }
503- default :
504- res = Value::BIGINT (value).DefaultCastAs (target_type);
505- break ;
479+ if (target_type.id () == LogicalTypeId::UNKNOWN) {
480+ res = SniffIntegerValue (value);
481+ } else {
482+ res = CastToTarget (SniffIntegerValue (value), target_type);
506483 }
507484 }
508485
@@ -511,7 +488,7 @@ struct PythonValueConversion {
511488 (target_type.id () == LogicalTypeId::VARCHAR && !target_type.HasAlias ())) {
512489 result = Value (value);
513490 } else {
514- result = Value (value). DefaultCastAs ( target_type);
491+ result = CastToTarget ( Value (value), target_type);
515492 }
516493 }
517494
@@ -648,13 +625,13 @@ struct PythonVectorConversion {
648625 break ;
649626 }
650627 default :
651- throw TypeMismatchException (
652- LogicalType::DOUBLE, result.GetType (),
653- " Python Conversion Failure: Expected a value of type %s, but got a value of type double" );
628+ FallbackValueConversion (result, result_offset, CastToTarget (Value::DOUBLE (val), result.GetType ()));
629+ break ;
654630 }
655631 }
656- static void HandleLongAsDouble (Vector &result, const idx_t &result_offset, double val) {
657- FallbackValueConversion (result, result_offset, Value::DOUBLE (val));
632+ static void HandleLongOverflow (Vector &result, const idx_t &result_offset, py::handle ele) {
633+ Value result_val = TransformPythonLongToHugeInt (ele, result.GetType ());
634+ FallbackValueConversion (result, result_offset, std::move (result_val));
658635 }
659636 static void HandleUnsignedBigint (Vector &result, const idx_t &result_offset, uint64_t value) {
660637 // this code path is only called for values in the range of [INT64_MAX...UINT64_MAX]
@@ -669,7 +646,7 @@ struct PythonVectorConversion {
669646 FlatVector::GetData<uint64_t >(result)[result_offset] = value;
670647 break ;
671648 default :
672- FallbackValueConversion (result, result_offset, Value::UBIGINT (value));
649+ FallbackValueConversion (result, result_offset, CastToTarget ( Value::UBIGINT (value), result. GetType () ));
673650 break ;
674651 }
675652 }
@@ -740,7 +717,7 @@ struct PythonVectorConversion {
740717 break ;
741718 }
742719 default :
743- FallbackValueConversion (result, result_offset, Value::BIGINT (value));
720+ FallbackValueConversion (result, result_offset, CastToTarget ( Value::BIGINT (value), result. GetType () ));
744721 break ;
745722 }
746723 }
@@ -966,12 +943,7 @@ void TransformPythonObjectInternal(py::handle ele, A &result, const B ¶m, bo
966943 conversion_target);
967944 }
968945 }
969- double number = PyLong_AsDouble (ele.ptr ());
970- if (number == -1.0 && PyErr_Occurred ()) {
971- PyErr_Clear ();
972- throw InvalidInputException (" An error occurred attempting to convert a python integer" );
973- }
974- OP::HandleLongAsDouble (result, param, number);
946+ OP::HandleLongOverflow (result, param, ele);
975947 } else if (value == -1 && PyErr_Occurred ()) {
976948 throw InvalidInputException (" An error occurred attempting to convert a python integer" );
977949 } else {
0 commit comments