@@ -260,16 +260,6 @@ Value TransformTupleToStruct(py::handle ele, const LogicalType &target_type = Lo
260260 return result;
261261}
262262
263- bool TryTransformPythonIntegerToDouble (Value &res, py::handle ele) {
264- double number = PyLong_AsDouble (ele.ptr ());
265- if (number == -1.0 && PyErr_Occurred ()) {
266- PyErr_Clear ();
267- return false ;
268- }
269- res = Value::DOUBLE (number);
270- return true ;
271- }
272-
273263// Tries to convert a Python integer that overflows int64/uint64 into a HUGEINT or UHUGEINT Value
274264// by decomposing it into upper and lower 64-bit components. Tries HUGEINT first; falls back to
275265// UHUGEINT for large positive values. Returns false if the value doesn't fit in 128 bits.
@@ -326,135 +316,52 @@ static Value TransformPythonLongToHugeInt(py::handle ele, const LogicalType &tar
326316 return result;
327317}
328318
329- void TransformPythonUnsigned (uint64_t value, Value &res) {
330- if (value > (uint64_t )std::numeric_limits<uint32_t >::max ()) {
331- res = Value::UBIGINT (value);
332- } else if (value > (int64_t )std::numeric_limits<uint16_t >::max ()) {
333- res = Value::UINTEGER (value);
334- } else if (value > (int64_t )std::numeric_limits<uint16_t >::max ()) {
335- res = Value::USMALLINT (value);
336- } else {
337- res = Value::UTINYINT (value);
338- }
339- }
340-
341- bool TrySniffPythonNumeric (Value &res, int64_t value) {
319+ // Picks the narrowest DuckDB integer type that is at least INT32 for an int64 value: INTEGER if it fits in int32, otherwise BIGINT.
320+ static Value SniffIntegerValue (int64_t value) {
342321 if (value < (int64_t )std::numeric_limits<int32_t >::min () || value > (int64_t )std::numeric_limits<int32_t >::max ()) {
343- res = Value::BIGINT (value);
344- } else {
345- // To match default duckdb behavior, numeric values without a specified type should not become a smaller type
346- // than INT32
347- res = Value::INTEGER (value);
322+ return Value::BIGINT (value);
348323 }
349- return true ;
324+ return Value::INTEGER (value) ;
350325}
351326
352- bool TryTransformPythonNumeric (Value &res, py::handle ele, const LogicalType &target_type) {
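327+ // Sniffs the narrowest DuckDB integer type (never narrower than INTEGER) that can hold a Python integer.
328+ // Progressively widens: INTEGER/BIGINT (int64) → UBIGINT (uint64) → hugeint → uhugeint.
329+ // Returns SQLNULL if the int64 conversion raises a Python error or the value doesn't fit in any DuckDB integer type (> 128-bit).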
330+ LogicalType SniffPythonIntegerType (py::handle ele) {
353331 auto ptr = ele.ptr ();
354332
333+ // Step 1: Try int64
355334 int overflow;
356- int64_t value = PyLong_AsLongLongAndOverflow (ptr, &overflow);
357- if (overflow == -1 ) {
358- PyErr_Clear ();
359- if (target_type.id () == LogicalTypeId::BIGINT) {
360- throw InvalidInputException (StringUtil::Format (" Failed to cast value: Python value '%s' to INT64" ,
361- std::string (pybind11::str (ele))));
362- }
363- return TryTransformPythonLongToHugeInt (ele, target_type, res);
364- } else if (overflow == 1 ) {
365- if (target_type.InternalType () == PhysicalType::INT64) {
366- throw InvalidInputException (StringUtil::Format (" Failed to cast value: Python value '%s' to INT64" ,
367- std::string (pybind11::str (ele))));
368- }
369- uint64_t unsigned_value = PyLong_AsUnsignedLongLong (ptr);
370- if (PyErr_Occurred ()) {
371- PyErr_Clear ();
372- return TryTransformPythonLongToHugeInt (ele, target_type, res);
373- }
374- TransformPythonUnsigned (unsigned_value, res);
375- PyErr_Clear ();
376- return true ;
377- }
378- if (value == -1 && PyErr_Occurred ()) {
379- return false ;
380- }
335+ const int64_t value = PyLong_AsLongLongAndOverflow (ptr, &overflow);
381336
382- // The value is int64_t or smaller
383- switch (target_type.id ()) {
384- case LogicalTypeId::UNKNOWN:
385- return TrySniffPythonNumeric (res, value);
386- case LogicalTypeId::HUGEINT: {
387- res = Value::HUGEINT (value);
388- return true ;
389- }
390- case LogicalTypeId::UHUGEINT: {
391- if (value < 0 ) {
392- return false ;
393- }
394- res = Value::UHUGEINT (value);
395- return true ;
396- }
397- case LogicalTypeId::BIGINT: {
398- res = Value::BIGINT (value);
399- return true ;
400- }
401- case LogicalTypeId::INTEGER: {
402- if (value < NumericLimits<int32_t >::Minimum () || value > NumericLimits<int32_t >::Maximum ()) {
403- return false ;
404- }
405- res = Value::INTEGER (value);
406- return true ;
407- }
408- case LogicalTypeId::SMALLINT: {
409- if (value < NumericLimits<int16_t >::Minimum () || value > NumericLimits<int16_t >::Maximum ()) {
410- return false ;
411- }
412- res = Value::SMALLINT (value);
413- return true ;
414- }
415- case LogicalTypeId::TINYINT: {
416- if (value < NumericLimits<int8_t >::Minimum () || value > NumericLimits<int8_t >::Maximum ()) {
417- return false ;
418- }
419- res = Value::TINYINT (value);
420- return true ;
421- }
422- case LogicalTypeId::UBIGINT: {
423- if (value < 0 ) {
424- return false ;
425- }
426- res = Value::UBIGINT (value);
427- return true ;
428- }
429- case LogicalTypeId::UINTEGER: {
430- if (value < 0 || value > (int64_t )NumericLimits<uint32_t >::Maximum ()) {
431- return false ;
337+ if (overflow == 0 ) {
338+ if (value == -1 && PyErr_Occurred ()) {
339+ PyErr_Clear ();
340+ return LogicalType::SQLNULL;
432341 }
433- res = Value::UINTEGER (value);
434- return true ;
435- }
436- case LogicalTypeId::USMALLINT: {
437- if (value < 0 || value > (int64_t )NumericLimits<uint16_t >::Maximum ()) {
438- return false ;
342+ if (value < static_cast <int64_t >(std::numeric_limits<int32_t >::min ()) ||
343+ value > static_cast <int64_t >(std::numeric_limits<int32_t >::max ())) {
344+ return LogicalType::BIGINT;
439345 }
440- res = Value::USMALLINT (value);
441- return true ;
346+ return LogicalType::INTEGER;
442347 }
443- case LogicalTypeId::UTINYINT: {
444- if (value < 0 || value > (int64_t )NumericLimits<uint8_t >::Maximum ()) {
445- return false ;
446- }
447- res = Value::UTINYINT (value);
448- return true ;
449- }
450- default : {
451- if (!TrySniffPythonNumeric (res, value)) {
452- return false ;
348+ PyErr_Clear ();
349+
350+ // Step 2: For positive overflow, try uint64
351+ if (overflow == 1 ) {
352+ (void )PyLong_AsUnsignedLongLong (ptr);
353+ if (!PyErr_Occurred ()) {
354+ return LogicalType::UBIGINT;
453355 }
454- res = CastToTarget (std::move (res), target_type);
455- return true ;
356+ PyErr_Clear ();
456357 }
358+
359+ // Step 3: Try 128-bit (hugeint/uhugeint)
360+ Value res;
361+ if (!TryTransformPythonLongToHugeInt (ele, LogicalType::UNKNOWN, res)) {
362+ return LogicalType::SQLNULL;
457363 }
364+ return res.type ();
458365}
459366
460367Value TransformDictionary (const PyDictionary &dict) {
@@ -561,10 +468,6 @@ struct PythonValueConversion {
561468 break ;
562469 }
563470 }
564- static void HandleLongAsDouble (Value &result, const LogicalType &target_type, double val) {
565- auto cast_as = target_type.id () == LogicalTypeId::UNKNOWN ? LogicalType::DOUBLE : target_type;
566- result = CastToTarget (Value::DOUBLE (val), cast_as);
567- }
568471 static void HandleLongOverflow (Value &result, const LogicalType &target_type, py::handle ele) {
569472 result = TransformPythonLongToHugeInt (ele, target_type);
570473 }
@@ -573,21 +476,10 @@ struct PythonValueConversion {
573476 result = CastToTarget (Value::UBIGINT (val), cast_as);
574477 }
575478 static void HandleBigint (Value &res, const LogicalType &target_type, int64_t value) {
576- switch (target_type.id ()) {
577- case LogicalTypeId::UNKNOWN: {
578- if (value < (int64_t )std::numeric_limits<int32_t >::min () ||
579- value > (int64_t )std::numeric_limits<int32_t >::max ()) {
580- res = Value::BIGINT (value);
581- } else {
582- // To match default duckdb behavior, numeric values without a specified type should not become a smaller
583- // type than INT32
584- res = Value::INTEGER (value);
585- }
586- break ;
587- }
588- default :
589- res = CastToTarget (Value::BIGINT (value), target_type);
590- break ;
479+ if (target_type.id () == LogicalTypeId::UNKNOWN) {
480+ res = SniffIntegerValue (value);
481+ } else {
482+ res = CastToTarget (SniffIntegerValue (value), target_type);
591483 }
592484 }
593485
@@ -737,9 +629,6 @@ struct PythonVectorConversion {
737629 break ;
738630 }
739631 }
740- static void HandleLongAsDouble (Vector &result, const idx_t &result_offset, double val) {
741- FallbackValueConversion (result, result_offset, Value::DOUBLE (val));
742- }
743632 static void HandleLongOverflow (Vector &result, const idx_t &result_offset, py::handle ele) {
744633 Value result_val = TransformPythonLongToHugeInt (ele, result.GetType ());
745634 FallbackValueConversion (result, result_offset, std::move (result_val));
0 commit comments