Skip to content

Commit 16621e8

Browse files
committed
Simplify related code paths
1 parent 6262e0b commit 16621e8

3 files changed

Lines changed: 40 additions & 152 deletions

File tree

src/duckdb_py/include/duckdb_python/python_conversion.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ enum class PythonObjectType {
4545

4646
PythonObjectType GetPythonObjectType(py::handle &ele);
4747

48-
bool TryTransformPythonNumeric(Value &res, py::handle ele, const LogicalType &target_type = LogicalType::UNKNOWN);
48+
LogicalType SniffPythonIntegerType(py::handle ele);
4949
bool DictionaryHasMapFormat(const PyDictionary &dict);
5050
void TransformPythonObject(py::handle ele, Vector &vector, idx_t result_offset, bool nan_as_null = true);
5151
Value TransformPythonValue(py::handle ele, const LogicalType &target_type = LogicalType::UNKNOWN,

src/duckdb_py/native/python_conversion.cpp

Lines changed: 36 additions & 147 deletions
Original file line numberDiff line numberDiff line change
@@ -260,16 +260,6 @@ Value TransformTupleToStruct(py::handle ele, const LogicalType &target_type = Lo
260260
return result;
261261
}
262262

263-
bool TryTransformPythonIntegerToDouble(Value &res, py::handle ele) {
264-
double number = PyLong_AsDouble(ele.ptr());
265-
if (number == -1.0 && PyErr_Occurred()) {
266-
PyErr_Clear();
267-
return false;
268-
}
269-
res = Value::DOUBLE(number);
270-
return true;
271-
}
272-
273263
// Tries to convert a Python integer that overflows int64/uint64 into a HUGEINT or UHUGEINT Value
274264
// by decomposing it into upper and lower 64-bit components. Tries HUGEINT first; falls back to
275265
// UHUGEINT for large positive values. Returns false if the value doesn't fit in 128 bits.
@@ -326,135 +316,52 @@ static Value TransformPythonLongToHugeInt(py::handle ele, const LogicalType &tar
326316
return result;
327317
}
328318

329-
void TransformPythonUnsigned(uint64_t value, Value &res) {
330-
if (value > (uint64_t)std::numeric_limits<uint32_t>::max()) {
331-
res = Value::UBIGINT(value);
332-
} else if (value > (int64_t)std::numeric_limits<uint16_t>::max()) {
333-
res = Value::UINTEGER(value);
334-
} else if (value > (int64_t)std::numeric_limits<uint16_t>::max()) {
335-
res = Value::USMALLINT(value);
336-
} else {
337-
res = Value::UTINYINT(value);
338-
}
339-
}
340-
341-
bool TrySniffPythonNumeric(Value &res, int64_t value) {
319+
// Picks the tightest DuckDB integer type for an int64 value, never narrower than INT32: INTEGER if it fits in int32, otherwise BIGINT.
320+
static Value SniffIntegerValue(int64_t value) {
342321
if (value < (int64_t)std::numeric_limits<int32_t>::min() || value > (int64_t)std::numeric_limits<int32_t>::max()) {
343-
res = Value::BIGINT(value);
344-
} else {
345-
// To match default duckdb behavior, numeric values without a specified type should not become a smaller type
346-
// than INT32
347-
res = Value::INTEGER(value);
322+
return Value::BIGINT(value);
348323
}
349-
return true;
324+
return Value::INTEGER(value);
350325
}
351326

352-
bool TryTransformPythonNumeric(Value &res, py::handle ele, const LogicalType &target_type) {
327+
// Sniffs the tightest DuckDB integer type for a Python integer.
328+
// Progressively widens: INTEGER → BIGINT → UBIGINT → HUGEINT → UHUGEINT.
329+
// Returns SQLNULL if the value doesn't fit in any DuckDB integer type (> 128-bit) or if the Python integer conversion itself raises an error.
330+
LogicalType SniffPythonIntegerType(py::handle ele) {
353331
auto ptr = ele.ptr();
354332

333+
// Step 1: Try int64
355334
int overflow;
356-
int64_t value = PyLong_AsLongLongAndOverflow(ptr, &overflow);
357-
if (overflow == -1) {
358-
PyErr_Clear();
359-
if (target_type.id() == LogicalTypeId::BIGINT) {
360-
throw InvalidInputException(StringUtil::Format("Failed to cast value: Python value '%s' to INT64",
361-
std::string(pybind11::str(ele))));
362-
}
363-
return TryTransformPythonLongToHugeInt(ele, target_type, res);
364-
} else if (overflow == 1) {
365-
if (target_type.InternalType() == PhysicalType::INT64) {
366-
throw InvalidInputException(StringUtil::Format("Failed to cast value: Python value '%s' to INT64",
367-
std::string(pybind11::str(ele))));
368-
}
369-
uint64_t unsigned_value = PyLong_AsUnsignedLongLong(ptr);
370-
if (PyErr_Occurred()) {
371-
PyErr_Clear();
372-
return TryTransformPythonLongToHugeInt(ele, target_type, res);
373-
}
374-
TransformPythonUnsigned(unsigned_value, res);
375-
PyErr_Clear();
376-
return true;
377-
}
378-
if (value == -1 && PyErr_Occurred()) {
379-
return false;
380-
}
335+
const int64_t value = PyLong_AsLongLongAndOverflow(ptr, &overflow);
381336

382-
// The value is int64_t or smaller
383-
switch (target_type.id()) {
384-
case LogicalTypeId::UNKNOWN:
385-
return TrySniffPythonNumeric(res, value);
386-
case LogicalTypeId::HUGEINT: {
387-
res = Value::HUGEINT(value);
388-
return true;
389-
}
390-
case LogicalTypeId::UHUGEINT: {
391-
if (value < 0) {
392-
return false;
393-
}
394-
res = Value::UHUGEINT(value);
395-
return true;
396-
}
397-
case LogicalTypeId::BIGINT: {
398-
res = Value::BIGINT(value);
399-
return true;
400-
}
401-
case LogicalTypeId::INTEGER: {
402-
if (value < NumericLimits<int32_t>::Minimum() || value > NumericLimits<int32_t>::Maximum()) {
403-
return false;
404-
}
405-
res = Value::INTEGER(value);
406-
return true;
407-
}
408-
case LogicalTypeId::SMALLINT: {
409-
if (value < NumericLimits<int16_t>::Minimum() || value > NumericLimits<int16_t>::Maximum()) {
410-
return false;
411-
}
412-
res = Value::SMALLINT(value);
413-
return true;
414-
}
415-
case LogicalTypeId::TINYINT: {
416-
if (value < NumericLimits<int8_t>::Minimum() || value > NumericLimits<int8_t>::Maximum()) {
417-
return false;
418-
}
419-
res = Value::TINYINT(value);
420-
return true;
421-
}
422-
case LogicalTypeId::UBIGINT: {
423-
if (value < 0) {
424-
return false;
425-
}
426-
res = Value::UBIGINT(value);
427-
return true;
428-
}
429-
case LogicalTypeId::UINTEGER: {
430-
if (value < 0 || value > (int64_t)NumericLimits<uint32_t>::Maximum()) {
431-
return false;
337+
if (overflow == 0) {
338+
if (value == -1 && PyErr_Occurred()) {
339+
PyErr_Clear();
340+
return LogicalType::SQLNULL;
432341
}
433-
res = Value::UINTEGER(value);
434-
return true;
435-
}
436-
case LogicalTypeId::USMALLINT: {
437-
if (value < 0 || value > (int64_t)NumericLimits<uint16_t>::Maximum()) {
438-
return false;
342+
if (value < static_cast<int64_t>(std::numeric_limits<int32_t>::min()) ||
343+
value > static_cast<int64_t>(std::numeric_limits<int32_t>::max())) {
344+
return LogicalType::BIGINT;
439345
}
440-
res = Value::USMALLINT(value);
441-
return true;
346+
return LogicalType::INTEGER;
442347
}
443-
case LogicalTypeId::UTINYINT: {
444-
if (value < 0 || value > (int64_t)NumericLimits<uint8_t>::Maximum()) {
445-
return false;
446-
}
447-
res = Value::UTINYINT(value);
448-
return true;
449-
}
450-
default: {
451-
if (!TrySniffPythonNumeric(res, value)) {
452-
return false;
348+
PyErr_Clear();
349+
350+
// Step 2: For positive overflow, try uint64
351+
if (overflow == 1) {
352+
(void)PyLong_AsUnsignedLongLong(ptr);
353+
if (!PyErr_Occurred()) {
354+
return LogicalType::UBIGINT;
453355
}
454-
res = CastToTarget(std::move(res), target_type);
455-
return true;
356+
PyErr_Clear();
456357
}
358+
359+
// Step 3: Try 128-bit (hugeint/uhugeint)
360+
Value res;
361+
if (!TryTransformPythonLongToHugeInt(ele, LogicalType::UNKNOWN, res)) {
362+
return LogicalType::SQLNULL;
457363
}
364+
return res.type();
458365
}
459366

460367
Value TransformDictionary(const PyDictionary &dict) {
@@ -561,10 +468,6 @@ struct PythonValueConversion {
561468
break;
562469
}
563470
}
564-
static void HandleLongAsDouble(Value &result, const LogicalType &target_type, double val) {
565-
auto cast_as = target_type.id() == LogicalTypeId::UNKNOWN ? LogicalType::DOUBLE : target_type;
566-
result = CastToTarget(Value::DOUBLE(val), cast_as);
567-
}
568471
static void HandleLongOverflow(Value &result, const LogicalType &target_type, py::handle ele) {
569472
result = TransformPythonLongToHugeInt(ele, target_type);
570473
}
@@ -573,21 +476,10 @@ struct PythonValueConversion {
573476
result = CastToTarget(Value::UBIGINT(val), cast_as);
574477
}
575478
static void HandleBigint(Value &res, const LogicalType &target_type, int64_t value) {
576-
switch (target_type.id()) {
577-
case LogicalTypeId::UNKNOWN: {
578-
if (value < (int64_t)std::numeric_limits<int32_t>::min() ||
579-
value > (int64_t)std::numeric_limits<int32_t>::max()) {
580-
res = Value::BIGINT(value);
581-
} else {
582-
// To match default duckdb behavior, numeric values without a specified type should not become a smaller
583-
// type than INT32
584-
res = Value::INTEGER(value);
585-
}
586-
break;
587-
}
588-
default:
589-
res = CastToTarget(Value::BIGINT(value), target_type);
590-
break;
479+
if (target_type.id() == LogicalTypeId::UNKNOWN) {
480+
res = SniffIntegerValue(value);
481+
} else {
482+
res = CastToTarget(SniffIntegerValue(value), target_type);
591483
}
592484
}
593485

@@ -737,9 +629,6 @@ struct PythonVectorConversion {
737629
break;
738630
}
739631
}
740-
static void HandleLongAsDouble(Vector &result, const idx_t &result_offset, double val) {
741-
FallbackValueConversion(result, result_offset, Value::DOUBLE(val));
742-
}
743632
static void HandleLongOverflow(Vector &result, const idx_t &result_offset, py::handle ele) {
744633
Value result_val = TransformPythonLongToHugeInt(ele, result.GetType());
745634
FallbackValueConversion(result, result_offset, std::move(result_val));

src/duckdb_py/pandas/analyzer.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -363,12 +363,11 @@ LogicalType PandasAnalyzer::GetItemType(py::object ele, bool &can_convert) {
363363
case PythonObjectType::Bool:
364364
return LogicalType::BOOLEAN;
365365
case PythonObjectType::Integer: {
366-
Value integer;
367-
if (!TryTransformPythonNumeric(integer, ele)) {
366+
auto type = SniffPythonIntegerType(ele);
367+
if (type.id() == LogicalTypeId::SQLNULL) {
368368
can_convert = false;
369-
return LogicalType::SQLNULL;
370369
}
371-
return integer.type();
370+
return type;
372371
}
373372
case PythonObjectType::Float:
374373
if (std::isnan(PyFloat_AsDouble(ele.ptr()))) {

0 commit comments

Comments
 (0)