@@ -1468,3 +1468,165 @@ def test_coalesce(df):
14681468 assert result .column (0 ) == pa .array (
14691469 ["Hello" , "fallback" , "!" ], type = pa .string_view ()
14701470 )
1471+
1472+
def test_greatest(df):
    """Exercise ``f.greatest`` on integer, partially-null and string columns.

    The ``df`` fixture argument is not referenced by the body; each scenario
    builds its own dataframe on a locally created ``SessionContext``.
    """
    session = SessionContext()

    def first_column(frame, expr):
        # Select one expression and hand back column 0 of the first collected item.
        return frame.select(expr).collect()[0].column(0)

    int_batch = pa.RecordBatch.from_arrays(
        [
            pa.array([1, 5, None]),
            pa.array([3, 2, None]),
            pa.array([2, 8, None]),
        ],
        names=["a", "b", "c"],
    )
    int_frame = session.create_dataframe([[int_batch]])

    # Two inputs: the last row is null in every column, so null is expected there.
    two_way = f.greatest(column("a"), column("b")).alias("greatest_ab")
    assert first_column(int_frame, two_way) == pa.array(
        [3, 5, None], type=pa.int64()
    )

    # Three inputs over the same batch.
    three_way = f.greatest(column("a"), column("b"), column("c")).alias(
        "greatest_abc"
    )
    assert first_column(int_frame, three_way) == pa.array(
        [3, 8, None], type=pa.int64()
    )

    # Each row has exactly one null; the non-null side is the expected result.
    sparse_batch = pa.RecordBatch.from_arrays(
        [
            pa.array([None, 10]),
            pa.array([5, None]),
        ],
        names=["x", "y"],
    )
    sparse_frame = session.create_dataframe([[sparse_batch]])
    sparse_expr = f.greatest(column("x"), column("y")).alias("g")
    assert first_column(sparse_frame, sparse_expr) == pa.array(
        [5, 10], type=pa.int64()
    )

    # String inputs; values are compared through to_pylist() rather than as arrays.
    text_batch = pa.RecordBatch.from_arrays(
        [
            pa.array(["apple", "cherry"]),
            pa.array(["banana", "apricot"]),
        ],
        names=["s1", "s2"],
    )
    text_frame = session.create_dataframe([[text_batch]])
    text_expr = f.greatest(column("s1"), column("s2")).alias("g")
    assert first_column(text_frame, text_expr).to_pylist() == ["banana", "cherry"]
1524+
1525+
def test_least(df):
    """Exercise ``f.least`` on integer, partially-null and string columns.

    The ``df`` fixture argument is not referenced by the body; each scenario
    builds its own dataframe on a locally created ``SessionContext``.
    """
    session = SessionContext()

    def first_column(frame, expr):
        # Select one expression and hand back column 0 of the first collected item.
        return frame.select(expr).collect()[0].column(0)

    int_batch = pa.RecordBatch.from_arrays(
        [
            pa.array([1, 5, None]),
            pa.array([3, 2, None]),
            pa.array([2, 8, None]),
        ],
        names=["a", "b", "c"],
    )
    int_frame = session.create_dataframe([[int_batch]])

    # Two inputs: the last row is null in every column, so null is expected there.
    two_way = f.least(column("a"), column("b")).alias("least_ab")
    assert first_column(int_frame, two_way) == pa.array(
        [1, 2, None], type=pa.int64()
    )

    # Three inputs over the same batch; column "c" does not change the minimum.
    three_way = f.least(column("a"), column("b"), column("c")).alias("least_abc")
    assert first_column(int_frame, three_way) == pa.array(
        [1, 2, None], type=pa.int64()
    )

    # Each row has exactly one null; the non-null side is the expected result.
    sparse_batch = pa.RecordBatch.from_arrays(
        [
            pa.array([None, 10]),
            pa.array([5, None]),
        ],
        names=["x", "y"],
    )
    sparse_frame = session.create_dataframe([[sparse_batch]])
    sparse_expr = f.least(column("x"), column("y")).alias("l")
    assert first_column(sparse_frame, sparse_expr) == pa.array(
        [5, 10], type=pa.int64()
    )

    # String inputs; values are compared through to_pylist() rather than as arrays.
    text_batch = pa.RecordBatch.from_arrays(
        [
            pa.array(["apple", "cherry"]),
            pa.array(["banana", "apricot"]),
        ],
        names=["s1", "s2"],
    )
    text_frame = session.create_dataframe([[text_batch]])
    text_expr = f.least(column("s1"), column("s2")).alias("l")
    assert first_column(text_frame, text_expr).to_pylist() == ["apple", "apricot"]
1575+
1576+
def test_nvl2(df):
    """Exercise ``f.nvl2`` with integer and string inputs.

    The expectations encode: where the first argument is non-null the second
    argument's value is produced, otherwise the third argument's value.
    The ``df`` fixture argument is not referenced by the body.
    """
    session = SessionContext()

    def first_column(frame, expr):
        # Select one expression and hand back column 0 of the first collected item.
        return frame.select(expr).collect()[0].column(0)

    int_batch = pa.RecordBatch.from_arrays(
        [
            pa.array([None, 1, None, 4]),
            pa.array([10, 20, 30, 40]),
            pa.array([100, 200, 300, 400]),
        ],
        names=["a", "b", "c"],
    )
    int_frame = session.create_dataframe([[int_batch]])

    # Rows 0 and 2 have a null "a", so "c" is expected there and "b" elsewhere.
    int_expr = f.nvl2(column("a"), column("b"), column("c")).alias("result")
    assert first_column(int_frame, int_expr) == pa.array(
        [100, 20, 300, 40], type=pa.int64()
    )

    # Same check with string columns, compared through to_pylist().
    text_batch = pa.RecordBatch.from_arrays(
        [
            pa.array(["x", None]),
            pa.array(["not_null", "not_null"]),
            pa.array(["is_null", "is_null"]),
        ],
        names=["a", "b", "c"],
    )
    text_frame = session.create_dataframe([[text_batch]])
    text_expr = f.nvl2(column("a"), column("b"), column("c")).alias("result")
    assert first_column(text_frame, text_expr).to_pylist() == ["not_null", "is_null"]
1609+
1610+
def test_ifnull(df):
    """Exercise ``f.ifnull`` and check that it agrees with ``f.nvl``.

    The expectations encode: the first argument's value where it is non-null,
    otherwise the second argument's value. The ``df`` fixture argument is not
    referenced by the body.
    """
    session = SessionContext()
    source_batch = pa.RecordBatch.from_arrays(
        [
            pa.array([None, 1, None, 4]),
            pa.array([10, 20, 30, 40]),
        ],
        names=["a", "b"],
    )
    frame = session.create_dataframe([[source_batch]])

    # Rows 0 and 2 have a null "a", so the value from "b" is expected there.
    ifnull_expr = f.ifnull(column("a"), column("b")).alias("result")
    ifnull_values = frame.select(ifnull_expr).collect()[0].column(0)
    assert ifnull_values == pa.array([10, 1, 30, 4], type=pa.int64())

    # The same inputs through nvl must yield an equal column.
    nvl_expr = f.nvl(column("a"), column("b")).alias("nvl_result")
    nvl_values = frame.select(nvl_expr).collect()[0].column(0)
    assert ifnull_values == nvl_values
0 commit comments